mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-13 01:05:36 +00:00
Merge pull request #7 from coppolachan/master
Added full support for SSE4
This commit is contained in:
commit
96e5c5c6ca
44
Makefile.in
44
Makefile.in
@ -1,7 +1,7 @@
|
|||||||
# Makefile.in generated by automake 1.15 from Makefile.am.
|
# Makefile.in generated by automake 1.14.1 from Makefile.am.
|
||||||
# @configure_input@
|
# @configure_input@
|
||||||
|
|
||||||
# Copyright (C) 1994-2014 Free Software Foundation, Inc.
|
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
|
||||||
|
|
||||||
# This Makefile.in is free software; the Free Software Foundation
|
# This Makefile.in is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@ -14,17 +14,7 @@
|
|||||||
|
|
||||||
@SET_MAKE@
|
@SET_MAKE@
|
||||||
VPATH = @srcdir@
|
VPATH = @srcdir@
|
||||||
am__is_gnu_make = { \
|
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
|
||||||
if test -z '$(MAKELEVEL)'; then \
|
|
||||||
false; \
|
|
||||||
elif test -n '$(MAKE_HOST)'; then \
|
|
||||||
true; \
|
|
||||||
elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
|
|
||||||
true; \
|
|
||||||
else \
|
|
||||||
false; \
|
|
||||||
fi; \
|
|
||||||
}
|
|
||||||
am__make_running_with_option = \
|
am__make_running_with_option = \
|
||||||
case $${target_option-} in \
|
case $${target_option-} in \
|
||||||
?) ;; \
|
?) ;; \
|
||||||
@ -89,12 +79,14 @@ build_triplet = @build@
|
|||||||
host_triplet = @host@
|
host_triplet = @host@
|
||||||
target_triplet = @target@
|
target_triplet = @target@
|
||||||
subdir = .
|
subdir = .
|
||||||
|
DIST_COMMON = INSTALL NEWS README AUTHORS ChangeLog \
|
||||||
|
$(srcdir)/Makefile.in $(srcdir)/Makefile.am \
|
||||||
|
$(top_srcdir)/configure $(am__configure_deps) COPYING TODO \
|
||||||
|
compile config.guess config.sub depcomp install-sh missing
|
||||||
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
|
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
|
||||||
am__aclocal_m4_deps = $(top_srcdir)/configure.ac
|
am__aclocal_m4_deps = $(top_srcdir)/configure.ac
|
||||||
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
|
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
|
||||||
$(ACLOCAL_M4)
|
$(ACLOCAL_M4)
|
||||||
DIST_COMMON = $(srcdir)/Makefile.am $(top_srcdir)/configure \
|
|
||||||
$(am__configure_deps) $(am__DIST_COMMON)
|
|
||||||
am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
|
am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
|
||||||
configure.lineno config.status.lineno
|
configure.lineno config.status.lineno
|
||||||
mkinstalldirs = $(install_sh) -d
|
mkinstalldirs = $(install_sh) -d
|
||||||
@ -157,9 +149,6 @@ ETAGS = etags
|
|||||||
CTAGS = ctags
|
CTAGS = ctags
|
||||||
CSCOPE = cscope
|
CSCOPE = cscope
|
||||||
DIST_SUBDIRS = $(SUBDIRS)
|
DIST_SUBDIRS = $(SUBDIRS)
|
||||||
am__DIST_COMMON = $(srcdir)/Makefile.in AUTHORS COPYING ChangeLog \
|
|
||||||
INSTALL NEWS README TODO compile config.guess config.sub \
|
|
||||||
depcomp install-sh missing
|
|
||||||
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
|
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
|
||||||
distdir = $(PACKAGE)-$(VERSION)
|
distdir = $(PACKAGE)-$(VERSION)
|
||||||
top_distdir = $(distdir)
|
top_distdir = $(distdir)
|
||||||
@ -325,6 +314,7 @@ $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
|
|||||||
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \
|
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \
|
||||||
$(am__cd) $(top_srcdir) && \
|
$(am__cd) $(top_srcdir) && \
|
||||||
$(AUTOMAKE) --gnu Makefile
|
$(AUTOMAKE) --gnu Makefile
|
||||||
|
.PRECIOUS: Makefile
|
||||||
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
|
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
|
||||||
@case '$?' in \
|
@case '$?' in \
|
||||||
*config.status*) \
|
*config.status*) \
|
||||||
@ -531,15 +521,15 @@ dist-xz: distdir
|
|||||||
$(am__post_remove_distdir)
|
$(am__post_remove_distdir)
|
||||||
|
|
||||||
dist-tarZ: distdir
|
dist-tarZ: distdir
|
||||||
@echo WARNING: "Support for distribution archives compressed with" \
|
@echo WARNING: "Support for shar distribution archives is" \
|
||||||
"legacy program 'compress' is deprecated." >&2
|
"deprecated." >&2
|
||||||
@echo WARNING: "It will be removed altogether in Automake 2.0" >&2
|
@echo WARNING: "It will be removed altogether in Automake 2.0" >&2
|
||||||
tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z
|
tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z
|
||||||
$(am__post_remove_distdir)
|
$(am__post_remove_distdir)
|
||||||
|
|
||||||
dist-shar: distdir
|
dist-shar: distdir
|
||||||
@echo WARNING: "Support for shar distribution archives is" \
|
@echo WARNING: "Support for distribution archives compressed with" \
|
||||||
"deprecated." >&2
|
"legacy program 'compress' is deprecated." >&2
|
||||||
@echo WARNING: "It will be removed altogether in Automake 2.0" >&2
|
@echo WARNING: "It will be removed altogether in Automake 2.0" >&2
|
||||||
shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz
|
shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz
|
||||||
$(am__post_remove_distdir)
|
$(am__post_remove_distdir)
|
||||||
@ -575,17 +565,17 @@ distcheck: dist
|
|||||||
esac
|
esac
|
||||||
chmod -R a-w $(distdir)
|
chmod -R a-w $(distdir)
|
||||||
chmod u+w $(distdir)
|
chmod u+w $(distdir)
|
||||||
mkdir $(distdir)/_build $(distdir)/_build/sub $(distdir)/_inst
|
mkdir $(distdir)/_build $(distdir)/_inst
|
||||||
chmod a-w $(distdir)
|
chmod a-w $(distdir)
|
||||||
test -d $(distdir)/_build || exit 0; \
|
test -d $(distdir)/_build || exit 0; \
|
||||||
dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \
|
dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \
|
||||||
&& dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \
|
&& dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \
|
||||||
&& am__cwd=`pwd` \
|
&& am__cwd=`pwd` \
|
||||||
&& $(am__cd) $(distdir)/_build/sub \
|
&& $(am__cd) $(distdir)/_build \
|
||||||
&& ../../configure \
|
&& ../configure \
|
||||||
$(AM_DISTCHECK_CONFIGURE_FLAGS) \
|
$(AM_DISTCHECK_CONFIGURE_FLAGS) \
|
||||||
$(DISTCHECK_CONFIGURE_FLAGS) \
|
$(DISTCHECK_CONFIGURE_FLAGS) \
|
||||||
--srcdir=../.. --prefix="$$dc_install_base" \
|
--srcdir=.. --prefix="$$dc_install_base" \
|
||||||
&& $(MAKE) $(AM_MAKEFLAGS) \
|
&& $(MAKE) $(AM_MAKEFLAGS) \
|
||||||
&& $(MAKE) $(AM_MAKEFLAGS) dvi \
|
&& $(MAKE) $(AM_MAKEFLAGS) dvi \
|
||||||
&& $(MAKE) $(AM_MAKEFLAGS) check \
|
&& $(MAKE) $(AM_MAKEFLAGS) check \
|
||||||
@ -759,8 +749,6 @@ uninstall-am:
|
|||||||
maintainer-clean-generic mostlyclean mostlyclean-generic pdf \
|
maintainer-clean-generic mostlyclean mostlyclean-generic pdf \
|
||||||
pdf-am ps ps-am tags tags-am uninstall uninstall-am
|
pdf-am ps ps-am tags tags-am uninstall uninstall-am
|
||||||
|
|
||||||
.PRECIOUS: Makefile
|
|
||||||
|
|
||||||
|
|
||||||
# Tell versions [3.59,3.63) of GNU make to not export all variables.
|
# Tell versions [3.59,3.63) of GNU make to not export all variables.
|
||||||
# Otherwise a system limit (for SysV at least) may be exceeded.
|
# Otherwise a system limit (for SysV at least) may be exceeded.
|
||||||
|
61
aclocal.m4
vendored
61
aclocal.m4
vendored
@ -1,6 +1,6 @@
|
|||||||
# generated automatically by aclocal 1.15 -*- Autoconf -*-
|
# generated automatically by aclocal 1.14.1 -*- Autoconf -*-
|
||||||
|
|
||||||
# Copyright (C) 1996-2014 Free Software Foundation, Inc.
|
# Copyright (C) 1996-2013 Free Software Foundation, Inc.
|
||||||
|
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@ -20,7 +20,7 @@ You have another version of autoconf. It may work, but is not guaranteed to.
|
|||||||
If you have problems, you may need to regenerate the build system entirely.
|
If you have problems, you may need to regenerate the build system entirely.
|
||||||
To do so, use the procedure documented by the package, typically 'autoreconf'.])])
|
To do so, use the procedure documented by the package, typically 'autoreconf'.])])
|
||||||
|
|
||||||
# Copyright (C) 2002-2014 Free Software Foundation, Inc.
|
# Copyright (C) 2002-2013 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@ -32,10 +32,10 @@ To do so, use the procedure documented by the package, typically 'autoreconf'.])
|
|||||||
# generated from the m4 files accompanying Automake X.Y.
|
# generated from the m4 files accompanying Automake X.Y.
|
||||||
# (This private macro should not be called outside this file.)
|
# (This private macro should not be called outside this file.)
|
||||||
AC_DEFUN([AM_AUTOMAKE_VERSION],
|
AC_DEFUN([AM_AUTOMAKE_VERSION],
|
||||||
[am__api_version='1.15'
|
[am__api_version='1.14'
|
||||||
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
|
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
|
||||||
dnl require some minimum version. Point them to the right macro.
|
dnl require some minimum version. Point them to the right macro.
|
||||||
m4_if([$1], [1.15], [],
|
m4_if([$1], [1.14.1], [],
|
||||||
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
|
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
|
||||||
])
|
])
|
||||||
|
|
||||||
@ -51,14 +51,14 @@ m4_define([_AM_AUTOCONF_VERSION], [])
|
|||||||
# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced.
|
# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced.
|
||||||
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
|
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
|
||||||
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
|
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
|
||||||
[AM_AUTOMAKE_VERSION([1.15])dnl
|
[AM_AUTOMAKE_VERSION([1.14.1])dnl
|
||||||
m4_ifndef([AC_AUTOCONF_VERSION],
|
m4_ifndef([AC_AUTOCONF_VERSION],
|
||||||
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
|
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
|
||||||
_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
|
_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
|
||||||
|
|
||||||
# AM_AUX_DIR_EXPAND -*- Autoconf -*-
|
# AM_AUX_DIR_EXPAND -*- Autoconf -*-
|
||||||
|
|
||||||
# Copyright (C) 2001-2014 Free Software Foundation, Inc.
|
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@ -103,14 +103,15 @@ _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
|
|||||||
# configured tree to be moved without reconfiguration.
|
# configured tree to be moved without reconfiguration.
|
||||||
|
|
||||||
AC_DEFUN([AM_AUX_DIR_EXPAND],
|
AC_DEFUN([AM_AUX_DIR_EXPAND],
|
||||||
[AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl
|
[dnl Rely on autoconf to set up CDPATH properly.
|
||||||
# Expand $ac_aux_dir to an absolute path.
|
AC_PREREQ([2.50])dnl
|
||||||
am_aux_dir=`cd "$ac_aux_dir" && pwd`
|
# expand $ac_aux_dir to an absolute path
|
||||||
|
am_aux_dir=`cd $ac_aux_dir && pwd`
|
||||||
])
|
])
|
||||||
|
|
||||||
# AM_CONDITIONAL -*- Autoconf -*-
|
# AM_CONDITIONAL -*- Autoconf -*-
|
||||||
|
|
||||||
# Copyright (C) 1997-2014 Free Software Foundation, Inc.
|
# Copyright (C) 1997-2013 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@ -141,7 +142,7 @@ AC_CONFIG_COMMANDS_PRE(
|
|||||||
Usually this means the macro was only invoked conditionally.]])
|
Usually this means the macro was only invoked conditionally.]])
|
||||||
fi])])
|
fi])])
|
||||||
|
|
||||||
# Copyright (C) 1999-2014 Free Software Foundation, Inc.
|
# Copyright (C) 1999-2013 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@ -332,7 +333,7 @@ _AM_SUBST_NOTMAKE([am__nodep])dnl
|
|||||||
|
|
||||||
# Generate code to set up dependency tracking. -*- Autoconf -*-
|
# Generate code to set up dependency tracking. -*- Autoconf -*-
|
||||||
|
|
||||||
# Copyright (C) 1999-2014 Free Software Foundation, Inc.
|
# Copyright (C) 1999-2013 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@ -408,7 +409,7 @@ AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
|
|||||||
|
|
||||||
# Do all the work for Automake. -*- Autoconf -*-
|
# Do all the work for Automake. -*- Autoconf -*-
|
||||||
|
|
||||||
# Copyright (C) 1996-2014 Free Software Foundation, Inc.
|
# Copyright (C) 1996-2013 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@ -498,8 +499,8 @@ AC_REQUIRE([AC_PROG_MKDIR_P])dnl
|
|||||||
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
|
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
|
||||||
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
|
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
|
||||||
AC_SUBST([mkdir_p], ['$(MKDIR_P)'])
|
AC_SUBST([mkdir_p], ['$(MKDIR_P)'])
|
||||||
# We need awk for the "check" target (and possibly the TAP driver). The
|
# We need awk for the "check" target. The system "awk" is bad on
|
||||||
# system "awk" is bad on some platforms.
|
# some platforms.
|
||||||
AC_REQUIRE([AC_PROG_AWK])dnl
|
AC_REQUIRE([AC_PROG_AWK])dnl
|
||||||
AC_REQUIRE([AC_PROG_MAKE_SET])dnl
|
AC_REQUIRE([AC_PROG_MAKE_SET])dnl
|
||||||
AC_REQUIRE([AM_SET_LEADING_DOT])dnl
|
AC_REQUIRE([AM_SET_LEADING_DOT])dnl
|
||||||
@ -572,11 +573,7 @@ to "yes", and re-run configure.
|
|||||||
END
|
END
|
||||||
AC_MSG_ERROR([Your 'rm' program is bad, sorry.])
|
AC_MSG_ERROR([Your 'rm' program is bad, sorry.])
|
||||||
fi
|
fi
|
||||||
fi
|
fi])
|
||||||
dnl The trailing newline in this macro's definition is deliberate, for
|
|
||||||
dnl backward compatibility and to allow trailing 'dnl'-style comments
|
|
||||||
dnl after the AM_INIT_AUTOMAKE invocation. See automake bug#16841.
|
|
||||||
])
|
|
||||||
|
|
||||||
dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not
|
dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not
|
||||||
dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further
|
dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further
|
||||||
@ -605,7 +602,7 @@ for _am_header in $config_headers :; do
|
|||||||
done
|
done
|
||||||
echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
|
echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
|
||||||
|
|
||||||
# Copyright (C) 2001-2014 Free Software Foundation, Inc.
|
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@ -616,7 +613,7 @@ echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_co
|
|||||||
# Define $install_sh.
|
# Define $install_sh.
|
||||||
AC_DEFUN([AM_PROG_INSTALL_SH],
|
AC_DEFUN([AM_PROG_INSTALL_SH],
|
||||||
[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
|
[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
|
||||||
if test x"${install_sh+set}" != xset; then
|
if test x"${install_sh}" != xset; then
|
||||||
case $am_aux_dir in
|
case $am_aux_dir in
|
||||||
*\ * | *\ *)
|
*\ * | *\ *)
|
||||||
install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
|
install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
|
||||||
@ -626,7 +623,7 @@ if test x"${install_sh+set}" != xset; then
|
|||||||
fi
|
fi
|
||||||
AC_SUBST([install_sh])])
|
AC_SUBST([install_sh])])
|
||||||
|
|
||||||
# Copyright (C) 2003-2014 Free Software Foundation, Inc.
|
# Copyright (C) 2003-2013 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@ -647,7 +644,7 @@ AC_SUBST([am__leading_dot])])
|
|||||||
|
|
||||||
# Check to see how 'make' treats includes. -*- Autoconf -*-
|
# Check to see how 'make' treats includes. -*- Autoconf -*-
|
||||||
|
|
||||||
# Copyright (C) 2001-2014 Free Software Foundation, Inc.
|
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@ -697,7 +694,7 @@ rm -f confinc confmf
|
|||||||
|
|
||||||
# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
|
# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
|
||||||
|
|
||||||
# Copyright (C) 1997-2014 Free Software Foundation, Inc.
|
# Copyright (C) 1997-2013 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@ -736,7 +733,7 @@ fi
|
|||||||
|
|
||||||
# Helper functions for option handling. -*- Autoconf -*-
|
# Helper functions for option handling. -*- Autoconf -*-
|
||||||
|
|
||||||
# Copyright (C) 2001-2014 Free Software Foundation, Inc.
|
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@ -767,7 +764,7 @@ AC_DEFUN([_AM_IF_OPTION],
|
|||||||
|
|
||||||
# Check to make sure that the build environment is sane. -*- Autoconf -*-
|
# Check to make sure that the build environment is sane. -*- Autoconf -*-
|
||||||
|
|
||||||
# Copyright (C) 1996-2014 Free Software Foundation, Inc.
|
# Copyright (C) 1996-2013 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@ -848,7 +845,7 @@ AC_CONFIG_COMMANDS_PRE(
|
|||||||
rm -f conftest.file
|
rm -f conftest.file
|
||||||
])
|
])
|
||||||
|
|
||||||
# Copyright (C) 2009-2014 Free Software Foundation, Inc.
|
# Copyright (C) 2009-2013 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@ -908,7 +905,7 @@ AC_SUBST([AM_BACKSLASH])dnl
|
|||||||
_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl
|
_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl
|
||||||
])
|
])
|
||||||
|
|
||||||
# Copyright (C) 2001-2014 Free Software Foundation, Inc.
|
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@ -936,7 +933,7 @@ fi
|
|||||||
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
|
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
|
||||||
AC_SUBST([INSTALL_STRIP_PROGRAM])])
|
AC_SUBST([INSTALL_STRIP_PROGRAM])])
|
||||||
|
|
||||||
# Copyright (C) 2006-2014 Free Software Foundation, Inc.
|
# Copyright (C) 2006-2013 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@ -955,7 +952,7 @@ AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
|
|||||||
|
|
||||||
# Check how to create a tarball. -*- Autoconf -*-
|
# Check how to create a tarball. -*- Autoconf -*-
|
||||||
|
|
||||||
# Copyright (C) 2004-2014 Free Software Foundation, Inc.
|
# Copyright (C) 2004-2013 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
|
2
config.guess
vendored
2
config.guess
vendored
@ -1 +1 @@
|
|||||||
/opt/local/share/automake-1.15/config.guess
|
/usr/share/automake-1.14/config.guess
|
2
config.sub
vendored
2
config.sub
vendored
@ -1 +1 @@
|
|||||||
/opt/local/share/automake-1.15/config.sub
|
/usr/share/automake-1.14/config.sub
|
13
configure
vendored
13
configure
vendored
@ -2466,7 +2466,7 @@ test -n "$target_alias" &&
|
|||||||
NONENONEs,x,x, &&
|
NONENONEs,x,x, &&
|
||||||
program_prefix=${target_alias}-
|
program_prefix=${target_alias}-
|
||||||
|
|
||||||
am__api_version='1.15'
|
am__api_version='1.14'
|
||||||
|
|
||||||
# Find a good install program. We prefer a C program (faster),
|
# Find a good install program. We prefer a C program (faster),
|
||||||
# so one script is as good as another. But avoid the broken or
|
# so one script is as good as another. But avoid the broken or
|
||||||
@ -2638,8 +2638,8 @@ test "$program_suffix" != NONE &&
|
|||||||
ac_script='s/[\\$]/&&/g;s/;s,x,x,$//'
|
ac_script='s/[\\$]/&&/g;s/;s,x,x,$//'
|
||||||
program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"`
|
program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"`
|
||||||
|
|
||||||
# Expand $ac_aux_dir to an absolute path.
|
# expand $ac_aux_dir to an absolute path
|
||||||
am_aux_dir=`cd "$ac_aux_dir" && pwd`
|
am_aux_dir=`cd $ac_aux_dir && pwd`
|
||||||
|
|
||||||
if test x"${MISSING+set}" != xset; then
|
if test x"${MISSING+set}" != xset; then
|
||||||
case $am_aux_dir in
|
case $am_aux_dir in
|
||||||
@ -2658,7 +2658,7 @@ else
|
|||||||
$as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;}
|
$as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;}
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if test x"${install_sh+set}" != xset; then
|
if test x"${install_sh}" != xset; then
|
||||||
case $am_aux_dir in
|
case $am_aux_dir in
|
||||||
*\ * | *\ *)
|
*\ * | *\ *)
|
||||||
install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
|
install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
|
||||||
@ -2986,8 +2986,8 @@ MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"}
|
|||||||
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
|
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
|
||||||
mkdir_p='$(MKDIR_P)'
|
mkdir_p='$(MKDIR_P)'
|
||||||
|
|
||||||
# We need awk for the "check" target (and possibly the TAP driver). The
|
# We need awk for the "check" target. The system "awk" is bad on
|
||||||
# system "awk" is bad on some platforms.
|
# some platforms.
|
||||||
# Always define AMTAR for backward compatibility. Yes, it's still used
|
# Always define AMTAR for backward compatibility. Yes, it's still used
|
||||||
# in the wild :-( We should find a proper way to deprecate it ...
|
# in the wild :-( We should find a proper way to deprecate it ...
|
||||||
AMTAR='$${TAR-tar}'
|
AMTAR='$${TAR-tar}'
|
||||||
@ -3046,7 +3046,6 @@ END
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
ac_config_headers="$ac_config_headers lib/Grid_config.h"
|
ac_config_headers="$ac_config_headers lib/Grid_config.h"
|
||||||
|
|
||||||
# Check whether --enable-silent-rules was given.
|
# Check whether --enable-silent-rules was given.
|
||||||
|
@ -13,6 +13,7 @@
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <Grid.h>
|
#include <Grid.h>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <iterator>
|
||||||
|
|
||||||
#undef __X86_64
|
#undef __X86_64
|
||||||
#define MAC
|
#define MAC
|
||||||
|
@ -1,8 +1,10 @@
|
|||||||
//----------------------------------------------------------------------
|
//----------------------------------------------------------------------
|
||||||
/*! @file Grid_sse4.h
|
/*! @file Grid_sse4.h
|
||||||
@brief Optimization libraries
|
@brief Optimization libraries for SSE4 instructions set
|
||||||
|
|
||||||
|
Using intrinsics
|
||||||
*/
|
*/
|
||||||
// Time-stamp: <2015-05-19 17:06:51 neo>
|
// Time-stamp: <2015-05-20 16:45:39 neo>
|
||||||
//----------------------------------------------------------------------
|
//----------------------------------------------------------------------
|
||||||
|
|
||||||
#include <pmmintrin.h>
|
#include <pmmintrin.h>
|
||||||
@ -49,6 +51,20 @@ namespace Optimization {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct Vstream{
|
||||||
|
//Float
|
||||||
|
inline void operator()(__m128 a, __m128 b){
|
||||||
|
_mm_stream_ps((float *)&a,b);
|
||||||
|
}
|
||||||
|
//Double
|
||||||
|
inline void operator()(__m128d a, __m128d b){
|
||||||
|
_mm_stream_pd((double *)&a,b);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
struct Vset{
|
struct Vset{
|
||||||
// Complex float
|
// Complex float
|
||||||
@ -75,27 +91,20 @@ namespace Optimization {
|
|||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename Out_type, typename In_type>
|
||||||
struct Reduce{
|
struct Reduce{
|
||||||
//Complex float
|
//Need templated class to overload output type
|
||||||
inline Grid::ComplexF operator()(__m128 in){
|
//General form must generate error if compiled
|
||||||
union {
|
inline Out_type operator()(In_type in){
|
||||||
__m128 v1;
|
printf("Error, using wrong Reduce function\n");
|
||||||
float f[4];
|
exit(1);
|
||||||
} u128;
|
return 0;
|
||||||
u128.v1 = _mm_add_ps(in, _mm_shuffle_ps(in,in, 0b01001110)); // FIXME Prefer to use _MM_SHUFFLE macros
|
|
||||||
return Grid::ComplexF(u128.f[0], u128.f[1]);
|
|
||||||
}
|
}
|
||||||
//Complex double
|
|
||||||
inline Grid::ComplexD operator()(__m128d in){
|
|
||||||
printf("Missing complex double implementation -> FIX\n");
|
|
||||||
return Grid::ComplexD(0,0); // FIXME wrong
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////
|
||||||
// Arithmetic operations
|
// Arithmetic operations
|
||||||
/////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////
|
||||||
@ -129,25 +138,26 @@ namespace Optimization {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
struct MultComplex{
|
struct MultComplex{
|
||||||
// Complex float
|
// Complex float
|
||||||
inline __m128 operator()(__m128 a, __m128 b){
|
inline __m128 operator()(__m128 a, __m128 b){
|
||||||
__m128 ymm0,ymm1,ymm2;
|
__m128 ymm0,ymm1,ymm2;
|
||||||
ymm0 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(2,2,0,0)); // ymm0 <- ar ar,
|
ymm0 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(2,2,0,0)); // ymm0 <- ar ar,
|
||||||
ymm0 = _mm_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br
|
ymm0 = _mm_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br
|
||||||
ymm1 = _mm_shuffle_ps(b,b,_MM_SHUFFLE(2,3,0,1)); // ymm1 <- br,bi
|
ymm1 = _mm_shuffle_ps(b,b,_MM_SHUFFLE(2,3,0,1)); // ymm1 <- br,bi
|
||||||
ymm2 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(3,3,1,1)); // ymm2 <- ai,ai
|
ymm2 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(3,3,1,1)); // ymm2 <- ai,ai
|
||||||
ymm1 = _mm_mul_ps(ymm1,ymm2); // ymm1 <- br ai, ai bi
|
ymm1 = _mm_mul_ps(ymm1,ymm2); // ymm1 <- br ai, ai bi
|
||||||
return _mm_addsub_ps(ymm0,ymm1);
|
return _mm_addsub_ps(ymm0,ymm1);
|
||||||
}
|
}
|
||||||
// Complex double
|
// Complex double
|
||||||
inline __m128d operator()(__m128d a, __m128d b){
|
inline __m128d operator()(__m128d a, __m128d b){
|
||||||
__m128d ymm0,ymm1,ymm2;
|
__m128d ymm0,ymm1,ymm2;
|
||||||
ymm0 = _mm_shuffle_pd(a,a,0x0); // ymm0 <- ar ar,
|
ymm0 = _mm_shuffle_pd(a,a,0x0); // ymm0 <- ar ar,
|
||||||
ymm0 = _mm_mul_pd(ymm0,b); // ymm0 <- ar bi, ar br
|
ymm0 = _mm_mul_pd(ymm0,b); // ymm0 <- ar bi, ar br
|
||||||
ymm1 = _mm_shuffle_pd(b,b,0x1); // ymm1 <- br,bi b01
|
ymm1 = _mm_shuffle_pd(b,b,0x1); // ymm1 <- br,bi b01
|
||||||
ymm2 = _mm_shuffle_pd(a,a,0x3); // ymm2 <- ai,ai b11
|
ymm2 = _mm_shuffle_pd(a,a,0x3); // ymm2 <- ai,ai b11
|
||||||
ymm1 = _mm_mul_pd(ymm1,ymm2); // ymm1 <- br ai, ai bi
|
ymm1 = _mm_mul_pd(ymm1,ymm2); // ymm1 <- br ai, ai bi
|
||||||
return _mm_addsub_pd(ymm0,ymm1);
|
return _mm_addsub_pd(ymm0,ymm1);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -165,14 +175,112 @@ namespace Optimization {
|
|||||||
inline __m128i operator()(__m128i a, __m128i b){
|
inline __m128i operator()(__m128i a, __m128i b){
|
||||||
return _mm_mul_epi32(a,b);
|
return _mm_mul_epi32(a,b);
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct Conj{
|
||||||
|
// Complex single
|
||||||
|
inline __m128 operator()(__m128 in){
|
||||||
|
return _mm_xor_ps(_mm_addsub_ps(_mm_setzero_ps(),in), _mm_set1_ps(-0.f));
|
||||||
|
}
|
||||||
|
// Complex double
|
||||||
|
inline __m128d operator()(__m128d in){
|
||||||
|
return _mm_xor_pd(_mm_addsub_pd(_mm_setzero_pd(),in), _mm_set1_pd(-0.f));//untested
|
||||||
|
}
|
||||||
|
// do not define for integer input
|
||||||
|
};
|
||||||
|
|
||||||
|
// Multiply interleaved complex lanes by -i:  (re, im) -> (im, -re).
// The lanes of each pair are swapped, then the sign bit of the new imaginary
// lane (odd index) is flipped.  The second argument is not read; it is kept
// so the functor can still be called with two operands.
struct TimesMinusI{
  //Complex single: lanes are (re0, im0, re1, im1)
  inline __m128 operator()(__m128 in, __m128 ret){
    __m128 swapped = _mm_shuffle_ps(in,in,_MM_SHUFFLE(2,3,0,1)); // im0,re0,im1,re1
    // _mm_set_ps lists lanes from high to low: negate lanes 1 and 3
    return _mm_xor_ps(swapped, _mm_set_ps(-0.f, 0.f, -0.f, 0.f)); // im,-re
  }
  //Complex double: lanes are (re, im)
  inline __m128d operator()(__m128d in, __m128d ret){
    __m128d swapped = _mm_shuffle_pd(in,in,0x1); // im,re
    // _mm_set_pd lists lanes from high to low: negate lane 1
    return _mm_xor_pd(swapped, _mm_set_pd(-0.0, 0.0)); // im,-re
  }
};
|
||||||
|
|
||||||
|
// Multiply interleaved complex lanes by +i:  (re, im) -> (-im, re).
// The lanes of each pair are swapped, then the sign bit of the new real
// lane (even index) is flipped.  The second argument is not read; it is kept
// so the functor can still be called with two operands.
struct TimesI{
  //Complex single: lanes are (re0, im0, re1, im1)
  inline __m128 operator()(__m128 in, __m128 ret){
    __m128 swapped = _mm_shuffle_ps(in,in,_MM_SHUFFLE(2,3,0,1)); // im0,re0,im1,re1
    // _mm_set_ps lists lanes from high to low: negate lanes 0 and 2
    return _mm_xor_ps(swapped, _mm_set_ps(0.f, -0.f, 0.f, -0.f)); // -im,re
  }
  //Complex double: lanes are (re, im)
  inline __m128d operator()(__m128d in, __m128d ret){
    __m128d swapped = _mm_shuffle_pd(in,in,0x1); // im,re
    // _mm_set_pd lists lanes from high to low: negate lane 0
    return _mm_xor_pd(swapped, _mm_set_pd(0.0, -0.0)); // -im,re
  }
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//////////////////////////////////////////////
|
||||||
|
// Some Template specialization
|
||||||
|
|
||||||
|
//Complex float Reduce
|
||||||
|
template<>
|
||||||
|
inline Grid::ComplexF Reduce<Grid::ComplexF, __m128>::operator()(__m128 in){
|
||||||
|
union {
|
||||||
|
__m128 v1;
|
||||||
|
float f[4];
|
||||||
|
} u128;
|
||||||
|
u128.v1 = _mm_add_ps(in, _mm_shuffle_ps(in,in, 0b01001110)); // FIXME Prefer to use _MM_SHUFFLE macros
|
||||||
|
return Grid::ComplexF(u128.f[0], u128.f[1]);
|
||||||
|
}
|
||||||
|
//Real float Reduce
|
||||||
|
template<>
|
||||||
|
inline Grid::RealF Reduce<Grid::RealF, __m128>::operator()(__m128 in){
|
||||||
|
// FIXME Hack
|
||||||
|
const Grid::RealF * ptr = (const Grid::RealF *) ∈
|
||||||
|
Grid::RealF ret = 0;
|
||||||
|
for(int i=0;i< 4 ;i++){ // 4 number of simd lanes for float
|
||||||
|
ret = ret+ptr[i];
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//Complex double Reduce
|
||||||
|
template<>
|
||||||
|
inline Grid::ComplexD Reduce<Grid::ComplexD, __m128d>::operator()(__m128d in){
|
||||||
|
printf("Reduce : Missing good complex double implementation -> FIX\n");
|
||||||
|
return Grid::ComplexD(in[0], in[1]); // inefficient
|
||||||
|
}
|
||||||
|
|
||||||
|
//Real double Reduce
|
||||||
|
template<>
|
||||||
|
inline Grid::RealD Reduce<Grid::RealD, __m128d>::operator()(__m128d in){
|
||||||
|
// FIXME Hack
|
||||||
|
const Grid::RealD * ptr =(const Grid::RealD *) ∈
|
||||||
|
Grid::RealD ret = 0;
|
||||||
|
for(int i=0;i< 2 ;i++){// 2 number of simd lanes for float
|
||||||
|
ret = ret+ptr[i];
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Integer Reduce
|
||||||
|
template<>
|
||||||
|
inline Integer Reduce<Integer, __m128i>::operator()(__m128i in){
|
||||||
|
// FIXME unimplemented
|
||||||
|
printf("Reduce : Missing integer implementation -> FIX\n");
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Here assign types
|
// Here assign types
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
typedef __m128 SIMD_Ftype; // Single precision type
|
typedef __m128 SIMD_Ftype; // Single precision type
|
||||||
@ -180,15 +288,21 @@ namespace Grid {
|
|||||||
typedef __m128i SIMD_Itype; // Integer type
|
typedef __m128i SIMD_Itype; // Integer type
|
||||||
|
|
||||||
|
|
||||||
// Function names
|
// Function name aliases
|
||||||
typedef Optimization::Vsplat VsplatSIMD;
|
typedef Optimization::Vsplat VsplatSIMD;
|
||||||
typedef Optimization::Vstore VstoreSIMD;
|
typedef Optimization::Vstore VstoreSIMD;
|
||||||
|
typedef Optimization::Vset VsetSIMD;
|
||||||
|
typedef Optimization::Vstream VstreamSIMD;
|
||||||
|
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
|
||||||
|
|
||||||
|
|
||||||
// Arithmetic operations
|
// Arithmetic operations
|
||||||
typedef Optimization::Sum SumSIMD;
|
typedef Optimization::Sum SumSIMD;
|
||||||
typedef Optimization::Sub SubSIMD;
|
typedef Optimization::Sub SubSIMD;
|
||||||
typedef Optimization::Mult MultSIMD;
|
typedef Optimization::Mult MultSIMD;
|
||||||
typedef Optimization::MultComplex MultComplexSIMD;
|
typedef Optimization::MultComplex MultComplexSIMD;
|
||||||
typedef Optimization::Vset VsetSIMD;
|
typedef Optimization::Conj ConjSIMD;
|
||||||
|
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
||||||
|
typedef Optimization::TimesI TimesISIMD;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
/*! @file Grid_vector_types.h
|
/*! @file Grid_vector_types.h
|
||||||
@brief Defines templated class Grid_simd to deal with inner vector types
|
@brief Defines templated class Grid_simd to deal with inner vector types
|
||||||
*/
|
*/
|
||||||
// Time-stamp: <2015-05-19 17:20:36 neo>
|
// Time-stamp: <2015-05-20 17:31:55 neo>
|
||||||
//---------------------------------------------------------------------------
|
//---------------------------------------------------------------------------
|
||||||
#ifndef GRID_VECTOR_TYPES
|
#ifndef GRID_VECTOR_TYPES
|
||||||
#define GRID_VECTOR_TYPES
|
#define GRID_VECTOR_TYPES
|
||||||
@ -22,6 +22,16 @@ namespace Grid {
|
|||||||
typedef T type;
|
typedef T type;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Shorthand aliases that keep std::enable_if readable at the point of use.
//   Invoke<T>          : the nested T::type
//   EnableIf<C,R>      : R when C::value is true, substitution failure otherwise
//   NotEnableIf<C,R>   : R when C::value is false, substitution failure otherwise
template <typename Trait>
using Invoke = typename Trait::type;

template <typename Cond, typename Ret>
using EnableIf = Invoke< std::enable_if< Cond::value, Ret > >;

template <typename Cond, typename Ret>
using NotEnableIf = Invoke< std::enable_if< !Cond::value, Ret > >;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////
|
||||||
// Check for complexity with type traits
|
// Check for complexity with type traits
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@ -94,31 +104,32 @@ namespace Grid {
|
|||||||
// Initialise to 1,0,i for the correct types
|
// Initialise to 1,0,i for the correct types
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
// if not complex overload here
|
// if not complex overload here
|
||||||
template < class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 >
|
template < class S = Scalar_type, NotEnableIf<is_complex < S >,int> = 0 >
|
||||||
friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0); }
|
friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0); }
|
||||||
template < class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 >
|
template < class S = Scalar_type, NotEnableIf<is_complex < S >,int> = 0 >
|
||||||
friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0); }
|
friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0); }
|
||||||
|
|
||||||
// overload for complex type
|
// For complex types
|
||||||
template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 >
|
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
|
||||||
friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0,0.0); }
|
friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0,0.0); }
|
||||||
template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 >
|
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
|
||||||
friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0,0.0); }// use xor?
|
friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0,0.0); }// use xor?
|
||||||
|
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
|
||||||
// For integral type
|
friend inline void vcomplex_i(Grid_simd &ret){ vsplat(ret,0.0,1.0);}
|
||||||
template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 >
|
|
||||||
|
// For integral types
|
||||||
|
template < class S = Scalar_type, EnableIf<std::is_integral < S >, int> = 0 >
|
||||||
friend inline void vone(Grid_simd &ret) { vsplat(ret,1); }
|
friend inline void vone(Grid_simd &ret) { vsplat(ret,1); }
|
||||||
template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 >
|
template < class S = Scalar_type, EnableIf<std::is_integral < S >, int> = 0 >
|
||||||
friend inline void vzero(Grid_simd &ret) { vsplat(ret,0); }
|
friend inline void vzero(Grid_simd &ret) { vsplat(ret,0); }
|
||||||
template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 >
|
template < class S = Scalar_type, EnableIf<std::is_integral < S >, int> = 0 >
|
||||||
friend inline void vtrue (Grid_simd &ret){vsplat(ret,0xFFFFFFFF);}
|
friend inline void vtrue (Grid_simd &ret){vsplat(ret,0xFFFFFFFF);}
|
||||||
template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 >
|
template < class S = Scalar_type, EnableIf<std::is_integral < S >, int> = 0 >
|
||||||
friend inline void vfalse(vInteger &ret){vsplat(ret,0);}
|
friend inline void vfalse(vInteger &ret){vsplat(ret,0);}
|
||||||
|
|
||||||
|
|
||||||
// do not compile if real or integer, send an error message from the compiler
|
|
||||||
template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 >
|
|
||||||
friend inline void vcomplex_i(Grid_simd &ret){ vsplat(ret,0.0,1.0);}
|
|
||||||
|
|
||||||
////////////////////////////////////
|
////////////////////////////////////
|
||||||
// Arithmetic operator overloads +,-,*
|
// Arithmetic operator overloads +,-,*
|
||||||
@ -138,7 +149,7 @@ namespace Grid {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Distinguish between complex types and others
|
// Distinguish between complex types and others
|
||||||
template < class S = Scalar_type, typename std::enable_if < is_complex < S >::value, int >::type = 0 >
|
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
|
||||||
friend inline Grid_simd operator * (Grid_simd a, Grid_simd b)
|
friend inline Grid_simd operator * (Grid_simd a, Grid_simd b)
|
||||||
{
|
{
|
||||||
Grid_simd ret;
|
Grid_simd ret;
|
||||||
@ -147,7 +158,7 @@ namespace Grid {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Real/Integer types
|
// Real/Integer types
|
||||||
template < class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 >
|
template < class S = Scalar_type, NotEnableIf<is_complex < S >, int> = 0 >
|
||||||
friend inline Grid_simd operator * (Grid_simd a, Grid_simd b)
|
friend inline Grid_simd operator * (Grid_simd a, Grid_simd b)
|
||||||
{
|
{
|
||||||
Grid_simd ret;
|
Grid_simd ret;
|
||||||
@ -156,8 +167,6 @@ namespace Grid {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// FIXME: gonna remove these load/store, get, set, prefetch
|
// FIXME: gonna remove these load/store, get, set, prefetch
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
@ -170,14 +179,14 @@ namespace Grid {
|
|||||||
///////////////////////
|
///////////////////////
|
||||||
// overload if complex
|
// overload if complex
|
||||||
template < class S = Scalar_type >
|
template < class S = Scalar_type >
|
||||||
friend inline void vsplat(Grid_simd &ret, typename std::enable_if< is_complex < S >::value, S>::type c){
|
friend inline void vsplat(Grid_simd &ret, EnableIf<is_complex < S >, S> c){
|
||||||
Real a = real(c);
|
Real a = real(c);
|
||||||
Real b = imag(c);
|
Real b = imag(c);
|
||||||
vsplat(ret,a,b);
|
vsplat(ret,a,b);
|
||||||
}
|
}
|
||||||
|
|
||||||
// this only for the complex version
|
// this is only for the complex version
|
||||||
template < class S = Scalar_type, typename std::enable_if < is_complex < S >::value, int >::type = 0 >
|
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
|
||||||
friend inline void vsplat(Grid_simd &ret,Real a, Real b){
|
friend inline void vsplat(Grid_simd &ret,Real a, Real b){
|
||||||
ret.v = binary<Vector_type>(a, b, VsplatSIMD());
|
ret.v = binary<Vector_type>(a, b, VsplatSIMD());
|
||||||
}
|
}
|
||||||
@ -187,22 +196,45 @@ namespace Grid {
|
|||||||
ret.v = unary<Vector_type>(a, VsplatSIMD());
|
ret.v = unary<Vector_type>(a, VsplatSIMD());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///////////////////////
|
||||||
|
// Vstore
|
||||||
|
///////////////////////
|
||||||
friend inline void vstore(const Grid_simd &ret, Scalar_type *a){
|
friend inline void vstore(const Grid_simd &ret, Scalar_type *a){
|
||||||
binary<void>(ret.v, (Real*)a, VstoreSIMD());
|
binary<void>(ret.v, (Real*)a, VstoreSIMD());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///////////////////////
|
||||||
|
// Vstream
|
||||||
|
///////////////////////
|
||||||
|
friend inline void vstream(Grid_simd &out,const Grid_simd &in){
|
||||||
|
binary<void>(out.v, in.v, VstreamSIMD());
|
||||||
|
}
|
||||||
|
|
||||||
|
template < class S = Scalar_type, EnableIf<std::is_integral < S >, int> = 0 >
|
||||||
|
friend inline void vstream(Grid_simd &out,const Grid_simd &in){
|
||||||
|
out=in;
|
||||||
|
}
|
||||||
|
|
||||||
|
///////////////////////
|
||||||
|
// Vprefetch
|
||||||
|
///////////////////////
|
||||||
friend inline void vprefetch(const Grid_simd &v)
|
friend inline void vprefetch(const Grid_simd &v)
|
||||||
{
|
{
|
||||||
_mm_prefetch((const char*)&v.v,_MM_HINT_T0);
|
_mm_prefetch((const char*)&v.v,_MM_HINT_T0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
///////////////////////
|
||||||
|
// Reduce
|
||||||
|
///////////////////////
|
||||||
friend inline Scalar_type Reduce(const Grid_simd & in)
|
friend inline Scalar_type Reduce(const Grid_simd & in)
|
||||||
{
|
{
|
||||||
// FIXME add operator
|
return unary<Scalar_type>(in.v, ReduceSIMD<Scalar_type, Vector_type>());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
////////////////////////////
|
||||||
|
// operator scalar * simd
|
||||||
|
////////////////////////////
|
||||||
friend inline Grid_simd operator * (const Scalar_type &a, Grid_simd b){
|
friend inline Grid_simd operator * (const Scalar_type &a, Grid_simd b){
|
||||||
Grid_simd va;
|
Grid_simd va;
|
||||||
vsplat(va,a);
|
vsplat(va,a);
|
||||||
@ -215,25 +247,63 @@ namespace Grid {
|
|||||||
///////////////////////
|
///////////////////////
|
||||||
// Conjugate
|
// Conjugate
|
||||||
///////////////////////
|
///////////////////////
|
||||||
|
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
|
||||||
friend inline Grid_simd conjugate(const Grid_simd &in){
|
friend inline Grid_simd conjugate(const Grid_simd &in){
|
||||||
Grid_simd ret ; vzero(ret);
|
Grid_simd ret ;
|
||||||
// FIXME add operator
|
ret.v = unary<Vector_type>(in.v, ConjSIMD());
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
template < class S = Scalar_type, NotEnableIf<is_complex < S >, int> = 0 >
|
||||||
|
friend inline Grid_simd conjugate(const Grid_simd &in){
|
||||||
|
return in; // for real objects
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
///////////////////////
|
||||||
|
// timesMinusI
|
||||||
|
///////////////////////
|
||||||
|
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
|
||||||
|
friend inline void timesMinusI( Grid_simd &ret,const Grid_simd &in){
|
||||||
|
ret.v = binary<Vector_type>(in.v, ret.v, TimesMinusISIMD());
|
||||||
|
}
|
||||||
|
|
||||||
|
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
|
||||||
friend inline Grid_simd timesMinusI(const Grid_simd &in){
|
friend inline Grid_simd timesMinusI(const Grid_simd &in){
|
||||||
Grid_simd ret;
|
Grid_simd ret;
|
||||||
vzero(ret);
|
timesMinusI(ret,in);
|
||||||
// FIXME add operator
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
friend inline Grid_simd timesI(const Grid_simd &in){
|
|
||||||
Grid_simd ret; vzero(ret);
|
template < class S = Scalar_type, NotEnableIf<is_complex < S >, int> = 0 >
|
||||||
// FIXME add operator
|
friend inline Grid_simd timesMinusI(const Grid_simd &in){
|
||||||
return ret;
|
return in;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
///////////////////////
|
||||||
|
// timesI
|
||||||
|
///////////////////////
|
||||||
|
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
|
||||||
|
friend inline void timesI(Grid_simd &ret,const Grid_simd &in){
|
||||||
|
ret.v = binary<Vector_type>(in.v, ret.v, TimesISIMD());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
|
||||||
|
friend inline Grid_simd timesI(const Grid_simd &in){
|
||||||
|
Grid_simd ret;
|
||||||
|
timesI(ret,in);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
template < class S = Scalar_type, NotEnableIf<is_complex < S >, int> = 0 >
|
||||||
|
friend inline Grid_simd timesI(const Grid_simd &in){
|
||||||
|
return in;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
///////////////////////
|
||||||
// Unary negation
|
// Unary negation
|
||||||
|
///////////////////////
|
||||||
friend inline Grid_simd operator -(const Grid_simd &r) {
|
friend inline Grid_simd operator -(const Grid_simd &r) {
|
||||||
vComplexF ret;
|
vComplexF ret;
|
||||||
vzero(ret);
|
vzero(ret);
|
||||||
@ -257,41 +327,22 @@ namespace Grid {
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
friend inline void permute(Grid_simd &y,Grid_simd b,int perm)
|
|
||||||
{
|
|
||||||
Gpermute<Grid_simd>(y,b,perm);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
////////////////////////////////////////////////////////////////////
|
||||||
|
// General permute; assumes vector length is same across
|
||||||
|
// all subtypes; may not be a good assumption, but could
|
||||||
|
// add the vector width as a template param for BG/Q for example
|
||||||
|
////////////////////////////////////////////////////////////////////
|
||||||
friend inline void permute(Grid_simd &y,Grid_simd b,int perm)
|
friend inline void permute(Grid_simd &y,Grid_simd b,int perm)
|
||||||
{
|
{
|
||||||
Gpermute<Grid_simd>(y,b,perm);
|
Gpermute<Grid_simd>(y,b,perm);
|
||||||
}
|
}
|
||||||
friend inline void merge(Grid_simd &y,std::vector<Scalar_type *> &extracted)
|
|
||||||
{
|
|
||||||
Gmerge<Grid_simd,Scalar_type >(y,extracted);
|
|
||||||
}
|
|
||||||
friend inline void extract(const Grid_simd &y,std::vector<Scalar_type *> &extracted)
|
|
||||||
{
|
|
||||||
Gextract<Grid_simd,Scalar_type>(y,extracted);
|
|
||||||
}
|
|
||||||
friend inline void merge(Grid_simd &y,std::vector<Scalar_type > &extracted)
|
|
||||||
{
|
|
||||||
Gmerge<Grid_simd,Scalar_type >(y,extracted);
|
|
||||||
}
|
|
||||||
friend inline void extract(const Grid_simd &y,std::vector<Scalar_type > &extracted)
|
|
||||||
{
|
|
||||||
Gextract<Grid_simd,Scalar_type>(y,extracted);
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
};// end of Grid_simd class definition
|
};// end of Grid_simd class definition
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template<class scalar_type, class vector_type >
|
template<class scalar_type, class vector_type >
|
||||||
inline Grid_simd< scalar_type, vector_type> innerProduct(const Grid_simd< scalar_type, vector_type> & l, const Grid_simd< scalar_type, vector_type> & r)
|
inline Grid_simd< scalar_type, vector_type> innerProduct(const Grid_simd< scalar_type, vector_type> & l, const Grid_simd< scalar_type, vector_type> & r)
|
||||||
{
|
{
|
||||||
@ -315,7 +366,7 @@ namespace Grid {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Define available types (now change names to avoid clashing)
|
// Define available types (now change names to avoid clashing with the rest of the code)
|
||||||
|
|
||||||
typedef Grid_simd< float , SIMD_Ftype > MyRealF;
|
typedef Grid_simd< float , SIMD_Ftype > MyRealF;
|
||||||
typedef Grid_simd< double , SIMD_Dtype > MyRealD;
|
typedef Grid_simd< double , SIMD_Dtype > MyRealD;
|
||||||
@ -324,6 +375,29 @@ namespace Grid {
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////
|
||||||
|
// Temporary hack to keep independent from the rest of the code
|
||||||
|
template<> struct isGridTensor<MyRealD > {
|
||||||
|
static const bool value = false;
|
||||||
|
static const bool notvalue = true;
|
||||||
|
};
|
||||||
|
template<> struct isGridTensor<MyRealF > {
|
||||||
|
static const bool value = false;
|
||||||
|
static const bool notvalue = true;
|
||||||
|
};
|
||||||
|
template<> struct isGridTensor<MyComplexD > {
|
||||||
|
static const bool value = false;
|
||||||
|
static const bool notvalue = true;
|
||||||
|
};
|
||||||
|
template<> struct isGridTensor<MyComplexF > {
|
||||||
|
static const bool value = false;
|
||||||
|
static const bool notvalue = true;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -161,30 +161,40 @@ int main (int argc, char ** argv)
|
|||||||
///////// Tests the new class Grid_simd
|
///////// Tests the new class Grid_simd
|
||||||
std::complex<double> ctest(3.0,2.0);
|
std::complex<double> ctest(3.0,2.0);
|
||||||
std::complex<float> ctestf(3.0,2.0);
|
std::complex<float> ctestf(3.0,2.0);
|
||||||
MyComplexF TestMe1(1.0); // fill real part
|
MyComplexF TestMe1(1.0); // fills only real part
|
||||||
MyComplexD TestMe2(ctest);
|
MyComplexD TestMe2(ctest);
|
||||||
MyComplexD TestMe3(ctest);// compiler generate conversion of basic types
|
MyComplexD TestMe3(ctest);// compiler generate conversion of basic types
|
||||||
//MyRealF TestMe5(ctest);// Must generate compiler error
|
//MyRealF TestMe5(ctest);// Must generate compiler error
|
||||||
MyRealD TestMe4(2.0);
|
MyRealD TestRe1(2.0);
|
||||||
|
MyRealF TestRe2(3.0);
|
||||||
|
|
||||||
|
vone(TestRe2);
|
||||||
|
|
||||||
MyComplexF TestMe6(ctestf);
|
MyComplexF TestMe6(ctestf);
|
||||||
MyComplexF TestMe7(ctestf);
|
MyComplexF TestMe7(ctestf);
|
||||||
|
|
||||||
MyComplexD TheSum= TestMe2*TestMe3;
|
MyComplexD TheSum= TestMe2*TestMe3;
|
||||||
MyComplexF TheSumF= TestMe6*TestMe7;
|
MyComplexF TheSumF= TestMe6*TestMe7;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
double dsum[2];
|
double dsum[2];
|
||||||
_mm_store_pd(dsum, TheSum.v);
|
_mm_store_pd(dsum, TheSum.v);
|
||||||
for (int i =0; i< 2; i++)
|
for (int i =0; i< 2; i++)
|
||||||
printf("%f\n", dsum[i]);
|
printf("%f\n", dsum[i]);
|
||||||
|
MyComplexD TheSumI = timesMinusI(TheSum);
|
||||||
|
MyComplexF TheSumIF = timesMinusI(TheSumF);
|
||||||
|
|
||||||
float fsum[4];
|
float fsum[4];
|
||||||
_mm_store_ps(fsum, TheSumF.v);
|
_mm_store_ps(fsum, TheSumF.v);
|
||||||
for (int i =0; i< 4; i++)
|
for (int i =0; i< 4; i++)
|
||||||
printf("%f\n", fsum[i]);
|
printf("%f\n", fsum[i]);
|
||||||
|
|
||||||
vstore(TheSum, &ctest);
|
vstore(TheSumI, &ctest);
|
||||||
|
std::complex<float> sum = Reduce(TheSumF);
|
||||||
std::cout << ctest<< std::endl;
|
std::cout << ctest<< std::endl;
|
||||||
|
std::cout << sum<< std::endl;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
///////////////////////
|
///////////////////////
|
||||||
|
|
||||||
|
165
tests/Grid_simd_new.cc
Normal file
165
tests/Grid_simd_new.cc
Normal file
@ -0,0 +1,165 @@
|
|||||||
|
#include <Grid.h>
|
||||||
|
#include "simd/Grid_vector_types.h"
|
||||||
|
#include <parallelIO/GridNerscIO.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
using namespace Grid::QCD;
|
||||||
|
|
||||||
|
// Test functor: writes i1 + i2 into rr.
class funcPlus {
public:
  funcPlus() {}

  // Works for any type that provides operator+ and assignment.
  template<class vec>
  void operator()(vec &rr,vec &i1,vec &i2) const {
    rr = i1+i2;
  }

  // Label used when reporting results.
  std::string name(void) const {
    return "Plus";
  }
};
|
||||||
|
// Test functor: writes i1 - i2 into rr.
class funcMinus {
public:
  funcMinus() {}

  // Works for any type that provides operator- and assignment.
  template<class vec>
  void operator()(vec &rr,vec &i1,vec &i2) const {
    rr = i1-i2;
  }

  // Label used when reporting results.
  std::string name(void) const {
    return "Minus";
  }
};
|
||||||
|
// Test functor: writes i1 * i2 into rr.
class funcTimes {
public:
  funcTimes() {}

  // Works for any type that provides operator* and assignment.
  template<class vec>
  void operator()(vec &rr,vec &i1,vec &i2) const {
    rr = i1*i2;
  }

  // Label used when reporting results.
  std::string name(void) const {
    return "Times";
  }
};
|
||||||
|
// Test functor: writes conjugate(i1) into rr; the second input is ignored.
class funcConj {
public:
  funcConj() {}

  // conjugate() is resolved for the argument type at instantiation.
  template<class vec>
  void operator()(vec &rr,vec &i1,vec &i2) const {
    rr = conjugate(i1);
  }

  // Label used when reporting results.
  std::string name(void) const {
    return "Conj";
  }
};
|
||||||
|
// Test functor: writes adj(i1) into rr; the second input is ignored.
class funcAdj {
public:
  funcAdj() {}

  // adj() is resolved for the argument type at instantiation.
  template<class vec>
  void operator()(vec &rr,vec &i1,vec &i2) const {
    rr = adj(i1);
  }

  // Label used when reporting results.
  std::string name(void) const {
    return "Adj";
  }
};
|
||||||
|
|
||||||
|
// Test functor: writes timesI(i1) into rr; the second input is ignored.
class funcTimesI {
public:
  funcTimesI() {}

  // timesI() is resolved for the argument type at instantiation.
  template<class vec>
  void operator()(vec &rr,vec &i1,vec &i2) const {
    rr = timesI(i1);
  }

  // Label used when reporting results.
  std::string name(void) const {
    return "timesI";
  }
};
|
||||||
|
|
||||||
|
// Test functor: writes timesMinusI(i1) into rr; the second input is ignored.
class funcTimesMinusI {
public:
  funcTimesMinusI() {}

  // timesMinusI() is resolved for the argument type at instantiation.
  template<class vec>
  void operator()(vec &rr,vec &i1,vec &i2) const {
    rr = timesMinusI(i1);
  }

  // Label used when reporting results.
  std::string name(void) const {
    return "timesMinusI";
  }
};
|
||||||
|
|
||||||
|
template<class scal, class vec,class functor >
|
||||||
|
void Tester(const functor &func)
|
||||||
|
{
|
||||||
|
GridSerialRNG sRNG;
|
||||||
|
sRNG.SeedRandomDevice();
|
||||||
|
|
||||||
|
int Nsimd = vec::Nsimd();
|
||||||
|
|
||||||
|
std::vector<scal> input1(Nsimd);
|
||||||
|
std::vector<scal> input2(Nsimd);
|
||||||
|
std::vector<scal> result(Nsimd);
|
||||||
|
std::vector<scal> reference(Nsimd);
|
||||||
|
|
||||||
|
std::vector<vec,alignedAllocator<vec> > buf(3);
|
||||||
|
vec & v_input1 = buf[0];
|
||||||
|
vec & v_input2 = buf[1];
|
||||||
|
vec & v_result = buf[2];
|
||||||
|
|
||||||
|
|
||||||
|
for(int i=0;i<Nsimd;i++){
|
||||||
|
random(sRNG,input1[i]);
|
||||||
|
random(sRNG,input2[i]);
|
||||||
|
random(sRNG,result[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
merge<vec,scal>(v_input1,input1);
|
||||||
|
merge<vec,scal>(v_input2,input2);
|
||||||
|
merge<vec,scal>(v_result,result);
|
||||||
|
|
||||||
|
func(v_result,v_input1,v_input2);
|
||||||
|
|
||||||
|
for(int i=0;i<Nsimd;i++) {
|
||||||
|
func(reference[i],input1[i],input2[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
extract<vec,scal>(v_result,result);
|
||||||
|
std::cout << " " << func.name()<<std::endl;
|
||||||
|
|
||||||
|
int ok=0;
|
||||||
|
for(int i=0;i<Nsimd;i++){
|
||||||
|
if ( abs(reference[i]-result[i])>0){
|
||||||
|
std::cout<< "*****" << std::endl;
|
||||||
|
std::cout<< "["<<i<<"] "<< abs(reference[i]-result[i]) << " " <<reference[i]<< " " << result[i]<<std::endl;
|
||||||
|
ok++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ( ok==0 ) std::cout << " OK!" <<std::endl;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
std::vector<int> latt_size = GridDefaultLatt();
|
||||||
|
std::vector<int> simd_layout = GridDefaultSimd(4,MyComplexF::Nsimd());
|
||||||
|
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||||
|
|
||||||
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
std::vector<int> seeds({1,2,3,4});
|
||||||
|
|
||||||
|
// Insist that operations on random scalars gives
|
||||||
|
// identical results to on vectors.
|
||||||
|
|
||||||
|
std::cout << "==================================="<< std::endl;
|
||||||
|
std::cout << "Testing MyComplexF "<<std::endl;
|
||||||
|
std::cout << "==================================="<< std::endl;
|
||||||
|
|
||||||
|
Tester<ComplexF,MyComplexF>(funcTimesI());
|
||||||
|
Tester<ComplexF,MyComplexF>(funcTimesMinusI());
|
||||||
|
Tester<ComplexF,MyComplexF>(funcPlus());
|
||||||
|
Tester<ComplexF,MyComplexF>(funcMinus());
|
||||||
|
Tester<ComplexF,MyComplexF>(funcTimes());
|
||||||
|
Tester<ComplexF,MyComplexF>(funcConj());
|
||||||
|
Tester<ComplexF,MyComplexF>(funcAdj());
|
||||||
|
|
||||||
|
std::cout << "==================================="<< std::endl;
|
||||||
|
std::cout << "Testing MyComplexD "<<std::endl;
|
||||||
|
std::cout << "==================================="<< std::endl;
|
||||||
|
|
||||||
|
|
||||||
|
Tester<ComplexD,MyComplexD>(funcTimesI());
|
||||||
|
Tester<ComplexD,MyComplexD>(funcTimesMinusI());
|
||||||
|
Tester<ComplexD,MyComplexD>(funcPlus());
|
||||||
|
Tester<ComplexD,MyComplexD>(funcMinus());
|
||||||
|
Tester<ComplexD,MyComplexD>(funcTimes());
|
||||||
|
Tester<ComplexD,MyComplexD>(funcConj());
|
||||||
|
Tester<ComplexD,MyComplexD>(funcAdj());
|
||||||
|
|
||||||
|
std::cout << "==================================="<< std::endl;
|
||||||
|
std::cout << "Testing MyRealF "<<std::endl;
|
||||||
|
std::cout << "==================================="<< std::endl;
|
||||||
|
|
||||||
|
|
||||||
|
Tester<RealF,MyRealF>(funcPlus());
|
||||||
|
Tester<RealF,MyRealF>(funcMinus());
|
||||||
|
Tester<RealF,MyRealF>(funcTimes());
|
||||||
|
Tester<RealF,MyRealF>(funcAdj());
|
||||||
|
|
||||||
|
std::cout << "==================================="<< std::endl;
|
||||||
|
std::cout << "Testing MyRealD "<<std::endl;
|
||||||
|
std::cout << "==================================="<< std::endl;
|
||||||
|
|
||||||
|
Tester<RealD,MyRealD>(funcPlus());
|
||||||
|
Tester<RealD,MyRealD>(funcMinus());
|
||||||
|
Tester<RealD,MyRealD>(funcTimes());
|
||||||
|
Tester<RealD,MyRealD>(funcAdj());
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
@ -5,7 +5,7 @@ AM_LDFLAGS = -L$(top_builddir)/lib
|
|||||||
#
|
#
|
||||||
# Test code
|
# Test code
|
||||||
#
|
#
|
||||||
bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma Grid_simd Grid_rng Grid_remez Grid_rng_fixed
|
bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma Grid_simd Grid_rng Grid_remez Grid_rng_fixed Grid_simd_new
|
||||||
|
|
||||||
Grid_main_SOURCES = Grid_main.cc
|
Grid_main_SOURCES = Grid_main.cc
|
||||||
Grid_main_LDADD = -lGrid
|
Grid_main_LDADD = -lGrid
|
||||||
@ -33,3 +33,6 @@ Grid_stencil_LDADD = -lGrid
|
|||||||
|
|
||||||
Grid_simd_SOURCES = Grid_simd.cc
|
Grid_simd_SOURCES = Grid_simd.cc
|
||||||
Grid_simd_LDADD = -lGrid
|
Grid_simd_LDADD = -lGrid
|
||||||
|
|
||||||
|
Grid_simd_new_SOURCES = Grid_simd_new.cc
|
||||||
|
Grid_simd_new_LDADD = -lGrid
|
||||||
|
Loading…
Reference in New Issue
Block a user