1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00
This commit is contained in:
Peter Boyle 2015-05-23 09:36:08 +01:00
commit 5cf285bce9
11 changed files with 512 additions and 161 deletions

View File

@ -1,7 +1,7 @@
# Makefile.in generated by automake 1.15 from Makefile.am.
# Makefile.in generated by automake 1.14.1 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2014 Free Software Foundation, Inc.
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -14,17 +14,7 @@
@SET_MAKE@
VPATH = @srcdir@
am__is_gnu_make = { \
if test -z '$(MAKELEVEL)'; then \
false; \
elif test -n '$(MAKE_HOST)'; then \
true; \
elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
true; \
else \
false; \
fi; \
}
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
@ -89,12 +79,14 @@ build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
subdir = .
DIST_COMMON = INSTALL NEWS README AUTHORS ChangeLog \
$(srcdir)/Makefile.in $(srcdir)/Makefile.am \
$(top_srcdir)/configure $(am__configure_deps) COPYING TODO \
compile config.guess config.sub depcomp install-sh missing
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
DIST_COMMON = $(srcdir)/Makefile.am $(top_srcdir)/configure \
$(am__configure_deps) $(am__DIST_COMMON)
am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
configure.lineno config.status.lineno
mkinstalldirs = $(install_sh) -d
@ -157,9 +149,6 @@ ETAGS = etags
CTAGS = ctags
CSCOPE = cscope
DIST_SUBDIRS = $(SUBDIRS)
am__DIST_COMMON = $(srcdir)/Makefile.in AUTHORS COPYING ChangeLog \
INSTALL NEWS README TODO compile config.guess config.sub \
depcomp install-sh missing
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
distdir = $(PACKAGE)-$(VERSION)
top_distdir = $(distdir)
@ -325,6 +314,7 @@ $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --gnu Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
@ -531,15 +521,15 @@ dist-xz: distdir
$(am__post_remove_distdir)
dist-tarZ: distdir
@echo WARNING: "Support for distribution archives compressed with" \
"legacy program 'compress' is deprecated." >&2
@echo WARNING: "Support for shar distribution archives is" \
"deprecated." >&2
@echo WARNING: "It will be removed altogether in Automake 2.0" >&2
tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z
$(am__post_remove_distdir)
dist-shar: distdir
@echo WARNING: "Support for shar distribution archives is" \
"deprecated." >&2
@echo WARNING: "Support for distribution archives compressed with" \
"legacy program 'compress' is deprecated." >&2
@echo WARNING: "It will be removed altogether in Automake 2.0" >&2
shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz
$(am__post_remove_distdir)
@ -575,17 +565,17 @@ distcheck: dist
esac
chmod -R a-w $(distdir)
chmod u+w $(distdir)
mkdir $(distdir)/_build $(distdir)/_build/sub $(distdir)/_inst
mkdir $(distdir)/_build $(distdir)/_inst
chmod a-w $(distdir)
test -d $(distdir)/_build || exit 0; \
dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \
&& dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \
&& am__cwd=`pwd` \
&& $(am__cd) $(distdir)/_build/sub \
&& ../../configure \
&& $(am__cd) $(distdir)/_build \
&& ../configure \
$(AM_DISTCHECK_CONFIGURE_FLAGS) \
$(DISTCHECK_CONFIGURE_FLAGS) \
--srcdir=../.. --prefix="$$dc_install_base" \
--srcdir=.. --prefix="$$dc_install_base" \
&& $(MAKE) $(AM_MAKEFLAGS) \
&& $(MAKE) $(AM_MAKEFLAGS) dvi \
&& $(MAKE) $(AM_MAKEFLAGS) check \
@ -759,8 +749,6 @@ uninstall-am:
maintainer-clean-generic mostlyclean mostlyclean-generic pdf \
pdf-am ps ps-am tags tags-am uninstall uninstall-am
.PRECIOUS: Makefile
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.

61
aclocal.m4 vendored
View File

@ -1,6 +1,6 @@
# generated automatically by aclocal 1.15 -*- Autoconf -*-
# generated automatically by aclocal 1.14.1 -*- Autoconf -*-
# Copyright (C) 1996-2014 Free Software Foundation, Inc.
# Copyright (C) 1996-2013 Free Software Foundation, Inc.
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -20,7 +20,7 @@ You have another version of autoconf. It may work, but is not guaranteed to.
If you have problems, you may need to regenerate the build system entirely.
To do so, use the procedure documented by the package, typically 'autoreconf'.])])
# Copyright (C) 2002-2014 Free Software Foundation, Inc.
# Copyright (C) 2002-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -32,10 +32,10 @@ To do so, use the procedure documented by the package, typically 'autoreconf'.])
# generated from the m4 files accompanying Automake X.Y.
# (This private macro should not be called outside this file.)
AC_DEFUN([AM_AUTOMAKE_VERSION],
[am__api_version='1.15'
[am__api_version='1.14'
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
dnl require some minimum version. Point them to the right macro.
m4_if([$1], [1.15], [],
m4_if([$1], [1.14.1], [],
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
])
@ -51,14 +51,14 @@ m4_define([_AM_AUTOCONF_VERSION], [])
# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced.
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
[AM_AUTOMAKE_VERSION([1.15])dnl
[AM_AUTOMAKE_VERSION([1.14.1])dnl
m4_ifndef([AC_AUTOCONF_VERSION],
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
# AM_AUX_DIR_EXPAND -*- Autoconf -*-
# Copyright (C) 2001-2014 Free Software Foundation, Inc.
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -103,14 +103,15 @@ _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
# configured tree to be moved without reconfiguration.
AC_DEFUN([AM_AUX_DIR_EXPAND],
[AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl
# Expand $ac_aux_dir to an absolute path.
am_aux_dir=`cd "$ac_aux_dir" && pwd`
[dnl Rely on autoconf to set up CDPATH properly.
AC_PREREQ([2.50])dnl
# expand $ac_aux_dir to an absolute path
am_aux_dir=`cd $ac_aux_dir && pwd`
])
# AM_CONDITIONAL -*- Autoconf -*-
# Copyright (C) 1997-2014 Free Software Foundation, Inc.
# Copyright (C) 1997-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -141,7 +142,7 @@ AC_CONFIG_COMMANDS_PRE(
Usually this means the macro was only invoked conditionally.]])
fi])])
# Copyright (C) 1999-2014 Free Software Foundation, Inc.
# Copyright (C) 1999-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -332,7 +333,7 @@ _AM_SUBST_NOTMAKE([am__nodep])dnl
# Generate code to set up dependency tracking. -*- Autoconf -*-
# Copyright (C) 1999-2014 Free Software Foundation, Inc.
# Copyright (C) 1999-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -408,7 +409,7 @@ AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
# Do all the work for Automake. -*- Autoconf -*-
# Copyright (C) 1996-2014 Free Software Foundation, Inc.
# Copyright (C) 1996-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -498,8 +499,8 @@ AC_REQUIRE([AC_PROG_MKDIR_P])dnl
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
AC_SUBST([mkdir_p], ['$(MKDIR_P)'])
# We need awk for the "check" target (and possibly the TAP driver). The
# system "awk" is bad on some platforms.
# We need awk for the "check" target. The system "awk" is bad on
# some platforms.
AC_REQUIRE([AC_PROG_AWK])dnl
AC_REQUIRE([AC_PROG_MAKE_SET])dnl
AC_REQUIRE([AM_SET_LEADING_DOT])dnl
@ -572,11 +573,7 @@ to "yes", and re-run configure.
END
AC_MSG_ERROR([Your 'rm' program is bad, sorry.])
fi
fi
dnl The trailing newline in this macro's definition is deliberate, for
dnl backward compatibility and to allow trailing 'dnl'-style comments
dnl after the AM_INIT_AUTOMAKE invocation. See automake bug#16841.
])
fi])
dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not
dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further
@ -605,7 +602,7 @@ for _am_header in $config_headers :; do
done
echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
# Copyright (C) 2001-2014 Free Software Foundation, Inc.
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -616,7 +613,7 @@ echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_co
# Define $install_sh.
AC_DEFUN([AM_PROG_INSTALL_SH],
[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
if test x"${install_sh+set}" != xset; then
if test x"${install_sh}" != xset; then
case $am_aux_dir in
*\ * | *\ *)
install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
@ -626,7 +623,7 @@ if test x"${install_sh+set}" != xset; then
fi
AC_SUBST([install_sh])])
# Copyright (C) 2003-2014 Free Software Foundation, Inc.
# Copyright (C) 2003-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -647,7 +644,7 @@ AC_SUBST([am__leading_dot])])
# Check to see how 'make' treats includes. -*- Autoconf -*-
# Copyright (C) 2001-2014 Free Software Foundation, Inc.
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -697,7 +694,7 @@ rm -f confinc confmf
# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
# Copyright (C) 1997-2014 Free Software Foundation, Inc.
# Copyright (C) 1997-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -736,7 +733,7 @@ fi
# Helper functions for option handling. -*- Autoconf -*-
# Copyright (C) 2001-2014 Free Software Foundation, Inc.
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -767,7 +764,7 @@ AC_DEFUN([_AM_IF_OPTION],
# Check to make sure that the build environment is sane. -*- Autoconf -*-
# Copyright (C) 1996-2014 Free Software Foundation, Inc.
# Copyright (C) 1996-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -848,7 +845,7 @@ AC_CONFIG_COMMANDS_PRE(
rm -f conftest.file
])
# Copyright (C) 2009-2014 Free Software Foundation, Inc.
# Copyright (C) 2009-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -908,7 +905,7 @@ AC_SUBST([AM_BACKSLASH])dnl
_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl
])
# Copyright (C) 2001-2014 Free Software Foundation, Inc.
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -936,7 +933,7 @@ fi
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
AC_SUBST([INSTALL_STRIP_PROGRAM])])
# Copyright (C) 2006-2014 Free Software Foundation, Inc.
# Copyright (C) 2006-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -955,7 +952,7 @@ AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
# Check how to create a tarball. -*- Autoconf -*-
# Copyright (C) 2004-2014 Free Software Foundation, Inc.
# Copyright (C) 2004-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,

2
config.guess vendored
View File

@ -1 +1 @@
/opt/local/share/automake-1.15/config.guess
/usr/share/automake-1.14/config.guess

2
config.sub vendored
View File

@ -1 +1 @@
/opt/local/share/automake-1.15/config.sub
/usr/share/automake-1.14/config.sub

13
configure vendored
View File

@ -2466,7 +2466,7 @@ test -n "$target_alias" &&
NONENONEs,x,x, &&
program_prefix=${target_alias}-
am__api_version='1.15'
am__api_version='1.14'
# Find a good install program. We prefer a C program (faster),
# so one script is as good as another. But avoid the broken or
@ -2638,8 +2638,8 @@ test "$program_suffix" != NONE &&
ac_script='s/[\\$]/&&/g;s/;s,x,x,$//'
program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"`
# Expand $ac_aux_dir to an absolute path.
am_aux_dir=`cd "$ac_aux_dir" && pwd`
# expand $ac_aux_dir to an absolute path
am_aux_dir=`cd $ac_aux_dir && pwd`
if test x"${MISSING+set}" != xset; then
case $am_aux_dir in
@ -2658,7 +2658,7 @@ else
$as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;}
fi
if test x"${install_sh+set}" != xset; then
if test x"${install_sh}" != xset; then
case $am_aux_dir in
*\ * | *\ *)
install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
@ -2986,8 +2986,8 @@ MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"}
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
mkdir_p='$(MKDIR_P)'
# We need awk for the "check" target (and possibly the TAP driver). The
# system "awk" is bad on some platforms.
# We need awk for the "check" target. The system "awk" is bad on
# some platforms.
# Always define AMTAR for backward compatibility. Yes, it's still used
# in the wild :-( We should find a proper way to deprecate it ...
AMTAR='$${TAR-tar}'
@ -3046,7 +3046,6 @@ END
fi
ac_config_headers="$ac_config_headers lib/Grid_config.h"
# Check whether --enable-silent-rules was given.

View File

@ -14,6 +14,7 @@
#include <iterator>
#include <Grid.h>
#include <algorithm>
#include <iterator>
#undef __X86_64
#define MAC

View File

@ -1,8 +1,10 @@
//----------------------------------------------------------------------
/*! @file Grid_sse4.h
@brief Optimization libraries
@brief Optimization libraries for SSE4 instructions set
Using intrinsics
*/
// Time-stamp: <2015-05-19 17:06:51 neo>
// Time-stamp: <2015-05-20 16:45:39 neo>
//----------------------------------------------------------------------
#include <pmmintrin.h>
@ -49,6 +51,20 @@ namespace Optimization {
};
struct Vstream{
//Float
inline void operator()(__m128 a, __m128 b){
_mm_stream_ps((float *)&a,b);
}
//Double
inline void operator()(__m128d a, __m128d b){
_mm_stream_pd((double *)&a,b);
}
};
struct Vset{
// Complex float
@ -75,27 +91,20 @@ namespace Optimization {
};
template <typename Out_type, typename In_type>
struct Reduce{
//Complex float
inline Grid::ComplexF operator()(__m128 in){
union {
__m128 v1;
float f[4];
} u128;
u128.v1 = _mm_add_ps(in, _mm_shuffle_ps(in,in, 0b01001110)); // FIXME Prefer to use _MM_SHUFFLE macros
return Grid::ComplexF(u128.f[0], u128.f[1]);
//Need templated class to overload output type
//General form must generate error if compiled
inline Out_type operator()(In_type in){
printf("Error, using wrong Reduce function\n");
exit(1);
return 0;
}
//Complex double
inline Grid::ComplexD operator()(__m128d in){
printf("Missing complex double implementation -> FIX\n");
return Grid::ComplexD(0,0); // FIXME wrong
}
};
/////////////////////////////////////////////////////
// Arithmetic operations
/////////////////////////////////////////////////////
@ -129,25 +138,26 @@ namespace Optimization {
}
};
struct MultComplex{
// Complex float
inline __m128 operator()(__m128 a, __m128 b){
__m128 ymm0,ymm1,ymm2;
ymm0 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(2,2,0,0)); // ymm0 <- ar ar,
ymm0 = _mm_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br
ymm0 = _mm_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br
ymm1 = _mm_shuffle_ps(b,b,_MM_SHUFFLE(2,3,0,1)); // ymm1 <- br,bi
ymm2 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(3,3,1,1)); // ymm2 <- ai,ai
ymm1 = _mm_mul_ps(ymm1,ymm2); // ymm1 <- br ai, ai bi
ymm1 = _mm_mul_ps(ymm1,ymm2); // ymm1 <- br ai, ai bi
return _mm_addsub_ps(ymm0,ymm1);
}
// Complex double
inline __m128d operator()(__m128d a, __m128d b){
__m128d ymm0,ymm1,ymm2;
ymm0 = _mm_shuffle_pd(a,a,0x0); // ymm0 <- ar ar,
ymm0 = _mm_shuffle_pd(a,a,0x0); // ymm0 <- ar ar,
ymm0 = _mm_mul_pd(ymm0,b); // ymm0 <- ar bi, ar br
ymm1 = _mm_shuffle_pd(b,b,0x1); // ymm1 <- br,bi b01
ymm2 = _mm_shuffle_pd(a,a,0x3); // ymm2 <- ai,ai b11
ymm1 = _mm_mul_pd(ymm1,ymm2); // ymm1 <- br ai, ai bi
ymm1 = _mm_shuffle_pd(b,b,0x1); // ymm1 <- br,bi b01
ymm2 = _mm_shuffle_pd(a,a,0x3); // ymm2 <- ai,ai b11
ymm1 = _mm_mul_pd(ymm1,ymm2); // ymm1 <- br ai, ai bi
return _mm_addsub_pd(ymm0,ymm1);
}
};
@ -165,14 +175,112 @@ namespace Optimization {
inline __m128i operator()(__m128i a, __m128i b){
return _mm_mul_epi32(a,b);
}
};
struct Conj{
// Complex single
inline __m128 operator()(__m128 in){
return _mm_xor_ps(_mm_addsub_ps(_mm_setzero_ps(),in), _mm_set1_ps(-0.f));
}
// Complex double
inline __m128d operator()(__m128d in){
return _mm_xor_pd(_mm_addsub_pd(_mm_setzero_pd(),in), _mm_set1_pd(-0.f));//untested
}
// do not define for integer input
};
struct TimesMinusI{
//Complex single
inline __m128 operator()(__m128 in, __m128 ret){
__m128 tmp =_mm_addsub_ps(_mm_setzero_ps(),in); // r,-i
return _mm_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1));
}
//Complex double
inline __m128d operator()(__m128d in, __m128d ret){
__m128d tmp =_mm_addsub_pd(_mm_setzero_pd(),in); // r,-i
return _mm_shuffle_pd(tmp,tmp,0x1);
}
};
struct TimesI{
//Complex single
inline __m128 operator()(__m128 in, __m128 ret){
__m128 tmp =_mm_shuffle_ps(in,in,_MM_SHUFFLE(2,3,0,1));
return _mm_addsub_ps(_mm_setzero_ps(),tmp); // r,-i
}
//Complex double
inline __m128d operator()(__m128d in, __m128d ret){
__m128d tmp = _mm_shuffle_pd(in,in,0x1);
return _mm_addsub_pd(_mm_setzero_pd(),tmp); // r,-i
}
};
//////////////////////////////////////////////
// Some Template specialization
//Complex float Reduce
template<>
inline Grid::ComplexF Reduce<Grid::ComplexF, __m128>::operator()(__m128 in){
union {
__m128 v1;
float f[4];
} u128;
u128.v1 = _mm_add_ps(in, _mm_shuffle_ps(in,in, 0b01001110)); // FIXME Prefer to use _MM_SHUFFLE macros
return Grid::ComplexF(u128.f[0], u128.f[1]);
}
//Real float Reduce
template<>
inline Grid::RealF Reduce<Grid::RealF, __m128>::operator()(__m128 in){
// FIXME Hack
const Grid::RealF * ptr = (const Grid::RealF *) &in;
Grid::RealF ret = 0;
for(int i=0;i< 4 ;i++){ // 4 number of simd lanes for float
ret = ret+ptr[i];
}
return ret;
}
//Complex double Reduce
template<>
inline Grid::ComplexD Reduce<Grid::ComplexD, __m128d>::operator()(__m128d in){
printf("Reduce : Missing good complex double implementation -> FIX\n");
return Grid::ComplexD(in[0], in[1]); // inefficient
}
//Real double Reduce
template<>
inline Grid::RealD Reduce<Grid::RealD, __m128d>::operator()(__m128d in){
// FIXME Hack
const Grid::RealD * ptr =(const Grid::RealD *) &in;
Grid::RealD ret = 0;
for(int i=0;i< 2 ;i++){// 2 number of simd lanes for float
ret = ret+ptr[i];
}
return ret;
}
//Integer Reduce
template<>
inline Integer Reduce<Integer, __m128i>::operator()(__m128i in){
// FIXME unimplemented
printf("Reduce : Missing integer implementation -> FIX\n");
assert(0);
}
}
//////////////////////////////////////////////////////////////////////////////////////
// Here assign types
namespace Grid {
typedef __m128 SIMD_Ftype; // Single precision type
@ -180,15 +288,21 @@ namespace Grid {
typedef __m128i SIMD_Itype; // Integer type
// Function names
typedef Optimization::Vsplat VsplatSIMD;
typedef Optimization::Vstore VstoreSIMD;
// Function name aliases
typedef Optimization::Vsplat VsplatSIMD;
typedef Optimization::Vstore VstoreSIMD;
typedef Optimization::Vset VsetSIMD;
typedef Optimization::Vstream VstreamSIMD;
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
// Arithmetic operations
typedef Optimization::Sum SumSIMD;
typedef Optimization::Sub SubSIMD;
typedef Optimization::Mult MultSIMD;
typedef Optimization::MultComplex MultComplexSIMD;
typedef Optimization::Vset VsetSIMD;
typedef Optimization::Conj ConjSIMD;
typedef Optimization::TimesMinusI TimesMinusISIMD;
typedef Optimization::TimesI TimesISIMD;
}

View File

@ -2,7 +2,7 @@
/*! @file Grid_vector_types.h
@brief Defines templated class Grid_simd to deal with inner vector types
*/
// Time-stamp: <2015-05-19 17:20:36 neo>
// Time-stamp: <2015-05-20 17:31:55 neo>
//---------------------------------------------------------------------------
#ifndef GRID_VECTOR_TYPES
#define GRID_VECTOR_TYPES
@ -22,6 +22,16 @@ namespace Grid {
typedef T type;
};
// type alias used to simplify the syntax of std::enable_if
template <typename T> using Invoke =
typename T::type;
template <typename Condition, typename ReturnType> using EnableIf =
Invoke<std::enable_if<Condition::value, ReturnType>>;
template <typename Condition, typename ReturnType> using NotEnableIf =
Invoke<std::enable_if<!Condition::value, ReturnType>>;
////////////////////////////////////////////////////////
// Check for complexity with type traits
template <typename T>
@ -94,31 +104,32 @@ namespace Grid {
// Initialise to 1,0,i for the correct types
///////////////////////////////////////////////
// if not complex overload here
template < class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 >
template < class S = Scalar_type, NotEnableIf<is_complex < S >,int> = 0 >
friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0); }
template < class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 >
template < class S = Scalar_type, NotEnableIf<is_complex < S >,int> = 0 >
friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0); }
// overload for complex type
template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 >
// For complex types
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0,0.0); }
template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 >
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0,0.0); }// use xor?
// For integral type
template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 >
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
friend inline void vcomplex_i(Grid_simd &ret){ vsplat(ret,0.0,1.0);}
// For integral types
template < class S = Scalar_type, EnableIf<std::is_integral < S >, int> = 0 >
friend inline void vone(Grid_simd &ret) { vsplat(ret,1); }
template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 >
template < class S = Scalar_type, EnableIf<std::is_integral < S >, int> = 0 >
friend inline void vzero(Grid_simd &ret) { vsplat(ret,0); }
template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 >
template < class S = Scalar_type, EnableIf<std::is_integral < S >, int> = 0 >
friend inline void vtrue (Grid_simd &ret){vsplat(ret,0xFFFFFFFF);}
template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 >
template < class S = Scalar_type, EnableIf<std::is_integral < S >, int> = 0 >
friend inline void vfalse(vInteger &ret){vsplat(ret,0);}
// do not compile if real or integer, send an error message from the compiler
template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 >
friend inline void vcomplex_i(Grid_simd &ret){ vsplat(ret,0.0,1.0);}
////////////////////////////////////
// Arithmetic operator overloads +,-,*
@ -138,7 +149,7 @@ namespace Grid {
};
// Distinguish between complex types and others
template < class S = Scalar_type, typename std::enable_if < is_complex < S >::value, int >::type = 0 >
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
friend inline Grid_simd operator * (Grid_simd a, Grid_simd b)
{
Grid_simd ret;
@ -147,7 +158,7 @@ namespace Grid {
};
// Real/Integer types
template < class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 >
template < class S = Scalar_type, NotEnableIf<is_complex < S >, int> = 0 >
friend inline Grid_simd operator * (Grid_simd a, Grid_simd b)
{
Grid_simd ret;
@ -156,8 +167,6 @@ namespace Grid {
};
////////////////////////////////////////////////////////////////////////
// FIXME: gonna remove these load/store, get, set, prefetch
////////////////////////////////////////////////////////////////////////
@ -170,14 +179,14 @@ namespace Grid {
///////////////////////
// overload if complex
template < class S = Scalar_type >
friend inline void vsplat(Grid_simd &ret, typename std::enable_if< is_complex < S >::value, S>::type c){
friend inline void vsplat(Grid_simd &ret, EnableIf<is_complex < S >, S> c){
Real a = real(c);
Real b = imag(c);
vsplat(ret,a,b);
}
// this only for the complex version
template < class S = Scalar_type, typename std::enable_if < is_complex < S >::value, int >::type = 0 >
// this is only for the complex version
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
friend inline void vsplat(Grid_simd &ret,Real a, Real b){
ret.v = binary<Vector_type>(a, b, VsplatSIMD());
}
@ -187,22 +196,45 @@ namespace Grid {
ret.v = unary<Vector_type>(a, VsplatSIMD());
}
///////////////////////
// Vstore
///////////////////////
friend inline void vstore(const Grid_simd &ret, Scalar_type *a){
binary<void>(ret.v, (Real*)a, VstoreSIMD());
}
///////////////////////
// Vstream
///////////////////////
friend inline void vstream(Grid_simd &out,const Grid_simd &in){
binary<void>(out.v, in.v, VstreamSIMD());
}
template < class S = Scalar_type, EnableIf<std::is_integral < S >, int> = 0 >
friend inline void vstream(Grid_simd &out,const Grid_simd &in){
out=in;
}
///////////////////////
// Vprefetch
///////////////////////
friend inline void vprefetch(const Grid_simd &v)
{
_mm_prefetch((const char*)&v.v,_MM_HINT_T0);
}
///////////////////////
// Reduce
///////////////////////
friend inline Scalar_type Reduce(const Grid_simd & in)
{
// FIXME add operator
return unary<Scalar_type>(in.v, ReduceSIMD<Scalar_type, Vector_type>());
}
////////////////////////////
// opreator scalar * simd
////////////////////////////
friend inline Grid_simd operator * (const Scalar_type &a, Grid_simd b){
Grid_simd va;
vsplat(va,a);
@ -215,25 +247,63 @@ namespace Grid {
///////////////////////
// Conjugate
///////////////////////
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
friend inline Grid_simd conjugate(const Grid_simd &in){
Grid_simd ret ; vzero(ret);
// FIXME add operator
Grid_simd ret ;
ret.v = unary<Vector_type>(in.v, ConjSIMD());
return ret;
}
template < class S = Scalar_type, NotEnableIf<is_complex < S >, int> = 0 >
friend inline Grid_simd conjugate(const Grid_simd &in){
return in; // for real objects
}
///////////////////////
// timesMinusI
///////////////////////
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
friend inline void timesMinusI( Grid_simd &ret,const Grid_simd &in){
ret.v = binary<Vector_type>(in.v, ret.v, TimesMinusISIMD());
}
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
friend inline Grid_simd timesMinusI(const Grid_simd &in){
Grid_simd ret;
vzero(ret);
// FIXME add operator
timesMinusI(ret,in);
return ret;
}
friend inline Grid_simd timesI(const Grid_simd &in){
Grid_simd ret; vzero(ret);
// FIXME add operator
return ret;
template < class S = Scalar_type, NotEnableIf<is_complex < S >, int> = 0 >
friend inline Grid_simd timesMinusI(const Grid_simd &in){
return in;
}
///////////////////////
// timesI
///////////////////////
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
friend inline void timesI(Grid_simd &ret,const Grid_simd &in){
ret.v = binary<Vector_type>(in.v, ret.v, TimesISIMD());
}
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
friend inline Grid_simd timesI(const Grid_simd &in){
Grid_simd ret;
timesI(ret,in);
return ret;
}
template < class S = Scalar_type, NotEnableIf<is_complex < S >, int> = 0 >
friend inline Grid_simd timesI(const Grid_simd &in){
return in;
}
///////////////////////
// Unary negation
///////////////////////
friend inline Grid_simd operator -(const Grid_simd &r) {
vComplexF ret;
vzero(ret);
@ -257,41 +327,22 @@ namespace Grid {
friend inline void permute(Grid_simd &y,Grid_simd b,int perm)
{
Gpermute<Grid_simd>(y,b,perm);
}
/*
////////////////////////////////////////////////////////////////////
// General permute; assumes vector length is same across
// all subtypes; may not be a good assumption, but could
// add the vector width as a template param for BG/Q for example
////////////////////////////////////////////////////////////////////
friend inline void permute(Grid_simd &y,Grid_simd b,int perm)
{
Gpermute<Grid_simd>(y,b,perm);
}
friend inline void merge(Grid_simd &y,std::vector<Scalar_type *> &extracted)
{
Gmerge<Grid_simd,Scalar_type >(y,extracted);
}
friend inline void extract(const Grid_simd &y,std::vector<Scalar_type *> &extracted)
{
Gextract<Grid_simd,Scalar_type>(y,extracted);
}
friend inline void merge(Grid_simd &y,std::vector<Scalar_type > &extracted)
{
Gmerge<Grid_simd,Scalar_type >(y,extracted);
}
friend inline void extract(const Grid_simd &y,std::vector<Scalar_type > &extracted)
{
Gextract<Grid_simd,Scalar_type>(y,extracted);
}
*/
};// end of Grid_simd class definition
template<class scalar_type, class vector_type >
inline Grid_simd< scalar_type, vector_type> innerProduct(const Grid_simd< scalar_type, vector_type> & l, const Grid_simd< scalar_type, vector_type> & r)
{
@ -315,7 +366,7 @@ namespace Grid {
}
// Define available types (now change names to avoid clashing)
// Define available types (now change names to avoid clashing with the rest of the code)
typedef Grid_simd< float , SIMD_Ftype > MyRealF;
typedef Grid_simd< double , SIMD_Dtype > MyRealD;
@ -324,6 +375,29 @@ namespace Grid {
////////////////////////////////////////////////////////////////////
// Temporary hack to keep independent from the rest of the code
template<> struct isGridTensor<MyRealD > {
static const bool value = false;
static const bool notvalue = true;
};
template<> struct isGridTensor<MyRealF > {
static const bool value = false;
static const bool notvalue = true;
};
template<> struct isGridTensor<MyComplexD > {
static const bool value = false;
static const bool notvalue = true;
};
template<> struct isGridTensor<MyComplexF > {
static const bool value = false;
static const bool notvalue = true;
};
}
#endif

View File

@ -161,30 +161,40 @@ int main (int argc, char ** argv)
///////// Tests the new class Grid_simd
std::complex<double> ctest(3.0,2.0);
std::complex<float> ctestf(3.0,2.0);
MyComplexF TestMe1(1.0); // fill real part
MyComplexF TestMe1(1.0); // fills only real part
MyComplexD TestMe2(ctest);
MyComplexD TestMe3(ctest);// compiler generate conversion of basic types
//MyRealF TestMe5(ctest);// Must generate compiler error
MyRealD TestMe4(2.0);
MyRealD TestRe1(2.0);
MyRealF TestRe2(3.0);
vone(TestRe2);
MyComplexF TestMe6(ctestf);
MyComplexF TestMe7(ctestf);
MyComplexD TheSum= TestMe2*TestMe3;
MyComplexF TheSumF= TestMe6*TestMe7;
double dsum[2];
_mm_store_pd(dsum, TheSum.v);
for (int i =0; i< 2; i++)
printf("%f\n", dsum[i]);
MyComplexD TheSumI = timesMinusI(TheSum);
MyComplexF TheSumIF = timesMinusI(TheSumF);
float fsum[4];
_mm_store_ps(fsum, TheSumF.v);
for (int i =0; i< 4; i++)
printf("%f\n", fsum[i]);
vstore(TheSum, &ctest);
vstore(TheSumI, &ctest);
std::complex<float> sum = Reduce(TheSumF);
std::cout << ctest<< std::endl;
std::cout << sum<< std::endl;
#endif
///////////////////////

165
tests/Grid_simd_new.cc Normal file
View File

@ -0,0 +1,165 @@
#include <Grid.h>
#include "simd/Grid_vector_types.h"
#include <parallelIO/GridNerscIO.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
class funcPlus {
public:
funcPlus() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1+i2;}
std::string name(void) const { return std::string("Plus"); }
};
class funcMinus {
public:
funcMinus() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1-i2;}
std::string name(void) const { return std::string("Minus"); }
};
class funcTimes {
public:
funcTimes() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1*i2;}
std::string name(void) const { return std::string("Times"); }
};
class funcConj {
public:
funcConj() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = conjugate(i1);}
std::string name(void) const { return std::string("Conj"); }
};
class funcAdj {
public:
funcAdj() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = adj(i1);}
std::string name(void) const { return std::string("Adj"); }
};
class funcTimesI {
public:
funcTimesI() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = timesI(i1);}
std::string name(void) const { return std::string("timesI"); }
};
class funcTimesMinusI {
public:
funcTimesMinusI() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = timesMinusI(i1);}
std::string name(void) const { return std::string("timesMinusI"); }
};
template<class scal, class vec,class functor >
void Tester(const functor &func)
{
GridSerialRNG sRNG;
sRNG.SeedRandomDevice();
int Nsimd = vec::Nsimd();
std::vector<scal> input1(Nsimd);
std::vector<scal> input2(Nsimd);
std::vector<scal> result(Nsimd);
std::vector<scal> reference(Nsimd);
std::vector<vec,alignedAllocator<vec> > buf(3);
vec & v_input1 = buf[0];
vec & v_input2 = buf[1];
vec & v_result = buf[2];
for(int i=0;i<Nsimd;i++){
random(sRNG,input1[i]);
random(sRNG,input2[i]);
random(sRNG,result[i]);
}
merge<vec,scal>(v_input1,input1);
merge<vec,scal>(v_input2,input2);
merge<vec,scal>(v_result,result);
func(v_result,v_input1,v_input2);
for(int i=0;i<Nsimd;i++) {
func(reference[i],input1[i],input2[i]);
}
extract<vec,scal>(v_result,result);
std::cout << " " << func.name()<<std::endl;
int ok=0;
for(int i=0;i<Nsimd;i++){
if ( abs(reference[i]-result[i])>0){
std::cout<< "*****" << std::endl;
std::cout<< "["<<i<<"] "<< abs(reference[i]-result[i]) << " " <<reference[i]<< " " << result[i]<<std::endl;
ok++;
}
}
if ( ok==0 ) std::cout << " OK!" <<std::endl;
}
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(4,MyComplexF::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
std::vector<int> seeds({1,2,3,4});
// Insist that operations on random scalars gives
// identical results to on vectors.
std::cout << "==================================="<< std::endl;
std::cout << "Testing MyComplexF "<<std::endl;
std::cout << "==================================="<< std::endl;
Tester<ComplexF,MyComplexF>(funcTimesI());
Tester<ComplexF,MyComplexF>(funcTimesMinusI());
Tester<ComplexF,MyComplexF>(funcPlus());
Tester<ComplexF,MyComplexF>(funcMinus());
Tester<ComplexF,MyComplexF>(funcTimes());
Tester<ComplexF,MyComplexF>(funcConj());
Tester<ComplexF,MyComplexF>(funcAdj());
std::cout << "==================================="<< std::endl;
std::cout << "Testing MyComplexD "<<std::endl;
std::cout << "==================================="<< std::endl;
Tester<ComplexD,MyComplexD>(funcTimesI());
Tester<ComplexD,MyComplexD>(funcTimesMinusI());
Tester<ComplexD,MyComplexD>(funcPlus());
Tester<ComplexD,MyComplexD>(funcMinus());
Tester<ComplexD,MyComplexD>(funcTimes());
Tester<ComplexD,MyComplexD>(funcConj());
Tester<ComplexD,MyComplexD>(funcAdj());
std::cout << "==================================="<< std::endl;
std::cout << "Testing MyRealF "<<std::endl;
std::cout << "==================================="<< std::endl;
Tester<RealF,MyRealF>(funcPlus());
Tester<RealF,MyRealF>(funcMinus());
Tester<RealF,MyRealF>(funcTimes());
Tester<RealF,MyRealF>(funcAdj());
std::cout << "==================================="<< std::endl;
std::cout << "Testing MyRealD "<<std::endl;
std::cout << "==================================="<< std::endl;
Tester<RealD,MyRealD>(funcPlus());
Tester<RealD,MyRealD>(funcMinus());
Tester<RealD,MyRealD>(funcTimes());
Tester<RealD,MyRealD>(funcAdj());
Grid_finalize();
}

View File

@ -5,7 +5,7 @@ AM_LDFLAGS = -L$(top_builddir)/lib
#
# Test code
#
bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma Grid_simd Grid_rng Grid_remez Grid_rng_fixed
bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma Grid_simd Grid_rng Grid_remez Grid_rng_fixed Grid_simd_new
Grid_main_SOURCES = Grid_main.cc
Grid_main_LDADD = -lGrid
@ -33,3 +33,6 @@ Grid_stencil_LDADD = -lGrid
Grid_simd_SOURCES = Grid_simd.cc
Grid_simd_LDADD = -lGrid
Grid_simd_new_SOURCES = Grid_simd_new.cc
Grid_simd_new_LDADD = -lGrid