1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-19 16:55:37 +01:00

checked performance of new vector libaries.

Added check for c++11 support on the configure.ac
This commit is contained in:
neo 2015-05-26 12:02:54 +09:00
parent 9e29ac6549
commit 1a24801246
12 changed files with 398 additions and 59 deletions

View File

@ -84,7 +84,8 @@ DIST_COMMON = INSTALL NEWS README AUTHORS ChangeLog \
$(top_srcdir)/configure $(am__configure_deps) COPYING TODO \
compile config.guess config.sub depcomp install-sh missing
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/configure.ac
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_cxx_compile_stdcxx_11.m4 \
$(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
@ -212,6 +213,7 @@ ECHO_T = @ECHO_T@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
GREP = @GREP@
HAVE_CXX11 = @HAVE_CXX11@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@

1
aclocal.m4 vendored
View File

@ -1083,3 +1083,4 @@ AC_SUBST([am__tar])
AC_SUBST([am__untar])
]) # _AM_PROG_TAR
m4_include([m4/ax_cxx_compile_stdcxx_11.m4])

186
configure vendored
View File

@ -633,6 +633,7 @@ BUILD_COMMS_MPI_TRUE
EGREP
GREP
CXXCPP
HAVE_CXX11
RANLIB
OPENMP_CXXFLAGS
am__fastdepCXX_FALSE
@ -3965,6 +3966,191 @@ else
RANLIB="$ac_cv_prog_RANLIB"
fi
ax_cxx_compile_cxx11_required=true
ac_ext=cpp
ac_cpp='$CXXCPP $CPPFLAGS'
ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
ac_success=no
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX supports C++11 features by default" >&5
$as_echo_n "checking whether $CXX supports C++11 features by default... " >&6; }
if ${ax_cv_cxx_compile_cxx11+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
template <typename T>
struct check
{
static_assert(sizeof(int) <= sizeof(T), "not big enough");
};
struct Base {
virtual void f() {}
};
struct Child : public Base {
virtual void f() override {}
};
typedef check<check<bool>> right_angle_brackets;
int a;
decltype(a) b;
typedef check<int> check_type;
check_type c;
check_type&& cr = static_cast<check_type&&>(c);
auto d = a;
auto l = [](){};
// Prevent Clang error: unused variable 'l' [-Werror,-Wunused-variable]
struct use_l { use_l() { l(); } };
// http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae
// Clang 3.1 fails with headers of libstd++ 4.8.3 when using std::function because of this
namespace test_template_alias_sfinae {
struct foo {};
template<typename T>
using member = typename T::member_type;
template<typename T>
void func(...) {}
template<typename T>
void func(member<T>*) {}
void test();
void test() {
func<foo>(0);
}
}
_ACEOF
if ac_fn_cxx_try_compile "$LINENO"; then :
ax_cv_cxx_compile_cxx11=yes
else
ax_cv_cxx_compile_cxx11=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_cxx_compile_cxx11" >&5
$as_echo "$ax_cv_cxx_compile_cxx11" >&6; }
if test x$ax_cv_cxx_compile_cxx11 = xyes; then
ac_success=yes
fi
if test x$ac_success = xno; then
for switch in -std=c++11 -std=c++0x +std=c++11; do
cachevar=`$as_echo "ax_cv_cxx_compile_cxx11_$switch" | $as_tr_sh`
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX supports C++11 features with $switch" >&5
$as_echo_n "checking whether $CXX supports C++11 features with $switch... " >&6; }
if eval \${$cachevar+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_save_CXXFLAGS="$CXXFLAGS"
CXXFLAGS="$CXXFLAGS $switch"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
template <typename T>
struct check
{
static_assert(sizeof(int) <= sizeof(T), "not big enough");
};
struct Base {
virtual void f() {}
};
struct Child : public Base {
virtual void f() override {}
};
typedef check<check<bool>> right_angle_brackets;
int a;
decltype(a) b;
typedef check<int> check_type;
check_type c;
check_type&& cr = static_cast<check_type&&>(c);
auto d = a;
auto l = [](){};
// Prevent Clang error: unused variable 'l' [-Werror,-Wunused-variable]
struct use_l { use_l() { l(); } };
// http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae
// Clang 3.1 fails with headers of libstd++ 4.8.3 when using std::function because of this
namespace test_template_alias_sfinae {
struct foo {};
template<typename T>
using member = typename T::member_type;
template<typename T>
void func(...) {}
template<typename T>
void func(member<T>*) {}
void test();
void test() {
func<foo>(0);
}
}
_ACEOF
if ac_fn_cxx_try_compile "$LINENO"; then :
eval $cachevar=yes
else
eval $cachevar=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
CXXFLAGS="$ac_save_CXXFLAGS"
fi
eval ac_res=\$$cachevar
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
$as_echo "$ac_res" >&6; }
if eval test x\$$cachevar = xyes; then
CXXFLAGS="$CXXFLAGS $switch"
ac_success=yes
break
fi
done
fi
ac_ext=cpp
ac_cpp='$CXXCPP $CPPFLAGS'
ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
if test x$ax_cxx_compile_cxx11_required = xtrue; then
if test x$ac_success = xno; then
as_fn_error $? "*** A compiler with support for C++11 language features is required." "$LINENO" 5
fi
else
if test x$ac_success = xno; then
HAVE_CXX11=0
{ $as_echo "$as_me:${as_lineno-$LINENO}: No compiler with C++11 support was found" >&5
$as_echo "$as_me: No compiler with C++11 support was found" >&6;}
else
HAVE_CXX11=1
$as_echo "#define HAVE_CXX11 1" >>confdefs.h
fi
fi
# Checks for libraries.
#AX_GCC_VAR_ATTRIBUTE(aligned)

View File

@ -3,7 +3,7 @@
#
# Project Grid package
#
# Time-stamp: <2015-05-22 15:46:09 neo>
# Time-stamp: <2015-05-25 14:54:34 neo>
AC_PREREQ([2.63])
AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk])
@ -26,6 +26,8 @@ AC_LANG(C++)
AC_PROG_CXX
AC_OPENMP
AC_PROG_RANLIB
AX_CXX_COMPILE_STDCXX_11(noext, mandatory)
# Checks for libraries.
#AX_GCC_VAR_ATTRIBUTE(aligned)

View File

@ -16,6 +16,9 @@
/* GRID_COMMS_NONE */
#define GRID_COMMS_NONE 1
/* define if the compiler supports basic C++11 syntax */
/* #undef HAVE_CXX11 */
/* Define to 1 if you have the declaration of `be64toh', and to 0 if you
don't. */
#define HAVE_DECL_BE64TOH 1

View File

@ -15,6 +15,9 @@
/* GRID_COMMS_NONE */
#undef GRID_COMMS_NONE
/* define if the compiler supports basic C++11 syntax */
#undef HAVE_CXX11
/* Define to 1 if you have the declaration of `be64toh', and to 0 if you
don't. */
#undef HAVE_DECL_BE64TOH

View File

@ -103,6 +103,10 @@ namespace Grid {
inline void sub (RealF * __restrict__ y,const RealF * __restrict__ l,const RealF *__restrict__ r){ *y = (*l) - (*r); }
inline void add (RealF * __restrict__ y,const RealF * __restrict__ l,const RealF *__restrict__ r){ *y = (*l) + (*r); }
inline void vstream(ComplexF &l, const ComplexF &r){ l=r;}
inline void vstream(ComplexD &l, const ComplexD &r){ l=r;}
inline void vstream(RealF &l, const RealF &r){ l=r;}
inline void vstream(RealD &l, const RealD &r){ l=r;}
class Zero{};

View File

@ -160,13 +160,21 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,Lattice<vobj> &rhs, int
PARALLEL_NESTED_LOOP2
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
/*
int o =n*rhs._grid->_slice_stride[dimension];
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o+b);
if ( ocb&cbmask ) {
lhs._odata[lo+o+b]=rhs._odata[ro+o+b];
}
*/
int o =n*rhs._grid->_slice_stride[dimension]+b;
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
if ( ocb&cbmask ) {
//lhs._odata[lo+o]=rhs._odata[ro+o];
vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
}
}
}

View File

@ -38,6 +38,10 @@ public:
iScalar(scalar_type s) : _internal(s) {};// recurse down and hit the constructor for vector_type
iScalar(const Zero &z){ *this = zero; };
iScalar<vtype> & operator= (const Zero &hero){
zeroit(*this);
return *this;
@ -206,6 +210,16 @@ public:
iMatrix(const Zero &z){ *this = zero; };
iMatrix() =default;
// No copy constructor...
iMatrix& operator=(const iMatrix& rhs){
for(int i=0;i<N;i++)
for(int j=0;j<N;j++)
vstream(_internal[i][j],rhs._internal[i][j]);
return *this;
};
iMatrix<vtype,N> & operator= (const Zero &hero){
zeroit(*this);

View File

@ -4,7 +4,7 @@
Using intrinsics
*/
// Time-stamp: <2015-05-22 15:51:24 neo>
// Time-stamp: <2015-05-22 18:58:27 neo>
//----------------------------------------------------------------------
#include <immintrin.h>
@ -307,9 +307,7 @@ namespace Optimization {
conv.v = b;
switch (perm){
// 8x32 bits=>3 permutes
case 2:
conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1));
break;
case 2: conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); break;
case 1: conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2)); break;
case 0: conv.f = _mm256_permute2f128_ps(conv.f,conv.f,0x01); break;
default: assert(0); break;

View File

@ -0,0 +1,167 @@
# ============================================================================
# http://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx_11.html
# ============================================================================
#
# SYNOPSIS
#
# AX_CXX_COMPILE_STDCXX_11([ext|noext],[mandatory|optional])
#
# DESCRIPTION
#
# Check for baseline language coverage in the compiler for the C++11
# standard; if necessary, add switches to CXXFLAGS to enable support.
#
# The first argument, if specified, indicates whether you insist on an
# extended mode (e.g. -std=gnu++11) or a strict conformance mode (e.g.
# -std=c++11). If neither is specified, you get whatever works, with
# preference for an extended mode.
#
# The second argument, if specified 'mandatory' or if left unspecified,
# indicates that baseline C++11 support is required and that the macro
# should error out if no mode with that support is found. If specified
# 'optional', then configuration proceeds regardless, after defining
# HAVE_CXX11 if and only if a supporting mode is found.
#
# LICENSE
#
# Copyright (c) 2008 Benjamin Kosnik <bkoz@redhat.com>
# Copyright (c) 2012 Zack Weinberg <zackw@panix.com>
# Copyright (c) 2013 Roy Stogner <roystgnr@ices.utexas.edu>
# Copyright (c) 2014, 2015 Google Inc.; contributed by Alexey Sokolov <sokolov@google.com>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 11
m4_define([_AX_CXX_COMPILE_STDCXX_11_testbody], [[
template <typename T>
struct check
{
static_assert(sizeof(int) <= sizeof(T), "not big enough");
};
struct Base {
virtual void f() {}
};
struct Child : public Base {
virtual void f() override {}
};
typedef check<check<bool>> right_angle_brackets;
int a;
decltype(a) b;
typedef check<int> check_type;
check_type c;
check_type&& cr = static_cast<check_type&&>(c);
auto d = a;
auto l = [](){};
// Prevent Clang error: unused variable 'l' [-Werror,-Wunused-variable]
struct use_l { use_l() { l(); } };
// http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae
// Clang 3.1 fails with headers of libstd++ 4.8.3 when using std::function because of this
namespace test_template_alias_sfinae {
struct foo {};
template<typename T>
using member = typename T::member_type;
template<typename T>
void func(...) {}
template<typename T>
void func(member<T>*) {}
void test();
void test() {
func<foo>(0);
}
}
]])
AC_DEFUN([AX_CXX_COMPILE_STDCXX_11], [dnl
m4_if([$1], [], [],
[$1], [ext], [],
[$1], [noext], [],
[m4_fatal([invalid argument `$1' to AX_CXX_COMPILE_STDCXX_11])])dnl
m4_if([$2], [], [ax_cxx_compile_cxx11_required=true],
[$2], [mandatory], [ax_cxx_compile_cxx11_required=true],
[$2], [optional], [ax_cxx_compile_cxx11_required=false],
[m4_fatal([invalid second argument `$2' to AX_CXX_COMPILE_STDCXX_11])])
AC_LANG_PUSH([C++])dnl
ac_success=no
AC_CACHE_CHECK(whether $CXX supports C++11 features by default,
ax_cv_cxx_compile_cxx11,
[AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_11_testbody])],
[ax_cv_cxx_compile_cxx11=yes],
[ax_cv_cxx_compile_cxx11=no])])
if test x$ax_cv_cxx_compile_cxx11 = xyes; then
ac_success=yes
fi
m4_if([$1], [noext], [], [dnl
if test x$ac_success = xno; then
for switch in -std=gnu++11 -std=gnu++0x; do
cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx11_$switch])
AC_CACHE_CHECK(whether $CXX supports C++11 features with $switch,
$cachevar,
[ac_save_CXXFLAGS="$CXXFLAGS"
CXXFLAGS="$CXXFLAGS $switch"
AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_11_testbody])],
[eval $cachevar=yes],
[eval $cachevar=no])
CXXFLAGS="$ac_save_CXXFLAGS"])
if eval test x\$$cachevar = xyes; then
CXXFLAGS="$CXXFLAGS $switch"
ac_success=yes
break
fi
done
fi])
m4_if([$1], [ext], [], [dnl
if test x$ac_success = xno; then
dnl HP's aCC needs +std=c++11 according to:
dnl http://h21007.www2.hp.com/portal/download/files/unprot/aCxx/PDF_Release_Notes/769149-001.pdf
for switch in -std=c++11 -std=c++0x +std=c++11; do
cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx11_$switch])
AC_CACHE_CHECK(whether $CXX supports C++11 features with $switch,
$cachevar,
[ac_save_CXXFLAGS="$CXXFLAGS"
CXXFLAGS="$CXXFLAGS $switch"
AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_11_testbody])],
[eval $cachevar=yes],
[eval $cachevar=no])
CXXFLAGS="$ac_save_CXXFLAGS"])
if eval test x\$$cachevar = xyes; then
CXXFLAGS="$CXXFLAGS $switch"
ac_success=yes
break
fi
done
fi])
AC_LANG_POP([C++])
if test x$ax_cxx_compile_cxx11_required = xtrue; then
if test x$ac_success = xno; then
AC_MSG_ERROR([*** A compiler with support for C++11 language features is required.])
fi
else
if test x$ac_success = xno; then
HAVE_CXX11=0
AC_MSG_NOTICE([No compiler with C++11 support was found])
else
HAVE_CXX11=1
AC_DEFINE(HAVE_CXX11,1,
[define if the compiler supports basic C++11 syntax])
fi
AC_SUBST(HAVE_CXX11)
fi
])

View File

@ -105,7 +105,7 @@ int main (int argc, char ** argv)
fflush(stdout);
/*
cVec = cMat * cVec; // LatticeColourVector = LatticeColourMatrix * LatticeColourVector
sVec = sMat * sVec; // LatticeSpinVector = LatticeSpinMatrix * LatticeSpinVector
scVec= scMat * scVec;// LatticeSpinColourVector = LatticeSpinColourMatrix * LatticeSpinColourVector
@ -146,7 +146,7 @@ int main (int argc, char ** argv)
scalar=trace(scalar);
scalar=localInnerProduct(cVec,cVec);
scalar=localNorm2(cVec);
*/
// -=,+=,*=,()
// add,+,sub,-,mult,mac,*
// adj,conjugate
@ -162,50 +162,7 @@ int main (int argc, char ** argv)
scMat = sMat*scMat; // LatticeSpinColourMatrix = LatticeSpinMatrix * LatticeSpinColourMatrix
/*
#ifdef SSE4
///////// Tests the new class Grid_simd
std::complex<double> ctest(3.0,2.0);
std::complex<float> ctestf(3.0,2.0);
MyComplexF TestMe1(1.0); // fills only real part
MyComplexD TestMe2(ctest);
MyComplexD TestMe3(ctest);// compiler generate conversion of basic types
//MyRealF TestMe5(ctest);// Must generate compiler error
MyRealD TestRe1(2.0);
MyRealF TestRe2(3.0);
vone(TestRe2);
MyComplexF TestMe6(ctestf);
MyComplexF TestMe7(ctestf);
MyComplexD TheSum= TestMe2*TestMe3;
MyComplexF TheSumF= TestMe6*TestMe7;
double dsum[2];
_mm_store_pd(dsum, TheSum.v);
for (int i =0; i< 2; i++)
printf("%f\n", dsum[i]);
MyComplexD TheSumI = timesMinusI(TheSum);
MyComplexF TheSumIF = timesMinusI(TheSumF);
float fsum[4];
_mm_store_ps(fsum, TheSumF.v);
for (int i =0; i< 4; i++)
printf("%f\n", fsum[i]);
vstore(TheSumI, &ctest);
std::complex<float> sum = Reduce(TheSumF);
std::cout << ctest<< std::endl;
std::cout << sum<< std::endl;
#endif
*/
///////////////////////
/*
printf("DEBUG: calling 3.5 \n");
// Non-lattice (const objects) * Lattice
ColourMatrix cm;
SpinColourMatrix scm;
@ -225,7 +182,6 @@ int main (int argc, char ** argv)
vscm = vscm*cplx;
scMat = scMat*cplx;
printf("DEBUG: calling 3.7 \n");
scm = cplx*scm;
vscm = cplx*vscm;
scMat = cplx*scMat;
@ -233,14 +189,12 @@ int main (int argc, char ** argv)
vscm = myint*vscm;
scMat = scMat*myint;
printf("DEBUG: calling 3.9 \n");
scm = scm*mydouble;
vscm = vscm*mydouble;
scMat = scMat*mydouble;
scMat = mydouble*scMat;
cMat = mydouble*cMat;
printf("DEBUG: calling 4 \n");
sMat = adj(sMat); // LatticeSpinMatrix adjoint
sMat = iGammaFive*sMat; // SpinMatrix * LatticeSpinMatrix
sMat = GammaFive*sMat; // SpinMatrix * LatticeSpinMatrix
@ -293,8 +247,6 @@ int main (int argc, char ** argv)
pokeIndex<1> (c_m,c,0,0);
}
*/
FooBar = Bar;
/*
@ -392,7 +344,6 @@ int main (int argc, char ** argv)
t0=usecond();
for(int i=0;i<ncall;i++){
Fine.Barrier();
//Cshift(Bar,1,-1);
mult(FooBar,Foo,Cshift(Bar,1,-1));
//mult(FooBar,Foo,Bar);
//FooBar = Foo * Bar; // this is bad