From 1a2480124654523a02014cdc9d042c5c100e2117 Mon Sep 17 00:00:00 2001 From: neo Date: Tue, 26 May 2015 12:02:54 +0900 Subject: [PATCH] checked performance of new vector libraries. Added check for c++11 support on the configure.ac --- Makefile.in | 4 +- aclocal.m4 | 1 + configure | 186 ++++++++++++++++++++++++++++++++ configure.ac | 4 +- lib/Grid_config.h | 3 + lib/Grid_config.h.in | 3 + lib/Grid_simd.h | 4 + lib/cshift/Grid_cshift_common.h | 12 ++- lib/math/Grid_math_tensors.h | 14 +++ lib/simd/Grid_avx.h | 6 +- m4/ax_cxx_compile_stdcxx_11.m4 | 167 ++++++++++++++++++++++++++++ tests/Grid_main.cc | 53 +-------- 12 files changed, 398 insertions(+), 59 deletions(-) create mode 100644 m4/ax_cxx_compile_stdcxx_11.m4 diff --git a/Makefile.in b/Makefile.in index b6894ef6..d473c2df 100644 --- a/Makefile.in +++ b/Makefile.in @@ -84,7 +84,8 @@ DIST_COMMON = INSTALL NEWS README AUTHORS ChangeLog \ $(top_srcdir)/configure $(am__configure_deps) COPYING TODO \ compile config.guess config.sub depcomp install-sh missing ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 -am__aclocal_m4_deps = $(top_srcdir)/configure.ac +am__aclocal_m4_deps = $(top_srcdir)/m4/ax_cxx_compile_stdcxx_11.m4 \ + $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ @@ -212,6 +213,7 @@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ GREP = @GREP@ +HAVE_CXX11 = @HAVE_CXX11@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ diff --git a/aclocal.m4 b/aclocal.m4 index 389763bf..a3d1bc9c 100644 --- a/aclocal.m4 +++ b/aclocal.m4 @@ -1083,3 +1083,4 @@ AC_SUBST([am__tar]) AC_SUBST([am__untar]) ]) # _AM_PROG_TAR +m4_include([m4/ax_cxx_compile_stdcxx_11.m4]) diff --git a/configure b/configure index 8c9e8c59..b7bd49f0 100755 --- a/configure +++ b/configure @@ -633,6 +633,7 @@ BUILD_COMMS_MPI_TRUE EGREP GREP CXXCPP +HAVE_CXX11 RANLIB OPENMP_CXXFLAGS 
am__fastdepCXX_FALSE @@ -3965,6 +3966,191 @@ else RANLIB="$ac_cv_prog_RANLIB" fi + ax_cxx_compile_cxx11_required=true + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + ac_success=no + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX supports C++11 features by default" >&5 +$as_echo_n "checking whether $CXX supports C++11 features by default... " >&6; } +if ${ax_cv_cxx_compile_cxx11+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + template + struct check + { + static_assert(sizeof(int) <= sizeof(T), "not big enough"); + }; + + struct Base { + virtual void f() {} + }; + struct Child : public Base { + virtual void f() override {} + }; + + typedef check> right_angle_brackets; + + int a; + decltype(a) b; + + typedef check check_type; + check_type c; + check_type&& cr = static_cast(c); + + auto d = a; + auto l = [](){}; + // Prevent Clang error: unused variable 'l' [-Werror,-Wunused-variable] + struct use_l { use_l() { l(); } }; + + // http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae + // Clang 3.1 fails with headers of libstd++ 4.8.3 when using std::function because of this + namespace test_template_alias_sfinae { + struct foo {}; + + template + using member = typename T::member_type; + + template + void func(...) 
{} + + template + void func(member*) {} + + void test(); + + void test() { + func(0); + } + } + +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ax_cv_cxx_compile_cxx11=yes +else + ax_cv_cxx_compile_cxx11=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_cxx_compile_cxx11" >&5 +$as_echo "$ax_cv_cxx_compile_cxx11" >&6; } + if test x$ax_cv_cxx_compile_cxx11 = xyes; then + ac_success=yes + fi + + + + if test x$ac_success = xno; then + for switch in -std=c++11 -std=c++0x +std=c++11; do + cachevar=`$as_echo "ax_cv_cxx_compile_cxx11_$switch" | $as_tr_sh` + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX supports C++11 features with $switch" >&5 +$as_echo_n "checking whether $CXX supports C++11 features with $switch... " >&6; } +if eval \${$cachevar+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_save_CXXFLAGS="$CXXFLAGS" + CXXFLAGS="$CXXFLAGS $switch" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + template + struct check + { + static_assert(sizeof(int) <= sizeof(T), "not big enough"); + }; + + struct Base { + virtual void f() {} + }; + struct Child : public Base { + virtual void f() override {} + }; + + typedef check> right_angle_brackets; + + int a; + decltype(a) b; + + typedef check check_type; + check_type c; + check_type&& cr = static_cast(c); + + auto d = a; + auto l = [](){}; + // Prevent Clang error: unused variable 'l' [-Werror,-Wunused-variable] + struct use_l { use_l() { l(); } }; + + // http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae + // Clang 3.1 fails with headers of libstd++ 4.8.3 when using std::function because of this + namespace test_template_alias_sfinae { + struct foo {}; + + template + using member = typename T::member_type; + + template + void func(...) 
{} + + template + void func(member*) {} + + void test(); + + void test() { + func(0); + } + } + +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + eval $cachevar=yes +else + eval $cachevar=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXXFLAGS="$ac_save_CXXFLAGS" +fi +eval ac_res=\$$cachevar + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + if eval test x\$$cachevar = xyes; then + CXXFLAGS="$CXXFLAGS $switch" + ac_success=yes + break + fi + done + fi + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + if test x$ax_cxx_compile_cxx11_required = xtrue; then + if test x$ac_success = xno; then + as_fn_error $? "*** A compiler with support for C++11 language features is required." "$LINENO" 5 + fi + else + if test x$ac_success = xno; then + HAVE_CXX11=0 + { $as_echo "$as_me:${as_lineno-$LINENO}: No compiler with C++11 support was found" >&5 +$as_echo "$as_me: No compiler with C++11 support was found" >&6;} + else + HAVE_CXX11=1 + +$as_echo "#define HAVE_CXX11 1" >>confdefs.h + + fi + + + fi + + # Checks for libraries. #AX_GCC_VAR_ATTRIBUTE(aligned) diff --git a/configure.ac b/configure.ac index bfcbfcef..54a36eb1 100644 --- a/configure.ac +++ b/configure.ac @@ -3,7 +3,7 @@ # # Project Grid package # -# Time-stamp: <2015-05-22 15:46:09 neo> +# Time-stamp: <2015-05-25 14:54:34 neo> AC_PREREQ([2.63]) AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk]) @@ -26,6 +26,8 @@ AC_LANG(C++) AC_PROG_CXX AC_OPENMP AC_PROG_RANLIB +AX_CXX_COMPILE_STDCXX_11(noext, mandatory) + # Checks for libraries. 
#AX_GCC_VAR_ATTRIBUTE(aligned) diff --git a/lib/Grid_config.h b/lib/Grid_config.h index 78582f3e..2397894f 100644 --- a/lib/Grid_config.h +++ b/lib/Grid_config.h @@ -16,6 +16,9 @@ /* GRID_COMMS_NONE */ #define GRID_COMMS_NONE 1 +/* define if the compiler supports basic C++11 syntax */ +/* #undef HAVE_CXX11 */ + /* Define to 1 if you have the declaration of `be64toh', and to 0 if you don't. */ #define HAVE_DECL_BE64TOH 1 diff --git a/lib/Grid_config.h.in b/lib/Grid_config.h.in index b7f56d5b..6f05d6cb 100644 --- a/lib/Grid_config.h.in +++ b/lib/Grid_config.h.in @@ -15,6 +15,9 @@ /* GRID_COMMS_NONE */ #undef GRID_COMMS_NONE +/* define if the compiler supports basic C++11 syntax */ +#undef HAVE_CXX11 + /* Define to 1 if you have the declaration of `be64toh', and to 0 if you don't. */ #undef HAVE_DECL_BE64TOH diff --git a/lib/Grid_simd.h b/lib/Grid_simd.h index 9484c0d6..bdcb2d1b 100644 --- a/lib/Grid_simd.h +++ b/lib/Grid_simd.h @@ -103,6 +103,10 @@ namespace Grid { inline void sub (RealF * __restrict__ y,const RealF * __restrict__ l,const RealF *__restrict__ r){ *y = (*l) - (*r); } inline void add (RealF * __restrict__ y,const RealF * __restrict__ l,const RealF *__restrict__ r){ *y = (*l) + (*r); } + inline void vstream(ComplexF &l, const ComplexF &r){ l=r;} + inline void vstream(ComplexD &l, const ComplexD &r){ l=r;} + inline void vstream(RealF &l, const RealF &r){ l=r;} + inline void vstream(RealD &l, const RealD &r){ l=r;} class Zero{}; diff --git a/lib/cshift/Grid_cshift_common.h b/lib/cshift/Grid_cshift_common.h index 65c0cb87..90fc10b7 100644 --- a/lib/cshift/Grid_cshift_common.h +++ b/lib/cshift/Grid_cshift_common.h @@ -160,13 +160,21 @@ template void Copy_plane(Lattice& lhs,Lattice &rhs, int PARALLEL_NESTED_LOOP2 for(int n=0;n_slice_nblock[dimension];n++){ for(int b=0;b_slice_block[dimension];b++){ - + /* int o =n*rhs._grid->_slice_stride[dimension]; int ocb=1<CheckerBoardFromOindex(o+b); if ( ocb&cbmask ) { lhs._odata[lo+o+b]=rhs._odata[ro+o+b]; } - + */ + + 
int o =n*rhs._grid->_slice_stride[dimension]+b; + int ocb=1<CheckerBoardFromOindex(o); + if ( ocb&cbmask ) { + //lhs._odata[lo+o]=rhs._odata[ro+o]; + vstream(lhs._odata[lo+o],rhs._odata[ro+o]); + } + } } diff --git a/lib/math/Grid_math_tensors.h b/lib/math/Grid_math_tensors.h index a0424576..94cd3d58 100644 --- a/lib/math/Grid_math_tensors.h +++ b/lib/math/Grid_math_tensors.h @@ -38,6 +38,10 @@ public: iScalar(scalar_type s) : _internal(s) {};// recurse down and hit the constructor for vector_type iScalar(const Zero &z){ *this = zero; }; + + + + iScalar & operator= (const Zero &hero){ zeroit(*this); return *this; @@ -206,6 +210,16 @@ public: iMatrix(const Zero &z){ *this = zero; }; iMatrix() =default; + // No copy constructor... + + iMatrix& operator=(const iMatrix& rhs){ + for(int i=0;i & operator= (const Zero &hero){ zeroit(*this); diff --git a/lib/simd/Grid_avx.h b/lib/simd/Grid_avx.h index ea796122..303f30c5 100644 --- a/lib/simd/Grid_avx.h +++ b/lib/simd/Grid_avx.h @@ -4,7 +4,7 @@ Using intrinsics */ -// Time-stamp: <2015-05-22 15:51:24 neo> +// Time-stamp: <2015-05-22 18:58:27 neo> //---------------------------------------------------------------------- #include @@ -307,9 +307,7 @@ namespace Optimization { conv.v = b; switch (perm){ // 8x32 bits=>3 permutes - case 2: - conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); - break; + case 2: conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); break; case 1: conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2)); break; case 0: conv.f = _mm256_permute2f128_ps(conv.f,conv.f,0x01); break; default: assert(0); break; diff --git a/m4/ax_cxx_compile_stdcxx_11.m4 b/m4/ax_cxx_compile_stdcxx_11.m4 new file mode 100644 index 00000000..395b13d2 --- /dev/null +++ b/m4/ax_cxx_compile_stdcxx_11.m4 @@ -0,0 +1,167 @@ +# ============================================================================ +# http://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx_11.html +# 
============================================================================ +# +# SYNOPSIS +# +# AX_CXX_COMPILE_STDCXX_11([ext|noext],[mandatory|optional]) +# +# DESCRIPTION +# +# Check for baseline language coverage in the compiler for the C++11 +# standard; if necessary, add switches to CXXFLAGS to enable support. +# +# The first argument, if specified, indicates whether you insist on an +# extended mode (e.g. -std=gnu++11) or a strict conformance mode (e.g. +# -std=c++11). If neither is specified, you get whatever works, with +# preference for an extended mode. +# +# The second argument, if specified 'mandatory' or if left unspecified, +# indicates that baseline C++11 support is required and that the macro +# should error out if no mode with that support is found. If specified +# 'optional', then configuration proceeds regardless, after defining +# HAVE_CXX11 if and only if a supporting mode is found. +# +# LICENSE +# +# Copyright (c) 2008 Benjamin Kosnik +# Copyright (c) 2012 Zack Weinberg +# Copyright (c) 2013 Roy Stogner +# Copyright (c) 2014, 2015 Google Inc.; contributed by Alexey Sokolov +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. 
+ +#serial 11 + +m4_define([_AX_CXX_COMPILE_STDCXX_11_testbody], [[ + template + struct check + { + static_assert(sizeof(int) <= sizeof(T), "not big enough"); + }; + + struct Base { + virtual void f() {} + }; + struct Child : public Base { + virtual void f() override {} + }; + + typedef check> right_angle_brackets; + + int a; + decltype(a) b; + + typedef check check_type; + check_type c; + check_type&& cr = static_cast(c); + + auto d = a; + auto l = [](){}; + // Prevent Clang error: unused variable 'l' [-Werror,-Wunused-variable] + struct use_l { use_l() { l(); } }; + + // http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae + // Clang 3.1 fails with headers of libstd++ 4.8.3 when using std::function because of this + namespace test_template_alias_sfinae { + struct foo {}; + + template + using member = typename T::member_type; + + template + void func(...) {} + + template + void func(member*) {} + + void test(); + + void test() { + func(0); + } + } +]]) + +AC_DEFUN([AX_CXX_COMPILE_STDCXX_11], [dnl + m4_if([$1], [], [], + [$1], [ext], [], + [$1], [noext], [], + [m4_fatal([invalid argument `$1' to AX_CXX_COMPILE_STDCXX_11])])dnl + m4_if([$2], [], [ax_cxx_compile_cxx11_required=true], + [$2], [mandatory], [ax_cxx_compile_cxx11_required=true], + [$2], [optional], [ax_cxx_compile_cxx11_required=false], + [m4_fatal([invalid second argument `$2' to AX_CXX_COMPILE_STDCXX_11])]) + AC_LANG_PUSH([C++])dnl + ac_success=no + AC_CACHE_CHECK(whether $CXX supports C++11 features by default, + ax_cv_cxx_compile_cxx11, + [AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_11_testbody])], + [ax_cv_cxx_compile_cxx11=yes], + [ax_cv_cxx_compile_cxx11=no])]) + if test x$ax_cv_cxx_compile_cxx11 = xyes; then + ac_success=yes + fi + + m4_if([$1], [noext], [], [dnl + if test x$ac_success = xno; then + for switch in -std=gnu++11 -std=gnu++0x; do + cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx11_$switch]) + AC_CACHE_CHECK(whether $CXX supports C++11 features with $switch, 
+ $cachevar, + [ac_save_CXXFLAGS="$CXXFLAGS" + CXXFLAGS="$CXXFLAGS $switch" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_11_testbody])], + [eval $cachevar=yes], + [eval $cachevar=no]) + CXXFLAGS="$ac_save_CXXFLAGS"]) + if eval test x\$$cachevar = xyes; then + CXXFLAGS="$CXXFLAGS $switch" + ac_success=yes + break + fi + done + fi]) + + m4_if([$1], [ext], [], [dnl + if test x$ac_success = xno; then + dnl HP's aCC needs +std=c++11 according to: + dnl http://h21007.www2.hp.com/portal/download/files/unprot/aCxx/PDF_Release_Notes/769149-001.pdf + for switch in -std=c++11 -std=c++0x +std=c++11; do + cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx11_$switch]) + AC_CACHE_CHECK(whether $CXX supports C++11 features with $switch, + $cachevar, + [ac_save_CXXFLAGS="$CXXFLAGS" + CXXFLAGS="$CXXFLAGS $switch" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_11_testbody])], + [eval $cachevar=yes], + [eval $cachevar=no]) + CXXFLAGS="$ac_save_CXXFLAGS"]) + if eval test x\$$cachevar = xyes; then + CXXFLAGS="$CXXFLAGS $switch" + ac_success=yes + break + fi + done + fi]) + AC_LANG_POP([C++]) + if test x$ax_cxx_compile_cxx11_required = xtrue; then + if test x$ac_success = xno; then + AC_MSG_ERROR([*** A compiler with support for C++11 language features is required.]) + fi + else + if test x$ac_success = xno; then + HAVE_CXX11=0 + AC_MSG_NOTICE([No compiler with C++11 support was found]) + else + HAVE_CXX11=1 + AC_DEFINE(HAVE_CXX11,1, + [define if the compiler supports basic C++11 syntax]) + fi + + AC_SUBST(HAVE_CXX11) + fi +]) diff --git a/tests/Grid_main.cc b/tests/Grid_main.cc index c1f1c5d0..fd198f30 100644 --- a/tests/Grid_main.cc +++ b/tests/Grid_main.cc @@ -105,7 +105,7 @@ int main (int argc, char ** argv) fflush(stdout); - /* + cVec = cMat * cVec; // LatticeColourVector = LatticeColourMatrix * LatticeColourVector sVec = sMat * sVec; // LatticeSpinVector = LatticeSpinMatrix * LatticeSpinVector scVec= scMat * scVec;// LatticeSpinColourVector = 
LatticeSpinColourMatrix * LatticeSpinColourVector @@ -146,7 +146,7 @@ int main (int argc, char ** argv) scalar=trace(scalar); scalar=localInnerProduct(cVec,cVec); scalar=localNorm2(cVec); - */ + // -=,+=,*=,() // add,+,sub,-,mult,mac,* // adj,conjugate @@ -162,50 +162,7 @@ int main (int argc, char ** argv) scMat = sMat*scMat; // LatticeSpinColourMatrix = LatticeSpinMatrix * LatticeSpinColourMatrix - /* -#ifdef SSE4 - ///////// Tests the new class Grid_simd - std::complex ctest(3.0,2.0); - std::complex ctestf(3.0,2.0); - MyComplexF TestMe1(1.0); // fills only real part - MyComplexD TestMe2(ctest); - MyComplexD TestMe3(ctest);// compiler generate conversion of basic types - //MyRealF TestMe5(ctest);// Must generate compiler error - MyRealD TestRe1(2.0); - MyRealF TestRe2(3.0); - - vone(TestRe2); - - MyComplexF TestMe6(ctestf); - MyComplexF TestMe7(ctestf); - - MyComplexD TheSum= TestMe2*TestMe3; - MyComplexF TheSumF= TestMe6*TestMe7; - - - - double dsum[2]; - _mm_store_pd(dsum, TheSum.v); - for (int i =0; i< 2; i++) - printf("%f\n", dsum[i]); - MyComplexD TheSumI = timesMinusI(TheSum); - MyComplexF TheSumIF = timesMinusI(TheSumF); - - float fsum[4]; - _mm_store_ps(fsum, TheSumF.v); - for (int i =0; i< 4; i++) - printf("%f\n", fsum[i]); - - vstore(TheSumI, &ctest); - std::complex sum = Reduce(TheSumF); - std::cout << ctest<< std::endl; - std::cout << sum<< std::endl; - -#endif - */ /////////////////////// - /* - printf("DEBUG: calling 3.5 \n"); // Non-lattice (const objects) * Lattice ColourMatrix cm; SpinColourMatrix scm; @@ -225,7 +182,6 @@ int main (int argc, char ** argv) vscm = vscm*cplx; scMat = scMat*cplx; - printf("DEBUG: calling 3.7 \n"); scm = cplx*scm; vscm = cplx*vscm; scMat = cplx*scMat; @@ -233,14 +189,12 @@ int main (int argc, char ** argv) vscm = myint*vscm; scMat = scMat*myint; - printf("DEBUG: calling 3.9 \n"); scm = scm*mydouble; vscm = vscm*mydouble; scMat = scMat*mydouble; scMat = mydouble*scMat; cMat = mydouble*cMat; - printf("DEBUG: calling 4 
\n"); sMat = adj(sMat); // LatticeSpinMatrix adjoint sMat = iGammaFive*sMat; // SpinMatrix * LatticeSpinMatrix sMat = GammaFive*sMat; // SpinMatrix * LatticeSpinMatrix @@ -293,8 +247,6 @@ int main (int argc, char ** argv) pokeIndex<1> (c_m,c,0,0); } - */ - FooBar = Bar; /* @@ -392,7 +344,6 @@ int main (int argc, char ** argv) t0=usecond(); for(int i=0;i