From d93eac7b1c74555b0bad224bdd0db1ac4afc8421 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 3 Oct 2023 15:53:14 +0000 Subject: [PATCH] Performance regressed and is OK in icpx 2023.2 --- Grid/qcd/utils/WilsonLoops.h | 7 +- Grid/stencil/GeneralLocalStencil.h | 2 +- configure.ac | 2 +- m4/ax_cxx_compile_stdcxx.m4 | 1018 +++++++++++++++++ m4/ax_cxx_compile_stdcxx_14.m4 | 34 + systems/Sunspot/benchmarks/bench.pbs | 3 +- .../Sunspot/benchmarks/gpu_tile_compact.sh | 4 +- systems/Sunspot/config-command | 2 +- 8 files changed, 1063 insertions(+), 9 deletions(-) create mode 100644 m4/ax_cxx_compile_stdcxx.m4 create mode 100644 m4/ax_cxx_compile_stdcxx_14.m4 diff --git a/Grid/qcd/utils/WilsonLoops.h b/Grid/qcd/utils/WilsonLoops.h index 78e25a8d..d6c0d621 100644 --- a/Grid/qcd/utils/WilsonLoops.h +++ b/Grid/qcd/utils/WilsonLoops.h @@ -464,7 +464,8 @@ public: //U_padded: the gauge link fields padded out using the PaddedCell class //Cell: the padded cell class //gStencil: the precomputed generalized local stencil for the staple - static void StaplePaddedAll(std::vector &staple, const std::vector &U_padded, const PaddedCell &Cell, const GeneralLocalStencil &gStencil) { + static void StaplePaddedAll(std::vector &staple, const std::vector &U_padded, const PaddedCell &Cell, const GeneralLocalStencil &gStencil) + { double t0 = usecond(); assert(U_padded.size() == Nd); assert(staple.size() == Nd); assert(U_padded[0].Grid() == (GridBase*)Cell.grids.back()); @@ -489,7 +490,7 @@ public: autoView( gStaple_v , gStaple, AcceleratorWrite); auto gStencil_v = gStencil.View(); - accelerator_for(ss, ggrid->oSites(), ggrid->Nsimd(), { + accelerator_for(ss, ggrid->oSites(), (size_t)ggrid->Nsimd(), { decltype(coalescedRead(Ug_dirs_v[0][0])) stencil_ss; stencil_ss = Zero(); int off = outer_off; @@ -1201,7 +1202,7 @@ public: autoView( gStaple_v , gStaple, AcceleratorWrite); auto gStencil_v = gStencil.View(); - accelerator_for(ss, ggrid->oSites(), ggrid->Nsimd(), { + accelerator_for(ss, ggrid->oSites(), (size_t)ggrid->Nsimd(), { decltype(coalescedRead(Ug_dirs_v[0][0])) stencil_ss; stencil_ss = Zero(); int s=offset; diff --git a/Grid/stencil/GeneralLocalStencil.h b/Grid/stencil/GeneralLocalStencil.h index 333f7028..90fff953 100644 --- a/Grid/stencil/GeneralLocalStencil.h +++ b/Grid/stencil/GeneralLocalStencil.h @@ -43,7 +43,7 @@ class GeneralLocalStencilView { int _npoints; // Move to template param? GeneralStencilEntry* _entries_p; - accelerator_inline GeneralStencilEntry * GetEntry(int point,int osite) { + accelerator_inline GeneralStencilEntry * GetEntry(int point,int osite) const { return & this->_entries_p[point+this->_npoints*osite]; } diff --git a/configure.ac b/configure.ac index 15b794fc..c16d90f6 100644 --- a/configure.ac +++ b/configure.ac @@ -41,7 +41,7 @@ AC_PROG_RANLIB ############### Get compiler informations AC_LANG([C++]) -AX_CXX_COMPILE_STDCXX(14,noext,mandatory) +AX_CXX_COMPILE_STDCXX(17,noext,mandatory) AX_COMPILER_VENDOR AC_DEFINE_UNQUOTED([CXX_COMP_VENDOR],["$ax_cv_cxx_compiler_vendor"], [vendor of C++ compiler that will compile the code]) diff --git a/m4/ax_cxx_compile_stdcxx.m4 b/m4/ax_cxx_compile_stdcxx.m4 new file mode 100644 index 00000000..8edf5152 --- /dev/null +++ b/m4/ax_cxx_compile_stdcxx.m4 @@ -0,0 +1,1018 @@ +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_CXX_COMPILE_STDCXX(VERSION, [ext|noext], [mandatory|optional]) +# +# DESCRIPTION +# +# Check for baseline language coverage in the compiler for the specified +# version of the C++ standard. If necessary, add switches to CXX and +# CXXCPP to enable support. VERSION may be '11', '14', '17', or '20' for +# the respective C++ standard version. +# +# The second argument, if specified, indicates whether you insist on an +# extended mode (e.g. -std=gnu++11) or a strict conformance mode (e.g. +# -std=c++11). If neither is specified, you get whatever works, with +# preference for no added switch, and then for an extended mode. +# +# The third argument, if specified 'mandatory' or if left unspecified, +# indicates that baseline support for the specified C++ standard is +# required and that the macro should error out if no mode with that +# support is found. If specified 'optional', then configuration proceeds +# regardless, after defining HAVE_CXX${VERSION} if and only if a +# supporting mode is found. +# +# LICENSE +# +# Copyright (c) 2008 Benjamin Kosnik +# Copyright (c) 2012 Zack Weinberg +# Copyright (c) 2013 Roy Stogner +# Copyright (c) 2014, 2015 Google Inc.; contributed by Alexey Sokolov +# Copyright (c) 2015 Paul Norman +# Copyright (c) 2015 Moritz Klammler +# Copyright (c) 2016, 2018 Krzesimir Nowak +# Copyright (c) 2019 Enji Cooper +# Copyright (c) 2020 Jason Merrill +# Copyright (c) 2021 Jörn Heusipp +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 18 + +dnl This macro is based on the code from the AX_CXX_COMPILE_STDCXX_11 macro +dnl (serial version number 13). + +AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl + m4_if([$1], [11], [ax_cxx_compile_alternatives="11 0x"], + [$1], [14], [ax_cxx_compile_alternatives="14 1y"], + [$1], [17], [ax_cxx_compile_alternatives="17 1z"], + [$1], [20], [ax_cxx_compile_alternatives="20"], + [m4_fatal([invalid first argument `$1' to AX_CXX_COMPILE_STDCXX])])dnl + m4_if([$2], [], [], + [$2], [ext], [], + [$2], [noext], [], + [m4_fatal([invalid second argument `$2' to AX_CXX_COMPILE_STDCXX])])dnl + m4_if([$3], [], [ax_cxx_compile_cxx$1_required=true], + [$3], [mandatory], [ax_cxx_compile_cxx$1_required=true], + [$3], [optional], [ax_cxx_compile_cxx$1_required=false], + [m4_fatal([invalid third argument `$3' to AX_CXX_COMPILE_STDCXX])]) + AC_LANG_PUSH([C++])dnl + ac_success=no + + m4_if([$2], [], [dnl + AC_CACHE_CHECK(whether $CXX supports C++$1 features by default, + ax_cv_cxx_compile_cxx$1, + [AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], + [ax_cv_cxx_compile_cxx$1=yes], + [ax_cv_cxx_compile_cxx$1=no])]) + if test x$ax_cv_cxx_compile_cxx$1 = xyes; then + ac_success=yes + fi]) + + m4_if([$2], [noext], [], [dnl + if test x$ac_success = xno; then + for alternative in ${ax_cxx_compile_alternatives}; do + switch="-std=gnu++${alternative}" + cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch]) + AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch, + $cachevar, + [ac_save_CXX="$CXX" + CXX="$CXX $switch" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], + [eval $cachevar=yes], + [eval $cachevar=no]) + CXX="$ac_save_CXX"]) + if eval test x\$$cachevar = xyes; then + CXX="$CXX $switch" + if test -n "$CXXCPP" ; then + CXXCPP="$CXXCPP $switch" + fi + ac_success=yes + break + fi + done + fi]) + + m4_if([$2], [ext], [], [dnl + if test x$ac_success = xno; then + dnl HP's aCC needs +std=c++11 according to: + dnl http://h21007.www2.hp.com/portal/download/files/unprot/aCxx/PDF_Release_Notes/769149-001.pdf + dnl Cray's crayCC needs "-h std=c++11" + dnl MSVC needs -std:c++NN for C++17 and later (default is C++14) + for alternative in ${ax_cxx_compile_alternatives}; do + for switch in -std=c++${alternative} +std=c++${alternative} "-h std=c++${alternative}" MSVC; do + if test x"$switch" = xMSVC; then + dnl AS_TR_SH maps both `:` and `=` to `_` so -std:c++17 would collide + dnl with -std=c++17. We suffix the cache variable name with _MSVC to + dnl avoid this. + switch=-std:c++${alternative} + cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_${switch}_MSVC]) + else + cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch]) + fi + AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch, + $cachevar, + [ac_save_CXX="$CXX" + CXX="$CXX $switch" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], + [eval $cachevar=yes], + [eval $cachevar=no]) + CXX="$ac_save_CXX"]) + if eval test x\$$cachevar = xyes; then + CXX="$CXX $switch" + if test -n "$CXXCPP" ; then + CXXCPP="$CXXCPP $switch" + fi + ac_success=yes + break + fi + done + if test x$ac_success = xyes; then + break + fi + done + fi]) + AC_LANG_POP([C++]) + if test x$ax_cxx_compile_cxx$1_required = xtrue; then + if test x$ac_success = xno; then + AC_MSG_ERROR([*** A compiler with support for C++$1 language features is required.]) + fi + fi + if test x$ac_success = xno; then + HAVE_CXX$1=0 + AC_MSG_NOTICE([No compiler with C++$1 support was found]) + else + HAVE_CXX$1=1 + AC_DEFINE(HAVE_CXX$1,1, + [define if the compiler supports basic C++$1 syntax]) + fi + AC_SUBST(HAVE_CXX$1) +]) + + +dnl Test body for checking C++11 support + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_11], + _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 +) + +dnl Test body for checking C++14 support + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_14], + _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_14 +) + +dnl Test body for checking C++17 support + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_17], + _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_14 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_17 +) + +dnl Test body for checking C++20 support + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_20], + _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_14 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_17 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_20 +) + + +dnl Tests for new features in C++11 + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_11], [[ + +// If the compiler admits that it is not ready for C++11, why torture it? +// Hopefully, this will speed up the test. + +#ifndef __cplusplus + +#error "This is not a C++ compiler" + +// MSVC always sets __cplusplus to 199711L in older versions; newer versions +// only set it correctly if /Zc:__cplusplus is specified as well as a +// /std:c++NN switch: +// https://devblogs.microsoft.com/cppblog/msvc-now-correctly-reports-__cplusplus/ +#elif __cplusplus < 201103L && !defined _MSC_VER + +#error "This is not a C++11 compiler" + +#else + +namespace cxx11 +{ + + namespace test_static_assert + { + + template + struct check + { + static_assert(sizeof(int) <= sizeof(T), "not big enough"); + }; + + } + + namespace test_final_override + { + + struct Base + { + virtual ~Base() {} + virtual void f() {} + }; + + struct Derived : public Base + { + virtual ~Derived() override {} + virtual void f() override {} + }; + + } + + namespace test_double_right_angle_brackets + { + + template < typename T > + struct check {}; + + typedef check single_type; + typedef check> double_type; + typedef check>> triple_type; + typedef check>>> quadruple_type; + + } + + namespace test_decltype + { + + int + f() + { + int a = 1; + decltype(a) b = 2; + return a + b; + } + + } + + namespace test_type_deduction + { + + template < typename T1, typename T2 > + struct is_same + { + static const bool value = false; + }; + + template < typename T > + struct is_same + { + static const bool value = true; + }; + + template < typename T1, typename T2 > + auto + add(T1 a1, T2 a2) -> decltype(a1 + a2) + { + return a1 + a2; + } + + int + test(const int c, volatile int v) + { + static_assert(is_same::value == true, ""); + static_assert(is_same::value == false, ""); + static_assert(is_same::value == false, ""); + auto ac = c; + auto av = v; + auto sumi = ac + av + 'x'; + auto sumf = ac + av + 1.0; + static_assert(is_same::value == true, ""); + static_assert(is_same::value == true, ""); + static_assert(is_same::value == true, ""); + static_assert(is_same::value == false, ""); + static_assert(is_same::value == true, ""); + return (sumf > 0.0) ? sumi : add(c, v); + } + + } + + namespace test_noexcept + { + + int f() { return 0; } + int g() noexcept { return 0; } + + static_assert(noexcept(f()) == false, ""); + static_assert(noexcept(g()) == true, ""); + + } + + namespace test_constexpr + { + + template < typename CharT > + unsigned long constexpr + strlen_c_r(const CharT *const s, const unsigned long acc) noexcept + { + return *s ? strlen_c_r(s + 1, acc + 1) : acc; + } + + template < typename CharT > + unsigned long constexpr + strlen_c(const CharT *const s) noexcept + { + return strlen_c_r(s, 0UL); + } + + static_assert(strlen_c("") == 0UL, ""); + static_assert(strlen_c("1") == 1UL, ""); + static_assert(strlen_c("example") == 7UL, ""); + static_assert(strlen_c("another\0example") == 7UL, ""); + + } + + namespace test_rvalue_references + { + + template < int N > + struct answer + { + static constexpr int value = N; + }; + + answer<1> f(int&) { return answer<1>(); } + answer<2> f(const int&) { return answer<2>(); } + answer<3> f(int&&) { return answer<3>(); } + + void + test() + { + int i = 0; + const int c = 0; + static_assert(decltype(f(i))::value == 1, ""); + static_assert(decltype(f(c))::value == 2, ""); + static_assert(decltype(f(0))::value == 3, ""); + } + + } + + namespace test_uniform_initialization + { + + struct test + { + static const int zero {}; + static const int one {1}; + }; + + static_assert(test::zero == 0, ""); + static_assert(test::one == 1, ""); + + } + + namespace test_lambdas + { + + void + test1() + { + auto lambda1 = [](){}; + auto lambda2 = lambda1; + lambda1(); + lambda2(); + } + + int + test2() + { + auto a = [](int i, int j){ return i + j; }(1, 2); + auto b = []() -> int { return '0'; }(); + auto c = [=](){ return a + b; }(); + auto d = [&](){ return c; }(); + auto e = [a, &b](int x) mutable { + const auto identity = [](int y){ return y; }; + for (auto i = 0; i < a; ++i) + a += b--; + return x + identity(a + b); + }(0); + return a + b + c + d + e; + } + + int + test3() + { + const auto nullary = [](){ return 0; }; + const auto unary = [](int x){ return x; }; + using nullary_t = decltype(nullary); + using unary_t = decltype(unary); + const auto higher1st = [](nullary_t f){ return f(); }; + const auto higher2nd = [unary](nullary_t f1){ + return [unary, f1](unary_t f2){ return f2(unary(f1())); }; + }; + return higher1st(nullary) + higher2nd(nullary)(unary); + } + + } + + namespace test_variadic_templates + { + + template + struct sum; + + template + struct sum + { + static constexpr auto value = N0 + sum::value; + }; + + template <> + struct sum<> + { + static constexpr auto value = 0; + }; + + static_assert(sum<>::value == 0, ""); + static_assert(sum<1>::value == 1, ""); + static_assert(sum<23>::value == 23, ""); + static_assert(sum<1, 2>::value == 3, ""); + static_assert(sum<5, 5, 11>::value == 21, ""); + static_assert(sum<2, 3, 5, 7, 11, 13>::value == 41, ""); + + } + + // http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae + // Clang 3.1 fails with headers of libstd++ 4.8.3 when using std::function + // because of this. + namespace test_template_alias_sfinae + { + + struct foo {}; + + template + using member = typename T::member_type; + + template + void func(...) {} + + template + void func(member*) {} + + void test(); + + void test() { func(0); } + + } + +} // namespace cxx11 + +#endif // __cplusplus >= 201103L + +]]) + + +dnl Tests for new features in C++14 + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_14], [[ + +// If the compiler admits that it is not ready for C++14, why torture it? +// Hopefully, this will speed up the test. + +#ifndef __cplusplus + +#error "This is not a C++ compiler" + +#elif __cplusplus < 201402L && !defined _MSC_VER + +#error "This is not a C++14 compiler" + +#else + +namespace cxx14 +{ + + namespace test_polymorphic_lambdas + { + + int + test() + { + const auto lambda = [](auto&&... args){ + const auto istiny = [](auto x){ + return (sizeof(x) == 1UL) ? 1 : 0; + }; + const int aretiny[] = { istiny(args)... }; + return aretiny[0]; + }; + return lambda(1, 1L, 1.0f, '1'); + } + + } + + namespace test_binary_literals + { + + constexpr auto ivii = 0b0000000000101010; + static_assert(ivii == 42, "wrong value"); + + } + + namespace test_generalized_constexpr + { + + template < typename CharT > + constexpr unsigned long + strlen_c(const CharT *const s) noexcept + { + auto length = 0UL; + for (auto p = s; *p; ++p) + ++length; + return length; + } + + static_assert(strlen_c("") == 0UL, ""); + static_assert(strlen_c("x") == 1UL, ""); + static_assert(strlen_c("test") == 4UL, ""); + static_assert(strlen_c("another\0test") == 7UL, ""); + + } + + namespace test_lambda_init_capture + { + + int + test() + { + auto x = 0; + const auto lambda1 = [a = x](int b){ return a + b; }; + const auto lambda2 = [a = lambda1(x)](){ return a; }; + return lambda2(); + } + + } + + namespace test_digit_separators + { + + constexpr auto ten_million = 100'000'000; + static_assert(ten_million == 100000000, ""); + + } + + namespace test_return_type_deduction + { + + auto f(int& x) { return x; } + decltype(auto) g(int& x) { return x; } + + template < typename T1, typename T2 > + struct is_same + { + static constexpr auto value = false; + }; + + template < typename T > + struct is_same + { + static constexpr auto value = true; + }; + + int + test() + { + auto x = 0; + static_assert(is_same::value, ""); + static_assert(is_same::value, ""); + return x; + } + + } + +} // namespace cxx14 + +#endif // __cplusplus >= 201402L + +]]) + + +dnl Tests for new features in C++17 + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_17], [[ + +// If the compiler admits that it is not ready for C++17, why torture it? +// Hopefully, this will speed up the test. + +#ifndef __cplusplus + +#error "This is not a C++ compiler" + +#elif __cplusplus < 201703L && !defined _MSC_VER + +#error "This is not a C++17 compiler" + +#else + +#include +#include +#include + +namespace cxx17 +{ + + namespace test_constexpr_lambdas + { + + constexpr int foo = [](){return 42;}(); + + } + + namespace test::nested_namespace::definitions + { + + } + + namespace test_fold_expression + { + + template + int multiply(Args... args) + { + return (args * ... * 1); + } + + template + bool all(Args... args) + { + return (args && ...); + } + + } + + namespace test_extended_static_assert + { + + static_assert (true); + + } + + namespace test_auto_brace_init_list + { + + auto foo = {5}; + auto bar {5}; + + static_assert(std::is_same, decltype(foo)>::value); + static_assert(std::is_same::value); + } + + namespace test_typename_in_template_template_parameter + { + + template typename X> struct D; + + } + + namespace test_fallthrough_nodiscard_maybe_unused_attributes + { + + int f1() + { + return 42; + } + + [[nodiscard]] int f2() + { + [[maybe_unused]] auto unused = f1(); + + switch (f1()) + { + case 17: + f1(); + [[fallthrough]]; + case 42: + f1(); + } + return f1(); + } + + } + + namespace test_extended_aggregate_initialization + { + + struct base1 + { + int b1, b2 = 42; + }; + + struct base2 + { + base2() { + b3 = 42; + } + int b3; + }; + + struct derived : base1, base2 + { + int d; + }; + + derived d1 {{1, 2}, {}, 4}; // full initialization + derived d2 {{}, {}, 4}; // value-initialized bases + + } + + namespace test_general_range_based_for_loop + { + + struct iter + { + int i; + + int& operator* () + { + return i; + } + + const int& operator* () const + { + return i; + } + + iter& operator++() + { + ++i; + return *this; + } + }; + + struct sentinel + { + int i; + }; + + bool operator== (const iter& i, const sentinel& s) + { + return i.i == s.i; + } + + bool operator!= (const iter& i, const sentinel& s) + { + return !(i == s); + } + + struct range + { + iter begin() const + { + return {0}; + } + + sentinel end() const + { + return {5}; + } + }; + + void f() + { + range r {}; + + for (auto i : r) + { + [[maybe_unused]] auto v = i; + } + } + + } + + namespace test_lambda_capture_asterisk_this_by_value + { + + struct t + { + int i; + int foo() + { + return [*this]() + { + return i; + }(); + } + }; + + } + + namespace test_enum_class_construction + { + + enum class byte : unsigned char + {}; + + byte foo {42}; + + } + + namespace test_constexpr_if + { + + template + int f () + { + if constexpr(cond) + { + return 13; + } + else + { + return 42; + } + } + + } + + namespace test_selection_statement_with_initializer + { + + int f() + { + return 13; + } + + int f2() + { + if (auto i = f(); i > 0) + { + return 3; + } + + switch (auto i = f(); i + 4) + { + case 17: + return 2; + + default: + return 1; + } + } + + } + + namespace test_template_argument_deduction_for_class_templates + { + + template + struct pair + { + pair (T1 p1, T2 p2) + : m1 {p1}, + m2 {p2} + {} + + T1 m1; + T2 m2; + }; + + void f() + { + [[maybe_unused]] auto p = pair{13, 42u}; + } + + } + + namespace test_non_type_auto_template_parameters + { + + template + struct B + {}; + + B<5> b1; + B<'a'> b2; + + } + + namespace test_structured_bindings + { + + int arr[2] = { 1, 2 }; + std::pair pr = { 1, 2 }; + + auto f1() -> int(&)[2] + { + return arr; + } + + auto f2() -> std::pair& + { + return pr; + } + + struct S + { + int x1 : 2; + volatile double y1; + }; + + S f3() + { + return {}; + } + + auto [ x1, y1 ] = f1(); + auto& [ xr1, yr1 ] = f1(); + auto [ x2, y2 ] = f2(); + auto& [ xr2, yr2 ] = f2(); + const auto [ x3, y3 ] = f3(); + + } + + namespace test_exception_spec_type_system + { + + struct Good {}; + struct Bad {}; + + void g1() noexcept; + void g2(); + + template + Bad + f(T*, T*); + + template + Good + f(T1*, T2*); + + static_assert (std::is_same_v); + + } + + namespace test_inline_variables + { + + template void f(T) + {} + + template inline T g(T) + { + return T{}; + } + + template<> inline void f<>(int) + {} + + template<> int g<>(int) + { + return 5; + } + + } + +} // namespace cxx17 + +#endif // __cplusplus < 201703L && !defined _MSC_VER + +]]) + + +dnl Tests for new features in C++20 + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_20], [[ + +#ifndef __cplusplus + +#error "This is not a C++ compiler" + +#elif __cplusplus < 202002L && !defined _MSC_VER + +#error "This is not a C++20 compiler" + +#else + +#include + +namespace cxx20 +{ + +// As C++20 supports feature test macros in the standard, there is no +// immediate need to actually test for feature availability on the +// Autoconf side. + +} // namespace cxx20 + +#endif // __cplusplus < 202002L && !defined _MSC_VER + +]]) diff --git a/m4/ax_cxx_compile_stdcxx_14.m4 b/m4/ax_cxx_compile_stdcxx_14.m4 new file mode 100644 index 00000000..094db0d0 --- /dev/null +++ b/m4/ax_cxx_compile_stdcxx_14.m4 @@ -0,0 +1,34 @@ +# ============================================================================= +# https://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx_14.html +# ============================================================================= +# +# SYNOPSIS +# +# AX_CXX_COMPILE_STDCXX_14([ext|noext], [mandatory|optional]) +# +# DESCRIPTION +# +# Check for baseline language coverage in the compiler for the C++14 +# standard; if necessary, add switches to CXX and CXXCPP to enable +# support. +# +# This macro is a convenience alias for calling the AX_CXX_COMPILE_STDCXX +# macro with the version set to C++14. The two optional arguments are +# forwarded literally as the second and third argument respectively. +# Please see the documentation for the AX_CXX_COMPILE_STDCXX macro for +# more information. If you want to use this macro, you also need to +# download the ax_cxx_compile_stdcxx.m4 file. +# +# LICENSE +# +# Copyright (c) 2015 Moritz Klammler +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 5 + +AX_REQUIRE_DEFINED([AX_CXX_COMPILE_STDCXX]) +AC_DEFUN([AX_CXX_COMPILE_STDCXX_14], [AX_CXX_COMPILE_STDCXX([14], [$1], [$2])]) diff --git a/systems/Sunspot/benchmarks/bench.pbs b/systems/Sunspot/benchmarks/bench.pbs index f1a26aa4..dc07ca2f 100644 --- a/systems/Sunspot/benchmarks/bench.pbs +++ b/systems/Sunspot/benchmarks/bench.pbs @@ -20,7 +20,7 @@ unset OMP_PLACES cd $PBS_O_WORKDIR -qsub jobscript.pbs +#qsub jobscript.pbs echo Jobid: $PBS_JOBID echo Running on host `hostname` @@ -44,3 +44,4 @@ CMD="mpiexec -np ${NTOTRANKS} -ppn ${NRANKS} -d ${NDEPTH} --cpu-bind=depth -enva ./Benchmark_dwf_fp32 --mpi 1.1.2.6 --grid 16.32.64.192 --comms-overlap \ --shm-mpi 0 --shm 2048 --device-mem 32000 --accelerator-threads 32" +$CMD diff --git a/systems/Sunspot/benchmarks/gpu_tile_compact.sh b/systems/Sunspot/benchmarks/gpu_tile_compact.sh index ec532b1b..11ed83aa 100755 --- a/systems/Sunspot/benchmarks/gpu_tile_compact.sh +++ b/systems/Sunspot/benchmarks/gpu_tile_compact.sh @@ -45,8 +45,8 @@ echo "rank $PALS_RANKID ; local rank $PALS_LOCAL_RANKID ; ZE_AFFINITY_MASK=$ZE_A if [ $PALS_LOCAL_RANKID = 0 ] then - onetrace --chrome-device-timeline "$@" -# "$@" +# onetrace --chrome-device-timeline "$@" + "$@" else "$@" fi diff --git a/systems/Sunspot/config-command b/systems/Sunspot/config-command index 7adf7117..e59ef515 100644 --- a/systems/Sunspot/config-command +++ b/systems/Sunspot/config-command @@ -11,6 +11,6 @@ TOOLS=$HOME/tools --enable-unified=no \ MPICXX=mpicxx \ CXX=icpx \ - LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -lapmidg -L$TOOLS/lib64/" \ + LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -L$TOOLS/lib64/" \ CXXFLAGS="-fiopenmp -fsycl-unnamed-lambda -fsycl -I$INSTALL/include -Wno-tautological-compare -I$HOME/ -I$TOOLS/include"