1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-04 19:25:56 +01:00

enable --enable-simd=A64FX in configure

This commit is contained in:
nmeyer-ur 2020-07-08 12:43:51 +02:00
parent 1b08cb7300
commit bbd145382b
3 changed files with 45 additions and 5 deletions

View File

@ -29,7 +29,8 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
/* END LEGAL */
#pragma once
#if defined(A64FXASM)
//#if defined(A64FXASM)
#if defined(A64FX)
// safety include
#include <arm_sve.h>
@ -39,13 +40,13 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
// enable A64FX body
#define WILSONKERNELSASMBODYA64FX
#pragma message("A64FX Dslash: WilsonKernelsAsmBodyA64FX.h")
//#pragma message("A64FX Dslash: WilsonKernelsAsmBodyA64FX.h")
///////////////////////////////////////////////////////////
// If we are A64FX specialise the single precision routine
///////////////////////////////////////////////////////////
#if defined(DSLASHINTRIN)
#pragma message ("A64FX Dslash: intrin")
//#pragma message ("A64FX Dslash: intrin")
#include <simd/Fujitsu_A64FX_intrin_single.h>
#else
#pragma message ("A64FX Dslash: asm")

View File

@ -120,7 +120,7 @@ accelerator_inline Grid_half sfw_float_to_half(float ff) {
#if defined(A64FX) || defined(A64FXFIXEDSIZE) // breakout A64FX SVE ACLE here
#include <arm_sve.h>
#if defined(A64FX) // VLA
#pragma message("building for A64FX / SVE ACLE VLA")
#pragma message("building A64FX / SVE ACLE VLA")
#if defined(ARMCLANGCOMPAT)
#pragma message("applying data types patch")
#endif
@ -131,11 +131,41 @@ accelerator_inline Grid_half sfw_float_to_half(float ff) {
#include "Grid_a64fx-fixedsize.h"
#endif
#else
#pragma message("building for GEN") // generic
#pragma message("building GEN") // generic
#include "Grid_generic.h"
#endif
#endif
// A64FX build: include the SVE ACLE header, then select one of the two
// A64FX SIMD headers.
//  - __ARM_FEATURE_SVE_BITS defined     -> Grid_a64fx-fixedsize.h
//  - __ARM_FEATURE_SVE_BITS not defined -> Grid_a64fx-2.h (reported as "VLA")
// NOTE(review): presumably the macro is provided by the compiler when a
// fixed SVE vector length is requested on the command line - confirm for
// the compilers in use.
#ifdef A64FX
#include <arm_sve.h>
#ifdef __ARM_FEATURE_SVE_BITS
// Build message for the fixed-size path is intentionally left disabled.
//#pragma message("building A64FX SVE VLS")
#include "Grid_a64fx-fixedsize.h"
#else
#pragma message("building A64FX SVE VLA")
#if defined(ARMCLANGCOMPAT)
// Only a build-time notice is emitted here; this check selects no header.
#pragma message("applying data types patch")
#endif
#include "Grid_a64fx-2.h"
#endif
#endif
/*
#ifdef A64FXVLA
#pragma message("building A64FX VLA")
#if defined(ARMCLANGCOMPAT)
#pragma message("applying data types patch")
#endif
#include <arm_sve.h>
#include "Grid_a64fx-2.h"
#endif
#ifdef A64FXVLS
#pragma message("building A64FX VLS")
#include <arm_sve.h>
#include "Grid_a64fx-fixedsize.h"
#endif
*/
#ifdef SSE4
#include "Grid_sse4.h"

View File

@ -392,6 +392,15 @@ case ${ax_cv_cxx_compiler_vendor} in
[generic SIMD vector width (in bytes)])
SIMD_GEN_WIDTH_MSG=" (width= $ac_gen_simd_width)"
SIMD_FLAGS='';;
# A64FX SIMD target: define the A64FX config macro and choose the
# compiler flags according to the detected C++ compiler vendor.
A64FX)
case ${ax_cv_cxx_compiler_vendor} in
gnu)
# GNU: SVE with the vector length fixed to 512 bits on the command line.
AC_DEFINE([A64FX],[1],[A64FX / 512-bit SVE VLS])
SIMD_FLAGS='-march=armv8.2-a+sve -msve-vector-bits=512 -fno-gcse -DDSLASHINTRIN';;
clang)
# clang: target the a64fx CPU and additionally define ARMCLANGCOMPAT.
AC_DEFINE([A64FX],[1],[A64FX / 512-bit SVE VLA])
SIMD_FLAGS='-mcpu=a64fx -DARMCLANGCOMPAT -DDSLASHINTRIN';;
# Both branches pass -DDSLASHINTRIN. A vendor other than gnu or clang
# matches neither pattern of this inner case, so this arm then defines
# nothing and leaves SIMD_FLAGS untouched.
esac;;
NEONv8)
AC_DEFINE([NEONV8],[1],[ARMv8 NEON])
SIMD_FLAGS='-march=armv8-a';;