1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-09 23:45:36 +00:00

update SVE readme, asm generator

This commit is contained in:
nmeyer-ur 2020-05-18 19:10:36 +02:00
parent 50b1db1e8b
commit 032f7dde1a
2 changed files with 23 additions and 13 deletions

View File

@@ -116,7 +116,7 @@ STORE_BASE_PTR_COLOR_OFFSET = 2
OPT = """
* interleave prefetching and compute in MULT_2SPIN
* could test storing U's in MULT_2SPIN to L1d, might be beneficial for life time cache lines
* could test storing U's in MULT_2SPIN to L1d for cache line update
* structure reordering: MAYBEPERM after MULT_2SPIN ?
"""
@@ -375,7 +375,12 @@ class Register:
def zero(self, zeroreg=False):
d['zero'] += d['factor']
d['C'] += F' {self.name} = 0; \\\n'
d['I'] += F' {self.name} = __svzero({self.name}); \\\n'
#d['I'] += F' {self.name} = __svzero({self.name}); \\\n' only armclang
if PRECISION == 'double':
d['I'] += F' {self.name} = svdup_f64(0.); \\\n'
else:
d['I'] += F' {self.name} = svdup_f32(0.); \\\n'
if zeroreg == True:
d['A'] += F' "fmov {self.asmregwithsuffix} , 0 \\n\\t" \\\n'
@@ -906,8 +911,6 @@ else:
define(F'LOCK_GAUGE(A)')
define(F'UNLOCK_GAUGE(A)')
define(F'MASK_REGS DECLARATIONS_{PRECSUFFIX}')
define(F'COMPLEX_SIGNS(A)')
define(F'LOAD64(A,B)')
define(F'SAVE_RESULT(A,B) RESULT_{PRECSUFFIX}(A); PREFETCH_RESULT_L2_STORE(B)')
define(F'MULT_2SPIN_1(Dir) MULT_2SPIN_1_{PRECSUFFIX}(Dir)')
define(F'MULT_2SPIN_2 MULT_2SPIN_2_{PRECSUFFIX}')

View File

@@ -1,32 +1,39 @@
* gcc 10.0.1 VLA
* gcc 10.0.1 VLA (merlin)
../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=g++-10.0.1 CC=gcc-10.0.1 CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static
* gcc 10.0.1 fixed-size ACLE
* gcc 10.0.1 fixed-size ACLE (merlin)
../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-op enmp CXX=g++-10.0.1 CC=gcc-10.0.1 CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN"
../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=g++-10.0.1 CC=gcc-10.0.1 CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN"
* armclang 20.0 VLA
* armclang 20.0 VLA (merlin)
../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -fno-unroll-loops -mllvm -vectorizer-min-trip-count=2 -march=armv8-a+sve -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static
must use armclang 20.0 with ARMCLANGCOMPAT; otherwise Benchmark_wilson gives wrong results
TODO check ARMCLANGCOMPAT
* armclang 20.1 VLA
* armclang 20.1 VLA (merlin)
../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static
must use armclang 20.1 with ARMCLANGCOMPAT; otherwise Benchmark_wilson gives wrong results
TODO check ARMCLANGCOMPAT
* Fujitsu FCC
* armclang 20.1 VLA (fjt cluster)
../configure --with-lime=$HOME/local --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN"
TODO check ARMCLANGCOMPAT
* Fujitsu fcc
../configure --with-lime=$HOME/grid-a64fx/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp --with-mpfr=/home/users/gre/gre-1/grid-a64fx/mpfr-build/install CXX=FCC CC=fcc CXXFLAGS="-Nclang -Kfast -DA64FX -DA64FXASM -DDSLASHINTRIN"
* Fujitsu FCC w/ MPI
* Fujitsu fcc w/ MPI
../configure --with-lime=$HOME/grid-a64fx/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=mpi --enable-openmp --with-mpfr=/home/users/gre/gre-1/grid-a64fx/mpfr-build/install CXX=mpiFCC CC=mpifcc CXXFLAGS="-Nclang -Kfast -DA64FX -DA64FXASM -DDSLASHINTRIN -DTOFU"