mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
update SVE readme, asm generator
This commit is contained in:
parent
50b1db1e8b
commit
032f7dde1a
@ -116,7 +116,7 @@ STORE_BASE_PTR_COLOR_OFFSET = 2
|
||||
|
||||
OPT = """
|
||||
* interleave prefetching and compute in MULT_2SPIN
|
||||
* could test storing U's in MULT_2SPIN to L1d, might be beneficial for life time cache lines
|
||||
* could test storing U's in MULT_2SPIN to L1d for cache line update
|
||||
* structure reordering: MAYBEPERM after MULT_2SPIN ?
|
||||
"""
|
||||
|
||||
@ -375,7 +375,12 @@ class Register:
|
||||
def zero(self, zeroreg=False):
|
||||
d['zero'] += d['factor']
|
||||
d['C'] += F' {self.name} = 0; \\\n'
|
||||
d['I'] += F' {self.name} = __svzero({self.name}); \\\n'
|
||||
#d['I'] += F' {self.name} = __svzero({self.name}); \\\n' only armclang
|
||||
|
||||
if PRECISION == 'double':
|
||||
d['I'] += F' {self.name} = svdup_f64(0.); \\\n'
|
||||
else:
|
||||
d['I'] += F' {self.name} = svdup_f32(0.); \\\n'
|
||||
|
||||
if zeroreg == True:
|
||||
d['A'] += F' "fmov {self.asmregwithsuffix} , 0 \\n\\t" \\\n'
|
||||
@ -906,8 +911,6 @@ else:
|
||||
define(F'LOCK_GAUGE(A)')
|
||||
define(F'UNLOCK_GAUGE(A)')
|
||||
define(F'MASK_REGS DECLARATIONS_{PRECSUFFIX}')
|
||||
define(F'COMPLEX_SIGNS(A)')
|
||||
define(F'LOAD64(A,B)')
|
||||
define(F'SAVE_RESULT(A,B) RESULT_{PRECSUFFIX}(A); PREFETCH_RESULT_L2_STORE(B)')
|
||||
define(F'MULT_2SPIN_1(Dir) MULT_2SPIN_1_{PRECSUFFIX}(Dir)')
|
||||
define(F'MULT_2SPIN_2 MULT_2SPIN_2_{PRECSUFFIX}')
|
||||
|
@ -1,32 +1,39 @@
|
||||
* gcc 10.0.1 VLA
|
||||
* gcc 10.0.1 VLA (merlin)
|
||||
|
||||
../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=g++-10.0.1 CC=gcc-10.0.1 CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static
|
||||
|
||||
|
||||
* gcc 10.0.1 fixed-size ACLE
|
||||
* gcc 10.0.1 fixed-size ACLE (merlin)
|
||||
|
||||
../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-op enmp CXX=g++-10.0.1 CC=gcc-10.0.1 CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN"
|
||||
../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=g++-10.0.1 CC=gcc-10.0.1 CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN"
|
||||
|
||||
|
||||
* armclang 20.0 VLA
|
||||
* armclang 20.0 VLA (merlin)
|
||||
|
||||
../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -fno-unroll-loops -mllvm -vectorizer-min-trip-count=2 -march=armv8-a+sve -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static
|
||||
|
||||
must use armclang 20.0 with ARMCLANGCOMPAT, otherwise Benchmark_wilson gives wrong result
|
||||
TODO check ARMCLANGCOMPAT
|
||||
|
||||
|
||||
* armclang 20.1 VLA
|
||||
* armclang 20.1 VLA (merlin)
|
||||
|
||||
../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static
|
||||
|
||||
must use armclang 20.1 with ARMCLANGCOMPAT, otherwise Benchmark_wilson gives wrong result
|
||||
TODO check ARMCLANGCOMPAT
|
||||
|
||||
|
||||
* Fujitsu FCC
|
||||
* armclang 20.1 VLA (fjt cluster)
|
||||
|
||||
../configure --with-lime=$HOME/local --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN"
|
||||
|
||||
TODO check ARMCLANGCOMPAT
|
||||
|
||||
|
||||
* Fujitsu fcc
|
||||
|
||||
../configure --with-lime=$HOME/grid-a64fx/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp --with-mpfr=/home/users/gre/gre-1/grid-a64fx/mpfr-build/install CXX=FCC CC=fcc CXXFLAGS="-Nclang -Kfast -DA64FX -DA64FXASM -DDSLASHINTRIN"
|
||||
|
||||
|
||||
* Fujitsu FCC w/ MPI
|
||||
* Fujitsu fcc w/ MPI
|
||||
|
||||
../configure --with-lime=$HOME/grid-a64fx/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=mpi --enable-openmp --with-mpfr=/home/users/gre/gre-1/grid-a64fx/mpfr-build/install CXX=mpiFCC CC=mpifcc CXXFLAGS="-Nclang -Kfast -DA64FX -DA64FXASM -DDSLASHINTRIN -DTOFU"
|
||||
|
Loading…
Reference in New Issue
Block a user