mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-12 07:00:45 +01:00
Compare commits
No commits in common. "80359e0d49ed4235e36986f459d7473f1246b452" and "b8a700436513001de3c49dca9dd21042b4337109" have entirely different histories.
80359e0d49
...
b8a7004365
@ -604,8 +604,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
|||||||
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
|
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
|
||||||
typedef struct { int fd; pid_t pid ; ze_ipc_mem_handle_t ze; } clone_mem_t;
|
typedef struct { int fd; pid_t pid ; ze_ipc_mem_handle_t ze; } clone_mem_t;
|
||||||
|
|
||||||
auto zeDevice = cl::sycl::get_native<cl::sycl::backend::ext_oneapi_level_zero>(theGridAccelerator->get_device());
|
auto zeDevice = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_device());
|
||||||
auto zeContext = cl::sycl::get_native<cl::sycl::backend::ext_oneapi_level_zero>(theGridAccelerator->get_context());
|
auto zeContext = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_context());
|
||||||
|
|
||||||
ze_ipc_mem_handle_t ihandle;
|
ze_ipc_mem_handle_t ihandle;
|
||||||
clone_mem_t handle;
|
clone_mem_t handle;
|
||||||
|
@ -90,12 +90,10 @@ template<class vtype,int N> accelerator_inline iVector<vtype,N> ProjectOnGroup(c
|
|||||||
template<class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr>
|
template<class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr>
|
||||||
accelerator_inline iMatrix<vtype,N> ProjectOnGroup(const iMatrix<vtype,N> &arg)
|
accelerator_inline iMatrix<vtype,N> ProjectOnGroup(const iMatrix<vtype,N> &arg)
|
||||||
{
|
{
|
||||||
typedef typename iMatrix<vtype,N>::scalar_type scalar;
|
|
||||||
// need a check for the group type?
|
// need a check for the group type?
|
||||||
iMatrix<vtype,N> ret(arg);
|
iMatrix<vtype,N> ret(arg);
|
||||||
vtype nrm;
|
vtype nrm;
|
||||||
vtype inner;
|
vtype inner;
|
||||||
scalar one(1.0);
|
|
||||||
for(int c1=0;c1<N;c1++){
|
for(int c1=0;c1<N;c1++){
|
||||||
|
|
||||||
// Normalises row c1
|
// Normalises row c1
|
||||||
@ -104,7 +102,7 @@ accelerator_inline iMatrix<vtype,N> ProjectOnGroup(const iMatrix<vtype,N> &arg)
|
|||||||
inner += innerProduct(ret._internal[c1][c2],ret._internal[c1][c2]);
|
inner += innerProduct(ret._internal[c1][c2],ret._internal[c1][c2]);
|
||||||
|
|
||||||
nrm = sqrt(inner);
|
nrm = sqrt(inner);
|
||||||
nrm = one/nrm;
|
nrm = 1.0/nrm;
|
||||||
for(int c2=0;c2<N;c2++)
|
for(int c2=0;c2<N;c2++)
|
||||||
ret._internal[c1][c2]*= nrm;
|
ret._internal[c1][c2]*= nrm;
|
||||||
|
|
||||||
@ -129,7 +127,7 @@ accelerator_inline iMatrix<vtype,N> ProjectOnGroup(const iMatrix<vtype,N> &arg)
|
|||||||
inner += innerProduct(ret._internal[c1][c2],ret._internal[c1][c2]);
|
inner += innerProduct(ret._internal[c1][c2],ret._internal[c1][c2]);
|
||||||
|
|
||||||
nrm = sqrt(inner);
|
nrm = sqrt(inner);
|
||||||
nrm = one/nrm;
|
nrm = 1.0/nrm;
|
||||||
for(int c2=0;c2<N;c2++)
|
for(int c2=0;c2<N;c2++)
|
||||||
ret._internal[c1][c2]*= nrm;
|
ret._internal[c1][c2]*= nrm;
|
||||||
}
|
}
|
||||||
|
@ -1,53 +0,0 @@
|
|||||||
1. Prerequisites:
|
|
||||||
===================
|
|
||||||
Make sure you have the latest Intel icpx release loaded (via modules or similar)
|
|
||||||
Make sure you have SYCL aware MPICH or Intel MPI loaded (assumed as mpicxx)
|
|
||||||
|
|
||||||
2. Obtain Grid:
|
|
||||||
===================
|
|
||||||
|
|
||||||
bash$
|
|
||||||
git clone https://github.com/paboyle/Grid
|
|
||||||
cd Grid
|
|
||||||
./bootstrap.sh
|
|
||||||
cd systems/PVC
|
|
||||||
|
|
||||||
3. Build Grid:
|
|
||||||
===================
|
|
||||||
|
|
||||||
The configure command is stored in the file config-command:
|
|
||||||
|
|
||||||
bash$
|
|
||||||
../../configure \
|
|
||||||
--enable-simd=GPU \
|
|
||||||
--enable-gen-simd-width=64 \
|
|
||||||
--enable-comms=mpi-auto \
|
|
||||||
--enable-accelerator-cshift \
|
|
||||||
--disable-gparity \
|
|
||||||
--disable-fermion-reps \
|
|
||||||
--enable-shm=nvlink \
|
|
||||||
--enable-accelerator=sycl \
|
|
||||||
--enable-unified=no \
|
|
||||||
MPICXX=mpicxx \
|
|
||||||
CXX=icpx \
|
|
||||||
LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader " \
|
|
||||||
CXXFLAGS="-fiopenmp -fsycl-unnamed-lambda -fsycl -Wno-tautological-compare "
|
|
||||||
|
|
||||||
make all
|
|
||||||
|
|
||||||
4. Run a benchmark:
|
|
||||||
===================
|
|
||||||
|
|
||||||
*** Assumes interactive access to node. ***
|
|
||||||
|
|
||||||
run Benchmark_dwf_fp32 using benchmarks/bench.sh
|
|
||||||
|
|
||||||
bash$
|
|
||||||
cd benchmarks
|
|
||||||
./bench.sh
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,18 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
export EnableImplicitScaling=0
|
|
||||||
export ZE_ENABLE_PCI_ID_DEVICE_ORDER=1
|
|
||||||
export ZE_AFFINITY_MASK=$gpu_id.$tile_id
|
|
||||||
export ONEAPI_DEVICE_FILTER=gpu,level_zero
|
|
||||||
export SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=0
|
|
||||||
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
|
|
||||||
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0:2
|
|
||||||
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_D2D_COPY=1
|
|
||||||
|
|
||||||
mpiexec -launcher ssh -n 1 -host localhost ./select_gpu.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid 32.32.32.32 --accelerator-threads 16 --shm-mpi 1 --shm 2048 --device-mem 32768 | tee 1tile.log
|
|
||||||
mpiexec -launcher ssh -n 2 -host localhost ./select_gpu.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.2 --grid 32.32.32.64 --accelerator-threads 16 --shm-mpi 1 --shm 2048 --device-mem 32768 | tee 2tile.log
|
|
||||||
|
|
||||||
#mpiexec -launcher ssh -n 4 -host localhost ./select_gpu.sh ./Benchmark_dwf_fp32 --mpi 1.1.2.2 --grid 16.16.64.64 --accelerator-threads 16 --shm-mpi 0 --shm 2048 --device-mem 32768 | tee 4tile.log
|
|
||||||
#mpiexec -launcher ssh -n 8 -host localhost ./select_gpu.sh ./Benchmark_dwf_fp32 --mpi 1.1.2.4 --grid 16.16.64.128 --accelerator-threads 16 --shm-mpi 0 --shm 2048 --device-mem 32768 | tee 8tile.log
|
|
||||||
|
|
||||||
|
|
@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
num_tile=2
|
|
||||||
|
|
||||||
# Map the node-local MPI rank onto a (GPU, tile) pair. The rank is read from
# MPI_LOCALRANKID, the same launcher variable the echo further down reports;
# the previous spelling MPI_LOCAL_RANKID is unset, so it evaluated to 0 in the
# arithmetic and pinned every rank to device 0.0.
gpu_id=$(( (MPI_LOCALRANKID % num_tile ) ))
|
|
||||||
tile_id=$((MPI_LOCALRANKID / num_tile))
|
|
||||||
|
|
||||||
export ZE_AFFINITY_MASK=$gpu_id.$tile_id
|
|
||||||
|
|
||||||
echo "local rank $MPI_LOCALRANKID ; ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK"
|
|
||||||
|
|
||||||
"$@"
|
|
||||||
|
|
@ -1,15 +0,0 @@
|
|||||||
../../configure \
|
|
||||||
--enable-simd=GPU \
|
|
||||||
--enable-gen-simd-width=64 \
|
|
||||||
--enable-comms=mpi-auto \
|
|
||||||
--enable-accelerator-cshift \
|
|
||||||
--disable-gparity \
|
|
||||||
--disable-fermion-reps \
|
|
||||||
--enable-shm=nvlink \
|
|
||||||
--enable-accelerator=sycl \
|
|
||||||
--enable-unified=no \
|
|
||||||
MPICXX=mpicxx \
|
|
||||||
CXX=icpx \
|
|
||||||
LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader " \
|
|
||||||
CXXFLAGS="-fiopenmp -fsycl-unnamed-lambda -fsycl -Wno-tautological-compare "
|
|
||||||
|
|
@ -1,3 +0,0 @@
|
|||||||
export https_proxy=http://proxy-chain.intel.com:911
|
|
||||||
module load intel-release
|
|
||||||
module load intel/mpich
|
|
Loading…
x
Reference in New Issue
Block a user