From bbbee5660d036a8a39b48f46f75bf790b65b7aa7 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sun, 10 May 2020 05:28:09 -0400 Subject: [PATCH] First compiile on HiP --- Grid/Grid_Eigen_Dense.h | 2 +- Grid/allocator/AlignedAllocator.h | 2 +- Grid/lattice/Lattice_reduction.h | 2 +- .../CayleyFermion5DImplementation.h | 7 ++-- Grid/serialisation/Serialisation.h | 2 +- Grid/simd/Grid_gpu_vec.h | 5 +++ Grid/simd/Simd.h | 4 +- Grid/threads/Accelerator.h | 8 ++-- configure.ac | 37 +++++++++++++------ 9 files changed, 44 insertions(+), 25 deletions(-) diff --git a/Grid/Grid_Eigen_Dense.h b/Grid/Grid_Eigen_Dense.h index 3aec81b6..9556c03d 100644 --- a/Grid/Grid_Eigen_Dense.h +++ b/Grid/Grid_Eigen_Dense.h @@ -12,7 +12,7 @@ #endif /* NVCC save and restore compile environment*/ -#ifdef GRID_CUDA +#ifdef __NVCC__ #pragma push #pragma diag_suppress code_is_unreachable #pragma push_macro("__CUDA_ARCH__") diff --git a/Grid/allocator/AlignedAllocator.h b/Grid/allocator/AlignedAllocator.h index 7921c415..a29c8bcb 100644 --- a/Grid/allocator/AlignedAllocator.h +++ b/Grid/allocator/AlignedAllocator.h @@ -155,7 +155,7 @@ public: if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) acceleratorAllocShared(bytes); - assert( ptr != (_Tp *)NULL); + assert( ( (_Tp*)ptr != (_Tp *)NULL ) ); return ptr; } diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index 1f06ac66..997affe8 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -38,7 +38,7 @@ inline typename vobj::scalar_object sum_cpu(const vobj *arg, Integer osites) { typedef typename vobj::scalar_object sobj; - const int Nsimd = vobj::Nsimd(); + // const int Nsimd = vobj::Nsimd(); const int nthread = GridThread::GetThreads(); Vector sumarray(nthread); diff --git a/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h index 082e4b73..7542dd34 100644 --- a/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h @@ -644,7 +644,7 @@ void CayleyFermion5D::ContractConservedCurrent( PropagatorField &q_in_1, Current curr_type, unsigned int mu) { -#ifndef GRID_CUDA +#if (!defined(GRID_CUDA)) && (!defined(GRID_HIP)) Gamma::Algebra Gmu [] = { Gamma::Algebra::GammaX, Gamma::Algebra::GammaY, @@ -779,9 +779,9 @@ void CayleyFermion5D::SeqConservedCurrent(PropagatorField &q_in, assert(mu>=0); assert(mu::SeqConservedCurrent(PropagatorField &q_in, } #endif -#ifndef GRID_CUDA +#if (!defined(GRID_CUDA)) && (!defined(GRID_HIP)) + int tshift = (mu == Nd-1) ? 1 : 0; //////////////////////////////////////////////// // GENERAL CAYLEY CASE //////////////////////////////////////////////// diff --git a/Grid/serialisation/Serialisation.h b/Grid/serialisation/Serialisation.h index 177a65f9..e14120af 100644 --- a/Grid/serialisation/Serialisation.h +++ b/Grid/serialisation/Serialisation.h @@ -36,7 +36,7 @@ Author: Peter Boyle #include "BinaryIO.h" #include "TextIO.h" #include "XmlIO.h" -#ifndef GRID_CUDA +#if (!defined(GRID_CUDA)) && (!defined(GRID_HIP)) #include "JSON_IO.h" #endif diff --git a/Grid/simd/Grid_gpu_vec.h b/Grid/simd/Grid_gpu_vec.h index 0bff4c2f..aa7e385c 100644 --- a/Grid/simd/Grid_gpu_vec.h +++ b/Grid/simd/Grid_gpu_vec.h @@ -32,7 +32,12 @@ Author: Peter Boyle */ //---------------------------------------------------------------------- +#ifdef GRID_CUDA #include +#endif +#ifdef GRID_HIP +#include +#endif namespace Grid { diff --git a/Grid/simd/Simd.h b/Grid/simd/Simd.h index 80f7c2e7..37aee2ed 100644 --- a/Grid/simd/Simd.h +++ b/Grid/simd/Simd.h @@ -31,7 +31,7 @@ directory #ifndef GRID_SIMD_H #define GRID_SIMD_H -#ifdef GRID_CUDA +#if defined(GRID_CUDA) || defined(GRID_HIP) #include #endif @@ -65,7 +65,7 @@ typedef RealD Real; typedef RealF Real; #endif -#ifdef GRID_CUDA +#if defined(GRID_CUDA) || defined(GRID_HIP) typedef thrust::complex ComplexF; typedef thrust::complex ComplexD; typedef thrust::complex Complex; diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h index ec20d8c9..6f2e0b04 100644 --- a/Grid/threads/Accelerator.h +++ b/Grid/threads/Accelerator.h @@ -73,9 +73,6 @@ void acceleratorThreads(uint32_t); ////////////////////////////////////////////// // CUDA acceleration ////////////////////////////////////////////// -#ifdef __NVCC__ -#define GRID_CUDA -#endif #ifdef GRID_CUDA @@ -197,6 +194,9 @@ inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);}; // HIP acceleration ////////////////////////////////////////////// #ifdef GRID_HIP +NAMESPACE_END(Grid); +#include +NAMESPACE_BEGIN(Grid); #ifdef __HIP_DEVICE_COMPILE__ #define GRID_SIMT @@ -224,7 +224,7 @@ inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);}; }; \ dim3 hip_threads(acceleratorThreads(),nsimd); \ dim3 hip_blocks ((num+acceleratorThreads()-1)/acceleratorThreads()); \ - hipLaunchKernelGGL(LambdaApply,hip_blocks,hip_threads,0,0,num,simd,lambda);\ + hipLaunchKernelGGL(LambdaApply,hip_blocks,hip_threads,0,0,num,nsimd,lambda);\ } #define accelerator_for( iterator, num, nsimd, ... ) \ diff --git a/configure.ac b/configure.ac index cf5ca85b..f9ea03fc 100644 --- a/configure.ac +++ b/configure.ac @@ -138,7 +138,7 @@ esac ############### SUMMIT JSRUN AC_ARG_ENABLE([summit], [AC_HELP_STRING([--enable-summit=yes|no], [enable IBMs jsrun resource manager for SUMMIT])], - [ac_JSRUN=${enable_summit}], [ac_SUMMIT=no]) + [ac_SUMMIT=${enable_summit}], [ac_SUMMIT=no]) case ${ac_SUMMIT} in no);; yes) @@ -148,18 +148,26 @@ case ${ac_SUMMIT} in esac ############### SYCL -AC_ARG_ENABLE([sycl], - [AC_HELP_STRING([--enable-sycl=yes|no], [enable SYCL])], - [ac_JSRUN=${enable_sycl}], [ac_SYCL=no]) -case ${ac_SYCL} in - no);; - yes) +AC_ARG_ENABLE([accelerator], + [AC_HELP_STRING([--enable-accelerator=cuda|sycl|hip|none], [enable none,cuda,sycl,hip acceleration])], + [ac_ACCELERATOR=${enable_accelerator}], [ac_ACCELERATOR=none]) +case ${ac_ACCELERATOR} in + cuda) + echo CUDA acceleration + AC_DEFINE([GRID_CUDA],[1],[Use CUDA offload]);; + sycl) + echo SYCL acceleration AC_DEFINE([GRID_SYCL],[1],[Use SYCL offload]);; + hip) + echo HIP acceleration + AC_DEFINE([GRID_HIP],[1],[Use HIP offload]);; + none) + echo NO acceleration + ;; *) - AC_DEFINE([GRID_SYCL],[1],[Use SYCL offload]);; + AC_MSG_ERROR(["Acceleration not suppoorted ${ac_ACCELERATOR}"]);; esac - ############### Intel libraries AC_ARG_ENABLE([mkl], [AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])], @@ -289,16 +297,20 @@ esac ##################### Compiler dependent choices case ${CXX} in nvcc) -# CXX="nvcc -keep -v -x cu " -# CXXLD="nvcc -v -link" CXX="nvcc -x cu " CXXLD="nvcc -link" -# CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing -Xcompiler -Wno-unusable-partial-specialization --expt-extended-lambda --expt-relaxed-constexpr" CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr" if test $ac_openmp = yes; then CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp" fi ;; + hipcc) + CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr" + CXXLD=${CXX} + if test $ac_openmp = yes; then + CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp" + fi + ;; *) CXXLD=${CXX} CXXFLAGS="$CXXFLAGS -fno-strict-aliasing" @@ -599,6 +611,7 @@ compiler version : ${ax_cv_gxx_version} ----- BUILD OPTIONS ----------------------------------- SIMD : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG} Threading : ${ac_openmp} +Acceleration : ${ac_ACCELERATOR} Communications type : ${comms_type} Shared memory allocator : ${ac_SHM} Shared memory mmap path : ${ac_SHMPATH}