1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-05 03:35:55 +01:00

First compile on HIP

This commit is contained in:
Peter Boyle 2020-05-10 05:28:09 -04:00
parent 52081acfa5
commit bbbee5660d
9 changed files with 44 additions and 25 deletions

View File

@ -12,7 +12,7 @@
#endif
/* NVCC save and restore compile environment*/
#ifdef GRID_CUDA
#ifdef __NVCC__
#pragma push
#pragma diag_suppress code_is_unreachable
#pragma push_macro("__CUDA_ARCH__")

View File

@ -155,7 +155,7 @@ public:
if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) acceleratorAllocShared(bytes);
assert( ptr != (_Tp *)NULL);
assert( ( (_Tp*)ptr != (_Tp *)NULL ) );
return ptr;
}

View File

@ -38,7 +38,7 @@ inline typename vobj::scalar_object sum_cpu(const vobj *arg, Integer osites)
{
typedef typename vobj::scalar_object sobj;
const int Nsimd = vobj::Nsimd();
// const int Nsimd = vobj::Nsimd();
const int nthread = GridThread::GetThreads();
Vector<sobj> sumarray(nthread);

View File

@ -644,7 +644,7 @@ void CayleyFermion5D<Impl>::ContractConservedCurrent( PropagatorField &q_in_1,
Current curr_type,
unsigned int mu)
{
#ifndef GRID_CUDA
#if (!defined(GRID_CUDA)) && (!defined(GRID_HIP))
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
@ -779,9 +779,9 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
assert(mu>=0);
assert(mu<Nd);
int tshift = (mu == Nd-1) ? 1 : 0;
#if 0
int tshift = (mu == Nd-1) ? 1 : 0;
////////////////////////////////////////////////
// SHAMIR CASE
////////////////////////////////////////////////
@ -828,7 +828,8 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
}
#endif
#ifndef GRID_CUDA
#if (!defined(GRID_CUDA)) && (!defined(GRID_HIP))
int tshift = (mu == Nd-1) ? 1 : 0;
////////////////////////////////////////////////
// GENERAL CAYLEY CASE
////////////////////////////////////////////////

View File

@ -36,7 +36,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include "BinaryIO.h"
#include "TextIO.h"
#include "XmlIO.h"
#ifndef GRID_CUDA
#if (!defined(GRID_CUDA)) && (!defined(GRID_HIP))
#include "JSON_IO.h"
#endif

View File

@ -32,7 +32,12 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
*/
//----------------------------------------------------------------------
#ifdef GRID_CUDA
#include <cuda_fp16.h>
#endif
#ifdef GRID_HIP
#include <hip_fp16.h>
#endif
namespace Grid {

View File

@ -31,7 +31,7 @@ directory
#ifndef GRID_SIMD_H
#define GRID_SIMD_H
#ifdef GRID_CUDA
#if defined(GRID_CUDA) || defined(GRID_HIP)
#include <thrust/complex.h>
#endif
@ -65,7 +65,7 @@ typedef RealD Real;
typedef RealF Real;
#endif
#ifdef GRID_CUDA
#if defined(GRID_CUDA) || defined(GRID_HIP)
typedef thrust::complex<RealF> ComplexF;
typedef thrust::complex<RealD> ComplexD;
typedef thrust::complex<Real> Complex;

View File

@ -73,9 +73,6 @@ void acceleratorThreads(uint32_t);
//////////////////////////////////////////////
// CUDA acceleration
//////////////////////////////////////////////
#ifdef __NVCC__
#define GRID_CUDA
#endif
#ifdef GRID_CUDA
@ -197,6 +194,9 @@ inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);};
// HIP acceleration
//////////////////////////////////////////////
#ifdef GRID_HIP
NAMESPACE_END(Grid);
#include <hip/hip_runtime.h>
NAMESPACE_BEGIN(Grid);
#ifdef __HIP_DEVICE_COMPILE__
#define GRID_SIMT
@ -224,7 +224,7 @@ inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);};
}; \
dim3 hip_threads(acceleratorThreads(),nsimd); \
dim3 hip_blocks ((num+acceleratorThreads()-1)/acceleratorThreads()); \
hipLaunchKernelGGL(LambdaApply,hip_blocks,hip_threads,0,0,num,simd,lambda);\
hipLaunchKernelGGL(LambdaApply,hip_blocks,hip_threads,0,0,num,nsimd,lambda);\
}
#define accelerator_for( iterator, num, nsimd, ... ) \

View File

@ -138,7 +138,7 @@ esac
############### SUMMIT JSRUN
AC_ARG_ENABLE([summit],
[AC_HELP_STRING([--enable-summit=yes|no], [enable IBMs jsrun resource manager for SUMMIT])],
[ac_JSRUN=${enable_summit}], [ac_SUMMIT=no])
[ac_SUMMIT=${enable_summit}], [ac_SUMMIT=no])
case ${ac_SUMMIT} in
no);;
yes)
@ -148,18 +148,26 @@ case ${ac_SUMMIT} in
esac
############### SYCL
AC_ARG_ENABLE([sycl],
[AC_HELP_STRING([--enable-sycl=yes|no], [enable SYCL])],
[ac_JSRUN=${enable_sycl}], [ac_SYCL=no])
case ${ac_SYCL} in
no);;
yes)
AC_ARG_ENABLE([accelerator],
[AC_HELP_STRING([--enable-accelerator=cuda|sycl|hip|none], [enable none,cuda,sycl,hip acceleration])],
[ac_ACCELERATOR=${enable_accelerator}], [ac_ACCELERATOR=none])
case ${ac_ACCELERATOR} in
cuda)
echo CUDA acceleration
AC_DEFINE([GRID_CUDA],[1],[Use CUDA offload]);;
sycl)
echo SYCL acceleration
AC_DEFINE([GRID_SYCL],[1],[Use SYCL offload]);;
hip)
echo HIP acceleration
AC_DEFINE([GRID_HIP],[1],[Use HIP offload]);;
none)
echo NO acceleration
;;
*)
AC_DEFINE([GRID_SYCL],[1],[Use SYCL offload]);;
# Any --enable-accelerator value other than cuda, sycl, hip or none is a configuration error.
AC_MSG_ERROR(["Acceleration not supported ${ac_ACCELERATOR}"]);;
esac
############### Intel libraries
AC_ARG_ENABLE([mkl],
[AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])],
@ -289,16 +297,20 @@ esac
##################### Compiler dependent choices
case ${CXX} in
nvcc)
# CXX="nvcc -keep -v -x cu "
# CXXLD="nvcc -v -link"
CXX="nvcc -x cu "
CXXLD="nvcc -link"
# CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing -Xcompiler -Wno-unusable-partial-specialization --expt-extended-lambda --expt-relaxed-constexpr"
CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr"
if test $ac_openmp = yes; then
CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp"
fi
;;
# hipcc arm: compiler-dependent flags used when CXX is exactly hipcc.
hipcc)
# NOTE(review): this flag set is the same as the one in the nvcc arm above.
# -Xcompiler and the --expt-* options look nvcc-specific; confirm that hipcc
# accepts them on the intended target platform.
CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr"
# Link with the same driver that is used to compile.
CXXLD=${CXX}
# Append the OpenMP flag only when ac_openmp is yes.
if test $ac_openmp = yes; then
CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp"
fi
;;
*)
CXXLD=${CXX}
CXXFLAGS="$CXXFLAGS -fno-strict-aliasing"
@ -599,6 +611,7 @@ compiler version : ${ax_cv_gxx_version}
----- BUILD OPTIONS -----------------------------------
SIMD : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG}
Threading : ${ac_openmp}
Acceleration : ${ac_ACCELERATOR}
Communications type : ${comms_type}
Shared memory allocator : ${ac_SHM}
Shared memory mmap path : ${ac_SHMPATH}