1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-09 23:45:36 +00:00

Intel DPCPP Gold happy now (compiles all, runs Benchmark_dwf_fp32 )

This commit is contained in:
Peter Boyle 2020-12-03 03:47:11 -08:00
parent 497e7c1c40
commit cf76741ec6
9 changed files with 97 additions and 9 deletions

View File

@ -37,7 +37,9 @@ directory
#endif
//disables and intel compiler specific warning (in json.hpp)
#ifdef __ICC
#pragma warning disable 488
#endif
#ifdef __NVCC__
//disables nvcc specific warning in json.hpp

View File

@ -21,6 +21,7 @@ if BUILD_HDF5
extra_headers+=serialisation/Hdf5Type.h
endif
all: version-cache Version.h
version-cache:
@ -53,6 +54,17 @@ Version.h: version-cache
include Make.inc
include Eigen.inc
extra_sources+=$(ZWILS_FERMION_FILES)
extra_sources+=$(WILS_FERMION_FILES)
extra_sources+=$(STAG_FERMION_FILES)
if BUILD_GPARITY
extra_sources+=$(GP_FERMION_FILES)
endif
if BUILD_FERMION_REPS
extra_sources+=$(ADJ_FERMION_FILES)
extra_sources+=$(TWOIND_FERMION_FILES)
endif
lib_LIBRARIES = libGrid.a
CCFILES += $(extra_sources)

View File

@ -102,7 +102,7 @@ public:
///////////////////////////////////////////////////
static void SharedMemoryAllocate(uint64_t bytes, int flags);
static void SharedMemoryFree(void);
static void SharedMemoryCopy(void *dest,const void *src,size_t bytes);
static void SharedMemoryCopy(void *dest,void *src,size_t bytes);
static void SharedMemoryZero(void *dest,size_t bytes);
};

View File

@ -715,7 +715,7 @@ void GlobalSharedMemory::SharedMemoryZero(void *dest,size_t bytes)
bzero(dest,bytes);
#endif
}
void GlobalSharedMemory::SharedMemoryCopy(void *dest,const void *src,size_t bytes)
void GlobalSharedMemory::SharedMemoryCopy(void *dest,void *src,size_t bytes)
{
#ifdef GRID_CUDA
cudaMemcpy(dest,src,bytes,cudaMemcpyDefault);

View File

@ -29,6 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/GridCore.h>
NAMESPACE_BEGIN(Grid);
#define header "SharedMemoryNone: "
/*Construct from an MPI communicator*/
void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
@ -55,6 +56,38 @@ void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_M
////////////////////////////////////////////////////////////////////////////////////////////
// Hugetlbfs mapping intended, use anonymous mmap
////////////////////////////////////////////////////////////////////////////////////////////
#if 1
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
std::cout << header "SharedMemoryAllocate "<< bytes<< " GPU implementation "<<std::endl;
void * ShmCommBuf ;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// Each MPI rank should allocate our own buffer
///////////////////////////////////////////////////////////////////////////////////////////////////////////
ShmCommBuf = acceleratorAllocDevice(bytes);
if (ShmCommBuf == (void *)NULL ) {
std::cerr << " SharedMemoryNone.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl;
exit(EXIT_FAILURE);
}
if ( WorldRank == 0 ){
std::cout << WorldRank << header " SharedMemoryNone.cc acceleratorAllocDevice "<< bytes
<< "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl;
}
SharedMemoryZero(ShmCommBuf,bytes);
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// Loop over ranks/gpu's on our node
///////////////////////////////////////////////////////////////////////////////////////////////////////////
WorldShmCommBufs[0] = ShmCommBuf;
_ShmAllocBytes=bytes;
_ShmAlloc=1;
}
#else
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
void * ShmCommBuf ;
@ -83,7 +116,15 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
_ShmAllocBytes=bytes;
_ShmAlloc=1;
};
#endif
void GlobalSharedMemory::SharedMemoryZero(void *dest,size_t bytes)
{
acceleratorMemSet(dest,0,bytes);
}
void GlobalSharedMemory::SharedMemoryCopy(void *dest,void *src,size_t bytes)
{
acceleratorCopyToDevice(src,dest,bytes);
}
////////////////////////////////////////////////////////
// Global shared functionality finished
// Now move to per communicator functionality

View File

@ -62,7 +62,7 @@ void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm)
basis_v.push_back(basis[k].View(AcceleratorWrite));
}
#if ( (!defined(GRID_SYCL)) && (!defined(GRID_CUDA)) )
#if ( (!defined(GRID_CUDA)) )
int max_threads = thread_max();
Vector < vobj > Bt(Nm * max_threads);
thread_region
@ -164,7 +164,8 @@ void basisRotateJ(Field &result,std::vector<Field> &basis,Eigen::MatrixXd& Qt,in
auto basis_vp=& basis_v[0];
autoView(result_v,result,AcceleratorWrite);
accelerator_for(ss, grid->oSites(),vobj::Nsimd(),{
auto B=coalescedRead(zz);
vobj zzz=Zero();
auto B=coalescedRead(zzz);
for(int k=k0; k<k1; ++k){
B +=Qt_j[k] * coalescedRead(basis_vp[k][ss]);
}

View File

@ -24,7 +24,7 @@ typedef typename GparityDomainWallFermionD::FermionField GparityLatticeFermionD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
#ifdef ENABLE_GPARITY
int Ls=16;
for(int i=0;i<argc;i++)
if(std::string(argv[i]) == "-Ls"){
@ -184,7 +184,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
DwD.Report();
}
#endif
Grid_finalize();
}

View File

@ -123,6 +123,24 @@ case ${ac_LAPACK} in
AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);;
esac
############### fermions
AC_ARG_ENABLE([fermion-reps],
[AC_HELP_STRING([--fermion-reps=yes|no], [enable extra fermion representation support])],
[ac_FERMION_REPS=${enable_fermion_reps}], [ac_FERMION_REPS=yes])
AM_CONDITIONAL(BUILD_FERMION_REPS, [ test "${ac_FERMION_REPS}X" == "yesX" ])
AC_ARG_ENABLE([gparity],
[AC_HELP_STRING([--enable-gparity=yes|no], [enable G-parity support])],
[ac_GPARITY=${enable_gparity}], [ac_GPARITY=yes])
AM_CONDITIONAL(BUILD_GPARITY, [ test "${ac_GPARITY}X" == "yesX" ])
case ${ac_FERMION_REPS} in
yes) AC_DEFINE([ENABLE_FERMION_REPS],[1],[non QCD fermion reps]);;
esac
case ${ac_GPARITY} in
yes) AC_DEFINE([ENABLE_GPARITY],[1],[fermion actions with GPARITY BCs]);;
esac
############### Nc
AC_ARG_ENABLE([Nc],
[AC_HELP_STRING([--enable-Nc=2|3|4], [enable number of colours])],

View File

@ -6,13 +6,27 @@ home=`pwd`
cd $home/Grid
HFILES=`find . -type f -name '*.h' -not -name '*Hdf5*' -not -path '*/gamma-gen/*' -not -path '*/Old/*' -not -path '*/Eigen/*'`
HFILES="$HFILES"
CCFILES=`find . -name '*.cc' -not -path '*/gamma-gen/*' -not -name '*Communicator*.cc' -not -name '*SharedMemory*.cc' -not -name '*Hdf5*'`
CCFILES=`find . -name '*.cc' -not -path '*/instantiation/*/*' -not -path '*/gamma-gen/*' -not -name '*Communicator*.cc' -not -name '*SharedMemory*.cc' -not -name '*Hdf5*'`
ZWILS_FERMION_FILES=` find . -name '*.cc' -path '*/instantiation/*' -path '*/instantiation/ZWilsonImpl*' `
WILS_FERMION_FILES=` find . -name '*.cc' -path '*/instantiation/*' -path '*/instantiation/WilsonImpl*' `
STAG_FERMION_FILES=` find . -name '*.cc' -path '*/instantiation/*' -path '*/instantiation/Staggered*' `
GP_FERMION_FILES=` find . -name '*.cc' -path '*/instantiation/*' -path '*/instantiation/Gparity*' `
ADJ_FERMION_FILES=` find . -name '*.cc' -path '*/instantiation/*' -path '*/instantiation/WilsonAdj*' `
TWOIND_FERMION_FILES=`find . -name '*.cc' -path '*/instantiation/*' -path '*/instantiation/WilsonTwoIndex*'`
HPPFILES=`find . -type f -name '*.hpp'`
echo HFILES=$HFILES $HPPFILES > Make.inc
echo >> Make.inc
echo CCFILES=$CCFILES >> Make.inc
echo ZWILS_FERMION_FILES=$ZWILS_FERMION_FILES >> Make.inc
echo WILS_FERMION_FILES=$WILS_FERMION_FILES >> Make.inc
echo STAG_FERMION_FILES=$STAG_FERMION_FILES >> Make.inc
echo GP_FERMION_FILES=$GP_FERMION_FILES >> Make.inc
echo ADJ_FERMION_FILES=$ADJ_FERMION_FILES >> Make.inc
echo TWOIND_FERMION_FILES=$TWOIND_FERMION_FILES >> Make.inc
# tests Make.inc
cd $home/tests