mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Merge branch 'develop' into feature/gparity_twist_GPU
This commit is contained in:
commit
9e7bacb5a4
@ -37,7 +37,9 @@ directory
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
//disables and intel compiler specific warning (in json.hpp)
|
//disables and intel compiler specific warning (in json.hpp)
|
||||||
|
#ifdef __ICC
|
||||||
#pragma warning disable 488
|
#pragma warning disable 488
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __NVCC__
|
#ifdef __NVCC__
|
||||||
//disables nvcc specific warning in json.hpp
|
//disables nvcc specific warning in json.hpp
|
||||||
|
@ -21,6 +21,7 @@ if BUILD_HDF5
|
|||||||
extra_headers+=serialisation/Hdf5Type.h
|
extra_headers+=serialisation/Hdf5Type.h
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
||||||
all: version-cache Version.h
|
all: version-cache Version.h
|
||||||
|
|
||||||
version-cache:
|
version-cache:
|
||||||
@ -53,6 +54,17 @@ Version.h: version-cache
|
|||||||
include Make.inc
|
include Make.inc
|
||||||
include Eigen.inc
|
include Eigen.inc
|
||||||
|
|
||||||
|
extra_sources+=$(ZWILS_FERMION_FILES)
|
||||||
|
extra_sources+=$(WILS_FERMION_FILES)
|
||||||
|
extra_sources+=$(STAG_FERMION_FILES)
|
||||||
|
if BUILD_GPARITY
|
||||||
|
extra_sources+=$(GP_FERMION_FILES)
|
||||||
|
endif
|
||||||
|
if BUILD_FERMION_REPS
|
||||||
|
extra_sources+=$(ADJ_FERMION_FILES)
|
||||||
|
extra_sources+=$(TWOIND_FERMION_FILES)
|
||||||
|
endif
|
||||||
|
|
||||||
lib_LIBRARIES = libGrid.a
|
lib_LIBRARIES = libGrid.a
|
||||||
|
|
||||||
CCFILES += $(extra_sources)
|
CCFILES += $(extra_sources)
|
||||||
|
@ -1,67 +0,0 @@
|
|||||||
#include <Grid/GridCore.h>
|
|
||||||
#include <fcntl.h>
|
|
||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
|
||||||
|
|
||||||
MemoryStats *MemoryProfiler::stats = nullptr;
|
|
||||||
bool MemoryProfiler::debug = false;
|
|
||||||
|
|
||||||
void check_huge_pages(void *Buf,uint64_t BYTES)
|
|
||||||
{
|
|
||||||
#ifdef __linux__
|
|
||||||
int fd = open("/proc/self/pagemap", O_RDONLY);
|
|
||||||
assert(fd >= 0);
|
|
||||||
const int page_size = 4096;
|
|
||||||
uint64_t virt_pfn = (uint64_t)Buf / page_size;
|
|
||||||
off_t offset = sizeof(uint64_t) * virt_pfn;
|
|
||||||
uint64_t npages = (BYTES + page_size-1) / page_size;
|
|
||||||
uint64_t pagedata[npages];
|
|
||||||
uint64_t ret = lseek(fd, offset, SEEK_SET);
|
|
||||||
assert(ret == offset);
|
|
||||||
ret = ::read(fd, pagedata, sizeof(uint64_t)*npages);
|
|
||||||
assert(ret == sizeof(uint64_t) * npages);
|
|
||||||
int nhugepages = npages / 512;
|
|
||||||
int n4ktotal, nnothuge;
|
|
||||||
n4ktotal = 0;
|
|
||||||
nnothuge = 0;
|
|
||||||
for (int i = 0; i < nhugepages; ++i) {
|
|
||||||
uint64_t baseaddr = (pagedata[i*512] & 0x7fffffffffffffULL) * page_size;
|
|
||||||
for (int j = 0; j < 512; ++j) {
|
|
||||||
uint64_t pageaddr = (pagedata[i*512+j] & 0x7fffffffffffffULL) * page_size;
|
|
||||||
++n4ktotal;
|
|
||||||
if (pageaddr != baseaddr + j * page_size)
|
|
||||||
++nnothuge;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
int rank = CartesianCommunicator::RankWorld();
|
|
||||||
printf("rank %d Allocated %d 4k pages, %d not in huge pages\n", rank, n4ktotal, nnothuge);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string sizeString(const size_t bytes)
|
|
||||||
{
|
|
||||||
constexpr unsigned int bufSize = 256;
|
|
||||||
const char *suffixes[7] = {"", "K", "M", "G", "T", "P", "E"};
|
|
||||||
char buf[256];
|
|
||||||
size_t s = 0;
|
|
||||||
double count = bytes;
|
|
||||||
|
|
||||||
while (count >= 1024 && s < 7)
|
|
||||||
{
|
|
||||||
s++;
|
|
||||||
count /= 1024;
|
|
||||||
}
|
|
||||||
if (count - floor(count) == 0.0)
|
|
||||||
{
|
|
||||||
snprintf(buf, bufSize, "%d %sB", (int)count, suffixes[s]);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
snprintf(buf, bufSize, "%.1f %sB", count, suffixes[s]);
|
|
||||||
}
|
|
||||||
|
|
||||||
return std::string(buf);
|
|
||||||
}
|
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
|
||||||
|
|
@ -102,7 +102,7 @@ public:
|
|||||||
///////////////////////////////////////////////////
|
///////////////////////////////////////////////////
|
||||||
static void SharedMemoryAllocate(uint64_t bytes, int flags);
|
static void SharedMemoryAllocate(uint64_t bytes, int flags);
|
||||||
static void SharedMemoryFree(void);
|
static void SharedMemoryFree(void);
|
||||||
static void SharedMemoryCopy(void *dest,const void *src,size_t bytes);
|
static void SharedMemoryCopy(void *dest,void *src,size_t bytes);
|
||||||
static void SharedMemoryZero(void *dest,size_t bytes);
|
static void SharedMemoryZero(void *dest,size_t bytes);
|
||||||
|
|
||||||
};
|
};
|
||||||
|
@ -715,7 +715,7 @@ void GlobalSharedMemory::SharedMemoryZero(void *dest,size_t bytes)
|
|||||||
bzero(dest,bytes);
|
bzero(dest,bytes);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
void GlobalSharedMemory::SharedMemoryCopy(void *dest,const void *src,size_t bytes)
|
void GlobalSharedMemory::SharedMemoryCopy(void *dest,void *src,size_t bytes)
|
||||||
{
|
{
|
||||||
#ifdef GRID_CUDA
|
#ifdef GRID_CUDA
|
||||||
cudaMemcpy(dest,src,bytes,cudaMemcpyDefault);
|
cudaMemcpy(dest,src,bytes,cudaMemcpyDefault);
|
||||||
|
@ -29,6 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#include <Grid/GridCore.h>
|
#include <Grid/GridCore.h>
|
||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
#define header "SharedMemoryNone: "
|
||||||
|
|
||||||
/*Construct from an MPI communicator*/
|
/*Construct from an MPI communicator*/
|
||||||
void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
|
void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
|
||||||
@ -55,6 +56,38 @@ void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_M
|
|||||||
////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Hugetlbfs mapping intended, use anonymous mmap
|
// Hugetlbfs mapping intended, use anonymous mmap
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
#if 1
|
||||||
|
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||||
|
{
|
||||||
|
std::cout << header "SharedMemoryAllocate "<< bytes<< " GPU implementation "<<std::endl;
|
||||||
|
void * ShmCommBuf ;
|
||||||
|
assert(_ShmSetup==1);
|
||||||
|
assert(_ShmAlloc==0);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Each MPI rank should allocate our own buffer
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
ShmCommBuf = acceleratorAllocDevice(bytes);
|
||||||
|
|
||||||
|
if (ShmCommBuf == (void *)NULL ) {
|
||||||
|
std::cerr << " SharedMemoryNone.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl;
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
if ( WorldRank == 0 ){
|
||||||
|
std::cout << WorldRank << header " SharedMemoryNone.cc acceleratorAllocDevice "<< bytes
|
||||||
|
<< "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl;
|
||||||
|
}
|
||||||
|
SharedMemoryZero(ShmCommBuf,bytes);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Loop over ranks/gpu's on our node
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
WorldShmCommBufs[0] = ShmCommBuf;
|
||||||
|
|
||||||
|
_ShmAllocBytes=bytes;
|
||||||
|
_ShmAlloc=1;
|
||||||
|
}
|
||||||
|
#else
|
||||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||||
{
|
{
|
||||||
void * ShmCommBuf ;
|
void * ShmCommBuf ;
|
||||||
@ -83,7 +116,15 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
|||||||
_ShmAllocBytes=bytes;
|
_ShmAllocBytes=bytes;
|
||||||
_ShmAlloc=1;
|
_ShmAlloc=1;
|
||||||
};
|
};
|
||||||
|
#endif
|
||||||
|
void GlobalSharedMemory::SharedMemoryZero(void *dest,size_t bytes)
|
||||||
|
{
|
||||||
|
acceleratorMemSet(dest,0,bytes);
|
||||||
|
}
|
||||||
|
void GlobalSharedMemory::SharedMemoryCopy(void *dest,void *src,size_t bytes)
|
||||||
|
{
|
||||||
|
acceleratorCopyToDevice(src,dest,bytes);
|
||||||
|
}
|
||||||
////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////
|
||||||
// Global shared functionality finished
|
// Global shared functionality finished
|
||||||
// Now move to per communicator functionality
|
// Now move to per communicator functionality
|
||||||
|
@ -62,7 +62,7 @@ void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm)
|
|||||||
basis_v.push_back(basis[k].View(AcceleratorWrite));
|
basis_v.push_back(basis[k].View(AcceleratorWrite));
|
||||||
}
|
}
|
||||||
|
|
||||||
#if ( (!defined(GRID_SYCL)) && (!defined(GRID_CUDA)) )
|
#if ( (!defined(GRID_CUDA)) )
|
||||||
int max_threads = thread_max();
|
int max_threads = thread_max();
|
||||||
Vector < vobj > Bt(Nm * max_threads);
|
Vector < vobj > Bt(Nm * max_threads);
|
||||||
thread_region
|
thread_region
|
||||||
@ -164,7 +164,8 @@ void basisRotateJ(Field &result,std::vector<Field> &basis,Eigen::MatrixXd& Qt,in
|
|||||||
auto basis_vp=& basis_v[0];
|
auto basis_vp=& basis_v[0];
|
||||||
autoView(result_v,result,AcceleratorWrite);
|
autoView(result_v,result,AcceleratorWrite);
|
||||||
accelerator_for(ss, grid->oSites(),vobj::Nsimd(),{
|
accelerator_for(ss, grid->oSites(),vobj::Nsimd(),{
|
||||||
auto B=coalescedRead(zz);
|
vobj zzz=Zero();
|
||||||
|
auto B=coalescedRead(zzz);
|
||||||
for(int k=k0; k<k1; ++k){
|
for(int k=k0; k<k1; ++k){
|
||||||
B +=Qt_j[k] * coalescedRead(basis_vp[k][ss]);
|
B +=Qt_j[k] * coalescedRead(basis_vp[k][ss]);
|
||||||
}
|
}
|
||||||
|
@ -24,7 +24,7 @@ typedef typename GparityDomainWallFermionD::FermionField GparityLatticeFermionD;
|
|||||||
int main (int argc, char ** argv)
|
int main (int argc, char ** argv)
|
||||||
{
|
{
|
||||||
Grid_init(&argc,&argv);
|
Grid_init(&argc,&argv);
|
||||||
|
#ifdef ENABLE_GPARITY
|
||||||
int Ls=16;
|
int Ls=16;
|
||||||
for(int i=0;i<argc;i++)
|
for(int i=0;i<argc;i++)
|
||||||
if(std::string(argv[i]) == "-Ls"){
|
if(std::string(argv[i]) == "-Ls"){
|
||||||
@ -184,7 +184,7 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
|
||||||
DwD.Report();
|
DwD.Report();
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
18
configure.ac
18
configure.ac
@ -123,6 +123,24 @@ case ${ac_LAPACK} in
|
|||||||
AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);;
|
AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
|
############### fermions
|
||||||
|
AC_ARG_ENABLE([fermion-reps],
|
||||||
|
[AC_HELP_STRING([--fermion-reps=yes|no], [enable extra fermion representation support])],
|
||||||
|
[ac_FERMION_REPS=${enable_fermion_reps}], [ac_FERMION_REPS=yes])
|
||||||
|
|
||||||
|
AM_CONDITIONAL(BUILD_FERMION_REPS, [ test "${ac_FERMION_REPS}X" == "yesX" ])
|
||||||
|
|
||||||
|
AC_ARG_ENABLE([gparity],
|
||||||
|
[AC_HELP_STRING([--enable-gparity=yes|no], [enable G-parity support])],
|
||||||
|
[ac_GPARITY=${enable_gparity}], [ac_GPARITY=yes])
|
||||||
|
|
||||||
|
AM_CONDITIONAL(BUILD_GPARITY, [ test "${ac_GPARITY}X" == "yesX" ])
|
||||||
|
case ${ac_FERMION_REPS} in
|
||||||
|
yes) AC_DEFINE([ENABLE_FERMION_REPS],[1],[non QCD fermion reps]);;
|
||||||
|
esac
|
||||||
|
case ${ac_GPARITY} in
|
||||||
|
yes) AC_DEFINE([ENABLE_GPARITY],[1],[fermion actions with GPARITY BCs]);;
|
||||||
|
esac
|
||||||
############### Nc
|
############### Nc
|
||||||
AC_ARG_ENABLE([Nc],
|
AC_ARG_ENABLE([Nc],
|
||||||
[AC_HELP_STRING([--enable-Nc=2|3|4], [enable number of colours])],
|
[AC_HELP_STRING([--enable-Nc=2|3|4], [enable number of colours])],
|
||||||
|
@ -6,13 +6,27 @@ home=`pwd`
|
|||||||
cd $home/Grid
|
cd $home/Grid
|
||||||
HFILES=`find . -type f -name '*.h' -not -name '*Hdf5*' -not -path '*/gamma-gen/*' -not -path '*/Old/*' -not -path '*/Eigen/*'`
|
HFILES=`find . -type f -name '*.h' -not -name '*Hdf5*' -not -path '*/gamma-gen/*' -not -path '*/Old/*' -not -path '*/Eigen/*'`
|
||||||
HFILES="$HFILES"
|
HFILES="$HFILES"
|
||||||
CCFILES=`find . -name '*.cc' -not -path '*/gamma-gen/*' -not -name '*Communicator*.cc' -not -name '*SharedMemory*.cc' -not -name '*Hdf5*'`
|
CCFILES=`find . -name '*.cc' -not -path '*/instantiation/*/*' -not -path '*/gamma-gen/*' -not -name '*Communicator*.cc' -not -name '*SharedMemory*.cc' -not -name '*Hdf5*'`
|
||||||
|
|
||||||
|
|
||||||
|
ZWILS_FERMION_FILES=` find . -name '*.cc' -path '*/instantiation/*' -path '*/instantiation/ZWilsonImpl*' `
|
||||||
|
WILS_FERMION_FILES=` find . -name '*.cc' -path '*/instantiation/*' -path '*/instantiation/WilsonImpl*' `
|
||||||
|
STAG_FERMION_FILES=` find . -name '*.cc' -path '*/instantiation/*' -path '*/instantiation/Staggered*' `
|
||||||
|
GP_FERMION_FILES=` find . -name '*.cc' -path '*/instantiation/*' -path '*/instantiation/Gparity*' `
|
||||||
|
ADJ_FERMION_FILES=` find . -name '*.cc' -path '*/instantiation/*' -path '*/instantiation/WilsonAdj*' `
|
||||||
|
TWOIND_FERMION_FILES=`find . -name '*.cc' -path '*/instantiation/*' -path '*/instantiation/WilsonTwoIndex*'`
|
||||||
|
|
||||||
HPPFILES=`find . -type f -name '*.hpp'`
|
HPPFILES=`find . -type f -name '*.hpp'`
|
||||||
echo HFILES=$HFILES $HPPFILES > Make.inc
|
echo HFILES=$HFILES $HPPFILES > Make.inc
|
||||||
echo >> Make.inc
|
echo >> Make.inc
|
||||||
echo CCFILES=$CCFILES >> Make.inc
|
echo CCFILES=$CCFILES >> Make.inc
|
||||||
|
|
||||||
|
echo ZWILS_FERMION_FILES=$ZWILS_FERMION_FILES >> Make.inc
|
||||||
|
echo WILS_FERMION_FILES=$WILS_FERMION_FILES >> Make.inc
|
||||||
|
echo STAG_FERMION_FILES=$STAG_FERMION_FILES >> Make.inc
|
||||||
|
echo GP_FERMION_FILES=$GP_FERMION_FILES >> Make.inc
|
||||||
|
echo ADJ_FERMION_FILES=$ADJ_FERMION_FILES >> Make.inc
|
||||||
|
echo TWOIND_FERMION_FILES=$TWOIND_FERMION_FILES >> Make.inc
|
||||||
|
|
||||||
# tests Make.inc
|
# tests Make.inc
|
||||||
cd $home/tests
|
cd $home/tests
|
||||||
|
Loading…
Reference in New Issue
Block a user