mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Trying to pass TeamCity and Travis

paboyle 2017-08-20 01:10:50 +01:00
parent be66e7dd95
commit a446d95c33
7 changed files with 67 additions and 30 deletions

View File

@@ -218,7 +218,7 @@ public:
   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
   uint64_t lmax=48;
-#define NLOOP (10*lmax*lmax*lmax*lmax/lat/lat/lat/lat)
+#define NLOOP (50*lmax*lmax*lmax*lmax/lat/lat/lat/lat)
   GridSerialRNG sRNG; sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
   for(int lat=8;lat<=lmax;lat+=4){
@@ -368,7 +368,7 @@ public:
   const int num_cases = 4;
 #endif
   controls Cases [] = {
-#if defined(AVX512)
+#ifdef AVX512
     { QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential },
     { QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
 #endif
@@ -380,6 +380,10 @@ public:
   for(int c=0;c<num_cases;c++) {
+    QCD::WilsonKernelsStatic::Comms = Cases[c].CommsOverlap;
+    QCD::WilsonKernelsStatic::Opt   = Cases[c].Opt;
+    CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
     std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
@@ -390,10 +394,6 @@ public:
     if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
     std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
-    QCD::WilsonKernelsStatic::Comms = Cases[c].CommsOverlap;
-    QCD::WilsonKernelsStatic::Opt   = Cases[c].Opt;
-    CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
     int nwarm = 10;
     double t0=usecond();
     FGrid->Barrier();
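
Taken together, the two hunks above move the per-case kernel and communicator settings ahead of the banner printout, so the banner describes the configuration that is actually about to be timed. A minimal sketch of the pattern, assuming a controls struct whose layout is inferred from the only field accesses in the diff (Opt, CommsOverlap, CommsAsynch):

    // Hypothetical reduction of the benchmark driver: configure each case
    // first, then print the banner and run the timed loop.
    struct controls {
      int Opt;          // kernel variant: OptInlineAsm / OptHandUnroll / OptGeneric
      int CommsOverlap; // CommsAndCompute or CommsThenCompute
      int CommsAsynch;  // CommunicatorPolicyConcurrent or ...Sequential
    };

    for (int c = 0; c < num_cases; c++) {
      QCD::WilsonKernelsStatic::Comms = Cases[c].CommsOverlap;  // set first...
      QCD::WilsonKernelsStatic::Opt   = Cases[c].Opt;
      CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
      // ...then describe and benchmark the configured case
    }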

View File

@@ -41,6 +41,7 @@ uint64_t CartesianCommunicator::MAX_MPI_SHM_BYTES = 128*1024*1024;
 CartesianCommunicator::CommunicatorPolicy_t
 CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent;
 int CartesianCommunicator::nCommThreads = -1;
+int CartesianCommunicator::Hugepages = 0;

 /////////////////////////////////
 // Alloc, free shmem region
@@ -134,7 +135,10 @@ void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) {
 }
 void CartesianCommunicator::ShmInitGeneric(void){
 #if 1
-  ShmCommBuf =(void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, MAP_HUGETLB| MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+  int mmap_flag = MAP_SHARED | MAP_ANONYMOUS;
+  if ( Hugepages ) mmap_flag |= MAP_HUGETLB;
+  ShmCommBuf =(void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, mmap_flag, -1, 0);
   if (ShmCommBuf == (void *)MAP_FAILED) exit(EXIT_FAILURE);
   std::cout << "ShmCommBuf "<<ShmCommBuf<<std::endl;
 #else
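
The old code passed MAP_HUGETLB unconditionally, which makes mmap fail outright on nodes with no huge pages reserved; the new Hugepages flag makes it opt-in. A self-contained sketch of the same idea, with a fallback retry added for illustration (the Grid code itself simply exits on failure):

    #include <sys/mman.h>
    #include <cstdio>
    #include <cstdlib>

    // Try an anonymous shared mapping with huge pages if requested, falling
    // back to ordinary pages when the huge-page mmap fails (e.g. because
    // vm.nr_hugepages is zero).
    static void *alloc_comms_buffer(size_t bytes, int use_hugepages) {
      int flags = MAP_SHARED | MAP_ANONYMOUS;
      if (use_hugepages) flags |= MAP_HUGETLB;
      void *p = mmap(NULL, bytes, PROT_READ | PROT_WRITE, flags, -1, 0);
      if (p == MAP_FAILED && use_hugepages) {
        p = mmap(NULL, bytes, PROT_READ | PROT_WRITE,
                 MAP_SHARED | MAP_ANONYMOUS, -1, 0);
      }
      if (p == MAP_FAILED) { perror("mmap"); exit(EXIT_FAILURE); }
      return p;
    }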

View File

@@ -50,13 +50,24 @@ namespace Grid {
 class CartesianCommunicator {
   public:

-  // 65536 ranks per node adequate for now
+  ////////////////////////////////////////////
+  // Isend/Irecv/Wait, or Sendrecv blocking
+  ////////////////////////////////////////////
+  enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential };
+  static CommunicatorPolicy_t CommunicatorPolicy;
+  static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; }
+
+  ///////////////////////////////////////////
+  // Up to 65536 ranks per node adequate for now
   // 128MB shared memory for comms enought for 48^4 local vol comms
   // Give external control (command line override?) of this
+  ///////////////////////////////////////////
   static const int MAXLOG2RANKSPERNODE = 16;
   static uint64_t MAX_MPI_SHM_BYTES;
   static int nCommThreads;
+  // use explicit huge pages
+  static int Hugepages;

   // Communicator should know nothing of the physics grid, only processor grid.
   int _Nprocessors;  // How many in all
@@ -122,10 +133,6 @@ class CartesianCommunicator {
   /////////////////////////////////
   static void * ShmCommBuf;

-  // Isend/Irecv/Wait, or Sendrecv blocking
-  enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential };
-  static CommunicatorPolicy_t CommunicatorPolicy;
-  static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; }

   size_t heap_top;
   size_t heap_bytes;
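
Hoisting the policy enum to the top of the public section matches how it is used: Grid_init and the benchmark select it from the command line, and StencilSendToRecvFromBegin branches on it. A toy model of the two policies' contract with the caller (the Request bookkeeping is illustrative, not Grid's CommsRequest_t):

    #include <iostream>
    #include <vector>

    enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential };
    static CommunicatorPolicy_t CommunicatorPolicy = CommunicatorPolicyConcurrent;

    struct Request { int id; };  // stand-in for an MPI_Isend/MPI_Irecv pair

    static void Complete(std::vector<Request> &list) {
      for (auto &r : list) std::cout << "wait on request " << r.id << "\n";
      list.clear();
    }

    static void SendRecvBegin(std::vector<Request> &list) {
      list.push_back({0});  // would post non-blocking sends/receives
      list.push_back({1});
      // Sequential: complete before returning; nothing left outstanding.
      if (CommunicatorPolicy == CommunicatorPolicySequential) Complete(list);
    }

    int main() {
      std::vector<Request> list;
      SendRecvBegin(list);  // Concurrent: requests stay outstanding here
      std::cout << "compute overlapped with " << list.size() << " requests\n";
      Complete(list);       // caller completes once the overlap ends
    }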

View File

@@ -41,8 +41,13 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #ifdef HAVE_NUMAIF_H
 #include <numaif.h>
 #endif
+// Make up for linex deficiencies
 #ifndef SHM_HUGETLB
-#define SHM_HUGETLB 04000
+#define SHM_HUGETLB 0x0
+#endif
+#ifndef MAP_HUGETLB
+#define MAP_HUGETLB 0x0
 #endif

 namespace Grid {
@@ -214,7 +219,10 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
   if ( fd < 0 ) { perror("failed shm_open"); assert(0); }
   ftruncate(fd, size);

-  void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+  int mmap_flag = MAP_SHARED;
+  if (Hugepages) mmap_flag |= MAP_HUGETLB;
+  void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);

   if ( ptr == MAP_FAILED ) { perror("failed mmap"); assert(0); }
   assert(((uint64_t)ptr&0x3F)==0);
@@ -628,8 +636,9 @@ double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
                                                      int bytes,int dir)
 {
   std::vector<CommsRequest_t> list;
-  StencilSendToRecvFromBegin(list,xmit,dest,recv,from,bytes,dir);
+  double offbytes = StencilSendToRecvFromBegin(list,xmit,dest,recv,from,bytes,dir);
   StencilSendToRecvFromComplete(list,dir);
+  return offbytes;
 }
@@ -671,7 +680,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
   }

   if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
-    this->StencilSendToRecvFromComplete(list);
+    this->StencilSendToRecvFromComplete(list,dir);
   }
   return off_node_bytes;
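
Two fixes in one file: StencilSendToRecvFrom is declared to return double but previously discarded the byte count from Begin and fell off the end of the function (undefined behaviour), and the Sequential branch now passes dir through to Complete. With the return value restored, a caller can estimate off-node bandwidth; a hedged fragment, assuming a communicator comm and buffers/ranks (xmit, dest, recv, from, bytes, dir) already set up by the caller:

    double t0 = usecond();
    double offbytes = comm.StencilSendToRecvFrom(xmit, dest, recv, from, bytes, dir);
    double t1 = usecond();
    // bytes per microsecond -> GB/s (assumes t1 > t0)
    std::cout << "off-node ~ " << offbytes / (t1 - t0) / 1.0e3 << " GB/s" << std::endl;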

View File

@@ -135,10 +135,11 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
 template<class Impl>
 void WilsonFermion5D<Impl>::Report(void)
 {
-  std::vector<int> latt = GridDefaultLatt();
-  RealD volume = Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
   RealD NP = _FourDimGrid->_Nprocessors;
   RealD NN = _FourDimGrid->NodeCount();
+  RealD volume = Ls;
+  std::vector<int> latt = _FourDimGrid->GlobalDimensions();
+  for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];

   if ( DhopCalls > 0 ) {
     std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
@@ -390,17 +391,18 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, LebesgueOrder
     st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
     DhopFaceTime+=usecond();

+    // Rely on async comms; start comms before merge of local data
     double ctime=0;
     double ptime=0;
-    //    DhopComputeTime-=usecond();
-    //    DhopCommTime-=usecond();
+    //////////////////////////////////////////////////////////////////////////////////////////////////////
+    // Ugly explicit thread mapping introduced for OPA reasons.
+    //////////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma omp parallel reduction(max:ctime) reduction(max:ptime)
     {
       int tid = omp_get_thread_num();
       int nthreads = omp_get_num_threads();
       int ncomms = CartesianCommunicator::nCommThreads;
-      if (ncomms == -1) ncomms = st.Packets.size();
+      if (ncomms == -1) ncomms = 1;
       assert(nthreads > ncomms);
       if (tid >= ncomms) {
         double start = usecond();
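
The parallel region above statically splits the OpenMP team: threads with tid < ncomms drive communication while the rest compute, and the max reductions capture the slowest member of each group. A self-contained toy of that split (compile with -fopenmp; the printf bodies are stand-ins for Grid's comms and kernel work):

    #include <omp.h>
    #include <cstdio>

    int main() {
      int ncomms = 1;  // the new default when nCommThreads == -1
    #pragma omp parallel
      {
        int tid      = omp_get_thread_num();
        int nthreads = omp_get_num_threads();
        if (tid < ncomms) printf("thread %d/%d: drive comms\n", tid, nthreads);
        else              printf("thread %d/%d: interior compute\n", tid, nthreads);
      }
      return 0;
    }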

View File

@@ -252,10 +252,15 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
     //////////////////////////////////////////
     void CommunicateThreaded()
     {
+#ifdef GRID_OMP
       // must be called in parallel region
       int mythread = omp_get_thread_num();
       int nthreads = CartesianCommunicator::nCommThreads;
-      if (nthreads == -1) nthreads = Packets.size();
+#else
+      int mythread = 0;
+      int nthreads = 1;
+#endif
+      if (nthreads == -1) nthreads = 1;
       if (mythread < nthreads) {
         for (int i = mythread; i < Packets.size(); i += nthreads) {
           double start = usecond();

View File

@@ -222,6 +222,11 @@ void Grid_init(int *argc,char ***argv)
     CartesianCommunicator::MAX_MPI_SHM_BYTES = MB*1024*1024;
   }

+  if( GridCmdOptionExists(*argv,*argv+*argc,"--shm-hugepages") ){
+    CartesianCommunicator::Hugepages = 1;
+  }
+
   if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
     Grid_debug_handler_init();
   }
@@ -304,6 +309,7 @@ void Grid_init(int *argc,char ***argv)
   std::cout<<GridLogMessage<<" --threads n : default number of OMP threads"<<std::endl;
   std::cout<<GridLogMessage<<" --grid n.n.n.n : default Grid size"<<std::endl;
   std::cout<<GridLogMessage<<" --shm M : allocate M megabytes of shared memory for comms"<<std::endl;
+  std::cout<<GridLogMessage<<" --shm-hugepages : use explicit huge pages in mmap call "<<std::endl;
   std::cout<<GridLogMessage<<std::endl;
   std::cout<<GridLogMessage<<"Verbose and debug:"<<std::endl;
   std::cout<<GridLogMessage<<std::endl;
@@ -356,12 +362,13 @@ void Grid_init(int *argc,char ***argv)
   if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-sequential") ){
     CartesianCommunicator::SetCommunicatorPolicy(CartesianCommunicator::CommunicatorPolicySequential);
   }
   if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){
     LebesgueOrder::UseLebesgueOrder=1;
   }
   CartesianCommunicator::nCommThreads = -1;
-  if( GridCmdOptionExists(*argv,*argv+*argc,"--commthreads") ){
-    arg= GridCmdOptionPayload(*argv,*argv+*argc,"--commthreads");
+  if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-threads") ){
+    arg= GridCmdOptionPayload(*argv,*argv+*argc,"--comms-threads");
     GridCmdOptionInt(arg,CartesianCommunicator::nCommThreads);
   }
   if( GridCmdOptionExists(*argv,*argv+*argc,"--cacheblocking") ){
@@ -378,7 +385,10 @@ void Grid_init(int *argc,char ***argv)
                   Grid_default_latt,
                   Grid_default_mpi);
-  std::cout << GridLogDebug << "Requesting "<< CartesianCommunicator::MAX_MPI_SHM_BYTES <<" byte stencil comms buffers "<<std::endl;
+  std::cout << GridLogMessage << "Requesting "<< CartesianCommunicator::MAX_MPI_SHM_BYTES <<" byte stencil comms buffers "<<std::endl;
+  if ( CartesianCommunicator::Hugepages) {
+    std::cout << GridLogMessage << "Mapped stencil comms buffers as MAP_HUGETLB "<<std::endl;
+  }
   if( GridCmdOptionExists(*argv,*argv+*argc,"--decomposition") ){
     std::cout<<GridLogMessage<<"Grid Default Decomposition patterns\n";