1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 01:05:38 +01:00

Trying to pass TeamCity and Travis

This commit is contained in:
paboyle 2017-08-20 01:10:50 +01:00
parent be66e7dd95
commit a446d95c33
7 changed files with 67 additions and 30 deletions

View File

@ -218,7 +218,7 @@ public:
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
uint64_t lmax=48;
#define NLOOP (10*lmax*lmax*lmax*lmax/lat/lat/lat/lat)
#define NLOOP (50*lmax*lmax*lmax*lmax/lat/lat/lat/lat)
GridSerialRNG sRNG; sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
for(int lat=8;lat<=lmax;lat+=4){
@ -368,7 +368,7 @@ public:
const int num_cases = 4;
#endif
controls Cases [] = {
#if defined(AVX512)
#ifdef AVX512
{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential },
{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
#endif
@ -380,6 +380,10 @@ public:
for(int c=0;c<num_cases;c++) {
QCD::WilsonKernelsStatic::Comms = Cases[c].CommsOverlap;
QCD::WilsonKernelsStatic::Opt = Cases[c].Opt;
CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
@ -390,10 +394,6 @@ public:
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
QCD::WilsonKernelsStatic::Comms = Cases[c].CommsOverlap;
QCD::WilsonKernelsStatic::Opt = Cases[c].Opt;
CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
int nwarm = 10;
double t0=usecond();
FGrid->Barrier();

View File

@ -41,6 +41,7 @@ uint64_t CartesianCommunicator::MAX_MPI_SHM_BYTES = 128*1024*1024;
CartesianCommunicator::CommunicatorPolicy_t
CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent;
int CartesianCommunicator::nCommThreads = -1;
int CartesianCommunicator::Hugepages = 0;
/////////////////////////////////
// Alloc, free shmem region
@ -134,7 +135,10 @@ void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) {
}
void CartesianCommunicator::ShmInitGeneric(void){
#if 1
ShmCommBuf =(void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, MAP_HUGETLB| MAP_SHARED | MAP_ANONYMOUS, -1, 0);
int mmap_flag = MAP_SHARED | MAP_ANONYMOUS;
if ( Hugepages ) mmap_flag |= MAP_HUGETLB;
ShmCommBuf =(void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, mmap_flag, -1, 0);
if (ShmCommBuf == (void *)MAP_FAILED) exit(EXIT_FAILURE);
std::cout << "ShmCommBuf "<<ShmCommBuf<<std::endl;
#else

View File

@ -50,13 +50,24 @@ namespace Grid {
class CartesianCommunicator {
public:
// 65536 ranks per node adequate for now
////////////////////////////////////////////
// Isend/Irecv/Wait, or Sendrecv blocking
////////////////////////////////////////////
enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential };
static CommunicatorPolicy_t CommunicatorPolicy;
static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; }
///////////////////////////////////////////
// Up to 65536 ranks per node adequate for now
// 128MB shared memory for comms enought for 48^4 local vol comms
// Give external control (command line override?) of this
///////////////////////////////////////////
static const int MAXLOG2RANKSPERNODE = 16;
static uint64_t MAX_MPI_SHM_BYTES;
static int nCommThreads;
// use explicit huge pages
static int Hugepages;
// Communicator should know nothing of the physics grid, only processor grid.
int _Nprocessors; // How many in all
@ -122,10 +133,6 @@ class CartesianCommunicator {
/////////////////////////////////
static void * ShmCommBuf;
// Isend/Irecv/Wait, or Sendrecv blocking
enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential };
static CommunicatorPolicy_t CommunicatorPolicy;
static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; }
size_t heap_top;
size_t heap_bytes;

View File

@ -41,8 +41,13 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifdef HAVE_NUMAIF_H
#include <numaif.h>
#endif
// Make up for linex deficiencies
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#define SHM_HUGETLB 0x0
#endif
#ifndef MAP_HUGETLB
#define MAP_HUGETLB 0x0
#endif
namespace Grid {
@ -213,8 +218,11 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666);
if ( fd < 0 ) { perror("failed shm_open"); assert(0); }
ftruncate(fd, size);
int mmap_flag = MAP_SHARED;
if (Hugepages) mmap_flag |= MAP_HUGETLB;
void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);
void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if ( ptr == MAP_FAILED ) { perror("failed mmap"); assert(0); }
assert(((uint64_t)ptr&0x3F)==0);
@ -628,8 +636,9 @@ double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
int bytes,int dir)
{
std::vector<CommsRequest_t> list;
StencilSendToRecvFromBegin(list,xmit,dest,recv,from,bytes,dir);
double offbytes = StencilSendToRecvFromBegin(list,xmit,dest,recv,from,bytes,dir);
StencilSendToRecvFromComplete(list,dir);
return offbytes;
}
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
@ -671,7 +680,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
}
if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
this->StencilSendToRecvFromComplete(list);
this->StencilSendToRecvFromComplete(list,dir);
}
return off_node_bytes;

View File

@ -135,10 +135,11 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
template<class Impl>
void WilsonFermion5D<Impl>::Report(void)
{
std::vector<int> latt = GridDefaultLatt();
RealD volume = Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
RealD NP = _FourDimGrid->_Nprocessors;
RealD NN = _FourDimGrid->NodeCount();
RealD NP = _FourDimGrid->_Nprocessors;
RealD NN = _FourDimGrid->NodeCount();
RealD volume = Ls;
std::vector<int> latt = _FourDimGrid->GlobalDimensions();
for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
if ( DhopCalls > 0 ) {
std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
@ -390,17 +391,18 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
DhopFaceTime+=usecond();
// Rely on async comms; start comms before merge of local data
double ctime=0;
double ptime=0;
// DhopComputeTime-=usecond();
// DhopCommTime-=usecond();
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Ugly explicit thread mapping introduced for OPA reasons.
//////////////////////////////////////////////////////////////////////////////////////////////////////
#pragma omp parallel reduction(max:ctime) reduction(max:ptime)
{
int tid = omp_get_thread_num();
int nthreads = omp_get_num_threads();
int ncomms = CartesianCommunicator::nCommThreads;
if (ncomms == -1) ncomms = st.Packets.size();
if (ncomms == -1) ncomms = 1;
assert(nthreads > ncomms);
if (tid >= ncomms) {
double start = usecond();

View File

@ -252,10 +252,15 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
//////////////////////////////////////////
void CommunicateThreaded()
{
#ifdef GRID_OMP
// must be called in parallel region
int mythread = omp_get_thread_num();
int nthreads = CartesianCommunicator::nCommThreads;
if (nthreads == -1) nthreads = Packets.size();
#else
int mythread = 0;
int nthreads = 1;
#endif
if (nthreads == -1) nthreads = 1;
if (mythread < nthreads) {
for (int i = mythread; i < Packets.size(); i += nthreads) {
double start = usecond();

View File

@ -222,6 +222,11 @@ void Grid_init(int *argc,char ***argv)
CartesianCommunicator::MAX_MPI_SHM_BYTES = MB*1024*1024;
}
if( GridCmdOptionExists(*argv,*argv+*argc,"--shm-hugepages") ){
CartesianCommunicator::Hugepages = 1;
}
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
Grid_debug_handler_init();
}
@ -304,6 +309,7 @@ void Grid_init(int *argc,char ***argv)
std::cout<<GridLogMessage<<" --threads n : default number of OMP threads"<<std::endl;
std::cout<<GridLogMessage<<" --grid n.n.n.n : default Grid size"<<std::endl;
std::cout<<GridLogMessage<<" --shm M : allocate M megabytes of shared memory for comms"<<std::endl;
std::cout<<GridLogMessage<<" --shm-hugepages : use explicit huge pages in mmap call "<<std::endl;
std::cout<<GridLogMessage<<std::endl;
std::cout<<GridLogMessage<<"Verbose and debug:"<<std::endl;
std::cout<<GridLogMessage<<std::endl;
@ -317,7 +323,7 @@ void Grid_init(int *argc,char ***argv)
std::cout<<GridLogMessage<<std::endl;
std::cout<<GridLogMessage<<" --comms-concurrent : Asynchronous MPI calls; several dirs at a time "<<std::endl;
std::cout<<GridLogMessage<<" --comms-sequential : Synchronous MPI calls; one dirs at a time "<<std::endl;
std::cout<<GridLogMessage<<" --comms-overlap : Overlap comms with compute "<<std::endl;
std::cout<<GridLogMessage<<" --comms-overlap : Overlap comms with compute "<<std::endl;
std::cout<<GridLogMessage<<std::endl;
std::cout<<GridLogMessage<<" --dslash-generic: Wilson kernel for generic Nc"<<std::endl;
std::cout<<GridLogMessage<<" --dslash-unroll : Wilson kernel for Nc=3"<<std::endl;
@ -356,12 +362,13 @@ void Grid_init(int *argc,char ***argv)
if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-sequential") ){
CartesianCommunicator::SetCommunicatorPolicy(CartesianCommunicator::CommunicatorPolicySequential);
}
if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){
LebesgueOrder::UseLebesgueOrder=1;
}
CartesianCommunicator::nCommThreads = -1;
if( GridCmdOptionExists(*argv,*argv+*argc,"--commthreads") ){
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--commthreads");
if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-threads") ){
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--comms-threads");
GridCmdOptionInt(arg,CartesianCommunicator::nCommThreads);
}
if( GridCmdOptionExists(*argv,*argv+*argc,"--cacheblocking") ){
@ -378,7 +385,10 @@ void Grid_init(int *argc,char ***argv)
Grid_default_latt,
Grid_default_mpi);
std::cout << GridLogDebug << "Requesting "<< CartesianCommunicator::MAX_MPI_SHM_BYTES <<" byte stencil comms buffers "<<std::endl;
std::cout << GridLogMessage << "Requesting "<< CartesianCommunicator::MAX_MPI_SHM_BYTES <<" byte stencil comms buffers "<<std::endl;
if ( CartesianCommunicator::Hugepages) {
std::cout << GridLogMessage << "Mapped stencil comms buffers as MAP_HUGETLB "<<std::endl;
}
if( GridCmdOptionExists(*argv,*argv+*argc,"--decomposition") ){
std::cout<<GridLogMessage<<"Grid Default Decomposition patterns\n";