Trying to pass TeamCity and Travis

parent be66e7dd95
commit a446d95c33
@@ -218,7 +218,7 @@ public:
 std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
 
 uint64_t lmax=48;
-#define NLOOP (10*lmax*lmax*lmax*lmax/lat/lat/lat/lat)
+#define NLOOP (50*lmax*lmax*lmax*lmax/lat/lat/lat/lat)
 
 GridSerialRNG sRNG; sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
 for(int lat=8;lat<=lmax;lat+=4){
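The NLOOP macro keeps the total work per measurement roughly constant: as the lattice extent lat grows, the iteration count shrinks like 1/lat^4, and this hunk raises the work multiplier from 10 to 50. A minimal standalone sketch of the scaling (plain C++, not Grid code):

    #include <cstdint>
    #include <cstdio>

    int main(void) {
      const uint64_t lmax = 48;
      for (uint64_t lat = 8; lat <= lmax; lat += 4) {
        // Same arithmetic as the macro: multiplier * lmax^4 / lat^4.
        uint64_t nloop = 50 * lmax * lmax * lmax * lmax / (lat * lat * lat * lat);
        std::printf("lat=%2llu nloop=%llu\n",
                    (unsigned long long)lat, (unsigned long long)nloop);
      }
      return 0;
    }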
@@ -368,7 +368,7 @@ public:
 const int num_cases = 4;
 #endif
 controls Cases [] = {
-#if defined(AVX512)
+#ifdef AVX512
 { QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential },
 { QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
 #endif
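The Cases table bundles a kernel variant, a comms-overlap mode, and a communicator policy per benchmark case. The actual `controls` definition lives elsewhere in the benchmark source; this standalone sketch of the table-of-cases pattern uses illustrative enum stand-ins, not Grid's real types:

    #include <cstdio>

    // Illustrative stand-ins; the real values live in WilsonKernelsStatic
    // and CartesianCommunicator.
    enum Opt         { OptGeneric, OptHandUnroll, OptInlineAsm };
    enum CommsMode   { CommsThenCompute, CommsAndCompute };
    enum CommsPolicy { PolicyConcurrent, PolicySequential };

    struct controls { Opt opt; CommsMode overlap; CommsPolicy policy; };

    int main(void) {
      const controls Cases[] = {
        { OptInlineAsm, CommsAndCompute,  PolicySequential },
        { OptInlineAsm, CommsThenCompute, PolicySequential },
      };
      for (const controls &c : Cases)
        std::printf("opt=%d overlap=%d policy=%d\n", c.opt, c.overlap, c.policy);
      return 0;
    }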
@@ -380,6 +380,10 @@ public:
 
 for(int c=0;c<num_cases;c++) {
 
+QCD::WilsonKernelsStatic::Comms = Cases[c].CommsOverlap;
+QCD::WilsonKernelsStatic::Opt = Cases[c].Opt;
+CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
+
 std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
 if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
@@ -390,10 +394,6 @@ public:
 if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
 std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 
-
-QCD::WilsonKernelsStatic::Comms = Cases[c].CommsOverlap;
-QCD::WilsonKernelsStatic::Opt = Cases[c].Opt;
-CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
 int nwarm = 10;
 double t0=usecond();
 FGrid->Barrier();
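These two hunks move the per-case settings ahead of the banner printout, so the options are in force before the warm-up and timing begin. The timing itself follows the usual warm-up-then-measure pattern; a standalone sketch with stand-ins for Grid's usecond() and Barrier():

    #include <chrono>
    #include <cstdio>

    // Stand-in for Grid's usecond(): microseconds from a steady clock.
    static double usecond(void) {
      using namespace std::chrono;
      return duration<double, std::micro>(steady_clock::now().time_since_epoch()).count();
    }

    int main(void) {
      volatile double sink = 0;
      auto kernel = [&] { for (int i = 0; i < 1000000; i++) sink = sink + 1.0; };

      int nwarm = 10;
      for (int i = 0; i < nwarm; i++) kernel();   // untimed warm-up
      // In the MPI benchmark an FGrid->Barrier() sits here so ranks start together.
      double t0 = usecond();
      const int nloop = 100;
      for (int i = 0; i < nloop; i++) kernel();
      double t1 = usecond();
      std::printf("%.2f us per iteration\n", (t1 - t0) / nloop);
      return 0;
    }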
@@ -41,6 +41,7 @@ uint64_t CartesianCommunicator::MAX_MPI_SHM_BYTES = 128*1024*1024;
 CartesianCommunicator::CommunicatorPolicy_t
 CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent;
 int CartesianCommunicator::nCommThreads = -1;
+int CartesianCommunicator::Hugepages = 0;
 
 /////////////////////////////////
 // Alloc, free shmem region
@@ -134,7 +135,10 @@ void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) {
 }
 void CartesianCommunicator::ShmInitGeneric(void){
 #if 1
-ShmCommBuf =(void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, MAP_HUGETLB| MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+int mmap_flag = MAP_SHARED | MAP_ANONYMOUS;
+if ( Hugepages ) mmap_flag |= MAP_HUGETLB;
+ShmCommBuf =(void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, mmap_flag, -1, 0);
 if (ShmCommBuf == (void *)MAP_FAILED) exit(EXIT_FAILURE);
 std::cout << "ShmCommBuf "<<ShmCommBuf<<std::endl;
 #else
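Previously MAP_HUGETLB was passed unconditionally, which fails outright on hosts without reserved huge pages; building the flag word at runtime makes huge pages opt-in. A minimal standalone sketch of the same pattern (Linux-specific, since MAP_HUGETLB is a Linux flag):

    #include <sys/mman.h>
    #include <cstdio>
    #include <cstdlib>

    int main(int argc, char **argv) {
      int hugepages = (argc > 1);           // stand-in for CartesianCommunicator::Hugepages
      size_t bytes = 128UL * 1024 * 1024;   // Grid's MAX_MPI_SHM_BYTES default

      int mmap_flag = MAP_SHARED | MAP_ANONYMOUS;
      if (hugepages) mmap_flag |= MAP_HUGETLB;  // only request huge pages on demand

      void *buf = mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag, -1, 0);
      if (buf == MAP_FAILED) { perror("mmap"); exit(EXIT_FAILURE); }
      std::printf("mapped %zu bytes at %p\n", bytes, buf);
      munmap(buf, bytes);
      return 0;
    }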
@@ -50,13 +50,24 @@ namespace Grid {
 class CartesianCommunicator {
 public:
 
-// 65536 ranks per node adequate for now
+////////////////////////////////////////////
+// Isend/Irecv/Wait, or Sendrecv blocking
+////////////////////////////////////////////
+enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential };
+static CommunicatorPolicy_t CommunicatorPolicy;
+static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; }
+
+///////////////////////////////////////////
+// Up to 65536 ranks per node adequate for now
 // 128MB shared memory for comms enought for 48^4 local vol comms
 // Give external control (command line override?) of this
+///////////////////////////////////////////
 static const int MAXLOG2RANKSPERNODE = 16;
 static uint64_t MAX_MPI_SHM_BYTES;
 static int nCommThreads;
+// use explicit huge pages
+static int Hugepages;
 
 // Communicator should know nothing of the physics grid, only processor grid.
 int _Nprocessors;  // How many in all
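Moving the policy enum and setter into the public section lets call sites such as command-line parsing flip between concurrent Isend/Irecv and blocking Sendrecv without threading a parameter through every call. A standalone sketch of this static-policy pattern, with illustrative names rather than Grid's full class:

    #include <cstdio>

    class Communicator {
    public:
      enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential };
      static CommunicatorPolicy_t CommunicatorPolicy;
      static void SetCommunicatorPolicy(CommunicatorPolicy_t policy) { CommunicatorPolicy = policy; }
    };
    Communicator::CommunicatorPolicy_t
    Communicator::CommunicatorPolicy = Communicator::CommunicatorPolicyConcurrent;

    int main(void) {
      // e.g. flipped by a --comms-sequential command-line option at init time
      Communicator::SetCommunicatorPolicy(Communicator::CommunicatorPolicySequential);
      std::printf("policy=%d\n", (int)Communicator::CommunicatorPolicy);
      return 0;
    }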
@@ -122,10 +133,6 @@ class CartesianCommunicator {
 /////////////////////////////////
 static void * ShmCommBuf;
 
-// Isend/Irecv/Wait, or Sendrecv blocking
-enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential };
-static CommunicatorPolicy_t CommunicatorPolicy;
-static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; }
 
 size_t heap_top;
 size_t heap_bytes;
@@ -41,8 +41,13 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #ifdef HAVE_NUMAIF_H
 #include <numaif.h>
 #endif
 
+// Make up for linex deficiencies
 #ifndef SHM_HUGETLB
-#define SHM_HUGETLB 04000
+#define SHM_HUGETLB 0x0
+#endif
+#ifndef MAP_HUGETLB
+#define MAP_HUGETLB 0x0
 #endif
 
 namespace Grid {
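The old fallback defined SHM_HUGETLB to the Linux value 04000, which would silently pass a meaningless bit on platforms that genuinely lack the feature; defining the missing macros to 0x0 instead makes OR-ing them a harmless no-op. A standalone sketch of the guard idiom:

    #include <sys/mman.h>

    // Same idiom as the hunk: absent flags become 0 so OR-ing them is harmless.
    #ifndef MAP_HUGETLB
    #define MAP_HUGETLB 0x0
    #endif

    int main(void) {
      int flags = MAP_SHARED | MAP_ANONYMOUS;
      int want_hugepages = 0;                   // illustrative runtime knob
      if (want_hugepages) flags |= MAP_HUGETLB; // compiles on any POSIX platform
      void *p = mmap(0, 4096, PROT_READ | PROT_WRITE, flags, -1, 0);
      return (p == MAP_FAILED) ? 1 : 0;
    }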
@@ -214,7 +219,10 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
 if ( fd < 0 ) { perror("failed shm_open"); assert(0); }
 ftruncate(fd, size);
 
-void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+int mmap_flag = MAP_SHARED;
+if (Hugepages) mmap_flag |= MAP_HUGETLB;
+void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);
+
 if ( ptr == MAP_FAILED ) { perror("failed mmap"); assert(0); }
 assert(((uint64_t)ptr&0x3F)==0);
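This is the same optional-hugepage treatment applied to the named POSIX shared-memory path: shm_open creates the segment, ftruncate sizes it, and mmap maps it with the runtime-built flag word. A self-contained sketch of the sequence (segment name illustrative; link with -lrt on older glibc):

    #include <sys/mman.h>
    #include <sys/stat.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    int main(void) {
      const char *name = "/grid-shm-demo";     // illustrative segment name
      size_t size = 1 << 20;

      int fd = shm_open(name, O_RDWR | O_CREAT, 0666);
      if (fd < 0) { perror("shm_open"); return 1; }
      if (ftruncate(fd, size) != 0) { perror("ftruncate"); return 1; }

      int mmap_flag = MAP_SHARED;              // | MAP_HUGETLB when requested
      void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);
      if (ptr == MAP_FAILED) { perror("mmap"); return 1; }
      assert(((uintptr_t)ptr & 0x3F) == 0);    // 64-byte alignment, as Init() asserts

      munmap(ptr, size);
      close(fd);
      shm_unlink(name);
      return 0;
    }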
@@ -628,8 +636,9 @@ double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
 int bytes,int dir)
 {
 std::vector<CommsRequest_t> list;
-StencilSendToRecvFromBegin(list,xmit,dest,recv,from,bytes,dir);
+double offbytes = StencilSendToRecvFromBegin(list,xmit,dest,recv,from,bytes,dir);
 StencilSendToRecvFromComplete(list,dir);
+return offbytes;
 }
 
 double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
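The one-line change matters for measurement: StencilSendToRecvFromBegin already reports how many bytes leave the node, and the blocking wrapper previously discarded that value. A schematic of the fix, with stand-in functions rather than Grid's real signatures:

    #include <cstdio>

    static double begin_exchange(void) { return 4096.0; } // stand-in: off-node bytes moved
    static void   complete_exchange(void) {}

    // The wrapper now forwards the byte count instead of swallowing it,
    // so callers can convert a timed exchange into a bandwidth figure.
    static double blocking_exchange(void) {
      double offbytes = begin_exchange();
      complete_exchange();
      return offbytes;
    }

    int main(void) {
      std::printf("off-node bytes: %.0f\n", blocking_exchange());
      return 0;
    }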
@@ -671,7 +680,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
 }
 
 if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
-this->StencilSendToRecvFromComplete(list);
+this->StencilSendToRecvFromComplete(list,dir);
 }
 
 return off_node_bytes;
@@ -135,10 +135,11 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
 template<class Impl>
 void WilsonFermion5D<Impl>::Report(void)
 {
-std::vector<int> latt = GridDefaultLatt();
-RealD volume = Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
 RealD NP = _FourDimGrid->_Nprocessors;
 RealD NN = _FourDimGrid->NodeCount();
+RealD volume = Ls;
+std::vector<int> latt = _FourDimGrid->GlobalDimensions();
+for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
 
 if ( DhopCalls > 0 ) {
 std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
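Report() previously derived the 4d volume from the command-line default lattice, which need not match the grid the fermion operator was actually built on; taking the dimensions from the operator's own grid keeps the FLOP/s normalisation honest. A standalone sketch of the corrected computation, with a stand-in for the grid object:

    #include <cstdio>
    #include <vector>

    // Stand-in for the 4d grid object; the real one is Grid's GridCartesian.
    struct FourDimGrid {
      std::vector<int> GlobalDimensions() const { return {16, 16, 16, 32}; }
    };

    int main(void) {
      const int Ls = 8;                  // illustrative 5th-dimension extent
      FourDimGrid grid;
      double volume = Ls;
      std::vector<int> latt = grid.GlobalDimensions();
      for (int mu = 0; mu < 4; mu++) volume *= latt[mu];
      std::printf("5d volume = %.0f\n", volume);
      return 0;
    }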
@@ -390,17 +391,18 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
 st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
 DhopFaceTime+=usecond();
 
-// Rely on async comms; start comms before merge of local data
 double ctime=0;
 double ptime=0;
-//  DhopComputeTime-=usecond();
-//  DhopCommTime-=usecond();
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+// Ugly explicit thread mapping introduced for OPA reasons.
+//////////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma omp parallel reduction(max:ctime) reduction(max:ptime)
 {
 int tid = omp_get_thread_num();
 int nthreads = omp_get_num_threads();
 int ncomms = CartesianCommunicator::nCommThreads;
-if (ncomms == -1) ncomms = st.Packets.size();
+if (ncomms == -1) ncomms = 1;
 assert(nthreads > ncomms);
 if (tid >= ncomms) {
 double start = usecond();
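The parallel region splits the team explicitly: the first ncomms threads drive communications while the rest compute, and the changed default (ncomms = 1 when unset, rather than one thread per packet) reserves a single comms thread. A standalone OpenMP sketch of the split (compile with -fopenmp; needs at least two threads, as Grid's assert does):

    #include <omp.h>
    #include <cassert>
    #include <cstdio>

    int main(void) {
      int nCommThreads = -1;                  // "unset", mirroring Grid's default
    #pragma omp parallel
      {
        int tid      = omp_get_thread_num();
        int nthreads = omp_get_num_threads();
        int ncomms   = nCommThreads;
        if (ncomms == -1) ncomms = 1;         // new default: a single comms thread
        assert(nthreads > ncomms);            // >= 2 threads required, as Grid asserts
        if (tid >= ncomms) {
          std::printf("thread %d: compute partition\n", tid);
        } else {
          std::printf("thread %d: comms partition\n", tid);
        }
      }
      return 0;
    }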
@@ -252,10 +252,15 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
 //////////////////////////////////////////
 void CommunicateThreaded()
 {
+#ifdef GRID_OMP
 // must be called in parallel region
 int mythread = omp_get_thread_num();
 int nthreads = CartesianCommunicator::nCommThreads;
-if (nthreads == -1) nthreads = Packets.size();
+#else
+int mythread = 0;
+int nthreads = 1;
+#endif
+if (nthreads == -1) nthreads = 1;
 if (mythread < nthreads) {
 for (int i = mythread; i < Packets.size(); i += nthreads) {
 double start = usecond();
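The GRID_OMP guard fixes non-OpenMP builds: without OpenMP the thread id and count collapse to 0 and 1, and the same round-robin packet loop degenerates to a plain serial walk. A standalone sketch of the fallback (using the standard _OPENMP macro in place of Grid's GRID_OMP):

    #include <cstdio>
    #include <vector>
    #ifdef _OPENMP
    #include <omp.h>
    #endif

    int main(void) {
      std::vector<int> Packets(7);           // stand-in for the stencil packet list
    #ifdef _OPENMP
      int mythread = omp_get_thread_num();   // 0 outside a parallel region
      int nthreads = omp_get_num_threads();  // 1 outside a parallel region
    #else
      int mythread = 0;                      // the serial fallback the hunk adds
      int nthreads = 1;
    #endif
      for (size_t i = mythread; i < Packets.size(); i += nthreads)
        std::printf("thread %d sends packet %zu\n", mythread, i);
      return 0;
    }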
@@ -222,6 +222,11 @@ void Grid_init(int *argc,char ***argv)
 CartesianCommunicator::MAX_MPI_SHM_BYTES = MB*1024*1024;
 }
 
+if( GridCmdOptionExists(*argv,*argv+*argc,"--shm-hugepages") ){
+CartesianCommunicator::Hugepages = 1;
+}
+
+
 if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
 Grid_debug_handler_init();
 }
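The new --shm-hugepages option follows Grid_init's usual option-probing pattern: scan argv for an exact flag and flip a static knob. A standalone sketch in the spirit of GridCmdOptionExists, using std::find_if over the argument vector:

    #include <algorithm>
    #include <cstdio>
    #include <cstring>

    // Probe argv for an exact flag, in the spirit of GridCmdOptionExists.
    static bool option_exists(char **begin, char **end, const char *flag) {
      return std::find_if(begin, end,
                          [&](const char *a) { return std::strcmp(a, flag) == 0; })
             != end;
    }

    int main(int argc, char **argv) {
      int hugepages = option_exists(argv, argv + argc, "--shm-hugepages") ? 1 : 0;
      std::printf("hugepages=%d\n", hugepages);
      return 0;
    }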
@@ -304,6 +309,7 @@ void Grid_init(int *argc,char ***argv)
 std::cout<<GridLogMessage<<" --threads n : default number of OMP threads"<<std::endl;
 std::cout<<GridLogMessage<<" --grid n.n.n.n : default Grid size"<<std::endl;
 std::cout<<GridLogMessage<<" --shm M : allocate M megabytes of shared memory for comms"<<std::endl;
+std::cout<<GridLogMessage<<" --shm-hugepages : use explicit huge pages in mmap call "<<std::endl;
 std::cout<<GridLogMessage<<std::endl;
 std::cout<<GridLogMessage<<"Verbose and debug:"<<std::endl;
 std::cout<<GridLogMessage<<std::endl;
@@ -356,12 +362,13 @@ void Grid_init(int *argc,char ***argv)
 if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-sequential") ){
 CartesianCommunicator::SetCommunicatorPolicy(CartesianCommunicator::CommunicatorPolicySequential);
 }
 
 if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){
 LebesgueOrder::UseLebesgueOrder=1;
 }
 CartesianCommunicator::nCommThreads = -1;
-if( GridCmdOptionExists(*argv,*argv+*argc,"--commthreads") ){
-arg= GridCmdOptionPayload(*argv,*argv+*argc,"--commthreads");
+if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-threads") ){
+arg= GridCmdOptionPayload(*argv,*argv+*argc,"--comms-threads");
 GridCmdOptionInt(arg,CartesianCommunicator::nCommThreads);
 }
 if( GridCmdOptionExists(*argv,*argv+*argc,"--cacheblocking") ){
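The rename from --commthreads to --comms-threads brings the flag into line with the existing --comms-sequential family. An illustrative invocation combining the options touched by this commit (binary name is an example, not taken from the commit):

    ./grid-benchmark --comms-threads 2 --shm 1024 --shm-hugepages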
@@ -378,7 +385,10 @@ void Grid_init(int *argc,char ***argv)
 Grid_default_latt,
 Grid_default_mpi);
 
-std::cout << GridLogDebug << "Requesting "<< CartesianCommunicator::MAX_MPI_SHM_BYTES <<" byte stencil comms buffers "<<std::endl;
+std::cout << GridLogMessage << "Requesting "<< CartesianCommunicator::MAX_MPI_SHM_BYTES <<" byte stencil comms buffers "<<std::endl;
+if ( CartesianCommunicator::Hugepages) {
+std::cout << GridLogMessage << "Mapped stencil comms buffers as MAP_HUGETLB "<<std::endl;
+}
 
 if( GridCmdOptionExists(*argv,*argv+*argc,"--decomposition") ){
 std::cout<<GridLogMessage<<"Grid Default Decomposition patterns\n";