mirror of https://github.com/paboyle/Grid.git
	Trying to pass TeamCity and Travis
@@ -218,7 +218,7 @@ public:
    std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;

  uint64_t lmax=48;
-#define NLOOP (10*lmax*lmax*lmax*lmax/lat/lat/lat/lat)
+#define NLOOP (50*lmax*lmax*lmax*lmax/lat/lat/lat/lat)

    GridSerialRNG          sRNG;      sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
    for(int lat=8;lat<=lmax;lat+=4){
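A note on the NLOOP change: with lmax=48 the prefactor going from 10 to 50 raises the iteration count five-fold at the smallest volume, NLOOP = 50*48^4/8^4 = 50*1296 = 64800 at lat=8 (previously 12960), falling to the bare prefactor at lat=lmax.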
@@ -368,7 +368,7 @@ public:
      const int num_cases = 4;
#endif
      controls Cases [] = {
-#if defined(AVX512) 
+#ifdef AVX512
        { QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  },
        { QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  },
#endif
@@ -380,6 +380,10 @@ public:

      for(int c=0;c<num_cases;c++) {

+       QCD::WilsonKernelsStatic::Comms = Cases[c].CommsOverlap;
+       QCD::WilsonKernelsStatic::Opt   = Cases[c].Opt;
+       CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
+
        std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
        if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
        if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl;
@@ -390,10 +394,6 @@ public:
        if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
        std::cout<<GridLogMessage << "=================================================================================="<<std::endl;

-
-       QCD::WilsonKernelsStatic::Comms = Cases[c].CommsOverlap;
-       QCD::WilsonKernelsStatic::Opt   = Cases[c].Opt;
-       CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
        int nwarm = 10;
        double t0=usecond();
        FGrid->Barrier();

@@ -41,6 +41,7 @@ uint64_t            CartesianCommunicator::MAX_MPI_SHM_BYTES   = 128*1024*1024;
CartesianCommunicator::CommunicatorPolicy_t  
CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent;
int CartesianCommunicator::nCommThreads = -1;
+int CartesianCommunicator::Hugepages = 0;

/////////////////////////////////
// Alloc, free shmem region
@@ -134,7 +135,10 @@ void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) {
}
void CartesianCommunicator::ShmInitGeneric(void){
#if 1
-  ShmCommBuf =(void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE,  MAP_HUGETLB| MAP_SHARED | MAP_ANONYMOUS, -1, 0); 
+
+  int mmap_flag = MAP_SHARED | MAP_ANONYMOUS;
+  if ( Hugepages ) mmap_flag |= MAP_HUGETLB;
+  ShmCommBuf =(void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, mmap_flag, -1, 0); 
  if (ShmCommBuf == (void *)MAP_FAILED) exit(EXIT_FAILURE);  
  std::cout << "ShmCommBuf "<<ShmCommBuf<<std::endl;
#else 

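For reference, a minimal standalone sketch of the same allocation pattern (not Grid code; it assumes Linux mmap flags, and the fall-back-to-normal-pages branch is an addition of mine -- the commit itself exits on failure):

    // Sketch only: allocate an anonymous shared buffer, optionally with explicit
    // huge pages, retrying with normal pages if the hugepage pool is unavailable.
    #include <sys/mman.h>
    #include <cstdio>
    #include <cstdlib>

    void *alloc_comm_buffer(size_t bytes, bool use_hugepages) {
      int flags = MAP_SHARED | MAP_ANONYMOUS;
      if (use_hugepages) flags |= MAP_HUGETLB;
      void *p = mmap(NULL, bytes, PROT_READ | PROT_WRITE, flags, -1, 0);
      if (p == MAP_FAILED && use_hugepages) {
        // retry without MAP_HUGETLB (the commit exits instead of retrying)
        p = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
      }
      if (p == MAP_FAILED) { perror("mmap"); exit(EXIT_FAILURE); }
      return p;
    }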
@@ -50,13 +50,24 @@ namespace Grid {
class CartesianCommunicator {
  public:    

-  // 65536 ranks per node adequate for now
+  ////////////////////////////////////////////
+  // Isend/Irecv/Wait, or Sendrecv blocking
+  ////////////////////////////////////////////
+  enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential };
+  static CommunicatorPolicy_t CommunicatorPolicy;
+  static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; }
+
+  ///////////////////////////////////////////
+  // Up to 65536 ranks per node adequate for now
  // 128MB shared memory for comms enought for 48^4 local vol comms
  // Give external control (command line override?) of this
+  ///////////////////////////////////////////
  static const int MAXLOG2RANKSPERNODE = 16;            
  static uint64_t  MAX_MPI_SHM_BYTES;
  static int       nCommThreads;
+  // use explicit huge pages
+  static int       Hugepages;

  // Communicator should know nothing of the physics grid, only processor grid.
  int              _Nprocessors;     // How many in all
@@ -122,10 +133,6 @@ class CartesianCommunicator {
  /////////////////////////////////
  static void * ShmCommBuf;

-  // Isend/Irecv/Wait, or Sendrecv blocking
-  enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential };
-  static CommunicatorPolicy_t CommunicatorPolicy;
-  static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; }
  
  size_t heap_top;
  size_t heap_bytes;

@@ -41,8 +41,13 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifdef HAVE_NUMAIF_H
#include <numaif.h>
#endif

+// Make up for linex deficiencies
#ifndef SHM_HUGETLB
-#define SHM_HUGETLB 04000
+#define SHM_HUGETLB 0x0
#endif
+#ifndef MAP_HUGETLB
+#define MAP_HUGETLB 0x0
+#endif
+
namespace Grid {
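The point of the 0x0 fallbacks, as a sketch (not Grid code): OR-ing a zero flag is a no-op, so call sites that request huge pages still compile on systems whose headers lack the flag and simply never get huge pages.

    #include <sys/mman.h>
    #ifndef MAP_HUGETLB
    #define MAP_HUGETLB 0x0   // flag becomes a no-op where the OS has no hugetlb support
    #endif

    int shm_mmap_flags(int use_hugepages) {
      int flags = MAP_SHARED | MAP_ANONYMOUS;
      if (use_hugepages) flags |= MAP_HUGETLB;   // |= 0x0 on such systems
      return flags;
    }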
@@ -213,8 +218,11 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
      int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666);
      if ( fd < 0 ) {	perror("failed shm_open");	assert(0);      }
      ftruncate(fd, size);

-      void * ptr =  mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+      int mmap_flag = MAP_SHARED;
+      if (Hugepages) mmap_flag |= MAP_HUGETLB;
+      void * ptr =  mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);
+
      if ( ptr == MAP_FAILED ) {       perror("failed mmap");      assert(0);    }
      assert(((uint64_t)ptr&0x3F)==0);

@@ -628,8 +636,9 @@ double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
                                                     int bytes,int dir)
{
  std::vector<CommsRequest_t> list;
-  StencilSendToRecvFromBegin(list,xmit,dest,recv,from,bytes,dir);
+  double offbytes = StencilSendToRecvFromBegin(list,xmit,dest,recv,from,bytes,dir);
  StencilSendToRecvFromComplete(list,dir);
+  return offbytes;
}

double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
@@ -671,7 +680,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
  }

  if ( CommunicatorPolicy == CommunicatorPolicySequential ) { 
-    this->StencilSendToRecvFromComplete(list);
+    this->StencilSendToRecvFromComplete(list,dir);
  }

  return off_node_bytes;

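For orientation, a generic MPI sketch (plain MPI, not Grid's API) of the begin/compute/complete pattern these two routines wrap; under CommunicatorPolicySequential the wait happens immediately inside Begin, so nothing is left in flight:

    #include <mpi.h>

    // Post both halves of a halo exchange, overlap local work, then wait.
    static void exchange_overlapped(void *xmit, void *recv, int nbytes,
                                    int dest, int from, MPI_Comm comm) {
      MPI_Request reqs[2];
      MPI_Irecv(recv, nbytes, MPI_CHAR, from, 0, comm, &reqs[0]);
      MPI_Isend(xmit, nbytes, MPI_CHAR, dest, 0, comm, &reqs[1]);
      /* ... overlapped local compute would go here ... */
      MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
    }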
@@ -135,10 +135,11 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
template<class Impl>
void WilsonFermion5D<Impl>::Report(void)
{
-    std::vector<int> latt = GridDefaultLatt();          
-    RealD volume = Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
-    RealD NP = _FourDimGrid->_Nprocessors;
-    RealD NN = _FourDimGrid->NodeCount();
+  RealD NP     = _FourDimGrid->_Nprocessors;
+  RealD NN     = _FourDimGrid->NodeCount();
+  RealD volume = Ls;  
+  std::vector<int> latt = _FourDimGrid->GlobalDimensions();
+  for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];

  if ( DhopCalls > 0 ) {
    std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
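The reported volume is counted in five-dimensional sites, volume = Ls * prod_mu latt[mu], and is now taken from _FourDimGrid->GlobalDimensions() (the grid the fermion actually lives on) rather than the global default lattice; for illustration, Ls=16 on a 16^4 lattice gives 16*65536 = 1,048,576 sites.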
@@ -390,17 +391,18 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
  st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
  DhopFaceTime+=usecond();

  // Rely on async comms; start comms before merge of local data
  double ctime=0;
  double ptime=0;
  //  DhopComputeTime-=usecond();
  //  DhopCommTime-=usecond();

  //////////////////////////////////////////////////////////////////////////////////////////////////////
  // Ugly explicit thread mapping introduced for OPA reasons.
  //////////////////////////////////////////////////////////////////////////////////////////////////////
#pragma omp parallel reduction(max:ctime) reduction(max:ptime)
  { 
    int tid = omp_get_thread_num();
    int nthreads = omp_get_num_threads();
    int ncomms = CartesianCommunicator::nCommThreads;
-    if (ncomms == -1) ncomms = st.Packets.size(); 
+    if (ncomms == -1) ncomms = 1;
    assert(nthreads > ncomms);
    if (tid >= ncomms) {
      double start = usecond();

@@ -252,10 +252,15 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
  //////////////////////////////////////////
  void CommunicateThreaded()
  {
+#ifdef GRID_OMP
    // must be called in parallel region
    int mythread = omp_get_thread_num();
    int nthreads = CartesianCommunicator::nCommThreads;
-    if (nthreads == -1) nthreads = Packets.size();
+#else
+    int mythread = 0;
+    int nthreads = 1;
+#endif
+    if (nthreads == -1) nthreads = 1;
    if (mythread < nthreads) {
      for (int i = mythread; i < Packets.size(); i += nthreads) {
        double start = usecond();

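Putting the two pieces together, a standalone sketch (not Grid code; function and variable names are invented for illustration) of the thread mapping used above: the first ncomms OpenMP threads service communication packets in a strided loop, as in CommunicateThreaded(), while the remaining threads do the compute.

    #include <omp.h>
    #include <cassert>

    void split_comms_and_compute(int ncomms_requested, int npackets) {
      int ncomms = (ncomms_requested == -1) ? 1 : ncomms_requested;  // mirrors the -1 default above
    #pragma omp parallel
      {
        int tid      = omp_get_thread_num();
        int nthreads = omp_get_num_threads();
        assert(nthreads > ncomms);              // need at least one compute thread
        if (tid < ncomms) {
          // communication threads: progress packets i = tid, tid+ncomms, ...
          for (int i = tid; i < npackets; i += ncomms) { /* progress packet i */ }
        } else {
          // compute threads: do the interior stencil work here
        }
      }
    }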
@@ -222,6 +222,11 @@ void Grid_init(int *argc,char ***argv)
    CartesianCommunicator::MAX_MPI_SHM_BYTES = MB*1024*1024;
  }

+  if( GridCmdOptionExists(*argv,*argv+*argc,"--shm-hugepages") ){
+    CartesianCommunicator::Hugepages = 1;
+  }
+
  if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
    Grid_debug_handler_init();
  }
@@ -304,6 +309,7 @@ void Grid_init(int *argc,char ***argv)
    std::cout<<GridLogMessage<<"  --threads n     : default number of OMP threads"<<std::endl;
    std::cout<<GridLogMessage<<"  --grid n.n.n.n  : default Grid size"<<std::endl;
    std::cout<<GridLogMessage<<"  --shm  M        : allocate M megabytes of shared memory for comms"<<std::endl;
+    std::cout<<GridLogMessage<<"  --shm-hugepages : use explicit huge pages in mmap call "<<std::endl;    
    std::cout<<GridLogMessage<<std::endl;
    std::cout<<GridLogMessage<<"Verbose and debug:"<<std::endl;
    std::cout<<GridLogMessage<<std::endl;
@@ -317,7 +323,7 @@ void Grid_init(int *argc,char ***argv)
    std::cout<<GridLogMessage<<std::endl;
    std::cout<<GridLogMessage<<"  --comms-concurrent : Asynchronous MPI calls; several dirs at a time "<<std::endl;    
    std::cout<<GridLogMessage<<"  --comms-sequential : Synchronous MPI calls; one dirs at a time "<<std::endl;    
-    std::cout<<GridLogMessage<<"  --comms-overlap : Overlap comms with compute "<<std::endl;    
+    std::cout<<GridLogMessage<<"  --comms-overlap    : Overlap comms with compute "<<std::endl;    
    std::cout<<GridLogMessage<<std::endl;
    std::cout<<GridLogMessage<<"  --dslash-generic: Wilson kernel for generic Nc"<<std::endl;    
    std::cout<<GridLogMessage<<"  --dslash-unroll : Wilson kernel for Nc=3"<<std::endl;    
@@ -356,12 +362,13 @@ void Grid_init(int *argc,char ***argv)
  if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-sequential") ){
    CartesianCommunicator::SetCommunicatorPolicy(CartesianCommunicator::CommunicatorPolicySequential);
  }

  if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){
    LebesgueOrder::UseLebesgueOrder=1;
  }
+  CartesianCommunicator::nCommThreads = -1;
-  if( GridCmdOptionExists(*argv,*argv+*argc,"--commthreads") ){
-    arg= GridCmdOptionPayload(*argv,*argv+*argc,"--commthreads");
+  if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-threads") ){
+    arg= GridCmdOptionPayload(*argv,*argv+*argc,"--comms-threads");
    GridCmdOptionInt(arg,CartesianCommunicator::nCommThreads);
  }
  if( GridCmdOptionExists(*argv,*argv+*argc,"--cacheblocking") ){
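Taken together, the options touched by this commit combine on one command line; an illustrative invocation (binary name, rank count and values are examples only, not from the commit):

    mpirun -np 4 ./Benchmark_ITT --grid 16.16.16.16 --shm 1024 --shm-hugepages --comms-overlap --comms-threads 2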
@@ -378,7 +385,10 @@ void Grid_init(int *argc,char ***argv)
                  Grid_default_latt,
                  Grid_default_mpi);

-  std::cout << GridLogDebug << "Requesting "<< CartesianCommunicator::MAX_MPI_SHM_BYTES <<" byte stencil comms buffers "<<std::endl;
+  std::cout << GridLogMessage << "Requesting "<< CartesianCommunicator::MAX_MPI_SHM_BYTES <<" byte stencil comms buffers "<<std::endl;
+  if ( CartesianCommunicator::Hugepages) {
+    std::cout << GridLogMessage << "Mapped stencil comms buffers as MAP_HUGETLB "<<std::endl;
+  }

  if( GridCmdOptionExists(*argv,*argv+*argc,"--decomposition") ){
    std::cout<<GridLogMessage<<"Grid Default Decomposition patterns\n";