mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-26 17:49:33 +00:00 
			
		
		
		
	Merge branch 'develop' of https://github.com/paboyle/Grid into feature/Lanczos
This commit is contained in:
		| @@ -97,12 +97,55 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N) | ||||
| } | ||||
|  | ||||
|  | ||||
| #if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) | ||||
| #if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) || defined (GRID_COMMS_MPI3) | ||||
| void CartesianCommunicator::AllToAll(int dim,void  *in,void *out,uint64_t words,uint64_t bytes) | ||||
| { /* All-to-all along one dimension: build a sub-communicator for `dim`, then exchange on it. */ | ||||
|   std::vector<int> row(_ndimension,1); /* processor grid of the sub-communicator, extent 1 by default */ | ||||
|   assert(dim>=0 && dim<_ndimension); /* `dim` must name an existing dimension */ | ||||
|  | ||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)  | ||||
|   //  Split the communicator: only `dim` keeps the full extent _processors[dim] | ||||
|   row[dim] = _processors[dim]; | ||||
|  | ||||
|   int me; /* bound to the constructor's `srank` reference argument; not read afterwards */ | ||||
|   CartesianCommunicator Comm(row,*this,me); /* child communicator built from this one with grid `row` */ | ||||
|   Comm.AllToAll(in,out,words,bytes); /* forwards the buffers and sizes to the overload without `dim` */ | ||||
| } | ||||
| void CartesianCommunicator::AllToAll(void  *in,void *out,uint64_t words,uint64_t bytes) | ||||
| { | ||||
|   // All-to-all on this object's `communicator`: `words` items of `bytes` bytes each go through MPI_Alltoall. | ||||
|   // MPI is a pain and uses "int" arguments; 64*64*64*128*16 == 500 million elements of data, | ||||
|   // and with 24*4 byte multiples that gives 50x10^9 >>> 2x10^9: an int overflow ("Y2K"-style bug) | ||||
|   // that also turns up on 32^3 x 64 Gparity. Hence one contiguous datatype per item, counted in items. | ||||
|   MPI_Datatype object; | ||||
|   int iwords;  | ||||
|   int ibytes; | ||||
|   iwords = words; | ||||
|   ibytes = bytes; | ||||
|   assert(words == iwords); // abort if `words` did not survive the narrowing to int | ||||
|   assert(bytes == ibytes); // abort if `bytes` did not survive the narrowing to int | ||||
|   MPI_Type_contiguous(ibytes,MPI_BYTE,&object); /* one item == ibytes raw bytes */ | ||||
|   MPI_Type_commit(&object); | ||||
|   MPI_Alltoall(in,iwords,object,out,iwords,object,communicator); /* same count and type on send and receive sides */ | ||||
|   MPI_Type_free(&object); /* the datatype is only needed for this one call */ | ||||
| } | ||||
| #endif | ||||
|  | ||||
| #if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT)  | ||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)  | ||||
| { | ||||
|   _ndimension = processors.size(); | ||||
|    | ||||
|  | ||||
|   int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension); | ||||
|   std::vector<int> parent_processor_coor(_ndimension,0); | ||||
|   std::vector<int> parent_processors    (_ndimension,1); | ||||
|  | ||||
|   // Can make 5d grid from 4d etc... | ||||
|   int pad = _ndimension-parent_ndimension; | ||||
|   for(int d=0;d<parent_ndimension;d++){ | ||||
|     parent_processor_coor[pad+d]=parent._processor_coor[d]; | ||||
|     parent_processors    [pad+d]=parent._processors[d]; | ||||
|   } | ||||
|  | ||||
|   ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // split the communicator | ||||
|   ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| @@ -137,21 +180,36 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors, | ||||
|     scoor[d] = pcoor[d] / processors[d]; | ||||
|     ssize[d] = pdims[d] / processors[d]; | ||||
|   } | ||||
|   int crank,srank;  // rank within subcomm ; rank of subcomm within blocks of subcomms | ||||
|   Lexicographic::IndexFromCoor(ccoor,crank,processors); | ||||
|   Lexicographic::IndexFromCoor(scoor,srank,ssize); | ||||
|   int crank;  // rank within subcomm ; srank is rank of subcomm within blocks of subcomms | ||||
|   // Mpi uses the reverse Lexico convention to us | ||||
|   Lexicographic::IndexFromCoorReversed(ccoor,crank,processors); | ||||
|   Lexicographic::IndexFromCoorReversed(scoor,srank,ssize); | ||||
|  | ||||
|   MPI_Comm comm_split; | ||||
|   if ( Nchild > 1 ) {  | ||||
|  | ||||
|     //    std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl; | ||||
|     //    std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"]    "; | ||||
|     //    for(int d=0;d<parent._processors.size();d++)  std::cout << parent._processors[d] << " "; | ||||
|     //    std::cout<<std::endl; | ||||
|  | ||||
|     //    std::cout << GridLogMessage<<" child grid["<< _ndimension <<"]    "; | ||||
|     //    for(int d=0;d<processors.size();d++)  std::cout << processors[d] << " "; | ||||
|     //    std::cout<<std::endl; | ||||
|     if(0){ | ||||
|       std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl; | ||||
|       std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"]    "; | ||||
|       for(int d=0;d<parent._ndimension;d++)  std::cout << parent._processors[d] << " "; | ||||
|       std::cout<<std::endl; | ||||
|        | ||||
|       std::cout << GridLogMessage<<" child grid["<< _ndimension <<"]    "; | ||||
|       for(int d=0;d<processors.size();d++)  std::cout << processors[d] << " "; | ||||
|       std::cout<<std::endl; | ||||
|        | ||||
|       std::cout << GridLogMessage<<" old rank "<< parent._processor<<" coor ["<< parent._ndimension <<"]    "; | ||||
|       for(int d=0;d<parent._ndimension;d++)  std::cout << parent._processor_coor[d] << " "; | ||||
|       std::cout<<std::endl; | ||||
|        | ||||
|       std::cout << GridLogMessage<<" new split "<< srank<<" scoor ["<< _ndimension <<"]    "; | ||||
|       for(int d=0;d<processors.size();d++)  std::cout << scoor[d] << " "; | ||||
|       std::cout<<std::endl; | ||||
|        | ||||
|       std::cout << GridLogMessage<<" new rank "<< crank<<" coor ["<< _ndimension <<"]    "; | ||||
|       for(int d=0;d<processors.size();d++)  std::cout << ccoor[d] << " "; | ||||
|       std::cout<<std::endl; | ||||
|     } | ||||
|  | ||||
|     int ierr= MPI_Comm_split(parent.communicator,srank,crank,&comm_split); | ||||
|     assert(ierr==0); | ||||
| @@ -159,24 +217,34 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors, | ||||
|     // Declare victory | ||||
|     ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|     //    std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into " | ||||
|     // 	      << Nchild <<" communicators with " << childsize << " ranks"<<std::endl; | ||||
|     //	      << Nchild <<" communicators with " << childsize << " ranks"<<std::endl; | ||||
|   } else { | ||||
|     comm_split=parent.communicator; | ||||
|     srank = 0; | ||||
|   } | ||||
|  | ||||
|   ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Set up from the new split communicator | ||||
|   ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   InitFromMPICommunicator(processors,comm_split); | ||||
|  | ||||
|   if(0){  | ||||
|     std::cout << " ndim " <<_ndimension<<" " << parent._ndimension << std::endl; | ||||
|     for(int d=0;d<processors.size();d++){ | ||||
|       std::cout << d<< " " << _processor_coor[d] <<" " <<  ccoor[d]<<std::endl; | ||||
|     } | ||||
|   } | ||||
|   for(int d=0;d<processors.size();d++){ | ||||
|     assert(_processor_coor[d] == ccoor[d] ); | ||||
|   } | ||||
|  | ||||
| } | ||||
|  | ||||
| ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // Take an MPI_Comm and self assemble | ||||
| ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base) | ||||
| { | ||||
|   //  if ( communicator_base != communicator_world ) { | ||||
|   //    std::cout << "Cartesian communicator created with a non-world communicator"<<std::endl; | ||||
|   //  } | ||||
|   _ndimension = processors.size(); | ||||
|   _processor_coor.resize(_ndimension); | ||||
|  | ||||
| @@ -190,14 +258,24 @@ void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &proc | ||||
|   } | ||||
|  | ||||
|   std::vector<int> periodic(_ndimension,1); | ||||
|   MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],1,&communicator); | ||||
|   MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],0,&communicator); | ||||
|   MPI_Comm_rank(communicator,&_processor); | ||||
|   MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); | ||||
|  | ||||
|   if ( 0 && (communicator_base != communicator_world) ) { | ||||
|     std::cout << "InitFromMPICommunicator Cartesian communicator created with a non-world communicator"<<std::endl; | ||||
|      | ||||
|     std::cout << " new communicator rank "<<_processor<< " coor ["<<_ndimension<<"] "; | ||||
|     for(int d=0;d<_processors.size();d++){ | ||||
|       std::cout << _processor_coor[d]<<" "; | ||||
|     } | ||||
|     std::cout << std::endl; | ||||
|   } | ||||
|  | ||||
|   int Size; | ||||
|   MPI_Comm_size(communicator,&Size); | ||||
|  | ||||
| #ifdef GRID_COMMS_MPIT | ||||
| #if defined(GRID_COMMS_MPIT) || defined (GRID_COMMS_MPI3) | ||||
|   communicator_halo.resize (2*_ndimension); | ||||
|   for(int i=0;i<_ndimension*2;i++){ | ||||
|     MPI_Comm_dup(communicator,&communicator_halo[i]); | ||||
| @@ -206,7 +284,9 @@ void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &proc | ||||
|    | ||||
|   assert(Size==_Nprocessors); | ||||
| } | ||||
| #endif | ||||
|  | ||||
| #if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT)  | ||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)  | ||||
| { | ||||
|   InitFromMPICommunicator(processors,communicator_world); | ||||
| @@ -215,10 +295,10 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) | ||||
| #endif | ||||
|  | ||||
| #if !defined( GRID_COMMS_MPI3)  | ||||
|  | ||||
| int                      CartesianCommunicator::NodeCount(void)    { return ProcessorCount();}; /* when GRID_COMMS_MPI3 is not defined the node count is simply ProcessorCount() */ | ||||
| int                      CartesianCommunicator::RankCount(void)    { return ProcessorCount();}; /* when GRID_COMMS_MPI3 is not defined the rank count is simply ProcessorCount() */ | ||||
| #endif | ||||
|  | ||||
| #if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPIT) | ||||
| double CartesianCommunicator::StencilSendToRecvFrom( void *xmit, | ||||
| 						     int xmit_to_rank, | ||||
|   | ||||
| @@ -153,12 +153,12 @@ class CartesianCommunicator { | ||||
|   // Constructors to sub-divide a parent communicator | ||||
|   // and default to comm world | ||||
|   //////////////////////////////////////////////// | ||||
|   CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent); | ||||
|   CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank); | ||||
|   CartesianCommunicator(const std::vector<int> &pdimensions_in); | ||||
|   virtual ~CartesianCommunicator(); | ||||
|  | ||||
|  private: | ||||
| #if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT)  | ||||
| #if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT)  || defined (GRID_COMMS_MPI3)  | ||||
|   //////////////////////////////////////////////// | ||||
|   // Private initialise from an MPI communicator | ||||
|   // Can use after an MPI_Comm_split, but hidden from user so private | ||||
| @@ -276,10 +276,11 @@ class CartesianCommunicator { | ||||
|     assert(in.size()==out.size()); | ||||
|     uint64_t bytes=sizeof(T); | ||||
|     uint64_t words=in.size()/numnode; | ||||
|  | ||||
|     //    std:: cout << "AllToAll buffer size "<< in.size()*sizeof(T)<<std::endl; | ||||
|     //    std:: cout << "AllToAll datum bytes "<< bytes<<std::endl; | ||||
|     //    std:: cout << "AllToAll datum count "<< words<<std::endl; | ||||
|     assert(numnode * words == in.size()); | ||||
|     assert(words < (1ULL<<32)); | ||||
|  | ||||
|     assert(words < (1ULL<<31)); | ||||
|     AllToAll(dim,(void *)&in[0],(void *)&out[0],words,bytes); | ||||
|   } | ||||
|   void AllToAll(int dim  ,void *in,void *out,uint64_t words,uint64_t bytes); | ||||
|   | ||||
| @@ -57,7 +57,7 @@ CartesianCommunicator::~CartesianCommunicator() | ||||
| { | ||||
|   int MPI_is_finalised; | ||||
|   MPI_Finalized(&MPI_is_finalised); | ||||
|   if (communicator && MPI_is_finalised) | ||||
|   if (communicator && !MPI_is_finalised) | ||||
|     MPI_Comm_free(&communicator); | ||||
| } | ||||
|  | ||||
| @@ -196,35 +196,6 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes) | ||||
| 		     root, | ||||
| 		     communicator); | ||||
|   assert(ierr==0); | ||||
| } | ||||
| void CartesianCommunicator::AllToAll(int dim,void  *in,void *out,uint64_t words,uint64_t bytes) | ||||
| { /* All-to-all along one dimension: build a sub-communicator for `dim`, then exchange on it. */ | ||||
|   std::vector<int> row(_ndimension,1); /* processor grid of the sub-communicator, extent 1 by default */ | ||||
|   assert(dim>=0 && dim<_ndimension); /* `dim` must name an existing dimension */ | ||||
|  | ||||
|   //  Split the communicator: only `dim` keeps the full extent _processors[dim] | ||||
|   row[dim] = _processors[dim]; | ||||
|  | ||||
|   CartesianCommunicator Comm(row,*this); /* child communicator built from this one with grid `row` */ | ||||
|   Comm.AllToAll(in,out,words,bytes); /* forwards the buffers and sizes to the overload without `dim` */ | ||||
| } | ||||
| void CartesianCommunicator::AllToAll(void  *in,void *out,uint64_t words,uint64_t bytes) | ||||
| { | ||||
|   // All-to-all on this object's `communicator`: `words` items of `bytes` bytes each go through MPI_Alltoall. | ||||
|   // MPI is a pain and uses "int" arguments; 64*64*64*128*16 == 500 million elements of data, | ||||
|   // and with 24*4 byte multiples that gives 50x10^9 >>> 2x10^9: an int overflow ("Y2K"-style bug) | ||||
|   // that also turns up on 32^3 x 64 Gparity. Hence one contiguous datatype per item, counted in items. | ||||
|   MPI_Datatype object; | ||||
|   int iwords;  | ||||
|   int ibytes; | ||||
|   iwords = words; | ||||
|   ibytes = bytes; | ||||
|   assert(words == iwords); // abort if `words` did not survive the narrowing to int | ||||
|   assert(bytes == ibytes); // abort if `bytes` did not survive the narrowing to int | ||||
|   MPI_Type_contiguous(ibytes,MPI_BYTE,&object); /* one item == ibytes raw bytes */ | ||||
|   MPI_Type_commit(&object); | ||||
|   MPI_Alltoall(in,iwords,object,out,iwords,object,communicator); /* same count and type on send and receive sides */ | ||||
|   MPI_Type_free(&object); /* the datatype is only needed for this one call */ | ||||
| } | ||||
|   /////////////////////////////////////////////////////// | ||||
|   // Should only be used prior to Grid Init finished. | ||||
|   | ||||
| @@ -454,11 +454,15 @@ void  CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &c | ||||
| ////////////////////////////////// | ||||
| // Try to subdivide communicator | ||||
| ////////////////////////////////// | ||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)  | ||||
| /* | ||||
|  * Use default in MPI compile | ||||
|  * | ||||
|  * Placeholder for the splitting constructor: `parent` is ignored, construction is | ||||
|  * delegated to CartesianCommunicator(processors), and a warning is printed that | ||||
|  * splitting MPI3 communicators is not implemented. | ||||
|  * NOTE(review): `srank` is never assigned here, whereas another variant of this | ||||
|  * constructor sets srank=0 -- confirm that callers do not read it. | ||||
|  */ | ||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)  | ||||
|   : CartesianCommunicator(processors)  | ||||
| { | ||||
|   std::cout << "Attempts to split MPI3 communicators will fail until implemented" <<std::endl; | ||||
| } | ||||
|  | ||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) | ||||
| {  | ||||
|   int ierr; | ||||
| @@ -596,6 +600,17 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) | ||||
|     } | ||||
|   } | ||||
| }; | ||||
| CartesianCommunicator::~CartesianCommunicator() /* frees `communicator` and every entry of `communicator_halo` */ | ||||
| { | ||||
|   int MPI_is_finalised; | ||||
|   MPI_Finalized(&MPI_is_finalised); /* ask MPI whether it has already been finalised */ | ||||
|   if (communicator && !MPI_is_finalised) { /* free only a non-null communicator while MPI is still live */ | ||||
|     MPI_Comm_free(&communicator); | ||||
|     for(int i=0;i<communicator_halo.size();i++){ /* the halo communicators are freed under the same guard */ | ||||
|       MPI_Comm_free(&communicator_halo[i]); | ||||
|     } | ||||
|   }   | ||||
| } | ||||
| void CartesianCommunicator::GlobalSum(uint32_t &u){ | ||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); | ||||
|   assert(ierr==0); | ||||
|   | ||||
| @@ -55,11 +55,16 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { | ||||
|  | ||||
| CartesianCommunicator::~CartesianCommunicator() /* frees `communicator` and every entry of `communicator_halo` */ | ||||
| { | ||||
|   if (communicator && !MPI::Is_finalized()) | ||||
|   int MPI_is_finalised; | ||||
|   MPI_Finalized(&MPI_is_finalised); /* ask MPI whether it has already been finalised */ | ||||
|   if (communicator && !MPI_is_finalised){ /* free only a non-null communicator while MPI is still live */ | ||||
|     MPI_Comm_free(&communicator); | ||||
|     for(int i=0;i<  communicator_halo.size();i++){ /* the halo communicators are freed under the same guard */ | ||||
|       MPI_Comm_free(&communicator_halo[i]); | ||||
|     } | ||||
|   }   | ||||
| } | ||||
|  | ||||
|  | ||||
| void CartesianCommunicator::GlobalSum(uint32_t &u){ | ||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); | ||||
|   assert(ierr==0); | ||||
| @@ -241,7 +246,7 @@ void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsReque | ||||
| {  | ||||
|   int nreq=waitall.size(); | ||||
|   MPI_Waitall(nreq, &waitall[0], MPI_STATUSES_IGNORE); | ||||
| }; | ||||
| } | ||||
| double CartesianCommunicator::StencilSendToRecvFrom(void *xmit, | ||||
| 						    int xmit_to_rank, | ||||
| 						    void *recv, | ||||
|   | ||||
| @@ -38,8 +38,8 @@ void CartesianCommunicator::Init(int *argc, char *** arv) | ||||
|   ShmInitGeneric(); | ||||
| } | ||||
|  | ||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)  | ||||
|   : CartesianCommunicator(processors) {} /* `parent` is ignored: plain delegation to the one-argument constructor */ | ||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)  | ||||
|   : CartesianCommunicator(processors) { srank=0;} /* `parent` is ignored: delegate to the one-argument constructor and report srank 0 */ | ||||
|  | ||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) | ||||
| { | ||||
|   | ||||
| @@ -75,6 +75,8 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { | ||||
|   ShmInitGeneric(); | ||||
| } | ||||
|  | ||||
| CartesianCommunicator::~CartesianCommunicator(){} /* empty body: this variant of the destructor releases nothing */ | ||||
|  | ||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)  | ||||
|   : CartesianCommunicator(processors)  | ||||
| { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user