From 69929f20bbea5bcd125d9d296fd395e04e0e4dd2 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 6 Nov 2017 23:45:00 +0000 Subject: [PATCH 1/5] Destructor fix. Split Grid and MPI3 will not yet work without more effort from me. --- lib/communicator/Communicator_base.cc | 41 +++++++++++++++++++++++++-- lib/communicator/Communicator_base.h | 2 +- lib/communicator/Communicator_mpi.cc | 30 -------------------- lib/communicator/Communicator_mpi3.cc | 17 ++++++++++- lib/communicator/Communicator_mpit.cc | 8 +++++- 5 files changed, 62 insertions(+), 36 deletions(-) diff --git a/lib/communicator/Communicator_base.cc b/lib/communicator/Communicator_base.cc index a72c75fe..531dd358 100644 --- a/lib/communicator/Communicator_base.cc +++ b/lib/communicator/Communicator_base.cc @@ -98,7 +98,39 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N) #if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) || defined (GRID_COMMS_MPI3) +void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,uint64_t bytes) +{ + std::vector row(_ndimension,1); + assert(dim>=0 && dim<_ndimension); + // Split the communicator + row[dim] = _processors[dim]; + + int me; + CartesianCommunicator Comm(row,*this,me); + Comm.AllToAll(in,out,words,bytes); +} +void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes) +{ + // MPI is a pain and uses "int" arguments + // 64*64*64*128*16 == 500Million elements of data. + // When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug. + // (Turns up on 32^3 x 64 Gparity too) + MPI_Datatype object; + int iwords; + int ibytes; + iwords = words; + ibytes = bytes; + assert(words == iwords); // safe to cast to int ? + assert(bytes == ibytes); // safe to cast to int ? + MPI_Type_contiguous(ibytes,MPI_BYTE,&object); + MPI_Type_commit(&object); + MPI_Alltoall(in,iwords,object,out,iwords,object,communicator); + MPI_Type_free(&object); +} +#endif + +#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent,int &srank) { _ndimension = processors.size(); @@ -176,6 +208,7 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors, ////////////////////////////////////////////////////////////////////////////////////////////////////// InitFromMPICommunicator(processors,comm_split); } + ////////////////////////////////////////////////////////////////////////////////////////////////////// // Take an MPI_Comm and self assemble ////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -199,7 +232,7 @@ void CartesianCommunicator::InitFromMPICommunicator(const std::vector &proc MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); if ( communicator_base != communicator_world ) { - std::cout << "Cartesian communicator created with a non-world communicator"< &proc int Size; MPI_Comm_size(communicator,&Size); -#ifdef GRID_COMMS_MPIT +#if defined(GRID_COMMS_MPIT) || defined (GRID_COMMS_MPI3) communicator_halo.resize (2*_ndimension); for(int i=0;i<_ndimension*2;i++){ MPI_Comm_dup(communicator,&communicator_halo[i]); @@ -220,7 +253,9 @@ void CartesianCommunicator::InitFromMPICommunicator(const std::vector &proc assert(Size==_Nprocessors); } +#endif +#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) CartesianCommunicator::CartesianCommunicator(const std::vector &processors) { InitFromMPICommunicator(processors,communicator_world); @@ -229,10 +264,10 @@ 
CartesianCommunicator::CartesianCommunicator(const std::vector &processors) #endif #if !defined( GRID_COMMS_MPI3) - int CartesianCommunicator::NodeCount(void) { return ProcessorCount();}; int CartesianCommunicator::RankCount(void) { return ProcessorCount();}; #endif + #if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPIT) double CartesianCommunicator::StencilSendToRecvFrom( void *xmit, int xmit_to_rank, diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index 4374ac93..73ea6165 100644 --- a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -158,7 +158,7 @@ class CartesianCommunicator { virtual ~CartesianCommunicator(); private: -#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) +#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) || defined (GRID_COMMS_MPI3) //////////////////////////////////////////////// // Private initialise from an MPI communicator // Can use after an MPI_Comm_split, but hidden from user so private diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc index 5593aa8b..f7b2a460 100644 --- a/lib/communicator/Communicator_mpi.cc +++ b/lib/communicator/Communicator_mpi.cc @@ -196,36 +196,6 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes) root, communicator); assert(ierr==0); -} -void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,uint64_t bytes) -{ - std::vector row(_ndimension,1); - assert(dim>=0 && dim<_ndimension); - - // Split the communicator - row[dim] = _processors[dim]; - - int me; - CartesianCommunicator Comm(row,*this,me); - Comm.AllToAll(in,out,words,bytes); -} -void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes) -{ - // MPI is a pain and uses "int" arguments - // 64*64*64*128*16 == 500Million elements of data. - // When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug. - // (Turns up on 32^3 x 64 Gparity too) - MPI_Datatype object; - int iwords; - int ibytes; - iwords = words; - ibytes = bytes; - assert(words == iwords); // safe to cast to int ? - assert(bytes == ibytes); // safe to cast to int ? - MPI_Type_contiguous(ibytes,MPI_BYTE,&object); - MPI_Type_commit(&object); - MPI_Alltoall(in,iwords,object,out,iwords,object,communicator); - MPI_Type_free(&object); } /////////////////////////////////////////////////////// // Should only be used prior to Grid Init finished. 
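
Note on the AllToAll implementation moved into Communicator_base.cc above: MPI_Alltoall takes 32-bit "int" count arguments, so on large local volumes the byte count per rank can overflow even though the element count does not (the "500 million elements" comment in the patch). The patch sidesteps this by wrapping the per-element byte size in a contiguous derived datatype, so MPI only ever sees the small element count. A self-contained sketch of that idiom follows; the free-function name alltoall_bytes is a placeholder, not part of Grid, and it assumes an already valid communicator.

#include <mpi.h>
#include <cassert>
#include <cstdint>

// Send "words" elements of "bytes" bytes each to every rank, avoiding the
// 32-bit count*size overflow by hiding the byte size inside an MPI datatype.
void alltoall_bytes(MPI_Comm communicator, void *in, void *out,
                    uint64_t words, uint64_t bytes)
{
  int iwords = (int)words;
  int ibytes = (int)bytes;
  assert(words == (uint64_t)iwords);   // element count per rank must fit in an int
  assert(bytes == (uint64_t)ibytes);   // per-element size must fit in an int

  MPI_Datatype object;
  MPI_Type_contiguous(ibytes, MPI_BYTE, &object);  // one "object" == one element
  MPI_Type_commit(&object);
  MPI_Alltoall(in, iwords, object, out, iwords, object, communicator);
  MPI_Type_free(&object);
}
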
diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index 3cac726c..9e023fef 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/lib/communicator/Communicator_mpi3.cc @@ -454,11 +454,15 @@ void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &c ////////////////////////////////// // Try to subdivide communicator ////////////////////////////////// -CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent) +/* + * Use default in MPI compile + */ +CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent,int &srank) : CartesianCommunicator(processors) { std::cout << "Attempts to split MPI3 communicators will fail until implemented" < &processors) { int ierr; @@ -596,6 +600,17 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors) } } }; +CartesianCommunicator::~CartesianCommunicator() +{ + int MPI_is_finalised; + MPI_Finalized(&MPI_is_finalised); + if (communicator && MPI_is_finalised) { + MPI_Comm_free(&communicator); + for(int i=0;i< communicator_halo.size();i++){ + MPI_Comm_free(&communicator_halo[i]); + } + } +} void CartesianCommunicator::GlobalSum(uint32_t &u){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); assert(ierr==0); diff --git a/lib/communicator/Communicator_mpit.cc b/lib/communicator/Communicator_mpit.cc index 56f96c20..31f786ac 100644 --- a/lib/communicator/Communicator_mpit.cc +++ b/lib/communicator/Communicator_mpit.cc @@ -55,8 +55,14 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { CartesianCommunicator::~CartesianCommunicator() { - if (communicator && !MPI::Is_finalized()) + int MPI_is_finalised; + MPI_Finalized(&MPI_is_finalised); + if (communicator && MPI_is_finalised) { MPI_Comm_free(&communicator); + for(int i=0;i< communicator_halo.size();i++){ + MPI_Comm_free(&communicator_halo[i]); + } + } } From c519aab19dcfb559ab21585d9f9221b1fc193a60 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Tue, 7 Nov 2017 13:55:37 +0000 Subject: [PATCH 2/5] Fixing the MPI memory leak in the communicators --- lib/communicator/Communicator_mpi.cc | 2 +- lib/communicator/Communicator_mpi3.cc | 8 ++++++++ lib/communicator/Communicator_mpit.cc | 5 +++-- lib/communicator/Communicator_shmem.cc | 2 ++ 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc index 5593aa8b..1c1ae6c5 100644 --- a/lib/communicator/Communicator_mpi.cc +++ b/lib/communicator/Communicator_mpi.cc @@ -57,7 +57,7 @@ CartesianCommunicator::~CartesianCommunicator() { int MPI_is_finalised; MPI_Finalized(&MPI_is_finalised); - if (communicator && MPI_is_finalised) + if (communicator && !MPI_is_finalised) MPI_Comm_free(&communicator); } diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index 3cac726c..52f65c34 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/lib/communicator/Communicator_mpi3.cc @@ -596,6 +596,14 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors) } } }; +CartesianCommunicator::~CartesianCommunicator() +{ + int MPI_is_finalised; + MPI_Finalized(&MPI_is_finalised); + if (communicator && !MPI_is_finalised) + MPI_Comm_free(&communicator); +} + void CartesianCommunicator::GlobalSum(uint32_t &u){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); assert(ierr==0); diff --git 
a/lib/communicator/Communicator_mpit.cc b/lib/communicator/Communicator_mpit.cc index 56f96c20..1c24433b 100644 --- a/lib/communicator/Communicator_mpit.cc +++ b/lib/communicator/Communicator_mpit.cc @@ -55,11 +55,12 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { CartesianCommunicator::~CartesianCommunicator() { - if (communicator && !MPI::Is_finalized()) + int MPI_is_finalised; + MPI_Finalized(&MPI_is_finalised); + if (communicator && !MPI_is_finalised) MPI_Comm_free(&communicator); } - void CartesianCommunicator::GlobalSum(uint32_t &u){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); assert(ierr==0); diff --git a/lib/communicator/Communicator_shmem.cc b/lib/communicator/Communicator_shmem.cc index ed49285d..03e3173e 100644 --- a/lib/communicator/Communicator_shmem.cc +++ b/lib/communicator/Communicator_shmem.cc @@ -75,6 +75,8 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { ShmInitGeneric(); } +CartesianCommunicator::~CartesianCommunicator(){} + CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent) : CartesianCommunicator(processors) { From 9b8d1cc3da4769f250665cc8e05fb305c794bc5d Mon Sep 17 00:00:00 2001 From: Azusa Yamaguchi Date: Tue, 7 Nov 2017 14:48:45 +0000 Subject: [PATCH 3/5] Staggered Schur decomposed matrix norm changed to not be the Schur anymore :( Carleton wanted this for multimass / multishift --- lib/algorithms/LinearOperator.h | 12 ++++++++++++ lib/algorithms/iterative/SchurRedBlack.h | 6 +++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/lib/algorithms/LinearOperator.h b/lib/algorithms/LinearOperator.h index 9b0e4942..0fa039c8 100644 --- a/lib/algorithms/LinearOperator.h +++ b/lib/algorithms/LinearOperator.h @@ -317,11 +317,23 @@ namespace Grid { } virtual RealD Mpc (const Field &in, Field &out) { Field tmp(in._grid); + Field tmp2(in._grid); + + _Mat.Mooee(in,out); + _Mat.Mooee(out,tmp); + + _Mat.Meooe(in,out); + _Mat.Meooe(out,tmp2); + + return axpy_norm(out,-1.0,tmp2,tmp); +#if 0 + //... much prefer conventional Schur norm _Mat.Meooe(in,tmp); _Mat.MooeeInv(tmp,out); _Mat.Meooe(out,tmp); _Mat.Mooee(in,out); return axpy_norm(out,-1.0,tmp,out); +#endif } virtual RealD MpcDag (const Field &in, Field &out){ return Mpc(in,out); diff --git a/lib/algorithms/iterative/SchurRedBlack.h b/lib/algorithms/iterative/SchurRedBlack.h index a0fd86a6..b9767aa8 100644 --- a/lib/algorithms/iterative/SchurRedBlack.h +++ b/lib/algorithms/iterative/SchurRedBlack.h @@ -90,7 +90,7 @@ namespace Grid { // Take a matrix and form a Red Black solver calling a Herm solver // Use of RB info prevents making SchurRedBlackSolve conform to standard interface /////////////////////////////////////////////////////////////////////////////////////////////////////// - + // Now make the norm reflect extra factor of Mee template class SchurRedBlackStaggeredSolve { private: OperatorFunction & _HermitianRBSolver; @@ -136,8 +136,8 @@ namespace Grid { _Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd); tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd); - src_o = tmp; assert(src_o.checkerboard ==Odd); - // _Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source + //src_o = tmp; assert(src_o.checkerboard ==Odd); + _Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source from dumb choice of matrix norm. 
////////////////////////////////////////////////////////////// // Call the red-black solver From 1860b1698c4b204a332e2fad4dea86c97ffb0abc Mon Sep 17 00:00:00 2001 From: Azusa Yamaguchi Date: Wed, 8 Nov 2017 09:03:01 +0000 Subject: [PATCH 4/5] Fixed the bug on MPI_T at Cam --- lib/communicator/Communicator_mpit.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/communicator/Communicator_mpit.cc b/lib/communicator/Communicator_mpit.cc index b2fb71a6..bceea0d8 100644 --- a/lib/communicator/Communicator_mpit.cc +++ b/lib/communicator/Communicator_mpit.cc @@ -57,7 +57,7 @@ CartesianCommunicator::~CartesianCommunicator() { int MPI_is_finalised; MPI_Finalized(&MPI_is_finalised); - if (communicator && !MPI_is_finalised) + if (communicator && !MPI_is_finalised){ MPI_Comm_free(&communicator); for(int i=0;i< communicator_halo.size();i++){ MPI_Comm_free(&communicator_halo[i]); @@ -246,7 +246,7 @@ void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector Date: Thu, 9 Nov 2017 19:46:57 +0000 Subject: [PATCH 5/5] Declaring virtual functions as pure virtual functions. --- lib/algorithms/iterative/ImplicitlyRestartedLanczos.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h index 7a0760c9..7b85c095 100644 --- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ -168,8 +168,8 @@ void basisDeflate(const std::vector &_v,const std::vector& eval,co template<class Field> class ImplicitlyRestartedLanczosTester { public: - virtual int TestConvergence(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox); - virtual int ReconstructEval(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox); + virtual int TestConvergence(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox)=0; + virtual int ReconstructEval(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox)=0; }; enum IRLdiagonalisation {
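
For reference, the destructor that patches 1, 2 and 4 converge on for the MPIT (and MPI3) targets, condensed here with comments added: the Cartesian and halo communicators are freed only while MPI is still initialised, since MPI_Comm_free after MPI_Finalize is erroneous, while never freeing them (the inverted test in patch 1) is what leaked the communicators that patch 2 fixes.

CartesianCommunicator::~CartesianCommunicator()
{
  int MPI_is_finalised;
  MPI_Finalized(&MPI_is_finalised);          // non-zero once MPI_Finalize has run
  if (communicator && !MPI_is_finalised) {   // only free while MPI is still alive
    MPI_Comm_free(&communicator);
    for (size_t i = 0; i < communicator_halo.size(); i++) {
      MPI_Comm_free(&communicator_halo[i]);  // per-direction halo communicators
    }
  }
}
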
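
On patch 3: the staggered Mpc changes from the conventional Schur complement to a squared-diagonal form, and the "extra factor of m in source" comment follows directly. Assuming, as for the staggered action, that the even-even and odd-odd blocks are just the mass term (Mee = Moo = m):

  old:  Mpc  = Moo - Moe Mee^{-1} Meo
  new:  Mpc' = Moo^2 - Moe Meo = Moo ( Moo - Moe Mee^{-1} Meo ) = Moo Mpc

so the preconditioned system Mpc' x_o = b is the old one multiplied through by Moo, and the odd-parity source must carry the same factor: that is the _Matrix.Mooee(tmp,src_o) line enabled in SchurRedBlackStaggeredSolve in place of the plain src_o = tmp.
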
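
On patch 5: with TestConvergence and ReconstructEval now pure virtual, any use of the implicitly restarted Lanczos must supply a concrete tester. A hypothetical minimal implementation is sketched below; the class name, member names, residual check and return convention (non-zero meaning converged) are illustrative assumptions, not taken from the patch, which only changes the interface.

// Hypothetical tester: accept an eigenpair once the operator residual,
// normalised by the largest eigenvalue estimate, is below the tolerance.
template<class Field>
class SimpleHermOpTester : public ImplicitlyRestartedLanczosTester<Field> {
public:
  LinearOperatorBase<Field> &_Linop;   // Hermitian operator being diagonalised
  SimpleHermOpTester(LinearOperatorBase<Field> &Linop) : _Linop(Linop) {};

  int TestConvergence(int j, RealD resid, Field &evec, RealD &eval, RealD evalMaxApprox)
  {
    Field v(evec._grid);
    _Linop.HermOp(evec, v);              // v = H evec
    axpy(v, -eval, evec, v);             // v = H evec - eval evec (residual vector)
    RealD rel = norm2(v) / (evalMaxApprox * evalMaxApprox);
    return (rel < resid * resid) ? 1 : 0;
  }
  int ReconstructEval(int j, RealD resid, Field &evec, RealD &eval, RealD evalMaxApprox)
  {
    return TestConvergence(j, resid, evec, eval, evalMaxApprox);
  }
};
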