From e30a80a2340275774e464b5ce7b328f0ece84b44 Mon Sep 17 00:00:00 2001 From: Christopher Kelly Date: Thu, 15 Feb 2018 17:13:36 +0000 Subject: [PATCH 1/4] Relaxed constraints on MPI thread mode when not using multiple comms threads --- lib/communicator/Communicator_mpi3.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index 6732dcdf..eb0144f0 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/lib/communicator/Communicator_mpi3.cc @@ -44,7 +44,10 @@ void CartesianCommunicator::Init(int *argc, char ***argv) MPI_Initialized(&flag); // needed to coexist with other libs apparently if ( !flag ) { MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided); - assert (provided == MPI_THREAD_MULTIPLE); + //If only 1 comms thread we require any threading mode other than SINGLE, but for multiple comms threads we need MULTIPLE + if( (nCommThreads == 1 && provided == MPI_THREAD_SINGLE) || + (nCommThreads > 1 && provided != MPI_THREAD_MULTIPLE) ) + assert(0); } Grid_quiesce_nodes(); From 945684c470845d826fdbb8511ddf098a90779188 Mon Sep 17 00:00:00 2001 From: paboyle Date: Tue, 20 Feb 2018 14:28:38 +0000 Subject: [PATCH 2/4] updates for deflation in the RB solver --- lib/algorithms/Algorithms.h | 1 + .../iterative/ImplicitlyRestartedLanczos.h | 19 ----------- .../iterative/LocalCoherenceLanczos.h | 16 +-------- lib/algorithms/iterative/SchurRedBlack.h | 33 ++++++++++++++++--- 4 files changed, 30 insertions(+), 39 deletions(-) diff --git a/lib/algorithms/Algorithms.h b/lib/algorithms/Algorithms.h index 070a1019..ef147c53 100644 --- a/lib/algorithms/Algorithms.h +++ b/lib/algorithms/Algorithms.h @@ -39,6 +39,7 @@ Author: Peter Boyle #include #include +#include #include #include #include diff --git a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h index 7d5a1889..787cf15a 100644 --- 
a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ -149,19 +149,6 @@ void basisSortInPlace(std::vector & _v,std::vector& sort_vals, boo basisReorderInPlace(_v,sort_vals,idx); } -// PAB: faster to compute the inner products first then fuse loops. -// If performance critical can improve. -template -void basisDeflate(const std::vector &_v,const std::vector& eval,const Field& src_orig,Field& result) { - result = zero; - assert(_v.size()==eval.size()); - int N = (int)_v.size(); - for (int i=0;i &_v, - const std::vector& eval, - const Field& src_orig,Field& result) { - basisDeflate(_v,eval,src_orig,result); - } - ////////////////////////////////////////////////////////////////// // PAB: ////////////////////////////////////////////////////////////////// diff --git a/lib/algorithms/iterative/LocalCoherenceLanczos.h b/lib/algorithms/iterative/LocalCoherenceLanczos.h index 4c05f4c7..b8348c0c 100644 --- a/lib/algorithms/iterative/LocalCoherenceLanczos.h +++ b/lib/algorithms/iterative/LocalCoherenceLanczos.h @@ -31,6 +31,7 @@ Author: paboyle namespace Grid { + struct LanczosParams : Serializable { public: GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParams, @@ -240,21 +241,6 @@ private: std::vector _evec_coarse; public: - static void Deflate(std::vector subspace, - std::vector evec_coarse, - std::vector eval_coarse, - const FineField& src_orig,FineField& result) - { - int N = (int)evec_coarse.size(); - CoarseField src_coarse(evec_coarse[0]._grid); - CoarseField res_coarse(evec_coarse[0]._grid); res_coarse = zero; - blockProject(src_orig,src_coarse,subspace); - for (int i=0;i - void operator() (Matrix & _Matrix,const Field &in, Field &out){ + void operator() (Matrix & _Matrix,const Field &in, Field &out){ + ZeroGuesser guess; + (*this)(_Matrix,in,out,guess); + } + template + void operator() (Matrix & _Matrix,const Field &in, Field &out, Guesser &guess){ // FIXME CGdiagonalMee not implemented virtual function // FIXME 
use CBfactorise to control schur decomp @@ -129,7 +134,6 @@ namespace Grid { pickCheckerboard(Odd ,src_o,in); pickCheckerboard(Even,sol_e,out); pickCheckerboard(Odd ,sol_o,out); - std::cout << GridLogMessage << " SchurRedBlackStaggeredSolve checkerboards picked" < - void operator() (Matrix & _Matrix,const Field &in, Field &out){ + void operator() (Matrix & _Matrix,const Field &in, Field &out){ + ZeroGuesser guess; + (*this)(_Matrix,in,out,guess); + } + template + void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){ // FIXME CGdiagonalMee not implemented virtual function // FIXME use CBfactorise to control schur decomp @@ -225,6 +235,7 @@ namespace Grid { // Call the red-black solver ////////////////////////////////////////////////////////////// std::cout< - void operator() (Matrix & _Matrix,const Field &in, Field &out){ + void operator() (Matrix & _Matrix,const Field &in, Field &out){ + ZeroGuesser guess; + (*this)(_Matrix,in,out,guess); + } + template + void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){ // FIXME CGdiagonalMee not implemented virtual function // FIXME use CBfactorise to control schur decomp @@ -305,6 +321,7 @@ namespace Grid { ////////////////////////////////////////////////////////////// std::cout< - void operator() (Matrix & _Matrix,const Field &in, Field &out){ + void operator() (Matrix & _Matrix,const Field &in, Field &out){ + ZeroGuesser guess; + (*this)(_Matrix,in,out,guess); + } + template + void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){ // FIXME CGdiagonalMee not implemented virtual function // FIXME use CBfactorise to control schur decomp @@ -385,6 +407,7 @@ namespace Grid { std::cout< Date: Tue, 20 Feb 2018 14:29:08 +0000 Subject: [PATCH 3/4] Deflation interface for solvers --- lib/algorithms/iterative/Deflation.h | 101 +++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 lib/algorithms/iterative/Deflation.h diff --git 
a/lib/algorithms/iterative/Deflation.h b/lib/algorithms/iterative/Deflation.h new file mode 100644 index 00000000..b6aa0d3d --- /dev/null +++ b/lib/algorithms/iterative/Deflation.h @@ -0,0 +1,101 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/algorithms/iterative/Deflation.h + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef GRID_DEFLATION_H +#define GRID_DEFLATION_H + +namespace Grid { + +struct ZeroGuesser { +public: + template + void operator()(const Field &src,Field &guess) { guess = Zero(); }; +}; +struct SourceGuesser { +public: + template + void operator()(const Field &src,Field &guess) { guess = src; }; +}; + +//////////////////////////////// +// Fine grid deflation +//////////////////////////////// +template +struct DeflatedGuesser { +private: + const std::vector &evec; + const std::vector &eval; + +public: + + DeflatedGuesser(const std::vector & _evec,const std::vector & _eval) : evec(_evec), eval(_eval) {}; + + void operator()(const Field &src,Field &guess) { + guess = zero; + assert(evec.size()==eval.size()); + auto N = evec.size(); + for (int i=0;i +class LocalCoherenceDeflatedGuesser { +private: + const std::vector &subspace; + const std::vector &evec_coarse; + const std::vector &eval_coarse; +public: + + LocalCoherenceDeflatedGuesser(const std::vector &_subspace, + const std::vector &_evec_coarse, + const std::vector &_eval_coarse) + : subspace(_subspace), + evec_coarse(_evec_coarse), + eval_coarse(_eval_coarse) + { + } + + void operator()(const FineField &src,FineField &guess) { + int N = (int)evec_coarse.size(); + CoarseField src_coarse(evec_coarse[0]._grid); + CoarseField guess_coarse(evec_coarse[0]._grid); guess_coarse = zero; + blockProject(src,src_coarse,subspace); + for (int i=0;i Date: Tue, 20 Feb 2018 15:12:31 +0000 Subject: [PATCH 4/4] Extra communicator free that I had missed. 
Hard to audit them all as this is complex --- lib/communicator/Communicator_mpi3.cc | 12 ++++++++++-- lib/communicator/SharedMemory.h | 1 + lib/communicator/SharedMemoryMPI.cc | 4 ++++ lib/communicator/SharedMemoryNone.cc | 2 ++ 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index eb0144f0..424b7973 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/lib/communicator/Communicator_mpi3.cc @@ -89,10 +89,16 @@ void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &c CartesianCommunicator::CartesianCommunicator(const std::vector &processors) { MPI_Comm optimal_comm; - GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm); // Remap using the shared memory optimising routine + //////////////////////////////////////////////////// + // Remap using the shared memory optimising routine + // The remap creates a comm which must be freed + //////////////////////////////////////////////////// + GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm); InitFromMPICommunicator(processors,optimal_comm); SetCommunicator(optimal_comm); + /////////////////////////////////////////////////// // Free the temp communicator + /////////////////////////////////////////////////// MPI_Comm_free(&optimal_comm); } @@ -202,8 +208,10 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors, // Take the right SHM buffers ////////////////////////////////////////////////////////////////////////////////////////////////////// SetCommunicator(comm_split); - + + /////////////////////////////////////////////// // Free the temp communicator + /////////////////////////////////////////////// MPI_Comm_free(&comm_split); if(0){ diff --git a/lib/communicator/SharedMemory.h b/lib/communicator/SharedMemory.h index 0f647dc6..9f6b1a25 100644 --- a/lib/communicator/SharedMemory.h +++ b/lib/communicator/SharedMemory.h @@ -133,6 +133,7 @@ class SharedMemory 
public: SharedMemory() {}; + ~SharedMemory(); /////////////////////////////////////////////////////////////////////////////////////// // set the buffers & sizes /////////////////////////////////////////////////////////////////////////////////////// diff --git a/lib/communicator/SharedMemoryMPI.cc b/lib/communicator/SharedMemoryMPI.cc index 2a62b7ac..9e5d8f15 100644 --- a/lib/communicator/SharedMemoryMPI.cc +++ b/lib/communicator/SharedMemoryMPI.cc @@ -399,5 +399,9 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p) return (void *) remote; } } +SharedMemory::~SharedMemory() +{ + MPI_Comm_free(&ShmComm); +}; } diff --git a/lib/communicator/SharedMemoryNone.cc b/lib/communicator/SharedMemoryNone.cc index 7feed7e4..a23e3c1c 100644 --- a/lib/communicator/SharedMemoryNone.cc +++ b/lib/communicator/SharedMemoryNone.cc @@ -122,5 +122,7 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p) { return NULL; } +SharedMemory::~SharedMemory() +{}; }