From 8309f2364bbe57004478435d57eca84662dc2c56 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Wed, 25 Oct 2017 10:24:14 +0100 Subject: [PATCH 1/4] Solving again the MPI comm bug with FFTs --- lib/cartesian/Cartesian_base.h | 2 ++ lib/cartesian/Cartesian_full.h | 2 ++ lib/cartesian/Cartesian_red_black.h | 2 ++ lib/communicator/Communicator_base.h | 1 + lib/communicator/Communicator_mpi.cc | 6 ++++++ lib/communicator/Communicator_mpit.cc | 6 ++++++ lib/communicator/Communicator_none.cc | 2 ++ 7 files changed, 21 insertions(+) diff --git a/lib/cartesian/Cartesian_base.h b/lib/cartesian/Cartesian_base.h index 324772c5..6aa0e3c7 100644 --- a/lib/cartesian/Cartesian_base.h +++ b/lib/cartesian/Cartesian_base.h @@ -52,6 +52,8 @@ public: GridBase(const std::vector & processor_grid, const CartesianCommunicator &parent) : CartesianCommunicator(processor_grid,parent) {}; + virtual ~GridBase() = default; + // Physics Grid information. std::vector _simd_layout;// Which dimensions get relayed out over simd lanes. std::vector _fdimensions;// (full) Global dimensions of array prior to cb removal diff --git a/lib/cartesian/Cartesian_full.h b/lib/cartesian/Cartesian_full.h index a6a85ab7..c7ea68c9 100644 --- a/lib/cartesian/Cartesian_full.h +++ b/lib/cartesian/Cartesian_full.h @@ -81,6 +81,8 @@ public: Init(dimensions,simd_layout,processor_grid); } + virtual ~GridCartesian() = default; + void Init(const std::vector &dimensions, const std::vector &simd_layout, const std::vector &processor_grid) diff --git a/lib/cartesian/Cartesian_red_black.h b/lib/cartesian/Cartesian_red_black.h index f89cacc5..166c8491 100644 --- a/lib/cartesian/Cartesian_red_black.h +++ b/lib/cartesian/Cartesian_red_black.h @@ -133,6 +133,8 @@ public: { Init(base->_fdimensions,base->_simd_layout,base->_processors,checker_dim_mask,checker_dim) ; } + + virtual ~GridRedBlackCartesian() = default; #if 0 //////////////////////////////////////////////////////////// // Create redblack grid ;; deprecate these. 
Should not diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index 8ff22dbd..22c9e4d0 100644 --- a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -155,6 +155,7 @@ class CartesianCommunicator { //////////////////////////////////////////////// CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent); CartesianCommunicator(const std::vector &pdimensions_in); + virtual ~CartesianCommunicator(); private: #if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc index 678e4517..f1dad1e9 100644 --- a/lib/communicator/Communicator_mpi.cc +++ b/lib/communicator/Communicator_mpi.cc @@ -52,6 +52,12 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); ShmInitGeneric(); } + +CartesianCommunicator::~CartesianCommunicator(){ +  if (communicator && !MPI::Is_finalized()) +  MPI_Comm_free(&communicator); +} + void CartesianCommunicator::GlobalSum(uint32_t &u){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); assert(ierr==0); diff --git a/lib/communicator/Communicator_mpit.cc b/lib/communicator/Communicator_mpit.cc index 5137c27b..2d257a44 100644 --- a/lib/communicator/Communicator_mpit.cc +++ b/lib/communicator/Communicator_mpit.cc @@ -53,6 +53,12 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { ShmInitGeneric(); } +CartesianCommunicator::~CartesianCommunicator(){ +  if (communicator && !MPI::Is_finalized()) +  MPI_Comm_free(&communicator); +} + + void CartesianCommunicator::GlobalSum(uint32_t &u){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); assert(ierr==0); diff --git a/lib/communicator/Communicator_none.cc b/lib/communicator/Communicator_none.cc index e9d71a15..629a3e4a 100644 --- a/lib/communicator/Communicator_none.cc +++ 
b/lib/communicator/Communicator_none.cc @@ -56,6 +56,8 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors) } } +CartesianCommunicator::~CartesianCommunicator(){} + void CartesianCommunicator::GlobalSum(float &){} void CartesianCommunicator::GlobalSumVector(float *,int N){} void CartesianCommunicator::GlobalSum(double &){} From 8a3aae98f6ffba03dcb85e1be23cd387a510e35d Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Wed, 25 Oct 2017 10:34:49 +0100 Subject: [PATCH 2/4] Solving minor bug in compilation --- lib/communicator/Communicator_mpi.cc | 7 ++++--- lib/communicator/Communicator_mpit.cc | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc index f1dad1e9..5a2dc4d0 100644 --- a/lib/communicator/Communicator_mpi.cc +++ b/lib/communicator/Communicator_mpi.cc @@ -53,9 +53,10 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { ShmInitGeneric(); } -CartesianCommunicator::~CartesianCommunicator(){ -  if (communicator && !MPI::Is_finalized()) -  MPI_Comm_free(&communicator); +CartesianCommunicator::~CartesianCommunicator() +{ + if (communicator && !MPI::Is_finalized()) + MPI_Comm_free(&communicator); } void CartesianCommunicator::GlobalSum(uint32_t &u){ diff --git a/lib/communicator/Communicator_mpit.cc b/lib/communicator/Communicator_mpit.cc index 2d257a44..15ee13fd 100644 --- a/lib/communicator/Communicator_mpit.cc +++ b/lib/communicator/Communicator_mpit.cc @@ -53,9 +53,10 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { ShmInitGeneric(); } -CartesianCommunicator::~CartesianCommunicator(){ -  if (communicator && !MPI::Is_finalized()) -  MPI_Comm_free(&communicator); +CartesianCommunicator::~CartesianCommunicator() +{ + if (communicator && !MPI::Is_finalized()) + MPI_Comm_free(&communicator); } From 034de160bf7433480d2e176f3501180fbbf1c043 Mon Sep 17 00:00:00 2001 From: Azusa Yamaguchi Date: Thu, 26 Oct 2017 20:58:46 +0100 
Subject: [PATCH 3/4] Staggered updates : Schur fixed and added a unit test for Test_staggered_cg_schur.cc giving stronger check --- lib/algorithms/LinearOperator.h | 2 +- lib/algorithms/iterative/SchurRedBlack.h | 15 ++++- lib/communicator/Communicator_mpi3.cc | 7 ++- lib/communicator/Communicator_mpit.cc | 18 +++--- tests/solver/Test_staggered_cg_prec.cc | 1 - tests/solver/Test_staggered_cg_schur.cc | 76 ++++++++++++++++++++++++ 6 files changed, 103 insertions(+), 16 deletions(-) create mode 100644 tests/solver/Test_staggered_cg_schur.cc diff --git a/lib/algorithms/LinearOperator.h b/lib/algorithms/LinearOperator.h index f1b8820e..2a757352 100644 --- a/lib/algorithms/LinearOperator.h +++ b/lib/algorithms/LinearOperator.h @@ -319,7 +319,7 @@ namespace Grid { Field tmp(in._grid); _Mat.Meooe(in,tmp); _Mat.MooeeInv(tmp,out); - _Mat.MeooeDag(out,tmp); + _Mat.Meooe(out,tmp); _Mat.Mooee(in,out); return axpy_norm(out,-1.0,tmp,out); } diff --git a/lib/algorithms/iterative/SchurRedBlack.h b/lib/algorithms/iterative/SchurRedBlack.h index a309386b..a0fd86a6 100644 --- a/lib/algorithms/iterative/SchurRedBlack.h +++ b/lib/algorithms/iterative/SchurRedBlack.h @@ -55,7 +55,15 @@ Author: Peter Boyle *Odd * i) D_oo psi_o = L^{-1} eta_o * eta_o' = (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e) + * + * Wilson: * (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1} eta_o + * Stag: + * D_oo psi_o = L^{-1} eta = (eta_o - Moe Mee^{-1} eta_e) + * + * L^-1 eta_o= (1 0 ) (eta_e) + * (-MoeMee^{-1} 1 ) (eta_o) + * *Even * ii) Mee psi_e + Meo psi_o = src_e * @@ -122,18 +130,19 @@ namespace Grid { pickCheckerboard(Odd ,sol_o,out); ///////////////////////////////////////////////////// - // src_o = Mdag * (source_o - Moe MeeInv source_e) + // src_o = (source_o - Moe MeeInv source_e) ///////////////////////////////////////////////////// _Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even); _Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd); tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd); - 
_Matrix.Mooee(tmp,src_o); assert(src_o.checkerboard ==Odd); + src_o = tmp; assert(src_o.checkerboard ==Odd); + // _Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source ////////////////////////////////////////////////////////////// // Call the red-black solver ////////////////////////////////////////////////////////////// - std::cout< + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +struct scal { + d internal; +}; + + Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT + }; + +int main (int argc, char ** argv) +{ + typedef typename ImprovedStaggeredFermionR::FermionField FermionField; + typename ImprovedStaggeredFermionR::ImplParams params; + Grid_init(&argc,&argv); + + std::vector latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); + + std::vector seeds({1,2,3,4}); + GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); + + LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); + + FermionField src(&Grid); random(pRNG,src); + FermionField result(&Grid); result=zero; + FermionField resid(&Grid); + + RealD mass=0.1; + ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass); + + ConjugateGradient CG(1.0e-8,10000); + SchurRedBlackStaggeredSolve SchurSolver(CG); + + SchurSolver(Ds,src,result); + + Grid_finalize(); +} From 1ef424b1392038df12130b1ce2f855c8b1cc1dbd Mon Sep 17 00:00:00 2001 From: paboyle Date: Fri, 27 Oct 2017 14:20:35 +0100 Subject: [PATCH 4/4] Split grid Y2K bug fix attempt --- lib/communicator/Communicator_base.h | 14 +++++++++----- lib/communicator/Communicator_mpi.cc | 26 +++++++++++++++++++++----- lib/communicator/Communicator_none.cc | 8 ++++++-- lib/lattice/Lattice_transfer.h | 8 ++++---- 4 files changed, 40 insertions(+), 16 deletions(-) diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index 22c9e4d0..ff054497 100644 --- 
a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -274,12 +274,16 @@ class CartesianCommunicator { // std::cerr << " AllToAll in.size() "< void Broadcast(int root,obj &data) { diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc index 5a2dc4d0..ef612f98 100644 --- a/lib/communicator/Communicator_mpi.cc +++ b/lib/communicator/Communicator_mpi.cc @@ -55,7 +55,9 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { CartesianCommunicator::~CartesianCommunicator() { - if (communicator && !MPI::Is_finalized()) + int MPI_is_finalised; + MPI_Finalized(&MPI_is_finalised); + if (communicator && !MPI_is_finalised) /* free the communicator only while MPI is still live: MPI_Comm_free must not be called after MPI_Finalize */ MPI_Comm_free(&communicator); } @@ -195,7 +197,7 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes) communicator); assert(ierr==0); } -void CartesianCommunicator::AllToAll(int dim,void *in,void *out,int bytes) +void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,uint64_t bytes) { std::vector row(_ndimension,1); assert(dim>=0 && dim<_ndimension); @@ -204,11 +206,25 @@ void CartesianCommunicator::AllToAll(int dim,void *in,void *out,int bytes) row[dim] = _processors[dim]; CartesianCommunicator Comm(row,*this); - Comm.AllToAll(in,out,bytes); + Comm.AllToAll(in,out,words,bytes); } -void CartesianCommunicator::AllToAll(void *in,void *out,int bytes) +void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes) { - MPI_Alltoall(in ,bytes,MPI_BYTE,out,bytes,MPI_BYTE,communicator); + // MPI is a pain and uses "int" arguments + // 64*64*64*128*16 == 500Million elements of data. + // When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug. + // (Turns up on 32^3 x 64 Gparity too) + MPI_Datatype object; + int iwords; + int ibytes; + iwords = words; + ibytes = bytes; + assert(words == iwords); // safe to cast to int ? + assert(bytes == ibytes); // safe to cast to int ? 
+ MPI_Type_contiguous(ibytes,MPI_BYTE,&object); + MPI_Type_commit(&object); + MPI_Alltoall(in,iwords,object,out,iwords,object,communicator); + MPI_Type_free(&object); } /////////////////////////////////////////////////////// // Should only be used prior to Grid Init finished. diff --git a/lib/communicator/Communicator_none.cc b/lib/communicator/Communicator_none.cc index 629a3e4a..a862d52a 100644 --- a/lib/communicator/Communicator_none.cc +++ b/lib/communicator/Communicator_none.cc @@ -100,9 +100,13 @@ void CartesianCommunicator::SendToRecvFromComplete(std::vector & { assert(0); } -void CartesianCommunicator::AllToAll(int dim,void *in,void *out,int bytes) +void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,uint64_t bytes) { - bcopy(in,out,bytes); + bcopy(in,out,bytes*words); +} +void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes) +{ + bcopy(in,out,bytes*words); } int CartesianCommunicator::RankWorld(void){return 0;} diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index 713a8788..bc59e9eb 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -790,8 +790,8 @@ void Grid_split(std::vector > & full,Lattice & split) ratio[d] = full_grid->_processors[d]/ split_grid->_processors[d]; } - int lsites = full_grid->lSites(); - Integer sz = lsites * nvector; + uint64_t lsites = full_grid->lSites(); + uint64_t sz = lsites * nvector; std::vector tmpdata(sz); std::vector alldata(sz); std::vector scalardata(lsites); @@ -908,8 +908,8 @@ void Grid_unsplit(std::vector > & full,Lattice & split) ratio[d] = full_grid->_processors[d]/ split_grid->_processors[d]; } - int lsites = full_grid->lSites(); - Integer sz = lsites * nvector; + uint64_t lsites = full_grid->lSites(); + uint64_t sz = lsites * nvector; std::vector tmpdata(sz); std::vector alldata(sz); std::vector scalardata(lsites);