Merge branch 'develop' into feature/lanczos-reorg

2025-07-23 16:07:08 +01:00 · 2017-10-27 14:21:38 +01:00
parent aa66f41c69 1ef424b139
commit 84b441800f
14 changed files with 166 additions and 32 deletions
--- a/lib/algorithms/LinearOperator.h
+++ b/lib/algorithms/LinearOperator.h
@@ -319,7 +319,7 @@ namespace Grid {
 	Field tmp(in._grid);
 	_Mat.Meooe(in,tmp);
 	_Mat.MooeeInv(tmp,out);
-	_Mat.MeooeDag(out,tmp);
+	_Mat.Meooe(out,tmp);
 	_Mat.Mooee(in,out);
        return axpy_norm(out,-1.0,tmp,out);
      }
--- a/lib/algorithms/iterative/SchurRedBlack.h
+++ b/lib/algorithms/iterative/SchurRedBlack.h
@@ -55,7 +55,15 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
   *Odd
   * i)                 D_oo psi_o =  L^{-1}  eta_o
   *                        eta_o' = (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
+   *
+   * Wilson:
   *      (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1}  eta_o
+   * Stag:
+   *      D_oo psi_o = L^{-1}  eta =    (eta_o - Moe Mee^{-1} eta_e)
+   *
+   * L^-1 eta_o= (1              0 ) (e
+   *             (-MoeMee^{-1}   1 )   
+   *
   *Even
   * ii)  Mee psi_e + Meo psi_o = src_e
   *
@@ -122,18 +130,19 @@ namespace Grid {
      pickCheckerboard(Odd ,sol_o,out);
    
      /////////////////////////////////////////////////////
-      // src_o = Mdag * (source_o - Moe MeeInv source_e)
+      // src_o = (source_o - Moe MeeInv source_e)
      /////////////////////////////////////////////////////
      _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even);
      _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);     
      tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);     

-      _Matrix.Mooee(tmp,src_o);     assert(src_o.checkerboard ==Odd);
+      src_o = tmp;     assert(src_o.checkerboard ==Odd);
+      //  _Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source

      //////////////////////////////////////////////////////////////
      // Call the red-black solver
      //////////////////////////////////////////////////////////////
-      std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
+      std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver calling the Mpc solver" <<std::endl;
      _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd);

      ///////////////////////////////////////////////////
--- a/lib/cartesian/Cartesian_base.h
+++ b/lib/cartesian/Cartesian_base.h
@@ -52,6 +52,8 @@ public:
    GridBase(const std::vector<int> & processor_grid,
 	     const CartesianCommunicator &parent) : CartesianCommunicator(processor_grid,parent) {};

+    virtual ~GridBase() = default;
+
    // Physics Grid information.
    std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes.
    std::vector<int> _fdimensions;// (full) Global dimensions of array prior to cb removal
--- a/lib/cartesian/Cartesian_full.h
+++ b/lib/cartesian/Cartesian_full.h
@@ -81,6 +81,8 @@ public:
      Init(dimensions,simd_layout,processor_grid);
    }

+    virtual ~GridCartesian() = default;
+
    void Init(const std::vector<int> &dimensions,
 	      const std::vector<int> &simd_layout,
 	      const std::vector<int> &processor_grid)
--- a/lib/cartesian/Cartesian_red_black.h
+++ b/lib/cartesian/Cartesian_red_black.h
@@ -133,6 +133,8 @@ public:
    {
      Init(base->_fdimensions,base->_simd_layout,base->_processors,checker_dim_mask,checker_dim)  ;
    }
+
+    virtual ~GridRedBlackCartesian() = default;
 #if 0
    ////////////////////////////////////////////////////////////
    // Create redblack grid ;; deprecate these. Should not
--- a/lib/communicator/Communicator_base.h
+++ b/lib/communicator/Communicator_base.h
@@ -155,6 +155,7 @@ class CartesianCommunicator {
  ////////////////////////////////////////////////
  CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent);
  CartesianCommunicator(const std::vector<int> &pdimensions_in);
+  virtual ~CartesianCommunicator();

 private:
 #if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) 
@@ -273,12 +274,16 @@ class CartesianCommunicator {
    //    std::cerr << " AllToAll in.size()  "<<in.size()<<std::endl;
    //    std::cerr << " AllToAll out.size() "<<out.size()<<std::endl;
    assert(in.size()==out.size());
-    size_t bytes=(in.size()*sizeof(T))/numnode;
-    assert((bytes*numnode) == in.size()*sizeof(T));
-    AllToAll(dim,(void *)&in[0],(void *)&out[0],bytes);
+    uint64_t bytes=sizeof(T);
+    uint64_t words=in.size()/numnode;
+
+    assert(numnode * words == in.size());
+    assert(words < (1ULL<<32));
+
+    AllToAll(dim,(void *)&in[0],(void *)&out[0],words,bytes);
  }
-  void AllToAll(int dim  ,void *in,void *out,int bytes);
-  void AllToAll(void  *in,void *out,int bytes);
+  void AllToAll(int dim  ,void *in,void *out,uint64_t words,uint64_t bytes);
+  void AllToAll(void  *in,void *out,uint64_t words         ,uint64_t bytes);
  
  template<class obj> void Broadcast(int root,obj &data)
    {
--- a/lib/communicator/Communicator_mpi.cc
+++ b/lib/communicator/Communicator_mpi.cc
@@ -52,6 +52,15 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
  MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
  ShmInitGeneric();
 }
+
+CartesianCommunicator::~CartesianCommunicator()
+{
+  int MPI_is_finalised;
+  MPI_Finalized(&MPI_is_finalised);
+  if (communicator && MPI_is_finalised)
+    MPI_Comm_free(&communicator);
+}
+
 void CartesianCommunicator::GlobalSum(uint32_t &u){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
  assert(ierr==0);
@@ -188,7 +197,7 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
 		     communicator);
  assert(ierr==0);
 }
-void CartesianCommunicator::AllToAll(int dim,void  *in,void *out,int bytes)
+void CartesianCommunicator::AllToAll(int dim,void  *in,void *out,uint64_t words,uint64_t bytes)
 {
  std::vector<int> row(_ndimension,1);
  assert(dim>=0 && dim<_ndimension);
@@ -197,11 +206,25 @@ void CartesianCommunicator::AllToAll(int dim,void  *in,void *out,int bytes)
  row[dim] = _processors[dim];

  CartesianCommunicator Comm(row,*this);
-  Comm.AllToAll(in,out,bytes);
+  Comm.AllToAll(in,out,words,bytes);
 }
-void CartesianCommunicator::AllToAll(void  *in,void *out,int bytes)
+void CartesianCommunicator::AllToAll(void  *in,void *out,uint64_t words,uint64_t bytes)
 {
-  MPI_Alltoall(in ,bytes,MPI_BYTE,out,bytes,MPI_BYTE,communicator);
+  // MPI is a pain and uses "int" arguments
+  // 64*64*64*128*16 == 500Million elements of data.
+  // When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug.
+  // (Turns up on 32^3 x 64 Gparity too)
+  MPI_Datatype object;
+  int iwords; 
+  int ibytes;
+  iwords = words;
+  ibytes = bytes;
+  assert(words == iwords); // safe to cast to int ?
+  assert(bytes == ibytes); // safe to cast to int ?
+  MPI_Type_contiguous(ibytes,MPI_BYTE,&object);
+  MPI_Type_commit(&object);
+  MPI_Alltoall(in,iwords,object,out,iwords,object,communicator);
+  MPI_Type_free(&object);
 }
  ///////////////////////////////////////////////////////
  // Should only be used prior to Grid Init finished.
--- a/lib/communicator/Communicator_mpi3.cc
+++ b/lib/communicator/Communicator_mpi3.cc
@@ -712,7 +712,8 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
 							 int from,
 							 int bytes,int dir)
 {
-  assert(dir < communicator_halo.size());
+  int ncomm  =communicator_halo.size(); 
+  int commdir=dir%ncomm;

  MPI_Request xrq;
  MPI_Request rrq;
@@ -732,14 +733,14 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
  gfrom = MPI_UNDEFINED;
 #endif
  if ( gfrom ==MPI_UNDEFINED) {
-    ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator_halo[dir],&rrq);
+    ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator_halo[commdir],&rrq);
    assert(ierr==0);
    list.push_back(rrq);
    off_node_bytes+=bytes;
  }

  if ( gdest == MPI_UNDEFINED ) {
-    ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator_halo[dir],&xrq);
+    ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator_halo[commdir],&xrq);
    assert(ierr==0);
    list.push_back(xrq);
    off_node_bytes+=bytes;
--- a/lib/communicator/Communicator_mpit.cc
+++ b/lib/communicator/Communicator_mpit.cc
@@ -53,6 +53,13 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
  ShmInitGeneric();
 }

+CartesianCommunicator::~CartesianCommunicator()
+{
+  if (communicator && !MPI::Is_finalized())
+    MPI_Comm_free(&communicator);
+}
+
+
 void CartesianCommunicator::GlobalSum(uint32_t &u){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
  assert(ierr==0);
@@ -217,13 +224,14 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
 {
  int myrank = _processor;
  int ierr;
-  assert(dir < communicator_halo.size());
+  int ncomm  =communicator_halo.size(); 
+  int commdir=dir%ncomm;
  
  //  std::cout << " sending on communicator "<<dir<<" " <<communicator_halo[dir]<<std::endl;
  // Give the CPU to MPI immediately; can use threads to overlap optionally
  MPI_Request req[2];
-  MPI_Irecv(recv,bytes,MPI_CHAR,recv_from_rank,recv_from_rank, communicator_halo[dir],&req[1]);
-  MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank  ,myrank        , communicator_halo[dir],&req[0]);
+  MPI_Irecv(recv,bytes,MPI_CHAR,recv_from_rank,recv_from_rank, communicator_halo[commdir],&req[1]);
+  MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank  ,myrank        , communicator_halo[commdir],&req[0]);

  list.push_back(req[0]);
  list.push_back(req[1]);
@@ -242,13 +250,14 @@ double CartesianCommunicator::StencilSendToRecvFrom(void *xmit,
 {
  int myrank = _processor;
  int ierr;
-  assert(dir < communicator_halo.size());
-  
-  //  std::cout << " sending on communicator "<<dir<<" " <<communicator_halo[dir]<<std::endl;
+  //  std::cout << " sending on communicator "<<dir<<" " <<communicator_halo.size()<< <std::endl;
+
+  int ncomm  =communicator_halo.size(); 
+  int commdir=dir%ncomm;
  // Give the CPU to MPI immediately; can use threads to overlap optionally
  MPI_Request req[2];
-  MPI_Irecv(recv,bytes,MPI_CHAR,recv_from_rank,recv_from_rank, communicator_halo[dir],&req[1]);
-  MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank  ,myrank        , communicator_halo[dir],&req[0]);
+  MPI_Irecv(recv,bytes,MPI_CHAR,recv_from_rank,recv_from_rank, communicator_halo[commdir],&req[1]);
+  MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank  ,myrank        , communicator_halo[commdir],&req[0]);
  MPI_Waitall(2, req, MPI_STATUSES_IGNORE);
  return 2.0*bytes;
 }
--- a/lib/communicator/Communicator_none.cc
+++ b/lib/communicator/Communicator_none.cc
@@ -56,6 +56,8 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
  }
 }

+CartesianCommunicator::~CartesianCommunicator(){}
+
 void CartesianCommunicator::GlobalSum(float &){}
 void CartesianCommunicator::GlobalSumVector(float *,int N){}
 void CartesianCommunicator::GlobalSum(double &){}
@@ -98,9 +100,13 @@ void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &
 {
  assert(0);
 }
-void CartesianCommunicator::AllToAll(int dim,void  *in,void *out,int bytes)
+void CartesianCommunicator::AllToAll(int dim,void  *in,void *out,uint64_t words,uint64_t bytes)
 {
-  bcopy(in,out,bytes);
+  bcopy(in,out,bytes*words);
+}
+void CartesianCommunicator::AllToAll(void  *in,void *out,uint64_t words,uint64_t bytes)
+{
+  bcopy(in,out,bytes*words);
 }

 int  CartesianCommunicator::RankWorld(void){return 0;}
--- a/lib/lattice/Lattice_transfer.h
+++ b/lib/lattice/Lattice_transfer.h
@@ -800,8 +800,8 @@ void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj>   & split)
    ratio[d] = full_grid->_processors[d]/ split_grid->_processors[d];
  }

-  int lsites = full_grid->lSites();
-  Integer sz = lsites * nvector;
+  uint64_t lsites = full_grid->lSites();
+  uint64_t     sz = lsites * nvector;
  std::vector<Sobj> tmpdata(sz);
  std::vector<Sobj> alldata(sz);
  std::vector<Sobj> scalardata(lsites); 
@@ -918,8 +918,8 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj>   & split)
    ratio[d] = full_grid->_processors[d]/ split_grid->_processors[d];
  }

-  int lsites = full_grid->lSites();
-  Integer sz = lsites * nvector;
+  uint64_t lsites = full_grid->lSites();
+  uint64_t     sz = lsites * nvector;
  std::vector<Sobj> tmpdata(sz);
  std::vector<Sobj> alldata(sz);
  std::vector<Sobj> scalardata(lsites); 
--- a/lib/qcd/hmc/integrators/Integrator_algorithm.h
+++ b/lib/qcd/hmc/integrators/Integrator_algorithm.h
@@ -231,7 +231,7 @@ class ForceGradient : public Integrator<FieldImplementation, SmearingPolicy,
    Field Pfg(U._grid);
    Ufg = U;
    Pfg = zero;
-    std::cout << GridLogMessage << "FG update " << fg_dt << " " << ep
+    std::cout << GridLogIntegrator << "FG update " << fg_dt << " " << ep
              << std::endl;
    // prepare_fg; no prediction/result cache for now
    // could relax CG stopping conditions for the
--- a/tests/solver/Test_staggered_cg_prec.cc
+++ b/tests/solver/Test_staggered_cg_prec.cc
@@ -48,7 +48,6 @@ struct scal {
 int main (int argc, char ** argv)
 {
  typedef typename ImprovedStaggeredFermionR::FermionField FermionField; 
-  typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField; 
  typename ImprovedStaggeredFermionR::ImplParams params; 

  Grid_init(&argc,&argv);
--- a/tests/solver/Test_staggered_cg_schur.cc
+++ b/tests/solver/Test_staggered_cg_schur.cc
@@ -0,0 +1,76 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/Test_wilson_cg_schur.cc
+
+    Copyright (C) 2015
+
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace std;
+using namespace Grid;
+using namespace Grid::QCD;
+
+template<class d>
+struct scal {
+  d internal;
+};
+
+  Gamma::Algebra Gmu [] = {
+    Gamma::Algebra::GammaX,
+    Gamma::Algebra::GammaY,
+    Gamma::Algebra::GammaZ,
+    Gamma::Algebra::GammaT
+  };
+
+int main (int argc, char ** argv)
+{
+  typedef typename ImprovedStaggeredFermionR::FermionField FermionField; 
+  typename ImprovedStaggeredFermionR::ImplParams params; 
+  Grid_init(&argc,&argv);
+
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian     RBGrid(&Grid);
+
+  std::vector<int> seeds({1,2,3,4});
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+
+  LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+
+  FermionField    src(&Grid); random(pRNG,src);
+  FermionField result(&Grid); result=zero;
+  FermionField  resid(&Grid); 
+
+  RealD mass=0.1;
+  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass);
+
+  ConjugateGradient<FermionField> CG(1.0e-8,10000);
+  SchurRedBlackStaggeredSolve<FermionField> SchurSolver(CG);
+
+  SchurSolver(Ds,src,result);
+  
+  Grid_finalize();
+}