mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Debug in progress

This commit is contained in:
paboyle 2017-11-19 01:39:04 +00:00
parent 69929f20bb
commit 94b8fb5686
4 changed files with 102 additions and 20 deletions

View File

@@ -134,8 +134,18 @@ void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)
{
_ndimension = processors.size();
assert(_ndimension = parent._ndimension);
int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension);
std::vector<int> parent_processor_coor(_ndimension,0);
std::vector<int> parent_processors (_ndimension,1);
// Can make 5d grid from 4d etc...
int pad = _ndimension-parent_ndimension;
for(int d=0;d<parent_ndimension;d++){
parent_processor_coor[pad+d]=parent._processor_coor[d];
parent_processors [pad+d]=parent._processors[d];
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
// split the communicator
//////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -154,9 +164,9 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
std::vector<int> ssize(_ndimension); // coor of split within parent
for(int d=0;d<_ndimension;d++){
ccoor[d] = parent._processor_coor[d] % processors[d];
scoor[d] = parent._processor_coor[d] / processors[d];
ssize[d] = parent._processors[d] / processors[d];
ccoor[d] = parent_processor_coor[d] % processors[d];
scoor[d] = parent_processor_coor[d] / processors[d];
ssize[d] = parent_processors[d] / processors[d];
}
int crank; // rank within subcomm ; srank is rank of subcomm within blocks of subcomms
// MPI uses the reverse lexicographic convention to ours
@@ -166,38 +176,34 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
MPI_Comm comm_split;
if ( Nchild > 1 ) {
/*
std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl;
std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"] ";
for(int d=0;d<parent._processors.size();d++) std::cout << parent._processors[d] << " ";
for(int d=0;d<parent._ndimension;d++) std::cout << parent._processors[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" child grid["<< _ndimension <<"] ";
for(int d=0;d<processors.size();d++) std::cout << processors[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" old rank "<< parent._processor<<" coor ["<< _ndimension <<"] ";
for(int d=0;d<processors.size();d++) std::cout << parent._processor_coor[d] << " ";
std::cout << GridLogMessage<<" old rank "<< parent._processor<<" coor ["<< parent._ndimension <<"] ";
for(int d=0;d<parent._ndimension;d++) std::cout << parent._processor_coor[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" new split "<< srank<<" scoor ["<< _ndimension <<"] ";
for(int d=0;d<processors.size();d++) std::cout << scoor[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" new rank "<< crank<<" coor ["<< _ndimension <<"] ";
for(int d=0;d<processors.size();d++) std::cout << ccoor[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" new coor ["<< _ndimension <<"] ";
for(int d=0;d<processors.size();d++) std::cout << parent._processor_coor[d] << " ";
std::cout<<std::endl;
*/
int ierr= MPI_Comm_split(parent.communicator,srank,crank,&comm_split);
assert(ierr==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Declare victory
//////////////////////////////////////////////////////////////////////////////////////////////////////
/*
std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into "
<< Nchild <<" communicators with " << childsize << " ranks"<<std::endl;
*/
} else {
comm_split=parent.communicator;
srank = 0;
@@ -207,6 +213,15 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
// Set up from the new split communicator
//////////////////////////////////////////////////////////////////////////////////////////////////////
InitFromMPICommunicator(processors,comm_split);
std::cout << " ndim " <<_ndimension<<" " << parent._ndimension << std::endl;
for(int d=0;d<processors.size();d++){
std::cout << d<< " " << _processor_coor[d] <<" " << ccoor[d]<<std::endl;
}
for(int d=0;d<processors.size();d++){
assert(_processor_coor[d] == ccoor[d] );
}
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
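In the hunks above, each rank of the parent communicator computes ccoor (its coordinate inside the child processor grid), crank (its rank within the child communicator) and srank (which child communicator it belongs to); MPI_Comm_split(parent.communicator,srank,crank,&comm_split) then uses srank as the colour and crank as the key, and the new assertions check that the coordinates recovered from comm_split agree with ccoor. Below is a minimal standalone sketch of the same colour/key pattern, assuming a 1d rank layout and an illustrative child size; it is an editor's illustration, not Grid code and not part of this commit.

// --- Editor's sketch (not part of the commit): the same colour/key split ---
// 'srank' plays the role of the MPI_Comm_split colour (which child grid a rank
// belongs to) and 'crank' the key (its rank inside that child grid).
// The 1d layout and the child size below are illustrative assumptions.
#include <mpi.h>
#include <cassert>
#include <iostream>

int main(int argc, char **argv) {
  MPI_Init(&argc, &argv);

  int world_rank, world_size;
  MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &world_size);

  const int child = 2;                  // ranks per child communicator (assumed)
  assert(world_size % child == 0);

  int crank = world_rank % child;       // rank inside the child grid (the key)
  int srank = world_rank / child;       // which child grid (the colour)

  MPI_Comm comm_split;
  int ierr = MPI_Comm_split(MPI_COMM_WORLD, srank, crank, &comm_split);
  assert(ierr == MPI_SUCCESS);

  int new_rank;
  MPI_Comm_rank(comm_split, &new_rank);
  assert(new_rank == crank);            // analogue of the new ccoor check above

  std::cout << "world rank " << world_rank << " -> child " << srank
            << ", rank " << new_rank << std::endl;

  MPI_Comm_free(&comm_split);
  MPI_Finalize();
  return 0;
}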

View File

@@ -606,7 +606,7 @@ CartesianCommunicator::~CartesianCommunicator()
MPI_Finalized(&MPI_is_finalised);
if (communicator && MPI_is_finalised) {
MPI_Comm_free(&communicator);
for(int i=0;i< communicator_halo.size();i++){
for(int i=0;i<communicator_halo.size();i++){
MPI_Comm_free(&communicator_halo[i]);
}
}

View File

@@ -757,7 +757,74 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
// NB: Easiest to programme if keep in lex order.
//
/////////////////////////////////////////////////////////
/*
[0,0,0,0,0] S {V<4>{V<3>{(0,0),(0,0),(0,0)},V<3>{(0,0),(0,0),(0,0)},V<3>{(0,0),(0,0),(0,0)},V<3>{(0,0),(0,0),(0,0)}}}
[0,0,0,0,1] S {V<4>{V<3>{(1,0),(1,0),(1,0)},V<3>{(1,0),(1,0),(1,0)},V<3>{(1,0),(1,0),(1,0)},V<3>{(1,0),(1,0),(1,0)}}}
[0,0,0,0,2] S {V<4>{V<3>{(4,0),(4,0),(4,0)},V<3>{(4,0),(4,0),(4,0)},V<3>{(4,0),(4,0),(4,0)},V<3>{(4,0),(4,0),(4,0)}}}
[0,0,0,0,3] S {V<4>{V<3>{(5,0),(5,0),(5,0)},V<3>{(5,0),(5,0),(5,0)},V<3>{(5,0),(5,0),(5,0)},V<3>{(5,0),(5,0),(5,0)}}}
[0,0,0,0,4] S {V<4>{V<3>{(2,0),(2,0),(2,0)},V<3>{(2,0),(2,0),(2,0)},V<3>{(2,0),(2,0),(2,0)},V<3>{(2,0),(2,0),(2,0)}}}
[0,0,0,0,5] S {V<4>{V<3>{(3,0),(3,0),(3,0)},V<3>{(3,0),(3,0),(3,0)},V<3>{(3,0),(3,0),(3,0)},V<3>{(3,0),(3,0),(3,0)}}}
[0,0,0,0,6] S {V<4>{V<3>{(6,0),(6,0),(6,0)},V<3>{(6,0),(6,0),(6,0)},V<3>{(6,0),(6,0),(6,0)},V<3>{(6,0),(6,0),(6,0)}}}
[0,0,0,0,7] S {V<4>{V<3>{(7,0),(7,0),(7,0)},V<3>{(7,0),(7,0),(7,0)},V<3>{(7,0),(7,0),(7,0)},V<3>{(7,0),(7,0),(7,0)}}}
[0,0,0,0,8] S {V<4>{V<3>{(8,0),(8,0),(8,0)},V<3>{(8,0),(8,0),(8,0)},V<3>{(8,0),(8,0),(8,0)},V<3>{(8,0),(8,0),(8,0)}}}
[0,0,0,0,9] S {V<4>{V<3>{(9,0),(9,0),(9,0)},V<3>{(9,0),(9,0),(9,0)},V<3>{(9,0),(9,0),(9,0)},V<3>{(9,0),(9,0),(9,0)}}}
[0,0,0,0,10] S {V<4>{V<3>{(12,0),(12,0),(12,0)},V<3>{(12,0),(12,0),(12,0)},V<3>{(12,0),(12,0),(12,0)},V<3>{(12,0),(12,0),(12,0)}}}
[0,0,0,0,11] S {V<4>{V<3>{(13,0),(13,0),(13,0)},V<3>{(13,0),(13,0),(13,0)},V<3>{(13,0),(13,0),(13,0)},V<3>{(13,0),(13,0),(13,0)}}}
[0,0,0,0,12] S {V<4>{V<3>{(10,0),(10,0),(10,0)},V<3>{(10,0),(10,0),(10,0)},V<3>{(10,0),(10,0),(10,0)},V<3>{(10,0),(10,0),(10,0)}}}
[0,0,0,0,13] S {V<4>{V<3>{(11,0),(11,0),(11,0)},V<3>{(11,0),(11,0),(11,0)},V<3>{(11,0),(11,0),(11,0)},V<3>{(11,0),(11,0),(11,0)}}}
[0,0,0,0,14] S {V<4>{V<3>{(14,0),(14,0),(14,0)},V<3>{(14,0),(14,0),(14,0)},V<3>{(14,0),(14,0),(14,0)},V<3>{(14,0),(14,0),(14,0)}}}
[0,0,0,0,15] S {V<4>{V<3>{(15,0),(15,0),(15,0)},V<3>{(15,0),(15,0),(15,0)},V<3>{(15,0),(15,0),(15,0)},V<3>{(15,0),(15,0),(15,0)}}}
Process decomp
[A(0 1) A(2 3) B(0 1) B(2 3)] [ A(4 5) A(6 7) B(4 5) B(6 7)] [ A(8 9) A(10 11) B(8 9) B(10 11)] [A(12 13) A(14 15) B(12 13) B(14 15)]
A2A(Full)
-- divides M*fL into fP segments of size M*fL/fP = fL/sP
-- total is fP * fL/sP = M * fL
A(0 1) A(4 5) A(8 9) A(12 13)
A(2 3) A(6 7) A(10 11) A(14 15)
B(0 1) B(4 5) B(8 9) B(12 13)
B(2 3) B(6 7) B(10 11) B(14 15)
A2A(Split)
A(0 1) A(4 5) A(2 3) A(6 7)
A(8 9) A(12 13) A(10 11) A(14 15)
B(0 1) B(2 3) B(4 5) B(6 7)
B(8 9) B(10 11) B(12 13) B(14 15)
--------------------
-- General case
--------------------
G global lattice
fP - procs
sP - Procs in split grid
M - subdivisions/vectors - M*sP = fP ** constraint 1
fL = G/fP per node (full)
sL = G/sP per node split
[ G * M ] total = G*fP/sP.
[ Subdivide fL*M by fP => fL *M / fP = fL/fP *fP/sP = fL/sP ]
--------------------
-- 1st A2A chunk is fL*M/fP = G/fP *fP/sP /fP = fL/sP
-- Let cL = fL/sP chunk. ( Divide into fP/sP = M chunks )
-- node 0 1st cL of node 0,1,... fP-1 ; vector 0
-- node 1 2nd cL of node 0,1,... fP-1
-- node 2 3rd cL of node 0,1,... fP-1
-- node 3 4th cL of node 0,1,... fP-1
... when node > sP get vector 1 etc...
-- 2nd A2A (over sP nodes; subdivide the fP into sP chunks of M)
-- node 0 1st cL of node 0M..(1M-1); 2nd cL of node 0M..(1M-1)..
-- node 1 1st cL of node 1M..(2M-1); 2nd cL of node 1M..(2M-1)..
-- node 2 1st cL of node 2M..(3M-1); 2nd cL of node 2M..(3M-1)..
-- node 3 1st cL of node 3M..(4M-1); 2nd cL of node 3M..(4M-1)..
--
-- Insert correctly
*/
template<class Vobj>
void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
{
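The long comment added above works through the bookkeeping of Grid_split for the general case: with M = fP/sP full-grid vectors packed into the split grid, the first all-to-all (over the fP full-grid ranks) moves chunks of cL = fL/sP sites, and the second all-to-all (over the sP split-grid ranks) regroups those chunks by blocks of M source nodes. The identities the comment relies on can be sanity-checked with a few lines of integer arithmetic; the sketch below does that with made-up example sizes and is an editor's illustration, not part of the commit.

// --- Editor's sketch (not part of the commit): check the A2A bookkeeping ---
// Example sizes are made up; the asserts encode the identities quoted in the
// comment above (M*sP = fP, cL = fL/sP, per-rank total M*fL = sL).
#include <cassert>
#include <iostream>

int main() {
  const int G  = 1024;            // global 1d lattice sites (illustrative)
  const int fP = 16;              // ranks in the full grid
  const int sP = 4;               // ranks in the split grid

  const int M  = fP / sP;         // subdivisions/vectors: M*sP == fP (constraint 1)
  assert(M * sP == fP);

  const int fL = G / fP;          // sites per rank, full grid
  const int sL = G / sP;          // sites per rank, split grid
  const int cL = fL / sP;         // chunk moved by the first all-to-all

  assert(M * fL / fP == cL);      // 1st A2A chunk: fL*M/fP = fL/sP
  assert(fP * cL == M * fL);      // total per rank: fP * fL/sP = M*fL
  assert(sL == M * fL);           // split-grid volume holds M full-grid volumes

  std::cout << "M=" << M << " fL=" << fL
            << " sL=" << sL << " cL=" << cL << std::endl;
  return 0;
}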

View File

@@ -95,7 +95,7 @@ int main (int argc, char ** argv)
FermionField tmp(FGrid);
for(int s=0;s<nrhs;s++) result[s]=zero;
#undef LEXICO_TEST
#define LEXICO_TEST
#ifdef LEXICO_TEST
{
LatticeFermion lex(FGrid); lex = zero;
@@ -121,7 +121,7 @@ int main (int argc, char ** argv)
random(pRNG5,src[s]);
tmp = 100.0*s;
src[s] = (src[s] * 0.1) + tmp;
std::cout << " src ]"<<s<<"] "<<norm2(src[s])<<std::endl;
std::cout << " src ["<<s<<"] "<<norm2(src[s])<<std::endl;
}
#endif
@@ -189,7 +189,7 @@ int main (int argc, char ** argv)
MdagMLinearOperator<DomainWallFermionR,FermionField> HermOp(Ddwf);
MdagMLinearOperator<DomainWallFermionR,FermionField> HermOpCk(Dchk);
ConjugateGradient<FermionField> CG((1.0e-5),10000);
ConjugateGradient<FermionField> CG((1.0e-2),10000);
s_res = zero;
CG(HermOp,s_src,s_res);