Reorganised the TODO. Really getting somewhere

2025-11-07 07:09:32 +00:00 · 2015-04-23 20:42:30 +01:00
parent 3083d2e908
commit b9939e3974
4 changed files with 239 additions and 227 deletions
--- a/298
+++ b/298
@@ -1,3 +1,77 @@
 * - BinaryWriter, TextWriter etc...
  - protocol buffers? replace xml
 * Stencil operator support                           -----Initial thoughts, trial implementation DONE.
                                                     -----some simple tests that Stencil matches Cshift.
                                                     -----do all permute in comms phase, so that copy permute
 						     -----cases move into a buffer.
 						     -----allow transform in/out buffers spproj
 * CovariantShift support                             -----Use a class to store gauge field? (parallel transport?)
 * Consider switch std::vector to boost arrays or something lighter weight
  boost::multi_array<type, 3> A()...    to replace multi1d, multi2d etc..
 * How to define simple matrix operations, such as flavour matrices?
 * Make the Tensor types and Complex etc... play more nicely.
 * Dirac, Pauli, SU subgroup, etc.. * Gamma/Dirac structures
 * Fourspin, two spin project
 * su3 exponentiation & log etc.. [Jamie's code?]
  TaProj
 * Parallel MPI2 IO
 * rb4d support.
 * Check for missing functionality                    - partially audited against QDP++ layout
 * Optimise the extract/merge SIMD routines; Azusa??
 - I have collated into single location at least.
 - Need to use _mm_*insert/extract routines.
 * Conformable test in Cshift routines.
 * QDP++ regression suite and comparative benchmark
 AUDITS:
 * FIXME audit
 * const audit
 * Replace vset with a call to merge.; 
 * care in Gmerge,Gextract over vset .
 * extract / merge extra implementation removal      
 * Test infrastructure
   // TODO
   //
   // Base class to share common code between vRealF, VComplexF etc...
  //
   // Unary functions
   // cos,sin, tan, acos, asin, cosh, acosh, tanh, sinh, // Scalar<vReal> only arg
   // exp, log, sqrt, fabs
   //
  // transposeColor, transposeSpin,
   // adjColor, adjSpin,
   //
  // copyMask.
   //
   // localMaxAbs
   //
   // Fourier transform equivalent.
 * LinearOperator
  LinearSolver
  Polynomial etc...
 ======================================================================================================
 FUNCTIONALITY:
 * Conditional execution, where etc...                -----DONE, simple test
 * Integer relational support                         -----DONE
@@ -22,222 +96,24 @@ FUNCTIONALITY:
  - lib/qcd/actions
  - lib/qcd/measurements
 * Subset support, slice sums etc...                  -----DONE
  sliceSum(orthog)
  sum
  innerProduct
  norm2
-Not done, or just incomplete
+* Subgrid Transferral                                -----DONE
-* random number generation
+  subBlock (coarseLattice,fineLattice)
  projectBlockBasis  
  promoteBlockBasis
-* Consider switch std::vector to boost arrays or something lighter weight
+* random number generation                           ----- DONE
  boost::multi_array<type, 3> A()...    to replace multi1d, multi2d etc..
-* How to define simple matrix operations, such as flavour matrices?
+* Broadcast, reduction tests. innerProduct, localInnerProduct --- DONE
 * Dirac, Pauli, SU subgroup, etc.. * Gamma/Dirac structures
 * Fourspin, two spin project
 * su3 exponentiation, log etc.. [Jamie's code?]
 * Stencil operator support                           -----Initial thoughts, trial implementation DONE.
                                                     -----some simple tests that Stencil matches Cshift.
                                                     -----do all permute in comms phase, so that copy permute
 						     -----cases move into a buffer.
 						     -----allow transform in/out buffers spproj
 * CovariantShift support                             -----Use a class to store gauge field? (parallel transport?)
 * Subset support, slice sums etc...                  -----Only need slice sum?
                                                     -----Generic cartesian subslicing?
                                                     -----Array ranges / boost extents?
                                                     -----Multigrid grid transferral?
                                                     -----Suggests generalised cartesian subblocking
                                                          sums, returning modified grid?
 					             -----What should interface be?
 * Grid transferral
  * pickCheckerboard, pickSubPlane, pickSubBlock,
  *                    sumSubPlane, sumSubBlocks
 * rb4d support.
 * Check for missing functionality                    - partially audited against QDP++ layout
 * Optimise the extract/merge SIMD routines; Azusa??
 - I have collated into single location at least.
 - Need to use _mm_*insert/extract routines.
 * Conformable test in Cshift routines.
 * Broadcast, reduction tests. innerProduct, localInnerProduct
 * QDP++ regression suite and comparative benchmark
 * I/O support
-
+* NERSC Lattice loading, plaquette test             ------- DONE single node 
 * NERSC Lattice loading, plaquette test
  - MPI IO?
  - BinaryWriter, TextWriter etc...
  - protocol buffers?
 AUDITS:
 // Lattice support audit                 Tested in Grid_main.cc
 //
 //     -=,+=,*=                           Y
 //     add,+,sub,-,mult,mac,*             Y
 //     innerProduct,norm2                 Y
 //     localInnerProduct,outerProduct,    Y
 //     adj,conj                           Y
 //     transpose,                         Y
 //     trace                              Y
 //
 //     transposeIndex                     Y
 //     traceIndex                         Y
 //     peekIndex                          Y
 //
 //     real,imag                          missing, semantic thought needed on real/im support.
 //                                        perhaps I just keep everything complex?
 // 
 * FIXME audit
 * const audit
 * Replace vset with a call to merge.; 
 * care in Gmerge,Gextract over vset .
 * extract / merge extra implementation removal      
 * Test infrastructure
 [ More on subsets and grid transfers ]
 i)  Three classes of subset;   red black parity subsetting (pick checkerboard).
                             cartesian sub-block subsetting
                             rbNd 
 ii) Need to be able to project one Grid to another Grid.
 Lattice<vobj> coarse_data SubBlockSum (GridBase *CoarseGrid, Lattice<vobj> &fine_data)
 Operation ensure either:
 rd[dim] divide rd[dim] fine_data
 This will give a distributed array over mpi ranks in a given dim IF coarse gd != 1 and _processors[d]>1
 Dimension can be *replicated* on all ranks in dimension. Need a "replicated" option on GridCartesian etc..
 This will give "slice" summation and fourier projection assistance.
    Generic concept is to subdivide (based on RD so applies to red/black or full).
    Return a type on SUB-grid from CellSum TOP-grid
    SUB-grid need not distribute but be replicated in some dims if that is how the
    cartesian communicator works.
 Instead of subsetting 
 iii) No general permutation map.
- ? Cell definition <-> sliceSum.
+* Controling std::cout                              ------- DONE
 ? Replicated arrays.
 // Cartesian grid inheritance
 //            Grid::GridBase
 //                     |
 //           __________|___________
 //          |                      |
 // Grid::GridCartesian   Grid::GridCartesianRedBlack
 //
 // TODO: document the following as an API guaranteed public interface
    /* 
     *       Rough map of functionality against QDP++ Layout
     *
     *       Param     |     Grid                     |     QDP++             
     *       -----------------------------------------
     *                 |                              |
     *        void     |     oSites, iSites, lSites   |  sitesOnNode 
     *        void     |     gSites                   |  vol
     *                 |                              |
     *        gcoor    |     oIndex, iIndex           |  linearSiteIndex // no virtual node in QDP
     *        lcoor    |                              |
     * 
     *        void     |     CheckerBoarded           |  -        // No checkerboarded in QDP
     *        void     |     FullDimensions           |  lattSize
     *        void     |     GlobalDimensions         |  lattSize // No checkerboarded in QDP
     *        void     |     LocalDimensions          |  subgridLattSize
     *        void     |     VirtualLocalDimensions   |  subgridLattSize // no virtual node in QDP
     *                 |                              |
     *       int x 3   |     oiSiteRankToGlobal       |  siteCoords
     *                 |     ProcessorCoorLocalCoorToGlobalCoor | 
     *                 |                              |
     *     vector<int> |     GlobalCoorToRankIndex   |  nodeNumber(coord)
     *     vector<int> |     GlobalCoorToProcessorCoorLocalCoor|  nodeCoord(coord)
     *                 |                              |
     *     void        |     Processors               |  logicalSize    // returns cart array shape
     *     void        |     ThisRank        |  nodeNumber();  // returns this node rank
     *     void        |     ThisProcessorCoor        |    // returns this node coor
     *     void        |     isBoss(void)             |  primaryNode();
     *                 |                              |
     *                 |     RankFromProcessorCoor    |  getLogicalCoorFrom(node)
     *                 |     ProcessorCoorFromRank    |  getNodeNumberFrom(logical_coord)
     */
  // Work out whether to permute 
  // ABCDEFGH ->   AE BF CG DH       permute              wrap num
  //
  // Shift 0       AE BF CG DH       0 0 0 0    ABCDEFGH   0   0
  // Shift 1       BF CG DH AE       0 0 0 1    BCDEFGHA   0   1
  // Shift 2       CG DH AE BF       0 0 1 1    CDEFGHAB   0   2
  // Shift 3       DH AE BF CG       0 1 1 1    DEFGHABC   0   3
  // Shift 4       AE BF CG DH       1 1 1 1    EFGHABCD   1   0 
  // Shift 5       BF CG DH AE       1 1 1 0    FGHACBDE   1   1 
  // Shift 6       CG DH AE BF       1 1 0 0    GHABCDEF   1   2
  // Shift 7       DH AE BF CG       1 0 0 0    HABCDEFG   1   3
  // Suppose 4way simd in one dim.
  // ABCDEFGH ->   AECG BFDH      permute              wrap num
  // Shift 0       AECG BFDH      0,00 0,00 ABCDEFGH         0     0
  // Shift 1       BFDH CGEA      0,00 1,01 BCDEFGHA         0     1
  // Shift 2       CGEA DHFB      1,01 1,01 CDEFGHAB         1     0
  // Shift 3       DHFB EAGC      1,01 1,11 DEFGHABC         1     1
  // Shift 4       EAGC FBHD      1,11 1,11 EFGHABCD         2     0 
  // Shift 5       FBHD GCAE      1,11 1,10 FGHABCDE         2     1
  // Shift 6       GCAE HDBF      1,10 1,10 GHABCDEF         3     0
  // Shift 7       HDBF AECG      1,10 0,00 HABCDEFG         3     1
  // Generalisation to 8 way simd, 16 way simd required.
  //
  // Need log2 Nway masks. consisting of 
  //	    1 bit  256 bit granule
  //	    2 bit  128 bit granule
  //        4 bits 64  bit granule
  //        8 bits 32  bit granules
  //
  //        15 bits....
    // TODO
    //
    // Base class to share common code between vRealF, VComplexF etc...
    //
    // lattice Broad cast assignment
    //
    // where() support
    // implement with masks, and/or? Type of the mask & boolean support?
    //
    // Unary functions
    // cos,sin, tan, acos, asin, cosh, acosh, tanh, sinh, // Scalar<vReal> only arg
    // exp, log, sqrt, fabs
    //
    // transposeColor, transposeSpin,
    // adjColor, adjSpin,
    // traceColor, traceSpin.
    // peekColor, peekSpin + pokeColor PokeSpin
    //
    // copyMask.
    //
    // localMaxAbs
    //
    // norm2,
    // sumMulti equivalent.
    // Fourier transform equivalent.
    //
--- a/lib/Grid_init.cc
+++ b/lib/Grid_init.cc
@@ -10,18 +10,57 @@
 #include <sys/stat.h> 
 #include <sys/time.h>
 #include <signal.h>
-
+#include <iostream>
 #include <Grid.h>
 #undef __X86_64
 #define MAC
 #ifdef MAC
 #include <execinfo.h>
 #endif
 namespace Grid {
  std::streambuf *Grid_saved_stream_buf;
 #if 0
  void Grid_quiesce_nodes(void)
  {
 #ifdef GRID_COMMS_MPI
    int me;
    MPI_Comm_rank(MPI_COMM_WORLD,&me);
    std::streambuf* Grid_saved_stream_buf = std::cout.rdbuf();
    if ( me ) { 
      std::ofstream file("log.node");
      std::cout.rdbuf(file.rdbuf());
    }
 #endif
  }
 #endif
  void Grid_quiesce_nodes(void)
  {
 #ifdef GRID_COMMS_MPI
    int me;
    MPI_Comm_rank(MPI_COMM_WORLD,&me);
    if ( me ) { 
      std::cout.setstate(std::ios::badbit);
    }
 #endif
  }
  void Grid_unquiesce_nodes(void)
  {
 #ifdef GRID_COMMS_MPI
    std::cout.clear();
 #endif
  }
 void Grid_init(int *argc,char ***argv)
 {
 #ifdef GRID_COMMS_MPI
  MPI_Init(argc,argv);
 #endif
  Grid_debug_handler_init();
  Grid_quiesce_nodes();
 }
 void Grid_finalize(void)
 {
@@ -35,6 +74,10 @@ double usecond(void) {
  return 1.0*tv.tv_usec + 1.0e6*tv.tv_sec;
 }
 #define _NBACKTRACE (256)
 void * Grid_backtrace_buffer[_NBACKTRACE];
 void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
 {
  printf("Caught signal %d\n",si->si_signo);
@@ -43,10 +86,8 @@ void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
 #ifdef __X86_64
    ucontext_t * uc= (ucontext_t *)ptr;
  struct sigcontext *sc = (struct sigcontext *)&uc->uc_mcontext;
  printf("  instruction %llx\n",(uint64_t)sc->rip);
 #define REG(A)  printf("  %s %lx\n",#A, sc-> A);
  REG(rdi);
  REG(rsi);
@@ -68,14 +109,14 @@ void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
  REG(r14);
  REG(r15);
 #endif
-
+#ifdef MAC
-  fflush(stdout);
+  int symbols    = backtrace        (Grid_backtrace_buffer,_NBACKTRACE);
-
+  char **strings = backtrace_symbols(Grid_backtrace_buffer,symbols);
-  if ( si->si_signo == SIGSEGV ) {
+  for (int i = 0; i < symbols; i++){
-    printf("Grid_sa_signal_handler: Oops... this was a sigsegv you naughty naughty programmer. Goodbye\n");
+    printf ("%s\n", strings[i]);
    fflush(stdout);
    exit(-1);
  }
 #endif
  exit(0);
  return;
 };
--- a/lib/Grid_summation.h
+++ b/lib/Grid_summation.h
@@ -16,6 +16,96 @@ inline void subdivides(GridBase *coarse,GridBase *fine)
  }
 }
 template<class vobj,int nbasis>
 inline void projectBlockBasis(Lattice<iVector<vComplex,nbasis > > &coarseData,
 			      const             Lattice<vobj>   &fineData,
 			      const std::vector<Lattice<vobj> > &Basis)
 {
  GridBase * fine  = fineData._grid;
  GridBase * coarse= coarseData._grid;
  int  _ndimension = coarse->_ndimension;
  // checks
  assert( nbasis == Basis.size() );
  subdivides(coarse,fine); 
  for(int i=0;i<nbasis;i++){
    conformable(Basis,fineData);
  }
  std::vector<int>  block_r      (_ndimension);
  for(int d=0 ; d<_ndimension;d++){
    block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
  }
  coarseData=zero;
  // Loop with a cache friendly loop ordering
  for(int sf=0;sf<fine->oSites();sf++){
    int sc;
    std::vector<int> coor_c(_ndimension);
    std::vector<int> coor_f(_ndimension);
    GridBase::CoorFromIndex(coor_f,sf,fine->_rdimensions);
    for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
    GridBase::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
    for(int i=0;i<nbasis;i++) {
      coarseData._odata[sc][i]=coarseData._odata[sc][i]
 	+ innerProduct(Basis[i]._odata[sf],fineData._odata[sf]);
    }
  }
  return;
 }
 template<class vobj,int nbasis>
 inline void promoteBlockBasis(const Lattice<iVector<vComplex,nbasis > > &coarseData,
 			                        Lattice<vobj>   &fineData,
 			      const std::vector<Lattice<vobj> > &Basis)
 {
  GridBase * fine  = fineData._grid;
  GridBase * coarse= coarseData._grid;
  int  _ndimension = coarse->_ndimension;
  // checks
  assert( nbasis == Basis.size() );
  subdivides(coarse,fine); 
  for(int i=0;i<nbasis;i++){
    conformable(Basis,fineData);
  }
  std::vector<int>  block_r      (_ndimension);
  for(int d=0 ; d<_ndimension;d++){
    block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
  }
  // Loop with a cache friendly loop ordering
  for(int sf=0;sf<fine->oSites();sf++){
    int sc;
    std::vector<int> coor_c(_ndimension);
    std::vector<int> coor_f(_ndimension);
    GridBase::CoorFromIndex(coor_f,sf,fine->_rdimensions);
    for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
    GridBase::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
    for(int i=0;i<nbasis;i++) {
      if(i==0) fineData._odata[sf]=                    coarseData._odata[sc][i]*Basis[i]._odata[sf];
      else     fineData._odata[sf]=fineData._odata[sf]+coarseData._odata[sc][i]*Basis[i]._odata[sf];
    }
  }
  return;
 }
 // useful in multigrid project;
 // Generic name : Coarsen?
 template<class vobj>
@@ -30,10 +120,6 @@ inline void sumBlocks(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
  std::vector<int>  block_r      (_ndimension);
  ///////////////////////////////////////////////////////////
  // Detect whether the result is replicated in dimension d
  ///////////////////////////////////////////////////////////
  for(int d=0 ; d<_ndimension;d++){
    block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
  }
--- a/tests/Grid_nersc_io.cc
+++ b/tests/Grid_nersc_io.cc
@@ -11,12 +11,15 @@ int main (int argc, char ** argv)
  Grid_init(&argc,&argv);
  std::vector<int> simd_layout({1,1,2,2});
-  std::vector<int> mpi_layout ({1,1,1,1});
+  std::vector<int> mpi_layout ({2,2,2,2});
  std::vector<int> latt_size  ({16,16,16,32});
  std::vector<int> clatt_size  ({4,4,4,8});
  int orthodir=3;
  int orthosz =latt_size[orthodir];
  GridCartesian     Fine(latt_size,simd_layout,mpi_layout);
  GridCartesian     Coarse(clatt_size,simd_layout,mpi_layout);
  GridRNG           FineRNG(&Fine);
  LatticeGaugeField Umu(&Fine);
@@ -40,6 +43,7 @@ int main (int argc, char ** argv)
  // (1+2+3)=6 = N(N-1)/2 terms
  LatticeComplex Plaq(&Fine);
  LatticeComplex cPlaq(&Coarse);
  Plaq = zero;
  for(int mu=1;mu<Nd;mu++){
    for(int nu=0;nu<mu;nu++){
@@ -47,6 +51,7 @@ int main (int argc, char ** argv)
    }
  }
  double vol = Fine.gSites();
  Complex PlaqScale(1.0/vol/6.0/3.0);
@@ -66,7 +71,6 @@ int main (int argc, char ** argv)
    std::cout << "total " <<Pt*PlaqScale<<std::endl;
  }  
  TComplex Tp = sum(Plaq);
  Complex p  = TensorRemove(Tp);
  std::cout << "calculated plaquettes " <<p*PlaqScale<<std::endl;
@@ -77,5 +81,10 @@ int main (int argc, char ** argv)
  Complex l  = TensorRemove(Tl);
  std::cout << "calculated link trace " <<l*LinkTraceScale<<std::endl;
  sumBlocks(cPlaq,Plaq);
  TComplex TcP = sum(cPlaq);
  Complex ll= TensorRemove(TcP);
  std::cout << "coarsened plaquettes sum to " <<ll*PlaqScale<<std::endl;
  Grid_finalize();
 }