Initial multi-RHS support -- the multi project/promote routines still need optimisation.

Bug fix: avoid a double free when freeing the intermediate grids.
2026-05-31 06:24:18 +01:00 · 2023-11-23 18:18:35 -05:00
parent 639cc6f73a
commit 031f85247c
2 changed files with 34 additions and 3 deletions
@@ -265,8 +265,8 @@ inline auto localInnerProductD(const Lattice<vobj> &lhs,const Lattice<vobj> &rhs
 ////////////////////////////////////////////////////////////////////////////////////////////
 template<class vobj,class CComplex,int nbasis,class VLattice>
 inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
-			   const             Lattice<vobj>   &fineData,
-			   const VLattice &Basis)
+			 const             Lattice<vobj>   &fineData,
+			 const VLattice &Basis)
 {
  GridBase * fine  = fineData.Grid();
  GridBase * coarse= coarseData.Grid();
@@ -300,6 +300,7 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
  //  std::cout << GridLogPerformance << " blockProject : conv              :  "<<t_co<<" us"<<std::endl;
  //  std::cout << GridLogPerformance << " blockProject : blockZaxpy        :  "<<t_za<<" us"<<std::endl;
 }
+
 // This only minimises data motion from CPU to GPU
 // there is a chance of a better implementation that does a vxk loop of inner products to share data
 // at the GPU thread level
@@ -1802,5 +1803,32 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj>   & split)
  }
 }

+//////////////////////////////////////////////////////
+// MultiRHS interface support for coarse space
+// -- Simplest possible implementation to begin with: one blockProject call per entry of fineData
+//////////////////////////////////////////////////////
+template<class vobj,class CComplex,int nbasis,class VLattice> // NOTE(review): vobj appears in no parameter, so it cannot be deduced and callers must name it explicitly
+inline void blockProjectMany(Lattice<iVector<CComplex,nbasis > > &coarseIP, // destination handed to InsertSliceLocal for every r
+			     Lattice<iVector<CComplex,nbasis > > &coarseTMP, // passed as the first (coarseData) argument of blockProject on every iteration
+			     const VLattice &fineData, // Basis and fineData necessarily same type
+			     const VLattice &Basis) // forwarded unchanged to blockProject
+{
+  for(int r=0;r<fineData.size();r++){ // NOTE(review): int compared against size() -- signed/unsigned mix if size() returns an unsigned type
+    blockProject(coarseTMP,fineData[r],Basis); // process entry r of fineData into coarseTMP
+    InsertSliceLocal(coarseTMP, coarseIP,r,r,0); // presumably copies coarseTMP into slice r of coarseIP along dimension 0 -- TODO confirm argument meaning
+  }
+}
+template<class vobj,class CComplex,int nbasis,class VLattice> // NOTE(review): vobj appears in no parameter, so it cannot be deduced and callers must name it explicitly
+inline void blockPromoteMany(Lattice<iVector<CComplex,nbasis > > &coarseIP, // source handed to ExtractSliceLocal for every r
+			     Lattice<iVector<CComplex,nbasis > > &coarseTMP, // filled by ExtractSliceLocal, then passed to blockPromote, on every iteration
+			     const VLattice &fineData, // Basis and fineData necessarily same type. NOTE(review): const here, yet fineData[r] is passed to blockPromote, which presumably writes it -- confirm this instantiates
+			     const VLattice &Basis) // forwarded unchanged to blockPromote
+{
+  for(int r=0;r<fineData.size();r++){ // NOTE(review): int compared against size() -- signed/unsigned mix if size() returns an unsigned type
+    ExtractSliceLocal(coarseTMP, coarseIP,r,r,0); // presumably copies slice r of coarseIP (dimension 0) into coarseTMP -- TODO confirm argument meaning
+    blockPromote(coarseTMP,fineData[r],Basis); // process coarseTMP together with entry r of fineData
+  }
+}
+
 NAMESPACE_END(Grid);

@@ -234,8 +234,11 @@ public:
  }
  void DeleteGrids(void)
  {
+    Coordinate processors=unpadded_grid->_processors; // copy of the unpadded grid's _processors; NOTE(review): indexed by d below, assumes it has an entry for every index of grids -- confirm
    for(int d=0;d<grids.size();d++){
-      delete grids[d];
+      if ( processors[d] > 1 ) { // delete only entries with processors[d] > 1; the commit message gives stopping a double free as the reason
+	delete grids[d];
+      }
    }
    grids.resize(0);
  };