Merge branch 'develop' of https://github.com/paboyle/Grid into develop

New Frontier config
Final for paper
2025-07-09 01:47:06 +01:00 · 2024-07-23 09:53:58 -04:00 · 2024-07-23 09:53:08 -04:00 · 2024-07-22 15:26:45 -04:00 · 2024-07-22 15:25:17 -04:00 · 2024-07-22 15:24:56 -04:00
6 changed files with 174 additions and 38 deletions
--- a/Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczosCoarse.h
+++ b/Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczosCoarse.h
@ -279,11 +279,11 @@ public:
      Qt = Eigen::MatrixXcd::Identity(Nm,Nm);
      diagonalize(eval2,lmd2,lme2,Nu,Nm,Nm,Qt,grid);
      _sort.push(eval2,Nm);
-      //      Glog << "#Ritz value before shift: "<< std::endl;
+      Glog << "#Ritz value before shift: "<< std::endl;
      for(int i=0; i<Nm; ++i){
-	//        std::cout.precision(13);
-	//        std::cout << "[" << std::setw(4)<< std::setiosflags(std::ios_base::right) <<i<<"] ";
-	//        std::cout << "Rval = "<<std::setw(20)<< std::setiosflags(std::ios_base::left)<< eval2[i] << std::endl;
+	std::cout.precision(13);
+	std::cout << "[" << std::setw(4)<< std::setiosflags(std::ios_base::right) <<i<<"] ";
+	std::cout << "Rval = "<<std::setw(20)<< std::setiosflags(std::ios_base::left)<< eval2[i] << std::endl;
      }
      
      //----------------------------------------------------------------------
@ -298,6 +298,7 @@ public:
        unpackHermitBlockTriDiagMatToEigen(lmd,lme,Nu,Nblock_m,Nm,Nm,BTDM);

        for(int ip=Nk; ip<Nm; ++ip){
+	  Glog << " ip "<<ip<<" / "<<Nm<<std::endl;
          shiftedQRDecompEigen(BTDM,Nu,Nm,eval2[ip],Q);
        }
        
@ -325,7 +326,7 @@ public:
        Qt = Eigen::MatrixXcd::Identity(Nm,Nm);
        diagonalize(eval2,lmd2,lme2,Nu,Nk,Nm,Qt,grid);
        _sort.push(eval2,Nk);
-	//        Glog << "#Ritz value after shift: "<< std::endl;
+	Glog << "#Ritz value after shift: "<< std::endl;
        for(int i=0; i<Nk; ++i){
 	  //          std::cout.precision(13);
 	  //          std::cout << "[" << std::setw(4)<< std::setiosflags(std::ios_base::right) <<i<<"] ";
@ -467,10 +468,10 @@ public:
  
    // set initial vector
    for (int i=0; i<Nu; ++i) {
-      //      Glog << "norm2(src[" << i << "])= "<< norm2(src[i]) << std::endl;
+      Glog << "norm2(src[" << i << "])= "<< norm2(src[i]) << std::endl;
      evec[i] = src[i];
      orthogonalize(evec[i],evec,i);
-      //      Glog << "norm2(evec[" << i << "])= "<< norm2(evec[i]) << std::endl;
+      Glog << "norm2(evec[" << i << "])= "<< norm2(evec[i]) << std::endl;
    }
 //    exit(-43);
    
@ -506,11 +507,11 @@ public:
      Qt = Eigen::MatrixXcd::Identity(Nr,Nr);
      diagonalize(eval2,lmd2,lme2,Nu,Nr,Nr,Qt,grid);
      _sort.push(eval2,Nr);
-      //      Glog << "#Ritz value: "<< std::endl;
+      Glog << "#Ritz value: "<< std::endl;
      for(int i=0; i<Nr; ++i){
-	//        std::cout.precision(13);
-	//        std::cout << "[" << std::setw(4)<< std::setiosflags(std::ios_base::right) <<i<<"] ";
-	//        std::cout << "Rval = "<<std::setw(20)<< std::setiosflags(std::ios_base::left)<< eval2[i] << std::endl;
+	std::cout.precision(13);
+	std::cout << "[" << std::setw(4)<< std::setiosflags(std::ios_base::right) <<i<<"] ";
+	std::cout << "Rval = "<<std::setw(20)<< std::setiosflags(std::ios_base::left)<< eval2[i] << std::endl;
      }
      
      // Convergence test
@ -570,6 +571,7 @@ public:
      Glog << fname + " NOT converged ; Summary :\n";
    } else {
      Glog << fname + " CONVERGED ; Summary :\n";
+      Nstop = Nconv_guess; // Just take them all
      // Sort convered eigenpairs.
      std::vector<Field>  Btmp(Nstop,grid); // waste of space replicating

@ -642,7 +644,7 @@ private:
      //      for (int u=0; u<mrhs; ++u) Glog << " out["<<u<<"] = "<<norm2(out[u])<<std::endl;
      k_start +=mrhs;
    }
-    //    Glog << "LinAlg "<< std::endl;
+    Glog << "LinAlg "<< std::endl;
    
    if (b>0) {
      for (int u=0; u<Nu; ++u) {
@ -676,7 +678,7 @@ private:
      }
      w_copy[u] = w[u];
    }
-    //    Glog << "LinAlg done"<< std::endl;
+    Glog << "LinAlg done"<< std::endl;
    
    // In block version, the steps 6 and 7 in Lanczos construction is
    // replaced by the QR decomposition of new basis block.
@ -689,15 +691,15 @@ private:
    }

    // re-orthogonalization for numerical stability
-    //    Glog << "Gram Schmidt"<< std::endl;
+    Glog << "Gram Schmidt"<< std::endl;
    orthogonalize(w,Nu,evec,R);
    // QR part
    for (int u=1; u<Nu; ++u) {
      orthogonalize(w[u],w,u);
    }
-    //    Glog << "Gram Schmidt done "<< std::endl;
+    Glog << "Gram Schmidt done "<< std::endl;
    
-    //    Glog << "LinAlg "<< std::endl;
+    Glog << "LinAlg "<< std::endl;
    for (int u=0; u<Nu; ++u) {
      //for (int v=0; v<Nu; ++v) {
      for (int v=u; v<Nu; ++v) {
@ -714,7 +716,7 @@ private:
 	//        Glog <<" In block "<< b << "," <<" beta[" << u << "," << k-L << "] = " << lme[u][k] << std::endl;
      }
    }
-    //    Glog << "LinAlg done "<< std::endl;
+    Glog << "LinAlg done "<< std::endl;

    if (b < Nm/Nu-1) {
      for (int u=0; u<Nu; ++u) {
@ -933,7 +935,7 @@ if (1){
         int Nu, int Nb, int Nk, int Nm,
         Eigen::MatrixXcd& M)
  {
-    //Glog << "unpackHermitBlockTriDiagMatToEigen() begin" << '\n'; 
+    Glog << "unpackHermitBlockTriDiagMatToEigen() begin" << '\n'; 
    assert( Nk%Nu == 0 && Nm%Nu == 0 );
    assert( Nk <= Nm );
    M = Eigen::MatrixXcd::Zero(Nk,Nk);
@ -951,7 +953,7 @@ if (1){
        M(u+(k/Nu)*Nu,k-Nu) = lme[u][k-Nu];
      }
    }
-    //Glog << "unpackHermitBlockTriDiagMatToEigen() end" << endl; 
+    Glog << "unpackHermitBlockTriDiagMatToEigen() end" << std::endl; 
  }
 

@ -961,7 +963,7 @@ if (1){
         int Nu, int Nb, int Nk, int Nm,
         Eigen::MatrixXcd& M)
  {
-    //Glog << "packHermitBlockTriDiagMatfromEigen() begin" << '\n'; 
+    Glog << "packHermitBlockTriDiagMatfromEigen() begin" << '\n'; 
    assert( Nk%Nu == 0 && Nm%Nu == 0 );
    assert( Nk <= Nm );
    
@ -977,7 +979,7 @@ if (1){
        lme[u][k-Nu] = M(u+(k/Nu)*Nu,k-Nu);
      }
    }
-    //Glog << "packHermitBlockTriDiagMatfromEigen() end" << endl; 
+    Glog << "packHermitBlockTriDiagMatfromEigen() end" <<std::endl; 
  }


@ -986,7 +988,7 @@ if (1){
 		            RealD Dsh,
 		            Eigen::MatrixXcd& Qprod)
  {
-    //Glog << "shiftedQRDecompEigen() begin" << '\n'; 
+    Glog << "shiftedQRDecompEigen() begin" << '\n'; 
    Eigen::MatrixXcd Q = Eigen::MatrixXcd::Zero(Nm,Nm);
    Eigen::MatrixXcd R = Eigen::MatrixXcd::Zero(Nm,Nm);
    Eigen::MatrixXcd Mtmp = Eigen::MatrixXcd::Zero(Nm,Nm);
@ -1002,6 +1004,7 @@ if (1){
                        // lower triangular part used to represent series
                        // of Q sequence.

+    Glog << "shiftedQRDecompEigen() Housholder & QR" << '\n'; 
    // equivalent operation of Qprod *= Q
    //M = Eigen::MatrixXcd::Zero(Nm,Nm);
    
@ -1022,6 +1025,7 @@ if (1){
    
    Mtmp = Eigen::MatrixXcd::Zero(Nm,Nm);

+    Glog << "shiftedQRDecompEigen() Mtmp create" << '\n'; 
    for (int i=0; i<Nm; ++i) {
      for (int j=0; j<Nm-(Nu+1); ++j) {
        for (int k=0; k<Nu+1+j; ++k) {
@ -1029,6 +1033,7 @@ if (1){
        }
      }
    }
+    Glog << "shiftedQRDecompEigen() Mtmp loop1" << '\n'; 
    for (int i=0; i<Nm; ++i) {
      for (int j=Nm-(Nu+1); j<Nm; ++j) {
        for (int k=0; k<Nm; ++k) {
@ -1036,6 +1041,7 @@ if (1){
        }
      }
    }
+    Glog << "shiftedQRDecompEigen() Mtmp loop2" << '\n'; 
    
    //static int ntimes = 2;
    //for (int j=0; j<Nm-(ntimes*Nu); ++j) {
@ -1061,11 +1067,13 @@ if (1){
        Mtmp(j,i) = conj(Mtmp(i,j));
      }
    }
+    Glog << "shiftedQRDecompEigen() Mtmp loop3" << '\n'; 

    for (int i=0; i<Nm; ++i) {
      Mtmp(i,i) = real(Mtmp(i,i)) + Dsh;
    }
    
+    Glog << "shiftedQRDecompEigen() Mtmp loop4" << '\n'; 
    M = Mtmp;

    //M = Q.adjoint()*(M*Q);
@ -1077,7 +1085,7 @@ if (1){
    //  }
    //}
    
-    //Glog << "shiftedQRDecompEigen() end" << endl; 
+    Glog << "shiftedQRDecompEigen() end" <<std::endl; 
  }

  void exampleQRDecompEigen(void)
--- a/systems/Frontier/sourceme.sh
+++ b/systems/Frontier/sourceme.sh
@ -3,7 +3,7 @@ spack load c-lime
 module load emacs 
 module load PrgEnv-gnu
 module load rocm
-module load cray-mpich/8.1.23
+module load cray-mpich
 module load gmp
 module load cray-fftw
 module load craype-accel-amd-gfx90a
--- a/tests/debug/Test_8888.cc
+++ b/tests/debug/Test_8888.cc
@ -0,0 +1,118 @@
+/*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/debug/Test_8888.cc
+
+    Copyright (C) 2023
+
+Author: Peter Boyle <pboyle@bnl.gov>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#include <Grid/Grid.h>
+#include <Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczos.h>
+#include <Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczosCoarse.h>
+#include <Grid/algorithms/iterative/AdefMrhs.h>
+
+using namespace std;
+using namespace Grid;
+
+// Debug driver: fine-grid power method followed by a Chebyshev-accelerated
+// implicitly restarted Lanczos on MdagM of a Mobius fermion operator.
+//
+// Sequence, as coded below:
+//   1. Build 4d / 5d (full and red-black) grids from the Grid defaults, Ls=8.
+//   2. Read the gauge configuration "ckpoint_EODWF_lat.125" (NERSC format)
+//      from the current working directory.
+//   3. Build MobiusFermionD with mass=0.01, M5=1.8, b=1.0, c=0.0 and wrap it
+//      in MdagMLinearOperator.
+//   4. Run PowerMethod on that operator from a constant source.
+//   5. Run ImplicitlyRestartedLanczos with a Chebyshev polynomial of the
+//      operator, again from a constant source.
+//
+// Returns 0 after Grid_finalize().
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  const int Ls=8;
+  // NOTE(review): nbasis and cb are not referenced anywhere in this function;
+  // presumably left over from the coarse-grid test this file was derived from.
+  const int nbasis = 40;
+  const int cb = 0 ;
+  RealD mass=0.01;
+  RealD M5=1.8;
+  RealD b=1.0;
+  RealD c=0.0;
+
+  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
+								   GridDefaultSimd(Nd,vComplex::Nsimd()),
+								   GridDefaultMpi());
+  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
+  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
+  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
+
+  ///////////////////////// RNGs /////////////////////////////////
+  // NOTE(review): the RNGs are seeded but never drawn from below, and cseeds
+  // is unused; kept so the object construction order is unchanged.
+  std::vector<int> seeds4({1,2,3,4});
+  std::vector<int> seeds5({5,6,7,8});
+  std::vector<int> cseeds({5,6,7,8});
+
+  GridParallelRNG          RNG5(FGrid);   RNG5.SeedFixedIntegers(seeds5);
+  GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4);
+
+  ///////////////////////// Configuration /////////////////////////////////
+  LatticeGaugeField Umu(UGrid);
+
+  FieldMetaData header;
+  std::string file("ckpoint_EODWF_lat.125");
+  NerscIO::readConfiguration(Umu,header,file);
+
+  //////////////////////// Fermion action //////////////////////////////////
+  MobiusFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c);
+
+  MdagMLinearOperator<MobiusFermionD, LatticeFermion> HermOp(Ddwf);
+
+
+  std::cout << "**************************************"<<std::endl;
+  std::cout << "         Fine Power method            "<<std::endl;
+  std::cout << "**************************************"<<std::endl;
+
+  LatticeFermionD pm_src(FGrid);
+  pm_src = ComplexD(1.0);
+  PowerMethod<LatticeFermionD>       fPM;
+  fPM(HermOp,pm_src);
+
+
+  std::cout << "**************************************"<<std::endl;
+  std::cout << "         Fine Lanczos  (poly, low)    "<<std::endl;
+  std::cout << "**************************************"<<std::endl;
+
+  int Nk=80;
+  int Nm=Nk*3;
+  // NOTE(review): Nstop and Nconv_test_interval are declared but not passed to
+  // the solver below (Nk is passed in both of the first two integer slots).
+  // Confirm whether Nstop=8 was meant to be the first of those arguments.
+  int Nstop=8;
+  int Nconv_test_interval=1;
+
+  //  Chebyshev<LatticeFermionD>      IRLChebyLo(0.2,64.0,201);  // 1 iter
+  Chebyshev<LatticeFermionD>      IRLChebyLo(0.0,55.0,101);  // 1 iter
+  FunctionHermOp<LatticeFermionD>    PolyOp(IRLChebyLo,HermOp);
+  PlainHermOp<LatticeFermionD>          Op(HermOp);
+
+  ImplicitlyRestartedLanczos IRL(PolyOp,
+				 Op,
+				 Nk, // first integer slot; presumably the stop count - TODO confirm
+				 Nk, // sought vecs
+				 Nm, // spare vecs
+				 1.0e-8,
+				 10 // Max iterations
+				 );
+
+  int Nconv;
+  std::vector<RealD>            eval(Nm);
+  std::vector<LatticeFermionD>     evec(Nm,FGrid);
+
+  // irl_src is passed to calc() as its source argument, so give it defined
+  // contents first. Previously it was constructed and handed over without
+  // ever being assigned. Use the same constant fill as pm_src above.
+  LatticeFermionD     irl_src(FGrid);
+  irl_src = ComplexD(1.0);
+
+  IRL.calc(eval,evec,irl_src,Nconv);
+
+  Grid_finalize();
+  return 0;
+}
--- a/tests/debug/Test_general_coarse.cc
+++ b/tests/debug/Test_general_coarse.cc
@ -244,7 +244,7 @@ int main (int argc, char ** argv)

  GridCartesian *CoarseMrhs = new GridCartesian(rhLatt,rhSimd,rhMpi); 

-  
+#if 0  
  MultiGeneralCoarsenedMatrix mrhs(LittleDiracOp,CoarseMrhs);
  typedef decltype(mrhs) MultiGeneralCoarsenedMatrix_t;
  
@ -308,6 +308,7 @@ int main (int argc, char ** argv)
    mrhsCG(MrhsCoarseOp,rh_src,rh_res);
  }

+#endif
  std::cout<<GridLogMessage<<std::endl;
  std::cout<<GridLogMessage<<std::endl;
  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
--- a/tests/debug/Test_general_coarse_hdcg_phys48_mixed.cc
+++ b/tests/debug/Test_general_coarse_hdcg_phys48_mixed.cc
@ -145,7 +145,7 @@ int main (int argc, char ** argv)
  Grid_init(&argc,&argv);

  const int Ls=24;
-  const int nbasis = 60;
+  const int nbasis = 62;
  const int cb = 0 ;
  RealD mass=0.00078;
  RealD M5=1.8;
@ -160,7 +160,7 @@ int main (int argc, char ** argv)
  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);

  // Construct a coarsened grid with 4^4 cell
-  Coordinate Block({4,4,4,4});
+  Coordinate Block({4,4,6,4});
  Coordinate clatt = GridDefaultLatt();
  for(int d=0;d<clatt.size();d++){
    clatt[d] = clatt[d]/Block[d];
--- a/tests/debug/Test_general_coarse_hdcg_phys96_mixed.cc
+++ b/tests/debug/Test_general_coarse_hdcg_phys96_mixed.cc
@ -160,7 +160,8 @@ int main (int argc, char ** argv)
  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);

  // Construct a coarsened grid with 4^4 cell
-  Coordinate Block({4,4,6,6});
+  //  Coordinate Block({4,4,6,4});
+  Coordinate Block({4,4,4,4});
  Coordinate clatt = GridDefaultLatt();
  for(int d=0;d<clatt.size();d++){
    clatt[d] = clatt[d]/Block[d];
@ -217,7 +218,7 @@ int main (int argc, char ** argv)
  std::string evec_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/evecs.scidac");
  std::string eval_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/eval.xml");
  bool load_agg=true;
-  bool load_refine=false;
+  bool load_refine=true;
  bool load_mat=false;
  bool load_evec=false;

@ -276,17 +277,25 @@ int main (int argc, char ** argv)
  std::cout << "**************************************"<<std::endl;

  typedef HermitianLinearOperator<MultiGeneralCoarsenedMatrix_t,CoarseVector> MrhsHermMatrix;
-  Chebyshev<CoarseVector>      IRLCheby(0.0012,42.0,301);  // 1 iter
+  //  Chebyshev<CoarseVector>      IRLCheby(0.0012,42.0,301);  // 4.4.6.4
+  //  Chebyshev<CoarseVector>      IRLCheby(0.0012,42.0,501);  // for 4.4.4.4 blocking 350 evs
+  //  Chebyshev<CoarseVector>      IRLCheby(0.0014,42.0,501);  // for 4.4.4.4 blocking 700 evs
+  //  Chebyshev<CoarseVector>      IRLCheby(0.002,42.0,501);  // for 4.4.4.4 blocking 1226 evs
+  //  Chebyshev<CoarseVector>      IRLCheby(0.0025,42.0,501);  // for 4.4.4.4 blocking 1059 evs
+							  //							  3e-4,2);
+  Chebyshev<CoarseVector>      IRLCheby(0.0018,42.0,301);  // for 4.4.4.4 blocking  // 790 evs
+  
  MrhsHermMatrix MrhsCoarseOp     (mrhs);

  CoarseVector pm_src(CoarseMrhs);
  pm_src = ComplexD(1.0);
  PowerMethod<CoarseVector>       cPM;   cPM(MrhsCoarseOp,pm_src);

-  int Nk=nrhs*30;
+  //  int Nk=nrhs*30; // 4.4.6.4
  //  int Nk=nrhs*80;
-  int Nm=Nk*4;
-  int Nstop=Nk;
+  int Nk=nrhs*60; // 720
+  int Nm=Nk*4;    // 2880 ; generally finishes at 1440
+  int Nstop=512;
  int Nconv_test_interval=1;
  
  ImplicitlyRestartedBlockLanczosCoarse<CoarseVector> IRL(MrhsCoarseOp,
@ -299,7 +308,7 @@ int main (int argc, char ** argv)
 							  nrhs,
 							  Nk,
 							  Nm,
-							  1e-4,20);
+							  3e-4,2);

  std::vector<RealD>            eval(Nm);
  std::vector<CoarseVector>     evec(Nm,Coarse5d);
@ -331,7 +340,7 @@ int main (int argc, char ** argv)
  // Extra HDCG parameters
  //////////////////////////
  int maxit=3000;
-  ConjugateGradient<CoarseVector>  CG(5.0e-2,maxit,false);
+  ConjugateGradient<CoarseVector>  CG(7.5e-2,maxit,false);
  RealD lo=2.0;
  int ord = 7;
Author	SHA1	Message	Date
Peter Boyle	b461184797	Merge branch 'develop' of https://github.com/paboyle/Grid into develop	2024-07-23 09:53:58 -04:00
Peter Boyle	4563b39305	New Frontier config	2024-07-23 09:53:08 -04:00
Peter Boyle	c9d5674d5b	Final for paper	2024-07-22 15:26:45 -04:00
Peter Boyle	486412635a	8^4 test for PETSc	2024-07-22 15:25:17 -04:00
Peter Boyle	8b23a1546a	Force compile temporarily	2024-07-22 15:24:56 -04:00
Peter Boyle	a901e4e369	Regressed performance for paper	2024-07-22 15:24:04 -04:00
Peter Boyle	804d9367d4	Regressed performance	2024-07-22 15:23:25 -04:00
Peter Boyle	12b8be7cb9	Best so far on 96^3 350 Evecs converged on 4^4 block	2024-06-18 16:31:37 -04:00