1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-09 23:45:36 +00:00

Compare commits

..

8 Commits

Author SHA1 Message Date
Peter Boyle
b461184797 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2024-07-23 09:53:58 -04:00
Peter Boyle
4563b39305 New Frontier config 2024-07-23 09:53:08 -04:00
Peter Boyle
c9d5674d5b FInal for paper 2024-07-22 15:26:45 -04:00
Peter Boyle
486412635a 8^4 test for PETSc 2024-07-22 15:25:17 -04:00
Peter Boyle
8b23a1546a Force compile temporarily 2024-07-22 15:24:56 -04:00
Peter Boyle
a901e4e369 Regressed performance for paper 2024-07-22 15:24:04 -04:00
Peter Boyle
804d9367d4 Regressed performance 2024-07-22 15:23:25 -04:00
Peter Boyle
12b8be7cb9 Best so far on 96^3 350 Evecs converged on 4^4 block 2024-06-18 16:31:37 -04:00
6 changed files with 174 additions and 38 deletions

View File

@ -279,11 +279,11 @@ public:
Qt = Eigen::MatrixXcd::Identity(Nm,Nm);
diagonalize(eval2,lmd2,lme2,Nu,Nm,Nm,Qt,grid);
_sort.push(eval2,Nm);
// Glog << "#Ritz value before shift: "<< std::endl;
Glog << "#Ritz value before shift: "<< std::endl;
for(int i=0; i<Nm; ++i){
// std::cout.precision(13);
// std::cout << "[" << std::setw(4)<< std::setiosflags(std::ios_base::right) <<i<<"] ";
// std::cout << "Rval = "<<std::setw(20)<< std::setiosflags(std::ios_base::left)<< eval2[i] << std::endl;
std::cout.precision(13);
std::cout << "[" << std::setw(4)<< std::setiosflags(std::ios_base::right) <<i<<"] ";
std::cout << "Rval = "<<std::setw(20)<< std::setiosflags(std::ios_base::left)<< eval2[i] << std::endl;
}
//----------------------------------------------------------------------
@ -298,6 +298,7 @@ public:
unpackHermitBlockTriDiagMatToEigen(lmd,lme,Nu,Nblock_m,Nm,Nm,BTDM);
for(int ip=Nk; ip<Nm; ++ip){
Glog << " ip "<<ip<<" / "<<Nm<<std::endl;
shiftedQRDecompEigen(BTDM,Nu,Nm,eval2[ip],Q);
}
@ -325,7 +326,7 @@ public:
Qt = Eigen::MatrixXcd::Identity(Nm,Nm);
diagonalize(eval2,lmd2,lme2,Nu,Nk,Nm,Qt,grid);
_sort.push(eval2,Nk);
// Glog << "#Ritz value after shift: "<< std::endl;
Glog << "#Ritz value after shift: "<< std::endl;
for(int i=0; i<Nk; ++i){
// std::cout.precision(13);
// std::cout << "[" << std::setw(4)<< std::setiosflags(std::ios_base::right) <<i<<"] ";
@ -467,10 +468,10 @@ public:
// set initial vector
for (int i=0; i<Nu; ++i) {
// Glog << "norm2(src[" << i << "])= "<< norm2(src[i]) << std::endl;
Glog << "norm2(src[" << i << "])= "<< norm2(src[i]) << std::endl;
evec[i] = src[i];
orthogonalize(evec[i],evec,i);
// Glog << "norm2(evec[" << i << "])= "<< norm2(evec[i]) << std::endl;
Glog << "norm2(evec[" << i << "])= "<< norm2(evec[i]) << std::endl;
}
// exit(-43);
@ -506,11 +507,11 @@ public:
Qt = Eigen::MatrixXcd::Identity(Nr,Nr);
diagonalize(eval2,lmd2,lme2,Nu,Nr,Nr,Qt,grid);
_sort.push(eval2,Nr);
// Glog << "#Ritz value: "<< std::endl;
Glog << "#Ritz value: "<< std::endl;
for(int i=0; i<Nr; ++i){
// std::cout.precision(13);
// std::cout << "[" << std::setw(4)<< std::setiosflags(std::ios_base::right) <<i<<"] ";
// std::cout << "Rval = "<<std::setw(20)<< std::setiosflags(std::ios_base::left)<< eval2[i] << std::endl;
std::cout.precision(13);
std::cout << "[" << std::setw(4)<< std::setiosflags(std::ios_base::right) <<i<<"] ";
std::cout << "Rval = "<<std::setw(20)<< std::setiosflags(std::ios_base::left)<< eval2[i] << std::endl;
}
// Convergence test
@ -570,6 +571,7 @@ public:
Glog << fname + " NOT converged ; Summary :\n";
} else {
Glog << fname + " CONVERGED ; Summary :\n";
Nstop = Nconv_guess; // Just take them all
// Sort converged eigenpairs.
std::vector<Field> Btmp(Nstop,grid); // waste of space replicating
@ -642,7 +644,7 @@ private:
// for (int u=0; u<mrhs; ++u) Glog << " out["<<u<<"] = "<<norm2(out[u])<<std::endl;
k_start +=mrhs;
}
// Glog << "LinAlg "<< std::endl;
Glog << "LinAlg "<< std::endl;
if (b>0) {
for (int u=0; u<Nu; ++u) {
@ -676,7 +678,7 @@ private:
}
w_copy[u] = w[u];
}
// Glog << "LinAlg done"<< std::endl;
Glog << "LinAlg done"<< std::endl;
// In block version, the steps 6 and 7 in Lanczos construction is
// replaced by the QR decomposition of new basis block.
@ -689,15 +691,15 @@ private:
}
// re-orthogonalization for numerical stability
// Glog << "Gram Schmidt"<< std::endl;
Glog << "Gram Schmidt"<< std::endl;
orthogonalize(w,Nu,evec,R);
// QR part
for (int u=1; u<Nu; ++u) {
orthogonalize(w[u],w,u);
}
// Glog << "Gram Schmidt done "<< std::endl;
Glog << "Gram Schmidt done "<< std::endl;
// Glog << "LinAlg "<< std::endl;
Glog << "LinAlg "<< std::endl;
for (int u=0; u<Nu; ++u) {
//for (int v=0; v<Nu; ++v) {
for (int v=u; v<Nu; ++v) {
@ -714,7 +716,7 @@ private:
// Glog <<" In block "<< b << "," <<" beta[" << u << "," << k-L << "] = " << lme[u][k] << std::endl;
}
}
// Glog << "LinAlg done "<< std::endl;
Glog << "LinAlg done "<< std::endl;
if (b < Nm/Nu-1) {
for (int u=0; u<Nu; ++u) {
@ -779,7 +781,7 @@ private:
for ( int u=0; u<Nu; ++u ) {
for (int k=0; k<Nk; ++k ) {
// Glog << "lmd "<<u<<" "<<k<<" "<<lmd[u][k] -conjugate(lmd[u][k])<<std::endl;
// Glog << "lmd "<<u<<" "<<k<<" "<<lmd[u][k] -conjugate(lmd[u][k])<<std::endl;
BlockTriDiag(k,u+(k/Nu)*Nu) = lmd[u][k];
}
}
@ -933,7 +935,7 @@ if (1){
int Nu, int Nb, int Nk, int Nm,
Eigen::MatrixXcd& M)
{
//Glog << "unpackHermitBlockTriDiagMatToEigen() begin" << '\n';
Glog << "unpackHermitBlockTriDiagMatToEigen() begin" << '\n';
assert( Nk%Nu == 0 && Nm%Nu == 0 );
assert( Nk <= Nm );
M = Eigen::MatrixXcd::Zero(Nk,Nk);
@ -951,7 +953,7 @@ if (1){
M(u+(k/Nu)*Nu,k-Nu) = lme[u][k-Nu];
}
}
//Glog << "unpackHermitBlockTriDiagMatToEigen() end" << endl;
Glog << "unpackHermitBlockTriDiagMatToEigen() end" << std::endl;
}
@ -961,7 +963,7 @@ if (1){
int Nu, int Nb, int Nk, int Nm,
Eigen::MatrixXcd& M)
{
//Glog << "packHermitBlockTriDiagMatfromEigen() begin" << '\n';
Glog << "packHermitBlockTriDiagMatfromEigen() begin" << '\n';
assert( Nk%Nu == 0 && Nm%Nu == 0 );
assert( Nk <= Nm );
@ -977,7 +979,7 @@ if (1){
lme[u][k-Nu] = M(u+(k/Nu)*Nu,k-Nu);
}
}
//Glog << "packHermitBlockTriDiagMatfromEigen() end" << endl;
Glog << "packHermitBlockTriDiagMatfromEigen() end" <<std::endl;
}
@ -986,7 +988,7 @@ if (1){
RealD Dsh,
Eigen::MatrixXcd& Qprod)
{
//Glog << "shiftedQRDecompEigen() begin" << '\n';
Glog << "shiftedQRDecompEigen() begin" << '\n';
Eigen::MatrixXcd Q = Eigen::MatrixXcd::Zero(Nm,Nm);
Eigen::MatrixXcd R = Eigen::MatrixXcd::Zero(Nm,Nm);
Eigen::MatrixXcd Mtmp = Eigen::MatrixXcd::Zero(Nm,Nm);
@ -1002,6 +1004,7 @@ if (1){
// lower triangular part used to represent series
// of Q sequence.
Glog << "shiftedQRDecompEigen() Housholder & QR" << '\n';
// equivalent operation of Qprod *= Q
//M = Eigen::MatrixXcd::Zero(Nm,Nm);
@ -1022,6 +1025,7 @@ if (1){
Mtmp = Eigen::MatrixXcd::Zero(Nm,Nm);
Glog << "shiftedQRDecompEigen() Mtmp create" << '\n';
for (int i=0; i<Nm; ++i) {
for (int j=0; j<Nm-(Nu+1); ++j) {
for (int k=0; k<Nu+1+j; ++k) {
@ -1029,6 +1033,7 @@ if (1){
}
}
}
Glog << "shiftedQRDecompEigen() Mtmp loop1" << '\n';
for (int i=0; i<Nm; ++i) {
for (int j=Nm-(Nu+1); j<Nm; ++j) {
for (int k=0; k<Nm; ++k) {
@ -1036,6 +1041,7 @@ if (1){
}
}
}
Glog << "shiftedQRDecompEigen() Mtmp loop2" << '\n';
//static int ntimes = 2;
//for (int j=0; j<Nm-(ntimes*Nu); ++j) {
@ -1061,11 +1067,13 @@ if (1){
Mtmp(j,i) = conj(Mtmp(i,j));
}
}
Glog << "shiftedQRDecompEigen() Mtmp loop3" << '\n';
for (int i=0; i<Nm; ++i) {
Mtmp(i,i) = real(Mtmp(i,i)) + Dsh;
}
Glog << "shiftedQRDecompEigen() Mtmp loop4" << '\n';
M = Mtmp;
//M = Q.adjoint()*(M*Q);
@ -1077,7 +1085,7 @@ if (1){
// }
//}
//Glog << "shiftedQRDecompEigen() end" << endl;
Glog << "shiftedQRDecompEigen() end" <<std::endl;
}
void exampleQRDecompEigen(void)

View File

@ -3,7 +3,7 @@ spack load c-lime
module load emacs
module load PrgEnv-gnu
module load rocm
module load cray-mpich/8.1.23
module load cray-mpich
module load gmp
module load cray-fftw
module load craype-accel-amd-gfx90a

118
tests/debug/Test_8888.cc Normal file
View File

@ -0,0 +1,118 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/debug/Test_8888.cc
Copyright (C) 2023
Author: Peter Boyle <pboyle@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczos.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczosCoarse.h>
#include <Grid/algorithms/iterative/AdefMrhs.h>
using namespace std;
using namespace Grid;
// Debug driver: reads a gauge configuration, builds a MobiusFermionD operator,
// runs the power method on its MdagM linear operator, then runs
// ImplicitlyRestartedLanczos with a Chebyshev-filtered copy of that operator.
//
// The lattice size, SIMD layout and MPI decomposition all come from the
// GridDefault*() helpers, i.e. from whatever Grid_init() was given in argv.
//
// Returns 0 after Grid_finalize().
int main (int argc, char ** argv)
{
  Grid_init(&argc,&argv);

  // Parameters handed to the MobiusFermionD constructor below.
  const int Ls=8;
  RealD mass=0.01;
  RealD M5=1.8;
  RealD b=1.0;
  RealD c=0.0;

  // Four- and five-dimensional grids, each in full and red-black flavours.
  // The raw pointers are intentionally left alive until process exit, as in
  // the original.
  GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
                                                         GridDefaultSimd(Nd,vComplex::Nsimd()),
                                                         GridDefaultMpi());
  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
  GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);

  ///////////////////////// RNGs /////////////////////////////////
  // NOTE(review): neither generator is consumed later in this function; they
  // are kept because their construction/seeding is project code whose side
  // effects are not visible from here.
  std::vector<int> seeds4({1,2,3,4});
  std::vector<int> seeds5({5,6,7,8});
  GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
  GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);

  ///////////////////////// Configuration /////////////////////////////////
  // The configuration file name is relative to the working directory.
  LatticeGaugeField Umu(UGrid);
  FieldMetaData header;
  std::string file("ckpoint_EODWF_lat.125");
  NerscIO::readConfiguration(Umu,header,file);

  //////////////////////// Fermion action //////////////////////////////////
  MobiusFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c);
  MdagMLinearOperator<MobiusFermionD, LatticeFermion> HermOp(Ddwf);

  std::cout << "**************************************"<<std::endl;
  std::cout << " Fine Power method "<<std::endl;
  std::cout << "**************************************"<<std::endl;
  LatticeFermionD pm_src(FGrid);
  pm_src = ComplexD(1.0);
  PowerMethod<LatticeFermionD> fPM;
  fPM(HermOp,pm_src);

  std::cout << "**************************************"<<std::endl;
  std::cout << " Fine Lanczos (poly, low) "<<std::endl;
  std::cout << "**************************************"<<std::endl;
  int Nk=80;
  int Nm=Nk*3;

  // Chebyshev<LatticeFermionD> IRLChebyLo(0.2,64.0,201); // 1 iter
  Chebyshev<LatticeFermionD> IRLChebyLo(0.0,55.0,101); // 1 iter
  FunctionHermOp<LatticeFermionD> PolyOp(IRLChebyLo,HermOp);
  PlainHermOp<LatticeFermionD> Op(HermOp);

  // NOTE(review): Nk is passed in both of the first two integer slots, exactly
  // as the original did (the original also declared an unused Nstop=8).
  // Confirm against the ImplicitlyRestartedLanczos constructor whether the
  // first slot was meant to be a separate stop count.
  ImplicitlyRestartedLanczos IRL(PolyOp,
                                 Op,
                                 Nk, // sought vecs
                                 Nk, // sought vecs
                                 Nm, // spare vecs
                                 1.0e-8,
                                 10 // Max iterations
                                 );

  int Nconv = 0;
  std::vector<RealD> eval(Nm);
  std::vector<LatticeFermionD> evec(Nm,FGrid);

  // The original handed irl_src to calc() without ever assigning it. Give it
  // the same constant fill pm_src uses so the start vector is well defined.
  // NOTE(review): assumes the field constructor does not initialise site
  // data — confirm; a random source drawn from RNG5 may be preferable.
  LatticeFermionD irl_src(FGrid);
  irl_src = ComplexD(1.0);

  IRL.calc(eval,evec,irl_src,Nconv);

  Grid_finalize();
  return 0;
}

View File

@ -244,7 +244,7 @@ int main (int argc, char ** argv)
GridCartesian *CoarseMrhs = new GridCartesian(rhLatt,rhSimd,rhMpi);
#if 0
MultiGeneralCoarsenedMatrix mrhs(LittleDiracOp,CoarseMrhs);
typedef decltype(mrhs) MultiGeneralCoarsenedMatrix_t;
@ -308,6 +308,7 @@ int main (int argc, char ** argv)
mrhsCG(MrhsCoarseOp,rh_src,rh_res);
}
#endif
std::cout<<GridLogMessage<<std::endl;
std::cout<<GridLogMessage<<std::endl;
std::cout<<GridLogMessage<<"*******************************************"<<std::endl;

View File

@ -145,7 +145,7 @@ int main (int argc, char ** argv)
Grid_init(&argc,&argv);
const int Ls=24;
const int nbasis = 60;
const int nbasis = 62;
const int cb = 0 ;
RealD mass=0.00078;
RealD M5=1.8;
@ -160,7 +160,7 @@ int main (int argc, char ** argv)
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
// Construct a coarsened grid with 4^4 cell
Coordinate Block({4,4,4,4});
Coordinate Block({4,4,6,4});
Coordinate clatt = GridDefaultLatt();
for(int d=0;d<clatt.size();d++){
clatt[d] = clatt[d]/Block[d];

View File

@ -160,7 +160,8 @@ int main (int argc, char ** argv)
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
// Construct a coarsened grid with 4^4 cell
Coordinate Block({4,4,6,6});
// Coordinate Block({4,4,6,4});
Coordinate Block({4,4,4,4});
Coordinate clatt = GridDefaultLatt();
for(int d=0;d<clatt.size();d++){
clatt[d] = clatt[d]/Block[d];
@ -217,7 +218,7 @@ int main (int argc, char ** argv)
std::string evec_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/evecs.scidac");
std::string eval_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/eval.xml");
bool load_agg=true;
bool load_refine=false;
bool load_refine=true;
bool load_mat=false;
bool load_evec=false;
@ -276,17 +277,25 @@ int main (int argc, char ** argv)
std::cout << "**************************************"<<std::endl;
typedef HermitianLinearOperator<MultiGeneralCoarsenedMatrix_t,CoarseVector> MrhsHermMatrix;
Chebyshev<CoarseVector> IRLCheby(0.0012,42.0,301); // 1 iter
// Chebyshev<CoarseVector> IRLCheby(0.0012,42.0,301); // 4.4.6.4
// Chebyshev<CoarseVector> IRLCheby(0.0012,42.0,501); // for 4.4.4.4 blocking 350 evs
// Chebyshev<CoarseVector> IRLCheby(0.0014,42.0,501); // for 4.4.4.4 blocking 700 evs
// Chebyshev<CoarseVector> IRLCheby(0.002,42.0,501); // for 4.4.4.4 blocking 1226 evs
// Chebyshev<CoarseVector> IRLCheby(0.0025,42.0,501); // for 4.4.4.4 blocking 1059 evs
// 3e-4,2);
Chebyshev<CoarseVector> IRLCheby(0.0018,42.0,301); // for 4.4.4.4 blocking // 790 evs
MrhsHermMatrix MrhsCoarseOp (mrhs);
CoarseVector pm_src(CoarseMrhs);
pm_src = ComplexD(1.0);
PowerMethod<CoarseVector> cPM; cPM(MrhsCoarseOp,pm_src);
int Nk=nrhs*30;
// int Nk=nrhs*30; // 4.4.6.4
// int Nk=nrhs*80;
int Nm=Nk*4;
int Nstop=Nk;
int Nk=nrhs*60; // 720
int Nm=Nk*4; // 2880 ; generally finishes at 1440
int Nstop=512;
int Nconv_test_interval=1;
ImplicitlyRestartedBlockLanczosCoarse<CoarseVector> IRL(MrhsCoarseOp,
@ -299,7 +308,7 @@ int main (int argc, char ** argv)
nrhs,
Nk,
Nm,
1e-4,20);
3e-4,2);
std::vector<RealD> eval(Nm);
std::vector<CoarseVector> evec(Nm,Coarse5d);
@ -331,7 +340,7 @@ int main (int argc, char ** argv)
// Extra HDCG parameters
//////////////////////////
int maxit=3000;
ConjugateGradient<CoarseVector> CG(5.0e-2,maxit,false);
ConjugateGradient<CoarseVector> CG(7.5e-2,maxit,false);
RealD lo=2.0;
int ord = 7;