mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-29 11:09:33 +00:00 
			
		
		
		
	Compare commits
	
		
			3 Commits
		
	
	
		
			b7c7000d0d
			...
			25f71913b7
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 25f71913b7 | ||
|  | 34ddd2b7b1 | ||
|  | d5fd90b2f3 | 
| @@ -69,279 +69,339 @@ class TwoLevelCG : public LinearFunction<Field> | ||||
|    | ||||
|   virtual void operator() (const Field &src, Field &x) | ||||
|   { | ||||
| #if 0 | ||||
|     Field resid(grid); | ||||
|     std::cout << GridLogMessage<<"HDCG: fPcg starting"<<std::endl; | ||||
|     RealD f; | ||||
|     RealD rtzp,rtz,a,d,b; | ||||
|     RealD rptzp; | ||||
|      | ||||
|     Field p(grid); | ||||
|  | ||||
|     ///////////////////////////// | ||||
|     // Set up history vectors | ||||
|     ///////////////////////////// | ||||
|     int mmax = 5; | ||||
|     std::cout << GridLogMessage<<"HDCG: fPcg allocating"<<std::endl; | ||||
|     std::vector<Field> p(mmax,grid); | ||||
|     std::vector<Field> mmp(mmax,grid); | ||||
|     std::vector<RealD> pAp(mmax); | ||||
|     Field z(grid); | ||||
|     Field tmp(grid); | ||||
|     Field mmp(grid); | ||||
|     Field r  (grid); | ||||
|     Field mu (grid); | ||||
|     Field rp (grid); | ||||
|     Field  mp (grid); | ||||
|     Field  r  (grid); | ||||
|     Field  mu (grid); | ||||
|      | ||||
|     std::cout << GridLogMessage<<"HDCG: fPcg allocated"<<std::endl; | ||||
|     //Initial residual computation & set up | ||||
|     double tn; | ||||
|  | ||||
|     RealD guess   = norm2(x); | ||||
|     std::cout << GridLogMessage<<"HDCG: fPcg guess nrm "<<guess<<std::endl; | ||||
|     RealD src_nrm = norm2(src); | ||||
|     std::cout << GridLogMessage<<"HDCG: fPcg src nrm "<<src_nrm<<std::endl; | ||||
|      | ||||
|     if ( src_nrm == 0.0 ) { | ||||
|       std::cout << GridLogMessage<<"HDCG: fPcg given trivial source norm "<<src_nrm<<std::endl; | ||||
|       x=Zero(); | ||||
|     } | ||||
|     RealD tn; | ||||
|      | ||||
|     GridStopWatch HDCGTimer; | ||||
|     HDCGTimer.Start(); | ||||
|     ////////////////////////// | ||||
|     // x0 = Vstart -- possibly modify guess | ||||
|     ////////////////////////// | ||||
|     x=Zero(); | ||||
|     Vstart(x,src); | ||||
|  | ||||
|      | ||||
|     // r0 = b -A x0 | ||||
|     _FineLinop.HermOp(x,mmp); | ||||
|  | ||||
|     axpy(r, -1.0, mmp, src);    // Recomputes r=src-x0 | ||||
|     rp=r; | ||||
|     _FineLinop.HermOp(x,mmp[0]); | ||||
|     axpy (r, -1.0,mmp[0], src);    // Recomputes r=src-Ax0 | ||||
|     { | ||||
|       double n1 = norm2(x); | ||||
|       double n2 = norm2(mmp[0]); | ||||
|       double n3 = norm2(r); | ||||
|       std::cout<<GridLogMessage<<"x,vstart,r = "<<n1<<" "<<n2<<" "<<n3<<std::endl; | ||||
|     } | ||||
|  | ||||
|     ////////////////////////////////// | ||||
|     // Compute z = M1 x | ||||
|     ////////////////////////////////// | ||||
|     PcgM1(r,z); | ||||
|     rtzp =real(innerProduct(r,z)); | ||||
|  | ||||
|      | ||||
|     /////////////////////////////////////// | ||||
|     // Except Def2, M2 is trivial | ||||
|     // Solve for Mss mu = P A z and set p = z-mu | ||||
|     // Def2 p = 1 - Q Az = Pright z | ||||
|     // Other algos M2 is trivial | ||||
|     /////////////////////////////////////// | ||||
|     p=z; | ||||
|     PcgM2(z,p[0]); | ||||
|  | ||||
|     RealD ssq =  norm2(src); | ||||
|     RealD rsq =  ssq*Tolerance*Tolerance; | ||||
|  | ||||
|     GRID_TRACE("MultiGrid TwoLevel "); | ||||
|     std::cout<<GridLogMessage<<"HDCG: k=0 residual "<<rtzp<<" target rsq "<<rsq<<" ssq "<<ssq<<std::endl; | ||||
|     std::cout << GridLogMessage<<"HDCG: k=0 residual "<<rtzp<<" rsq "<<rsq<<"\n"; | ||||
|  | ||||
|     Field pp(grid); | ||||
|  | ||||
|     for (int k=0;k<=MaxIterations;k++){ | ||||
|      | ||||
|     for (int k=1;k<=MaxIterations;k++){ | ||||
|       int peri_k  = k % mmax; | ||||
|       int peri_kp = (k+1) % mmax; | ||||
|  | ||||
|       rtz=rtzp; | ||||
|       d= PcgM3(p,mmp); | ||||
|       d= PcgM3(p[peri_k],mmp[peri_k]); | ||||
|       a = rtz/d; | ||||
|      | ||||
|       // Memorise this | ||||
|       pAp[peri_k] = d; | ||||
|        | ||||
|       axpy(x,a,p[peri_k],x); | ||||
|       RealD rn = axpy_norm(r,-a,mmp[peri_k],r); | ||||
|  | ||||
|       axpy(x,a,p,x); | ||||
|       RealD rn = axpy_norm(r,-a,mmp,r); | ||||
|  | ||||
|       // Compute z = M x | ||||
|       PcgM1(r,z); | ||||
|  | ||||
|       rtzp =real(innerProduct(r,z)); | ||||
|  | ||||
|       int ipcg=1; // almost free inexact preconditioned CG | ||||
|       if (ipcg) { | ||||
| 	rptzp =real(innerProduct(rp,z)); | ||||
|       } else { | ||||
| 	rptzp =0; | ||||
|        | ||||
|       { | ||||
| 	RealD n1,n2; | ||||
| 	n1=norm2(r); | ||||
| 	n2=norm2(z); | ||||
| 	std::cout << GridLogMessage<<"HDCG::fPcg iteration "<<k<<" : vector r,z "<<n1<<" "<<n2<<"\n"; | ||||
|       } | ||||
|       b = (rtzp-rptzp)/rtz; | ||||
|       rtzp =real(innerProduct(r,z)); | ||||
|       std::cout << GridLogMessage<<"HDCG::fPcg iteration "<<k<<" : inner rtzp "<<rtzp<<"\n"; | ||||
|  | ||||
|       //    PcgM2(z,p[0]); | ||||
|       PcgM2(z,mu); // ADEF-2 this is identity. Axpy possible to eliminate | ||||
|        | ||||
|       p[peri_kp]=mu; | ||||
|  | ||||
|       axpy(p,b,p,mu);  // mu = A r | ||||
|       // Standard search direction  p -> z + b p     | ||||
|       b = (rtzp)/rtz; | ||||
|        | ||||
|       int northog; | ||||
|       // k=zero  <=> peri_kp=1;        northog = 1 | ||||
|       // k=1     <=> peri_kp=2;        northog = 2 | ||||
|       // ...               ...                  ... | ||||
|       // k=mmax-2<=> peri_kp=mmax-1;   northog = mmax-1 | ||||
|       // k=mmax-1<=> peri_kp=0;        northog = 1 | ||||
|  | ||||
|       //    northog     = (peri_kp==0)?1:peri_kp; // This is the fCG(mmax) algorithm | ||||
|       northog     = (k>mmax-1)?(mmax-1):k;        // This is the fCG-Tr(mmax-1) algorithm | ||||
|      | ||||
|       std::cout<<GridLogMessage<<"HDCG::fPcg iteration "<<k<<" : orthogonalising to last "<<northog<<" vectors\n"; | ||||
|       for(int back=0; back < northog; back++){ | ||||
| 	int peri_back = (k-back)%mmax; | ||||
| 	RealD pbApk= real(innerProduct(mmp[peri_back],p[peri_kp])); | ||||
| 	RealD beta = -pbApk/pAp[peri_back]; | ||||
| 	axpy(p[peri_kp],beta,p[peri_back],p[peri_kp]); | ||||
|       } | ||||
|  | ||||
|       RealD rrn=sqrt(rn/ssq); | ||||
|       RealD rtn=sqrt(rtz/ssq); | ||||
|       std::cout<<GridLogMessage<<"HDCG: Pcg k= "<<k<<" residual = "<<rrn<<std::endl; | ||||
|       RealD rtnp=sqrt(rtzp/ssq); | ||||
|  | ||||
|       if ( ipcg ) { | ||||
| 	axpy(rp,0.0,r,r); | ||||
|       } | ||||
|       std::cout<<GridLogMessage<<"HDCG: fPcg k= "<<k<<" residual = "<<rrn<<"\n"; | ||||
|  | ||||
|       // Stopping condition | ||||
|       if ( rn <= rsq ) {  | ||||
|  | ||||
| 	HDCGTimer.Stop(); | ||||
| 	std::cout<<GridLogMessage<<"HDCG: Pcg converged in "<<k<<" iterations and "<<HDCGTimer.Elapsed()<<std::endl;; | ||||
|  | ||||
| 	_FineLinop.HermOp(x,mmp);			   | ||||
| 	axpy(tmp,-1.0,src,mmp); | ||||
|  | ||||
| 	RealD  mmpnorm = sqrt(norm2(mmp)); | ||||
| 	std::cout<<GridLogMessage<<"HDCG: fPcg converged in "<<k<<" iterations and "<<HDCGTimer.Elapsed()<<std::endl;; | ||||
| 	 | ||||
| 	_FineLinop.HermOp(x,mmp[0]);			   | ||||
| 	axpy(tmp,-1.0,src,mmp[0]); | ||||
| 	 | ||||
| 	RealD  mmpnorm = sqrt(norm2(mmp[0])); | ||||
| 	RealD  xnorm   = sqrt(norm2(x)); | ||||
| 	RealD  srcnorm = sqrt(norm2(src)); | ||||
| 	RealD  tmpnorm = sqrt(norm2(tmp)); | ||||
| 	RealD  true_residual = tmpnorm/srcnorm; | ||||
| 	std::cout<<GridLogMessage | ||||
| 		 <<"HDCG: true residual is "<<true_residual | ||||
| 		 <<" solution "<<xnorm | ||||
| 		 <<" source "<<srcnorm | ||||
| 		 <<" mmp "<<mmpnorm	   | ||||
| 		 <<std::endl; | ||||
|  | ||||
| 	return; | ||||
|       } | ||||
|  | ||||
|     } | ||||
|     std::cout<<GridLogMessage<<"HDCG: not converged"<<std::endl; | ||||
|     RealD  xnorm   = sqrt(norm2(x)); | ||||
|     RealD  srcnorm = sqrt(norm2(src)); | ||||
|     std::cout<<GridLogMessage<<"HDCG: non-converged solution "<<xnorm<<" source "<<srcnorm<<std::endl; | ||||
|      | ||||
|     return ; | ||||
| #else | ||||
|   RealD f; | ||||
|   RealD rtzp,rtz,a,d,b; | ||||
|   RealD rptzp; | ||||
|  | ||||
|   ///////////////////////////// | ||||
|   // Set up history vectors | ||||
|   ///////////////////////////// | ||||
|   int mmax = 20; | ||||
|   std::vector<Field> p(mmax,grid); | ||||
|   std::vector<Field> mmp(mmax,grid); | ||||
|   std::vector<RealD> pAp(mmax); | ||||
|   Field z(grid); | ||||
|   Field tmp(grid); | ||||
|   Field  mp (grid); | ||||
|   Field  r  (grid); | ||||
|   Field  mu (grid); | ||||
|  | ||||
|   //Initial residual computation & set up | ||||
|   RealD guess   = norm2(x); | ||||
|   RealD src_nrm = norm2(src); | ||||
|  | ||||
|   if ( src_nrm == 0.0 ) { | ||||
|     std::cout << GridLogMessage<<"HDCG: fPcg given trivial source norm "<<src_nrm<<std::endl; | ||||
|     x=Zero(); | ||||
|   } | ||||
|   RealD tn; | ||||
|  | ||||
|   GridStopWatch HDCGTimer; | ||||
|   HDCGTimer.Start(); | ||||
|   ////////////////////////// | ||||
|   // x0 = Vstart -- possibly modify guess | ||||
|   ////////////////////////// | ||||
|   Vstart(x,src); | ||||
|  | ||||
|   // r0 = b -A x0 | ||||
|   _FineLinop.HermOp(x,mmp[0]); | ||||
|   axpy (r, -1.0,mmp[0], src);    // Recomputes r=src-Ax0 | ||||
|   { | ||||
|     double n1 = norm2(x); | ||||
|     double n2 = norm2(mmp[0]); | ||||
|     double n3 = norm2(r); | ||||
|     std::cout<<GridLogMessage<<"x,vstart,r = "<<n1<<" "<<n2<<" "<<n3<<std::endl; | ||||
|   } | ||||
|  | ||||
|   ////////////////////////////////// | ||||
|   // Compute z = M1 x | ||||
|   ////////////////////////////////// | ||||
|   PcgM1(r,z); | ||||
|   rtzp =real(innerProduct(r,z)); | ||||
|  | ||||
|   /////////////////////////////////////// | ||||
|   // Solve for Mss mu = P A z and set p = z-mu | ||||
|   // Def2: p = 1 - Q Az = Pright z  | ||||
|   // Other algos M2 is trivial | ||||
|   /////////////////////////////////////// | ||||
|   PcgM2(z,p[0]); | ||||
|  | ||||
|   RealD ssq =  norm2(src); | ||||
|   RealD rsq =  ssq*Tolerance*Tolerance; | ||||
|  | ||||
|   std::cout << GridLogMessage<<"HDCG: k=0 residual "<<rtzp<<" rsq "<<rsq<<"\n"; | ||||
|  | ||||
|   Field pp(grid); | ||||
|  | ||||
|   for (int k=0;k<=MaxIterations;k++){ | ||||
|      | ||||
|     int peri_k  = k % mmax; | ||||
|     int peri_kp = (k+1) % mmax; | ||||
|  | ||||
|     rtz=rtzp; | ||||
|     d= PcgM3(p[peri_k],mmp[peri_k]); | ||||
|     a = rtz/d; | ||||
|      | ||||
|     // Memorise this | ||||
|     pAp[peri_k] = d; | ||||
|  | ||||
|      | ||||
|     axpy(x,a,p[peri_k],x); | ||||
|     RealD rn = axpy_norm(r,-a,mmp[peri_k],r); | ||||
|  | ||||
|     // Compute z = M x | ||||
|     PcgM1(r,z); | ||||
|  | ||||
|     { | ||||
|       RealD n1,n2; | ||||
|       n1=norm2(r); | ||||
|       n2=norm2(z); | ||||
|       std::cout << GridLogMessage<<"HDCG::fPcg iteration "<<k<<" : vector r,z "<<n1<<" "<<n2<<"\n"; | ||||
|     } | ||||
|     rtzp =real(innerProduct(r,z)); | ||||
|     std::cout << GridLogMessage<<"HDCG::fPcg iteration "<<k<<" : inner rtzp "<<rtzp<<"\n"; | ||||
|  | ||||
|     //    PcgM2(z,p[0]); | ||||
|     PcgM2(z,mu); // ADEF-2 this is identity. Axpy possible to eliminate | ||||
|  | ||||
|     p[peri_kp]=mu; | ||||
|  | ||||
|     // Standard search direction  p -> z + b p    ; b =  | ||||
|     b = (rtzp)/rtz; | ||||
|  | ||||
|     int northog; | ||||
|     // k=zero  <=> peri_kp=1;        northog = 1 | ||||
|     // k=1     <=> peri_kp=2;        northog = 2 | ||||
|     // ...               ...                  ... | ||||
|     // k=mmax-2<=> peri_kp=mmax-1;   northog = mmax-1 | ||||
|     // k=mmax-1<=> peri_kp=0;        northog = 1 | ||||
|  | ||||
|     //    northog     = (peri_kp==0)?1:peri_kp; // This is the fCG(mmax) algorithm | ||||
|     northog     = (k>mmax-1)?(mmax-1):k;        // This is the fCG-Tr(mmax-1) algorithm | ||||
|      | ||||
|     std::cout<<GridLogMessage<<"HDCG::fPcg iteration "<<k<<" : orthogonalising to last "<<northog<<" vectors\n"; | ||||
|     for(int back=0; back < northog; back++){ | ||||
|       int peri_back = (k-back)%mmax; | ||||
|       RealD pbApk= real(innerProduct(mmp[peri_back],p[peri_kp])); | ||||
|       RealD beta = -pbApk/pAp[peri_back]; | ||||
|       axpy(p[peri_kp],beta,p[peri_back],p[peri_kp]); | ||||
|     } | ||||
|  | ||||
|     RealD rrn=sqrt(rn/ssq); | ||||
|     RealD rtn=sqrt(rtz/ssq); | ||||
|     RealD rtnp=sqrt(rtzp/ssq); | ||||
|  | ||||
|     std::cout<<GridLogMessage<<"HDCG: fPcg k= "<<k<<" residual = "<<rrn<<"\n"; | ||||
|  | ||||
|     // Stopping condition | ||||
|     if ( rn <= rsq ) {  | ||||
|  | ||||
|       HDCGTimer.Stop(); | ||||
|       std::cout<<GridLogMessage<<"HDCG: fPcg converged in "<<k<<" iterations and "<<HDCGTimer.Elapsed()<<std::endl;; | ||||
|        | ||||
|       _FineLinop.HermOp(x,mmp[0]);			   | ||||
|       axpy(tmp,-1.0,src,mmp[0]); | ||||
|        | ||||
|       RealD  mmpnorm = sqrt(norm2(mmp[0])); | ||||
|       RealD  xnorm   = sqrt(norm2(x)); | ||||
|       RealD  srcnorm = sqrt(norm2(src)); | ||||
|       RealD  tmpnorm = sqrt(norm2(tmp)); | ||||
|       RealD  true_residual = tmpnorm/srcnorm; | ||||
|       std::cout<<GridLogMessage | ||||
| 	       <<"HDCG: true residual is "<<true_residual | ||||
| 	       <<" solution "<<xnorm | ||||
| 	       <<" source "<<srcnorm | ||||
| 	       <<" mmp "<<mmpnorm	   | ||||
| 	       <<std::endl; | ||||
|        | ||||
|       return; | ||||
| 	return; | ||||
|       } | ||||
|  | ||||
|     } | ||||
|     HDCGTimer.Stop(); | ||||
|     std::cout<<GridLogMessage<<"HDCG: not converged "<<HDCGTimer.Elapsed()<<std::endl; | ||||
|     RealD  xnorm   = sqrt(norm2(x)); | ||||
|     RealD  srcnorm = sqrt(norm2(src)); | ||||
|     std::cout<<GridLogMessage<<"HDCG: non-converged solution "<<xnorm<<" source "<<srcnorm<<std::endl; | ||||
|   } | ||||
|  | ||||
|  | ||||
|  | ||||
|   virtual void operator() (std::vector<Field> &src, std::vector<Field> &x) | ||||
|   { | ||||
|     std::cout << GridLogMessage<<"HDCG: mrhs fPcg starting"<<std::endl; | ||||
|     src[0].Grid()->Barrier(); | ||||
|     int nrhs = src.size(); | ||||
|     std::vector<RealD> f(nrhs); | ||||
|     std::vector<RealD> rtzp(nrhs); | ||||
|     std::vector<RealD> rtz(nrhs); | ||||
|     std::vector<RealD> a(nrhs); | ||||
|     std::vector<RealD> d(nrhs); | ||||
|     std::vector<RealD> b(nrhs); | ||||
|     std::vector<RealD> rptzp(nrhs); | ||||
|     ///////////////////////////// | ||||
|     // Set up history vectors | ||||
|     ///////////////////////////// | ||||
|     int mmax = 2; | ||||
|     std::cout << GridLogMessage<<"HDCG: fPcg allocating"<<std::endl; | ||||
|     src[0].Grid()->Barrier(); | ||||
|     std::vector<std::vector<Field> > p(nrhs);   for(int r=0;r<nrhs;r++)  p[r].resize(mmax,grid); | ||||
|     std::cout << GridLogMessage<<"HDCG: fPcg allocated p"<<std::endl; | ||||
|     src[0].Grid()->Barrier(); | ||||
|     std::vector<std::vector<Field> > mmp(nrhs); for(int r=0;r<nrhs;r++) mmp[r].resize(mmax,grid); | ||||
|     std::cout << GridLogMessage<<"HDCG: fPcg allocated mmp"<<std::endl; | ||||
|     src[0].Grid()->Barrier(); | ||||
|     std::vector<std::vector<RealD> > pAp(nrhs); for(int r=0;r<nrhs;r++) pAp[r].resize(mmax); | ||||
|     std::cout << GridLogMessage<<"HDCG: fPcg allocated pAp"<<std::endl; | ||||
|     src[0].Grid()->Barrier(); | ||||
|     std::vector<Field> z(nrhs,grid); | ||||
|     std::vector<Field>  mp (nrhs,grid); | ||||
|     std::vector<Field>  r  (nrhs,grid); | ||||
|     std::vector<Field>  mu (nrhs,grid); | ||||
|     std::cout << GridLogMessage<<"HDCG: fPcg allocated z,mp,r,mu"<<std::endl; | ||||
|     src[0].Grid()->Barrier(); | ||||
|  | ||||
|     //Initial residual computation & set up | ||||
|     std::vector<RealD> src_nrm(nrhs); | ||||
|     for(int rhs=0;rhs<nrhs;rhs++) { | ||||
|       src_nrm[rhs]=norm2(src[rhs]); | ||||
|       assert(src_nrm[rhs]!=0.0); | ||||
|     } | ||||
|     std::vector<RealD> tn(nrhs); | ||||
|  | ||||
|     GridStopWatch HDCGTimer; | ||||
|     HDCGTimer.Start(); | ||||
|     ////////////////////////// | ||||
|     // x0 = Vstart -- possibly modify guess | ||||
|     ////////////////////////// | ||||
|     for(int rhs=0;rhs<nrhs;rhs++){ | ||||
|       Vstart(x[rhs],src[rhs]); | ||||
|  | ||||
|       // r0 = b -A x0 | ||||
|       _FineLinop.HermOp(x[rhs],mmp[rhs][0]); | ||||
|       axpy (r[rhs], -1.0,mmp[rhs][0], src[rhs]);    // Recomputes r=src-Ax0 | ||||
|     } | ||||
|  | ||||
|   } | ||||
|   HDCGTimer.Stop(); | ||||
|   std::cout<<GridLogMessage<<"HDCG: not converged "<<HDCGTimer.Elapsed()<<std::endl; | ||||
|   RealD  xnorm   = sqrt(norm2(x)); | ||||
|   RealD  srcnorm = sqrt(norm2(src)); | ||||
|   std::cout<<GridLogMessage<<"HDCG: non-converged solution "<<xnorm<<" source "<<srcnorm<<std::endl; | ||||
| #endif | ||||
|     ////////////////////////////////// | ||||
|     // Compute z = M1 x | ||||
|     ////////////////////////////////// | ||||
|     // This needs a multiRHS version for acceleration | ||||
|     PcgM1(r,z); | ||||
|  | ||||
|     std::vector<RealD> ssq(nrhs); | ||||
|     std::vector<RealD> rsq(nrhs); | ||||
|     std::vector<Field> pp(nrhs,grid); | ||||
|  | ||||
|     for(int rhs=0;rhs<nrhs;rhs++){ | ||||
|       rtzp[rhs] =real(innerProduct(r[rhs],z[rhs])); | ||||
|       p[rhs][0]=z[rhs]; | ||||
|       ssq[rhs]=norm2(src[rhs]); | ||||
|       rsq[rhs]=  ssq[rhs]*Tolerance*Tolerance; | ||||
|       std::cout << GridLogMessage<<"mrhs HDCG: "<<rhs<<" k=0 residual "<<rtzp[rhs]<<" rsq "<<rsq[rhs]<<"\n"; | ||||
|     } | ||||
|  | ||||
|     std::vector<RealD> rn(nrhs); | ||||
|     for (int k=0;k<=MaxIterations;k++){ | ||||
|      | ||||
|       int peri_k  = k % mmax; | ||||
|       int peri_kp = (k+1) % mmax; | ||||
|  | ||||
|       for(int rhs=0;rhs<nrhs;rhs++){ | ||||
| 	rtz[rhs]=rtzp[rhs]; | ||||
| 	d[rhs]= PcgM3(p[rhs][peri_k],mmp[rhs][peri_k]); | ||||
| 	a[rhs] = rtz[rhs]/d[rhs]; | ||||
|      | ||||
| 	// Memorise this | ||||
| 	pAp[rhs][peri_k] = d[rhs]; | ||||
|  | ||||
| 	axpy(x[rhs],a[rhs],p[rhs][peri_k],x[rhs]); | ||||
| 	rn[rhs] = axpy_norm(r[rhs],-a[rhs],mmp[rhs][peri_k],r[rhs]); | ||||
|       } | ||||
|  | ||||
|       // Compute z = M x (for *all* RHS) | ||||
|       PcgM1(r,z); | ||||
|  | ||||
|       RealD max_rn=0.0; | ||||
|       for(int rhs=0;rhs<nrhs;rhs++){ | ||||
|  | ||||
| 	rtzp[rhs] =real(innerProduct(r[rhs],z[rhs])); | ||||
|  | ||||
| 	std::cout << GridLogMessage<<"HDCG::fPcg rhs"<<rhs<<" iteration "<<k<<" : inner rtzp "<<rtzp[rhs]<<"\n"; | ||||
| 	 | ||||
| 	mu[rhs]=z[rhs]; | ||||
|  | ||||
| 	p[rhs][peri_kp]=mu[rhs]; | ||||
|  | ||||
| 	// Standard search direction p == z + b p  | ||||
| 	b[rhs] = (rtzp[rhs])/rtz[rhs]; | ||||
|  | ||||
| 	int northog = (k>mmax-1)?(mmax-1):k;        // This is the fCG-Tr(mmax-1) algorithm | ||||
| 	std::cout<<GridLogMessage<<"HDCG::fPcg iteration "<<k<<" : orthogonalising to last "<<northog<<" vectors\n"; | ||||
| 	for(int back=0; back < northog; back++){ | ||||
| 	  int peri_back = (k-back)%mmax; | ||||
| 	  RealD pbApk= real(innerProduct(mmp[rhs][peri_back],p[rhs][peri_kp])); | ||||
| 	  RealD beta = -pbApk/pAp[rhs][peri_back]; | ||||
| 	  axpy(p[rhs][peri_kp],beta,p[rhs][peri_back],p[rhs][peri_kp]); | ||||
| 	} | ||||
|  | ||||
| 	RealD rrn=sqrt(rn[rhs]/ssq[rhs]); | ||||
| 	RealD rtn=sqrt(rtz[rhs]/ssq[rhs]); | ||||
| 	RealD rtnp=sqrt(rtzp[rhs]/ssq[rhs]); | ||||
| 	 | ||||
| 	std::cout<<GridLogMessage<<"HDCG: rhs "<<rhs<<"fPcg k= "<<k<<" residual = "<<rrn<<"\n"; | ||||
| 	if ( rrn > max_rn ) max_rn = rrn; | ||||
|       } | ||||
|  | ||||
|       // Stopping condition based on worst case | ||||
|       if ( max_rn <= Tolerance ) {  | ||||
|  | ||||
| 	HDCGTimer.Stop(); | ||||
| 	std::cout<<GridLogMessage<<"HDCG: mrhs fPcg converged in "<<k<<" iterations and "<<HDCGTimer.Elapsed()<<std::endl;; | ||||
|  | ||||
| 	for(int rhs=0;rhs<nrhs;rhs++){ | ||||
| 	  _FineLinop.HermOp(x[rhs],mmp[rhs][0]);			   | ||||
| 	  Field tmp(grid); | ||||
| 	  axpy(tmp,-1.0,src[rhs],mmp[rhs][0]); | ||||
|        | ||||
| 	  RealD  mmpnorm = sqrt(norm2(mmp[rhs][0])); | ||||
| 	  RealD  xnorm   = sqrt(norm2(x[rhs])); | ||||
| 	  RealD  srcnorm = sqrt(norm2(src[rhs])); | ||||
| 	  RealD  tmpnorm = sqrt(norm2(tmp)); | ||||
| 	  RealD  true_residual = tmpnorm/srcnorm; | ||||
| 	  std::cout<<GridLogMessage | ||||
| 		   <<"HDCG: true residual ["<<rhs<<"] is "<<true_residual | ||||
| 		   <<" solution "<<xnorm | ||||
| 		   <<" source "<<srcnorm | ||||
| 		   <<" mmp "<<mmpnorm	   | ||||
| 		   <<std::endl; | ||||
| 	} | ||||
| 	return; | ||||
|       } | ||||
|        | ||||
|     } | ||||
|     HDCGTimer.Stop(); | ||||
|     std::cout<<GridLogMessage<<"HDCG: not converged "<<HDCGTimer.Elapsed()<<std::endl; | ||||
|     for(int rhs=0;rhs<nrhs;rhs++){ | ||||
|       RealD  xnorm   = sqrt(norm2(x[rhs])); | ||||
|       RealD  srcnorm = sqrt(norm2(src[rhs])); | ||||
|       std::cout<<GridLogMessage<<"HDCG: non-converged solution "<<xnorm<<" source "<<srcnorm<<std::endl; | ||||
|     } | ||||
|   } | ||||
|    | ||||
|  | ||||
|  public: | ||||
|  | ||||
|   virtual void PcgM1(std::vector<Field> & in,std::vector<Field> & out) | ||||
|   { | ||||
|     std::cout << "PcgM1 default (cheat) mrhs versoin"<<std::endl; | ||||
|     for(int rhs=0;rhs<in.size();rhs++){ | ||||
|       this->PcgM1(in[rhs],out[rhs]); | ||||
|     } | ||||
|   } | ||||
|   virtual void PcgM1(Field & in, Field & out)     =0; | ||||
|   virtual void Vstart(Field & x,const Field & src)=0; | ||||
|  | ||||
| @@ -440,6 +500,7 @@ class TwoLevelADEF2 : public TwoLevelCG<Field> | ||||
|  | ||||
|   virtual void Vstart(Field & x,const Field & src) | ||||
|   { | ||||
|     std::cout << GridLogMessage<<"HDCG: fPcg Vstart "<<std::endl; | ||||
|     /////////////////////////////////// | ||||
|     // Choose x_0 such that  | ||||
|     // x_0 = guess +  (A_ss^inv) r_s = guess + Ass_inv [src -Aguess] | ||||
| @@ -456,14 +517,97 @@ class TwoLevelADEF2 : public TwoLevelCG<Field> | ||||
|     CoarseField PleftProj(this->coarsegrid); | ||||
|     CoarseField PleftMss_proj(this->coarsegrid); | ||||
|  | ||||
|     std::cout << GridLogMessage<<"HDCG: fPcg Vstart projecting "<<std::endl; | ||||
|     this->_Aggregates.ProjectToSubspace(PleftProj,src);      | ||||
|     std::cout << GridLogMessage<<"HDCG: fPcg Vstart coarse solve "<<std::endl; | ||||
|     this->_CoarseSolverPrecise(PleftProj,PleftMss_proj); // Ass^{-1} r_s | ||||
|     std::cout << GridLogMessage<<"HDCG: fPcg Vstart promote "<<std::endl; | ||||
|     this->_Aggregates.PromoteFromSubspace(PleftMss_proj,x);   | ||||
|  | ||||
|   } | ||||
|  | ||||
| }; | ||||
|  | ||||
| template<class Field, class CoarseField, class Aggregation> | ||||
| class TwoLevelADEF2mrhs : public TwoLevelADEF2<Field,CoarseField,Aggregation> | ||||
| { | ||||
| public: | ||||
|   GridBase *coarsegridmrhs; | ||||
|   LinearFunction<CoarseField> &_CoarseSolverMrhs; | ||||
|   LinearFunction<CoarseField> &_CoarseGuesser; | ||||
|   TwoLevelADEF2mrhs(RealD tol, | ||||
| 		    Integer maxit, | ||||
| 		    LinearOperatorBase<Field>    &FineLinop, | ||||
| 		    LinearFunction<Field>        &Smoother, | ||||
| 		    LinearFunction<CoarseField>  &CoarseSolver, | ||||
| 		    LinearFunction<CoarseField>  &CoarseSolverPrecise, | ||||
| 		    LinearFunction<CoarseField>  &CoarseSolverMrhs, | ||||
| 		    LinearFunction<CoarseField>  &CoarseGuesser, | ||||
| 		    GridBase *rhsgrid, | ||||
| 		    Aggregation &Aggregates) : | ||||
|     TwoLevelADEF2<Field,CoarseField,Aggregation>(tol, maxit,FineLinop,Smoother,CoarseSolver,CoarseSolverPrecise,Aggregates), | ||||
|     _CoarseSolverMrhs(CoarseSolverMrhs), | ||||
|     _CoarseGuesser(CoarseGuesser) | ||||
|   { | ||||
|     coarsegridmrhs = rhsgrid; | ||||
|   }; | ||||
|    | ||||
|   virtual void PcgM1(std::vector<Field> & in,std::vector<Field> & out){ | ||||
|  | ||||
|     int nrhs=in.size(); | ||||
|     std::cout << " mrhs PcgM1 for "<<nrhs<<" right hand sides"<<std::endl; | ||||
|     // [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min] | ||||
|     Field tmp(this->grid); | ||||
|     std::vector<Field> Min(nrhs,this->grid); | ||||
|     CoarseField PleftProj(this->coarsegrid); | ||||
|     CoarseField PleftMss_proj(this->coarsegrid); | ||||
|  | ||||
|     CoarseField PleftProjMrhs(this->coarsegridmrhs); | ||||
|     CoarseField PleftMss_projMrhs(this->coarsegridmrhs); | ||||
|  | ||||
|     for(int rhs=0;rhs<nrhs;rhs++) { | ||||
|       this->grid->Barrier(); | ||||
|       std::cout << " Calling smoother for "<<rhs<<std::endl; | ||||
|       this->grid->Barrier(); | ||||
|       this->_Smoother(in[rhs],Min[rhs]); | ||||
|       this->grid->Barrier(); | ||||
|       std::cout << " smoother done "<<rhs<<std::endl; | ||||
|       this->grid->Barrier(); | ||||
|       this->_FineLinop.HermOp(Min[rhs],out[rhs]); | ||||
|       this->grid->Barrier(); | ||||
|       std::cout << " Hermop for "<<rhs<<std::endl; | ||||
|       this->grid->Barrier(); | ||||
|       axpy(tmp,-1.0,out[rhs],in[rhs]);          // tmp  = in - A Min | ||||
|       this->grid->Barrier(); | ||||
|       std::cout << " axpy "<<rhs<<std::endl; | ||||
|       this->grid->Barrier(); | ||||
|       this->_Aggregates.ProjectToSubspace(PleftProj,tmp);     // can optimise later | ||||
|       this->grid->Barrier(); | ||||
|       std::cout << " project "<<rhs<<std::endl; | ||||
|       this->grid->Barrier(); | ||||
|       InsertSlice(PleftProj,PleftProjMrhs,rhs,0); | ||||
|       this->grid->Barrier(); | ||||
|       std::cout << " insert rhs "<<rhs<<std::endl; | ||||
|       this->grid->Barrier(); | ||||
|       this->_CoarseGuesser(PleftProj,PleftMss_proj); | ||||
|       this->grid->Barrier(); | ||||
|       std::cout << " insert guess "<<rhs<<std::endl; | ||||
|       this->grid->Barrier(); | ||||
|       InsertSlice(PleftMss_proj,PleftMss_projMrhs,rhs,0); | ||||
|     } | ||||
|  | ||||
|     std::cout << " Coarse solve "<<std::endl; | ||||
|     this->_CoarseSolverMrhs(PleftProjMrhs,PleftMss_projMrhs); // Ass^{-1} [in - A Min]_s | ||||
|  | ||||
|     for(int rhs=0;rhs<nrhs;rhs++) { | ||||
|       ExtractSlice(PleftMss_proj,PleftMss_projMrhs,rhs,0); | ||||
|       this->_Aggregates.PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]   | ||||
|       axpy(out[rhs],1.0,Min[rhs],tmp); // Min+tmp | ||||
|     } | ||||
|     std::cout << " Extracted "<<std::endl; | ||||
|   } | ||||
| }; | ||||
|    | ||||
| template<class Field> | ||||
| class TwoLevelADEF1defl : public TwoLevelCG<Field> | ||||
| { | ||||
|   | ||||
| @@ -243,9 +243,14 @@ int main (int argc, char ** argv) | ||||
|   Coordinate rhSimd({vComplex::Nsimd(),1, 1,1,1,1}); | ||||
|  | ||||
|   GridCartesian *CoarseMrhs = new GridCartesian(rhLatt,rhSimd,rhMpi);  | ||||
|  | ||||
|    | ||||
|   MultiGeneralCoarsenedMatrix mrhs(LittleDiracOp,CoarseMrhs); | ||||
|  | ||||
|   typedef decltype(mrhs) MultiGeneralCoarsenedMatrix_t; | ||||
|    | ||||
|   ////////////////////////////////////////// | ||||
|   // Test against single RHS | ||||
|   ////////////////////////////////////////// | ||||
|   { | ||||
|     GridParallelRNG          rh_CRNG(CoarseMrhs);rh_CRNG.SeedFixedIntegers(cseeds); | ||||
|     CoarseVector rh_phi(CoarseMrhs); | ||||
| @@ -287,7 +292,22 @@ int main (int argc, char ** argv) | ||||
|     std::cout << nrhs<< " srhs " << t1/ncall/nrhs <<" us"<<std::endl; | ||||
|   } | ||||
|  | ||||
|   ////////////////////////////////////////// | ||||
|   // Test against single RHS | ||||
|   ////////////////////////////////////////// | ||||
|   { | ||||
|     typedef HermitianLinearOperator<MultiGeneralCoarsenedMatrix_t,CoarseVector> HermMatrix; | ||||
|     HermMatrix MrhsCoarseOp     (mrhs); | ||||
|  | ||||
|     GridParallelRNG          rh_CRNG(CoarseMrhs);rh_CRNG.SeedFixedIntegers(cseeds); | ||||
|     ConjugateGradient<CoarseVector>  mrhsCG(1.0e-8,2000,true); | ||||
|     CoarseVector rh_res(CoarseMrhs); | ||||
|     CoarseVector rh_src(CoarseMrhs); | ||||
|     random(rh_CRNG,rh_src); | ||||
|     rh_res= Zero(); | ||||
|     mrhsCG(MrhsCoarseOp,rh_src,rh_res); | ||||
|   } | ||||
|    | ||||
|   std::cout<<GridLogMessage<<std::endl; | ||||
|   std::cout<<GridLogMessage<<std::endl; | ||||
|   std::cout<<GridLogMessage<<"*******************************************"<<std::endl; | ||||
|   | ||||
							
								
								
									
										739
									
								
								tests/debug/Test_general_coarse_hdcg_phys48.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										739
									
								
								tests/debug/Test_general_coarse_hdcg_phys48.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,739 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./tests/Test_general_coarse_hdcg.cc | ||||
|  | ||||
|     Copyright (C) 2023 | ||||
|  | ||||
| Author: Peter Boyle <pboyle@bnl.gov> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
| #include <Grid/lattice/PaddedCell.h> | ||||
| #include <Grid/stencil/GeneralLocalStencil.h> | ||||
| //#include <Grid/algorithms/GeneralCoarsenedMatrix.h> | ||||
| #include <Grid/algorithms/iterative/AdefGeneric.h> | ||||
| #include <Grid/algorithms/iterative/BlockConjugateGradient.h> | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
|  | ||||
| template<class Coarsened> | ||||
| void SaveOperator(Coarsened &Operator,std::string file) | ||||
| { | ||||
| #ifdef HAVE_LIME | ||||
|   emptyUserRecord record; | ||||
|   ScidacWriter WR(Operator.Grid()->IsBoss()); | ||||
|   assert(Operator._A.size()==Operator.geom.npoint); | ||||
|   WR.open(file); | ||||
|   for(int p=0;p<Operator._A.size();p++){ | ||||
|     auto tmp = Operator.Cell.Extract(Operator._A[p]); | ||||
|     WR.writeScidacFieldRecord(tmp,record,0,0); | ||||
|     //    WR.writeScidacFieldRecord(tmp,record,0,BINARYIO_LEXICOGRAPHIC); | ||||
|   } | ||||
|   WR.close(); | ||||
| #endif | ||||
| } | ||||
| template<class Coarsened> | ||||
| void LoadOperator(Coarsened &Operator,std::string file) | ||||
| { | ||||
| #ifdef HAVE_LIME | ||||
|   emptyUserRecord record; | ||||
|   Grid::ScidacReader RD ; | ||||
|   RD.open(file); | ||||
|   assert(Operator._A.size()==Operator.geom.npoint); | ||||
|   for(int p=0;p<Operator.geom.npoint;p++){ | ||||
|     conformable(Operator._A[p].Grid(),Operator.CoarseGrid()); | ||||
|     //    RD.readScidacFieldRecord(Operator._A[p],record,BINARYIO_LEXICOGRAPHIC); | ||||
|     RD.readScidacFieldRecord(Operator._A[p],record,0); | ||||
|   }     | ||||
|   RD.close(); | ||||
|   Operator.ExchangeCoarseLinks(); | ||||
| #endif | ||||
| } | ||||
| template<class Coarsened> | ||||
| void ReLoadOperator(Coarsened &Operator,std::string file) | ||||
| { | ||||
| #ifdef HAVE_LIME | ||||
|   emptyUserRecord record; | ||||
|   Grid::ScidacReader RD ; | ||||
|   RD.open(file); | ||||
|   assert(Operator._A.size()==Operator.geom.npoint); | ||||
|   for(int p=0;p<Operator.geom.npoint;p++){ | ||||
|     auto tmp=Operator.Cell.Extract(Operator._A[p]); | ||||
|     RD.readScidacFieldRecord(tmp,record,0); | ||||
|     Operator._A[p] = Operator.Cell.ExchangePeriodic(tmp); | ||||
|   }     | ||||
|   RD.close(); | ||||
| #endif | ||||
| } | ||||
| template<class aggregation> | ||||
| void SaveBasis(aggregation &Agg,std::string file) | ||||
| { | ||||
| #ifdef HAVE_LIME | ||||
|   emptyUserRecord record; | ||||
|   ScidacWriter WR(Agg.FineGrid->IsBoss()); | ||||
|   WR.open(file); | ||||
|   for(int b=0;b<Agg.subspace.size();b++){ | ||||
|     //WR.writeScidacFieldRecord(Agg.subspace[b],record,0,BINARYIO_LEXICOGRAPHIC); | ||||
|     WR.writeScidacFieldRecord(Agg.subspace[b],record,0,0); | ||||
|   } | ||||
|   WR.close(); | ||||
| #endif | ||||
| } | ||||
| template<class aggregation> | ||||
| void LoadBasis(aggregation &Agg, std::string file) | ||||
| { | ||||
| #ifdef HAVE_LIME | ||||
|   emptyUserRecord record; | ||||
|   ScidacReader RD ; | ||||
|   RD.open(file); | ||||
|   for(int b=0;b<Agg.subspace.size();b++){ | ||||
|     //    RD.readScidacFieldRecord(Agg.subspace[b],record,BINARYIO_LEXICOGRAPHIC); | ||||
|     RD.readScidacFieldRecord(Agg.subspace[b],record,0); | ||||
|   }     | ||||
|   RD.close(); | ||||
| #endif | ||||
| } | ||||
|  | ||||
| RealD InverseApproximation(RealD x){ | ||||
|   return 1.0/x; | ||||
| } | ||||
|  | ||||
| // Want Op in CoarsenOp to call MatPcDagMatPc | ||||
| template<class Field> | ||||
| class HermOpAdaptor : public LinearOperatorBase<Field> | ||||
| { | ||||
|   LinearOperatorBase<Field> & wrapped; | ||||
| public: | ||||
|   HermOpAdaptor(LinearOperatorBase<Field> &wrapme) : wrapped(wrapme)  {}; | ||||
|   void Op     (const Field &in, Field &out)   { wrapped.HermOp(in,out);  } | ||||
|   void HermOp(const Field &in, Field &out)    { wrapped.HermOp(in,out); } | ||||
|   void AdjOp     (const Field &in, Field &out){ wrapped.HermOp(in,out);  } | ||||
|   void OpDiag (const Field &in, Field &out)                  {    assert(0);  } | ||||
|   void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  } | ||||
|   void OpDirAll  (const Field &in, std::vector<Field> &out)  {    assert(0);  }; | ||||
|   void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){    assert(0);  } | ||||
| }; | ||||
| template<class Field> class ChebyshevSmoother : public LinearFunction<Field> | ||||
| { | ||||
| public: | ||||
|   using LinearFunction<Field>::operator(); | ||||
|   typedef LinearOperatorBase<Field> FineOperator; | ||||
|   FineOperator   & _SmootherOperator; | ||||
|   Chebyshev<Field> Cheby; | ||||
|   ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator) : | ||||
|     _SmootherOperator(SmootherOperator), | ||||
|     Cheby(_lo,_hi,_ord,InverseApproximation) | ||||
|   { | ||||
|     std::cout << GridLogMessage<<" Chebyshev smoother order "<<_ord<<" ["<<_lo<<","<<_hi<<"]"<<std::endl; | ||||
|   }; | ||||
|   void operator() (const Field &in, Field &out)  | ||||
|   { | ||||
|     Field tmp(in.Grid()); | ||||
|     tmp = in; | ||||
|     Cheby(_SmootherOperator,tmp,out); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<class Field> class CGSmoother : public LinearFunction<Field> | ||||
| { | ||||
| public: | ||||
|   using LinearFunction<Field>::operator(); | ||||
|   typedef LinearOperatorBase<Field> FineOperator; | ||||
|   FineOperator   & _SmootherOperator; | ||||
|   int iters; | ||||
|   CGSmoother(int _iters, FineOperator &SmootherOperator) : | ||||
|     _SmootherOperator(SmootherOperator), | ||||
|     iters(_iters) | ||||
|   { | ||||
|     std::cout << GridLogMessage<<" Mirs smoother order "<<iters<<std::endl; | ||||
|   }; | ||||
|   void operator() (const Field &in, Field &out)  | ||||
|   { | ||||
|     ConjugateGradient<Field>  CG(0.0,iters,false); // non-converge is just fine in a smoother | ||||
|     CG(_SmootherOperator,in,out); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| gridblasHandle_t GridBLAS::gridblasHandle; | ||||
| int            GridBLAS::gridblasInit; | ||||
|  | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   Grid_init(&argc,&argv); | ||||
|  | ||||
|   const int Ls=24; | ||||
|   //  const int nbasis = 62; | ||||
|   //  const int nbasis = 56; | ||||
|   const int nbasis = 36; | ||||
|   const int cb = 0 ; | ||||
|   RealD mass=0.00078; | ||||
|   RealD M5=1.8; | ||||
|   RealD b=1.5; | ||||
|   RealD c=0.5; | ||||
|  | ||||
|   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), | ||||
| 								   GridDefaultSimd(Nd,vComplex::Nsimd()), | ||||
| 								   GridDefaultMpi()); | ||||
|   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||
|   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||
|   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); | ||||
|  | ||||
|   // Construct a coarsened grid with 4^4 cell | ||||
|   Coordinate Block({4,4,6,4}); | ||||
|   Coordinate clatt = GridDefaultLatt(); | ||||
|   for(int d=0;d<clatt.size();d++){ | ||||
|     clatt[d] = clatt[d]/Block[d]; | ||||
|   } | ||||
|  | ||||
|   GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt, | ||||
| 							    GridDefaultSimd(Nd,vComplex::Nsimd()), | ||||
| 							    GridDefaultMpi());; | ||||
|   GridCartesian *Coarse5d =  SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d); | ||||
|  | ||||
|   ///////////////////////// RNGs ///////////////////////////////// | ||||
|   std::vector<int> seeds4({1,2,3,4}); | ||||
|   std::vector<int> seeds5({5,6,7,8}); | ||||
|   std::vector<int> cseeds({5,6,7,8}); | ||||
|  | ||||
|   GridParallelRNG          RNG5(FGrid);   RNG5.SeedFixedIntegers(seeds5); | ||||
|   GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4); | ||||
|   GridParallelRNG          CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds); | ||||
|  | ||||
|   ///////////////////////// Configuration ///////////////////////////////// | ||||
|   LatticeGaugeField Umu(UGrid); | ||||
|   MemoryManager::Print(); | ||||
|  | ||||
|   FieldMetaData header; | ||||
|   std::string file("ckpoint_lat.1000"); | ||||
|   NerscIO::readConfiguration(Umu,header,file); | ||||
|   MemoryManager::Print(); | ||||
|  | ||||
|   //////////////////////// Fermion action ////////////////////////////////// | ||||
|   MobiusFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c); | ||||
|  | ||||
|   SchurDiagMooeeOperator<MobiusFermionD, LatticeFermion> HermOpEO(Ddwf); | ||||
|  | ||||
|   typedef HermOpAdaptor<LatticeFermionD> HermFineMatrix; | ||||
|   HermFineMatrix FineHermOp(HermOpEO); | ||||
|  | ||||
|   LatticeFermion result(FrbGrid); result=Zero(); | ||||
|  | ||||
|   LatticeFermion    src(FrbGrid); random(RNG5,src); | ||||
|  | ||||
|   // Run power method on FineHermOp | ||||
|   PowerMethod<LatticeFermion>       PM;   PM(HermOpEO,src); | ||||
|   | ||||
|   //////////////////////////////////////////////////////////// | ||||
|   ///////////// Coarse basis and Little Dirac Operator /////// | ||||
|   //////////////////////////////////////////////////////////// | ||||
|   typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperator; | ||||
|   typedef LittleDiracOperator::CoarseVector CoarseVector; | ||||
|  | ||||
|   NextToNextToNextToNearestStencilGeometry5D geom(Coarse5d); | ||||
|   NearestStencilGeometry5D geom_nn(Coarse5d); | ||||
|    | ||||
|   // Warning: This routine calls PVdagM.Op, not PVdagM.HermOp | ||||
|   typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace; | ||||
|   Subspace Aggregates(Coarse5d,FrbGrid,cb); | ||||
|  | ||||
|   //////////////////////////////////////////////////////////// | ||||
|   // Need to check about red-black grid coarsening | ||||
|   //////////////////////////////////////////////////////////// | ||||
|   LittleDiracOperator LittleDiracOp(geom,FrbGrid,Coarse5d); | ||||
|  | ||||
|   std::string subspace_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/Subspace.phys48.rat.scidac.62"); | ||||
|   std::string refine_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/Refine.phys48.rat.scidac.62"); | ||||
|   std::string ldop_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/LittleDiracOp.phys48.rat.scidac.62"); | ||||
|   bool load_agg=true; | ||||
|   bool load_refine=true; | ||||
|   bool load_mat=false; | ||||
|   MemoryManager::Print(); | ||||
|   if ( load_agg ) { | ||||
|     LoadBasis(Aggregates,subspace_file); | ||||
|   } else { | ||||
|  | ||||
|     // NBASIS=40 | ||||
|     // Best so far: ord 2000 [0.01,95], 500,500  -- 466 iters | ||||
|     // slurm-398626.out:Grid : Message : 141.295253 s : 500 filt [1] <n|MdagM|n> 0.000103622063 | ||||
|  | ||||
|  | ||||
|     //Grid : Message : 33.870465 s :  Chebyshev subspace pass-1 : ord 2000 [0.001,95] | ||||
|     //Grid : Message : 33.870485 s :  Chebyshev subspace pass-2 : nbasis40 min 1000 step 1000 lo0 | ||||
|     //slurm-1482200.out : filt ~ 0.004 -- not as low mode projecting -- took 626 iters | ||||
|  | ||||
|     // To try: 2000 [0.1,95]  ,2000,500,500 -- slurm-1482213.out 586 iterations | ||||
|  | ||||
|     // To try: 2000 [0.01,95] ,2000,500,500 -- 469 (think I bumped 92 to 95) (??) | ||||
|     // To try: 2000 [0.025,95],2000,500,500 | ||||
|     // To try: 2000 [0.005,95],2000,500,500 | ||||
|  | ||||
|     // NBASIS=44 -- HDCG paper was 64 vectors; AMD compiler craps out at 48 | ||||
|     // To try: 2000 [0.01,95] ,2000,500,500 -- 419 lowest slurm-1482355.out | ||||
|     // To try: 2000 [0.025,95] ,2000,500,500 -- 487  | ||||
|     // To try: 2000 [0.005,95] ,2000,500,500 | ||||
|     /* | ||||
|       Smoother [3,92] order 16 | ||||
| slurm-1482355.out:Grid : Message : 35.239686 s :  Chebyshev subspace pass-1 : ord 2000 [0.01,95] | ||||
| slurm-1482355.out:Grid : Message : 35.239714 s :  Chebyshev subspace pass-2 : nbasis44 min 500 step 500 lo0 | ||||
| slurm-1482355.out:Grid : Message : 5561.305552 s : HDCG: Pcg converged in 419 iterations and 2616.202598 s | ||||
|  | ||||
| slurm-1482367.out:Grid : Message : 43.157235 s :  Chebyshev subspace pass-1 : ord 2000 [0.025,95] | ||||
| slurm-1482367.out:Grid : Message : 43.157257 s :  Chebyshev subspace pass-2 : nbasis44 min 500 step 500 lo0 | ||||
| slurm-1482367.out:Grid : Message : 6169.469330 s : HDCG: Pcg converged in 487 iterations and 3131.185821 s | ||||
|     */ | ||||
| 		 /* | ||||
| 		   Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis, | ||||
| 				       95.0,0.0075, | ||||
| 				       2500, | ||||
| 				       500, | ||||
| 				       500, | ||||
| 				       0.0); | ||||
| 		 */ | ||||
|  | ||||
| 		 /* | ||||
| 		   Aggregates.CreateSubspaceChebyshevPowerLaw(RNG5,HermOpEO,nbasis, | ||||
| 							      95.0, | ||||
| 							      2000); | ||||
| 		 */ | ||||
|  | ||||
|     Aggregates.CreateSubspaceMultishift(RNG5,HermOpEO, | ||||
| 					0.0003,1.0e-5,2000); // Lo, tol, maxit | ||||
|   /* | ||||
|     Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis, | ||||
| 				       95.0,0.05, | ||||
| 				       2000, | ||||
| 				       500, | ||||
| 				       500, | ||||
| 				       0.0); | ||||
|  */ | ||||
|     /* | ||||
|       Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis, | ||||
| 				       95.0,0.01, | ||||
| 				       2000, | ||||
| 				       500, | ||||
| 				       500, | ||||
| 				       0.0); | ||||
|     */ | ||||
|     //    Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis,95.,0.01,1500); -- running slurm-1484934.out nbasis 56 | ||||
|  | ||||
|     //    Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis,95.,0.01,1500); <== last run | ||||
|     SaveBasis(Aggregates,subspace_file); | ||||
|   } | ||||
|   MemoryManager::Print(); | ||||
|  | ||||
|   int refine=1; | ||||
|   if(refine){ | ||||
|     if ( load_refine ) { | ||||
|       LoadBasis(Aggregates,refine_file); | ||||
|     } else { | ||||
|       // HDCG used Pcg to refine | ||||
|       Aggregates.RefineSubspace(HermOpEO,0.001,1.0e-3,3000); | ||||
|       SaveBasis(Aggregates,refine_file); | ||||
|     } | ||||
|   } | ||||
|   MemoryManager::Print(); | ||||
|   Aggregates.Orthogonalise(); | ||||
|   if ( load_mat ) { | ||||
|     LoadOperator(LittleDiracOp,ldop_file); | ||||
|   } else { | ||||
|     LittleDiracOp.CoarsenOperator(FineHermOp,Aggregates); | ||||
|     SaveOperator(LittleDiracOp,ldop_file); | ||||
|   } | ||||
|  | ||||
|   // I/O test: | ||||
|   CoarseVector c_src(Coarse5d);   random(CRNG,c_src); | ||||
|   CoarseVector c_res(Coarse5d);  | ||||
|   CoarseVector c_ref(Coarse5d); | ||||
|  | ||||
|   // Try projecting to one hop only | ||||
|   //  LittleDiracOp.ShiftMatrix(1.0e-4); | ||||
|   LittleDiracOperator LittleDiracOpProj(geom_nn,FrbGrid,Coarse5d); | ||||
|   LittleDiracOpProj.ProjectNearestNeighbour(0.01,LittleDiracOp); // smaller shift 0.02? n | ||||
|  | ||||
|   typedef HermitianLinearOperator<LittleDiracOperator,CoarseVector> HermMatrix; | ||||
|   HermMatrix CoarseOp     (LittleDiracOp); | ||||
|   HermMatrix CoarseOpProj (LittleDiracOpProj); | ||||
|    | ||||
|   MemoryManager::Print(); | ||||
|   ////////////////////////////////////////// | ||||
|   // Build a coarse lanczos | ||||
|   ////////////////////////////////////////// | ||||
|   //  Chebyshev<CoarseVector>      IRLCheby(0.012,40.0,201);  //500 HDCG iters | ||||
|   //  int Nk=512; // Didn't save much | ||||
|   //  int Nm=640; | ||||
|   //  int Nstop=400; | ||||
|  | ||||
|   //  Chebyshev<CoarseVector>      IRLCheby(0.005,40.0,201);  //319 HDCG iters @ 128//160 nk. | ||||
|   //  int Nk=128; | ||||
|   //  int Nm=160; | ||||
|  | ||||
|   //  Chebyshev<CoarseVector>      IRLCheby(0.005,40.0,201);  //319 HDCG iters @ 128//160 nk. | ||||
|   Chebyshev<CoarseVector>      IRLCheby(0.04,40.0,201);  //319 HDCG iters @ 128//160 nk. | ||||
|   int Nk=192; | ||||
|   int Nm=256; | ||||
|   int Nstop=Nk; | ||||
|    | ||||
|   //  Chebyshev<CoarseVector>      IRLCheby(0.010,45.0,201);  // 1 iter | ||||
|   FunctionHermOp<CoarseVector> IRLOpCheby(IRLCheby,CoarseOp); | ||||
|   PlainHermOp<CoarseVector>    IRLOp    (CoarseOp); | ||||
|    | ||||
|   ImplicitlyRestartedLanczos<CoarseVector> IRL(IRLOpCheby,IRLOp,Nstop,Nk,Nm,1e-5,10); | ||||
|  | ||||
|   int Nconv; | ||||
|   std::vector<RealD>            eval(Nm); | ||||
|   std::vector<CoarseVector>     evec(Nm,Coarse5d); | ||||
|  | ||||
|   PowerMethod<CoarseVector>       cPM;   cPM(CoarseOp,c_src); | ||||
|  | ||||
|   IRL.calc(eval,evec,c_src,Nconv); | ||||
|   DeflatedGuesser<CoarseVector> DeflCoarseGuesser(evec,eval); | ||||
|  | ||||
|   ////////////////////////////////////////// | ||||
|   // Build a coarse space solver | ||||
|   ////////////////////////////////////////// | ||||
|   int maxit=30000; | ||||
|   ConjugateGradient<CoarseVector>  CG(1.0e-8,maxit,false); | ||||
|   ConjugateGradient<LatticeFermionD>  CGfine(1.0e-8,30000,false); | ||||
|   ZeroGuesser<CoarseVector> CoarseZeroGuesser; | ||||
|    | ||||
|    | ||||
|   //  HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,CoarseZeroGuesser); | ||||
|   HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,DeflCoarseGuesser); | ||||
|   c_res=Zero(); | ||||
|   //  HPDSolve(c_src,c_res); c_ref = c_res; | ||||
|   //  std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl; | ||||
|   //  std::cout << GridLogMessage<<"ref norm "<<norm2(c_ref)<<std::endl; | ||||
|   ////////////////////////////////////////////////////////////////////////// | ||||
|   // Deflated (with real op EV's) solve for the projected coarse op | ||||
|   // Work towards ADEF1 in the coarse space | ||||
|   ////////////////////////////////////////////////////////////////////////// | ||||
|   HPDSolver<CoarseVector> HPDSolveProj(CoarseOpProj,CG,DeflCoarseGuesser); | ||||
|   c_res=Zero(); | ||||
|   //  HPDSolveProj(c_src,c_res); | ||||
|   //  std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl; | ||||
|   //  std::cout << GridLogMessage<<"res norm "<<norm2(c_res)<<std::endl; | ||||
|   //  c_res = c_res - c_ref; | ||||
|   //  std::cout << "Projected solver error "<<norm2(c_res)<<std::endl; | ||||
|  | ||||
|   ////////////////////////////////////////////////////////////////////// | ||||
|   // Coarse ADEF1 with deflation space | ||||
|   ////////////////////////////////////////////////////////////////////// | ||||
|   ChebyshevSmoother<CoarseVector >  CoarseSmoother(1.0,37.,8,CoarseOpProj);  // just go to sloppy 0.1 convergence | ||||
|     //  CoarseSmoother(0.1,37.,8,CoarseOpProj);  // | ||||
|   //  CoarseSmoother(0.5,37.,6,CoarseOpProj);  //  8 iter 0.36s | ||||
|   //    CoarseSmoother(0.5,37.,12,CoarseOpProj);  // 8 iter, 0.55s | ||||
|   //    CoarseSmoother(0.5,37.,8,CoarseOpProj);// 7-9 iter | ||||
|   //  CoarseSmoother(1.0,37.,8,CoarseOpProj); // 0.4 - 0.5s solve to 0.04, 7-9 iter | ||||
|   //  ChebyshevSmoother<CoarseVector,HermMatrix > CoarseSmoother(0.5,36.,10,CoarseOpProj);  // 311 | ||||
|  | ||||
|   //////////////////////////////////////////////////////// | ||||
|   // CG, Cheby mode spacing 200,200 | ||||
|   // Unprojected Coarse CG solve to 1e-8 : 190 iters, 4.9s | ||||
|   // Unprojected Coarse CG solve to 4e-2 :  33 iters, 0.8s | ||||
|   // Projected Coarse CG solve to 1e-8 : 100 iters, 0.36s | ||||
|   //////////////////////////////////////////////////////// | ||||
|   // CoarseSmoother(1.0,48.,8,CoarseOpProj); 48 evecs  | ||||
|   //////////////////////////////////////////////////////// | ||||
|   // ADEF1 Coarse solve to 1e-8 : 44 iters, 2.34s  2.1x gain | ||||
|   // ADEF1 Coarse solve to 4e-2 : 7 iters, 0.4s | ||||
|   // HDCG 38 iters 162s | ||||
|   // | ||||
|   // CoarseSmoother(1.0,40.,8,CoarseOpProj); 48 evecs  | ||||
|   // ADEF1 Coarse solve to 1e-8 : 37 iters, 2.0s  2.1x gain | ||||
|   // ADEF1 Coarse solve to 4e-2 : 6 iters, 0.36s | ||||
|   // HDCG 38 iters 169s | ||||
|  | ||||
| 					       /* | ||||
|   TwoLevelADEF1defl<CoarseVector> | ||||
|     cADEF1(1.0e-8, 500, | ||||
| 	   CoarseOp, | ||||
| 	   CoarseSmoother, | ||||
| 	   evec,eval); | ||||
| 					       */ | ||||
|   //  c_res=Zero(); | ||||
|   //  cADEF1(c_src,c_res); | ||||
|   //  std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl; | ||||
|   //  std::cout << GridLogMessage<<"cADEF1 res norm "<<norm2(c_res)<<std::endl; | ||||
|   //  c_res = c_res - c_ref; | ||||
|   //  std::cout << "cADEF1 solver error "<<norm2(c_res)<<std::endl; | ||||
|    | ||||
|   //  cADEF1.Tolerance = 4.0e-2; | ||||
|   //  cADEF1.Tolerance = 1.0e-1; | ||||
|   //  cADEF1.Tolerance = 5.0e-2; | ||||
|   //  c_res=Zero(); | ||||
|   //  cADEF1(c_src,c_res); | ||||
|   //  std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl; | ||||
|   //  std::cout << GridLogMessage<<"cADEF1 res norm "<<norm2(c_res)<<std::endl; | ||||
|   //  c_res = c_res - c_ref; | ||||
|   //  std::cout << "cADEF1 solver error "<<norm2(c_res)<<std::endl; | ||||
|    | ||||
|   ////////////////////////////////////////// | ||||
|   // Build a smoother | ||||
|   ////////////////////////////////////////// | ||||
|   //  ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(10.0,100.0,10,FineHermOp); //499 | ||||
|   //  ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(3.0,100.0,10,FineHermOp);  //383 | ||||
|   //  ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(1.0,100.0,10,FineHermOp);  //328 | ||||
|   //  std::vector<RealD> los({0.5,1.0,3.0}); // 147/142/146 nbasis 1 | ||||
|   //  std::vector<RealD> los({1.0,2.0}); // Nbasis 24: 88,86 iterations | ||||
|   //  std::vector<RealD> los({2.0,4.0}); // Nbasis 32 == 52, iters | ||||
|   //  std::vector<RealD> los({2.0,4.0}); // Nbasis 40 == 36,36 iters | ||||
|  | ||||
|   // | ||||
|   // Turns approx 2700 iterations into 340 fine multiplies with Nbasis 40 | ||||
|   // Need to measure cost of coarse space. | ||||
|   // | ||||
|   // -- i) Reduce coarse residual   -- 0.04 | ||||
|   // -- ii) Lanczos on coarse space -- done | ||||
|   // -- iii) Possible 1 hop project and/or preconditioning it - easy - PrecCG it and | ||||
|   //         use a limited stencil. Reread BFM code to check on evecs / deflation strategy with prec | ||||
|   // | ||||
|   // | ||||
|   // | ||||
|   // | ||||
|  | ||||
|   MemoryManager::Print(); | ||||
|   ////////////////////////////////////// | ||||
|   // mrhs coarse solve | ||||
|   //  Create a higher dim coarse grid | ||||
|   ////////////////////////////////////////////////////////////////////////////////////// | ||||
|   ConjugateGradient<CoarseVector>  coarseCG(2.0e-2,20000,true); | ||||
|      | ||||
|   const int nrhs=vComplex::Nsimd(); | ||||
|      | ||||
|   Coordinate mpi=GridDefaultMpi(); | ||||
|   Coordinate rhMpi ({1,1,mpi[0],mpi[1],mpi[2],mpi[3]}); | ||||
|   Coordinate rhLatt({nrhs,1,clatt[0],clatt[1],clatt[2],clatt[3]}); | ||||
|   Coordinate rhSimd({vComplex::Nsimd(),1, 1,1,1,1}); | ||||
|      | ||||
|   GridCartesian *CoarseMrhs = new GridCartesian(rhLatt,rhSimd,rhMpi);  | ||||
|   MultiGeneralCoarsenedMatrix mrhs(LittleDiracOp,CoarseMrhs); | ||||
|   typedef decltype(mrhs) MultiGeneralCoarsenedMatrix_t; | ||||
|   typedef HermitianLinearOperator<MultiGeneralCoarsenedMatrix_t,CoarseVector> MrhsHermMatrix; | ||||
|   MrhsHermMatrix MrhsCoarseOp     (mrhs); | ||||
|   MemoryManager::Print(); | ||||
|  | ||||
|   {  | ||||
|     CoarseVector rh_res(CoarseMrhs); | ||||
|     CoarseVector rh_guess(CoarseMrhs); | ||||
|     CoarseVector rh_src(CoarseMrhs); | ||||
|  | ||||
|     rh_res= Zero(); | ||||
|     rh_guess= Zero(); | ||||
|     for(int r=0;r<nrhs;r++){ | ||||
|       random(CRNG,c_src); | ||||
|       DeflCoarseGuesser(c_src,c_res); | ||||
|       InsertSlice(c_res,rh_res,r,0); | ||||
|       InsertSlice(c_res,rh_guess,r,0); | ||||
|       InsertSlice(c_src,rh_src,r,0); | ||||
|     } | ||||
|  | ||||
|   MemoryManager::Print(); | ||||
|     coarseCG(MrhsCoarseOp,rh_src,rh_res); | ||||
|   MemoryManager::Print(); | ||||
|     //redo with block CG | ||||
|      | ||||
|     for(int r=0;r<nrhs;r++){ | ||||
|       std::cout << " compare to single RHS "<<r<<"/"<<nrhs<<std::endl; | ||||
|       ExtractSlice(c_src,rh_src,r,0); | ||||
|       ExtractSlice(c_res,rh_res,r,0); | ||||
|       ExtractSlice(c_ref,rh_guess,r,0); | ||||
|       coarseCG(CoarseOp,c_src,c_ref); | ||||
|       std::cout << " mrhs [" <<r <<"] "<< norm2(c_res)<<std::endl; | ||||
|       std::cout << " srhs [" <<r <<"] "<< norm2(c_ref)<<std::endl; | ||||
|       c_ref=c_ref-c_res; | ||||
|       RealD diff =norm2(c_ref)/norm2(c_src); | ||||
|       std::cout << r << " diff " << diff<<std::endl; | ||||
|       assert(diff < 1.0e-1); | ||||
|     } | ||||
|   } | ||||
|   MemoryManager::Print(); | ||||
|   ////////////////////////////////////// | ||||
|   // fine solve | ||||
|   ////////////////////////////////////// | ||||
|  | ||||
|    | ||||
|   //  std::vector<RealD> los({2.0,2.5}); // Nbasis 40 == 36,36 iters | ||||
|   std::vector<RealD> los({2.0}); // Nbasis 40 == 36,36 iters | ||||
|  | ||||
|   //  std::vector<int> ords({7,8,10}); // Nbasis 40 == 40,38,36 iters (320,342,396 mults) | ||||
|   //  std::vector<int> ords({7}); // Nbasis 40 == 40 iters (320 mults) | ||||
|   std::vector<int> ords({9}); // Nbasis 40 == 40 iters (320 mults)   | ||||
|  | ||||
|  /* | ||||
|    Smoother opt @56 nbasis, 0.04 convergence, 192 evs | ||||
|  ord lo | ||||
|  | ||||
|  16   0.1  no converge -- likely sign indefinite | ||||
|  32   0.1  no converge -- likely sign indefinite(?) | ||||
|  | ||||
|  16   0.5  422 | ||||
|  32   0.5  302 | ||||
|   | ||||
|  8   1.0  575 | ||||
|  12  1.0  449 | ||||
|  16  1.0  375 | ||||
|  32  1.0  302 | ||||
|  | ||||
|  12  3.0  476 | ||||
|  16  3.0  319 | ||||
|  32  3.0  306 | ||||
|  | ||||
|  Powerlaw setup 62 vecs | ||||
| slurm-1494943.out:Grid : Message : 4874.186617 s : HDCG: Pcg converged in 171 iterations and 1706.548006 s 1.0 32 | ||||
| slurm-1494943.out:Grid : Message : 6490.121648 s : HDCG: Pcg converged in 194 iterations and 1616.219654 s 1.0 16 | ||||
|  | ||||
|  Cheby setup: 56vecs | ||||
|  -- CG smoother O(16): 487 | ||||
|   | ||||
| Power law setup, 56 vecs -- lambda^-5 | ||||
| slurm-1494383.out:Grid : Message : 4377.173265 s : HDCG: Pcg converged in 204 iterations and 1153.548935 s 1.0 32 | ||||
|  | ||||
| Power law setup, 56 vecs -- lambda^-3 | ||||
|  | ||||
| slurm-1494242.out:Grid : Message : 4370.464814 s : HDCG: Pcg converged in 204 iterations and 1143.494776 s  1.0 32 | ||||
| slurm-1494242.out:Grid : Message : 5432.414820 s : HDCG: Pcg converged in 237 iterations and 1061.455882 s  1.0 16 | ||||
| slurm-1494242.out:Grid : Message : 6588.727977 s : HDCG: Pcg converged in 205 iterations and 1156.565210 s  0.5 32 | ||||
|  | ||||
|  Power law setup, 56 vecs -- lambda^-4 | ||||
|  -- CG smoother    O(16): 290 | ||||
|  -- Cheby smoother O(16): 218 -- getting close to the deflation level I expect 169 from BFM paper @O(7) smoother and 64 nbasis | ||||
|  | ||||
| Grid : Message : 2790.797194 s : HDCG: Pcg converged in 190 iterations and 1049.563182 s 1.0 32 | ||||
| Grid : Message : 3766.374396 s : HDCG: Pcg converged in 218 iterations and 975.455668 s  1.0 16 | ||||
| Grid : Message : 4888.746190 s : HDCG: Pcg converged in 191 iterations and 1122.252055 s 0.5 32 | ||||
| Grid : Message : 5956.679661 s : HDCG: Pcg converged in 231 iterations and 1067.812850 s 0.5 16 | ||||
|  | ||||
| Grid : Message : 2767.405829 s : HDCG: Pcg converged in 218 iterations and 967.214067 s -- 16 | ||||
| Grid : Message : 3816.165905 s : HDCG: Pcg converged in 251 iterations and 1048.636269 s -- 12 | ||||
| Grid : Message : 5121.206572 s : HDCG: Pcg converged in 318 iterations and 1304.916168 s -- 8 | ||||
|  | ||||
|   | ||||
| [paboyle@login2.crusher debug]$ grep -v Memory slurm-402426.out  | grep converged | grep HDCG -- [1.0,16] cheby | ||||
| Grid : Message : 5185.521063 s : HDCG: Pcg converged in 377 iterations and 1595.843529 s | ||||
|  | ||||
| [paboyle@login2.crusher debug]$ grep HDCG  slurm-402184.out | grep onver | ||||
| Grid : Message : 3760.438160 s : HDCG: Pcg converged in 422 iterations and 2129.243141 s | ||||
| Grid : Message : 5660.588015 s : HDCG: Pcg converged in 308 iterations and 1900.026821 s | ||||
|  | ||||
|   | ||||
| Grid : Message : 4238.206528 s : HDCG: Pcg converged in 575 iterations and 2657.430676 s | ||||
| Grid : Message : 6345.880344 s : HDCG: Pcg converged in 449 iterations and 2108.505208 s | ||||
|  | ||||
| grep onverg slurm-401663.out | grep HDCG | ||||
| Grid : Message : 3900.817781 s : HDCG: Pcg converged in 476 iterations and 1992.591311 s | ||||
| Grid : Message : 5647.202699 s : HDCG: Pcg converged in 306 iterations and 1746.838660 s | ||||
|  | ||||
|  | ||||
| [paboyle@login2.crusher debug]$ grep converged slurm-401775.out | grep HDCG | ||||
| Grid : Message : 3583.177025 s : HDCG: Pcg converged in 375 iterations and 1800.896037 s | ||||
| Grid : Message : 5348.342243 s : HDCG: Pcg converged in 302 iterations and 1765.045018 s | ||||
|  | ||||
| Conclusion: higher order smoother is doing better. Much better. Use a Krylov smoother instead Mirs as in BFM version. | ||||
|  | ||||
|  */ | ||||
| 				      // | ||||
|   MemoryManager::Print(); | ||||
|   for(int l=0;l<los.size();l++){ | ||||
|  | ||||
|     RealD lo = los[l]; | ||||
|  | ||||
|     for(int o=0;o<ords.size();o++){ | ||||
|  | ||||
|       ConjugateGradient<CoarseVector>  CGsloppy(4.0e-2,maxit,false); | ||||
|       HPDSolver<CoarseVector> HPDSolveSloppy(CoarseOp,CGsloppy,DeflCoarseGuesser); | ||||
|        | ||||
|       //    ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(lo,92,10,FineHermOp); // 36 best case | ||||
|       ChebyshevSmoother<LatticeFermionD > ChebySmooth(lo,95,ords[o],FineHermOp);  // 311 | ||||
|  | ||||
|       /* | ||||
|        * CG smooth 11 iter:  | ||||
|        slurm-403825.out:Grid : Message : 4369.824339 s : HDCG: fPcg converged in 215 iterations 3.0 | ||||
|        slurm-403908.out:Grid : Message : 3955.897470 s : HDCG: fPcg converged in 236 iterations 1.0 | ||||
|        slurm-404273.out:Grid : Message : 3843.792191 s : HDCG: fPcg converged in 210 iterations 2.0 | ||||
|        * CG smooth 9 iter:  | ||||
|       */ | ||||
|       // | ||||
|       RealD MirsShift = lo; | ||||
|       ShiftedHermOpLinearOperator<LatticeFermionD> ShiftedFineHermOp(HermOpEO,MirsShift); | ||||
|       CGSmoother<LatticeFermionD> CGsmooth(ords[o],ShiftedFineHermOp) ; | ||||
|    | ||||
|       ////////////////////////////////////////// | ||||
|       // Build a HDCG solver | ||||
|       ////////////////////////////////////////// | ||||
|       /*      TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace> | ||||
| 	HDCG(1.0e-8, 700, | ||||
| 	     FineHermOp, | ||||
| 	     CGsmooth, | ||||
| 	     HPDSolveSloppy, | ||||
| 	     HPDSolve, | ||||
| 	     Aggregates); | ||||
|       result=Zero(); | ||||
|       */ | ||||
|       //      std::cout << "Calling HDCG"<<std::endl; | ||||
|       //      HDCG(src,result); | ||||
|  | ||||
|       ////////////////////////////////////////// | ||||
|       // Build a HDCG mrhs solver | ||||
|       ////////////////////////////////////////// | ||||
|   MemoryManager::Print(); | ||||
|       DoNothingGuesser<CoarseVector> DoNothing; | ||||
|       HPDSolver<CoarseVector> HPDSolveMrhs(MrhsCoarseOp,coarseCG,DoNothing); | ||||
|       TwoLevelADEF2mrhs<LatticeFermion,CoarseVector,Subspace> | ||||
| 	HDCGmrhs(1.0e-8, 700, | ||||
| 		 FineHermOp, | ||||
| 		 CGsmooth, | ||||
| 		 HPDSolveSloppy, // Never used | ||||
| 		 HPDSolve,       // Used in Vstart | ||||
| 		 HPDSolveMrhs,    // Used in M1 | ||||
| 		 DeflCoarseGuesser, // single RHS guess used in M1 | ||||
| 		 CoarseMrhs,        // Grid needed to Mrhs grid | ||||
| 		 Aggregates); | ||||
|  | ||||
|   MemoryManager::Print(); | ||||
|       std::cout << "Calling mRHS HDCG"<<std::endl; | ||||
|       FrbGrid->Barrier(); | ||||
|        | ||||
|   MemoryManager::Print(); | ||||
|       std::vector<LatticeFermionD> src_mrhs(nrhs,FrbGrid); | ||||
|       std::vector<LatticeFermionD> res_mrhs(nrhs,FrbGrid); | ||||
|   MemoryManager::Print(); | ||||
|       for(int r=0;r<nrhs;r++){ | ||||
| 	random(RNG5,src_mrhs[r]); | ||||
| 	res_mrhs[r]=Zero(); | ||||
| 	std::cout << "Setup mrhs source "<<r<<std::endl; | ||||
|       } | ||||
|       std::cout << "Calling the mRHS HDCG"<<std::endl; | ||||
|   MemoryManager::Print(); | ||||
|       HDCGmrhs(src_mrhs,res_mrhs); | ||||
|   MemoryManager::Print(); | ||||
|  | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // Standard CG | ||||
|   result=Zero(); | ||||
|   CGfine(HermOpEO, src, result); | ||||
|    | ||||
|   Grid_finalize(); | ||||
|   return 0; | ||||
| } | ||||
		Reference in New Issue
	
	Block a user