mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 14:04:32 +00:00 
			
		
		
		
	Working version of Lanczos without the extra copy.
This commit is contained in:
		@@ -8,6 +8,7 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
					Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
					Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					Author: Chulwoo Jung <chulwoo@bnl.gov>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    This program is free software; you can redistribute it and/or modify
 | 
					    This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
    it under the terms of the GNU General Public License as published by
 | 
					    it under the terms of the GNU General Public License as published by
 | 
				
			||||||
@@ -45,6 +46,9 @@ void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e,
 | 
				
			|||||||
#include "DenseMatrix.h"
 | 
					#include "DenseMatrix.h"
 | 
				
			||||||
#include "EigenSort.h"
 | 
					#include "EigenSort.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// eliminate temorary vector in calc()
 | 
				
			||||||
 | 
					#define MEM_SAVE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespace Grid {
 | 
					namespace Grid {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/////////////////////////////////////////////////////////////
 | 
					/////////////////////////////////////////////////////////////
 | 
				
			||||||
@@ -496,8 +500,6 @@ until convergence
 | 
				
			|||||||
 */
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// alternate implementation for minimizing memory usage.  May affect the performance
 | 
					// alternate implementation for minimizing memory usage.  May affect the performance
 | 
				
			||||||
#define MEM_SAVE
 | 
					 | 
				
			||||||
#undef MEM_SAVE2
 | 
					 | 
				
			||||||
    void calc(DenseVector<RealD>& eval,
 | 
					    void calc(DenseVector<RealD>& eval,
 | 
				
			||||||
	      DenseVector<Field>& evec,
 | 
						      DenseVector<Field>& evec,
 | 
				
			||||||
	      const Field& src,
 | 
						      const Field& src,
 | 
				
			||||||
@@ -520,13 +522,12 @@ until convergence
 | 
				
			|||||||
	DenseVector<RealD> Qt(Nm*Nm);
 | 
						DenseVector<RealD> Qt(Nm*Nm);
 | 
				
			||||||
	DenseVector<int>   Iconv(Nm);
 | 
						DenseVector<int>   Iconv(Nm);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if (!defined MEM_SAVE ) || (!defined MEM_SAVE2)
 | 
					#if (!defined MEM_SAVE )
 | 
				
			||||||
	DenseVector<Field>  B(Nm,grid); // waste of space replicating
 | 
						DenseVector<Field>  B(Nm,grid); // waste of space replicating
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
	
 | 
						
 | 
				
			||||||
	Field f(grid);
 | 
						Field f(grid);
 | 
				
			||||||
	Field v(grid);
 | 
						Field v(grid);
 | 
				
			||||||
//	auto B2 = evec[0]._odata[0];
 | 
					 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
	int k1 = 1;
 | 
						int k1 = 1;
 | 
				
			||||||
	int k2 = Nk;
 | 
						int k2 = Nk;
 | 
				
			||||||
@@ -612,7 +613,7 @@ until convergence
 | 
				
			|||||||
	assert(k2<Nm);
 | 
						assert(k2<Nm);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifndef MEM_SAVE
 | 
					#ifndef MEM_SAVE
 | 
				
			||||||
if (0) {  
 | 
						if (0) {   // old implementation without blocking
 | 
				
			||||||
	  for(int i=0; i<(Nk+1); ++i) B[i] = 0.0;
 | 
						  for(int i=0; i<(Nk+1); ++i) B[i] = 0.0;
 | 
				
			||||||
	  
 | 
						  
 | 
				
			||||||
	  for(int j=k1-1; j<k2+1; ++j){
 | 
						  for(int j=k1-1; j<k2+1; ++j){
 | 
				
			||||||
@@ -621,13 +622,8 @@ if (0) {
 | 
				
			|||||||
	      B[j] += Qt[k+Nm*j] * evec[k];
 | 
						      B[j] += Qt[k+Nm*j] * evec[k];
 | 
				
			||||||
	    }
 | 
						    }
 | 
				
			||||||
	  }
 | 
						  }
 | 
				
			||||||
	t1=usecond()/1e6;
 | 
						}
 | 
				
			||||||
	std::cout<<GridLogMessage <<"IRL::QR Rotate: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
 | 
						{
 | 
				
			||||||
}
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifndef MEM_SAVE
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	for(int i=0; i<(Nk+1); ++i) {
 | 
						for(int i=0; i<(Nk+1); ++i) {
 | 
				
			||||||
		B[i] = 0.0;
 | 
							B[i] = 0.0;
 | 
				
			||||||
	  	B[i].checkerboard = evec[0].checkerboard;
 | 
						  	B[i].checkerboard = evec[0].checkerboard;
 | 
				
			||||||
@@ -643,7 +639,7 @@ PARALLEL_FOR_LOOP
 | 
				
			|||||||
			}
 | 
								}
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
						}
 | 
				
			||||||
	for(int j=k1-1; j<k2+1; ++j) evec[j] = B[j];
 | 
						for(int j=k1-1; j<k2+1; ++j) evec[j] = B[j];
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
@@ -651,25 +647,24 @@ PARALLEL_FOR_LOOP
 | 
				
			|||||||
	
 | 
						
 | 
				
			||||||
	assert(k2<Nm);
 | 
						assert(k2<Nm);
 | 
				
			||||||
	assert(k1>0);
 | 
						assert(k1>0);
 | 
				
			||||||
//	DenseVector < decltype(B2) > B(Nm);
 | 
					 | 
				
			||||||
//	std::vector < decltype( B2 ) > B(Nm*thr,B2);
 | 
					 | 
				
			||||||
	Field B(grid);
 | 
						Field B(grid);
 | 
				
			||||||
PARALLEL_FOR_LOOP
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
	for(int ss=0;ss < grid->oSites();ss++){
 | 
						for(int ss=0;ss < grid->oSites();ss++){
 | 
				
			||||||
 | 
					//		auto B2 = evec[0]._odata[0];
 | 
				
			||||||
 | 
					//		std::vector < decltype( B2 ) > B(Nm*thr,B2);
 | 
				
			||||||
		int thr=GridThread::GetThreads();
 | 
							int thr=GridThread::GetThreads();
 | 
				
			||||||
		int me = GridThread::ThreadBarrier();
 | 
							int me = GridThread::ThreadBarrier();
 | 
				
			||||||
		printf("thr=%d ss=%d me=%d\n",thr,ss,me);fflush(stdout);
 | 
					//		printf("thr=%d ss=%d me=%d\n",thr,ss,me);fflush(stdout);
 | 
				
			||||||
 | 
							assert(Nm*thr<grid->oSites());
 | 
				
			||||||
		for(int j=0; j<Nm; ++j) B._odata[j+Nm*me]=0.;
 | 
							for(int j=0; j<Nm; ++j) B._odata[j+Nm*me]=0.;
 | 
				
			||||||
		for(int j=k1-1; j<(k2+1); ++j){
 | 
							for(int j=k1-1; j<(k2+1); ++j){
 | 
				
			||||||
			for(int k=0; k<Nm ; ++k){
 | 
								for(int k=0; k<Nm ; ++k){
 | 
				
			||||||
			    B._odata[j+Nm*me] +=Qt[k+Nm*j] * evec[k]._odata[ss]; 
 | 
								    B._odata[j+Nm*me] +=Qt[k+Nm*j] * evec[k]._odata[ss]; 
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
#if 1
 | 
					 | 
				
			||||||
		for(int j=k1-1; j<(k2+1); ++j){
 | 
							for(int j=k1-1; j<(k2+1); ++j){
 | 
				
			||||||
			evec[j]._odata[ss] = B._odata[j+Nm*me];
 | 
								evec[j]._odata[ss] = B._odata[j+Nm*me];
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
@@ -697,7 +692,7 @@ PARALLEL_FOR_LOOP
 | 
				
			|||||||
	t1=usecond()/1e6;
 | 
						t1=usecond()/1e6;
 | 
				
			||||||
	std::cout<<GridLogMessage <<"IRL::diagonalize: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
 | 
						std::cout<<GridLogMessage <<"IRL::diagonalize: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
 | 
				
			||||||
	  
 | 
						  
 | 
				
			||||||
#ifndef MEM_SAVE2
 | 
					#ifndef MEM_SAVE
 | 
				
			||||||
if (0) {
 | 
					if (0) {
 | 
				
			||||||
	  for(int k = 0; k<Nk; ++k) B[k]=0.0;
 | 
						  for(int k = 0; k<Nk; ++k) B[k]=0.0;
 | 
				
			||||||
	  
 | 
						  
 | 
				
			||||||
@@ -761,42 +756,39 @@ PARALLEL_FOR_LOOP
 | 
				
			|||||||
	  }  // i-loop end
 | 
						  }  // i-loop end
 | 
				
			||||||
	  //	  std::cout<<GridLogMessage << std::resetiosflags(std::ios_base::scientific);
 | 
						  //	  std::cout<<GridLogMessage << std::resetiosflags(std::ios_base::scientific);
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
{
 | 
						{
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	Field B(grid);
 | 
							Field B(grid);
 | 
				
			||||||
	for(int j = 0; j<Nk; ++j){
 | 
							for(int j = 0; j<Nk; ++j){
 | 
				
			||||||
		B=0.;
 | 
								B=0.;
 | 
				
			||||||
		B.checkerboard = evec[0].checkerboard;
 | 
								B.checkerboard = evec[0].checkerboard;
 | 
				
			||||||
	    for(int k = 0; k<Nk; ++k){
 | 
							    for(int k = 0; k<Nk; ++k){
 | 
				
			||||||
	    	B += Qt[k+j*Nm] * evec[k];
 | 
							    	B += Qt[k+j*Nm] * evec[k];
 | 
				
			||||||
//		B[Iconv[j]] +=Qt[k+Nm*Iconv[j]] * evec[k]._odata[ss]; 
 | 
							    }
 | 
				
			||||||
	    }
 | 
							    std::cout<<GridLogMessage << "norm(B["<<j<<"])="<<norm2(B)<<std::endl;
 | 
				
			||||||
	    std::cout<<GridLogMessage << "norm(B["<<j<<"])="<<norm2(B)<<std::endl;
 | 
					//		    _poly(_Linop,B,v);
 | 
				
			||||||
//	    _poly(_Linop,B,v);
 | 
							    _Linop.HermOp(B,v);
 | 
				
			||||||
	    _Linop.HermOp(B,v);
 | 
							    
 | 
				
			||||||
 | 
							    RealD vnum = real(innerProduct(B,v)); // HermOp.
 | 
				
			||||||
 | 
							    RealD vden = norm2(B);
 | 
				
			||||||
 | 
							    RealD vv0 = norm2(v);
 | 
				
			||||||
 | 
							    eval2[j] = vnum/vden;
 | 
				
			||||||
 | 
							    v -= eval2[j]*B;
 | 
				
			||||||
 | 
							    RealD vv = norm2(v);
 | 
				
			||||||
	    
 | 
						    
 | 
				
			||||||
	    RealD vnum = real(innerProduct(B,v)); // HermOp.
 | 
							    std::cout.precision(13);
 | 
				
			||||||
	    RealD vden = norm2(B);
 | 
							    std::cout<<GridLogMessage << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<j<<"] ";
 | 
				
			||||||
	    RealD vv0 = norm2(v);
 | 
							    std::cout<<"eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[j];
 | 
				
			||||||
	    eval2[j] = vnum/vden;
 | 
							    std::cout<<"|H B[i] - eval[i]B[i]|^2 "<< std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv;
 | 
				
			||||||
	    v -= eval2[j]*B;
 | 
							    std::cout<<" "<< vnum/(sqrt(vden)*sqrt(vv0)) << std::endl;
 | 
				
			||||||
	    RealD vv = norm2(v);
 | 
					 | 
				
			||||||
	    
 | 
						    
 | 
				
			||||||
	    std::cout.precision(13);
 | 
					// change the criteria as evals are supposed to be sorted, all evals smaller(larger) than Nstop should have converged
 | 
				
			||||||
	    std::cout<<GridLogMessage << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<j<<"] ";
 | 
							    if((vv<eresid*eresid) && (j == Nconv) ){
 | 
				
			||||||
	    std::cout<<"eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[j];
 | 
							      Iconv[Nconv] = j;
 | 
				
			||||||
	    std::cout<<"|H B[i] - eval[i]B[i]|^2 "<< std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv;
 | 
							      ++Nconv;
 | 
				
			||||||
	    std::cout<<" "<< vnum/(sqrt(vden)*sqrt(vv0)) << std::endl;
 | 
							    }
 | 
				
			||||||
	    
 | 
							}
 | 
				
			||||||
	// change the criteria as evals are supposed to be sorted, all evals smaller(larger) than Nstop should have converged
 | 
						}
 | 
				
			||||||
	    if((vv<eresid*eresid) && (j == Nconv) ){
 | 
					 | 
				
			||||||
	      Iconv[Nconv] = j;
 | 
					 | 
				
			||||||
	      ++Nconv;
 | 
					 | 
				
			||||||
	    }
 | 
					 | 
				
			||||||
	  }
 | 
					 | 
				
			||||||
//	t1=usecond()/1e6;
 | 
					 | 
				
			||||||
//	std::cout<<GridLogMessage <<"IRL::Convergence rotation: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
	t1=usecond()/1e6;
 | 
						t1=usecond()/1e6;
 | 
				
			||||||
	std::cout<<GridLogMessage <<"IRL::convergence testing: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
 | 
						std::cout<<GridLogMessage <<"IRL::convergence testing: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
 | 
				
			||||||
@@ -816,35 +808,34 @@ PARALLEL_FOR_LOOP
 | 
				
			|||||||
       // Sorting
 | 
					       // Sorting
 | 
				
			||||||
       eval.resize(Nconv);
 | 
					       eval.resize(Nconv);
 | 
				
			||||||
       evec.resize(Nconv,grid);
 | 
					       evec.resize(Nconv,grid);
 | 
				
			||||||
#ifndef MEM_SAVE2
 | 
					#ifndef MEM_SAVE
 | 
				
			||||||
       for(int i=0; i<Nconv; ++i){
 | 
					       for(int i=0; i<Nconv; ++i){
 | 
				
			||||||
         eval[i] = eval2[Iconv[i]];
 | 
					         eval[i] = eval2[Iconv[i]];
 | 
				
			||||||
         evec[i] = B[Iconv[i]];
 | 
					         evec[i] = B[Iconv[i]];
 | 
				
			||||||
       }
 | 
					       }
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
#if 0
 | 
					 | 
				
			||||||
	Field B(grid);
 | 
					 | 
				
			||||||
	int thr=GridThread::GetThreads();
 | 
					 | 
				
			||||||
	int me = GridThread::ThreadBarrier();
 | 
					 | 
				
			||||||
	printf("thr=%d ss=%d me=%d\n",thr,ss,me);fflush(stdout);
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
       for(int i=0; i<Nconv; ++i)
 | 
					       for(int i=0; i<Nconv; ++i)
 | 
				
			||||||
         eval[i] = eval2[Iconv[i]];
 | 
					         eval[i] = eval2[Iconv[i]];
 | 
				
			||||||
//	int thr=GridThread::GetThreads();
 | 
					//	int thr=GridThread::GetThreads();
 | 
				
			||||||
//	printf("thr=%d\n",thr);
 | 
					//	printf("thr=%d\n",thr);
 | 
				
			||||||
 | 
						Field B(grid);
 | 
				
			||||||
PARALLEL_FOR_LOOP
 | 
					PARALLEL_FOR_LOOP
 | 
				
			||||||
	for(int ss=0;ss < grid->oSites();ss++){
 | 
						for(int ss=0;ss < grid->oSites();ss++){
 | 
				
			||||||
		auto B2 = evec[0]._odata[0];
 | 
							int thr=GridThread::GetThreads();
 | 
				
			||||||
		std::vector < decltype( B2 ) > B(Nm,B2);
 | 
							int me = GridThread::ThreadBarrier();
 | 
				
			||||||
		for(int j=0; j<Nconv; ++j) B[Iconv[j]]=0.;
 | 
					//		printf("thr=%d ss=%d me=%d\n",thr,ss,me);fflush(stdout);
 | 
				
			||||||
 | 
					//		auto B2 = evec[0]._odata[0];
 | 
				
			||||||
 | 
					//		std::vector < decltype( B2 ) > B(Nm,B2);
 | 
				
			||||||
 | 
							assert( (Nm*thr)<grid->oSites());
 | 
				
			||||||
 | 
							for(int j=0; j<Nconv; ++j) B._odata[Iconv[j]+Nm*me]=0.;
 | 
				
			||||||
		for(int j=0; j<Nconv; ++j){
 | 
							for(int j=0; j<Nconv; ++j){
 | 
				
			||||||
			for(int k=0; k<Nm ; ++k){
 | 
								for(int k=0; k<Nm ; ++k){
 | 
				
			||||||
			    B[Iconv[j]] +=Qt[k+Nm*Iconv[j]] * evec[k]._odata[ss]; 
 | 
								    B._odata[Iconv[j]+Nm*me] +=Qt[k+Nm*Iconv[j]] * evec[k]._odata[ss]; 
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		for(int j=0; j<Nconv; ++j){
 | 
							for(int j=0; j<Nconv; ++j){
 | 
				
			||||||
			evec[Iconv[j]]._odata[ss] = B[Iconv[j]];
 | 
								evec[Iconv[j]]._odata[ss] = B._odata[Iconv[j]+Nm*me];
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user