Mirror of https://github.com/paboyle/Grid.git

Working version of Lanczos without the extra copy.

commit 93cb5d4e97
parent 9e48b7dfda
Author: Chulwoo Jung
Date:   2017-04-06 23:35:30 -04:00
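This commit removes the duplicate Krylov basis `DenseVector<Field> B(Nm,grid)` that `calc()` used when applying the QR rotation matrix `Qt`, and instead rotates the `evec` vectors in place, site by site, through a small scratch buffer. As orientation before the diff, here is a minimal sketch of that in-place rotation in plain C++, with `std::vector` standing in for Grid's `Field`/`_odata` types (all names are illustrative, not Grid's):

// Minimal sketch (not Grid code) of the MEM_SAVE idea: rotate Nm basis
// vectors by the Nm x Nm matrix Qt in place, one site at a time, through a
// small scratch buffer instead of a full second copy of the basis.
#include <cstddef>
#include <vector>

// evec[k][ss] is element ss of basis vector k; Qt is Nm x Nm, Qt[k + Nm*j].
void rotate_in_place(std::vector<std::vector<double>>& evec,
                     const std::vector<double>& Qt,
                     int k1, int k2, int Nm, std::size_t sites)
{
  std::vector<double> scratch(Nm);
  for (std::size_t ss = 0; ss < sites; ++ss) {
    // accumulate the rotated values for this site into scratch
    for (int j = k1 - 1; j < k2 + 1; ++j) {
      scratch[j] = 0.0;
      for (int k = 0; k < Nm; ++k)
        scratch[j] += Qt[k + Nm * j] * evec[k][ss];
    }
    // only evec[*][ss] was read, so the in-place write-back is safe
    for (int j = k1 - 1; j < k2 + 1; ++j)
      evec[j][ss] = scratch[j];
  }
}

Each site reads only `evec[k][ss]` for its own `ss`, which is why accumulating into scratch and then writing back is safe; this is what lets the patch drop the extra `Nm`-field copy.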


@@ -8,6 +8,7 @@
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 Author: paboyle <paboyle@ph.ed.ac.uk>
+Author: Chulwoo Jung <chulwoo@bnl.gov>

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -45,6 +46,9 @@ void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e,
 #include "DenseMatrix.h"
 #include "EigenSort.h"

+// eliminate temporary vector in calc()
+#define MEM_SAVE
+
 namespace Grid {

 /////////////////////////////////////////////////////////////
@@ -496,8 +500,6 @@ until convergence
 */

 // alternate implementation for minimizing memory usage. May affect the performance
-#define MEM_SAVE
-#undef MEM_SAVE2
 void calc(DenseVector<RealD>& eval,
           DenseVector<Field>& evec,
           const Field& src,
@@ -520,13 +522,12 @@ until convergence
 DenseVector<RealD> Qt(Nm*Nm);
 DenseVector<int> Iconv(Nm);

-#if (!defined MEM_SAVE ) || (!defined MEM_SAVE2)
+#if (!defined MEM_SAVE )
 DenseVector<Field> B(Nm,grid); // waste of space replicating
 #endif

 Field f(grid);
 Field v(grid);
-// auto B2 = evec[0]._odata[0];

 int k1 = 1;
 int k2 = Nk;
@@ -612,7 +613,7 @@ until convergence
 assert(k2<Nm);

 #ifndef MEM_SAVE
-if (0) {
+if (0) { // old implementation without blocking
 for(int i=0; i<(Nk+1); ++i) B[i] = 0.0;

 for(int j=k1-1; j<k2+1; ++j){
@@ -621,13 +622,8 @@ if (0) {
 B[j] += Qt[k+Nm*j] * evec[k];
 }
 }
-t1=usecond()/1e6;
-std::cout<<GridLogMessage <<"IRL::QR Rotate: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
-}
-#endif
-
-#ifndef MEM_SAVE
-{
+}
+{
 for(int i=0; i<(Nk+1); ++i) {
 B[i] = 0.0;
 B[i].checkerboard = evec[0].checkerboard;
@@ -643,7 +639,7 @@ PARALLEL_FOR_LOOP
 }
 }
 }
 }
 for(int j=k1-1; j<k2+1; ++j) evec[j] = B[j];
 #else
 {
@@ -651,25 +647,24 @@ PARALLEL_FOR_LOOP
 assert(k2<Nm);
 assert(k1>0);

-// DenseVector < decltype(B2) > B(Nm);
-// std::vector < decltype( B2 ) > B(Nm*thr,B2);
 Field B(grid);
 PARALLEL_FOR_LOOP
 for(int ss=0;ss < grid->oSites();ss++){
+// auto B2 = evec[0]._odata[0];
+// std::vector < decltype( B2 ) > B(Nm*thr,B2);
 int thr=GridThread::GetThreads();
 int me = GridThread::ThreadBarrier();
-printf("thr=%d ss=%d me=%d\n",thr,ss,me);fflush(stdout);
+// printf("thr=%d ss=%d me=%d\n",thr,ss,me);fflush(stdout);
+assert(Nm*thr<grid->oSites());
 for(int j=0; j<Nm; ++j) B._odata[j+Nm*me]=0.;

 for(int j=k1-1; j<(k2+1); ++j){
 for(int k=0; k<Nm ; ++k){
 B._odata[j+Nm*me] +=Qt[k+Nm*j] * evec[k]._odata[ss];
 }
 }
-#if 1
 for(int j=k1-1; j<(k2+1); ++j){
 evec[j]._odata[ss] = B._odata[j+Nm*me];
 }
-#endif
 }
 }
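In the hunk above the scratch is a single shared `Field B(grid)` in which thread `me` owns the slots `j + Nm*me`, and the added `assert(Nm*thr<grid->oSites())` checks that one `Nm`-slice per thread actually fits inside that one field. A hedged, Grid-free sketch of the same per-thread layout, using OpenMP in place of Grid's `PARALLEL_FOR_LOOP`/`GridThread` (names are illustrative):

// Per-thread scratch slices in one shared buffer: thread `me` uses
// B[Nm*me .. Nm*me+Nm), so sites processed concurrently by different
// threads never touch the same scratch slots.
#include <cstddef>
#include <vector>
#include <omp.h>

void rotate_blocked(std::vector<std::vector<double>>& evec,
                    const std::vector<double>& Qt,
                    int k1, int k2, int Nm, int sites)
{
  int thr = omp_get_max_threads();
  std::vector<double> B((std::size_t)Nm * thr); // one Nm-slice per thread
  #pragma omp parallel for
  for (int ss = 0; ss < sites; ++ss) {
    int me = omp_get_thread_num();
    for (int j = k1 - 1; j < k2 + 1; ++j) {
      B[j + Nm * me] = 0.0;
      for (int k = 0; k < Nm; ++k)
        B[j + Nm * me] += Qt[k + Nm * j] * evec[k][ss];
    }
    for (int j = k1 - 1; j < k2 + 1; ++j)
      evec[j][ss] = B[j + Nm * me];
  }
}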
@@ -697,7 +692,7 @@ PARALLEL_FOR_LOOP
 t1=usecond()/1e6;
 std::cout<<GridLogMessage <<"IRL::diagonalize: "<<t1-t0<< "seconds"<<std::endl; t0=t1;

-#ifndef MEM_SAVE2
+#ifndef MEM_SAVE
 if (0) {
 for(int k = 0; k<Nk; ++k) B[k]=0.0;
@@ -761,7 +756,7 @@ PARALLEL_FOR_LOOP
 } // i-loop end
 // std::cout<<GridLogMessage << std::resetiosflags(std::ios_base::scientific);

 #else
 {
 Field B(grid);
 for(int j = 0; j<Nk; ++j){
@@ -769,7 +764,6 @@ PARALLEL_FOR_LOOP
 B.checkerboard = evec[0].checkerboard;
 for(int k = 0; k<Nk; ++k){
 B += Qt[k+j*Nm] * evec[k];
-// B[Iconv[j]] +=Qt[k+Nm*Iconv[j]] * evec[k]._odata[ss];
 }
 std::cout<<GridLogMessage << "norm(B["<<j<<"])="<<norm2(B)<<std::endl;
 // _poly(_Linop,B,v);
@@ -788,15 +782,13 @@ PARALLEL_FOR_LOOP
 std::cout<<"|H B[i] - eval[i]B[i]|^2 "<< std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv;
 std::cout<<" "<< vnum/(sqrt(vden)*sqrt(vv0)) << std::endl;
 // change the criteria as evals are supposed to be sorted, all evals smaller(larger) than Nstop should have converged
 if((vv<eresid*eresid) && (j == Nconv) ){
 Iconv[Nconv] = j;
 ++Nconv;
 }
 }
-// t1=usecond()/1e6;
-// std::cout<<GridLogMessage <<"IRL::Convergence rotation: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
-}
+}
 #endif

 t1=usecond()/1e6;
 std::cout<<GridLogMessage <<"IRL::convergence testing: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
@@ -816,35 +808,34 @@ PARALLEL_FOR_LOOP
 // Sorting
 eval.resize(Nconv);
 evec.resize(Nconv,grid);

-#ifndef MEM_SAVE2
+#ifndef MEM_SAVE
 for(int i=0; i<Nconv; ++i){
 eval[i] = eval2[Iconv[i]];
 evec[i] = B[Iconv[i]];
 }
 #else
-#if 0
-Field B(grid);
-int thr=GridThread::GetThreads();
-int me = GridThread::ThreadBarrier();
-printf("thr=%d ss=%d me=%d\n",thr,ss,me);fflush(stdout);
-#endif
 {
 for(int i=0; i<Nconv; ++i)
 eval[i] = eval2[Iconv[i]];
 // int thr=GridThread::GetThreads();
 // printf("thr=%d\n",thr);

+Field B(grid);
 PARALLEL_FOR_LOOP
 for(int ss=0;ss < grid->oSites();ss++){
-auto B2 = evec[0]._odata[0];
-std::vector < decltype( B2 ) > B(Nm,B2);
-for(int j=0; j<Nconv; ++j) B[Iconv[j]]=0.;
+int thr=GridThread::GetThreads();
+int me = GridThread::ThreadBarrier();
+// printf("thr=%d ss=%d me=%d\n",thr,ss,me);fflush(stdout);
+// auto B2 = evec[0]._odata[0];
+// std::vector < decltype( B2 ) > B(Nm,B2);
+assert( (Nm*thr)<grid->oSites());
+for(int j=0; j<Nconv; ++j) B._odata[Iconv[j]+Nm*me]=0.;
 for(int j=0; j<Nconv; ++j){
 for(int k=0; k<Nm ; ++k){
-B[Iconv[j]] +=Qt[k+Nm*Iconv[j]] * evec[k]._odata[ss];
+B._odata[Iconv[j]+Nm*me] +=Qt[k+Nm*Iconv[j]] * evec[k]._odata[ss];
 }
 }
 for(int j=0; j<Nconv; ++j){
-evec[Iconv[j]]._odata[ss] = B._odata[Iconv[j]+Nm*me];
+evec[Iconv[j]]._odata[ss] = B._odata[Iconv[j]+Nm*me];
 }
 }
 }
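This last hunk applies the same trick to the post-diagonalization copy of the converged vectors: the old `MEM_SAVE2` path constructed a fresh `std::vector<decltype(B2)> B(Nm,B2)` inside every site iteration, while the new code hoists a single `Field B(grid)` out of the loop and addresses per-thread slices through `Iconv`. A toy contrast of the two allocation patterns (illustrative only, not Grid code):

#include <cstddef>
#include <vector>

// Old pattern: a fresh Nm-element scratch vector constructed per site,
// i.e. an allocation inside the hot loop.
void per_site_scratch(int sites, int Nm) {
  for (int ss = 0; ss < sites; ++ss) {
    std::vector<double> B(Nm, 0.0);
    (void)B; // ... rotate this site using B ...
  }
}

// New pattern: one allocation hoisted out of the loop, sliced per thread;
// thread `me` reuses B[Nm*me .. Nm*me+Nm) for every site it processes.
void hoisted_scratch(int sites, int Nm, int thr) {
  std::vector<double> B((std::size_t)Nm * thr, 0.0);
  for (int ss = 0; ss < sites; ++ss) {
    // ... rotate this site using the current thread's slice of B ...
  }
  (void)B;
}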