mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-15 02:05:37 +00:00
Working version of Lanczos without the extra copy.
This commit is contained in:
parent
9e48b7dfda
commit
93cb5d4e97
@ -8,6 +8,7 @@
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Chulwoo Jung <chulwoo@bnl.gov>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -45,6 +46,9 @@ void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e,
|
||||
#include "DenseMatrix.h"
|
||||
#include "EigenSort.h"
|
||||
|
||||
// eliminate temporary vector in calc()
|
||||
#define MEM_SAVE
|
||||
|
||||
namespace Grid {
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
@ -496,8 +500,6 @@ until convergence
|
||||
*/
|
||||
|
||||
// alternate implementation for minimizing memory usage. May affect the performance
|
||||
#define MEM_SAVE
|
||||
#undef MEM_SAVE2
|
||||
void calc(DenseVector<RealD>& eval,
|
||||
DenseVector<Field>& evec,
|
||||
const Field& src,
|
||||
@ -520,13 +522,12 @@ until convergence
|
||||
DenseVector<RealD> Qt(Nm*Nm);
|
||||
DenseVector<int> Iconv(Nm);
|
||||
|
||||
#if (!defined MEM_SAVE ) || (!defined MEM_SAVE2)
|
||||
#if (!defined MEM_SAVE )
|
||||
DenseVector<Field> B(Nm,grid); // waste of space replicating
|
||||
#endif
|
||||
|
||||
Field f(grid);
|
||||
Field v(grid);
|
||||
// auto B2 = evec[0]._odata[0];
|
||||
|
||||
int k1 = 1;
|
||||
int k2 = Nk;
|
||||
@ -612,7 +613,7 @@ until convergence
|
||||
assert(k2<Nm);
|
||||
|
||||
#ifndef MEM_SAVE
|
||||
if (0) {
|
||||
if (0) { // old implementation without blocking
|
||||
for(int i=0; i<(Nk+1); ++i) B[i] = 0.0;
|
||||
|
||||
for(int j=k1-1; j<k2+1; ++j){
|
||||
@ -621,12 +622,7 @@ if (0) {
|
||||
B[j] += Qt[k+Nm*j] * evec[k];
|
||||
}
|
||||
}
|
||||
t1=usecond()/1e6;
|
||||
std::cout<<GridLogMessage <<"IRL::QR Rotate: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef MEM_SAVE
|
||||
{
|
||||
for(int i=0; i<(Nk+1); ++i) {
|
||||
B[i] = 0.0;
|
||||
@ -651,25 +647,24 @@ PARALLEL_FOR_LOOP
|
||||
|
||||
assert(k2<Nm);
|
||||
assert(k1>0);
|
||||
// DenseVector < decltype(B2) > B(Nm);
|
||||
// std::vector < decltype( B2 ) > B(Nm*thr,B2);
|
||||
Field B(grid);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss < grid->oSites();ss++){
|
||||
// auto B2 = evec[0]._odata[0];
|
||||
// std::vector < decltype( B2 ) > B(Nm*thr,B2);
|
||||
int thr=GridThread::GetThreads();
|
||||
int me = GridThread::ThreadBarrier();
|
||||
printf("thr=%d ss=%d me=%d\n",thr,ss,me);fflush(stdout);
|
||||
// printf("thr=%d ss=%d me=%d\n",thr,ss,me);fflush(stdout);
|
||||
assert(Nm*thr<grid->oSites());
|
||||
for(int j=0; j<Nm; ++j) B._odata[j+Nm*me]=0.;
|
||||
for(int j=k1-1; j<(k2+1); ++j){
|
||||
for(int k=0; k<Nm ; ++k){
|
||||
B._odata[j+Nm*me] +=Qt[k+Nm*j] * evec[k]._odata[ss];
|
||||
}
|
||||
}
|
||||
#if 1
|
||||
for(int j=k1-1; j<(k2+1); ++j){
|
||||
evec[j]._odata[ss] = B._odata[j+Nm*me];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -697,7 +692,7 @@ PARALLEL_FOR_LOOP
|
||||
t1=usecond()/1e6;
|
||||
std::cout<<GridLogMessage <<"IRL::diagonalize: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
|
||||
|
||||
#ifndef MEM_SAVE2
|
||||
#ifndef MEM_SAVE
|
||||
if (0) {
|
||||
for(int k = 0; k<Nk; ++k) B[k]=0.0;
|
||||
|
||||
@ -769,7 +764,6 @@ PARALLEL_FOR_LOOP
|
||||
B.checkerboard = evec[0].checkerboard;
|
||||
for(int k = 0; k<Nk; ++k){
|
||||
B += Qt[k+j*Nm] * evec[k];
|
||||
// B[Iconv[j]] +=Qt[k+Nm*Iconv[j]] * evec[k]._odata[ss];
|
||||
}
|
||||
std::cout<<GridLogMessage << "norm(B["<<j<<"])="<<norm2(B)<<std::endl;
|
||||
// _poly(_Linop,B,v);
|
||||
@ -794,8 +788,6 @@ PARALLEL_FOR_LOOP
|
||||
++Nconv;
|
||||
}
|
||||
}
|
||||
// t1=usecond()/1e6;
|
||||
// std::cout<<GridLogMessage <<"IRL::Convergence rotation: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
|
||||
}
|
||||
#endif
|
||||
t1=usecond()/1e6;
|
||||
@ -816,35 +808,34 @@ PARALLEL_FOR_LOOP
|
||||
// Sorting
|
||||
eval.resize(Nconv);
|
||||
evec.resize(Nconv,grid);
|
||||
#ifndef MEM_SAVE2
|
||||
#ifndef MEM_SAVE
|
||||
for(int i=0; i<Nconv; ++i){
|
||||
eval[i] = eval2[Iconv[i]];
|
||||
evec[i] = B[Iconv[i]];
|
||||
}
|
||||
#else
|
||||
#if 0
|
||||
Field B(grid);
|
||||
int thr=GridThread::GetThreads();
|
||||
int me = GridThread::ThreadBarrier();
|
||||
printf("thr=%d ss=%d me=%d\n",thr,ss,me);fflush(stdout);
|
||||
#endif
|
||||
{
|
||||
for(int i=0; i<Nconv; ++i)
|
||||
eval[i] = eval2[Iconv[i]];
|
||||
// int thr=GridThread::GetThreads();
|
||||
// printf("thr=%d\n",thr);
|
||||
Field B(grid);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss < grid->oSites();ss++){
|
||||
auto B2 = evec[0]._odata[0];
|
||||
std::vector < decltype( B2 ) > B(Nm,B2);
|
||||
for(int j=0; j<Nconv; ++j) B[Iconv[j]]=0.;
|
||||
int thr=GridThread::GetThreads();
|
||||
int me = GridThread::ThreadBarrier();
|
||||
// printf("thr=%d ss=%d me=%d\n",thr,ss,me);fflush(stdout);
|
||||
// auto B2 = evec[0]._odata[0];
|
||||
// std::vector < decltype( B2 ) > B(Nm,B2);
|
||||
assert( (Nm*thr)<grid->oSites());
|
||||
for(int j=0; j<Nconv; ++j) B._odata[Iconv[j]+Nm*me]=0.;
|
||||
for(int j=0; j<Nconv; ++j){
|
||||
for(int k=0; k<Nm ; ++k){
|
||||
B[Iconv[j]] +=Qt[k+Nm*Iconv[j]] * evec[k]._odata[ss];
|
||||
B._odata[Iconv[j]+Nm*me] +=Qt[k+Nm*Iconv[j]] * evec[k]._odata[ss];
|
||||
}
|
||||
}
|
||||
for(int j=0; j<Nconv; ++j){
|
||||
evec[Iconv[j]]._odata[ss] = B[Iconv[j]];
|
||||
evec[Iconv[j]]._odata[ss] = B._odata[Iconv[j]+Nm*me];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user