From 867fe930188b5c4f90031b277037129183bff8fe Mon Sep 17 00:00:00 2001 From: Chulwoo Jung Date: Tue, 2 May 2017 01:26:22 -0400 Subject: [PATCH] First Rotate reorg done. --- .../iterative/ImplicitlyRestartedLanczos.h | 186 ++++++++++-------- 1 file changed, 104 insertions(+), 82 deletions(-) diff --git a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h index 33248b4f..d12b6d38 100644 --- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ -484,6 +484,107 @@ public: for(int k=0; k& Qt, + DenseVector& evec, + int k1, int k2 + ) +{ + GridBase *grid = evec[0]._grid; + DenseVector B(Nm,grid); // waste of space replicating + if (0) { // old implementation without blocking + for(int i=0; i<(Nk+1); ++i) B[i] = 0.0; + + for(int j=k1-1; joSites();ss++){ + for(int jj=k1-1; jj& Qt, + DenseVector& evec, + int k1, int k2 + ) +//Christoph's version. Still fails on CJ's workstation for some reason +{ + GridBase *grid = evec[0]._grid; +#pragma omp parallel + { + typedef typename Field::vector_object vobj; + std::vector < vobj > B(Nm); +#pragma omp for + for(int ss=0;ss < grid->oSites();ss++){ + for(int j=0; j& Qt, + DenseVector& evec, + int k1, int k2 + ) +{ + GridBase *grid = evec[0]._grid; + int j_block = 24; int k_block=24; + + assert(k20); + Field B(grid); +PARALLEL_FOR_LOOP + for(int ss=0;ss < grid->oSites();ss++){ +// auto B2 = evec[0]._odata[0]; +// std::vector < decltype( B2 ) > B(Nm*thr,B2); + int thr=GridThread::GetThreads(); + int me = GridThread::ThreadBarrier(); +// printf("thr=%d ss=%d me=%d\n",thr,ss,me);fflush(stdout); + assert(Nm*throSites()); + for(int j=0; j K P = M − K † @@ -525,9 +626,6 @@ until convergence DenseVector Qt(Nm*Nm); DenseVector Iconv(Nm); -#if (!defined MEM_SAVE ) - DenseVector B(Nm,grid); // waste of space replicating -#endif Field f(grid); Field v(grid); @@ -624,85 +722,9 @@ until convergence assert(k2oSites();ss++){ - for(int jj=k1-1; jj B(Nm); -#pragma omp for - for(int ss=0;ss < grid->oSites();ss++){ - for(int j=0; j0); - Field B(grid); -PARALLEL_FOR_LOOP - for(int ss=0;ss < grid->oSites();ss++){ -// auto B2 = evec[0]._odata[0]; -// std::vector < decltype( B2 ) > B(Nm*thr,B2); - int thr=GridThread::GetThreads(); - int me = GridThread::ThreadBarrier(); -// printf("thr=%d ss=%d me=%d\n",thr,ss,me);fflush(stdout); - assert(Nm*throSites()); - for(int j=0; j