From ac569653060c8249f1d81909660b4ae04cc8e79d Mon Sep 17 00:00:00 2001 From: paboyle Date: Wed, 24 Jan 2018 13:28:30 +0000 Subject: [PATCH] GPU changes and threading macros replaced --- lib/algorithms/FFT.h | 26 ++++++++----------- lib/algorithms/LinearOperator.h | 2 +- lib/algorithms/approx/Chebyshev.h | 2 +- lib/algorithms/approx/Forecast.h | 8 +++--- lib/algorithms/approx/Remez.cc | 3 +-- .../iterative/BlockConjugateGradient.h | 13 +++++----- lib/algorithms/iterative/ConjugateGradient.h | 6 ++--- .../ConjugateGradientReliableUpdate.h | 8 +++--- lib/algorithms/iterative/ConjugateResidual.h | 6 ++--- .../iterative/ImplicitlyRestartedLanczos.h | 6 ++--- 10 files changed, 38 insertions(+), 42 deletions(-) diff --git a/lib/algorithms/FFT.h b/lib/algorithms/FFT.h index e92540fb..e63ff123 100644 --- a/lib/algorithms/FFT.h +++ b/lib/algorithms/FFT.h @@ -232,19 +232,17 @@ public: result = source; int pc = processor_coor[dim]; for(int p=0;p cbuf(Nd); sobj s; - PARALLEL_FOR_LOOP_INTERN - for(int idx=0;idxlSites();idx++) { + thread_loop_in_region( (int idx=0;idxlSites();idx++), { sgrid->LocalIndexToLocalCoor(idx,cbuf); peekLocalSite(s,result,cbuf); cbuf[dim]+=((pc+p) % processors[dim])*L; // cbuf[dim]+=p*L; pokeLocalSite(s,pgbuf,cbuf); - } + } ); } if (p != processors[dim] - 1) { @@ -256,19 +254,18 @@ public: int NN=pencil_g.lSites(); GridStopWatch timer; timer.Start(); - PARALLEL_REGION - { + thread_region { + std::vector cbuf(Nd); - PARALLEL_FOR_LOOP_INTERN - for(int idx=0;idx::fftw_execute_dft(p,in,out); } - } + }); } timer.Stop(); @@ -280,19 +277,18 @@ public: flops+= flops_call*NN; // writing out result - PARALLEL_REGION - { + thread_region { + std::vector clbuf(Nd), cgbuf(Nd); sobj s; - PARALLEL_FOR_LOOP_INTERN - for(int idx=0;idxlSites();idx++) { + thread_loop_in_region( (int idx=0;idxlSites();idx++), { sgrid->LocalIndexToLocalCoor(idx,clbuf); cgbuf = clbuf; cgbuf[dim] = clbuf[dim]+L*pc; peekLocalSite(s,pgbuf,cgbuf); pokeLocalSite(s,result,clbuf); - } + }); } result = result*div; diff --git a/lib/algorithms/LinearOperator.h b/lib/algorithms/LinearOperator.h index 4bc169eb..968d0097 100644 --- a/lib/algorithms/LinearOperator.h +++ b/lib/algorithms/LinearOperator.h @@ -35,7 +35,7 @@ NAMESPACE_BEGIN(Grid); // LinearOperators Take a something and return a something. ///////////////////////////////////////////////////////////////////////////////////////////// // -// Hopefully linearity is satisfied and the AdjOp is indeed the Hermitian conjugateugate (transpose if real): +// Hopefully linearity is satisfied and the AdjOp is indeed the Hermitian Conjugateugate (transpose if real): //SBase // i) F(a x + b y) = aF(x) + b F(y). // ii) = ^\ast diff --git a/lib/algorithms/approx/Chebyshev.h b/lib/algorithms/approx/Chebyshev.h index 9ef468e4..d36d7363 100644 --- a/lib/algorithms/approx/Chebyshev.h +++ b/lib/algorithms/approx/Chebyshev.h @@ -55,7 +55,7 @@ private: public: void csv(std::ostream &out){ RealD diff = hi-lo; - RealD delta = (hi-lo)*1.0e-9; + RealD delta = diff*1.0e-9; for (RealD x=lo; x std::abs(G[k][k])){ k = j; } } + for(int j=i+1; j abs(G[k][k])){ k = j; } } if(k != i){ xp = b[k]; b[k] = b[i]; @@ -136,7 +136,7 @@ public: for(int i=0; i NAMESPACE_BEGIN(Grid); enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS }; ////////////////////////////////////////////////////////////////////////// -// Block conjugate gradient. Dimension zero should be the block direction +// Block Conjugate gradient. Dimension zero should be the block direction ////////////////////////////////////////////////////////////////////////// template class BlockConjugateGradient : public OperatorFunction { @@ -178,7 +179,7 @@ public: for(int b=0;b max_resid ) max_resid = rr; } @@ -317,7 +318,7 @@ public: IterationsToComplete = k; } ////////////////////////////////////////////////////////////////////////// - // Block conjugate gradient; Original O'Leary Dimension zero should be the block direction + // Block Conjugate gradient; Original O'Leary Dimension zero should be the block direction ////////////////////////////////////////////////////////////////////////// void BlockCGsolve(LinearOperatorBase &Linop, const Field &Src, Field &Psi) { @@ -360,7 +361,7 @@ public: /************************************************************************ - * Block conjugate gradient (Stephen Pickles, thesis 1995, pp 71, O Leary 1980) + * Block Conjugate gradient (Stephen Pickles, thesis 1995, pp 71, O Leary 1980) ************************************************************************ * O'Leary : R = B - A X * O'Leary : P = M R ; preconditioner M = 1 @@ -463,7 +464,7 @@ public: IterationsToComplete = k; } ////////////////////////////////////////////////////////////////////////// - // multiRHS conjugate gradient. Dimension zero should be the block direction + // multiRHS Conjugate gradient. Dimension zero should be the block direction // Use this for spread out across nodes ////////////////////////////////////////////////////////////////////////// void CGmultiRHSsolve(LinearOperatorBase &Linop, const Field &Src, Field &Psi) diff --git a/lib/algorithms/iterative/ConjugateGradient.h b/lib/algorithms/iterative/ConjugateGradient.h index 24d69e38..f195f735 100644 --- a/lib/algorithms/iterative/ConjugateGradient.h +++ b/lib/algorithms/iterative/ConjugateGradient.h @@ -135,12 +135,12 @@ public: Linop.HermOpAndNorm(psi, mmp, d, qq); p = mmp - src; - RealD srcnorm = sqrt(norm2(src)); - RealD resnorm = sqrt(norm2(p)); + RealD srcnorm = std::sqrt(norm2(src)); + RealD resnorm = std::sqrt(norm2(p)); RealD true_residual = resnorm / srcnorm; std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k << std::endl; - std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)< &Linop,const Field &src, Field &psi){ - RealD a, b, c, d; + RealD a, b; // c, d; RealD cp, ssq,rsq; RealD rAr, rAAr, rArp; @@ -95,8 +95,8 @@ public: axpy(r,-1.0,src,Ap); RealD true_resid = norm2(r)/ssq; std::cout< static RealD normalise(T& v) { RealD nn = norm2(v); - nn = sqrt(nn); + nn = std::sqrt(nn); v = v * (1.0/nn); return nn; } @@ -464,7 +464,7 @@ until convergence f *= Qt(k2-1,Nm-1); f += lme[k2-1] * evec[k2]; beta_k = norm2(f); - beta_k = sqrt(beta_k); + beta_k = std::sqrt(beta_k); std::cout<