1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 09:15:38 +01:00

GPU changes and threading macros replaced

This commit is contained in:
paboyle 2018-01-24 13:28:30 +00:00
parent 8e99264f40
commit ac56965306
10 changed files with 38 additions and 42 deletions

View File

@ -232,19 +232,17 @@ public:
result = source; result = source;
int pc = processor_coor[dim]; int pc = processor_coor[dim];
for(int p=0;p<processors[dim];p++) { for(int p=0;p<processors[dim];p++) {
PARALLEL_REGION thread_region {
{
std::vector<int> cbuf(Nd); std::vector<int> cbuf(Nd);
sobj s; sobj s;
PARALLEL_FOR_LOOP_INTERN thread_loop_in_region( (int idx=0;idx<sgrid->lSites();idx++), {
for(int idx=0;idx<sgrid->lSites();idx++) {
sgrid->LocalIndexToLocalCoor(idx,cbuf); sgrid->LocalIndexToLocalCoor(idx,cbuf);
peekLocalSite(s,result,cbuf); peekLocalSite(s,result,cbuf);
cbuf[dim]+=((pc+p) % processors[dim])*L; cbuf[dim]+=((pc+p) % processors[dim])*L;
// cbuf[dim]+=p*L; // cbuf[dim]+=p*L;
pokeLocalSite(s,pgbuf,cbuf); pokeLocalSite(s,pgbuf,cbuf);
} } );
} }
if (p != processors[dim] - 1) if (p != processors[dim] - 1)
{ {
@ -256,19 +254,18 @@ public:
int NN=pencil_g.lSites(); int NN=pencil_g.lSites();
GridStopWatch timer; GridStopWatch timer;
timer.Start(); timer.Start();
PARALLEL_REGION thread_region {
{
std::vector<int> cbuf(Nd); std::vector<int> cbuf(Nd);
PARALLEL_FOR_LOOP_INTERN thread_loop_in_region( (int idx=0;idx<NN;idx++), {
for(int idx=0;idx<NN;idx++) {
pencil_g.LocalIndexToLocalCoor(idx, cbuf); pencil_g.LocalIndexToLocalCoor(idx, cbuf);
if ( cbuf[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0 if ( cbuf[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0
FFTW_scalar *in = (FFTW_scalar *)&pgbuf._odata[idx]; FFTW_scalar *in = (FFTW_scalar *)&pgbuf._odata[idx];
FFTW_scalar *out= (FFTW_scalar *)&pgbuf._odata[idx]; FFTW_scalar *out= (FFTW_scalar *)&pgbuf._odata[idx];
FFTW<scalar>::fftw_execute_dft(p,in,out); FFTW<scalar>::fftw_execute_dft(p,in,out);
} }
} });
} }
timer.Stop(); timer.Stop();
@ -280,19 +277,18 @@ public:
flops+= flops_call*NN; flops+= flops_call*NN;
// writing out result // writing out result
PARALLEL_REGION thread_region {
{
std::vector<int> clbuf(Nd), cgbuf(Nd); std::vector<int> clbuf(Nd), cgbuf(Nd);
sobj s; sobj s;
PARALLEL_FOR_LOOP_INTERN thread_loop_in_region( (int idx=0;idx<sgrid->lSites();idx++), {
for(int idx=0;idx<sgrid->lSites();idx++) {
sgrid->LocalIndexToLocalCoor(idx,clbuf); sgrid->LocalIndexToLocalCoor(idx,clbuf);
cgbuf = clbuf; cgbuf = clbuf;
cgbuf[dim] = clbuf[dim]+L*pc; cgbuf[dim] = clbuf[dim]+L*pc;
peekLocalSite(s,pgbuf,cgbuf); peekLocalSite(s,pgbuf,cgbuf);
pokeLocalSite(s,result,clbuf); pokeLocalSite(s,result,clbuf);
} });
} }
result = result*div; result = result*div;

View File

@ -35,7 +35,7 @@ NAMESPACE_BEGIN(Grid);
// LinearOperators Take a something and return a something. // LinearOperators Take a something and return a something.
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
// //
// Hopefully linearity is satisfied and the AdjOp is indeed the Hermitian conjugateugate (transpose if real): // Hopefully linearity is satisfied and the AdjOp is indeed the Hermitian Conjugateugate (transpose if real):
//SBase //SBase
// i) F(a x + b y) = aF(x) + b F(y). // i) F(a x + b y) = aF(x) + b F(y).
// ii) <x|Op|y> = <y|AdjOp|x>^\ast // ii) <x|Op|y> = <y|AdjOp|x>^\ast

View File

@ -55,7 +55,7 @@ private:
public: public:
void csv(std::ostream &out){ void csv(std::ostream &out){
RealD diff = hi-lo; RealD diff = hi-lo;
RealD delta = (hi-lo)*1.0e-9; RealD delta = diff*1.0e-9;
for (RealD x=lo; x<hi; x+=delta) { for (RealD x=lo; x<hi; x+=delta) {
delta*=1.1; delta*=1.1;
RealD f = approx(x); RealD f = approx(x);

View File

@ -60,7 +60,7 @@ public:
if(degree == 0){ chi = zero; return chi; } if(degree == 0){ chi = zero; return chi; }
else if(degree == 1){ return prev_solns[0]; } else if(degree == 1){ return prev_solns[0]; }
RealD dot; // RealD dot;
ComplexD xp; ComplexD xp;
Field r(phi); // residual Field r(phi); // residual
Field Mv(phi); Field Mv(phi);
@ -92,7 +92,7 @@ public:
for(int j=0; j<degree; j++){ for(int j=0; j<degree; j++){
for(int k=j+1; k<degree; k++){ for(int k=j+1; k<degree; k++){
G[j][k] = innerProduct(v[j],MdagMv[k]); G[j][k] = innerProduct(v[j],MdagMv[k]);
G[k][j] = std::conj(G[j][k]); G[k][j] = conjugate(G[j][k]);
}} }}
// Gauss-Jordan elimination with partial pivoting // Gauss-Jordan elimination with partial pivoting
@ -100,7 +100,7 @@ public:
// Perform partial pivoting // Perform partial pivoting
int k = i; int k = i;
for(int j=i+1; j<degree; j++){ if(std::abs(G[j][j]) > std::abs(G[k][k])){ k = j; } } for(int j=i+1; j<degree; j++){ if(abs(G[j][j]) > abs(G[k][k])){ k = j; } }
if(k != i){ if(k != i){
xp = b[k]; xp = b[k];
b[k] = b[i]; b[k] = b[i];
@ -136,7 +136,7 @@ public:
for(int i=0; i<degree; i++){ for(int i=0; i<degree; i++){
tmp = -b[i]; tmp = -b[i];
for(int j=0; j<degree; j++){ tmp += G[i][j]*a[j]; } for(int j=0; j<degree; j++){ tmp += G[i][j]*a[j]; }
tmp = std::conj(tmp)*tmp; tmp = conjugate(tmp)*tmp;
true_r += std::sqrt(tmp.real()); true_r += std::sqrt(tmp.real());
} }

View File

@ -298,7 +298,7 @@ void AlgRemez::stpini(bigfloat *step) {
// Search for error maxima and minima // Search for error maxima and minima
void AlgRemez::search(bigfloat *step) { void AlgRemez::search(bigfloat *step) {
bigfloat a, q, xm, ym, xn, yn, xx0, xx1; bigfloat a, q, xm, ym, xn, yn, xx0, xx1;
int i, j, meq, emsign, ensign, steps; int i, meq, emsign, ensign, steps;
meq = neq + 1; meq = neq + 1;
bigfloat *yy = new bigfloat[meq]; bigfloat *yy = new bigfloat[meq];
@ -306,7 +306,6 @@ void AlgRemez::search(bigfloat *step) {
bigfloat eclose = 1.0e30; bigfloat eclose = 1.0e30;
bigfloat farther = 0l; bigfloat farther = 0l;
j = 1;
xx0 = apstrt; xx0 = apstrt;
for (i = 0; i < meq; i++) { for (i = 0; i < meq; i++) {

View File

@ -30,13 +30,14 @@ directory
#ifndef GRID_BLOCK_CONJUGATE_GRADIENT_H #ifndef GRID_BLOCK_CONJUGATE_GRADIENT_H
#define GRID_BLOCK_CONJUGATE_GRADIENT_H #define GRID_BLOCK_CONJUGATE_GRADIENT_H
#include <Grid/lattice/Lattice_matrix_reduction.h>
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS }; enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS };
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// Block conjugate gradient. Dimension zero should be the block direction // Block Conjugate gradient. Dimension zero should be the block direction
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
template <class Field> template <class Field>
class BlockConjugateGradient : public OperatorFunction<Field> { class BlockConjugateGradient : public OperatorFunction<Field> {
@ -178,7 +179,7 @@ public:
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
/************************************************************************ /************************************************************************
* Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001) * Block Conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001)
************************************************************************ ************************************************************************
* Dimensions: * Dimensions:
* *
@ -276,7 +277,7 @@ public:
for(int b=0;b<Nblock;b++) { for(int b=0;b<Nblock;b++) {
rrsum+=real(m_rr(b,b)); rrsum+=real(m_rr(b,b));
rr = real(m_rr(b,b))/ssq[b]; rr =real(m_rr(b,b))/ssq[b];
if ( rr > max_resid ) max_resid = rr; if ( rr > max_resid ) max_resid = rr;
} }
@ -317,7 +318,7 @@ public:
IterationsToComplete = k; IterationsToComplete = k;
} }
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// Block conjugate gradient; Original O'Leary Dimension zero should be the block direction // Block Conjugate gradient; Original O'Leary Dimension zero should be the block direction
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi) void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
{ {
@ -360,7 +361,7 @@ public:
/************************************************************************ /************************************************************************
* Block conjugate gradient (Stephen Pickles, thesis 1995, pp 71, O Leary 1980) * Block Conjugate gradient (Stephen Pickles, thesis 1995, pp 71, O Leary 1980)
************************************************************************ ************************************************************************
* O'Leary : R = B - A X * O'Leary : R = B - A X
* O'Leary : P = M R ; preconditioner M = 1 * O'Leary : P = M R ; preconditioner M = 1
@ -463,7 +464,7 @@ public:
IterationsToComplete = k; IterationsToComplete = k;
} }
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// multiRHS conjugate gradient. Dimension zero should be the block direction // multiRHS Conjugate gradient. Dimension zero should be the block direction
// Use this for spread out across nodes // Use this for spread out across nodes
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi) void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)

View File

@ -135,12 +135,12 @@ public:
Linop.HermOpAndNorm(psi, mmp, d, qq); Linop.HermOpAndNorm(psi, mmp, d, qq);
p = mmp - src; p = mmp - src;
RealD srcnorm = sqrt(norm2(src)); RealD srcnorm = std::sqrt(norm2(src));
RealD resnorm = sqrt(norm2(p)); RealD resnorm = std::sqrt(norm2(p));
RealD true_residual = resnorm / srcnorm; RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k << std::endl; std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k << std::endl;
std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl; std::cout << GridLogMessage << "\tComputed residual " << std::sqrt(cp / ssq)<<std::endl;
std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl; std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl; std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;

View File

@ -110,7 +110,7 @@ public:
// Check if guess is really REALLY good :) // Check if guess is really REALLY good :)
if (cp <= rsq) { if (cp <= rsq) {
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate guess was REALLY good\n"; std::cout << GridLogMessage << "ConjugateGradientReliableUpdate guess was REALLY good\n";
std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl; std::cout << GridLogMessage << "\tComputed residual " << std::sqrt(cp / ssq)<<std::endl;
return; return;
} }
@ -180,12 +180,12 @@ public:
Linop_d.HermOpAndNorm(psi, mmp, d, qq); Linop_d.HermOpAndNorm(psi, mmp, d, qq);
p = mmp - src; p = mmp - src;
RealD srcnorm = sqrt(norm2(src)); RealD srcnorm = std::sqrt(norm2(src));
RealD resnorm = sqrt(norm2(p)); RealD resnorm = std::sqrt(norm2(p));
RealD true_residual = resnorm / srcnorm; RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate Converged on iteration " << k << " after " << l << " reliable updates" << std::endl; std::cout << GridLogMessage << "ConjugateGradientReliableUpdate Converged on iteration " << k << " after " << l << " reliable updates" << std::endl;
std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl; std::cout << GridLogMessage << "\tComputed residual " << std::sqrt(cp / ssq)<<std::endl;
std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl; std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl; std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;

View File

@ -49,7 +49,7 @@ public:
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){ void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
RealD a, b, c, d; RealD a, b; // c, d;
RealD cp, ssq,rsq; RealD cp, ssq,rsq;
RealD rAr, rAAr, rArp; RealD rAr, rAAr, rArp;
@ -95,8 +95,8 @@ public:
axpy(r,-1.0,src,Ap); axpy(r,-1.0,src,Ap);
RealD true_resid = norm2(r)/ssq; RealD true_resid = norm2(r)/ssq;
std::cout<<GridLogMessage<<"ConjugateResidual: Converged on iteration " <<k std::cout<<GridLogMessage<<"ConjugateResidual: Converged on iteration " <<k
<< " computed residual "<<sqrt(cp/ssq) << " computed residual "<<std::sqrt(cp/ssq)
<< " true residual "<<sqrt(true_resid) << " true residual "<<std::sqrt(true_resid)
<< " target " <<Tolerance <<std::endl; << " target " <<Tolerance <<std::endl;
return; return;
} }

View File

@ -299,7 +299,7 @@ public:
template<typename T> static RealD normalise(T& v) template<typename T> static RealD normalise(T& v)
{ {
RealD nn = norm2(v); RealD nn = norm2(v);
nn = sqrt(nn); nn = std::sqrt(nn);
v = v * (1.0/nn); v = v * (1.0/nn);
return nn; return nn;
} }
@ -464,7 +464,7 @@ until convergence
f *= Qt(k2-1,Nm-1); f *= Qt(k2-1,Nm-1);
f += lme[k2-1] * evec[k2]; f += lme[k2-1] * evec[k2];
beta_k = norm2(f); beta_k = norm2(f);
beta_k = sqrt(beta_k); beta_k = std::sqrt(beta_k);
std::cout<<GridLogIRL<<" beta(k) = "<<beta_k<<std::endl; std::cout<<GridLogIRL<<" beta(k) = "<<beta_k<<std::endl;
RealD betar = 1.0/beta_k; RealD betar = 1.0/beta_k;
@ -817,7 +817,7 @@ void diagonalize_QR(std::vector<RealD>& lmd, std::vector<RealD>& lme,
// determination of 2x2 leading submatrix // determination of 2x2 leading submatrix
RealD dsub = lmd[kmax-1]-lmd[kmax-2]; RealD dsub = lmd[kmax-1]-lmd[kmax-2];
RealD dd = sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]); RealD dd = std::sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]);
RealD Dsh = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub))); RealD Dsh = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub)));
// (Dsh: shift) // (Dsh: shift)