mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
GPU changes and threading macros replaced
This commit is contained in:
parent
8e99264f40
commit
ac56965306
@ -232,19 +232,17 @@ public:
|
|||||||
result = source;
|
result = source;
|
||||||
int pc = processor_coor[dim];
|
int pc = processor_coor[dim];
|
||||||
for(int p=0;p<processors[dim];p++) {
|
for(int p=0;p<processors[dim];p++) {
|
||||||
PARALLEL_REGION
|
thread_region {
|
||||||
{
|
|
||||||
std::vector<int> cbuf(Nd);
|
std::vector<int> cbuf(Nd);
|
||||||
sobj s;
|
sobj s;
|
||||||
|
|
||||||
PARALLEL_FOR_LOOP_INTERN
|
thread_loop_in_region( (int idx=0;idx<sgrid->lSites();idx++), {
|
||||||
for(int idx=0;idx<sgrid->lSites();idx++) {
|
|
||||||
sgrid->LocalIndexToLocalCoor(idx,cbuf);
|
sgrid->LocalIndexToLocalCoor(idx,cbuf);
|
||||||
peekLocalSite(s,result,cbuf);
|
peekLocalSite(s,result,cbuf);
|
||||||
cbuf[dim]+=((pc+p) % processors[dim])*L;
|
cbuf[dim]+=((pc+p) % processors[dim])*L;
|
||||||
// cbuf[dim]+=p*L;
|
// cbuf[dim]+=p*L;
|
||||||
pokeLocalSite(s,pgbuf,cbuf);
|
pokeLocalSite(s,pgbuf,cbuf);
|
||||||
}
|
} );
|
||||||
}
|
}
|
||||||
if (p != processors[dim] - 1)
|
if (p != processors[dim] - 1)
|
||||||
{
|
{
|
||||||
@ -256,19 +254,18 @@ public:
|
|||||||
int NN=pencil_g.lSites();
|
int NN=pencil_g.lSites();
|
||||||
GridStopWatch timer;
|
GridStopWatch timer;
|
||||||
timer.Start();
|
timer.Start();
|
||||||
PARALLEL_REGION
|
thread_region {
|
||||||
{
|
|
||||||
std::vector<int> cbuf(Nd);
|
std::vector<int> cbuf(Nd);
|
||||||
|
|
||||||
PARALLEL_FOR_LOOP_INTERN
|
thread_loop_in_region( (int idx=0;idx<NN;idx++), {
|
||||||
for(int idx=0;idx<NN;idx++) {
|
|
||||||
pencil_g.LocalIndexToLocalCoor(idx, cbuf);
|
pencil_g.LocalIndexToLocalCoor(idx, cbuf);
|
||||||
if ( cbuf[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0
|
if ( cbuf[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0
|
||||||
FFTW_scalar *in = (FFTW_scalar *)&pgbuf._odata[idx];
|
FFTW_scalar *in = (FFTW_scalar *)&pgbuf._odata[idx];
|
||||||
FFTW_scalar *out= (FFTW_scalar *)&pgbuf._odata[idx];
|
FFTW_scalar *out= (FFTW_scalar *)&pgbuf._odata[idx];
|
||||||
FFTW<scalar>::fftw_execute_dft(p,in,out);
|
FFTW<scalar>::fftw_execute_dft(p,in,out);
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
}
|
}
|
||||||
timer.Stop();
|
timer.Stop();
|
||||||
|
|
||||||
@ -280,19 +277,18 @@ public:
|
|||||||
flops+= flops_call*NN;
|
flops+= flops_call*NN;
|
||||||
|
|
||||||
// writing out result
|
// writing out result
|
||||||
PARALLEL_REGION
|
thread_region {
|
||||||
{
|
|
||||||
std::vector<int> clbuf(Nd), cgbuf(Nd);
|
std::vector<int> clbuf(Nd), cgbuf(Nd);
|
||||||
sobj s;
|
sobj s;
|
||||||
|
|
||||||
PARALLEL_FOR_LOOP_INTERN
|
thread_loop_in_region( (int idx=0;idx<sgrid->lSites();idx++), {
|
||||||
for(int idx=0;idx<sgrid->lSites();idx++) {
|
|
||||||
sgrid->LocalIndexToLocalCoor(idx,clbuf);
|
sgrid->LocalIndexToLocalCoor(idx,clbuf);
|
||||||
cgbuf = clbuf;
|
cgbuf = clbuf;
|
||||||
cgbuf[dim] = clbuf[dim]+L*pc;
|
cgbuf[dim] = clbuf[dim]+L*pc;
|
||||||
peekLocalSite(s,pgbuf,cgbuf);
|
peekLocalSite(s,pgbuf,cgbuf);
|
||||||
pokeLocalSite(s,result,clbuf);
|
pokeLocalSite(s,result,clbuf);
|
||||||
}
|
});
|
||||||
}
|
}
|
||||||
result = result*div;
|
result = result*div;
|
||||||
|
|
||||||
|
@ -35,7 +35,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
// LinearOperators Take a something and return a something.
|
// LinearOperators Take a something and return a something.
|
||||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
//
|
//
|
||||||
// Hopefully linearity is satisfied and the AdjOp is indeed the Hermitian conjugateugate (transpose if real):
|
// Hopefully linearity is satisfied and the AdjOp is indeed the Hermitian Conjugateugate (transpose if real):
|
||||||
//SBase
|
//SBase
|
||||||
// i) F(a x + b y) = aF(x) + b F(y).
|
// i) F(a x + b y) = aF(x) + b F(y).
|
||||||
// ii) <x|Op|y> = <y|AdjOp|x>^\ast
|
// ii) <x|Op|y> = <y|AdjOp|x>^\ast
|
||||||
|
@ -55,7 +55,7 @@ private:
|
|||||||
public:
|
public:
|
||||||
void csv(std::ostream &out){
|
void csv(std::ostream &out){
|
||||||
RealD diff = hi-lo;
|
RealD diff = hi-lo;
|
||||||
RealD delta = (hi-lo)*1.0e-9;
|
RealD delta = diff*1.0e-9;
|
||||||
for (RealD x=lo; x<hi; x+=delta) {
|
for (RealD x=lo; x<hi; x+=delta) {
|
||||||
delta*=1.1;
|
delta*=1.1;
|
||||||
RealD f = approx(x);
|
RealD f = approx(x);
|
||||||
|
@ -60,7 +60,7 @@ public:
|
|||||||
if(degree == 0){ chi = zero; return chi; }
|
if(degree == 0){ chi = zero; return chi; }
|
||||||
else if(degree == 1){ return prev_solns[0]; }
|
else if(degree == 1){ return prev_solns[0]; }
|
||||||
|
|
||||||
RealD dot;
|
// RealD dot;
|
||||||
ComplexD xp;
|
ComplexD xp;
|
||||||
Field r(phi); // residual
|
Field r(phi); // residual
|
||||||
Field Mv(phi);
|
Field Mv(phi);
|
||||||
@ -92,7 +92,7 @@ public:
|
|||||||
for(int j=0; j<degree; j++){
|
for(int j=0; j<degree; j++){
|
||||||
for(int k=j+1; k<degree; k++){
|
for(int k=j+1; k<degree; k++){
|
||||||
G[j][k] = innerProduct(v[j],MdagMv[k]);
|
G[j][k] = innerProduct(v[j],MdagMv[k]);
|
||||||
G[k][j] = std::conj(G[j][k]);
|
G[k][j] = conjugate(G[j][k]);
|
||||||
}}
|
}}
|
||||||
|
|
||||||
// Gauss-Jordan elimination with partial pivoting
|
// Gauss-Jordan elimination with partial pivoting
|
||||||
@ -100,7 +100,7 @@ public:
|
|||||||
|
|
||||||
// Perform partial pivoting
|
// Perform partial pivoting
|
||||||
int k = i;
|
int k = i;
|
||||||
for(int j=i+1; j<degree; j++){ if(std::abs(G[j][j]) > std::abs(G[k][k])){ k = j; } }
|
for(int j=i+1; j<degree; j++){ if(abs(G[j][j]) > abs(G[k][k])){ k = j; } }
|
||||||
if(k != i){
|
if(k != i){
|
||||||
xp = b[k];
|
xp = b[k];
|
||||||
b[k] = b[i];
|
b[k] = b[i];
|
||||||
@ -136,7 +136,7 @@ public:
|
|||||||
for(int i=0; i<degree; i++){
|
for(int i=0; i<degree; i++){
|
||||||
tmp = -b[i];
|
tmp = -b[i];
|
||||||
for(int j=0; j<degree; j++){ tmp += G[i][j]*a[j]; }
|
for(int j=0; j<degree; j++){ tmp += G[i][j]*a[j]; }
|
||||||
tmp = std::conj(tmp)*tmp;
|
tmp = conjugate(tmp)*tmp;
|
||||||
true_r += std::sqrt(tmp.real());
|
true_r += std::sqrt(tmp.real());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -298,7 +298,7 @@ void AlgRemez::stpini(bigfloat *step) {
|
|||||||
// Search for error maxima and minima
|
// Search for error maxima and minima
|
||||||
void AlgRemez::search(bigfloat *step) {
|
void AlgRemez::search(bigfloat *step) {
|
||||||
bigfloat a, q, xm, ym, xn, yn, xx0, xx1;
|
bigfloat a, q, xm, ym, xn, yn, xx0, xx1;
|
||||||
int i, j, meq, emsign, ensign, steps;
|
int i, meq, emsign, ensign, steps;
|
||||||
|
|
||||||
meq = neq + 1;
|
meq = neq + 1;
|
||||||
bigfloat *yy = new bigfloat[meq];
|
bigfloat *yy = new bigfloat[meq];
|
||||||
@ -306,7 +306,6 @@ void AlgRemez::search(bigfloat *step) {
|
|||||||
bigfloat eclose = 1.0e30;
|
bigfloat eclose = 1.0e30;
|
||||||
bigfloat farther = 0l;
|
bigfloat farther = 0l;
|
||||||
|
|
||||||
j = 1;
|
|
||||||
xx0 = apstrt;
|
xx0 = apstrt;
|
||||||
|
|
||||||
for (i = 0; i < meq; i++) {
|
for (i = 0; i < meq; i++) {
|
||||||
|
@ -30,13 +30,14 @@ directory
|
|||||||
#ifndef GRID_BLOCK_CONJUGATE_GRADIENT_H
|
#ifndef GRID_BLOCK_CONJUGATE_GRADIENT_H
|
||||||
#define GRID_BLOCK_CONJUGATE_GRADIENT_H
|
#define GRID_BLOCK_CONJUGATE_GRADIENT_H
|
||||||
|
|
||||||
|
#include <Grid/lattice/Lattice_matrix_reduction.h>
|
||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS };
|
enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS };
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
// Block conjugate gradient. Dimension zero should be the block direction
|
// Block Conjugate gradient. Dimension zero should be the block direction
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
template <class Field>
|
template <class Field>
|
||||||
class BlockConjugateGradient : public OperatorFunction<Field> {
|
class BlockConjugateGradient : public OperatorFunction<Field> {
|
||||||
@ -178,7 +179,7 @@ public:
|
|||||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||||
|
|
||||||
/************************************************************************
|
/************************************************************************
|
||||||
* Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001)
|
* Block Conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001)
|
||||||
************************************************************************
|
************************************************************************
|
||||||
* Dimensions:
|
* Dimensions:
|
||||||
*
|
*
|
||||||
@ -276,7 +277,7 @@ public:
|
|||||||
|
|
||||||
for(int b=0;b<Nblock;b++) {
|
for(int b=0;b<Nblock;b++) {
|
||||||
rrsum+=real(m_rr(b,b));
|
rrsum+=real(m_rr(b,b));
|
||||||
rr = real(m_rr(b,b))/ssq[b];
|
rr =real(m_rr(b,b))/ssq[b];
|
||||||
if ( rr > max_resid ) max_resid = rr;
|
if ( rr > max_resid ) max_resid = rr;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -317,7 +318,7 @@ public:
|
|||||||
IterationsToComplete = k;
|
IterationsToComplete = k;
|
||||||
}
|
}
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
// Block conjugate gradient; Original O'Leary Dimension zero should be the block direction
|
// Block Conjugate gradient; Original O'Leary Dimension zero should be the block direction
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
|
void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
|
||||||
{
|
{
|
||||||
@ -360,7 +361,7 @@ public:
|
|||||||
|
|
||||||
|
|
||||||
/************************************************************************
|
/************************************************************************
|
||||||
* Block conjugate gradient (Stephen Pickles, thesis 1995, pp 71, O Leary 1980)
|
* Block Conjugate gradient (Stephen Pickles, thesis 1995, pp 71, O Leary 1980)
|
||||||
************************************************************************
|
************************************************************************
|
||||||
* O'Leary : R = B - A X
|
* O'Leary : R = B - A X
|
||||||
* O'Leary : P = M R ; preconditioner M = 1
|
* O'Leary : P = M R ; preconditioner M = 1
|
||||||
@ -463,7 +464,7 @@ public:
|
|||||||
IterationsToComplete = k;
|
IterationsToComplete = k;
|
||||||
}
|
}
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
// multiRHS conjugate gradient. Dimension zero should be the block direction
|
// multiRHS Conjugate gradient. Dimension zero should be the block direction
|
||||||
// Use this for spread out across nodes
|
// Use this for spread out across nodes
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
|
void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
|
||||||
|
@ -135,12 +135,12 @@ public:
|
|||||||
Linop.HermOpAndNorm(psi, mmp, d, qq);
|
Linop.HermOpAndNorm(psi, mmp, d, qq);
|
||||||
p = mmp - src;
|
p = mmp - src;
|
||||||
|
|
||||||
RealD srcnorm = sqrt(norm2(src));
|
RealD srcnorm = std::sqrt(norm2(src));
|
||||||
RealD resnorm = sqrt(norm2(p));
|
RealD resnorm = std::sqrt(norm2(p));
|
||||||
RealD true_residual = resnorm / srcnorm;
|
RealD true_residual = resnorm / srcnorm;
|
||||||
|
|
||||||
std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k << std::endl;
|
std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k << std::endl;
|
||||||
std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl;
|
std::cout << GridLogMessage << "\tComputed residual " << std::sqrt(cp / ssq)<<std::endl;
|
||||||
std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
|
std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
|
||||||
std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
|
std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
|
||||||
|
|
||||||
|
@ -110,7 +110,7 @@ public:
|
|||||||
// Check if guess is really REALLY good :)
|
// Check if guess is really REALLY good :)
|
||||||
if (cp <= rsq) {
|
if (cp <= rsq) {
|
||||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate guess was REALLY good\n";
|
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate guess was REALLY good\n";
|
||||||
std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl;
|
std::cout << GridLogMessage << "\tComputed residual " << std::sqrt(cp / ssq)<<std::endl;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -180,12 +180,12 @@ public:
|
|||||||
Linop_d.HermOpAndNorm(psi, mmp, d, qq);
|
Linop_d.HermOpAndNorm(psi, mmp, d, qq);
|
||||||
p = mmp - src;
|
p = mmp - src;
|
||||||
|
|
||||||
RealD srcnorm = sqrt(norm2(src));
|
RealD srcnorm = std::sqrt(norm2(src));
|
||||||
RealD resnorm = sqrt(norm2(p));
|
RealD resnorm = std::sqrt(norm2(p));
|
||||||
RealD true_residual = resnorm / srcnorm;
|
RealD true_residual = resnorm / srcnorm;
|
||||||
|
|
||||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate Converged on iteration " << k << " after " << l << " reliable updates" << std::endl;
|
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate Converged on iteration " << k << " after " << l << " reliable updates" << std::endl;
|
||||||
std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl;
|
std::cout << GridLogMessage << "\tComputed residual " << std::sqrt(cp / ssq)<<std::endl;
|
||||||
std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
|
std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
|
||||||
std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
|
std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
|
||||||
|
|
||||||
|
@ -49,7 +49,7 @@ public:
|
|||||||
|
|
||||||
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
|
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
|
||||||
|
|
||||||
RealD a, b, c, d;
|
RealD a, b; // c, d;
|
||||||
RealD cp, ssq,rsq;
|
RealD cp, ssq,rsq;
|
||||||
|
|
||||||
RealD rAr, rAAr, rArp;
|
RealD rAr, rAAr, rArp;
|
||||||
@ -95,8 +95,8 @@ public:
|
|||||||
axpy(r,-1.0,src,Ap);
|
axpy(r,-1.0,src,Ap);
|
||||||
RealD true_resid = norm2(r)/ssq;
|
RealD true_resid = norm2(r)/ssq;
|
||||||
std::cout<<GridLogMessage<<"ConjugateResidual: Converged on iteration " <<k
|
std::cout<<GridLogMessage<<"ConjugateResidual: Converged on iteration " <<k
|
||||||
<< " computed residual "<<sqrt(cp/ssq)
|
<< " computed residual "<<std::sqrt(cp/ssq)
|
||||||
<< " true residual "<<sqrt(true_resid)
|
<< " true residual "<<std::sqrt(true_resid)
|
||||||
<< " target " <<Tolerance <<std::endl;
|
<< " target " <<Tolerance <<std::endl;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -299,7 +299,7 @@ public:
|
|||||||
template<typename T> static RealD normalise(T& v)
|
template<typename T> static RealD normalise(T& v)
|
||||||
{
|
{
|
||||||
RealD nn = norm2(v);
|
RealD nn = norm2(v);
|
||||||
nn = sqrt(nn);
|
nn = std::sqrt(nn);
|
||||||
v = v * (1.0/nn);
|
v = v * (1.0/nn);
|
||||||
return nn;
|
return nn;
|
||||||
}
|
}
|
||||||
@ -464,7 +464,7 @@ until convergence
|
|||||||
f *= Qt(k2-1,Nm-1);
|
f *= Qt(k2-1,Nm-1);
|
||||||
f += lme[k2-1] * evec[k2];
|
f += lme[k2-1] * evec[k2];
|
||||||
beta_k = norm2(f);
|
beta_k = norm2(f);
|
||||||
beta_k = sqrt(beta_k);
|
beta_k = std::sqrt(beta_k);
|
||||||
std::cout<<GridLogIRL<<" beta(k) = "<<beta_k<<std::endl;
|
std::cout<<GridLogIRL<<" beta(k) = "<<beta_k<<std::endl;
|
||||||
|
|
||||||
RealD betar = 1.0/beta_k;
|
RealD betar = 1.0/beta_k;
|
||||||
@ -817,7 +817,7 @@ void diagonalize_QR(std::vector<RealD>& lmd, std::vector<RealD>& lme,
|
|||||||
|
|
||||||
// determination of 2x2 leading submatrix
|
// determination of 2x2 leading submatrix
|
||||||
RealD dsub = lmd[kmax-1]-lmd[kmax-2];
|
RealD dsub = lmd[kmax-1]-lmd[kmax-2];
|
||||||
RealD dd = sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]);
|
RealD dd = std::sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]);
|
||||||
RealD Dsh = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub)));
|
RealD Dsh = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub)));
|
||||||
// (Dsh: shift)
|
// (Dsh: shift)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user