mirror of
https://github.com/paboyle/Grid.git
synced 2025-11-06 06:49:30 +00:00
Compare commits
6 Commits
9203126aa5
...
specflow
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7780d88d26 | ||
|
|
2bf9179d2c | ||
|
|
c606f5dca0 | ||
|
|
8419cc5c64 | ||
|
|
2cc6deb8e0 | ||
|
|
19d0590579 |
@@ -73,6 +73,7 @@ NAMESPACE_CHECK(BiCGSTAB);
|
||||
#include <Grid/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h>
|
||||
#include <Grid/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h>
|
||||
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
|
||||
#include <Grid/algorithms/iterative/SimpleLanczos.h>
|
||||
#include <Grid/algorithms/iterative/PowerMethod.h>
|
||||
#include <Grid/algorithms/iterative/AdefGeneric.h>
|
||||
#include <Grid/algorithms/iterative/AdefMrhs.h>
|
||||
|
||||
@@ -269,9 +269,7 @@ public:
|
||||
RealD xscale = 2.0/(hi-lo);
|
||||
RealD mscale = -(hi+lo)/(hi-lo);
|
||||
Linop.HermOp(T0,y);
|
||||
grid->Barrier();
|
||||
axpby(T1,xscale,mscale,y,in);
|
||||
grid->Barrier();
|
||||
|
||||
// sum = .5 c[0] T0 + c[1] T1
|
||||
// out = ()*T0 + Coeffs[1]*T1;
|
||||
|
||||
931
Grid/algorithms/iterative/SimpleLanczos.h
Normal file
931
Grid/algorithms/iterative/SimpleLanczos.h
Normal file
@@ -0,0 +1,931 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./Grid/algorithms/iterative/SimpleLanczos.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Chulwoo Jung <chulwoo@bnl.gov>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LANC_H
|
||||
#define GRID_LANC_H
|
||||
|
||||
#include <string.h> //memset
|
||||
|
||||
#ifdef USE_LAPACK
|
||||
#ifdef USE_MKL
|
||||
#include<mkl_lapack.h>
|
||||
#else
|
||||
void LAPACK_dstegr (char *jobz, char *range, int *n, double *d, double *e,
|
||||
double *vl, double *vu, int *il, int *iu, double *abstol,
|
||||
int *m, double *w, double *z, int *ldz, int *isuppz,
|
||||
double *work, int *lwork, int *iwork, int *liwork,
|
||||
int *info);
|
||||
//#include <lapacke/lapacke.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//#include <Grid/algorithms/densematrix/DenseMatrix.h>
|
||||
|
||||
// eliminate temporary vector in calc()
|
||||
#define MEM_SAVE
|
||||
|
||||
namespace Grid
|
||||
{
|
||||
|
||||
struct Bisection
|
||||
{
|
||||
|
||||
#if 0
|
||||
static void get_eig2 (int row_num, std::vector < RealD > &ALPHA,
|
||||
std::vector < RealD > &BETA,
|
||||
std::vector < RealD > &eig)
|
||||
{
|
||||
int i, j;
|
||||
std::vector < RealD > evec1 (row_num + 3);
|
||||
std::vector < RealD > evec2 (row_num + 3);
|
||||
RealD eps2;
|
||||
ALPHA[1] = 0.;
|
||||
BETHA[1] = 0.;
|
||||
for (i = 0; i < row_num - 1; i++)
|
||||
{
|
||||
ALPHA[i + 1] = A[i * (row_num + 1)].real ();
|
||||
BETHA[i + 2] = A[i * (row_num + 1) + 1].real ();
|
||||
}
|
||||
ALPHA[row_num] = A[(row_num - 1) * (row_num + 1)].real ();
|
||||
bisec (ALPHA, BETHA, row_num, 1, row_num, 1e-10, 1e-10, evec1, eps2);
|
||||
bisec (ALPHA, BETHA, row_num, 1, row_num, 1e-16, 1e-16, evec2, eps2);
|
||||
|
||||
// Do we really need to sort here?
|
||||
int begin = 1;
|
||||
int end = row_num;
|
||||
int swapped = 1;
|
||||
while (swapped)
|
||||
{
|
||||
swapped = 0;
|
||||
for (i = begin; i < end; i++)
|
||||
{
|
||||
if (mag (evec2[i]) > mag (evec2[i + 1]))
|
||||
{
|
||||
swap (evec2 + i, evec2 + i + 1);
|
||||
swapped = 1;
|
||||
}
|
||||
}
|
||||
end--;
|
||||
for (i = end - 1; i >= begin; i--)
|
||||
{
|
||||
if (mag (evec2[i]) > mag (evec2[i + 1]))
|
||||
{
|
||||
swap (evec2 + i, evec2 + i + 1);
|
||||
swapped = 1;
|
||||
}
|
||||
}
|
||||
begin++;
|
||||
}
|
||||
|
||||
for (i = 0; i < row_num; i++)
|
||||
{
|
||||
for (j = 0; j < row_num; j++)
|
||||
{
|
||||
if (i == j)
|
||||
H[i * row_num + j] = evec2[i + 1];
|
||||
else
|
||||
H[i * row_num + j] = 0.;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static void bisec (std::vector < RealD > &c,
|
||||
std::vector < RealD > &b,
|
||||
int n,
|
||||
int m1,
|
||||
int m2,
|
||||
RealD eps1,
|
||||
RealD relfeh, std::vector < RealD > &x, RealD & eps2)
|
||||
{
|
||||
std::vector < RealD > wu (n + 2);
|
||||
|
||||
RealD h, q, x1, xu, x0, xmin, xmax;
|
||||
int i, a, k;
|
||||
|
||||
b[1] = 0.0;
|
||||
xmin = c[n] - fabs (b[n]);
|
||||
xmax = c[n] + fabs (b[n]);
|
||||
for (i = 1; i < n; i++)
|
||||
{
|
||||
h = fabs (b[i]) + fabs (b[i + 1]);
|
||||
if (c[i] + h > xmax)
|
||||
xmax = c[i] + h;
|
||||
if (c[i] - h < xmin)
|
||||
xmin = c[i] - h;
|
||||
}
|
||||
xmax *= 2.;
|
||||
|
||||
eps2 = relfeh * ((xmin + xmax) > 0.0 ? xmax : -xmin);
|
||||
if (eps1 <= 0.0)
|
||||
eps1 = eps2;
|
||||
eps2 = 0.5 * eps1 + 7.0 * (eps2);
|
||||
x0 = xmax;
|
||||
for (i = m1; i <= m2; i++)
|
||||
{
|
||||
x[i] = xmax;
|
||||
wu[i] = xmin;
|
||||
}
|
||||
|
||||
for (k = m2; k >= m1; k--)
|
||||
{
|
||||
xu = xmin;
|
||||
i = k;
|
||||
do
|
||||
{
|
||||
if (xu < wu[i])
|
||||
{
|
||||
xu = wu[i];
|
||||
i = m1 - 1;
|
||||
}
|
||||
i--;
|
||||
}
|
||||
while (i >= m1);
|
||||
if (x0 > x[k])
|
||||
x0 = x[k];
|
||||
while ((x0 - xu) > 2 * relfeh * (fabs (xu) + fabs (x0)) + eps1)
|
||||
{
|
||||
x1 = (xu + x0) / 2;
|
||||
|
||||
a = 0;
|
||||
q = 1.0;
|
||||
for (i = 1; i <= n; i++)
|
||||
{
|
||||
q =
|
||||
c[i] - x1 -
|
||||
((q != 0.0) ? b[i] * b[i] / q : fabs (b[i]) / relfeh);
|
||||
if (q < 0)
|
||||
a++;
|
||||
}
|
||||
// printf("x1=%0.14e a=%d\n",x1,a);
|
||||
if (a < k)
|
||||
{
|
||||
if (a < m1)
|
||||
{
|
||||
xu = x1;
|
||||
wu[m1] = x1;
|
||||
}
|
||||
else
|
||||
{
|
||||
xu = x1;
|
||||
wu[a + 1] = x1;
|
||||
if (x[a] > x1)
|
||||
x[a] = x1;
|
||||
}
|
||||
}
|
||||
else
|
||||
x0 = x1;
|
||||
}
|
||||
printf ("x0=%0.14e xu=%0.14e k=%d\n", x0, xu, k);
|
||||
x[k] = (x0 + xu) / 2;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Simple Lanczos: eigenvalue-only iteration (no restart; only three vectors are kept)
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
template < class Field > class SimpleLanczos
|
||||
{
|
||||
|
||||
// Small threshold constant; not referenced by the live (non-#if 0) methods in this file.
const RealD small = 1.0e-16;
|
||||
public:
|
||||
// lock, get, converged: not read or written by the live methods in this file.
int lock;
|
||||
int get;
|
||||
// Iteration limit given to the constructor. NOTE(review): calc() does not consult it.
int Niter;
|
||||
int converged;
|
||||
|
||||
int Nstop; // Number of eigenvalues checked for convergence (calc() splits them across ranks)
|
||||
int Nk; // Number of converged eigenvalues sought; only used here to derive Np and for logging
|
||||
int Np; // Np -- number of spare vectors in the Krylov space (Nm - Nk)
|
||||
int Nm; // Nm -- total number of vectors; calc() runs Nm Lanczos steps per pass
|
||||
|
||||
|
||||
// Seconds accumulated by orthogonalize(); reset to zero at the start of calc().
RealD OrthoTime;
|
||||
|
||||
// Relative-change threshold calc() uses to decide an eigenvalue has stopped moving.
RealD eresid;
|
||||
|
||||
// SortEigen < Field > _sort;
|
||||
|
||||
// Operator applied in step() and RitzMatrix(); held by reference, so it must outlive this object.
LinearFunction < Field > &_Linop;
|
||||
|
||||
// OperatorFunction < Field > &_poly;
|
||||
|
||||
/////////////////////////
|
||||
// Constructor
|
||||
/////////////////////////
|
||||
// No-op: performs no initialisation.
void init ()
{
}
|
||||
// void Abort (int ff, std::vector < RealD > &evals, DenseVector < Denstd::vector < RealD > >&evecs);
|
||||
|
||||
// Construct the solver.
//   Linop    operator whose spectrum is sought (stored by reference)
//   _Nstop   number of eigenvalues checked for convergence
//   _Nk      number of eigenvalues sought
//   _Nm      total number of vectors; must exceed _Nk
//   _eresid  relative-change threshold used by calc()
//   _Niter   iteration limit (stored; see Niter)
// The initialiser list follows the member declaration order, which is the
// order the members are actually initialised in, and gives every data
// member a defined value (lock, get, converged and OrthoTime were
// previously left indeterminate).
SimpleLanczos (LinearFunction < Field > &Linop,        // op
               // OperatorFunction < Field > &poly,    // polynomial
               int _Nstop,      // evals checked for convergence
               int _Nk,         // sought vecs
               int _Nm,         // total vecs
               RealD _eresid,   // residual threshold
               int _Niter):     // Max iterations
  lock (0),
  get (0),
  Niter (_Niter),
  converged (0),
  Nstop (_Nstop),
  Nk (_Nk),
  Np (_Nm - _Nk),
  Nm (_Nm),
  OrthoTime (0.),
  eresid (_eresid),
  _Linop (Linop)
  // _poly (poly)
{
  // At least one spare vector is required.
  assert (Np > 0);
}
|
||||
|
||||
/////////////////////////
|
||||
// Sanity checked this routine (step) against Saad.
|
||||
/////////////////////////
|
||||
void RitzMatrix (std::vector < Field > &evec, int k)
|
||||
{
|
||||
|
||||
if (1)
|
||||
return;
|
||||
|
||||
GridBase *grid = evec[0].Grid();
|
||||
Field w (grid);
|
||||
std::cout << GridLogMessage << "RitzMatrix " << std::endl;
|
||||
for (int i = 0; i < k; i++)
|
||||
{
|
||||
_Linop(evec[i], w);
|
||||
// _poly(_Linop,evec[i],w);
|
||||
std::cout << GridLogMessage << "[" << i << "] ";
|
||||
for (int j = 0; j < k; j++)
|
||||
{
|
||||
ComplexD in = innerProduct (evec[j], w);
|
||||
if (fabs ((double) i - j) > 1)
|
||||
{
|
||||
if (abs (in) > 1.0e-9)
|
||||
{
|
||||
std::cout << GridLogMessage << "oops" << std::endl;
|
||||
abort ();
|
||||
}
|
||||
else
|
||||
std::cout << GridLogMessage << " 0 ";
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << GridLogMessage << " " << in << " ";
|
||||
}
|
||||
}
|
||||
std::cout << GridLogMessage << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void step (std::vector < RealD > &lmd,
|
||||
std::vector < RealD > &lme,
|
||||
Field & last, Field & current, Field & next, uint64_t k)
|
||||
{
|
||||
if (lmd.size () <= k)
|
||||
lmd.resize (k + Nm);
|
||||
if (lme.size () <= k)
|
||||
lme.resize (k + Nm);
|
||||
|
||||
|
||||
// _poly(_Linop,current,next ); // 3. wk:=Avk−βkv_{k−1}
|
||||
_Linop(current, next); // 3. wk:=Avk−βkv_{k−1}
|
||||
if (k > 0)
|
||||
{
|
||||
next -= lme[k - 1] * last;
|
||||
}
|
||||
// std::cout<<GridLogMessage << "<last|next>" << innerProduct(last,next) <<std::endl;
|
||||
|
||||
ComplexD zalph = innerProduct (current, next); // 4. αk:=(wk,vk)
|
||||
RealD alph = real (zalph);
|
||||
|
||||
next = next - alph * current; // 5. wk:=wk−αkvk
|
||||
// std::cout<<GridLogMessage << "<current|next>" << innerProduct(current,next) <<std::endl;
|
||||
|
||||
RealD beta = normalise (next); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
|
||||
// 7. vk+1 := wk/βk+1
|
||||
// norm=beta;
|
||||
|
||||
int interval = Nm / 100 + 1;
|
||||
if ((k % interval) == 0)
|
||||
std::
|
||||
cout << GridLogMessage << k << " : alpha = " << zalph << " beta " <<
|
||||
beta << std::endl;
|
||||
const RealD tiny = 1.0e-20;
|
||||
if (beta < tiny)
|
||||
{
|
||||
std::cout << GridLogMessage << " beta is tiny " << beta << std::
|
||||
endl;
|
||||
}
|
||||
lmd[k] = alph;
|
||||
lme[k] = beta;
|
||||
|
||||
}
|
||||
|
||||
void qr_decomp (std::vector < RealD > &lmd,
|
||||
std::vector < RealD > &lme,
|
||||
int Nk,
|
||||
int Nm,
|
||||
std::vector < RealD > &Qt, RealD Dsh, int kmin, int kmax)
|
||||
{
|
||||
int k = kmin - 1;
|
||||
RealD x;
|
||||
|
||||
RealD Fden = 1.0 / hypot (lmd[k] - Dsh, lme[k]);
|
||||
RealD c = (lmd[k] - Dsh) * Fden;
|
||||
RealD s = -lme[k] * Fden;
|
||||
|
||||
RealD tmpa1 = lmd[k];
|
||||
RealD tmpa2 = lmd[k + 1];
|
||||
RealD tmpb = lme[k];
|
||||
|
||||
lmd[k] = c * c * tmpa1 + s * s * tmpa2 - 2.0 * c * s * tmpb;
|
||||
lmd[k + 1] = s * s * tmpa1 + c * c * tmpa2 + 2.0 * c * s * tmpb;
|
||||
lme[k] = c * s * (tmpa1 - tmpa2) + (c * c - s * s) * tmpb;
|
||||
x = -s * lme[k + 1];
|
||||
lme[k + 1] = c * lme[k + 1];
|
||||
|
||||
for (int i = 0; i < Nk; ++i)
|
||||
{
|
||||
RealD Qtmp1 = Qt[i + Nm * k];
|
||||
RealD Qtmp2 = Qt[i + Nm * (k + 1)];
|
||||
Qt[i + Nm * k] = c * Qtmp1 - s * Qtmp2;
|
||||
Qt[i + Nm * (k + 1)] = s * Qtmp1 + c * Qtmp2;
|
||||
}
|
||||
|
||||
// Givens transformations
|
||||
for (int k = kmin; k < kmax - 1; ++k)
|
||||
{
|
||||
|
||||
RealD Fden = 1.0 / hypot (x, lme[k - 1]);
|
||||
RealD c = lme[k - 1] * Fden;
|
||||
RealD s = -x * Fden;
|
||||
|
||||
RealD tmpa1 = lmd[k];
|
||||
RealD tmpa2 = lmd[k + 1];
|
||||
RealD tmpb = lme[k];
|
||||
|
||||
lmd[k] = c * c * tmpa1 + s * s * tmpa2 - 2.0 * c * s * tmpb;
|
||||
lmd[k + 1] = s * s * tmpa1 + c * c * tmpa2 + 2.0 * c * s * tmpb;
|
||||
lme[k] = c * s * (tmpa1 - tmpa2) + (c * c - s * s) * tmpb;
|
||||
lme[k - 1] = c * lme[k - 1] - s * x;
|
||||
|
||||
if (k != kmax - 2)
|
||||
{
|
||||
x = -s * lme[k + 1];
|
||||
lme[k + 1] = c * lme[k + 1];
|
||||
}
|
||||
|
||||
for (int i = 0; i < Nk; ++i)
|
||||
{
|
||||
RealD Qtmp1 = Qt[i + Nm * k];
|
||||
RealD Qtmp2 = Qt[i + Nm * (k + 1)];
|
||||
Qt[i + Nm * k] = c * Qtmp1 - s * Qtmp2;
|
||||
Qt[i + Nm * (k + 1)] = s * Qtmp1 + c * Qtmp2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
#ifdef USE_LAPACK
|
||||
#ifdef USE_MKL
|
||||
#define LAPACK_INT MKL_INT
|
||||
#else
|
||||
#define LAPACK_INT long long
|
||||
#endif
|
||||
void diagonalize_lapack (std::vector < RealD > &lmd, std::vector < RealD > &lme, int N1, // all
|
||||
int N2, // get
|
||||
GridBase * grid)
|
||||
{
|
||||
const int size = Nm;
|
||||
LAPACK_INT NN = N1;
|
||||
double evals_tmp[NN];
|
||||
double DD[NN];
|
||||
double EE[NN];
|
||||
for (int i = 0; i < NN; i++)
|
||||
for (int j = i - 1; j <= i + 1; j++)
|
||||
if (j < NN && j >= 0)
|
||||
{
|
||||
if (i == j)
|
||||
DD[i] = lmd[i];
|
||||
if (i == j)
|
||||
evals_tmp[i] = lmd[i];
|
||||
if (j == (i - 1))
|
||||
EE[j] = lme[j];
|
||||
}
|
||||
LAPACK_INT evals_found;
|
||||
LAPACK_INT lwork =
|
||||
((18 * NN) >
|
||||
(1 + 4 * NN + NN * NN) ? (18 * NN) : (1 + 4 * NN + NN * NN));
|
||||
LAPACK_INT liwork = 3 + NN * 10;
|
||||
LAPACK_INT iwork[liwork];
|
||||
double work[lwork];
|
||||
LAPACK_INT isuppz[2 * NN];
|
||||
char jobz = 'N'; // calculate evals only
|
||||
char range = 'I'; // calculate il-th to iu-th evals
|
||||
// char range = 'A'; // calculate all evals
|
||||
char uplo = 'U'; // refer to upper half of original matrix
|
||||
char compz = 'I'; // Compute eigenvectors of tridiagonal matrix
|
||||
int ifail[NN];
|
||||
LAPACK_INT info;
|
||||
// int total = QMP_get_number_of_nodes();
|
||||
// int node = QMP_get_node_number();
|
||||
// GridBase *grid = evec[0]._grid;
|
||||
int total = grid->_Nprocessors;
|
||||
int node = grid->_processor;
|
||||
int interval = (NN / total) + 1;
|
||||
double vl = 0.0, vu = 0.0;
|
||||
LAPACK_INT il = interval * node + 1, iu = interval * (node + 1);
|
||||
if (iu > NN)
|
||||
iu = NN;
|
||||
double tol = 0.0;
|
||||
if (1)
|
||||
{
|
||||
memset (evals_tmp, 0, sizeof (double) * NN);
|
||||
if (il <= NN)
|
||||
{
|
||||
printf ("total=%d node=%d il=%d iu=%d\n", total, node, il, iu);
|
||||
#ifdef USE_MKL
|
||||
dstegr (&jobz, &range, &NN,
|
||||
#else
|
||||
LAPACK_dstegr (&jobz, &range, &NN,
|
||||
#endif
|
||||
(double *) DD, (double *) EE, &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A'
|
||||
&tol, // tolerance
|
||||
&evals_found, evals_tmp, (double *) NULL, &NN,
|
||||
isuppz, work, &lwork, iwork, &liwork, &info);
|
||||
for (int i = iu - 1; i >= il - 1; i--)
|
||||
{
|
||||
printf ("node=%d evals_found=%d evals_tmp[%d] = %g\n", node,
|
||||
evals_found, i - (il - 1), evals_tmp[i - (il - 1)]);
|
||||
evals_tmp[i] = evals_tmp[i - (il - 1)];
|
||||
if (il > 1)
|
||||
evals_tmp[i - (il - 1)] = 0.;
|
||||
}
|
||||
}
|
||||
{
|
||||
grid->GlobalSumVector (evals_tmp, NN);
|
||||
}
|
||||
}
|
||||
// cheating a bit. It is better to sort instead of just reversing it, but the document of the routine says evals are sorted in increasing order. qr gives evals in decreasing order.
|
||||
}
|
||||
#undef LAPACK_INT
|
||||
#endif
|
||||
|
||||
|
||||
void diagonalize (std::vector < RealD > &lmd,
|
||||
std::vector < RealD > &lme,
|
||||
int N2, int N1, GridBase * grid)
|
||||
{
|
||||
|
||||
#ifdef USE_LAPACK
|
||||
const int check_lapack = 0; // just use lapack if 0, check against lapack if 1
|
||||
|
||||
if (!check_lapack)
|
||||
return diagonalize_lapack (lmd, lme, N2, N1, grid);
|
||||
|
||||
// diagonalize_lapack(lmd2,lme2,Nm2,Nm,Qt,grid);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
// Scale v in place to unit 2-norm and return the norm it had before.
// The norm is not checked for zero before dividing.
static RealD normalise (Field & v)
{
  const RealD length = sqrt (norm2 (v));
  v = v * (1.0 / length);
  return length;
}
|
||||
|
||||
// Project evec[0..k-1] out of w, one vector at a time, then normalise w.
// The projection assumes the evec[j] already have unit norm.
// The elapsed wall time (seconds) is added to OrthoTime.
void orthogonalize (Field & w, std::vector < Field > &evec, int k)
{
  typedef typename Field::scalar_type MyComplex;

  double elapsed = -usecond () / 1e6;
  MyComplex overlap;

  // Disabled: re-orthonormalisation of the basis vectors themselves.
  if (0)
    {
      for (int j = 0; j < k; ++j)
        {
          normalise (evec[j]);
          for (int i = 0; i < j; i++)
            {
              overlap = innerProduct (evec[i], evec[j]);
              evec[j] = evec[j] - overlap * evec[i];
            }
        }
    }

  for (int j = 0; j < k; ++j)
    {
      overlap = innerProduct (evec[j], w);
      w = w - overlap * evec[j];
    }
  normalise (w);

  elapsed += usecond () / 1e6;
  OrthoTime += elapsed;
}
|
||||
|
||||
void setUnit_Qt (int Nm, std::vector < RealD > &Qt)
|
||||
{
|
||||
for (int i = 0; i < Qt.size (); ++i)
|
||||
Qt[i] = 0.0;
|
||||
for (int k = 0; k < Nm; ++k)
|
||||
Qt[k + k * Nm] = 1.0;
|
||||
}
|
||||
|
||||
|
||||
void calc (std::vector < RealD > &eval, const Field & src, int &Nconv)
|
||||
{
|
||||
|
||||
GridBase *grid = src.Grid();
|
||||
// assert(grid == src._grid);
|
||||
|
||||
std::
|
||||
cout << GridLogMessage << " -- Nk = " << Nk << " Np = " << Np << std::
|
||||
endl;
|
||||
std::cout << GridLogMessage << " -- Nm = " << Nm << std::endl;
|
||||
std::cout << GridLogMessage << " -- size of eval = " << eval.
|
||||
size () << std::endl;
|
||||
|
||||
// assert(c.size() && Nm == eval.size());
|
||||
|
||||
std::vector < RealD > lme (Nm);
|
||||
std::vector < RealD > lmd (Nm);
|
||||
|
||||
|
||||
Field current (grid);
|
||||
Field last (grid);
|
||||
Field next (grid);
|
||||
|
||||
Nconv = 0;
|
||||
|
||||
RealD beta_k;
|
||||
|
||||
// Set initial vector
|
||||
// (uniform vector) Why not src??
|
||||
// evec[0] = 1.0;
|
||||
current = src;
|
||||
std::cout << GridLogMessage << "norm2(src)= " << norm2 (src) << std::
|
||||
endl;
|
||||
normalise (current);
|
||||
std::
|
||||
cout << GridLogMessage << "norm2(evec[0])= " << norm2 (current) <<
|
||||
std::endl;
|
||||
|
||||
// Initial Nk steps
|
||||
OrthoTime = 0.;
|
||||
double t0 = usecond () / 1e6;
|
||||
RealD norm; // sqrt norm of last vector
|
||||
|
||||
uint64_t iter = 0;
|
||||
|
||||
bool initted = false;
|
||||
std::vector < RealD > low (Nstop * 10);
|
||||
std::vector < RealD > high (Nstop * 10);
|
||||
RealD cont = 0.;
|
||||
while (1) {
|
||||
cont = 0.;
|
||||
std::vector < RealD > lme2 (Nm);
|
||||
std::vector < RealD > lmd2 (Nm);
|
||||
for (uint64_t k = 0; k < Nm; ++k, iter++) {
|
||||
step (lmd, lme, last, current, next, iter);
|
||||
last = current;
|
||||
current = next;
|
||||
}
|
||||
double t1 = usecond () / 1e6;
|
||||
std::cout << GridLogMessage << "IRL::Initial steps: " << t1 -
|
||||
t0 << "seconds" << std::endl;
|
||||
t0 = t1;
|
||||
std::
|
||||
cout << GridLogMessage << "IRL::Initial steps:OrthoTime " <<
|
||||
OrthoTime << "seconds" << std::endl;
|
||||
|
||||
// getting eigenvalues
|
||||
lmd2.resize (iter + 2);
|
||||
lme2.resize (iter + 2);
|
||||
for (uint64_t k = 0; k < iter; ++k) {
|
||||
lmd2[k + 1] = lmd[k];
|
||||
lme2[k + 2] = lme[k];
|
||||
}
|
||||
t1 = usecond () / 1e6;
|
||||
std::cout << GridLogMessage << "IRL:: copy: " << t1 -
|
||||
t0 << "seconds" << std::endl;
|
||||
t0 = t1;
|
||||
{
|
||||
int total = grid->_Nprocessors;
|
||||
int node = grid->_processor;
|
||||
int interval = (Nstop / total) + 1;
|
||||
int iu = (iter + 1) - (interval * node + 1);
|
||||
int il = (iter + 1) - (interval * (node + 1));
|
||||
std::vector < RealD > eval2 (iter + 3);
|
||||
RealD eps2;
|
||||
Bisection::bisec (lmd2, lme2, iter, il, iu, 1e-16, 1e-10, eval2,
|
||||
eps2);
|
||||
// diagonalize(eval2,lme2,iter,Nk,grid);
|
||||
RealD diff = 0.;
|
||||
for (int i = il; i <= iu; i++) {
|
||||
if (initted)
|
||||
diff =
|
||||
fabs (eval2[i] - high[iu-i]) / (fabs (eval2[i]) +
|
||||
fabs (high[iu-i]));
|
||||
if (initted && (diff > eresid))
|
||||
cont = 1.;
|
||||
if (initted)
|
||||
printf ("eval[%d]=%0.14e %0.14e, %0.14e\n", i, eval2[i],
|
||||
high[iu-i], diff);
|
||||
high[iu-i] = eval2[i];
|
||||
}
|
||||
il = (interval * node + 1);
|
||||
iu = (interval * (node + 1));
|
||||
Bisection::bisec (lmd2, lme2, iter, il, iu, 1e-16, 1e-10, eval2,
|
||||
eps2);
|
||||
for (int i = il; i <= iu; i++) {
|
||||
if (initted)
|
||||
diff =
|
||||
fabs (eval2[i] - low[i]) / (fabs (eval2[i]) +
|
||||
fabs (low[i]));
|
||||
if (initted && (diff > eresid))
|
||||
cont = 1.;
|
||||
if (initted)
|
||||
printf ("eval[%d]=%0.14e %0.14e, %0.14e\n", i, eval2[i],
|
||||
low[i], diff);
|
||||
low[i] = eval2[i];
|
||||
}
|
||||
t1 = usecond () / 1e6;
|
||||
std::cout << GridLogMessage << "IRL:: diagonalize: " << t1 -
|
||||
t0 << "seconds" << std::endl;
|
||||
t0 = t1;
|
||||
}
|
||||
|
||||
for (uint64_t k = 0; k < Nk; ++k) {
|
||||
// eval[k] = eval2[k];
|
||||
}
|
||||
if (initted)
|
||||
{
|
||||
grid->GlobalSumVector (&cont, 1);
|
||||
if (cont < 1.) return;
|
||||
}
|
||||
initted = true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#if 0
|
||||
|
||||
/**
|
||||
There is some matrix Q such that for any vector y
|
||||
Q.e_1 = y and Q is unitary.
|
||||
**/
|
||||
template < class T >
|
||||
static T orthQ (DenseMatrix < T > &Q, std::vector < T > y)
|
||||
{
|
||||
int N = y.size (); //Matrix Size
|
||||
Fill (Q, 0.0);
|
||||
T tau;
|
||||
for (int i = 0; i < N; i++)
|
||||
{
|
||||
Q[i][0] = y[i];
|
||||
}
|
||||
T sig = conj (y[0]) * y[0];
|
||||
T tau0 = fabs (sqrt (sig));
|
||||
|
||||
for (int j = 1; j < N; j++)
|
||||
{
|
||||
sig += conj (y[j]) * y[j];
|
||||
tau = abs (sqrt (sig));
|
||||
|
||||
if (abs (tau0) > 0.0)
|
||||
{
|
||||
|
||||
T gam = conj ((y[j] / tau) / tau0);
|
||||
for (int k = 0; k <= j - 1; k++)
|
||||
{
|
||||
Q[k][j] = -gam * y[k];
|
||||
}
|
||||
Q[j][j] = tau0 / tau;
|
||||
}
|
||||
else
|
||||
{
|
||||
Q[j - 1][j] = 1.0;
|
||||
}
|
||||
tau0 = tau;
|
||||
}
|
||||
return tau;
|
||||
}
|
||||
|
||||
/**
|
||||
There is some matrix Q such that for any vector y
|
||||
Q.e_k = y and Q is unitary.
|
||||
**/
|
||||
template < class T >
|
||||
static T orthU (DenseMatrix < T > &Q, std::vector < T > y)
|
||||
{
|
||||
T tau = orthQ (Q, y);
|
||||
SL (Q);
|
||||
return tau;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
Wind up with a matrix with the first con rows untouched
|
||||
|
||||
say con = 2
|
||||
Q is such that Qdag H Q has {x, x, val, 0, 0, 0, 0, ...} as 1st colum
|
||||
and the matrix is upper hessenberg
|
||||
and with f and Q appropriately modidied with Q is the arnoldi factorization
|
||||
|
||||
**/
|
||||
|
||||
template < class T > static void Lock (DenseMatrix < T > &H, ///Hess mtx
|
||||
DenseMatrix < T > &Q, ///Lock Transform
|
||||
T val, ///value to be locked
|
||||
int con, ///number already locked
|
||||
RealD small, int dfg, bool herm)
|
||||
{
|
||||
//ForceTridiagonal(H);
|
||||
|
||||
int M = H.dim;
|
||||
DenseVector < T > vec;
|
||||
Resize (vec, M - con);
|
||||
|
||||
DenseMatrix < T > AH;
|
||||
Resize (AH, M - con, M - con);
|
||||
AH = GetSubMtx (H, con, M, con, M);
|
||||
|
||||
DenseMatrix < T > QQ;
|
||||
Resize (QQ, M - con, M - con);
|
||||
|
||||
Unity (Q);
|
||||
Unity (QQ);
|
||||
|
||||
DenseVector < T > evals;
|
||||
Resize (evals, M - con);
|
||||
DenseMatrix < T > evecs;
|
||||
Resize (evecs, M - con, M - con);
|
||||
|
||||
Wilkinson < T > (AH, evals, evecs, small);
|
||||
|
||||
int k = 0;
|
||||
RealD cold = abs (val - evals[k]);
|
||||
for (int i = 1; i < M - con; i++)
|
||||
{
|
||||
RealD cnew = abs (val - evals[i]);
|
||||
if (cnew < cold)
|
||||
{
|
||||
k = i;
|
||||
cold = cnew;
|
||||
}
|
||||
}
|
||||
vec = evecs[k];
|
||||
|
||||
ComplexD tau;
|
||||
orthQ (QQ, vec);
|
||||
//orthQM(QQ,AH,vec);
|
||||
|
||||
AH = Hermitian (QQ) * AH;
|
||||
AH = AH * QQ;
|
||||
|
||||
for (int i = con; i < M; i++)
|
||||
{
|
||||
for (int j = con; j < M; j++)
|
||||
{
|
||||
Q[i][j] = QQ[i - con][j - con];
|
||||
H[i][j] = AH[i - con][j - con];
|
||||
}
|
||||
}
|
||||
|
||||
for (int j = M - 1; j > con + 2; j--)
|
||||
{
|
||||
|
||||
DenseMatrix < T > U;
|
||||
Resize (U, j - 1 - con, j - 1 - con);
|
||||
DenseVector < T > z;
|
||||
Resize (z, j - 1 - con);
|
||||
T nm = norm (z);
|
||||
for (int k = con + 0; k < j - 1; k++)
|
||||
{
|
||||
z[k - con] = conj (H (j, k + 1));
|
||||
}
|
||||
normalise (z);
|
||||
|
||||
RealD tmp = 0;
|
||||
for (int i = 0; i < z.size () - 1; i++)
|
||||
{
|
||||
tmp = tmp + abs (z[i]);
|
||||
}
|
||||
|
||||
if (tmp < small / ((RealD) z.size () - 1.0))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
tau = orthU (U, z);
|
||||
|
||||
DenseMatrix < T > Hb;
|
||||
Resize (Hb, j - 1 - con, M);
|
||||
|
||||
for (int a = 0; a < M; a++)
|
||||
{
|
||||
for (int b = 0; b < j - 1 - con; b++)
|
||||
{
|
||||
T sum = 0;
|
||||
for (int c = 0; c < j - 1 - con; c++)
|
||||
{
|
||||
sum += H[a][con + 1 + c] * U[c][b];
|
||||
} //sum += H(a,con+1+c)*U(c,b);}
|
||||
Hb[b][a] = sum;
|
||||
}
|
||||
}
|
||||
|
||||
for (int k = con + 1; k < j; k++)
|
||||
{
|
||||
for (int l = 0; l < M; l++)
|
||||
{
|
||||
H[l][k] = Hb[k - 1 - con][l];
|
||||
}
|
||||
} //H(Hb[k-1-con][l] , l,k);}}
|
||||
|
||||
DenseMatrix < T > Qb;
|
||||
Resize (Qb, M, M);
|
||||
|
||||
for (int a = 0; a < M; a++)
|
||||
{
|
||||
for (int b = 0; b < j - 1 - con; b++)
|
||||
{
|
||||
T sum = 0;
|
||||
for (int c = 0; c < j - 1 - con; c++)
|
||||
{
|
||||
sum += Q[a][con + 1 + c] * U[c][b];
|
||||
} //sum += Q(a,con+1+c)*U(c,b);}
|
||||
Qb[b][a] = sum;
|
||||
}
|
||||
}
|
||||
|
||||
for (int k = con + 1; k < j; k++)
|
||||
{
|
||||
for (int l = 0; l < M; l++)
|
||||
{
|
||||
Q[l][k] = Qb[k - 1 - con][l];
|
||||
}
|
||||
} //Q(Qb[k-1-con][l] , l,k);}}
|
||||
|
||||
DenseMatrix < T > Hc;
|
||||
Resize (Hc, M, M);
|
||||
|
||||
for (int a = 0; a < j - 1 - con; a++)
|
||||
{
|
||||
for (int b = 0; b < M; b++)
|
||||
{
|
||||
T sum = 0;
|
||||
for (int c = 0; c < j - 1 - con; c++)
|
||||
{
|
||||
sum += conj (U[c][a]) * H[con + 1 + c][b];
|
||||
} //sum += conj( U(c,a) )*H(con+1+c,b);}
|
||||
Hc[b][a] = sum;
|
||||
}
|
||||
}
|
||||
|
||||
for (int k = 0; k < M; k++)
|
||||
{
|
||||
for (int l = con + 1; l < j; l++)
|
||||
{
|
||||
H[l][k] = Hc[k][l - 1 - con];
|
||||
}
|
||||
} //H(Hc[k][l-1-con] , l,k);}}
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
@@ -260,39 +260,32 @@ CartesianCommunicator::~CartesianCommunicator()
|
||||
}
|
||||
#ifdef USE_GRID_REDUCTION
|
||||
void CartesianCommunicator::GlobalSum(float &f){
|
||||
FlightRecorder::StepLog("GlobalSumP2P");
|
||||
CartesianCommunicator::GlobalSumP2P(f);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSum(double &d)
|
||||
{
|
||||
FlightRecorder::StepLog("GlobalSumP2P");
|
||||
CartesianCommunicator::GlobalSumP2P(d);
|
||||
}
|
||||
#else
|
||||
void CartesianCommunicator::GlobalSum(float &f){
|
||||
FlightRecorder::StepLog("AllReduce");
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSum(double &d)
|
||||
{
|
||||
FlightRecorder::StepLog("AllReduce");
|
||||
int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
#endif
|
||||
void CartesianCommunicator::GlobalSum(uint32_t &u){
|
||||
FlightRecorder::StepLog("AllReduce");
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSum(uint64_t &u){
|
||||
FlightRecorder::StepLog("AllReduce");
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSumVector(uint64_t* u,int N){
|
||||
FlightRecorder::StepLog("AllReduceVector");
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,u,N,MPI_UINT64_T,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
@@ -801,7 +794,6 @@ void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsReque
|
||||
|
||||
void CartesianCommunicator::StencilBarrier(void)
|
||||
{
|
||||
FlightRecorder::StepLog("NodeBarrier");
|
||||
MPI_Barrier (ShmComm);
|
||||
}
|
||||
//void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
||||
@@ -809,13 +801,11 @@ void CartesianCommunicator::StencilBarrier(void)
|
||||
//}
|
||||
void CartesianCommunicator::Barrier(void)
|
||||
{
|
||||
FlightRecorder::StepLog("GridBarrier");
|
||||
int ierr = MPI_Barrier(communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
|
||||
{
|
||||
FlightRecorder::StepLog("Broadcast");
|
||||
int ierr=MPI_Bcast(data,
|
||||
bytes,
|
||||
MPI_BYTE,
|
||||
@@ -834,7 +824,6 @@ void CartesianCommunicator::BarrierWorld(void){
|
||||
}
|
||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
||||
{
|
||||
FlightRecorder::StepLog("BroadcastWorld");
|
||||
int ierr= MPI_Bcast(data,
|
||||
bytes,
|
||||
MPI_BYTE,
|
||||
@@ -857,7 +846,6 @@ void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,
|
||||
}
|
||||
void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes)
|
||||
{
|
||||
FlightRecorder::StepLog("AllToAll");
|
||||
// MPI is a pain and uses "int" arguments
|
||||
// 64*64*64*128*16 == 500Million elements of data.
|
||||
// When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug.
|
||||
|
||||
@@ -990,7 +990,7 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
|
||||
}
|
||||
#endif
|
||||
|
||||
// SharedMemoryTest();
|
||||
SharedMemoryTest();
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// On node barrier
|
||||
|
||||
@@ -122,10 +122,10 @@ void GlobalSharedMemory::SharedMemoryZero(void *dest,size_t bytes)
|
||||
{
|
||||
acceleratorMemSet(dest,0,bytes);
|
||||
}
|
||||
//void GlobalSharedMemory::SharedMemoryCopy(void *dest,void *src,size_t bytes)
|
||||
//{
|
||||
// acceleratorCopyToDevice(src,dest,bytes);
|
||||
//}
|
||||
void GlobalSharedMemory::SharedMemoryCopy(void *dest,void *src,size_t bytes)
|
||||
{
|
||||
acceleratorCopyToDevice(src,dest,bytes);
|
||||
}
|
||||
////////////////////////////////////////////////////////
|
||||
// Global shared functionality finished
|
||||
// Now move to per communicator functionality
|
||||
|
||||
@@ -236,7 +236,7 @@ public:
|
||||
template<class sobj> inline Lattice<vobj> & operator = (const sobj & r){
|
||||
vobj vtmp;
|
||||
vtmp = r;
|
||||
#if 1
|
||||
#if 0
|
||||
deviceVector<vobj> vvtmp(1);
|
||||
acceleratorPut(vvtmp[0],vtmp);
|
||||
vobj *vvtmp_p = & vvtmp[0];
|
||||
|
||||
@@ -252,11 +252,6 @@ void WilsonFlow<Gimpl>::smear(GaugeField& out, const GaugeField& in) const{
|
||||
|
||||
out = in;
|
||||
RealD taus = 0.;
|
||||
|
||||
// Perform initial t=0 measurements
|
||||
for(auto const &meas : this->functions)
|
||||
meas.second(0,taus,out);
|
||||
|
||||
for (unsigned int step = 1; step <= Nstep; step++) { //step indicates the number of smearing steps applied at the time of measurement
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
evolve_step(out, taus);
|
||||
@@ -341,11 +336,6 @@ void WilsonFlowAdaptive<Gimpl>::smear(GaugeField& out, const GaugeField& in) con
|
||||
RealD taus = 0.;
|
||||
RealD eps = init_epsilon;
|
||||
unsigned int step = 0;
|
||||
|
||||
// Perform initial t=0 measurements
|
||||
for(auto const &meas : this->functions)
|
||||
meas.second(step,taus,out);
|
||||
|
||||
do{
|
||||
int step_success = evolve_step_adaptive(out, taus, eps);
|
||||
step += step_success; //step will not be incremented if the integration step fails
|
||||
|
||||
@@ -396,7 +396,6 @@ public:
|
||||
Packets[i].from_rank,Packets[i].do_recv,
|
||||
Packets[i].xbytes,Packets[i].rbytes,i);
|
||||
}
|
||||
FlightRecorder::StepLog("Communicate begin has finished");
|
||||
// Get comms started then run checksums
|
||||
// Having this PRIOR to the dslash seems to make Sunspot work... (!)
|
||||
for(int i=0;i<Packets.size();i++){
|
||||
|
||||
@@ -251,7 +251,7 @@ inline void acceleratorCopyToDevice(const void *from,void *to,size_t bytes) { c
|
||||
inline void acceleratorCopyFromDevice(const void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);}
|
||||
inline void acceleratorMemSet(void *base,int value,size_t bytes) { cudaMemset(base,value,bytes);}
|
||||
inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(void *from, void *to, size_t bytes, cudaStream_t stream = copyStream) {
|
||||
acceleratorCopyToDevice(from,to,bytes);
|
||||
acceleratorCopyToDevice(to,from,bytes, cudaMemcpyHostToDevice);
|
||||
return 0;
|
||||
}
|
||||
inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(void *from, void *to, size_t bytes, cudaStream_t stream = copyStream) {
|
||||
@@ -337,7 +337,7 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) {
|
||||
cgh.parallel_for( \
|
||||
sycl::nd_range<3>(global,local), \
|
||||
[=] (sycl::nd_item<3> item) /*mutable*/ \
|
||||
[[sycl::reqd_sub_group_size(16)]] \
|
||||
[[intel::reqd_sub_group_size(16)]] \
|
||||
{ \
|
||||
auto iter1 = item.get_global_id(0); \
|
||||
auto iter2 = item.get_global_id(1); \
|
||||
|
||||
@@ -28,6 +28,11 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
#ifndef MIN
|
||||
#define MIN(x,y) ((x)>(y)?(y):(x))
|
||||
#endif
|
||||
|
||||
|
||||
// Introduce a class to gain deterministic bit reproducible reduction.
|
||||
// make static; perhaps just a namespace is required.
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
@@ -638,11 +638,12 @@ void Grid_debug_handler_init(void)
|
||||
sa.sa_flags = SA_SIGINFO;
|
||||
// sigaction(SIGSEGV,&sa,NULL);
|
||||
sigaction(SIGTRAP,&sa,NULL);
|
||||
// sigaction(SIGBUS,&sa,NULL);
|
||||
sigaction(SIGBUS,&sa,NULL);
|
||||
// sigaction(SIGUSR2,&sa,NULL);
|
||||
|
||||
// feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO);
|
||||
// sigaction(SIGFPE,&sa,NULL);
|
||||
feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO);
|
||||
|
||||
sigaction(SIGFPE,&sa,NULL);
|
||||
sigaction(SIGKILL,&sa,NULL);
|
||||
sigaction(SIGILL,&sa,NULL);
|
||||
|
||||
|
||||
@@ -66,7 +66,6 @@ namespace Grid{
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
template <class T> void writeFile(T& in, std::string const fname){
|
||||
#ifdef HAVE_LIME
|
||||
// Ref: https://github.com/paboyle/Grid/blob/feature/scidac-wp1/tests/debug/Test_general_coarse_hdcg_phys48.cc#L111
|
||||
@@ -74,7 +73,7 @@ template <class T> void writeFile(T& in, std::string const fname){
|
||||
Grid::emptyUserRecord record;
|
||||
Grid::ScidacWriter WR(in.Grid()->IsBoss());
|
||||
WR.open(fname);
|
||||
WR.writeScidacFieldRecord(in,record,0); // Lexico
|
||||
WR.writeScidacFieldRecord(in,record,0);
|
||||
WR.close();
|
||||
#endif
|
||||
// What is the appropriate way to throw error?
|
||||
@@ -108,18 +107,8 @@ int main(int argc, char **argv) {
|
||||
|
||||
for (int conf = CPar.StartConfiguration; conf <= CPar.EndConfiguration; conf+= CPar.Skip){
|
||||
|
||||
#if 0
|
||||
CPNersc.CheckpointRestore(conf, Umu, sRNG, pRNG);
|
||||
#else
|
||||
// Don't require Grid format RNGs
|
||||
FieldMetaData header;
|
||||
std::string file, filesmr;
|
||||
file = CPar.conf_path + "/" + CPar.conf_prefix + "." + std::to_string(conf);
|
||||
filesmr = CPar.conf_path + "/" + CPar.conf_smr_prefix + "." + std::to_string(conf);
|
||||
|
||||
NerscIO::readConfiguration(Umu,header,file);
|
||||
#endif
|
||||
|
||||
std::cout << std::setprecision(15);
|
||||
std::cout << GridLogMessage << "Initial plaquette: "<< WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu) << std::endl;
|
||||
|
||||
@@ -127,7 +116,6 @@ int main(int argc, char **argv) {
|
||||
std::string file_post = CPar.conf_prefix + "." + std::to_string(conf);
|
||||
|
||||
WilsonFlow<PeriodicGimplR> WF(WFPar.step_size,WFPar.steps,WFPar.meas_interval);
|
||||
|
||||
WF.addMeasurement(WFPar.meas_interval_density, [&file_pre,&file_post,&conf](int step, RealD t, const typename PeriodicGimplR::GaugeField &U){
|
||||
|
||||
typedef typename PeriodicGimplR::GaugeLinkField GaugeMat;
|
||||
@@ -177,48 +165,33 @@ int main(int argc, char **argv) {
|
||||
//double coeff = 2.0 / (1.0 * Nd * (Nd - 1)) / 3.0;
|
||||
//Plq = coeff * Plq;
|
||||
|
||||
|
||||
RealD WFlow_TC5Li = WilsonLoops<PeriodicGimplR>::TopologicalCharge5Li(U);
|
||||
|
||||
int tau = std::round(t);
|
||||
|
||||
std::string efile = file_pre + "E_dnsty_" + std::to_string(tau) + "_" + file_post;
|
||||
// writeFile(R,efile);
|
||||
|
||||
writeFile(R,efile);
|
||||
std::string tfile = file_pre + "Top_dnsty_" + std::to_string(tau) + "_" + file_post;
|
||||
// writeFile(qfield,tfile);
|
||||
writeFile(qfield,tfile);
|
||||
|
||||
std::string ufile = file_pre + "U_" + std::to_string(tau) + "_" + file_post;
|
||||
{
|
||||
// PeriodicGimplR::GaugeField Ucopy = U;
|
||||
// NerscIO::writeConfiguration(Ucopy,ufile);
|
||||
}
|
||||
|
||||
RealD E = real(sum(R))/ RealD(U.Grid()->gSites());
|
||||
RealD T = real( sum(qfield) );
|
||||
Coordinate scoor; for (int mu=0; mu < Nd; mu++) scoor[mu] = 0;
|
||||
RealD E0 = real(peekSite(R,scoor));
|
||||
RealD T0 = real(peekSite(qfield,scoor));
|
||||
std::cout << GridLogMessage << "[WilsonFlow] Saved energy density (clover) & topo. charge density: " << conf << " " << step << " " << tau << " "
|
||||
<< "(E_avg,T_sum) " << E << " " << T << " (E, T at origin) " << E0 << " " << T0 << " Q5Li "<< WFlow_TC5Li << std::endl;
|
||||
<< "(E_avg,T_sum) " << E << " " << T << " (E, T at origin) " << E0 << " " << T0 << std::endl;
|
||||
|
||||
});
|
||||
|
||||
int t=WFPar.maxTau;
|
||||
WF.smear(Uflow, Umu);
|
||||
// NerscIO::writeConfiguration(Uflow,filesmr);
|
||||
|
||||
|
||||
|
||||
RealD WFlow_plaq = WilsonLoops<PeriodicGimplR>::avgPlaquette(Uflow);
|
||||
RealD WFlow_TC = WilsonLoops<PeriodicGimplR>::TopologicalCharge(Uflow);
|
||||
RealD WFlow_TC5Li = WilsonLoops<PeriodicGimplR>::TopologicalCharge5Li(Uflow);
|
||||
RealD WFlow_T0 = WF.energyDensityPlaquette(t,Uflow); // t
|
||||
RealD WFlow_EC = WF.energyDensityCloverleaf(t,Uflow);
|
||||
std::cout << GridLogMessage << "Plaquette "<< conf << " " << WFlow_plaq << std::endl;
|
||||
std::cout << GridLogMessage << "T0 "<< conf << " " << WFlow_T0 << std::endl;
|
||||
std::cout << GridLogMessage << "TC0 "<< conf << " " << WFlow_EC << std::endl;
|
||||
std::cout << GridLogMessage << "TopologicalCharge "<< conf << " " << WFlow_TC << std::endl;
|
||||
std::cout << GridLogMessage << "TopologicalCharge5Li "<< conf << " " << WFlow_TC5Li<< std::endl;
|
||||
std::cout << GridLogMessage << "Plaquette "<< conf << " " << WFlow_plaq << std::endl;
|
||||
std::cout << GridLogMessage << "T0 "<< conf << " " << WFlow_T0 << std::endl;
|
||||
std::cout << GridLogMessage << "TC0 "<< conf << " " << WFlow_EC << std::endl;
|
||||
std::cout << GridLogMessage << "TopologicalCharge "<< conf << " " << WFlow_TC << std::endl;
|
||||
|
||||
std::cout<< GridLogMessage << " Admissibility check:\n";
|
||||
const double sp_adm = 0.067; // admissible threshold
|
||||
|
||||
@@ -873,7 +873,7 @@ int main (int argc, char ** argv)
|
||||
int do_su4=0;
|
||||
int do_memory=1;
|
||||
int do_comms =1;
|
||||
int do_blas =1;
|
||||
int do_blas =0;
|
||||
int do_dslash=1;
|
||||
|
||||
int sel=4;
|
||||
|
||||
@@ -1,273 +0,0 @@
|
||||
RANK 1 using NUMA 1 GPU 1 NIC mlx5_1:1
|
||||
RANK 3 using NUMA 3 GPU 3 NIC mlx5_3:1
|
||||
RANK 0 using NUMA 0 GPU 0 NIC mlx5_0:1
|
||||
RANK 2 using NUMA 2 GPU 2 NIC mlx5_2:1
|
||||
SLURM detected
|
||||
AcceleratorCudaInit[0]: ========================
|
||||
AcceleratorCudaInit[0]: Device Number : 0
|
||||
AcceleratorCudaInit[0]: ========================
|
||||
AcceleratorCudaInit[0]: Device identifier: NVIDIA GH200 120GB
|
||||
AcceleratorCudaInit[0]: totalGlobalMem: 102005473280
|
||||
AcceleratorCudaInit[0]: managedMemory: 1
|
||||
AcceleratorCudaInit[0]: isMultiGpuBoard: 0
|
||||
AcceleratorCudaInit[0]: warpSize: 32
|
||||
AcceleratorCudaInit[0]: pciBusID: 1
|
||||
AcceleratorCudaInit[0]: pciDeviceID: 0
|
||||
AcceleratorCudaInit[0]: maxGridSize (2147483647,65535,65535)
|
||||
AcceleratorCudaInit: using default device
|
||||
AcceleratorCudaInit: assume user either uses
|
||||
AcceleratorCudaInit: a) IBM jsrun, or
|
||||
AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding
|
||||
AcceleratorCudaInit: Configure options --enable-setdevice=no
|
||||
local rank 0 device 0 bus id: 0009:01:00.0
|
||||
AcceleratorCudaInit: ================================================
|
||||
SharedMemoryMpi: World communicator of size 4
|
||||
SharedMemoryMpi: Node communicator of size 4
|
||||
0SharedMemoryMpi: SharedMemoryMPI.cc acceleratorAllocDevice 2147483648bytes at 0x4002c0000000 - 40033fffffff for comms buffers
|
||||
Setting up IPC
|
||||
|
||||
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
|
||||
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
|
||||
__|_ | | | | | | | | | | | | _|__
|
||||
__|_ _|__
|
||||
__|_ GGGG RRRR III DDDD _|__
|
||||
__|_ G R R I D D _|__
|
||||
__|_ G R R I D D _|__
|
||||
__|_ G GG RRRR I D D _|__
|
||||
__|_ G G R R I D D _|__
|
||||
__|_ GGGG R R III DDDD _|__
|
||||
__|_ _|__
|
||||
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
|
||||
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
|
||||
| | | | | | | | | | | | | |
|
||||
|
||||
|
||||
Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
Current Grid git commit hash=3737a24096282ea179607fc879814710860a0de6: (HEAD -> develop, origin/develop, origin/HEAD) clean
|
||||
|
||||
Grid : Message : ================================================
|
||||
Grid : Message : MPI is initialised and logging filters activated
|
||||
Grid : Message : ================================================
|
||||
Grid : Message : This rank is running on host jpbo-119-30.jupiter.internal
|
||||
Grid : Message : Requested 2147483648 byte stencil comms buffers
|
||||
Grid : Message : MemoryManager Cache 81604378624 bytes
|
||||
Grid : Message : MemoryManager::Init() setting up
|
||||
Grid : Message : MemoryManager::Init() cache pool for recent host allocations: SMALL 8 LARGE 2 HUGE 0
|
||||
Grid : Message : MemoryManager::Init() cache pool for recent device allocations: SMALL 16 LARGE 8 Huge 0
|
||||
Grid : Message : MemoryManager::Init() cache pool for recent shared allocations: SMALL 16 LARGE 8 Huge 0
|
||||
Grid : Message : MemoryManager::Init() Non unified: Caching accelerator data in dedicated memory
|
||||
Grid : Message : MemoryManager::Init() Using cudaMalloc
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Grid : Message : 0.303000 s : ++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
Grid : Message : 0.309000 s : Testing with full communication
|
||||
Grid : Message : 0.312000 s : ++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
Grid : Message : 0.313000 s : Grid Layout
|
||||
Grid : Message : 0.313000 s : Global lattice size : 32 32 64 64
|
||||
Grid : Message : 0.319000 s : OpenMP threads : 4
|
||||
Grid : Message : 0.320000 s : MPI tasks : 1 1 2 2
|
||||
Grid : Message : 0.129590 s : Initialising 4d RNG
|
||||
Grid : Message : 0.764790 s : Intialising parallel RNG with unique string 'The 4D RNG'
|
||||
Grid : Message : 0.764920 s : Seed SHA256: 49db4542db694e3b1a74bf2592a8c1b83bfebbe18401693c2609a4c3af1
|
||||
Grid : Message : 0.942440 s : Initialising 5d RNG
|
||||
Grid : Message : 1.149388 s : Intialising parallel RNG with unique string 'The 5D RNG'
|
||||
Grid : Message : 1.149404 s : Seed SHA256: b6316f2fac44ce14111f93e0296389330b077bfd0a7b359f781c58589f8a
|
||||
local rank 1 device 0 bus id: 0019:01:00.0
|
||||
local rank 2 device 0 bus id: 0029:01:00.0
|
||||
local rank 3 device 0 bus id: 0039:01:00.0
|
||||
Grid : Message : 43.893114 s : Drawing gauge field
|
||||
Grid : Message : 54.574150 s : Random gauge initialised
|
||||
Grid : Message : 54.574170 s : Applying BCs for Dirichlet Block5 [0 0 0 0 0]
|
||||
Grid : Message : 54.574172 s : Applying BCs for Dirichlet Block4 [0 0 0 0]
|
||||
Grid : Message : 54.580032 s : Setting up Cshift based reference
|
||||
Grid : Message : 60.407451 s : *****************************************************************
|
||||
Grid : Message : 60.407469 s : * Kernel options --dslash-generic, --dslash-unroll, --dslash-asm
|
||||
Grid : Message : 60.407470 s : *****************************************************************
|
||||
Grid : Message : 60.407471 s : *****************************************************************
|
||||
Grid : Message : 60.407472 s : * Benchmarking DomainWallFermionR::Dhop
|
||||
Grid : Message : 60.407473 s : * Vectorising space-time by 8
|
||||
Grid : Message : 60.407475 s : * VComplex size is 64 B
|
||||
Grid : Message : 60.407477 s : * Using Overlapped Comms/Compute
|
||||
Grid : Message : 60.407479 s : * Using GENERIC Nc WilsonKernels
|
||||
Grid : Message : 60.407480 s : *****************************************************************
|
||||
Grid : Message : 61.102178 s : Called warmup
|
||||
Grid : Message : 62.177160 s : Called Dw 300 times in 1074958 us
|
||||
Grid : Message : 62.177198 s : mflop/s = 24721998.6
|
||||
Grid : Message : 62.177201 s : mflop/s per rank = 6180499.64
|
||||
Grid : Message : 62.177204 s : mflop/s per node = 24721998.6
|
||||
Grid : Message : 62.182696 s : norm diff 5.8108784e-14 Line 306
|
||||
Grid : Message : 71.328862 s : ----------------------------------------------------------------
|
||||
Grid : Message : 71.328884 s : Compare to naive wilson implementation Dag to verify correctness
|
||||
Grid : Message : 71.328885 s : ----------------------------------------------------------------
|
||||
Grid : Message : 71.328886 s : Called DwDag
|
||||
Grid : Message : 71.328887 s : norm dag result 4.12810493
|
||||
Grid : Message : 71.329493 s : norm dag ref 4.12810493
|
||||
Grid : Message : 71.331967 s : norm dag diff 3.40632318e-14 Line 377
|
||||
Grid : Message : 71.394727 s : Calling Deo and Doe and //assert Deo+Doe == Dunprec
|
||||
Grid : Message : 71.803650 s : src_e0.500003185
|
||||
Grid : Message : 71.819727 s : src_o0.499996882
|
||||
Grid : Message : 71.821991 s : *********************************************************
|
||||
Grid : Message : 71.821993 s : * Benchmarking DomainWallFermion::DhopEO
|
||||
Grid : Message : 71.821995 s : * Vectorising space-time by 8
|
||||
Grid : Message : 71.821998 s : * Using Overlapped Comms/Compute
|
||||
Grid : Message : 71.822002 s : * Using GENERIC Nc WilsonKernels
|
||||
Grid : Message : 71.822003 s : *********************************************************
|
||||
Grid : Message : 72.377054 s : Deo mflop/s = 24065467
|
||||
Grid : Message : 72.377071 s : Deo mflop/s per rank 6016366.75
|
||||
Grid : Message : 72.377074 s : Deo mflop/s per node 24065467
|
||||
Grid : Message : 72.624877 s : r_e2.06377678
|
||||
Grid : Message : 72.625198 s : r_o2.06381058
|
||||
Grid : Message : 72.625507 s : res4.12758736
|
||||
Grid : Message : 73.759140 s : norm diff 0
|
||||
Grid : Message : 73.868204 s : norm diff even 0
|
||||
Grid : Message : 73.907201 s : norm diff odd 0
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Grid : Message : 74.414580 s : ++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
Grid : Message : 74.414582 s : Testing without internode communication
|
||||
Grid : Message : 74.414584 s : ++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
Grid : Message : 74.414586 s : Grid Layout
|
||||
Grid : Message : 74.414586 s : Global lattice size : 32 32 64 64
|
||||
Grid : Message : 74.414594 s : OpenMP threads : 4
|
||||
Grid : Message : 74.414595 s : MPI tasks : 1 1 2 2
|
||||
Grid : Message : 74.679364 s : Initialising 4d RNG
|
||||
Grid : Message : 74.742332 s : Intialising parallel RNG with unique string 'The 4D RNG'
|
||||
Grid : Message : 74.742343 s : Seed SHA256: 49db4542db694e3b1a74bf2592a8c1b83bfebbe18401693c2609a4c3af1
|
||||
Grid : Message : 74.759525 s : Initialising 5d RNG
|
||||
Grid : Message : 75.812412 s : Intialising parallel RNG with unique string 'The 5D RNG'
|
||||
Grid : Message : 75.812429 s : Seed SHA256: b6316f2fac44ce14111f93e0296389330b077bfd0a7b359f781c58589f8a
|
||||
Grid : Message : 119.252016 s : Drawing gauge field
|
||||
Grid : Message : 129.919846 s : Random gauge initialised
|
||||
Grid : Message : 129.919863 s : Applying BCs for Dirichlet Block5 [0 0 0 0 0]
|
||||
Grid : Message : 129.919865 s : Applying BCs for Dirichlet Block4 [0 0 0 0]
|
||||
Grid : Message : 129.923611 s : Setting up Cshift based reference
|
||||
Grid : Message : 135.522878 s : *****************************************************************
|
||||
Grid : Message : 135.522897 s : * Kernel options --dslash-generic, --dslash-unroll, --dslash-asm
|
||||
Grid : Message : 135.522899 s : *****************************************************************
|
||||
Grid : Message : 135.522899 s : *****************************************************************
|
||||
Grid : Message : 135.522900 s : * Benchmarking DomainWallFermionR::Dhop
|
||||
Grid : Message : 135.522901 s : * Vectorising space-time by 8
|
||||
Grid : Message : 135.522903 s : * VComplex size is 64 B
|
||||
Grid : Message : 135.522905 s : * Using Overlapped Comms/Compute
|
||||
Grid : Message : 135.522907 s : * Using GENERIC Nc WilsonKernels
|
||||
Grid : Message : 135.522908 s : *****************************************************************
|
||||
Grid : Message : 136.151202 s : Called warmup
|
||||
Grid : Message : 137.224721 s : Called Dw 300 times in 1073490 us
|
||||
Grid : Message : 137.224748 s : mflop/s = 24755806
|
||||
Grid : Message : 137.224751 s : mflop/s per rank = 6188951.49
|
||||
Grid : Message : 137.224753 s : mflop/s per node = 24755806
|
||||
Grid : Message : 137.235239 s : norm diff 5.8108784e-14 Line 306
|
||||
Grid : Message : 146.451686 s : ----------------------------------------------------------------
|
||||
Grid : Message : 146.451708 s : Compare to naive wilson implementation Dag to verify correctness
|
||||
Grid : Message : 146.451710 s : ----------------------------------------------------------------
|
||||
Grid : Message : 146.451712 s : Called DwDag
|
||||
Grid : Message : 146.451714 s : norm dag result 4.12810493
|
||||
Grid : Message : 146.452323 s : norm dag ref 4.12810493
|
||||
Grid : Message : 146.454799 s : norm dag diff 3.40632318e-14 Line 377
|
||||
Grid : Message : 146.498557 s : Calling Deo and Doe and //assert Deo+Doe == Dunprec
|
||||
Grid : Message : 146.940894 s : src_e0.500003185
|
||||
Grid : Message : 146.953676 s : src_o0.499996882
|
||||
Grid : Message : 146.955927 s : *********************************************************
|
||||
Grid : Message : 146.955929 s : * Benchmarking DomainWallFermion::DhopEO
|
||||
Grid : Message : 146.955932 s : * Vectorising space-time by 8
|
||||
Grid : Message : 146.955936 s : * Using Overlapped Comms/Compute
|
||||
Grid : Message : 146.955938 s : * Using GENERIC Nc WilsonKernels
|
||||
Grid : Message : 146.955941 s : *********************************************************
|
||||
Grid : Message : 147.511975 s : Deo mflop/s = 24036256.5
|
||||
Grid : Message : 147.511989 s : Deo mflop/s per rank 6009064.13
|
||||
Grid : Message : 147.511991 s : Deo mflop/s per node 24036256.5
|
||||
Grid : Message : 147.522100 s : r_e2.06377678
|
||||
Grid : Message : 147.522433 s : r_o2.06381058
|
||||
Grid : Message : 147.522745 s : res4.12758736
|
||||
Grid : Message : 148.229848 s : norm diff 0
|
||||
Grid : Message : 149.233474 s : norm diff even 0
|
||||
Grid : Message : 149.235815 s : norm diff odd 0
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Grid : Message : 149.960985 s : ++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
Grid : Message : 149.960990 s : Testing without intranode communication
|
||||
Grid : Message : 149.960991 s : ++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
Grid : Message : 149.960995 s : Grid Layout
|
||||
Grid : Message : 149.960995 s : Global lattice size : 32 32 64 64
|
||||
Grid : Message : 149.961003 s : OpenMP threads : 4
|
||||
Grid : Message : 149.961004 s : MPI tasks : 1 1 2 2
|
||||
Grid : Message : 150.155810 s : Initialising 4d RNG
|
||||
Grid : Message : 150.800200 s : Intialising parallel RNG with unique string 'The 4D RNG'
|
||||
Grid : Message : 150.800340 s : Seed SHA256: 49db4542db694e3b1a74bf2592a8c1b83bfebbe18401693c2609a4c3af1
|
||||
Grid : Message : 150.973420 s : Initialising 5d RNG
|
||||
Grid : Message : 151.131117 s : Intialising parallel RNG with unique string 'The 5D RNG'
|
||||
Grid : Message : 151.131136 s : Seed SHA256: b6316f2fac44ce14111f93e0296389330b077bfd0a7b359f781c58589f8a
|
||||
Grid : Message : 193.933765 s : Drawing gauge field
|
||||
Grid : Message : 204.611551 s : Random gauge initialised
|
||||
Grid : Message : 204.611574 s : Applying BCs for Dirichlet Block5 [0 0 0 0 0]
|
||||
Grid : Message : 204.611576 s : Applying BCs for Dirichlet Block4 [0 0 0 0]
|
||||
Grid : Message : 204.615265 s : Setting up Cshift based reference
|
||||
Grid : Message : 210.117788 s : *****************************************************************
|
||||
Grid : Message : 210.117807 s : * Kernel options --dslash-generic, --dslash-unroll, --dslash-asm
|
||||
Grid : Message : 210.117809 s : *****************************************************************
|
||||
Grid : Message : 210.117810 s : *****************************************************************
|
||||
Grid : Message : 210.117812 s : * Benchmarking DomainWallFermionR::Dhop
|
||||
Grid : Message : 210.117813 s : * Vectorising space-time by 8
|
||||
Grid : Message : 210.117814 s : * VComplex size is 64 B
|
||||
Grid : Message : 210.117817 s : * Using Overlapped Comms/Compute
|
||||
Grid : Message : 210.117818 s : * Using GENERIC Nc WilsonKernels
|
||||
Grid : Message : 210.117819 s : *****************************************************************
|
||||
Grid : Message : 210.714641 s : Called warmup
|
||||
Grid : Message : 211.892227 s : Called Dw 300 times in 1177557 us
|
||||
Grid : Message : 211.892252 s : mflop/s = 22568003.2
|
||||
Grid : Message : 211.892255 s : mflop/s per rank = 5642000.8
|
||||
Grid : Message : 211.892257 s : mflop/s per node = 22568003.2
|
||||
Grid : Message : 211.896037 s : norm diff 5.8108784e-14 Line 306
|
||||
Grid : Message : 220.751375 s : ----------------------------------------------------------------
|
||||
Grid : Message : 220.751406 s : Compare to naive wilson implementation Dag to verify correctness
|
||||
Grid : Message : 220.751409 s : ----------------------------------------------------------------
|
||||
Grid : Message : 220.751411 s : Called DwDag
|
||||
Grid : Message : 220.751412 s : norm dag result 4.12810493
|
||||
Grid : Message : 220.753307 s : norm dag ref 4.12810493
|
||||
Grid : Message : 220.755796 s : norm dag diff 3.40632318e-14 Line 377
|
||||
Grid : Message : 220.813226 s : Calling Deo and Doe and //assert Deo+Doe == Dunprec
|
||||
Grid : Message : 221.697800 s : src_e0.500003185
|
||||
Grid : Message : 221.890920 s : src_o0.499996882
|
||||
Grid : Message : 221.913430 s : *********************************************************
|
||||
Grid : Message : 221.913450 s : * Benchmarking DomainWallFermion::DhopEO
|
||||
Grid : Message : 221.913480 s : * Vectorising space-time by 8
|
||||
Grid : Message : 221.913500 s : * Using Overlapped Comms/Compute
|
||||
Grid : Message : 221.913530 s : * Using GENERIC Nc WilsonKernels
|
||||
Grid : Message : 221.913550 s : *********************************************************
|
||||
Grid : Message : 221.645213 s : Deo mflop/s = 24114032
|
||||
Grid : Message : 221.645228 s : Deo mflop/s per rank 6028508.01
|
||||
Grid : Message : 221.645231 s : Deo mflop/s per node 24114032
|
||||
Grid : Message : 221.656021 s : r_e2.06377678
|
||||
Grid : Message : 221.656389 s : r_o2.06381058
|
||||
Grid : Message : 221.656698 s : res4.12758736
|
||||
Grid : Message : 222.110075 s : norm diff 0
|
||||
Grid : Message : 222.857692 s : norm diff even 0
|
||||
Grid : Message : 222.875763 s : norm diff odd 0
|
||||
Grid : Message : 223.598127 s : *******************************************
|
||||
Grid : Message : 223.598145 s : ******* Grid Finalize ******
|
||||
Grid : Message : 223.598146 s : *******************************************
|
||||
@@ -1,286 +0,0 @@
|
||||
RANK 2 using NUMA 2 GPU 2 NIC mlx5_2:1
|
||||
RANK 3 using NUMA 3 GPU 3 NIC mlx5_3:1
|
||||
RANK 0 using NUMA 0 GPU 0 NIC mlx5_0:1
|
||||
RANK 1 using NUMA 1 GPU 1 NIC mlx5_1:1
|
||||
RANK 0 using NUMA 0 GPU 0 NIC mlx5_0:1
|
||||
RANK 2 using NUMA 2 GPU 2 NIC mlx5_2:1
|
||||
RANK 1 using NUMA 1 GPU 1 NIC mlx5_1:1
|
||||
RANK 3 using NUMA 3 GPU 3 NIC mlx5_3:1
|
||||
RANK 3 using NUMA 3 GPU 3 NIC mlx5_3:1
|
||||
RANK 0 using NUMA 0 GPU 0 NIC mlx5_0:1
|
||||
RANK 1 using NUMA 1 GPU 1 NIC mlx5_1:1
|
||||
RANK 2 using NUMA 2 GPU 2 NIC mlx5_2:1
|
||||
RANK 1 using NUMA 1 GPU 1 NIC mlx5_1:1
|
||||
RANK 3 using NUMA 3 GPU 3 NIC mlx5_3:1
|
||||
RANK 0 using NUMA 0 GPU 0 NIC mlx5_0:1
|
||||
RANK 2 using NUMA 2 GPU 2 NIC mlx5_2:1
|
||||
SLURM detected
|
||||
AcceleratorCudaInit[0]: ========================
|
||||
AcceleratorCudaInit[0]: Device Number : 0
|
||||
AcceleratorCudaInit[0]: ========================
|
||||
AcceleratorCudaInit[0]: Device identifier: NVIDIA GH200 120GB
|
||||
AcceleratorCudaInit[0]: totalGlobalMem: 102005473280
|
||||
AcceleratorCudaInit[0]: managedMemory: 1
|
||||
AcceleratorCudaInit[0]: isMultiGpuBoard: 0
|
||||
AcceleratorCudaInit[0]: warpSize: 32
|
||||
AcceleratorCudaInit[0]: pciBusID: 1
|
||||
AcceleratorCudaInit[0]: pciDeviceID: 0
|
||||
AcceleratorCudaInit[0]: maxGridSize (2147483647,65535,65535)
|
||||
AcceleratorCudaInit: using default device
|
||||
AcceleratorCudaInit: assume user either uses
|
||||
AcceleratorCudaInit: a) IBM jsrun, or
|
||||
AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding
|
||||
AcceleratorCudaInit: Configure options --enable-setdevice=no
|
||||
local rank 0 device 0 bus id: 0009:01:00.0
|
||||
AcceleratorCudaInit: ================================================
|
||||
SharedMemoryMpi: World communicator of size 16
|
||||
SharedMemoryMpi: Node communicator of size 4
|
||||
0SharedMemoryMpi: SharedMemoryMPI.cc acceleratorAllocDevice 2147483648bytes at 0x4002a0000000 - 40031fffffff for comms buffers
|
||||
Setting up IPC
|
||||
|
||||
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
|
||||
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
|
||||
__|_ | | | | | | | | | | | | _|__
|
||||
__|_ _|__
|
||||
__|_ GGGG RRRR III DDDD _|__
|
||||
__|_ G R R I D D _|__
|
||||
__|_ G R R I D D _|__
|
||||
__|_ G GG RRRR I D D _|__
|
||||
__|_ G G R R I D D _|__
|
||||
__|_ GGGG R R III DDDD _|__
|
||||
__|_ _|__
|
||||
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
|
||||
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
|
||||
| | | | | | | | | | | | | |
|
||||
|
||||
|
||||
Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
Current Grid git commit hash=3737a24096282ea179607fc879814710860a0de6: (HEAD -> develop, origin/develop, origin/HEAD) clean
|
||||
|
||||
Grid : Message : ================================================
|
||||
Grid : Message : MPI is initialised and logging filters activated
|
||||
Grid : Message : ================================================
|
||||
Grid : Message : This rank is running on host jpbo-012-11.jupiter.internal
|
||||
Grid : Message : Requested 2147483648 byte stencil comms buffers
|
||||
Grid : Message : MemoryManager Cache 81604378624 bytes
|
||||
Grid : Message : MemoryManager::Init() setting up
|
||||
Grid : Message : MemoryManager::Init() cache pool for recent host allocations: SMALL 8 LARGE 2 HUGE 0
|
||||
Grid : Message : MemoryManager::Init() cache pool for recent device allocations: SMALL 16 LARGE 8 Huge 0
|
||||
Grid : Message : MemoryManager::Init() cache pool for recent shared allocations: SMALL 16 LARGE 8 Huge 0
|
||||
Grid : Message : MemoryManager::Init() Non unified: Caching accelerator data in dedicated memory
|
||||
Grid : Message : MemoryManager::Init() Using cudaMalloc
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Grid : Message : 0.834000 s : ++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
Grid : Message : 0.838000 s : Testing with full communication
|
||||
Grid : Message : 0.839000 s : ++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
Grid : Message : 0.840000 s : Grid Layout
|
||||
Grid : Message : 0.840000 s : Global lattice size : 64 64 64 64
|
||||
Grid : Message : 0.846000 s : OpenMP threads : 4
|
||||
Grid : Message : 0.846000 s : MPI tasks : 2 2 2 2
|
||||
Grid : Message : 0.165970 s : Initialising 4d RNG
|
||||
Grid : Message : 0.787270 s : Intialising parallel RNG with unique string 'The 4D RNG'
|
||||
Grid : Message : 0.787340 s : Seed SHA256: 49db4542db694e3b1a74bf2592a8c1b83bfebbe18401693c2609a4c3af1
|
||||
Grid : Message : 0.960410 s : Initialising 5d RNG
|
||||
Grid : Message : 1.142344 s : Intialising parallel RNG with unique string 'The 5D RNG'
|
||||
Grid : Message : 1.142352 s : Seed SHA256: b6316f2fac44ce14111f93e0296389330b077bfd0a7b359f781c58589f8a
|
||||
local rank 2 device 0 bus id: 0029:01:00.0
|
||||
local rank 3 device 0 bus id: 0039:01:00.0
|
||||
local rank 1 device 0 bus id: 0019:01:00.0
|
||||
Grid : Message : 44.657270 s : Drawing gauge field
|
||||
Grid : Message : 55.247733 s : Random gauge initialised
|
||||
Grid : Message : 55.247745 s : Applying BCs for Dirichlet Block5 [0 0 0 0 0]
|
||||
Grid : Message : 55.247747 s : Applying BCs for Dirichlet Block4 [0 0 0 0]
|
||||
Grid : Message : 55.253053 s : Setting up Cshift based reference
|
||||
Grid : Message : 62.191747 s : *****************************************************************
|
||||
Grid : Message : 62.191767 s : * Kernel options --dslash-generic, --dslash-unroll, --dslash-asm
|
||||
Grid : Message : 62.191768 s : *****************************************************************
|
||||
Grid : Message : 62.191769 s : *****************************************************************
|
||||
Grid : Message : 62.191769 s : * Benchmarking DomainWallFermionR::Dhop
|
||||
Grid : Message : 62.191769 s : * Vectorising space-time by 8
|
||||
Grid : Message : 62.191770 s : * VComplex size is 64 B
|
||||
Grid : Message : 62.191771 s : * Using Overlapped Comms/Compute
|
||||
Grid : Message : 62.191771 s : * Using GENERIC Nc WilsonKernels
|
||||
Grid : Message : 62.191772 s : *****************************************************************
|
||||
Grid : Message : 62.857568 s : Called warmup
|
||||
Grid : Message : 65.581790 s : Called Dw 300 times in 2200540 us
|
||||
Grid : Message : 65.582120 s : mflop/s = 48306525
|
||||
Grid : Message : 65.582140 s : mflop/s per rank = 3019157.81
|
||||
Grid : Message : 65.582150 s : mflop/s per node = 12076631.3
|
||||
Grid : Message : 65.637550 s : norm diff 5.80156793e-14 Line 306
|
||||
Grid : Message : 75.122153 s : ----------------------------------------------------------------
|
||||
Grid : Message : 75.122166 s : Compare to naive wilson implementation Dag to verify correctness
|
||||
Grid : Message : 75.122167 s : ----------------------------------------------------------------
|
||||
Grid : Message : 75.122167 s : Called DwDag
|
||||
Grid : Message : 75.122167 s : norm dag result 4.12801829
|
||||
Grid : Message : 75.123295 s : norm dag ref 4.12801829
|
||||
Grid : Message : 75.125890 s : norm dag diff 3.42093991e-14 Line 377
|
||||
Grid : Message : 75.188462 s : Calling Deo and Doe and //assert Deo+Doe == Dunprec
|
||||
Grid : Message : 75.605683 s : src_e0.500004005
|
||||
Grid : Message : 75.617824 s : src_o0.499996067
|
||||
Grid : Message : 75.620089 s : *********************************************************
|
||||
Grid : Message : 75.620091 s : * Benchmarking DomainWallFermion::DhopEO
|
||||
Grid : Message : 75.620093 s : * Vectorising space-time by 8
|
||||
Grid : Message : 75.620094 s : * Using Overlapped Comms/Compute
|
||||
Grid : Message : 75.620095 s : * Using GENERIC Nc WilsonKernels
|
||||
Grid : Message : 75.620096 s : *********************************************************
|
||||
Grid : Message : 76.732272 s : Deo mflop/s = 48068252.4
|
||||
Grid : Message : 76.732283 s : Deo mflop/s per rank 3004265.77
|
||||
Grid : Message : 76.732285 s : Deo mflop/s per node 12017063.1
|
||||
Grid : Message : 76.749317 s : r_e2.06443136
|
||||
Grid : Message : 76.749652 s : r_o2.06378451
|
||||
Grid : Message : 76.749955 s : res4.12821587
|
||||
Grid : Message : 77.198827 s : norm diff 0
|
||||
Grid : Message : 77.981760 s : norm diff even 0
|
||||
Grid : Message : 78.455900 s : norm diff odd 0
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Grid : Message : 78.539333 s : ++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
Grid : Message : 78.539337 s : Testing without internode communication
|
||||
Grid : Message : 78.539338 s : ++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
Grid : Message : 78.539339 s : Grid Layout
|
||||
Grid : Message : 78.539339 s : Global lattice size : 64 64 64 64
|
||||
Grid : Message : 78.539347 s : OpenMP threads : 4
|
||||
Grid : Message : 78.539348 s : MPI tasks : 2 2 2 2
|
||||
Grid : Message : 78.798501 s : Initialising 4d RNG
|
||||
Grid : Message : 78.862916 s : Intialising parallel RNG with unique string 'The 4D RNG'
|
||||
Grid : Message : 78.862925 s : Seed SHA256: 49db4542db694e3b1a74bf2592a8c1b83bfebbe18401693c2609a4c3af1
|
||||
Grid : Message : 78.879916 s : Initialising 5d RNG
|
||||
Grid : Message : 79.941271 s : Intialising parallel RNG with unique string 'The 5D RNG'
|
||||
Grid : Message : 79.941280 s : Seed SHA256: b6316f2fac44ce14111f93e0296389330b077bfd0a7b359f781c58589f8a
|
||||
Grid : Message : 124.586264 s : Drawing gauge field
|
||||
Grid : Message : 135.338090 s : Random gauge initialised
|
||||
Grid : Message : 135.338102 s : Applying BCs for Dirichlet Block5 [0 0 0 0 0]
|
||||
Grid : Message : 135.338103 s : Applying BCs for Dirichlet Block4 [0 0 0 0]
|
||||
Grid : Message : 135.341266 s : Setting up Cshift based reference
|
||||
Grid : Message : 142.604280 s : *****************************************************************
|
||||
Grid : Message : 142.604450 s : * Kernel options --dslash-generic, --dslash-unroll, --dslash-asm
|
||||
Grid : Message : 142.604460 s : *****************************************************************
|
||||
Grid : Message : 142.604470 s : *****************************************************************
|
||||
Grid : Message : 142.604480 s : * Benchmarking DomainWallFermionR::Dhop
|
||||
Grid : Message : 142.604480 s : * Vectorising space-time by 8
|
||||
Grid : Message : 142.604500 s : * VComplex size is 64 B
|
||||
Grid : Message : 142.604510 s : * Using Overlapped Comms/Compute
|
||||
Grid : Message : 142.604510 s : * Using GENERIC Nc WilsonKernels
|
||||
Grid : Message : 142.604520 s : *****************************************************************
|
||||
Grid : Message : 142.686034 s : Called warmup
|
||||
Grid : Message : 144.868543 s : Called Dw 300 times in 2182483 us
|
||||
Grid : Message : 144.868559 s : mflop/s = 48706194.1
|
||||
Grid : Message : 144.868561 s : mflop/s per rank = 3044137.13
|
||||
Grid : Message : 144.868562 s : mflop/s per node = 12176548.5
|
||||
Grid : Message : 144.887595 s : norm diff 5.80156793e-14 Line 306
|
||||
Grid : Message : 153.622978 s : ----------------------------------------------------------------
|
||||
Grid : Message : 153.622994 s : Compare to naive wilson implementation Dag to verify correctness
|
||||
Grid : Message : 153.622995 s : ----------------------------------------------------------------
|
||||
Grid : Message : 153.622995 s : Called DwDag
|
||||
Grid : Message : 153.622996 s : norm dag result 4.12801829
|
||||
Grid : Message : 153.623604 s : norm dag ref 4.12801829
|
||||
Grid : Message : 153.626098 s : norm dag diff 3.42093991e-14 Line 377
|
||||
Grid : Message : 153.691426 s : Calling Deo and Doe and //assert Deo+Doe == Dunprec
|
||||
Grid : Message : 154.148319 s : src_e0.500004005
|
||||
Grid : Message : 154.151454 s : src_o0.499996067
|
||||
Grid : Message : 154.153722 s : *********************************************************
|
||||
Grid : Message : 154.153724 s : * Benchmarking DomainWallFermion::DhopEO
|
||||
Grid : Message : 154.153725 s : * Vectorising space-time by 8
|
||||
Grid : Message : 154.153726 s : * Using Overlapped Comms/Compute
|
||||
Grid : Message : 154.153727 s : * Using GENERIC Nc WilsonKernels
|
||||
Grid : Message : 154.153728 s : *********************************************************
|
||||
Grid : Message : 155.200671 s : Deo mflop/s = 51121022.4
|
||||
Grid : Message : 155.200682 s : Deo mflop/s per rank 3195063.9
|
||||
Grid : Message : 155.200684 s : Deo mflop/s per node 12780255.6
|
||||
Grid : Message : 155.217204 s : r_e2.06443136
|
||||
Grid : Message : 155.217550 s : r_o2.06378451
|
||||
Grid : Message : 155.217869 s : res4.12821587
|
||||
Grid : Message : 155.673744 s : norm diff 0
|
||||
Grid : Message : 156.463329 s : norm diff even 0
|
||||
Grid : Message : 156.878866 s : norm diff odd 0
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Grid : Message : 157.620761 s : ++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
Grid : Message : 157.620764 s : Testing without intranode communication
|
||||
Grid : Message : 157.620765 s : ++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
Grid : Message : 157.620766 s : Grid Layout
|
||||
Grid : Message : 157.620766 s : Global lattice size : 64 64 64 64
|
||||
Grid : Message : 157.620773 s : OpenMP threads : 4
|
||||
Grid : Message : 157.620774 s : MPI tasks : 2 2 2 2
|
||||
Grid : Message : 157.671479 s : Initialising 4d RNG
|
||||
Grid : Message : 157.738691 s : Intialising parallel RNG with unique string 'The 4D RNG'
|
||||
Grid : Message : 157.738698 s : Seed SHA256: 49db4542db694e3b1a74bf2592a8c1b83bfebbe18401693c2609a4c3af1
|
||||
Grid : Message : 157.755651 s : Initialising 5d RNG
|
||||
Grid : Message : 158.848676 s : Intialising parallel RNG with unique string 'The 5D RNG'
|
||||
Grid : Message : 158.848685 s : Seed SHA256: b6316f2fac44ce14111f93e0296389330b077bfd0a7b359f781c58589f8a
|
||||
Grid : Message : 202.465158 s : Drawing gauge field
|
||||
Grid : Message : 213.214546 s : Random gauge initialised
|
||||
Grid : Message : 213.214561 s : Applying BCs for Dirichlet Block5 [0 0 0 0 0]
|
||||
Grid : Message : 213.214563 s : Applying BCs for Dirichlet Block4 [0 0 0 0]
|
||||
Grid : Message : 213.217711 s : Setting up Cshift based reference
|
||||
Grid : Message : 219.662772 s : *****************************************************************
|
||||
Grid : Message : 219.662786 s : * Kernel options --dslash-generic, --dslash-unroll, --dslash-asm
|
||||
Grid : Message : 219.662787 s : *****************************************************************
|
||||
Grid : Message : 219.662788 s : *****************************************************************
|
||||
Grid : Message : 219.662788 s : * Benchmarking DomainWallFermionR::Dhop
|
||||
Grid : Message : 219.662789 s : * Vectorising space-time by 8
|
||||
Grid : Message : 219.662790 s : * VComplex size is 64 B
|
||||
Grid : Message : 219.662791 s : * Using Overlapped Comms/Compute
|
||||
Grid : Message : 219.662791 s : * Using GENERIC Nc WilsonKernels
|
||||
Grid : Message : 219.662791 s : *****************************************************************
|
||||
Grid : Message : 220.425592 s : Called warmup
|
||||
Grid : Message : 222.536249 s : Called Dw 300 times in 2110597 us
|
||||
Grid : Message : 222.536267 s : mflop/s = 50365105.5
|
||||
Grid : Message : 222.536269 s : mflop/s per rank = 3147819.09
|
||||
Grid : Message : 222.536270 s : mflop/s per node = 12591276.4
|
||||
Grid : Message : 222.541053 s : norm diff 5.80156793e-14 Line 306
|
||||
Grid : Message : 232.135901 s : ----------------------------------------------------------------
|
||||
Grid : Message : 232.135915 s : Compare to naive wilson implementation Dag to verify correctness
|
||||
Grid : Message : 232.135916 s : ----------------------------------------------------------------
|
||||
Grid : Message : 232.135917 s : Called DwDag
|
||||
Grid : Message : 232.135918 s : norm dag result 4.12801829
|
||||
Grid : Message : 232.151938 s : norm dag ref 4.12801829
|
||||
Grid : Message : 232.154451 s : norm dag diff 3.42093991e-14 Line 377
|
||||
Grid : Message : 232.216117 s : Calling Deo and Doe and //assert Deo+Doe == Dunprec
|
||||
Grid : Message : 232.630529 s : src_e0.500004005
|
||||
Grid : Message : 232.643197 s : src_o0.499996067
|
||||
Grid : Message : 232.645527 s : *********************************************************
|
||||
Grid : Message : 232.645529 s : * Benchmarking DomainWallFermion::DhopEO
|
||||
Grid : Message : 232.645532 s : * Vectorising space-time by 8
|
||||
Grid : Message : 232.645533 s : * Using Overlapped Comms/Compute
|
||||
Grid : Message : 232.645534 s : * Using GENERIC Nc WilsonKernels
|
||||
Grid : Message : 232.645535 s : *********************************************************
|
||||
Grid : Message : 233.774184 s : Deo mflop/s = 47432091.9
|
||||
Grid : Message : 233.774194 s : Deo mflop/s per rank 2964505.74
|
||||
Grid : Message : 233.774196 s : Deo mflop/s per node 11858023
|
||||
Grid : Message : 233.791552 s : r_e2.06443136
|
||||
Grid : Message : 233.791899 s : r_o2.06378451
|
||||
Grid : Message : 233.792204 s : res4.12821587
|
||||
Grid : Message : 234.230783 s : norm diff 0
|
||||
Grid : Message : 235.162780 s : norm diff even 0
|
||||
Grid : Message : 235.291950 s : norm diff odd 0
|
||||
Grid : Message : 235.765411 s : *******************************************
|
||||
Grid : Message : 235.765424 s : ******* Grid Finalize ******
|
||||
Grid : Message : 235.765425 s : *******************************************
|
||||
| ||||