mirror of
https://github.com/paboyle/Grid.git
synced 2025-11-06 22:59:32 +00:00
Compare commits
19 Commits
specflow
...
c7b74db317
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c7b74db317 | ||
|
|
0ce201efbe | ||
|
|
6d8a3d8bb2 | ||
|
|
7dfd207ebb | ||
|
|
3a65a096f2 | ||
|
|
85b2bd4c93 | ||
|
|
35e10a1159 | ||
| d418f78352 | |||
| 25163998a0 | |||
|
|
dc546aaa4b | ||
|
|
5364d580c9 | ||
|
|
2a9a6347e3 | ||
|
|
cfdb56f314 | ||
|
|
b517e88db3 | ||
| bb317aba8d | |||
| 644cc6647e | |||
| 72397ce23b | |||
|
|
d60a80c098 | ||
|
|
bb8b6d9d73 |
@@ -37,6 +37,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
#include <Grid/qcd/QCD.h>
|
||||
#include <Grid/qcd/spin/Spin.h>
|
||||
#include <Grid/qcd/gparity/Gparity.h>
|
||||
#include <Grid/qcd/spin/Pauli.h> // depends on Gparity
|
||||
#include <Grid/qcd/utils/Utils.h>
|
||||
#include <Grid/qcd/representations/Representations.h>
|
||||
NAMESPACE_CHECK(GridQCDCore);
|
||||
|
||||
@@ -73,7 +73,6 @@ NAMESPACE_CHECK(BiCGSTAB);
|
||||
#include <Grid/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h>
|
||||
#include <Grid/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h>
|
||||
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
|
||||
#include <Grid/algorithms/iterative/SimpleLanczos.h>
|
||||
#include <Grid/algorithms/iterative/PowerMethod.h>
|
||||
#include <Grid/algorithms/iterative/AdefGeneric.h>
|
||||
#include <Grid/algorithms/iterative/AdefMrhs.h>
|
||||
|
||||
@@ -269,7 +269,9 @@ public:
|
||||
RealD xscale = 2.0/(hi-lo);
|
||||
RealD mscale = -(hi+lo)/(hi-lo);
|
||||
Linop.HermOp(T0,y);
|
||||
grid->Barrier();
|
||||
axpby(T1,xscale,mscale,y,in);
|
||||
grid->Barrier();
|
||||
|
||||
// sum = .5 c[0] T0 + c[1] T1
|
||||
// out = ()*T0 + Coeffs[1]*T1;
|
||||
|
||||
@@ -1,931 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Chulwoo Jung <chulwoo@bnl.gov>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LANC_H
|
||||
#define GRID_LANC_H
|
||||
|
||||
#include <string.h> //memset
|
||||
|
||||
#ifdef USE_LAPACK
|
||||
#ifdef USE_MKL
|
||||
#include<mkl_lapack.h>
|
||||
#else
|
||||
void LAPACK_dstegr (char *jobz, char *range, int *n, double *d, double *e,
|
||||
double *vl, double *vu, int *il, int *iu, double *abstol,
|
||||
int *m, double *w, double *z, int *ldz, int *isuppz,
|
||||
double *work, int *lwork, int *iwork, int *liwork,
|
||||
int *info);
|
||||
//#include <lapacke/lapacke.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//#include <Grid/algorithms/densematrix/DenseMatrix.h>
|
||||
|
||||
// eliminate temorary vector in calc()
|
||||
#define MEM_SAVE
|
||||
|
||||
namespace Grid
|
||||
{
|
||||
|
||||
struct Bisection
|
||||
{
|
||||
|
||||
#if 0
|
||||
static void get_eig2 (int row_num, std::vector < RealD > &ALPHA,
|
||||
std::vector < RealD > &BETA,
|
||||
std::vector < RealD > &eig)
|
||||
{
|
||||
int i, j;
|
||||
std::vector < RealD > evec1 (row_num + 3);
|
||||
std::vector < RealD > evec2 (row_num + 3);
|
||||
RealD eps2;
|
||||
ALPHA[1] = 0.;
|
||||
BETHA[1] = 0.;
|
||||
for (i = 0; i < row_num - 1; i++)
|
||||
{
|
||||
ALPHA[i + 1] = A[i * (row_num + 1)].real ();
|
||||
BETHA[i + 2] = A[i * (row_num + 1) + 1].real ();
|
||||
}
|
||||
ALPHA[row_num] = A[(row_num - 1) * (row_num + 1)].real ();
|
||||
bisec (ALPHA, BETHA, row_num, 1, row_num, 1e-10, 1e-10, evec1, eps2);
|
||||
bisec (ALPHA, BETHA, row_num, 1, row_num, 1e-16, 1e-16, evec2, eps2);
|
||||
|
||||
// Do we really need to sort here?
|
||||
int begin = 1;
|
||||
int end = row_num;
|
||||
int swapped = 1;
|
||||
while (swapped)
|
||||
{
|
||||
swapped = 0;
|
||||
for (i = begin; i < end; i++)
|
||||
{
|
||||
if (mag (evec2[i]) > mag (evec2[i + 1]))
|
||||
{
|
||||
swap (evec2 + i, evec2 + i + 1);
|
||||
swapped = 1;
|
||||
}
|
||||
}
|
||||
end--;
|
||||
for (i = end - 1; i >= begin; i--)
|
||||
{
|
||||
if (mag (evec2[i]) > mag (evec2[i + 1]))
|
||||
{
|
||||
swap (evec2 + i, evec2 + i + 1);
|
||||
swapped = 1;
|
||||
}
|
||||
}
|
||||
begin++;
|
||||
}
|
||||
|
||||
for (i = 0; i < row_num; i++)
|
||||
{
|
||||
for (j = 0; j < row_num; j++)
|
||||
{
|
||||
if (i == j)
|
||||
H[i * row_num + j] = evec2[i + 1];
|
||||
else
|
||||
H[i * row_num + j] = 0.;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static void bisec (std::vector < RealD > &c,
|
||||
std::vector < RealD > &b,
|
||||
int n,
|
||||
int m1,
|
||||
int m2,
|
||||
RealD eps1,
|
||||
RealD relfeh, std::vector < RealD > &x, RealD & eps2)
|
||||
{
|
||||
std::vector < RealD > wu (n + 2);
|
||||
|
||||
RealD h, q, x1, xu, x0, xmin, xmax;
|
||||
int i, a, k;
|
||||
|
||||
b[1] = 0.0;
|
||||
xmin = c[n] - fabs (b[n]);
|
||||
xmax = c[n] + fabs (b[n]);
|
||||
for (i = 1; i < n; i++)
|
||||
{
|
||||
h = fabs (b[i]) + fabs (b[i + 1]);
|
||||
if (c[i] + h > xmax)
|
||||
xmax = c[i] + h;
|
||||
if (c[i] - h < xmin)
|
||||
xmin = c[i] - h;
|
||||
}
|
||||
xmax *= 2.;
|
||||
|
||||
eps2 = relfeh * ((xmin + xmax) > 0.0 ? xmax : -xmin);
|
||||
if (eps1 <= 0.0)
|
||||
eps1 = eps2;
|
||||
eps2 = 0.5 * eps1 + 7.0 * (eps2);
|
||||
x0 = xmax;
|
||||
for (i = m1; i <= m2; i++)
|
||||
{
|
||||
x[i] = xmax;
|
||||
wu[i] = xmin;
|
||||
}
|
||||
|
||||
for (k = m2; k >= m1; k--)
|
||||
{
|
||||
xu = xmin;
|
||||
i = k;
|
||||
do
|
||||
{
|
||||
if (xu < wu[i])
|
||||
{
|
||||
xu = wu[i];
|
||||
i = m1 - 1;
|
||||
}
|
||||
i--;
|
||||
}
|
||||
while (i >= m1);
|
||||
if (x0 > x[k])
|
||||
x0 = x[k];
|
||||
while ((x0 - xu) > 2 * relfeh * (fabs (xu) + fabs (x0)) + eps1)
|
||||
{
|
||||
x1 = (xu + x0) / 2;
|
||||
|
||||
a = 0;
|
||||
q = 1.0;
|
||||
for (i = 1; i <= n; i++)
|
||||
{
|
||||
q =
|
||||
c[i] - x1 -
|
||||
((q != 0.0) ? b[i] * b[i] / q : fabs (b[i]) / relfeh);
|
||||
if (q < 0)
|
||||
a++;
|
||||
}
|
||||
// printf("x1=%0.14e a=%d\n",x1,a);
|
||||
if (a < k)
|
||||
{
|
||||
if (a < m1)
|
||||
{
|
||||
xu = x1;
|
||||
wu[m1] = x1;
|
||||
}
|
||||
else
|
||||
{
|
||||
xu = x1;
|
||||
wu[a + 1] = x1;
|
||||
if (x[a] > x1)
|
||||
x[a] = x1;
|
||||
}
|
||||
}
|
||||
else
|
||||
x0 = x1;
|
||||
}
|
||||
printf ("x0=%0.14e xu=%0.14e k=%d\n", x0, xu, k);
|
||||
x[k] = (x0 + xu) / 2;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Implicitly restarted lanczos
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
template < class Field > class SimpleLanczos
|
||||
{
|
||||
|
||||
const RealD small = 1.0e-16;
|
||||
public:
|
||||
int lock;
|
||||
int get;
|
||||
int Niter;
|
||||
int converged;
|
||||
|
||||
int Nstop; // Number of evecs checked for convergence
|
||||
int Nk; // Number of converged sought
|
||||
int Np; // Np -- Number of spare vecs in kryloc space
|
||||
int Nm; // Nm -- total number of vectors
|
||||
|
||||
|
||||
RealD OrthoTime;
|
||||
|
||||
RealD eresid;
|
||||
|
||||
// SortEigen < Field > _sort;
|
||||
|
||||
LinearFunction < Field > &_Linop;
|
||||
|
||||
// OperatorFunction < Field > &_poly;
|
||||
|
||||
/////////////////////////
|
||||
// Constructor
|
||||
/////////////////////////
|
||||
void init (void)
|
||||
{
|
||||
};
|
||||
// void Abort (int ff, std::vector < RealD > &evals, DenseVector < Denstd::vector < RealD > >&evecs);
|
||||
|
||||
SimpleLanczos (LinearFunction < Field > &Linop, // op
|
||||
// OperatorFunction < Field > &poly, // polynmial
|
||||
int _Nstop, // sought vecs
|
||||
int _Nk, // sought vecs
|
||||
int _Nm, // spare vecs
|
||||
RealD _eresid, // resid in lmdue deficit
|
||||
int _Niter): // Max iterations
|
||||
|
||||
_Linop (Linop),
|
||||
// _poly (poly),
|
||||
Nstop (_Nstop), Nk (_Nk), Nm (_Nm), eresid (_eresid), Niter (_Niter)
|
||||
{
|
||||
Np = Nm - Nk;
|
||||
assert (Np > 0);
|
||||
};
|
||||
|
||||
/////////////////////////
|
||||
// Sanity checked this routine (step) against Saad.
|
||||
/////////////////////////
|
||||
void RitzMatrix (std::vector < Field > &evec, int k)
|
||||
{
|
||||
|
||||
if (1)
|
||||
return;
|
||||
|
||||
GridBase *grid = evec[0].Grid();
|
||||
Field w (grid);
|
||||
std::cout << GridLogMessage << "RitzMatrix " << std::endl;
|
||||
for (int i = 0; i < k; i++)
|
||||
{
|
||||
_Linop(evec[i], w);
|
||||
// _poly(_Linop,evec[i],w);
|
||||
std::cout << GridLogMessage << "[" << i << "] ";
|
||||
for (int j = 0; j < k; j++)
|
||||
{
|
||||
ComplexD in = innerProduct (evec[j], w);
|
||||
if (fabs ((double) i - j) > 1)
|
||||
{
|
||||
if (abs (in) > 1.0e-9)
|
||||
{
|
||||
std::cout << GridLogMessage << "oops" << std::endl;
|
||||
abort ();
|
||||
}
|
||||
else
|
||||
std::cout << GridLogMessage << " 0 ";
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << GridLogMessage << " " << in << " ";
|
||||
}
|
||||
}
|
||||
std::cout << GridLogMessage << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void step (std::vector < RealD > &lmd,
|
||||
std::vector < RealD > &lme,
|
||||
Field & last, Field & current, Field & next, uint64_t k)
|
||||
{
|
||||
if (lmd.size () <= k)
|
||||
lmd.resize (k + Nm);
|
||||
if (lme.size () <= k)
|
||||
lme.resize (k + Nm);
|
||||
|
||||
|
||||
// _poly(_Linop,current,next ); // 3. wk:=Avk−βkv_{k−1}
|
||||
_Linop(current, next); // 3. wk:=Avk−βkv_{k−1}
|
||||
if (k > 0)
|
||||
{
|
||||
next -= lme[k - 1] * last;
|
||||
}
|
||||
// std::cout<<GridLogMessage << "<last|next>" << innerProduct(last,next) <<std::endl;
|
||||
|
||||
ComplexD zalph = innerProduct (current, next); // 4. αk:=(wk,vk)
|
||||
RealD alph = real (zalph);
|
||||
|
||||
next = next - alph * current; // 5. wk:=wk−αkvk
|
||||
// std::cout<<GridLogMessage << "<current|next>" << innerProduct(current,next) <<std::endl;
|
||||
|
||||
RealD beta = normalise (next); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
|
||||
// 7. vk+1 := wk/βk+1
|
||||
// norm=beta;
|
||||
|
||||
int interval = Nm / 100 + 1;
|
||||
if ((k % interval) == 0)
|
||||
std::
|
||||
cout << GridLogMessage << k << " : alpha = " << zalph << " beta " <<
|
||||
beta << std::endl;
|
||||
const RealD tiny = 1.0e-20;
|
||||
if (beta < tiny)
|
||||
{
|
||||
std::cout << GridLogMessage << " beta is tiny " << beta << std::
|
||||
endl;
|
||||
}
|
||||
lmd[k] = alph;
|
||||
lme[k] = beta;
|
||||
|
||||
}
|
||||
|
||||
void qr_decomp (std::vector < RealD > &lmd,
|
||||
std::vector < RealD > &lme,
|
||||
int Nk,
|
||||
int Nm,
|
||||
std::vector < RealD > &Qt, RealD Dsh, int kmin, int kmax)
|
||||
{
|
||||
int k = kmin - 1;
|
||||
RealD x;
|
||||
|
||||
RealD Fden = 1.0 / hypot (lmd[k] - Dsh, lme[k]);
|
||||
RealD c = (lmd[k] - Dsh) * Fden;
|
||||
RealD s = -lme[k] * Fden;
|
||||
|
||||
RealD tmpa1 = lmd[k];
|
||||
RealD tmpa2 = lmd[k + 1];
|
||||
RealD tmpb = lme[k];
|
||||
|
||||
lmd[k] = c * c * tmpa1 + s * s * tmpa2 - 2.0 * c * s * tmpb;
|
||||
lmd[k + 1] = s * s * tmpa1 + c * c * tmpa2 + 2.0 * c * s * tmpb;
|
||||
lme[k] = c * s * (tmpa1 - tmpa2) + (c * c - s * s) * tmpb;
|
||||
x = -s * lme[k + 1];
|
||||
lme[k + 1] = c * lme[k + 1];
|
||||
|
||||
for (int i = 0; i < Nk; ++i)
|
||||
{
|
||||
RealD Qtmp1 = Qt[i + Nm * k];
|
||||
RealD Qtmp2 = Qt[i + Nm * (k + 1)];
|
||||
Qt[i + Nm * k] = c * Qtmp1 - s * Qtmp2;
|
||||
Qt[i + Nm * (k + 1)] = s * Qtmp1 + c * Qtmp2;
|
||||
}
|
||||
|
||||
// Givens transformations
|
||||
for (int k = kmin; k < kmax - 1; ++k)
|
||||
{
|
||||
|
||||
RealD Fden = 1.0 / hypot (x, lme[k - 1]);
|
||||
RealD c = lme[k - 1] * Fden;
|
||||
RealD s = -x * Fden;
|
||||
|
||||
RealD tmpa1 = lmd[k];
|
||||
RealD tmpa2 = lmd[k + 1];
|
||||
RealD tmpb = lme[k];
|
||||
|
||||
lmd[k] = c * c * tmpa1 + s * s * tmpa2 - 2.0 * c * s * tmpb;
|
||||
lmd[k + 1] = s * s * tmpa1 + c * c * tmpa2 + 2.0 * c * s * tmpb;
|
||||
lme[k] = c * s * (tmpa1 - tmpa2) + (c * c - s * s) * tmpb;
|
||||
lme[k - 1] = c * lme[k - 1] - s * x;
|
||||
|
||||
if (k != kmax - 2)
|
||||
{
|
||||
x = -s * lme[k + 1];
|
||||
lme[k + 1] = c * lme[k + 1];
|
||||
}
|
||||
|
||||
for (int i = 0; i < Nk; ++i)
|
||||
{
|
||||
RealD Qtmp1 = Qt[i + Nm * k];
|
||||
RealD Qtmp2 = Qt[i + Nm * (k + 1)];
|
||||
Qt[i + Nm * k] = c * Qtmp1 - s * Qtmp2;
|
||||
Qt[i + Nm * (k + 1)] = s * Qtmp1 + c * Qtmp2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
#ifdef USE_LAPACK
|
||||
#ifdef USE_MKL
|
||||
#define LAPACK_INT MKL_INT
|
||||
#else
|
||||
#define LAPACK_INT long long
|
||||
#endif
|
||||
void diagonalize_lapack (std::vector < RealD > &lmd, std::vector < RealD > &lme, int N1, // all
|
||||
int N2, // get
|
||||
GridBase * grid)
|
||||
{
|
||||
const int size = Nm;
|
||||
LAPACK_INT NN = N1;
|
||||
double evals_tmp[NN];
|
||||
double DD[NN];
|
||||
double EE[NN];
|
||||
for (int i = 0; i < NN; i++)
|
||||
for (int j = i - 1; j <= i + 1; j++)
|
||||
if (j < NN && j >= 0)
|
||||
{
|
||||
if (i == j)
|
||||
DD[i] = lmd[i];
|
||||
if (i == j)
|
||||
evals_tmp[i] = lmd[i];
|
||||
if (j == (i - 1))
|
||||
EE[j] = lme[j];
|
||||
}
|
||||
LAPACK_INT evals_found;
|
||||
LAPACK_INT lwork =
|
||||
((18 * NN) >
|
||||
(1 + 4 * NN + NN * NN) ? (18 * NN) : (1 + 4 * NN + NN * NN));
|
||||
LAPACK_INT liwork = 3 + NN * 10;
|
||||
LAPACK_INT iwork[liwork];
|
||||
double work[lwork];
|
||||
LAPACK_INT isuppz[2 * NN];
|
||||
char jobz = 'N'; // calculate evals only
|
||||
char range = 'I'; // calculate il-th to iu-th evals
|
||||
// char range = 'A'; // calculate all evals
|
||||
char uplo = 'U'; // refer to upper half of original matrix
|
||||
char compz = 'I'; // Compute eigenvectors of tridiagonal matrix
|
||||
int ifail[NN];
|
||||
LAPACK_INT info;
|
||||
// int total = QMP_get_number_of_nodes();
|
||||
// int node = QMP_get_node_number();
|
||||
// GridBase *grid = evec[0]._grid;
|
||||
int total = grid->_Nprocessors;
|
||||
int node = grid->_processor;
|
||||
int interval = (NN / total) + 1;
|
||||
double vl = 0.0, vu = 0.0;
|
||||
LAPACK_INT il = interval * node + 1, iu = interval * (node + 1);
|
||||
if (iu > NN)
|
||||
iu = NN;
|
||||
double tol = 0.0;
|
||||
if (1)
|
||||
{
|
||||
memset (evals_tmp, 0, sizeof (double) * NN);
|
||||
if (il <= NN)
|
||||
{
|
||||
printf ("total=%d node=%d il=%d iu=%d\n", total, node, il, iu);
|
||||
#ifdef USE_MKL
|
||||
dstegr (&jobz, &range, &NN,
|
||||
#else
|
||||
LAPACK_dstegr (&jobz, &range, &NN,
|
||||
#endif
|
||||
(double *) DD, (double *) EE, &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A'
|
||||
&tol, // tolerance
|
||||
&evals_found, evals_tmp, (double *) NULL, &NN,
|
||||
isuppz, work, &lwork, iwork, &liwork, &info);
|
||||
for (int i = iu - 1; i >= il - 1; i--)
|
||||
{
|
||||
printf ("node=%d evals_found=%d evals_tmp[%d] = %g\n", node,
|
||||
evals_found, i - (il - 1), evals_tmp[i - (il - 1)]);
|
||||
evals_tmp[i] = evals_tmp[i - (il - 1)];
|
||||
if (il > 1)
|
||||
evals_tmp[i - (il - 1)] = 0.;
|
||||
}
|
||||
}
|
||||
{
|
||||
grid->GlobalSumVector (evals_tmp, NN);
|
||||
}
|
||||
}
|
||||
// cheating a bit. It is better to sort instead of just reversing it, but the document of the routine says evals are sorted in increasing order. qr gives evals in decreasing order.
|
||||
}
|
||||
#undef LAPACK_INT
|
||||
#endif
|
||||
|
||||
|
||||
void diagonalize (std::vector < RealD > &lmd,
|
||||
std::vector < RealD > &lme,
|
||||
int N2, int N1, GridBase * grid)
|
||||
{
|
||||
|
||||
#ifdef USE_LAPACK
|
||||
const int check_lapack = 0; // just use lapack if 0, check against lapack if 1
|
||||
|
||||
if (!check_lapack)
|
||||
return diagonalize_lapack (lmd, lme, N2, N1, grid);
|
||||
|
||||
// diagonalize_lapack(lmd2,lme2,Nm2,Nm,Qt,grid);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
static RealD normalise (Field & v)
|
||||
{
|
||||
RealD nn = norm2 (v);
|
||||
nn = sqrt (nn);
|
||||
v = v * (1.0 / nn);
|
||||
return nn;
|
||||
}
|
||||
|
||||
void orthogonalize (Field & w, std::vector < Field > &evec, int k)
|
||||
{
|
||||
double t0 = -usecond () / 1e6;
|
||||
typedef typename Field::scalar_type MyComplex;
|
||||
MyComplex ip;
|
||||
|
||||
if (0)
|
||||
{
|
||||
for (int j = 0; j < k; ++j)
|
||||
{
|
||||
normalise (evec[j]);
|
||||
for (int i = 0; i < j; i++)
|
||||
{
|
||||
ip = innerProduct (evec[i], evec[j]); // are the evecs normalised? ; this assumes so.
|
||||
evec[j] = evec[j] - ip * evec[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int j = 0; j < k; ++j)
|
||||
{
|
||||
ip = innerProduct (evec[j], w); // are the evecs normalised? ; this assumes so.
|
||||
w = w - ip * evec[j];
|
||||
}
|
||||
normalise (w);
|
||||
t0 += usecond () / 1e6;
|
||||
OrthoTime += t0;
|
||||
}
|
||||
|
||||
void setUnit_Qt (int Nm, std::vector < RealD > &Qt)
|
||||
{
|
||||
for (int i = 0; i < Qt.size (); ++i)
|
||||
Qt[i] = 0.0;
|
||||
for (int k = 0; k < Nm; ++k)
|
||||
Qt[k + k * Nm] = 1.0;
|
||||
}
|
||||
|
||||
|
||||
void calc (std::vector < RealD > &eval, const Field & src, int &Nconv)
|
||||
{
|
||||
|
||||
GridBase *grid = src.Grid();
|
||||
// assert(grid == src._grid);
|
||||
|
||||
std::
|
||||
cout << GridLogMessage << " -- Nk = " << Nk << " Np = " << Np << std::
|
||||
endl;
|
||||
std::cout << GridLogMessage << " -- Nm = " << Nm << std::endl;
|
||||
std::cout << GridLogMessage << " -- size of eval = " << eval.
|
||||
size () << std::endl;
|
||||
|
||||
// assert(c.size() && Nm == eval.size());
|
||||
|
||||
std::vector < RealD > lme (Nm);
|
||||
std::vector < RealD > lmd (Nm);
|
||||
|
||||
|
||||
Field current (grid);
|
||||
Field last (grid);
|
||||
Field next (grid);
|
||||
|
||||
Nconv = 0;
|
||||
|
||||
RealD beta_k;
|
||||
|
||||
// Set initial vector
|
||||
// (uniform vector) Why not src??
|
||||
// evec[0] = 1.0;
|
||||
current = src;
|
||||
std::cout << GridLogMessage << "norm2(src)= " << norm2 (src) << std::
|
||||
endl;
|
||||
normalise (current);
|
||||
std::
|
||||
cout << GridLogMessage << "norm2(evec[0])= " << norm2 (current) <<
|
||||
std::endl;
|
||||
|
||||
// Initial Nk steps
|
||||
OrthoTime = 0.;
|
||||
double t0 = usecond () / 1e6;
|
||||
RealD norm; // sqrt norm of last vector
|
||||
|
||||
uint64_t iter = 0;
|
||||
|
||||
bool initted = false;
|
||||
std::vector < RealD > low (Nstop * 10);
|
||||
std::vector < RealD > high (Nstop * 10);
|
||||
RealD cont = 0.;
|
||||
while (1) {
|
||||
cont = 0.;
|
||||
std::vector < RealD > lme2 (Nm);
|
||||
std::vector < RealD > lmd2 (Nm);
|
||||
for (uint64_t k = 0; k < Nm; ++k, iter++) {
|
||||
step (lmd, lme, last, current, next, iter);
|
||||
last = current;
|
||||
current = next;
|
||||
}
|
||||
double t1 = usecond () / 1e6;
|
||||
std::cout << GridLogMessage << "IRL::Initial steps: " << t1 -
|
||||
t0 << "seconds" << std::endl;
|
||||
t0 = t1;
|
||||
std::
|
||||
cout << GridLogMessage << "IRL::Initial steps:OrthoTime " <<
|
||||
OrthoTime << "seconds" << std::endl;
|
||||
|
||||
// getting eigenvalues
|
||||
lmd2.resize (iter + 2);
|
||||
lme2.resize (iter + 2);
|
||||
for (uint64_t k = 0; k < iter; ++k) {
|
||||
lmd2[k + 1] = lmd[k];
|
||||
lme2[k + 2] = lme[k];
|
||||
}
|
||||
t1 = usecond () / 1e6;
|
||||
std::cout << GridLogMessage << "IRL:: copy: " << t1 -
|
||||
t0 << "seconds" << std::endl;
|
||||
t0 = t1;
|
||||
{
|
||||
int total = grid->_Nprocessors;
|
||||
int node = grid->_processor;
|
||||
int interval = (Nstop / total) + 1;
|
||||
int iu = (iter + 1) - (interval * node + 1);
|
||||
int il = (iter + 1) - (interval * (node + 1));
|
||||
std::vector < RealD > eval2 (iter + 3);
|
||||
RealD eps2;
|
||||
Bisection::bisec (lmd2, lme2, iter, il, iu, 1e-16, 1e-10, eval2,
|
||||
eps2);
|
||||
// diagonalize(eval2,lme2,iter,Nk,grid);
|
||||
RealD diff = 0.;
|
||||
for (int i = il; i <= iu; i++) {
|
||||
if (initted)
|
||||
diff =
|
||||
fabs (eval2[i] - high[iu-i]) / (fabs (eval2[i]) +
|
||||
fabs (high[iu-i]));
|
||||
if (initted && (diff > eresid))
|
||||
cont = 1.;
|
||||
if (initted)
|
||||
printf ("eval[%d]=%0.14e %0.14e, %0.14e\n", i, eval2[i],
|
||||
high[iu-i], diff);
|
||||
high[iu-i] = eval2[i];
|
||||
}
|
||||
il = (interval * node + 1);
|
||||
iu = (interval * (node + 1));
|
||||
Bisection::bisec (lmd2, lme2, iter, il, iu, 1e-16, 1e-10, eval2,
|
||||
eps2);
|
||||
for (int i = il; i <= iu; i++) {
|
||||
if (initted)
|
||||
diff =
|
||||
fabs (eval2[i] - low[i]) / (fabs (eval2[i]) +
|
||||
fabs (low[i]));
|
||||
if (initted && (diff > eresid))
|
||||
cont = 1.;
|
||||
if (initted)
|
||||
printf ("eval[%d]=%0.14e %0.14e, %0.14e\n", i, eval2[i],
|
||||
low[i], diff);
|
||||
low[i] = eval2[i];
|
||||
}
|
||||
t1 = usecond () / 1e6;
|
||||
std::cout << GridLogMessage << "IRL:: diagonalize: " << t1 -
|
||||
t0 << "seconds" << std::endl;
|
||||
t0 = t1;
|
||||
}
|
||||
|
||||
for (uint64_t k = 0; k < Nk; ++k) {
|
||||
// eval[k] = eval2[k];
|
||||
}
|
||||
if (initted)
|
||||
{
|
||||
grid->GlobalSumVector (&cont, 1);
|
||||
if (cont < 1.) return;
|
||||
}
|
||||
initted = true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#if 0
|
||||
|
||||
/**
|
||||
There is some matrix Q such that for any vector y
|
||||
Q.e_1 = y and Q is unitary.
|
||||
**/
|
||||
template < class T >
|
||||
static T orthQ (DenseMatrix < T > &Q, std::vector < T > y)
|
||||
{
|
||||
int N = y.size (); //Matrix Size
|
||||
Fill (Q, 0.0);
|
||||
T tau;
|
||||
for (int i = 0; i < N; i++)
|
||||
{
|
||||
Q[i][0] = y[i];
|
||||
}
|
||||
T sig = conj (y[0]) * y[0];
|
||||
T tau0 = fabs (sqrt (sig));
|
||||
|
||||
for (int j = 1; j < N; j++)
|
||||
{
|
||||
sig += conj (y[j]) * y[j];
|
||||
tau = abs (sqrt (sig));
|
||||
|
||||
if (abs (tau0) > 0.0)
|
||||
{
|
||||
|
||||
T gam = conj ((y[j] / tau) / tau0);
|
||||
for (int k = 0; k <= j - 1; k++)
|
||||
{
|
||||
Q[k][j] = -gam * y[k];
|
||||
}
|
||||
Q[j][j] = tau0 / tau;
|
||||
}
|
||||
else
|
||||
{
|
||||
Q[j - 1][j] = 1.0;
|
||||
}
|
||||
tau0 = tau;
|
||||
}
|
||||
return tau;
|
||||
}
|
||||
|
||||
/**
|
||||
There is some matrix Q such that for any vector y
|
||||
Q.e_k = y and Q is unitary.
|
||||
**/
|
||||
template < class T >
|
||||
static T orthU (DenseMatrix < T > &Q, std::vector < T > y)
|
||||
{
|
||||
T tau = orthQ (Q, y);
|
||||
SL (Q);
|
||||
return tau;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
Wind up with a matrix with the first con rows untouched
|
||||
|
||||
say con = 2
|
||||
Q is such that Qdag H Q has {x, x, val, 0, 0, 0, 0, ...} as 1st colum
|
||||
and the matrix is upper hessenberg
|
||||
and with f and Q appropriately modidied with Q is the arnoldi factorization
|
||||
|
||||
**/
|
||||
|
||||
template < class T > static void Lock (DenseMatrix < T > &H, ///Hess mtx
|
||||
DenseMatrix < T > &Q, ///Lock Transform
|
||||
T val, ///value to be locked
|
||||
int con, ///number already locked
|
||||
RealD small, int dfg, bool herm)
|
||||
{
|
||||
//ForceTridiagonal(H);
|
||||
|
||||
int M = H.dim;
|
||||
DenseVector < T > vec;
|
||||
Resize (vec, M - con);
|
||||
|
||||
DenseMatrix < T > AH;
|
||||
Resize (AH, M - con, M - con);
|
||||
AH = GetSubMtx (H, con, M, con, M);
|
||||
|
||||
DenseMatrix < T > QQ;
|
||||
Resize (QQ, M - con, M - con);
|
||||
|
||||
Unity (Q);
|
||||
Unity (QQ);
|
||||
|
||||
DenseVector < T > evals;
|
||||
Resize (evals, M - con);
|
||||
DenseMatrix < T > evecs;
|
||||
Resize (evecs, M - con, M - con);
|
||||
|
||||
Wilkinson < T > (AH, evals, evecs, small);
|
||||
|
||||
int k = 0;
|
||||
RealD cold = abs (val - evals[k]);
|
||||
for (int i = 1; i < M - con; i++)
|
||||
{
|
||||
RealD cnew = abs (val - evals[i]);
|
||||
if (cnew < cold)
|
||||
{
|
||||
k = i;
|
||||
cold = cnew;
|
||||
}
|
||||
}
|
||||
vec = evecs[k];
|
||||
|
||||
ComplexD tau;
|
||||
orthQ (QQ, vec);
|
||||
//orthQM(QQ,AH,vec);
|
||||
|
||||
AH = Hermitian (QQ) * AH;
|
||||
AH = AH * QQ;
|
||||
|
||||
for (int i = con; i < M; i++)
|
||||
{
|
||||
for (int j = con; j < M; j++)
|
||||
{
|
||||
Q[i][j] = QQ[i - con][j - con];
|
||||
H[i][j] = AH[i - con][j - con];
|
||||
}
|
||||
}
|
||||
|
||||
for (int j = M - 1; j > con + 2; j--)
|
||||
{
|
||||
|
||||
DenseMatrix < T > U;
|
||||
Resize (U, j - 1 - con, j - 1 - con);
|
||||
DenseVector < T > z;
|
||||
Resize (z, j - 1 - con);
|
||||
T nm = norm (z);
|
||||
for (int k = con + 0; k < j - 1; k++)
|
||||
{
|
||||
z[k - con] = conj (H (j, k + 1));
|
||||
}
|
||||
normalise (z);
|
||||
|
||||
RealD tmp = 0;
|
||||
for (int i = 0; i < z.size () - 1; i++)
|
||||
{
|
||||
tmp = tmp + abs (z[i]);
|
||||
}
|
||||
|
||||
if (tmp < small / ((RealD) z.size () - 1.0))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
tau = orthU (U, z);
|
||||
|
||||
DenseMatrix < T > Hb;
|
||||
Resize (Hb, j - 1 - con, M);
|
||||
|
||||
for (int a = 0; a < M; a++)
|
||||
{
|
||||
for (int b = 0; b < j - 1 - con; b++)
|
||||
{
|
||||
T sum = 0;
|
||||
for (int c = 0; c < j - 1 - con; c++)
|
||||
{
|
||||
sum += H[a][con + 1 + c] * U[c][b];
|
||||
} //sum += H(a,con+1+c)*U(c,b);}
|
||||
Hb[b][a] = sum;
|
||||
}
|
||||
}
|
||||
|
||||
for (int k = con + 1; k < j; k++)
|
||||
{
|
||||
for (int l = 0; l < M; l++)
|
||||
{
|
||||
H[l][k] = Hb[k - 1 - con][l];
|
||||
}
|
||||
} //H(Hb[k-1-con][l] , l,k);}}
|
||||
|
||||
DenseMatrix < T > Qb;
|
||||
Resize (Qb, M, M);
|
||||
|
||||
for (int a = 0; a < M; a++)
|
||||
{
|
||||
for (int b = 0; b < j - 1 - con; b++)
|
||||
{
|
||||
T sum = 0;
|
||||
for (int c = 0; c < j - 1 - con; c++)
|
||||
{
|
||||
sum += Q[a][con + 1 + c] * U[c][b];
|
||||
} //sum += Q(a,con+1+c)*U(c,b);}
|
||||
Qb[b][a] = sum;
|
||||
}
|
||||
}
|
||||
|
||||
for (int k = con + 1; k < j; k++)
|
||||
{
|
||||
for (int l = 0; l < M; l++)
|
||||
{
|
||||
Q[l][k] = Qb[k - 1 - con][l];
|
||||
}
|
||||
} //Q(Qb[k-1-con][l] , l,k);}}
|
||||
|
||||
DenseMatrix < T > Hc;
|
||||
Resize (Hc, M, M);
|
||||
|
||||
for (int a = 0; a < j - 1 - con; a++)
|
||||
{
|
||||
for (int b = 0; b < M; b++)
|
||||
{
|
||||
T sum = 0;
|
||||
for (int c = 0; c < j - 1 - con; c++)
|
||||
{
|
||||
sum += conj (U[c][a]) * H[con + 1 + c][b];
|
||||
} //sum += conj( U(c,a) )*H(con+1+c,b);}
|
||||
Hc[b][a] = sum;
|
||||
}
|
||||
}
|
||||
|
||||
for (int k = 0; k < M; k++)
|
||||
{
|
||||
for (int l = con + 1; l < j; l++)
|
||||
{
|
||||
H[l][k] = Hc[k][l - 1 - con];
|
||||
}
|
||||
} //H(Hc[k][l-1-con] , l,k);}}
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
@@ -31,5 +31,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
#include <Grid/cartesian/Cartesian_base.h>
|
||||
#include <Grid/cartesian/Cartesian_full.h>
|
||||
#include <Grid/cartesian/Cartesian_red_black.h>
|
||||
#include <Grid/cartesian/CartesianCrossIcosahedron.h>
|
||||
|
||||
#endif
|
||||
|
||||
199
Grid/cartesian/CartesianCrossIcosahedron.h
Normal file
199
Grid/cartesian/CartesianCrossIcosahedron.h
Normal file
@@ -0,0 +1,199 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/cartesian/CartesianCrossIcosahedron.h
|
||||
|
||||
Copyright (C) 2025
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Grid Support.
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
enum IcosahedralMeshType {
|
||||
IcosahedralVertices,
|
||||
IcosahedralEdges
|
||||
} ;
|
||||
enum NorthSouth {
|
||||
North = 1,
|
||||
South = 0
|
||||
};
|
||||
|
||||
const int num_icosahedron_tiles = 10;
|
||||
|
||||
class GridCartesianCrossIcosahedron: public GridCartesian {
|
||||
|
||||
public:
|
||||
|
||||
IcosahedralMeshType meshType;
|
||||
|
||||
IcosahedralMeshType MeshType(void) { return meshType; };
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// Constructor takes a parent grid and possibly subdivides communicator.
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
/*
|
||||
GridCartesian(const Coordinate &dimensions,
|
||||
const Coordinate &simd_layout,
|
||||
const Coordinate &processor_grid,
|
||||
const GridCartesian &parent) : GridBase(processor_grid,parent,dummy)
|
||||
{
|
||||
assert(0); // No subdivision
|
||||
}
|
||||
GridCartesian(const Coordinate &dimensions,
|
||||
const Coordinate &simd_layout,
|
||||
const Coordinate &processor_grid,
|
||||
const GridCartesian &parent,int &split_rank) : GridBase(processor_grid,parent,split_rank)
|
||||
{
|
||||
assert(0); // No subdivision
|
||||
}
|
||||
*/
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// Construct from comm world
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
GridCartesianCrossIcosahedron(const Coordinate &dimensions,
|
||||
const Coordinate &simd_layout,
|
||||
const Coordinate &processor_grid,
|
||||
IcosahedralMeshType _meshType) : GridCartesian(dimensions,simd_layout,processor_grid)
|
||||
{
|
||||
meshType = _meshType;
|
||||
Coordinate S2dimensions=dimensions;
|
||||
Coordinate S2simd =simd_layout;
|
||||
Coordinate S2procs =processor_grid;
|
||||
|
||||
assert(simd_layout[0]==1); // Force simd into perpendicular dimensions
|
||||
assert(simd_layout[1]==1); // to avoid pole storage complexity interacting with SIMD.
|
||||
assert(dimensions[_ndimension-1]==num_icosahedron_tiles);
|
||||
assert(processor_grid[_ndimension-1]<=2); // Keeps the patches that need a pole on the same node
|
||||
|
||||
// allocate the pole storage if we are seeking vertex domain data
|
||||
if ( meshType == IcosahedralVertices ) {
|
||||
InitPoles();
|
||||
}
|
||||
}
|
||||
|
||||
virtual ~GridCartesianCrossIcosahedron() = default;
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Use to decide if a given grid is icosahedral
|
||||
////////////////////////////////////////////////
|
||||
int hasNorthPole;
|
||||
int hasSouthPole;
|
||||
int northPoleOsite;
|
||||
int southPoleOsite;
|
||||
int northPoleOsites;
|
||||
int southPoleOsites;
|
||||
|
||||
virtual int isIcosahedral(void) override { return 1;}
|
||||
virtual int isIcosahedralVertex(void) override { return meshType==IcosahedralVertices;}
|
||||
virtual int isIcosahedralEdge (void) override { return meshType==IcosahedralEdges;}
|
||||
virtual int ownsNorthPole(void) const override { return hasNorthPole; };
|
||||
virtual int NorthPoleOsite(void) const override { return northPoleOsite; };
|
||||
virtual int NorthPoleOsites(void) const override { return northPoleOsites; };
|
||||
virtual int ownsSouthPole(void) const override { return hasSouthPole; };
|
||||
virtual int SouthPoleOsite(void) const override { return southPoleOsite; };
|
||||
virtual int SouthPoleOsites(void) const override { return southPoleOsites; };
|
||||
|
||||
void InitPoles(void)
|
||||
{
|
||||
int Ndm1 = _ndimension-1;
|
||||
///////////////////////
|
||||
// Add the extra pole storage
|
||||
///////////////////////
|
||||
// Vertices = 1x LxLx D1...Dn + 2.D1...Dn
|
||||
// Start after the LxL and don't include the 10 patch dim
|
||||
int OrthogSize = 1;
|
||||
for (int d = 2; d < Ndm1; d++) {
|
||||
OrthogSize *= _gdimensions[d];
|
||||
}
|
||||
_fsites += OrthogSize*2;
|
||||
_gsites += OrthogSize*2;
|
||||
|
||||
// Simd reduced sizes are multiplied up.
|
||||
// If the leading LxL are simd-ized, the vector objects will contain "redundant" lanes
|
||||
// which should contain identical north (south) pole data
|
||||
OrthogSize = 1;
|
||||
for (int d = 2; d < Ndm1; d++) {
|
||||
OrthogSize *= _rdimensions[d];
|
||||
}
|
||||
|
||||
// Grow the local volume to hold pole data
|
||||
// on rank (0,0) in the LxL planes
|
||||
// since SIMD must be placed in the orthogonal directions
|
||||
Coordinate pcoor = this->ThisProcessorCoor();
|
||||
Coordinate pgrid = this->ProcessorGrid();
|
||||
|
||||
const int xdim=0;
|
||||
const int ydim=1;
|
||||
/*
|
||||
*
|
||||
* /\/\/\/\/\
|
||||
* /\/\/\/\/\/
|
||||
* \/\/\/\/\/
|
||||
*
|
||||
* y
|
||||
* /
|
||||
* \x
|
||||
*
|
||||
* Labelling patches as 5 6 7 8 9
|
||||
* 0 1 2 3 4
|
||||
*
|
||||
* Will ban distribution of the patch dimension by more than 2.
|
||||
*
|
||||
* Hence all 5 patches associated with the pole must have the
|
||||
* appropriate "corner" of the patch L^2 located on the SAME rank.
|
||||
*/
|
||||
|
||||
if( (pcoor[xdim]==pgrid[xdim]-1) && (pcoor[ydim]==0) && (pcoor[Ndm1]==0) ){
|
||||
hasSouthPole =1;
|
||||
southPoleOsite=this->_osites;
|
||||
southPoleOsites=OrthogSize;
|
||||
this->_osites += OrthogSize;
|
||||
} else {
|
||||
hasSouthPole =0;
|
||||
southPoleOsites=0;
|
||||
southPoleOsite=0;
|
||||
}
|
||||
if( (pcoor[xdim]==0) && (pcoor[ydim]==pgrid[ydim]-1) && (pcoor[Ndm1]==pgrid[Ndm1]-1) ){
|
||||
hasNorthPole =1;
|
||||
northPoleOsite=this->_osites;
|
||||
northPoleOsites=OrthogSize;
|
||||
this->_osites += OrthogSize;
|
||||
} else {
|
||||
hasNorthPole =0;
|
||||
northPoleOsites=0;
|
||||
northPoleOsite=0;
|
||||
}
|
||||
std::cout << "Icosahedral vertex field volume " << this->_osites<<std::endl;
|
||||
std::cout << "Icosahedral south pole offset " << this->southPoleOsite<<std::endl;
|
||||
std::cout << "Icosahedral north pole offset " << this->northPoleOsite<<std::endl;
|
||||
std::cout << "Icosahedral south pole size " << this->southPoleOsites<<std::endl;
|
||||
std::cout << "Icosahedral north pole size " << this->northPoleOsites<<std::endl;
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
@@ -86,10 +86,22 @@ public:
|
||||
|
||||
public:
|
||||
|
||||
// Icosahedral decisions
|
||||
virtual int isIcosahedral(void) { return 0;}
|
||||
virtual int isIcosahedralVertex(void) { return 0;}
|
||||
virtual int isIcosahedralEdge (void) { return 0;}
|
||||
virtual int ownsNorthPole(void) const { return 0; };
|
||||
virtual int ownsSouthPole(void) const { return 0; };
|
||||
virtual int NorthPoleOsite(void) const { return 0; };
|
||||
virtual int SouthPoleOsite(void) const { return 0; };
|
||||
virtual int NorthPoleOsites(void) const { std::cout << "base osites" <<std::endl;return 0; };
|
||||
virtual int SouthPoleOsites(void) const { std::cout << "base osites" <<std::endl;return 0; };
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Checkerboarding interface is virtual and overridden by
|
||||
// GridCartesian / GridRedBlackCartesian
|
||||
////////////////////////////////////////////////////////////////
|
||||
|
||||
virtual int CheckerBoarded(int dim) =0;
|
||||
virtual int CheckerBoard(const Coordinate &site)=0;
|
||||
virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0;
|
||||
@@ -176,6 +188,8 @@ public:
|
||||
}
|
||||
return permute_type;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Array sizing queries
|
||||
////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -260,32 +260,39 @@ CartesianCommunicator::~CartesianCommunicator()
|
||||
}
|
||||
#ifdef USE_GRID_REDUCTION
|
||||
void CartesianCommunicator::GlobalSum(float &f){
|
||||
FlightRecorder::StepLog("GlobalSumP2P");
|
||||
CartesianCommunicator::GlobalSumP2P(f);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSum(double &d)
|
||||
{
|
||||
FlightRecorder::StepLog("GlobalSumP2P");
|
||||
CartesianCommunicator::GlobalSumP2P(d);
|
||||
}
|
||||
#else
|
||||
void CartesianCommunicator::GlobalSum(float &f){
|
||||
FlightRecorder::StepLog("AllReduce");
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSum(double &d)
|
||||
{
|
||||
FlightRecorder::StepLog("AllReduce");
|
||||
int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
#endif
|
||||
void CartesianCommunicator::GlobalSum(uint32_t &u){
|
||||
FlightRecorder::StepLog("AllReduce");
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSum(uint64_t &u){
|
||||
FlightRecorder::StepLog("AllReduce");
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSumVector(uint64_t* u,int N){
|
||||
FlightRecorder::StepLog("AllReduceVector");
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,u,N,MPI_UINT64_T,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
@@ -794,6 +801,7 @@ void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsReque
|
||||
|
||||
void CartesianCommunicator::StencilBarrier(void)
|
||||
{
|
||||
FlightRecorder::StepLog("NodeBarrier");
|
||||
MPI_Barrier (ShmComm);
|
||||
}
|
||||
//void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
||||
@@ -801,11 +809,13 @@ void CartesianCommunicator::StencilBarrier(void)
|
||||
//}
|
||||
void CartesianCommunicator::Barrier(void)
|
||||
{
|
||||
FlightRecorder::StepLog("GridBarrier");
|
||||
int ierr = MPI_Barrier(communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
|
||||
{
|
||||
FlightRecorder::StepLog("Broadcast");
|
||||
int ierr=MPI_Bcast(data,
|
||||
bytes,
|
||||
MPI_BYTE,
|
||||
@@ -824,6 +834,7 @@ void CartesianCommunicator::BarrierWorld(void){
|
||||
}
|
||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
||||
{
|
||||
FlightRecorder::StepLog("BroadcastWorld");
|
||||
int ierr= MPI_Bcast(data,
|
||||
bytes,
|
||||
MPI_BYTE,
|
||||
@@ -846,6 +857,7 @@ void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,
|
||||
}
|
||||
void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes)
|
||||
{
|
||||
FlightRecorder::StepLog("AllToAll");
|
||||
// MPI is a pain and uses "int" arguments
|
||||
// 64*64*64*128*16 == 500Million elements of data.
|
||||
// When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug.
|
||||
|
||||
@@ -990,7 +990,7 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
|
||||
}
|
||||
#endif
|
||||
|
||||
SharedMemoryTest();
|
||||
// SharedMemoryTest();
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// On node barrier
|
||||
|
||||
@@ -122,10 +122,10 @@ void GlobalSharedMemory::SharedMemoryZero(void *dest,size_t bytes)
|
||||
{
|
||||
acceleratorMemSet(dest,0,bytes);
|
||||
}
|
||||
void GlobalSharedMemory::SharedMemoryCopy(void *dest,void *src,size_t bytes)
|
||||
{
|
||||
acceleratorCopyToDevice(src,dest,bytes);
|
||||
}
|
||||
//void GlobalSharedMemory::SharedMemoryCopy(void *dest,void *src,size_t bytes)
|
||||
//{
|
||||
// acceleratorCopyToDevice(src,dest,bytes);
|
||||
//}
|
||||
////////////////////////////////////////////////////////
|
||||
// Global shared functionality finished
|
||||
// Now move to per communicator functionality
|
||||
|
||||
@@ -34,6 +34,8 @@ NAMESPACE_BEGIN(Grid);
|
||||
const int Cshift_verbose=0;
|
||||
template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension,int shift)
|
||||
{
|
||||
assert(!rhs.Grid()->isIcosahedral());
|
||||
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
|
||||
|
||||
@@ -30,6 +30,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension,int shift)
|
||||
{
|
||||
assert(!rhs.Grid()->isIcosahedral());
|
||||
Lattice<vobj> ret(rhs.Grid());
|
||||
ret.Checkerboard() = rhs.Grid()->CheckerBoardDestination(rhs.Checkerboard(),shift,dimension);
|
||||
Cshift_local(ret,rhs,dimension,shift);
|
||||
|
||||
@@ -236,7 +236,7 @@ public:
|
||||
template<class sobj> inline Lattice<vobj> & operator = (const sobj & r){
|
||||
vobj vtmp;
|
||||
vtmp = r;
|
||||
#if 0
|
||||
#if 1
|
||||
deviceVector<vobj> vvtmp(1);
|
||||
acceleratorPut(vvtmp[0],vtmp);
|
||||
vobj *vvtmp_p = & vvtmp[0];
|
||||
@@ -373,14 +373,17 @@ public:
|
||||
|
||||
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
for(int64_t g=0;g<o.Grid()->_gsites;g++){
|
||||
uint64_t gsites=1;
|
||||
uint64_t polesites=0;
|
||||
for(int d=0;d<o.Grid()->_ndimension;d++) gsites *= o.Grid()->_gdimensions[d];
|
||||
for(int64_t g=0;g<gsites;g++){
|
||||
|
||||
Coordinate gcoor;
|
||||
o.Grid()->GlobalIndexToGlobalCoor(g,gcoor);
|
||||
|
||||
sobj ss;
|
||||
peekSite(ss,o,gcoor);
|
||||
stream<<"[";
|
||||
stream<<"["<< g<<" : ";
|
||||
for(int d=0;d<gcoor.size();d++){
|
||||
stream<<gcoor[d];
|
||||
if(d!=gcoor.size()-1) stream<<",";
|
||||
@@ -388,6 +391,41 @@ template<class vobj> std::ostream& operator<< (std::ostream& stream, const Latti
|
||||
stream<<"]\t";
|
||||
stream<<ss<<std::endl;
|
||||
}
|
||||
if ( o.Grid()->isIcosahedral() ) {
|
||||
uint64_t psites=1;
|
||||
Coordinate perpdims;
|
||||
for(int d=2;d<o.Grid()->_ndimension-1;d++){
|
||||
int pd=o.Grid()->_gdimensions[d];
|
||||
psites*=pd;
|
||||
perpdims.push_back(pd);
|
||||
}
|
||||
for(uint64_t p=0;p<psites;p++){
|
||||
sobj ss;
|
||||
Coordinate orthog;
|
||||
Lexicographic::CoorFromIndex(orthog,p,perpdims);
|
||||
peekPole(ss,o,orthog,South);
|
||||
stream<<"[ SouthPole : ";
|
||||
for(int d=0;d<orthog.size();d++){
|
||||
stream<<orthog[d];
|
||||
if(d!=orthog.size()-1) stream<<",";
|
||||
}
|
||||
stream<<"]\t";
|
||||
stream<<ss<<std::endl;
|
||||
}
|
||||
for(uint64_t p=0;p<psites;p++){
|
||||
sobj ss;
|
||||
Coordinate orthog;
|
||||
Lexicographic::CoorFromIndex(orthog,p,perpdims);
|
||||
peekPole(ss,o,orthog,North);
|
||||
stream<<"[ NorthPole : ";
|
||||
for(int d=0;d<orthog.size();d++){
|
||||
stream<<orthog[d];
|
||||
if(d!=orthog.size()-1) stream<<",";
|
||||
}
|
||||
stream<<"]\t";
|
||||
stream<<ss<<std::endl;
|
||||
}
|
||||
}
|
||||
return stream;
|
||||
}
|
||||
|
||||
|
||||
@@ -34,22 +34,86 @@ template<class iobj> inline void LatticeCoordinate(Lattice<iobj> &l,int mu)
|
||||
typedef typename iobj::scalar_type scalar_type;
|
||||
typedef typename iobj::vector_type vector_type;
|
||||
|
||||
l=Zero();
|
||||
|
||||
GridBase *grid = l.Grid();
|
||||
int Nsimd = grid->iSites();
|
||||
|
||||
autoView(l_v, l, CpuWrite);
|
||||
thread_for( o, grid->oSites(), {
|
||||
vector_type vI;
|
||||
Coordinate gcoor;
|
||||
ExtractBuffer<scalar_type> mergebuf(Nsimd);
|
||||
for(int i=0;i<grid->iSites();i++){
|
||||
grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor);
|
||||
mergebuf[i]=(Integer)gcoor[mu];
|
||||
int cartesian_vol = grid->oSites();
|
||||
if ( grid->isIcosahedral() ) {
|
||||
cartesian_vol = cartesian_vol - grid->NorthPoleOsites()-grid->SouthPoleOsites();
|
||||
}
|
||||
{
|
||||
autoView(l_v, l, CpuWrite);
|
||||
thread_for( o, cartesian_vol, {
|
||||
vector_type vI;
|
||||
Coordinate gcoor;
|
||||
ExtractBuffer<scalar_type> mergebuf(Nsimd);
|
||||
for(int i=0;i<grid->iSites();i++){
|
||||
grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor);
|
||||
mergebuf[i]=(Integer)gcoor[mu];
|
||||
}
|
||||
merge<vector_type,scalar_type>(vI,mergebuf);
|
||||
l_v[o]=vI;
|
||||
});
|
||||
}
|
||||
|
||||
if (grid->isIcosahedralVertex()) {
|
||||
uint64_t psites=1;
|
||||
Coordinate perpdims;
|
||||
typename iobj::scalar_object ss;
|
||||
for(int d=2;d<grid->_ndimension-1;d++){
|
||||
int pd=grid->_gdimensions[d];
|
||||
psites*=pd;
|
||||
perpdims.push_back(pd);
|
||||
}
|
||||
merge<vector_type,scalar_type>(vI,mergebuf);
|
||||
l_v[o]=vI;
|
||||
});
|
||||
for(uint64_t p=0;p<psites;p++){
|
||||
Coordinate orthog;
|
||||
Lexicographic::CoorFromIndex(orthog,p,perpdims);
|
||||
|
||||
int icoor;
|
||||
if ( mu>=2 && mu < grid->_ndimension-1) {
|
||||
icoor = orthog[mu-2];
|
||||
} else {
|
||||
icoor = -1;
|
||||
}
|
||||
|
||||
ss=scalar_type(icoor);
|
||||
|
||||
pokePole(ss,l,orthog,South);
|
||||
pokePole(ss,l,orthog,North);
|
||||
}
|
||||
}
|
||||
};
|
||||
template<class iobj> inline void LatticePole(Lattice<iobj> &l,NorthSouth pole)
|
||||
{
|
||||
typedef typename iobj::scalar_object sobj;
|
||||
typedef typename iobj::scalar_type scalar_type;
|
||||
typedef typename iobj::vector_type vector_type;
|
||||
|
||||
GridBase *grid = l.Grid();
|
||||
|
||||
l=Zero();
|
||||
|
||||
assert(grid->isIcosahedralVertex());
|
||||
|
||||
if (grid->isIcosahedralVertex()) {
|
||||
uint64_t psites=1;
|
||||
Coordinate perpdims;
|
||||
sobj ss;
|
||||
scalar_type one(1.0);
|
||||
ss=one;
|
||||
for(int d=2;d<l.Grid()->_ndimension-1;d++){
|
||||
int pd=l.Grid()->_gdimensions[d];
|
||||
psites*=pd;
|
||||
perpdims.push_back(pd);
|
||||
}
|
||||
for(uint64_t p=0;p<psites;p++){
|
||||
Coordinate orthog;
|
||||
Lexicographic::CoorFromIndex(orthog,p,perpdims);
|
||||
pokePole(ss,l,orthog,pole);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
||||
@@ -141,7 +141,7 @@ void peekSite(sobj &s,const Lattice<vobj> &l,const Coordinate &site){
|
||||
grid->GlobalCoorToRankIndex(rank,odx,idx,site);
|
||||
|
||||
ExtractBuffer<sobj> buf(Nsimd);
|
||||
autoView( l_v , l, CpuWrite);
|
||||
autoView( l_v , l, CpuRead);
|
||||
extract(l_v[odx],buf);
|
||||
|
||||
s = buf[idx];
|
||||
@@ -151,6 +151,136 @@ void peekSite(sobj &s,const Lattice<vobj> &l,const Coordinate &site){
|
||||
return;
|
||||
};
|
||||
|
||||
// zero for south pole, one for north pole
|
||||
template<class vobj,class sobj>
|
||||
void peekPole(sobj &s,const Lattice<vobj> &l,const Coordinate &orthog,NorthSouth isNorth)
|
||||
{
|
||||
s=Zero();
|
||||
|
||||
GridBase *grid=l.Grid();
|
||||
|
||||
assert(grid->isIcosahedral());
|
||||
assert(grid->isIcosahedralVertex());
|
||||
|
||||
int Nsimd = grid->Nsimd();
|
||||
|
||||
int rank;
|
||||
|
||||
int Ndm1 = grid->_ndimension-1;
|
||||
Coordinate pgrid = grid->ProcessorGrid();
|
||||
const int xdim=0;
|
||||
const int ydim=1;
|
||||
const int pdim=Ndm1;
|
||||
|
||||
int64_t pole_osite;
|
||||
int64_t pole_isite;
|
||||
Coordinate rdims;
|
||||
Coordinate idims;
|
||||
Coordinate ocoor;
|
||||
Coordinate icoor;
|
||||
Coordinate pcoor(grid->_ndimension);
|
||||
for(int d=2;d<Ndm1;d++){
|
||||
int dd=d-2;
|
||||
rdims.push_back(grid->_rdimensions[d]);
|
||||
idims.push_back(grid->_simd_layout[d]);
|
||||
icoor.push_back((orthog[dd]%grid->_ldimensions[d])/grid->_rdimensions[d]);
|
||||
ocoor.push_back(orthog[dd]%grid->_rdimensions[d]);
|
||||
pcoor[d] = orthog[dd]/grid->_ldimensions[d];
|
||||
}
|
||||
Lexicographic::IndexFromCoor(ocoor,pole_osite,rdims);
|
||||
Lexicographic::IndexFromCoor(icoor,pole_isite,idims);
|
||||
|
||||
int64_t osite;
|
||||
if(isNorth == North){
|
||||
pcoor[xdim] = 0;
|
||||
pcoor[ydim] = pgrid[ydim]-1;
|
||||
pcoor[Ndm1] = pgrid[Ndm1]-1;
|
||||
osite = pole_osite + grid->NorthPoleOsite();
|
||||
} else {
|
||||
pcoor[xdim] = pgrid[xdim]-1;
|
||||
pcoor[ydim] = 0;
|
||||
pcoor[Ndm1] = 0;
|
||||
osite = pole_osite + grid->SouthPoleOsite();
|
||||
}
|
||||
|
||||
rank = grid->RankFromProcessorCoor(pcoor);
|
||||
|
||||
if ( rank == grid->ThisRank() ) {
|
||||
ExtractBuffer<sobj> buf(Nsimd);
|
||||
autoView( l_v , l, CpuWrite);
|
||||
extract(l_v[osite],buf);
|
||||
s = buf[pole_isite];
|
||||
}
|
||||
grid->Broadcast(rank,s);
|
||||
|
||||
return;
|
||||
};
|
||||
template<class vobj,class sobj>
|
||||
void pokePole(const sobj &s,Lattice<vobj> &l,const Coordinate &orthog,NorthSouth isNorth)
|
||||
{
|
||||
GridBase *grid=l.Grid();
|
||||
|
||||
assert(grid->isIcosahedral());
|
||||
assert(grid->isIcosahedralVertex());
|
||||
|
||||
grid->Broadcast(grid->BossRank(),s);
|
||||
|
||||
int Nsimd = grid->Nsimd();
|
||||
int rank;
|
||||
int Ndm1 = grid->_ndimension-1;
|
||||
Coordinate pgrid = grid->ProcessorGrid();
|
||||
const int xdim=0;
|
||||
const int ydim=1;
|
||||
const int pdim=Ndm1;
|
||||
|
||||
int64_t pole_osite;
|
||||
int64_t pole_isite;
|
||||
Coordinate rdims;
|
||||
Coordinate idims;
|
||||
Coordinate ocoor;
|
||||
Coordinate icoor;
|
||||
Coordinate pcoor(grid->_ndimension,0);
|
||||
for(int d=2;d<Ndm1;d++){
|
||||
int dd = d-2;
|
||||
rdims.push_back(grid->_rdimensions[d]);
|
||||
idims.push_back(grid->_simd_layout[d]);
|
||||
icoor.push_back((orthog[dd]%grid->_ldimensions[d])/grid->_rdimensions[d]);
|
||||
ocoor.push_back(orthog[dd]%grid->_rdimensions[d]);
|
||||
pcoor[d] = orthog[dd]/grid->_ldimensions[d];
|
||||
|
||||
int o = orthog[dd];
|
||||
int r = grid->_rdimensions[d];
|
||||
int omr = o % r;
|
||||
}
|
||||
Lexicographic::IndexFromCoor(ocoor,pole_osite,rdims);
|
||||
Lexicographic::IndexFromCoor(icoor,pole_isite,idims);
|
||||
|
||||
int64_t osite;
|
||||
if(isNorth ==North){
|
||||
pcoor[xdim] = 0;
|
||||
pcoor[ydim] = pgrid[ydim]-1;
|
||||
pcoor[Ndm1] = pgrid[Ndm1]-1;
|
||||
osite = pole_osite + grid->NorthPoleOsite();
|
||||
} else {
|
||||
pcoor[xdim] = pgrid[xdim]-1;
|
||||
pcoor[ydim] = 0;
|
||||
pcoor[Ndm1] = 0;
|
||||
osite = pole_osite + grid->SouthPoleOsite();
|
||||
}
|
||||
|
||||
rank = grid->RankFromProcessorCoor(pcoor);
|
||||
|
||||
// extract-modify-merge cycle is easiest way and this is not perf critical
|
||||
if ( rank == grid->ThisRank() ) {
|
||||
ExtractBuffer<sobj> buf(Nsimd);
|
||||
autoView( l_v , l, CpuWrite);
|
||||
extract(l_v[osite],buf);
|
||||
buf[pole_isite] = s;
|
||||
merge(l_v[osite],buf);
|
||||
}
|
||||
return;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// Peek a scalar object from the SIMD array
|
||||
//////////////////////////////////////////////////////////
|
||||
@@ -179,7 +309,7 @@ inline void peekLocalSite(sobj &s,const LatticeView<vobj> &l,Coordinate &site)
|
||||
for(int w=0;w<words;w++){
|
||||
pt[w] = getlane(vp[w],idx);
|
||||
}
|
||||
// std::cout << "peekLocalSite "<<site<<" "<<odx<<","<<idx<<" "<<s<<std::endl;
|
||||
|
||||
return;
|
||||
};
|
||||
template<class vobj,class sobj>
|
||||
|
||||
@@ -49,7 +49,7 @@ static constexpr int Tm = 7;
|
||||
|
||||
static constexpr int Nc=Config_Nc;
|
||||
static constexpr int Ns=4;
|
||||
static constexpr int Nd=4;
|
||||
static constexpr int Nd=Config_Nd;
|
||||
static constexpr int Nhs=2; // half spinor
|
||||
static constexpr int Nds=8; // double stored gauge field
|
||||
static constexpr int Ngp=2; // gparity index range
|
||||
@@ -75,6 +75,7 @@ static constexpr int InverseYes=1;
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||
|
||||
const int SpinorIndex = 2;
|
||||
const int PauliIndex = 2; //TensorLevel counts from the bottom!
|
||||
template<typename T> struct isSpinor {
|
||||
static constexpr bool value = (SpinorIndex==T::TensorLevel);
|
||||
};
|
||||
|
||||
@@ -123,10 +123,10 @@ public:
|
||||
GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
|
||||
|
||||
peekLocalSite(ScalarUmu, Umu_v, lcoor);
|
||||
for (int mu = 0; mu < 4; mu++) ScalarUds(mu) = ScalarUmu(mu);
|
||||
for (int mu = 0; mu < Nd; mu++) ScalarUds(mu) = ScalarUmu(mu);
|
||||
|
||||
peekLocalSite(ScalarUmu, Uadj_v, lcoor);
|
||||
for (int mu = 0; mu < 4; mu++) ScalarUds(mu + 4) = ScalarUmu(mu);
|
||||
for (int mu = 0; mu < Nd; mu++) ScalarUds(mu + Nd) = ScalarUmu(mu);
|
||||
|
||||
pokeLocalSite(ScalarUds, Uds_v, lcoor);
|
||||
});
|
||||
|
||||
@@ -85,6 +85,15 @@ NAMESPACE_CHECK(DomainWall);
|
||||
#include <Grid/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h>
|
||||
#include <Grid/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h>
|
||||
NAMESPACE_CHECK(Overlap);
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Two spin wilson fermion based
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include <Grid/qcd/action/fermion/TwoSpinWilsonFermion3plus1D.h>
|
||||
NAMESPACE_CHECK(TwoSpinWilson);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// G5 herm -- this has to live in QCD since dirac matrix is not in the broader sector of code
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -41,8 +41,9 @@ NAMESPACE_CHECK(Compressor);
|
||||
NAMESPACE_CHECK(FermionOperatorImpl);
|
||||
#include <Grid/qcd/action/fermion/FermionOperator.h>
|
||||
NAMESPACE_CHECK(FermionOperator);
|
||||
#include <Grid/qcd/action/fermion/WilsonKernels.h> //used by all wilson type fermions
|
||||
#include <Grid/qcd/action/fermion/WilsonKernels.h> //used by all wilson type fermions
|
||||
#include <Grid/qcd/action/fermion/StaggeredKernels.h> //used by all wilson type fermions
|
||||
#include <Grid/qcd/action/fermion/TwoSpinWilsonKernels.h> //used for 3D fermions, pauli in place of Dirac
|
||||
NAMESPACE_CHECK(Kernels);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -180,6 +180,12 @@ NAMESPACE_CHECK(ImplGparityWilson);
|
||||
#include <Grid/qcd/action/fermion/StaggeredImpl.h>
|
||||
NAMESPACE_CHECK(ImplStaggered);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Two component spinor Wilson action for 3d / Boston
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
#include <Grid/qcd/action/fermion/TwoSpinWilsonImpl.h>
|
||||
NAMESPACE_CHECK(ImplTwoSpinWilson);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Single flavour one component spinors with colour index. 5d vec
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -274,7 +274,7 @@ public:
|
||||
autoView( Uds_v , Uds, CpuWrite);
|
||||
autoView( Utmp_v, Utmp, CpuWrite);
|
||||
thread_foreach(ss,Utmp_v,{
|
||||
Uds_v[ss](0)(mu+4) = Utmp_v[ss]();
|
||||
Uds_v[ss](0)(mu+Nd) = Utmp_v[ss]();
|
||||
});
|
||||
}
|
||||
Utmp = Uconj;
|
||||
@@ -286,7 +286,7 @@ public:
|
||||
autoView( Uds_v , Uds, CpuWrite);
|
||||
autoView( Utmp_v, Utmp, CpuWrite);
|
||||
thread_foreach(ss,Utmp_v,{
|
||||
Uds_v[ss](1)(mu+4) = Utmp_v[ss]();
|
||||
Uds_v[ss](1)(mu+Nd) = Utmp_v[ss]();
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -320,7 +320,7 @@ public:
|
||||
}
|
||||
|
||||
Uconj = conjugate(*Upoke);
|
||||
pokeGparityDoubledGaugeField(Uds, *Upoke, Uconj, mu + 4);
|
||||
pokeGparityDoubledGaugeField(Uds, *Upoke, Uconj, mu + Nd);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -36,6 +36,8 @@ public:
|
||||
static const std::vector<int> directions;
|
||||
static const std::vector<int> displacements;
|
||||
static const int npoint = 16;
|
||||
static std::vector<int> MakeDirections(void);
|
||||
static std::vector<int> MakeDisplacements(void);
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
|
||||
@@ -40,6 +40,8 @@ public:
|
||||
static const std::vector<int> directions;
|
||||
static const std::vector<int> displacements;
|
||||
const int npoint = 16;
|
||||
static std::vector<int> MakeDirections(void);
|
||||
static std::vector<int> MakeDisplacements(void);
|
||||
};
|
||||
|
||||
template<class Impl>
|
||||
|
||||
@@ -36,6 +36,8 @@ public:
|
||||
static const std::vector<int> directions;
|
||||
static const std::vector<int> displacements;
|
||||
static const int npoint = 8;
|
||||
static std::vector<int> MakeDirections(void);
|
||||
static std::vector<int> MakeDisplacements(void);
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
|
||||
@@ -141,9 +141,9 @@ public:
|
||||
Udag = Udag *phases;
|
||||
|
||||
InsertGaugeField(Uds,U,mu);
|
||||
InsertGaugeField(Uds,Udag,mu+4);
|
||||
InsertGaugeField(Uds,Udag,mu+Nd);
|
||||
// PokeIndex<LorentzIndex>(Uds, U, mu);
|
||||
// PokeIndex<LorentzIndex>(Uds, Udag, mu + 4);
|
||||
// PokeIndex<LorentzIndex>(Uds, Udag, mu + Nd);
|
||||
|
||||
// 3 hop based on thin links. Crazy huh ?
|
||||
U = PeekIndex<LorentzIndex>(Uthin, mu);
|
||||
@@ -156,7 +156,7 @@ public:
|
||||
UUUdag = UUUdag *phases;
|
||||
|
||||
InsertGaugeField(UUUds,UUU,mu);
|
||||
InsertGaugeField(UUUds,UUUdag,mu+4);
|
||||
InsertGaugeField(UUUds,UUUdag,mu+Nd);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
175
Grid/qcd/action/fermion/TwoSpinWilsonFermion3plus1D.h
Normal file
175
Grid/qcd/action/fermion/TwoSpinWilsonFermion3plus1D.h
Normal file
@@ -0,0 +1,175 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/TwoSpinWilsonFermion3plus1D.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma one
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
class TwoSpinWilsonFermion3plus1DStatic {
|
||||
public:
|
||||
// S-direction is INNERMOST and takes no part in the parity.
|
||||
static const std::vector<int> directions;
|
||||
static const std::vector<int> displacements;
|
||||
static constexpr int npoint = 6;
|
||||
static std::vector<int> MakeDirections(void);
|
||||
static std::vector<int> MakeDisplacements(void);
|
||||
};
|
||||
|
||||
template<class Impl>
|
||||
class TwoSpinWilsonFermion3plus1D : public TwoSpinWilsonKernels<Impl>, public TwoSpinWilsonFermion3plus1DStatic
|
||||
{
|
||||
public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
typedef TwoSpinWilsonKernels<Impl> Kernels;
|
||||
|
||||
FermionField _tmp;
|
||||
FermionField &tmp(void) { return _tmp; }
|
||||
|
||||
int Dirichlet;
|
||||
Coordinate Block;
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Implement the abstract base
|
||||
///////////////////////////////////////////////////////////////
|
||||
GridBase *GaugeGrid(void) { return _ThreeDimGrid ;}
|
||||
GridBase *GaugeRedBlackGrid(void) { return _ThreeDimRedBlackGrid ;}
|
||||
GridBase *FermionGrid(void) { return _FourDimGrid;}
|
||||
GridBase *FermionRedBlackGrid(void) { return _FourDimRedBlackGrid;}
|
||||
|
||||
// full checkerboard operations; leave unimplemented as abstract for now
|
||||
virtual void M (const FermionField &in, FermionField &out){assert(0);};
|
||||
virtual void Mdag (const FermionField &in, FermionField &out){assert(0);};
|
||||
|
||||
// half checkerboard operations; leave unimplemented as abstract for now
|
||||
virtual void Meooe (const FermionField &in, FermionField &out);
|
||||
virtual void Mooee (const FermionField &in, FermionField &out);
|
||||
virtual void MooeeInv (const FermionField &in, FermionField &out);
|
||||
|
||||
virtual void MeooeDag (const FermionField &in, FermionField &out);
|
||||
virtual void MooeeDag (const FermionField &in, FermionField &out);
|
||||
virtual void MooeeInvDag (const FermionField &in, FermionField &out);
|
||||
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
|
||||
virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
|
||||
|
||||
// These can be overridden by fancy 5d chiral action
|
||||
virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||
virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||
|
||||
// void MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
|
||||
void MomentumSpacePropagatorHt(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
|
||||
void MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
|
||||
|
||||
// Implement hopping term non-hermitian hopping term; half cb or both
|
||||
// Implement s-diagonal DW
|
||||
void DW (const FermionField &in, FermionField &out,int dag);
|
||||
void Dhop (const FermionField &in, FermionField &out,int dag);
|
||||
void DhopOE(const FermionField &in, FermionField &out,int dag);
|
||||
void DhopEO(const FermionField &in, FermionField &out,int dag);
|
||||
|
||||
void DhopComms (const FermionField &in, FermionField &out);
|
||||
void DhopCalc (const FermionField &in, FermionField &out,uint64_t *ids);
|
||||
|
||||
// add a DhopComm
|
||||
// -- suboptimal interface will presently trigger multiple comms.
|
||||
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
|
||||
void DhopDirAll(const FermionField &in,std::vector<FermionField> &out);
|
||||
void DhopDirComms(const FermionField &in);
|
||||
void DhopDirCalc(const FermionField &in, FermionField &out,int point);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// New methods added
|
||||
///////////////////////////////////////////////////////////////
|
||||
void DerivInternal(StencilImpl & st,
|
||||
DoubledGaugeField & U,
|
||||
GaugeField &mat,
|
||||
const FermionField &A,
|
||||
const FermionField &B,
|
||||
int dag);
|
||||
|
||||
void DhopInternal(StencilImpl & st,
|
||||
DoubledGaugeField &U,
|
||||
const FermionField &in,
|
||||
FermionField &out,
|
||||
int dag);
|
||||
|
||||
void DhopInternalOverlappedComms(StencilImpl & st,
|
||||
DoubledGaugeField &U,
|
||||
const FermionField &in,
|
||||
FermionField &out,
|
||||
int dag);
|
||||
|
||||
void DhopInternalSerialComms(StencilImpl & st,
|
||||
DoubledGaugeField &U,
|
||||
const FermionField &in,
|
||||
FermionField &out,
|
||||
int dag);
|
||||
|
||||
// Constructors
|
||||
TwoSpinWilsonFermion3plus1D(GaugeField &_Umu,
|
||||
GridCartesian &FourDimGrid,
|
||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||
GridCartesian &ThreeDimGrid,
|
||||
GridRedBlackCartesian &ThreeDimRedBlackGrid,
|
||||
double _M5,const ImplParams &p= ImplParams());
|
||||
|
||||
virtual void DirichletBlock(const Coordinate & block)
|
||||
{
|
||||
}
|
||||
|
||||
// DoubleStore
|
||||
void ImportGauge(const GaugeField &_Umu);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Data members require to support the functionality
|
||||
///////////////////////////////////////////////////////////////
|
||||
public:
|
||||
|
||||
// Add these to the support from Wilson
|
||||
GridBase *_ThreeDimGrid;
|
||||
GridBase *_ThreeDimRedBlackGrid;
|
||||
GridBase *_FourDimGrid;
|
||||
GridBase *_FourDimRedBlackGrid;
|
||||
|
||||
double M5;
|
||||
int Ls;
|
||||
|
||||
//Defines the stencils for even and odd
|
||||
StencilImpl Stencil;
|
||||
StencilImpl StencilEven;
|
||||
StencilImpl StencilOdd;
|
||||
|
||||
// Copy of the gauge field , with even and odd subsets
|
||||
DoubledGaugeField Umu;
|
||||
DoubledGaugeField UmuEven;
|
||||
DoubledGaugeField UmuOdd;
|
||||
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
222
Grid/qcd/action/fermion/TwoSpinWilsonImpl.h
Normal file
222
Grid/qcd/action/fermion/TwoSpinWilsonImpl.h
Normal file
@@ -0,0 +1,222 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Single flavour four spinors with colour index
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
template <class S, class Representation = FundamentalRepresentation,class Options = CoeffReal >
|
||||
class TwoSpinWilsonImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
|
||||
public:
|
||||
|
||||
static const int Dimension = Representation::Dimension;
|
||||
static const bool isFundamental = Representation::isFundamental;
|
||||
|
||||
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
|
||||
INHERIT_GIMPL_TYPES(Gimpl);
|
||||
|
||||
//Necessary?
|
||||
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
|
||||
|
||||
typedef typename Options::_Coeff_t Coeff_t;
|
||||
|
||||
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
|
||||
template <typename vtype> using iImplPropagator = iScalar<iMatrix<iMatrix<vtype, Dimension>, Nhs> >;
|
||||
template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
|
||||
template <typename vtype> using iImplHalfCommSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
|
||||
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
|
||||
|
||||
typedef iImplSpinor<Simd> SiteSpinor;
|
||||
typedef iImplPropagator<Simd> SitePropagator;
|
||||
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
||||
typedef iImplHalfCommSpinor<Simd> SiteHalfCommSpinor;
|
||||
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
|
||||
|
||||
typedef Lattice<SiteSpinor> FermionField;
|
||||
typedef Lattice<SitePropagator> PropagatorField;
|
||||
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
||||
|
||||
typedef SimpleCompressor<SiteSpinor> Compressor;
|
||||
typedef WilsonImplParams ImplParams;
|
||||
typedef CartesianStencil<SiteSpinor, SiteSpinor, ImplParams> StencilImpl;
|
||||
typedef const typename StencilImpl::View_type StencilView;
|
||||
|
||||
ImplParams Params;
|
||||
|
||||
TwoSpinWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){
|
||||
};
|
||||
|
||||
template<class _Spinor>
|
||||
static accelerator_inline void multLink(_Spinor &phi,
|
||||
const SiteDoubledGaugeField &U,
|
||||
const _Spinor &chi,
|
||||
int mu)
|
||||
{
|
||||
auto UU = coalescedRead(U(mu));
|
||||
mult(&phi(), &UU, &chi());
|
||||
}
|
||||
template<class _Spinor>
|
||||
static accelerator_inline void multLink(_Spinor &phi,
|
||||
const SiteDoubledGaugeField &U,
|
||||
const _Spinor &chi,
|
||||
int mu,
|
||||
StencilEntry *SE,
|
||||
StencilView &St)
|
||||
{
|
||||
multLink(phi,U,chi,mu);
|
||||
}
|
||||
|
||||
template<class _SpinorField>
|
||||
inline void multLinkField(_SpinorField & out,
|
||||
const DoubledGaugeField &Umu,
|
||||
const _SpinorField & phi,
|
||||
int mu)
|
||||
{
|
||||
const int Nsimd = SiteHalfSpinor::Nsimd();
|
||||
autoView( out_v, out, AcceleratorWrite);
|
||||
autoView( phi_v, phi, AcceleratorRead);
|
||||
autoView( Umu_v, Umu, AcceleratorRead);
|
||||
typedef decltype(coalescedRead(out_v[0])) calcSpinor;
|
||||
accelerator_for(sss,out.Grid()->oSites(),Nsimd,{
|
||||
calcSpinor tmp;
|
||||
multLink(tmp,Umu_v[sss],phi_v(sss),mu);
|
||||
coalescedWrite(out_v[sss],tmp);
|
||||
});
|
||||
}
|
||||
|
||||
template <class ref>
|
||||
static accelerator_inline void loadLinkElement(Simd ®, ref &memory)
|
||||
{
|
||||
reg = memory;
|
||||
}
|
||||
|
||||
inline void DoubleStore(GridBase *GaugeGrid,
|
||||
DoubledGaugeField &Uds,
|
||||
const GaugeField &Umu)
|
||||
{
|
||||
typedef typename Simd::scalar_type scalar_type;
|
||||
|
||||
conformable(Uds.Grid(), GaugeGrid);
|
||||
conformable(Umu.Grid(), GaugeGrid);
|
||||
|
||||
GaugeLinkField U(GaugeGrid);
|
||||
GaugeLinkField tmp(GaugeGrid);
|
||||
|
||||
Lattice<iScalar<vInteger> > coor(GaugeGrid);
|
||||
////////////////////////////////////////////////////
|
||||
// apply any boundary phase or twists
|
||||
////////////////////////////////////////////////////
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
|
||||
////////// boundary phase /////////////
|
||||
auto pha = Params.boundary_phases[mu];
|
||||
scalar_type phase( real(pha),imag(pha) );
|
||||
|
||||
int L = GaugeGrid->GlobalDimensions()[mu];
|
||||
int Lmu = L - 1;
|
||||
|
||||
LatticeCoordinate(coor, mu);
|
||||
|
||||
U = PeekIndex<LorentzIndex>(Umu, mu);
|
||||
|
||||
// apply any twists
|
||||
RealD theta = Params.twist_n_2pi_L[mu] * 2*M_PI / L;
|
||||
if ( theta != 0.0) {
|
||||
scalar_type twphase(::cos(theta),::sin(theta));
|
||||
U = twphase*U;
|
||||
std::cout << GridLogMessage << " Twist ["<<mu<<"] "<< Params.twist_n_2pi_L[mu]<< " phase"<<phase <<std::endl;
|
||||
}
|
||||
|
||||
tmp = where(coor == Lmu, phase * U, U);
|
||||
PokeIndex<LorentzIndex>(Uds, tmp, mu);
|
||||
|
||||
U = adj(Cshift(U, mu, -1));
|
||||
U = where(coor == 0, conjugate(phase) * U, U);
|
||||
PokeIndex<LorentzIndex>(Uds, U, mu + Nd);
|
||||
}
|
||||
}
|
||||
|
||||
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
|
||||
GaugeLinkField link(mat.Grid());
|
||||
link = TraceIndex<SpinIndex>(outerProduct(Btilde,A));
|
||||
PokeIndex<LorentzIndex>(mat,link,mu);
|
||||
}
|
||||
|
||||
inline void outerProductImpl(PropagatorField &mat, const FermionField &B, const FermionField &A){
|
||||
mat = outerProduct(B,A);
|
||||
}
|
||||
|
||||
inline void TraceSpinImpl(GaugeLinkField &mat, PropagatorField&P) {
|
||||
mat = TraceIndex<SpinIndex>(P);
|
||||
}
|
||||
|
||||
inline void extractLinkField(std::vector<GaugeLinkField> &mat, DoubledGaugeField &Uds)
|
||||
{
|
||||
for (int mu = 0; mu < Nd; mu++)
|
||||
mat[mu] = PeekIndex<LorentzIndex>(Uds, mu);
|
||||
}
|
||||
|
||||
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã,int mu)
|
||||
{
|
||||
int Ls=Btilde.Grid()->_fdimensions[0];
|
||||
autoView( mat_v , mat, AcceleratorWrite);
|
||||
{
|
||||
const int Nsimd = SiteSpinor::Nsimd();
|
||||
autoView( Btilde_v , Btilde, AcceleratorRead);
|
||||
autoView( Atilde_v , Atilde, AcceleratorRead);
|
||||
accelerator_for(sss,mat.Grid()->oSites(),Nsimd,{
|
||||
int sU=sss;
|
||||
typedef decltype(coalescedRead(mat_v[sU](mu)() )) ColorMatrixType;
|
||||
ColorMatrixType sum;
|
||||
zeroit(sum);
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sF = s+Ls*sU;
|
||||
for(int spn=0;spn<Ns;spn++){ //sum over spin
|
||||
auto bb = coalescedRead(Btilde_v[sF]()(spn) ); //color vector
|
||||
auto aa = coalescedRead(Atilde_v[sF]()(spn) );
|
||||
auto op = outerProduct(bb,aa);
|
||||
sum = sum + op;
|
||||
}
|
||||
}
|
||||
coalescedWrite(mat_v[sU](mu)(), sum);
|
||||
});
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
typedef TwoSpinWilsonImpl<vComplex, FundamentalRepresentation, CoeffReal > TwoSpinWilsonImplR; // Real.. whichever prec
|
||||
typedef TwoSpinWilsonImpl<vComplexF, FundamentalRepresentation, CoeffReal > TwoSpinWilsonImplF; // Float
|
||||
typedef TwoSpinWilsonImpl<vComplexD, FundamentalRepresentation, CoeffReal > TwoSpinWilsonImplD; // Double
|
||||
typedef TwoSpinWilsonImpl<vComplexD2, FundamentalRepresentation, CoeffReal > TwoSpinWilsonImplD2; // Double
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
84
Grid/qcd/action/fermion/TwoSpinWilsonKernels.h
Normal file
84
Grid/qcd/action/fermion/TwoSpinWilsonKernels.h
Normal file
@@ -0,0 +1,84 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/WilsonKernels.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Helper routines that implement Wilson stencil for a single site.
|
||||
// Common to both the WilsonFermion and WilsonFermion5D
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<class Impl> class TwoSpinWilsonKernels : public FermionOperator<Impl> {
|
||||
public:
|
||||
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
typedef FermionOperator<Impl> Base;
|
||||
typedef AcceleratorVector<int,STENCIL_MAX> StencilVector;
|
||||
public:
|
||||
|
||||
static void DhopKernel(StencilImpl &st, DoubledGaugeField &U, SiteSpinor * buf,
|
||||
int Ls, int Nsite, const FermionField &in, FermionField &out,
|
||||
int interior=1,int exterior=1) ;
|
||||
|
||||
static void DhopKernel(StencilImpl &st, DoubledGaugeField &U, SiteSpinor * buf,
|
||||
int Ls, int Nsite, const FermionField &in, FermionField &out,
|
||||
uint64_t *ids);
|
||||
|
||||
static void DhopDagKernel(StencilImpl &st, DoubledGaugeField &U, SiteSpinor * buf,
|
||||
int Ls, int Nsite, const FermionField &in, FermionField &out,
|
||||
int interior=1,int exterior=1) ;
|
||||
|
||||
static void DhopDirAll( StencilImpl &st, DoubledGaugeField &U,SiteSpinor *buf, int Ls,
|
||||
int Nsite, const FermionField &in, std::vector<FermionField> &out) ;
|
||||
|
||||
static void DhopDirKernel(StencilImpl &st, DoubledGaugeField &U,SiteSpinor * buf,
|
||||
int Ls, int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma);
|
||||
|
||||
private:
|
||||
|
||||
static accelerator_inline void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteSpinor * buf,
|
||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma);
|
||||
|
||||
static accelerator_inline void DhopDirXp(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
|
||||
static accelerator_inline void DhopDirYp(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
|
||||
static accelerator_inline void DhopDirZp(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
|
||||
static accelerator_inline void DhopDirXm(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
|
||||
static accelerator_inline void DhopDirYm(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
|
||||
static accelerator_inline void DhopDirZm(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
|
||||
|
||||
public:
|
||||
TwoSpinWilsonKernels(const ImplParams &p = ImplParams()) : Base(p){};
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
||||
@@ -38,6 +38,8 @@ public:
|
||||
static int MortonOrder;
|
||||
static const std::vector<int> directions;
|
||||
static const std::vector<int> displacements;
|
||||
static std::vector<int> MakeDirections(void);
|
||||
static std::vector<int> MakeDisplacements(void);
|
||||
static const int npoint = 8;
|
||||
};
|
||||
|
||||
|
||||
@@ -62,6 +62,8 @@ public:
|
||||
static const std::vector<int> directions;
|
||||
static const std::vector<int> displacements;
|
||||
static constexpr int npoint = 8;
|
||||
static std::vector<int> MakeDirections(void);
|
||||
static std::vector<int> MakeDisplacements(void);
|
||||
};
|
||||
|
||||
template<class Impl>
|
||||
|
||||
@@ -166,7 +166,7 @@ public:
|
||||
|
||||
U = adj(Cshift(U, mu, -1));
|
||||
U = where(coor == 0, conjugate(phase) * U, U);
|
||||
PokeIndex<LorentzIndex>(Uds, U, mu + 4);
|
||||
PokeIndex<LorentzIndex>(Uds, U, mu + Nd);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -56,7 +56,7 @@ class WilsonTMFermion5D : public WilsonFermion5D<Impl>
|
||||
Frbgrid,
|
||||
Ugrid,
|
||||
Urbgrid,
|
||||
4.0,p)
|
||||
Nd*1.0,p)
|
||||
|
||||
{
|
||||
update(_mass,_mu);
|
||||
@@ -83,7 +83,7 @@ class WilsonTMFermion5D : public WilsonFermion5D<Impl>
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
//axpibg5x(out,in,a,b); // out = a*in + b*i*G5*in
|
||||
for (int s=0;s<(int)this->mass.size();s++) {
|
||||
ComplexD a = 4.0+this->mass[s];
|
||||
ComplexD a = Nd*1.0+this->mass[s];
|
||||
ComplexD b(0.0,this->mu[s]);
|
||||
axpbg5y_ssp(out,a,in,b,in,s,s);
|
||||
}
|
||||
@@ -92,7 +92,7 @@ class WilsonTMFermion5D : public WilsonFermion5D<Impl>
|
||||
virtual void MooeeDag(const FermionField &in, FermionField &out) {
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
for (int s=0;s<(int)this->mass.size();s++) {
|
||||
ComplexD a = 4.0+this->mass[s];
|
||||
ComplexD a = Nd*1.0+this->mass[s];
|
||||
ComplexD b(0.0,-this->mu[s]);
|
||||
axpbg5y_ssp(out,a,in,b,in,s,s);
|
||||
}
|
||||
@@ -101,7 +101,7 @@ class WilsonTMFermion5D : public WilsonFermion5D<Impl>
|
||||
for (int s=0;s<(int)this->mass.size();s++) {
|
||||
RealD m = this->mass[s];
|
||||
RealD tm = this->mu[s];
|
||||
RealD mtil = 4.0+this->mass[s];
|
||||
RealD mtil = Nd*1.0+this->mass[s];
|
||||
RealD sq = mtil*mtil+tm*tm;
|
||||
ComplexD a = mtil/sq;
|
||||
ComplexD b(0.0, -tm /sq);
|
||||
@@ -112,7 +112,7 @@ class WilsonTMFermion5D : public WilsonFermion5D<Impl>
|
||||
for (int s=0;s<(int)this->mass.size();s++) {
|
||||
RealD m = this->mass[s];
|
||||
RealD tm = this->mu[s];
|
||||
RealD mtil = 4.0+this->mass[s];
|
||||
RealD mtil = Nd*1.0+this->mass[s];
|
||||
RealD sq = mtil*mtil+tm*tm;
|
||||
ComplexD a = mtil/sq;
|
||||
ComplexD b(0.0,tm /sq);
|
||||
@@ -126,7 +126,7 @@ class WilsonTMFermion5D : public WilsonFermion5D<Impl>
|
||||
this->Dhop(in, out, DaggerNo);
|
||||
FermionField tmp(out.Grid());
|
||||
for (int s=0;s<(int)this->mass.size();s++) {
|
||||
ComplexD a = 4.0+this->mass[s];
|
||||
ComplexD a = Nd*1.0+this->mass[s];
|
||||
ComplexD b(0.0,this->mu[s]);
|
||||
axpbg5y_ssp(tmp,a,in,b,in,s,s);
|
||||
}
|
||||
|
||||
@@ -240,7 +240,7 @@ void DomainWallEOFAFermion<Impl>::SetCoefficientsInternal(RealD zolo_hi, std::ve
|
||||
this->ceo.resize(Ls);
|
||||
|
||||
for(int i=0; i<Ls; ++i){
|
||||
this->bee[i] = 4.0 - this->M5 + 1.0;
|
||||
this->bee[i] = Nd*1.0 - this->M5 + 1.0;
|
||||
this->cee[i] = 1.0;
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,486 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/TwoSpinWilsonFermion2plus1D.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/WilsonFermion5D.h>
|
||||
#include <Grid/perfmon/PerfCount.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
// 5d lattice for DWF.
|
||||
template<class Impl>
|
||||
TwoSpinWilsonFermion3plus15D<Impl>::TwoSpinWilsonFermion3plus1D(GaugeField &_Umu,
|
||||
GridCartesian &FourDimGrid,
|
||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||
GridCartesian &ThreeDimGrid,
|
||||
GridRedBlackCartesian &ThreeDimRedBlackGrid,
|
||||
RealD _M5,const ImplParams &p) :
|
||||
Kernels(p),
|
||||
_FourDimGrid (&FourDimGrid),
|
||||
_FourDimRedBlackGrid(&FourDimRedBlackGrid),
|
||||
_ThreeDimGrid (&ThreeDimGrid),
|
||||
_ThreeDimRedBlackGrid(&ThreeDimRedBlackGrid),
|
||||
Stencil (_FourDimGrid,npoint,Even,directions,displacements,p),
|
||||
StencilEven(_FourDimRedBlackGrid,npoint,Even,directions,displacements,p), // source is Even
|
||||
StencilOdd (_FourDimRedBlackGrid,npoint,Odd ,directions,displacements,p), // source is Odd
|
||||
M5(_M5),
|
||||
Umu(_ThreeDimGrid),
|
||||
UmuEven(_ThreeDimRedBlackGrid),
|
||||
UmuOdd (_ThreeDimRedBlackGrid),
|
||||
_tmp(&FourDimRedBlackGrid),
|
||||
Dirichlet(0)
|
||||
{
|
||||
// some assertions
|
||||
assert(FourDimGrid._ndimension==Nd+1);
|
||||
assert(ThreeDimGrid._ndimension==Nd);
|
||||
assert(ThreeDimRedBlackGrid._ndimension==Nd);
|
||||
assert(FourDimRedBlackGrid._ndimension==Nd+1);
|
||||
assert(FourDimRedBlackGrid._checker_dim==1); // Don't checker the s direction
|
||||
|
||||
// extent of fifth dim and not spread out
|
||||
Ls=FourDimGrid._fdimensions[0];
|
||||
assert(FourDimRedBlackGrid._fdimensions[0]==Ls);
|
||||
assert(FourDimGrid._processors[0] ==1);
|
||||
assert(FourDimRedBlackGrid._processors[0] ==1);
|
||||
|
||||
// Other dimensions must match the decomposition of the four-D fields
|
||||
for(int d=0;d<Nd;d++){
|
||||
|
||||
assert(FourDimGrid._processors[d+1] ==ThreeDimGrid._processors[d]);
|
||||
assert(FourDimRedBlackGrid._processors[d+1] ==ThreeDimGrid._processors[d]);
|
||||
assert(ThreeDimRedBlackGrid._processors[d] ==ThreeDimGrid._processors[d]);
|
||||
|
||||
assert(FourDimGrid._fdimensions[d+1] ==ThreeDimGrid._fdimensions[d]);
|
||||
assert(FourDimRedBlackGrid._fdimensions[d+1]==ThreeDimGrid._fdimensions[d]);
|
||||
assert(ThreeDimRedBlackGrid._fdimensions[d] ==ThreeDimGrid._fdimensions[d]);
|
||||
|
||||
assert(FourDimGrid._simd_layout[d+1] ==ThreeDimGrid._simd_layout[d]);
|
||||
assert(FourDimRedBlackGrid._simd_layout[d+1]==ThreeDimGrid._simd_layout[d]);
|
||||
assert(ThreeDimRedBlackGrid._simd_layout[d] ==ThreeDimGrid._simd_layout[d]);
|
||||
}
|
||||
|
||||
if ( p.dirichlet.size() == Nd+1) {
|
||||
Coordinate block = p.dirichlet;
|
||||
for(int d=0;d<Nd+1;d++) {
|
||||
if ( block[d] ){
|
||||
Dirichlet = 1;
|
||||
std::cout << GridLogMessage << " WilsonFermion: non-trivial Dirichlet condition "<< block << std::endl;
|
||||
std::cout << GridLogMessage << " WilsonFermion: partial Dirichlet "<< p.partialDirichlet << std::endl;
|
||||
Block = block;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Coordinate block(Nd+1,0);
|
||||
Block = block;
|
||||
}
|
||||
|
||||
// Dimension zero of the five-d is the Ls direction
|
||||
assert(FourDimRedBlackGrid._simd_layout[0]==1);
|
||||
assert(FourDimGrid._simd_layout[0] ==1);
|
||||
|
||||
// Allocate the required comms buffer
|
||||
ImportGauge(_Umu);
|
||||
// Build lists of exterior only nodes
|
||||
int LLs = FourDimGrid._rdimensions[0];
|
||||
int vol3;
|
||||
vol3=ThreeDimGrid.oSites();
|
||||
Stencil.BuildSurfaceList(LLs,vol3);
|
||||
|
||||
vol3=ThreeDimRedBlackGrid.oSites();
|
||||
StencilEven.BuildSurfaceList(LLs,vol3);
|
||||
StencilOdd.BuildSurfaceList(LLs,vol3);
|
||||
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void TwoSpinWilsonFermion3plus1D<Impl>::ImportGauge(const GaugeField &_Umu)
|
||||
{
|
||||
GaugeField HUmu(_Umu.Grid());
|
||||
HUmu = _Umu*(-0.5);
|
||||
Impl::DoubleStore(GaugeGrid(),Umu,HUmu);
|
||||
pickCheckerboard(Even,UmuEven,Umu);
|
||||
pickCheckerboard(Odd ,UmuOdd,Umu);
|
||||
}
|
||||
template<class Impl>
|
||||
void TwoSpinWilsonFermion3plus1D<Impl>::DhopDir(const FermionField &in, FermionField &out,int dir5,int disp)
|
||||
{
|
||||
int dir = dir5-1; // Maps to the ordering above in "directions" that is passed to stencil
|
||||
// we drop off the innermost fifth dimension
|
||||
// assert( (disp==1)||(disp==-1) );
|
||||
// assert( (dir>=0)&&(dir<4) ); //must do x,y,z or t;
|
||||
|
||||
int skip = (disp==1) ? 0 : 1;
|
||||
int dirdisp = dir+skip*Nd;
|
||||
int gamma = dir+(1-skip)*Nd;
|
||||
|
||||
Compressor compressor(DaggerNo);
|
||||
Stencil.HaloExchange(in,compressor);
|
||||
|
||||
uint64_t Nsite = Umu.Grid()->oSites();
|
||||
Kernels::DhopDirKernel(Stencil,Umu,Stencil.CommBuf(),Ls,Nsite,in,out,dirdisp,gamma);
|
||||
|
||||
};
|
||||
template<class Impl>
|
||||
void TwoSpinWilsonFermion3plus1D<Impl>::DhopDirAll(const FermionField &in, std::vector<FermionField> &out)
|
||||
{
|
||||
Compressor compressor(DaggerNo);
|
||||
Stencil.HaloExchange(in,compressor);
|
||||
uint64_t Nsite = Umu.Grid()->oSites();
|
||||
Kernels::DhopDirAll(Stencil,Umu,Stencil.CommBuf(),Ls,Nsite,in,out);
|
||||
};
|
||||
|
||||
|
||||
template<class Impl>
|
||||
void TwoSpinWilsonFermion3plus1D<Impl>::DerivInternal(StencilImpl & st,
|
||||
DoubledGaugeField & U,
|
||||
GaugeField &mat,
|
||||
const FermionField &A,
|
||||
const FermionField &B,
|
||||
int dag)
|
||||
{
|
||||
assert((dag==DaggerNo) ||(dag==DaggerYes));
|
||||
|
||||
conformable(st.Grid(),A.Grid());
|
||||
conformable(st.Grid(),B.Grid());
|
||||
|
||||
Compressor compressor(dag);
|
||||
|
||||
FermionField Btilde(B.Grid());
|
||||
FermionField Atilde(B.Grid());
|
||||
|
||||
st.HaloExchange(B,compressor);
|
||||
|
||||
Atilde=A;
|
||||
int LLs = B.Grid()->_rdimensions[0];
|
||||
|
||||
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Flip gamma if dag
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
int gamma = mu;
|
||||
if (!dag) gamma += Nd;
|
||||
|
||||
////////////////////////
|
||||
// Call the single hop
|
||||
////////////////////////
|
||||
|
||||
int Usites = U.Grid()->oSites();
|
||||
|
||||
Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, Usites, B, Btilde, mu,gamma);
|
||||
|
||||
////////////////////////////
|
||||
// spin trace outer product
|
||||
////////////////////////////
|
||||
Impl::InsertForce5D(mat, Btilde, Atilde, mu);
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void TwoSpinWilsonFermion3plus1D<Impl>::DhopDeriv(GaugeField &mat,
|
||||
const FermionField &A,
|
||||
const FermionField &B,
|
||||
int dag)
|
||||
{
|
||||
conformable(A.Grid(),FermionGrid());
|
||||
conformable(A.Grid(),B.Grid());
|
||||
|
||||
//conformable(GaugeGrid(),mat.Grid());// this is not general! leaving as a comment
|
||||
|
||||
mat.Checkerboard() = A.Checkerboard();
|
||||
// mat.checkerboard = A.checkerboard;
|
||||
|
||||
DerivInternal(Stencil,Umu,mat,A,B,dag);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void TwoSpinWilsonFermion3plus1D<Impl>::DhopDerivEO(GaugeField &mat,
|
||||
const FermionField &A,
|
||||
const FermionField &B,
|
||||
int dag)
|
||||
{
|
||||
conformable(A.Grid(),FermionRedBlackGrid());
|
||||
conformable(A.Grid(),B.Grid());
|
||||
|
||||
assert(B.Checkerboard()==Odd);
|
||||
assert(A.Checkerboard()==Even);
|
||||
mat.Checkerboard() = Even;
|
||||
|
||||
DerivInternal(StencilOdd,UmuEven,mat,A,B,dag);
|
||||
}
|
||||
|
||||
|
||||
template<class Impl>
|
||||
void TwoSpinWilsonFermion3plus1D<Impl>::DhopDerivOE(GaugeField &mat,
|
||||
const FermionField &A,
|
||||
const FermionField &B,
|
||||
int dag)
|
||||
{
|
||||
conformable(A.Grid(),FermionRedBlackGrid());
|
||||
conformable(A.Grid(),B.Grid());
|
||||
|
||||
assert(B.Checkerboard()==Even);
|
||||
assert(A.Checkerboard()==Odd);
|
||||
mat.Checkerboard() = Odd;
|
||||
|
||||
DerivInternal(StencilEven,UmuOdd,mat,A,B,dag);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void TwoSpinWilsonFermion3plus1D<Impl>::DhopInternal(StencilImpl & st,
|
||||
DoubledGaugeField & U,
|
||||
const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
DhopInternalSerialComms(st,U,in,out,dag);
|
||||
}
|
||||
|
||||
|
||||
template<class Impl>
|
||||
void TwoSpinWilsonFermion3plus1D<Impl>::DhopInternalOverlappedComms(StencilImpl & st,
|
||||
DoubledGaugeField & U,
|
||||
const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
GRID_TRACE("DhopInternalOverlappedComms");
|
||||
Compressor compressor(dag);
|
||||
|
||||
int LLs = in.Grid()->_rdimensions[0];
|
||||
int len = U.Grid()->oSites();
|
||||
|
||||
/////////////////////////////
|
||||
// Start comms // Gather intranode and extra node differentiated??
|
||||
/////////////////////////////
|
||||
{
|
||||
// std::cout << " TwoSpinWilsonFermion3plus1D gather " <<std::endl;
|
||||
GRID_TRACE("Gather");
|
||||
st.HaloExchangeOptGather(in,compressor); // Put the barrier in the routine
|
||||
}
|
||||
|
||||
// std::cout << " TwoSpinWilsonFermion3plus1D Communicate Begin " <<std::endl;
|
||||
std::vector<std::vector<CommsRequest_t> > requests;
|
||||
|
||||
#if 1
|
||||
/////////////////////////////
|
||||
// Overlap with comms
|
||||
/////////////////////////////
|
||||
st.CommunicateBegin(requests);
|
||||
st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
|
||||
#endif
|
||||
|
||||
/////////////////////////////
|
||||
// do the compute interior
|
||||
/////////////////////////////
|
||||
if (dag == DaggerYes) {
|
||||
GRID_TRACE("DhopDagInterior");
|
||||
Kernels::DhopDagKernel(st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
|
||||
} else {
|
||||
GRID_TRACE("DhopInterior");
|
||||
Kernels::DhopKernel (st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
|
||||
}
|
||||
|
||||
//ifdef GRID_ACCELERATED
|
||||
#if 0
|
||||
/////////////////////////////
|
||||
// Overlap with comms -- on GPU the interior kernel call is nonblocking
|
||||
/////////////////////////////
|
||||
st.CommunicateBegin(requests);
|
||||
st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
|
||||
#endif
|
||||
|
||||
|
||||
/////////////////////////////
|
||||
// Complete comms
|
||||
/////////////////////////////
|
||||
// std::cout << " TwoSpinWilsonFermion3plus1D Comms Complete " <<std::endl;
|
||||
st.CommunicateComplete(requests);
|
||||
// traceStop(id);
|
||||
|
||||
/////////////////////////////
|
||||
// do the compute exterior
|
||||
/////////////////////////////
|
||||
{
|
||||
// std::cout << " TwoSpinWilsonFermion3plus1D Comms Merge " <<std::endl;
|
||||
GRID_TRACE("Merge");
|
||||
st.CommsMerge(compressor);
|
||||
}
|
||||
|
||||
|
||||
// std::cout << " TwoSpinWilsonFermion3plus1D Exterior " <<std::endl;
|
||||
if (dag == DaggerYes) {
|
||||
GRID_TRACE("DhopDagExterior");
|
||||
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
|
||||
} else {
|
||||
GRID_TRACE("DhopExterior");
|
||||
Kernels::DhopKernel (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
|
||||
}
|
||||
// std::cout << " TwoSpinWilsonFermion3plus1D Done " <<std::endl;
|
||||
}
|
||||
|
||||
|
||||
template<class Impl>
|
||||
void TwoSpinWilsonFermion3plus1D<Impl>::DhopInternalSerialComms(StencilImpl & st,
|
||||
DoubledGaugeField & U,
|
||||
const FermionField &in,
|
||||
FermionField &out,int dag)
|
||||
{
|
||||
GRID_TRACE("DhopInternalSerialComms");
|
||||
Compressor compressor(dag);
|
||||
|
||||
int LLs = in.Grid()->_rdimensions[0];
|
||||
|
||||
// std::cout << " TwoSpinWilsonFermion3plus1D Halo exch " <<std::endl;
|
||||
{
|
||||
GRID_TRACE("HaloExchange");
|
||||
st.HaloExchangeOpt(in,compressor);
|
||||
}
|
||||
|
||||
// std::cout << " TwoSpinWilsonFermion3plus1D Dhop " <<std::endl;
|
||||
if (dag == DaggerYes) {
|
||||
GRID_TRACE("DhopDag");
|
||||
Kernels::DhopDagKernel(st,U,st.CommBuf(),LLs,U.oSites(),in,out);
|
||||
} else {
|
||||
GRID_TRACE("Dhop");
|
||||
Kernels::DhopKernel(st,U,st.CommBuf(),LLs,U.oSites(),in,out);
|
||||
}
|
||||
// std::cout << " TwoSpinWilsonFermion3plus1D Done " <<std::endl;
|
||||
}
|
||||
|
||||
|
||||
template<class Impl>
|
||||
void TwoSpinWilsonFermion3plus1D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid
|
||||
conformable(in.Grid(),out.Grid()); // drops the cb check
|
||||
|
||||
assert(in.Checkerboard()==Even);
|
||||
out.Checkerboard() = Odd;
|
||||
|
||||
DhopInternal(StencilEven,UmuOdd,in,out,dag);
|
||||
}
|
||||
template<class Impl>
|
||||
void TwoSpinWilsonFermion3plus1D<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid
|
||||
conformable(in.Grid(),out.Grid()); // drops the cb check
|
||||
|
||||
assert(in.Checkerboard()==Odd);
|
||||
out.Checkerboard() = Even;
|
||||
|
||||
DhopInternal(StencilOdd,UmuEven,in,out,dag);
|
||||
}
|
||||
template<class Impl>
|
||||
void TwoSpinWilsonFermion3plus1D<Impl>::DhopComms(const FermionField &in, FermionField &out)
|
||||
{
|
||||
int dag =0 ;
|
||||
conformable(in.Grid(),FermionGrid()); // verifies full grid
|
||||
conformable(in.Grid(),out.Grid());
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
Compressor compressor(dag);
|
||||
Stencil.HaloExchangeOpt(in,compressor);
|
||||
}
|
||||
template<class Impl>
|
||||
void TwoSpinWilsonFermion3plus1D<Impl>::DhopCalc(const FermionField &in, FermionField &out,uint64_t *ids)
|
||||
{
|
||||
conformable(in.Grid(),FermionGrid()); // verifies full grid
|
||||
conformable(in.Grid(),out.Grid());
|
||||
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
|
||||
int LLs = in.Grid()->_rdimensions[0];
|
||||
Kernels::DhopKernel(Stencil,Umu,Stencil.CommBuf(),LLs,Umu.oSites(),in,out,ids);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void TwoSpinWilsonFermion3plus1D<Impl>::Dhop(const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
conformable(in.Grid(),FermionGrid()); // verifies full grid
|
||||
conformable(in.Grid(),out.Grid());
|
||||
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
|
||||
DhopInternal(Stencil,Umu,in,out,dag);
|
||||
}
|
||||
template<class Impl>
|
||||
void TwoSpinWilsonFermion3plus1D<Impl>::DW(const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
out.Checkerboard()=in.Checkerboard();
|
||||
Dhop(in,out,dag); // -0.5 is included
|
||||
axpy(out,Nd*1.0-M5,in,out);
|
||||
}
|
||||
template <class Impl>
|
||||
void TwoSpinWilsonFermion3plus1D<Impl>::Meooe(const FermionField &in, FermionField &out)
|
||||
{
|
||||
if (in.Checkerboard() == Odd) {
|
||||
DhopEO(in, out, DaggerNo);
|
||||
} else {
|
||||
DhopOE(in, out, DaggerNo);
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void TwoSpinWilsonFermion3plus1D<Impl>::MeooeDag(const FermionField &in, FermionField &out)
|
||||
{
|
||||
if (in.Checkerboard() == Odd) {
|
||||
DhopEO(in, out, DaggerYes);
|
||||
} else {
|
||||
DhopOE(in, out, DaggerYes);
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void TwoSpinWilsonFermion3plus1D<Impl>::Mooee(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
typename FermionField::scalar_type scal(Nd*1.0 + M5);
|
||||
out = scal * in;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void TwoSpinWilsonFermion3plus1D<Impl>::MooeeDag(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
Mooee(in, out);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void TwoSpinWilsonFermion3plus1D<Impl>::MooeeInv(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
out = (1.0/(Nd*1.0 + M5))*in;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void TwoSpinWilsonFermion3plus1D<Impl>::MooeeInvDag(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
MooeeInv(in,out);
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,441 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/TwoSpinWilsonKernels.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Generic implementation; move to different file?
|
||||
////////////////////////////////////////////
|
||||
|
||||
#define GENERIC_STENCIL_LEG(Dir,spProj,Recon) \
|
||||
SE = st.GetEntry(ptype, Dir, sF); \
|
||||
if (SE->_is_local) { \
|
||||
int perm= SE->_permute; \
|
||||
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
|
||||
spProj(chi,tmp); \
|
||||
} else { \
|
||||
chi = coalescedRead(buf[SE->_offset],lane); \
|
||||
} \
|
||||
acceleratorSynchronise(); \
|
||||
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
|
||||
Recon(result, Uchi);
|
||||
|
||||
#define GENERIC_STENCIL_LEG_INT(Dir,spProj,Recon) \
|
||||
SE = st.GetEntry(ptype, Dir, sF); \
|
||||
if (SE->_is_local) { \
|
||||
int perm= SE->_permute; \
|
||||
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
|
||||
spProj(chi,tmp); \
|
||||
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
|
||||
Recon(result, Uchi); \
|
||||
} \
|
||||
acceleratorSynchronise();
|
||||
|
||||
#define GENERIC_STENCIL_LEG_EXT(Dir,spProj,Recon) \
|
||||
SE = st.GetEntry(ptype, Dir, sF); \
|
||||
if (!SE->_is_local ) { \
|
||||
auto chi = coalescedRead(buf[SE->_offset],lane); \
|
||||
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
|
||||
Recon(result, Uchi); \
|
||||
nmu++; \
|
||||
} \
|
||||
acceleratorSynchronise();
|
||||
|
||||
#define GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,Recon) \
|
||||
if (SE->_is_local ) { \
|
||||
int perm= SE->_permute; \
|
||||
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
|
||||
spProj(chi,tmp); \
|
||||
} else { \
|
||||
chi = coalescedRead(buf[SE->_offset],lane); \
|
||||
} \
|
||||
acceleratorSynchronise(); \
|
||||
Impl::multLink(Uchi, U[sU], chi, dir, SE, st); \
|
||||
Recon(result, Uchi);
|
||||
|
||||
#define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon) \
|
||||
if (gamma == Dir) { \
|
||||
GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,Recon); \
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// All legs kernels ; comms then compute
|
||||
////////////////////////////////////////////////////////////////////
|
||||
template <class Impl> accelerator_inline
|
||||
void TwoSpinWilsonKernels<Impl>::DhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,
|
||||
SiteSpinor *buf, int sF,
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
typedef decltype(coalescedRead(in[0])) calcSpinor;
|
||||
calcSpinor chi;
|
||||
calcSpinor Uchi;
|
||||
calcSpinor result;
|
||||
StencilEntry *SE;
|
||||
int ptype;
|
||||
const int Nsimd = SiteSpinor::Nsimd();
|
||||
const int lane=acceleratorSIMTlane(Nsimd);
|
||||
GENERIC_STENCIL_LEG(Xp,pauliProjXp,pauliAssign);
|
||||
GENERIC_STENCIL_LEG(Yp,pauliProjYp,pauliAdd);
|
||||
GENERIC_STENCIL_LEG(Zp,pauliProjZp,pauliAdd);
|
||||
GENERIC_STENCIL_LEG(Xm,pauliProjXm,pauliAdd);
|
||||
GENERIC_STENCIL_LEG(Ym,pauliProjYm,pauliAdd);
|
||||
GENERIC_STENCIL_LEG(Zm,pauliProjZm,pauliAdd);
|
||||
coalescedWrite(out[sF],result,lane);
|
||||
};
|
||||
|
||||
template <class Impl> accelerator_inline
|
||||
void TwoSpinWilsonKernels<Impl>::GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U,
|
||||
SiteSpinor *buf, int sF,
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
typedef decltype(coalescedRead(in[0])) calcSpinor;
|
||||
calcSpinor chi;
|
||||
// calcSpinor *chi_p;
|
||||
calcSpinor Uchi;
|
||||
calcSpinor result;
|
||||
StencilEntry *SE;
|
||||
int ptype;
|
||||
|
||||
const int Nsimd = SiteSpinor::Nsimd();
|
||||
const int lane=acceleratorSIMTlane(Nsimd);
|
||||
GENERIC_STENCIL_LEG(Xm,pauliProjXp,pauliAssign);
|
||||
GENERIC_STENCIL_LEG(Ym,pauliProjYp,pauliAdd);
|
||||
GENERIC_STENCIL_LEG(Zm,pauliProjZp,pauliAdd);
|
||||
GENERIC_STENCIL_LEG(Xp,pauliProjXm,pauliAdd);
|
||||
GENERIC_STENCIL_LEG(Yp,pauliProjYm,pauliAdd);
|
||||
GENERIC_STENCIL_LEG(Zp,pauliProjZm,pauliAdd);
|
||||
coalescedWrite(out[sF], result,lane);
|
||||
};
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Interior kernels
|
||||
////////////////////////////////////////////////////////////////////
|
||||
template <class Impl> accelerator_inline
|
||||
void TwoSpinWilsonKernels<Impl>::GenericDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,
|
||||
SiteSpinor *buf, int sF,
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
typedef decltype(coalescedRead(in[0])) calcSpinor;
|
||||
calcSpinor chi;
|
||||
// calcSpinor *chi_p;
|
||||
calcSpinor Uchi;
|
||||
calcSpinor result;
|
||||
StencilEntry *SE;
|
||||
int ptype;
|
||||
const int Nsimd = SiteSpinor::Nsimd();
|
||||
const int lane=acceleratorSIMTlane(Nsimd);
|
||||
|
||||
result=Zero();
|
||||
GENERIC_STENCIL_LEG_INT(Xp,pauliProjXp,pauliAdd);
|
||||
GENERIC_STENCIL_LEG_INT(Yp,pauliProjYp,pauliAdd);
|
||||
GENERIC_STENCIL_LEG_INT(Zp,pauliProjZp,pauliAdd);
|
||||
GENERIC_STENCIL_LEG_INT(Xm,pauliProjXm,pauliAdd);
|
||||
GENERIC_STENCIL_LEG_INT(Ym,pauliProjYm,pauliAdd);
|
||||
GENERIC_STENCIL_LEG_INT(Zm,pauliProjZm,pauliAdd);
|
||||
coalescedWrite(out[sF], result,lane);
|
||||
};
|
||||
|
||||
template <class Impl> accelerator_inline
|
||||
void TwoSpinWilsonKernels<Impl>::GenericDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U,
|
||||
SiteSpinor *buf, int sF,
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
typedef decltype(coalescedRead(in[0])) calcSpinor;
|
||||
const int Nsimd = SiteSpinor::Nsimd();
|
||||
const int lane=acceleratorSIMTlane(Nsimd);
|
||||
|
||||
calcSpinor chi;
|
||||
// calcSpinor *chi_p;
|
||||
calcSpinor Uchi;
|
||||
calcSpinor result;
|
||||
StencilEntry *SE;
|
||||
int ptype;
|
||||
result=Zero();
|
||||
GENERIC_STENCIL_LEG_INT(Xm,pauliProjXp,pauliAdd);
|
||||
GENERIC_STENCIL_LEG_INT(Ym,pauliProjYp,pauliAdd);
|
||||
GENERIC_STENCIL_LEG_INT(Zm,pauliProjZp,pauliAdd);
|
||||
GENERIC_STENCIL_LEG_INT(Xp,pauliProjXm,pauliAdd);
|
||||
GENERIC_STENCIL_LEG_INT(Yp,pauliProjYm,pauliAdd);
|
||||
GENERIC_STENCIL_LEG_INT(Zp,pauliProjZm,pauliAdd);
|
||||
coalescedWrite(out[sF], result,lane);
|
||||
};
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Exterior kernels
|
||||
////////////////////////////////////////////////////////////////////
|
||||
template <class Impl> accelerator_inline
|
||||
void TwoSpinWilsonKernels<Impl>::GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,
|
||||
SiteSpinor *buf, int sF,
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
typedef decltype(coalescedRead(in[0])) calcSpinor;
|
||||
// calcSpinor *chi_p;
|
||||
calcSpinor Uchi;
|
||||
calcSpinor result;
|
||||
StencilEntry *SE;
|
||||
int ptype;
|
||||
int nmu=0;
|
||||
const int Nsimd = SiteSpinor::Nsimd();
|
||||
const int lane=acceleratorSIMTlane(Nsimd);
|
||||
result=Zero();
|
||||
GENERIC_STENCIL_LEG_EXT(Xp,pauliProjXp,pauliAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(Yp,pauliProjYp,pauliAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(Zp,pauliProjZp,pauliAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(Xm,pauliProjXm,pauliAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(Ym,pauliProjYm,pauliAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(Zm,pauliProjZm,pauliAdd);
|
||||
if ( nmu ) {
|
||||
auto out_t = coalescedRead(out[sF],lane);
|
||||
out_t = out_t + result;
|
||||
coalescedWrite(out[sF],out_t,lane);
|
||||
}
|
||||
};
|
||||
|
||||
template <class Impl> accelerator_inline
|
||||
void TwoSpinWilsonKernels<Impl>::GenericDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U,
|
||||
SiteSpinor *buf, int sF,
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
typedef decltype(coalescedRead(in[0])) calcSpinor;
|
||||
// calcSpinor *chi_p;
|
||||
calcSpinor Uchi;
|
||||
calcSpinor result;
|
||||
StencilEntry *SE;
|
||||
int ptype;
|
||||
int nmu=0;
|
||||
const int Nsimd = SiteSpinor::Nsimd();
|
||||
const int lane=acceleratorSIMTlane(Nsimd);
|
||||
result=Zero();
|
||||
GENERIC_STENCIL_LEG_EXT(Xm,pauliProjXp,pauliAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(Ym,pauliProjYp,pauliAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(Zm,pauliProjZp,pauliAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(Xp,pauliProjXm,pauliAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(Yp,pauliProjYm,pauliAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(Zp,pauliProjZm,pauliAdd);
|
||||
if ( nmu ) {
|
||||
auto out_t = coalescedRead(out[sF],lane);
|
||||
out_t = out_t + result;
|
||||
coalescedWrite(out[sF],out_t,lane);
|
||||
}
|
||||
};
|
||||
|
||||
#define DhopDirMacro(Dir,spProj,spRecon) \
|
||||
template <class Impl> accelerator_inline \
|
||||
void TwoSpinWilsonKernels<Impl>::DhopDir##Dir(StencilView &st, DoubledGaugeFieldView &U,SiteSpinor *buf, int sF, \
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out, int dir) \
|
||||
{ \
|
||||
typedef decltype(coalescedRead(in[0])) calcSpinor; \
|
||||
calcSpinor chi; \
|
||||
calcSpinor result; \
|
||||
calcSpinor Uchi; \
|
||||
StencilEntry *SE; \
|
||||
int ptype; \
|
||||
const int Nsimd = SiteSpinor::Nsimd(); \
|
||||
const int lane=acceleratorSIMTlane(Nsimd); \
|
||||
\
|
||||
SE = st.GetEntry(ptype, dir, sF); \
|
||||
GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,spRecon); \
|
||||
coalescedWrite(out[sF], result,lane); \
|
||||
}
|
||||
|
||||
DhopDirMacro(Xp,pauliProjXp,pauliAssign);
|
||||
DhopDirMacro(Yp,pauliProjYp,pauliAssign);
|
||||
DhopDirMacro(Zp,pauliProjZp,pauliAssign);
|
||||
DhopDirMacro(Xm,pauliProjXm,pauliAssign);
|
||||
DhopDirMacro(Ym,pauliProjYm,pauliAssign);
|
||||
DhopDirMacro(Zm,pauliProjZm,pauliAssign);
|
||||
|
||||
template <class Impl> accelerator_inline
|
||||
void TwoSpinWilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,SiteSpinor *buf, int sF,
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int gamma)
|
||||
{
|
||||
typedef decltype(coalescedRead(in[0])) calcSpinor;
|
||||
calcSpinor chi;
|
||||
calcSpinor result;
|
||||
calcSpinor Uchi;
|
||||
StencilEntry *SE;
|
||||
int ptype;
|
||||
const int Nsimd = SiteSpinor::Nsimd();
|
||||
const int lane=acceleratorSIMTlane(Nsimd);
|
||||
|
||||
SE = st.GetEntry(ptype, dir, sF);
|
||||
GENERIC_DHOPDIR_LEG(Xp,pauliProjXp,pauliAssign);
|
||||
GENERIC_DHOPDIR_LEG(Yp,pauliProjYp,pauliAssign);
|
||||
GENERIC_DHOPDIR_LEG(Zp,pauliProjZp,pauliAssign);
|
||||
GENERIC_DHOPDIR_LEG(Xm,pauliProjXm,pauliAssign);
|
||||
GENERIC_DHOPDIR_LEG(Ym,pauliProjYm,pauliAssign);
|
||||
GENERIC_DHOPDIR_LEG(Zm,pauliProjZm,pauliAssign);
|
||||
coalescedWrite(out[sF], result,lane);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void TwoSpinWilsonKernels<Impl>::DhopDirAll( StencilImpl &st, DoubledGaugeField &U,SiteSpinor *buf, int Ls,
|
||||
int Nsite, const FermionField &in, std::vector<FermionField> &out)
|
||||
{
|
||||
autoView(U_v ,U,AcceleratorRead);
|
||||
autoView(in_v ,in,AcceleratorRead);
|
||||
autoView(st_v ,st,AcceleratorRead);
|
||||
|
||||
autoView(out_Xm,out[0],AcceleratorWrite);
|
||||
autoView(out_Ym,out[1],AcceleratorWrite);
|
||||
autoView(out_Zm,out[2],AcceleratorWrite);
|
||||
autoView(out_Xp,out[4],AcceleratorWrite);
|
||||
autoView(out_Yp,out[5],AcceleratorWrite);
|
||||
autoView(out_Zp,out[6],AcceleratorWrite);
|
||||
auto CBp=st.CommBuf();
|
||||
accelerator_for(sss,Nsite*Ls,Simd::Nsimd(),{
|
||||
int sU=sss/Ls;
|
||||
int sF =sss;
|
||||
DhopDirXm(st_v,U_v,CBp,sF,sU,in_v,out_Xm,0);
|
||||
DhopDirYm(st_v,U_v,CBp,sF,sU,in_v,out_Ym,1);
|
||||
DhopDirZm(st_v,U_v,CBp,sF,sU,in_v,out_Zm,2);
|
||||
DhopDirXp(st_v,U_v,CBp,sF,sU,in_v,out_Xp,3);
|
||||
DhopDirYp(st_v,U_v,CBp,sF,sU,in_v,out_Yp,4);
|
||||
DhopDirZp(st_v,U_v,CBp,sF,sU,in_v,out_Zp,5);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
template <class Impl>
|
||||
void TwoSpinWilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,SiteSpinor *buf, int Ls,
|
||||
int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma)
|
||||
{
|
||||
assert(dirdisp<=5);
|
||||
assert(dirdisp>=0);
|
||||
|
||||
autoView(U_v ,U ,AcceleratorRead);
|
||||
autoView(in_v ,in ,AcceleratorRead);
|
||||
autoView(out_v,out,AcceleratorWrite);
|
||||
autoView(st_v ,st ,AcceleratorRead);
|
||||
auto CBp=st.CommBuf();
|
||||
#define LoopBody(Dir) \
|
||||
case Dir : \
|
||||
accelerator_for(ss,Nsite,Simd::Nsimd(),{ \
|
||||
for(int s=0;s<Ls;s++){ \
|
||||
int sU=ss; \
|
||||
int sF = s+Ls*sU; \
|
||||
DhopDir##Dir(st_v,U_v,CBp,sF,sU,in_v,out_v,dirdisp);\
|
||||
} \
|
||||
}); \
|
||||
break;
|
||||
|
||||
switch(gamma){
|
||||
LoopBody(Xp);
|
||||
LoopBody(Yp);
|
||||
LoopBody(Zp);
|
||||
|
||||
LoopBody(Xm);
|
||||
LoopBody(Ym);
|
||||
LoopBody(Zm);
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
#undef LoopBody
|
||||
}
|
||||
|
||||
|
||||
#define KERNEL_CALLNB(A) \
|
||||
const uint64_t NN = Nsite*Ls; \
|
||||
accelerator_forNB( ss, NN, Simd::Nsimd(), { \
|
||||
int sF = ss; \
|
||||
int sU = ss/Ls; \
|
||||
TwoSpinWilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v); \
|
||||
});
|
||||
|
||||
#define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier();
|
||||
|
||||
#define KERNEL_CALL_EXT(A) \
|
||||
const uint64_t sz = st.surface_list.size(); \
|
||||
auto ptr = &st.surface_list[0]; \
|
||||
accelerator_forNB( ss, sz, Simd::Nsimd(), { \
|
||||
int sF = ptr[ss]; \
|
||||
int sU = sF/Ls; \
|
||||
TwoSpinWilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v); \
|
||||
}); \
|
||||
accelerator_barrier();
|
||||
|
||||
|
||||
template <class Impl>
|
||||
void TwoSpinWilsonKernels<Impl>::DhopKernel(StencilImpl &st, DoubledGaugeField &U, SiteSpinor * buf,
|
||||
int Ls, int Nsite, const FermionField &in, FermionField &out,
|
||||
int interior,int exterior)
|
||||
{
|
||||
autoView(U_v , U,AcceleratorRead);
|
||||
autoView(in_v , in,AcceleratorRead);
|
||||
autoView(out_v,out,AcceleratorWrite);
|
||||
autoView(st_v , st,AcceleratorRead);
|
||||
|
||||
if( interior && exterior ) {
|
||||
acceleratorFenceComputeStream();
|
||||
KERNEL_CALL(GenericDhopSite);
|
||||
return;
|
||||
} else if( interior ) {
|
||||
KERNEL_CALLNB(GenericDhopSiteInt);
|
||||
return;
|
||||
} else if( exterior ) {
|
||||
// // dependent on result of merge
|
||||
acceleratorFenceComputeStream();
|
||||
KERNEL_CALL_EXT(GenericDhopSiteExt);
|
||||
return;
|
||||
}
|
||||
assert(0 && " Kernel optimisation case not covered ");
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void TwoSpinWilsonKernels<Impl>::DhopDagKernel(StencilImpl &st, DoubledGaugeField &U, SiteSpinor * buf,
|
||||
int Ls, int Nsite, const FermionField &in, FermionField &out,
|
||||
int interior,int exterior)
|
||||
{
|
||||
autoView(U_v ,U,AcceleratorRead);
|
||||
autoView(in_v ,in,AcceleratorRead);
|
||||
autoView(out_v,out,AcceleratorWrite);
|
||||
autoView(st_v ,st,AcceleratorRead);
|
||||
|
||||
if( interior && exterior ) {
|
||||
acceleratorFenceComputeStream();
|
||||
KERNEL_CALL(GenericDhopSiteDag);
|
||||
return;
|
||||
} else if( interior ) {
|
||||
KERNEL_CALLNB(GenericDhopSiteDagInt); return;
|
||||
} else if( exterior ) {
|
||||
// Dependent on result of merge
|
||||
acceleratorFenceComputeStream();
|
||||
KERNEL_CALL_EXT(GenericDhopSiteDagExt); return;
|
||||
}
|
||||
assert(0 && " Kernel optimisation case not covered ");
|
||||
}
|
||||
|
||||
#undef KERNEL_CALLNB
|
||||
#undef KERNEL_CALL
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
@@ -61,7 +61,7 @@ WilsonCloverFermion<Impl, CloverHelpers>::WilsonCloverFermion(GaugeField&
|
||||
diag_mass = _mass + 1.0 + (Nd - 1) * (clover_anisotropy.nu / clover_anisotropy.xi_0);
|
||||
} else {
|
||||
csw_r = _csw_r * 0.5;
|
||||
diag_mass = 4.0 + _mass;
|
||||
diag_mass = Nd*1.0 + _mass;
|
||||
}
|
||||
csw_t = _csw_t * 0.5;
|
||||
|
||||
@@ -297,9 +297,9 @@ void WilsonCloverFermion<Impl, CloverHelpers>::MDeriv(GaugeField &force, const F
|
||||
{
|
||||
if (mu == nu)
|
||||
continue;
|
||||
|
||||
|
||||
RealD factor;
|
||||
if (nu == 4 || mu == 4)
|
||||
if (nu == (Nd-1) || mu == (Nd-1)) // This was a bug - surely mu/nu is NEVER 4 but rather (Nd-1)=3 ??
|
||||
{
|
||||
factor = 2.0 * csw_t;
|
||||
}
|
||||
@@ -307,9 +307,11 @@ void WilsonCloverFermion<Impl, CloverHelpers>::MDeriv(GaugeField &force, const F
|
||||
{
|
||||
factor = 2.0 * csw_r;
|
||||
}
|
||||
PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked
|
||||
Impl::TraceSpinImpl(lambda, Slambda); // traceSpin ok
|
||||
force_mu -= factor*CloverHelpers::Cmunu(U, lambda, mu, nu); // checked
|
||||
if ( mu < Nd && nu < Nd ) { // Allow to restrict range to Nd=3, but preserve orders of SigmaMuNu in table by counting ALL
|
||||
PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked
|
||||
Impl::TraceSpinImpl(lambda, Slambda); // traceSpin ok
|
||||
force_mu -= factor*CloverHelpers::Cmunu(U, lambda, mu, nu); // checked
|
||||
}
|
||||
count++;
|
||||
}
|
||||
|
||||
|
||||
@@ -63,10 +63,10 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
|
||||
Dirichlet(0)
|
||||
{
|
||||
// some assertions
|
||||
assert(FiveDimGrid._ndimension==5);
|
||||
assert(FourDimGrid._ndimension==4);
|
||||
assert(FourDimRedBlackGrid._ndimension==4);
|
||||
assert(FiveDimRedBlackGrid._ndimension==5);
|
||||
assert(FiveDimGrid._ndimension==Nd+1);
|
||||
assert(FourDimGrid._ndimension==Nd);
|
||||
assert(FourDimRedBlackGrid._ndimension==Nd);
|
||||
assert(FiveDimRedBlackGrid._ndimension==Nd+1);
|
||||
assert(FiveDimRedBlackGrid._checker_dim==1); // Don't checker the s direction
|
||||
|
||||
// extent of fifth dim and not spread out
|
||||
@@ -76,7 +76,7 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
|
||||
assert(FiveDimRedBlackGrid._processors[0] ==1);
|
||||
|
||||
// Other dimensions must match the decomposition of the four-D fields
|
||||
for(int d=0;d<4;d++){
|
||||
for(int d=0;d<Nd;d++){
|
||||
|
||||
assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
||||
assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
||||
@@ -93,11 +93,13 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
|
||||
|
||||
if ( p.dirichlet.size() == Nd+1) {
|
||||
Coordinate block = p.dirichlet;
|
||||
if ( block[0] || block[1] || block[2] || block[3] || block[4] ){
|
||||
Dirichlet = 1;
|
||||
std::cout << GridLogMessage << " WilsonFermion: non-trivial Dirichlet condition "<< block << std::endl;
|
||||
std::cout << GridLogMessage << " WilsonFermion: partial Dirichlet "<< p.partialDirichlet << std::endl;
|
||||
Block = block;
|
||||
for(int d=0;d<Nd+1;d++) {
|
||||
if ( block[d] ){
|
||||
Dirichlet = 1;
|
||||
std::cout << GridLogMessage << " WilsonFermion: non-trivial Dirichlet condition "<< block << std::endl;
|
||||
std::cout << GridLogMessage << " WilsonFermion: partial Dirichlet "<< p.partialDirichlet << std::endl;
|
||||
Block = block;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Coordinate block(Nd+1,0);
|
||||
@@ -112,7 +114,7 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
|
||||
assert(FiveDimGrid._simd_layout[0] ==nsimd);
|
||||
assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd);
|
||||
|
||||
for(int d=0;d<4;d++){
|
||||
for(int d=0;d<Nd;d++){
|
||||
assert(FourDimGrid._simd_layout[d]==1);
|
||||
assert(FourDimRedBlackGrid._simd_layout[d]==1);
|
||||
assert(FiveDimRedBlackGrid._simd_layout[d+1]==1);
|
||||
@@ -183,8 +185,8 @@ void WilsonFermion5D<Impl>::DhopDir(const FermionField &in, FermionField &out,in
|
||||
// assert( (dir>=0)&&(dir<4) ); //must do x,y,z or t;
|
||||
|
||||
int skip = (disp==1) ? 0 : 1;
|
||||
int dirdisp = dir+skip*4;
|
||||
int gamma = dir+(1-skip)*4;
|
||||
int dirdisp = dir+skip*Nd;
|
||||
int gamma = dir+(1-skip)*Nd;
|
||||
|
||||
Compressor compressor(DaggerNo);
|
||||
Stencil.HaloExchange(in,compressor);
|
||||
@@ -483,7 +485,7 @@ void WilsonFermion5D<Impl>::DW(const FermionField &in, FermionField &out,int dag
|
||||
{
|
||||
out.Checkerboard()=in.Checkerboard();
|
||||
Dhop(in,out,dag); // -0.5 is included
|
||||
axpy(out,4.0-M5,in,out);
|
||||
axpy(out,Nd*1.0-M5,in,out);
|
||||
}
|
||||
template <class Impl>
|
||||
void WilsonFermion5D<Impl>::Meooe(const FermionField &in, FermionField &out)
|
||||
@@ -509,7 +511,7 @@ template <class Impl>
|
||||
void WilsonFermion5D<Impl>::Mooee(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
typename FermionField::scalar_type scal(4.0 + M5);
|
||||
typename FermionField::scalar_type scal(Nd*1.0 + M5);
|
||||
out = scal * in;
|
||||
}
|
||||
|
||||
@@ -524,7 +526,7 @@ template<class Impl>
|
||||
void WilsonFermion5D<Impl>::MooeeInv(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
out = (1.0/(4.0 + M5))*in;
|
||||
out = (1.0/(Nd*1.0 + M5))*in;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
@@ -635,7 +637,7 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
|
||||
A = one / (abs(W) * sinha * 2.0) * one / (sinhaLs * 2.0);
|
||||
F = eaLs * (one - Wea + (Wema - one) * mass*mass);
|
||||
F = F + emaLs * (Wema - one + (one - Wea) * mass*mass);
|
||||
F = F - abs(W) * sinha * 4.0 * mass;
|
||||
F = F - abs(W) * sinha * (Nd* 1.0) * mass;
|
||||
|
||||
Bpp = (A/F) * (ema2Ls - one) * (one - Wema) * (one - mass*mass * one);
|
||||
Bmm = (A/F) * (one - ea2Ls) * (one - Wea) * (one - mass*mass * one);
|
||||
|
||||
@@ -63,7 +63,7 @@ WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
|
||||
if (anisotropyCoeff.isAnisotropic){
|
||||
diag_mass = mass + 1.0 + (Nd-1)*(anisotropyCoeff.nu / anisotropyCoeff.xi_0);
|
||||
} else {
|
||||
diag_mass = 4.0 + mass;
|
||||
diag_mass = Nd*1.0 + mass;
|
||||
}
|
||||
|
||||
int vol4;
|
||||
@@ -354,8 +354,8 @@ void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int
|
||||
Stencil.HaloExchange(in, compressor);
|
||||
|
||||
int skip = (disp == 1) ? 0 : 1;
|
||||
int dirdisp = dir + skip * 4;
|
||||
int gamma = dir + (1 - skip) * 4;
|
||||
int dirdisp = dir + skip * Nd;
|
||||
int gamma = dir + (1 - skip) * Nd;
|
||||
|
||||
DhopDirCalc(in, out, dirdisp, gamma, DaggerNo);
|
||||
};
|
||||
@@ -370,8 +370,8 @@ void WilsonFermion<Impl>::DhopDirAll(const FermionField &in, std::vector<Fermion
|
||||
for(int disp=-1;disp<=1;disp+=2){
|
||||
|
||||
int skip = (disp == 1) ? 0 : 1;
|
||||
int dirdisp = dir + skip * 4;
|
||||
int gamma = dir + (1 - skip) * 4;
|
||||
int dirdisp = dir + skip * Nd;
|
||||
int gamma = dir + (1 - skip) * Nd;
|
||||
|
||||
DhopDirCalc(in, out[dirdisp], dirdisp, gamma, DaggerNo);
|
||||
}
|
||||
|
||||
@@ -97,7 +97,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
distance = st._distances[DIR]; \
|
||||
sl = st._simd_layout[direction]; \
|
||||
inplace_twist = 0; \
|
||||
if(SE->_around_the_world && st.parameters.twists[DIR % 4]){ \
|
||||
if(SE->_around_the_world && st.parameters.twists[DIR % Nd]){ \
|
||||
if(sl == 1){ \
|
||||
g = (F+1) % 2; \
|
||||
}else{ \
|
||||
|
||||
@@ -32,8 +32,30 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
// S-direction is INNERMOST and takes no part in the parity.
|
||||
const std::vector<int> ImprovedStaggeredFermion5DStatic::directions({1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4});
|
||||
const std::vector<int> ImprovedStaggeredFermion5DStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1, 3, 3, 3, 3, -3, -3, -3, -3});
|
||||
const std::vector<int> ImprovedStaggeredFermion5DStatic::directions(ImprovedStaggeredFermion5DStatic::MakeDirections());
|
||||
const std::vector<int> ImprovedStaggeredFermion5DStatic::displacements(ImprovedStaggeredFermion5DStatic::MakeDisplacements());
|
||||
std::vector<int> ImprovedStaggeredFermion5DStatic::MakeDirections(void)
|
||||
{
|
||||
std::vector<int> directions(4*Nd);
|
||||
for(int d=0;d<Nd;d++){
|
||||
directions[d+Nd*0] = d+1;
|
||||
directions[d+Nd*1] = d+1;
|
||||
directions[d+Nd*2] = d+1;
|
||||
directions[d+Nd*3] = d+1;
|
||||
}
|
||||
return directions;
|
||||
}
|
||||
std::vector<int> ImprovedStaggeredFermion5DStatic::MakeDisplacements(void)
|
||||
{
|
||||
std::vector<int> displacements(4*Nd);
|
||||
for(int d=0;d<Nd;d++){
|
||||
displacements[d+Nd*0] =+1;
|
||||
displacements[d+Nd*1] =-1;
|
||||
displacements[d+Nd*2] =+3;
|
||||
displacements[d+Nd*3] =-3;
|
||||
}
|
||||
return displacements;
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
||||
@@ -32,5 +32,26 @@ NAMESPACE_BEGIN(Grid);
|
||||
|
||||
const std::vector<int> ImprovedStaggeredFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3});
|
||||
const std::vector<int> ImprovedStaggeredFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1, 3, 3, 3, 3, -3, -3, -3, -3});
|
||||
|
||||
std::vector<int> ImprovedStaggeredFermionStatic::MakeDirections(void)
|
||||
{
|
||||
std::vector<int> directions(4*Nd);
|
||||
for(int d=0;d<Nd;d++){
|
||||
directions[d+Nd*0] = d;
|
||||
directions[d+Nd*1] = d;
|
||||
directions[d+Nd*2] = d;
|
||||
directions[d+Nd*3] = d;
|
||||
}
|
||||
return directions;
|
||||
}
|
||||
std::vector<int> ImprovedStaggeredFermionStatic::MakeDisplacements(void)
|
||||
{
|
||||
std::vector<int> displacements(4*Nd);
|
||||
for(int d=0;d<Nd;d++){
|
||||
displacements[d+Nd*0] =+1;
|
||||
displacements[d+Nd*1] =-1;
|
||||
displacements[d+Nd*2] =+3;
|
||||
displacements[d+Nd*3] =-3;
|
||||
}
|
||||
return displacements;
|
||||
}
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
@@ -30,7 +30,27 @@ directory
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
const std::vector<int> NaiveStaggeredFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, 3});
|
||||
const std::vector<int> NaiveStaggeredFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1});
|
||||
//const std::vector<int> NaiveStaggeredFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, 3});
|
||||
//const std::vector<int> NaiveStaggeredFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1});
|
||||
const std::vector<int> NaiveStaggeredFermionStatic::directions(NaiveStaggeredFermionStatic::MakeDirections());
|
||||
const std::vector<int> NaiveStaggeredFermionStatic::displacements(NaiveStaggeredFermionStatic::MakeDisplacements());
|
||||
std::vector<int> NaiveStaggeredFermionStatic::MakeDirections(void)
|
||||
{
|
||||
std::vector<int> directions(4*Nd);
|
||||
for(int d=0;d<Nd;d++){
|
||||
directions[d+Nd*0] = d;
|
||||
directions[d+Nd*1] = d;
|
||||
}
|
||||
return directions;
|
||||
}
|
||||
std::vector<int> NaiveStaggeredFermionStatic::MakeDisplacements(void)
|
||||
{
|
||||
std::vector<int> displacements(4*Nd);
|
||||
for(int d=0;d<Nd;d++){
|
||||
displacements[d+Nd*0] =+1;
|
||||
displacements[d+Nd*1] =-1;
|
||||
}
|
||||
return displacements;
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
@@ -0,0 +1,61 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/TwoSpinWilsonFermion3plus1D.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
// S-direction is INNERMOST and takes no part in the parity.
|
||||
|
||||
const std::vector<int> TwoSpinWilsonFermion3plus1DStatic::directions (TwoSpinWilsonFermion3plus1DStatic::MakeDirections());
|
||||
const std::vector<int> TwoSpinWilsonFermion3plus1DStatic::displacements(TwoSpinWilsonFermion3plus1DStatic::MakeDisplacements());
|
||||
|
||||
std::vector<int> TwoSpinWilsonFermion3plus1DStatic::MakeDirections (void)
|
||||
{
|
||||
std::vector<int> directions(2*Nd);
|
||||
for(int d=0;d<Nd;d++){
|
||||
directions[d] = d+1;
|
||||
directions[d+Nd] = d+1;
|
||||
}
|
||||
return directions;
|
||||
}
|
||||
std::vector<int> TwoSpinWilsonFermion3plus1DStatic::MakeDisplacements(void)
|
||||
{
|
||||
std::vector<int> displacements(2*Nd);
|
||||
for(int d=0;d<Nd;d++){
|
||||
displacements[d] = +1;
|
||||
displacements[d+Nd] = -1;
|
||||
}
|
||||
return displacements;
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/implementation/TwoSpinWilsonFermion3plus1DImplementation.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
#include "impl.h"
|
||||
template class TwoSpinWilsonFermion3plus1D<IMPLEMENTATION>;
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
|
||||
|
||||
Copyright (C) 2015, 2020
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/implementation/TwoSpinWilsonKernelsImplementation.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
#include "impl.h"
|
||||
template class TwoSpinWilsonKernels<IMPLEMENTATION>;
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
@@ -34,8 +34,28 @@ directory
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
// S-direction is INNERMOST and takes no part in the parity.
|
||||
const std::vector<int> WilsonFermion5DStatic::directions ({1,2,3,4, 1, 2, 3, 4});
|
||||
const std::vector<int> WilsonFermion5DStatic::displacements({1,1,1,1,-1,-1,-1,-1});
|
||||
|
||||
const std::vector<int> WilsonFermion5DStatic::directions (WilsonFermion5DStatic::MakeDirections());
|
||||
const std::vector<int> WilsonFermion5DStatic::displacements(WilsonFermion5DStatic::MakeDisplacements());
|
||||
|
||||
std::vector<int> WilsonFermion5DStatic::MakeDirections (void)
|
||||
{
|
||||
std::vector<int> directions(2*Nd);
|
||||
for(int d=0;d<Nd;d++){
|
||||
directions[d] = d+1;
|
||||
directions[d+Nd] = d+1;
|
||||
}
|
||||
return directions;
|
||||
}
|
||||
std::vector<int> WilsonFermion5DStatic::MakeDisplacements(void)
|
||||
{
|
||||
std::vector<int> displacements(2*Nd);
|
||||
for(int d=0;d<Nd;d++){
|
||||
displacements[d] = +1;
|
||||
displacements[d+Nd] = -1;
|
||||
}
|
||||
return displacements;
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
||||
@@ -33,9 +33,27 @@ directory
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
const std::vector<int> WilsonFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, 3});
|
||||
const std::vector<int> WilsonFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1});
|
||||
const std::vector<int> WilsonFermionStatic::directions(WilsonFermionStatic::MakeDirections());
|
||||
const std::vector<int> WilsonFermionStatic::displacements(WilsonFermionStatic::MakeDisplacements());
|
||||
int WilsonFermionStatic::HandOptDslash;
|
||||
std::vector<int> WilsonFermionStatic::MakeDirections (void)
|
||||
{
|
||||
std::vector<int> directions(2*Nd);
|
||||
for(int d=0;d<Nd;d++){
|
||||
directions[d] = d;
|
||||
directions[d+Nd] = d;
|
||||
}
|
||||
return directions;
|
||||
}
|
||||
std::vector<int> WilsonFermionStatic::MakeDisplacements(void)
|
||||
{
|
||||
std::vector<int> displacements(2*Nd);
|
||||
for(int d=0;d<Nd;d++){
|
||||
displacements[d] = +1;
|
||||
displacements[d+Nd] = -1;
|
||||
}
|
||||
return displacements;
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
||||
@@ -36,11 +36,16 @@ DWF_IMPL_LIST=" \
|
||||
ZWilsonImplF \
|
||||
ZWilsonImplD2 "
|
||||
|
||||
TWOSPIN_WILSON_IMPL_LIST=" \
|
||||
TwoSpinWilsonImplF \
|
||||
TwoSpinWilsonImplD "
|
||||
|
||||
|
||||
GDWF_IMPL_LIST=" \
|
||||
GparityWilsonImplF \
|
||||
GparityWilsonImplD "
|
||||
|
||||
IMPL_LIST="$STAG_IMPL_LIST $WILSON_IMPL_LIST $DWF_IMPL_LIST $GDWF_IMPL_LIST"
|
||||
IMPL_LIST="$STAG_IMPL_LIST $WILSON_IMPL_LIST $DWF_IMPL_LIST $GDWF_IMPL_LIST $TWOSPIN_WILSON_IMPL_LIST"
|
||||
|
||||
for impl in $IMPL_LIST
|
||||
do
|
||||
@@ -110,7 +115,12 @@ do
|
||||
done
|
||||
done
|
||||
|
||||
CC_LIST=" \
|
||||
ImprovedStaggeredFermion5DInstantiation \
|
||||
StaggeredKernelsInstantiation "
|
||||
CC_LIST="TwoSpinWilsonFermion3plus1DInstantiation.cc.master TwoSpinWilsonKernelsInstantiation.cc.master"
|
||||
|
||||
for impl in $TWOSPIN_WILSON_IMPL_LIST
|
||||
do
|
||||
for f in $CC_LIST
|
||||
do
|
||||
ln -f -s ../$f.cc.master $impl/$f$impl.cc
|
||||
done
|
||||
done
|
||||
|
||||
@@ -158,8 +158,8 @@ RealD WilsonFlowBase<Gimpl>::energyDensityCloverleaf(const RealD t, const GaugeF
|
||||
LatticeComplexD R(U.Grid());
|
||||
R = Zero();
|
||||
|
||||
for(int mu=0;mu<3;mu++){
|
||||
for(int nu=mu+1;nu<4;nu++){
|
||||
for(int mu=0;mu<Nd-1;mu++){
|
||||
for(int nu=mu+1;nu<Nd;nu++){
|
||||
WilsonLoops<Gimpl>::FieldStrength(F, U, mu, nu);
|
||||
R = R + trace(F*F);
|
||||
}
|
||||
@@ -252,6 +252,11 @@ void WilsonFlow<Gimpl>::smear(GaugeField& out, const GaugeField& in) const{
|
||||
|
||||
out = in;
|
||||
RealD taus = 0.;
|
||||
|
||||
// Perform initial t=0 measurements
|
||||
for(auto const &meas : this->functions)
|
||||
meas.second(0,taus,out);
|
||||
|
||||
for (unsigned int step = 1; step <= Nstep; step++) { //step indicates the number of smearing steps applied at the time of measurement
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
evolve_step(out, taus);
|
||||
@@ -336,6 +341,11 @@ void WilsonFlowAdaptive<Gimpl>::smear(GaugeField& out, const GaugeField& in) con
|
||||
RealD taus = 0.;
|
||||
RealD eps = init_epsilon;
|
||||
unsigned int step = 0;
|
||||
|
||||
// Perform initial t=0 measurements
|
||||
for(auto const &meas : this->functions)
|
||||
meas.second(step,taus,out);
|
||||
|
||||
do{
|
||||
int step_success = evolve_step_adaptive(out, taus, eps);
|
||||
step += step_success; //step will not be incremented if the integration step fails
|
||||
|
||||
220
Grid/qcd/spin/Pauli.h
Normal file
220
Grid/qcd/spin/Pauli.h
Normal file
@@ -0,0 +1,220 @@
|
||||
#ifndef GRID_QCD_PAULI_H
|
||||
#define GRID_QCD_PAULI_H
|
||||
|
||||
#include <array>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
//
|
||||
/*
|
||||
* Pauli basis
|
||||
* sx sy sz ident
|
||||
* (0 1) , (0 -i) , ( 1 0 )
|
||||
* (1 0) (i 0) ( 0 -1)
|
||||
*
|
||||
* These are hermitian.
|
||||
*
|
||||
* Also supply wilson "projectors" (1+/-sx), (1+/-sy), (1+/-sz)
|
||||
*
|
||||
* spPauliProjXm
|
||||
* spPauliProjYm etc...
|
||||
*/
|
||||
class Pauli {
|
||||
public:
|
||||
GRID_SERIALIZABLE_ENUM(Algebra, undef,
|
||||
SigmaX , 0,
|
||||
MinusSigmaX , 1,
|
||||
SigmaY , 2,
|
||||
MinusSigmaY , 3,
|
||||
SigmaZ , 4,
|
||||
MinusSigmaZ , 5,
|
||||
Identity , 6,
|
||||
MinusIdentity , 7);
|
||||
|
||||
static constexpr unsigned int nPauli = 8;
|
||||
static const std::array<const char *, nPauli> name;
|
||||
static const std::array<std::array<Algebra, nPauli>, nPauli> mul;
|
||||
static const std::array<Algebra, nPauli> adj;
|
||||
static const std::array<const Pauli, 4> gmu;
|
||||
static const std::array<const Pauli, 16> gall;
|
||||
Algebra g;
|
||||
public:
|
||||
accelerator Pauli(Algebra initg): g(initg) {}
|
||||
};
|
||||
|
||||
#define CopyImplementation(iTemplate,multPauli,multFlavour) \
|
||||
template<class vtype> \
|
||||
accelerator_inline void multPauli(iTemplate<vtype, Nhs> &ret, const iTemplate<vtype, Nhs> &rhs) { \
|
||||
multFlavour(ret,rhs); \
|
||||
}
|
||||
|
||||
CopyImplementation(iVector,multPauliSigmaX,multFlavourSigmaX);
|
||||
CopyImplementation(iMatrix,lmultPauliSigmaX,lmultFlavourSigmaX);
|
||||
CopyImplementation(iMatrix,rmultPauliSigmaX,rmultFlavourSigmaX);
|
||||
|
||||
CopyImplementation(iVector,multPauliMinusSigmaX ,multFlavourMinusSigmaX);
|
||||
CopyImplementation(iMatrix,lmultPauliMinusSigmaX,lmultFlavourMinusSigmaX);
|
||||
CopyImplementation(iMatrix,rmultPauliMinusSigmaX,rmultFlavourMinusSigmaX);
|
||||
|
||||
CopyImplementation(iVector,multPauliSigmaY,multFlavourSigmaY);
|
||||
CopyImplementation(iMatrix,lmultPauliSigmaY,lmultFlavourSigmaY);
|
||||
CopyImplementation(iMatrix,rmultPauliSigmaY,rmultFlavourSigmaY);
|
||||
|
||||
CopyImplementation(iVector,multPauliMinusSigmaY ,multFlavourMinusSigmaY);
|
||||
CopyImplementation(iMatrix,lmultPauliMinusSigmaY,lmultFlavourMinusSigmaY);
|
||||
CopyImplementation(iMatrix,rmultPauliMinusSigmaY,rmultFlavourMinusSigmaY);
|
||||
|
||||
CopyImplementation(iVector,multPauliSigmaZ,multFlavourSigmaZ);
|
||||
CopyImplementation(iMatrix,lmultPauliSigmaZ,lmultFlavourSigmaZ);
|
||||
CopyImplementation(iMatrix,rmultPauliSigmaZ,rmultFlavourSigmaZ);
|
||||
|
||||
CopyImplementation(iVector,multPauliMinusSigmaZ ,multFlavourMinusSigmaZ);
|
||||
CopyImplementation(iMatrix,lmultPauliMinusSigmaZ,lmultFlavourMinusSigmaZ);
|
||||
CopyImplementation(iMatrix,rmultPauliMinusSigmaZ,rmultFlavourMinusSigmaZ);
|
||||
|
||||
CopyImplementation(iVector,multPauliIdentity,multFlavourIdentity);
|
||||
CopyImplementation(iMatrix,lmultPauliIdentity,lmultFlavourIdentity);
|
||||
CopyImplementation(iMatrix,rmultPauliIdentity,rmultFlavourIdentity);
|
||||
|
||||
CopyImplementation(iVector,multPauliMinusIdentity ,multFlavourMinusIdentity);
|
||||
CopyImplementation(iMatrix,lmultPauliMinusIdentity,lmultFlavourMinusIdentity);
|
||||
CopyImplementation(iMatrix,rmultPauliMinusIdentity,rmultFlavourMinusIdentity);
|
||||
|
||||
/*
|
||||
* sx sy sz ident
|
||||
* (0 1) , (0 -i) , ( 1 0 )
|
||||
* (1 0) (i 0) ( 0 -1)
|
||||
*/
|
||||
template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliProjXp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Nhs> &fspin)
|
||||
{
|
||||
hspin(0)=fspin(0)+fspin(1);
|
||||
hspin(1)=fspin(1)+fspin(0);
|
||||
}
|
||||
template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliProjXm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Nhs> &fspin)
|
||||
{
|
||||
hspin(0)=fspin(0)-fspin(1);
|
||||
hspin(1)=fspin(1)-fspin(0);
|
||||
}
|
||||
|
||||
template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliProjYp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Nhs> &fspin)
|
||||
{
|
||||
hspin(0)=fspin(0)-timesI(fspin(1));
|
||||
hspin(1)=fspin(1)+timesI(fspin(0));
|
||||
}
|
||||
template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliProjYm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Nhs> &fspin)
|
||||
{
|
||||
hspin(0)=fspin(0)+timesI(fspin(1));
|
||||
hspin(1)=fspin(1)-timesI(fspin(0));
|
||||
}
|
||||
template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliProjZp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Nhs> &fspin)
|
||||
{
|
||||
hspin(0)=fspin(0)+fspin(0);
|
||||
hspin(1)=Zero();
|
||||
}
|
||||
template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliProjZm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Nhs> &fspin)
|
||||
{
|
||||
hspin(0)=Zero();
|
||||
hspin(1)=fspin(1)+fspin(1);
|
||||
}
|
||||
template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliAssign(iVector<vtype,Nhs> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||
{
|
||||
fspin = hspin;
|
||||
}
|
||||
template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliAdd (iVector<vtype,Nhs> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||
{
|
||||
fspin = fspin + hspin;
|
||||
}
|
||||
|
||||
template<class vtype>
|
||||
accelerator_inline auto operator*(const Pauli &G, const iVector<vtype, Nhs> &arg)
|
||||
->typename std::enable_if<matchGridTensorIndex<iVector<vtype, Nhs>, PauliIndex>::value, iVector<vtype, Nhs>>::type
|
||||
{
|
||||
iVector<vtype, Nhs> ret;
|
||||
|
||||
switch (G.g)
|
||||
{
|
||||
case Pauli::Algebra::SigmaX:
|
||||
multPauliSigmaX(ret, arg); break;
|
||||
case Pauli::Algebra::MinusSigmaX:
|
||||
multPauliMinusSigmaX(ret, arg); break;
|
||||
case Pauli::Algebra::SigmaY:
|
||||
multPauliSigmaY(ret, arg); break;
|
||||
case Pauli::Algebra::MinusSigmaY:
|
||||
multPauliMinusSigmaY(ret, arg); break;
|
||||
case Pauli::Algebra::SigmaZ:
|
||||
multPauliSigmaZ(ret, arg); break;
|
||||
case Pauli::Algebra::MinusSigmaZ:
|
||||
multPauliMinusSigmaZ(ret, arg); break;
|
||||
case Pauli::Algebra::Identity:
|
||||
multPauliIdentity(ret, arg); break;
|
||||
case Pauli::Algebra::MinusIdentity:
|
||||
multPauliMinusIdentity(ret, arg); break;
|
||||
default: assert(0);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class vtype>
|
||||
accelerator_inline auto operator*(const Pauli &G, const iMatrix<vtype, Nhs> &arg)
|
||||
->typename std::enable_if<matchGridTensorIndex<iMatrix<vtype, Nhs>, PauliIndex>::value, iMatrix<vtype, Nhs>>::type
|
||||
{
|
||||
iMatrix<vtype, Nhs> ret;
|
||||
|
||||
switch (G.g)
|
||||
{
|
||||
case Pauli::Algebra::SigmaX:
|
||||
lmultPauliSigmaX(ret, arg); break;
|
||||
case Pauli::Algebra::MinusSigmaX:
|
||||
lmultPauliMinusSigmaX(ret, arg); break;
|
||||
case Pauli::Algebra::SigmaY:
|
||||
lmultPauliSigmaY(ret, arg); break;
|
||||
case Pauli::Algebra::MinusSigmaY:
|
||||
lmultPauliMinusSigmaY(ret, arg); break;
|
||||
case Pauli::Algebra::SigmaZ:
|
||||
lmultPauliSigmaZ(ret, arg); break;
|
||||
case Pauli::Algebra::MinusSigmaZ:
|
||||
lmultPauliMinusSigmaZ(ret, arg); break;
|
||||
case Pauli::Algebra::Identity:
|
||||
lmultPauliIdentity(ret, arg); break;
|
||||
case Pauli::Algebra::MinusIdentity:
|
||||
lmultPauliMinusIdentity(ret, arg); break;
|
||||
default: assert(0);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class vtype>
|
||||
accelerator_inline auto operator*(const iMatrix<vtype, Nhs> &arg, const Pauli &G)
|
||||
->typename std::enable_if<matchGridTensorIndex<iMatrix<vtype, Nhs>, PauliIndex>::value, iMatrix<vtype, Nhs>>::type
|
||||
{
|
||||
iMatrix<vtype, Nhs> ret;
|
||||
|
||||
switch (G.g)
|
||||
{
|
||||
case Pauli::Algebra::SigmaX:
|
||||
rmultPauliSigmaX(ret, arg); break;
|
||||
case Pauli::Algebra::MinusSigmaX:
|
||||
rmultPauliMinusSigmaX(ret, arg); break;
|
||||
case Pauli::Algebra::SigmaY:
|
||||
rmultPauliSigmaY(ret, arg); break;
|
||||
case Pauli::Algebra::MinusSigmaY:
|
||||
rmultPauliMinusSigmaY(ret, arg); break;
|
||||
case Pauli::Algebra::SigmaZ:
|
||||
rmultPauliSigmaZ(ret, arg); break;
|
||||
case Pauli::Algebra::MinusSigmaZ:
|
||||
rmultPauliMinusSigmaZ(ret, arg); break;
|
||||
case Pauli::Algebra::Identity:
|
||||
rmultPauliIdentity(ret, arg); break;
|
||||
case Pauli::Algebra::MinusIdentity:
|
||||
rmultPauliMinusIdentity(ret, arg); break;
|
||||
default: assert(0);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif // GRID_QCD_GAMMA_H
|
||||
@@ -179,20 +179,17 @@ public:
|
||||
//////////////////////////////////////////////////
|
||||
// average over all x,y,z the temporal loop
|
||||
//////////////////////////////////////////////////
|
||||
static ComplexD avgPolyakovLoop(const GaugeField &Umu) { //assume Nd=4
|
||||
static ComplexD avgPolyakovLoop(const GaugeField &Umu) {
|
||||
GaugeMat Ut(Umu.Grid()), P(Umu.Grid());
|
||||
ComplexD out;
|
||||
int T = Umu.Grid()->GlobalDimensions()[3];
|
||||
int X = Umu.Grid()->GlobalDimensions()[0];
|
||||
int Y = Umu.Grid()->GlobalDimensions()[1];
|
||||
int Z = Umu.Grid()->GlobalDimensions()[2];
|
||||
|
||||
Ut = peekLorentz(Umu,3); //Select temporal direction
|
||||
uint64_t vol = Umu.Grid()->gSites();
|
||||
int T = Umu.Grid()->GlobalDimensions()[Nd-1];
|
||||
Ut = peekLorentz(Umu,Nd-1); //Select temporal direction
|
||||
P = Ut;
|
||||
for (int t=1;t<T;t++){
|
||||
P = Gimpl::CovShiftForward(Ut,3,P);
|
||||
P = Gimpl::CovShiftForward(Ut,Nd-1,P);
|
||||
}
|
||||
RealD norm = 1.0/(Nc*X*Y*Z*T);
|
||||
RealD norm = 1.0/(Nc*vol);
|
||||
out = sum(trace(P))*norm;
|
||||
return out;
|
||||
}
|
||||
@@ -215,7 +212,7 @@ public:
|
||||
|
||||
double vol = Umu.Grid()->gSites();
|
||||
|
||||
return p.real() / vol / (4.0 * Nc ) ;
|
||||
return p.real() / vol / (Nd * Nc ) ;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
@@ -740,6 +737,7 @@ public:
|
||||
//cf https://arxiv.org/pdf/hep-lat/9701012.pdf Eq 6
|
||||
//output is the charge by timeslice: sum over timeslices to obtain the total
|
||||
static std::vector<Real> TimesliceTopologicalChargeMxN(const GaugeLorentz &U, int M, int N){
|
||||
// Audit: 4D epsilon is hard coded
|
||||
assert(Nd == 4);
|
||||
std::vector<std::vector<GaugeMat*> > F(Nd,std::vector<GaugeMat*>(Nd,nullptr));
|
||||
//Note F_numu = - F_munu
|
||||
@@ -829,6 +827,25 @@ public:
|
||||
return out;
|
||||
}
|
||||
|
||||
//Compute the 5Li topological charge density
|
||||
static std::vector<Real> TopologicalChargeDensity5Li(const GaugeLorentz &U){
|
||||
|
||||
static const int exts[5][2] = { {1,1}, {2,2}, {1,2}, {1,3}, {3,3} };
|
||||
std::vector<std::vector<Real> > loops = TimesliceTopologicalCharge5LiContributions(U);
|
||||
|
||||
double c5=1./20.;
|
||||
double c4=1./5.-2.*c5;
|
||||
double c3=(-64.+640.*c5)/45.;
|
||||
double c2=(1-64.*c5)/9.;
|
||||
double c1=(19.-55.*c5)/9.;
|
||||
|
||||
int Lt = loops[0].size();
|
||||
std::vector<Real> out(Lt,0.);
|
||||
for(int t=0;t<Lt;t++)
|
||||
out[t] += c1*loops[0][t] + c2*loops[1][t] + c3*loops[2][t] + c4*loops[3][t] + c5*loops[4][t];
|
||||
return out;
|
||||
}
|
||||
|
||||
static Real TopologicalCharge5Li(const GaugeLorentz &U){
|
||||
std::vector<Real> Qt = TimesliceTopologicalCharge5Li(U);
|
||||
Real Q = 0.;
|
||||
@@ -1455,7 +1472,7 @@ public:
|
||||
//////////////////////////////////////////////////
|
||||
static Real sumWilsonLoop(const GaugeLorentz &Umu,
|
||||
const int R1, const int R2) {
|
||||
std::vector<GaugeMat> U(4, Umu.Grid());
|
||||
std::vector<GaugeMat> U(Nd, Umu.Grid());
|
||||
|
||||
for (int mu = 0; mu < Umu.Grid()->_ndimension; mu++) {
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
|
||||
@@ -1474,7 +1491,7 @@ public:
|
||||
//////////////////////////////////////////////////
|
||||
static Real sumTimelikeWilsonLoop(const GaugeLorentz &Umu,
|
||||
const int R1, const int R2) {
|
||||
std::vector<GaugeMat> U(4, Umu.Grid());
|
||||
std::vector<GaugeMat> U(Nd, Umu.Grid());
|
||||
|
||||
for (int mu = 0; mu < Umu.Grid()->_ndimension; mu++) {
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
|
||||
@@ -1492,8 +1509,8 @@ public:
|
||||
// sum over all x,y,z,t and over all planes of spatial Wilson loop
|
||||
//////////////////////////////////////////////////
|
||||
static Real sumSpatialWilsonLoop(const GaugeLorentz &Umu,
|
||||
const int R1, const int R2) {
|
||||
std::vector<GaugeMat> U(4, Umu.Grid());
|
||||
const int R1, const int R2) {
|
||||
std::vector<GaugeMat> U(Nd, Umu.Grid());
|
||||
|
||||
for (int mu = 0; mu < Umu.Grid()->_ndimension; mu++) {
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
|
||||
|
||||
@@ -396,6 +396,7 @@ public:
|
||||
Packets[i].from_rank,Packets[i].do_recv,
|
||||
Packets[i].xbytes,Packets[i].rbytes,i);
|
||||
}
|
||||
FlightRecorder::StepLog("Communicate begin has finished");
|
||||
// Get comms started then run checksums
|
||||
// Having this PRIOR to the dslash seems to make Sunspot work... (!)
|
||||
for(int i=0;i<Packets.size();i++){
|
||||
|
||||
@@ -251,7 +251,7 @@ inline void acceleratorCopyToDevice(const void *from,void *to,size_t bytes) { c
|
||||
inline void acceleratorCopyFromDevice(const void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);}
|
||||
inline void acceleratorMemSet(void *base,int value,size_t bytes) { cudaMemset(base,value,bytes);}
|
||||
inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(void *from, void *to, size_t bytes, cudaStream_t stream = copyStream) {
|
||||
acceleratorCopyToDevice(to,from,bytes, cudaMemcpyHostToDevice);
|
||||
acceleratorCopyToDevice(from,to,bytes);
|
||||
return 0;
|
||||
}
|
||||
inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(void *from, void *to, size_t bytes, cudaStream_t stream = copyStream) {
|
||||
@@ -337,7 +337,7 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) {
|
||||
cgh.parallel_for( \
|
||||
sycl::nd_range<3>(global,local), \
|
||||
[=] (sycl::nd_item<3> item) /*mutable*/ \
|
||||
[[intel::reqd_sub_group_size(16)]] \
|
||||
[[sycl::reqd_sub_group_size(16)]] \
|
||||
{ \
|
||||
auto iter1 = item.get_global_id(0); \
|
||||
auto iter2 = item.get_global_id(1); \
|
||||
|
||||
@@ -28,11 +28,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
#ifndef MIN
|
||||
#define MIN(x,y) ((x)>(y)?(y):(x))
|
||||
#endif
|
||||
|
||||
|
||||
// Introduce a class to gain deterministic bit reproducible reduction.
|
||||
// make static; perhaps just a namespace is required.
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
@@ -187,9 +187,10 @@ void GridParseLayout(char **argv,int argc,
|
||||
Coordinate &latt_c,
|
||||
Coordinate &mpi_c)
|
||||
{
|
||||
auto mpi =std::vector<int>({1,1,1,1});
|
||||
auto latt=std::vector<int>({8,8,8,8});
|
||||
|
||||
auto mpi =std::vector<int>(Nd,1);
|
||||
auto latt=std::vector<int>(Nd,8);
|
||||
std::cout << "Default mpi "<<mpi<<std::endl;
|
||||
std::cout << "Default latt"<<latt<<std::endl;
|
||||
GridThread::SetMaxThreads();
|
||||
|
||||
std::string arg;
|
||||
@@ -228,6 +229,9 @@ void GridParseLayout(char **argv,int argc,
|
||||
}
|
||||
// Copy back into coordinate format
|
||||
int nd = mpi.size();
|
||||
std::cout << "mpi.size() "<<nd<<std::endl;
|
||||
std::cout << "latt.size() "<<latt.size()<<std::endl;
|
||||
std::cout << "Nd "<<Nd<<std::endl;
|
||||
assert(latt.size()==nd);
|
||||
latt_c.resize(nd);
|
||||
mpi_c.resize(nd);
|
||||
@@ -638,12 +642,11 @@ void Grid_debug_handler_init(void)
|
||||
sa.sa_flags = SA_SIGINFO;
|
||||
// sigaction(SIGSEGV,&sa,NULL);
|
||||
sigaction(SIGTRAP,&sa,NULL);
|
||||
sigaction(SIGBUS,&sa,NULL);
|
||||
// sigaction(SIGBUS,&sa,NULL);
|
||||
// sigaction(SIGUSR2,&sa,NULL);
|
||||
|
||||
feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO);
|
||||
|
||||
sigaction(SIGFPE,&sa,NULL);
|
||||
// feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO);
|
||||
// sigaction(SIGFPE,&sa,NULL);
|
||||
sigaction(SIGKILL,&sa,NULL);
|
||||
sigaction(SIGILL,&sa,NULL);
|
||||
|
||||
|
||||
@@ -66,6 +66,7 @@ namespace Grid{
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
template <class T> void writeFile(T& in, std::string const fname){
|
||||
#ifdef HAVE_LIME
|
||||
// Ref: https://github.com/paboyle/Grid/blob/feature/scidac-wp1/tests/debug/Test_general_coarse_hdcg_phys48.cc#L111
|
||||
@@ -73,7 +74,7 @@ template <class T> void writeFile(T& in, std::string const fname){
|
||||
Grid::emptyUserRecord record;
|
||||
Grid::ScidacWriter WR(in.Grid()->IsBoss());
|
||||
WR.open(fname);
|
||||
WR.writeScidacFieldRecord(in,record,0);
|
||||
WR.writeScidacFieldRecord(in,record,0); // Lexico
|
||||
WR.close();
|
||||
#endif
|
||||
// What is the appropriate way to throw error?
|
||||
@@ -107,8 +108,18 @@ int main(int argc, char **argv) {
|
||||
|
||||
for (int conf = CPar.StartConfiguration; conf <= CPar.EndConfiguration; conf+= CPar.Skip){
|
||||
|
||||
#if 0
|
||||
CPNersc.CheckpointRestore(conf, Umu, sRNG, pRNG);
|
||||
#else
|
||||
// Don't require Grid format RNGs
|
||||
FieldMetaData header;
|
||||
std::string file, filesmr;
|
||||
file = CPar.conf_path + "/" + CPar.conf_prefix + "." + std::to_string(conf);
|
||||
filesmr = CPar.conf_path + "/" + CPar.conf_smr_prefix + "." + std::to_string(conf);
|
||||
|
||||
NerscIO::readConfiguration(Umu,header,file);
|
||||
#endif
|
||||
|
||||
std::cout << std::setprecision(15);
|
||||
std::cout << GridLogMessage << "Initial plaquette: "<< WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu) << std::endl;
|
||||
|
||||
@@ -116,6 +127,7 @@ int main(int argc, char **argv) {
|
||||
std::string file_post = CPar.conf_prefix + "." + std::to_string(conf);
|
||||
|
||||
WilsonFlow<PeriodicGimplR> WF(WFPar.step_size,WFPar.steps,WFPar.meas_interval);
|
||||
|
||||
WF.addMeasurement(WFPar.meas_interval_density, [&file_pre,&file_post,&conf](int step, RealD t, const typename PeriodicGimplR::GaugeField &U){
|
||||
|
||||
typedef typename PeriodicGimplR::GaugeLinkField GaugeMat;
|
||||
@@ -165,33 +177,48 @@ int main(int argc, char **argv) {
|
||||
//double coeff = 2.0 / (1.0 * Nd * (Nd - 1)) / 3.0;
|
||||
//Plq = coeff * Plq;
|
||||
|
||||
int tau = std::round(t);
|
||||
std::string efile = file_pre + "E_dnsty_" + std::to_string(tau) + "_" + file_post;
|
||||
writeFile(R,efile);
|
||||
std::string tfile = file_pre + "Top_dnsty_" + std::to_string(tau) + "_" + file_post;
|
||||
writeFile(qfield,tfile);
|
||||
|
||||
RealD WFlow_TC5Li = WilsonLoops<PeriodicGimplR>::TopologicalCharge5Li(U);
|
||||
|
||||
int tau = std::round(t);
|
||||
|
||||
std::string efile = file_pre + "E_dnsty_" + std::to_string(tau) + "_" + file_post;
|
||||
// writeFile(R,efile);
|
||||
|
||||
std::string tfile = file_pre + "Top_dnsty_" + std::to_string(tau) + "_" + file_post;
|
||||
// writeFile(qfield,tfile);
|
||||
|
||||
std::string ufile = file_pre + "U_" + std::to_string(tau) + "_" + file_post;
|
||||
{
|
||||
// PeriodicGimplR::GaugeField Ucopy = U;
|
||||
// NerscIO::writeConfiguration(Ucopy,ufile);
|
||||
}
|
||||
|
||||
RealD E = real(sum(R))/ RealD(U.Grid()->gSites());
|
||||
RealD T = real( sum(qfield) );
|
||||
Coordinate scoor; for (int mu=0; mu < Nd; mu++) scoor[mu] = 0;
|
||||
RealD E0 = real(peekSite(R,scoor));
|
||||
RealD T0 = real(peekSite(qfield,scoor));
|
||||
std::cout << GridLogMessage << "[WilsonFlow] Saved energy density (clover) & topo. charge density: " << conf << " " << step << " " << tau << " "
|
||||
<< "(E_avg,T_sum) " << E << " " << T << " (E, T at origin) " << E0 << " " << T0 << std::endl;
|
||||
<< "(E_avg,T_sum) " << E << " " << T << " (E, T at origin) " << E0 << " " << T0 << " Q5Li "<< WFlow_TC5Li << std::endl;
|
||||
|
||||
});
|
||||
|
||||
int t=WFPar.maxTau;
|
||||
WF.smear(Uflow, Umu);
|
||||
|
||||
// NerscIO::writeConfiguration(Uflow,filesmr);
|
||||
|
||||
|
||||
RealD WFlow_plaq = WilsonLoops<PeriodicGimplR>::avgPlaquette(Uflow);
|
||||
RealD WFlow_TC = WilsonLoops<PeriodicGimplR>::TopologicalCharge(Uflow);
|
||||
RealD WFlow_TC5Li = WilsonLoops<PeriodicGimplR>::TopologicalCharge5Li(Uflow);
|
||||
RealD WFlow_T0 = WF.energyDensityPlaquette(t,Uflow); // t
|
||||
RealD WFlow_EC = WF.energyDensityCloverleaf(t,Uflow);
|
||||
std::cout << GridLogMessage << "Plaquette "<< conf << " " << WFlow_plaq << std::endl;
|
||||
std::cout << GridLogMessage << "T0 "<< conf << " " << WFlow_T0 << std::endl;
|
||||
std::cout << GridLogMessage << "TC0 "<< conf << " " << WFlow_EC << std::endl;
|
||||
std::cout << GridLogMessage << "TopologicalCharge "<< conf << " " << WFlow_TC << std::endl;
|
||||
std::cout << GridLogMessage << "Plaquette "<< conf << " " << WFlow_plaq << std::endl;
|
||||
std::cout << GridLogMessage << "T0 "<< conf << " " << WFlow_T0 << std::endl;
|
||||
std::cout << GridLogMessage << "TC0 "<< conf << " " << WFlow_EC << std::endl;
|
||||
std::cout << GridLogMessage << "TopologicalCharge "<< conf << " " << WFlow_TC << std::endl;
|
||||
std::cout << GridLogMessage << "TopologicalCharge5Li "<< conf << " " << WFlow_TC5Li<< std::endl;
|
||||
|
||||
std::cout<< GridLogMessage << " Admissibility check:\n";
|
||||
const double sp_adm = 0.067; // admissible threshold
|
||||
|
||||
5
TODO
5
TODO
@@ -1,3 +1,8 @@
|
||||
|
||||
* Clean up the extract merge and replace with insertLane/extractLane
|
||||
|
||||
-----
|
||||
|
||||
i) Refine subspace with HDCG & recompute
|
||||
ii) Block Lanczos in coarse space
|
||||
iii) Batched block project in the operator computation
|
||||
|
||||
@@ -873,7 +873,7 @@ int main (int argc, char ** argv)
|
||||
int do_su4=0;
|
||||
int do_memory=1;
|
||||
int do_comms =1;
|
||||
int do_blas =0;
|
||||
int do_blas =1;
|
||||
int do_dslash=1;
|
||||
|
||||
int sel=4;
|
||||
|
||||
18
configure.ac
18
configure.ac
@@ -198,6 +198,8 @@ AC_ARG_ENABLE([Nc],
|
||||
[ac_Nc=${enable_Nc}], [ac_Nc=3])
|
||||
|
||||
case ${ac_Nc} in
|
||||
1)
|
||||
AC_DEFINE([Config_Nc],[1],[Gauge group Nc]);;
|
||||
2)
|
||||
AC_DEFINE([Config_Nc],[2],[Gauge group Nc]);;
|
||||
3)
|
||||
@@ -211,6 +213,21 @@ case ${ac_Nc} in
|
||||
*)
|
||||
AC_MSG_ERROR(["Unsupport gauge group choice Nc = ${ac_Nc}"]);;
|
||||
esac
|
||||
############### Nd
|
||||
AC_ARG_ENABLE([Nd],
|
||||
[AS_HELP_STRING([--enable-Nd=2|3|4],[enable default LGT dimension])],
|
||||
[ac_Nd=${enable_Nd}], [ac_Nd=4])
|
||||
|
||||
case ${ac_Nd} in
|
||||
2)
|
||||
AC_DEFINE([Config_Nd],[2],[Gauge field dimension Nd]);;
|
||||
3)
|
||||
AC_DEFINE([Config_Nd],[3],[Gauge field dimension Nd]);;
|
||||
4)
|
||||
AC_DEFINE([Config_Nd],[4],[Gauge field dimension Nd]);;
|
||||
*)
|
||||
AC_MSG_ERROR(["Unsupport dimension Nd = ${ac_Nd}"]);;
|
||||
esac
|
||||
|
||||
############### Symplectic group
|
||||
AC_ARG_ENABLE([Sp],
|
||||
@@ -818,6 +835,7 @@ os (target) : $target_os
|
||||
compiler vendor : ${ax_cv_cxx_compiler_vendor}
|
||||
compiler version : ${ax_cv_gxx_version}
|
||||
----- BUILD OPTIONS -----------------------------------
|
||||
Nd : ${ac_Nd}
|
||||
Nc : ${ac_Nc}
|
||||
SIMD : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG}
|
||||
Threading : ${ac_openmp}
|
||||
|
||||
11
systems/mac-arm/config-command
Normal file
11
systems/mac-arm/config-command
Normal file
@@ -0,0 +1,11 @@
|
||||
MPICXX=mpicxx CXXFLAGS=-I/opt/local/include LDFLAGS=-L/opt/local/lib/ CXX=clang++ ../../configure \
|
||||
--enable-simd=GEN \
|
||||
--enable-comms=mpi-auto \
|
||||
--enable-unified=yes \
|
||||
--prefix $HOME/QCD/GridInstall \
|
||||
--with-lime=/Users/peterboyle/QCD/SciDAC/install/ \
|
||||
--with-openssl=$BREW \
|
||||
--disable-fermion-reps \
|
||||
--disable-gparity \
|
||||
--enable-debug
|
||||
|
||||
@@ -7,8 +7,6 @@ CXX=mpicxx ../../configure \
|
||||
--enable-unified=yes \
|
||||
--prefix /Users/peterboyle/QCD/vtk/Grid/install \
|
||||
--with-lime=$CLIME \
|
||||
--with-hdf5=$HDF5 \
|
||||
--with-fftw=$FFTW \
|
||||
--with-openssl=$OPENSSL \
|
||||
--with-gmp=$GMP \
|
||||
--with-mpfr=$MPFR \
|
||||
|
||||
@@ -1,3 +1,12 @@
|
||||
|
||||
spack load c-lime
|
||||
spack load fftw
|
||||
spack load hdf5+cxx
|
||||
|
||||
export FFTW=`spack find --paths fftw | grep ^fftw | awk '{print $2}' `
|
||||
export HDF5=`spack find --paths hdf5+cxx | grep ^hdf5 | awk '{print $2}' `
|
||||
export CLIME=`spack find --paths c-lime | grep ^c-lime | awk '{print $2}' `
|
||||
|
||||
../../configure \
|
||||
--enable-comms=mpi-auto \
|
||||
--enable-unified=yes \
|
||||
@@ -5,12 +14,16 @@
|
||||
--enable-shm-fast-path=shmopen \
|
||||
--enable-accelerator=none \
|
||||
--enable-simd=AVX512 \
|
||||
--disable-accelerator-cshift \
|
||||
--with-lime=$CLIME \
|
||||
--with-hdf5=$HDF5 \
|
||||
--with-fftw=$FFTW \
|
||||
--disable-fermion-reps \
|
||||
--disable-gparity \
|
||||
CXX=clang++ \
|
||||
MPICXX=mpicxx \
|
||||
CXXFLAGS="-std=c++17"
|
||||
LIBS=-llime \
|
||||
LDFLAGS=-L$CLIME/lib/ \
|
||||
CXXFLAGS="-std=c++17 -fPIE"
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
source $HOME/spack/share/spack/setup-env.sh
|
||||
spack load llvm@17.0.4
|
||||
export LD_LIBRARY_PATH=/direct/sdcc+u/paboyle/spack/opt/spack/linux-almalinux8-icelake/gcc-8.5.0/llvm-17.0.4-laufdrcip63ivkadmtgoepwmj3dtztdu/lib:$LD_LIBRARY_PATH
|
||||
module load openmpi
|
||||
module load openmpi/4.1.8
|
||||
spack load c-lime
|
||||
|
||||
93
tests/debug/Test_icosahedron.cc
Normal file
93
tests/debug/Test_icosahedron.cc
Normal file
@@ -0,0 +1,93 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./tests/debug/Test_icosahedron.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
|
||||
const int MyNd=3;
|
||||
template<typename vtype> using iIcosahedralLorentzComplex = iVector<iScalar<iScalar<vtype> >, MyNd+1 > ;
|
||||
|
||||
typedef iIcosahedralLorentzComplex<ComplexD > IcosahedralLorentzComplexD;
|
||||
typedef iIcosahedralLorentzComplex<vComplexD> vIcosahedralLorentzComplexD;
|
||||
typedef Lattice<vIcosahedralLorentzComplexD> LatticeIcosahedralLorentzComplexD;
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
const int L=8;
|
||||
const int Npatch = num_icosahedron_tiles;
|
||||
|
||||
// Put SIMD all in time direction
|
||||
Coordinate latt_size = GridDefaultLatt();
|
||||
Coordinate simd_layout({1,1,vComplexD::Nsimd(),1});
|
||||
Coordinate mpi_layout = GridDefaultMpi();
|
||||
|
||||
std::cout << GridLogMessage << " mpi "<<mpi_layout<<std::endl;
|
||||
std::cout << GridLogMessage << " simd "<<simd_layout<<std::endl;
|
||||
std::cout << GridLogMessage << " latt "<<latt_size<<std::endl;
|
||||
GridCartesianCrossIcosahedron EdgeGrid(latt_size,simd_layout,mpi_layout,IcosahedralEdges);
|
||||
std::cout << GridLogMessage << " Created edge grid "<<std::endl;
|
||||
GridCartesianCrossIcosahedron VertexGrid(latt_size,simd_layout,mpi_layout,IcosahedralVertices);
|
||||
|
||||
std::cout << GridLogMessage << " Created vertex grid "<<std::endl;
|
||||
LatticeIcosahedralLorentzComplexD Umu(&EdgeGrid);
|
||||
LatticeComplex Phi(&VertexGrid);
|
||||
std::cout << GridLogMessage << " Created two fields "<<std::endl;
|
||||
|
||||
Phi = Zero();
|
||||
Umu = Zero();
|
||||
std::cout << GridLogMessage << " Zeroed two fields "<<std::endl;
|
||||
|
||||
ComplexD one (1.0);
|
||||
Phi = one;
|
||||
Umu = one;
|
||||
|
||||
std::cout << GridLogMessage << " V = "<<norm2(Phi)<<std::endl;
|
||||
std::cout << GridLogMessage << " Expect "<<latt_size[0]*latt_size[1]*latt_size[2]*10+2*latt_size[2]<<std::endl;
|
||||
|
||||
std::cout << GridLogMessage << " E = "<<norm2(Umu)<<std::endl;
|
||||
std::cout << GridLogMessage << " Expect "<<latt_size[0]*latt_size[1]*latt_size[2]*10*4<<std::endl;
|
||||
|
||||
// std::cout << " Umu "<<Umu<<std::endl;
|
||||
// std::cout << " Phi "<<Phi<<std::endl;
|
||||
LatticePole(Phi,South);
|
||||
std::cout << " Phi South Pole set\n"<<Phi<<std::endl;
|
||||
|
||||
LatticePole(Phi,North);
|
||||
std::cout << " Phi North Pole set\n"<<Phi<<std::endl;
|
||||
|
||||
for(int mu=0;mu<VertexGrid._ndimension;mu++){
|
||||
std::cout << " Calling lattice coordinate mu="<<mu<<std::endl;
|
||||
LatticeCoordinate(Phi,mu);
|
||||
std::cout << " Phi coor mu="<<mu<<"\n"<<Phi<<std::endl;
|
||||
}
|
||||
|
||||
Grid_finalize();
|
||||
}
|
||||
@@ -1,15 +1,14 @@
|
||||
<?xml version="1.0"?>
|
||||
<grid>
|
||||
<LanczosParameters>
|
||||
<mass>-1.025</mass>
|
||||
<mstep>-0.025</mstep>
|
||||
<mass>0.00107</mass>
|
||||
<M5>1.8</M5>
|
||||
<Ls>48</Ls>
|
||||
<Nstop>10</Nstop>
|
||||
<Nk>12</Nk>
|
||||
<Np>30</Np>
|
||||
<ChebyLow>0.1</ChebyLow>
|
||||
<ChebyHigh>50</ChebyHigh>
|
||||
<ChebyOrder>51</ChebyOrder>
|
||||
<Nk>15</Nk>
|
||||
<Np>85</Np>
|
||||
<ChebyLow>0.003</ChebyLow>
|
||||
<ChebyHigh>60</ChebyHigh>
|
||||
<ChebyOrder>201</ChebyOrder>
|
||||
</LanczosParameters>
|
||||
</grid>
|
||||
|
||||
@@ -31,16 +31,23 @@ directory
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
;
|
||||
|
||||
#if 0
|
||||
//typedef WilsonFermionD FermionOp;
|
||||
typedef DomainWallFermionD FermionOp;
|
||||
typedef typename DomainWallFermionD::FermionField FermionField;
|
||||
#else
|
||||
typedef MobiusFermionD FermionOp;
|
||||
typedef typename MobiusFermionD::FermionField FermionField;
|
||||
#endif
|
||||
|
||||
template <class T> void writeFile(T& in, std::string const fname){
|
||||
#ifdef HAVE_LIME
|
||||
// Ref: https://github.com/paboyle/Grid/blob/feature/scidac-wp1/tests/debug/Test_general_coarse_hdcg_phys48.cc#L111
|
||||
std::cout << Grid::GridLogMessage << "Writes to: " << fname << std::endl;
|
||||
Grid::emptyUserRecord record;
|
||||
Grid::ScidacWriter WR(in.Grid()->IsBoss());
|
||||
WR.open(fname);
|
||||
WR.writeScidacFieldRecord(in,record,0);
|
||||
WR.close();
|
||||
#endif
|
||||
// What is the appropriate way to throw error?
|
||||
}
|
||||
|
||||
RealD AllZero(RealD x) { return 0.; }
|
||||
|
||||
@@ -125,7 +132,7 @@ int main(int argc, char** argv) {
|
||||
|
||||
int Ls=16;
|
||||
RealD M5=1.8;
|
||||
RealD mass = -1.0;
|
||||
RealD mass = 0.01;
|
||||
|
||||
mass=LanParams.mass;
|
||||
Ls=LanParams.Ls;
|
||||
@@ -163,10 +170,10 @@ int main(int argc, char** argv) {
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
|
||||
}
|
||||
*/
|
||||
|
||||
int Nstop = 10;
|
||||
int Nk = 20;
|
||||
int Nstop = Nk;
|
||||
int Np = 80;
|
||||
|
||||
Nstop=LanParams.Nstop;
|
||||
Nk=LanParams.Nk;
|
||||
Np=LanParams.Np;
|
||||
@@ -174,11 +181,10 @@ int main(int argc, char** argv) {
|
||||
int Nm = Nk + Np;
|
||||
int MaxIt = 10000;
|
||||
RealD resid = 1.0e-5;
|
||||
RealD mob_b=1.5;
|
||||
|
||||
|
||||
//while ( mass > - 5.0){
|
||||
// FermionOp Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||
FermionOp Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.);
|
||||
FermionOp Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||
MdagMLinearOperator<FermionOp,FermionField> HermOp(Ddwf); /// <-----
|
||||
// Gamma5HermitianLinearOperator <FermionOp,LatticeFermion> HermOp2(WilsonOperator); /// <-----
|
||||
Gamma5R5HermitianLinearOperator<FermionOp, LatticeFermion> G5R5Herm(Ddwf);
|
||||
@@ -206,8 +212,12 @@ int main(int argc, char** argv) {
|
||||
int Nconv;
|
||||
IRL.calc(eval, evec, src, Nconv);
|
||||
|
||||
std::cout << mass <<" : " << eval << std::endl;
|
||||
|
||||
std::cout << mass <<" : " << eval << std::endl;
|
||||
std::cout << " #evecs " << evec.size() << std::endl;
|
||||
std::cout << " Nconv " << Nconv << std::endl;
|
||||
std::cout << " Nm " << Nm << std::endl;
|
||||
if ( Nconv > evec.size() ) Nconv = evec.size();
|
||||
|
||||
#if 0
|
||||
Gamma g5(Gamma::Algebra::Gamma5) ;
|
||||
ComplexD dot;
|
||||
@@ -237,6 +247,7 @@ int main(int argc, char** argv) {
|
||||
vector<LatticeFermion> finalevec(Nconv, FGrid);
|
||||
vector<RealD> eMe(Nconv), eMMe(Nconv);
|
||||
for(int i = 0; i < Nconv; i++){
|
||||
cout << "calculate the matrix element["<<i<<"]" << endl;
|
||||
G5R5Herm.HermOpAndNorm(evec[i], G5R5Mevec[i], eMe[i], eMMe[i]);
|
||||
}
|
||||
cout << "Re<evec, G5R5M(evec)>: " << endl;
|
||||
@@ -331,13 +342,27 @@ int main(int argc, char** argv) {
|
||||
axpby_ssp(G5evec[i], -1., finalevec[i], 0., G5evec[i], j, j);
|
||||
}
|
||||
}
|
||||
|
||||
for(int i = 0; i < Nconv; i++){
|
||||
chiral_matrix_real[i].resize(Nconv);
|
||||
chiral_matrix[i].resize(Nconv);
|
||||
|
||||
std::string evfile("./evec_density");
|
||||
evfile = evfile+"_"+std::to_string(i);
|
||||
auto evdensity = localInnerProduct(finalevec[i],finalevec[i] );
|
||||
writeFile(evdensity,evfile);
|
||||
|
||||
for(int j = 0; j < Nconv; j++){
|
||||
chiral_matrix[i][j] = innerProduct(finalevec[i], G5evec[j]);
|
||||
std::cout <<" chiral_matrix_real signed "<<i<<" "<<j<<" "<< chiral_matrix_real[i][j] << std::endl;
|
||||
chiral_matrix_real[i][j] = abs(chiral_matrix[i][j]);
|
||||
std::cout <<" chiral_matrix_real "<<i<<" "<<j<<" "<< chiral_matrix_real[i][j] << std::endl;
|
||||
if ( chiral_matrix_real[i][j] > 0.8 ) {
|
||||
auto g5density = localInnerProduct(finalevec[i], G5evec[j]);
|
||||
std::string chfile("./chiral_density_");
|
||||
chfile = chfile +std::to_string(i)+"_"+std::to_string(j);
|
||||
writeFile(g5density,chfile);
|
||||
}
|
||||
}
|
||||
}
|
||||
for(int i = 0; i < Nconv; i++){
|
||||
@@ -346,6 +371,43 @@ int main(int argc, char** argv) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
FILE *fp = fopen("lego-plot.py","w"); assert(fp!=NULL);
|
||||
#define PYTHON_LINE(A) fprintf(fp,A"\n");
|
||||
PYTHON_LINE("import matplotlib.pyplot as plt");
|
||||
PYTHON_LINE("import numpy as np");
|
||||
PYTHON_LINE("");
|
||||
PYTHON_LINE("fig = plt.figure()");
|
||||
PYTHON_LINE("ax = fig.add_subplot(projection='3d')");
|
||||
PYTHON_LINE("");
|
||||
PYTHON_LINE("x, y = np.random.rand(2, 100) * 4");
|
||||
PYTHON_LINE("hist, xedges, yedges = np.histogram2d(x, y, bins=10, range=[[0, 9], [0, 9]])");
|
||||
PYTHON_LINE("");
|
||||
PYTHON_LINE("# Construct arrays for the anchor positions of the 16 bars");
|
||||
PYTHON_LINE("xpos, ypos = np.meshgrid(xedges[:-1] + 0.25, yedges[:-1] + 0.25, indexing=\"ij\")");
|
||||
PYTHON_LINE("xpos = xpos.ravel()");
|
||||
PYTHON_LINE("ypos = ypos.ravel()");
|
||||
PYTHON_LINE("zpos = 0");
|
||||
PYTHON_LINE("");
|
||||
PYTHON_LINE("# Construct arrays with the dimensions for the 16 bars.");
|
||||
PYTHON_LINE("dx = dy = 0.5 * np.ones_like(zpos)");
|
||||
PYTHON_LINE("dz = np.array([");
|
||||
for(int i = 0; i < Nconv; i++){
|
||||
fprintf(fp,"\t[ ");
|
||||
for(int j = 0; j < Nconv; j++){
|
||||
fprintf(fp,"%lf ",chiral_matrix_real[i][j]);
|
||||
if(j<Nconv-1) fprintf(fp,",");
|
||||
else fprintf(fp," ");
|
||||
}
|
||||
fprintf(fp,"]");
|
||||
if(i<Nconv-1) fprintf(fp,",\n");
|
||||
else fprintf(fp,"\n");
|
||||
}
|
||||
|
||||
PYTHON_LINE("\t])");
|
||||
PYTHON_LINE("dz = dz.ravel()");
|
||||
PYTHON_LINE("ax.bar3d(xpos, ypos, zpos, dx, dy, dz, zsort='average')");
|
||||
PYTHON_LINE("plt.show()");
|
||||
fclose(fp);
|
||||
|
||||
Grid_finalize();
|
||||
}
|
||||
|
||||
@@ -113,9 +113,6 @@ struct LanczosParameters: Serializable {
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters,
|
||||
RealD, mass ,
|
||||
RealD, resid,
|
||||
Integer, Nstop,
|
||||
Integer, Nk,
|
||||
Integer, Np,
|
||||
RealD, ChebyLow,
|
||||
RealD, ChebyHigh,
|
||||
Integer, ChebyOrder)
|
||||
@@ -207,6 +204,7 @@ int main(int argc, char** argv) {
|
||||
int Nstop = 5;
|
||||
int Nk = 10;
|
||||
int Np = 90;
|
||||
int Nm = Nk + Np;
|
||||
int MaxIt = 10000;
|
||||
RealD resid = 1.0e-5;
|
||||
|
||||
@@ -228,14 +226,10 @@ int main(int argc, char** argv) {
|
||||
XmlWriter HMCwr("LanParams.xml.out");
|
||||
write(HMCwr,"LanczosParameters",LanParams);
|
||||
}
|
||||
Nstop=LanParams.Nstop;
|
||||
Nk=LanParams.Nk;
|
||||
Np=LanParams.Np;
|
||||
|
||||
mass=LanParams.mass;
|
||||
resid=LanParams.resid;
|
||||
|
||||
int Nm = Nk + Np;
|
||||
|
||||
|
||||
while ( mass > - 5.0){
|
||||
FermionOp WilsonOperator(Umu,*FGrid,*FrbGrid,2.+mass);
|
||||
|
||||
@@ -61,8 +61,7 @@ int main(int argc, char** argv) {
|
||||
RNG5.SeedFixedIntegers(seeds5);
|
||||
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
// SU<Nc>::HotConfiguration(RNG4, Umu);
|
||||
SU<Nc>::ColdConfiguration(Umu);
|
||||
SU<Nc>::HotConfiguration(RNG4, Umu);
|
||||
|
||||
/*
|
||||
std::vector<LatticeColourMatrix> U(4, UGrid);
|
||||
@@ -70,15 +69,9 @@ int main(int argc, char** argv) {
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
|
||||
}
|
||||
*/
|
||||
// std::vector<Complex> boundary = {1,1,1,-1};
|
||||
std::vector<Complex> boundary = {1,1,1,1};
|
||||
FermionOp::ImplParams Params(boundary);
|
||||
|
||||
|
||||
|
||||
RealD mass = 0.0;
|
||||
// FermionOp WilsonOperator(Umu,*FGrid,*FrbGrid,mass);
|
||||
FermionOp WilsonOperator(Umu,*FGrid,*FrbGrid,mass,Params);
|
||||
RealD mass = -0.1;
|
||||
FermionOp WilsonOperator(Umu,*FGrid,*FrbGrid,mass);
|
||||
MdagMLinearOperator<FermionOp,LatticeFermion> HermOp(WilsonOperator); /// <-----
|
||||
//SchurDiagTwoOperator<FermionOp,FermionField> HermOp(WilsonOperator);
|
||||
|
||||
@@ -96,8 +89,7 @@ int main(int argc, char** argv) {
|
||||
FunctionHermOp<FermionField> OpCheby(Cheby,HermOp);
|
||||
PlainHermOp<FermionField> Op (HermOp);
|
||||
|
||||
// ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby, Op, Nstop, Nk, Nm, resid, MaxIt);
|
||||
SimpleLanczos<FermionField> IRL(Op,Nstop, Nk, Nm, resid, MaxIt);
|
||||
ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby, Op, Nstop, Nk, Nm, resid, MaxIt);
|
||||
|
||||
std::vector<RealD> eval(Nm);
|
||||
FermionField src(FGrid);
|
||||
@@ -109,8 +101,7 @@ int main(int argc, char** argv) {
|
||||
};
|
||||
|
||||
int Nconv;
|
||||
// IRL.calc(eval, evec, src, Nconv);
|
||||
IRL.calc(eval, src, Nconv);
|
||||
IRL.calc(eval, evec, src, Nconv);
|
||||
|
||||
std::cout << eval << std::endl;
|
||||
|
||||
|
||||
@@ -27,7 +27,6 @@ directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/Grid.h>
|
||||
#include <Grid/parallelIO/IldgIOtypes.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
@@ -39,29 +38,11 @@ typedef typename WilsonFermionD::FermionField FermionField;
|
||||
|
||||
RealD AllZero(RealD x) { return 0.; }
|
||||
|
||||
template <class T> void writeFile(T& in, std::string const fname){
|
||||
#if 1
|
||||
// Ref: https://github.com/paboyle/Grid/blob/feature/scidac-wp1/tests/debug/Test_general_coarse_hdcg_phys48.cc#L111
|
||||
std::cout << Grid::GridLogMessage << "Writes to: " << fname << std::endl;
|
||||
Grid::emptyUserRecord record;
|
||||
Grid::ScidacWriter WR(in.Grid()->IsBoss());
|
||||
WR.open(fname);
|
||||
WR.writeScidacFieldRecord(in,record,0);
|
||||
WR.close();
|
||||
#endif
|
||||
// What is the appropriate way to throw error?
|
||||
}
|
||||
|
||||
|
||||
namespace Grid {
|
||||
|
||||
struct LanczosParameters: Serializable {
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters,
|
||||
RealD, mass ,
|
||||
RealD, mstep ,
|
||||
Integer, Nstop,
|
||||
Integer, Nk,
|
||||
Integer, Np,
|
||||
RealD, ChebyLow,
|
||||
RealD, ChebyHigh,
|
||||
Integer, ChebyOrder)
|
||||
@@ -134,7 +115,6 @@ int main(int argc, char** argv) {
|
||||
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
// SU<Nc>::HotConfiguration(RNG4, Umu);
|
||||
// SU<Nc>::ColdConfiguration(Umu);
|
||||
|
||||
FieldMetaData header;
|
||||
std::string file("./config");
|
||||
@@ -178,20 +158,10 @@ int main(int argc, char** argv) {
|
||||
}
|
||||
|
||||
mass=LanParams.mass;
|
||||
Nstop=LanParams.Nstop;
|
||||
Nk=LanParams.Nk;
|
||||
Np=LanParams.Np;
|
||||
Nm = Nk + Np;
|
||||
|
||||
FermionField src(FGrid);
|
||||
gaussian(RNG5, src);
|
||||
std::vector<Complex> boundary = {1,1,1,-1};
|
||||
// std::vector<Complex> boundary = {1,1,1,1};
|
||||
FermionOp::ImplParams Params(boundary);
|
||||
|
||||
|
||||
while ( mass > - 2.5){
|
||||
FermionOp WilsonOperator(Umu,*FGrid,*FrbGrid,mass,Params);
|
||||
while ( mass > - 5.0){
|
||||
FermionOp WilsonOperator(Umu,*FGrid,*FrbGrid,mass);
|
||||
MdagMLinearOperator<FermionOp,FermionField> HermOp(WilsonOperator); /// <-----
|
||||
//SchurDiagTwoOperator<FermionOp,FermionField> HermOp(WilsonOperator);
|
||||
Gamma5HermitianLinearOperator <FermionOp,LatticeFermion> HermOp2(WilsonOperator); /// <-----
|
||||
@@ -210,9 +180,10 @@ while ( mass > - 2.5){
|
||||
PlainHermOp<FermionField> Op2 (HermOp2);
|
||||
|
||||
ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby, Op2, Nstop, Nk, Nm, resid, MaxIt);
|
||||
// SimpleLanczos<FermionField> IRL(Op,Nstop, Nk, Nm, resid, MaxIt);
|
||||
|
||||
std::vector<RealD> eval(Nm);
|
||||
FermionField src(FGrid);
|
||||
gaussian(RNG5, src);
|
||||
std::vector<FermionField> evec(Nm, FGrid);
|
||||
for (int i = 0; i < 1; i++) {
|
||||
std::cout << i << " / " << Nm << " grid pointer " << evec[i].Grid()
|
||||
@@ -221,7 +192,6 @@ while ( mass > - 2.5){
|
||||
|
||||
int Nconv;
|
||||
IRL.calc(eval, evec, src, Nconv);
|
||||
// IRL.calc(eval, src, Nconv);
|
||||
|
||||
std::cout << mass <<" : " << eval << std::endl;
|
||||
|
||||
@@ -232,17 +202,9 @@ while ( mass > - 2.5){
|
||||
tmp = g5*evec[i];
|
||||
dot = innerProduct(tmp,evec[i]);
|
||||
std::cout << mass << " : " << eval[i] << " " << real(dot) << " " << imag(dot) << std::endl ;
|
||||
// if ( i<1)
|
||||
{
|
||||
std::string evfile ("./evec_"+std::to_string(mass)+"_"+std::to_string(i));
|
||||
auto evdensity = localInnerProduct(evec[i],evec[i] );
|
||||
writeFile(evdensity,evfile);
|
||||
}
|
||||
}
|
||||
src = evec[0]+evec[1]+evec[2];
|
||||
src += evec[3]+evec[4]+evec[5];
|
||||
src += evec[6]+evec[7]+evec[8];
|
||||
mass += LanParams.mstep;
|
||||
mass += -0.1;
|
||||
}
|
||||
|
||||
Grid_finalize();
|
||||
|
||||
@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
|
||||
|
||||
project(GridViewer)
|
||||
|
||||
list(APPEND CMAKE_PREFIX_PATH "/Users/peterboyle/QCD/vtk/VTK-9.4.2-install/")
|
||||
list(APPEND CMAKE_PREFIX_PATH "/home/paboyle/Visualisation/install/")
|
||||
|
||||
find_package(VTK COMPONENTS
|
||||
CommonColor
|
||||
|
||||
@@ -48,15 +48,14 @@ typedef vtkMarchingCubes isosurface;
|
||||
|
||||
int mpeg = 0 ;
|
||||
int xlate = 0 ;
|
||||
int framerate = 10;
|
||||
|
||||
template <class T> void readFile(T& out, std::string const fname){
|
||||
#ifdef HAVE_LIME
|
||||
Grid::emptyUserRecord record;
|
||||
Grid::ScidacReader RD;
|
||||
RD.open(fname);
|
||||
RD.readScidacFieldRecord(out,record);
|
||||
RD.close();
|
||||
#endif
|
||||
}
|
||||
using namespace Grid;
|
||||
|
||||
@@ -208,6 +207,10 @@ int main(int argc, char* argv[])
|
||||
xlate = 1;
|
||||
}
|
||||
|
||||
if( GridCmdOptionExists(argv,argv+argc,"--fps") ){
|
||||
arg=GridCmdOptionPayload(argv,argv+argc,"--fps");
|
||||
GridCmdOptionInt(arg,framerate);
|
||||
}
|
||||
if( GridCmdOptionExists(argv,argv+argc,"--isosurface") ){
|
||||
arg=GridCmdOptionPayload(argv,argv+argc,"--isosurface");
|
||||
GridCmdOptionFloat(arg,default_contour);
|
||||
@@ -420,7 +423,7 @@ int main(int argc, char* argv[])
|
||||
|
||||
vtkFFMPEGWriter *writer = vtkFFMPEGWriter::New();
|
||||
writer->SetFileName("movie.avi");
|
||||
writer->SetRate(1);
|
||||
writer->SetRate(framerate);
|
||||
writer->SetInputConnection(imageFilter->GetOutputPort());
|
||||
writer->Start();
|
||||
|
||||
@@ -477,7 +480,7 @@ int main(int argc, char* argv[])
|
||||
slidercallback->fu_list = fu_list;
|
||||
sliderWidget->AddObserver(vtkCommand::InteractionEvent, slidercallback);
|
||||
|
||||
int timerId = iren->CreateRepeatingTimer(300);
|
||||
int timerId = iren->CreateRepeatingTimer(1000/framerate);
|
||||
std::cout << "timerId: " << timerId << std::endl;
|
||||
|
||||
// Start the interaction and timer
|
||||
|
||||
@@ -73,6 +73,21 @@ each to:
|
||||
|
||||
VTK really should make it easier to pick up the flags required for FFMPEG linkage, especially as they are very quirky on MacOS.
|
||||
|
||||
========================================
|
||||
Aurora compilation:
|
||||
========================================
|
||||
module load ffmpeg
|
||||
download & untar: VTK-7.0.2
|
||||
mkdir build
|
||||
cd build
|
||||
ccmake ../
|
||||
|
||||
"t"
|
||||
Enable: VTK_MODULE_ENABLE_VTK_IOFFMPEG YES
|
||||
"configure" ; should "discover" the installed ffmpeg module
|
||||
|
||||
Still need an "X" connection to make the MPEG files.
|
||||
|
||||
|
||||
========================================
|
||||
Grid:
|
||||
|
||||
@@ -1,9 +1,17 @@
|
||||
libs=`grid-config --libs`
|
||||
ldflags=`grid-config --ldflags`
|
||||
cxxflags=`grid-config --cxxflags`
|
||||
cxx=`grid-config --cxx`
|
||||
export grid_config=/home/paboyle/GPT/install/bin/grid-config
|
||||
libs=`$grid_config --libs`
|
||||
ldflags=`$grid_config --ldflags`
|
||||
cxxflags=`$grid_config --cxxflags`
|
||||
cxx=`$grid_config --cxx`
|
||||
cc=icx
|
||||
|
||||
mkdir build
|
||||
cd build
|
||||
|
||||
LDFLAGS="$ldflags $libs " cmake .. -DCMAKE_CXX_COMPILER=$cxx -DCMAKE_CXX_FLAGS=$cxxflags
|
||||
echo CC $cc
|
||||
echo CXX $cxx
|
||||
echo CXXFLAGS $cxxflags
|
||||
echo LDFLAGS $ldflags
|
||||
echo LIBS $libs
|
||||
|
||||
LDFLAGS="$ldflags $libs " cmake .. -DCMAKE_C_COMPILER=$cc -DCMAKE_CXX_COMPILER="$cxx" -DCMAKE_CXX_FLAGS="$cxxflags "
|
||||
|
||||
Reference in New Issue
Block a user