mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-04 19:25:56 +01:00
Merge https://github.com/paboyle/Grid into develop
This commit is contained in:
commit
c772bcd514
@ -9,11 +9,6 @@ matrix:
|
||||
- os: osx
|
||||
osx_image: xcode8.3
|
||||
compiler: clang
|
||||
env: PREC=single
|
||||
- os: osx
|
||||
osx_image: xcode8.3
|
||||
compiler: clang
|
||||
env: PREC=double
|
||||
|
||||
before_install:
|
||||
- export GRIDDIR=`pwd`
|
||||
@ -55,7 +50,7 @@ script:
|
||||
- make -j4
|
||||
- make install
|
||||
- cd $CWD/build
|
||||
- ../configure --enable-precision=$PREC --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install ${EXTRACONF}
|
||||
- ../configure --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install ${EXTRACONF}
|
||||
- make -j4
|
||||
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
|
||||
- make check
|
||||
|
@ -36,7 +36,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
#include <Grid/lattice/Lattice_local.h>
|
||||
#include <Grid/lattice/Lattice_reduction.h>
|
||||
#include <Grid/lattice/Lattice_peekpoke.h>
|
||||
//#include <Grid/lattice/Lattice_reality.h>
|
||||
#include <Grid/lattice/Lattice_reality.h>
|
||||
#include <Grid/lattice/Lattice_real_imag.h>
|
||||
#include <Grid/lattice/Lattice_comparison_utils.h>
|
||||
#include <Grid/lattice/Lattice_comparison.h>
|
||||
|
@ -342,14 +342,10 @@ inline void ExpressionViewClose(LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
|
||||
|
||||
GridUnopClass(UnarySub, -a);
|
||||
GridUnopClass(UnaryNot, Not(a));
|
||||
GridUnopClass(UnaryAdj, adj(a));
|
||||
GridUnopClass(UnaryConj, conjugate(a));
|
||||
GridUnopClass(UnaryTrace, trace(a));
|
||||
GridUnopClass(UnaryTranspose, transpose(a));
|
||||
GridUnopClass(UnaryTa, Ta(a));
|
||||
GridUnopClass(UnaryProjectOnGroup, ProjectOnGroup(a));
|
||||
GridUnopClass(UnaryToReal, toReal(a));
|
||||
GridUnopClass(UnaryToComplex, toComplex(a));
|
||||
GridUnopClass(UnaryTimesI, timesI(a));
|
||||
GridUnopClass(UnaryTimesMinusI, timesMinusI(a));
|
||||
GridUnopClass(UnaryAbs, abs(a));
|
||||
@ -456,14 +452,12 @@ GridTrinOpClass(TrinaryWhere,
|
||||
GRID_DEF_UNOP(operator-, UnarySub);
|
||||
GRID_DEF_UNOP(Not, UnaryNot);
|
||||
GRID_DEF_UNOP(operator!, UnaryNot);
|
||||
GRID_DEF_UNOP(adj, UnaryAdj);
|
||||
GRID_DEF_UNOP(conjugate, UnaryConj);
|
||||
//GRID_DEF_UNOP(adj, UnaryAdj);
|
||||
//GRID_DEF_UNOP(conjugate, UnaryConj);
|
||||
GRID_DEF_UNOP(trace, UnaryTrace);
|
||||
GRID_DEF_UNOP(transpose, UnaryTranspose);
|
||||
GRID_DEF_UNOP(Ta, UnaryTa);
|
||||
GRID_DEF_UNOP(ProjectOnGroup, UnaryProjectOnGroup);
|
||||
GRID_DEF_UNOP(toReal, UnaryToReal);
|
||||
GRID_DEF_UNOP(toComplex, UnaryToComplex);
|
||||
GRID_DEF_UNOP(timesI, UnaryTimesI);
|
||||
GRID_DEF_UNOP(timesMinusI, UnaryTimesMinusI);
|
||||
GRID_DEF_UNOP(abs, UnaryAbs); // abs overloaded in cmath C++98; DON'T do the
|
||||
@ -494,27 +488,27 @@ GRID_DEF_TRINOP(where, TrinaryWhere);
|
||||
/////////////////////////////////////////////////////////////
|
||||
template <class Op, class T1>
|
||||
auto closure(const LatticeUnaryExpression<Op, T1> &expr)
|
||||
-> Lattice<decltype(expr.op.func(vecEval(0, expr.arg1)))>
|
||||
-> Lattice<typename std::remove_const<decltype(expr.op.func(vecEval(0, expr.arg1)))>::type >
|
||||
{
|
||||
Lattice<decltype(expr.op.func(vecEval(0, expr.arg1)))> ret(expr);
|
||||
Lattice<typename std::remove_const<decltype(expr.op.func(vecEval(0, expr.arg1)))>::type > ret(expr);
|
||||
return ret;
|
||||
}
|
||||
template <class Op, class T1, class T2>
|
||||
auto closure(const LatticeBinaryExpression<Op, T1, T2> &expr)
|
||||
-> Lattice<decltype(expr.op.func(vecEval(0, expr.arg1),vecEval(0, expr.arg2)))>
|
||||
-> Lattice<typename std::remove_const<decltype(expr.op.func(vecEval(0, expr.arg1),vecEval(0, expr.arg2)))>::type >
|
||||
{
|
||||
Lattice<decltype(expr.op.func(vecEval(0, expr.arg1),vecEval(0, expr.arg2)))> ret(expr);
|
||||
Lattice<typename std::remove_const<decltype(expr.op.func(vecEval(0, expr.arg1),vecEval(0, expr.arg2)))>::type > ret(expr);
|
||||
return ret;
|
||||
}
|
||||
template <class Op, class T1, class T2, class T3>
|
||||
auto closure(const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
|
||||
-> Lattice<decltype(expr.op.func(vecEval(0, expr.arg1),
|
||||
-> Lattice<typename std::remove_const<decltype(expr.op.func(vecEval(0, expr.arg1),
|
||||
vecEval(0, expr.arg2),
|
||||
vecEval(0, expr.arg3)))>
|
||||
vecEval(0, expr.arg3)))>::type >
|
||||
{
|
||||
Lattice<decltype(expr.op.func(vecEval(0, expr.arg1),
|
||||
Lattice<typename std::remove_const<decltype(expr.op.func(vecEval(0, expr.arg1),
|
||||
vecEval(0, expr.arg2),
|
||||
vecEval(0, expr.arg3)))> ret(expr);
|
||||
vecEval(0, expr.arg3)))>::type > ret(expr);
|
||||
return ret;
|
||||
}
|
||||
#define EXPRESSION_CLOSURE(function) \
|
||||
|
@ -45,8 +45,8 @@ template<class vobj> inline Lattice<vobj> adj(const Lattice<vobj> &lhs){
|
||||
autoView( ret_v, ret, AcceleratorWrite);
|
||||
|
||||
ret.Checkerboard()=lhs.Checkerboard();
|
||||
accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), {
|
||||
coalescedWrite(ret_v[ss], adj(lhs_v(ss)));
|
||||
accelerator_for( ss, lhs_v.size(), 1, {
|
||||
ret_v[ss] = adj(lhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
@ -64,6 +64,53 @@ template<class vobj> inline Lattice<vobj> conjugate(const Lattice<vobj> &lhs){
|
||||
return ret;
|
||||
};
|
||||
|
||||
template<class vobj> inline Lattice<typename vobj::Complexified> toComplex(const Lattice<vobj> &lhs){
|
||||
Lattice<typename vobj::Complexified> ret(lhs.Grid());
|
||||
|
||||
autoView( lhs_v, lhs, AcceleratorRead);
|
||||
autoView( ret_v, ret, AcceleratorWrite);
|
||||
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
accelerator_for( ss, lhs_v.size(), 1, {
|
||||
ret_v[ss] = toComplex(lhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
template<class vobj> inline Lattice<typename vobj::Realified> toReal(const Lattice<vobj> &lhs){
|
||||
Lattice<typename vobj::Realified> ret(lhs.Grid());
|
||||
|
||||
autoView( lhs_v, lhs, AcceleratorRead);
|
||||
autoView( ret_v, ret, AcceleratorWrite);
|
||||
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
accelerator_for( ss, lhs_v.size(), 1, {
|
||||
ret_v[ss] = toReal(lhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
|
||||
|
||||
template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr>
|
||||
auto toComplex(const Expression &expr) -> decltype(closure(expr))
|
||||
{
|
||||
return toComplex(closure(expr));
|
||||
}
|
||||
template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr>
|
||||
auto toReal(const Expression &expr) -> decltype(closure(expr))
|
||||
{
|
||||
return toReal(closure(expr));
|
||||
}
|
||||
template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr>
|
||||
auto adj(const Expression &expr) -> decltype(closure(expr))
|
||||
{
|
||||
return adj(closure(expr));
|
||||
}
|
||||
template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr>
|
||||
auto conjugate(const Expression &expr) -> decltype(closure(expr))
|
||||
{
|
||||
return conjugate(closure(expr));
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
||||
|
@ -130,6 +130,8 @@ public:
|
||||
friend std::ostream& operator<< (std::ostream& stream, Logger& log){
|
||||
|
||||
if ( log.active ) {
|
||||
std::ios_base::fmtflags f(stream.flags());
|
||||
|
||||
stream << log.background()<< std::left;
|
||||
if (log.topWidth > 0)
|
||||
{
|
||||
@ -152,6 +154,8 @@ public:
|
||||
<< now << log.background() << " : " ;
|
||||
}
|
||||
stream << log.colour();
|
||||
stream.flags(f);
|
||||
|
||||
return stream;
|
||||
} else {
|
||||
return devnull;
|
||||
|
@ -1,3 +1,4 @@
|
||||
#include <Grid/GridCore.h>
|
||||
|
||||
int Grid::BinaryIO::latticeWriteMaxRetry = -1;
|
||||
int Grid::BinaryIO::latticeWriteMaxRetry = -1;
|
||||
Grid::BinaryIO::IoPerf Grid::BinaryIO::lastPerf;
|
||||
|
@ -79,6 +79,13 @@ inline void removeWhitespace(std::string &key)
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class BinaryIO {
|
||||
public:
|
||||
struct IoPerf
|
||||
{
|
||||
uint64_t size{0},time{0};
|
||||
double mbytesPerSecond{0.};
|
||||
};
|
||||
|
||||
static IoPerf lastPerf;
|
||||
static int latticeWriteMaxRetry;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
@ -502,12 +509,15 @@ class BinaryIO {
|
||||
timer.Stop();
|
||||
}
|
||||
|
||||
lastPerf.size = sizeof(fobj)*iodata.size()*nrank;
|
||||
lastPerf.time = timer.useconds();
|
||||
lastPerf.mbytesPerSecond = lastPerf.size/1024./1024./(lastPerf.time/1.0e6);
|
||||
std::cout<<GridLogMessage<<"IOobject: ";
|
||||
if ( control & BINARYIO_READ) std::cout << " read ";
|
||||
else std::cout << " write ";
|
||||
uint64_t bytes = sizeof(fobj)*iodata.size()*nrank;
|
||||
std::cout<< bytes <<" bytes in "<<timer.Elapsed() <<" "
|
||||
<< (double)bytes/ (double)timer.useconds() <<" MB/s "<<std::endl;
|
||||
std::cout<< lastPerf.size <<" bytes in "<< timer.Elapsed() <<" "
|
||||
<< lastPerf.mbytesPerSecond <<" MB/s "<<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage<<"IOobject: endian and checksum overhead "<<bstimer.Elapsed() <<std::endl;
|
||||
|
||||
|
@ -47,7 +47,7 @@ static constexpr int Ym = 5;
|
||||
static constexpr int Zm = 6;
|
||||
static constexpr int Tm = 7;
|
||||
|
||||
static constexpr int Nc=3;
|
||||
static constexpr int Nc=Config_Nc;
|
||||
static constexpr int Ns=4;
|
||||
static constexpr int Nd=4;
|
||||
static constexpr int Nhs=2; // half spinor
|
||||
|
@ -133,14 +133,14 @@ void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
|
||||
pickCheckerboard(Even, CloverTermEven, CloverTerm);
|
||||
pickCheckerboard(Odd, CloverTermOdd, CloverTerm);
|
||||
|
||||
pickCheckerboard(Even, CloverTermDagEven, closure(adj(CloverTerm)));
|
||||
pickCheckerboard(Odd, CloverTermDagOdd, closure(adj(CloverTerm)));
|
||||
pickCheckerboard(Even, CloverTermDagEven, adj(CloverTerm));
|
||||
pickCheckerboard(Odd, CloverTermDagOdd, adj(CloverTerm));
|
||||
|
||||
pickCheckerboard(Even, CloverTermInvEven, CloverTermInv);
|
||||
pickCheckerboard(Odd, CloverTermInvOdd, CloverTermInv);
|
||||
|
||||
pickCheckerboard(Even, CloverTermInvDagEven, closure(adj(CloverTermInv)));
|
||||
pickCheckerboard(Odd, CloverTermInvDagOdd, closure(adj(CloverTermInv)));
|
||||
pickCheckerboard(Even, CloverTermInvDagEven, adj(CloverTermInv));
|
||||
pickCheckerboard(Odd, CloverTermInvDagOdd, adj(CloverTermInv));
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
|
@ -449,7 +449,8 @@ public:
|
||||
LatticeReal alpha(grid);
|
||||
|
||||
// std::cout<<GridLogMessage<<"xi "<<xi <<std::endl;
|
||||
alpha = toReal(2.0 * xi);
|
||||
xi = 2.0 *xi;
|
||||
alpha = toReal(xi);
|
||||
|
||||
do {
|
||||
// A. Generate two uniformly distributed pseudo-random numbers R and R',
|
||||
|
33
README
33
README
@ -111,11 +111,10 @@ Now you can execute the `configure` script to generate makefiles (here from a bu
|
||||
|
||||
``` bash
|
||||
mkdir build; cd build
|
||||
../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi-auto --prefix=<path>
|
||||
../configure --enable-simd=AVX --enable-comms=mpi-auto --prefix=<path>
|
||||
```
|
||||
|
||||
where `--enable-precision=` set the default precision,
|
||||
`--enable-simd=` set the SIMD type, `--enable-
|
||||
where `--enable-simd=` set the SIMD type, `--enable-
|
||||
comms=`, and `<path>` should be replaced by the prefix path where you want to
|
||||
install Grid. Other options are detailed in the next section, you can also use `configure
|
||||
--help` to display them. Like with any other program using GNU autotool, the
|
||||
@ -146,8 +145,8 @@ If you want to build all the tests at once just use `make tests`.
|
||||
- `--enable-numa`: enable NUMA first touch optimisation
|
||||
- `--enable-simd=<code>`: setup Grid for the SIMD target `<code>` (default: `GEN`). A list of possible SIMD targets is detailed in a section below.
|
||||
- `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes).
|
||||
- `--enable-precision={single|double}`: set the default precision (default: `double`).
|
||||
- `--enable-precision=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible SIMD targets is detailed in a section below.
|
||||
- `--enable-precision={single|double}`: set the default precision (default: `double`). **Deprecated option**
|
||||
- `--enable-comms=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible SIMD targets is detailed in a section below.
|
||||
- `--enable-rng={sitmo|ranlux48|mt19937}`: choose the RNG (default: `sitmo `).
|
||||
- `--disable-timers`: disable system dependent high-resolution timers.
|
||||
- `--enable-chroma`: enable Chroma regression tests.
|
||||
@ -201,8 +200,7 @@ Alternatively, some CPU codenames can be directly used:
|
||||
The following configuration is recommended for the Intel Knights Landing platform:
|
||||
|
||||
``` bash
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=KNL \
|
||||
../configure --enable-simd=KNL \
|
||||
--enable-comms=mpi-auto \
|
||||
--enable-mkl \
|
||||
CXX=icpc MPICXX=mpiicpc
|
||||
@ -212,8 +210,7 @@ The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library.
|
||||
If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:
|
||||
|
||||
``` bash
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=KNL \
|
||||
../configure --enable-simd=KNL \
|
||||
--enable-comms=mpi \
|
||||
--enable-mkl \
|
||||
CXX=CC CC=cc
|
||||
@ -232,8 +229,7 @@ for interior communication. This is the mpi3 communications implementation.
|
||||
We recommend four ranks per node for best performance, but optimum is local volume dependent.
|
||||
|
||||
``` bash
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=KNL \
|
||||
../configure --enable-simd=KNL \
|
||||
--enable-comms=mpi3-auto \
|
||||
--enable-mkl \
|
||||
CC=icpc MPICXX=mpiicpc
|
||||
@ -244,8 +240,7 @@ We recommend four ranks per node for best performance, but optimum is local volu
|
||||
The following configuration is recommended for the Intel Haswell platform:
|
||||
|
||||
``` bash
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=AVX2 \
|
||||
../configure --enable-simd=AVX2 \
|
||||
--enable-comms=mpi3-auto \
|
||||
--enable-mkl \
|
||||
CXX=icpc MPICXX=mpiicpc
|
||||
@ -262,8 +257,7 @@ where `<path>` is the UNIX prefix where GMP and MPFR are installed.
|
||||
If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:
|
||||
|
||||
``` bash
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=AVX2 \
|
||||
../configure --enable-simd=AVX2 \
|
||||
--enable-comms=mpi3 \
|
||||
--enable-mkl \
|
||||
CXX=CC CC=cc
|
||||
@ -280,8 +274,7 @@ This is the default.
|
||||
The following configuration is recommended for the Intel Skylake platform:
|
||||
|
||||
``` bash
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=AVX512 \
|
||||
../configure --enable-simd=AVX512 \
|
||||
--enable-comms=mpi3 \
|
||||
--enable-mkl \
|
||||
CXX=mpiicpc
|
||||
@ -298,8 +291,7 @@ where `<path>` is the UNIX prefix where GMP and MPFR are installed.
|
||||
If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:
|
||||
|
||||
``` bash
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=AVX512 \
|
||||
../configure --enable-simd=AVX512 \
|
||||
--enable-comms=mpi3 \
|
||||
--enable-mkl \
|
||||
CXX=CC CC=cc
|
||||
@ -330,8 +322,7 @@ and 8 threads per rank.
|
||||
The following configuration is recommended for the AMD EPYC platform.
|
||||
|
||||
``` bash
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=AVX2 \
|
||||
../configure --enable-simd=AVX2 \
|
||||
--enable-comms=mpi3 \
|
||||
CXX=mpicxx
|
||||
```
|
||||
|
33
README.md
33
README.md
@ -115,11 +115,10 @@ Now you can execute the `configure` script to generate makefiles (here from a bu
|
||||
|
||||
``` bash
|
||||
mkdir build; cd build
|
||||
../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi-auto --prefix=<path>
|
||||
../configure --enable-simd=AVX --enable-comms=mpi-auto --prefix=<path>
|
||||
```
|
||||
|
||||
where `--enable-precision=` set the default precision,
|
||||
`--enable-simd=` set the SIMD type, `--enable-
|
||||
where `--enable-simd=` set the SIMD type, `--enable-
|
||||
comms=`, and `<path>` should be replaced by the prefix path where you want to
|
||||
install Grid. Other options are detailed in the next section, you can also use `configure
|
||||
--help` to display them. Like with any other program using GNU autotool, the
|
||||
@ -150,8 +149,8 @@ If you want to build all the tests at once just use `make tests`.
|
||||
- `--enable-numa`: enable NUMA first touch optimisation
|
||||
- `--enable-simd=<code>`: setup Grid for the SIMD target `<code>` (default: `GEN`). A list of possible SIMD targets is detailed in a section below.
|
||||
- `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes).
|
||||
- `--enable-precision={single|double}`: set the default precision (default: `double`).
|
||||
- `--enable-precision=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible SIMD targets is detailed in a section below.
|
||||
- `--enable-precision={single|double}`: set the default precision (default: `double`). **Deprecated option**
|
||||
- `--enable-comms=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible SIMD targets is detailed in a section below.
|
||||
- `--enable-rng={sitmo|ranlux48|mt19937}`: choose the RNG (default: `sitmo `).
|
||||
- `--disable-timers`: disable system dependent high-resolution timers.
|
||||
- `--enable-chroma`: enable Chroma regression tests.
|
||||
@ -205,8 +204,7 @@ Alternatively, some CPU codenames can be directly used:
|
||||
The following configuration is recommended for the Intel Knights Landing platform:
|
||||
|
||||
``` bash
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=KNL \
|
||||
../configure --enable-simd=KNL \
|
||||
--enable-comms=mpi-auto \
|
||||
--enable-mkl \
|
||||
CXX=icpc MPICXX=mpiicpc
|
||||
@ -216,8 +214,7 @@ The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library.
|
||||
If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:
|
||||
|
||||
``` bash
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=KNL \
|
||||
../configure --enable-simd=KNL \
|
||||
--enable-comms=mpi \
|
||||
--enable-mkl \
|
||||
CXX=CC CC=cc
|
||||
@ -236,8 +233,7 @@ for interior communication. This is the mpi3 communications implementation.
|
||||
We recommend four ranks per node for best performance, but optimum is local volume dependent.
|
||||
|
||||
``` bash
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=KNL \
|
||||
../configure --enable-simd=KNL \
|
||||
--enable-comms=mpi3-auto \
|
||||
--enable-mkl \
|
||||
CC=icpc MPICXX=mpiicpc
|
||||
@ -248,8 +244,7 @@ We recommend four ranks per node for best performance, but optimum is local volu
|
||||
The following configuration is recommended for the Intel Haswell platform:
|
||||
|
||||
``` bash
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=AVX2 \
|
||||
../configure --enable-simd=AVX2 \
|
||||
--enable-comms=mpi3-auto \
|
||||
--enable-mkl \
|
||||
CXX=icpc MPICXX=mpiicpc
|
||||
@ -266,8 +261,7 @@ where `<path>` is the UNIX prefix where GMP and MPFR are installed.
|
||||
If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:
|
||||
|
||||
``` bash
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=AVX2 \
|
||||
../configure --enable-simd=AVX2 \
|
||||
--enable-comms=mpi3 \
|
||||
--enable-mkl \
|
||||
CXX=CC CC=cc
|
||||
@ -284,8 +278,7 @@ This is the default.
|
||||
The following configuration is recommended for the Intel Skylake platform:
|
||||
|
||||
``` bash
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=AVX512 \
|
||||
../configure --enable-simd=AVX512 \
|
||||
--enable-comms=mpi3 \
|
||||
--enable-mkl \
|
||||
CXX=mpiicpc
|
||||
@ -302,8 +295,7 @@ where `<path>` is the UNIX prefix where GMP and MPFR are installed.
|
||||
If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:
|
||||
|
||||
``` bash
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=AVX512 \
|
||||
../configure --enable-simd=AVX512 \
|
||||
--enable-comms=mpi3 \
|
||||
--enable-mkl \
|
||||
CXX=CC CC=cc
|
||||
@ -334,8 +326,7 @@ and 8 threads per rank.
|
||||
The following configuration is recommended for the AMD EPYC platform.
|
||||
|
||||
``` bash
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=AVX2 \
|
||||
../configure --enable-simd=AVX2 \
|
||||
--enable-comms=mpi3 \
|
||||
CXX=mpicxx
|
||||
```
|
||||
|
@ -12,31 +12,31 @@ module load mpi/openmpi-aarch64
|
||||
|
||||
scl enable gcc-toolset-10 bash
|
||||
|
||||
../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=g++ CC=gcc CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN"
|
||||
../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=g++ CC=gcc CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN"
|
||||
|
||||
* gcc 10.1 prebuild w/ MPI, QPACE4 interactive login
|
||||
|
||||
scl enable gcc-toolset-10 bash
|
||||
module load mpi/openmpi-aarch64
|
||||
|
||||
../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=mpi-auto --enable-shm=shmget --enable-openmp CXX=mpicxx CC=mpicc CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN"
|
||||
../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=mpi-auto --enable-shm=shmget --enable-openmp CXX=mpicxx CC=mpicc CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN"
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
* armclang 20.2 (qp4)
|
||||
|
||||
../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DA64FX -DARMCLANGCOMPAT -DA64FXASM -DDSLASHINTRIN"
|
||||
../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DA64FX -DARMCLANGCOMPAT -DA64FXASM -DDSLASHINTRIN"
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
* gcc 10.0.1 VLA (merlin)
|
||||
|
||||
../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=g++-10.0.1 CC=gcc-10.0.1 CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static
|
||||
../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=g++-10.0.1 CC=gcc-10.0.1 CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static
|
||||
|
||||
|
||||
* gcc 10.0.1 fixed-size ACLE (merlin)
|
||||
|
||||
../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=g++-10.0.1 CC=gcc-10.0.1 CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN"
|
||||
../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=g++-10.0.1 CC=gcc-10.0.1 CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN"
|
||||
|
||||
|
||||
* gcc 10.0.1 fixed-size ACLE (fjt) w/ MPI
|
||||
@ -46,34 +46,34 @@ export OMPI_CXX=g++-10.0.1
|
||||
export MPICH_CC=gcc-10.0.1
|
||||
export MPICH_CXX=g++-10.0.1
|
||||
|
||||
$ ../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=mpi3 --enable-openmp CXX=mpiFCC CC=mpifcc CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN -DTOFU -I/opt/FJSVxtclanga/tcsds-1.2.25/include/mpi/fujitsu -lrt" LDFLAGS="-L/opt/FJSVxtclanga/tcsds-1.2.25/lib64 -lrt"
|
||||
$ ../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=mpi3 --enable-openmp CXX=mpiFCC CC=mpifcc CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN -DTOFU -I/opt/FJSVxtclanga/tcsds-1.2.25/include/mpi/fujitsu -lrt" LDFLAGS="-L/opt/FJSVxtclanga/tcsds-1.2.25/lib64 -lrt"
|
||||
|
||||
--------------------------------------------------------
|
||||
|
||||
* armclang 20.0 VLA (merlin)
|
||||
|
||||
../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -fno-unroll-loops -mllvm -vectorizer-min-trip-count=2 -march=armv8-a+sve -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static
|
||||
../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -fno-unroll-loops -mllvm -vectorizer-min-trip-count=2 -march=armv8-a+sve -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static
|
||||
|
||||
TODO check ARMCLANGCOMPAT
|
||||
|
||||
|
||||
* armclang 20.1 VLA (merlin)
|
||||
|
||||
../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static
|
||||
../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static
|
||||
|
||||
TODO check ARMCLANGCOMPAT
|
||||
|
||||
|
||||
* armclang 20.1 VLA (fjt cluster)
|
||||
|
||||
../configure --with-lime=$HOME/local --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN -DTOFU"
|
||||
../configure --with-lime=$HOME/local --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN -DTOFU"
|
||||
|
||||
TODO check ARMCLANGCOMPAT
|
||||
|
||||
|
||||
* armclang 20.1 VLA w/MPI (fjt cluster)
|
||||
|
||||
../configure --with-lime=$HOME/local --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=mpi3 --enable-openmp CXX=mpiFCC CC=mpifcc CXXFLAGS="-std=c++11 -mcpu=a64fx -DA64FX -DA64FXASM -DDSLASHINTRIN -DTOFU -I/opt/FJSVxtclanga/tcsds-1.2.25/include/mpi/fujitsu -lrt" LDFLAGS="-L/opt/FJSVxtclanga/tcsds-1.2.25/lib64"
|
||||
../configure --with-lime=$HOME/local --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=mpi3 --enable-openmp CXX=mpiFCC CC=mpifcc CXXFLAGS="-std=c++11 -mcpu=a64fx -DA64FX -DA64FXASM -DDSLASHINTRIN -DTOFU -I/opt/FJSVxtclanga/tcsds-1.2.25/include/mpi/fujitsu -lrt" LDFLAGS="-L/opt/FJSVxtclanga/tcsds-1.2.25/lib64"
|
||||
|
||||
No ARMCLANGCOMPAT -> still correct ?
|
||||
|
||||
@ -81,9 +81,9 @@ No ARMCLANGCOMPAT -> still correct ?
|
||||
|
||||
* Fujitsu fcc
|
||||
|
||||
../configure --with-lime=$HOME/grid-a64fx/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp --with-mpfr=/home/users/gre/gre-1/grid-a64fx/mpfr-build/install CXX=FCC CC=fcc CXXFLAGS="-Nclang -Kfast -DA64FX -DA64FXASM -DDSLASHINTRIN"
|
||||
../configure --with-lime=$HOME/grid-a64fx/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp --with-mpfr=/home/users/gre/gre-1/grid-a64fx/mpfr-build/install CXX=FCC CC=fcc CXXFLAGS="-Nclang -Kfast -DA64FX -DA64FXASM -DDSLASHINTRIN"
|
||||
|
||||
|
||||
* Fujitsu fcc w/ MPI
|
||||
|
||||
../configure --with-lime=$HOME/grid-a64fx/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=mpi --enable-openmp --with-mpfr=/home/users/gre/gre-1/grid-a64fx/mpfr-build/install CXX=mpiFCC CC=mpifcc CXXFLAGS="-Nclang -Kfast -DA64FX -DA64FXASM -DDSLASHINTRIN -DTOFU"
|
||||
../configure --with-lime=$HOME/grid-a64fx/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=mpi --enable-openmp --with-mpfr=/home/users/gre/gre-1/grid-a64fx/mpfr-build/install CXX=mpiFCC CC=mpifcc CXXFLAGS="-Nclang -Kfast -DA64FX -DA64FXASM -DDSLASHINTRIN -DTOFU"
|
||||
|
@ -1,8 +1,16 @@
|
||||
|
||||
#include "Benchmark_IO.hpp"
|
||||
|
||||
#ifndef BENCH_IO_LMIN
|
||||
#define BENCH_IO_LMIN 8
|
||||
#endif
|
||||
|
||||
#ifndef BENCH_IO_LMAX
|
||||
#define BENCH_IO_LMAX 40
|
||||
#define BENCH_IO_LMAX 32
|
||||
#endif
|
||||
|
||||
#ifndef BENCH_IO_NPASS
|
||||
#define BENCH_IO_NPASS 10
|
||||
#endif
|
||||
|
||||
using namespace Grid;
|
||||
@ -12,37 +20,179 @@ std::string filestem(const int l)
|
||||
return "iobench_l" + std::to_string(l);
|
||||
}
|
||||
|
||||
int vol(const int i)
|
||||
{
|
||||
return BENCH_IO_LMIN + 2*i;
|
||||
}
|
||||
|
||||
int volInd(const int l)
|
||||
{
|
||||
return (l - BENCH_IO_LMIN)/2;
|
||||
}
|
||||
|
||||
template <typename Mat>
|
||||
void stats(Mat &mean, Mat &stdDev, const std::vector<Mat> &data)
|
||||
{
|
||||
auto nr = data[0].rows(), nc = data[0].cols();
|
||||
Eigen::MatrixXd sqSum(nr, nc);
|
||||
double n = static_cast<double>(data.size());
|
||||
|
||||
assert(n > 1.);
|
||||
mean = Mat::Zero(nr, nc);
|
||||
sqSum = Mat::Zero(nr, nc);
|
||||
for (auto &d: data)
|
||||
{
|
||||
mean += d;
|
||||
sqSum += d.cwiseProduct(d);
|
||||
}
|
||||
stdDev = ((sqSum - mean.cwiseProduct(mean)/n)/(n - 1.)).cwiseSqrt();
|
||||
mean /= n;
|
||||
}
|
||||
|
||||
#define grid_printf(...) \
|
||||
{\
|
||||
char _buf[1024];\
|
||||
sprintf(_buf, __VA_ARGS__);\
|
||||
MSG << _buf;\
|
||||
}
|
||||
|
||||
enum {sRead = 0, sWrite = 1, gRead = 2, gWrite = 3};
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
#ifdef HAVE_LIME
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
int64_t threads = GridThread::GetThreads();
|
||||
int64_t threads = GridThread::GetThreads();
|
||||
auto mpi = GridDefaultMpi();
|
||||
unsigned int nVol = (BENCH_IO_LMAX - BENCH_IO_LMIN)/2 + 1;
|
||||
unsigned int nRelVol = (BENCH_IO_LMAX - 24)/2 + 1;
|
||||
std::vector<Eigen::MatrixXd> perf(BENCH_IO_NPASS, Eigen::MatrixXd::Zero(nVol, 4));
|
||||
std::vector<Eigen::VectorXd> avPerf(BENCH_IO_NPASS, Eigen::VectorXd::Zero(4));
|
||||
std::vector<int> latt;
|
||||
|
||||
MSG << "Grid is setup to use " << threads << " threads" << std::endl;
|
||||
MSG << SEP << std::endl;
|
||||
MSG << "Benchmark Lime write" << std::endl;
|
||||
MSG << SEP << std::endl;
|
||||
for (int l = 4; l <= BENCH_IO_LMAX; l += 2)
|
||||
MSG << "MPI partition " << mpi << std::endl;
|
||||
for (unsigned int i = 0; i < BENCH_IO_NPASS; ++i)
|
||||
{
|
||||
auto mpi = GridDefaultMpi();
|
||||
std::vector<int> latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
|
||||
MSG << BIGSEP << std::endl;
|
||||
MSG << "Pass " << i + 1 << "/" << BENCH_IO_NPASS << std::endl;
|
||||
MSG << BIGSEP << std::endl;
|
||||
MSG << SEP << std::endl;
|
||||
MSG << "Benchmark std write" << std::endl;
|
||||
MSG << SEP << std::endl;
|
||||
for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
|
||||
{
|
||||
latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
|
||||
|
||||
std::cout << "-- Local volume " << l << "^4" << std::endl;
|
||||
writeBenchmark<LatticeFermion>(latt, filestem(l), limeWrite<LatticeFermion>);
|
||||
MSG << "-- Local volume " << l << "^4" << std::endl;
|
||||
writeBenchmark<LatticeFermion>(latt, filestem(l), stdWrite<LatticeFermion>);
|
||||
perf[i](volInd(l), sWrite) = BinaryIO::lastPerf.mbytesPerSecond;
|
||||
}
|
||||
|
||||
MSG << SEP << std::endl;
|
||||
MSG << "Benchmark std read" << std::endl;
|
||||
MSG << SEP << std::endl;
|
||||
for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
|
||||
{
|
||||
latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
|
||||
|
||||
MSG << "-- Local volume " << l << "^4" << std::endl;
|
||||
readBenchmark<LatticeFermion>(latt, filestem(l), stdRead<LatticeFermion>);
|
||||
perf[i](volInd(l), sRead) = BinaryIO::lastPerf.mbytesPerSecond;
|
||||
}
|
||||
|
||||
#ifdef HAVE_LIME
|
||||
MSG << SEP << std::endl;
|
||||
MSG << "Benchmark Grid C-Lime write" << std::endl;
|
||||
MSG << SEP << std::endl;
|
||||
for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
|
||||
{
|
||||
latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
|
||||
|
||||
MSG << "-- Local volume " << l << "^4" << std::endl;
|
||||
writeBenchmark<LatticeFermion>(latt, filestem(l), limeWrite<LatticeFermion>);
|
||||
perf[i](volInd(l), gWrite) = BinaryIO::lastPerf.mbytesPerSecond;
|
||||
}
|
||||
|
||||
MSG << SEP << std::endl;
|
||||
MSG << "Benchmark Grid C-Lime read" << std::endl;
|
||||
MSG << SEP << std::endl;
|
||||
for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
|
||||
{
|
||||
latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
|
||||
|
||||
MSG << "-- Local volume " << l << "^4" << std::endl;
|
||||
readBenchmark<LatticeFermion>(latt, filestem(l), limeRead<LatticeFermion>);
|
||||
perf[i](volInd(l), gRead) = BinaryIO::lastPerf.mbytesPerSecond;
|
||||
}
|
||||
#endif
|
||||
avPerf[i].fill(0.);
|
||||
for (int f = 0; f < 4; ++f)
|
||||
for (int l = 24; l <= BENCH_IO_LMAX; l += 2)
|
||||
{
|
||||
avPerf[i](f) += perf[i](volInd(l), f);
|
||||
}
|
||||
avPerf[i] /= nRelVol;
|
||||
}
|
||||
|
||||
MSG << "Benchmark Lime read" << std::endl;
|
||||
MSG << SEP << std::endl;
|
||||
for (int l = 4; l <= BENCH_IO_LMAX; l += 2)
|
||||
{
|
||||
auto mpi = GridDefaultMpi();
|
||||
std::vector<int> latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
|
||||
Eigen::MatrixXd mean(nVol, 4), stdDev(nVol, 4), rob(nVol, 4);
|
||||
Eigen::VectorXd avMean(4), avStdDev(4), avRob(4);
|
||||
double n = BENCH_IO_NPASS;
|
||||
|
||||
std::cout << "-- Local volume " << l << "^4" << std::endl;
|
||||
readBenchmark<LatticeFermion>(latt, filestem(l), limeRead<LatticeFermion>);
|
||||
stats(mean, stdDev, perf);
|
||||
stats(avMean, avStdDev, avPerf);
|
||||
rob.fill(100.);
|
||||
rob -= 100.*stdDev.cwiseQuotient(mean.cwiseAbs());
|
||||
avRob.fill(100.);
|
||||
avRob -= 100.*avStdDev.cwiseQuotient(avMean.cwiseAbs());
|
||||
|
||||
MSG << BIGSEP << std::endl;
|
||||
MSG << "SUMMARY" << std::endl;
|
||||
MSG << BIGSEP << std::endl;
|
||||
MSG << "Summary of individual results (all results in MB/s)." << std::endl;
|
||||
MSG << "Every second colum gives the standard deviation of the previous column." << std::endl;
|
||||
MSG << std::endl;
|
||||
grid_printf("%4s %12s %12s %12s %12s %12s %12s %12s %12s\n",
|
||||
"L", "std read", "std dev", "std write", "std dev",
|
||||
"Grid read", "std dev", "Grid write", "std dev");
|
||||
for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
|
||||
{
|
||||
grid_printf("%4d %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n",
|
||||
l, mean(volInd(l), sRead), stdDev(volInd(l), sRead),
|
||||
mean(volInd(l), sWrite), stdDev(volInd(l), sWrite),
|
||||
mean(volInd(l), gRead), stdDev(volInd(l), gRead),
|
||||
mean(volInd(l), gWrite), stdDev(volInd(l), gWrite));
|
||||
}
|
||||
MSG << std::endl;
|
||||
MSG << "Robustness of individual results, in \%. (rob = 100\% - std dev / mean)" << std::endl;
|
||||
MSG << std::endl;
|
||||
grid_printf("%4s %12s %12s %12s %12s\n",
|
||||
"L", "std read", "std write", "Grid read", "Grid write");
|
||||
for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
|
||||
{
|
||||
grid_printf("%4d %12.1f %12.1f %12.1f %12.1f\n",
|
||||
l, rob(volInd(l), sRead), rob(volInd(l), sWrite),
|
||||
rob(volInd(l), gRead), rob(volInd(l), gWrite));
|
||||
}
|
||||
MSG << std::endl;
|
||||
MSG << "Summary of results averaged over local volumes 24^4-" << BENCH_IO_LMAX << "^4 (all results in MB/s)." << std::endl;
|
||||
MSG << "Every second colum gives the standard deviation of the previous column." << std::endl;
|
||||
MSG << std::endl;
|
||||
grid_printf("%12s %12s %12s %12s %12s %12s %12s %12s\n",
|
||||
"std read", "std dev", "std write", "std dev",
|
||||
"Grid read", "std dev", "Grid write", "std dev");
|
||||
grid_printf("%12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n",
|
||||
avMean(sRead), avStdDev(sRead), avMean(sWrite), avStdDev(sWrite),
|
||||
avMean(gRead), avStdDev(gRead), avMean(gWrite), avStdDev(gWrite));
|
||||
MSG << std::endl;
|
||||
MSG << "Robustness of volume-averaged results, in \%. (rob = 100\% - std dev / mean)" << std::endl;
|
||||
MSG << std::endl;
|
||||
grid_printf("%12s %12s %12s %12s\n",
|
||||
"std read", "std write", "Grid read", "Grid write");
|
||||
grid_printf("%12.1f %12.1f %12.1f %12.1f\n",
|
||||
avRob(sRead), avRob(sWrite), avRob(gRead), avRob(gWrite));
|
||||
|
||||
Grid_finalize();
|
||||
#endif
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
@ -5,6 +5,8 @@
|
||||
#ifdef HAVE_LIME
|
||||
#define MSG std::cout << GridLogMessage
|
||||
#define SEP \
|
||||
"-----------------------------------------------------------------------------"
|
||||
#define BIGSEP \
|
||||
"============================================================================="
|
||||
|
||||
namespace Grid {
|
||||
@ -14,13 +16,152 @@ using WriterFn = std::function<void(const std::string, Field &)> ;
|
||||
template <typename Field>
|
||||
using ReaderFn = std::function<void(Field &, const std::string)>;
|
||||
|
||||
// AP 06/10/2020: Standard C version in case one is suspicious of the C++ API
|
||||
//
|
||||
// template <typename Field>
|
||||
// void stdWrite(const std::string filestem, Field &vec)
|
||||
// {
|
||||
// std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
||||
// std::FILE *file = std::fopen((filestem + "." + rankStr + ".bin").c_str(), "wb");
|
||||
// size_t size;
|
||||
// uint32_t crc;
|
||||
// GridStopWatch ioWatch, crcWatch;
|
||||
|
||||
// size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object);
|
||||
// autoView(vec_v, vec, CpuRead);
|
||||
// crcWatch.Start();
|
||||
// crc = GridChecksum::crc32(vec_v.cpu_ptr, size);
|
||||
// std::fwrite(&crc, sizeof(uint32_t), 1, file);
|
||||
// crcWatch.Stop();
|
||||
// MSG << "Std I/O write: Data CRC32 " << std::hex << crc << std::dec << std::endl;
|
||||
// ioWatch.Start();
|
||||
// std::fwrite(vec_v.cpu_ptr, sizeof(typename Field::scalar_object), vec.Grid()->lSites(), file);
|
||||
// ioWatch.Stop();
|
||||
// std::fclose(file);
|
||||
// size *= vec.Grid()->ProcessorCount();
|
||||
// auto &p = BinaryIO::lastPerf;
|
||||
// p.size = size;
|
||||
// p.time = ioWatch.useconds();
|
||||
// p.mbytesPerSecond = size/1024./1024./(ioWatch.useconds()/1.e6);
|
||||
// MSG << "Std I/O write: Wrote " << p.size << " bytes in " << ioWatch.Elapsed()
|
||||
// << ", " << p.mbytesPerSecond << " MB/s" << std::endl;
|
||||
// MSG << "Std I/O write: checksum overhead " << crcWatch.Elapsed() << std::endl;
|
||||
// }
|
||||
//
|
||||
// template <typename Field>
|
||||
// void stdRead(Field &vec, const std::string filestem)
|
||||
// {
|
||||
// std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
||||
// std::FILE *file = std::fopen((filestem + "." + rankStr + ".bin").c_str(), "rb");
|
||||
// size_t size;
|
||||
// uint32_t crcRead, crcData;
|
||||
// GridStopWatch ioWatch, crcWatch;
|
||||
|
||||
// size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object);
|
||||
// crcWatch.Start();
|
||||
// std::fread(&crcRead, sizeof(uint32_t), 1, file);
|
||||
// crcWatch.Stop();
|
||||
// {
|
||||
// autoView(vec_v, vec, CpuWrite);
|
||||
// ioWatch.Start();
|
||||
// std::fread(vec_v.cpu_ptr, sizeof(typename Field::scalar_object), vec.Grid()->lSites(), file);
|
||||
// ioWatch.Stop();
|
||||
// std::fclose(file);
|
||||
// }
|
||||
// {
|
||||
// autoView(vec_v, vec, CpuRead);
|
||||
// crcWatch.Start();
|
||||
// crcData = GridChecksum::crc32(vec_v.cpu_ptr, size);
|
||||
// crcWatch.Stop();
|
||||
// }
|
||||
// MSG << "Std I/O read: Data CRC32 " << std::hex << crcData << std::dec << std::endl;
|
||||
// assert(crcData == crcRead);
|
||||
// size *= vec.Grid()->ProcessorCount();
|
||||
// auto &p = BinaryIO::lastPerf;
|
||||
// p.size = size;
|
||||
// p.time = ioWatch.useconds();
|
||||
// p.mbytesPerSecond = size/1024./1024./(ioWatch.useconds()/1.e6);
|
||||
// MSG << "Std I/O read: Read " << p.size << " bytes in " << ioWatch.Elapsed()
|
||||
// << ", " << p.mbytesPerSecond << " MB/s" << std::endl;
|
||||
// MSG << "Std I/O read: checksum overhead " << crcWatch.Elapsed() << std::endl;
|
||||
// }
|
||||
|
||||
template <typename Field>
|
||||
void stdWrite(const std::string filestem, Field &vec)
|
||||
{
|
||||
std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
||||
std::ofstream file(filestem + "." + rankStr + ".bin", std::ios::out | std::ios::binary);
|
||||
size_t size, sizec;
|
||||
uint32_t crc;
|
||||
GridStopWatch ioWatch, crcWatch;
|
||||
|
||||
size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object);
|
||||
sizec = size/sizeof(char); // just in case of...
|
||||
autoView(vec_v, vec, CpuRead);
|
||||
crcWatch.Start();
|
||||
crc = GridChecksum::crc32(vec_v.cpu_ptr, size);
|
||||
file.write(reinterpret_cast<char *>(&crc), sizeof(uint32_t)/sizeof(char));
|
||||
crcWatch.Stop();
|
||||
MSG << "Std I/O write: Data CRC32 " << std::hex << crc << std::dec << std::endl;
|
||||
ioWatch.Start();
|
||||
file.write(reinterpret_cast<char *>(vec_v.cpu_ptr), sizec);
|
||||
file.flush();
|
||||
ioWatch.Stop();
|
||||
size *= vec.Grid()->ProcessorCount();
|
||||
auto &p = BinaryIO::lastPerf;
|
||||
p.size = size;
|
||||
p.time = ioWatch.useconds();
|
||||
p.mbytesPerSecond = size/1024./1024./(ioWatch.useconds()/1.e6);
|
||||
MSG << "Std I/O write: Wrote " << p.size << " bytes in " << ioWatch.Elapsed()
|
||||
<< ", " << p.mbytesPerSecond << " MB/s" << std::endl;
|
||||
MSG << "Std I/O write: checksum overhead " << crcWatch.Elapsed() << std::endl;
|
||||
}
|
||||
|
||||
template <typename Field>
|
||||
void stdRead(Field &vec, const std::string filestem)
|
||||
{
|
||||
std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
||||
std::ifstream file(filestem + "." + rankStr + ".bin", std::ios::in | std::ios::binary);
|
||||
size_t size, sizec;
|
||||
uint32_t crcRead, crcData;
|
||||
GridStopWatch ioWatch, crcWatch;
|
||||
|
||||
size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object);
|
||||
sizec = size/sizeof(char); // just in case of...
|
||||
crcWatch.Start();
|
||||
file.read(reinterpret_cast<char *>(&crcRead), sizeof(uint32_t)/sizeof(char));
|
||||
crcWatch.Stop();
|
||||
{
|
||||
autoView(vec_v, vec, CpuWrite);
|
||||
ioWatch.Start();
|
||||
file.read(reinterpret_cast<char *>(vec_v.cpu_ptr), sizec);
|
||||
ioWatch.Stop();
|
||||
}
|
||||
{
|
||||
autoView(vec_v, vec, CpuRead);
|
||||
crcWatch.Start();
|
||||
crcData = GridChecksum::crc32(vec_v.cpu_ptr, size);
|
||||
crcWatch.Stop();
|
||||
}
|
||||
MSG << "Std I/O read: Data CRC32 " << std::hex << crcData << std::dec << std::endl;
|
||||
assert(crcData == crcRead);
|
||||
size *= vec.Grid()->ProcessorCount();
|
||||
auto &p = BinaryIO::lastPerf;
|
||||
p.size = size;
|
||||
p.time = ioWatch.useconds();
|
||||
p.mbytesPerSecond = size/1024./1024./(ioWatch.useconds()/1.e6);
|
||||
MSG << "Std I/O read: Read " << p.size << " bytes in " << ioWatch.Elapsed()
|
||||
<< ", " << p.mbytesPerSecond << " MB/s" << std::endl;
|
||||
MSG << "Std I/O read: checksum overhead " << crcWatch.Elapsed() << std::endl;
|
||||
}
|
||||
|
||||
template <typename Field>
|
||||
void limeWrite(const std::string filestem, Field &vec)
|
||||
{
|
||||
emptyUserRecord record;
|
||||
ScidacWriter binWriter(vec.Grid()->IsBoss());
|
||||
|
||||
binWriter.open(filestem + ".bin");
|
||||
binWriter.open(filestem + ".lime.bin");
|
||||
binWriter.writeScidacFieldRecord(vec, record);
|
||||
binWriter.close();
|
||||
}
|
||||
@ -31,7 +172,7 @@ void limeRead(Field &vec, const std::string filestem)
|
||||
emptyUserRecord record;
|
||||
ScidacReader binReader;
|
||||
|
||||
binReader.open(filestem + ".bin");
|
||||
binReader.open(filestem + ".lime.bin");
|
||||
binReader.readScidacFieldRecord(vec, record);
|
||||
binReader.close();
|
||||
}
|
||||
@ -73,12 +214,18 @@ void writeBenchmark(const Coordinate &latt, const std::string filename,
|
||||
auto simd = GridDefaultSimd(latt.size(), Field::vector_type::Nsimd());
|
||||
std::shared_ptr<GridCartesian> gBasePt(SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi));
|
||||
std::shared_ptr<GridBase> gPt;
|
||||
std::random_device rd;
|
||||
|
||||
makeGrid(gPt, gBasePt, Ls, rb);
|
||||
|
||||
GridBase *g = gPt.get();
|
||||
GridParallelRNG rng(g);
|
||||
Field vec(g);
|
||||
GridBase *g = gPt.get();
|
||||
GridParallelRNG rng(g);
|
||||
Field vec(g);
|
||||
|
||||
rng.SeedFixedIntegers({static_cast<int>(rd()), static_cast<int>(rd()),
|
||||
static_cast<int>(rd()), static_cast<int>(rd()),
|
||||
static_cast<int>(rd()), static_cast<int>(rd()),
|
||||
static_cast<int>(rd()), static_cast<int>(rd())});
|
||||
|
||||
random(rng, vec);
|
||||
write(filename, vec);
|
||||
@ -96,8 +243,8 @@ void readBenchmark(const Coordinate &latt, const std::string filename,
|
||||
|
||||
makeGrid(gPt, gBasePt, Ls, rb);
|
||||
|
||||
GridBase *g = gPt.get();
|
||||
Field vec(g);
|
||||
GridBase *g = gPt.get();
|
||||
Field vec(g);
|
||||
|
||||
read(vec, filename);
|
||||
}
|
||||
|
@ -1,14 +1,9 @@
|
||||
#include "Benchmark_IO.hpp"
|
||||
|
||||
#define MSG std::cout << GridLogMessage
|
||||
#define SEP \
|
||||
"============================================================================="
|
||||
|
||||
using namespace Grid;
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
#ifdef HAVE_LIME
|
||||
std::vector<std::string> dir;
|
||||
unsigned int Ls;
|
||||
bool rb;
|
||||
@ -34,46 +29,71 @@ int main (int argc, char ** argv)
|
||||
}
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
|
||||
int64_t threads = GridThread::GetThreads();
|
||||
auto mpi = GridDefaultMpi();
|
||||
|
||||
MSG << "Grid is setup to use " << threads << " threads" << std::endl;
|
||||
MSG << SEP << std::endl;
|
||||
MSG << "Benchmark double precision Lime write" << std::endl;
|
||||
MSG << SEP << std::endl;
|
||||
for (auto &d: dir)
|
||||
{
|
||||
MSG << "-- Directory " << d << std::endl;
|
||||
writeBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench", limeWrite<LatticeFermion>, Ls, rb);
|
||||
}
|
||||
MSG << "MPI partition " << mpi << std::endl;
|
||||
|
||||
MSG << SEP << std::endl;
|
||||
MSG << "Benchmark double precision Lime read" << std::endl;
|
||||
MSG << "Benchmark Grid std write" << std::endl;
|
||||
MSG << SEP << std::endl;
|
||||
for (auto &d: dir)
|
||||
{
|
||||
MSG << "-- Directory " << d << std::endl;
|
||||
readBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench", limeRead<LatticeFermion>, Ls, rb);
|
||||
writeBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench",
|
||||
stdWrite<LatticeFermion>, Ls, rb);
|
||||
}
|
||||
MSG << SEP << std::endl;
|
||||
MSG << "Benchmark Grid std read" << std::endl;
|
||||
MSG << SEP << std::endl;
|
||||
for (auto &d: dir)
|
||||
{
|
||||
MSG << "-- Directory " << d << std::endl;
|
||||
readBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench",
|
||||
stdRead<LatticeFermion>, Ls, rb);
|
||||
}
|
||||
|
||||
#ifdef HAVE_LIME
|
||||
MSG << SEP << std::endl;
|
||||
MSG << "Benchmark single precision Lime write" << std::endl;
|
||||
MSG << "Benchmark Grid C-Lime write" << std::endl;
|
||||
MSG << SEP << std::endl;
|
||||
for (auto &d: dir)
|
||||
{
|
||||
MSG << "-- Directory " << d << std::endl;
|
||||
writeBenchmark<LatticeFermionF>(GridDefaultLatt(), d + "/ioBench", limeWrite<LatticeFermionF>, Ls, rb);
|
||||
writeBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench",
|
||||
limeWrite<LatticeFermion>, Ls, rb);
|
||||
}
|
||||
MSG << SEP << std::endl;
|
||||
MSG << "Benchmark Grid C-Lime read" << std::endl;
|
||||
MSG << SEP << std::endl;
|
||||
for (auto &d: dir)
|
||||
{
|
||||
MSG << "-- Directory " << d << std::endl;
|
||||
readBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench",
|
||||
limeRead<LatticeFermion>, Ls, rb);
|
||||
}
|
||||
#endif
|
||||
|
||||
MSG << SEP << std::endl;
|
||||
MSG << "Benchmark single precision Lime read" << std::endl;
|
||||
MSG << SEP << std::endl;
|
||||
for (auto &d: dir)
|
||||
{
|
||||
MSG << "-- Directory " << d << std::endl;
|
||||
readBenchmark<LatticeFermionF>(GridDefaultLatt(), d + "/ioBench", limeRead<LatticeFermionF>, Ls, rb);
|
||||
}
|
||||
// MSG << SEP << std::endl;
|
||||
// MSG << "Benchmark single precision Lime write" << std::endl;
|
||||
// MSG << SEP << std::endl;
|
||||
// for (auto &d: dir)
|
||||
// {
|
||||
// MSG << "-- Directory " << d << std::endl;
|
||||
// writeBenchmark<LatticeFermionF>(GridDefaultLatt(), d + "/ioBench", limeWrite<LatticeFermionF>, Ls, rb);
|
||||
// }
|
||||
|
||||
// MSG << SEP << std::endl;
|
||||
// MSG << "Benchmark single precision Lime read" << std::endl;
|
||||
// MSG << SEP << std::endl;
|
||||
// for (auto &d: dir)
|
||||
// {
|
||||
// MSG << "-- Directory " << d << std::endl;
|
||||
// readBenchmark<LatticeFermionF>(GridDefaultLatt(), d + "/ioBench", limeRead<LatticeFermionF>, Ls, rb);
|
||||
// }
|
||||
|
||||
Grid_finalize();
|
||||
#endif
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
/*************************************************************************************
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
@ -62,7 +62,7 @@ struct time_statistics{
|
||||
|
||||
void comms_header(){
|
||||
std::cout <<GridLogMessage << " L "<<"\t"<<" Ls "<<"\t"
|
||||
<<std::setw(11)<<"bytes"<<"MB/s uni (err/min/max)"<<"\t\t"<<"MB/s bidi (err/min/max)"<<std::endl;
|
||||
<<"bytes\t MB/s uni (err/min/max) \t\t MB/s bidi (err/min/max)"<<std::endl;
|
||||
};
|
||||
|
||||
Gamma::Algebra Gmu [] = {
|
||||
@ -125,7 +125,7 @@ public:
|
||||
lat*mpi_layout[1],
|
||||
lat*mpi_layout[2],
|
||||
lat*mpi_layout[3]});
|
||||
std::cout << GridLogMessage<< latt_size <<std::endl;
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
RealD Nrank = Grid._Nprocessors;
|
||||
RealD Nnode = Grid.NodeCount();
|
||||
@ -137,8 +137,8 @@ public:
|
||||
for(int d=0;d<8;d++){
|
||||
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
// bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
// bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
}
|
||||
|
||||
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||
@ -189,11 +189,11 @@ public:
|
||||
// double rbytes = dbytes*0.5;
|
||||
double bidibytes = dbytes;
|
||||
|
||||
std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
|
||||
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
|
||||
<<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
|
||||
std::cout<<GridLogMessage << lat<<"\t"<<Ls<<"\t "
|
||||
<< bytes << " \t "
|
||||
<<xbytes/timestat.mean<<" \t "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " \t "
|
||||
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
|
||||
<< "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
|
||||
<< "\t\t"<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
|
||||
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
|
||||
|
||||
}
|
||||
@ -224,7 +224,7 @@ public:
|
||||
|
||||
|
||||
uint64_t lmax=32;
|
||||
#define NLOOP (100*lmax*lmax*lmax*lmax/lat/lat/lat/lat)
|
||||
#define NLOOP (1000*lmax*lmax*lmax*lmax/lat/lat/lat/lat)
|
||||
|
||||
GridSerialRNG sRNG; sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
||||
for(int lat=8;lat<=lmax;lat+=8){
|
||||
@ -249,11 +249,6 @@ public:
|
||||
double start=usecond();
|
||||
for(int i=0;i<Nloop;i++){
|
||||
z=a*x-y;
|
||||
autoView( x_v , x, CpuWrite);
|
||||
autoView( y_v , y, CpuWrite);
|
||||
autoView( z_v , z, CpuRead);
|
||||
x_v[0]=z_v[0]; // force serial dependency to prevent optimise away
|
||||
y_v[4]=z_v[4];
|
||||
}
|
||||
double stop=usecond();
|
||||
double time = (stop-start)/Nloop*1000;
|
||||
@ -286,7 +281,7 @@ public:
|
||||
|
||||
|
||||
uint64_t lmax=32;
|
||||
#define NLOOP (100*lmax*lmax*lmax*lmax/lat/lat/lat/lat)
|
||||
#define NLOOP (1000*lmax*lmax*lmax*lmax/lat/lat/lat/lat)
|
||||
|
||||
GridSerialRNG sRNG; sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
||||
for(int lat=8;lat<=lmax;lat+=8){
|
||||
@ -309,11 +304,6 @@ public:
|
||||
double start=usecond();
|
||||
for(int i=0;i<Nloop;i++){
|
||||
z=x*y;
|
||||
autoView( x_v , x, CpuWrite);
|
||||
autoView( y_v , y, CpuWrite);
|
||||
autoView( z_v , z, CpuRead);
|
||||
x_v[0]=z_v[0]; // force serial dependency to prevent optimise away
|
||||
y_v[4]=z_v[4];
|
||||
}
|
||||
double stop=usecond();
|
||||
double time = (stop-start)/Nloop*1000;
|
||||
@ -358,6 +348,7 @@ public:
|
||||
///////// Welcome message ////////////
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "Benchmark DWF on "<<L<<"^4 local volume "<<std::endl;
|
||||
std::cout<<GridLogMessage << "* Nc : "<<Nc<<std::endl;
|
||||
std::cout<<GridLogMessage << "* Global volume : "<<GridCmdVectorIntToString(latt4)<<std::endl;
|
||||
std::cout<<GridLogMessage << "* Ls : "<<Ls<<std::endl;
|
||||
std::cout<<GridLogMessage << "* ranks : "<<NP <<std::endl;
|
||||
@ -386,7 +377,7 @@ public:
|
||||
typedef LatticeGaugeFieldF Gauge;
|
||||
|
||||
///////// Source preparation ////////////
|
||||
Gauge Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
Gauge Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
Fermion src (FGrid); random(RNG5,src);
|
||||
Fermion src_e (FrbGrid);
|
||||
Fermion src_o (FrbGrid);
|
||||
@ -431,7 +422,7 @@ public:
|
||||
}
|
||||
FGrid->Barrier();
|
||||
double t1=usecond();
|
||||
uint64_t ncall = 50;
|
||||
uint64_t ncall = 500;
|
||||
|
||||
FGrid->Broadcast(0,&ncall,sizeof(ncall));
|
||||
|
||||
@ -449,7 +440,13 @@ public:
|
||||
FGrid->Barrier();
|
||||
|
||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||
double flops=(1344.0*volume)/2;
|
||||
|
||||
// Nc=3 gives
|
||||
// 1344= 3*(2*8+6)*2*8 + 8*3*2*2 + 3*4*2*8
|
||||
// 1344 = Nc* (6+(Nc-1)*8)*2*Nd + Nd*Nc*2*2 + Nd*Nc*Ns*2
|
||||
// double flops=(1344.0*volume)/2;
|
||||
double fps = Nc* (6+(Nc-1)*8)*Ns*Nd + Nd*Nc*Ns + Nd*Nc*Ns*2;
|
||||
double flops=(fps*volume)/2;
|
||||
double mf_hi, mf_lo, mf_err;
|
||||
|
||||
timestat.statistics(t_time);
|
||||
@ -464,6 +461,7 @@ public:
|
||||
if ( mflops>mflops_best ) mflops_best = mflops;
|
||||
if ( mflops<mflops_worst) mflops_worst= mflops;
|
||||
|
||||
std::cout<<GridLogMessage<< "Deo FlopsPerSite is "<<fps<<std::endl;
|
||||
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s = "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl;
|
||||
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per rank "<< mflops/NP<<std::endl;
|
||||
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per node "<< mflops/NN<<std::endl;
|
||||
@ -540,7 +538,7 @@ public:
|
||||
typedef typename Action::FermionField Fermion;
|
||||
typedef LatticeGaugeFieldF Gauge;
|
||||
|
||||
Gauge Umu(FGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
Gauge Umu(FGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
|
||||
typename Action::ImplParams params;
|
||||
Action Ds(Umu,Umu,*FGrid,*FrbGrid,mass,c1,c2,u0,params);
|
||||
@ -698,7 +696,7 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << " Summary table Ls="<<Ls <<std::endl;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "L \t\t Wilson \t\t DWF4 \t\tt Staggered" <<std::endl;
|
||||
std::cout<<GridLogMessage << "L \t\t Wilson \t\t DWF4 \t\t Staggered" <<std::endl;
|
||||
for(int l=0;l<L_list.size();l++){
|
||||
std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]<<" \t\t "<<dwf4[l] << " \t\t "<< staggered[l]<<std::endl;
|
||||
}
|
||||
@ -729,9 +727,9 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << " Per Node Summary table Ls="<<Ls <<std::endl;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << " L \t\t Wilson\t\t DWF4 " <<std::endl;
|
||||
std::cout<<GridLogMessage << " L \t\t Wilson\t\t DWF4\t\t Staggered " <<std::endl;
|
||||
for(int l=0;l<L_list.size();l++){
|
||||
std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]/NN<<" \t "<<dwf4[l]/NN<<std::endl;
|
||||
std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]/NN<<" \t "<<dwf4[l]/NN<< " \t "<<staggered[l]/NN<<std::endl;
|
||||
}
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
|
||||
|
@ -108,7 +108,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
std::cout << GridLogMessage << "Drawing gauge field" << std::endl;
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
SU3::HotConfiguration(RNG4,Umu);
|
||||
SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
std::cout << GridLogMessage << "Random gauge initialised " << std::endl;
|
||||
#if 0
|
||||
Umu=1.0;
|
||||
|
364
benchmarks/Benchmark_dwf_fp32.cc
Normal file
364
benchmarks/Benchmark_dwf_fp32.cc
Normal file
@ -0,0 +1,364 @@
|
||||
/*************************************************************************************
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
Source file: ./benchmarks/Benchmark_dwf.cc
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/Grid.h>
|
||||
#ifdef GRID_CUDA
|
||||
#define CUDA_PROFILE
|
||||
#endif
|
||||
|
||||
#ifdef CUDA_PROFILE
|
||||
#include <cuda_profiler_api.h>
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
|
||||
template<class d>
|
||||
struct scal {
|
||||
d internal;
|
||||
};
|
||||
|
||||
Gamma::Algebra Gmu [] = {
|
||||
Gamma::Algebra::GammaX,
|
||||
Gamma::Algebra::GammaY,
|
||||
Gamma::Algebra::GammaZ,
|
||||
Gamma::Algebra::GammaT
|
||||
};
|
||||
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
|
||||
int threads = GridThread::GetThreads();
|
||||
|
||||
Coordinate latt4 = GridDefaultLatt();
|
||||
int Ls=8;
|
||||
for(int i=0;i<argc;i++)
|
||||
if(std::string(argv[i]) == "-Ls"){
|
||||
std::stringstream ss(argv[i+1]); ss >> Ls;
|
||||
}
|
||||
|
||||
GridLogLayout();
|
||||
|
||||
long unsigned int single_site_flops = 8*Nc*(7+16*Nc);
|
||||
|
||||
|
||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
|
||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||
|
||||
std::cout << GridLogMessage << "Making s innermost grids"<<std::endl;
|
||||
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(),GridDefaultMpi());
|
||||
GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
|
||||
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
|
||||
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
|
||||
|
||||
std::vector<int> seeds4({1,2,3,4});
|
||||
std::vector<int> seeds5({5,6,7,8});
|
||||
|
||||
std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl;
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedUniqueString(std::string("The 4D RNG"));
|
||||
std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl;
|
||||
GridParallelRNG RNG5(FGrid); RNG5.SeedUniqueString(std::string("The 5D RNG"));
|
||||
std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
|
||||
|
||||
LatticeFermionF src (FGrid); random(RNG5,src);
|
||||
#if 0
|
||||
src = Zero();
|
||||
{
|
||||
Coordinate origin({0,0,0,latt4[2]-1,0});
|
||||
SpinColourVectorF tmp;
|
||||
tmp=Zero();
|
||||
tmp()(0)(0)=Complex(-2.0,0.0);
|
||||
std::cout << " source site 0 " << tmp<<std::endl;
|
||||
pokeSite(tmp,src,origin);
|
||||
}
|
||||
#else
|
||||
RealD N2 = 1.0/::sqrt(norm2(src));
|
||||
src = src*N2;
|
||||
#endif
|
||||
|
||||
|
||||
LatticeFermionF result(FGrid); result=Zero();
|
||||
LatticeFermionF ref(FGrid); ref=Zero();
|
||||
LatticeFermionF tmp(FGrid);
|
||||
LatticeFermionF err(FGrid);
|
||||
|
||||
std::cout << GridLogMessage << "Drawing gauge field" << std::endl;
|
||||
LatticeGaugeFieldF Umu(UGrid);
|
||||
SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
std::cout << GridLogMessage << "Random gauge initialised " << std::endl;
|
||||
#if 0
|
||||
Umu=1.0;
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
LatticeColourMatrixF ttmp(UGrid);
|
||||
ttmp = PeekIndex<LorentzIndex>(Umu,mu);
|
||||
// if (mu !=2 ) ttmp = 0;
|
||||
// ttmp = ttmp* pow(10.0,mu);
|
||||
PokeIndex<LorentzIndex>(Umu,ttmp,mu);
|
||||
}
|
||||
std::cout << GridLogMessage << "Forced to diagonal " << std::endl;
|
||||
#endif
|
||||
|
||||
////////////////////////////////////
|
||||
// Naive wilson implementation
|
||||
////////////////////////////////////
|
||||
// replicate across fifth dimension
|
||||
LatticeGaugeFieldF Umu5d(FGrid);
|
||||
std::vector<LatticeColourMatrixF> U(4,FGrid);
|
||||
{
|
||||
autoView( Umu5d_v, Umu5d, CpuWrite);
|
||||
autoView( Umu_v , Umu , CpuRead);
|
||||
for(int ss=0;ss<Umu.Grid()->oSites();ss++){
|
||||
for(int s=0;s<Ls;s++){
|
||||
Umu5d_v[Ls*ss+s] = Umu_v[ss];
|
||||
}
|
||||
}
|
||||
}
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu);
|
||||
}
|
||||
std::cout << GridLogMessage << "Setting up Cshift based reference " << std::endl;
|
||||
|
||||
if (1)
|
||||
{
|
||||
ref = Zero();
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
||||
tmp = U[mu]*Cshift(src,mu+1,1);
|
||||
ref=ref + tmp - Gamma(Gmu[mu])*tmp;
|
||||
|
||||
tmp =adj(U[mu])*src;
|
||||
tmp =Cshift(tmp,mu+1,-1);
|
||||
ref=ref + tmp + Gamma(Gmu[mu])*tmp;
|
||||
}
|
||||
ref = -0.5*ref;
|
||||
}
|
||||
|
||||
RealD mass=0.1;
|
||||
RealD M5 =1.8;
|
||||
|
||||
RealD NP = UGrid->_Nprocessors;
|
||||
RealD NN = UGrid->NodeCount();
|
||||
|
||||
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||
std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
|
||||
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||
std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::Dhop "<<std::endl;
|
||||
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplexF::Nsimd()<<std::endl;
|
||||
std::cout << GridLogMessage<< "* VComplexF size is "<<sizeof(vComplexF)<< " B"<<std::endl;
|
||||
if ( sizeof(RealF)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
|
||||
if ( sizeof(RealF)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
|
||||
#ifdef GRID_OMP
|
||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
|
||||
#endif
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||
|
||||
DomainWallFermionF Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||
int ncall =1000;
|
||||
|
||||
if (1) {
|
||||
FGrid->Barrier();
|
||||
Dw.ZeroCounters();
|
||||
Dw.Dhop(src,result,0);
|
||||
std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
|
||||
double t0=usecond();
|
||||
for(int i=0;i<ncall;i++){
|
||||
__SSC_START;
|
||||
Dw.Dhop(src,result,0);
|
||||
__SSC_STOP;
|
||||
}
|
||||
double t1=usecond();
|
||||
FGrid->Barrier();
|
||||
|
||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||
double flops=single_site_flops*volume*ncall;
|
||||
|
||||
auto nsimd = vComplex::Nsimd();
|
||||
auto simdwidth = sizeof(vComplex);
|
||||
|
||||
// RF: Nd Wilson * Ls, Nd gauge * Ls, Nc colors
|
||||
double data_rf = volume * ((2*Nd+1)*Nd*Nc + 2*Nd*Nc*Nc) * simdwidth / nsimd * ncall / (1024.*1024.*1024.);
|
||||
|
||||
// mem: Nd Wilson * Ls, Nd gauge, Nc colors
|
||||
double data_mem = (volume * (2*Nd+1)*Nd*Nc + (volume/Ls) *2*Nd*Nc*Nc) * simdwidth / nsimd * ncall / (1024.*1024.*1024.);
|
||||
|
||||
std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
|
||||
// std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||
// std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
|
||||
std::cout<<GridLogMessage << "RF GiB/s (base 2) = "<< 1000000. * data_rf/((t1-t0))<<std::endl;
|
||||
std::cout<<GridLogMessage << "mem GiB/s (base 2) = "<< 1000000. * data_mem/((t1-t0))<<std::endl;
|
||||
err = ref-result;
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||
//exit(0);
|
||||
|
||||
if(( norm2(err)>1.0e-4) ) {
|
||||
/*
|
||||
std::cout << "RESULT\n " << result<<std::endl;
|
||||
std::cout << "REF \n " << ref <<std::endl;
|
||||
std::cout << "ERR \n " << err <<std::endl;
|
||||
*/
|
||||
std::cout<<GridLogMessage << "WRONG RESULT" << std::endl;
|
||||
FGrid->Barrier();
|
||||
exit(-1);
|
||||
}
|
||||
assert (norm2(err)< 1.0e-4 );
|
||||
Dw.Report();
|
||||
}
|
||||
|
||||
if (1)
|
||||
{ // Naive wilson dag implementation
|
||||
ref = Zero();
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
||||
// ref = src - Gamma(Gamma::Algebra::GammaX)* src ; // 1+gamma_x
|
||||
tmp = U[mu]*Cshift(src,mu+1,1);
|
||||
{
|
||||
autoView( ref_v, ref, CpuWrite);
|
||||
autoView( tmp_v, tmp, CpuRead);
|
||||
for(int i=0;i<ref_v.size();i++){
|
||||
ref_v[i]+= tmp_v[i] + Gamma(Gmu[mu])*tmp_v[i]; ;
|
||||
}
|
||||
}
|
||||
|
||||
tmp =adj(U[mu])*src;
|
||||
tmp =Cshift(tmp,mu+1,-1);
|
||||
{
|
||||
autoView( ref_v, ref, CpuWrite);
|
||||
autoView( tmp_v, tmp, CpuRead);
|
||||
for(int i=0;i<ref_v.size();i++){
|
||||
ref_v[i]+= tmp_v[i] - Gamma(Gmu[mu])*tmp_v[i]; ;
|
||||
}
|
||||
}
|
||||
}
|
||||
ref = -0.5*ref;
|
||||
}
|
||||
// dump=1;
|
||||
Dw.Dhop(src,result,1);
|
||||
std::cout << GridLogMessage << "Compare to naive wilson implementation Dag to verify correctness" << std::endl;
|
||||
std::cout<<GridLogMessage << "Called DwDag"<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm dag result "<< norm2(result)<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm dag ref "<< norm2(ref)<<std::endl;
|
||||
err = ref-result;
|
||||
std::cout<<GridLogMessage << "norm dag diff "<< norm2(err)<<std::endl;
|
||||
if((norm2(err)>1.0e-4)){
|
||||
/*
|
||||
std::cout<< "DAG RESULT\n " <<ref << std::endl;
|
||||
std::cout<< "DAG sRESULT\n " <<result << std::endl;
|
||||
std::cout<< "DAG ERR \n " << err <<std::endl;
|
||||
*/
|
||||
}
|
||||
LatticeFermionF src_e (FrbGrid);
|
||||
LatticeFermionF src_o (FrbGrid);
|
||||
LatticeFermionF r_e (FrbGrid);
|
||||
LatticeFermionF r_o (FrbGrid);
|
||||
LatticeFermionF r_eo (FGrid);
|
||||
|
||||
std::cout<<GridLogMessage << "Calling Deo and Doe and //assert Deo+Doe == Dunprec"<<std::endl;
|
||||
pickCheckerboard(Even,src_e,src);
|
||||
pickCheckerboard(Odd,src_o,src);
|
||||
|
||||
std::cout<<GridLogMessage << "src_e"<<norm2(src_e)<<std::endl;
|
||||
std::cout<<GridLogMessage << "src_o"<<norm2(src_o)<<std::endl;
|
||||
|
||||
|
||||
// S-direction is INNERMOST and takes no part in the parity.
|
||||
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
||||
std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionF::DhopEO "<<std::endl;
|
||||
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplexF::Nsimd()<<std::endl;
|
||||
if ( sizeof(RealF)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
|
||||
if ( sizeof(RealF)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
|
||||
#ifdef GRID_OMP
|
||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
|
||||
#endif
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
||||
{
|
||||
Dw.ZeroCounters();
|
||||
FGrid->Barrier();
|
||||
Dw.DhopEO(src_o,r_e,DaggerNo);
|
||||
double t0=usecond();
|
||||
for(int i=0;i<ncall;i++){
|
||||
#ifdef CUDA_PROFILE
|
||||
if(i==10) cudaProfilerStart();
|
||||
#endif
|
||||
Dw.DhopEO(src_o,r_e,DaggerNo);
|
||||
#ifdef CUDA_PROFILE
|
||||
if(i==20) cudaProfilerStop();
|
||||
#endif
|
||||
}
|
||||
double t1=usecond();
|
||||
FGrid->Barrier();
|
||||
|
||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||
double flops=(single_site_flops*volume*ncall)/2.0;
|
||||
|
||||
std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||
std::cout<<GridLogMessage << "Deo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
|
||||
std::cout<<GridLogMessage << "Deo mflop/s per node "<< flops/(t1-t0)/NN<<std::endl;
|
||||
Dw.Report();
|
||||
}
|
||||
Dw.DhopEO(src_o,r_e,DaggerNo);
|
||||
Dw.DhopOE(src_e,r_o,DaggerNo);
|
||||
Dw.Dhop (src ,result,DaggerNo);
|
||||
|
||||
std::cout<<GridLogMessage << "r_e"<<norm2(r_e)<<std::endl;
|
||||
std::cout<<GridLogMessage << "r_o"<<norm2(r_o)<<std::endl;
|
||||
std::cout<<GridLogMessage << "res"<<norm2(result)<<std::endl;
|
||||
|
||||
setCheckerboard(r_eo,r_o);
|
||||
setCheckerboard(r_eo,r_e);
|
||||
|
||||
err = r_eo-result;
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||
if((norm2(err)>1.0e-4)){
|
||||
/*
|
||||
std::cout<< "Deo RESULT\n " <<r_eo << std::endl;
|
||||
std::cout<< "Deo REF\n " <<result << std::endl;
|
||||
std::cout<< "Deo ERR \n " << err <<std::endl;
|
||||
*/
|
||||
}
|
||||
|
||||
pickCheckerboard(Even,src_e,err);
|
||||
pickCheckerboard(Odd,src_o,err);
|
||||
std::cout<<GridLogMessage << "norm diff even "<< norm2(src_e)<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm diff odd "<< norm2(src_o)<<std::endl;
|
||||
|
||||
assert(norm2(src_e)<1.0e-4);
|
||||
assert(norm2(src_o)<1.0e-4);
|
||||
Grid_finalize();
|
||||
exit(0);
|
||||
}
|
@ -63,7 +63,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
std::cout << GridLogMessage << "Drawing gauge field" << std::endl;
|
||||
LatticeGaugeFieldF Umu(UGrid);
|
||||
SU3::HotConfiguration(RNG4,Umu);
|
||||
SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
std::cout << GridLogMessage << "Random gauge initialised " << std::endl;
|
||||
|
||||
RealD mass=0.1;
|
||||
|
@ -30,7 +30,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
;
|
||||
|
||||
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
@ -53,7 +53,7 @@ int main (int argc, char ** argv)
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
std::cout << GridLogMessage << "Seeded"<<std::endl;
|
||||
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
|
||||
std::cout << GridLogMessage << "made random gauge fields"<<std::endl;
|
||||
|
||||
|
52
configure.ac
52
configure.ac
@ -123,6 +123,24 @@ case ${ac_LAPACK} in
|
||||
AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);;
|
||||
esac
|
||||
|
||||
############### Nc
|
||||
AC_ARG_ENABLE([Nc],
|
||||
[AC_HELP_STRING([--enable-Nc=2|3|4], [enable number of colours])],
|
||||
[ac_Nc=${enable_Nc}], [ac_Nc=3])
|
||||
|
||||
case ${ac_Nc} in
|
||||
2)
|
||||
AC_DEFINE([Config_Nc],[2],[Gauge group Nc]);;
|
||||
3)
|
||||
AC_DEFINE([Config_Nc],[3],[Gauge group Nc]);;
|
||||
4)
|
||||
AC_DEFINE([Config_Nc],[4],[Gauge group Nc]);;
|
||||
5)
|
||||
AC_DEFINE([Config_Nc],[5],[Gauge group Nc]);;
|
||||
*)
|
||||
AC_MSG_ERROR(["Unsupport gauge group choice Nc = ${ac_Nc}"]);;
|
||||
esac
|
||||
|
||||
############### FP16 conversions
|
||||
AC_ARG_ENABLE([sfw-fp16],
|
||||
[AC_HELP_STRING([--enable-sfw-fp16=yes|no], [enable software fp16 comms])],
|
||||
@ -459,23 +477,24 @@ esac
|
||||
AM_CXXFLAGS="$SIMD_FLAGS $AM_CXXFLAGS"
|
||||
AM_CFLAGS="$SIMD_FLAGS $AM_CFLAGS"
|
||||
|
||||
############### Precision selection
|
||||
AC_ARG_ENABLE([precision],
|
||||
[AC_HELP_STRING([--enable-precision=single|double],
|
||||
[Select default word size of Real])],
|
||||
[ac_PRECISION=${enable_precision}],[ac_PRECISION=double])
|
||||
############### Precision selection - deprecate
|
||||
#AC_ARG_ENABLE([precision],
|
||||
# [AC_HELP_STRING([--enable-precision=single|double],
|
||||
# [Select default word size of Real])],
|
||||
# [ac_PRECISION=${enable_precision}],[ac_PRECISION=double])
|
||||
|
||||
case ${ac_PRECISION} in
|
||||
single)
|
||||
AC_DEFINE([GRID_DEFAULT_PRECISION_SINGLE],[1],[GRID_DEFAULT_PRECISION is SINGLE] )
|
||||
;;
|
||||
double)
|
||||
AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] )
|
||||
;;
|
||||
*)
|
||||
AC_MSG_ERROR([${ac_PRECISION} unsupported --enable-precision option]);
|
||||
;;
|
||||
esac
|
||||
AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] )
|
||||
|
||||
#case ${ac_PRECISION} in
|
||||
# single)
|
||||
# AC_DEFINE([GRID_DEFAULT_PRECISION_SINGLE],[1],[GRID_DEFAULT_PRECISION is SINGLE] )
|
||||
# ;;
|
||||
# double)
|
||||
# ;;
|
||||
# *)
|
||||
# AC_MSG_ERROR([${ac_PRECISION} unsupported --enable-precision option]);
|
||||
# ;;
|
||||
#esac
|
||||
|
||||
###################### Shared memory allocation technique under MPI3
|
||||
AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmopen|shmget|hugetlbfs|shmnone],
|
||||
@ -656,6 +675,7 @@ os (target) : $target_os
|
||||
compiler vendor : ${ax_cv_cxx_compiler_vendor}
|
||||
compiler version : ${ax_cv_gxx_version}
|
||||
----- BUILD OPTIONS -----------------------------------
|
||||
Nc : ${ac_Nc}
|
||||
SIMD : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG}
|
||||
Threading : ${ac_openmp}
|
||||
Acceleration : ${ac_ACCELERATOR}
|
||||
|
@ -184,19 +184,19 @@ Below are shown the `configure` script invocations for three recommended configu
|
||||
|
||||
This is the build for every day developing and debugging with Xcode. It uses the Xcode clang c++ compiler, without MPI, and defaults to double-precision. Xcode builds the `Debug` configuration with debug symbols for full debugging:
|
||||
|
||||
../configure CXX=clang++ CXXFLAGS="-I$GridPkg/include/libomp -Xpreprocessor -fopenmp -std=c++11" LDFLAGS="-L$GridPkg/lib/libomp" LIBS="-lomp" --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-comms=none --enable-precision=double --prefix=$GridPre/Debug
|
||||
../configure CXX=clang++ CXXFLAGS="-I$GridPkg/include/libomp -Xpreprocessor -fopenmp -std=c++11" LDFLAGS="-L$GridPkg/lib/libomp" LIBS="-lomp" --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-comms=none --prefix=$GridPre/Debug
|
||||
|
||||
#### 2. `Release`
|
||||
|
||||
Since Grid itself doesn't really have debug configurations, the release build is recommended to be the same as `Debug`, except using single-precision (handy for validation):
|
||||
Since Grid itself doesn't really have debug configurations, the release build is recommended to be the same as `Debug`:
|
||||
|
||||
../configure CXX=clang++ CXXFLAGS="-I$GridPkg/include/libomp -Xpreprocessor -fopenmp -std=c++11" LDFLAGS="-L$GridPkg/lib/libomp" LIBS="-lomp" --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-comms=none --enable-precision=single --prefix=$GridPre/Release
|
||||
../configure CXX=clang++ CXXFLAGS="-I$GridPkg/include/libomp -Xpreprocessor -fopenmp -std=c++11" LDFLAGS="-L$GridPkg/lib/libomp" LIBS="-lomp" --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-comms=none --prefix=$GridPre/Release
|
||||
|
||||
#### 3. `MPIDebug`
|
||||
|
||||
Debug configuration with MPI:
|
||||
|
||||
../configure CXX=clang++ CXXFLAGS="-I$GridPkg/include/libomp -Xpreprocessor -fopenmp -std=c++11" LDFLAGS="-L$GridPkg/lib/libomp" LIBS="-lomp" --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-comms=mpi-auto MPICXX=$GridPre/bin/mpicxx --enable-precision=double --prefix=$GridPre/MPIDebug
|
||||
../configure CXX=clang++ CXXFLAGS="-I$GridPkg/include/libomp -Xpreprocessor -fopenmp -std=c++11" LDFLAGS="-L$GridPkg/lib/libomp" LIBS="-lomp" --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-comms=mpi-auto MPICXX=$GridPre/bin/mpicxx --prefix=$GridPre/MPIDebug
|
||||
|
||||
### 5.3 Build Grid
|
||||
|
||||
|
@ -178,15 +178,10 @@ Then enter the cloned directory and set up the build system::
|
||||
Now you can execute the `configure` script to generate makefiles (here from a build directory)::
|
||||
|
||||
mkdir build; cd build
|
||||
../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi-auto \
|
||||
../configure --enable-simd=AVX --enable-comms=mpi-auto \
|
||||
--prefix=<path>
|
||||
|
||||
where::
|
||||
|
||||
--enable-precision=single|double
|
||||
|
||||
sets the **default precision**. Since this is largely a benchmarking convenience, it is anticipated that the default precision may be removed in future implementations,
|
||||
and that explicit type selection be made at all points. Naturally, most code will be type templated in any case.::
|
||||
::
|
||||
|
||||
--enable-simd=GEN|SSE4|AVX|AVXFMA|AVXFMA4|AVX2|AVX512|NEONv8|QPX
|
||||
|
||||
@ -236,7 +231,7 @@ Detailed build configuration options
|
||||
--enable-mkl[=path] use Intel MKL for FFT (and LAPACK if enabled) routines. A UNIX prefix containing the library can be specified (optional).
|
||||
--enable-simd=code setup Grid for the SIMD target `<code>`(default: `GEN`). A list of possible SIMD targets is detailed in a section below.
|
||||
--enable-gen-simd-width=size select the size (in bytes) of the generic SIMD vector type (default: 32 bytes). E.g. SSE 128 bit corresponds to 16 bytes.
|
||||
--enable-precision=single|double set the default precision (default: `double`).
|
||||
--enable-precision=single|double set the default precision (default: `double`). **Deprecated option**
|
||||
--enable-comms=mpi|none use `<comm>` for message passing (default: `none`).
|
||||
--enable-rng=sitmo|ranlux48|mt19937 choose the RNG (default: `sitmo`).
|
||||
--disable-timers disable system dependent high-resolution timers.
|
||||
@ -304,8 +299,7 @@ Build setup for Intel Knights Landing platform
|
||||
|
||||
The following configuration is recommended for the Intel Knights Landing platform::
|
||||
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=KNL \
|
||||
../configure --enable-simd=KNL \
|
||||
--enable-comms=mpi-auto \
|
||||
--enable-mkl \
|
||||
CXX=icpc MPICXX=mpiicpc
|
||||
@ -314,8 +308,7 @@ The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library.
|
||||
|
||||
If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use::
|
||||
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=KNL \
|
||||
../configure --enable-simd=KNL \
|
||||
--enable-comms=mpi \
|
||||
--enable-mkl \
|
||||
CXX=CC CC=cc
|
||||
@ -332,8 +325,7 @@ presently performs better with use of more than one rank per node, using shared
|
||||
for interior communication.
|
||||
We recommend four ranks per node for best performance, but optimum is local volume dependent. ::
|
||||
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=KNL \
|
||||
../configure --enable-simd=KNL \
|
||||
--enable-comms=mpi-auto \
|
||||
--enable-mkl \
|
||||
CC=icpc MPICXX=mpiicpc
|
||||
@ -343,8 +335,7 @@ Build setup for Intel Haswell Xeon platform
|
||||
|
||||
The following configuration is recommended for the Intel Haswell platform::
|
||||
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=AVX2 \
|
||||
../configure --enable-simd=AVX2 \
|
||||
--enable-comms=mpi-auto \
|
||||
--enable-mkl \
|
||||
CXX=icpc MPICXX=mpiicpc
|
||||
@ -360,8 +351,7 @@ where `<path>` is the UNIX prefix where GMP and MPFR are installed.
|
||||
|
||||
If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use::
|
||||
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=AVX2 \
|
||||
../configure --enable-simd=AVX2 \
|
||||
--enable-comms=mpi \
|
||||
--enable-mkl \
|
||||
CXX=CC CC=cc
|
||||
@ -379,8 +369,7 @@ Build setup for Intel Skylake Xeon platform
|
||||
|
||||
The following configuration is recommended for the Intel Skylake platform::
|
||||
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=AVX512 \
|
||||
../configure --enable-simd=AVX512 \
|
||||
--enable-comms=mpi \
|
||||
--enable-mkl \
|
||||
CXX=mpiicpc
|
||||
@ -396,8 +385,7 @@ where `<path>` is the UNIX prefix where GMP and MPFR are installed.
|
||||
|
||||
If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use::
|
||||
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=AVX512 \
|
||||
../configure --enable-simd=AVX512 \
|
||||
--enable-comms=mpi \
|
||||
--enable-mkl \
|
||||
CXX=CC CC=cc
|
||||
@ -422,8 +410,7 @@ and 8 threads per rank.
|
||||
The following configuration is recommended for the AMD EPYC platform::
|
||||
|
||||
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=AVX2 \
|
||||
../configure --enable-simd=AVX2 \
|
||||
--enable-comms=mpi \
|
||||
CXX=mpicxx
|
||||
|
||||
|
@ -69,7 +69,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
std::vector<LatticeColourMatrix> U(4,&Fine);
|
||||
|
||||
SU3::HotConfiguration(pRNGa,Umu);
|
||||
SU<Nc>::HotConfiguration(pRNGa,Umu);
|
||||
|
||||
|
||||
FieldMetaData header;
|
||||
|
@ -84,7 +84,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
std::vector<LatticeColourMatrix> U(4,&Fine);
|
||||
|
||||
SU3::HotConfiguration(pRNGa,Umu);
|
||||
SU<Nc>::HotConfiguration(pRNGa,Umu);
|
||||
|
||||
FieldMetaData header;
|
||||
std::string file("./ckpoint_lat.4000");
|
||||
|
@ -80,7 +80,7 @@ int main (int argc, char ** argv)
|
||||
GridParallelRNG sRNG5(sFGrid); sRNG5.SeedFixedIntegers(seeds5);
|
||||
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
SU3::HotConfiguration(RNG4,Umu);
|
||||
SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
|
||||
RealD mass=0.1;
|
||||
RealD M5 =1.8;
|
||||
|
@ -202,7 +202,7 @@ int main (int argc, char ** argv) {
|
||||
std::vector<int> seeds4({1,2,3,4});
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
SU3::HotConfiguration(RNG4,Umu);
|
||||
SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
// FieldMetaData header;
|
||||
// NerscIO::readConfiguration(Umu,header,Params.config);
|
||||
|
||||
|
@ -71,7 +71,7 @@ int main (int argc, char ** argv)
|
||||
LatticeGaugeFieldD Umu(UGrid);
|
||||
LatticeGaugeFieldF Umu_f(UGrid_f);
|
||||
|
||||
SU3::HotConfiguration(RNG4,Umu);
|
||||
SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
|
||||
precisionChange(Umu_f,Umu);
|
||||
|
||||
|
@ -69,7 +69,7 @@ int main (int argc, char ** argv)
|
||||
LatticeGaugeFieldD Umu(UGrid);
|
||||
LatticeGaugeFieldF Umu_f(UGrid_f);
|
||||
|
||||
SU3::HotConfiguration(RNG4,Umu);
|
||||
SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
|
||||
precisionChange(Umu_f,Umu);
|
||||
|
||||
|
@ -64,7 +64,7 @@ int main (int argc, char ** argv)
|
||||
LatticeFermion ref(FGrid); ref=Zero();
|
||||
LatticeFermion tmp(FGrid);
|
||||
LatticeFermion err(FGrid);
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
@ -131,7 +131,7 @@ int main (int argc, char ** argv)
|
||||
// LatticeFermion result(FGrid); result=Zero();
|
||||
// LatticeGaugeField Umu(UGrid);
|
||||
|
||||
// SU3::HotConfiguration(RNG4,Umu);
|
||||
// SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
|
||||
// std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
// for(int mu=0;mu<Nd;mu++){
|
||||
|
@ -69,7 +69,7 @@ int main (int argc, char ** argv)
|
||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
|
||||
RealD mass=0.1;
|
||||
|
@ -73,7 +73,7 @@ int main (int argc, char ** argv)
|
||||
LatticeFermion ref (FGrid); ref = Zero();
|
||||
LatticeFermion tmp (FGrid); tmp = Zero();
|
||||
LatticeFermion err (FGrid); err = Zero();
|
||||
LatticeGaugeField Umu (UGrid); SU3::HotConfiguration(RNG4, Umu);
|
||||
LatticeGaugeField Umu (UGrid); SU<Nc>::HotConfiguration(RNG4, Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
|
||||
// Only one non-zero (y)
|
||||
|
@ -72,7 +72,7 @@ int main (int argc, char ** argv)
|
||||
LatticeFermion ref(FGrid); ref=Zero();
|
||||
LatticeFermion tmp(FGrid); tmp=Zero();
|
||||
LatticeFermion err(FGrid); tmp=Zero();
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
|
||||
// Only one non-zero (y)
|
||||
|
@ -138,7 +138,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
LatticeGaugeFieldD Umu(&GRID);
|
||||
|
||||
SU3::ColdConfiguration(pRNG,Umu); // Unit gauge
|
||||
SU<Nc>::ColdConfiguration(pRNG,Umu); // Unit gauge
|
||||
// Umu=Zero();
|
||||
////////////////////////////////////////////////////
|
||||
// Wilson test
|
||||
|
@ -73,11 +73,11 @@ int main (int argc, char ** argv)
|
||||
LatticeColourMatrix xform2(&GRID); // Gauge xform
|
||||
LatticeColourMatrix xform3(&GRID); // Gauge xform
|
||||
|
||||
SU3::ColdConfiguration(pRNG,Umu); // Unit gauge
|
||||
SU<Nc>::ColdConfiguration(pRNG,Umu); // Unit gauge
|
||||
Uorg=Umu;
|
||||
Urnd=Umu;
|
||||
|
||||
SU3::RandomGaugeTransform(pRNG,Urnd,g); // Unit gauge
|
||||
SU<Nc>::RandomGaugeTransform(pRNG,Urnd,g); // Unit gauge
|
||||
|
||||
Real plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu);
|
||||
std::cout << " Initial plaquette "<<plaq << std::endl;
|
||||
@ -121,7 +121,7 @@ int main (int argc, char ** argv)
|
||||
std::cout<< "* Testing non-unit configuration *" <<std::endl;
|
||||
std::cout<< "*****************************************************************" <<std::endl;
|
||||
|
||||
SU3::HotConfiguration(pRNG,Umu); // Unit gauge
|
||||
SU<Nc>::HotConfiguration(pRNG,Umu); // Unit gauge
|
||||
|
||||
plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu);
|
||||
std::cout << " Initial plaquette "<<plaq << std::endl;
|
||||
@ -136,7 +136,7 @@ int main (int argc, char ** argv)
|
||||
std::cout<< "*****************************************************************" <<std::endl;
|
||||
|
||||
Umu=Urnd;
|
||||
SU3::HotConfiguration(pRNG,Umu); // Unit gauge
|
||||
SU<Nc>::HotConfiguration(pRNG,Umu); // Unit gauge
|
||||
|
||||
plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu);
|
||||
std::cout << " Initial plaquette "<<plaq << std::endl;
|
||||
|
@ -114,7 +114,7 @@ int main (int argc, char ** argv)
|
||||
GridParallelRNG RNG4_2f(UGrid_2f); RNG4_2f.SeedFixedIntegers(seeds4);
|
||||
|
||||
GparityGaugeField Umu_2f(UGrid_2f);
|
||||
SU3::HotConfiguration(RNG4_2f,Umu_2f);
|
||||
SU<Nc>::HotConfiguration(RNG4_2f,Umu_2f);
|
||||
|
||||
StandardFermionField src (FGrid_2f);
|
||||
StandardFermionField tmpsrc(FGrid_2f);
|
||||
|
@ -61,7 +61,7 @@ int main (int argc, char ** argv)
|
||||
FermionField ref(&Grid); ref=Zero();
|
||||
FermionField tmp(&Grid); tmp=Zero();
|
||||
FermionField err(&Grid); tmp=Zero();
|
||||
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
|
||||
LatticeGaugeField Umu(&Grid); SU<Nc>::HotConfiguration(pRNG,Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,&Grid);
|
||||
|
||||
double volume=1;
|
||||
|
@ -66,7 +66,7 @@ int main(int argc, char** argv) {
|
||||
|
||||
std::cout << GridLogMessage << "*********************************************"
|
||||
<< std::endl;
|
||||
std::cout << GridLogMessage << "* Generators for SU(3)" << std::endl;
|
||||
std::cout << GridLogMessage << "* Generators for SU(Nc" << std::endl;
|
||||
std::cout << GridLogMessage << "*********************************************"
|
||||
<< std::endl;
|
||||
SU3::printGenerators();
|
||||
@ -114,8 +114,8 @@ int main(int argc, char** argv) {
|
||||
|
||||
|
||||
LatticeGaugeField U(grid), V(grid);
|
||||
SU<Nc>::HotConfiguration<LatticeGaugeField>(gridRNG, U);
|
||||
SU<Nc>::HotConfiguration<LatticeGaugeField>(gridRNG, V);
|
||||
SU3::HotConfiguration<LatticeGaugeField>(gridRNG, U);
|
||||
SU3::HotConfiguration<LatticeGaugeField>(gridRNG, V);
|
||||
|
||||
// Adjoint representation
|
||||
// Test group structure
|
||||
@ -123,8 +123,8 @@ int main(int argc, char** argv) {
|
||||
LatticeGaugeField UV(grid);
|
||||
UV = Zero();
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
SU<Nc>::LatticeMatrix Umu = peekLorentz(U,mu);
|
||||
SU<Nc>::LatticeMatrix Vmu = peekLorentz(V,mu);
|
||||
SU3::LatticeMatrix Umu = peekLorentz(U,mu);
|
||||
SU3::LatticeMatrix Vmu = peekLorentz(V,mu);
|
||||
pokeLorentz(UV,Umu*Vmu, mu);
|
||||
}
|
||||
|
||||
@ -151,16 +151,16 @@ int main(int argc, char** argv) {
|
||||
|
||||
// Check correspondence of algebra and group transformations
|
||||
// Create a random vector
|
||||
SU<Nc>::LatticeAlgebraVector h_adj(grid);
|
||||
SU3::LatticeAlgebraVector h_adj(grid);
|
||||
typename AdjointRep<Nc>::LatticeMatrix Ar(grid);
|
||||
random(gridRNG,h_adj);
|
||||
h_adj = real(h_adj);
|
||||
SU_Adjoint<Nc>::AdjointLieAlgebraMatrix(h_adj,Ar);
|
||||
|
||||
// Re-extract h_adj
|
||||
SU<Nc>::LatticeAlgebraVector h_adj2(grid);
|
||||
SU3::LatticeAlgebraVector h_adj2(grid);
|
||||
SU_Adjoint<Nc>::projectOnAlgebra(h_adj2, Ar);
|
||||
SU<Nc>::LatticeAlgebraVector h_diff = h_adj - h_adj2;
|
||||
SU3::LatticeAlgebraVector h_diff = h_adj - h_adj2;
|
||||
std::cout << GridLogMessage << "Projections structure check vector difference (Adjoint representation) : " << norm2(h_diff) << std::endl;
|
||||
|
||||
// Exponentiate
|
||||
@ -183,14 +183,14 @@ int main(int argc, char** argv) {
|
||||
|
||||
|
||||
// Construct the fundamental matrix in the group
|
||||
SU<Nc>::LatticeMatrix Af(grid);
|
||||
SU<Nc>::FundamentalLieAlgebraMatrix(h_adj,Af);
|
||||
SU<Nc>::LatticeMatrix Ufund(grid);
|
||||
SU3::LatticeMatrix Af(grid);
|
||||
SU3::FundamentalLieAlgebraMatrix(h_adj,Af);
|
||||
SU3::LatticeMatrix Ufund(grid);
|
||||
Ufund = expMat(Af, 1.0, 16);
|
||||
// Check unitarity
|
||||
SU<Nc>::LatticeMatrix uno_f(grid);
|
||||
SU3::LatticeMatrix uno_f(grid);
|
||||
uno_f = 1.0;
|
||||
SU<Nc>::LatticeMatrix UnitCheck(grid);
|
||||
SU3::LatticeMatrix UnitCheck(grid);
|
||||
UnitCheck = Ufund * adj(Ufund) - uno_f;
|
||||
std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck)
|
||||
<< std::endl;
|
||||
@ -311,14 +311,14 @@ int main(int argc, char** argv) {
|
||||
// Test group structure
|
||||
// (U_f * V_f)_r = U_r * V_r
|
||||
LatticeGaugeField U2(grid), V2(grid);
|
||||
SU<Nc>::HotConfiguration<LatticeGaugeField>(gridRNG, U2);
|
||||
SU<Nc>::HotConfiguration<LatticeGaugeField>(gridRNG, V2);
|
||||
SU3::HotConfiguration<LatticeGaugeField>(gridRNG, U2);
|
||||
SU3::HotConfiguration<LatticeGaugeField>(gridRNG, V2);
|
||||
|
||||
LatticeGaugeField UV2(grid);
|
||||
UV2 = Zero();
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
SU<Nc>::LatticeMatrix Umu2 = peekLorentz(U2,mu);
|
||||
SU<Nc>::LatticeMatrix Vmu2 = peekLorentz(V2,mu);
|
||||
SU3::LatticeMatrix Umu2 = peekLorentz(U2,mu);
|
||||
SU3::LatticeMatrix Vmu2 = peekLorentz(V2,mu);
|
||||
pokeLorentz(UV2,Umu2*Vmu2, mu);
|
||||
}
|
||||
|
||||
@ -345,16 +345,16 @@ int main(int argc, char** argv) {
|
||||
|
||||
// Check correspondence of algebra and group transformations
|
||||
// Create a random vector
|
||||
SU<Nc>::LatticeAlgebraVector h_sym(grid);
|
||||
SU3::LatticeAlgebraVector h_sym(grid);
|
||||
typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix Ar_sym(grid);
|
||||
random(gridRNG,h_sym);
|
||||
h_sym = real(h_sym);
|
||||
SU_TwoIndex<Nc,Symmetric>::TwoIndexLieAlgebraMatrix(h_sym,Ar_sym);
|
||||
|
||||
// Re-extract h_sym
|
||||
SU<Nc>::LatticeAlgebraVector h_sym2(grid);
|
||||
SU3::LatticeAlgebraVector h_sym2(grid);
|
||||
SU_TwoIndex< Nc, Symmetric>::projectOnAlgebra(h_sym2, Ar_sym);
|
||||
SU<Nc>::LatticeAlgebraVector h_diff_sym = h_sym - h_sym2;
|
||||
SU3::LatticeAlgebraVector h_diff_sym = h_sym - h_sym2;
|
||||
std::cout << GridLogMessage << "Projections structure check vector difference (Two Index Symmetric): " << norm2(h_diff_sym) << std::endl;
|
||||
|
||||
|
||||
@ -379,11 +379,11 @@ int main(int argc, char** argv) {
|
||||
|
||||
|
||||
// Construct the fundamental matrix in the group
|
||||
SU<Nc>::LatticeMatrix Af_sym(grid);
|
||||
SU<Nc>::FundamentalLieAlgebraMatrix(h_sym,Af_sym);
|
||||
SU<Nc>::LatticeMatrix Ufund2(grid);
|
||||
SU3::LatticeMatrix Af_sym(grid);
|
||||
SU3::FundamentalLieAlgebraMatrix(h_sym,Af_sym);
|
||||
SU3::LatticeMatrix Ufund2(grid);
|
||||
Ufund2 = expMat(Af_sym, 1.0, 16);
|
||||
SU<Nc>::LatticeMatrix UnitCheck2(grid);
|
||||
SU3::LatticeMatrix UnitCheck2(grid);
|
||||
UnitCheck2 = Ufund2 * adj(Ufund2) - uno_f;
|
||||
std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck2)
|
||||
<< std::endl;
|
||||
@ -421,14 +421,14 @@ int main(int argc, char** argv) {
|
||||
// Test group structure
|
||||
// (U_f * V_f)_r = U_r * V_r
|
||||
LatticeGaugeField U2A(grid), V2A(grid);
|
||||
SU<Nc>::HotConfiguration<LatticeGaugeField>(gridRNG, U2A);
|
||||
SU<Nc>::HotConfiguration<LatticeGaugeField>(gridRNG, V2A);
|
||||
SU3::HotConfiguration<LatticeGaugeField>(gridRNG, U2A);
|
||||
SU3::HotConfiguration<LatticeGaugeField>(gridRNG, V2A);
|
||||
|
||||
LatticeGaugeField UV2A(grid);
|
||||
UV2A = Zero();
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
SU<Nc>::LatticeMatrix Umu2A = peekLorentz(U2,mu);
|
||||
SU<Nc>::LatticeMatrix Vmu2A = peekLorentz(V2,mu);
|
||||
SU3::LatticeMatrix Umu2A = peekLorentz(U2,mu);
|
||||
SU3::LatticeMatrix Vmu2A = peekLorentz(V2,mu);
|
||||
pokeLorentz(UV2A,Umu2A*Vmu2A, mu);
|
||||
}
|
||||
|
||||
@ -455,16 +455,16 @@ int main(int argc, char** argv) {
|
||||
|
||||
// Check correspondence of algebra and group transformations
|
||||
// Create a random vector
|
||||
SU<Nc>::LatticeAlgebraVector h_Asym(grid);
|
||||
SU3::LatticeAlgebraVector h_Asym(grid);
|
||||
typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Ar_Asym(grid);
|
||||
random(gridRNG,h_Asym);
|
||||
h_Asym = real(h_Asym);
|
||||
SU_TwoIndex< Nc, AntiSymmetric>::TwoIndexLieAlgebraMatrix(h_Asym,Ar_Asym);
|
||||
|
||||
// Re-extract h_sym
|
||||
SU<Nc>::LatticeAlgebraVector h_Asym2(grid);
|
||||
SU3::LatticeAlgebraVector h_Asym2(grid);
|
||||
SU_TwoIndex< Nc, AntiSymmetric>::projectOnAlgebra(h_Asym2, Ar_Asym);
|
||||
SU<Nc>::LatticeAlgebraVector h_diff_Asym = h_Asym - h_Asym2;
|
||||
SU3::LatticeAlgebraVector h_diff_Asym = h_Asym - h_Asym2;
|
||||
std::cout << GridLogMessage << "Projections structure check vector difference (Two Index anti-Symmetric): " << norm2(h_diff_Asym) << std::endl;
|
||||
|
||||
|
||||
@ -489,11 +489,11 @@ int main(int argc, char** argv) {
|
||||
|
||||
|
||||
// Construct the fundamental matrix in the group
|
||||
SU<Nc>::LatticeMatrix Af_Asym(grid);
|
||||
SU<Nc>::FundamentalLieAlgebraMatrix(h_Asym,Af_Asym);
|
||||
SU<Nc>::LatticeMatrix Ufund2A(grid);
|
||||
SU3::LatticeMatrix Af_Asym(grid);
|
||||
SU3::FundamentalLieAlgebraMatrix(h_Asym,Af_Asym);
|
||||
SU3::LatticeMatrix Ufund2A(grid);
|
||||
Ufund2A = expMat(Af_Asym, 1.0, 16);
|
||||
SU<Nc>::LatticeMatrix UnitCheck2A(grid);
|
||||
SU3::LatticeMatrix UnitCheck2A(grid);
|
||||
UnitCheck2A = Ufund2A * adj(Ufund2A) - uno_f;
|
||||
std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck2A)
|
||||
<< std::endl;
|
||||
|
@ -444,7 +444,7 @@ int main(int argc, char **argv) {
|
||||
// Lattice 12x12 GEMM
|
||||
scFooBar = scFoo * scBar;
|
||||
|
||||
// Benchmark some simple operations LatticeSU3 * Lattice SU3.
|
||||
// Benchmark some simple operations LatticeSU<Nc> * Lattice SU<Nc>.
|
||||
double t0, t1, flops;
|
||||
double bytes;
|
||||
int ncall = 5000;
|
||||
|
@ -73,7 +73,7 @@ int main (int argc, char ** argv)
|
||||
LatticeFermion ref (FGrid); ref = Zero();
|
||||
LatticeFermion tmp (FGrid); tmp = Zero();
|
||||
LatticeFermion err (FGrid); err = Zero();
|
||||
LatticeGaugeField Umu (UGrid); SU3::HotConfiguration(RNG4, Umu);
|
||||
LatticeGaugeField Umu (UGrid); SU<Nc>::HotConfiguration(RNG4, Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
|
||||
// Only one non-zero (y)
|
||||
|
@ -55,7 +55,7 @@ int main (int argc, char ** argv)
|
||||
GridParallelRNG pRNG(grid); pRNG.SeedFixedIntegers(pseeds);
|
||||
GridSerialRNG sRNG; sRNG.SeedFixedIntegers(sseeds);
|
||||
|
||||
// SU3 colour operatoions
|
||||
// SU<Nc> colour operatoions
|
||||
LatticeColourMatrix link(grid);
|
||||
LatticeColourMatrix staple(grid);
|
||||
|
||||
@ -87,10 +87,10 @@ int main (int argc, char ** argv)
|
||||
|
||||
link = PeekIndex<LorentzIndex>(Umu,mu);
|
||||
|
||||
for( int subgroup=0;subgroup<SU3::su2subgroups();subgroup++ ) {
|
||||
for( int subgroup=0;subgroup<SU<Nc>::su2subgroups();subgroup++ ) {
|
||||
|
||||
// update Even checkerboard
|
||||
SU3::SubGroupHeatBath(sRNG,pRNG,beta,link,staple,subgroup,20,mask);
|
||||
SU<Nc>::SubGroupHeatBath(sRNG,pRNG,beta,link,staple,subgroup,20,mask);
|
||||
|
||||
}
|
||||
|
||||
|
@ -64,7 +64,7 @@ int main (int argc, char ** argv)
|
||||
FermionField err(&Grid); tmp=Zero();
|
||||
FermionField phi (&Grid); random(pRNG,phi);
|
||||
FermionField chi (&Grid); random(pRNG,chi);
|
||||
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
|
||||
LatticeGaugeField Umu(&Grid); SU<Nc>::HotConfiguration(pRNG,Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,&Grid);
|
||||
|
||||
|
||||
|
@ -75,7 +75,7 @@ int main (int argc, char ** argv)
|
||||
FermionField phi (FGrid); random(pRNG5,phi);
|
||||
FermionField chi (FGrid); random(pRNG5,chi);
|
||||
|
||||
LatticeGaugeField Umu(UGrid); SU3::ColdConfiguration(pRNG4,Umu);
|
||||
LatticeGaugeField Umu(UGrid); SU<Nc>::ColdConfiguration(pRNG4,Umu);
|
||||
LatticeGaugeField Umua(UGrid); Umua=Umu;
|
||||
|
||||
double volume=Ls;
|
||||
|
@ -84,7 +84,7 @@ int main (int argc, char ** argv)
|
||||
FermionField chi (FGrid); random(pRNG5,chi);
|
||||
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
SU3::HotConfiguration(pRNG4,Umu);
|
||||
SU<Nc>::HotConfiguration(pRNG4,Umu);
|
||||
|
||||
/*
|
||||
for(int mu=1;mu<4;mu++){
|
||||
|
@ -83,7 +83,7 @@ int main (int argc, char ** argv)
|
||||
FermionField chi (FGrid); random(pRNG5,chi);
|
||||
|
||||
LatticeGaugeFieldF Umu(UGrid);
|
||||
SU3::HotConfiguration(pRNG4,Umu);
|
||||
SU<Nc>::HotConfiguration(pRNG4,Umu);
|
||||
|
||||
/*
|
||||
for(int mu=1;mu<4;mu++){
|
||||
|
@ -64,7 +64,7 @@ int main (int argc, char ** argv)
|
||||
FermionField err(&Grid); tmp=Zero();
|
||||
FermionField phi (&Grid); random(pRNG,phi);
|
||||
FermionField chi (&Grid); random(pRNG,chi);
|
||||
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
|
||||
LatticeGaugeField Umu(&Grid); SU<Nc>::HotConfiguration(pRNG,Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,&Grid);
|
||||
|
||||
|
||||
|
@ -74,7 +74,7 @@ int main(int argc, char **argv)
|
||||
FermionField chi(&Grid);
|
||||
random(pRNG, chi);
|
||||
LatticeGaugeField Umu(&Grid);
|
||||
SU3::HotConfiguration(pRNG, Umu);
|
||||
SU<Nc>::HotConfiguration(pRNG, Umu);
|
||||
std::vector<LatticeColourMatrix> U(4, &Grid);
|
||||
|
||||
double volume = 1;
|
||||
|
@ -70,7 +70,7 @@ int main (int argc, char ** argv)
|
||||
LatticeFermion tmp(&Grid); tmp=Zero();
|
||||
LatticeFermion err(&Grid); tmp=Zero();
|
||||
LatticeGaugeField Umu(&Grid);
|
||||
SU3::HotConfiguration(pRNG,Umu);
|
||||
SU<Nc>::HotConfiguration(pRNG,Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,&Grid);
|
||||
|
||||
double volume=1;
|
||||
|
@ -71,7 +71,7 @@ int main (int argc, char ** argv)
|
||||
LatticeFermion ref(&Grid); ref=Zero();
|
||||
LatticeFermion tmp(&Grid); tmp=Zero();
|
||||
LatticeFermion err(&Grid); tmp=Zero();
|
||||
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
|
||||
LatticeGaugeField Umu(&Grid); SU<Nc>::HotConfiguration(pRNG,Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,&Grid);
|
||||
|
||||
double volume=1;
|
||||
|
@ -116,7 +116,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
LatticeGaugeFieldF UmuF(UGridF);
|
||||
SU3::HotConfiguration(RNG4,Umu);
|
||||
SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
precisionChange(UmuF,Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
|
||||
|
@ -77,7 +77,7 @@ int main (int argc, char ** argv)
|
||||
LatticeFermion ref(FGrid); ref=Zero();
|
||||
LatticeFermion tmp(FGrid);
|
||||
LatticeFermion err(FGrid);
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
|
||||
#if 0
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
|
@ -70,7 +70,7 @@ int main (int argc, char ** argv)
|
||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
|
||||
RealD mass=0.1;
|
||||
|
@ -71,9 +71,9 @@ int main (int argc, char ** argv)
|
||||
std::string file("./ckpoint_lat.400");
|
||||
NerscIO::readConfiguration(Umu,header,file);
|
||||
|
||||
// SU3::ColdConfiguration(RNG4,Umu);
|
||||
// SU3::TepidConfiguration(RNG4,Umu);
|
||||
// SU3::HotConfiguration(RNG4,Umu);
|
||||
// SU<Nc>::ColdConfiguration(RNG4,Umu);
|
||||
// SU<Nc>::TepidConfiguration(RNG4,Umu);
|
||||
// SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
// Umu=Zero();
|
||||
|
||||
RealD mass=0.1;
|
||||
|
@ -108,8 +108,8 @@ int main (int argc, char ** argv)
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
SU3::ColdConfiguration(Umu);
|
||||
// SU3::HotConfiguration(RNG4,Umu);
|
||||
SU<Nc>::ColdConfiguration(Umu);
|
||||
// SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
|
||||
RealD mass=0.3;
|
||||
RealD M5 =1.0;
|
||||
|
@ -73,7 +73,7 @@ int main(int argc, char** argv)
|
||||
|
||||
// Random gauge field
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
SU3::HotConfiguration(RNG4, Umu);
|
||||
SU<Nc>::HotConfiguration(RNG4, Umu);
|
||||
|
||||
DomainWallEOFAFermionR Lop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mpv, 0.0, -1, M5);
|
||||
DomainWallEOFAFermionR Rop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mpv, mf, mpv, -1.0, 1, M5);
|
||||
|
@ -77,7 +77,7 @@ int main(int argc, char** argv)
|
||||
|
||||
// Random gauge field
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
SU3::HotConfiguration(RNG4, Umu);
|
||||
SU<Nc>::HotConfiguration(RNG4, Umu);
|
||||
|
||||
// GparityDomainWallFermionR::ImplParams params;
|
||||
FermionAction::ImplParams params;
|
||||
|
@ -75,7 +75,7 @@ int main(int argc, char** argv)
|
||||
|
||||
// Random gauge field
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
SU3::HotConfiguration(RNG4, Umu);
|
||||
SU<Nc>::HotConfiguration(RNG4, Umu);
|
||||
|
||||
MobiusEOFAFermionR Lop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mpv, 0.0, -1, M5, b, c);
|
||||
MobiusEOFAFermionR Rop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mpv, mf, mpv, -1.0, 1, M5, b, c);
|
||||
|
@ -79,7 +79,7 @@ int main(int argc, char** argv)
|
||||
|
||||
// Random gauge field
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
SU3::HotConfiguration(RNG4, Umu);
|
||||
SU<Nc>::HotConfiguration(RNG4, Umu);
|
||||
|
||||
FermionAction::ImplParams params;
|
||||
FermionAction Lop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mpv, 0.0, -1, M5, b, c, params);
|
||||
|
@ -102,7 +102,7 @@ int main(int argc, char **argv)
|
||||
|
||||
// Random gauge field
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
SU3::HotConfiguration(RNG4, Umu);
|
||||
SU<Nc>::HotConfiguration(RNG4, Umu);
|
||||
|
||||
// Initialize RHMC fermion operators
|
||||
DomainWallFermionR Ddwf_f(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, M5);
|
||||
|
@ -104,7 +104,7 @@ int main(int argc, char **argv)
|
||||
|
||||
// Random gauge field
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
SU3::HotConfiguration(RNG4, Umu);
|
||||
SU<Nc>::HotConfiguration(RNG4, Umu);
|
||||
|
||||
// Initialize RHMC fermion operators
|
||||
GparityDomainWallFermionR::ImplParams params;
|
||||
|
@ -104,7 +104,7 @@ int main(int argc, char **argv)
|
||||
|
||||
// Random gauge field
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
SU3::HotConfiguration(RNG4, Umu);
|
||||
SU<Nc>::HotConfiguration(RNG4, Umu);
|
||||
|
||||
// Initialize RHMC fermion operators
|
||||
MobiusFermionR Ddwf_f(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, M5, b, c);
|
||||
|
@ -106,7 +106,7 @@ int main(int argc, char **argv)
|
||||
|
||||
// Random gauge field
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
SU3::HotConfiguration(RNG4, Umu);
|
||||
SU<Nc>::HotConfiguration(RNG4, Umu);
|
||||
|
||||
// Initialize RHMC fermion operators
|
||||
GparityDomainWallFermionR::ImplParams params;
|
||||
|
@ -59,7 +59,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
LatticeGaugeField U(UGrid);
|
||||
|
||||
SU3::HotConfiguration(RNG4,U);
|
||||
SU<Nc>::HotConfiguration(RNG4,U);
|
||||
|
||||
////////////////////////////////////
|
||||
// Unmodified matrix element
|
||||
@ -93,7 +93,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
||||
SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
|
||||
PokeIndex<LorentzIndex>(mom,mommu,mu);
|
||||
|
||||
|
@ -60,7 +60,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
LatticeGaugeField U(UGrid);
|
||||
|
||||
SU3::HotConfiguration(RNG4,U);
|
||||
SU<Nc>::HotConfiguration(RNG4,U);
|
||||
|
||||
////////////////////////////////////
|
||||
// Unmodified matrix element
|
||||
@ -94,7 +94,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
||||
SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
|
||||
PokeIndex<LorentzIndex>(mom,mommu,mu);
|
||||
|
||||
|
@ -72,7 +72,7 @@ int main (int argc, char** argv)
|
||||
LatticeFermion MphiPrime (FGrid);
|
||||
|
||||
LatticeGaugeField U(UGrid);
|
||||
SU3::HotConfiguration(RNG4,U);
|
||||
SU<Nc>::HotConfiguration(RNG4,U);
|
||||
|
||||
////////////////////////////////////
|
||||
// Unmodified matrix element
|
||||
@ -105,7 +105,7 @@ int main (int argc, char** argv)
|
||||
|
||||
for(int mu=0; mu<Nd; mu++){
|
||||
|
||||
SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
|
||||
PokeIndex<LorentzIndex>(mom, mommu, mu);
|
||||
|
||||
|
@ -63,8 +63,8 @@ int main (int argc, char ** argv)
|
||||
|
||||
LatticeGaugeField U(UGrid);
|
||||
|
||||
SU3::HotConfiguration(RNG4,U);
|
||||
// SU3::ColdConfiguration(pRNG,U);
|
||||
SU<Nc>::HotConfiguration(RNG4,U);
|
||||
// SU<Nc>::ColdConfiguration(pRNG,U);
|
||||
|
||||
////////////////////////////////////
|
||||
// Unmodified matrix element
|
||||
@ -112,7 +112,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
||||
SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
|
||||
Hmom -= real(sum(trace(mommu*mommu)));
|
||||
|
||||
|
@ -75,7 +75,7 @@ int main (int argc, char** argv)
|
||||
FermionField MphiPrime (FGrid);
|
||||
|
||||
LatticeGaugeField U(UGrid);
|
||||
SU3::HotConfiguration(RNG4,U);
|
||||
SU<Nc>::HotConfiguration(RNG4,U);
|
||||
|
||||
////////////////////////////////////
|
||||
// Unmodified matrix element
|
||||
@ -109,7 +109,7 @@ int main (int argc, char** argv)
|
||||
|
||||
for(int mu=0; mu<Nd; mu++){
|
||||
|
||||
SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
|
||||
PokeIndex<LorentzIndex>(mom, mommu, mu);
|
||||
|
||||
|
@ -51,7 +51,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
LatticeGaugeField U(&Grid);
|
||||
|
||||
SU3::HotConfiguration(pRNG,U);
|
||||
SU<Nc>::HotConfiguration(pRNG,U);
|
||||
|
||||
double beta = 1.0;
|
||||
ConjugateWilsonGaugeActionR Action(beta);
|
||||
@ -80,7 +80,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
||||
SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
SU<Nc>::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
|
||||
PokeIndex<LorentzIndex>(mom,mommu,mu);
|
||||
|
||||
|
@ -54,7 +54,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
LatticeGaugeField U(&Grid);
|
||||
|
||||
SU3::HotConfiguration(pRNG,U);
|
||||
SU<Nc>::HotConfiguration(pRNG,U);
|
||||
|
||||
double beta = 1.0;
|
||||
double c1 = 0.331;
|
||||
@ -82,7 +82,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
||||
SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
SU<Nc>::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
|
||||
PokeIndex<LorentzIndex>(mom,mommu,mu);
|
||||
|
||||
|
@ -63,7 +63,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
LatticeGaugeField U(UGrid);
|
||||
|
||||
SU3::HotConfiguration(RNG4,U);
|
||||
SU<Nc>::HotConfiguration(RNG4,U);
|
||||
|
||||
////////////////////////////////////
|
||||
// Unmodified matrix element
|
||||
@ -100,7 +100,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
||||
SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
|
||||
PokeIndex<LorentzIndex>(mom,mommu,mu);
|
||||
|
||||
|
@ -57,7 +57,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
LatticeGaugeField U(UGrid);
|
||||
|
||||
SU3::HotConfiguration(RNG4,U);
|
||||
SU<Nc>::HotConfiguration(RNG4,U);
|
||||
|
||||
////////////////////////////////////
|
||||
// Unmodified matrix element
|
||||
@ -94,7 +94,7 @@ int main (int argc, char ** argv)
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
||||
// Traceless antihermitian momentum; gaussian in lie alg
|
||||
SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu);
|
||||
SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu);
|
||||
|
||||
PokeIndex<LorentzIndex>(mom,mommu,mu);
|
||||
|
||||
|
@ -58,7 +58,7 @@ int main (int argc, char ** argv)
|
||||
PokeIndex<LorentzIndex>(P, P_mu, mu);
|
||||
}
|
||||
|
||||
SU3::HotConfiguration(pRNG,U);
|
||||
SU<Nc>::HotConfiguration(pRNG,U);
|
||||
|
||||
|
||||
ConjugateGradient<LatticeGaugeField> CG(1.0e-8, 10000);
|
||||
@ -95,7 +95,7 @@ int main (int argc, char ** argv)
|
||||
std::cout << GridLogMessage << "Update the U " << std::endl;
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
// Traceless antihermitian momentum; gaussian in lie algebra
|
||||
SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu);
|
||||
SU<Nc>::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu);
|
||||
auto Umu = PeekIndex<LorentzIndex>(U, mu);
|
||||
PokeIndex<LorentzIndex>(mom,mommu,mu);
|
||||
Umu = expMat(mommu, dt, 12) * Umu;
|
||||
|
@ -60,7 +60,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
LatticeGaugeField U(UGrid);
|
||||
|
||||
SU3::HotConfiguration(RNG4,U);
|
||||
SU<Nc>::HotConfiguration(RNG4,U);
|
||||
|
||||
////////////////////////////////////
|
||||
// Unmodified matrix element
|
||||
@ -96,7 +96,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
||||
SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
|
||||
PokeIndex<LorentzIndex>(mom,mommu,mu);
|
||||
|
||||
|
@ -72,7 +72,7 @@ int main (int argc, char** argv)
|
||||
LatticeFermion MphiPrime (FGrid);
|
||||
|
||||
LatticeGaugeField U(UGrid);
|
||||
SU3::HotConfiguration(RNG4,U);
|
||||
SU<Nc>::HotConfiguration(RNG4,U);
|
||||
|
||||
////////////////////////////////////
|
||||
// Unmodified matrix element
|
||||
@ -107,7 +107,7 @@ int main (int argc, char** argv)
|
||||
|
||||
for(int mu=0; mu<Nd; mu++){
|
||||
|
||||
SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
|
||||
PokeIndex<LorentzIndex>(mom, mommu, mu);
|
||||
|
||||
|
@ -76,7 +76,7 @@ int main (int argc, char** argv)
|
||||
FermionField MphiPrime (FGrid);
|
||||
|
||||
LatticeGaugeField U(UGrid);
|
||||
SU3::HotConfiguration(RNG4,U);
|
||||
SU<Nc>::HotConfiguration(RNG4,U);
|
||||
|
||||
////////////////////////////////////
|
||||
// Unmodified matrix element
|
||||
@ -112,7 +112,7 @@ int main (int argc, char** argv)
|
||||
|
||||
for(int mu=0; mu<Nd; mu++){
|
||||
|
||||
SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
|
||||
PokeIndex<LorentzIndex>(mom, mommu, mu);
|
||||
autoView( U_v , U, CpuRead);
|
||||
|
@ -62,7 +62,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
LatticeGaugeField U(UGrid);
|
||||
|
||||
SU3::HotConfiguration(RNG4,U);
|
||||
SU<Nc>::HotConfiguration(RNG4,U);
|
||||
|
||||
////////////////////////////////////
|
||||
// Unmodified matrix element
|
||||
@ -96,7 +96,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
||||
SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
|
||||
PokeIndex<LorentzIndex>(mom,mommu,mu);
|
||||
|
||||
|
@ -54,7 +54,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
LatticeGaugeField U(&Grid);
|
||||
|
||||
SU3::HotConfiguration(pRNG,U);
|
||||
SU<Nc>::HotConfiguration(pRNG,U);
|
||||
|
||||
double beta = 1.0;
|
||||
double c1 = -0.331;
|
||||
@ -82,7 +82,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
||||
SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
SU<Nc>::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
|
||||
PokeIndex<LorentzIndex>(mom,mommu,mu);
|
||||
|
||||
|
@ -61,7 +61,7 @@ int main (int argc, char ** argv)
|
||||
LatticeGaugeField U(&Grid);
|
||||
|
||||
//SU2::HotConfiguration(pRNG,U);
|
||||
SU3::ColdConfiguration(pRNG,U);
|
||||
SU<Nc>::ColdConfiguration(pRNG,U);
|
||||
|
||||
////////////////////////////////////
|
||||
// Unmodified matrix element
|
||||
@ -98,7 +98,7 @@ int main (int argc, char ** argv)
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
||||
// Traceless antihermitian momentum; gaussian in lie alg
|
||||
SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu);
|
||||
SU<Nc>::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu);
|
||||
|
||||
Hmom -= real(sum(trace(mommu*mommu)));
|
||||
|
||||
|
@ -62,8 +62,8 @@ int main(int argc, char **argv)
|
||||
|
||||
LatticeGaugeField U(&Grid);
|
||||
|
||||
SU3::HotConfiguration(pRNG, U);
|
||||
//SU3::ColdConfiguration(pRNG, U);// Clover term Zero()
|
||||
SU<Nc>::HotConfiguration(pRNG, U);
|
||||
//SU<Nc>::ColdConfiguration(pRNG, U);// Clover term Zero()
|
||||
|
||||
////////////////////////////////////
|
||||
// Unmodified matrix element
|
||||
@ -101,7 +101,7 @@ int main(int argc, char **argv)
|
||||
for (int mu = 0; mu < Nd; mu++)
|
||||
{
|
||||
// Traceless antihermitian momentum; gaussian in lie alg
|
||||
SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu);
|
||||
SU<Nc>::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu);
|
||||
Hmom -= real(sum(trace(mommu * mommu)));
|
||||
PokeIndex<LorentzIndex>(mom, mommu, mu);
|
||||
|
||||
|
@ -59,7 +59,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
LatticeGaugeField U(UGrid);
|
||||
|
||||
SU3::HotConfiguration(RNG4,U);
|
||||
SU<Nc>::HotConfiguration(RNG4,U);
|
||||
|
||||
////////////////////////////////////
|
||||
// Unmodified matrix element
|
||||
@ -109,7 +109,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
||||
SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
|
||||
|
||||
PokeIndex<LorentzIndex>(mom,mommu,mu);
|
||||
|
||||
|
@ -293,7 +293,7 @@ int main (int argc, char ** argv) {
|
||||
{
|
||||
std::vector<int> seeds4({1,2,3,4});
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
SU3::HotConfiguration(RNG4, Umu);
|
||||
SU<Nc>::HotConfiguration(RNG4, Umu);
|
||||
}
|
||||
std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt() << " Ls: " << Ls << std::endl;
|
||||
|
||||
|
@ -54,7 +54,7 @@ int main (int argc, char ** argv)
|
||||
GridParallelRNG RNG5rb(FrbGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
SU3::HotConfiguration(RNG4, Umu);
|
||||
SU<Nc>::HotConfiguration(RNG4, Umu);
|
||||
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
@ -61,7 +61,7 @@ int main(int argc, char** argv) {
|
||||
RNG5.SeedFixedIntegers(seeds5);
|
||||
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
SU3::HotConfiguration(RNG4, Umu);
|
||||
SU<Nc>::HotConfiguration(RNG4, Umu);
|
||||
|
||||
/*
|
||||
std::vector<LatticeColourMatrix> U(4, UGrid);
|
||||
|
@ -280,7 +280,7 @@ void make_gauge(GaugeField &Umu, Grid::LatticePropagator &q1,Grid::LatticePropag
|
||||
Grid::GridCartesian *UGrid = (Grid::GridCartesian *)Umu.Grid();
|
||||
Grid::GridParallelRNG RNG4(UGrid);
|
||||
RNG4.SeedFixedIntegers(seeds4);
|
||||
Grid::SU3::HotConfiguration(RNG4, Umu);
|
||||
Grid::SU<Nc>::HotConfiguration(RNG4, Umu);
|
||||
|
||||
// Propagator
|
||||
Grid::gaussian(RNG4, q1);
|
||||
|
@ -277,7 +277,7 @@ double calc_grid_p(Grid::LatticeGaugeField & Umu)
|
||||
Grid::GridCartesian * UGrid = (Grid::GridCartesian *) Umu.Grid();
|
||||
Grid::GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
|
||||
Grid::SU3::HotConfiguration(RNG4,Umu);
|
||||
Grid::SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
|
||||
Grid::LatticeColourMatrix tmp(UGrid);
|
||||
tmp = Grid::zero;
|
||||
|
@ -502,7 +502,7 @@ void calc_grid(ChromaAction action,Grid::LatticeGaugeField & Umu, Grid::LatticeF
|
||||
Grid::gaussian(RNG5,src);
|
||||
Grid::gaussian(RNG5,res);
|
||||
|
||||
Grid::SU3::HotConfiguration(RNG4,Umu);
|
||||
Grid::SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
|
||||
/*
|
||||
Grid::LatticeColourMatrix U(UGrid);
|
||||
|
@ -333,7 +333,7 @@ void make_gauge(GaugeField & Umu,FermionField &src)
|
||||
|
||||
Grid::GridCartesian * UGrid = (Grid::GridCartesian *) Umu.Grid();
|
||||
Grid::GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
Grid::SU3::HotConfiguration(RNG4,Umu);
|
||||
Grid::SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
Grid::gaussian(RNG4,src);
|
||||
}
|
||||
|
||||
|
@ -348,7 +348,7 @@ void make_gauge(GaugeField &Umu, FermionField &src)
|
||||
Grid::GridCartesian *UGrid = (Grid::GridCartesian *)Umu._grid;
|
||||
Grid::GridParallelRNG RNG4(UGrid);
|
||||
RNG4.SeedFixedIntegers(seeds4);
|
||||
Grid::SU3::HotConfiguration(RNG4, Umu);
|
||||
Grid::SU<Nc>::HotConfiguration(RNG4, Umu);
|
||||
|
||||
// Fermion field
|
||||
Grid::gaussian(RNG4, src);
|
||||
|
@ -47,8 +47,8 @@ int main (int argc, char ** argv)
|
||||
RealD nrm = norm2(src);
|
||||
LatticeFermion result(&Grid); result=Zero();
|
||||
LatticeGaugeField Umu(&Grid);
|
||||
// SU3::HotConfiguration(pRNG,Umu);
|
||||
SU3::ColdConfiguration(Umu);
|
||||
// SU<Nc>::HotConfiguration(pRNG,Umu);
|
||||
SU<Nc>::ColdConfiguration(Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,&Grid);
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
@ -61,7 +61,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
LatticeFermion src(FGrid); random(RNG5,src);
|
||||
LatticeFermion result(FGrid); result=Zero();
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
@ -94,7 +94,7 @@ int main (int argc, char ** argv)
|
||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
|
||||
RealD mass=0.1;
|
||||
|
@ -67,7 +67,7 @@ int main(int argc, char** argv) {
|
||||
result = Zero();
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
|
||||
SU3::HotConfiguration(RNG4, Umu);
|
||||
SU<Nc>::HotConfiguration(RNG4, Umu);
|
||||
|
||||
std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt()
|
||||
<< " Ls: " << Ls << std::endl;
|
||||
|
@ -61,7 +61,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
LatticeFermion src(FGrid); random(RNG5,src);
|
||||
LatticeFermion result(FGrid); result=Zero();
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
@ -61,7 +61,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
LatticeFermion src(FGrid); random(RNG5,src);
|
||||
LatticeFermion result(FGrid); result=Zero();
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
@ -65,7 +65,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
LatticeFermion src(FGrid); random(RNG5,src);
|
||||
LatticeFermion result(FGrid); result=Zero();
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
|
||||
|
@ -68,7 +68,7 @@ int main (int argc, char ** argv)
|
||||
LatticeFermion result(FGrid); result=Zero();
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
|
||||
SU3::HotConfiguration(RNG4,Umu);
|
||||
SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
|
||||
|
||||
ConjugateResidual<LatticeFermion> CR(1.0e-6,10000);
|
||||
|
@ -93,7 +93,7 @@ int main (int argc, char ** argv)
|
||||
for(int s=0;s<nrhs;s++) random(pRNG5,src[s]);
|
||||
for(int s=0;s<nrhs;s++) result[s]=Zero();
|
||||
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu);
|
||||
LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(pRNG,Umu);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Bounce these fields to disk
|
||||
|
@ -136,11 +136,11 @@ int main (int argc, char ** argv)
|
||||
std::cout << GridLogMessage << "Intialising 4D RNG "<<std::endl;
|
||||
pRNG.SeedFixedIntegers(seeds);
|
||||
std::cout << GridLogMessage << "Intialised 4D RNG "<<std::endl;
|
||||
SU3::HotConfiguration(pRNG,Umu);
|
||||
SU<Nc>::HotConfiguration(pRNG,Umu);
|
||||
std::cout << GridLogMessage << "Intialised the HOT Gauge Field"<<std::endl;
|
||||
// std::cout << " Site zero "<< Umu[0] <<std::endl;
|
||||
} else {
|
||||
SU3::ColdConfiguration(Umu);
|
||||
SU<Nc>::ColdConfiguration(Umu);
|
||||
std::cout << GridLogMessage << "Intialised the COLD Gauge Field"<<std::endl;
|
||||
}
|
||||
/////////////////
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user