commit c772bcd514
Merge https://github.com/paboyle/Grid into develop
@@ -9,11 +9,6 @@ matrix:
     - os: osx
       osx_image: xcode8.3
       compiler: clang
-      env: PREC=single
-    - os: osx
-      osx_image: xcode8.3
-      compiler: clang
-      env: PREC=double

 before_install:
     - export GRIDDIR=`pwd`
@@ -55,7 +50,7 @@ script:
     - make -j4
     - make install
     - cd $CWD/build
-    - ../configure --enable-precision=$PREC --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install ${EXTRACONF}
+    - ../configure --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install ${EXTRACONF}
    - make -j4
    - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
    - make check
@@ -36,7 +36,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #include <Grid/lattice/Lattice_local.h>
 #include <Grid/lattice/Lattice_reduction.h>
 #include <Grid/lattice/Lattice_peekpoke.h>
-//#include <Grid/lattice/Lattice_reality.h>
+#include <Grid/lattice/Lattice_reality.h>
 #include <Grid/lattice/Lattice_real_imag.h>
 #include <Grid/lattice/Lattice_comparison_utils.h>
 #include <Grid/lattice/Lattice_comparison.h>
@@ -342,14 +342,10 @@ inline void ExpressionViewClose(LatticeTrinaryExpression<Op, T1, T2, T3> &expr)

 GridUnopClass(UnarySub, -a);
 GridUnopClass(UnaryNot, Not(a));
-GridUnopClass(UnaryAdj, adj(a));
-GridUnopClass(UnaryConj, conjugate(a));
 GridUnopClass(UnaryTrace, trace(a));
 GridUnopClass(UnaryTranspose, transpose(a));
 GridUnopClass(UnaryTa, Ta(a));
 GridUnopClass(UnaryProjectOnGroup, ProjectOnGroup(a));
-GridUnopClass(UnaryToReal, toReal(a));
-GridUnopClass(UnaryToComplex, toComplex(a));
 GridUnopClass(UnaryTimesI, timesI(a));
 GridUnopClass(UnaryTimesMinusI, timesMinusI(a));
 GridUnopClass(UnaryAbs, abs(a));
@@ -456,14 +452,12 @@ GridTrinOpClass(TrinaryWhere,
 GRID_DEF_UNOP(operator-, UnarySub);
 GRID_DEF_UNOP(Not, UnaryNot);
 GRID_DEF_UNOP(operator!, UnaryNot);
-GRID_DEF_UNOP(adj, UnaryAdj);
-GRID_DEF_UNOP(conjugate, UnaryConj);
+//GRID_DEF_UNOP(adj, UnaryAdj);
+//GRID_DEF_UNOP(conjugate, UnaryConj);
 GRID_DEF_UNOP(trace, UnaryTrace);
 GRID_DEF_UNOP(transpose, UnaryTranspose);
 GRID_DEF_UNOP(Ta, UnaryTa);
 GRID_DEF_UNOP(ProjectOnGroup, UnaryProjectOnGroup);
-GRID_DEF_UNOP(toReal, UnaryToReal);
-GRID_DEF_UNOP(toComplex, UnaryToComplex);
 GRID_DEF_UNOP(timesI, UnaryTimesI);
 GRID_DEF_UNOP(timesMinusI, UnaryTimesMinusI);
 GRID_DEF_UNOP(abs, UnaryAbs); // abs overloaded in cmath C++98; DON'T do the
@@ -494,27 +488,27 @@ GRID_DEF_TRINOP(where, TrinaryWhere);
 /////////////////////////////////////////////////////////////
 template <class Op, class T1>
 auto closure(const LatticeUnaryExpression<Op, T1> &expr)
-  -> Lattice<decltype(expr.op.func(vecEval(0, expr.arg1)))>
+  -> Lattice<typename std::remove_const<decltype(expr.op.func(vecEval(0, expr.arg1)))>::type >
 {
-  Lattice<decltype(expr.op.func(vecEval(0, expr.arg1)))> ret(expr);
+  Lattice<typename std::remove_const<decltype(expr.op.func(vecEval(0, expr.arg1)))>::type > ret(expr);
   return ret;
 }
 template <class Op, class T1, class T2>
 auto closure(const LatticeBinaryExpression<Op, T1, T2> &expr)
-  -> Lattice<decltype(expr.op.func(vecEval(0, expr.arg1),vecEval(0, expr.arg2)))>
+  -> Lattice<typename std::remove_const<decltype(expr.op.func(vecEval(0, expr.arg1),vecEval(0, expr.arg2)))>::type >
 {
-  Lattice<decltype(expr.op.func(vecEval(0, expr.arg1),vecEval(0, expr.arg2)))> ret(expr);
+  Lattice<typename std::remove_const<decltype(expr.op.func(vecEval(0, expr.arg1),vecEval(0, expr.arg2)))>::type > ret(expr);
   return ret;
 }
 template <class Op, class T1, class T2, class T3>
 auto closure(const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
-  -> Lattice<decltype(expr.op.func(vecEval(0, expr.arg1),
-                                   vecEval(0, expr.arg2),
-                                   vecEval(0, expr.arg3)))>
+  -> Lattice<typename std::remove_const<decltype(expr.op.func(vecEval(0, expr.arg1),
+                                                              vecEval(0, expr.arg2),
+                                                              vecEval(0, expr.arg3)))>::type >
 {
-  Lattice<decltype(expr.op.func(vecEval(0, expr.arg1),
-                                vecEval(0, expr.arg2),
-                                vecEval(0, expr.arg3)))> ret(expr);
+  Lattice<typename std::remove_const<decltype(expr.op.func(vecEval(0, expr.arg1),
+                                                           vecEval(0, expr.arg2),
+                                                           vecEval(0, expr.arg3)))>::type > ret(expr);
   return ret;
 }
 #define EXPRESSION_CLOSURE(function) \
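The `std::remove_const` wrapper matters because `decltype` on a functor call can be deduced with a `const` qualifier, and `Lattice<const T>` would then be unusable as an assignment target. A minimal sketch of the failure mode outside Grid (all names below are illustrative stand-ins, not Grid's API):

```cpp
#include <type_traits>
#include <utility>

struct vComplexLike { double re, im; };   // stand-in for a Grid vector type

template <typename T>
struct LatticeLike { T site; };           // wildly simplified "Lattice"

struct UnaryOp {
  // The call expression is deduced as `const vComplexLike`
  // (class-type prvalues keep their cv-qualifiers).
  const vComplexLike func(const vComplexLike &x) const { return x; }
};

// Without remove_const this alias would be LatticeLike<const vComplexLike>,
// whose member could never be written.
template <typename Op>
using closure_t = LatticeLike<typename std::remove_const<
    decltype(std::declval<Op>().func(std::declval<vComplexLike>()))>::type>;

int main()
{
  closure_t<UnaryOp> ret;
  ret.site = {1.0, 2.0};  // OK: T is vComplexLike, not const vComplexLike
  return 0;
}
```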
@@ -45,8 +45,8 @@ template<class vobj> inline Lattice<vobj> adj(const Lattice<vobj> &lhs){
   autoView( ret_v, ret, AcceleratorWrite);

   ret.Checkerboard()=lhs.Checkerboard();
-  accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), {
-    coalescedWrite(ret_v[ss], adj(lhs_v(ss)));
+  accelerator_for( ss, lhs_v.size(), 1, {
+    ret_v[ss] = adj(lhs_v[ss]);
   });
   return ret;
 };
@@ -64,6 +64,53 @@ template<class vobj> inline Lattice<vobj> conjugate(const Lattice<vobj> &lhs){
   return ret;
 };

+template<class vobj> inline Lattice<typename vobj::Complexified> toComplex(const Lattice<vobj> &lhs){
+  Lattice<typename vobj::Complexified> ret(lhs.Grid());
+
+  autoView( lhs_v, lhs, AcceleratorRead);
+  autoView( ret_v, ret, AcceleratorWrite);
+
+  ret.Checkerboard() = lhs.Checkerboard();
+  accelerator_for( ss, lhs_v.size(), 1, {
+    ret_v[ss] = toComplex(lhs_v[ss]);
+  });
+  return ret;
+};
+template<class vobj> inline Lattice<typename vobj::Realified> toReal(const Lattice<vobj> &lhs){
+  Lattice<typename vobj::Realified> ret(lhs.Grid());
+
+  autoView( lhs_v, lhs, AcceleratorRead);
+  autoView( ret_v, ret, AcceleratorWrite);
+
+  ret.Checkerboard() = lhs.Checkerboard();
+  accelerator_for( ss, lhs_v.size(), 1, {
+    ret_v[ss] = toReal(lhs_v[ss]);
+  });
+  return ret;
+};
+
+
+template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr>
+auto toComplex(const Expression &expr) -> decltype(closure(expr))
+{
+  return toComplex(closure(expr));
+}
+template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr>
+auto toReal(const Expression &expr) -> decltype(closure(expr))
+{
+  return toReal(closure(expr));
+}
+template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr>
+auto adj(const Expression &expr) -> decltype(closure(expr))
+{
+  return adj(closure(expr));
+}
+template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr>
+auto conjugate(const Expression &expr) -> decltype(closure(expr))
+{
+  return conjugate(closure(expr));
+}
+
 NAMESPACE_END(Grid);

 #endif
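With these overloads, the unary functions consume expression templates directly instead of requiring an explicit `closure()` at call sites. A usage sketch under assumed conditions (a standard 4-d default grid and complex scalar fields so the `Realified`/`Complexified` conversions apply; this snippet is illustrative and not part of the commit):

```cpp
#include <Grid/Grid.h>

using namespace Grid;

int main(int argc, char **argv)
{
  Grid_init(&argc, &argv);
  GridCartesian *g = SpaceTimeGrid::makeFourDimGrid(
      GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());

  LatticeComplex z(g), w(g);
  LatticeReal    r(g);
  z = Zero();
  w = Zero();

  // Before this change, call sites had to write e.g. toReal(closure(z + w)).
  r = toReal(z + w);      // expression closed internally, then converted
  z = toComplex(r + r);   // real -> complex promotion of an expression
  w = adj(z - w);         // adj now also accepts expressions

  Grid_finalize();
  return 0;
}
```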
@@ -130,6 +130,8 @@ public:
   friend std::ostream& operator<< (std::ostream& stream, Logger& log){

     if ( log.active ) {
+      std::ios_base::fmtflags f(stream.flags());
+
       stream << log.background()<< std::left;
       if (log.topWidth > 0)
       {
@@ -152,6 +154,8 @@ public:
                << now << log.background() << " : " ;
       }
       stream << log.colour();
+      stream.flags(f);
+
       return stream;
     } else {
       return devnull;
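The two added lines implement the standard save/restore idiom for stream format state, so manipulators such as `std::left` applied inside the logger no longer leak into the caller's stream. The idiom in isolation:

```cpp
#include <iostream>

int main()
{
  std::ios_base::fmtflags f(std::cout.flags());       // capture current state

  std::cout << std::hex << std::left << 255 << '\n';  // prints "ff"

  std::cout.flags(f);                                 // restore captured state
  std::cout << 255 << '\n';                           // prints "255" again
  return 0;
}
```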
@@ -1,3 +1,4 @@
 #include <Grid/GridCore.h>

 int Grid::BinaryIO::latticeWriteMaxRetry = -1;
+Grid::BinaryIO::IoPerf Grid::BinaryIO::lastPerf;
@@ -79,6 +79,13 @@ inline void removeWhitespace(std::string &key)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 class BinaryIO {
  public:
+  struct IoPerf
+  {
+    uint64_t size{0},time{0};
+    double   mbytesPerSecond{0.};
+  };
+
+  static IoPerf lastPerf;
   static int latticeWriteMaxRetry;

   /////////////////////////////////////////////////////////////////////////////
@@ -502,12 +509,15 @@ class BinaryIO {
       timer.Stop();
     }

+    lastPerf.size            = sizeof(fobj)*iodata.size()*nrank;
+    lastPerf.time            = timer.useconds();
+    lastPerf.mbytesPerSecond = lastPerf.size/1024./1024./(lastPerf.time/1.0e6);
     std::cout<<GridLogMessage<<"IOobject: ";
     if ( control & BINARYIO_READ) std::cout << " read  ";
     else                          std::cout << " write ";
     uint64_t bytes = sizeof(fobj)*iodata.size()*nrank;
-    std::cout<< bytes <<" bytes in "<<timer.Elapsed() <<" "
-             << (double)bytes/ (double)timer.useconds() <<" MB/s "<<std::endl;
+    std::cout<< lastPerf.size <<" bytes in "<< timer.Elapsed() <<" "
+             << lastPerf.mbytesPerSecond <<" MB/s "<<std::endl;

     std::cout<<GridLogMessage<<"IOobject: endian and checksum overhead "<<bstimer.Elapsed() <<std::endl;
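With `lastPerf` populated inside `IOobject`, callers can query the statistics of the most recent transfer after the fact. A small sketch (the helper name is ours, not Grid's):

```cpp
#include <Grid/Grid.h>
#include <iostream>

// Print the statistics recorded by the last read/write that went through
// BinaryIO::IOobject.
void reportLastIo()
{
  const auto &p = Grid::BinaryIO::lastPerf;
  std::cout << "last I/O: " << p.size << " bytes in " << p.time
            << " us (" << p.mbytesPerSecond << " MB/s)" << std::endl;
}
```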
@@ -47,7 +47,7 @@ static constexpr int Ym = 5;
 static constexpr int Zm = 6;
 static constexpr int Tm = 7;

-static constexpr int Nc=3;
+static constexpr int Nc=Config_Nc;
 static constexpr int Ns=4;
 static constexpr int Nd=4;
 static constexpr int Nhs=2; // half spinor
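`Config_Nc` is expected to be supplied by the build configuration rather than hard-wired in the source. The pattern in isolation, with an assumed default of 3 to match the previously fixed value:

```cpp
// Sketch of the configure-time pattern (the fallback value is an assumption).
#ifndef Config_Nc
#define Config_Nc 3
#endif

static constexpr int Nc = Config_Nc;
static_assert(Nc > 0, "number of colours must be positive");
```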
@@ -133,14 +133,14 @@ void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
   pickCheckerboard(Even, CloverTermEven, CloverTerm);
   pickCheckerboard(Odd, CloverTermOdd, CloverTerm);

-  pickCheckerboard(Even, CloverTermDagEven, closure(adj(CloverTerm)));
-  pickCheckerboard(Odd, CloverTermDagOdd, closure(adj(CloverTerm)));
+  pickCheckerboard(Even, CloverTermDagEven, adj(CloverTerm));
+  pickCheckerboard(Odd, CloverTermDagOdd, adj(CloverTerm));

   pickCheckerboard(Even, CloverTermInvEven, CloverTermInv);
   pickCheckerboard(Odd, CloverTermInvOdd, CloverTermInv);

-  pickCheckerboard(Even, CloverTermInvDagEven, closure(adj(CloverTermInv)));
-  pickCheckerboard(Odd, CloverTermInvDagOdd, closure(adj(CloverTermInv)));
+  pickCheckerboard(Even, CloverTermInvDagEven, adj(CloverTermInv));
+  pickCheckerboard(Odd, CloverTermInvDagOdd, adj(CloverTermInv));
 }

 template <class Impl>
@@ -449,7 +449,8 @@ public:
       LatticeReal alpha(grid);

       //    std::cout<<GridLogMessage<<"xi "<<xi <<std::endl;
-      alpha = toReal(2.0 * xi);
+      xi = 2.0 *xi;
+      alpha = toReal(xi);

       do {
         // A. Generate two uniformly distributed pseudo-random numbers R and R',
--- a/README
+++ b/README
@@ -111,11 +111,10 @@ Now you can execute the `configure` script to generate makefiles (here from a bu

 ``` bash
 mkdir build; cd build
-../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi-auto --prefix=<path>
+../configure --enable-simd=AVX --enable-comms=mpi-auto --prefix=<path>
 ```

-where `--enable-precision=` set the default precision,
-`--enable-simd=` set the SIMD type, `--enable-
+where `--enable-simd=` set the SIMD type, `--enable-
 comms=`, and `<path>` should be replaced by the prefix path where you want to
 install Grid. Other options are detailed in the next section, you can also use `configure
 --help` to display them. Like with any other program using GNU autotool, the
@@ -146,8 +145,8 @@ If you want to build all the tests at once just use `make tests`.
 - `--enable-numa`: enable NUMA first touch optimisation
 - `--enable-simd=<code>`: setup Grid for the SIMD target `<code>` (default: `GEN`). A list of possible SIMD targets is detailed in a section below.
 - `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes).
-- `--enable-precision={single|double}`: set the default precision (default: `double`).
-- `--enable-precision=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible SIMD targets is detailed in a section below.
+- `--enable-precision={single|double}`: set the default precision (default: `double`). **Deprecated option**
+- `--enable-comms=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible SIMD targets is detailed in a section below.
 - `--enable-rng={sitmo|ranlux48|mt19937}`: choose the RNG (default: `sitmo `).
 - `--disable-timers`: disable system dependent high-resolution timers.
 - `--enable-chroma`: enable Chroma regression tests.
@@ -201,8 +200,7 @@ Alternatively, some CPU codenames can be directly used:
 The following configuration is recommended for the Intel Knights Landing platform:

 ``` bash
-../configure --enable-precision=double\
-             --enable-simd=KNL \
+../configure --enable-simd=KNL \
              --enable-comms=mpi-auto \
              --enable-mkl \
              CXX=icpc MPICXX=mpiicpc
@@ -212,8 +210,7 @@ The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library.
 If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:

 ``` bash
-../configure --enable-precision=double\
-             --enable-simd=KNL \
+../configure --enable-simd=KNL \
              --enable-comms=mpi \
              --enable-mkl \
              CXX=CC CC=cc
@@ -232,8 +229,7 @@ for interior communication. This is the mpi3 communications implementation.
 We recommend four ranks per node for best performance, but optimum is local volume dependent.

 ``` bash
-../configure --enable-precision=double\
-             --enable-simd=KNL \
+../configure --enable-simd=KNL \
              --enable-comms=mpi3-auto \
              --enable-mkl \
              CC=icpc MPICXX=mpiicpc
@@ -244,8 +240,7 @@ We recommend four ranks per node for best performance, but optimum is local volu
 The following configuration is recommended for the Intel Haswell platform:

 ``` bash
-../configure --enable-precision=double\
-             --enable-simd=AVX2 \
+../configure --enable-simd=AVX2 \
              --enable-comms=mpi3-auto \
              --enable-mkl \
              CXX=icpc MPICXX=mpiicpc
@@ -262,8 +257,7 @@ where `<path>` is the UNIX prefix where GMP and MPFR are installed.
 If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:

 ``` bash
-../configure --enable-precision=double\
-             --enable-simd=AVX2 \
+../configure --enable-simd=AVX2 \
              --enable-comms=mpi3 \
              --enable-mkl \
              CXX=CC CC=cc
@@ -280,8 +274,7 @@ This is the default.
 The following configuration is recommended for the Intel Skylake platform:

 ``` bash
-../configure --enable-precision=double\
-             --enable-simd=AVX512 \
+../configure --enable-simd=AVX512 \
              --enable-comms=mpi3 \
              --enable-mkl \
              CXX=mpiicpc
@@ -298,8 +291,7 @@ where `<path>` is the UNIX prefix where GMP and MPFR are installed.
 If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:

 ``` bash
-../configure --enable-precision=double\
-             --enable-simd=AVX512 \
+../configure --enable-simd=AVX512 \
              --enable-comms=mpi3 \
              --enable-mkl \
              CXX=CC CC=cc
@@ -330,8 +322,7 @@ and 8 threads per rank.
 The following configuration is recommended for the AMD EPYC platform.

 ``` bash
-../configure --enable-precision=double\
-             --enable-simd=AVX2 \
+../configure --enable-simd=AVX2 \
              --enable-comms=mpi3 \
              CXX=mpicxx
 ```
--- a/README.md
+++ b/README.md
@@ -115,11 +115,10 @@ Now you can execute the `configure` script to generate makefiles (here from a bu

 ``` bash
 mkdir build; cd build
-../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi-auto --prefix=<path>
+../configure --enable-simd=AVX --enable-comms=mpi-auto --prefix=<path>
 ```

-where `--enable-precision=` set the default precision,
-`--enable-simd=` set the SIMD type, `--enable-
+where `--enable-simd=` set the SIMD type, `--enable-
 comms=`, and `<path>` should be replaced by the prefix path where you want to
 install Grid. Other options are detailed in the next section, you can also use `configure
 --help` to display them. Like with any other program using GNU autotool, the
@@ -150,8 +149,8 @@ If you want to build all the tests at once just use `make tests`.
 - `--enable-numa`: enable NUMA first touch optimisation
 - `--enable-simd=<code>`: setup Grid for the SIMD target `<code>` (default: `GEN`). A list of possible SIMD targets is detailed in a section below.
 - `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes).
-- `--enable-precision={single|double}`: set the default precision (default: `double`).
-- `--enable-precision=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible SIMD targets is detailed in a section below.
+- `--enable-precision={single|double}`: set the default precision (default: `double`). **Deprecated option**
+- `--enable-comms=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible SIMD targets is detailed in a section below.
 - `--enable-rng={sitmo|ranlux48|mt19937}`: choose the RNG (default: `sitmo `).
 - `--disable-timers`: disable system dependent high-resolution timers.
 - `--enable-chroma`: enable Chroma regression tests.
@@ -205,8 +204,7 @@ Alternatively, some CPU codenames can be directly used:
 The following configuration is recommended for the Intel Knights Landing platform:

 ``` bash
-../configure --enable-precision=double\
-             --enable-simd=KNL \
+../configure --enable-simd=KNL \
              --enable-comms=mpi-auto \
              --enable-mkl \
              CXX=icpc MPICXX=mpiicpc
@@ -216,8 +214,7 @@ The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library.
 If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:

 ``` bash
-../configure --enable-precision=double\
-             --enable-simd=KNL \
+../configure --enable-simd=KNL \
              --enable-comms=mpi \
              --enable-mkl \
              CXX=CC CC=cc
@@ -236,8 +233,7 @@ for interior communication. This is the mpi3 communications implementation.
 We recommend four ranks per node for best performance, but optimum is local volume dependent.

 ``` bash
-../configure --enable-precision=double\
-             --enable-simd=KNL \
+../configure --enable-simd=KNL \
              --enable-comms=mpi3-auto \
              --enable-mkl \
              CC=icpc MPICXX=mpiicpc
@@ -248,8 +244,7 @@ We recommend four ranks per node for best performance, but optimum is local volu
 The following configuration is recommended for the Intel Haswell platform:

 ``` bash
-../configure --enable-precision=double\
-             --enable-simd=AVX2 \
+../configure --enable-simd=AVX2 \
              --enable-comms=mpi3-auto \
              --enable-mkl \
              CXX=icpc MPICXX=mpiicpc
@@ -266,8 +261,7 @@ where `<path>` is the UNIX prefix where GMP and MPFR are installed.
 If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:

 ``` bash
-../configure --enable-precision=double\
-             --enable-simd=AVX2 \
+../configure --enable-simd=AVX2 \
              --enable-comms=mpi3 \
              --enable-mkl \
              CXX=CC CC=cc
@@ -284,8 +278,7 @@ This is the default.
 The following configuration is recommended for the Intel Skylake platform:

 ``` bash
-../configure --enable-precision=double\
-             --enable-simd=AVX512 \
+../configure --enable-simd=AVX512 \
              --enable-comms=mpi3 \
              --enable-mkl \
              CXX=mpiicpc
@@ -302,8 +295,7 @@ where `<path>` is the UNIX prefix where GMP and MPFR are installed.
 If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:

 ``` bash
-../configure --enable-precision=double\
-             --enable-simd=AVX512 \
+../configure --enable-simd=AVX512 \
              --enable-comms=mpi3 \
              --enable-mkl \
              CXX=CC CC=cc
@@ -334,8 +326,7 @@ and 8 threads per rank.
 The following configuration is recommended for the AMD EPYC platform.

 ``` bash
-../configure --enable-precision=double\
-             --enable-simd=AVX2 \
+../configure --enable-simd=AVX2 \
              --enable-comms=mpi3 \
              CXX=mpicxx
 ```
@@ -12,31 +12,31 @@ module load mpi/openmpi-aarch64

 scl enable gcc-toolset-10 bash

-../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=g++ CC=gcc CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN"
+../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=g++ CC=gcc CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN"

 * gcc 10.1 prebuild w/ MPI, QPACE4 interactive login

 scl enable gcc-toolset-10 bash
 module load mpi/openmpi-aarch64

-../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=mpi-auto --enable-shm=shmget --enable-openmp CXX=mpicxx CC=mpicc CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN"
+../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=mpi-auto --enable-shm=shmget --enable-openmp CXX=mpicxx CC=mpicc CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN"

 ------------------------------------------------------------------------------

 * armclang 20.2 (qp4)

-../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DA64FX -DARMCLANGCOMPAT -DA64FXASM -DDSLASHINTRIN"
+../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DA64FX -DARMCLANGCOMPAT -DA64FXASM -DDSLASHINTRIN"

 ------------------------------------------------------------------------------

 * gcc 10.0.1 VLA (merlin)

-../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=g++-10.0.1 CC=gcc-10.0.1 CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static
+../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=g++-10.0.1 CC=gcc-10.0.1 CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static


 * gcc 10.0.1 fixed-size ACLE (merlin)

-../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=g++-10.0.1 CC=gcc-10.0.1 CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN"
+../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=g++-10.0.1 CC=gcc-10.0.1 CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN"


 * gcc 10.0.1 fixed-size ACLE (fjt) w/ MPI
@@ -46,34 +46,34 @@ export OMPI_CXX=g++-10.0.1
 export MPICH_CC=gcc-10.0.1
 export MPICH_CXX=g++-10.0.1

-$ ../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=mpi3 --enable-openmp CXX=mpiFCC CC=mpifcc CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN -DTOFU -I/opt/FJSVxtclanga/tcsds-1.2.25/include/mpi/fujitsu -lrt" LDFLAGS="-L/opt/FJSVxtclanga/tcsds-1.2.25/lib64 -lrt"
+$ ../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=mpi3 --enable-openmp CXX=mpiFCC CC=mpifcc CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN -DTOFU -I/opt/FJSVxtclanga/tcsds-1.2.25/include/mpi/fujitsu -lrt" LDFLAGS="-L/opt/FJSVxtclanga/tcsds-1.2.25/lib64 -lrt"

 --------------------------------------------------------

 * armclang 20.0 VLA (merlin)

-../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -fno-unroll-loops -mllvm -vectorizer-min-trip-count=2 -march=armv8-a+sve -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static
+../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -fno-unroll-loops -mllvm -vectorizer-min-trip-count=2 -march=armv8-a+sve -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static

 TODO check ARMCLANGCOMPAT


 * armclang 20.1 VLA (merlin)

-../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static
+../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static

 TODO check ARMCLANGCOMPAT


 * armclang 20.1 VLA (fjt cluster)

-../configure --with-lime=$HOME/local --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN -DTOFU"
+../configure --with-lime=$HOME/local --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN -DTOFU"

 TODO check ARMCLANGCOMPAT


 * armclang 20.1 VLA w/MPI (fjt cluster)

-../configure --with-lime=$HOME/local --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=mpi3 --enable-openmp CXX=mpiFCC CC=mpifcc CXXFLAGS="-std=c++11 -mcpu=a64fx -DA64FX -DA64FXASM -DDSLASHINTRIN -DTOFU -I/opt/FJSVxtclanga/tcsds-1.2.25/include/mpi/fujitsu -lrt" LDFLAGS="-L/opt/FJSVxtclanga/tcsds-1.2.25/lib64"
+../configure --with-lime=$HOME/local --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=mpi3 --enable-openmp CXX=mpiFCC CC=mpifcc CXXFLAGS="-std=c++11 -mcpu=a64fx -DA64FX -DA64FXASM -DDSLASHINTRIN -DTOFU -I/opt/FJSVxtclanga/tcsds-1.2.25/include/mpi/fujitsu -lrt" LDFLAGS="-L/opt/FJSVxtclanga/tcsds-1.2.25/lib64"

 No ARMCLANGCOMPAT -> still correct ?

@@ -81,9 +81,9 @@ No ARMCLANGCOMPAT -> still correct ?

 * Fujitsu fcc

-../configure --with-lime=$HOME/grid-a64fx/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp --with-mpfr=/home/users/gre/gre-1/grid-a64fx/mpfr-build/install CXX=FCC CC=fcc CXXFLAGS="-Nclang -Kfast -DA64FX -DA64FXASM -DDSLASHINTRIN"
+../configure --with-lime=$HOME/grid-a64fx/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp --with-mpfr=/home/users/gre/gre-1/grid-a64fx/mpfr-build/install CXX=FCC CC=fcc CXXFLAGS="-Nclang -Kfast -DA64FX -DA64FXASM -DDSLASHINTRIN"

 * Fujitsu fcc w/ MPI

-../configure --with-lime=$HOME/grid-a64fx/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=mpi --enable-openmp --with-mpfr=/home/users/gre/gre-1/grid-a64fx/mpfr-build/install CXX=mpiFCC CC=mpifcc CXXFLAGS="-Nclang -Kfast -DA64FX -DA64FXASM -DDSLASHINTRIN -DTOFU"
+../configure --with-lime=$HOME/grid-a64fx/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=mpi --enable-openmp --with-mpfr=/home/users/gre/gre-1/grid-a64fx/mpfr-build/install CXX=mpiFCC CC=mpifcc CXXFLAGS="-Nclang -Kfast -DA64FX -DA64FXASM -DDSLASHINTRIN -DTOFU"
@@ -1,8 +1,16 @@

 #include "Benchmark_IO.hpp"

+#ifndef BENCH_IO_LMIN
+#define BENCH_IO_LMIN 8
+#endif
+
 #ifndef BENCH_IO_LMAX
-#define BENCH_IO_LMAX 40
+#define BENCH_IO_LMAX 32
+#endif
+
+#ifndef BENCH_IO_NPASS
+#define BENCH_IO_NPASS 10
 #endif

 using namespace Grid;
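The `#ifndef` guards only supply defaults, so the sweep range and pass count can be overridden from the compiler command line (e.g. `-DBENCH_IO_LMIN=16 -DBENCH_IO_LMAX=48 -DBENCH_IO_NPASS=5`). The same default-override pattern in isolation:

```cpp
#include <cstdio>

#ifndef NPASS
#define NPASS 10  // default; compile with -DNPASS=5 to override
#endif

int main()
{
  std::printf("passes: %d\n", NPASS);
  return 0;
}
```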
@@ -12,37 +20,179 @@ std::string filestem(const int l)
   return "iobench_l" + std::to_string(l);
 }

+int vol(const int i)
+{
+  return BENCH_IO_LMIN + 2*i;
+}
+
+int volInd(const int l)
+{
+  return (l - BENCH_IO_LMIN)/2;
+}
+
+template <typename Mat>
+void stats(Mat &mean, Mat &stdDev, const std::vector<Mat> &data)
+{
+  auto nr = data[0].rows(), nc = data[0].cols();
+  Eigen::MatrixXd sqSum(nr, nc);
+  double n = static_cast<double>(data.size());
+
+  assert(n > 1.);
+  mean  = Mat::Zero(nr, nc);
+  sqSum = Mat::Zero(nr, nc);
+  for (auto &d: data)
+  {
+    mean  += d;
+    sqSum += d.cwiseProduct(d);
+  }
+  stdDev = ((sqSum - mean.cwiseProduct(mean)/n)/(n - 1.)).cwiseSqrt();
+  mean  /= n;
+}
+
+#define grid_printf(...) \
+{\
+  char _buf[1024];\
+  sprintf(_buf, __VA_ARGS__);\
+  MSG << _buf;\
+}
+
+enum {sRead = 0, sWrite = 1, gRead = 2, gWrite = 3};
+
 int main (int argc, char ** argv)
 {
-#ifdef HAVE_LIME
   Grid_init(&argc,&argv);

   int64_t threads = GridThread::GetThreads();
+  auto mpi = GridDefaultMpi();
+  unsigned int nVol    = (BENCH_IO_LMAX - BENCH_IO_LMIN)/2 + 1;
+  unsigned int nRelVol = (BENCH_IO_LMAX - 24)/2 + 1;
+  std::vector<Eigen::MatrixXd> perf(BENCH_IO_NPASS, Eigen::MatrixXd::Zero(nVol, 4));
+  std::vector<Eigen::VectorXd> avPerf(BENCH_IO_NPASS, Eigen::VectorXd::Zero(4));
+  std::vector<int> latt;

   MSG << "Grid is setup to use " << threads << " threads" << std::endl;
-  MSG << SEP << std::endl;
-  MSG << "Benchmark Lime write" << std::endl;
-  MSG << SEP << std::endl;
-  for (int l = 4; l <= BENCH_IO_LMAX; l += 2)
+  MSG << "MPI partition " << mpi << std::endl;
+  for (unsigned int i = 0; i < BENCH_IO_NPASS; ++i)
   {
-    auto mpi = GridDefaultMpi();
-    std::vector<int> latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
-
-    std::cout << "-- Local volume " << l << "^4" << std::endl;
-    writeBenchmark<LatticeFermion>(latt, filestem(l), limeWrite<LatticeFermion>);
-  }
-
-  MSG << "Benchmark Lime read" << std::endl;
-  MSG << SEP << std::endl;
-  for (int l = 4; l <= BENCH_IO_LMAX; l += 2)
-  {
-    auto mpi = GridDefaultMpi();
-    std::vector<int> latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
-
-    std::cout << "-- Local volume " << l << "^4" << std::endl;
-    readBenchmark<LatticeFermion>(latt, filestem(l), limeRead<LatticeFermion>);
+    MSG << BIGSEP << std::endl;
+    MSG << "Pass " << i + 1 << "/" << BENCH_IO_NPASS << std::endl;
+    MSG << BIGSEP << std::endl;
+    MSG << SEP << std::endl;
+    MSG << "Benchmark std write" << std::endl;
+    MSG << SEP << std::endl;
+    for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
+    {
+      latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
+
+      MSG << "-- Local volume " << l << "^4" << std::endl;
+      writeBenchmark<LatticeFermion>(latt, filestem(l), stdWrite<LatticeFermion>);
+      perf[i](volInd(l), sWrite) = BinaryIO::lastPerf.mbytesPerSecond;
+    }
+
+    MSG << SEP << std::endl;
+    MSG << "Benchmark std read" << std::endl;
+    MSG << SEP << std::endl;
+    for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
+    {
+      latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
+
+      MSG << "-- Local volume " << l << "^4" << std::endl;
+      readBenchmark<LatticeFermion>(latt, filestem(l), stdRead<LatticeFermion>);
+      perf[i](volInd(l), sRead) = BinaryIO::lastPerf.mbytesPerSecond;
+    }
+
+#ifdef HAVE_LIME
+    MSG << SEP << std::endl;
+    MSG << "Benchmark Grid C-Lime write" << std::endl;
+    MSG << SEP << std::endl;
+    for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
+    {
+      latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
+
+      MSG << "-- Local volume " << l << "^4" << std::endl;
+      writeBenchmark<LatticeFermion>(latt, filestem(l), limeWrite<LatticeFermion>);
+      perf[i](volInd(l), gWrite) = BinaryIO::lastPerf.mbytesPerSecond;
+    }
+
+    MSG << SEP << std::endl;
+    MSG << "Benchmark Grid C-Lime read" << std::endl;
+    MSG << SEP << std::endl;
+    for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
+    {
+      latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
+
+      MSG << "-- Local volume " << l << "^4" << std::endl;
+      readBenchmark<LatticeFermion>(latt, filestem(l), limeRead<LatticeFermion>);
+      perf[i](volInd(l), gRead) = BinaryIO::lastPerf.mbytesPerSecond;
+    }
+#endif
+    avPerf[i].fill(0.);
+    for (int f = 0; f < 4; ++f)
+    for (int l = 24; l <= BENCH_IO_LMAX; l += 2)
+    {
+      avPerf[i](f) += perf[i](volInd(l), f);
+    }
+    avPerf[i] /= nRelVol;
   }

+  Eigen::MatrixXd mean(nVol, 4), stdDev(nVol, 4), rob(nVol, 4);
+  Eigen::VectorXd avMean(4), avStdDev(4), avRob(4);
+  double n = BENCH_IO_NPASS;
+
+  stats(mean, stdDev, perf);
+  stats(avMean, avStdDev, avPerf);
+  rob.fill(100.);
+  rob -= 100.*stdDev.cwiseQuotient(mean.cwiseAbs());
+  avRob.fill(100.);
+  avRob -= 100.*avStdDev.cwiseQuotient(avMean.cwiseAbs());
+
+  MSG << BIGSEP << std::endl;
+  MSG << "SUMMARY" << std::endl;
+  MSG << BIGSEP << std::endl;
+  MSG << "Summary of individual results (all results in MB/s)." << std::endl;
+  MSG << "Every second colum gives the standard deviation of the previous column." << std::endl;
+  MSG << std::endl;
+  grid_printf("%4s %12s %12s %12s %12s %12s %12s %12s %12s\n",
+              "L", "std read", "std dev", "std write", "std dev",
+              "Grid read", "std dev", "Grid write", "std dev");
+  for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
+  {
+    grid_printf("%4d %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n",
+                l, mean(volInd(l), sRead), stdDev(volInd(l), sRead),
+                mean(volInd(l), sWrite), stdDev(volInd(l), sWrite),
+                mean(volInd(l), gRead), stdDev(volInd(l), gRead),
+                mean(volInd(l), gWrite), stdDev(volInd(l), gWrite));
+  }
+  MSG << std::endl;
+  MSG << "Robustness of individual results, in \%. (rob = 100\% - std dev / mean)" << std::endl;
+  MSG << std::endl;
+  grid_printf("%4s %12s %12s %12s %12s\n",
+              "L", "std read", "std write", "Grid read", "Grid write");
+  for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
+  {
+    grid_printf("%4d %12.1f %12.1f %12.1f %12.1f\n",
+                l, rob(volInd(l), sRead), rob(volInd(l), sWrite),
+                rob(volInd(l), gRead), rob(volInd(l), gWrite));
+  }
+  MSG << std::endl;
+  MSG << "Summary of results averaged over local volumes 24^4-" << BENCH_IO_LMAX << "^4 (all results in MB/s)." << std::endl;
+  MSG << "Every second colum gives the standard deviation of the previous column." << std::endl;
+  MSG << std::endl;
+  grid_printf("%12s %12s %12s %12s %12s %12s %12s %12s\n",
+              "std read", "std dev", "std write", "std dev",
+              "Grid read", "std dev", "Grid write", "std dev");
+  grid_printf("%12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n",
+              avMean(sRead), avStdDev(sRead), avMean(sWrite), avStdDev(sWrite),
+              avMean(gRead), avStdDev(gRead), avMean(gWrite), avStdDev(gWrite));
+  MSG << std::endl;
+  MSG << "Robustness of volume-averaged results, in \%. (rob = 100\% - std dev / mean)" << std::endl;
+  MSG << std::endl;
+  grid_printf("%12s %12s %12s %12s\n",
+              "std read", "std write", "Grid read", "Grid write");
+  grid_printf("%12.1f %12.1f %12.1f %12.1f\n",
+              avRob(sRead), avRob(sWrite), avRob(gRead), avRob(gWrite));

   Grid_finalize();
-#endif

   return EXIT_SUCCESS;
 }
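The summary statistics follow the usual sample estimators: `stats()` accumulates the sum and the sum of squares, then forms the mean and the (n-1)-normalised standard deviation, and the robustness figure is rob = 100% - 100% * stddev / |mean|. A scalar transcription of that arithmetic:

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

int main()
{
  std::vector<double> data = {900., 950., 1000., 1050., 1100.}; // MB/s samples
  double n = static_cast<double>(data.size());
  double sum = 0., sqSum = 0.;

  for (double d : data) { sum += d; sqSum += d * d; }
  double stdDev = std::sqrt((sqSum - sum * sum / n) / (n - 1.)); // as in stats()
  double mean   = sum / n;
  double rob    = 100. - 100. * stdDev / std::fabs(mean);

  std::printf("mean %.1f MB/s, std dev %.1f, robustness %.1f%%\n",
              mean, stdDev, rob);
  return 0;
}
```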
@@ -5,6 +5,8 @@
 #ifdef HAVE_LIME
 #define MSG std::cout << GridLogMessage
 #define SEP \
+"-----------------------------------------------------------------------------"
+#define BIGSEP \
 "============================================================================="

 namespace Grid {
@ -14,13 +16,152 @@ using WriterFn = std::function<void(const std::string, Field &)> ;
|
|||||||
template <typename Field>
|
template <typename Field>
|
||||||
using ReaderFn = std::function<void(Field &, const std::string)>;
|
using ReaderFn = std::function<void(Field &, const std::string)>;
|
||||||
|
|
||||||
|
// AP 06/10/2020: Standard C version in case one is suspicious of the C++ API
|
||||||
|
//
|
||||||
|
// template <typename Field>
|
||||||
|
// void stdWrite(const std::string filestem, Field &vec)
|
||||||
|
// {
|
||||||
|
// std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
||||||
|
// std::FILE *file = std::fopen((filestem + "." + rankStr + ".bin").c_str(), "wb");
|
||||||
|
// size_t size;
|
||||||
|
// uint32_t crc;
|
||||||
|
// GridStopWatch ioWatch, crcWatch;
|
||||||
|
|
||||||
|
// size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object);
|
||||||
|
// autoView(vec_v, vec, CpuRead);
|
||||||
|
// crcWatch.Start();
|
||||||
|
// crc = GridChecksum::crc32(vec_v.cpu_ptr, size);
|
||||||
|
// std::fwrite(&crc, sizeof(uint32_t), 1, file);
|
||||||
|
// crcWatch.Stop();
|
||||||
|
// MSG << "Std I/O write: Data CRC32 " << std::hex << crc << std::dec << std::endl;
|
||||||
|
// ioWatch.Start();
|
||||||
|
// std::fwrite(vec_v.cpu_ptr, sizeof(typename Field::scalar_object), vec.Grid()->lSites(), file);
|
||||||
|
// ioWatch.Stop();
|
||||||
|
// std::fclose(file);
|
||||||
|
// size *= vec.Grid()->ProcessorCount();
|
||||||
|
// auto &p = BinaryIO::lastPerf;
|
||||||
|
// p.size = size;
|
||||||
|
// p.time = ioWatch.useconds();
|
||||||
|
// p.mbytesPerSecond = size/1024./1024./(ioWatch.useconds()/1.e6);
|
||||||
|
// MSG << "Std I/O write: Wrote " << p.size << " bytes in " << ioWatch.Elapsed()
|
||||||
|
// << ", " << p.mbytesPerSecond << " MB/s" << std::endl;
|
||||||
|
// MSG << "Std I/O write: checksum overhead " << crcWatch.Elapsed() << std::endl;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// template <typename Field>
|
||||||
|
// void stdRead(Field &vec, const std::string filestem)
|
||||||
|
// {
|
||||||
|
// std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
||||||
|
// std::FILE *file = std::fopen((filestem + "." + rankStr + ".bin").c_str(), "rb");
|
||||||
|
// size_t size;
|
||||||
|
// uint32_t crcRead, crcData;
|
||||||
|
// GridStopWatch ioWatch, crcWatch;
|
||||||
|
|
||||||
|
// size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object);
|
||||||
|
// crcWatch.Start();
|
||||||
|
// std::fread(&crcRead, sizeof(uint32_t), 1, file);
|
||||||
|
// crcWatch.Stop();
|
||||||
|
// {
|
||||||
|
// autoView(vec_v, vec, CpuWrite);
|
||||||
|
// ioWatch.Start();
|
||||||
|
// std::fread(vec_v.cpu_ptr, sizeof(typename Field::scalar_object), vec.Grid()->lSites(), file);
|
||||||
|
// ioWatch.Stop();
|
||||||
|
// std::fclose(file);
|
||||||
|
// }
|
||||||
|
// {
|
||||||
|
// autoView(vec_v, vec, CpuRead);
|
||||||
|
// crcWatch.Start();
|
||||||
|
// crcData = GridChecksum::crc32(vec_v.cpu_ptr, size);
|
||||||
|
// crcWatch.Stop();
|
||||||
|
// }
|
||||||
|
// MSG << "Std I/O read: Data CRC32 " << std::hex << crcData << std::dec << std::endl;
|
||||||
|
// assert(crcData == crcRead);
|
||||||
|
// size *= vec.Grid()->ProcessorCount();
|
||||||
|
// auto &p = BinaryIO::lastPerf;
|
||||||
|
// p.size = size;
|
||||||
|
// p.time = ioWatch.useconds();
|
||||||
|
// p.mbytesPerSecond = size/1024./1024./(ioWatch.useconds()/1.e6);
|
||||||
|
// MSG << "Std I/O read: Read " << p.size << " bytes in " << ioWatch.Elapsed()
|
||||||
|
// << ", " << p.mbytesPerSecond << " MB/s" << std::endl;
|
||||||
|
// MSG << "Std I/O read: checksum overhead " << crcWatch.Elapsed() << std::endl;
|
||||||
|
// }
|
||||||
|
|
||||||
|
template <typename Field>
|
||||||
|
void stdWrite(const std::string filestem, Field &vec)
|
||||||
|
{
|
||||||
|
std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
||||||
|
std::ofstream file(filestem + "." + rankStr + ".bin", std::ios::out | std::ios::binary);
|
||||||
|
size_t size, sizec;
|
||||||
|
uint32_t crc;
|
||||||
|
GridStopWatch ioWatch, crcWatch;
|
||||||
|
|
||||||
|
size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object);
|
||||||
|
sizec = size/sizeof(char); // just in case of...
|
||||||
|
autoView(vec_v, vec, CpuRead);
|
||||||
|
crcWatch.Start();
|
||||||
|
crc = GridChecksum::crc32(vec_v.cpu_ptr, size);
|
||||||
|
file.write(reinterpret_cast<char *>(&crc), sizeof(uint32_t)/sizeof(char));
|
||||||
|
crcWatch.Stop();
|
||||||
|
MSG << "Std I/O write: Data CRC32 " << std::hex << crc << std::dec << std::endl;
|
||||||
|
ioWatch.Start();
|
||||||
|
file.write(reinterpret_cast<char *>(vec_v.cpu_ptr), sizec);
|
||||||
|
file.flush();
|
||||||
|
ioWatch.Stop();
|
||||||
|
size *= vec.Grid()->ProcessorCount();
|
||||||
|
auto &p = BinaryIO::lastPerf;
|
||||||
|
p.size = size;
|
||||||
|
p.time = ioWatch.useconds();
|
||||||
|
p.mbytesPerSecond = size/1024./1024./(ioWatch.useconds()/1.e6);
|
||||||
|
MSG << "Std I/O write: Wrote " << p.size << " bytes in " << ioWatch.Elapsed()
|
||||||
|
<< ", " << p.mbytesPerSecond << " MB/s" << std::endl;
|
||||||
|
MSG << "Std I/O write: checksum overhead " << crcWatch.Elapsed() << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
+
+template <typename Field>
+void stdRead(Field &vec, const std::string filestem)
+{
+  std::string   rankStr = std::to_string(vec.Grid()->ThisRank());
+  std::ifstream file(filestem + "." + rankStr + ".bin", std::ios::in | std::ios::binary);
+  size_t        size, sizec;
+  uint32_t      crcRead, crcData;
+  GridStopWatch ioWatch, crcWatch;
+
+  size  = vec.Grid()->lSites()*sizeof(typename Field::scalar_object);
+  sizec = size/sizeof(char); // just in case of...
+  crcWatch.Start();
+  file.read(reinterpret_cast<char *>(&crcRead), sizeof(uint32_t)/sizeof(char));
+  crcWatch.Stop();
+  {
+    autoView(vec_v, vec, CpuWrite);
+    ioWatch.Start();
+    file.read(reinterpret_cast<char *>(vec_v.cpu_ptr), sizec);
+    ioWatch.Stop();
+  }
+  {
+    autoView(vec_v, vec, CpuRead);
+    crcWatch.Start();
+    crcData = GridChecksum::crc32(vec_v.cpu_ptr, size);
+    crcWatch.Stop();
+  }
+  MSG << "Std I/O read: Data CRC32 " << std::hex << crcData << std::dec << std::endl;
+  assert(crcData == crcRead);
+  size *= vec.Grid()->ProcessorCount();
+  auto &p = BinaryIO::lastPerf;
+  p.size            = size;
+  p.time            = ioWatch.useconds();
+  p.mbytesPerSecond = size/1024./1024./(ioWatch.useconds()/1.e6);
+  MSG << "Std I/O read: Read " << p.size << " bytes in " << ioWatch.Elapsed()
+      << ", " << p.mbytesPerSecond << " MB/s" << std::endl;
+  MSG << "Std I/O read: checksum overhead " << crcWatch.Elapsed() << std::endl;
+}
+
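Again not from the commit: a round-trip sketch of the two routines above. The grid construction mirrors the helpers used in writeBenchmark further down, and Benchmark_IO.hpp is the header this diff extends:

    #include <Grid/Grid.h>
    #include "Benchmark_IO.hpp" // stdWrite/stdRead from the diff above

    using namespace Grid;

    int main(int argc, char **argv)
    {
      Grid_init(&argc, &argv);

      // Default 4d grid, double-precision SIMD layout.
      auto simd = GridDefaultSimd(Nd, vComplex::Nsimd());
      std::shared_ptr<GridCartesian> g(
          SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), simd, GridDefaultMpi()));

      GridParallelRNG rng(g.get());
      rng.SeedFixedIntegers(std::vector<int>({45, 12, 81, 9}));

      LatticeFermion out(g.get()), in(g.get());
      random(rng, out);
      stdWrite(std::string("./ioBench"), out); // one ./ioBench.<rank>.bin per rank
      stdRead(in, std::string("./ioBench"));   // asserts the stored CRC32 matches

      Grid_finalize();
      return 0;
    }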
 template <typename Field>
 void limeWrite(const std::string filestem, Field &vec)
 {
   emptyUserRecord record;
   ScidacWriter    binWriter(vec.Grid()->IsBoss());

-  binWriter.open(filestem + ".bin");
+  binWriter.open(filestem + ".lime.bin");
   binWriter.writeScidacFieldRecord(vec, record);
   binWriter.close();
 }

@ -31,7 +172,7 @@ void limeRead(Field &vec, const std::string filestem)
   emptyUserRecord record;
   ScidacReader    binReader;

-  binReader.open(filestem + ".bin");
+  binReader.open(filestem + ".lime.bin");
   binReader.readScidacFieldRecord(vec, record);
   binReader.close();
 }
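The ".lime.bin" rename above matters once the std and C-Lime benchmarks share a directory: the per-rank std files keep the bare ".bin" suffix while the collective SciDAC file gets ".lime.bin", so neither clobbers the other. A small wrapper sketch (not in the commit) showing the pairing:

    #include <Grid/Grid.h>
    #include "Benchmark_IO.hpp"

    using namespace Grid;

    // Round-trip the C-Lime path: with the suffix change above, <stem>.lime.bin
    // can sit next to the per-rank <stem>.<rank>.bin files from stdWrite.
    template <typename Field>
    void limeRoundTrip(Field &vec, const std::string &stem)
    {
      limeWrite<Field>(stem, vec); // collective ScidacWriter -> <stem>.lime.bin
      limeRead<Field>(vec, stem);  // reads back and checks the SciDAC record
    }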
@ -73,12 +214,18 @@ void writeBenchmark(const Coordinate &latt, const std::string filename,
   auto simd = GridDefaultSimd(latt.size(), Field::vector_type::Nsimd());
   std::shared_ptr<GridCartesian> gBasePt(SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi));
   std::shared_ptr<GridBase>      gPt;
+  std::random_device             rd;

   makeGrid(gPt, gBasePt, Ls, rb);

   GridBase        *g = gPt.get();
   GridParallelRNG  rng(g);
   Field            vec(g);

+  rng.SeedFixedIntegers({static_cast<int>(rd()), static_cast<int>(rd()),
+                         static_cast<int>(rd()), static_cast<int>(rd()),
+                         static_cast<int>(rd()), static_cast<int>(rd()),
+                         static_cast<int>(rd()), static_cast<int>(rd())});
+
   random(rng, vec);
   write(filename, vec);
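The eight std::random_device draws above reseed the RNG on every invocation, so repeated write benchmarks are not rewriting byte-identical data that a filesystem could serve back from cache. A compact equivalent of the seeding (sketch only; rng is the GridParallelRNG from the function above):

    #include <random>
    #include <vector>

    std::random_device rd;          // non-deterministic seed source
    std::vector<int>   seeds(8);
    for (auto &s : seeds) s = static_cast<int>(rd());
    rng.SeedFixedIntegers(seeds);   // same call, loop instead of eight casts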
@ -96,8 +243,8 @@ void readBenchmark(const Coordinate &latt, const std::string filename,

   makeGrid(gPt, gBasePt, Ls, rb);

   GridBase *g = gPt.get();
   Field     vec(g);

   read(vec, filename);
 }
@ -1,14 +1,9 @@
 #include "Benchmark_IO.hpp"

-#define MSG std::cout << GridLogMessage
-#define SEP \
-"============================================================================="
-
 using namespace Grid;

 int main (int argc, char ** argv)
 {
-#ifdef HAVE_LIME
   std::vector<std::string> dir;
   unsigned int             Ls;
   bool                     rb;
@ -34,46 +29,71 @@ int main (int argc, char ** argv)
   }
   Grid_init(&argc,&argv);


   int64_t threads = GridThread::GetThreads();
+  auto    mpi     = GridDefaultMpi();

   MSG << "Grid is setup to use " << threads << " threads" << std::endl;
-  MSG << SEP << std::endl;
-  MSG << "Benchmark double precision Lime write" << std::endl;
-  MSG << SEP << std::endl;
-  for (auto &d: dir)
-  {
-    MSG << "-- Directory " << d << std::endl;
-    writeBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench", limeWrite<LatticeFermion>, Ls, rb);
-  }
+  MSG << "MPI partition " << mpi << std::endl;

   MSG << SEP << std::endl;
-  MSG << "Benchmark double precision Lime read" << std::endl;
+  MSG << "Benchmark Grid std write" << std::endl;
   MSG << SEP << std::endl;
   for (auto &d: dir)
   {
     MSG << "-- Directory " << d << std::endl;
-    readBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench", limeRead<LatticeFermion>, Ls, rb);
+    writeBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench",
+                                   stdWrite<LatticeFermion>, Ls, rb);
+  }
+  MSG << SEP << std::endl;
+  MSG << "Benchmark Grid std read" << std::endl;
+  MSG << SEP << std::endl;
+  for (auto &d: dir)
+  {
+    MSG << "-- Directory " << d << std::endl;
+    readBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench",
+                                  stdRead<LatticeFermion>, Ls, rb);
   }

+#ifdef HAVE_LIME
   MSG << SEP << std::endl;
-  MSG << "Benchmark single precision Lime write" << std::endl;
+  MSG << "Benchmark Grid C-Lime write" << std::endl;
   MSG << SEP << std::endl;
   for (auto &d: dir)
   {
     MSG << "-- Directory " << d << std::endl;
-    writeBenchmark<LatticeFermionF>(GridDefaultLatt(), d + "/ioBench", limeWrite<LatticeFermionF>, Ls, rb);
+    writeBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench",
+                                   limeWrite<LatticeFermion>, Ls, rb);
   }
+  MSG << SEP << std::endl;
+  MSG << "Benchmark Grid C-Lime read" << std::endl;
+  MSG << SEP << std::endl;
+  for (auto &d: dir)
+  {
+    MSG << "-- Directory " << d << std::endl;
+    readBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench",
+                                  limeRead<LatticeFermion>, Ls, rb);
+  }
+#endif

-  MSG << SEP << std::endl;
-  MSG << "Benchmark single precision Lime read" << std::endl;
-  MSG << SEP << std::endl;
-  for (auto &d: dir)
-  {
-    MSG << "-- Directory " << d << std::endl;
-    readBenchmark<LatticeFermionF>(GridDefaultLatt(), d + "/ioBench", limeRead<LatticeFermionF>, Ls, rb);
-  }
+  // MSG << SEP << std::endl;
+  // MSG << "Benchmark single precision Lime write" << std::endl;
+  // MSG << SEP << std::endl;
+  // for (auto &d: dir)
+  // {
+  //   MSG << "-- Directory " << d << std::endl;
+  //   writeBenchmark<LatticeFermionF>(GridDefaultLatt(), d + "/ioBench", limeWrite<LatticeFermionF>, Ls, rb);
+  // }

+  // MSG << SEP << std::endl;
+  // MSG << "Benchmark single precision Lime read" << std::endl;
+  // MSG << SEP << std::endl;
+  // for (auto &d: dir)
+  // {
+  //   MSG << "-- Directory " << d << std::endl;
+  //   readBenchmark<LatticeFermionF>(GridDefaultLatt(), d + "/ioBench", limeRead<LatticeFermionF>, Ls, rb);
+  // }

   Grid_finalize();
-#endif

   return EXIT_SUCCESS;
 }
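One design point worth noting in the restructured main above: writeBenchmark/readBenchmark take the I/O routine itself as an argument, which is what lets the same two loops drive stdWrite/stdRead and limeWrite/limeRead. A hedged sketch of the callable shape involved (the alias names are hypothetical, not from the commit):

    #include <functional>
    #include <string>

    // Any writer/reader with these shapes can be slotted into the benchmark
    // loops without touching main(); the templates in Benchmark_IO.hpp are
    // assumed to accept any matching callable.
    template <typename Field>
    using IoWriter = std::function<void(const std::string, Field &)>;

    template <typename Field>
    using IoReader = std::function<void(Field &, const std::string)>;

    // e.g. writeBenchmark<LatticeFermion>(latt, stem, stdWrite<LatticeFermion>,  Ls, rb);
    //      writeBenchmark<LatticeFermion>(latt, stem, limeWrite<LatticeFermion>, Ls, rb);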
@ -1,4 +1,4 @@
 /*************************************************************************************

 Grid physics library, www.github.com/paboyle/Grid

@ -62,7 +62,7 @@ struct time_statistics{

 void comms_header(){
   std::cout <<GridLogMessage << " L  "<<"\t"<<" Ls  "<<"\t"
-            <<std::setw(11)<<"bytes"<<"MB/s uni (err/min/max)"<<"\t\t"<<"MB/s bidi (err/min/max)"<<std::endl;
+            <<"bytes\t MB/s uni (err/min/max) \t\t MB/s bidi (err/min/max)"<<std::endl;
 };

 Gamma::Algebra Gmu [] = {

@ -125,7 +125,7 @@ public:
                           lat*mpi_layout[1],
                           lat*mpi_layout[2],
                           lat*mpi_layout[3]});
-      std::cout << GridLogMessage<< latt_size <<std::endl;
+
       GridCartesian Grid(latt_size,simd_layout,mpi_layout);
       RealD Nrank = Grid._Nprocessors;
       RealD Nnode = Grid.NodeCount();

@ -137,8 +137,8 @@ public:
       for(int d=0;d<8;d++){
         xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
         rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
-        bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
-        bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
+        // bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
+        // bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
       }

       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);

@ -189,11 +189,11 @@ public:
       // double rbytes    = dbytes*0.5;
       double bidibytes = dbytes;

-      std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
-               <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
-               <<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
+      std::cout<<GridLogMessage << lat<<"\t"<<Ls<<"\t "
+               << bytes << " \t "
+               <<xbytes/timestat.mean<<" \t "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " \t "
                <<xbytes/timestat.max <<" "<< xbytes/timestat.min
-               << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
+               << "\t\t"<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;

   }

@ -224,7 +224,7 @@ public:

     uint64_t lmax=32;
-#define NLOOP (100*lmax*lmax*lmax*lmax/lat/lat/lat/lat)
+#define NLOOP (1000*lmax*lmax*lmax*lmax/lat/lat/lat/lat)

     GridSerialRNG sRNG; sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
     for(int lat=8;lat<=lmax;lat+=8){
@ -249,11 +249,6 @@ public:
       double start=usecond();
       for(int i=0;i<Nloop;i++){
         z=a*x-y;
-        autoView( x_v , x, CpuWrite);
-        autoView( y_v , y, CpuWrite);
-        autoView( z_v , z, CpuRead);
-        x_v[0]=z_v[0]; // force serial dependency to prevent optimise away
-        y_v[4]=z_v[4];
       }
       double stop=usecond();
       double time = (stop-start)/Nloop*1000;

@ -286,7 +281,7 @@ public:

     uint64_t lmax=32;
-#define NLOOP (100*lmax*lmax*lmax*lmax/lat/lat/lat/lat)
+#define NLOOP (1000*lmax*lmax*lmax*lmax/lat/lat/lat/lat)

     GridSerialRNG sRNG; sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
     for(int lat=8;lat<=lmax;lat+=8){

@ -309,11 +304,6 @@ public:
       double start=usecond();
       for(int i=0;i<Nloop;i++){
        z=x*y;
-        autoView( x_v , x, CpuWrite);
-        autoView( y_v , y, CpuWrite);
-        autoView( z_v , z, CpuRead);
-        x_v[0]=z_v[0]; // force serial dependency to prevent optimise away
-        y_v[4]=z_v[4];
       }
       double stop=usecond();
       double time = (stop-start)/Nloop*1000;
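The deleted autoView/poke lines in both loops above existed purely to create a serial dependency so the timed expression could not be optimised away; they also forced a host-side touch of the data inside the timed region, which is presumably why they were dropped. A hedged alternative that keeps a guard but moves it outside the timing (sketch only; z, a, x, y and Nloop as in the loops above, needs <cassert>):

    double start = usecond();
    for (int i = 0; i < Nloop; i++) {
      z = a*x - y;               // expression under test
    }
    double stop  = usecond();
    double guard = norm2(z);     // single observable use of the result,
    assert(guard >= 0.0);        // outside the timed region, keeps z live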
@ -358,6 +348,7 @@ public:
     ///////// Welcome message ////////////
     std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
     std::cout<<GridLogMessage << "Benchmark DWF on "<<L<<"^4 local volume "<<std::endl;
+    std::cout<<GridLogMessage << "* Nc             : "<<Nc<<std::endl;
     std::cout<<GridLogMessage << "* Global volume  : "<<GridCmdVectorIntToString(latt4)<<std::endl;
     std::cout<<GridLogMessage << "* Ls             : "<<Ls<<std::endl;
     std::cout<<GridLogMessage << "* ranks          : "<<NP <<std::endl;

@ -386,7 +377,7 @@ public:
     typedef LatticeGaugeFieldF Gauge;

     ///////// Source preparation ////////////
-    Gauge Umu(UGrid);  SU3::HotConfiguration(RNG4,Umu);
+    Gauge Umu(UGrid);  SU<Nc>::HotConfiguration(RNG4,Umu);
     Fermion src   (FGrid); random(RNG5,src);
     Fermion src_e (FrbGrid);
     Fermion src_o (FrbGrid);

@ -431,7 +422,7 @@ public:
       }
       FGrid->Barrier();
       double t1=usecond();
-      uint64_t ncall = 50;
+      uint64_t ncall = 500;

       FGrid->Broadcast(0,&ncall,sizeof(ncall));

@ -449,7 +440,13 @@ public:
       FGrid->Barrier();

       double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
-      double flops=(1344.0*volume)/2;
+      // Nc=3 gives
+      // 1344= 3*(2*8+6)*2*8 + 8*3*2*2 + 3*4*2*8
+      // 1344 = Nc* (6+(Nc-1)*8)*2*Nd + Nd*Nc*2*2 + Nd*Nc*Ns*2
+      // double flops=(1344.0*volume)/2;
+      double fps = Nc* (6+(Nc-1)*8)*Ns*Nd + Nd*Nc*Ns + Nd*Nc*Ns*2;
+      double flops=(fps*volume)/2;
       double mf_hi, mf_lo, mf_err;

       timestat.statistics(t_time);
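A worked check of the new per-site count (not part of the diff): with Nc=3, Ns=4, Nd=4 the expression evaluates to 1200, not the retired constant 1344 — the last two terms enter at half the weight of the old 8*3*2*2 + 3*4*2*8 accounting — which is worth keeping in mind when comparing mflop/s figures across versions:

    // Standalone arithmetic check; shadows Grid's global Nc/Ns/Nd on purpose.
    constexpr int Nc = 3, Ns = 4, Nd = 4;
    constexpr int fps = Nc*(6+(Nc-1)*8)*Ns*Nd  // 1056 for Nc=3
                      + Nd*Nc*Ns               //   48
                      + Nd*Nc*Ns*2;            //   96
    static_assert(fps == 1200, "new Deo flop count per site at Nc=3");
    static_assert(3*(2*8+6)*2*8 + 8*3*2*2 + 3*4*2*8 == 1344, "historic constant");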
@ -464,6 +461,7 @@ public:
       if ( mflops>mflops_best ) mflops_best = mflops;
       if ( mflops<mflops_worst) mflops_worst= mflops;

+      std::cout<<GridLogMessage<< "Deo FlopsPerSite is "<<fps<<std::endl;
       std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s =   "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl;
       std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per rank   "<< mflops/NP<<std::endl;
       std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per node   "<< mflops/NN<<std::endl;

@ -540,7 +538,7 @@ public:
     typedef typename Action::FermionField Fermion;
     typedef LatticeGaugeFieldF Gauge;

-    Gauge Umu(FGrid); SU3::HotConfiguration(RNG4,Umu);
+    Gauge Umu(FGrid); SU<Nc>::HotConfiguration(RNG4,Umu);

     typename Action::ImplParams params;
     Action Ds(Umu,Umu,*FGrid,*FrbGrid,mass,c1,c2,u0,params);

@ -698,7 +696,7 @@ int main (int argc, char ** argv)
   std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
   std::cout<<GridLogMessage << " Summary table Ls="<<Ls <<std::endl;
   std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
-  std::cout<<GridLogMessage << "L \t\t Wilson \t\t DWF4 \t\tt Staggered" <<std::endl;
+  std::cout<<GridLogMessage << "L \t\t Wilson \t\t DWF4 \t\t Staggered" <<std::endl;
   for(int l=0;l<L_list.size();l++){
     std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]<<" \t\t "<<dwf4[l] << " \t\t "<< staggered[l]<<std::endl;
   }

@ -729,9 +727,9 @@ int main (int argc, char ** argv)
   std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
   std::cout<<GridLogMessage << " Per Node Summary table Ls="<<Ls <<std::endl;
   std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
-  std::cout<<GridLogMessage << " L \t\t Wilson\t\t DWF4 " <<std::endl;
+  std::cout<<GridLogMessage << " L \t\t Wilson\t\t DWF4\t\t Staggered " <<std::endl;
   for(int l=0;l<L_list.size();l++){
-    std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]/NN<<" \t "<<dwf4[l]/NN<<std::endl;
+    std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]/NN<<" \t "<<dwf4[l]/NN<< " \t "<<staggered[l]/NN<<std::endl;
   }
   std::cout<<GridLogMessage << "=================================================================================="<<std::endl;

@ -108,7 +108,7 @@ int main (int argc, char ** argv)

   std::cout << GridLogMessage << "Drawing gauge field" << std::endl;
   LatticeGaugeField Umu(UGrid);
-  SU3::HotConfiguration(RNG4,Umu);
+  SU<Nc>::HotConfiguration(RNG4,Umu);
   std::cout << GridLogMessage << "Random gauge initialised " << std::endl;
 #if 0
   Umu=1.0;
benchmarks/Benchmark_dwf_fp32.cc (new file, 364 lines)
@ -0,0 +1,364 @@
+/*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid
+
+    Source file: ./benchmarks/Benchmark_dwf.cc
+
+    Copyright (C) 2015
+
+    Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+    Author: paboyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+#ifdef GRID_CUDA
+#define CUDA_PROFILE
+#endif
+
+#ifdef CUDA_PROFILE
+#include <cuda_profiler_api.h>
+#endif
+
+using namespace std;
+using namespace Grid;
+
+template<class d>
+struct scal {
+  d internal;
+};
+
+Gamma::Algebra Gmu [] = {
+  Gamma::Algebra::GammaX,
+  Gamma::Algebra::GammaY,
+  Gamma::Algebra::GammaZ,
+  Gamma::Algebra::GammaT
+};
+
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  int threads = GridThread::GetThreads();
+
+  Coordinate latt4 = GridDefaultLatt();
+  int Ls=8;
+  for(int i=0;i<argc;i++)
+    if(std::string(argv[i]) == "-Ls"){
+      std::stringstream ss(argv[i+1]); ss >> Ls;
+    }
+
+  GridLogLayout();
+
+  long unsigned int single_site_flops = 8*Nc*(7+16*Nc);
+
+  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
+  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
+  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
+  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
+
+  std::cout << GridLogMessage << "Making s innermost grids"<<std::endl;
+  GridCartesian         * sUGrid   = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(),GridDefaultMpi());
+  GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
+  GridCartesian         * sFGrid   = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
+  GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
+
+  std::vector<int> seeds4({1,2,3,4});
+  std::vector<int> seeds5({5,6,7,8});
+
+  std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl;
+  GridParallelRNG RNG4(UGrid);  RNG4.SeedUniqueString(std::string("The 4D RNG"));
+  std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl;
+  GridParallelRNG RNG5(FGrid);  RNG5.SeedUniqueString(std::string("The 5D RNG"));
+  std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
+
+  LatticeFermionF src (FGrid); random(RNG5,src);
+#if 0
+  src = Zero();
+  {
+    Coordinate origin({0,0,0,latt4[2]-1,0});
+    SpinColourVectorF tmp;
+    tmp=Zero();
+    tmp()(0)(0)=Complex(-2.0,0.0);
+    std::cout << " source site 0 " << tmp<<std::endl;
+    pokeSite(tmp,src,origin);
+  }
+#else
+  RealD N2 = 1.0/::sqrt(norm2(src));
+  src = src*N2;
+#endif
+
+  LatticeFermionF result(FGrid); result=Zero();
+  LatticeFermionF    ref(FGrid);    ref=Zero();
+  LatticeFermionF    tmp(FGrid);
+  LatticeFermionF    err(FGrid);
+
+  std::cout << GridLogMessage << "Drawing gauge field" << std::endl;
+  LatticeGaugeFieldF Umu(UGrid);
+  SU<Nc>::HotConfiguration(RNG4,Umu);
+  std::cout << GridLogMessage << "Random gauge initialised " << std::endl;
+#if 0
+  Umu=1.0;
+  for(int mu=0;mu<Nd;mu++){
+    LatticeColourMatrixF ttmp(UGrid);
+    ttmp = PeekIndex<LorentzIndex>(Umu,mu);
+    //  if (mu !=2 ) ttmp = 0;
+    //  ttmp = ttmp* pow(10.0,mu);
+    PokeIndex<LorentzIndex>(Umu,ttmp,mu);
+  }
+  std::cout << GridLogMessage << "Forced to diagonal " << std::endl;
+#endif
+
+  ////////////////////////////////////
+  // Naive wilson implementation
+  ////////////////////////////////////
+  // replicate across fifth dimension
+  LatticeGaugeFieldF Umu5d(FGrid);
+  std::vector<LatticeColourMatrixF> U(4,FGrid);
+  {
+    autoView( Umu5d_v, Umu5d, CpuWrite);
+    autoView( Umu_v  , Umu  , CpuRead);
+    for(int ss=0;ss<Umu.Grid()->oSites();ss++){
+      for(int s=0;s<Ls;s++){
+        Umu5d_v[Ls*ss+s] = Umu_v[ss];
+      }
+    }
+  }
+  for(int mu=0;mu<Nd;mu++){
+    U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu);
+  }
+  std::cout << GridLogMessage << "Setting up Cshift based reference " << std::endl;
+
+  if (1)
+  {
+    ref = Zero();
+    for(int mu=0;mu<Nd;mu++){
+
+      tmp = U[mu]*Cshift(src,mu+1,1);
+      ref=ref + tmp - Gamma(Gmu[mu])*tmp;
+
+      tmp =adj(U[mu])*src;
+      tmp =Cshift(tmp,mu+1,-1);
+      ref=ref + tmp + Gamma(Gmu[mu])*tmp;
+    }
+    ref = -0.5*ref;
+  }
+
+  RealD mass=0.1;
+  RealD M5  =1.8;
+
+  RealD NP = UGrid->_Nprocessors;
+  RealD NN = UGrid->NodeCount();
+
+  std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
+  std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
+  std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
+  std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
+  std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::Dhop                  "<<std::endl;
+  std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplexF::Nsimd()<<std::endl;
+  std::cout << GridLogMessage<< "* VComplexF size is "<<sizeof(vComplexF)<< " B"<<std::endl;
+  if ( sizeof(RealF)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
+  if ( sizeof(RealF)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
+#ifdef GRID_OMP
+  if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
+  if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
+#endif
+  if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
+  if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl;
+  if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl;
+  std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
+
+  DomainWallFermionF Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+  int ncall =1000;
+
+  if (1) {
+    FGrid->Barrier();
+    Dw.ZeroCounters();
+    Dw.Dhop(src,result,0);
+    std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
+    double t0=usecond();
+    for(int i=0;i<ncall;i++){
+      __SSC_START;
+      Dw.Dhop(src,result,0);
+      __SSC_STOP;
+    }
+    double t1=usecond();
+    FGrid->Barrier();
+
+    double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
+    double flops=single_site_flops*volume*ncall;
+
+    auto nsimd = vComplex::Nsimd();
+    auto simdwidth = sizeof(vComplex);
+
+    // RF: Nd Wilson * Ls, Nd gauge * Ls, Nc colors
+    double data_rf = volume * ((2*Nd+1)*Nd*Nc + 2*Nd*Nc*Nc) * simdwidth / nsimd * ncall / (1024.*1024.*1024.);
+
+    // mem: Nd Wilson * Ls, Nd gauge, Nc colors
+    double data_mem = (volume * (2*Nd+1)*Nd*Nc + (volume/Ls) *2*Nd*Nc*Nc) * simdwidth / nsimd * ncall / (1024.*1024.*1024.);
+
+    std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
+    //    std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
+    //    std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl;
+    std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl;
+    std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl;
+    std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl;
+    std::cout<<GridLogMessage << "RF  GiB/s (base 2) =   "<< 1000000. * data_rf/((t1-t0))<<std::endl;
+    std::cout<<GridLogMessage << "mem GiB/s (base 2) =   "<< 1000000. * data_mem/((t1-t0))<<std::endl;
+    err = ref-result;
+    std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl;
+    //exit(0);
+
+    if(( norm2(err)>1.0e-4) ) {
+      /*
+      std::cout << "RESULT\n " << result<<std::endl;
+      std::cout << "REF   \n " << ref <<std::endl;
+      std::cout << "ERR   \n " << err <<std::endl;
+      */
+      std::cout<<GridLogMessage << "WRONG RESULT" << std::endl;
+      FGrid->Barrier();
+      exit(-1);
+    }
+    assert (norm2(err)< 1.0e-4 );
+    Dw.Report();
+  }
+
+  if (1)
+  { // Naive wilson dag implementation
+    ref = Zero();
+    for(int mu=0;mu<Nd;mu++){
+
+      //    ref =  src - Gamma(Gamma::Algebra::GammaX)* src ; // 1+gamma_x
+      tmp = U[mu]*Cshift(src,mu+1,1);
+      {
+        autoView( ref_v, ref, CpuWrite);
+        autoView( tmp_v, tmp, CpuRead);
+        for(int i=0;i<ref_v.size();i++){
+          ref_v[i]+= tmp_v[i] + Gamma(Gmu[mu])*tmp_v[i];
+        }
+      }
+
+      tmp =adj(U[mu])*src;
+      tmp =Cshift(tmp,mu+1,-1);
+      {
+        autoView( ref_v, ref, CpuWrite);
+        autoView( tmp_v, tmp, CpuRead);
+        for(int i=0;i<ref_v.size();i++){
+          ref_v[i]+= tmp_v[i] - Gamma(Gmu[mu])*tmp_v[i];
+        }
+      }
+    }
+    ref = -0.5*ref;
+  }
+  //  dump=1;
+  Dw.Dhop(src,result,1);
+  std::cout << GridLogMessage << "Compare to naive wilson implementation Dag to verify correctness" << std::endl;
+  std::cout<<GridLogMessage << "Called DwDag"<<std::endl;
+  std::cout<<GridLogMessage << "norm dag result "<< norm2(result)<<std::endl;
+  std::cout<<GridLogMessage << "norm dag ref    "<< norm2(ref)<<std::endl;
+  err = ref-result;
+  std::cout<<GridLogMessage << "norm dag diff   "<< norm2(err)<<std::endl;
+  if((norm2(err)>1.0e-4)){
+    /*
+    std::cout<< "DAG RESULT\n "  <<ref << std::endl;
+    std::cout<< "DAG sRESULT\n " <<result << std::endl;
+    std::cout<< "DAG ERR   \n " << err <<std::endl;
+    */
+  }
+  LatticeFermionF src_e (FrbGrid);
+  LatticeFermionF src_o (FrbGrid);
+  LatticeFermionF r_e   (FrbGrid);
+  LatticeFermionF r_o   (FrbGrid);
+  LatticeFermionF r_eo  (FGrid);
+
+  std::cout<<GridLogMessage << "Calling Deo and Doe and //assert Deo+Doe == Dunprec"<<std::endl;
+  pickCheckerboard(Even,src_e,src);
+  pickCheckerboard(Odd,src_o,src);
+
+  std::cout<<GridLogMessage << "src_e"<<norm2(src_e)<<std::endl;
+  std::cout<<GridLogMessage << "src_o"<<norm2(src_o)<<std::endl;
+
+  // S-direction is INNERMOST and takes no part in the parity.
+  std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
+  std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionF::DhopEO                "<<std::endl;
+  std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplexF::Nsimd()<<std::endl;
+  if ( sizeof(RealF)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
+  if ( sizeof(RealF)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
+#ifdef GRID_OMP
+  if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
+  if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
+#endif
+  if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
+  if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl;
+  if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl;
+  std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
+  {
+    Dw.ZeroCounters();
+    FGrid->Barrier();
+    Dw.DhopEO(src_o,r_e,DaggerNo);
+    double t0=usecond();
+    for(int i=0;i<ncall;i++){
+#ifdef CUDA_PROFILE
+      if(i==10) cudaProfilerStart();
+#endif
+      Dw.DhopEO(src_o,r_e,DaggerNo);
+#ifdef CUDA_PROFILE
+      if(i==20) cudaProfilerStop();
+#endif
+    }
+    double t1=usecond();
+    FGrid->Barrier();
+
+    double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
+    double flops=(single_site_flops*volume*ncall)/2.0;
+
+    std::cout<<GridLogMessage << "Deo mflop/s =   "<< flops/(t1-t0)<<std::endl;
+    std::cout<<GridLogMessage << "Deo mflop/s per rank   "<< flops/(t1-t0)/NP<<std::endl;
+    std::cout<<GridLogMessage << "Deo mflop/s per node   "<< flops/(t1-t0)/NN<<std::endl;
+    Dw.Report();
+  }
+  Dw.DhopEO(src_o,r_e,DaggerNo);
+  Dw.DhopOE(src_e,r_o,DaggerNo);
+  Dw.Dhop (src  ,result,DaggerNo);
+
+  std::cout<<GridLogMessage << "r_e"<<norm2(r_e)<<std::endl;
+  std::cout<<GridLogMessage << "r_o"<<norm2(r_o)<<std::endl;
+  std::cout<<GridLogMessage << "res"<<norm2(result)<<std::endl;
+
+  setCheckerboard(r_eo,r_o);
+  setCheckerboard(r_eo,r_e);
+
+  err = r_eo-result;
+  std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl;
+  if((norm2(err)>1.0e-4)){
+    /*
+    std::cout<< "Deo RESULT\n " <<r_eo << std::endl;
+    std::cout<< "Deo REF\n " <<result << std::endl;
+    std::cout<< "Deo ERR   \n " << err <<std::endl;
+    */
+  }
+
+  pickCheckerboard(Even,src_e,err);
+  pickCheckerboard(Odd,src_o,err);
+  std::cout<<GridLogMessage << "norm diff even  "<< norm2(src_e)<<std::endl;
+  std::cout<<GridLogMessage << "norm diff odd   "<< norm2(src_o)<<std::endl;
+
+  assert(norm2(src_e)<1.0e-4);
+  assert(norm2(src_o)<1.0e-4);
+  Grid_finalize();
+  exit(0);
+}
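For orientation (an editorial aid, not commit content): the Cshift-based reference in the new file implements the Wilson hopping term that Dhop is checked against, with the shift taken in direction mu+1 because the fifth dimension is innermost (dimension 0) on the five-dimensional grid. In LaTeX,

    D_{\rm hop}\,\psi(x) = -\frac{1}{2}\sum_{\mu=0}^{3}\Big[(1-\gamma_\mu)\,U_\mu(x)\,\psi(x+\hat\mu) + (1+\gamma_\mu)\,U_\mu^\dagger(x-\hat\mu)\,\psi(x-\hat\mu)\Big]

The two autoView loops in the dagger section build the same sum with the sign of each gamma_mu flipped, which is the daggered operator up to the shared -1/2 prefactor.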
@ -63,7 +63,7 @@ int main (int argc, char ** argv)

   std::cout << GridLogMessage << "Drawing gauge field" << std::endl;
   LatticeGaugeFieldF Umu(UGrid);
-  SU3::HotConfiguration(RNG4,Umu);
+  SU<Nc>::HotConfiguration(RNG4,Umu);
   std::cout << GridLogMessage << "Random gauge initialised " << std::endl;

   RealD mass=0.1;

@ -30,7 +30,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>

 using namespace std;
 using namespace Grid;
-;
+

 int main (int argc, char ** argv)

@ -53,7 +53,7 @@ int main (int argc, char ** argv)
   GridParallelRNG RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
   std::cout << GridLogMessage << "Seeded"<<std::endl;

-  LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
+  LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);

   std::cout << GridLogMessage << "made random gauge fields"<<std::endl;
configure.ac (52 lines changed)
@ -123,6 +123,24 @@ case ${ac_LAPACK} in
     AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);;
 esac

+############### Nc
+AC_ARG_ENABLE([Nc],
+    [AC_HELP_STRING([--enable-Nc=2|3|4|5], [enable number of colours])],
+    [ac_Nc=${enable_Nc}], [ac_Nc=3])
+
+case ${ac_Nc} in
+    2)
+      AC_DEFINE([Config_Nc],[2],[Gauge group Nc]);;
+    3)
+      AC_DEFINE([Config_Nc],[3],[Gauge group Nc]);;
+    4)
+      AC_DEFINE([Config_Nc],[4],[Gauge group Nc]);;
+    5)
+      AC_DEFINE([Config_Nc],[5],[Gauge group Nc]);;
+    *)
+      AC_MSG_ERROR(["Unsupported gauge group choice Nc = ${ac_Nc}"]);;
+esac
+
 ############### FP16 conversions
 AC_ARG_ENABLE([sfw-fp16],
   [AC_HELP_STRING([--enable-sfw-fp16=yes|no], [enable software fp16 comms])],
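A hedged sketch of how the new flag is presumably consumed (the actual header wiring inside Grid may differ): configure writes Config_Nc into the generated config header, and the gauge-group rank used throughout the code as Nc — e.g. in the SU<Nc>::HotConfiguration calls above — would be set from it:

    // Hypothetical mapping from the configure macro to the compile-time constant.
    #ifdef Config_Nc
    static constexpr int Nc = Config_Nc; // 2, 3, 4 or 5 from --enable-Nc
    #else
    static constexpr int Nc = 3;         // historical default
    #endif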
@ -459,23 +477,24 @@ esac
 AM_CXXFLAGS="$SIMD_FLAGS $AM_CXXFLAGS"
 AM_CFLAGS="$SIMD_FLAGS $AM_CFLAGS"

-############### Precision selection
-AC_ARG_ENABLE([precision],
-    [AC_HELP_STRING([--enable-precision=single|double],
-    [Select default word size of Real])],
-    [ac_PRECISION=${enable_precision}],[ac_PRECISION=double])
-
-case ${ac_PRECISION} in
-    single)
-      AC_DEFINE([GRID_DEFAULT_PRECISION_SINGLE],[1],[GRID_DEFAULT_PRECISION is SINGLE] )
-    ;;
-    double)
-      AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] )
-    ;;
-    *)
-      AC_MSG_ERROR([${ac_PRECISION} unsupported --enable-precision option]);
-    ;;
-esac
+############### Precision selection - deprecate
+#AC_ARG_ENABLE([precision],
+#    [AC_HELP_STRING([--enable-precision=single|double],
+#    [Select default word size of Real])],
+#    [ac_PRECISION=${enable_precision}],[ac_PRECISION=double])
+
+AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] )
+
+#case ${ac_PRECISION} in
+#    single)
+#      AC_DEFINE([GRID_DEFAULT_PRECISION_SINGLE],[1],[GRID_DEFAULT_PRECISION is SINGLE] )
+#    ;;
+#    double)
+#    ;;
+#    *)
+#      AC_MSG_ERROR([${ac_PRECISION} unsupported --enable-precision option]);
+#    ;;
+#esac

 ###################### Shared memory allocation technique under MPI3
 AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmopen|shmget|hugetlbfs|shmnone],
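With the option commented out, GRID_DEFAULT_PRECISION_DOUBLE is now defined unconditionally, and explicit types (LatticeFermionF versus LatticeFermion, and so on) carry the precision choice instead. A hedged sketch of what the macro is assumed to select, modeled on Grid's RealF/RealD convention; the actual header wiring may differ:

    typedef float  RealF;
    typedef double RealD;
    #ifdef GRID_DEFAULT_PRECISION_DOUBLE
    typedef RealD Real;  // the now-hardwired default
    #else
    typedef RealF Real;  // previously reachable via --enable-precision=single
    #endif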
@ -656,6 +675,7 @@ os (target)              : $target_os
 compiler vendor            : ${ax_cv_cxx_compiler_vendor}
 compiler version           : ${ax_cv_gxx_version}
 ----- BUILD OPTIONS -----------------------------------
+Nc                         : ${ac_Nc}
 SIMD                       : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG}
 Threading                  : ${ac_openmp}
 Acceleration               : ${ac_ACCELERATOR}
@ -184,19 +184,19 @@ Below are shown the `configure` script invocations for three recommended configurations

 This is the build for everyday development and debugging with Xcode. It uses the Xcode clang c++ compiler, without MPI, and defaults to double precision. Xcode builds the `Debug` configuration with debug symbols for full debugging:

-    ../configure CXX=clang++ CXXFLAGS="-I$GridPkg/include/libomp -Xpreprocessor -fopenmp -std=c++11" LDFLAGS="-L$GridPkg/lib/libomp" LIBS="-lomp" --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-comms=none --enable-precision=double --prefix=$GridPre/Debug
+    ../configure CXX=clang++ CXXFLAGS="-I$GridPkg/include/libomp -Xpreprocessor -fopenmp -std=c++11" LDFLAGS="-L$GridPkg/lib/libomp" LIBS="-lomp" --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-comms=none --prefix=$GridPre/Debug

 #### 2. `Release`

-Since Grid itself doesn't really have debug configurations, the release build is recommended to be the same as `Debug`, except using single precision (handy for validation):
+Since Grid itself doesn't really have debug configurations, the release build is recommended to be the same as `Debug`:

-    ../configure CXX=clang++ CXXFLAGS="-I$GridPkg/include/libomp -Xpreprocessor -fopenmp -std=c++11" LDFLAGS="-L$GridPkg/lib/libomp" LIBS="-lomp" --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-comms=none --enable-precision=single --prefix=$GridPre/Release
+    ../configure CXX=clang++ CXXFLAGS="-I$GridPkg/include/libomp -Xpreprocessor -fopenmp -std=c++11" LDFLAGS="-L$GridPkg/lib/libomp" LIBS="-lomp" --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-comms=none --prefix=$GridPre/Release

 #### 3. `MPIDebug`

 Debug configuration with MPI:

-    ../configure CXX=clang++ CXXFLAGS="-I$GridPkg/include/libomp -Xpreprocessor -fopenmp -std=c++11" LDFLAGS="-L$GridPkg/lib/libomp" LIBS="-lomp" --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-comms=mpi-auto MPICXX=$GridPre/bin/mpicxx --enable-precision=double --prefix=$GridPre/MPIDebug
+    ../configure CXX=clang++ CXXFLAGS="-I$GridPkg/include/libomp -Xpreprocessor -fopenmp -std=c++11" LDFLAGS="-L$GridPkg/lib/libomp" LIBS="-lomp" --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-comms=mpi-auto MPICXX=$GridPre/bin/mpicxx --prefix=$GridPre/MPIDebug

 ### 5.3 Build Grid
@ -178,15 +178,10 @@ Then enter the cloned directory and set up the build system::
 Now you can execute the `configure` script to generate makefiles (here from a build directory)::

   mkdir build; cd build
-  ../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi-auto \
+  ../configure --enable-simd=AVX --enable-comms=mpi-auto \
               --prefix=<path>

-where::
-
-  --enable-precision=single|double
-
-sets the **default precision**. Since this is largely a benchmarking convenience, it is anticipated that the default precision may be removed in future implementations, and that explicit type selection be made at all points. Naturally, most code will be type templated in any case.::
+::

   --enable-simd=GEN|SSE4|AVX|AVXFMA|AVXFMA4|AVX2|AVX512|NEONv8|QPX
@ -236,7 +231,7 @@ Detailed build configuration options
 --enable-mkl[=path]                 use Intel MKL for FFT (and LAPACK if enabled) routines. A UNIX prefix containing the library can be specified (optional).
 --enable-simd=code                  setup Grid for the SIMD target `<code>` (default: `GEN`). A list of possible SIMD targets is detailed in a section below.
 --enable-gen-simd-width=size        select the size (in bytes) of the generic SIMD vector type (default: 32 bytes). E.g. SSE 128 bit corresponds to 16 bytes.
---enable-precision=single|double    set the default precision (default: `double`).
+--enable-precision=single|double    set the default precision (default: `double`). **Deprecated option**
 --enable-comms=mpi|none             use `<comm>` for message passing (default: `none`).
 --enable-rng=sitmo|ranlux48|mt19937 choose the RNG (default: `sitmo`).
 --disable-timers                    disable system dependent high-resolution timers.
@ -304,8 +299,7 @@ Build setup for Intel Knights Landing platform

 The following configuration is recommended for the Intel Knights Landing platform::

-  ../configure --enable-precision=double\
-               --enable-simd=KNL        \
+  ../configure --enable-simd=KNL        \
                --enable-comms=mpi-auto  \
                --enable-mkl             \
                CXX=icpc MPICXX=mpiicpc

@ -314,8 +308,7 @@ The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library.

 If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use::

-  ../configure --enable-precision=double\
-               --enable-simd=KNL        \
+  ../configure --enable-simd=KNL        \
                --enable-comms=mpi       \
                --enable-mkl             \
                CXX=CC CC=cc

@ -332,8 +325,7 @@ presently performs better with use of more than one rank per node, using shared memory
 for interior communication.
 We recommend four ranks per node for best performance, but optimum is local volume dependent. ::

-  ../configure --enable-precision=double\
-               --enable-simd=KNL        \
+  ../configure --enable-simd=KNL        \
                --enable-comms=mpi-auto  \
                --enable-mkl             \
                CC=icpc MPICXX=mpiicpc

@ -343,8 +335,7 @@ Build setup for Intel Haswell Xeon platform

 The following configuration is recommended for the Intel Haswell platform::

-  ../configure --enable-precision=double\
-               --enable-simd=AVX2       \
+  ../configure --enable-simd=AVX2       \
                --enable-comms=mpi-auto  \
                --enable-mkl             \
                CXX=icpc MPICXX=mpiicpc

@ -360,8 +351,7 @@ where `<path>` is the UNIX prefix where GMP and MPFR are installed.

 If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use::

-  ../configure --enable-precision=double\
-               --enable-simd=AVX2       \
+  ../configure --enable-simd=AVX2       \
                --enable-comms=mpi       \
                --enable-mkl             \
                CXX=CC CC=cc

@ -379,8 +369,7 @@ Build setup for Intel Skylake Xeon platform

 The following configuration is recommended for the Intel Skylake platform::

-  ../configure --enable-precision=double\
-               --enable-simd=AVX512     \
+  ../configure --enable-simd=AVX512     \
                --enable-comms=mpi       \
                --enable-mkl             \
                CXX=mpiicpc

@ -396,8 +385,7 @@ where `<path>` is the UNIX prefix where GMP and MPFR are installed.

 If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use::

-  ../configure --enable-precision=double\
-               --enable-simd=AVX512     \
+  ../configure --enable-simd=AVX512     \
                --enable-comms=mpi       \
                --enable-mkl             \
                CXX=CC CC=cc

@ -422,8 +410,7 @@ and 8 threads per rank.

 The following configuration is recommended for the AMD EPYC platform::

-  ../configure --enable-precision=double\
-               --enable-simd=AVX2       \
+  ../configure --enable-simd=AVX2       \
                --enable-comms=mpi       \
                CXX=mpicxx
@ -69,7 +69,7 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
std::vector<LatticeColourMatrix> U(4,&Fine);
|
std::vector<LatticeColourMatrix> U(4,&Fine);
|
||||||
|
|
||||||
SU3::HotConfiguration(pRNGa,Umu);
|
SU<Nc>::HotConfiguration(pRNGa,Umu);
|
||||||
|
|
||||||
|
|
||||||
FieldMetaData header;
|
FieldMetaData header;
|
||||||
|
@ -84,7 +84,7 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
std::vector<LatticeColourMatrix> U(4,&Fine);
|
std::vector<LatticeColourMatrix> U(4,&Fine);
|
||||||
|
|
||||||
SU3::HotConfiguration(pRNGa,Umu);
|
SU<Nc>::HotConfiguration(pRNGa,Umu);
|
||||||
|
|
||||||
FieldMetaData header;
|
FieldMetaData header;
|
||||||
std::string file("./ckpoint_lat.4000");
|
std::string file("./ckpoint_lat.4000");
|
||||||
|
@ -80,7 +80,7 @@ int main (int argc, char ** argv)
|
|||||||
GridParallelRNG sRNG5(sFGrid); sRNG5.SeedFixedIntegers(seeds5);
|
GridParallelRNG sRNG5(sFGrid); sRNG5.SeedFixedIntegers(seeds5);
|
||||||
|
|
||||||
LatticeGaugeField Umu(UGrid);
|
LatticeGaugeField Umu(UGrid);
|
||||||
SU3::HotConfiguration(RNG4,Umu);
|
SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||||
|
|
||||||
RealD mass=0.1;
|
RealD mass=0.1;
|
||||||
RealD M5 =1.8;
|
RealD M5 =1.8;
|
||||||
|
@ -202,7 +202,7 @@ int main (int argc, char ** argv) {
|
|||||||
std::vector<int> seeds4({1,2,3,4});
|
std::vector<int> seeds4({1,2,3,4});
|
||||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||||
LatticeGaugeField Umu(UGrid);
|
LatticeGaugeField Umu(UGrid);
|
||||||
SU3::HotConfiguration(RNG4,Umu);
|
SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||||
// FieldMetaData header;
|
// FieldMetaData header;
|
||||||
// NerscIO::readConfiguration(Umu,header,Params.config);
|
// NerscIO::readConfiguration(Umu,header,Params.config);
|
||||||
|
|
||||||
|
@@ -71,7 +71,7 @@ int main (int argc, char ** argv)
 LatticeGaugeFieldD Umu(UGrid);
 LatticeGaugeFieldF Umu_f(UGrid_f);
 
-SU3::HotConfiguration(RNG4,Umu);
+SU<Nc>::HotConfiguration(RNG4,Umu);
 
 precisionChange(Umu_f,Umu);
 
@@ -69,7 +69,7 @@ int main (int argc, char ** argv)
 LatticeGaugeFieldD Umu(UGrid);
 LatticeGaugeFieldF Umu_f(UGrid_f);
 
-SU3::HotConfiguration(RNG4,Umu);
+SU<Nc>::HotConfiguration(RNG4,Umu);
 
 precisionChange(Umu_f,Umu);
 
@@ -64,7 +64,7 @@ int main (int argc, char ** argv)
 LatticeFermion ref(FGrid); ref=Zero();
 LatticeFermion tmp(FGrid);
 LatticeFermion err(FGrid);
-LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
+LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
 
 std::vector<LatticeColourMatrix> U(4,UGrid);
 for(int mu=0;mu<Nd;mu++){
@@ -131,7 +131,7 @@ int main (int argc, char ** argv)
 // LatticeFermion result(FGrid); result=Zero();
 // LatticeGaugeField Umu(UGrid);
 
-// SU3::HotConfiguration(RNG4,Umu);
+// SU<Nc>::HotConfiguration(RNG4,Umu);
 
 // std::vector<LatticeColourMatrix> U(4,UGrid);
 // for(int mu=0;mu<Nd;mu++){
@@ -69,7 +69,7 @@ int main (int argc, char ** argv)
 GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
 GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
 
-LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
+LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
 std::vector<LatticeColourMatrix> U(4,UGrid);
 
 RealD mass=0.1;
@@ -73,7 +73,7 @@ int main (int argc, char ** argv)
 LatticeFermion ref (FGrid); ref = Zero();
 LatticeFermion tmp (FGrid); tmp = Zero();
 LatticeFermion err (FGrid); err = Zero();
-LatticeGaugeField Umu (UGrid); SU3::HotConfiguration(RNG4, Umu);
+LatticeGaugeField Umu (UGrid); SU<Nc>::HotConfiguration(RNG4, Umu);
 std::vector<LatticeColourMatrix> U(4,UGrid);
 
 // Only one non-zero (y)
@@ -72,7 +72,7 @@ int main (int argc, char ** argv)
 LatticeFermion ref(FGrid); ref=Zero();
 LatticeFermion tmp(FGrid); tmp=Zero();
 LatticeFermion err(FGrid); tmp=Zero();
-LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
+LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
 std::vector<LatticeColourMatrix> U(4,UGrid);
 
 // Only one non-zero (y)
@@ -138,7 +138,7 @@ int main (int argc, char ** argv)
 
 LatticeGaugeFieldD Umu(&GRID);
 
-SU3::ColdConfiguration(pRNG,Umu); // Unit gauge
+SU<Nc>::ColdConfiguration(pRNG,Umu); // Unit gauge
 // Umu=Zero();
 ////////////////////////////////////////////////////
 // Wilson test
@@ -73,11 +73,11 @@ int main (int argc, char ** argv)
 LatticeColourMatrix xform2(&GRID); // Gauge xform
 LatticeColourMatrix xform3(&GRID); // Gauge xform
 
-SU3::ColdConfiguration(pRNG,Umu); // Unit gauge
+SU<Nc>::ColdConfiguration(pRNG,Umu); // Unit gauge
 Uorg=Umu;
 Urnd=Umu;
 
-SU3::RandomGaugeTransform(pRNG,Urnd,g); // Unit gauge
+SU<Nc>::RandomGaugeTransform(pRNG,Urnd,g); // Unit gauge
 
 Real plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu);
 std::cout << " Initial plaquette "<<plaq << std::endl;
@@ -121,7 +121,7 @@ int main (int argc, char ** argv)
 std::cout<< "* Testing non-unit configuration *" <<std::endl;
 std::cout<< "*****************************************************************" <<std::endl;
 
-SU3::HotConfiguration(pRNG,Umu); // Unit gauge
+SU<Nc>::HotConfiguration(pRNG,Umu); // Unit gauge
 
 plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu);
 std::cout << " Initial plaquette "<<plaq << std::endl;
@@ -136,7 +136,7 @@ int main (int argc, char ** argv)
 std::cout<< "*****************************************************************" <<std::endl;
 
 Umu=Urnd;
-SU3::HotConfiguration(pRNG,Umu); // Unit gauge
+SU<Nc>::HotConfiguration(pRNG,Umu); // Unit gauge
 
 plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu);
 std::cout << " Initial plaquette "<<plaq << std::endl;
@@ -114,7 +114,7 @@ int main (int argc, char ** argv)
 GridParallelRNG RNG4_2f(UGrid_2f); RNG4_2f.SeedFixedIntegers(seeds4);
 
 GparityGaugeField Umu_2f(UGrid_2f);
-SU3::HotConfiguration(RNG4_2f,Umu_2f);
+SU<Nc>::HotConfiguration(RNG4_2f,Umu_2f);
 
 StandardFermionField src (FGrid_2f);
 StandardFermionField tmpsrc(FGrid_2f);
@@ -61,7 +61,7 @@ int main (int argc, char ** argv)
 FermionField ref(&Grid); ref=Zero();
 FermionField tmp(&Grid); tmp=Zero();
 FermionField err(&Grid); tmp=Zero();
-LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+LatticeGaugeField Umu(&Grid); SU<Nc>::HotConfiguration(pRNG,Umu);
 std::vector<LatticeColourMatrix> U(4,&Grid);
 
 double volume=1;
@@ -66,7 +66,7 @@ int main(int argc, char** argv) {
 
 std::cout << GridLogMessage << "*********************************************"
 << std::endl;
-std::cout << GridLogMessage << "* Generators for SU(3)" << std::endl;
+std::cout << GridLogMessage << "* Generators for SU(Nc" << std::endl;
 std::cout << GridLogMessage << "*********************************************"
 << std::endl;
 SU3::printGenerators();
@@ -114,8 +114,8 @@ int main(int argc, char** argv) {
 
 
 LatticeGaugeField U(grid), V(grid);
-SU<Nc>::HotConfiguration<LatticeGaugeField>(gridRNG, U);
-SU<Nc>::HotConfiguration<LatticeGaugeField>(gridRNG, V);
+SU3::HotConfiguration<LatticeGaugeField>(gridRNG, U);
+SU3::HotConfiguration<LatticeGaugeField>(gridRNG, V);
 
 // Adjoint representation
 // Test group structure
@@ -123,8 +123,8 @@ int main(int argc, char** argv) {
 LatticeGaugeField UV(grid);
 UV = Zero();
 for (int mu = 0; mu < Nd; mu++) {
-SU<Nc>::LatticeMatrix Umu = peekLorentz(U,mu);
-SU<Nc>::LatticeMatrix Vmu = peekLorentz(V,mu);
+SU3::LatticeMatrix Umu = peekLorentz(U,mu);
+SU3::LatticeMatrix Vmu = peekLorentz(V,mu);
 pokeLorentz(UV,Umu*Vmu, mu);
 }
 
@@ -151,16 +151,16 @@ int main(int argc, char** argv) {
 
 // Check correspondence of algebra and group transformations
 // Create a random vector
-SU<Nc>::LatticeAlgebraVector h_adj(grid);
+SU3::LatticeAlgebraVector h_adj(grid);
 typename AdjointRep<Nc>::LatticeMatrix Ar(grid);
 random(gridRNG,h_adj);
 h_adj = real(h_adj);
 SU_Adjoint<Nc>::AdjointLieAlgebraMatrix(h_adj,Ar);
 
 // Re-extract h_adj
-SU<Nc>::LatticeAlgebraVector h_adj2(grid);
+SU3::LatticeAlgebraVector h_adj2(grid);
 SU_Adjoint<Nc>::projectOnAlgebra(h_adj2, Ar);
-SU<Nc>::LatticeAlgebraVector h_diff = h_adj - h_adj2;
+SU3::LatticeAlgebraVector h_diff = h_adj - h_adj2;
 std::cout << GridLogMessage << "Projections structure check vector difference (Adjoint representation) : " << norm2(h_diff) << std::endl;
 
 // Exponentiate
@@ -183,14 +183,14 @@ int main(int argc, char** argv) {
 
 
 // Construct the fundamental matrix in the group
-SU<Nc>::LatticeMatrix Af(grid);
-SU<Nc>::FundamentalLieAlgebraMatrix(h_adj,Af);
-SU<Nc>::LatticeMatrix Ufund(grid);
+SU3::LatticeMatrix Af(grid);
+SU3::FundamentalLieAlgebraMatrix(h_adj,Af);
+SU3::LatticeMatrix Ufund(grid);
 Ufund = expMat(Af, 1.0, 16);
 // Check unitarity
-SU<Nc>::LatticeMatrix uno_f(grid);
+SU3::LatticeMatrix uno_f(grid);
 uno_f = 1.0;
-SU<Nc>::LatticeMatrix UnitCheck(grid);
+SU3::LatticeMatrix UnitCheck(grid);
 UnitCheck = Ufund * adj(Ufund) - uno_f;
 std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck)
 << std::endl;
@@ -311,14 +311,14 @@ int main(int argc, char** argv) {
 // Test group structure
 // (U_f * V_f)_r = U_r * V_r
 LatticeGaugeField U2(grid), V2(grid);
-SU<Nc>::HotConfiguration<LatticeGaugeField>(gridRNG, U2);
-SU<Nc>::HotConfiguration<LatticeGaugeField>(gridRNG, V2);
+SU3::HotConfiguration<LatticeGaugeField>(gridRNG, U2);
+SU3::HotConfiguration<LatticeGaugeField>(gridRNG, V2);
 
 LatticeGaugeField UV2(grid);
 UV2 = Zero();
 for (int mu = 0; mu < Nd; mu++) {
-SU<Nc>::LatticeMatrix Umu2 = peekLorentz(U2,mu);
-SU<Nc>::LatticeMatrix Vmu2 = peekLorentz(V2,mu);
+SU3::LatticeMatrix Umu2 = peekLorentz(U2,mu);
+SU3::LatticeMatrix Vmu2 = peekLorentz(V2,mu);
 pokeLorentz(UV2,Umu2*Vmu2, mu);
 }
 
@@ -345,16 +345,16 @@ int main(int argc, char** argv) {
 
 // Check correspondence of algebra and group transformations
 // Create a random vector
-SU<Nc>::LatticeAlgebraVector h_sym(grid);
+SU3::LatticeAlgebraVector h_sym(grid);
 typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix Ar_sym(grid);
 random(gridRNG,h_sym);
 h_sym = real(h_sym);
 SU_TwoIndex<Nc,Symmetric>::TwoIndexLieAlgebraMatrix(h_sym,Ar_sym);
 
 // Re-extract h_sym
-SU<Nc>::LatticeAlgebraVector h_sym2(grid);
+SU3::LatticeAlgebraVector h_sym2(grid);
 SU_TwoIndex< Nc, Symmetric>::projectOnAlgebra(h_sym2, Ar_sym);
-SU<Nc>::LatticeAlgebraVector h_diff_sym = h_sym - h_sym2;
+SU3::LatticeAlgebraVector h_diff_sym = h_sym - h_sym2;
 std::cout << GridLogMessage << "Projections structure check vector difference (Two Index Symmetric): " << norm2(h_diff_sym) << std::endl;
 
 
@@ -379,11 +379,11 @@ int main(int argc, char** argv) {
 
 
 // Construct the fundamental matrix in the group
-SU<Nc>::LatticeMatrix Af_sym(grid);
-SU<Nc>::FundamentalLieAlgebraMatrix(h_sym,Af_sym);
-SU<Nc>::LatticeMatrix Ufund2(grid);
+SU3::LatticeMatrix Af_sym(grid);
+SU3::FundamentalLieAlgebraMatrix(h_sym,Af_sym);
+SU3::LatticeMatrix Ufund2(grid);
 Ufund2 = expMat(Af_sym, 1.0, 16);
-SU<Nc>::LatticeMatrix UnitCheck2(grid);
+SU3::LatticeMatrix UnitCheck2(grid);
 UnitCheck2 = Ufund2 * adj(Ufund2) - uno_f;
 std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck2)
 << std::endl;
@@ -421,14 +421,14 @@ int main(int argc, char** argv) {
 // Test group structure
 // (U_f * V_f)_r = U_r * V_r
 LatticeGaugeField U2A(grid), V2A(grid);
-SU<Nc>::HotConfiguration<LatticeGaugeField>(gridRNG, U2A);
-SU<Nc>::HotConfiguration<LatticeGaugeField>(gridRNG, V2A);
+SU3::HotConfiguration<LatticeGaugeField>(gridRNG, U2A);
+SU3::HotConfiguration<LatticeGaugeField>(gridRNG, V2A);
 
 LatticeGaugeField UV2A(grid);
 UV2A = Zero();
 for (int mu = 0; mu < Nd; mu++) {
-SU<Nc>::LatticeMatrix Umu2A = peekLorentz(U2,mu);
-SU<Nc>::LatticeMatrix Vmu2A = peekLorentz(V2,mu);
+SU3::LatticeMatrix Umu2A = peekLorentz(U2,mu);
+SU3::LatticeMatrix Vmu2A = peekLorentz(V2,mu);
 pokeLorentz(UV2A,Umu2A*Vmu2A, mu);
 }
 
@@ -455,16 +455,16 @@ int main(int argc, char** argv) {
 
 // Check correspondence of algebra and group transformations
 // Create a random vector
-SU<Nc>::LatticeAlgebraVector h_Asym(grid);
+SU3::LatticeAlgebraVector h_Asym(grid);
 typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Ar_Asym(grid);
 random(gridRNG,h_Asym);
 h_Asym = real(h_Asym);
 SU_TwoIndex< Nc, AntiSymmetric>::TwoIndexLieAlgebraMatrix(h_Asym,Ar_Asym);
 
 // Re-extract h_sym
-SU<Nc>::LatticeAlgebraVector h_Asym2(grid);
+SU3::LatticeAlgebraVector h_Asym2(grid);
 SU_TwoIndex< Nc, AntiSymmetric>::projectOnAlgebra(h_Asym2, Ar_Asym);
-SU<Nc>::LatticeAlgebraVector h_diff_Asym = h_Asym - h_Asym2;
+SU3::LatticeAlgebraVector h_diff_Asym = h_Asym - h_Asym2;
 std::cout << GridLogMessage << "Projections structure check vector difference (Two Index anti-Symmetric): " << norm2(h_diff_Asym) << std::endl;
 
 
@@ -489,11 +489,11 @@ int main(int argc, char** argv) {
 
 
 // Construct the fundamental matrix in the group
-SU<Nc>::LatticeMatrix Af_Asym(grid);
-SU<Nc>::FundamentalLieAlgebraMatrix(h_Asym,Af_Asym);
-SU<Nc>::LatticeMatrix Ufund2A(grid);
+SU3::LatticeMatrix Af_Asym(grid);
+SU3::FundamentalLieAlgebraMatrix(h_Asym,Af_Asym);
+SU3::LatticeMatrix Ufund2A(grid);
 Ufund2A = expMat(Af_Asym, 1.0, 16);
-SU<Nc>::LatticeMatrix UnitCheck2A(grid);
+SU3::LatticeMatrix UnitCheck2A(grid);
 UnitCheck2A = Ufund2A * adj(Ufund2A) - uno_f;
 std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck2A)
 << std::endl;
@@ -444,7 +444,7 @@ int main(int argc, char **argv) {
 // Lattice 12x12 GEMM
 scFooBar = scFoo * scBar;
 
-// Benchmark some simple operations LatticeSU3 * Lattice SU3.
+// Benchmark some simple operations LatticeSU<Nc> * Lattice SU<Nc>.
 double t0, t1, flops;
 double bytes;
 int ncall = 5000;
@@ -73,7 +73,7 @@ int main (int argc, char ** argv)
 LatticeFermion ref (FGrid); ref = Zero();
 LatticeFermion tmp (FGrid); tmp = Zero();
 LatticeFermion err (FGrid); err = Zero();
-LatticeGaugeField Umu (UGrid); SU3::HotConfiguration(RNG4, Umu);
+LatticeGaugeField Umu (UGrid); SU<Nc>::HotConfiguration(RNG4, Umu);
 std::vector<LatticeColourMatrix> U(4,UGrid);
 
 // Only one non-zero (y)
@@ -55,7 +55,7 @@ int main (int argc, char ** argv)
 GridParallelRNG pRNG(grid); pRNG.SeedFixedIntegers(pseeds);
 GridSerialRNG sRNG; sRNG.SeedFixedIntegers(sseeds);
 
-// SU3 colour operatoions
+// SU<Nc> colour operatoions
 LatticeColourMatrix link(grid);
 LatticeColourMatrix staple(grid);
 
@@ -87,10 +87,10 @@ int main (int argc, char ** argv)
 
 link = PeekIndex<LorentzIndex>(Umu,mu);
 
-for( int subgroup=0;subgroup<SU3::su2subgroups();subgroup++ ) {
+for( int subgroup=0;subgroup<SU<Nc>::su2subgroups();subgroup++ ) {
 
 // update Even checkerboard
-SU3::SubGroupHeatBath(sRNG,pRNG,beta,link,staple,subgroup,20,mask);
+SU<Nc>::SubGroupHeatBath(sRNG,pRNG,beta,link,staple,subgroup,20,mask);
 
 }
 
@@ -64,7 +64,7 @@ int main (int argc, char ** argv)
 FermionField err(&Grid); tmp=Zero();
 FermionField phi (&Grid); random(pRNG,phi);
 FermionField chi (&Grid); random(pRNG,chi);
-LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+LatticeGaugeField Umu(&Grid); SU<Nc>::HotConfiguration(pRNG,Umu);
 std::vector<LatticeColourMatrix> U(4,&Grid);
 
 
@@ -75,7 +75,7 @@ int main (int argc, char ** argv)
 FermionField phi (FGrid); random(pRNG5,phi);
 FermionField chi (FGrid); random(pRNG5,chi);
 
-LatticeGaugeField Umu(UGrid); SU3::ColdConfiguration(pRNG4,Umu);
+LatticeGaugeField Umu(UGrid); SU<Nc>::ColdConfiguration(pRNG4,Umu);
 LatticeGaugeField Umua(UGrid); Umua=Umu;
 
 double volume=Ls;
@@ -84,7 +84,7 @@ int main (int argc, char ** argv)
 FermionField chi (FGrid); random(pRNG5,chi);
 
 LatticeGaugeField Umu(UGrid);
-SU3::HotConfiguration(pRNG4,Umu);
+SU<Nc>::HotConfiguration(pRNG4,Umu);
 
 /*
 for(int mu=1;mu<4;mu++){
@@ -83,7 +83,7 @@ int main (int argc, char ** argv)
 FermionField chi (FGrid); random(pRNG5,chi);
 
 LatticeGaugeFieldF Umu(UGrid);
-SU3::HotConfiguration(pRNG4,Umu);
+SU<Nc>::HotConfiguration(pRNG4,Umu);
 
 /*
 for(int mu=1;mu<4;mu++){
@@ -64,7 +64,7 @@ int main (int argc, char ** argv)
 FermionField err(&Grid); tmp=Zero();
 FermionField phi (&Grid); random(pRNG,phi);
 FermionField chi (&Grid); random(pRNG,chi);
-LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+LatticeGaugeField Umu(&Grid); SU<Nc>::HotConfiguration(pRNG,Umu);
 std::vector<LatticeColourMatrix> U(4,&Grid);
 
 
@@ -74,7 +74,7 @@ int main(int argc, char **argv)
 FermionField chi(&Grid);
 random(pRNG, chi);
 LatticeGaugeField Umu(&Grid);
-SU3::HotConfiguration(pRNG, Umu);
+SU<Nc>::HotConfiguration(pRNG, Umu);
 std::vector<LatticeColourMatrix> U(4, &Grid);
 
 double volume = 1;
@@ -70,7 +70,7 @@ int main (int argc, char ** argv)
 LatticeFermion tmp(&Grid); tmp=Zero();
 LatticeFermion err(&Grid); tmp=Zero();
 LatticeGaugeField Umu(&Grid);
-SU3::HotConfiguration(pRNG,Umu);
+SU<Nc>::HotConfiguration(pRNG,Umu);
 std::vector<LatticeColourMatrix> U(4,&Grid);
 
 double volume=1;
@@ -71,7 +71,7 @@ int main (int argc, char ** argv)
 LatticeFermion ref(&Grid); ref=Zero();
 LatticeFermion tmp(&Grid); tmp=Zero();
 LatticeFermion err(&Grid); tmp=Zero();
-LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+LatticeGaugeField Umu(&Grid); SU<Nc>::HotConfiguration(pRNG,Umu);
 std::vector<LatticeColourMatrix> U(4,&Grid);
 
 double volume=1;
@@ -116,7 +116,7 @@ int main (int argc, char ** argv)
 
 LatticeGaugeField Umu(UGrid);
 LatticeGaugeFieldF UmuF(UGridF);
-SU3::HotConfiguration(RNG4,Umu);
+SU<Nc>::HotConfiguration(RNG4,Umu);
 precisionChange(UmuF,Umu);
 std::vector<LatticeColourMatrix> U(4,UGrid);
 
@@ -77,7 +77,7 @@ int main (int argc, char ** argv)
 LatticeFermion ref(FGrid); ref=Zero();
 LatticeFermion tmp(FGrid);
 LatticeFermion err(FGrid);
-LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
+LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
 
 #if 0
 std::vector<LatticeColourMatrix> U(4,UGrid);
@@ -70,7 +70,7 @@ int main (int argc, char ** argv)
 GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
 GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
 
-LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
+LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
 std::vector<LatticeColourMatrix> U(4,UGrid);
 
 RealD mass=0.1;
@@ -71,9 +71,9 @@ int main (int argc, char ** argv)
 std::string file("./ckpoint_lat.400");
 NerscIO::readConfiguration(Umu,header,file);
 
-// SU3::ColdConfiguration(RNG4,Umu);
-// SU3::TepidConfiguration(RNG4,Umu);
-// SU3::HotConfiguration(RNG4,Umu);
+// SU<Nc>::ColdConfiguration(RNG4,Umu);
+// SU<Nc>::TepidConfiguration(RNG4,Umu);
+// SU<Nc>::HotConfiguration(RNG4,Umu);
 // Umu=Zero();
 
 RealD mass=0.1;
@@ -108,8 +108,8 @@ int main (int argc, char ** argv)
 GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
 
 LatticeGaugeField Umu(UGrid);
-SU3::ColdConfiguration(Umu);
-// SU3::HotConfiguration(RNG4,Umu);
+SU<Nc>::ColdConfiguration(Umu);
+// SU<Nc>::HotConfiguration(RNG4,Umu);
 
 RealD mass=0.3;
 RealD M5 =1.0;
@@ -73,7 +73,7 @@ int main(int argc, char** argv)
 
 // Random gauge field
 LatticeGaugeField Umu(UGrid);
-SU3::HotConfiguration(RNG4, Umu);
+SU<Nc>::HotConfiguration(RNG4, Umu);
 
 DomainWallEOFAFermionR Lop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mpv, 0.0, -1, M5);
 DomainWallEOFAFermionR Rop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mpv, mf, mpv, -1.0, 1, M5);
@@ -77,7 +77,7 @@ int main(int argc, char** argv)
 
 // Random gauge field
 LatticeGaugeField Umu(UGrid);
-SU3::HotConfiguration(RNG4, Umu);
+SU<Nc>::HotConfiguration(RNG4, Umu);
 
 // GparityDomainWallFermionR::ImplParams params;
 FermionAction::ImplParams params;
@@ -75,7 +75,7 @@ int main(int argc, char** argv)
 
 // Random gauge field
 LatticeGaugeField Umu(UGrid);
-SU3::HotConfiguration(RNG4, Umu);
+SU<Nc>::HotConfiguration(RNG4, Umu);
 
 MobiusEOFAFermionR Lop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mpv, 0.0, -1, M5, b, c);
 MobiusEOFAFermionR Rop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mpv, mf, mpv, -1.0, 1, M5, b, c);
@@ -79,7 +79,7 @@ int main(int argc, char** argv)
 
 // Random gauge field
 LatticeGaugeField Umu(UGrid);
-SU3::HotConfiguration(RNG4, Umu);
+SU<Nc>::HotConfiguration(RNG4, Umu);
 
 FermionAction::ImplParams params;
 FermionAction Lop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mpv, 0.0, -1, M5, b, c, params);
@@ -102,7 +102,7 @@ int main(int argc, char **argv)
 
 // Random gauge field
 LatticeGaugeField Umu(UGrid);
-SU3::HotConfiguration(RNG4, Umu);
+SU<Nc>::HotConfiguration(RNG4, Umu);
 
 // Initialize RHMC fermion operators
 DomainWallFermionR Ddwf_f(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, M5);
@@ -104,7 +104,7 @@ int main(int argc, char **argv)
 
 // Random gauge field
 LatticeGaugeField Umu(UGrid);
-SU3::HotConfiguration(RNG4, Umu);
+SU<Nc>::HotConfiguration(RNG4, Umu);
 
 // Initialize RHMC fermion operators
 GparityDomainWallFermionR::ImplParams params;
@@ -104,7 +104,7 @@ int main(int argc, char **argv)
 
 // Random gauge field
 LatticeGaugeField Umu(UGrid);
-SU3::HotConfiguration(RNG4, Umu);
+SU<Nc>::HotConfiguration(RNG4, Umu);
 
 // Initialize RHMC fermion operators
 MobiusFermionR Ddwf_f(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, M5, b, c);
@@ -106,7 +106,7 @@ int main(int argc, char **argv)
 
 // Random gauge field
 LatticeGaugeField Umu(UGrid);
-SU3::HotConfiguration(RNG4, Umu);
+SU<Nc>::HotConfiguration(RNG4, Umu);
 
 // Initialize RHMC fermion operators
 GparityDomainWallFermionR::ImplParams params;
@@ -59,7 +59,7 @@ int main (int argc, char ** argv)
 
 LatticeGaugeField U(UGrid);
 
-SU3::HotConfiguration(RNG4,U);
+SU<Nc>::HotConfiguration(RNG4,U);
 
 ////////////////////////////////////
 // Unmodified matrix element
@@ -93,7 +93,7 @@ int main (int argc, char ** argv)
 
 for(int mu=0;mu<Nd;mu++){
 
-SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
 
 PokeIndex<LorentzIndex>(mom,mommu,mu);
 
@@ -60,7 +60,7 @@ int main (int argc, char ** argv)
 
 LatticeGaugeField U(UGrid);
 
-SU3::HotConfiguration(RNG4,U);
+SU<Nc>::HotConfiguration(RNG4,U);
 
 ////////////////////////////////////
 // Unmodified matrix element
@@ -94,7 +94,7 @@ int main (int argc, char ** argv)
 
 for(int mu=0;mu<Nd;mu++){
 
-SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
 
 PokeIndex<LorentzIndex>(mom,mommu,mu);
 
@@ -72,7 +72,7 @@ int main (int argc, char** argv)
 LatticeFermion MphiPrime (FGrid);
 
 LatticeGaugeField U(UGrid);
-SU3::HotConfiguration(RNG4,U);
+SU<Nc>::HotConfiguration(RNG4,U);
 
 ////////////////////////////////////
 // Unmodified matrix element
@@ -105,7 +105,7 @@ int main (int argc, char** argv)
 
 for(int mu=0; mu<Nd; mu++){
 
-SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
 
 PokeIndex<LorentzIndex>(mom, mommu, mu);
 
@@ -63,8 +63,8 @@ int main (int argc, char ** argv)
 
 LatticeGaugeField U(UGrid);
 
-SU3::HotConfiguration(RNG4,U);
-// SU3::ColdConfiguration(pRNG,U);
+SU<Nc>::HotConfiguration(RNG4,U);
+// SU<Nc>::ColdConfiguration(pRNG,U);
 
 ////////////////////////////////////
 // Unmodified matrix element
@@ -112,7 +112,7 @@ int main (int argc, char ** argv)
 
 for(int mu=0;mu<Nd;mu++){
 
-SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
 
 Hmom -= real(sum(trace(mommu*mommu)));
 
@@ -75,7 +75,7 @@ int main (int argc, char** argv)
 FermionField MphiPrime (FGrid);
 
 LatticeGaugeField U(UGrid);
-SU3::HotConfiguration(RNG4,U);
+SU<Nc>::HotConfiguration(RNG4,U);
 
 ////////////////////////////////////
 // Unmodified matrix element
@@ -109,7 +109,7 @@ int main (int argc, char** argv)
 
 for(int mu=0; mu<Nd; mu++){
 
-SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
 
 PokeIndex<LorentzIndex>(mom, mommu, mu);
 
@@ -51,7 +51,7 @@ int main (int argc, char ** argv)
 
 LatticeGaugeField U(&Grid);
 
-SU3::HotConfiguration(pRNG,U);
+SU<Nc>::HotConfiguration(pRNG,U);
 
 double beta = 1.0;
 ConjugateWilsonGaugeActionR Action(beta);
@@ -80,7 +80,7 @@ int main (int argc, char ** argv)
 
 for(int mu=0;mu<Nd;mu++){
 
-SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+SU<Nc>::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg
 
 PokeIndex<LorentzIndex>(mom,mommu,mu);
 
@@ -54,7 +54,7 @@ int main (int argc, char ** argv)
 
 LatticeGaugeField U(&Grid);
 
-SU3::HotConfiguration(pRNG,U);
+SU<Nc>::HotConfiguration(pRNG,U);
 
 double beta = 1.0;
 double c1 = 0.331;
@@ -82,7 +82,7 @@ int main (int argc, char ** argv)
 
 for(int mu=0;mu<Nd;mu++){
 
-SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+SU<Nc>::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg
 
 PokeIndex<LorentzIndex>(mom,mommu,mu);
 
@@ -63,7 +63,7 @@ int main (int argc, char ** argv)
 
 LatticeGaugeField U(UGrid);
 
-SU3::HotConfiguration(RNG4,U);
+SU<Nc>::HotConfiguration(RNG4,U);
 
 ////////////////////////////////////
 // Unmodified matrix element
@@ -100,7 +100,7 @@ int main (int argc, char ** argv)
 
 for(int mu=0;mu<Nd;mu++){
 
-SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
 
 PokeIndex<LorentzIndex>(mom,mommu,mu);
 
@@ -57,7 +57,7 @@ int main (int argc, char ** argv)
 
 LatticeGaugeField U(UGrid);
 
-SU3::HotConfiguration(RNG4,U);
+SU<Nc>::HotConfiguration(RNG4,U);
 
 ////////////////////////////////////
 // Unmodified matrix element
@@ -94,7 +94,7 @@ int main (int argc, char ** argv)
 for(int mu=0;mu<Nd;mu++){
 
 // Traceless antihermitian momentum; gaussian in lie alg
-SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu);
+SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu);
 
 PokeIndex<LorentzIndex>(mom,mommu,mu);
 
@@ -58,7 +58,7 @@ int main (int argc, char ** argv)
 PokeIndex<LorentzIndex>(P, P_mu, mu);
 }
 
-SU3::HotConfiguration(pRNG,U);
+SU<Nc>::HotConfiguration(pRNG,U);
 
 
 ConjugateGradient<LatticeGaugeField> CG(1.0e-8, 10000);
@@ -95,7 +95,7 @@ int main (int argc, char ** argv)
 std::cout << GridLogMessage << "Update the U " << std::endl;
 for(int mu=0;mu<Nd;mu++){
 // Traceless antihermitian momentum; gaussian in lie algebra
-SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu);
+SU<Nc>::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu);
 auto Umu = PeekIndex<LorentzIndex>(U, mu);
 PokeIndex<LorentzIndex>(mom,mommu,mu);
 Umu = expMat(mommu, dt, 12) * Umu;
@@ -60,7 +60,7 @@ int main (int argc, char ** argv)
 
 LatticeGaugeField U(UGrid);
 
-SU3::HotConfiguration(RNG4,U);
+SU<Nc>::HotConfiguration(RNG4,U);
 
 ////////////////////////////////////
 // Unmodified matrix element
@@ -96,7 +96,7 @@ int main (int argc, char ** argv)
 
 for(int mu=0;mu<Nd;mu++){
 
-SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
 
 PokeIndex<LorentzIndex>(mom,mommu,mu);
 
@@ -72,7 +72,7 @@ int main (int argc, char** argv)
 LatticeFermion MphiPrime (FGrid);
 
 LatticeGaugeField U(UGrid);
-SU3::HotConfiguration(RNG4,U);
+SU<Nc>::HotConfiguration(RNG4,U);
 
 ////////////////////////////////////
 // Unmodified matrix element
@@ -107,7 +107,7 @@ int main (int argc, char** argv)
 
 for(int mu=0; mu<Nd; mu++){
 
-SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
 
 PokeIndex<LorentzIndex>(mom, mommu, mu);
 
@@ -76,7 +76,7 @@ int main (int argc, char** argv)
 FermionField MphiPrime (FGrid);
 
 LatticeGaugeField U(UGrid);
-SU3::HotConfiguration(RNG4,U);
+SU<Nc>::HotConfiguration(RNG4,U);
 
 ////////////////////////////////////
 // Unmodified matrix element
@@ -112,7 +112,7 @@ int main (int argc, char** argv)
 
 for(int mu=0; mu<Nd; mu++){
 
-SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
 
 PokeIndex<LorentzIndex>(mom, mommu, mu);
 autoView( U_v , U, CpuRead);
@@ -62,7 +62,7 @@ int main (int argc, char ** argv)
 
 LatticeGaugeField U(UGrid);
 
-SU3::HotConfiguration(RNG4,U);
+SU<Nc>::HotConfiguration(RNG4,U);
 
 ////////////////////////////////////
 // Unmodified matrix element
@@ -96,7 +96,7 @@ int main (int argc, char ** argv)
 
 for(int mu=0;mu<Nd;mu++){
 
-SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
 
 PokeIndex<LorentzIndex>(mom,mommu,mu);
 
@@ -54,7 +54,7 @@ int main (int argc, char ** argv)
 
 LatticeGaugeField U(&Grid);
 
-SU3::HotConfiguration(pRNG,U);
+SU<Nc>::HotConfiguration(pRNG,U);
 
 double beta = 1.0;
 double c1 = -0.331;
@@ -82,7 +82,7 @@ int main (int argc, char ** argv)
 
 for(int mu=0;mu<Nd;mu++){
 
-SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+SU<Nc>::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg
 
 PokeIndex<LorentzIndex>(mom,mommu,mu);
 
@@ -61,7 +61,7 @@ int main (int argc, char ** argv)
 LatticeGaugeField U(&Grid);
 
 //SU2::HotConfiguration(pRNG,U);
-SU3::ColdConfiguration(pRNG,U);
+SU<Nc>::ColdConfiguration(pRNG,U);
 
 ////////////////////////////////////
 // Unmodified matrix element
@@ -98,7 +98,7 @@ int main (int argc, char ** argv)
 for(int mu=0;mu<Nd;mu++){
 
 // Traceless antihermitian momentum; gaussian in lie alg
-SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu);
+SU<Nc>::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu);
 
 Hmom -= real(sum(trace(mommu*mommu)));
 
@@ -62,8 +62,8 @@ int main(int argc, char **argv)
 
 LatticeGaugeField U(&Grid);
 
-SU3::HotConfiguration(pRNG, U);
-//SU3::ColdConfiguration(pRNG, U);// Clover term Zero()
+SU<Nc>::HotConfiguration(pRNG, U);
+//SU<Nc>::ColdConfiguration(pRNG, U);// Clover term Zero()
 
 ////////////////////////////////////
 // Unmodified matrix element
@@ -101,7 +101,7 @@ int main(int argc, char **argv)
 for (int mu = 0; mu < Nd; mu++)
 {
 // Traceless antihermitian momentum; gaussian in lie alg
-SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu);
+SU<Nc>::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu);
 Hmom -= real(sum(trace(mommu * mommu)));
 PokeIndex<LorentzIndex>(mom, mommu, mu);
 
@@ -59,7 +59,7 @@ int main (int argc, char ** argv)
 
 LatticeGaugeField U(UGrid);
 
-SU3::HotConfiguration(RNG4,U);
+SU<Nc>::HotConfiguration(RNG4,U);
 
 ////////////////////////////////////
 // Unmodified matrix element
@@ -109,7 +109,7 @@ int main (int argc, char ** argv)
 
 for(int mu=0;mu<Nd;mu++){
 
-SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
 
 PokeIndex<LorentzIndex>(mom,mommu,mu);
 
@@ -293,7 +293,7 @@ int main (int argc, char ** argv) {
 {
 std::vector<int> seeds4({1,2,3,4});
 GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
-SU3::HotConfiguration(RNG4, Umu);
+SU<Nc>::HotConfiguration(RNG4, Umu);
 }
 std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt() << " Ls: " << Ls << std::endl;
 
@@ -54,7 +54,7 @@ int main (int argc, char ** argv)
 GridParallelRNG RNG5rb(FrbGrid); RNG5.SeedFixedIntegers(seeds5);
 
 LatticeGaugeField Umu(UGrid);
-SU3::HotConfiguration(RNG4, Umu);
+SU<Nc>::HotConfiguration(RNG4, Umu);
 
 std::vector<LatticeColourMatrix> U(4,UGrid);
 for(int mu=0;mu<Nd;mu++){
@@ -61,7 +61,7 @@ int main(int argc, char** argv) {
 RNG5.SeedFixedIntegers(seeds5);
 
 LatticeGaugeField Umu(UGrid);
-SU3::HotConfiguration(RNG4, Umu);
+SU<Nc>::HotConfiguration(RNG4, Umu);
 
 /*
 std::vector<LatticeColourMatrix> U(4, UGrid);
@@ -280,7 +280,7 @@ void make_gauge(GaugeField &Umu, Grid::LatticePropagator &q1,Grid::LatticePropag
 Grid::GridCartesian *UGrid = (Grid::GridCartesian *)Umu.Grid();
 Grid::GridParallelRNG RNG4(UGrid);
 RNG4.SeedFixedIntegers(seeds4);
-Grid::SU3::HotConfiguration(RNG4, Umu);
+Grid::SU<Nc>::HotConfiguration(RNG4, Umu);
 
 // Propagator
 Grid::gaussian(RNG4, q1);
@@ -277,7 +277,7 @@ double calc_grid_p(Grid::LatticeGaugeField & Umu)
 Grid::GridCartesian * UGrid = (Grid::GridCartesian *) Umu.Grid();
 Grid::GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
 
-Grid::SU3::HotConfiguration(RNG4,Umu);
+Grid::SU<Nc>::HotConfiguration(RNG4,Umu);
 
 Grid::LatticeColourMatrix tmp(UGrid);
 tmp = Grid::zero;
@@ -502,7 +502,7 @@ void calc_grid(ChromaAction action,Grid::LatticeGaugeField & Umu, Grid::LatticeF
 Grid::gaussian(RNG5,src);
 Grid::gaussian(RNG5,res);
 
-Grid::SU3::HotConfiguration(RNG4,Umu);
+Grid::SU<Nc>::HotConfiguration(RNG4,Umu);
 
 /*
 Grid::LatticeColourMatrix U(UGrid);
@@ -333,7 +333,7 @@ void make_gauge(GaugeField & Umu,FermionField &src)
 
 Grid::GridCartesian * UGrid = (Grid::GridCartesian *) Umu.Grid();
 Grid::GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
-Grid::SU3::HotConfiguration(RNG4,Umu);
+Grid::SU<Nc>::HotConfiguration(RNG4,Umu);
 Grid::gaussian(RNG4,src);
 }
 
@@ -348,7 +348,7 @@ void make_gauge(GaugeField &Umu, FermionField &src)
 Grid::GridCartesian *UGrid = (Grid::GridCartesian *)Umu._grid;
 Grid::GridParallelRNG RNG4(UGrid);
 RNG4.SeedFixedIntegers(seeds4);
-Grid::SU3::HotConfiguration(RNG4, Umu);
+Grid::SU<Nc>::HotConfiguration(RNG4, Umu);
 
 // Fermion field
 Grid::gaussian(RNG4, src);
@@ -47,8 +47,8 @@ int main (int argc, char ** argv)
 RealD nrm = norm2(src);
 LatticeFermion result(&Grid); result=Zero();
 LatticeGaugeField Umu(&Grid);
-// SU3::HotConfiguration(pRNG,Umu);
-SU3::ColdConfiguration(Umu);
+// SU<Nc>::HotConfiguration(pRNG,Umu);
+SU<Nc>::ColdConfiguration(Umu);
 std::vector<LatticeColourMatrix> U(4,&Grid);
 
 for(int mu=0;mu<Nd;mu++){
@@ -61,7 +61,7 @@ int main (int argc, char ** argv)
 
 LatticeFermion src(FGrid); random(RNG5,src);
 LatticeFermion result(FGrid); result=Zero();
-LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
+LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
 
 std::vector<LatticeColourMatrix> U(4,UGrid);
 for(int mu=0;mu<Nd;mu++){
@@ -94,7 +94,7 @@ int main (int argc, char ** argv)
 GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
 GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
 
-LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
+LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
 std::vector<LatticeColourMatrix> U(4,UGrid);
 
 RealD mass=0.1;
@@ -67,7 +67,7 @@ int main(int argc, char** argv) {
 result = Zero();
 LatticeGaugeField Umu(UGrid);
 
-SU3::HotConfiguration(RNG4, Umu);
+SU<Nc>::HotConfiguration(RNG4, Umu);
 
 std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt()
 << " Ls: " << Ls << std::endl;
@@ -61,7 +61,7 @@ int main (int argc, char ** argv)
 
 LatticeFermion src(FGrid); random(RNG5,src);
 LatticeFermion result(FGrid); result=Zero();
-LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
+LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
 
 std::vector<LatticeColourMatrix> U(4,UGrid);
 for(int mu=0;mu<Nd;mu++){
@@ -61,7 +61,7 @@ int main (int argc, char ** argv)
 
 LatticeFermion src(FGrid); random(RNG5,src);
 LatticeFermion result(FGrid); result=Zero();
-LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
+LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
 
 std::vector<LatticeColourMatrix> U(4,UGrid);
 for(int mu=0;mu<Nd;mu++){
@@ -65,7 +65,7 @@ int main (int argc, char ** argv)
 
 LatticeFermion src(FGrid); random(RNG5,src);
 LatticeFermion result(FGrid); result=Zero();
-LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
+LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
 
 std::vector<LatticeColourMatrix> U(4,UGrid);
 
@@ -68,7 +68,7 @@ int main (int argc, char ** argv)
 LatticeFermion result(FGrid); result=Zero();
 LatticeGaugeField Umu(UGrid);
 
-SU3::HotConfiguration(RNG4,Umu);
+SU<Nc>::HotConfiguration(RNG4,Umu);
 
 
 ConjugateResidual<LatticeFermion> CR(1.0e-6,10000);
@@ -93,7 +93,7 @@ int main (int argc, char ** argv)
 for(int s=0;s<nrhs;s++) random(pRNG5,src[s]);
 for(int s=0;s<nrhs;s++) result[s]=Zero();
 
-LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu);
+LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(pRNG,Umu);
 
 ///////////////////////////////////////////////////////////////
 // Bounce these fields to disk
@@ -136,11 +136,11 @@ int main (int argc, char ** argv)
 std::cout << GridLogMessage << "Intialising 4D RNG "<<std::endl;
 pRNG.SeedFixedIntegers(seeds);
 std::cout << GridLogMessage << "Intialised 4D RNG "<<std::endl;
-SU3::HotConfiguration(pRNG,Umu);
+SU<Nc>::HotConfiguration(pRNG,Umu);
 std::cout << GridLogMessage << "Intialised the HOT Gauge Field"<<std::endl;
 // std::cout << " Site zero "<< Umu[0] <<std::endl;
 } else {
-SU3::ColdConfiguration(Umu);
+SU<Nc>::ColdConfiguration(Umu);
 std::cout << GridLogMessage << "Intialised the COLD Gauge Field"<<std::endl;
 }
 /////////////////
Some files were not shown because too many files have changed in this diff