forked from portelli/lattice-benchmarks

Merge pull request 'main' (#2) from portelli/lattice-benchmarks:main into main
Reviewed-on: #2

This commit is contained in: commit 2efbad5f70

.gitignore (vendored, 6 lines changed)
@@ -1,5 +1,4 @@
 /.vscode
-build*
 .buildutils
 autom4te.cache
 config.*
@@ -12,4 +11,7 @@ install-sh
 missing
 Makefile.in
 .DS_Store
 *~
+/*/env
+/*/build
+/Grid/json.hpp
Grid/.clang-format (new file, 14 lines)

@@ -0,0 +1,14 @@
+{
+BasedOnStyle: LLVM,
+UseTab: Never,
+IndentWidth: 2,
+TabWidth: 2,
+BreakBeforeBraces: Allman,
+AllowShortIfStatementsOnASingleLine: false,
+IndentCaseLabels: false,
+ColumnLimit: 90,
+AccessModifierOffset: -4,
+NamespaceIndentation: All,
+FixNamespaceComments: false,
+SortIncludes: true,
+}
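This configuration pins the sources to a 90-column, two-space, Allman-brace LLVM-derived style. The wrapped and merged lines throughout the diffs below are consistent with this file having been applied mechanically (e.g. `clang-format -i` over the benchmark sources), though the commit message itself does not say so.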
Grid/Benchmark_Grid.cpp (new file, 1054 lines)

File diff suppressed because it is too large.
@@ -32,23 +32,13 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
 #ifdef HAVE_LIME
 using namespace Grid;
 
-std::string filestem(const int l)
-{
-  return "iobench_l" + std::to_string(l);
-}
+std::string filestem(const int l) { return "iobench_l" + std::to_string(l); }
 
-int vol(const int i)
-{
-  return BENCH_IO_LMIN + 2 * i;
-}
+int vol(const int i) { return BENCH_IO_LMIN + 2 * i; }
 
-int volInd(const int l)
-{
-  return (l - BENCH_IO_LMIN) / 2;
-}
+int volInd(const int l) { return (l - BENCH_IO_LMIN) / 2; }
 
-template <typename Mat>
-void stats(Mat &mean, Mat &stdDev, const std::vector<Mat> &data)
+template <typename Mat> void stats(Mat &mean, Mat &stdDev, const std::vector<Mat> &data)
 {
   auto nr = data[0].rows(), nc = data[0].cols();
   Eigen::MatrixXd sqSum(nr, nc);
@@ -66,11 +56,11 @@ void stats(Mat &mean, Mat &stdDev, const std::vector<Mat> &data)
   mean /= n;
 }
 
 #define grid_printf(...)        \
   {                             \
     char _buf[1024];            \
     sprintf(_buf, __VA_ARGS__); \
     MSG << _buf;                \
   }
 
 enum
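For orientation, the stats helper seen across the two hunks above accumulates an elementwise sum and squared sum over repeated measurements and converts them to a mean and an unbiased standard deviation. The diff only shows its head and tail (the signature, the sqSum declaration, and the closing mean /= n;), so the following is a minimal self-contained sketch of the same pattern, assuming Eigen; the loop body between the shown lines is reconstructed for illustration, not quoted from the commit.

#include <Eigen/Dense>
#include <iostream>
#include <vector>

// Elementwise mean and standard deviation over repeated measurements,
// in the spirit of the stats() template shown in the hunks above (sketch).
template <typename Mat>
void stats(Mat &mean, Mat &stdDev, const std::vector<Mat> &data)
{
  auto nr = data[0].rows(), nc = data[0].cols();
  Eigen::MatrixXd sqSum = Eigen::MatrixXd::Zero(nr, nc);
  double n = static_cast<double>(data.size());

  mean = Mat::Zero(nr, nc);
  for (auto &d : data)
  {
    mean += d;        // running sum of the samples
    sqSum += d.cwiseProduct(d); // running sum of squares
  }
  // unbiased variance: (sum(x^2) - sum(x)^2 / n) / (n - 1)
  stdDev = ((sqSum - mean.cwiseProduct(mean) / n) / (n - 1.)).cwiseSqrt();
  mean /= n;
}

int main()
{
  std::vector<Eigen::MatrixXd> runs(3, Eigen::MatrixXd::Constant(2, 2, 1.));
  runs[1].array() += 1.; // introduce some spread across runs
  Eigen::MatrixXd m, s;
  stats(m, s, runs);
  std::cout << "mean:\n" << m << "\nstd dev:\n" << s << std::endl;
}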
@@ -173,47 +163,49 @@ int main(int argc, char **argv)
   MSG << "SUMMARY" << std::endl;
   MSG << BIGSEP << std::endl;
   MSG << "Summary of individual results (all results in MB/s)." << std::endl;
-  MSG << "Every second colum gives the standard deviation of the previous column." << std::endl;
+  MSG << "Every second colum gives the standard deviation of the previous column."
+      << std::endl;
   MSG << std::endl;
-  grid_printf("%4s %12s %12s %12s %12s %12s %12s %12s %12s\n",
-              "L", "std read", "std dev", "std write", "std dev",
-              "Grid read", "std dev", "Grid write", "std dev");
+  grid_printf("%4s %12s %12s %12s %12s %12s %12s %12s %12s\n", "L", "std read", "std dev",
+              "std write", "std dev", "Grid read", "std dev", "Grid write", "std dev");
   for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
   {
-    grid_printf("%4d %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n",
-                l, mean(volInd(l), sRead), stdDev(volInd(l), sRead),
-                mean(volInd(l), sWrite), stdDev(volInd(l), sWrite),
-                mean(volInd(l), gRead), stdDev(volInd(l), gRead),
-                mean(volInd(l), gWrite), stdDev(volInd(l), gWrite));
+    grid_printf("%4d %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n", l,
+                mean(volInd(l), sRead), stdDev(volInd(l), sRead), mean(volInd(l), sWrite),
+                stdDev(volInd(l), sWrite), mean(volInd(l), gRead),
+                stdDev(volInd(l), gRead), mean(volInd(l), gWrite),
+                stdDev(volInd(l), gWrite));
   }
   MSG << std::endl;
-  MSG << "Robustness of individual results, in %. (rob = 100% - std dev / mean)" << std::endl;
+  MSG << "Robustness of individual results, in %. (rob = 100% - std dev / mean)"
+      << std::endl;
   MSG << std::endl;
-  grid_printf("%4s %12s %12s %12s %12s\n",
-              "L", "std read", "std write", "Grid read", "Grid write");
+  grid_printf("%4s %12s %12s %12s %12s\n", "L", "std read", "std write", "Grid read",
+              "Grid write");
   for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
   {
-    grid_printf("%4d %12.1f %12.1f %12.1f %12.1f\n",
-                l, rob(volInd(l), sRead), rob(volInd(l), sWrite),
-                rob(volInd(l), gRead), rob(volInd(l), gWrite));
+    grid_printf("%4d %12.1f %12.1f %12.1f %12.1f\n", l, rob(volInd(l), sRead),
+                rob(volInd(l), sWrite), rob(volInd(l), gRead), rob(volInd(l), gWrite));
   }
   MSG << std::endl;
-  MSG << "Summary of results averaged over local volumes 24^4-" << BENCH_IO_LMAX << "^4 (all results in MB/s)." << std::endl;
-  MSG << "Every second colum gives the standard deviation of the previous column." << std::endl;
+  MSG << "Summary of results averaged over local volumes 24^4-" << BENCH_IO_LMAX
+      << "^4 (all results in MB/s)." << std::endl;
+  MSG << "Every second colum gives the standard deviation of the previous column."
+      << std::endl;
   MSG << std::endl;
-  grid_printf("%12s %12s %12s %12s %12s %12s %12s %12s\n",
-              "std read", "std dev", "std write", "std dev",
-              "Grid read", "std dev", "Grid write", "std dev");
-  grid_printf("%12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n",
-              avMean(sRead), avStdDev(sRead), avMean(sWrite), avStdDev(sWrite),
-              avMean(gRead), avStdDev(gRead), avMean(gWrite), avStdDev(gWrite));
+  grid_printf("%12s %12s %12s %12s %12s %12s %12s %12s\n", "std read", "std dev",
+              "std write", "std dev", "Grid read", "std dev", "Grid write", "std dev");
+  grid_printf("%12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n", avMean(sRead),
+              avStdDev(sRead), avMean(sWrite), avStdDev(sWrite), avMean(gRead),
+              avStdDev(gRead), avMean(gWrite), avStdDev(gWrite));
   MSG << std::endl;
-  MSG << "Robustness of volume-averaged results, in %. (rob = 100% - std dev / mean)" << std::endl;
+  MSG << "Robustness of volume-averaged results, in %. (rob = 100% - std dev / mean)"
+      << std::endl;
   MSG << std::endl;
-  grid_printf("%12s %12s %12s %12s\n",
-              "std read", "std write", "Grid read", "Grid write");
-  grid_printf("%12.1f %12.1f %12.1f %12.1f\n",
-              avRob(sRead), avRob(sWrite), avRob(gRead), avRob(gWrite));
+  grid_printf("%12s %12s %12s %12s\n", "std read", "std write", "Grid read",
+              "Grid write");
+  grid_printf("%12.1f %12.1f %12.1f %12.1f\n", avRob(sRead), avRob(sWrite), avRob(gRead),
+              avRob(gWrite));
 
   Grid_finalize();
 
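Two pieces of context for the summary code above, sketched under assumptions since the surrounding file is not shown in full. First, grid_printf (defined in an earlier hunk) formats into a fixed buffer and routes the result through the MSG logging stream. Second, the robustness figure reads as 100% minus the relative deviation, so a mean of 1000 MB/s with a 50 MB/s standard deviation scores 95%. In the sketch below, std::cout stands in for Grid's log stream, snprintf bounds the buffer where the diffed macro uses sprintf, and the rob() helper is a simplified stand-in (in the benchmark, rob indexes precomputed result matrices).

#include <cstdio>
#include <iostream>

#define MSG std::cout // stand-in for Grid's "std::cout << GridLogMessage"
#define grid_printf(...)                         \
  {                                              \
    char _buf[1024];                             \
    snprintf(_buf, sizeof(_buf), __VA_ARGS__);   \
    MSG << _buf;                                 \
  }

// Robustness as printed by the benchmark: 100% minus the relative deviation.
double rob(double mean, double stdDev) { return 100. - 100. * stdDev / mean; }

int main()
{
  grid_printf("%4s %12s %12s\n", "L", "std read", "rob");
  grid_printf("%4d %12.1f %12.1f\n", 24, 1000.0, rob(1000.0, 50.0)); // prints 95.0
}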
@@ -20,9 +20,9 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 #include <Grid/Grid.h>
 #define MSG std::cout << GridLogMessage
 #define SEP \
   "-----------------------------------------------------------------------------"
 #define BIGSEP \
   "============================================================================="
 #ifdef HAVE_LIME
 
@@ -36,16 +36,15 @@ namespace Grid
 
   // AP 06/10/2020: Standard C version in case one is suspicious of the C++ API
   //
-  // template <typename Field>
-  // void stdWrite(const std::string filestem, Field &vec)
+  // template <typename Field> void stdWrite(const std::string filestem, Field &vec)
   // {
   //   std::string rankStr = std::to_string(vec.Grid()->ThisRank());
   //   std::FILE *file = std::fopen((filestem + "." + rankStr + ".bin").c_str(), "wb");
   //   size_t size;
   //   uint32_t crc;
   //   GridStopWatch ioWatch, crcWatch;
 
-  //   size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object);
+  //   size = vec.Grid()->lSites() * sizeof(typename Field::scalar_object);
   //   autoView(vec_v, vec, CpuRead);
   //   crcWatch.Start();
   //   crc = GridChecksum::crc32(vec_v.cpu_ptr, size);
@@ -53,36 +52,39 @@ namespace Grid
   //   crcWatch.Stop();
   //   MSG << "Std I/O write: Data CRC32 " << std::hex << crc << std::dec << std::endl;
   //   ioWatch.Start();
-  //   std::fwrite(vec_v.cpu_ptr, sizeof(typename Field::scalar_object), vec.Grid()->lSites(), file);
+  //   std::fwrite(vec_v.cpu_ptr, sizeof(typename Field::scalar_object),
+  //               vec.Grid()->lSites(), file);
   //   ioWatch.Stop();
   //   std::fclose(file);
   //   size *= vec.Grid()->ProcessorCount();
   //   auto &p = BinaryIO::lastPerf;
   //   p.size = size;
   //   p.time = ioWatch.useconds();
-  //   p.mbytesPerSecond = size/1024./1024./(ioWatch.useconds()/1.e6);
+  //   p.mbytesPerSecond = size / 1024. / 1024. / (ioWatch.useconds() / 1.e6);
   //   MSG << "Std I/O write: Wrote " << p.size << " bytes in " << ioWatch.Elapsed()
-  //       << ", " << p.mbytesPerSecond << " MB/s" << std::endl;
+  //       << ",
+  // "
+  //       << p.mbytesPerSecond << " MB/s" << std::endl;
   //   MSG << "Std I/O write: checksum overhead " << crcWatch.Elapsed() << std::endl;
   // }
-  //
+ 
-  // template <typename Field>
-  // void stdRead(Field &vec, const std::string filestem)
+  // template <typename Field> void stdRead(Field &vec, const std::string filestem)
   // {
   //   std::string rankStr = std::to_string(vec.Grid()->ThisRank());
   //   std::FILE *file = std::fopen((filestem + "." + rankStr + ".bin").c_str(), "rb");
   //   size_t size;
   //   uint32_t crcRead, crcData;
   //   GridStopWatch ioWatch, crcWatch;
 
-  //   size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object);
+  //   size = vec.Grid()->lSites() * sizeof(typename Field::scalar_object);
   //   crcWatch.Start();
   //   std::fread(&crcRead, sizeof(uint32_t), 1, file);
   //   crcWatch.Stop();
   //   {
   //     autoView(vec_v, vec, CpuWrite);
   //     ioWatch.Start();
-  //     std::fread(vec_v.cpu_ptr, sizeof(typename Field::scalar_object), vec.Grid()->lSites(), file);
+  //     std::fread(vec_v.cpu_ptr, sizeof(typename Field::scalar_object),
+  //                vec.Grid()->lSites(), file);
   //     ioWatch.Stop();
   //     std::fclose(file);
   //   }
@@ -96,19 +98,19 @@ namespace Grid
   //   assert(crcData == crcRead);
   //   size *= vec.Grid()->ProcessorCount();
   //   auto &p = BinaryIO::lastPerf;
   //   p.size = size;
   //   p.time = ioWatch.useconds();
-  //   p.mbytesPerSecond = size/1024./1024./(ioWatch.useconds()/1.e6);
+  //   p.mbytesPerSecond = size / 1024. / 1024. / (ioWatch.useconds() / 1.e6);
-  //   MSG << "Std I/O read: Read " << p.size << " bytes in " << ioWatch.Elapsed()
-  //       << ", " << p.mbytesPerSecond << " MB/s" << std::endl;
+  //   MSG << "Std I/O read: Read " << p.size << " bytes in " << ioWatch.Elapsed() << ", "
+  //       << p.mbytesPerSecond << " MB/s" << std::endl;
   //   MSG << "Std I/O read: checksum overhead " << crcWatch.Elapsed() << std::endl;
   // }
 
-  template <typename Field>
-  void stdWrite(const std::string filestem, Field &vec)
+  template <typename Field> void stdWrite(const std::string filestem, Field &vec)
   {
     std::string rankStr = std::to_string(vec.Grid()->ThisRank());
-    std::ofstream file(filestem + "." + rankStr + ".bin", std::ios::out | std::ios::binary);
+    std::ofstream file(filestem + "." + rankStr + ".bin",
+                       std::ios::out | std::ios::binary);
     size_t size, sizec;
     uint32_t crc;
     GridStopWatch ioWatch, crcWatch;
@@ -130,16 +132,16 @@ namespace Grid
     p.size = size;
     p.time = ioWatch.useconds();
     p.mbytesPerSecond = size / 1024. / 1024. / (ioWatch.useconds() / 1.e6);
-    MSG << "Std I/O write: Wrote " << p.size << " bytes in " << ioWatch.Elapsed()
-        << ", " << p.mbytesPerSecond << " MB/s" << std::endl;
+    MSG << "Std I/O write: Wrote " << p.size << " bytes in " << ioWatch.Elapsed() << ", "
+        << p.mbytesPerSecond << " MB/s" << std::endl;
     MSG << "Std I/O write: checksum overhead " << crcWatch.Elapsed() << std::endl;
   }
 
-  template <typename Field>
-  void stdRead(Field &vec, const std::string filestem)
+  template <typename Field> void stdRead(Field &vec, const std::string filestem)
   {
     std::string rankStr = std::to_string(vec.Grid()->ThisRank());
-    std::ifstream file(filestem + "." + rankStr + ".bin", std::ios::in | std::ios::binary);
+    std::ifstream file(filestem + "." + rankStr + ".bin",
+                       std::ios::in | std::ios::binary);
     size_t size, sizec;
     uint32_t crcRead, crcData;
     GridStopWatch ioWatch, crcWatch;
@@ -168,13 +170,12 @@ namespace Grid
     p.size = size;
     p.time = ioWatch.useconds();
    p.mbytesPerSecond = size / 1024. / 1024. / (ioWatch.useconds() / 1.e6);
-    MSG << "Std I/O read: Read " << p.size << " bytes in " << ioWatch.Elapsed()
-        << ", " << p.mbytesPerSecond << " MB/s" << std::endl;
+    MSG << "Std I/O read: Read " << p.size << " bytes in " << ioWatch.Elapsed() << ", "
+        << p.mbytesPerSecond << " MB/s" << std::endl;
     MSG << "Std I/O read: checksum overhead " << crcWatch.Elapsed() << std::endl;
   }
 
-  template <typename Field>
-  void limeWrite(const std::string filestem, Field &vec)
+  template <typename Field> void limeWrite(const std::string filestem, Field &vec)
   {
     emptyUserRecord record;
     ScidacWriter binWriter(vec.Grid()->IsBoss());
@@ -184,8 +185,7 @@ namespace Grid
     binWriter.close();
   }
 
-  template <typename Field>
-  void limeRead(Field &vec, const std::string filestem)
+  template <typename Field> void limeRead(Field &vec, const std::string filestem)
   {
     emptyUserRecord record;
     ScidacReader binReader;
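The limeWrite and limeRead templates above are thin wrappers around Grid's SciDAC writer and reader classes; as the #ifdef HAVE_LIME guard earlier in this file indicates, they are only compiled when Grid was built with LIME support (presumably against the c-lime library, though the diff itself does not show the build configuration).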
@@ -225,12 +225,13 @@ namespace Grid
 
   template <typename Field>
   void writeBenchmark(const Coordinate &latt, const std::string filename,
-                      const WriterFn<Field> &write,
-                      const unsigned int Ls = 1, const bool rb = false)
+                      const WriterFn<Field> &write, const unsigned int Ls = 1,
+                      const bool rb = false)
   {
     auto mpi = GridDefaultMpi();
     auto simd = GridDefaultSimd(latt.size(), Field::vector_type::Nsimd());
-    std::shared_ptr<GridCartesian> gBasePt(SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi));
+    std::shared_ptr<GridCartesian> gBasePt(
+        SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi));
     std::shared_ptr<GridBase> gPt;
     std::random_device rd;
 
@@ -251,12 +252,13 @@ namespace Grid
 
   template <typename Field>
   void readBenchmark(const Coordinate &latt, const std::string filename,
-                     const ReaderFn<Field> &read,
-                     const unsigned int Ls = 1, const bool rb = false)
+                     const ReaderFn<Field> &read, const unsigned int Ls = 1,
+                     const bool rb = false)
   {
     auto mpi = GridDefaultMpi();
     auto simd = GridDefaultSimd(latt.size(), Field::vector_type::Nsimd());
-    std::shared_ptr<GridCartesian> gBasePt(SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi));
+    std::shared_ptr<GridCartesian> gBasePt(
+        SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi));
     std::shared_ptr<GridBase> gPt;
 
     makeGrid(gPt, gBasePt, Ls, rb);
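All of the read and write paths above report throughput the same way: bytes moved divided by wall time, with ioWatch.useconds() converted to seconds and the byte count to MiB, i.e. size / 1024. / 1024. / (usec / 1.e6). A minimal standalone illustration of that arithmetic, with made-up numbers:

#include <cstdint>
#include <iostream>

int main()
{
  // Hypothetical figures: 3 GiB written in 2.5 s, as the benchmark would see them.
  uint64_t size = 3ULL * 1024 * 1024 * 1024; // bytes
  double usec = 2.5e6;                       // ioWatch.useconds()

  // Same expression as p.mbytesPerSecond in the hunks above.
  double mbytesPerSecond = size / 1024. / 1024. / (usec / 1.e6);
  std::cout << size << " bytes in " << usec / 1.e6 << " s: " << mbytesPerSecond
            << " MB/s" << std::endl; // prints 1228.8 MB/s
}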
Deleted file (all 801 lines removed)

@@ -1,801 +0,0 @@
/*
Copyright © 2015 Peter Boyle <paboyle@ph.ed.ac.uk>
Copyright © 2022 Antonin Portelli <antonin.portelli@me.com>

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#include <Grid/Grid.h>

using namespace Grid;

std::vector<int> L_list;
std::vector<int> Ls_list;
std::vector<double> mflop_list;

double mflop_ref;
double mflop_ref_err;

int NN_global;

struct time_statistics
{
  double mean;
  double err;
  double min;
  double max;

  void statistics(std::vector<double> v)
  {
    double sum = std::accumulate(v.begin(), v.end(), 0.0);
    mean = sum / v.size();

    std::vector<double> diff(v.size());
    std::transform(v.begin(), v.end(), diff.begin(), [=](double x)
                   { return x - mean; });
    double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0);
    err = std::sqrt(sq_sum / (v.size() * (v.size() - 1)));

    auto result = std::minmax_element(v.begin(), v.end());
    min = *result.first;
    max = *result.second;
  }
};

void comms_header()
{
  std::cout << GridLogMessage << " L "
            << "\t"
            << " Ls "
            << "\t"
            << "bytes\t MB/s uni (err/min/max) \t\t MB/s bidi (err/min/max)" << std::endl;
};

Gamma::Algebra Gmu[] = {
    Gamma::Algebra::GammaX,
    Gamma::Algebra::GammaY,
    Gamma::Algebra::GammaZ,
    Gamma::Algebra::GammaT};
struct controls
{
  int Opt;
  int CommsOverlap;
  Grid::CartesianCommunicator::CommunicatorPolicy_t CommsAsynch;
};

class Benchmark
{
public:
  static void Decomposition(void)
  {
    int threads = GridThread::GetThreads();
    std::cout << GridLogMessage << "==================================================================================" << std::endl;
    std::cout << GridLogMessage << "= Grid is setup to use " << threads << " threads" << std::endl;
    std::cout << GridLogMessage << "==================================================================================" << std::endl;
    std::cout << GridLogMessage << "Grid Default Decomposition patterns\n";
    std::cout << GridLogMessage << "\tOpenMP threads : " << GridThread::GetThreads() << std::endl;
    std::cout << GridLogMessage << "\tMPI tasks : " << GridCmdVectorIntToString(GridDefaultMpi()) << std::endl;
    std::cout << GridLogMessage << "\tvReal : " << sizeof(vReal) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vReal::Nsimd())) << std::endl;
    std::cout << GridLogMessage << "\tvRealF : " << sizeof(vRealF) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vRealF::Nsimd())) << std::endl;
    std::cout << GridLogMessage << "\tvRealD : " << sizeof(vRealD) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vRealD::Nsimd())) << std::endl;
    std::cout << GridLogMessage << "\tvComplex : " << sizeof(vComplex) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vComplex::Nsimd())) << std::endl;
    std::cout << GridLogMessage << "\tvComplexF : " << sizeof(vComplexF) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vComplexF::Nsimd())) << std::endl;
    std::cout << GridLogMessage << "\tvComplexD : " << sizeof(vComplexD) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vComplexD::Nsimd())) << std::endl;
    std::cout << GridLogMessage << "==================================================================================" << std::endl;
  }

  static void Comms(void)
  {
    int Nloop = 200;
    int nmu = 0;
    int maxlat = 32;

    Coordinate simd_layout = GridDefaultSimd(Nd, vComplexD::Nsimd());
    Coordinate mpi_layout = GridDefaultMpi();

    for (int mu = 0; mu < Nd; mu++)
      if (mpi_layout[mu] > 1)
        nmu++;

    std::vector<double> t_time(Nloop);
    time_statistics timestat;

    std::cout << GridLogMessage << "====================================================================================================" << std::endl;
    std::cout << GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in " << nmu << " dimensions" << std::endl;
    std::cout << GridLogMessage << "====================================================================================================" << std::endl;
    comms_header();

    for (int lat = 16; lat <= maxlat; lat += 8)
    {
      // for(int Ls=8;Ls<=8;Ls*=2){
      {
        int Ls = 12;

        Coordinate latt_size({lat * mpi_layout[0],
                              lat * mpi_layout[1],
                              lat * mpi_layout[2],
                              lat * mpi_layout[3]});

        GridCartesian Grid(latt_size, simd_layout, mpi_layout);
        RealD Nrank = Grid._Nprocessors;
        RealD Nnode = Grid.NodeCount();
        RealD ppn = Nrank / Nnode;

        std::vector<HalfSpinColourVectorD *> xbuf(8);
        std::vector<HalfSpinColourVectorD *> rbuf(8);
        // Grid.ShmBufferFreeAll();
        uint64_t bytes = lat * lat * lat * Ls * sizeof(HalfSpinColourVectorD);
        for (int d = 0; d < 8; d++)
        {
          xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
          rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
          // bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
          // bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
        }

        // int ncomm;
        double dbytes;

        for (int dir = 0; dir < 8; dir++)
        {
          int mu = dir % 4;
          if (mpi_layout[mu] > 1)
          {
            std::vector<double> times(Nloop);
            for (int i = 0; i < Nloop; i++)
            {
              dbytes = 0;
              double start = usecond();
              int xmit_to_rank;
              int recv_from_rank;

              if (dir == mu)
              {
                int comm_proc = 1;
                Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
              }
              else
              {
                int comm_proc = mpi_layout[mu] - 1;
                Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
              }
              Grid.SendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
                                  (void *)&rbuf[dir][0], recv_from_rank,
                                  bytes);
              dbytes += bytes;

              double stop = usecond();
              t_time[i] = stop - start; // microseconds
            }
            timestat.statistics(t_time);

            dbytes = dbytes * ppn;
            double xbytes = dbytes * 0.5;
            double bidibytes = dbytes;

            std::cout << GridLogMessage << lat << "\t" << Ls << "\t "
                      << bytes << " \t "
                      << xbytes / timestat.mean << " \t " << xbytes * timestat.err / (timestat.mean * timestat.mean) << " \t "
                      << xbytes / timestat.max << " " << xbytes / timestat.min
                      << "\t\t" << bidibytes / timestat.mean << " " << bidibytes * timestat.err / (timestat.mean * timestat.mean) << " "
                      << bidibytes / timestat.max << " " << bidibytes / timestat.min << std::endl;
          }
        }
        for (int d = 0; d < 8; d++)
        {
          acceleratorFreeDevice(xbuf[d]);
          acceleratorFreeDevice(rbuf[d]);
        }
      }
    }
    return;
  }

  static void Memory(void)
  {
    const int Nvec = 8;
    typedef Lattice<iVector<vReal, Nvec>> LatticeVec;
    typedef iVector<vReal, Nvec> Vec;

    Coordinate simd_layout = GridDefaultSimd(Nd, vReal::Nsimd());
    Coordinate mpi_layout = GridDefaultMpi();

    std::cout << GridLogMessage << "==================================================================================" << std::endl;
    std::cout << GridLogMessage << "= Benchmarking a*x + y bandwidth" << std::endl;
    std::cout << GridLogMessage << "==================================================================================" << std::endl;
    std::cout << GridLogMessage << " L "
              << "\t\t"
              << "bytes"
              << "\t\t\t"
              << "GB/s"
              << "\t\t"
              << "Gflop/s"
              << "\t\t seconds"
              << "\t\tGB/s / node" << std::endl;
    std::cout << GridLogMessage << "----------------------------------------------------------" << std::endl;

    // uint64_t NP;
    uint64_t NN;

    uint64_t lmax = 32;
#define NLOOP (1000 * lmax * lmax * lmax * lmax / lat / lat / lat / lat)

    GridSerialRNG sRNG;
    sRNG.SeedFixedIntegers(std::vector<int>({45, 12, 81, 9}));
    for (int lat = 8; lat <= lmax; lat += 8)
    {
      Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2], lat * mpi_layout[3]});
      int64_t vol = latt_size[0] * latt_size[1] * latt_size[2] * latt_size[3];

      GridCartesian Grid(latt_size, simd_layout, mpi_layout);

      // NP= Grid.RankCount();
      NN = Grid.NodeCount();

      Vec rn;
      random(sRNG, rn);

      LatticeVec z(&Grid);
      z = Zero();
      LatticeVec x(&Grid);
      x = Zero();
      LatticeVec y(&Grid);
      y = Zero();
      double a = 2.0;

      uint64_t Nloop = NLOOP;

      double start = usecond();
      for (int i = 0; i < Nloop; i++)
      {
        z = a * x - y;
      }
      double stop = usecond();
      double time = (stop - start) / Nloop * 1000;

      double flops = vol * Nvec * 2; // mul,add
      double bytes = 3.0 * vol * Nvec * sizeof(Real);
      std::cout << GridLogMessage << std::setprecision(3)
                << lat << "\t\t" << bytes << " \t\t" << bytes / time << "\t\t" << flops / time << "\t\t" << (stop - start) / 1000. / 1000.
                << "\t\t" << bytes / time / NN << std::endl;
    }
  };

  static void SU4(void)
  {
    const int Nc4 = 4;
    typedef Lattice<iMatrix<vComplexF, Nc4>> LatticeSU4;

    Coordinate simd_layout = GridDefaultSimd(Nd, vComplexF::Nsimd());
    Coordinate mpi_layout = GridDefaultMpi();

    std::cout << GridLogMessage << "==================================================================================" << std::endl;
    std::cout << GridLogMessage << "= Benchmarking z = y*x SU(4) bandwidth" << std::endl;
    std::cout << GridLogMessage << "==================================================================================" << std::endl;
    std::cout << GridLogMessage << " L "
              << "\t\t"
              << "bytes"
              << "\t\t\t"
              << "GB/s"
              << "\t\t"
              << "Gflop/s"
              << "\t\t seconds"
              << "\t\tGB/s / node" << std::endl;
    std::cout << GridLogMessage << "----------------------------------------------------------" << std::endl;

    uint64_t NN;

    uint64_t lmax = 32;

    GridSerialRNG sRNG;
    sRNG.SeedFixedIntegers(std::vector<int>({45, 12, 81, 9}));
    for (int lat = 8; lat <= lmax; lat += 8)
    {
      Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2], lat * mpi_layout[3]});
      int64_t vol = latt_size[0] * latt_size[1] * latt_size[2] * latt_size[3];

      GridCartesian Grid(latt_size, simd_layout, mpi_layout);

      NN = Grid.NodeCount();

      LatticeSU4 z(&Grid);
      z = Zero();
      LatticeSU4 x(&Grid);
      x = Zero();
      LatticeSU4 y(&Grid);
      y = Zero();
      // double a=2.0;

      uint64_t Nloop = NLOOP;

      double start = usecond();
      for (int i = 0; i < Nloop; i++)
      {
        z = x * y;
      }
      double stop = usecond();
      double time = (stop - start) / Nloop * 1000;

      double flops = vol * Nc4 * Nc4 * (6 + (Nc4 - 1) * 8); // mul,add
      double bytes = 3.0 * vol * Nc4 * Nc4 * 2 * sizeof(RealF);
      std::cout << GridLogMessage << std::setprecision(3)
                << lat << "\t\t" << bytes << " \t\t" << bytes / time << "\t\t" << flops / time << "\t\t" << (stop - start) / 1000. / 1000.
                << "\t\t" << bytes / time / NN << std::endl;
    }
  };

  static double DWF(int Ls, int L)
  {
    RealD mass = 0.1;
    RealD M5 = 1.8;

    double mflops;
    double mflops_best = 0;
    double mflops_worst = 0;
    std::vector<double> mflops_all;

    ///////////////////////////////////////////////////////
    // Set/Get the layout & grid size
    ///////////////////////////////////////////////////////
    int threads = GridThread::GetThreads();
    Coordinate mpi = GridDefaultMpi();
    assert(mpi.size() == 4);
    Coordinate local({L, L, L, L});
    Coordinate latt4({local[0] * mpi[0], local[1] * mpi[1], local[2] * mpi[2], local[3] * mpi[3]});

    GridCartesian *TmpGrid = SpaceTimeGrid::makeFourDimGrid(latt4,
                                                            GridDefaultSimd(Nd, vComplex::Nsimd()),
                                                            GridDefaultMpi());
    uint64_t NP = TmpGrid->RankCount();
    uint64_t NN = TmpGrid->NodeCount();
    NN_global = NN;
    uint64_t SHM = NP / NN;

    ///////// Welcome message ////////////
    std::cout << GridLogMessage << "==================================================================================" << std::endl;
    std::cout << GridLogMessage << "Benchmark DWF on " << L << "^4 local volume " << std::endl;
    std::cout << GridLogMessage << "* Nc : " << Nc << std::endl;
    std::cout << GridLogMessage << "* Global volume : " << GridCmdVectorIntToString(latt4) << std::endl;
    std::cout << GridLogMessage << "* Ls : " << Ls << std::endl;
    std::cout << GridLogMessage << "* ranks : " << NP << std::endl;
    std::cout << GridLogMessage << "* nodes : " << NN << std::endl;
    std::cout << GridLogMessage << "* ranks/node : " << SHM << std::endl;
    std::cout << GridLogMessage << "* ranks geom : " << GridCmdVectorIntToString(mpi) << std::endl;
    std::cout << GridLogMessage << "* Using " << threads << " threads" << std::endl;
    std::cout << GridLogMessage << "==================================================================================" << std::endl;

    ///////// Lattice Init ////////////
    GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
    GridRedBlackCartesian *UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
    GridCartesian *FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
    GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);

    ///////// RNG Init ////////////
    std::vector<int> seeds4({1, 2, 3, 4});
    std::vector<int> seeds5({5, 6, 7, 8});
    GridParallelRNG RNG4(UGrid);
    RNG4.SeedFixedIntegers(seeds4);
    GridParallelRNG RNG5(FGrid);
    RNG5.SeedFixedIntegers(seeds5);
    std::cout << GridLogMessage << "Initialised RNGs" << std::endl;

    typedef DomainWallFermionF Action;
    typedef typename Action::FermionField Fermion;
    typedef LatticeGaugeFieldF Gauge;

    ///////// Source preparation ////////////
    Gauge Umu(UGrid);
    SU<Nc>::HotConfiguration(RNG4, Umu);
    Fermion src(FGrid);
    random(RNG5, src);
    Fermion src_e(FrbGrid);
    Fermion src_o(FrbGrid);
    Fermion r_e(FrbGrid);
    Fermion r_o(FrbGrid);
    Fermion r_eo(FGrid);
    Action Dw(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5);

    {
      pickCheckerboard(Even, src_e, src);
      pickCheckerboard(Odd, src_o, src);

      const int num_cases = 4;
      std::string fmt("G/S/C ; G/O/C ; G/S/S ; G/O/S ");

      controls Cases[] = {
          {WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsThenCompute, CartesianCommunicator::CommunicatorPolicyConcurrent},
          {WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsAndCompute, CartesianCommunicator::CommunicatorPolicyConcurrent},
          {WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsThenCompute, CartesianCommunicator::CommunicatorPolicySequential},
          {WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsAndCompute, CartesianCommunicator::CommunicatorPolicySequential}};

      for (int c = 0; c < num_cases; c++)
      {
        WilsonKernelsStatic::Comms = Cases[c].CommsOverlap;
        WilsonKernelsStatic::Opt = Cases[c].Opt;
        CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);

        std::cout << GridLogMessage << "==================================================================================" << std::endl;
        if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric)
          std::cout << GridLogMessage << "* Using GENERIC Nc WilsonKernels" << std::endl;
        if (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute)
          std::cout << GridLogMessage << "* Using Overlapped Comms/Compute" << std::endl;
        if (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute)
          std::cout << GridLogMessage << "* Using sequential Comms/Compute" << std::endl;
        std::cout << GridLogMessage << "* SINGLE precision " << std::endl;
        std::cout << GridLogMessage << "==================================================================================" << std::endl;

        int nwarm = 10;
        double t0 = usecond();
        FGrid->Barrier();
        for (int i = 0; i < nwarm; i++)
        {
          Dw.DhopEO(src_o, r_e, DaggerNo);
        }
        FGrid->Barrier();
        double t1 = usecond();
        uint64_t ncall = 500;

        FGrid->Broadcast(0, &ncall, sizeof(ncall));

        // std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;
        Dw.ZeroCounters();

        time_statistics timestat;
        std::vector<double> t_time(ncall);
        for (uint64_t i = 0; i < ncall; i++)
        {
          t0 = usecond();
          Dw.DhopEO(src_o, r_e, DaggerNo);
          t1 = usecond();
          t_time[i] = t1 - t0;
        }
        FGrid->Barrier();

        double volume = Ls;
        for (int mu = 0; mu < Nd; mu++)
          volume = volume * latt4[mu];

        // Nc=3 gives
        // 1344= 3*(2*8+6)*2*8 + 8*3*2*2 + 3*4*2*8
        // 1344 = Nc* (6+(Nc-1)*8)*2*Nd + Nd*Nc*2*2 + Nd*Nc*Ns*2
        // double flops=(1344.0*volume)/2;
#if 0
        double fps = Nc* (6+(Nc-1)*8)*Ns*Nd + Nd*Nc*Ns + Nd*Nc*Ns*2;
#else
        double fps = Nc * (6 + (Nc - 1) * 8) * Ns * Nd + 2 * Nd * Nc * Ns + 2 * Nd * Nc * Ns * 2;
#endif
        double flops = (fps * volume) / 2;
        double mf_hi, mf_lo, mf_err;

        timestat.statistics(t_time);
        mf_hi = flops / timestat.min;
        mf_lo = flops / timestat.max;
        mf_err = flops / timestat.min * timestat.err / timestat.mean;

        mflops = flops / timestat.mean;
        mflops_all.push_back(mflops);
        if (mflops_best == 0)
          mflops_best = mflops;
        if (mflops_worst == 0)
          mflops_worst = mflops;
        if (mflops > mflops_best)
          mflops_best = mflops;
        if (mflops < mflops_worst)
          mflops_worst = mflops;

        std::cout << GridLogMessage << "Deo FlopsPerSite is " << fps << std::endl;
        std::cout << GridLogMessage << std::fixed << std::setprecision(1) << "Deo mflop/s = " << mflops << " (" << mf_err << ") " << mf_lo << "-" << mf_hi << std::endl;
        std::cout << GridLogMessage << std::fixed << std::setprecision(1) << "Deo mflop/s per rank " << mflops / NP << std::endl;
        std::cout << GridLogMessage << std::fixed << std::setprecision(1) << "Deo mflop/s per node " << mflops / NN << std::endl;
      }

      std::cout << GridLogMessage << "==================================================================================" << std::endl;
      std::cout << GridLogMessage << L << "^4 x " << Ls << " Deo Best mflop/s = " << mflops_best << " ; " << mflops_best / NN << " per node " << std::endl;
      std::cout << GridLogMessage << L << "^4 x " << Ls << " Deo Worst mflop/s = " << mflops_worst << " ; " << mflops_worst / NN << " per node " << std::endl;
      std::cout << GridLogMessage << fmt << std::endl;
      std::cout << GridLogMessage;

      for (int i = 0; i < mflops_all.size(); i++)
      {
        std::cout << mflops_all[i] / NN << " ; ";
      }
      std::cout << std::endl;
      std::cout << GridLogMessage << "==================================================================================" << std::endl;
    }
    return mflops_best;
  }

  static double Staggered(int L)
  {
    double mflops;
    double mflops_best = 0;
    double mflops_worst = 0;
    std::vector<double> mflops_all;

    ///////////////////////////////////////////////////////
    // Set/Get the layout & grid size
    ///////////////////////////////////////////////////////
    int threads = GridThread::GetThreads();
    Coordinate mpi = GridDefaultMpi();
    assert(mpi.size() == 4);
    Coordinate local({L, L, L, L});
    Coordinate latt4({local[0] * mpi[0], local[1] * mpi[1], local[2] * mpi[2], local[3] * mpi[3]});

    GridCartesian *TmpGrid = SpaceTimeGrid::makeFourDimGrid(latt4,
                                                            GridDefaultSimd(Nd, vComplex::Nsimd()),
                                                            GridDefaultMpi());
    uint64_t NP = TmpGrid->RankCount();
    uint64_t NN = TmpGrid->NodeCount();
    NN_global = NN;
    uint64_t SHM = NP / NN;

    ///////// Welcome message ////////////
    std::cout << GridLogMessage << "==================================================================================" << std::endl;
    std::cout << GridLogMessage << "Benchmark ImprovedStaggered on " << L << "^4 local volume " << std::endl;
    std::cout << GridLogMessage << "* Global volume : " << GridCmdVectorIntToString(latt4) << std::endl;
    std::cout << GridLogMessage << "* ranks : " << NP << std::endl;
    std::cout << GridLogMessage << "* nodes : " << NN << std::endl;
    std::cout << GridLogMessage << "* ranks/node : " << SHM << std::endl;
    std::cout << GridLogMessage << "* ranks geom : " << GridCmdVectorIntToString(mpi) << std::endl;
    std::cout << GridLogMessage << "* Using " << threads << " threads" << std::endl;
    std::cout << GridLogMessage << "==================================================================================" << std::endl;

    ///////// Lattice Init ////////////
    GridCartesian *FGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
    GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid);

    ///////// RNG Init ////////////
    std::vector<int> seeds4({1, 2, 3, 4});
    GridParallelRNG RNG4(FGrid);
    RNG4.SeedFixedIntegers(seeds4);
    std::cout << GridLogMessage << "Initialised RNGs" << std::endl;

    RealD mass = 0.1;
    RealD c1 = 9.0 / 8.0;
    RealD c2 = -1.0 / 24.0;
    RealD u0 = 1.0;

    typedef ImprovedStaggeredFermionF Action;
    typedef typename Action::FermionField Fermion;
    typedef LatticeGaugeFieldF Gauge;

    Gauge Umu(FGrid);
    SU<Nc>::HotConfiguration(RNG4, Umu);

    typename Action::ImplParams params;
    Action Ds(Umu, Umu, *FGrid, *FrbGrid, mass, c1, c2, u0, params);

    ///////// Source preparation ////////////
    Fermion src(FGrid);
    random(RNG4, src);
    Fermion src_e(FrbGrid);
    Fermion src_o(FrbGrid);
    Fermion r_e(FrbGrid);
    Fermion r_o(FrbGrid);
    Fermion r_eo(FGrid);

    {
      pickCheckerboard(Even, src_e, src);
      pickCheckerboard(Odd, src_o, src);

      const int num_cases = 4;
      std::string fmt("G/S/C ; G/O/C ; G/S/S ; G/O/S ");

      controls Cases[] = {
          {StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsThenCompute, CartesianCommunicator::CommunicatorPolicyConcurrent},
          {StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsAndCompute, CartesianCommunicator::CommunicatorPolicyConcurrent},
          {StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsThenCompute, CartesianCommunicator::CommunicatorPolicySequential},
          {StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsAndCompute, CartesianCommunicator::CommunicatorPolicySequential}};

      for (int c = 0; c < num_cases; c++)
      {
        StaggeredKernelsStatic::Comms = Cases[c].CommsOverlap;
        StaggeredKernelsStatic::Opt = Cases[c].Opt;
        CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);

        std::cout << GridLogMessage << "==================================================================================" << std::endl;
        if (StaggeredKernelsStatic::Opt == StaggeredKernelsStatic::OptGeneric)
          std::cout << GridLogMessage << "* Using GENERIC Nc StaggeredKernels" << std::endl;
        if (StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsAndCompute)
          std::cout << GridLogMessage << "* Using Overlapped Comms/Compute" << std::endl;
        if (StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsThenCompute)
          std::cout << GridLogMessage << "* Using sequential Comms/Compute" << std::endl;
        std::cout << GridLogMessage << "* SINGLE precision " << std::endl;
        std::cout << GridLogMessage << "==================================================================================" << std::endl;

        int nwarm = 10;
        double t0 = usecond();
        FGrid->Barrier();
        for (int i = 0; i < nwarm; i++)
        {
          Ds.DhopEO(src_o, r_e, DaggerNo);
        }
        FGrid->Barrier();
        double t1 = usecond();
        uint64_t ncall = 500;

        FGrid->Broadcast(0, &ncall, sizeof(ncall));

        // std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;
        Ds.ZeroCounters();

        time_statistics timestat;
        std::vector<double> t_time(ncall);
        for (uint64_t i = 0; i < ncall; i++)
        {
          t0 = usecond();
          Ds.DhopEO(src_o, r_e, DaggerNo);
          t1 = usecond();
          t_time[i] = t1 - t0;
        }
        FGrid->Barrier();

        double volume = 1;
        for (int mu = 0; mu < Nd; mu++)
          volume = volume * latt4[mu];
        double flops = (1146.0 * volume) / 2;
        double mf_hi, mf_lo, mf_err;

        timestat.statistics(t_time);
        mf_hi = flops / timestat.min;
        mf_lo = flops / timestat.max;
        mf_err = flops / timestat.min * timestat.err / timestat.mean;

        mflops = flops / timestat.mean;
        mflops_all.push_back(mflops);
        if (mflops_best == 0)
          mflops_best = mflops;
        if (mflops_worst == 0)
          mflops_worst = mflops;
        if (mflops > mflops_best)
          mflops_best = mflops;
        if (mflops < mflops_worst)
          mflops_worst = mflops;

        std::cout << GridLogMessage << std::fixed << std::setprecision(1) << "Deo mflop/s = " << mflops << " (" << mf_err << ") " << mf_lo << "-" << mf_hi << std::endl;
        std::cout << GridLogMessage << std::fixed << std::setprecision(1) << "Deo mflop/s per rank " << mflops / NP << std::endl;
        std::cout << GridLogMessage << std::fixed << std::setprecision(1) << "Deo mflop/s per node " << mflops / NN << std::endl;
      }

      std::cout << GridLogMessage << "==================================================================================" << std::endl;
      std::cout << GridLogMessage << L << "^4 Deo Best mflop/s = " << mflops_best << " ; " << mflops_best / NN << " per node " << std::endl;
      std::cout << GridLogMessage << L << "^4 Deo Worst mflop/s = " << mflops_worst << " ; " << mflops_worst / NN << " per node " << std::endl;
      std::cout << GridLogMessage << fmt << std::endl;
      std::cout << GridLogMessage;

      for (int i = 0; i < mflops_all.size(); i++)
      {
        std::cout << mflops_all[i] / NN << " ; ";
      }
      std::cout << std::endl;
    }
    std::cout << GridLogMessage << "==================================================================================" << std::endl;
    return mflops_best;
  }
};

int main(int argc, char **argv)
{
  Grid_init(&argc, &argv);

  CartesianCommunicator::SetCommunicatorPolicy(CartesianCommunicator::CommunicatorPolicySequential);
#ifdef KNL
  LebesgueOrder::Block = std::vector<int>({8, 2, 2, 2});
#else
  LebesgueOrder::Block = std::vector<int>({2, 2, 2, 2});
#endif
  Benchmark::Decomposition();

  int do_su4 = 1;
  int do_memory = 1;
  int do_comms = 1;

  int sel = 4;
  std::vector<int> L_list({8, 12, 16, 24, 32});
  int selm1 = sel - 1;

  std::vector<double> wilson;
  std::vector<double> dwf4;
  std::vector<double> staggered;

  int Ls = 1;
  std::cout << GridLogMessage << "==================================================================================" << std::endl;
  std::cout << GridLogMessage << " Wilson dslash 4D vectorised" << std::endl;
  std::cout << GridLogMessage << "==================================================================================" << std::endl;
  for (int l = 0; l < L_list.size(); l++)
  {
    wilson.push_back(Benchmark::DWF(Ls, L_list[l]));
  }

  Ls = 12;
  std::cout << GridLogMessage << "==================================================================================" << std::endl;
  std::cout << GridLogMessage << " Domain wall dslash 4D vectorised" << std::endl;
  std::cout << GridLogMessage << "==================================================================================" << std::endl;
  for (int l = 0; l < L_list.size(); l++)
  {
    double result = Benchmark::DWF(Ls, L_list[l]);
    dwf4.push_back(result);
  }

  std::cout << GridLogMessage << "==================================================================================" << std::endl;
  std::cout << GridLogMessage << " Improved Staggered dslash 4D vectorised" << std::endl;
  std::cout << GridLogMessage << "==================================================================================" << std::endl;
  for (int l = 0; l < L_list.size(); l++)
  {
    double result = Benchmark::Staggered(L_list[l]);
    staggered.push_back(result);
  }

  std::cout << GridLogMessage << "==================================================================================" << std::endl;
  std::cout << GridLogMessage << " Summary table Ls=" << Ls << std::endl;
  std::cout << GridLogMessage << "==================================================================================" << std::endl;
  std::cout << GridLogMessage << "L \t\t Wilson \t\t DWF4 \t\t Staggered" << std::endl;
  for (int l = 0; l < L_list.size(); l++)
  {
    std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] << " \t\t " << dwf4[l] << " \t\t " << staggered[l] << std::endl;
  }
  std::cout << GridLogMessage << "==================================================================================" << std::endl;

  int NN = NN_global;
  if (do_memory)
  {
    std::cout << GridLogMessage << "==================================================================================" << std::endl;
    std::cout << GridLogMessage << " Memory benchmark " << std::endl;
    std::cout << GridLogMessage << "==================================================================================" << std::endl;
    Benchmark::Memory();
  }

  if (do_su4)
  {
    std::cout << GridLogMessage << "==================================================================================" << std::endl;
    std::cout << GridLogMessage << " SU(4) benchmark " << std::endl;
    std::cout << GridLogMessage << "==================================================================================" << std::endl;
    Benchmark::SU4();
  }

  if (do_comms)
  {
    std::cout << GridLogMessage << "==================================================================================" << std::endl;
    std::cout << GridLogMessage << " Communications benchmark " << std::endl;
    std::cout << GridLogMessage << "==================================================================================" << std::endl;
    Benchmark::Comms();
  }

  std::cout << GridLogMessage << "==================================================================================" << std::endl;
  std::cout << GridLogMessage << " Per Node Summary table Ls=" << Ls << std::endl;
  std::cout << GridLogMessage << "==================================================================================" << std::endl;
  std::cout << GridLogMessage << " L \t\t Wilson\t\t DWF4\t\t Staggered " << std::endl;
  for (int l = 0; l < L_list.size(); l++)
  {
    std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] / NN << " \t " << dwf4[l] / NN << " \t " << staggered[l] / NN << std::endl;
  }
  std::cout << GridLogMessage << "==================================================================================" << std::endl;

  std::cout << GridLogMessage << "==================================================================================" << std::endl;
  std::cout << GridLogMessage << " Comparison point result: " << 0.5 * (dwf4[sel] + dwf4[selm1]) / NN << " Mflop/s per node" << std::endl;
  std::cout << GridLogMessage << " Comparison point is 0.5*(" << dwf4[sel] / NN << "+" << dwf4[selm1] / NN << ") " << std::endl;
  std::cout << std::setprecision(3);
  std::cout << GridLogMessage << "==================================================================================" << std::endl;

  Grid_finalize();
}
@ -21,231 +21,245 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
|
|
||||||
struct time_statistics{
|
struct time_statistics
|
||||||
|
{
|
||||||
double mean;
|
double mean;
|
||||||
double err;
|
double err;
|
||||||
double min;
|
double min;
|
||||||
double max;
|
double max;
|
||||||
|
|
||||||
void statistics(std::vector<double> v){
|
void statistics(std::vector<double> v)
|
||||||
double sum = std::accumulate(v.begin(), v.end(), 0.0);
|
{
|
||||||
mean = sum / v.size();
|
double sum = std::accumulate(v.begin(), v.end(), 0.0);
|
||||||
|
mean = sum / v.size();
|
||||||
|
|
||||||
std::vector<double> diff(v.size());
|
std::vector<double> diff(v.size());
|
||||||
std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; });
|
std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; });
|
||||||
double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0);
|
double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0);
|
||||||
err = std::sqrt(sq_sum / (v.size()*(v.size() - 1)));
|
err = std::sqrt(sq_sum / (v.size() * (v.size() - 1)));
|
||||||
|
|
||||||
auto result = std::minmax_element(v.begin(), v.end());
|
auto result = std::minmax_element(v.begin(), v.end());
|
||||||
min = *result.first;
|
min = *result.first;
|
||||||
max = *result.second;
|
max = *result.second;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
void header(){
|
void header()
|
||||||
std::cout <<GridLogMessage << " L "<<"\t"<<" Ls "<<"\t"
|
|
||||||
<<std::setw(11)<<"bytes\t\t"<<"MB/s uni"<<"\t"<<"MB/s bidi"<<std::endl;
|
|
||||||
};
|
|
||||||
|
|
||||||
int main (int argc, char ** argv)
|
|
||||||
{
|
{
|
||||||
Grid_init(&argc,&argv);
|
std::cout << GridLogMessage << " L "
|
||||||
|
<< "\t"
|
||||||
|
<< " Ls "
|
||||||
|
<< "\t" << std::setw(11) << "bytes\t\t"
|
||||||
|
<< "MB/s uni"
|
||||||
|
<< "\t"
|
||||||
|
<< "MB/s bidi" << std::endl;
|
||||||
|
};
|
||||||
|
|
||||||
Coordinate simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd());
|
int main(int argc, char **argv)
|
||||||
Coordinate mpi_layout = GridDefaultMpi();
|
{
|
||||||
|
Grid_init(&argc, &argv);
|
||||||
|
|
||||||
|
Coordinate simd_layout = GridDefaultSimd(Nd, vComplexD::Nsimd());
|
||||||
|
Coordinate mpi_layout = GridDefaultMpi();
|
||||||
int threads = GridThread::GetThreads();
|
int threads = GridThread::GetThreads();
|
||||||
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads"
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
int Nloop=250;
|
int Nloop = 250;
|
||||||
int nmu=0;
|
int nmu = 0;
|
||||||
int maxlat=32;
|
int maxlat = 32;
|
||||||
for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++;
|
for (int mu = 0; mu < Nd; mu++)
|
||||||
|
if (mpi_layout[mu] > 1)
|
||||||
|
nmu++;
|
||||||
|
|
||||||
std::cout << GridLogMessage << "Number of iterations to average: "<< Nloop << std::endl;
|
std::cout << GridLogMessage << "Number of iterations to average: " << Nloop
|
||||||
|
<< std::endl;
|
||||||
std::vector<double> t_time(Nloop);
|
std::vector<double> t_time(Nloop);
|
||||||
// time_statistics timestat;
|
// time_statistics timestat;
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
std::cout << GridLogMessage
|
||||||
std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange from host memory "<<std::endl;
|
<< "========================================================================="
|
||||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
"==========================="
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< "= Benchmarking sequential halo exchange from host memory " << std::endl;
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< "========================================================================="
|
||||||
|
"==========================="
|
||||||
|
<< std::endl;
|
||||||
header();
|
header();
|
||||||
|
|
||||||
for(int lat=8;lat<=maxlat;lat+=4){
|
for (int lat = 8; lat <= maxlat; lat += 4)
|
||||||
for(int Ls=8;Ls<=8;Ls*=2){
|
{
|
||||||
|
for (int Ls = 8; Ls <= 8; Ls *= 2)
|
||||||
|
{
|
||||||
|
|
||||||
Coordinate latt_size ({lat*mpi_layout[0],
|
Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2],
|
||||||
lat*mpi_layout[1],
|
lat * mpi_layout[3]});
|
||||||
lat*mpi_layout[2],
|
|
||||||
lat*mpi_layout[3]});
|
|
||||||
|
|
||||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
|
||||||
RealD Nrank = Grid._Nprocessors;
|
RealD Nrank = Grid._Nprocessors;
|
||||||
RealD Nnode = Grid.NodeCount();
|
RealD Nnode = Grid.NodeCount();
|
||||||
RealD ppn = Nrank/Nnode;
|
RealD ppn = Nrank / Nnode;
|
||||||
|
|
||||||
std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8);
|
std::vector<std::vector<HalfSpinColourVectorD>> xbuf(8);
|
||||||
std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8);
|
std::vector<std::vector<HalfSpinColourVectorD>> rbuf(8);
|
||||||
|
|
||||||
for(int mu=0;mu<8;mu++){
|
for (int mu = 0; mu < 8; mu++)
|
||||||
xbuf[mu].resize(lat*lat*lat*Ls);
|
{
|
||||||
rbuf[mu].resize(lat*lat*lat*Ls);
|
xbuf[mu].resize(lat * lat * lat * Ls);
|
||||||
|
rbuf[mu].resize(lat * lat * lat * Ls);
|
||||||
}
|
}
|
||||||
uint64_t bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
uint64_t bytes = lat * lat * lat * Ls * sizeof(HalfSpinColourVectorD);
|
||||||
|
|
||||||
int ncomm;
|
int ncomm;
|
||||||
|
|
||||||
for(int mu=0;mu<4;mu++){
|
for (int mu = 0; mu < 4; mu++)
|
||||||
if (mpi_layout[mu]>1 ) {
|
{
|
||||||
double start=usecond();
|
if (mpi_layout[mu] > 1)
|
||||||
for(int i=0;i<Nloop;i++){
|
{
|
||||||
|
double start = usecond();
|
||||||
|
for (int i = 0; i < Nloop; i++)
|
||||||
|
{
|
||||||
|
|
||||||
ncomm=0;
|
ncomm = 0;
|
||||||
|
|
||||||
|
|
||||||
ncomm++;
|
|
||||||
int comm_proc=1;
|
|
||||||
int xmit_to_rank;
|
|
||||||
int recv_from_rank;
|
|
||||||
|
|
||||||
{
|
|
||||||
std::vector<CommsRequest_t> requests;
|
|
||||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
|
||||||
Grid.SendToRecvFrom((void *)&xbuf[mu][0],
|
|
||||||
xmit_to_rank,
|
|
||||||
(void *)&rbuf[mu][0],
|
|
||||||
recv_from_rank,
|
|
||||||
bytes);
|
|
||||||
}
|
|
||||||
|
|
||||||
comm_proc = mpi_layout[mu]-1;
|
ncomm++;
|
||||||
{
|
int comm_proc = 1;
|
||||||
std::vector<CommsRequest_t> requests;
|
int xmit_to_rank;
|
||||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
int recv_from_rank;
|
||||||
Grid.SendToRecvFrom((void *)&xbuf[mu+4][0],
|
|
||||||
xmit_to_rank,
|
|
||||||
(void *)&rbuf[mu+4][0],
|
|
||||||
recv_from_rank,
|
|
||||||
bytes);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Grid.Barrier();
|
|
||||||
double stop=usecond();
|
|
||||||
double mean=(stop-start)/Nloop;
|
|
||||||
double dbytes = bytes*ppn;
|
|
||||||
double xbytes = dbytes*2.0*ncomm;
|
|
||||||
double rbytes = xbytes;
|
|
||||||
double bidibytes = xbytes+rbytes;
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
|
{
|
||||||
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)<<" "
|
std::vector<CommsRequest_t> requests;
|
||||||
<<std::right<< xbytes/mean<<" "
|
Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
|
||||||
<< "\t\t"<<std::setw(7)<< bidibytes/mean<< std::endl;
|
Grid.SendToRecvFrom((void *)&xbuf[mu][0], xmit_to_rank,
|
||||||
|
(void *)&rbuf[mu][0], recv_from_rank, bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
comm_proc = mpi_layout[mu] - 1;
|
||||||
|
{
|
||||||
|
std::vector<CommsRequest_t> requests;
|
||||||
|
Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
|
||||||
|
Grid.SendToRecvFrom((void *)&xbuf[mu + 4][0], xmit_to_rank,
|
||||||
|
(void *)&rbuf[mu + 4][0], recv_from_rank, bytes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Grid.Barrier();
|
||||||
|
double stop = usecond();
|
||||||
|
double mean = (stop - start) / Nloop;
|
||||||
|
double dbytes = bytes * ppn;
|
||||||
|
double xbytes = dbytes * 2.0 * ncomm;
|
||||||
|
double rbytes = xbytes;
|
||||||
|
double bidibytes = xbytes + rbytes;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << std::setw(4) << lat << "\t" << Ls << "\t"
|
||||||
}
|
<< std::setw(11) << bytes << std::fixed << std::setprecision(1)
|
||||||
|
<< std::setw(7) << " " << std::right << xbytes / mean << " "
|
||||||
|
<< "\t\t" << std::setw(7) << bidibytes / mean << std::endl;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
std::cout << GridLogMessage
|
||||||
std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange from GPU memory "<<std::endl;
|
<< "========================================================================="
|
||||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
"==========================="
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< "= Benchmarking sequential halo exchange from GPU memory " << std::endl;
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< "========================================================================="
|
||||||
|
"==========================="
|
||||||
|
<< std::endl;
|
||||||
header();
|
header();
|
||||||
|
|
||||||
for(int lat=8;lat<=maxlat;lat+=4){
|
for (int lat = 8; lat <= maxlat; lat += 4)
|
||||||
for(int Ls=8;Ls<=8;Ls*=2){
|
{
|
||||||
|
for (int Ls = 8; Ls <= 8; Ls *= 2)
|
||||||
|
{
|
||||||
|
|
||||||
Coordinate latt_size ({lat*mpi_layout[0],
|
Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2],
|
||||||
lat*mpi_layout[1],
|
lat * mpi_layout[3]});
|
||||||
lat*mpi_layout[2],
|
|
||||||
lat*mpi_layout[3]});
|
|
||||||
|
|
||||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
|
||||||
RealD Nrank = Grid._Nprocessors;
|
RealD Nrank = Grid._Nprocessors;
|
||||||
RealD Nnode = Grid.NodeCount();
|
RealD Nnode = Grid.NodeCount();
|
||||||
RealD ppn = Nrank/Nnode;
|
RealD ppn = Nrank / Nnode;
|
||||||
|
|
||||||
|
|
||||||
std::vector<HalfSpinColourVectorD *> xbuf(8);
|
std::vector<HalfSpinColourVectorD *> xbuf(8);
|
||||||
std::vector<HalfSpinColourVectorD *> rbuf(8);
|
std::vector<HalfSpinColourVectorD *> rbuf(8);
|
||||||
|
|
||||||
uint64_t bytes = lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
uint64_t bytes = lat * lat * lat * Ls * sizeof(HalfSpinColourVectorD);
|
||||||
for(int d=0;d<8;d++){
|
for (int d = 0; d < 8; d++)
|
||||||
xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
|
{
|
||||||
rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
|
xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
|
||||||
|
rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
int ncomm;
|
int ncomm;
|
||||||
|
|
||||||
for(int mu=0;mu<4;mu++){
|
for (int mu = 0; mu < 4; mu++)
|
||||||
if (mpi_layout[mu]>1 ) {
|
{
|
||||||
double start=usecond();
|
if (mpi_layout[mu] > 1)
|
||||||
for(int i=0;i<Nloop;i++){
|
{
|
||||||
|
double start = usecond();
|
||||||
|
for (int i = 0; i < Nloop; i++)
|
||||||
|
{
|
||||||
|
|
||||||
ncomm=0;
|
ncomm = 0;
|
||||||
|
|
||||||
|
|
||||||
ncomm++;
|
|
||||||
int comm_proc=1;
|
|
||||||
int xmit_to_rank;
|
|
||||||
int recv_from_rank;
|
|
||||||
|
|
||||||
{
|
|
||||||
std::vector<CommsRequest_t> requests;
|
|
||||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
|
||||||
Grid.SendToRecvFrom((void *)&xbuf[mu][0],
|
|
||||||
xmit_to_rank,
|
|
||||||
(void *)&rbuf[mu][0],
|
|
||||||
recv_from_rank,
|
|
||||||
bytes);
|
|
||||||
}
|
|
||||||
|
|
||||||
comm_proc = mpi_layout[mu]-1;
|
ncomm++;
|
||||||
{
|
int comm_proc = 1;
|
||||||
std::vector<CommsRequest_t> requests;
|
int xmit_to_rank;
|
||||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
int recv_from_rank;
|
||||||
Grid.SendToRecvFrom((void *)&xbuf[mu+4][0],
|
|
||||||
xmit_to_rank,
|
|
||||||
(void *)&rbuf[mu+4][0],
|
|
||||||
recv_from_rank,
|
|
||||||
bytes);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Grid.Barrier();
|
|
||||||
double stop=usecond();
|
|
||||||
double mean=(stop-start)/Nloop;
|
|
||||||
double dbytes = bytes*ppn;
|
|
||||||
double xbytes = dbytes*2.0*ncomm;
|
|
||||||
double rbytes = xbytes;
|
|
||||||
double bidibytes = xbytes+rbytes;
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
|
{
|
||||||
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)<<" "
|
std::vector<CommsRequest_t> requests;
|
||||||
<<std::right<< xbytes/mean<<" "
|
Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
|
||||||
<< "\t\t"<<std::setw(7)<< bidibytes/mean<< std::endl;
|
Grid.SendToRecvFrom((void *)&xbuf[mu][0], xmit_to_rank,
|
||||||
|
(void *)&rbuf[mu][0], recv_from_rank, bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
comm_proc = mpi_layout[mu] - 1;
|
||||||
|
{
|
||||||
|
std::vector<CommsRequest_t> requests;
|
||||||
|
Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
|
||||||
|
Grid.SendToRecvFrom((void *)&xbuf[mu + 4][0], xmit_to_rank,
|
||||||
|
(void *)&rbuf[mu + 4][0], recv_from_rank, bytes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Grid.Barrier();
|
||||||
|
double stop = usecond();
|
||||||
|
double mean = (stop - start) / Nloop;
|
||||||
|
double dbytes = bytes * ppn;
|
||||||
|
double xbytes = dbytes * 2.0 * ncomm;
|
||||||
|
double rbytes = xbytes;
|
||||||
|
double bidibytes = xbytes + rbytes;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << std::setw(4) << lat << "\t" << Ls << "\t"
|
||||||
}
|
<< std::setw(11) << bytes << std::fixed << std::setprecision(1)
|
||||||
|
<< std::setw(7) << " " << std::right << xbytes / mean << " "
|
||||||
|
<< "\t\t" << std::setw(7) << bidibytes / mean << std::endl;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int d=0;d<8;d++){
|
for (int d = 0; d < 8; d++)
|
||||||
acceleratorFreeDevice(xbuf[d]);
|
{
|
||||||
acceleratorFreeDevice(rbuf[d]);
|
acceleratorFreeDevice(xbuf[d]);
|
||||||
|
acceleratorFreeDevice(rbuf[d]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::cout << GridLogMessage
|
||||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
<< "========================================================================="
|
||||||
std::cout<<GridLogMessage << "= All done; Bye Bye"<<std::endl;
|
"==========================="
|
||||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
<< std::endl;
|
||||||
|
std::cout << GridLogMessage << "= All done; Bye Bye" << std::endl;
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< "========================================================================="
|
||||||
|
"==========================="
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
Copyright © 2015 Peter Boyle <paboyle@ph.ed.ac.uk>
|
Copyright © 2015 Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Copyright © 2022 Antonin Portelli <antonin.portelli@me.com>
|
Copyright © 2022 Antonin Portelli <antonin.portelli@me.com>
|
||||||
|
Copyright © 2023 Simon Bürger <simon.buerger@rwth-aachen.de>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or
|
This program is free software; you can redistribute it and/or
|
||||||
modify it under the terms of the GNU General Public License
|
modify it under the terms of the GNU General Public License
|
||||||
@ -16,6 +17,7 @@ You should have received a copy of the GNU General Public License
|
|||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "json.hpp"
|
||||||
#include <Grid/Grid.h>
|
#include <Grid/Grid.h>
|
||||||
#ifdef GRID_CUDA
|
#ifdef GRID_CUDA
|
||||||
#define CUDA_PROFILE
|
#define CUDA_PROFILE
|
||||||
@ -28,17 +30,13 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
|
|
||||||
template <class d>
|
template <class d> struct scal
|
||||||
struct scal
|
|
||||||
{
|
{
|
||||||
d internal;
|
d internal;
|
||||||
};
|
};
|
||||||
|
|
||||||
Gamma::Algebra Gmu[] = {
|
Gamma::Algebra Gmu[] = {Gamma::Algebra::GammaX, Gamma::Algebra::GammaY,
|
||||||
Gamma::Algebra::GammaX,
|
Gamma::Algebra::GammaZ, Gamma::Algebra::GammaT};
|
||||||
Gamma::Algebra::GammaY,
|
|
||||||
Gamma::Algebra::GammaZ,
|
|
||||||
Gamma::Algebra::GammaT};
|
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
@ -48,24 +46,41 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
Coordinate latt4 = GridDefaultLatt();
|
Coordinate latt4 = GridDefaultLatt();
|
||||||
int Ls = 16;
|
int Ls = 16;
|
||||||
|
std::string json_filename = ""; // empty indicates no json output
|
||||||
|
nlohmann::json json;
|
||||||
|
|
||||||
|
// benchmark specific command line arguments
|
||||||
for (int i = 0; i < argc; i++)
|
for (int i = 0; i < argc; i++)
|
||||||
|
{
|
||||||
if (std::string(argv[i]) == "-Ls")
|
if (std::string(argv[i]) == "-Ls")
|
||||||
{
|
{
|
||||||
std::stringstream ss(argv[i + 1]);
|
std::stringstream ss(argv[i + 1]);
|
||||||
ss >> Ls;
|
ss >> Ls;
|
||||||
}
|
}
|
||||||
|
if (std::string(argv[i]) == "--json-out")
|
||||||
|
json_filename = argv[i + 1];
|
||||||
|
}
|
||||||
|
|
||||||
GridLogLayout();
|
GridLogLayout();
|
||||||
|
|
||||||
long unsigned int single_site_flops = 8 * Nc * (7 + 16 * Nc);
|
long unsigned int single_site_flops = 8 * Nc * (7 + 16 * Nc);
|
||||||
|
|
||||||
GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
|
json["single_site_flops"] = single_site_flops;
|
||||||
|
|
||||||
|
GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(
|
||||||
|
GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
|
||||||
GridRedBlackCartesian *UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
GridRedBlackCartesian *UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
|
|
||||||
GridCartesian *FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
|
GridCartesian *FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
|
||||||
GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
|
GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
|
||||||
|
|
||||||
|
json["grid"] = FGrid->FullDimensions().toVector();
|
||||||
|
json["local_grid"] = FGrid->LocalDimensions().toVector();
|
||||||
|
|
||||||
std::cout << GridLogMessage << "Making s innermost grids" << std::endl;
|
std::cout << GridLogMessage << "Making s innermost grids" << std::endl;
|
||||||
GridCartesian *sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(), GridDefaultMpi());
|
GridCartesian *sUGrid =
|
||||||
|
SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(), GridDefaultMpi());
|
||||||
|
|
||||||
GridRedBlackCartesian *sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
|
GridRedBlackCartesian *sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
|
||||||
GridCartesian *sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls, UGrid);
|
GridCartesian *sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls, UGrid);
|
||||||
GridRedBlackCartesian *sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls, UGrid);
|
GridRedBlackCartesian *sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls, UGrid);
|
||||||
@ -177,13 +192,28 @@ int main(int argc, char **argv)
|
|||||||
RealD NP = UGrid->_Nprocessors;
|
RealD NP = UGrid->_Nprocessors;
|
||||||
RealD NN = UGrid->NodeCount();
|
RealD NN = UGrid->NodeCount();
|
||||||
|
|
||||||
std::cout << GridLogMessage << "*****************************************************************" << std::endl;
|
json["ranks"] = NP;
|
||||||
std::cout << GridLogMessage << "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" << std::endl;
|
json["nodes"] = NN;
|
||||||
std::cout << GridLogMessage << "*****************************************************************" << std::endl;
|
|
||||||
std::cout << GridLogMessage << "*****************************************************************" << std::endl;
|
std::cout << GridLogMessage
|
||||||
std::cout << GridLogMessage << "* Benchmarking DomainWallFermionR::Dhop " << std::endl;
|
<< "*****************************************************************"
|
||||||
std::cout << GridLogMessage << "* Vectorising space-time by " << vComplexF::Nsimd() << std::endl;
|
<< std::endl;
|
||||||
std::cout << GridLogMessage << "* VComplexF size is " << sizeof(vComplexF) << " B" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm"
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< "*****************************************************************"
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< "*****************************************************************"
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< "* Benchmarking DomainWallFermionR::Dhop " << std::endl;
|
||||||
|
std::cout << GridLogMessage << "* Vectorising space-time by " << vComplexF::Nsimd()
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << GridLogMessage << "* VComplexF size is " << sizeof(vComplexF) << " B"
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
if (sizeof(RealF) == 4)
|
if (sizeof(RealF) == 4)
|
||||||
std::cout << GridLogMessage << "* SINGLE precision " << std::endl;
|
std::cout << GridLogMessage << "* SINGLE precision " << std::endl;
|
||||||
if (sizeof(RealF) == 8)
|
if (sizeof(RealF) == 8)
|
||||||
@ -199,8 +229,11 @@ int main(int argc, char **argv)
|
|||||||
if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll)
|
if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll)
|
||||||
std::cout << GridLogMessage << "* Using Nc=3 WilsonKernels" << std::endl;
|
std::cout << GridLogMessage << "* Using Nc=3 WilsonKernels" << std::endl;
|
||||||
if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm)
|
if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm)
|
||||||
|
|
||||||
std::cout << GridLogMessage << "* Using Asm Nc=3 WilsonKernels" << std::endl;
|
std::cout << GridLogMessage << "* Using Asm Nc=3 WilsonKernels" << std::endl;
|
||||||
std::cout << GridLogMessage << "*****************************************************************" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "*****************************************************************"
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
DomainWallFermionF Dw(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5);
|
DomainWallFermionF Dw(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5);
|
||||||
int ncall = 300;
|
int ncall = 300;
|
||||||
@ -230,19 +263,38 @@ int main(int argc, char **argv)
|
|||||||
auto simdwidth = sizeof(vComplex);
|
auto simdwidth = sizeof(vComplex);
|
||||||
|
|
||||||
// RF: Nd Wilson * Ls, Nd gauge * Ls, Nc colors
|
// RF: Nd Wilson * Ls, Nd gauge * Ls, Nc colors
|
||||||
double data_rf = volume * ((2 * Nd + 1) * Nd * Nc + 2 * Nd * Nc * Nc) * simdwidth / nsimd * ncall / (1024. * 1024. * 1024.);
|
double data_rf = volume * ((2 * Nd + 1) * Nd * Nc + 2 * Nd * Nc * Nc) * simdwidth /
|
||||||
|
nsimd * ncall / (1024. * 1024. * 1024.);
|
||||||
|
|
||||||
// mem: Nd Wilson * Ls, Nd gauge, Nc colors
|
// mem: Nd Wilson * Ls, Nd gauge, Nc colors
|
||||||
double data_mem = (volume * (2 * Nd + 1) * Nd * Nc + (volume / Ls) * 2 * Nd * Nc * Nc) * simdwidth / nsimd * ncall / (1024. * 1024. * 1024.);
|
double data_mem =
|
||||||
|
(volume * (2 * Nd + 1) * Nd * Nc + (volume / Ls) * 2 * Nd * Nc * Nc) * simdwidth /
|
||||||
|
nsimd * ncall / (1024. * 1024. * 1024.);
|
||||||
|
|
||||||
std::cout << GridLogMessage << "Called Dw " << ncall << " times in " << t1 - t0 << " us" << std::endl;
|
json["Dw"]["calls"] = ncall;
|
||||||
|
json["Dw"]["time"] = t1 - t0;
|
||||||
|
json["Dw"]["mflops"] = flops / (t1 - t0);
|
||||||
|
json["Dw"]["mflops_per_rank"] = flops / (t1 - t0) / NP;
|
||||||
|
json["Dw"]["mflops_per_node"] = flops / (t1 - t0) / NN;
|
||||||
|
json["Dw"]["RF"] = 1000000. * data_rf / ((t1 - t0));
|
||||||
|
json["Dw"]["mem"] = 1000000. * data_mem / ((t1 - t0));
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "Called Dw " << ncall << " times in " << t1 - t0
|
||||||
|
<< " us" << std::endl;
|
||||||
// std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
// std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||||
// std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
// std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||||
std::cout << GridLogMessage << "mflop/s = " << flops / (t1 - t0) << std::endl;
|
std::cout << GridLogMessage << "mflop/s = " << flops / (t1 - t0) << std::endl;
|
||||||
std::cout << GridLogMessage << "mflop/s per rank = " << flops / (t1 - t0) / NP << std::endl;
|
std::cout << GridLogMessage << "mflop/s per rank = " << flops / (t1 - t0) / NP
|
||||||
std::cout << GridLogMessage << "mflop/s per node = " << flops / (t1 - t0) / NN << std::endl;
|
<< std::endl;
|
||||||
std::cout << GridLogMessage << "RF GiB/s (base 2) = " << 1000000. * data_rf / ((t1 - t0)) << std::endl;
|
std::cout << GridLogMessage << "mflop/s per node = " << flops / (t1 - t0) / NN
|
||||||
std::cout << GridLogMessage << "mem GiB/s (base 2) = " << 1000000. * data_mem / ((t1 - t0)) << std::endl;
|
<< std::endl;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< "RF GiB/s (base 2) = " << 1000000. * data_rf / ((t1 - t0))
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< "mem GiB/s (base 2) = " << 1000000. * data_mem / ((t1 - t0))
|
||||||
|
<< std::endl;
|
||||||
err = ref - result;
|
err = ref - result;
|
||||||
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
|
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
|
||||||
// exit(0);
|
// exit(0);
|
||||||
@ -313,7 +365,10 @@ int main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
// dump=1;
|
// dump=1;
|
||||||
Dw.Dhop(src, result, 1);
|
Dw.Dhop(src, result, 1);
|
||||||
std::cout << GridLogMessage << "Compare to naive wilson implementation Dag to verify correctness" << std::endl;
|
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< "Compare to naive wilson implementation Dag to verify correctness"
|
||||||
|
<< std::endl;
|
||||||
std::cout << GridLogMessage << "Called DwDag" << std::endl;
|
std::cout << GridLogMessage << "Called DwDag" << std::endl;
|
||||||
std::cout << GridLogMessage << "norm dag result " << norm2(result) << std::endl;
|
std::cout << GridLogMessage << "norm dag result " << norm2(result) << std::endl;
|
||||||
std::cout << GridLogMessage << "norm dag ref " << norm2(ref) << std::endl;
|
std::cout << GridLogMessage << "norm dag ref " << norm2(ref) << std::endl;
|
||||||
@ -333,7 +388,8 @@ int main(int argc, char **argv)
|
|||||||
LatticeFermionF r_o(FrbGrid);
|
LatticeFermionF r_o(FrbGrid);
|
||||||
LatticeFermionF r_eo(FGrid);
|
LatticeFermionF r_eo(FGrid);
|
||||||
|
|
||||||
std::cout << GridLogMessage << "Calling Deo and Doe and //assert Deo+Doe == Dunprec" << std::endl;
|
std::cout << GridLogMessage << "Calling Deo and Doe and //assert Deo+Doe == Dunprec"
|
||||||
|
<< std::endl;
|
||||||
pickCheckerboard(Even, src_e, src);
|
pickCheckerboard(Even, src_e, src);
|
||||||
pickCheckerboard(Odd, src_o, src);
|
pickCheckerboard(Odd, src_o, src);
|
||||||
|
|
||||||
@ -341,9 +397,14 @@ int main(int argc, char **argv)
|
|||||||
std::cout << GridLogMessage << "src_o" << norm2(src_o) << std::endl;
|
std::cout << GridLogMessage << "src_o" << norm2(src_o) << std::endl;
|
||||||
|
|
||||||
// S-direction is INNERMOST and takes no part in the parity.
|
// S-direction is INNERMOST and takes no part in the parity.
|
||||||
std::cout << GridLogMessage << "*********************************************************" << std::endl;
|
std::cout << GridLogMessage
|
||||||
std::cout << GridLogMessage << "* Benchmarking DomainWallFermionF::DhopEO " << std::endl;
|
|
||||||
std::cout << GridLogMessage << "* Vectorising space-time by " << vComplexF::Nsimd() << std::endl;
|
<< "*********************************************************" << std::endl;
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< "* Benchmarking DomainWallFermionF::DhopEO " << std::endl;
|
||||||
|
std::cout << GridLogMessage << "* Vectorising space-time by " << vComplexF::Nsimd()
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
if (sizeof(RealF) == 4)
|
if (sizeof(RealF) == 4)
|
||||||
std::cout << GridLogMessage << "* SINGLE precision " << std::endl;
|
std::cout << GridLogMessage << "* SINGLE precision " << std::endl;
|
||||||
if (sizeof(RealF) == 8)
|
if (sizeof(RealF) == 8)
|
||||||
@ -360,7 +421,9 @@ int main(int argc, char **argv)
|
|||||||
std::cout << GridLogMessage << "* Using Nc=3 WilsonKernels" << std::endl;
|
std::cout << GridLogMessage << "* Using Nc=3 WilsonKernels" << std::endl;
|
||||||
if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm)
|
if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm)
|
||||||
std::cout << GridLogMessage << "* Using Asm Nc=3 WilsonKernels" << std::endl;
|
std::cout << GridLogMessage << "* Using Asm Nc=3 WilsonKernels" << std::endl;
|
||||||
std::cout << GridLogMessage << "*********************************************************" << std::endl;
|
std::cout << GridLogMessage
|
||||||
|
<< "*********************************************************" << std::endl;
|
||||||
|
|
||||||
{
|
{
|
||||||
Dw.ZeroCounters();
|
Dw.ZeroCounters();
|
||||||
FGrid->Barrier();
|
FGrid->Barrier();
|
||||||
@ -386,9 +449,18 @@ int main(int argc, char **argv)
|
|||||||
volume = volume * latt4[mu];
|
volume = volume * latt4[mu];
|
||||||
double flops = (single_site_flops * volume * ncall) / 2.0;
|
double flops = (single_site_flops * volume * ncall) / 2.0;
|
||||||
|
|
||||||
|
json["Deo"]["calls"] = ncall;
|
||||||
|
json["Deo"]["time"] = t1 - t0;
|
||||||
|
json["Deo"]["mflops"] = flops / (t1 - t0);
|
||||||
|
json["Deo"]["mflops_per_rank"] = flops / (t1 - t0) / NP;
|
||||||
|
json["Deo"]["mflops_per_node"] = flops / (t1 - t0) / NN;
|
||||||
|
|
||||||
std::cout << GridLogMessage << "Deo mflop/s = " << flops / (t1 - t0) << std::endl;
|
std::cout << GridLogMessage << "Deo mflop/s = " << flops / (t1 - t0) << std::endl;
|
||||||
std::cout << GridLogMessage << "Deo mflop/s per rank " << flops / (t1 - t0) / NP << std::endl;
|
std::cout << GridLogMessage << "Deo mflop/s per rank " << flops / (t1 - t0) / NP
|
||||||
std::cout << GridLogMessage << "Deo mflop/s per node " << flops / (t1 - t0) / NN << std::endl;
|
<< std::endl;
|
||||||
|
std::cout << GridLogMessage << "Deo mflop/s per node " << flops / (t1 - t0) / NN
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
Dw.Report();
|
Dw.Report();
|
||||||
}
|
}
|
||||||
Dw.DhopEO(src_o, r_e, DaggerNo);
|
Dw.DhopEO(src_o, r_e, DaggerNo);
|
||||||
@ -420,6 +492,21 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
assert(norm2(src_e) < 1.0e-4);
|
assert(norm2(src_e) < 1.0e-4);
|
||||||
assert(norm2(src_o) < 1.0e-4);
|
assert(norm2(src_o) < 1.0e-4);
|
||||||
|
|
||||||
|
if (!json_filename.empty())
|
||||||
|
{
|
||||||
|
std::cout << GridLogMessage << "writing benchmark results to " << json_filename
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
|
int me = 0;
|
||||||
|
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||||
|
if (me == 0)
|
||||||
|
{
|
||||||
|
std::ofstream json_file(json_filename);
|
||||||
|
json_file << std::setw(4) << json;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
|
36
Grid/Common.hpp
Normal file
36
Grid/Common.hpp
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
/*
|
||||||
|
Copyright © 2022 Antonin Portelli <antonin.portelli@me.com>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef Grid_Benchmarks_Common_hpp_
|
||||||
|
#define Grid_Benchmarks_Common_hpp_
|
||||||
|
|
||||||
|
#ifndef GRID_MSG
|
||||||
|
#define GRID_MSG std::cout << GridLogMessage
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef GRID_MSG_MAXSIZE
|
||||||
|
#define GRID_MSG_MAXSIZE 1024
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define grid_printf(...) \
|
||||||
|
{ \
|
||||||
|
char _buf[GRID_MSG_MAXSIZE]; \
|
||||||
|
snprintf(_buf, GRID_MSG_MAXSIZE, __VA_ARGS__); \
|
||||||
|
GRID_MSG << _buf; \
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // Grid_Benchmarks_Common_hpp_
|
@ -3,10 +3,10 @@ ACLOCAL_AMFLAGS = -I .buildutils/m4
|
|||||||
bin_PROGRAMS = \
|
bin_PROGRAMS = \
|
||||||
Benchmark_comms_host_device \
|
Benchmark_comms_host_device \
|
||||||
Benchmark_dwf_fp32 \
|
Benchmark_dwf_fp32 \
|
||||||
Benchmark_ITT \
|
Benchmark_Grid \
|
||||||
Benchmark_IO
|
Benchmark_IO
|
||||||
|
|
||||||
Benchmark_comms_host_device_SOURCES = Benchmark_comms_host_device.cpp
|
Benchmark_comms_host_device_SOURCES = Benchmark_comms_host_device.cpp
|
||||||
Benchmark_dwf_fp32_SOURCES = Benchmark_dwf_fp32.cpp
|
Benchmark_dwf_fp32_SOURCES = Benchmark_dwf_fp32.cpp
|
||||||
Benchmark_ITT_SOURCES = Benchmark_ITT.cpp
|
Benchmark_Grid_SOURCES = Benchmark_Grid.cpp
|
||||||
Benchmark_IO_SOURCES = Benchmark_IO.cpp
|
Benchmark_IO_SOURCES = Benchmark_IO.cpp
|
||||||
|
69
Grid/Readme.md
Normal file
69
Grid/Readme.md
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
# Grid benchmarks
|
||||||
|
|
||||||
|
This folder contains benchmarks for the [Grid](https://github.com/aportelli/) library.
|
||||||
|
The benchmarks can be summarised as follows
|
||||||
|
|
||||||
|
- `Benchmark_Grid`: This benchmark measure floating point performances for various fermion
|
||||||
|
matrices, as well as bandwidth measurement for different operations. Measurements are
|
||||||
|
performed for a fixed range of problem sizes.
|
||||||
|
|
||||||
|
## TL;DR
|
||||||
|
Build and install Grid, all dependencies, and the benchmark with
|
||||||
|
```bash
|
||||||
|
systems/<system>/bootstrap-env.sh <env_dir> # build dependencies, takes a long time
|
||||||
|
./build-grid.sh <env_dir> <config> # build Grid
|
||||||
|
./build-benchmark.sh <env_dir> <config> # build benchmarks
|
||||||
|
```
|
||||||
|
where `<env_dir>` is an arbitrary directory where every product will be stored, `<system>`
|
||||||
|
is a sub-directory of `systems` containing system-specific scripts
|
||||||
|
(an existing preset or your own), and finally `<config>` is the name of a build config
|
||||||
|
in `systems/<system>/grid-config.json`. After a successful execution the benchmark binaries
|
||||||
|
will be in `<env_dir>/prefix/gridbench_<config>`.
|
||||||
|
|
||||||
|
## Environment setup
|
||||||
|
A complete runtime environnement can be deploy using scripts from this repository. System-specific scripts are in the `systems` directory.
|
||||||
|
|
||||||
|
You should first deploy the environment for the specific system you are using, for example
|
||||||
|
```bash
|
||||||
|
systems/tursa/bootstrap-env.sh ./env
|
||||||
|
```
|
||||||
|
will deploy the relevant environment for the [Tursa](https://www.epcc.ed.ac.uk/hpc-services/dirac-tursa-gpu) supercomputer in `./env`. This step might compile from source a large set
|
||||||
|
of packages, and might take some time to complete.
|
||||||
|
|
||||||
|
After that, the environment directory (`./env` in the example above) will contain a `env.sh` file that need to be sourced to activate the environment
|
||||||
|
```bash
|
||||||
|
source ./env/env.sh
|
||||||
|
```
|
||||||
|
Additional scripts `env-*.sh` can be sourced after to activate more specific environments,
|
||||||
|
this should be done after sourcing `env.sh` as above.
|
||||||
|
|
||||||
|
## Building the benchmarks
|
||||||
|
The environnement directory contains a `grid-config.json` file specifying compilation flag
|
||||||
|
configurations for Grid (please see Grid's repository for documentation). All entries have
|
||||||
|
the form
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"name": "foo", // name of the configuration
|
||||||
|
"env-script": "bar.sh", // script to source before building
|
||||||
|
// (path relative to the environment directory)
|
||||||
|
"commit": "...", // Grid commit to use
|
||||||
|
// (anything that can be an argument of git checkout)
|
||||||
|
"config-options": "..." // options to pass to the configure script,
|
||||||
|
"env" : { // environment variables
|
||||||
|
"VAR": "value" // export VAR="value" before building
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
Grid can then be built with
|
||||||
|
```
|
||||||
|
./build-grid.sh <env_dir> <config>
|
||||||
|
```
|
||||||
|
where `<env_dir>` is the environment directory and `<config>` is the build config name in
|
||||||
|
`grid-config.json`. Similarly, the benchmarks can then be built with
|
||||||
|
```
|
||||||
|
./build-grid <env_dir> <config>
|
||||||
|
```
|
||||||
|
|
||||||
|
## Running the benchmarks
|
||||||
|
After building the benchmarks as above you can find the binaries in
|
||||||
|
`<env_dir>/prefix/gridbench_<config>`.
|
@ -2,5 +2,10 @@
|
|||||||
|
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
|
json_url='https://raw.githubusercontent.com/nlohmann/json/bc889afb4c5bf1c0d8ee29ef35eaaf4c8bef8a5d/single_include/nlohmann/json.hpp'
|
||||||
|
|
||||||
|
if [ ! -f json.hpp ]; then
|
||||||
|
wget ${json_url}
|
||||||
|
fi
|
||||||
mkdir -p .buildutils/m4
|
mkdir -p .buildutils/m4
|
||||||
autoreconf -fvi
|
autoreconf -fvi
|
||||||
|
31
Grid/build-benchmark.sh
Executable file
31
Grid/build-benchmark.sh
Executable file
@ -0,0 +1,31 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# shellcheck disable=SC1090,SC1091
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
if (( $# != 2 )); then
|
||||||
|
echo "usage: $(basename "$0") <environment directory> <config>" 1>&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
env_dir=$1
|
||||||
|
cfg=$2
|
||||||
|
|
||||||
|
call_dir=$(pwd -P)
|
||||||
|
script_dir="$(dirname "$(readlink -f "${BASH_SOURCE:-$0}")")"
|
||||||
|
cd "${env_dir}"
|
||||||
|
env_dir=$(pwd -P)
|
||||||
|
cd "${call_dir}"
|
||||||
|
build_dir="${env_dir}/build/Grid-benchmarks/${cfg}"
|
||||||
|
mkdir -p "${build_dir}"
|
||||||
|
source "${env_dir}/env.sh"
|
||||||
|
entry=$(jq ".configs[]|select(.name==\"${cfg}\")" "${env_dir}"/grid-config.json)
|
||||||
|
env_script=$(echo "${entry}" | jq -r ".\"env-script\"")
|
||||||
|
cd "${build_dir}" || return
|
||||||
|
source "${env_dir}/${env_script}"
|
||||||
|
if [ ! -f Makefile ]; then
|
||||||
|
"${script_dir}/configure" --with-grid="${env_dir}/prefix/grid_${cfg}" \
|
||||||
|
--prefix="${env_dir}/prefix/gridbench_${cfg}"
|
||||||
|
fi
|
||||||
|
make -j 128
|
||||||
|
make install
|
||||||
|
cd "${call_dir}"
|
42
Grid/build-grid.sh
Executable file
42
Grid/build-grid.sh
Executable file
@ -0,0 +1,42 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# shellcheck disable=SC1090,SC1091
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
if (( $# != 2 )); then
|
||||||
|
echo "usage: $(basename "$0") <environment directory> <config>" 1>&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
env_dir=$1
|
||||||
|
cfg=$2
|
||||||
|
|
||||||
|
call_dir=$(pwd -P)
|
||||||
|
cd "${env_dir}"
|
||||||
|
env_dir=$(pwd -P)
|
||||||
|
cd "${call_dir}"
|
||||||
|
build_dir="${env_dir}/build/Grid/${cfg}"
|
||||||
|
if [ -d "${build_dir}" ]; then
|
||||||
|
echo "error: directory '${build_dir}' exists"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
mkdir -p "${build_dir}"
|
||||||
|
source "${env_dir}/env.sh"
|
||||||
|
entry=$(jq ".configs[]|select(.name==\"${cfg}\")" "${env_dir}"/grid-config.json)
|
||||||
|
IFS=" " read -r -a args <<< "$(echo "${entry}" | jq -r ".\"config-options\"")"
|
||||||
|
env_script=$(echo "${entry}" | jq -r ".\"env-script\"")
|
||||||
|
cd "${build_dir}" || return
|
||||||
|
source "${env_dir}/${env_script}"
|
||||||
|
extra_env=$(mktemp)
|
||||||
|
echo "${entry}" | jq -r '.env|to_entries|map("export \(.key)='\''\(.value|tostring)'\''")|.[]' > "${extra_env}"
|
||||||
|
commit=$(echo "${entry}" | jq -r ".commit")
|
||||||
|
git clone https://github.com/paboyle/Grid.git "${build_dir}"
|
||||||
|
cd "${build_dir}"
|
||||||
|
git checkout "${commit}"
|
||||||
|
./bootstrap.sh
|
||||||
|
mkdir build; cd build
|
||||||
|
source "${extra_env}"
|
||||||
|
../configure --prefix="${env_dir}/prefix/grid_${cfg}" "${args[@]}"
|
||||||
|
make -j128
|
||||||
|
make install
|
||||||
|
rm -rf "${extra_env}"
|
||||||
|
cd "${call_dir}"
|
@ -3,55 +3,69 @@ AC_INIT([lattice-bench], [0.1], [antonin.portelli@me.com])
|
|||||||
AC_CANONICAL_BUILD
|
AC_CANONICAL_BUILD
|
||||||
AC_CANONICAL_HOST
|
AC_CANONICAL_HOST
|
||||||
AC_CANONICAL_TARGET
|
AC_CANONICAL_TARGET
|
||||||
AC_CONFIG_SRCDIR([Benchmark_ITT.cpp])
|
AC_CONFIG_SRCDIR([Benchmark_Grid.cpp])
|
||||||
AC_CONFIG_MACRO_DIR([.buildutils/m4])
|
AC_CONFIG_MACRO_DIR([.buildutils/m4])
|
||||||
AC_CONFIG_HEADERS([config.h])
|
AC_CONFIG_HEADERS([config.h])
|
||||||
AM_INIT_AUTOMAKE([-Wall -Werror foreign])
|
AM_INIT_AUTOMAKE([-Wall -Werror foreign])
|
||||||
m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES([yes])])
|
m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES([yes])])
|
||||||
|
|
||||||
# Checks for programs.
|
|
||||||
AC_PROG_CXX
|
|
||||||
AC_PROG_CC
|
|
||||||
AC_PROG_RANLIB
|
|
||||||
AM_PROG_AR
|
|
||||||
AC_LANG([C++])
|
|
||||||
|
|
||||||
AC_ARG_WITH([grid],
|
AC_ARG_WITH([grid],
|
||||||
[AS_HELP_STRING([--with-grid=<prefix>],
|
[AS_HELP_STRING([--with-grid=<prefix>],
|
||||||
[try this for a non-standard install prefix of Grid])],
|
[try this for a non-standard install prefix of Grid])],
|
||||||
[PATH="$with_grid/bin$PATH_SEPARATOR$PATH"]
|
[PATH="$with_grid/bin$PATH_SEPARATOR$PATH"]
|
||||||
[CXXFLAGS="$CXXFLAGS -I$with_grid/include"]
|
[CXXFLAGS="$CXXFLAGS -I$with_grid/include"]
|
||||||
[LDFLAGS="$LDFLAGS -L$with_grid/lib"])
|
[LDFLAGS="$LDFLAGS -L$with_grid/lib"])
|
||||||
|
|
||||||
AC_CHECK_PROG([GRIDCONF],[grid-config],[yes])
|
AC_CHECK_PROG([GRIDCONF],[grid-config],[yes])
|
||||||
if test x"$GRIDCONF" != x"yes" ; then
|
if test x"$GRIDCONF" != x"yes" ; then
|
||||||
AC_MSG_ERROR([grid-config not found])
|
AC_MSG_ERROR([grid-config not found])
|
||||||
fi
|
fi
|
||||||
|
if test x"$CXX" == x ; then
|
||||||
|
CXX="`grid-config --cxx`"
|
||||||
|
elif test "$CXX" != "`grid-config --cxx`" ; then
|
||||||
|
AC_MSG_WARN([CXX differs from that reported by grid-config])
|
||||||
|
fi
|
||||||
|
if test x"$CXXLD" == x ; then
|
||||||
|
CXXLD="`grid-config --cxxld`"
|
||||||
|
elif test "$CXXLD" != "`grid-config --cxxld`" ; then
|
||||||
|
AC_MSG_WARN([CXXLD differs from that reported by grid-config])
|
||||||
|
fi
|
||||||
CXXFLAGS="$CXXFLAGS `grid-config --cxxflags`"
|
CXXFLAGS="$CXXFLAGS `grid-config --cxxflags`"
|
||||||
LDFLAGS="$LDFLAGS `grid-config --ldflags`"
|
|
||||||
CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
|
CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
|
||||||
|
|
||||||
|
AC_PROG_CC
|
||||||
|
AM_PROG_CC_C_O
|
||||||
|
AC_PROG_CXX
|
||||||
|
|
||||||
|
LDFLAGS="$LDFLAGS `grid-config --ldflags`"
|
||||||
LDFLAGS="$AM_LDFLAGS $LDFLAGS"
|
LDFLAGS="$AM_LDFLAGS $LDFLAGS"
|
||||||
LIBS=" -lGrid $LIBS `grid-config --libs`"
|
LIBS=" -ldl -lGrid $LIBS `grid-config --libs`"
|
||||||
|
|
||||||
|
AC_PROG_RANLIB
|
||||||
|
AM_PROG_AR
|
||||||
|
AC_LANG([C++])
|
||||||
|
|
||||||
AC_MSG_CHECKING([that a minimal Grid program compiles]);
|
AC_MSG_CHECKING([that a minimal Grid program compiles]);
|
||||||
AC_LINK_IFELSE(
|
AC_LINK_IFELSE(
|
||||||
[AC_LANG_SOURCE([[
|
[AC_LANG_SOURCE([[
|
||||||
#include <Grid/Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
Grid_init(&argc, &argv);
|
Grid_init(&argc, &argv);
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
]])],
|
]])],
|
||||||
[AC_MSG_RESULT([yes])],
|
[AC_MSG_RESULT([yes])],
|
||||||
[AC_MSG_RESULT([no])]
|
[AC_MSG_RESULT([no])]
|
||||||
[AC_MSG_ERROR([Could not compile a minimal Grid program])])
|
[AC_MSG_ERROR([impossible to compile a minimal Grid program])])
|
||||||
|
|
||||||
|
AC_SUBST([CXXLD])
|
||||||
AC_SUBST([AM_CXXFLAGS])
|
AC_SUBST([AM_CXXFLAGS])
|
||||||
AC_SUBST([AM_LDFLAGS])
|
AC_SUBST([AM_LDFLAGS])
|
||||||
AC_CONFIG_FILES([Makefile])
|
AC_CONFIG_FILES([Makefile])
|
||||||
|
24
Grid/systems/tursa/bootstrap-env.sh
Executable file
24
Grid/systems/tursa/bootstrap-env.sh
Executable file
@ -0,0 +1,24 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
if (( $# != 1 )); then
|
||||||
|
echo "usage: $(basename "$0") <environment directory>" 1>&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
dir=$1
|
||||||
|
|
||||||
|
call_dir=$(pwd -P)
|
||||||
|
script_dir="$(dirname "$(readlink -f "${BASH_SOURCE:-$0}")")"
|
||||||
|
if [ -d "${dir}" ]; then
|
||||||
|
echo "error: directory '${dir}' exists"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
mkdir -p "${dir}"
|
||||||
|
cd "${dir}"
|
||||||
|
git clone https://github.com/spack/spack.git
|
||||||
|
cd "${call_dir}"
|
||||||
|
cp "${script_dir}"/files/* "${dir}"
|
||||||
|
cp "${script_dir}/env.sh" "${script_dir}/grid-config.json" "${dir}"
|
||||||
|
source "${dir}"/spack/share/spack/setup-env.sh
|
||||||
|
"${script_dir}"/spack-bootstrap.sh "${dir}"
|
@ -1,5 +0,0 @@
|
|||||||
#!/usr/bin/env bash
|
|
||||||
# shellcheck disable=SC2046
|
|
||||||
|
|
||||||
script_dir="$(dirname "$(readlink -f "${BASH_SOURCE:-$0}")")"
|
|
||||||
spack load $(cat "${script_dir}"/grid-cpu.spack)
|
|
@ -1,5 +0,0 @@
|
|||||||
#!/usr/bin/env bash
|
|
||||||
# shellcheck disable=SC2046
|
|
||||||
|
|
||||||
script_dir="$(dirname "$(readlink -f "${BASH_SOURCE:-$0}")")"
|
|
||||||
spack load $(cat "${script_dir}"/grid-gpu.spack)
|
|
8
Grid/systems/tursa/env.sh
Normal file
8
Grid/systems/tursa/env.sh
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# shellcheck disable=SC1091
|
||||||
|
|
||||||
|
GRIDENVDIR="$(dirname "$(readlink -f "${BASH_SOURCE:-$0}")")"
|
||||||
|
export GRIDENVDIR
|
||||||
|
export PATH="${GRIDENVDIR}/prefix/base/bin:${PATH}"
|
||||||
|
export ACLOCAL_PATH="${GRIDENVDIR}/prefix/base/share/aclocal:${ACLOCAL_PATH}"
|
||||||
|
source "${GRIDENVDIR}"/spack/share/spack/setup-env.sh
|
4
Grid/systems/tursa/files/env-cpu.sh
Normal file
4
Grid/systems/tursa/files/env-cpu.sh
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
spack env activate grid-cpu
|
||||||
|
spack env status
|
4
Grid/systems/tursa/files/env-gpu.sh
Normal file
4
Grid/systems/tursa/files/env-gpu.sh
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
spack env activate grid-gpu
|
||||||
|
spack env status
|
25
Grid/systems/tursa/grid-config.json
Normal file
25
Grid/systems/tursa/grid-config.json
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
{
|
||||||
|
"configs": [
|
||||||
|
{
|
||||||
|
"name": "gpu",
|
||||||
|
"env-script": "env-gpu.sh",
|
||||||
|
"commit": "796abfad80625d81bb16af7ff6ec612a836f17d8",
|
||||||
|
"config-options": "--enable-comms=mpi --enable-simd=GPU --enable-alloc-align=4k --enable-accelerator-cshift --enable-shm=nvlink --enable-gen-simd-width=64 --enable-accelerator=cuda --disable-unified --enable-gparity=no --enable-fermion-reps=no",
|
||||||
|
"env" : {
|
||||||
|
"CXX": "nvcc",
|
||||||
|
"LDFLAGS": "-cudart shared",
|
||||||
|
"CXXFLAGS": "-ccbin mpicxx -gencode arch=compute_80,code=sm_80 -std=c++14 -cudart shared"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "cpu",
|
||||||
|
"env-script": "env-cpu.sh",
|
||||||
|
"commit": "796abfad80625d81bb16af7ff6ec612a836f17d8",
|
||||||
|
"config-options": "--enable-comms=mpi-auto --enable-simd=AVX2 --enable-alloc-align=4k --enable-shm=shmget --enable-gparity=no --enable-fermion-reps=no",
|
||||||
|
"env" : {
|
||||||
|
"CXX": "clang++",
|
||||||
|
"MPICXX": "mpicxx"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
@ -1,10 +0,0 @@
|
|||||||
gcc@9.4.0
|
|
||||||
llvm@12.0.1
|
|
||||||
ucx@1.12.0.CPU%gcc@9.4.0
|
|
||||||
openmpi@4.1.1.CPU%gcc@9.4.0
|
|
||||||
hdf5^openmpi@4.1.1.CPU%gcc@9.4.0
|
|
||||||
fftw^openmpi@4.1.1.CPU%gcc@9.4.0
|
|
||||||
openssl
|
|
||||||
gmp%gcc@9.4.0
|
|
||||||
mpfr%gcc@9.4.0
|
|
||||||
c-lime
|
|
@ -1,10 +0,0 @@
|
|||||||
gcc@9.4.0
|
|
||||||
cuda@11.4.0
|
|
||||||
ucx@1.12.0.GPU%gcc@9.4.0
|
|
||||||
openmpi@4.1.1.GPU%gcc@9.4.0
|
|
||||||
hdf5^openmpi@4.1.1.GPU%gcc@9.4.0
|
|
||||||
fftw^openmpi@4.1.1.GPU%gcc@9.4.0
|
|
||||||
openssl
|
|
||||||
gmp%gcc@9.4.0
|
|
||||||
mpfr%gcc@9.4.0
|
|
||||||
c-lime
|
|
169
Grid/systems/tursa/spack-bootstrap.sh
Normal file → Executable file
169
Grid/systems/tursa/spack-bootstrap.sh
Normal file → Executable file
@ -2,16 +2,19 @@
|
|||||||
# shellcheck disable=SC2016
|
# shellcheck disable=SC2016
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
GCC='gcc@9.4.0'
|
gcc_spec='gcc@9.4.0'
|
||||||
CUDA='cuda@11.4.0'
|
cuda_spec='cuda@11.4.0'
|
||||||
HDF5='hdf5@1.10.7'
|
hdf5_spec='hdf5@1.10.7'
|
||||||
|
|
||||||
if (( $# != 1 )); then
|
if (( $# != 1 )); then
|
||||||
echo "usage: $(basename "$0") <env dir>" 1>&2
|
echo "usage: $(basename "$0") <env dir>" 1>&2
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
ENVDIR=$1
|
dir=$1
|
||||||
CWD=$(pwd -P)
|
cwd=$(pwd -P)
|
||||||
|
cd "${dir}"
|
||||||
|
dir=$(pwd -P)
|
||||||
|
cd "${cwd}"
|
||||||
|
|
||||||
# General configuration ########################################################
|
# General configuration ########################################################
|
||||||
# build with 128 tasks
|
# build with 128 tasks
|
||||||
@ -33,107 +36,92 @@ echo 'packages:
|
|||||||
spack config --scope site add -f external.yaml
|
spack config --scope site add -f external.yaml
|
||||||
rm external.yaml
|
rm external.yaml
|
||||||
|
|
||||||
|
# Base compilers ###############################################################
|
||||||
# configure system base
|
# configure system base
|
||||||
spack compiler find --scope site
|
spack compiler find --scope site
|
||||||
|
|
||||||
# Base packages ################################################################
|
# install GCC, CUDA & LLVM
|
||||||
# install GCC
|
spack install ${gcc_spec} ${cuda_spec} llvm
|
||||||
spack install ${GCC}
|
|
||||||
spack load ${GCC}
|
spack load llvm
|
||||||
spack compiler find --scope site
|
spack compiler find --scope site
|
||||||
spack unload ${GCC}
|
spack unload llvm
|
||||||
|
|
||||||
# clean
|
spack load ${gcc_spec}
|
||||||
spack clean
|
|
||||||
spack gc -y
|
|
||||||
|
|
||||||
# install CUDA
|
|
||||||
spack install ${CUDA}
|
|
||||||
|
|
||||||
# install development tools
|
|
||||||
dev_tools=("autoconf" "automake" "libtool" "git")
|
|
||||||
spack install "${dev_tools[@]}"
|
|
||||||
|
|
||||||
# create view for CLI & dev tools
|
|
||||||
spack view symlink -i "${ENVDIR}/prefix/base" "${dev_tools[@]}"
|
|
||||||
|
|
||||||
# install clang
|
|
||||||
spack install llvm@12.0.1
|
|
||||||
|
|
||||||
# locate new compilers
|
|
||||||
spack load llvm@12.0.1
|
|
||||||
spack compiler find --scope site
|
spack compiler find --scope site
|
||||||
spack unload llvm@12.0.1
|
spack unload ${gcc_spec}
|
||||||
|
|
||||||
# Manual compilation of OpenMPI & UCX ##########################################
|
# Manual compilation of OpenMPI & UCX ##########################################
|
||||||
# set build directories
|
# set build directories
|
||||||
mkdir -p "${ENVDIR}"/build
|
mkdir -p "${dir}"/build
|
||||||
cd "${ENVDIR}"/build
|
cd "${dir}"/build
|
||||||
|
|
||||||
spack load ${GCC} ${CUDA}
|
spack load ${gcc_spec} ${cuda_spec}
|
||||||
|
|
||||||
CUDA_PATH=$(which nvcc | sed "s/bin/@/g" | cut -d "@" -f1)
|
cuda_path=$(spack find --format "{prefix}" cuda)
|
||||||
GDRCOPY_PATH=/mnt/lustre/tursafs1/apps/gdrcopy/2.3.1
|
gdrcopy_path=/mnt/lustre/tursafs1/apps/gdrcopy/2.3.1
|
||||||
|
|
||||||
# Install ucx 1.12.0
ucx_url=https://github.com/openucx/ucx/releases/download/v1.12.0/ucx-1.12.0.tar.gz

echo "-- building UCX from source"
wget ${ucx_url}
ucx_ar=$(basename ${ucx_url})
tar -xvf "${ucx_ar}"
cd "${ucx_ar%.tar.gz}"

# ucx gpu build
mkdir -p build_gpu; cd build_gpu
../configure --build=x86_64-redhat-linux-gnu --host=x86_64-redhat-linux-gnu \
    --disable-dependency-tracking --prefix="${dir}"/prefix/ucx_gpu \
    --enable-devel-headers --enable-examples --enable-optimizations \
    --with-gdrcopy=${gdrcopy_path} --with-verbs --disable-logging \
    --disable-debug --disable-assertions --enable-cma \
    --with-knem=/opt/knem-1.1.4.90mlnx1/ --with-rdmacm \
    --without-rocm --without-ugni --without-java \
    --enable-compiler-opt=3 --with-cuda="${cuda_path}" --without-cm \
    --with-rc --with-ud --with-dc --with-mlx5-dv --with-dm \
    --enable-mt --without-go LDFLAGS=-L${gdrcopy_path}/lib
make -j 128
make install
cd ..
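# The GPU and CPU UCX builds differ only in CUDA and gdrcopy support; both
# enable the InfiniBand verbs, knem and rdmacm transports used on this system.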
# ucx cpu build
mkdir -p build_cpu; cd build_cpu
../configure --build=x86_64-redhat-linux-gnu --host=x86_64-redhat-linux-gnu \
    --disable-dependency-tracking --prefix="${dir}"/prefix/ucx_cpu \
    --enable-devel-headers --enable-examples --enable-optimizations \
    --with-verbs --disable-logging --disable-debug \
    --disable-assertions --enable-mt --enable-cma \
    --with-knem=/opt/knem-1.1.4.90mlnx1/ --with-rdmacm \
    --without-rocm --without-ugni --without-java \
    --enable-compiler-opt=3 --without-cm --without-ugni --with-rc \
    --with-ud --with-dc --with-mlx5-dv --with-dm --enable-mt --without-go
make -j 128
make install

cd "${dir}"/build
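# Optional sanity check (not part of the original script): list the transports
# compiled into each build, e.g.
# "${dir}"/prefix/ucx_gpu/bin/ucx_info -d | grep Transport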
# Install openmpi 4.1.1
ompi_url=https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.1.tar.gz

echo "-- building OpenMPI from source"

wget ${ompi_url}
ompi_ar=$(basename ${ompi_url})
tar -xvf "${ompi_ar}"
cd "${ompi_ar%.tar.gz}"
export AUTOMAKE_JOBS=128
./autogen.pl -f
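# autogen.pl regenerates the OpenMPI build system (-f forces a full rebuild of
# the configure scripts); AUTOMAKE_JOBS parallelises the automake stage.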
# openmpi gpu build
mkdir build_gpu; cd build_gpu
../configure --prefix="${dir}"/prefix/ompi_gpu --without-xpmem \
    --with-ucx="${dir}"/prefix/ucx_gpu \
    --with-ucx-libdir="${dir}"/prefix/ucx_gpu/lib \
    --with-knem=/opt/knem-1.1.4.90mlnx1/ \
    --enable-mca-no-build=btl-uct \
    --with-cuda="${cuda_path}" --disable-getpwuid \
    --with-verbs --with-slurm --enable-mpi-fortran=all \
    --with-pmix=internal --with-libevent=internal
make -j 128
make install
cd ..

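# btl-uct is excluded ('--enable-mca-no-build=btl-uct') because OpenMPI is
# built against the external UCX and communicates through the ucx PML;
# building the uct byte-transfer layer on top of the same UCX can cause
# conflicts. Optional check (not part of the original script):
# "${dir}"/prefix/ompi_gpu/bin/ompi_info | grep -i ucx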
# openmpi cpu build
mkdir build_cpu; cd build_cpu
../configure --prefix="${dir}"/prefix/ompi_cpu --without-xpmem \
    --with-ucx="${dir}"/prefix/ucx_cpu \
    --with-ucx-libdir="${dir}"/prefix/ucx_cpu/lib \
    --with-knem=/opt/knem-1.1.4.90mlnx1/ \
    --enable-mca-no-build=btl-uct --disable-getpwuid \
    --with-verbs --with-slurm --enable-mpi-fortran=all \
    --with-pmix=internal --with-libevent=internal
make -j 128
make install
cd "${dir}"
# Add externals to spack
echo "packages:
  ucx:
    externals:
    - spec: \"ucx@1.12.0.GPU%gcc@9.4.0\"
      prefix: ${dir}/prefix/ucx_gpu
    - spec: \"ucx@1.12.0.CPU%gcc@9.4.0\"
      prefix: ${dir}/prefix/ucx_cpu
    buildable: False
  openmpi:
    externals:
    - spec: \"openmpi@4.1.1.GPU%gcc@9.4.0\"
      prefix: ${dir}/prefix/ompi_gpu
    - spec: \"openmpi@4.1.1.CPU%gcc@9.4.0\"
      prefix: ${dir}/prefix/ompi_cpu
    buildable: False" > spack.yaml

spack config --scope site add -f spack.yaml
rm spack.yaml
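# Registering the hand-built libraries as non-buildable externals lets Spack
# resolve them as dependencies without ever attempting to rebuild them; the
# custom .GPU/.CPU version suffixes keep the two variants distinguishable.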
spack install ucx@1.12.0.GPU%gcc@9.4.0 openmpi@4.1.1.GPU%gcc@9.4.0
spack install ucx@1.12.0.CPU%gcc@9.4.0 openmpi@4.1.1.CPU%gcc@9.4.0

cd "${cwd}"
# environments #################################################################
dev_tools=("autoconf" "automake" "libtool" "jq")
ompi_gpu_hash=$(spack find --format "{hash}" openmpi@4.1.1.GPU)
ompi_cpu_hash=$(spack find --format "{hash}" openmpi@4.1.1.CPU)

spack env create grid-gpu
spack env activate grid-gpu
spack add ${gcc_spec} ${cuda_spec} "${dev_tools[@]}"
spack add ucx@1.12.0.GPU%gcc@9.4.0 openmpi@4.1.1.GPU%gcc@9.4.0
spack add ${hdf5_spec}+cxx+threadsafe ^/"${ompi_gpu_hash}"
spack add fftw ^/"${ompi_gpu_hash}"
spack add openssl gmp mpfr c-lime
spack install
spack env deactivate
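# The '^/<hash>' syntax pins a dependency to one exact installed package by
# its DAG hash, so HDF5 and FFTW in each environment are guaranteed to be
# built against the matching external OpenMPI.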
spack env create grid-cpu
spack env activate grid-cpu
spack add llvm "${dev_tools[@]}"
spack add ucx@1.12.0.CPU%gcc@9.4.0 openmpi@4.1.1.CPU%gcc@9.4.0
spack add ${hdf5_spec}+cxx+threadsafe ^/"${ompi_cpu_hash}"
spack add fftw ^/"${ompi_cpu_hash}"
spack add openssl gmp mpfr c-lime
spack install
spack env deactivate
# Final setup ##################################################################
spack clean
spack gc -y

# add more environment variables in module loading
spack config --scope site add 'modules:prefix_inspections:lib:[LD_LIBRARY_PATH,LIBRARY_PATH]'
spack config --scope site add 'modules:prefix_inspections:lib64:[LD_LIBRARY_PATH,LIBRARY_PATH]'
spack config --scope site add 'modules:prefix_inspections:include:[C_INCLUDE_PATH,CPLUS_INCLUDE_PATH,INCLUDE]'
spack module tcl refresh -y
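# prefix_inspections make every generated module file export the listed
# variables for the corresponding subdirectory of a package's prefix, so
# loading a module sets linker and include search paths automatically.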
# permission change for group access
chmod -R g+rw "${dir}/spack/var/spack/cache"
setfacl -d -R -m g::rwX "${dir}/spack/var/spack/cache"
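# 'setfacl -d' installs a default ACL, so files created in the cache later
# inherit group read/write access; the capital X grants execute permission
# on directories only.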
8
Readme.md
Normal file
@ -0,0 +1,8 @@
# Lattice benchmarks

This repository is an attempt at packaging benchmarks for various libraries used
for lattice field theory simulations. It currently features only the Grid
library, but more will be added later.

Libraries:
- [Grid](https://github.com/aportelli/) - [Documentation](Grid/Readme.md)