Initial commit
This commit is contained in:
commit
2a64c41a8c
15
.gitignore
vendored
Normal file
15
.gitignore
vendored
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
/.vscode
|
||||||
|
build*
|
||||||
|
.buildutils
|
||||||
|
autom4te.cache
|
||||||
|
config.*
|
||||||
|
compile
|
||||||
|
ar-lib
|
||||||
|
aclocal.m4
|
||||||
|
configure
|
||||||
|
depcomp
|
||||||
|
install-sh
|
||||||
|
missing
|
||||||
|
Makefile.in
|
||||||
|
.DS_Store
|
||||||
|
*~
|
224
Grid/Benchmark_IO.cpp
Normal file
224
Grid/Benchmark_IO.cpp
Normal file
@ -0,0 +1,224 @@
|
|||||||
|
/*
|
||||||
|
Copyright © 2022 Antonin Portelli <antonin.portelli@me.com>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "Benchmark_IO.hpp"
|
||||||
|
|
||||||
|
#ifndef BENCH_IO_LMIN
|
||||||
|
#define BENCH_IO_LMIN 8
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef BENCH_IO_LMAX
|
||||||
|
#define BENCH_IO_LMAX 32
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef BENCH_IO_NPASS
|
||||||
|
#define BENCH_IO_NPASS 10
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef HAVE_LIME
|
||||||
|
using namespace Grid;
|
||||||
|
|
||||||
|
// Base name (without rank suffix or extension) of the benchmark files used
// for the local size l: "iobench_l" followed by the decimal value of l.
std::string filestem(const int l)
{
  std::string stem("iobench_l");

  stem += std::to_string(l);

  return stem;
}
|
||||||
|
|
||||||
|
int vol(const int i)
|
||||||
|
{
|
||||||
|
return BENCH_IO_LMIN + 2 * i;
|
||||||
|
}
|
||||||
|
|
||||||
|
int volInd(const int l)
|
||||||
|
{
|
||||||
|
return (l - BENCH_IO_LMIN) / 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Mat>
|
||||||
|
void stats(Mat &mean, Mat &stdDev, const std::vector<Mat> &data)
|
||||||
|
{
|
||||||
|
auto nr = data[0].rows(), nc = data[0].cols();
|
||||||
|
Eigen::MatrixXd sqSum(nr, nc);
|
||||||
|
double n = static_cast<double>(data.size());
|
||||||
|
|
||||||
|
assert(n > 1.);
|
||||||
|
mean = Mat::Zero(nr, nc);
|
||||||
|
sqSum = Mat::Zero(nr, nc);
|
||||||
|
for (auto &d : data)
|
||||||
|
{
|
||||||
|
mean += d;
|
||||||
|
sqSum += d.cwiseProduct(d);
|
||||||
|
}
|
||||||
|
stdDev = ((sqSum - mean.cwiseProduct(mean) / n) / (n - 1.)).cwiseSqrt();
|
||||||
|
mean /= n;
|
||||||
|
}
|
||||||
|
|
||||||
|
// printf-style formatting into the message log (MSG). The text is formatted
// into a fixed 1024-byte buffer; snprintf truncates longer output instead of
// writing past the end of the buffer. The do/while(0) wrapper makes the macro
// expand to a single statement, so the call site must end with ';'.
#define grid_printf(...)                            \
  do                                                \
  {                                                 \
    char _buf[1024];                                \
    std::snprintf(_buf, sizeof(_buf), __VA_ARGS__); \
    MSG << _buf;                                    \
  } while (0)
|
||||||
|
|
||||||
|
// Column indices of the performance tables (one column per I/O operation).
// Enumerators take the consecutive values 0, 1, 2, 3.
enum
{
  sRead,  // 0: "std read" column
  sWrite, // 1: "std write" column
  gRead,  // 2: "Grid read" column
  gWrite  // 3: "Grid write" column
};
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc, &argv);
|
||||||
|
|
||||||
|
int64_t threads = GridThread::GetThreads();
|
||||||
|
auto mpi = GridDefaultMpi();
|
||||||
|
unsigned int nVol = (BENCH_IO_LMAX - BENCH_IO_LMIN) / 2 + 1;
|
||||||
|
unsigned int nRelVol = (BENCH_IO_LMAX - 24) / 2 + 1;
|
||||||
|
std::vector<Eigen::MatrixXd> perf(BENCH_IO_NPASS, Eigen::MatrixXd::Zero(nVol, 4));
|
||||||
|
std::vector<Eigen::VectorXd> avPerf(BENCH_IO_NPASS, Eigen::VectorXd::Zero(4));
|
||||||
|
std::vector<int> latt;
|
||||||
|
|
||||||
|
MSG << "Grid is setup to use " << threads << " threads" << std::endl;
|
||||||
|
MSG << "MPI partition " << mpi << std::endl;
|
||||||
|
for (unsigned int i = 0; i < BENCH_IO_NPASS; ++i)
|
||||||
|
{
|
||||||
|
MSG << BIGSEP << std::endl;
|
||||||
|
MSG << "Pass " << i + 1 << "/" << BENCH_IO_NPASS << std::endl;
|
||||||
|
MSG << BIGSEP << std::endl;
|
||||||
|
MSG << SEP << std::endl;
|
||||||
|
MSG << "Benchmark std write" << std::endl;
|
||||||
|
MSG << SEP << std::endl;
|
||||||
|
for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
|
||||||
|
{
|
||||||
|
latt = {l * mpi[0], l * mpi[1], l * mpi[2], l * mpi[3]};
|
||||||
|
|
||||||
|
MSG << "-- Local volume " << l << "^4" << std::endl;
|
||||||
|
writeBenchmark<LatticeFermion>(latt, filestem(l), stdWrite<LatticeFermion>);
|
||||||
|
perf[i](volInd(l), sWrite) = BinaryIO::lastPerf.mbytesPerSecond;
|
||||||
|
}
|
||||||
|
|
||||||
|
MSG << SEP << std::endl;
|
||||||
|
MSG << "Benchmark std read" << std::endl;
|
||||||
|
MSG << SEP << std::endl;
|
||||||
|
for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
|
||||||
|
{
|
||||||
|
latt = {l * mpi[0], l * mpi[1], l * mpi[2], l * mpi[3]};
|
||||||
|
|
||||||
|
MSG << "-- Local volume " << l << "^4" << std::endl;
|
||||||
|
readBenchmark<LatticeFermion>(latt, filestem(l), stdRead<LatticeFermion>);
|
||||||
|
perf[i](volInd(l), sRead) = BinaryIO::lastPerf.mbytesPerSecond;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_LIME
|
||||||
|
MSG << SEP << std::endl;
|
||||||
|
MSG << "Benchmark Grid C-Lime write" << std::endl;
|
||||||
|
MSG << SEP << std::endl;
|
||||||
|
for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
|
||||||
|
{
|
||||||
|
latt = {l * mpi[0], l * mpi[1], l * mpi[2], l * mpi[3]};
|
||||||
|
|
||||||
|
MSG << "-- Local volume " << l << "^4" << std::endl;
|
||||||
|
writeBenchmark<LatticeFermion>(latt, filestem(l), limeWrite<LatticeFermion>);
|
||||||
|
perf[i](volInd(l), gWrite) = BinaryIO::lastPerf.mbytesPerSecond;
|
||||||
|
}
|
||||||
|
|
||||||
|
MSG << SEP << std::endl;
|
||||||
|
MSG << "Benchmark Grid C-Lime read" << std::endl;
|
||||||
|
MSG << SEP << std::endl;
|
||||||
|
for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
|
||||||
|
{
|
||||||
|
latt = {l * mpi[0], l * mpi[1], l * mpi[2], l * mpi[3]};
|
||||||
|
|
||||||
|
MSG << "-- Local volume " << l << "^4" << std::endl;
|
||||||
|
readBenchmark<LatticeFermion>(latt, filestem(l), limeRead<LatticeFermion>);
|
||||||
|
perf[i](volInd(l), gRead) = BinaryIO::lastPerf.mbytesPerSecond;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
avPerf[i].fill(0.);
|
||||||
|
for (int f = 0; f < 4; ++f)
|
||||||
|
for (int l = 24; l <= BENCH_IO_LMAX; l += 2)
|
||||||
|
{
|
||||||
|
avPerf[i](f) += perf[i](volInd(l), f);
|
||||||
|
}
|
||||||
|
avPerf[i] /= nRelVol;
|
||||||
|
}
|
||||||
|
|
||||||
|
Eigen::MatrixXd mean(nVol, 4), stdDev(nVol, 4), rob(nVol, 4);
|
||||||
|
Eigen::VectorXd avMean(4), avStdDev(4), avRob(4);
|
||||||
|
// double n = BENCH_IO_NPASS;
|
||||||
|
|
||||||
|
stats(mean, stdDev, perf);
|
||||||
|
stats(avMean, avStdDev, avPerf);
|
||||||
|
rob.fill(100.);
|
||||||
|
rob -= 100. * stdDev.cwiseQuotient(mean.cwiseAbs());
|
||||||
|
avRob.fill(100.);
|
||||||
|
avRob -= 100. * avStdDev.cwiseQuotient(avMean.cwiseAbs());
|
||||||
|
|
||||||
|
MSG << BIGSEP << std::endl;
|
||||||
|
MSG << "SUMMARY" << std::endl;
|
||||||
|
MSG << BIGSEP << std::endl;
|
||||||
|
MSG << "Summary of individual results (all results in MB/s)." << std::endl;
|
||||||
|
MSG << "Every second colum gives the standard deviation of the previous column." << std::endl;
|
||||||
|
MSG << std::endl;
|
||||||
|
grid_printf("%4s %12s %12s %12s %12s %12s %12s %12s %12s\n",
|
||||||
|
"L", "std read", "std dev", "std write", "std dev",
|
||||||
|
"Grid read", "std dev", "Grid write", "std dev");
|
||||||
|
for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
|
||||||
|
{
|
||||||
|
grid_printf("%4d %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n",
|
||||||
|
l, mean(volInd(l), sRead), stdDev(volInd(l), sRead),
|
||||||
|
mean(volInd(l), sWrite), stdDev(volInd(l), sWrite),
|
||||||
|
mean(volInd(l), gRead), stdDev(volInd(l), gRead),
|
||||||
|
mean(volInd(l), gWrite), stdDev(volInd(l), gWrite));
|
||||||
|
}
|
||||||
|
MSG << std::endl;
|
||||||
|
MSG << "Robustness of individual results, in %. (rob = 100% - std dev / mean)" << std::endl;
|
||||||
|
MSG << std::endl;
|
||||||
|
grid_printf("%4s %12s %12s %12s %12s\n",
|
||||||
|
"L", "std read", "std write", "Grid read", "Grid write");
|
||||||
|
for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
|
||||||
|
{
|
||||||
|
grid_printf("%4d %12.1f %12.1f %12.1f %12.1f\n",
|
||||||
|
l, rob(volInd(l), sRead), rob(volInd(l), sWrite),
|
||||||
|
rob(volInd(l), gRead), rob(volInd(l), gWrite));
|
||||||
|
}
|
||||||
|
MSG << std::endl;
|
||||||
|
MSG << "Summary of results averaged over local volumes 24^4-" << BENCH_IO_LMAX << "^4 (all results in MB/s)." << std::endl;
|
||||||
|
MSG << "Every second colum gives the standard deviation of the previous column." << std::endl;
|
||||||
|
MSG << std::endl;
|
||||||
|
grid_printf("%12s %12s %12s %12s %12s %12s %12s %12s\n",
|
||||||
|
"std read", "std dev", "std write", "std dev",
|
||||||
|
"Grid read", "std dev", "Grid write", "std dev");
|
||||||
|
grid_printf("%12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n",
|
||||||
|
avMean(sRead), avStdDev(sRead), avMean(sWrite), avStdDev(sWrite),
|
||||||
|
avMean(gRead), avStdDev(gRead), avMean(gWrite), avStdDev(gWrite));
|
||||||
|
MSG << std::endl;
|
||||||
|
MSG << "Robustness of volume-averaged results, in %. (rob = 100% - std dev / mean)" << std::endl;
|
||||||
|
MSG << std::endl;
|
||||||
|
grid_printf("%12s %12s %12s %12s\n",
|
||||||
|
"std read", "std write", "Grid read", "Grid write");
|
||||||
|
grid_printf("%12.1f %12.1f %12.1f %12.1f\n",
|
||||||
|
avRob(sRead), avRob(sWrite), avRob(gRead), avRob(gWrite));
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
// Fallback entry point used when HAVE_LIME is not defined: the benchmark is
// compiled out and the program exits immediately with status 0.
int main(int, char **)
{
  return 0;
}
|
||||||
|
#endif
|
273
Grid/Benchmark_IO.hpp
Normal file
273
Grid/Benchmark_IO.hpp
Normal file
@ -0,0 +1,273 @@
|
|||||||
|
/*
|
||||||
|
Copyright © 2022 Antonin Portelli <antonin.portelli@me.com>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef Benchmark_IO_hpp_
|
||||||
|
#define Benchmark_IO_hpp_
|
||||||
|
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
// Output stream prefix for benchmark messages: std::cout with the
// GridLogMessage logger inserted first.
#define MSG std::cout << GridLogMessage

// Thin horizontal rule printed around section titles.
#define SEP "-----------------------------------------------------------------------------"

// Thick horizontal rule printed around pass and summary titles.
#define BIGSEP "============================================================================="
|
||||||
|
#ifdef HAVE_LIME
|
||||||
|
|
||||||
|
namespace Grid
|
||||||
|
{
|
||||||
|
|
||||||
|
// Callable that writes a field to storage: write(filestem, field).
template <typename Field>
using WriterFn = std::function<void(const std::string, Field &)>;

// Callable that fills a field from storage: read(field, filestem).
template <typename Field>
using ReaderFn = std::function<void(Field &, const std::string)>;
|
||||||
|
|
||||||
|
// AP 06/10/2020: Standard C version in case one is suspicious of the C++ API
|
||||||
|
//
|
||||||
|
// template <typename Field>
|
||||||
|
// void stdWrite(const std::string filestem, Field &vec)
|
||||||
|
// {
|
||||||
|
// std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
||||||
|
// std::FILE *file = std::fopen((filestem + "." + rankStr + ".bin").c_str(), "wb");
|
||||||
|
// size_t size;
|
||||||
|
// uint32_t crc;
|
||||||
|
// GridStopWatch ioWatch, crcWatch;
|
||||||
|
|
||||||
|
// size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object);
|
||||||
|
// autoView(vec_v, vec, CpuRead);
|
||||||
|
// crcWatch.Start();
|
||||||
|
// crc = GridChecksum::crc32(vec_v.cpu_ptr, size);
|
||||||
|
// std::fwrite(&crc, sizeof(uint32_t), 1, file);
|
||||||
|
// crcWatch.Stop();
|
||||||
|
// MSG << "Std I/O write: Data CRC32 " << std::hex << crc << std::dec << std::endl;
|
||||||
|
// ioWatch.Start();
|
||||||
|
// std::fwrite(vec_v.cpu_ptr, sizeof(typename Field::scalar_object), vec.Grid()->lSites(), file);
|
||||||
|
// ioWatch.Stop();
|
||||||
|
// std::fclose(file);
|
||||||
|
// size *= vec.Grid()->ProcessorCount();
|
||||||
|
// auto &p = BinaryIO::lastPerf;
|
||||||
|
// p.size = size;
|
||||||
|
// p.time = ioWatch.useconds();
|
||||||
|
// p.mbytesPerSecond = size/1024./1024./(ioWatch.useconds()/1.e6);
|
||||||
|
// MSG << "Std I/O write: Wrote " << p.size << " bytes in " << ioWatch.Elapsed()
|
||||||
|
// << ", " << p.mbytesPerSecond << " MB/s" << std::endl;
|
||||||
|
// MSG << "Std I/O write: checksum overhead " << crcWatch.Elapsed() << std::endl;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// template <typename Field>
|
||||||
|
// void stdRead(Field &vec, const std::string filestem)
|
||||||
|
// {
|
||||||
|
// std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
||||||
|
// std::FILE *file = std::fopen((filestem + "." + rankStr + ".bin").c_str(), "rb");
|
||||||
|
// size_t size;
|
||||||
|
// uint32_t crcRead, crcData;
|
||||||
|
// GridStopWatch ioWatch, crcWatch;
|
||||||
|
|
||||||
|
// size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object);
|
||||||
|
// crcWatch.Start();
|
||||||
|
// std::fread(&crcRead, sizeof(uint32_t), 1, file);
|
||||||
|
// crcWatch.Stop();
|
||||||
|
// {
|
||||||
|
// autoView(vec_v, vec, CpuWrite);
|
||||||
|
// ioWatch.Start();
|
||||||
|
// std::fread(vec_v.cpu_ptr, sizeof(typename Field::scalar_object), vec.Grid()->lSites(), file);
|
||||||
|
// ioWatch.Stop();
|
||||||
|
// std::fclose(file);
|
||||||
|
// }
|
||||||
|
// {
|
||||||
|
// autoView(vec_v, vec, CpuRead);
|
||||||
|
// crcWatch.Start();
|
||||||
|
// crcData = GridChecksum::crc32(vec_v.cpu_ptr, size);
|
||||||
|
// crcWatch.Stop();
|
||||||
|
// }
|
||||||
|
// MSG << "Std I/O read: Data CRC32 " << std::hex << crcData << std::dec << std::endl;
|
||||||
|
// assert(crcData == crcRead);
|
||||||
|
// size *= vec.Grid()->ProcessorCount();
|
||||||
|
// auto &p = BinaryIO::lastPerf;
|
||||||
|
// p.size = size;
|
||||||
|
// p.time = ioWatch.useconds();
|
||||||
|
// p.mbytesPerSecond = size/1024./1024./(ioWatch.useconds()/1.e6);
|
||||||
|
// MSG << "Std I/O read: Read " << p.size << " bytes in " << ioWatch.Elapsed()
|
||||||
|
// << ", " << p.mbytesPerSecond << " MB/s" << std::endl;
|
||||||
|
// MSG << "Std I/O read: checksum overhead " << crcWatch.Elapsed() << std::endl;
|
||||||
|
// }
|
||||||
|
|
||||||
|
template <typename Field>
|
||||||
|
void stdWrite(const std::string filestem, Field &vec)
|
||||||
|
{
|
||||||
|
std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
||||||
|
std::ofstream file(filestem + "." + rankStr + ".bin", std::ios::out | std::ios::binary);
|
||||||
|
size_t size, sizec;
|
||||||
|
uint32_t crc;
|
||||||
|
GridStopWatch ioWatch, crcWatch;
|
||||||
|
|
||||||
|
size = vec.Grid()->lSites() * sizeof(typename Field::scalar_object);
|
||||||
|
sizec = size / sizeof(char); // just in case of...
|
||||||
|
autoView(vec_v, vec, CpuRead);
|
||||||
|
crcWatch.Start();
|
||||||
|
crc = GridChecksum::crc32(vec_v.cpu_ptr, size);
|
||||||
|
file.write(reinterpret_cast<char *>(&crc), sizeof(uint32_t) / sizeof(char));
|
||||||
|
crcWatch.Stop();
|
||||||
|
MSG << "Std I/O write: Data CRC32 " << std::hex << crc << std::dec << std::endl;
|
||||||
|
ioWatch.Start();
|
||||||
|
file.write(reinterpret_cast<char *>(vec_v.cpu_ptr), sizec);
|
||||||
|
file.flush();
|
||||||
|
ioWatch.Stop();
|
||||||
|
size *= vec.Grid()->ProcessorCount();
|
||||||
|
auto &p = BinaryIO::lastPerf;
|
||||||
|
p.size = size;
|
||||||
|
p.time = ioWatch.useconds();
|
||||||
|
p.mbytesPerSecond = size / 1024. / 1024. / (ioWatch.useconds() / 1.e6);
|
||||||
|
MSG << "Std I/O write: Wrote " << p.size << " bytes in " << ioWatch.Elapsed()
|
||||||
|
<< ", " << p.mbytesPerSecond << " MB/s" << std::endl;
|
||||||
|
MSG << "Std I/O write: checksum overhead " << crcWatch.Elapsed() << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Field>
|
||||||
|
void stdRead(Field &vec, const std::string filestem)
|
||||||
|
{
|
||||||
|
std::string rankStr = std::to_string(vec.Grid()->ThisRank());
|
||||||
|
std::ifstream file(filestem + "." + rankStr + ".bin", std::ios::in | std::ios::binary);
|
||||||
|
size_t size, sizec;
|
||||||
|
uint32_t crcRead, crcData;
|
||||||
|
GridStopWatch ioWatch, crcWatch;
|
||||||
|
|
||||||
|
size = vec.Grid()->lSites() * sizeof(typename Field::scalar_object);
|
||||||
|
sizec = size / sizeof(char); // just in case of...
|
||||||
|
crcWatch.Start();
|
||||||
|
file.read(reinterpret_cast<char *>(&crcRead), sizeof(uint32_t) / sizeof(char));
|
||||||
|
crcWatch.Stop();
|
||||||
|
{
|
||||||
|
autoView(vec_v, vec, CpuWrite);
|
||||||
|
ioWatch.Start();
|
||||||
|
file.read(reinterpret_cast<char *>(vec_v.cpu_ptr), sizec);
|
||||||
|
ioWatch.Stop();
|
||||||
|
}
|
||||||
|
{
|
||||||
|
autoView(vec_v, vec, CpuRead);
|
||||||
|
crcWatch.Start();
|
||||||
|
crcData = GridChecksum::crc32(vec_v.cpu_ptr, size);
|
||||||
|
crcWatch.Stop();
|
||||||
|
}
|
||||||
|
MSG << "Std I/O read: Data CRC32 " << std::hex << crcData << std::dec << std::endl;
|
||||||
|
assert(crcData == crcRead);
|
||||||
|
size *= vec.Grid()->ProcessorCount();
|
||||||
|
auto &p = BinaryIO::lastPerf;
|
||||||
|
p.size = size;
|
||||||
|
p.time = ioWatch.useconds();
|
||||||
|
p.mbytesPerSecond = size / 1024. / 1024. / (ioWatch.useconds() / 1.e6);
|
||||||
|
MSG << "Std I/O read: Read " << p.size << " bytes in " << ioWatch.Elapsed()
|
||||||
|
<< ", " << p.mbytesPerSecond << " MB/s" << std::endl;
|
||||||
|
MSG << "Std I/O read: checksum overhead " << crcWatch.Elapsed() << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Field>
|
||||||
|
void limeWrite(const std::string filestem, Field &vec)
|
||||||
|
{
|
||||||
|
emptyUserRecord record;
|
||||||
|
ScidacWriter binWriter(vec.Grid()->IsBoss());
|
||||||
|
|
||||||
|
binWriter.open(filestem + ".lime.bin");
|
||||||
|
binWriter.writeScidacFieldRecord(vec, record);
|
||||||
|
binWriter.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Field>
|
||||||
|
void limeRead(Field &vec, const std::string filestem)
|
||||||
|
{
|
||||||
|
emptyUserRecord record;
|
||||||
|
ScidacReader binReader;
|
||||||
|
|
||||||
|
binReader.open(filestem + ".lime.bin");
|
||||||
|
binReader.readScidacFieldRecord(vec, record);
|
||||||
|
binReader.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void makeGrid(std::shared_ptr<GridBase> &gPt,
|
||||||
|
const std::shared_ptr<GridCartesian> &gBasePt,
|
||||||
|
const unsigned int Ls = 1, const bool rb = false)
|
||||||
|
{
|
||||||
|
if (rb)
|
||||||
|
{
|
||||||
|
if (Ls > 1)
|
||||||
|
{
|
||||||
|
gPt.reset(SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, gBasePt.get()));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
gPt.reset(SpaceTimeGrid::makeFourDimRedBlackGrid(gBasePt.get()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (Ls > 1)
|
||||||
|
{
|
||||||
|
gPt.reset(SpaceTimeGrid::makeFiveDimGrid(Ls, gBasePt.get()));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
gPt = gBasePt;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Field>
|
||||||
|
void writeBenchmark(const Coordinate &latt, const std::string filename,
|
||||||
|
const WriterFn<Field> &write,
|
||||||
|
const unsigned int Ls = 1, const bool rb = false)
|
||||||
|
{
|
||||||
|
auto mpi = GridDefaultMpi();
|
||||||
|
auto simd = GridDefaultSimd(latt.size(), Field::vector_type::Nsimd());
|
||||||
|
std::shared_ptr<GridCartesian> gBasePt(SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi));
|
||||||
|
std::shared_ptr<GridBase> gPt;
|
||||||
|
std::random_device rd;
|
||||||
|
|
||||||
|
makeGrid(gPt, gBasePt, Ls, rb);
|
||||||
|
|
||||||
|
GridBase *g = gPt.get();
|
||||||
|
GridParallelRNG rng(g);
|
||||||
|
Field vec(g);
|
||||||
|
|
||||||
|
rng.SeedFixedIntegers({static_cast<int>(rd()), static_cast<int>(rd()),
|
||||||
|
static_cast<int>(rd()), static_cast<int>(rd()),
|
||||||
|
static_cast<int>(rd()), static_cast<int>(rd()),
|
||||||
|
static_cast<int>(rd()), static_cast<int>(rd())});
|
||||||
|
|
||||||
|
random(rng, vec);
|
||||||
|
write(filename, vec);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Field>
|
||||||
|
void readBenchmark(const Coordinate &latt, const std::string filename,
|
||||||
|
const ReaderFn<Field> &read,
|
||||||
|
const unsigned int Ls = 1, const bool rb = false)
|
||||||
|
{
|
||||||
|
auto mpi = GridDefaultMpi();
|
||||||
|
auto simd = GridDefaultSimd(latt.size(), Field::vector_type::Nsimd());
|
||||||
|
std::shared_ptr<GridCartesian> gBasePt(SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi));
|
||||||
|
std::shared_ptr<GridBase> gPt;
|
||||||
|
|
||||||
|
makeGrid(gPt, gBasePt, Ls, rb);
|
||||||
|
|
||||||
|
GridBase *g = gPt.get();
|
||||||
|
Field vec(g);
|
||||||
|
|
||||||
|
read(vec, filename);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // LIME
|
||||||
|
#endif // Benchmark_IO_hpp_
|
801
Grid/Benchmark_ITT.cpp
Normal file
801
Grid/Benchmark_ITT.cpp
Normal file
@ -0,0 +1,801 @@
|
|||||||
|
/*
|
||||||
|
Copyright © 2015 Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Copyright © 2022 Antonin Portelli <antonin.portelli@me.com>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
using namespace Grid;
|
||||||
|
|
||||||
|
// File-scope benchmark state. Being namespace-scope objects, the vectors
// start empty and the scalars start at zero.
// NOTE(review): their users are outside the visible part of the file;
// the names suggest result tables (L, Ls, Mflop/s) and a reference
// Mflop/s value with its error -- confirm against the code that fills them.
std::vector<int> L_list;
std::vector<int> Ls_list;
std::vector<double> mflop_list;

double mflop_ref;
double mflop_ref_err;

int NN_global;
|
||||||
|
|
||||||
|
// Summary statistics of a set of timings.
struct time_statistics
{
  double mean; // arithmetic mean of the samples
  double err;  // standard error of the mean: sqrt(sum (x - mean)^2 / (n (n - 1)))
  double min;  // smallest sample
  double max;  // largest sample

  // Fill mean, err, min and max from the samples in v.
  //
  // Degenerate inputs are handled explicitly instead of producing undefined
  // behaviour or NaN: an empty v sets every field to 0, and a single sample
  // gives err = 0 (the n (n - 1) denominator would otherwise be zero).
  void statistics(const std::vector<double> &v)
  {
    const std::size_t n = v.size();

    if (n == 0)
    {
      mean = err = min = max = 0.0;
      return;
    }

    mean = std::accumulate(v.begin(), v.end(), 0.0) / n;

    // sum of squared deviations from the mean
    double sq_sum = 0.0;
    for (const double x : v)
    {
      const double diff = x - mean;
      sq_sum += diff * diff;
    }
    err = (n > 1) ? std::sqrt(sq_sum / (static_cast<double>(n) * (n - 1))) : 0.0;

    const auto result = std::minmax_element(v.begin(), v.end());
    min = *result.first;
    max = *result.second;
  }
};
|
||||||
|
|
||||||
|
void comms_header()
|
||||||
|
{
|
||||||
|
std::cout << GridLogMessage << " L "
|
||||||
|
<< "\t"
|
||||||
|
<< " Ls "
|
||||||
|
<< "\t"
|
||||||
|
<< "bytes\t MB/s uni (err/min/max) \t\t MB/s bidi (err/min/max)" << std::endl;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Gamma-matrix identifiers for the four directions, in the order X, Y, Z, T.
Gamma::Algebra Gmu[] = {Gamma::Algebra::GammaX,
                        Gamma::Algebra::GammaY,
                        Gamma::Algebra::GammaZ,
                        Gamma::Algebra::GammaT};
|
||||||
|
struct controls
|
||||||
|
{
|
||||||
|
int Opt;
|
||||||
|
int CommsOverlap;
|
||||||
|
Grid::CartesianCommunicator::CommunicatorPolicy_t CommsAsynch;
|
||||||
|
};
|
||||||
|
|
||||||
|
class Benchmark
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
static void Decomposition(void)
|
||||||
|
{
|
||||||
|
|
||||||
|
int threads = GridThread::GetThreads();
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "= Grid is setup to use " << threads << " threads" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Grid Default Decomposition patterns\n";
|
||||||
|
std::cout << GridLogMessage << "\tOpenMP threads : " << GridThread::GetThreads() << std::endl;
|
||||||
|
std::cout << GridLogMessage << "\tMPI tasks : " << GridCmdVectorIntToString(GridDefaultMpi()) << std::endl;
|
||||||
|
std::cout << GridLogMessage << "\tvReal : " << sizeof(vReal) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vReal::Nsimd())) << std::endl;
|
||||||
|
std::cout << GridLogMessage << "\tvRealF : " << sizeof(vRealF) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vRealF::Nsimd())) << std::endl;
|
||||||
|
std::cout << GridLogMessage << "\tvRealD : " << sizeof(vRealD) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vRealD::Nsimd())) << std::endl;
|
||||||
|
std::cout << GridLogMessage << "\tvComplex : " << sizeof(vComplex) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vComplex::Nsimd())) << std::endl;
|
||||||
|
std::cout << GridLogMessage << "\tvComplexF : " << sizeof(vComplexF) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vComplexF::Nsimd())) << std::endl;
|
||||||
|
std::cout << GridLogMessage << "\tvComplexD : " << sizeof(vComplexD) * 8 << "bits ; " << GridCmdVectorIntToString(GridDefaultSimd(4, vComplexD::Nsimd())) << std::endl;
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void Comms(void)
|
||||||
|
{
|
||||||
|
int Nloop = 200;
|
||||||
|
int nmu = 0;
|
||||||
|
int maxlat = 32;
|
||||||
|
|
||||||
|
Coordinate simd_layout = GridDefaultSimd(Nd, vComplexD::Nsimd());
|
||||||
|
Coordinate mpi_layout = GridDefaultMpi();
|
||||||
|
|
||||||
|
for (int mu = 0; mu < Nd; mu++)
|
||||||
|
if (mpi_layout[mu] > 1)
|
||||||
|
nmu++;
|
||||||
|
|
||||||
|
std::vector<double> t_time(Nloop);
|
||||||
|
time_statistics timestat;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "====================================================================================================" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in " << nmu << " dimensions" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "====================================================================================================" << std::endl;
|
||||||
|
comms_header();
|
||||||
|
|
||||||
|
for (int lat = 16; lat <= maxlat; lat += 8)
|
||||||
|
{
|
||||||
|
// for(int Ls=8;Ls<=8;Ls*=2){
|
||||||
|
{
|
||||||
|
int Ls = 12;
|
||||||
|
|
||||||
|
Coordinate latt_size({lat * mpi_layout[0],
|
||||||
|
lat * mpi_layout[1],
|
||||||
|
lat * mpi_layout[2],
|
||||||
|
lat * mpi_layout[3]});
|
||||||
|
|
||||||
|
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
|
||||||
|
RealD Nrank = Grid._Nprocessors;
|
||||||
|
RealD Nnode = Grid.NodeCount();
|
||||||
|
RealD ppn = Nrank / Nnode;
|
||||||
|
|
||||||
|
std::vector<HalfSpinColourVectorD *> xbuf(8);
|
||||||
|
std::vector<HalfSpinColourVectorD *> rbuf(8);
|
||||||
|
// Grid.ShmBufferFreeAll();
|
||||||
|
uint64_t bytes = lat * lat * lat * Ls * sizeof(HalfSpinColourVectorD);
|
||||||
|
for (int d = 0; d < 8; d++)
|
||||||
|
{
|
||||||
|
xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
|
||||||
|
rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
|
||||||
|
// bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||||
|
// bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||||
|
}
|
||||||
|
|
||||||
|
// int ncomm;
|
||||||
|
double dbytes;
|
||||||
|
|
||||||
|
for (int dir = 0; dir < 8; dir++)
|
||||||
|
{
|
||||||
|
int mu = dir % 4;
|
||||||
|
if (mpi_layout[mu] > 1)
|
||||||
|
{
|
||||||
|
|
||||||
|
std::vector<double> times(Nloop);
|
||||||
|
for (int i = 0; i < Nloop; i++)
|
||||||
|
{
|
||||||
|
|
||||||
|
dbytes = 0;
|
||||||
|
double start = usecond();
|
||||||
|
int xmit_to_rank;
|
||||||
|
int recv_from_rank;
|
||||||
|
|
||||||
|
if (dir == mu)
|
||||||
|
{
|
||||||
|
int comm_proc = 1;
|
||||||
|
Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int comm_proc = mpi_layout[mu] - 1;
|
||||||
|
Grid.ShiftedRanks(mu, comm_proc, xmit_to_rank, recv_from_rank);
|
||||||
|
}
|
||||||
|
Grid.SendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
|
||||||
|
(void *)&rbuf[dir][0], recv_from_rank,
|
||||||
|
bytes);
|
||||||
|
dbytes += bytes;
|
||||||
|
|
||||||
|
double stop = usecond();
|
||||||
|
t_time[i] = stop - start; // microseconds
|
||||||
|
}
|
||||||
|
timestat.statistics(t_time);
|
||||||
|
|
||||||
|
dbytes = dbytes * ppn;
|
||||||
|
double xbytes = dbytes * 0.5;
|
||||||
|
double bidibytes = dbytes;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << lat << "\t" << Ls << "\t "
|
||||||
|
<< bytes << " \t "
|
||||||
|
<< xbytes / timestat.mean << " \t " << xbytes * timestat.err / (timestat.mean * timestat.mean) << " \t "
|
||||||
|
<< xbytes / timestat.max << " " << xbytes / timestat.min
|
||||||
|
<< "\t\t" << bidibytes / timestat.mean << " " << bidibytes * timestat.err / (timestat.mean * timestat.mean) << " "
|
||||||
|
<< bidibytes / timestat.max << " " << bidibytes / timestat.min << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (int d = 0; d < 8; d++)
|
||||||
|
{
|
||||||
|
acceleratorFreeDevice(xbuf[d]);
|
||||||
|
acceleratorFreeDevice(rbuf[d]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void Memory(void)
|
||||||
|
{
|
||||||
|
const int Nvec = 8;
|
||||||
|
typedef Lattice<iVector<vReal, Nvec>> LatticeVec;
|
||||||
|
typedef iVector<vReal, Nvec> Vec;
|
||||||
|
|
||||||
|
Coordinate simd_layout = GridDefaultSimd(Nd, vReal::Nsimd());
|
||||||
|
Coordinate mpi_layout = GridDefaultMpi();
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "= Benchmarking a*x + y bandwidth" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
std::cout << GridLogMessage << " L "
|
||||||
|
<< "\t\t"
|
||||||
|
<< "bytes"
|
||||||
|
<< "\t\t\t"
|
||||||
|
<< "GB/s"
|
||||||
|
<< "\t\t"
|
||||||
|
<< "Gflop/s"
|
||||||
|
<< "\t\t seconds"
|
||||||
|
<< "\t\tGB/s / node" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "----------------------------------------------------------" << std::endl;
|
||||||
|
|
||||||
|
// uint64_t NP;
|
||||||
|
uint64_t NN;
|
||||||
|
|
||||||
|
uint64_t lmax = 32;
|
||||||
|
#define NLOOP (1000 * lmax * lmax * lmax * lmax / lat / lat / lat / lat)
|
||||||
|
|
||||||
|
GridSerialRNG sRNG;
|
||||||
|
sRNG.SeedFixedIntegers(std::vector<int>({45, 12, 81, 9}));
|
||||||
|
for (int lat = 8; lat <= lmax; lat += 8)
|
||||||
|
{
|
||||||
|
|
||||||
|
Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2], lat * mpi_layout[3]});
|
||||||
|
int64_t vol = latt_size[0] * latt_size[1] * latt_size[2] * latt_size[3];
|
||||||
|
|
||||||
|
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
|
||||||
|
|
||||||
|
// NP= Grid.RankCount();
|
||||||
|
NN = Grid.NodeCount();
|
||||||
|
|
||||||
|
Vec rn;
|
||||||
|
random(sRNG, rn);
|
||||||
|
|
||||||
|
LatticeVec z(&Grid);
|
||||||
|
z = Zero();
|
||||||
|
LatticeVec x(&Grid);
|
||||||
|
x = Zero();
|
||||||
|
LatticeVec y(&Grid);
|
||||||
|
y = Zero();
|
||||||
|
double a = 2.0;
|
||||||
|
|
||||||
|
uint64_t Nloop = NLOOP;
|
||||||
|
|
||||||
|
double start = usecond();
|
||||||
|
for (int i = 0; i < Nloop; i++)
|
||||||
|
{
|
||||||
|
z = a * x - y;
|
||||||
|
}
|
||||||
|
double stop = usecond();
|
||||||
|
double time = (stop - start) / Nloop * 1000;
|
||||||
|
|
||||||
|
double flops = vol * Nvec * 2; // mul,add
|
||||||
|
double bytes = 3.0 * vol * Nvec * sizeof(Real);
|
||||||
|
std::cout << GridLogMessage << std::setprecision(3)
|
||||||
|
<< lat << "\t\t" << bytes << " \t\t" << bytes / time << "\t\t" << flops / time << "\t\t" << (stop - start) / 1000. / 1000.
|
||||||
|
<< "\t\t" << bytes / time / NN << std::endl;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
static void SU4(void)
|
||||||
|
{
|
||||||
|
const int Nc4 = 4;
|
||||||
|
typedef Lattice<iMatrix<vComplexF, Nc4>> LatticeSU4;
|
||||||
|
|
||||||
|
Coordinate simd_layout = GridDefaultSimd(Nd, vComplexF::Nsimd());
|
||||||
|
Coordinate mpi_layout = GridDefaultMpi();
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "= Benchmarking z = y*x SU(4) bandwidth" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
std::cout << GridLogMessage << " L "
|
||||||
|
<< "\t\t"
|
||||||
|
<< "bytes"
|
||||||
|
<< "\t\t\t"
|
||||||
|
<< "GB/s"
|
||||||
|
<< "\t\t"
|
||||||
|
<< "Gflop/s"
|
||||||
|
<< "\t\t seconds"
|
||||||
|
<< "\t\tGB/s / node" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "----------------------------------------------------------" << std::endl;
|
||||||
|
|
||||||
|
uint64_t NN;
|
||||||
|
|
||||||
|
uint64_t lmax = 32;
|
||||||
|
|
||||||
|
GridSerialRNG sRNG;
|
||||||
|
sRNG.SeedFixedIntegers(std::vector<int>({45, 12, 81, 9}));
|
||||||
|
for (int lat = 8; lat <= lmax; lat += 8)
|
||||||
|
{
|
||||||
|
|
||||||
|
Coordinate latt_size({lat * mpi_layout[0], lat * mpi_layout[1], lat * mpi_layout[2], lat * mpi_layout[3]});
|
||||||
|
int64_t vol = latt_size[0] * latt_size[1] * latt_size[2] * latt_size[3];
|
||||||
|
|
||||||
|
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
|
||||||
|
|
||||||
|
NN = Grid.NodeCount();
|
||||||
|
|
||||||
|
LatticeSU4 z(&Grid);
|
||||||
|
z = Zero();
|
||||||
|
LatticeSU4 x(&Grid);
|
||||||
|
x = Zero();
|
||||||
|
LatticeSU4 y(&Grid);
|
||||||
|
y = Zero();
|
||||||
|
// double a=2.0;
|
||||||
|
|
||||||
|
uint64_t Nloop = NLOOP;
|
||||||
|
|
||||||
|
double start = usecond();
|
||||||
|
for (int i = 0; i < Nloop; i++)
|
||||||
|
{
|
||||||
|
z = x * y;
|
||||||
|
}
|
||||||
|
double stop = usecond();
|
||||||
|
double time = (stop - start) / Nloop * 1000;
|
||||||
|
|
||||||
|
double flops = vol * Nc4 * Nc4 * (6 + (Nc4 - 1) * 8); // mul,add
|
||||||
|
double bytes = 3.0 * vol * Nc4 * Nc4 * 2 * sizeof(RealF);
|
||||||
|
std::cout << GridLogMessage << std::setprecision(3)
|
||||||
|
<< lat << "\t\t" << bytes << " \t\t" << bytes / time << "\t\t" << flops / time << "\t\t" << (stop - start) / 1000. / 1000.
|
||||||
|
<< "\t\t" << bytes / time / NN << std::endl;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
static double DWF(int Ls, int L)
|
||||||
|
{
|
||||||
|
RealD mass = 0.1;
|
||||||
|
RealD M5 = 1.8;
|
||||||
|
|
||||||
|
double mflops;
|
||||||
|
double mflops_best = 0;
|
||||||
|
double mflops_worst = 0;
|
||||||
|
std::vector<double> mflops_all;
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////
|
||||||
|
// Set/Get the layout & grid size
|
||||||
|
///////////////////////////////////////////////////////
|
||||||
|
int threads = GridThread::GetThreads();
|
||||||
|
Coordinate mpi = GridDefaultMpi();
|
||||||
|
assert(mpi.size() == 4);
|
||||||
|
Coordinate local({L, L, L, L});
|
||||||
|
Coordinate latt4({local[0] * mpi[0], local[1] * mpi[1], local[2] * mpi[2], local[3] * mpi[3]});
|
||||||
|
|
||||||
|
GridCartesian *TmpGrid = SpaceTimeGrid::makeFourDimGrid(latt4,
|
||||||
|
GridDefaultSimd(Nd, vComplex::Nsimd()),
|
||||||
|
GridDefaultMpi());
|
||||||
|
uint64_t NP = TmpGrid->RankCount();
|
||||||
|
uint64_t NN = TmpGrid->NodeCount();
|
||||||
|
NN_global = NN;
|
||||||
|
uint64_t SHM = NP / NN;
|
||||||
|
|
||||||
|
///////// Welcome message ////////////
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Benchmark DWF on " << L << "^4 local volume " << std::endl;
|
||||||
|
std::cout << GridLogMessage << "* Nc : " << Nc << std::endl;
|
||||||
|
std::cout << GridLogMessage << "* Global volume : " << GridCmdVectorIntToString(latt4) << std::endl;
|
||||||
|
std::cout << GridLogMessage << "* Ls : " << Ls << std::endl;
|
||||||
|
std::cout << GridLogMessage << "* ranks : " << NP << std::endl;
|
||||||
|
std::cout << GridLogMessage << "* nodes : " << NN << std::endl;
|
||||||
|
std::cout << GridLogMessage << "* ranks/node : " << SHM << std::endl;
|
||||||
|
std::cout << GridLogMessage << "* ranks geom : " << GridCmdVectorIntToString(mpi) << std::endl;
|
||||||
|
std::cout << GridLogMessage << "* Using " << threads << " threads" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
|
||||||
|
///////// Lattice Init ////////////
|
||||||
|
GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian *UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
|
GridCartesian *FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
|
||||||
|
GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
|
||||||
|
|
||||||
|
///////// RNG Init ////////////
|
||||||
|
std::vector<int> seeds4({1, 2, 3, 4});
|
||||||
|
std::vector<int> seeds5({5, 6, 7, 8});
|
||||||
|
GridParallelRNG RNG4(UGrid);
|
||||||
|
RNG4.SeedFixedIntegers(seeds4);
|
||||||
|
GridParallelRNG RNG5(FGrid);
|
||||||
|
RNG5.SeedFixedIntegers(seeds5);
|
||||||
|
std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
|
||||||
|
|
||||||
|
typedef DomainWallFermionF Action;
|
||||||
|
typedef typename Action::FermionField Fermion;
|
||||||
|
typedef LatticeGaugeFieldF Gauge;
|
||||||
|
|
||||||
|
///////// Source preparation ////////////
|
||||||
|
Gauge Umu(UGrid);
|
||||||
|
SU<Nc>::HotConfiguration(RNG4, Umu);
|
||||||
|
Fermion src(FGrid);
|
||||||
|
random(RNG5, src);
|
||||||
|
Fermion src_e(FrbGrid);
|
||||||
|
Fermion src_o(FrbGrid);
|
||||||
|
Fermion r_e(FrbGrid);
|
||||||
|
Fermion r_o(FrbGrid);
|
||||||
|
Fermion r_eo(FGrid);
|
||||||
|
Action Dw(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5);
|
||||||
|
|
||||||
|
{
|
||||||
|
|
||||||
|
pickCheckerboard(Even, src_e, src);
|
||||||
|
pickCheckerboard(Odd, src_o, src);
|
||||||
|
|
||||||
|
const int num_cases = 4;
|
||||||
|
std::string fmt("G/S/C ; G/O/C ; G/S/S ; G/O/S ");
|
||||||
|
|
||||||
|
controls Cases[] = {
|
||||||
|
{WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsThenCompute, CartesianCommunicator::CommunicatorPolicyConcurrent},
|
||||||
|
{WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsAndCompute, CartesianCommunicator::CommunicatorPolicyConcurrent},
|
||||||
|
{WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsThenCompute, CartesianCommunicator::CommunicatorPolicySequential},
|
||||||
|
{WilsonKernelsStatic::OptGeneric, WilsonKernelsStatic::CommsAndCompute, CartesianCommunicator::CommunicatorPolicySequential}};
|
||||||
|
|
||||||
|
for (int c = 0; c < num_cases; c++)
|
||||||
|
{
|
||||||
|
|
||||||
|
WilsonKernelsStatic::Comms = Cases[c].CommsOverlap;
|
||||||
|
WilsonKernelsStatic::Opt = Cases[c].Opt;
|
||||||
|
CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric)
|
||||||
|
std::cout << GridLogMessage << "* Using GENERIC Nc WilsonKernels" << std::endl;
|
||||||
|
if (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute)
|
||||||
|
std::cout << GridLogMessage << "* Using Overlapped Comms/Compute" << std::endl;
|
||||||
|
if (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute)
|
||||||
|
std::cout << GridLogMessage << "* Using sequential Comms/Compute" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "* SINGLE precision " << std::endl;
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
|
||||||
|
int nwarm = 10;
|
||||||
|
double t0 = usecond();
|
||||||
|
FGrid->Barrier();
|
||||||
|
for (int i = 0; i < nwarm; i++)
|
||||||
|
{
|
||||||
|
Dw.DhopEO(src_o, r_e, DaggerNo);
|
||||||
|
}
|
||||||
|
FGrid->Barrier();
|
||||||
|
double t1 = usecond();
|
||||||
|
uint64_t ncall = 500;
|
||||||
|
|
||||||
|
FGrid->Broadcast(0, &ncall, sizeof(ncall));
|
||||||
|
|
||||||
|
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;
|
||||||
|
Dw.ZeroCounters();
|
||||||
|
|
||||||
|
time_statistics timestat;
|
||||||
|
std::vector<double> t_time(ncall);
|
||||||
|
for (uint64_t i = 0; i < ncall; i++)
|
||||||
|
{
|
||||||
|
t0 = usecond();
|
||||||
|
Dw.DhopEO(src_o, r_e, DaggerNo);
|
||||||
|
t1 = usecond();
|
||||||
|
t_time[i] = t1 - t0;
|
||||||
|
}
|
||||||
|
FGrid->Barrier();
|
||||||
|
|
||||||
|
double volume = Ls;
|
||||||
|
for (int mu = 0; mu < Nd; mu++)
|
||||||
|
volume = volume * latt4[mu];
|
||||||
|
|
||||||
|
// Nc=3 gives
|
||||||
|
// 1344= 3*(2*8+6)*2*8 + 8*3*2*2 + 3*4*2*8
|
||||||
|
// 1344 = Nc* (6+(Nc-1)*8)*2*Nd + Nd*Nc*2*2 + Nd*Nc*Ns*2
|
||||||
|
// double flops=(1344.0*volume)/2;
|
||||||
|
#if 0
|
||||||
|
double fps = Nc* (6+(Nc-1)*8)*Ns*Nd + Nd*Nc*Ns + Nd*Nc*Ns*2;
|
||||||
|
#else
|
||||||
|
double fps = Nc * (6 + (Nc - 1) * 8) * Ns * Nd + 2 * Nd * Nc * Ns + 2 * Nd * Nc * Ns * 2;
|
||||||
|
#endif
|
||||||
|
double flops = (fps * volume) / 2;
|
||||||
|
double mf_hi, mf_lo, mf_err;
|
||||||
|
|
||||||
|
timestat.statistics(t_time);
|
||||||
|
mf_hi = flops / timestat.min;
|
||||||
|
mf_lo = flops / timestat.max;
|
||||||
|
mf_err = flops / timestat.min * timestat.err / timestat.mean;
|
||||||
|
|
||||||
|
mflops = flops / timestat.mean;
|
||||||
|
mflops_all.push_back(mflops);
|
||||||
|
if (mflops_best == 0)
|
||||||
|
mflops_best = mflops;
|
||||||
|
if (mflops_worst == 0)
|
||||||
|
mflops_worst = mflops;
|
||||||
|
if (mflops > mflops_best)
|
||||||
|
mflops_best = mflops;
|
||||||
|
if (mflops < mflops_worst)
|
||||||
|
mflops_worst = mflops;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "Deo FlopsPerSite is " << fps << std::endl;
|
||||||
|
std::cout << GridLogMessage << std::fixed << std::setprecision(1) << "Deo mflop/s = " << mflops << " (" << mf_err << ") " << mf_lo << "-" << mf_hi << std::endl;
|
||||||
|
std::cout << GridLogMessage << std::fixed << std::setprecision(1) << "Deo mflop/s per rank " << mflops / NP << std::endl;
|
||||||
|
std::cout << GridLogMessage << std::fixed << std::setprecision(1) << "Deo mflop/s per node " << mflops / NN << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
std::cout << GridLogMessage << L << "^4 x " << Ls << " Deo Best mflop/s = " << mflops_best << " ; " << mflops_best / NN << " per node " << std::endl;
|
||||||
|
std::cout << GridLogMessage << L << "^4 x " << Ls << " Deo Worst mflop/s = " << mflops_worst << " ; " << mflops_worst / NN << " per node " << std::endl;
|
||||||
|
std::cout << GridLogMessage << fmt << std::endl;
|
||||||
|
std::cout << GridLogMessage;
|
||||||
|
|
||||||
|
for (int i = 0; i < mflops_all.size(); i++)
|
||||||
|
{
|
||||||
|
std::cout << mflops_all[i] / NN << " ; ";
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
}
|
||||||
|
return mflops_best;
|
||||||
|
}
|
||||||
|
|
||||||
|
static double Staggered(int L)
|
||||||
|
{
|
||||||
|
double mflops;
|
||||||
|
double mflops_best = 0;
|
||||||
|
double mflops_worst = 0;
|
||||||
|
std::vector<double> mflops_all;
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////
|
||||||
|
// Set/Get the layout & grid size
|
||||||
|
///////////////////////////////////////////////////////
|
||||||
|
int threads = GridThread::GetThreads();
|
||||||
|
Coordinate mpi = GridDefaultMpi();
|
||||||
|
assert(mpi.size() == 4);
|
||||||
|
Coordinate local({L, L, L, L});
|
||||||
|
Coordinate latt4({local[0] * mpi[0], local[1] * mpi[1], local[2] * mpi[2], local[3] * mpi[3]});
|
||||||
|
|
||||||
|
GridCartesian *TmpGrid = SpaceTimeGrid::makeFourDimGrid(latt4,
|
||||||
|
GridDefaultSimd(Nd, vComplex::Nsimd()),
|
||||||
|
GridDefaultMpi());
|
||||||
|
uint64_t NP = TmpGrid->RankCount();
|
||||||
|
uint64_t NN = TmpGrid->NodeCount();
|
||||||
|
NN_global = NN;
|
||||||
|
uint64_t SHM = NP / NN;
|
||||||
|
|
||||||
|
///////// Welcome message ////////////
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Benchmark ImprovedStaggered on " << L << "^4 local volume " << std::endl;
|
||||||
|
std::cout << GridLogMessage << "* Global volume : " << GridCmdVectorIntToString(latt4) << std::endl;
|
||||||
|
std::cout << GridLogMessage << "* ranks : " << NP << std::endl;
|
||||||
|
std::cout << GridLogMessage << "* nodes : " << NN << std::endl;
|
||||||
|
std::cout << GridLogMessage << "* ranks/node : " << SHM << std::endl;
|
||||||
|
std::cout << GridLogMessage << "* ranks geom : " << GridCmdVectorIntToString(mpi) << std::endl;
|
||||||
|
std::cout << GridLogMessage << "* Using " << threads << " threads" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
|
||||||
|
///////// Lattice Init ////////////
|
||||||
|
GridCartesian *FGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid);
|
||||||
|
|
||||||
|
///////// RNG Init ////////////
|
||||||
|
std::vector<int> seeds4({1, 2, 3, 4});
|
||||||
|
GridParallelRNG RNG4(FGrid);
|
||||||
|
RNG4.SeedFixedIntegers(seeds4);
|
||||||
|
std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
|
||||||
|
|
||||||
|
RealD mass = 0.1;
|
||||||
|
RealD c1 = 9.0 / 8.0;
|
||||||
|
RealD c2 = -1.0 / 24.0;
|
||||||
|
RealD u0 = 1.0;
|
||||||
|
|
||||||
|
typedef ImprovedStaggeredFermionF Action;
|
||||||
|
typedef typename Action::FermionField Fermion;
|
||||||
|
typedef LatticeGaugeFieldF Gauge;
|
||||||
|
|
||||||
|
Gauge Umu(FGrid);
|
||||||
|
SU<Nc>::HotConfiguration(RNG4, Umu);
|
||||||
|
|
||||||
|
typename Action::ImplParams params;
|
||||||
|
Action Ds(Umu, Umu, *FGrid, *FrbGrid, mass, c1, c2, u0, params);
|
||||||
|
|
||||||
|
///////// Source preparation ////////////
|
||||||
|
Fermion src(FGrid);
|
||||||
|
random(RNG4, src);
|
||||||
|
Fermion src_e(FrbGrid);
|
||||||
|
Fermion src_o(FrbGrid);
|
||||||
|
Fermion r_e(FrbGrid);
|
||||||
|
Fermion r_o(FrbGrid);
|
||||||
|
Fermion r_eo(FGrid);
|
||||||
|
|
||||||
|
{
|
||||||
|
|
||||||
|
pickCheckerboard(Even, src_e, src);
|
||||||
|
pickCheckerboard(Odd, src_o, src);
|
||||||
|
|
||||||
|
const int num_cases = 4;
|
||||||
|
std::string fmt("G/S/C ; G/O/C ; G/S/S ; G/O/S ");
|
||||||
|
|
||||||
|
controls Cases[] = {
|
||||||
|
{StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsThenCompute, CartesianCommunicator::CommunicatorPolicyConcurrent},
|
||||||
|
{StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsAndCompute, CartesianCommunicator::CommunicatorPolicyConcurrent},
|
||||||
|
{StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsThenCompute, CartesianCommunicator::CommunicatorPolicySequential},
|
||||||
|
{StaggeredKernelsStatic::OptGeneric, StaggeredKernelsStatic::CommsAndCompute, CartesianCommunicator::CommunicatorPolicySequential}};
|
||||||
|
|
||||||
|
for (int c = 0; c < num_cases; c++)
|
||||||
|
{
|
||||||
|
|
||||||
|
StaggeredKernelsStatic::Comms = Cases[c].CommsOverlap;
|
||||||
|
StaggeredKernelsStatic::Opt = Cases[c].Opt;
|
||||||
|
CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
if (StaggeredKernelsStatic::Opt == StaggeredKernelsStatic::OptGeneric)
|
||||||
|
std::cout << GridLogMessage << "* Using GENERIC Nc StaggeredKernels" << std::endl;
|
||||||
|
if (StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsAndCompute)
|
||||||
|
std::cout << GridLogMessage << "* Using Overlapped Comms/Compute" << std::endl;
|
||||||
|
if (StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsThenCompute)
|
||||||
|
std::cout << GridLogMessage << "* Using sequential Comms/Compute" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "* SINGLE precision " << std::endl;
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
|
||||||
|
int nwarm = 10;
|
||||||
|
double t0 = usecond();
|
||||||
|
FGrid->Barrier();
|
||||||
|
for (int i = 0; i < nwarm; i++)
|
||||||
|
{
|
||||||
|
Ds.DhopEO(src_o, r_e, DaggerNo);
|
||||||
|
}
|
||||||
|
FGrid->Barrier();
|
||||||
|
double t1 = usecond();
|
||||||
|
uint64_t ncall = 500;
|
||||||
|
|
||||||
|
FGrid->Broadcast(0, &ncall, sizeof(ncall));
|
||||||
|
|
||||||
|
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;
|
||||||
|
Ds.ZeroCounters();
|
||||||
|
|
||||||
|
time_statistics timestat;
|
||||||
|
std::vector<double> t_time(ncall);
|
||||||
|
for (uint64_t i = 0; i < ncall; i++)
|
||||||
|
{
|
||||||
|
t0 = usecond();
|
||||||
|
Ds.DhopEO(src_o, r_e, DaggerNo);
|
||||||
|
t1 = usecond();
|
||||||
|
t_time[i] = t1 - t0;
|
||||||
|
}
|
||||||
|
FGrid->Barrier();
|
||||||
|
|
||||||
|
double volume = 1;
|
||||||
|
for (int mu = 0; mu < Nd; mu++)
|
||||||
|
volume = volume * latt4[mu];
|
||||||
|
double flops = (1146.0 * volume) / 2;
|
||||||
|
double mf_hi, mf_lo, mf_err;
|
||||||
|
|
||||||
|
timestat.statistics(t_time);
|
||||||
|
mf_hi = flops / timestat.min;
|
||||||
|
mf_lo = flops / timestat.max;
|
||||||
|
mf_err = flops / timestat.min * timestat.err / timestat.mean;
|
||||||
|
|
||||||
|
mflops = flops / timestat.mean;
|
||||||
|
mflops_all.push_back(mflops);
|
||||||
|
if (mflops_best == 0)
|
||||||
|
mflops_best = mflops;
|
||||||
|
if (mflops_worst == 0)
|
||||||
|
mflops_worst = mflops;
|
||||||
|
if (mflops > mflops_best)
|
||||||
|
mflops_best = mflops;
|
||||||
|
if (mflops < mflops_worst)
|
||||||
|
mflops_worst = mflops;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << std::fixed << std::setprecision(1) << "Deo mflop/s = " << mflops << " (" << mf_err << ") " << mf_lo << "-" << mf_hi << std::endl;
|
||||||
|
std::cout << GridLogMessage << std::fixed << std::setprecision(1) << "Deo mflop/s per rank " << mflops / NP << std::endl;
|
||||||
|
std::cout << GridLogMessage << std::fixed << std::setprecision(1) << "Deo mflop/s per node " << mflops / NN << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
std::cout << GridLogMessage << L << "^4 Deo Best mflop/s = " << mflops_best << " ; " << mflops_best / NN << " per node " << std::endl;
|
||||||
|
std::cout << GridLogMessage << L << "^4 Deo Worst mflop/s = " << mflops_worst << " ; " << mflops_worst / NN << " per node " << std::endl;
|
||||||
|
std::cout << GridLogMessage << fmt << std::endl;
|
||||||
|
std::cout << GridLogMessage;
|
||||||
|
|
||||||
|
for (int i = 0; i < mflops_all.size(); i++)
|
||||||
|
{
|
||||||
|
std::cout << mflops_all[i] / NN << " ; ";
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
return mflops_best;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc, &argv);
|
||||||
|
|
||||||
|
CartesianCommunicator::SetCommunicatorPolicy(CartesianCommunicator::CommunicatorPolicySequential);
|
||||||
|
#ifdef KNL
|
||||||
|
LebesgueOrder::Block = std::vector<int>({8, 2, 2, 2});
|
||||||
|
#else
|
||||||
|
LebesgueOrder::Block = std::vector<int>({2, 2, 2, 2});
|
||||||
|
#endif
|
||||||
|
Benchmark::Decomposition();
|
||||||
|
|
||||||
|
int do_su4 = 1;
|
||||||
|
int do_memory = 1;
|
||||||
|
int do_comms = 1;
|
||||||
|
|
||||||
|
int sel = 4;
|
||||||
|
std::vector<int> L_list({8, 12, 16, 24, 32});
|
||||||
|
int selm1 = sel - 1;
|
||||||
|
|
||||||
|
std::vector<double> wilson;
|
||||||
|
std::vector<double> dwf4;
|
||||||
|
std::vector<double> staggered;
|
||||||
|
|
||||||
|
int Ls = 1;
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
std::cout << GridLogMessage << " Wilson dslash 4D vectorised" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
for (int l = 0; l < L_list.size(); l++)
|
||||||
|
{
|
||||||
|
wilson.push_back(Benchmark::DWF(Ls, L_list[l]));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ls = 12;
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
std::cout << GridLogMessage << " Domain wall dslash 4D vectorised" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
for (int l = 0; l < L_list.size(); l++)
|
||||||
|
{
|
||||||
|
double result = Benchmark::DWF(Ls, L_list[l]);
|
||||||
|
dwf4.push_back(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
std::cout << GridLogMessage << " Improved Staggered dslash 4D vectorised" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
for (int l = 0; l < L_list.size(); l++)
|
||||||
|
{
|
||||||
|
double result = Benchmark::Staggered(L_list[l]);
|
||||||
|
staggered.push_back(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
std::cout << GridLogMessage << " Summary table Ls=" << Ls << std::endl;
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "L \t\t Wilson \t\t DWF4 \t\t Staggered" << std::endl;
|
||||||
|
for (int l = 0; l < L_list.size(); l++)
|
||||||
|
{
|
||||||
|
std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] << " \t\t " << dwf4[l] << " \t\t " << staggered[l] << std::endl;
|
||||||
|
}
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
|
||||||
|
int NN = NN_global;
|
||||||
|
if (do_memory)
|
||||||
|
{
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
std::cout << GridLogMessage << " Memory benchmark " << std::endl;
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
Benchmark::Memory();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (do_su4)
|
||||||
|
{
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
std::cout << GridLogMessage << " SU(4) benchmark " << std::endl;
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
Benchmark::SU4();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (do_comms)
|
||||||
|
{
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
std::cout << GridLogMessage << " Communications benchmark " << std::endl;
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
Benchmark::Comms();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
std::cout << GridLogMessage << " Per Node Summary table Ls=" << Ls << std::endl;
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
std::cout << GridLogMessage << " L \t\t Wilson\t\t DWF4\t\t Staggered " << std::endl;
|
||||||
|
for (int l = 0; l < L_list.size(); l++)
|
||||||
|
{
|
||||||
|
std::cout << GridLogMessage << L_list[l] << " \t\t " << wilson[l] / NN << " \t " << dwf4[l] / NN << " \t " << staggered[l] / NN << std::endl;
|
||||||
|
}
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
std::cout << GridLogMessage << " Comparison point result: " << 0.5 * (dwf4[sel] + dwf4[selm1]) / NN << " Mflop/s per node" << std::endl;
|
||||||
|
std::cout << GridLogMessage << " Comparison point is 0.5*(" << dwf4[sel] / NN << "+" << dwf4[selm1] / NN << ") " << std::endl;
|
||||||
|
std::cout << std::setprecision(3);
|
||||||
|
std::cout << GridLogMessage << "==================================================================================" << std::endl;
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
251
Grid/Benchmark_comms_host_device.cpp
Normal file
251
Grid/Benchmark_comms_host_device.cpp
Normal file
@ -0,0 +1,251 @@
|
|||||||
|
/*
|
||||||
|
Copyright © 2015 Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Copyright © 2022 Antonin Portelli <antonin.portelli@me.com>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
|
||||||
|
/**
 * Summary statistics of a set of timing samples.
 *
 * After statistics() has run:
 *   mean - arithmetic mean of the samples
 *   err  - standard error of the mean, sqrt( sum (x - mean)^2 / (n (n - 1)) )
 *   min  - smallest sample
 *   max  - largest sample
 */
struct time_statistics{
  double mean;
  double err;
  double min;
  double max;

  /**
   * Computes mean, err, min and max from the samples in v.
   *
   * Edge cases: an empty v sets all four fields to 0 (there is nothing to
   * dereference), and a single sample gives err = 0 because the n (n - 1)
   * denominator would otherwise be zero.
   *
   * @param v timing samples; taken by const reference to avoid a copy
   */
  void statistics(const std::vector<double> &v){
    const auto n = v.size();
    if (n == 0) {
      mean = 0.0;
      err  = 0.0;
      min  = 0.0;
      max  = 0.0;
      return;
    }

    const double sum = std::accumulate(v.begin(), v.end(), 0.0);
    // Local copy so the lambda below captures a value, not `this`.
    const double avg = sum / n;
    mean = avg;

    if (n > 1) {
      std::vector<double> diff(n);
      std::transform(v.begin(), v.end(), diff.begin(), [avg](double x) { return x - avg; });
      const double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0);
      err = std::sqrt(sq_sum / (n*(n - 1)));
    } else {
      err = 0.0;
    }

    const auto result = std::minmax_element(v.begin(), v.end());
    min = *result.first;
    max = *result.second;
  }
};
|
||||||
|
|
||||||
|
/**
 * Prints the column headings of the halo-exchange results table:
 * L, Ls, bytes, MB/s uni and MB/s bidi.
 */
void header()
{
  // Lattice-size columns.
  std::cout << GridLogMessage << " L " << "\t" << " Ls " << "\t";
  // Payload and bandwidth columns; the "bytes" heading is right-aligned in an
  // 11-character field.
  std::cout << std::setw(11) << "bytes\t\t" << "MB/s uni" << "\t" << "MB/s bidi" << std::endl;
}
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
Coordinate simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd());
|
||||||
|
Coordinate mpi_layout = GridDefaultMpi();
|
||||||
|
int threads = GridThread::GetThreads();
|
||||||
|
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||||
|
|
||||||
|
int Nloop=250;
|
||||||
|
int nmu=0;
|
||||||
|
int maxlat=32;
|
||||||
|
for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "Number of iterations to average: "<< Nloop << std::endl;
|
||||||
|
std::vector<double> t_time(Nloop);
|
||||||
|
// time_statistics timestat;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange from host memory "<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
|
header();
|
||||||
|
|
||||||
|
for(int lat=8;lat<=maxlat;lat+=4){
|
||||||
|
for(int Ls=8;Ls<=8;Ls*=2){
|
||||||
|
|
||||||
|
Coordinate latt_size ({lat*mpi_layout[0],
|
||||||
|
lat*mpi_layout[1],
|
||||||
|
lat*mpi_layout[2],
|
||||||
|
lat*mpi_layout[3]});
|
||||||
|
|
||||||
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
RealD Nrank = Grid._Nprocessors;
|
||||||
|
RealD Nnode = Grid.NodeCount();
|
||||||
|
RealD ppn = Nrank/Nnode;
|
||||||
|
|
||||||
|
std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8);
|
||||||
|
std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8);
|
||||||
|
|
||||||
|
for(int mu=0;mu<8;mu++){
|
||||||
|
xbuf[mu].resize(lat*lat*lat*Ls);
|
||||||
|
rbuf[mu].resize(lat*lat*lat*Ls);
|
||||||
|
}
|
||||||
|
uint64_t bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||||
|
|
||||||
|
int ncomm;
|
||||||
|
|
||||||
|
for(int mu=0;mu<4;mu++){
|
||||||
|
if (mpi_layout[mu]>1 ) {
|
||||||
|
double start=usecond();
|
||||||
|
for(int i=0;i<Nloop;i++){
|
||||||
|
|
||||||
|
ncomm=0;
|
||||||
|
|
||||||
|
|
||||||
|
ncomm++;
|
||||||
|
int comm_proc=1;
|
||||||
|
int xmit_to_rank;
|
||||||
|
int recv_from_rank;
|
||||||
|
|
||||||
|
{
|
||||||
|
std::vector<CommsRequest_t> requests;
|
||||||
|
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||||
|
Grid.SendToRecvFrom((void *)&xbuf[mu][0],
|
||||||
|
xmit_to_rank,
|
||||||
|
(void *)&rbuf[mu][0],
|
||||||
|
recv_from_rank,
|
||||||
|
bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
comm_proc = mpi_layout[mu]-1;
|
||||||
|
{
|
||||||
|
std::vector<CommsRequest_t> requests;
|
||||||
|
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||||
|
Grid.SendToRecvFrom((void *)&xbuf[mu+4][0],
|
||||||
|
xmit_to_rank,
|
||||||
|
(void *)&rbuf[mu+4][0],
|
||||||
|
recv_from_rank,
|
||||||
|
bytes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Grid.Barrier();
|
||||||
|
double stop=usecond();
|
||||||
|
double mean=(stop-start)/Nloop;
|
||||||
|
double dbytes = bytes*ppn;
|
||||||
|
double xbytes = dbytes*2.0*ncomm;
|
||||||
|
double rbytes = xbytes;
|
||||||
|
double bidibytes = xbytes+rbytes;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
|
||||||
|
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)<<" "
|
||||||
|
<<std::right<< xbytes/mean<<" "
|
||||||
|
<< "\t\t"<<std::setw(7)<< bidibytes/mean<< std::endl;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange from GPU memory "<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
|
header();
|
||||||
|
|
||||||
|
for(int lat=8;lat<=maxlat;lat+=4){
|
||||||
|
for(int Ls=8;Ls<=8;Ls*=2){
|
||||||
|
|
||||||
|
Coordinate latt_size ({lat*mpi_layout[0],
|
||||||
|
lat*mpi_layout[1],
|
||||||
|
lat*mpi_layout[2],
|
||||||
|
lat*mpi_layout[3]});
|
||||||
|
|
||||||
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
RealD Nrank = Grid._Nprocessors;
|
||||||
|
RealD Nnode = Grid.NodeCount();
|
||||||
|
RealD ppn = Nrank/Nnode;
|
||||||
|
|
||||||
|
|
||||||
|
std::vector<HalfSpinColourVectorD *> xbuf(8);
|
||||||
|
std::vector<HalfSpinColourVectorD *> rbuf(8);
|
||||||
|
|
||||||
|
uint64_t bytes = lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||||
|
for(int d=0;d<8;d++){
|
||||||
|
xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
|
||||||
|
rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
int ncomm;
|
||||||
|
|
||||||
|
for(int mu=0;mu<4;mu++){
|
||||||
|
if (mpi_layout[mu]>1 ) {
|
||||||
|
double start=usecond();
|
||||||
|
for(int i=0;i<Nloop;i++){
|
||||||
|
|
||||||
|
ncomm=0;
|
||||||
|
|
||||||
|
|
||||||
|
ncomm++;
|
||||||
|
int comm_proc=1;
|
||||||
|
int xmit_to_rank;
|
||||||
|
int recv_from_rank;
|
||||||
|
|
||||||
|
{
|
||||||
|
std::vector<CommsRequest_t> requests;
|
||||||
|
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||||
|
Grid.SendToRecvFrom((void *)&xbuf[mu][0],
|
||||||
|
xmit_to_rank,
|
||||||
|
(void *)&rbuf[mu][0],
|
||||||
|
recv_from_rank,
|
||||||
|
bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
comm_proc = mpi_layout[mu]-1;
|
||||||
|
{
|
||||||
|
std::vector<CommsRequest_t> requests;
|
||||||
|
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||||
|
Grid.SendToRecvFrom((void *)&xbuf[mu+4][0],
|
||||||
|
xmit_to_rank,
|
||||||
|
(void *)&rbuf[mu+4][0],
|
||||||
|
recv_from_rank,
|
||||||
|
bytes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Grid.Barrier();
|
||||||
|
double stop=usecond();
|
||||||
|
double mean=(stop-start)/Nloop;
|
||||||
|
double dbytes = bytes*ppn;
|
||||||
|
double xbytes = dbytes*2.0*ncomm;
|
||||||
|
double rbytes = xbytes;
|
||||||
|
double bidibytes = xbytes+rbytes;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
|
||||||
|
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)<<" "
|
||||||
|
<<std::right<< xbytes/mean<<" "
|
||||||
|
<< "\t\t"<<std::setw(7)<< bidibytes/mean<< std::endl;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int d=0;d<8;d++){
|
||||||
|
acceleratorFreeDevice(xbuf[d]);
|
||||||
|
acceleratorFreeDevice(rbuf[d]);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "= All done; Bye Bye"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
425
Grid/Benchmark_dwf_fp32.cpp
Normal file
425
Grid/Benchmark_dwf_fp32.cpp
Normal file
@ -0,0 +1,425 @@
|
|||||||
|
/*
|
||||||
|
Copyright © 2015 Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Copyright © 2022 Antonin Portelli <antonin.portelli@me.com>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
#ifdef GRID_CUDA
|
||||||
|
#define CUDA_PROFILE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CUDA_PROFILE
|
||||||
|
#include <cuda_profiler_api.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
|
||||||
|
/// Minimal wrapper holding a single value of type `d` in its `internal` member.
/// It carries no behaviour of its own; it is a plain aggregate.
template <class d>
struct scal
{
  d internal; ///< the wrapped value
};
|
||||||
|
|
||||||
|
// Gamma-matrix identifiers indexed by direction mu = 0..3, in the order
// X, Y, Z, T. main() uses Gmu[mu] to build the (1 -/+ gamma_mu) factors of the
// Cshift-based reference implementation.
Gamma::Algebra Gmu[] = {
    Gamma::Algebra::GammaX,
    Gamma::Algebra::GammaY,
    Gamma::Algebra::GammaZ,
    Gamma::Algebra::GammaT};
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc, &argv);
|
||||||
|
|
||||||
|
int threads = GridThread::GetThreads();
|
||||||
|
|
||||||
|
Coordinate latt4 = GridDefaultLatt();
|
||||||
|
int Ls = 16;
|
||||||
|
for (int i = 0; i < argc; i++)
|
||||||
|
if (std::string(argv[i]) == "-Ls")
|
||||||
|
{
|
||||||
|
std::stringstream ss(argv[i + 1]);
|
||||||
|
ss >> Ls;
|
||||||
|
}
|
||||||
|
|
||||||
|
GridLogLayout();
|
||||||
|
|
||||||
|
long unsigned int single_site_flops = 8 * Nc * (7 + 16 * Nc);
|
||||||
|
|
||||||
|
GridCartesian *UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian *UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
|
GridCartesian *FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
|
||||||
|
GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "Making s innermost grids" << std::endl;
|
||||||
|
GridCartesian *sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(), GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian *sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
|
||||||
|
GridCartesian *sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls, UGrid);
|
||||||
|
GridRedBlackCartesian *sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls, UGrid);
|
||||||
|
|
||||||
|
std::vector<int> seeds4({1, 2, 3, 4});
|
||||||
|
std::vector<int> seeds5({5, 6, 7, 8});
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl;
|
||||||
|
GridParallelRNG RNG4(UGrid);
|
||||||
|
RNG4.SeedUniqueString(std::string("The 4D RNG"));
|
||||||
|
std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl;
|
||||||
|
GridParallelRNG RNG5(FGrid);
|
||||||
|
RNG5.SeedUniqueString(std::string("The 5D RNG"));
|
||||||
|
std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
|
||||||
|
|
||||||
|
LatticeFermionF src(FGrid);
|
||||||
|
random(RNG5, src);
|
||||||
|
#if 0
|
||||||
|
src = Zero();
|
||||||
|
{
|
||||||
|
Coordinate origin({0,0,0,latt4[2]-1,0});
|
||||||
|
SpinColourVectorF tmp;
|
||||||
|
tmp=Zero();
|
||||||
|
tmp()(0)(0)=Complex(-2.0,0.0);
|
||||||
|
std::cout << " source site 0 " << tmp<<std::endl;
|
||||||
|
pokeSite(tmp,src,origin);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
RealD N2 = 1.0 / ::sqrt(norm2(src));
|
||||||
|
src = src * N2;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
LatticeFermionF result(FGrid);
|
||||||
|
result = Zero();
|
||||||
|
LatticeFermionF ref(FGrid);
|
||||||
|
ref = Zero();
|
||||||
|
LatticeFermionF tmp(FGrid);
|
||||||
|
LatticeFermionF err(FGrid);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "Drawing gauge field" << std::endl;
|
||||||
|
LatticeGaugeFieldF Umu(UGrid);
|
||||||
|
SU<Nc>::HotConfiguration(RNG4, Umu);
|
||||||
|
std::cout << GridLogMessage << "Random gauge initialised " << std::endl;
|
||||||
|
#if 0
|
||||||
|
Umu=1.0;
|
||||||
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
LatticeColourMatrixF ttmp(UGrid);
|
||||||
|
ttmp = PeekIndex<LorentzIndex>(Umu,mu);
|
||||||
|
// if (mu !=2 ) ttmp = 0;
|
||||||
|
// ttmp = ttmp* pow(10.0,mu);
|
||||||
|
PokeIndex<LorentzIndex>(Umu,ttmp,mu);
|
||||||
|
}
|
||||||
|
std::cout << GridLogMessage << "Forced to diagonal " << std::endl;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
////////////////////////////////////
|
||||||
|
// Naive wilson implementation
|
||||||
|
////////////////////////////////////
|
||||||
|
// replicate across fifth dimension
|
||||||
|
// LatticeGaugeFieldF Umu5d(FGrid);
|
||||||
|
std::vector<LatticeColourMatrixF> U(4, UGrid);
|
||||||
|
for (int mu = 0; mu < Nd; mu++)
|
||||||
|
{
|
||||||
|
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
|
||||||
|
}
|
||||||
|
std::cout << GridLogMessage << "Setting up Cshift based reference " << std::endl;
|
||||||
|
|
||||||
|
if (1)
|
||||||
|
{
|
||||||
|
ref = Zero();
|
||||||
|
for (int mu = 0; mu < Nd; mu++)
|
||||||
|
{
|
||||||
|
|
||||||
|
tmp = Cshift(src, mu + 1, 1);
|
||||||
|
{
|
||||||
|
autoView(tmp_v, tmp, CpuWrite);
|
||||||
|
autoView(U_v, U[mu], CpuRead);
|
||||||
|
for (int ss = 0; ss < U[mu].Grid()->oSites(); ss++)
|
||||||
|
{
|
||||||
|
for (int s = 0; s < Ls; s++)
|
||||||
|
{
|
||||||
|
tmp_v[Ls * ss + s] = U_v[ss] * tmp_v[Ls * ss + s];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ref = ref + tmp - Gamma(Gmu[mu]) * tmp;
|
||||||
|
|
||||||
|
{
|
||||||
|
autoView(tmp_v, tmp, CpuWrite);
|
||||||
|
autoView(U_v, U[mu], CpuRead);
|
||||||
|
autoView(src_v, src, CpuRead);
|
||||||
|
for (int ss = 0; ss < U[mu].Grid()->oSites(); ss++)
|
||||||
|
{
|
||||||
|
for (int s = 0; s < Ls; s++)
|
||||||
|
{
|
||||||
|
tmp_v[Ls * ss + s] = adj(U_v[ss]) * src_v[Ls * ss + s];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tmp = Cshift(tmp, mu + 1, -1);
|
||||||
|
ref = ref + tmp + Gamma(Gmu[mu]) * tmp;
|
||||||
|
}
|
||||||
|
ref = -0.5 * ref;
|
||||||
|
}
|
||||||
|
|
||||||
|
RealD mass = 0.1;
|
||||||
|
RealD M5 = 1.8;
|
||||||
|
|
||||||
|
RealD NP = UGrid->_Nprocessors;
|
||||||
|
RealD NN = UGrid->NodeCount();
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "*****************************************************************" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "*****************************************************************" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "*****************************************************************" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "* Benchmarking DomainWallFermionR::Dhop " << std::endl;
|
||||||
|
std::cout << GridLogMessage << "* Vectorising space-time by " << vComplexF::Nsimd() << std::endl;
|
||||||
|
std::cout << GridLogMessage << "* VComplexF size is " << sizeof(vComplexF) << " B" << std::endl;
|
||||||
|
if (sizeof(RealF) == 4)
|
||||||
|
std::cout << GridLogMessage << "* SINGLE precision " << std::endl;
|
||||||
|
if (sizeof(RealF) == 8)
|
||||||
|
std::cout << GridLogMessage << "* DOUBLE precision " << std::endl;
|
||||||
|
#ifdef GRID_OMP
|
||||||
|
if (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute)
|
||||||
|
std::cout << GridLogMessage << "* Using Overlapped Comms/Compute" << std::endl;
|
||||||
|
if (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute)
|
||||||
|
std::cout << GridLogMessage << "* Using sequential comms compute" << std::endl;
|
||||||
|
#endif
|
||||||
|
if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric)
|
||||||
|
std::cout << GridLogMessage << "* Using GENERIC Nc WilsonKernels" << std::endl;
|
||||||
|
if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll)
|
||||||
|
std::cout << GridLogMessage << "* Using Nc=3 WilsonKernels" << std::endl;
|
||||||
|
if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm)
|
||||||
|
std::cout << GridLogMessage << "* Using Asm Nc=3 WilsonKernels" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "*****************************************************************" << std::endl;
|
||||||
|
|
||||||
|
DomainWallFermionF Dw(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5);
|
||||||
|
int ncall = 300;
|
||||||
|
|
||||||
|
if (1)
|
||||||
|
{
|
||||||
|
FGrid->Barrier();
|
||||||
|
Dw.ZeroCounters();
|
||||||
|
Dw.Dhop(src, result, 0);
|
||||||
|
std::cout << GridLogMessage << "Called warmup" << std::endl;
|
||||||
|
double t0 = usecond();
|
||||||
|
for (int i = 0; i < ncall; i++)
|
||||||
|
{
|
||||||
|
__SSC_START;
|
||||||
|
Dw.Dhop(src, result, 0);
|
||||||
|
__SSC_STOP;
|
||||||
|
}
|
||||||
|
double t1 = usecond();
|
||||||
|
FGrid->Barrier();
|
||||||
|
|
||||||
|
double volume = Ls;
|
||||||
|
for (int mu = 0; mu < Nd; mu++)
|
||||||
|
volume = volume * latt4[mu];
|
||||||
|
double flops = single_site_flops * volume * ncall;
|
||||||
|
|
||||||
|
auto nsimd = vComplex::Nsimd();
|
||||||
|
auto simdwidth = sizeof(vComplex);
|
||||||
|
|
||||||
|
// RF: Nd Wilson * Ls, Nd gauge * Ls, Nc colors
|
||||||
|
double data_rf = volume * ((2 * Nd + 1) * Nd * Nc + 2 * Nd * Nc * Nc) * simdwidth / nsimd * ncall / (1024. * 1024. * 1024.);
|
||||||
|
|
||||||
|
// mem: Nd Wilson * Ls, Nd gauge, Nc colors
|
||||||
|
double data_mem = (volume * (2 * Nd + 1) * Nd * Nc + (volume / Ls) * 2 * Nd * Nc * Nc) * simdwidth / nsimd * ncall / (1024. * 1024. * 1024.);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "Called Dw " << ncall << " times in " << t1 - t0 << " us" << std::endl;
|
||||||
|
// std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||||
|
// std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||||
|
std::cout << GridLogMessage << "mflop/s = " << flops / (t1 - t0) << std::endl;
|
||||||
|
std::cout << GridLogMessage << "mflop/s per rank = " << flops / (t1 - t0) / NP << std::endl;
|
||||||
|
std::cout << GridLogMessage << "mflop/s per node = " << flops / (t1 - t0) / NN << std::endl;
|
||||||
|
std::cout << GridLogMessage << "RF GiB/s (base 2) = " << 1000000. * data_rf / ((t1 - t0)) << std::endl;
|
||||||
|
std::cout << GridLogMessage << "mem GiB/s (base 2) = " << 1000000. * data_mem / ((t1 - t0)) << std::endl;
|
||||||
|
err = ref - result;
|
||||||
|
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
|
||||||
|
// exit(0);
|
||||||
|
|
||||||
|
if ((norm2(err) > 1.0e-4))
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
std::cout << "RESULT\n " << result<<std::endl;
|
||||||
|
std::cout << "REF \n " << ref <<std::endl;
|
||||||
|
std::cout << "ERR \n " << err <<std::endl;
|
||||||
|
*/
|
||||||
|
std::cout << GridLogMessage << "WRONG RESULT" << std::endl;
|
||||||
|
FGrid->Barrier();
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
assert(norm2(err) < 1.0e-4);
|
||||||
|
Dw.Report();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (1)
|
||||||
|
{ // Naive wilson dag implementation
|
||||||
|
ref = Zero();
|
||||||
|
for (int mu = 0; mu < Nd; mu++)
|
||||||
|
{
|
||||||
|
|
||||||
|
// ref = src - Gamma(Gamma::Algebra::GammaX)* src ; // 1+gamma_x
|
||||||
|
tmp = Cshift(src, mu + 1, 1);
|
||||||
|
{
|
||||||
|
autoView(ref_v, ref, CpuWrite);
|
||||||
|
autoView(tmp_v, tmp, CpuRead);
|
||||||
|
autoView(U_v, U[mu], CpuRead);
|
||||||
|
for (int ss = 0; ss < U[mu].Grid()->oSites(); ss++)
|
||||||
|
{
|
||||||
|
for (int s = 0; s < Ls; s++)
|
||||||
|
{
|
||||||
|
int i = s + Ls * ss;
|
||||||
|
ref_v[i] += U_v[ss] * (tmp_v[i] + Gamma(Gmu[mu]) * tmp_v[i]);
|
||||||
|
;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
autoView(tmp_v, tmp, CpuWrite);
|
||||||
|
autoView(U_v, U[mu], CpuRead);
|
||||||
|
autoView(src_v, src, CpuRead);
|
||||||
|
for (int ss = 0; ss < U[mu].Grid()->oSites(); ss++)
|
||||||
|
{
|
||||||
|
for (int s = 0; s < Ls; s++)
|
||||||
|
{
|
||||||
|
tmp_v[Ls * ss + s] = adj(U_v[ss]) * src_v[Ls * ss + s];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// tmp =adj(U[mu])*src;
|
||||||
|
tmp = Cshift(tmp, mu + 1, -1);
|
||||||
|
{
|
||||||
|
autoView(ref_v, ref, CpuWrite);
|
||||||
|
autoView(tmp_v, tmp, CpuRead);
|
||||||
|
for (int i = 0; i < ref_v.size(); i++)
|
||||||
|
{
|
||||||
|
ref_v[i] += tmp_v[i] - Gamma(Gmu[mu]) * tmp_v[i];
|
||||||
|
;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ref = -0.5 * ref;
|
||||||
|
}
|
||||||
|
// dump=1;
|
||||||
|
Dw.Dhop(src, result, 1);
|
||||||
|
std::cout << GridLogMessage << "Compare to naive wilson implementation Dag to verify correctness" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Called DwDag" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "norm dag result " << norm2(result) << std::endl;
|
||||||
|
std::cout << GridLogMessage << "norm dag ref " << norm2(ref) << std::endl;
|
||||||
|
err = ref - result;
|
||||||
|
std::cout << GridLogMessage << "norm dag diff " << norm2(err) << std::endl;
|
||||||
|
if ((norm2(err) > 1.0e-4))
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
std::cout<< "DAG RESULT\n " <<ref << std::endl;
|
||||||
|
std::cout<< "DAG sRESULT\n " <<result << std::endl;
|
||||||
|
std::cout<< "DAG ERR \n " << err <<std::endl;
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
LatticeFermionF src_e(FrbGrid);
|
||||||
|
LatticeFermionF src_o(FrbGrid);
|
||||||
|
LatticeFermionF r_e(FrbGrid);
|
||||||
|
LatticeFermionF r_o(FrbGrid);
|
||||||
|
LatticeFermionF r_eo(FGrid);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "Calling Deo and Doe and //assert Deo+Doe == Dunprec" << std::endl;
|
||||||
|
pickCheckerboard(Even, src_e, src);
|
||||||
|
pickCheckerboard(Odd, src_o, src);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "src_e" << norm2(src_e) << std::endl;
|
||||||
|
std::cout << GridLogMessage << "src_o" << norm2(src_o) << std::endl;
|
||||||
|
|
||||||
|
// S-direction is INNERMOST and takes no part in the parity.
|
||||||
|
std::cout << GridLogMessage << "*********************************************************" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "* Benchmarking DomainWallFermionF::DhopEO " << std::endl;
|
||||||
|
std::cout << GridLogMessage << "* Vectorising space-time by " << vComplexF::Nsimd() << std::endl;
|
||||||
|
if (sizeof(RealF) == 4)
|
||||||
|
std::cout << GridLogMessage << "* SINGLE precision " << std::endl;
|
||||||
|
if (sizeof(RealF) == 8)
|
||||||
|
std::cout << GridLogMessage << "* DOUBLE precision " << std::endl;
|
||||||
|
#ifdef GRID_OMP
|
||||||
|
if (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute)
|
||||||
|
std::cout << GridLogMessage << "* Using Overlapped Comms/Compute" << std::endl;
|
||||||
|
if (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute)
|
||||||
|
std::cout << GridLogMessage << "* Using sequential comms compute" << std::endl;
|
||||||
|
#endif
|
||||||
|
if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric)
|
||||||
|
std::cout << GridLogMessage << "* Using GENERIC Nc WilsonKernels" << std::endl;
|
||||||
|
if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll)
|
||||||
|
std::cout << GridLogMessage << "* Using Nc=3 WilsonKernels" << std::endl;
|
||||||
|
if (WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm)
|
||||||
|
std::cout << GridLogMessage << "* Using Asm Nc=3 WilsonKernels" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "*********************************************************" << std::endl;
|
||||||
|
{
|
||||||
|
Dw.ZeroCounters();
|
||||||
|
FGrid->Barrier();
|
||||||
|
Dw.DhopEO(src_o, r_e, DaggerNo);
|
||||||
|
double t0 = usecond();
|
||||||
|
for (int i = 0; i < ncall; i++)
|
||||||
|
{
|
||||||
|
#ifdef CUDA_PROFILE
|
||||||
|
if (i == 10)
|
||||||
|
cudaProfilerStart();
|
||||||
|
#endif
|
||||||
|
Dw.DhopEO(src_o, r_e, DaggerNo);
|
||||||
|
#ifdef CUDA_PROFILE
|
||||||
|
if (i == 20)
|
||||||
|
cudaProfilerStop();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
double t1 = usecond();
|
||||||
|
FGrid->Barrier();
|
||||||
|
|
||||||
|
double volume = Ls;
|
||||||
|
for (int mu = 0; mu < Nd; mu++)
|
||||||
|
volume = volume * latt4[mu];
|
||||||
|
double flops = (single_site_flops * volume * ncall) / 2.0;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "Deo mflop/s = " << flops / (t1 - t0) << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Deo mflop/s per rank " << flops / (t1 - t0) / NP << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Deo mflop/s per node " << flops / (t1 - t0) / NN << std::endl;
|
||||||
|
Dw.Report();
|
||||||
|
}
|
||||||
|
Dw.DhopEO(src_o, r_e, DaggerNo);
|
||||||
|
Dw.DhopOE(src_e, r_o, DaggerNo);
|
||||||
|
Dw.Dhop(src, result, DaggerNo);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "r_e" << norm2(r_e) << std::endl;
|
||||||
|
std::cout << GridLogMessage << "r_o" << norm2(r_o) << std::endl;
|
||||||
|
std::cout << GridLogMessage << "res" << norm2(result) << std::endl;
|
||||||
|
|
||||||
|
setCheckerboard(r_eo, r_o);
|
||||||
|
setCheckerboard(r_eo, r_e);
|
||||||
|
|
||||||
|
err = r_eo - result;
|
||||||
|
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
|
||||||
|
if ((norm2(err) > 1.0e-4))
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
std::cout<< "Deo RESULT\n " <<r_eo << std::endl;
|
||||||
|
std::cout<< "Deo REF\n " <<result << std::endl;
|
||||||
|
std::cout<< "Deo ERR \n " << err <<std::endl;
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
|
pickCheckerboard(Even, src_e, err);
|
||||||
|
pickCheckerboard(Odd, src_o, err);
|
||||||
|
std::cout << GridLogMessage << "norm diff even " << norm2(src_e) << std::endl;
|
||||||
|
std::cout << GridLogMessage << "norm diff odd " << norm2(src_o) << std::endl;
|
||||||
|
|
||||||
|
assert(norm2(src_e) < 1.0e-4);
|
||||||
|
assert(norm2(src_o) < 1.0e-4);
|
||||||
|
Grid_finalize();
|
||||||
|
exit(0);
|
||||||
|
}
|
339
Grid/LICENSE
Normal file
339
Grid/LICENSE
Normal file
@ -0,0 +1,339 @@
|
|||||||
|
GNU GENERAL PUBLIC LICENSE
|
||||||
|
Version 2, June 1991
|
||||||
|
|
||||||
|
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
Everyone is permitted to copy and distribute verbatim copies
|
||||||
|
of this license document, but changing it is not allowed.
|
||||||
|
|
||||||
|
Preamble
|
||||||
|
|
||||||
|
The licenses for most software are designed to take away your
|
||||||
|
freedom to share and change it. By contrast, the GNU General Public
|
||||||
|
License is intended to guarantee your freedom to share and change free
|
||||||
|
software--to make sure the software is free for all its users. This
|
||||||
|
General Public License applies to most of the Free Software
|
||||||
|
Foundation's software and to any other program whose authors commit to
|
||||||
|
using it. (Some other Free Software Foundation software is covered by
|
||||||
|
the GNU Lesser General Public License instead.) You can apply it to
|
||||||
|
your programs, too.
|
||||||
|
|
||||||
|
When we speak of free software, we are referring to freedom, not
|
||||||
|
price. Our General Public Licenses are designed to make sure that you
|
||||||
|
have the freedom to distribute copies of free software (and charge for
|
||||||
|
this service if you wish), that you receive source code or can get it
|
||||||
|
if you want it, that you can change the software or use pieces of it
|
||||||
|
in new free programs; and that you know you can do these things.
|
||||||
|
|
||||||
|
To protect your rights, we need to make restrictions that forbid
|
||||||
|
anyone to deny you these rights or to ask you to surrender the rights.
|
||||||
|
These restrictions translate to certain responsibilities for you if you
|
||||||
|
distribute copies of the software, or if you modify it.
|
||||||
|
|
||||||
|
For example, if you distribute copies of such a program, whether
|
||||||
|
gratis or for a fee, you must give the recipients all the rights that
|
||||||
|
you have. You must make sure that they, too, receive or can get the
|
||||||
|
source code. And you must show them these terms so they know their
|
||||||
|
rights.
|
||||||
|
|
||||||
|
We protect your rights with two steps: (1) copyright the software, and
|
||||||
|
(2) offer you this license which gives you legal permission to copy,
|
||||||
|
distribute and/or modify the software.
|
||||||
|
|
||||||
|
Also, for each author's protection and ours, we want to make certain
|
||||||
|
that everyone understands that there is no warranty for this free
|
||||||
|
software. If the software is modified by someone else and passed on, we
|
||||||
|
want its recipients to know that what they have is not the original, so
|
||||||
|
that any problems introduced by others will not reflect on the original
|
||||||
|
authors' reputations.
|
||||||
|
|
||||||
|
Finally, any free program is threatened constantly by software
|
||||||
|
patents. We wish to avoid the danger that redistributors of a free
|
||||||
|
program will individually obtain patent licenses, in effect making the
|
||||||
|
program proprietary. To prevent this, we have made it clear that any
|
||||||
|
patent must be licensed for everyone's free use or not licensed at all.
|
||||||
|
|
||||||
|
The precise terms and conditions for copying, distribution and
|
||||||
|
modification follow.
|
||||||
|
|
||||||
|
GNU GENERAL PUBLIC LICENSE
|
||||||
|
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||||
|
|
||||||
|
0. This License applies to any program or other work which contains
|
||||||
|
a notice placed by the copyright holder saying it may be distributed
|
||||||
|
under the terms of this General Public License. The "Program", below,
|
||||||
|
refers to any such program or work, and a "work based on the Program"
|
||||||
|
means either the Program or any derivative work under copyright law:
|
||||||
|
that is to say, a work containing the Program or a portion of it,
|
||||||
|
either verbatim or with modifications and/or translated into another
|
||||||
|
language. (Hereinafter, translation is included without limitation in
|
||||||
|
the term "modification".) Each licensee is addressed as "you".
|
||||||
|
|
||||||
|
Activities other than copying, distribution and modification are not
|
||||||
|
covered by this License; they are outside its scope. The act of
|
||||||
|
running the Program is not restricted, and the output from the Program
|
||||||
|
is covered only if its contents constitute a work based on the
|
||||||
|
Program (independent of having been made by running the Program).
|
||||||
|
Whether that is true depends on what the Program does.
|
||||||
|
|
||||||
|
1. You may copy and distribute verbatim copies of the Program's
|
||||||
|
source code as you receive it, in any medium, provided that you
|
||||||
|
conspicuously and appropriately publish on each copy an appropriate
|
||||||
|
copyright notice and disclaimer of warranty; keep intact all the
|
||||||
|
notices that refer to this License and to the absence of any warranty;
|
||||||
|
and give any other recipients of the Program a copy of this License
|
||||||
|
along with the Program.
|
||||||
|
|
||||||
|
You may charge a fee for the physical act of transferring a copy, and
|
||||||
|
you may at your option offer warranty protection in exchange for a fee.
|
||||||
|
|
||||||
|
2. You may modify your copy or copies of the Program or any portion
|
||||||
|
of it, thus forming a work based on the Program, and copy and
|
||||||
|
distribute such modifications or work under the terms of Section 1
|
||||||
|
above, provided that you also meet all of these conditions:
|
||||||
|
|
||||||
|
a) You must cause the modified files to carry prominent notices
|
||||||
|
stating that you changed the files and the date of any change.
|
||||||
|
|
||||||
|
b) You must cause any work that you distribute or publish, that in
|
||||||
|
whole or in part contains or is derived from the Program or any
|
||||||
|
part thereof, to be licensed as a whole at no charge to all third
|
||||||
|
parties under the terms of this License.
|
||||||
|
|
||||||
|
c) If the modified program normally reads commands interactively
|
||||||
|
when run, you must cause it, when started running for such
|
||||||
|
interactive use in the most ordinary way, to print or display an
|
||||||
|
announcement including an appropriate copyright notice and a
|
||||||
|
notice that there is no warranty (or else, saying that you provide
|
||||||
|
a warranty) and that users may redistribute the program under
|
||||||
|
these conditions, and telling the user how to view a copy of this
|
||||||
|
License. (Exception: if the Program itself is interactive but
|
||||||
|
does not normally print such an announcement, your work based on
|
||||||
|
the Program is not required to print an announcement.)
|
||||||
|
|
||||||
|
These requirements apply to the modified work as a whole. If
|
||||||
|
identifiable sections of that work are not derived from the Program,
|
||||||
|
and can be reasonably considered independent and separate works in
|
||||||
|
themselves, then this License, and its terms, do not apply to those
|
||||||
|
sections when you distribute them as separate works. But when you
|
||||||
|
distribute the same sections as part of a whole which is a work based
|
||||||
|
on the Program, the distribution of the whole must be on the terms of
|
||||||
|
this License, whose permissions for other licensees extend to the
|
||||||
|
entire whole, and thus to each and every part regardless of who wrote it.
|
||||||
|
|
||||||
|
Thus, it is not the intent of this section to claim rights or contest
|
||||||
|
your rights to work written entirely by you; rather, the intent is to
|
||||||
|
exercise the right to control the distribution of derivative or
|
||||||
|
collective works based on the Program.
|
||||||
|
|
||||||
|
In addition, mere aggregation of another work not based on the Program
|
||||||
|
with the Program (or with a work based on the Program) on a volume of
|
||||||
|
a storage or distribution medium does not bring the other work under
|
||||||
|
the scope of this License.
|
||||||
|
|
||||||
|
3. You may copy and distribute the Program (or a work based on it,
|
||||||
|
under Section 2) in object code or executable form under the terms of
|
||||||
|
Sections 1 and 2 above provided that you also do one of the following:
|
||||||
|
|
||||||
|
a) Accompany it with the complete corresponding machine-readable
|
||||||
|
source code, which must be distributed under the terms of Sections
|
||||||
|
1 and 2 above on a medium customarily used for software interchange; or,
|
||||||
|
|
||||||
|
b) Accompany it with a written offer, valid for at least three
|
||||||
|
years, to give any third party, for a charge no more than your
|
||||||
|
cost of physically performing source distribution, a complete
|
||||||
|
machine-readable copy of the corresponding source code, to be
|
||||||
|
distributed under the terms of Sections 1 and 2 above on a medium
|
||||||
|
customarily used for software interchange; or,
|
||||||
|
|
||||||
|
c) Accompany it with the information you received as to the offer
|
||||||
|
to distribute corresponding source code. (This alternative is
|
||||||
|
allowed only for noncommercial distribution and only if you
|
||||||
|
received the program in object code or executable form with such
|
||||||
|
an offer, in accord with Subsection b above.)
|
||||||
|
|
||||||
|
The source code for a work means the preferred form of the work for
|
||||||
|
making modifications to it. For an executable work, complete source
|
||||||
|
code means all the source code for all modules it contains, plus any
|
||||||
|
associated interface definition files, plus the scripts used to
|
||||||
|
control compilation and installation of the executable. However, as a
|
||||||
|
special exception, the source code distributed need not include
|
||||||
|
anything that is normally distributed (in either source or binary
|
||||||
|
form) with the major components (compiler, kernel, and so on) of the
|
||||||
|
operating system on which the executable runs, unless that component
|
||||||
|
itself accompanies the executable.
|
||||||
|
|
||||||
|
If distribution of executable or object code is made by offering
|
||||||
|
access to copy from a designated place, then offering equivalent
|
||||||
|
access to copy the source code from the same place counts as
|
||||||
|
distribution of the source code, even though third parties are not
|
||||||
|
compelled to copy the source along with the object code.
|
||||||
|
|
||||||
|
4. You may not copy, modify, sublicense, or distribute the Program
|
||||||
|
except as expressly provided under this License. Any attempt
|
||||||
|
otherwise to copy, modify, sublicense or distribute the Program is
|
||||||
|
void, and will automatically terminate your rights under this License.
|
||||||
|
However, parties who have received copies, or rights, from you under
|
||||||
|
this License will not have their licenses terminated so long as such
|
||||||
|
parties remain in full compliance.
|
||||||
|
|
||||||
|
5. You are not required to accept this License, since you have not
|
||||||
|
signed it. However, nothing else grants you permission to modify or
|
||||||
|
distribute the Program or its derivative works. These actions are
|
||||||
|
prohibited by law if you do not accept this License. Therefore, by
|
||||||
|
modifying or distributing the Program (or any work based on the
|
||||||
|
Program), you indicate your acceptance of this License to do so, and
|
||||||
|
all its terms and conditions for copying, distributing or modifying
|
||||||
|
the Program or works based on it.
|
||||||
|
|
||||||
|
6. Each time you redistribute the Program (or any work based on the
|
||||||
|
Program), the recipient automatically receives a license from the
|
||||||
|
original licensor to copy, distribute or modify the Program subject to
|
||||||
|
these terms and conditions. You may not impose any further
|
||||||
|
restrictions on the recipients' exercise of the rights granted herein.
|
||||||
|
You are not responsible for enforcing compliance by third parties to
|
||||||
|
this License.
|
||||||
|
|
||||||
|
7. If, as a consequence of a court judgment or allegation of patent
|
||||||
|
infringement or for any other reason (not limited to patent issues),
|
||||||
|
conditions are imposed on you (whether by court order, agreement or
|
||||||
|
otherwise) that contradict the conditions of this License, they do not
|
||||||
|
excuse you from the conditions of this License. If you cannot
|
||||||
|
distribute so as to satisfy simultaneously your obligations under this
|
||||||
|
License and any other pertinent obligations, then as a consequence you
|
||||||
|
may not distribute the Program at all. For example, if a patent
|
||||||
|
license would not permit royalty-free redistribution of the Program by
|
||||||
|
all those who receive copies directly or indirectly through you, then
|
||||||
|
the only way you could satisfy both it and this License would be to
|
||||||
|
refrain entirely from distribution of the Program.
|
||||||
|
|
||||||
|
If any portion of this section is held invalid or unenforceable under
|
||||||
|
any particular circumstance, the balance of the section is intended to
|
||||||
|
apply and the section as a whole is intended to apply in other
|
||||||
|
circumstances.
|
||||||
|
|
||||||
|
It is not the purpose of this section to induce you to infringe any
|
||||||
|
patents or other property right claims or to contest validity of any
|
||||||
|
such claims; this section has the sole purpose of protecting the
|
||||||
|
integrity of the free software distribution system, which is
|
||||||
|
implemented by public license practices. Many people have made
|
||||||
|
generous contributions to the wide range of software distributed
|
||||||
|
through that system in reliance on consistent application of that
|
||||||
|
system; it is up to the author/donor to decide if he or she is willing
|
||||||
|
to distribute software through any other system and a licensee cannot
|
||||||
|
impose that choice.
|
||||||
|
|
||||||
|
This section is intended to make thoroughly clear what is believed to
|
||||||
|
be a consequence of the rest of this License.
|
||||||
|
|
||||||
|
8. If the distribution and/or use of the Program is restricted in
|
||||||
|
certain countries either by patents or by copyrighted interfaces, the
|
||||||
|
original copyright holder who places the Program under this License
|
||||||
|
may add an explicit geographical distribution limitation excluding
|
||||||
|
those countries, so that distribution is permitted only in or among
|
||||||
|
countries not thus excluded. In such case, this License incorporates
|
||||||
|
the limitation as if written in the body of this License.
|
||||||
|
|
||||||
|
9. The Free Software Foundation may publish revised and/or new versions
|
||||||
|
of the General Public License from time to time. Such new versions will
|
||||||
|
be similar in spirit to the present version, but may differ in detail to
|
||||||
|
address new problems or concerns.
|
||||||
|
|
||||||
|
Each version is given a distinguishing version number. If the Program
|
||||||
|
specifies a version number of this License which applies to it and "any
|
||||||
|
later version", you have the option of following the terms and conditions
|
||||||
|
either of that version or of any later version published by the Free
|
||||||
|
Software Foundation. If the Program does not specify a version number of
|
||||||
|
this License, you may choose any version ever published by the Free Software
|
||||||
|
Foundation.
|
||||||
|
|
||||||
|
10. If you wish to incorporate parts of the Program into other free
|
||||||
|
programs whose distribution conditions are different, write to the author
|
||||||
|
to ask for permission. For software which is copyrighted by the Free
|
||||||
|
Software Foundation, write to the Free Software Foundation; we sometimes
|
||||||
|
make exceptions for this. Our decision will be guided by the two goals
|
||||||
|
of preserving the free status of all derivatives of our free software and
|
||||||
|
of promoting the sharing and reuse of software generally.
|
||||||
|
|
||||||
|
NO WARRANTY
|
||||||
|
|
||||||
|
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
|
||||||
|
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
|
||||||
|
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
|
||||||
|
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
|
||||||
|
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||||
|
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
|
||||||
|
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
|
||||||
|
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
|
||||||
|
REPAIR OR CORRECTION.
|
||||||
|
|
||||||
|
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||||
|
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
|
||||||
|
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
|
||||||
|
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
|
||||||
|
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
|
||||||
|
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
|
||||||
|
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
|
||||||
|
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGES.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
How to Apply These Terms to Your New Programs
|
||||||
|
|
||||||
|
If you develop a new program, and you want it to be of the greatest
|
||||||
|
possible use to the public, the best way to achieve this is to make it
|
||||||
|
free software which everyone can redistribute and change under these terms.
|
||||||
|
|
||||||
|
To do so, attach the following notices to the program. It is safest
|
||||||
|
to attach them to the start of each source file to most effectively
|
||||||
|
convey the exclusion of warranty; and each file should have at least
|
||||||
|
the "copyright" line and a pointer to where the full notice is found.
|
||||||
|
|
||||||
|
<one line to give the program's name and a brief idea of what it does.>
|
||||||
|
Copyright (C) <year> <name of author>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
Also add information on how to contact you by electronic and paper mail.
|
||||||
|
|
||||||
|
If the program is interactive, make it output a short notice like this
|
||||||
|
when it starts in an interactive mode:
|
||||||
|
|
||||||
|
Gnomovision version 69, Copyright (C) year name of author
|
||||||
|
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||||
|
This is free software, and you are welcome to redistribute it
|
||||||
|
under certain conditions; type `show c' for details.
|
||||||
|
|
||||||
|
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||||
|
parts of the General Public License. Of course, the commands you use may
|
||||||
|
be called something other than `show w' and `show c'; they could even be
|
||||||
|
mouse-clicks or menu items--whatever suits your program.
|
||||||
|
|
||||||
|
You should also get your employer (if you work as a programmer) or your
|
||||||
|
school, if any, to sign a "copyright disclaimer" for the program, if
|
||||||
|
necessary. Here is a sample; alter the names:
|
||||||
|
|
||||||
|
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
|
||||||
|
`Gnomovision' (which makes passes at compilers) written by James Hacker.
|
||||||
|
|
||||||
|
<signature of Ty Coon>, 1 April 1989
|
||||||
|
Ty Coon, President of Vice
|
||||||
|
|
||||||
|
This General Public License does not permit incorporating your program into
|
||||||
|
proprietary programs. If your program is a subroutine library, you may
|
||||||
|
consider it more useful to permit linking proprietary applications with the
|
||||||
|
library. If this is what you want to do, use the GNU Lesser General
|
||||||
|
Public License instead of this License.
|
12
Grid/Makefile.am
Normal file
12
Grid/Makefile.am
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
# Automake input for the Grid benchmark programs.
# Make aclocal also search .buildutils/m4 for m4 macros.
ACLOCAL_AMFLAGS = -I .buildutils/m4
|
||||||
|
|
||||||
|
# Executables to build; bin_PROGRAMS are installed into the bin directory.
bin_PROGRAMS = \
|
||||||
|
Benchmark_comms_host_device \
|
||||||
|
Benchmark_dwf_fp32 \
|
||||||
|
Benchmark_ITT \
|
||||||
|
Benchmark_IO
|
||||||
|
|
||||||
|
# Each benchmark is built from a single source file of the same name.
Benchmark_comms_host_device_SOURCES = Benchmark_comms_host_device.cpp
|
||||||
|
Benchmark_dwf_fp32_SOURCES = Benchmark_dwf_fp32.cpp
|
||||||
|
Benchmark_ITT_SOURCES = Benchmark_ITT.cpp
|
||||||
|
Benchmark_IO_SOURCES = Benchmark_IO.cpp
|
6
Grid/bootstrap.sh
Executable file
6
Grid/bootstrap.sh
Executable file
@@ -0,0 +1,6 @@
|
|||||||
|
#!/usr/bin/env bash
# Regenerates the autotools build system in the current working directory.
|
||||||
|
|
||||||
|
# Stop on the first failing command, on use of an unset variable,
# and when any stage of a pipeline fails.
set -euo pipefail
|
||||||
|
|
||||||
|
# Create the local m4 macro directory if it does not exist yet
# (the path is relative to the directory the script is run from).
mkdir -p .buildutils/m4
|
||||||
|
# -f: regenerate even up-to-date files, -v: verbose,
# -i: install missing auxiliary files.
autoreconf -fvi
|
58
Grid/configure.ac
Normal file
58
Grid/configure.ac
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
# Autoconf/Automake initialisation for the lattice-bench package.
# Autoconf 2.69 or newer is required.
AC_PREREQ([2.69])
|
||||||
|
# Package name, version and bug-report address.
AC_INIT([lattice-bench], [0.1], [antonin.portelli@me.com])
|
||||||
|
# Determine the build, host and target system types.
AC_CANONICAL_BUILD
|
||||||
|
AC_CANONICAL_HOST
|
||||||
|
AC_CANONICAL_TARGET
|
||||||
|
# Sanity check: this file must exist in the source directory.
AC_CONFIG_SRCDIR([Benchmark_ITT.cpp])
|
||||||
|
# Local m4 macros are kept in .buildutils/m4.
AC_CONFIG_MACRO_DIR([.buildutils/m4])
|
||||||
|
# Preprocessor definitions from configure are written to config.h.
AC_CONFIG_HEADERS([config.h])
|
||||||
|
# Enable all Automake warnings and treat them as errors; "foreign" relaxes
# the GNU-standard requirements on files such as NEWS and ChangeLog.
AM_INIT_AUTOMAKE([-Wall -Werror foreign])
|
||||||
|
# Turn on silent (terse) build rules by default when Automake provides them.
m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES([yes])])
|
||||||
|
|
||||||
|
# Checks for programs.
# Locate the C++ compiler, the C compiler, ranlib and the archiver.
|
||||||
|
AC_PROG_CXX
|
||||||
|
AC_PROG_CC
|
||||||
|
AC_PROG_RANLIB
|
||||||
|
AM_PROG_AR
|
||||||
|
# Run all subsequent compile and link tests with the C++ compiler.
AC_LANG([C++])
|
||||||
|
|
||||||
|
# Optional --with-grid=<prefix>: when given, prepend <prefix>/bin to PATH and
# add <prefix>/include and <prefix>/lib to the compiler and linker search paths.
AC_ARG_WITH([grid],
|
||||||
|
[AS_HELP_STRING([--with-grid=<prefix>],
|
||||||
|
[try this for a non-standard install prefix of Grid])],
|
||||||
|
[PATH="$with_grid/bin$PATH_SEPARATOR$PATH"]
|
||||||
|
[CXXFLAGS="$CXXFLAGS -I$with_grid/include"]
|
||||||
|
[LDFLAGS="$LDFLAGS -L$with_grid/lib"])
|
||||||
|
# grid-config must be found on PATH; GRIDCONF is set to "yes" when it is.
AC_CHECK_PROG([GRIDCONF],[grid-config],[yes])
|
||||||
|
# Without grid-config the flags below cannot be queried, so abort.
if test x"$GRIDCONF" != x"yes" ; then
|
||||||
|
AC_MSG_ERROR([grid-config not found])
|
||||||
|
fi
|
||||||
|
# Append the compiler and linker flags reported by grid-config.
CXXFLAGS="$CXXFLAGS `grid-config --cxxflags`"
|
||||||
|
LDFLAGS="$LDFLAGS `grid-config --ldflags`"
|
||||||
|
# Prepend whatever AM_CXXFLAGS and AM_LDFLAGS hold at this point.
CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
|
||||||
|
LDFLAGS="$AM_LDFLAGS $LDFLAGS"
|
||||||
|
# Link against libGrid, then the existing LIBS, then the libraries
# reported by grid-config.
LIBS=" -lGrid $LIBS `grid-config --libs`"
|
||||||
|
|
||||||
|
# Smoke test: compile and link a program that only calls Grid_init and
# Grid_finalize with the current CXXFLAGS, LDFLAGS and LIBS.
# Prints yes on success; on failure prints no and aborts configuration.
AC_MSG_CHECKING([that a minimal Grid program compiles]);
|
||||||
|
AC_LINK_IFELSE(
|
||||||
|
[AC_LANG_SOURCE([[
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
using namespace Grid;
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
Grid_init(&argc, &argv);
|
||||||
|
Grid_finalize();
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
]])],
|
||||||
|
[AC_MSG_RESULT([yes])],
|
||||||
|
[AC_MSG_RESULT([no])]
|
||||||
|
[AC_MSG_ERROR([Could not compile a minimal Grid program])])
|
||||||
|
|
||||||
|
# Make AM_CXXFLAGS and AM_LDFLAGS available for substitution in generated files.
AC_SUBST([AM_CXXFLAGS])
|
||||||
|
AC_SUBST([AM_LDFLAGS])
|
||||||
|
# Generate Makefile from Makefile.in and write all configured outputs.
AC_CONFIG_FILES([Makefile])
|
||||||
|
AC_OUTPUT
|
Loading…
Reference in New Issue
Block a user