From acac2d693855c13691b344cb021293701ba04a8c Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Tue, 6 Oct 2020 17:57:00 +0100 Subject: [PATCH 01/24] standard C/C++ I/O in benchmark --- benchmarks/Benchmark_IO.cc | 30 ++++++++- benchmarks/Benchmark_IO.hpp | 131 +++++++++++++++++++++++++++++++++++- 2 files changed, 157 insertions(+), 4 deletions(-) diff --git a/benchmarks/Benchmark_IO.cc b/benchmarks/Benchmark_IO.cc index c8c0937f..b59e4741 100644 --- a/benchmarks/Benchmark_IO.cc +++ b/benchmarks/Benchmark_IO.cc @@ -19,6 +19,31 @@ int main (int argc, char ** argv) int64_t threads = GridThread::GetThreads(); MSG << "Grid is setup to use " << threads << " threads" << std::endl; + + MSG << SEP << std::endl; + MSG << "Benchmark std C++ write" << std::endl; + MSG << SEP << std::endl; + for (int l = 4; l <= BENCH_IO_LMAX; l += 2) + { + auto mpi = GridDefaultMpi(); + std::vector latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]}; + + MSG << "-- Local volume " << l << "^4" << std::endl; + writeBenchmark(latt, filestem(l), stdWrite); + } + + MSG << SEP << std::endl; + MSG << "Benchmark std C++ read" << std::endl; + MSG << SEP << std::endl; + for (int l = 4; l <= BENCH_IO_LMAX; l += 2) + { + auto mpi = GridDefaultMpi(); + std::vector latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]}; + + MSG << "-- Local volume " << l << "^4" << std::endl; + readBenchmark(latt, filestem(l), stdRead); + } + MSG << SEP << std::endl; MSG << "Benchmark Lime write" << std::endl; MSG << SEP << std::endl; @@ -27,10 +52,11 @@ int main (int argc, char ** argv) auto mpi = GridDefaultMpi(); std::vector latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]}; - std::cout << "-- Local volume " << l << "^4" << std::endl; + MSG << "-- Local volume " << l << "^4" << std::endl; writeBenchmark(latt, filestem(l), limeWrite); } + MSG << SEP << std::endl; MSG << "Benchmark Lime read" << std::endl; MSG << SEP << std::endl; for (int l = 4; l <= BENCH_IO_LMAX; l += 2) @@ -38,7 +64,7 @@ int main (int argc, char ** argv) auto mpi = GridDefaultMpi(); std::vector latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]}; - std::cout << "-- Local volume " << l << "^4" << std::endl; + MSG << "-- Local volume " << l << "^4" << std::endl; readBenchmark(latt, filestem(l), limeRead); } diff --git a/benchmarks/Benchmark_IO.hpp b/benchmarks/Benchmark_IO.hpp index d3416353..73c198dc 100644 --- a/benchmarks/Benchmark_IO.hpp +++ b/benchmarks/Benchmark_IO.hpp @@ -14,13 +14,140 @@ using WriterFn = std::function ; template using ReaderFn = std::function; +// AP 06/10/2020: Standard C version in case one is suspicious of the C++ API +// +// template +// void stdWrite(const std::string filestem, Field &vec) +// { +// std::string rankStr = std::to_string(vec.Grid()->ThisRank()); +// std::FILE *file = std::fopen((filestem + "." + rankStr + ".bin").c_str(), "wb"); +// size_t size; +// uint32_t crc; +// GridStopWatch ioWatch, crcWatch; + +// size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object); +// autoView(vec_v, vec, CpuRead); +// crcWatch.Start(); +// crc = GridChecksum::crc32(vec_v.cpu_ptr, size); +// std::fwrite(&crc, sizeof(uint32_t), 1, file); +// crcWatch.Stop(); +// MSG << "Std I/O write: Data CRC32 " << std::hex << crc << std::dec << std::endl; +// ioWatch.Start(); +// std::fwrite(vec_v.cpu_ptr, sizeof(typename Field::scalar_object), vec.Grid()->lSites(), file); +// ioWatch.Stop(); +// std::fclose(file); +// size *= vec.Grid()->ProcessorCount(); +// MSG << "Std I/O write: Wrote " << size << " bytes in " << ioWatch.Elapsed() +// << ", performance " << size/1024./1024./(ioWatch.useconds()/1.e6) +// << " MB/s" << std::endl; +// MSG << "Std I/O write: checksum overhead " << crcWatch.Elapsed() << std::endl; +// } +// +// template +// void stdRead(Field &vec, const std::string filestem) +// { +// std::string rankStr = std::to_string(vec.Grid()->ThisRank()); +// std::FILE *file = std::fopen((filestem + "." + rankStr + ".bin").c_str(), "rb"); +// size_t size; +// uint32_t crcRead, crcData; +// GridStopWatch ioWatch, crcWatch; + +// size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object); +// crcWatch.Start(); +// std::fread(&crcRead, sizeof(uint32_t), 1, file); +// crcWatch.Stop(); +// { +// autoView(vec_v, vec, CpuWrite); +// ioWatch.Start(); +// std::fread(vec_v.cpu_ptr, sizeof(typename Field::scalar_object), vec.Grid()->lSites(), file); +// ioWatch.Stop(); +// std::fclose(file); +// } +// { +// autoView(vec_v, vec, CpuRead); +// crcWatch.Start(); +// crcData = GridChecksum::crc32(vec_v.cpu_ptr, size); +// crcWatch.Stop(); +// } +// MSG << "Std I/O read: Data CRC32 " << std::hex << crcData << std::dec << std::endl; +// assert(crcData == crcRead); +// size *= vec.Grid()->ProcessorCount(); +// MSG << "Std I/O read: Read " << size << " bytes in " << ioWatch.Elapsed() +// << ", performance " << size/1024./1024./(ioWatch.useconds()/1.e6) +// << " MB/s" << std::endl; +// MSG << "Std I/O read: checksum overhead " << crcWatch.Elapsed() << std::endl; +// } + +template +void stdWrite(const std::string filestem, Field &vec) +{ + std::string rankStr = std::to_string(vec.Grid()->ThisRank()); + std::ofstream file(filestem + "." + rankStr + ".bin", std::ios::out | std::ios::binary); + size_t size, sizec; + uint32_t crc; + GridStopWatch ioWatch, crcWatch; + + size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object); + sizec = size/sizeof(char); // just in case of... + autoView(vec_v, vec, CpuRead); + crcWatch.Start(); + crc = GridChecksum::crc32(vec_v.cpu_ptr, size); + file.write(reinterpret_cast(&crc), sizeof(uint32_t)/sizeof(char)); + crcWatch.Stop(); + MSG << "Std I/O write: Data CRC32 " << std::hex << crc << std::dec << std::endl; + ioWatch.Start(); + file.write(reinterpret_cast(vec_v.cpu_ptr), sizec); + file.flush(); + ioWatch.Stop(); + size *= vec.Grid()->ProcessorCount(); + MSG << "Std I/O write: Wrote " << size << " bytes in " << ioWatch.Elapsed() + << ", performance " << size/1024./1024./(ioWatch.useconds()/1.e6) + << " MB/s" << std::endl; + MSG << "Std I/O write: checksum overhead " << crcWatch.Elapsed() << std::endl; +} + +template +void stdRead(Field &vec, const std::string filestem) +{ + std::string rankStr = std::to_string(vec.Grid()->ThisRank()); + std::ifstream file(filestem + "." + rankStr + ".bin", std::ios::in | std::ios::binary); + size_t size, sizec; + uint32_t crcRead, crcData; + GridStopWatch ioWatch, crcWatch; + + size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object); + sizec = size/sizeof(char); // just in case of... + crcWatch.Start(); + file.read(reinterpret_cast(&crcRead), sizeof(uint32_t)/sizeof(char)); + crcWatch.Stop(); + { + autoView(vec_v, vec, CpuWrite); + ioWatch.Start(); + file.read(reinterpret_cast(vec_v.cpu_ptr), sizec); + ioWatch.Stop(); + } + { + autoView(vec_v, vec, CpuRead); + crcWatch.Start(); + crcData = GridChecksum::crc32(vec_v.cpu_ptr, size); + crcWatch.Stop(); + } + MSG << "Std I/O read: Data CRC32 " << std::hex << crcData << std::dec << std::endl; + assert(crcData == crcRead); + size *= vec.Grid()->ProcessorCount(); + MSG << "Std I/O read: Read " << size << " bytes in " << ioWatch.Elapsed() + << ", performance " << size/1024./1024./(ioWatch.useconds()/1.e6) + << " MB/s" << std::endl; + MSG << "Std I/O read: checksum overhead " << crcWatch.Elapsed() << std::endl; +} + template void limeWrite(const std::string filestem, Field &vec) { emptyUserRecord record; ScidacWriter binWriter(vec.Grid()->IsBoss()); - binWriter.open(filestem + ".bin"); + binWriter.open(filestem + ".lime.bin"); binWriter.writeScidacFieldRecord(vec, record); binWriter.close(); } @@ -31,7 +158,7 @@ void limeRead(Field &vec, const std::string filestem) emptyUserRecord record; ScidacReader binReader; - binReader.open(filestem + ".bin"); + binReader.open(filestem + ".lime.bin"); binReader.readScidacFieldRecord(vec, record); binReader.close(); } From e9c5a271a886c90f73f41aba22f703292f75a1e5 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Tue, 6 Oct 2020 17:58:16 +0100 Subject: [PATCH 02/24] fixing potential issues with log alignment and timer I/O --- Grid/log/Log.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Grid/log/Log.h b/Grid/log/Log.h index d459a4a9..68693647 100644 --- a/Grid/log/Log.h +++ b/Grid/log/Log.h @@ -130,6 +130,8 @@ public: friend std::ostream& operator<< (std::ostream& stream, Logger& log){ if ( log.active ) { + std::ios_base::fmtflags f(stream.flags()); + stream << log.background()<< std::left; if (log.topWidth > 0) { @@ -152,6 +154,8 @@ public: << now << log.background() << " : " ; } stream << log.colour(); + stream.flags(f); + return stream; } else { return devnull; From 5ee832f7386b44a8ce8d15747c04dd0e1fc9d7c4 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 7 Oct 2020 15:31:51 +0100 Subject: [PATCH 03/24] I/O benchmark code cleaning --- benchmarks/Benchmark_IO.cc | 31 ++++++++-------- benchmarks/Benchmark_IO.hpp | 4 +-- benchmarks/Benchmark_IO_vs_dir.cc | 59 +++++++++++++++++-------------- 3 files changed, 50 insertions(+), 44 deletions(-) diff --git a/benchmarks/Benchmark_IO.cc b/benchmarks/Benchmark_IO.cc index b59e4741..5e4cef9f 100644 --- a/benchmarks/Benchmark_IO.cc +++ b/benchmarks/Benchmark_IO.cc @@ -14,61 +14,62 @@ std::string filestem(const int l) int main (int argc, char ** argv) { -#ifdef HAVE_LIME Grid_init(&argc,&argv); - int64_t threads = GridThread::GetThreads(); + int64_t threads = GridThread::GetThreads(); + auto mpi = GridDefaultMpi(); + std::vector latt; + MSG << "Grid is setup to use " << threads << " threads" << std::endl; + MSG << "MPI partition " << mpi << std::endl; MSG << SEP << std::endl; - MSG << "Benchmark std C++ write" << std::endl; + MSG << "Benchmark std write" << std::endl; MSG << SEP << std::endl; for (int l = 4; l <= BENCH_IO_LMAX; l += 2) { - auto mpi = GridDefaultMpi(); - std::vector latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]}; + latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]}; MSG << "-- Local volume " << l << "^4" << std::endl; writeBenchmark(latt, filestem(l), stdWrite); } MSG << SEP << std::endl; - MSG << "Benchmark std C++ read" << std::endl; + MSG << "Benchmark std read" << std::endl; MSG << SEP << std::endl; for (int l = 4; l <= BENCH_IO_LMAX; l += 2) { - auto mpi = GridDefaultMpi(); - std::vector latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]}; + latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]}; MSG << "-- Local volume " << l << "^4" << std::endl; readBenchmark(latt, filestem(l), stdRead); } +#ifdef HAVE_LIME MSG << SEP << std::endl; - MSG << "Benchmark Lime write" << std::endl; + MSG << "Benchmark Grid C-Lime write" << std::endl; MSG << SEP << std::endl; for (int l = 4; l <= BENCH_IO_LMAX; l += 2) { - auto mpi = GridDefaultMpi(); - std::vector latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]}; + latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]}; MSG << "-- Local volume " << l << "^4" << std::endl; writeBenchmark(latt, filestem(l), limeWrite); } MSG << SEP << std::endl; - MSG << "Benchmark Lime read" << std::endl; + MSG << "Benchmark Grid C-Lime read" << std::endl; MSG << SEP << std::endl; for (int l = 4; l <= BENCH_IO_LMAX; l += 2) { - auto mpi = GridDefaultMpi(); - std::vector latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]}; + latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]}; MSG << "-- Local volume " << l << "^4" << std::endl; readBenchmark(latt, filestem(l), limeRead); } +#endif Grid_finalize(); -#endif + return EXIT_SUCCESS; } diff --git a/benchmarks/Benchmark_IO.hpp b/benchmarks/Benchmark_IO.hpp index 73c198dc..39af14ba 100644 --- a/benchmarks/Benchmark_IO.hpp +++ b/benchmarks/Benchmark_IO.hpp @@ -101,7 +101,7 @@ void stdWrite(const std::string filestem, Field &vec) ioWatch.Stop(); size *= vec.Grid()->ProcessorCount(); MSG << "Std I/O write: Wrote " << size << " bytes in " << ioWatch.Elapsed() - << ", performance " << size/1024./1024./(ioWatch.useconds()/1.e6) + << ", " << size/1024./1024./(ioWatch.useconds()/1.e6) << " MB/s" << std::endl; MSG << "Std I/O write: checksum overhead " << crcWatch.Elapsed() << std::endl; } @@ -136,7 +136,7 @@ void stdRead(Field &vec, const std::string filestem) assert(crcData == crcRead); size *= vec.Grid()->ProcessorCount(); MSG << "Std I/O read: Read " << size << " bytes in " << ioWatch.Elapsed() - << ", performance " << size/1024./1024./(ioWatch.useconds()/1.e6) + << ", " << size/1024./1024./(ioWatch.useconds()/1.e6) << " MB/s" << std::endl; MSG << "Std I/O read: checksum overhead " << crcWatch.Elapsed() << std::endl; } diff --git a/benchmarks/Benchmark_IO_vs_dir.cc b/benchmarks/Benchmark_IO_vs_dir.cc index 6e6c9ae0..9c254e27 100644 --- a/benchmarks/Benchmark_IO_vs_dir.cc +++ b/benchmarks/Benchmark_IO_vs_dir.cc @@ -34,46 +34,51 @@ int main (int argc, char ** argv) } Grid_init(&argc,&argv); - int64_t threads = GridThread::GetThreads(); + auto mpi = GridDefaultMpi(); + MSG << "Grid is setup to use " << threads << " threads" << std::endl; - MSG << SEP << std::endl; - MSG << "Benchmark double precision Lime write" << std::endl; - MSG << SEP << std::endl; - for (auto &d: dir) - { - MSG << "-- Directory " << d << std::endl; - writeBenchmark(GridDefaultLatt(), d + "/ioBench", limeWrite, Ls, rb); - } + MSG << "MPI partition " << mpi << std::endl; MSG << SEP << std::endl; - MSG << "Benchmark double precision Lime read" << std::endl; + MSG << "Benchmark Grid C-Lime write" << std::endl; MSG << SEP << std::endl; for (auto &d: dir) { MSG << "-- Directory " << d << std::endl; - readBenchmark(GridDefaultLatt(), d + "/ioBench", limeRead, Ls, rb); + writeBenchmark(GridDefaultLatt(), d + "/ioBench", + limeWrite, Ls, rb); + } + MSG << SEP << std::endl; + MSG << "Benchmark Grid C-Lime read" << std::endl; + MSG << SEP << std::endl; + for (auto &d: dir) + { + MSG << "-- Directory " << d << std::endl; + readBenchmark(GridDefaultLatt(), d + "/ioBench", + limeRead, Ls, rb); } - MSG << SEP << std::endl; - MSG << "Benchmark single precision Lime write" << std::endl; - MSG << SEP << std::endl; - for (auto &d: dir) - { - MSG << "-- Directory " << d << std::endl; - writeBenchmark(GridDefaultLatt(), d + "/ioBench", limeWrite, Ls, rb); - } + // MSG << SEP << std::endl; + // MSG << "Benchmark single precision Lime write" << std::endl; + // MSG << SEP << std::endl; + // for (auto &d: dir) + // { + // MSG << "-- Directory " << d << std::endl; + // writeBenchmark(GridDefaultLatt(), d + "/ioBench", limeWrite, Ls, rb); + // } - MSG << SEP << std::endl; - MSG << "Benchmark single precision Lime read" << std::endl; - MSG << SEP << std::endl; - for (auto &d: dir) - { - MSG << "-- Directory " << d << std::endl; - readBenchmark(GridDefaultLatt(), d + "/ioBench", limeRead, Ls, rb); - } + // MSG << SEP << std::endl; + // MSG << "Benchmark single precision Lime read" << std::endl; + // MSG << SEP << std::endl; + // for (auto &d: dir) + // { + // MSG << "-- Directory " << d << std::endl; + // readBenchmark(GridDefaultLatt(), d + "/ioBench", limeRead, Ls, rb); + // } Grid_finalize(); + #endif return EXIT_SUCCESS; } From 9ba3647bdf466e4757284ed4a49ec4ee32f679c9 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 7 Oct 2020 15:35:03 +0100 Subject: [PATCH 04/24] script to convert I/O benchmark logs to CSV --- benchmarks/benchmark-io-csv.sh | 76 ++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100755 benchmarks/benchmark-io-csv.sh diff --git a/benchmarks/benchmark-io-csv.sh b/benchmarks/benchmark-io-csv.sh new file mode 100755 index 00000000..cc61b006 --- /dev/null +++ b/benchmarks/benchmark-io-csv.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash + +awkscript=' +BEGIN{ + i = 0; + print "local L,std read (MB/s),std write (MB/s),Grid Lime read (MB/s),Grid Lime write (MB/s)" +} + +/Benchmark std write/{ + i = 0; + mode = "stdWrite"; +} + +/Benchmark std read/{ + i = 0; + mode = "stdRead" +} + +/Benchmark Grid C-Lime write/{ + i = 0; + mode = "gridWrite"; +} + +/Benchmark Grid C-Lime read/{ + i = 0; + mode = "gridRead"; +} + +/Local volume/{ + match($0, "[0-9]+\\^4"); + l[i] = substr($0, RSTART, RLENGTH-2); +} + +/MB\/s/{ + match($0, "[0-9.eE]+ MB/s"); + p = substr($0, RSTART, RLENGTH-5); + if (mode == "stdWrite") + { + sw[i] = p; + } + else if (mode == "stdRead") + { + sr[i] = p; + } + else if (mode == "gridWrite") + { + gw[i] = p; + } + else if (mode == "gridRead") + { + gr[i] = p; + } + i++; +} + +END{ + s = 0 + for (a in l) + { + s++; + } + for (j = 0; j < s; j++) + { + printf("%s,%s,%s,%s,%s\n", l[j], sr[j], sw[j], gr[j], gw[j]); + } + printf("\n"); +} +' + +if (( $# != 1 )); then + echo "usage: `basename $0` " 1>&2 + exit 1 +fi +LOG=$1 + +awk "${awkscript}" ${LOG} From 1ba25a0d8c728f18dad32649c5be0572f79e27af Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 7 Oct 2020 15:38:41 +0100 Subject: [PATCH 05/24] more I/O benchmark code cleaning --- benchmarks/Benchmark_IO_vs_dir.cc | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/benchmarks/Benchmark_IO_vs_dir.cc b/benchmarks/Benchmark_IO_vs_dir.cc index 9c254e27..9ccfd554 100644 --- a/benchmarks/Benchmark_IO_vs_dir.cc +++ b/benchmarks/Benchmark_IO_vs_dir.cc @@ -8,7 +8,6 @@ using namespace Grid; int main (int argc, char ** argv) { -#ifdef HAVE_LIME std::vector dir; unsigned int Ls; bool rb; @@ -40,6 +39,26 @@ int main (int argc, char ** argv) MSG << "Grid is setup to use " << threads << " threads" << std::endl; MSG << "MPI partition " << mpi << std::endl; + MSG << SEP << std::endl; + MSG << "Benchmark Grid std write" << std::endl; + MSG << SEP << std::endl; + for (auto &d: dir) + { + MSG << "-- Directory " << d << std::endl; + writeBenchmark(GridDefaultLatt(), d + "/ioBench", + stdWrite, Ls, rb); + } + MSG << SEP << std::endl; + MSG << "Benchmark Grid std read" << std::endl; + MSG << SEP << std::endl; + for (auto &d: dir) + { + MSG << "-- Directory " << d << std::endl; + readBenchmark(GridDefaultLatt(), d + "/ioBench", + stdRead, Ls, rb); + } + +#ifdef HAVE_LIME MSG << SEP << std::endl; MSG << "Benchmark Grid C-Lime write" << std::endl; MSG << SEP << std::endl; @@ -58,6 +77,7 @@ int main (int argc, char ** argv) readBenchmark(GridDefaultLatt(), d + "/ioBench", limeRead, Ls, rb); } +#endif // MSG << SEP << std::endl; // MSG << "Benchmark single precision Lime write" << std::endl; @@ -78,7 +98,6 @@ int main (int argc, char ** argv) // } Grid_finalize(); - -#endif + return EXIT_SUCCESS; } From d2012776524c616955a688b6ae2e05097d84548e Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 7 Oct 2020 13:07:00 -0400 Subject: [PATCH 06/24] Expose Nc as a compile time configure option. Remove precision option --- Grid/qcd/QCD.h | 2 +- benchmarks/Benchmark_ITT.cc | 16 ++++-- benchmarks/Benchmark_dwf.cc | 2 +- benchmarks/Benchmark_gparity.cc | 2 +- benchmarks/Benchmark_mooee.cc | 4 +- configure.ac | 52 +++++++++++++------ tests/IO/Test_ildg_io.cc | 2 +- tests/IO/Test_nersc_io.cc | 2 +- tests/Test_cayley_even_odd_vec.cc | 2 +- tests/Test_compressed_lanczos_hot_start.cc | 2 +- tests/Test_dwf_mixedcg_prec.cc | 2 +- tests/Test_dwf_mixedcg_prec_halfcomms.cc | 2 +- tests/core/Test_cf_coarsen_support.cc | 2 +- tests/core/Test_checker.cc | 2 +- tests/core/Test_contfrac_even_odd.cc | 2 +- tests/core/Test_dwf_eofa_even_odd.cc | 2 +- tests/core/Test_dwf_even_odd.cc | 2 +- tests/core/Test_fft.cc | 2 +- tests/core/Test_fft_gfix.cc | 8 +-- tests/core/Test_gparity.cc | 2 +- tests/core/Test_gpwilson_even_odd.cc | 2 +- tests/core/Test_lie_generators.cc | 46 ++++++++-------- tests/core/Test_main.cc | 2 +- tests/core/Test_mobius_eofa_even_odd.cc | 2 +- tests/core/Test_quenched_update.cc | 6 +-- tests/core/Test_staggered.cc | 2 +- tests/core/Test_staggered5D.cc | 2 +- tests/core/Test_staggered5Dvec.cc | 2 +- tests/core/Test_staggered5DvecF.cc | 2 +- tests/core/Test_staggered_naive.cc | 2 +- tests/core/Test_wilson_clover.cc | 2 +- tests/core/Test_wilson_even_odd.cc | 2 +- .../core/Test_wilson_twisted_mass_even_odd.cc | 2 +- tests/debug/Test_cayley_cg.cc | 2 +- tests/debug/Test_cayley_coarsen_support.cc | 2 +- tests/debug/Test_cayley_even_odd.cc | 2 +- tests/debug/Test_cayley_ldop_cr.cc | 6 +-- tests/debug/Test_cayley_mres.cc | 4 +- tests/debug/Test_heatbath_dwf_eofa.cc | 2 +- tests/debug/Test_heatbath_dwf_eofa_gparity.cc | 2 +- tests/debug/Test_heatbath_mobius_eofa.cc | 2 +- .../Test_heatbath_mobius_eofa_gparity.cc | 2 +- tests/debug/Test_reweight_dwf_eofa.cc | 2 +- tests/debug/Test_reweight_dwf_eofa_gparity.cc | 2 +- tests/debug/Test_reweight_mobius_eofa.cc | 2 +- .../Test_reweight_mobius_eofa_gparity.cc | 2 +- tests/forces/Test_contfrac_force.cc | 4 +- tests/forces/Test_dwf_force.cc | 4 +- tests/forces/Test_dwf_force_eofa.cc | 4 +- tests/forces/Test_dwf_gpforce.cc | 6 +-- tests/forces/Test_dwf_gpforce_eofa.cc | 4 +- tests/forces/Test_gp_plaq_force.cc | 4 +- tests/forces/Test_gp_rect_force.cc | 4 +- tests/forces/Test_gpdwf_force.cc | 4 +- tests/forces/Test_gpwilson_force.cc | 4 +- tests/forces/Test_laplacian_force.cc | 4 +- tests/forces/Test_mobius_force.cc | 4 +- tests/forces/Test_mobius_force_eofa.cc | 4 +- tests/forces/Test_mobius_gpforce_eofa.cc | 4 +- tests/forces/Test_partfrac_force.cc | 4 +- tests/forces/Test_rect_force.cc | 4 +- tests/forces/Test_wilson_force.cc | 4 +- tests/forces/Test_wilsonclover_force.cc | 6 +-- tests/forces/Test_zmobius_force.cc | 4 +- ..._dwf_compressed_lanczos_reorg_synthetic.cc | 2 +- tests/lanczos/Test_dwf_lanczos.cc | 2 +- tests/lanczos/Test_wilson_lanczos.cc | 2 +- tests/qdpxx/Test_qdpxx_baryon.cc | 2 +- tests/qdpxx/Test_qdpxx_loops_staples.cc | 2 +- tests/qdpxx/Test_qdpxx_munprec.cc | 2 +- tests/qdpxx/Test_qdpxx_stag.cc | 2 +- tests/qdpxx/Test_qdpxx_wilson.cc | 2 +- tests/smearing/Test_smearing.cc | 4 +- tests/solver/Test_cf_cr_unprec.cc | 2 +- tests/solver/Test_contfrac_cg.cc | 2 +- tests/solver/Test_dwf_cg_prec.cc | 2 +- tests/solver/Test_dwf_cg_schur.cc | 2 +- tests/solver/Test_dwf_cg_unprec.cc | 2 +- tests/solver/Test_dwf_cr_unprec.cc | 2 +- tests/solver/Test_dwf_fpgcr.cc | 2 +- tests/solver/Test_dwf_mrhs_cg.cc | 2 +- tests/solver/Test_dwf_mrhs_cg_mpi.cc | 4 +- tests/solver/Test_dwf_mrhs_cg_mpieo.cc | 2 +- tests/solver/Test_dwf_qmr_unprec.cc | 2 +- tests/solver/Test_mobius_bcg.cc | 2 +- tests/solver/Test_mobius_bcg_nosplit.cc | 4 +- tests/solver/Test_mobius_bcg_phys_nosplit.cc | 4 +- tests/solver/Test_mobius_bcg_prec_nosplit.cc | 4 +- tests/solver/Test_split_grid.cc | 2 +- tests/solver/Test_staggered_block_cg_prec.cc | 2 +- .../solver/Test_staggered_block_cg_unprec.cc | 2 +- tests/solver/Test_staggered_cagmres_unprec.cc | 2 +- tests/solver/Test_staggered_cg_prec.cc | 2 +- tests/solver/Test_staggered_cg_schur.cc | 2 +- tests/solver/Test_staggered_cg_unprec.cc | 2 +- tests/solver/Test_staggered_fcagmres_prec.cc | 2 +- tests/solver/Test_staggered_fgmres_prec.cc | 2 +- tests/solver/Test_staggered_gmres_unprec.cc | 2 +- tests/solver/Test_staggered_mr_unprec.cc | 2 +- tests/solver/Test_staggered_multishift.cc | 2 +- tests/solver/Test_wilson_cagmres_unprec.cc | 2 +- tests/solver/Test_wilson_cg_prec.cc | 2 +- tests/solver/Test_wilson_cg_schur.cc | 2 +- tests/solver/Test_wilson_cg_unprec.cc | 2 +- tests/solver/Test_wilson_cr_unprec.cc | 2 +- tests/solver/Test_wilson_fcagmres_prec.cc | 2 +- tests/solver/Test_wilson_fgmres_prec.cc | 2 +- tests/solver/Test_wilson_gmres_unprec.cc | 2 +- tests/solver/Test_wilson_mg.cc | 2 +- tests/solver/Test_wilson_mg_mp.cc | 2 +- tests/solver/Test_wilson_mr_unprec.cc | 2 +- tests/solver/Test_wilson_qmr_unprec.cc | 2 +- .../solver/Test_wilsonclover_bicgstab_prec.cc | 2 +- .../Test_wilsonclover_bicgstab_schur.cc | 2 +- .../Test_wilsonclover_bicgstab_unprec.cc | 2 +- .../Test_wilsonclover_cagmres_unprec.cc | 2 +- tests/solver/Test_wilsonclover_cg_prec.cc | 2 +- tests/solver/Test_wilsonclover_cg_schur.cc | 2 +- tests/solver/Test_wilsonclover_cg_unprec.cc | 2 +- .../solver/Test_wilsonclover_fcagmres_prec.cc | 2 +- tests/solver/Test_wilsonclover_fgmres_prec.cc | 2 +- .../solver/Test_wilsonclover_gmres_unprec.cc | 2 +- tests/solver/Test_wilsonclover_mg.cc | 2 +- tests/solver/Test_wilsonclover_mg_lime.cc | 2 +- tests/solver/Test_wilsonclover_mg_mp.cc | 2 +- .../Test_wilsonclover_mixedbicgstab_prec.cc | 2 +- .../solver/Test_wilsonclover_mixedcg_prec.cc | 2 +- tests/solver/Test_wilsonclover_mr_unprec.cc | 2 +- tests/solver/Test_zMADWF_prec.cc | 2 +- tests/solver/Test_zmobius_cg_prec.cc | 2 +- 130 files changed, 232 insertions(+), 204 deletions(-) diff --git a/Grid/qcd/QCD.h b/Grid/qcd/QCD.h index faacac63..76d7def4 100644 --- a/Grid/qcd/QCD.h +++ b/Grid/qcd/QCD.h @@ -47,7 +47,7 @@ static constexpr int Ym = 5; static constexpr int Zm = 6; static constexpr int Tm = 7; -static constexpr int Nc=3; +static constexpr int Nc=Config_Nc; static constexpr int Ns=4; static constexpr int Nd=4; static constexpr int Nhs=2; // half spinor diff --git a/benchmarks/Benchmark_ITT.cc b/benchmarks/Benchmark_ITT.cc index 5e1e1f66..df5427c1 100644 --- a/benchmarks/Benchmark_ITT.cc +++ b/benchmarks/Benchmark_ITT.cc @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -358,6 +358,7 @@ public: ///////// Welcome message //////////// std::cout<::HotConfiguration(RNG4,Umu); Fermion src (FGrid); random(RNG5,src); Fermion src_e (FrbGrid); Fermion src_o (FrbGrid); @@ -449,7 +450,13 @@ public: FGrid->Barrier(); double volume=Ls; for(int mu=0;mumflops_best ) mflops_best = mflops; if ( mflops::HotConfiguration(RNG4,Umu); typename Action::ImplParams params; Action Ds(Umu,Umu,*FGrid,*FrbGrid,mass,c1,c2,u0,params); diff --git a/benchmarks/Benchmark_dwf.cc b/benchmarks/Benchmark_dwf.cc index 2ef5921d..d7b49122 100644 --- a/benchmarks/Benchmark_dwf.cc +++ b/benchmarks/Benchmark_dwf.cc @@ -108,7 +108,7 @@ int main (int argc, char ** argv) std::cout << GridLogMessage << "Drawing gauge field" << std::endl; LatticeGaugeField Umu(UGrid); - SU3::HotConfiguration(RNG4,Umu); + SU::HotConfiguration(RNG4,Umu); std::cout << GridLogMessage << "Random gauge initialised " << std::endl; #if 0 Umu=1.0; diff --git a/benchmarks/Benchmark_gparity.cc b/benchmarks/Benchmark_gparity.cc index b03e1b63..7fa7508a 100644 --- a/benchmarks/Benchmark_gparity.cc +++ b/benchmarks/Benchmark_gparity.cc @@ -63,7 +63,7 @@ int main (int argc, char ** argv) std::cout << GridLogMessage << "Drawing gauge field" << std::endl; LatticeGaugeFieldF Umu(UGrid); - SU3::HotConfiguration(RNG4,Umu); + SU::HotConfiguration(RNG4,Umu); std::cout << GridLogMessage << "Random gauge initialised " << std::endl; RealD mass=0.1; diff --git a/benchmarks/Benchmark_mooee.cc b/benchmarks/Benchmark_mooee.cc index ef16c908..0aaccecc 100644 --- a/benchmarks/Benchmark_mooee.cc +++ b/benchmarks/Benchmark_mooee.cc @@ -30,7 +30,7 @@ Author: paboyle using namespace std; using namespace Grid; - ; + int main (int argc, char ** argv) @@ -53,7 +53,7 @@ int main (int argc, char ** argv) GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); std::cout << GridLogMessage << "Seeded"<::HotConfiguration(RNG4,Umu); std::cout << GridLogMessage << "made random gauge fields"< U(4,&Fine); - SU3::HotConfiguration(pRNGa,Umu); + SU::HotConfiguration(pRNGa,Umu); FieldMetaData header; diff --git a/tests/IO/Test_nersc_io.cc b/tests/IO/Test_nersc_io.cc index f5413e3b..c15c320e 100644 --- a/tests/IO/Test_nersc_io.cc +++ b/tests/IO/Test_nersc_io.cc @@ -84,7 +84,7 @@ int main (int argc, char ** argv) std::vector U(4,&Fine); - SU3::HotConfiguration(pRNGa,Umu); + SU::HotConfiguration(pRNGa,Umu); FieldMetaData header; std::string file("./ckpoint_lat.4000"); diff --git a/tests/Test_cayley_even_odd_vec.cc b/tests/Test_cayley_even_odd_vec.cc index 0e71d910..c345efd9 100644 --- a/tests/Test_cayley_even_odd_vec.cc +++ b/tests/Test_cayley_even_odd_vec.cc @@ -80,7 +80,7 @@ int main (int argc, char ** argv) GridParallelRNG sRNG5(sFGrid); sRNG5.SeedFixedIntegers(seeds5); LatticeGaugeField Umu(UGrid); - SU3::HotConfiguration(RNG4,Umu); + SU::HotConfiguration(RNG4,Umu); RealD mass=0.1; RealD M5 =1.8; diff --git a/tests/Test_compressed_lanczos_hot_start.cc b/tests/Test_compressed_lanczos_hot_start.cc index 8eb7a921..dc22cfca 100644 --- a/tests/Test_compressed_lanczos_hot_start.cc +++ b/tests/Test_compressed_lanczos_hot_start.cc @@ -202,7 +202,7 @@ int main (int argc, char ** argv) { std::vector seeds4({1,2,3,4}); GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); LatticeGaugeField Umu(UGrid); - SU3::HotConfiguration(RNG4,Umu); + SU::HotConfiguration(RNG4,Umu); // FieldMetaData header; // NerscIO::readConfiguration(Umu,header,Params.config); diff --git a/tests/Test_dwf_mixedcg_prec.cc b/tests/Test_dwf_mixedcg_prec.cc index be881db9..da0b54cd 100644 --- a/tests/Test_dwf_mixedcg_prec.cc +++ b/tests/Test_dwf_mixedcg_prec.cc @@ -71,7 +71,7 @@ int main (int argc, char ** argv) LatticeGaugeFieldD Umu(UGrid); LatticeGaugeFieldF Umu_f(UGrid_f); - SU3::HotConfiguration(RNG4,Umu); + SU::HotConfiguration(RNG4,Umu); precisionChange(Umu_f,Umu); diff --git a/tests/Test_dwf_mixedcg_prec_halfcomms.cc b/tests/Test_dwf_mixedcg_prec_halfcomms.cc index 4d94632c..8b0126dc 100644 --- a/tests/Test_dwf_mixedcg_prec_halfcomms.cc +++ b/tests/Test_dwf_mixedcg_prec_halfcomms.cc @@ -69,7 +69,7 @@ int main (int argc, char ** argv) LatticeGaugeFieldD Umu(UGrid); LatticeGaugeFieldF Umu_f(UGrid_f); - SU3::HotConfiguration(RNG4,Umu); + SU::HotConfiguration(RNG4,Umu); precisionChange(Umu_f,Umu); diff --git a/tests/core/Test_cf_coarsen_support.cc b/tests/core/Test_cf_coarsen_support.cc index e787905e..ad0309b9 100644 --- a/tests/core/Test_cf_coarsen_support.cc +++ b/tests/core/Test_cf_coarsen_support.cc @@ -64,7 +64,7 @@ int main (int argc, char ** argv) LatticeFermion ref(FGrid); ref=Zero(); LatticeFermion tmp(FGrid); LatticeFermion err(FGrid); - LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu); + LatticeGaugeField Umu(UGrid); SU::HotConfiguration(RNG4,Umu); std::vector U(4,UGrid); for(int mu=0;mu::HotConfiguration(RNG4,Umu); // std::vector U(4,UGrid); // for(int mu=0;mu::HotConfiguration(RNG4,Umu); std::vector U(4,UGrid); RealD mass=0.1; diff --git a/tests/core/Test_dwf_eofa_even_odd.cc b/tests/core/Test_dwf_eofa_even_odd.cc index 01fff9ea..64701069 100644 --- a/tests/core/Test_dwf_eofa_even_odd.cc +++ b/tests/core/Test_dwf_eofa_even_odd.cc @@ -73,7 +73,7 @@ int main (int argc, char ** argv) LatticeFermion ref (FGrid); ref = Zero(); LatticeFermion tmp (FGrid); tmp = Zero(); LatticeFermion err (FGrid); err = Zero(); - LatticeGaugeField Umu (UGrid); SU3::HotConfiguration(RNG4, Umu); + LatticeGaugeField Umu (UGrid); SU::HotConfiguration(RNG4, Umu); std::vector U(4,UGrid); // Only one non-zero (y) diff --git a/tests/core/Test_dwf_even_odd.cc b/tests/core/Test_dwf_even_odd.cc index 6093ee8f..4918f02a 100644 --- a/tests/core/Test_dwf_even_odd.cc +++ b/tests/core/Test_dwf_even_odd.cc @@ -72,7 +72,7 @@ int main (int argc, char ** argv) LatticeFermion ref(FGrid); ref=Zero(); LatticeFermion tmp(FGrid); tmp=Zero(); LatticeFermion err(FGrid); tmp=Zero(); - LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu); + LatticeGaugeField Umu(UGrid); SU::HotConfiguration(RNG4,Umu); std::vector U(4,UGrid); // Only one non-zero (y) diff --git a/tests/core/Test_fft.cc b/tests/core/Test_fft.cc index 2ba3752b..212b1a35 100644 --- a/tests/core/Test_fft.cc +++ b/tests/core/Test_fft.cc @@ -138,7 +138,7 @@ int main (int argc, char ** argv) LatticeGaugeFieldD Umu(&GRID); - SU3::ColdConfiguration(pRNG,Umu); // Unit gauge + SU::ColdConfiguration(pRNG,Umu); // Unit gauge // Umu=Zero(); //////////////////////////////////////////////////// // Wilson test diff --git a/tests/core/Test_fft_gfix.cc b/tests/core/Test_fft_gfix.cc index 228770a8..87dbc242 100644 --- a/tests/core/Test_fft_gfix.cc +++ b/tests/core/Test_fft_gfix.cc @@ -73,11 +73,11 @@ int main (int argc, char ** argv) LatticeColourMatrix xform2(&GRID); // Gauge xform LatticeColourMatrix xform3(&GRID); // Gauge xform - SU3::ColdConfiguration(pRNG,Umu); // Unit gauge + SU::ColdConfiguration(pRNG,Umu); // Unit gauge Uorg=Umu; Urnd=Umu; - SU3::RandomGaugeTransform(pRNG,Urnd,g); // Unit gauge + SU::RandomGaugeTransform(pRNG,Urnd,g); // Unit gauge Real plaq=WilsonLoops::avgPlaquette(Umu); std::cout << " Initial plaquette "<::HotConfiguration(pRNG,Umu); // Unit gauge plaq=WilsonLoops::avgPlaquette(Umu); std::cout << " Initial plaquette "<::HotConfiguration(pRNG,Umu); // Unit gauge plaq=WilsonLoops::avgPlaquette(Umu); std::cout << " Initial plaquette "<::HotConfiguration(RNG4_2f,Umu_2f); StandardFermionField src (FGrid_2f); StandardFermionField tmpsrc(FGrid_2f); diff --git a/tests/core/Test_gpwilson_even_odd.cc b/tests/core/Test_gpwilson_even_odd.cc index bf37f4d5..69ace859 100644 --- a/tests/core/Test_gpwilson_even_odd.cc +++ b/tests/core/Test_gpwilson_even_odd.cc @@ -61,7 +61,7 @@ int main (int argc, char ** argv) FermionField ref(&Grid); ref=Zero(); FermionField tmp(&Grid); tmp=Zero(); FermionField err(&Grid); tmp=Zero(); - LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); + LatticeGaugeField Umu(&Grid); SU::HotConfiguration(pRNG,Umu); std::vector U(4,&Grid); double volume=1; diff --git a/tests/core/Test_lie_generators.cc b/tests/core/Test_lie_generators.cc index 471cea25..9ae59774 100644 --- a/tests/core/Test_lie_generators.cc +++ b/tests/core/Test_lie_generators.cc @@ -66,14 +66,14 @@ int main(int argc, char** argv) { std::cout << GridLogMessage << "*********************************************" << std::endl; - std::cout << GridLogMessage << "* Generators for SU(3)" << std::endl; + std::cout << GridLogMessage << "* Generators for SU(Nc" << std::endl; std::cout << GridLogMessage << "*********************************************" << std::endl; - SU3::printGenerators(); - std::cout << "Dimension of adjoint representation: "<< SU3Adjoint::Dimension << std::endl; - SU3Adjoint::printGenerators(); - SU3::testGenerators(); - SU3Adjoint::testGenerators(); + SU::printGenerators(); + std::cout << "Dimension of adjoint representation: "<< SUAdjoint::Dimension << std::endl; + SUAdjoint::printGenerators(); + SU::testGenerators(); + SUAdjoint::testGenerators(); std::cout<({45,12,81,9})); - SU3Adjoint::LatticeAdjMatrix Gauss(grid); - SU3::LatticeAlgebraVector ha(grid); - SU3::LatticeAlgebraVector hb(grid); + SUAdjoint::LatticeAdjMatrix Gauss(grid); + SU::LatticeAlgebraVector ha(grid); + SU::LatticeAlgebraVector hb(grid); random(gridRNG,Gauss); std::cout << GridLogMessage << "Start projectOnAlgebra" << std::endl; - SU3Adjoint::projectOnAlgebra(ha, Gauss); + SUAdjoint::projectOnAlgebra(ha, Gauss); std::cout << GridLogMessage << "end projectOnAlgebra" << std::endl; std::cout << GridLogMessage << "Start projector" << std::endl; - SU3Adjoint::projector(hb, Gauss); + SUAdjoint::projector(hb, Gauss); std::cout << GridLogMessage << "end projector" << std::endl; std::cout << GridLogMessage << "ReStart projector" << std::endl; - SU3Adjoint::projector(hb, Gauss); + SUAdjoint::projector(hb, Gauss); std::cout << GridLogMessage << "end projector" << std::endl; - SU3::LatticeAlgebraVector diff = ha -hb; + SU::LatticeAlgebraVector diff = ha -hb; std::cout << GridLogMessage << "Difference: " << norm2(diff) << std::endl; @@ -260,20 +260,20 @@ int main(int argc, char** argv) { std::cout << GridLogMessage << "Test for the Two Index Symmetric projectors" << std::endl; // Projectors - SU3TwoIndexSymm::LatticeTwoIndexMatrix Gauss2(grid); + SUTwoIndexSymm::LatticeTwoIndexMatrix Gauss2(grid); random(gridRNG,Gauss2); std::cout << GridLogMessage << "Start projectOnAlgebra" << std::endl; - SU3TwoIndexSymm::projectOnAlgebra(ha, Gauss2); + SUTwoIndexSymm::projectOnAlgebra(ha, Gauss2); std::cout << GridLogMessage << "end projectOnAlgebra" << std::endl; std::cout << GridLogMessage << "Start projector" << std::endl; - SU3TwoIndexSymm::projector(hb, Gauss2); + SUTwoIndexSymm::projector(hb, Gauss2); std::cout << GridLogMessage << "end projector" << std::endl; std::cout << GridLogMessage << "ReStart projector" << std::endl; - SU3TwoIndexSymm::projector(hb, Gauss2); + SUTwoIndexSymm::projector(hb, Gauss2); std::cout << GridLogMessage << "end projector" << std::endl; - SU3::LatticeAlgebraVector diff2 = ha - hb; + SU::LatticeAlgebraVector diff2 = ha - hb; std::cout << GridLogMessage << "Difference: " << norm2(diff) << std::endl; std::cout << GridLogMessage << "*********************************************" << std::endl; @@ -284,20 +284,20 @@ int main(int argc, char** argv) { std::cout << GridLogMessage << "Test for the Two index anti-Symmetric projectors" << std::endl; // Projectors - SU3TwoIndexAntiSymm::LatticeTwoIndexMatrix Gauss2a(grid); + SUTwoIndexAntiSymm::LatticeTwoIndexMatrix Gauss2a(grid); random(gridRNG,Gauss2a); std::cout << GridLogMessage << "Start projectOnAlgebra" << std::endl; - SU3TwoIndexAntiSymm::projectOnAlgebra(ha, Gauss2a); + SUTwoIndexAntiSymm::projectOnAlgebra(ha, Gauss2a); std::cout << GridLogMessage << "end projectOnAlgebra" << std::endl; std::cout << GridLogMessage << "Start projector" << std::endl; - SU3TwoIndexAntiSymm::projector(hb, Gauss2a); + SUTwoIndexAntiSymm::projector(hb, Gauss2a); std::cout << GridLogMessage << "end projector" << std::endl; std::cout << GridLogMessage << "ReStart projector" << std::endl; - SU3TwoIndexAntiSymm::projector(hb, Gauss2a); + SUTwoIndexAntiSymm::projector(hb, Gauss2a); std::cout << GridLogMessage << "end projector" << std::endl; - SU3::LatticeAlgebraVector diff2a = ha - hb; + SU::LatticeAlgebraVector diff2a = ha - hb; std::cout << GridLogMessage << "Difference: " << norm2(diff2a) << std::endl; std::cout << GridLogMessage << "*********************************************" << std::endl; diff --git a/tests/core/Test_main.cc b/tests/core/Test_main.cc index af8b747b..d7ed04ba 100644 --- a/tests/core/Test_main.cc +++ b/tests/core/Test_main.cc @@ -444,7 +444,7 @@ int main(int argc, char **argv) { // Lattice 12x12 GEMM scFooBar = scFoo * scBar; - // Benchmark some simple operations LatticeSU3 * Lattice SU3. + // Benchmark some simple operations LatticeSU * Lattice SU. double t0, t1, flops; double bytes; int ncall = 5000; diff --git a/tests/core/Test_mobius_eofa_even_odd.cc b/tests/core/Test_mobius_eofa_even_odd.cc index 68091229..7339f156 100644 --- a/tests/core/Test_mobius_eofa_even_odd.cc +++ b/tests/core/Test_mobius_eofa_even_odd.cc @@ -73,7 +73,7 @@ int main (int argc, char ** argv) LatticeFermion ref (FGrid); ref = Zero(); LatticeFermion tmp (FGrid); tmp = Zero(); LatticeFermion err (FGrid); err = Zero(); - LatticeGaugeField Umu (UGrid); SU3::HotConfiguration(RNG4, Umu); + LatticeGaugeField Umu (UGrid); SU::HotConfiguration(RNG4, Umu); std::vector U(4,UGrid); // Only one non-zero (y) diff --git a/tests/core/Test_quenched_update.cc b/tests/core/Test_quenched_update.cc index ef428d1b..22675913 100644 --- a/tests/core/Test_quenched_update.cc +++ b/tests/core/Test_quenched_update.cc @@ -55,7 +55,7 @@ int main (int argc, char ** argv) GridParallelRNG pRNG(grid); pRNG.SeedFixedIntegers(pseeds); GridSerialRNG sRNG; sRNG.SeedFixedIntegers(sseeds); - // SU3 colour operatoions + // SU colour operatoions LatticeColourMatrix link(grid); LatticeColourMatrix staple(grid); @@ -87,10 +87,10 @@ int main (int argc, char ** argv) link = PeekIndex(Umu,mu); - for( int subgroup=0;subgroup::su2subgroups();subgroup++ ) { // update Even checkerboard - SU3::SubGroupHeatBath(sRNG,pRNG,beta,link,staple,subgroup,20,mask); + SU::SubGroupHeatBath(sRNG,pRNG,beta,link,staple,subgroup,20,mask); } diff --git a/tests/core/Test_staggered.cc b/tests/core/Test_staggered.cc index 1f42ff0d..51f92993 100644 --- a/tests/core/Test_staggered.cc +++ b/tests/core/Test_staggered.cc @@ -64,7 +64,7 @@ int main (int argc, char ** argv) FermionField err(&Grid); tmp=Zero(); FermionField phi (&Grid); random(pRNG,phi); FermionField chi (&Grid); random(pRNG,chi); - LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); + LatticeGaugeField Umu(&Grid); SU::HotConfiguration(pRNG,Umu); std::vector U(4,&Grid); diff --git a/tests/core/Test_staggered5D.cc b/tests/core/Test_staggered5D.cc index 3d175890..6ab15873 100644 --- a/tests/core/Test_staggered5D.cc +++ b/tests/core/Test_staggered5D.cc @@ -75,7 +75,7 @@ int main (int argc, char ** argv) FermionField phi (FGrid); random(pRNG5,phi); FermionField chi (FGrid); random(pRNG5,chi); - LatticeGaugeField Umu(UGrid); SU3::ColdConfiguration(pRNG4,Umu); + LatticeGaugeField Umu(UGrid); SU::ColdConfiguration(pRNG4,Umu); LatticeGaugeField Umua(UGrid); Umua=Umu; double volume=Ls; diff --git a/tests/core/Test_staggered5Dvec.cc b/tests/core/Test_staggered5Dvec.cc index 73241276..ef8da662 100644 --- a/tests/core/Test_staggered5Dvec.cc +++ b/tests/core/Test_staggered5Dvec.cc @@ -84,7 +84,7 @@ int main (int argc, char ** argv) FermionField chi (FGrid); random(pRNG5,chi); LatticeGaugeField Umu(UGrid); - SU3::HotConfiguration(pRNG4,Umu); + SU::HotConfiguration(pRNG4,Umu); /* for(int mu=1;mu<4;mu++){ diff --git a/tests/core/Test_staggered5DvecF.cc b/tests/core/Test_staggered5DvecF.cc index 2386d054..6893551c 100644 --- a/tests/core/Test_staggered5DvecF.cc +++ b/tests/core/Test_staggered5DvecF.cc @@ -83,7 +83,7 @@ int main (int argc, char ** argv) FermionField chi (FGrid); random(pRNG5,chi); LatticeGaugeFieldF Umu(UGrid); - SU3::HotConfiguration(pRNG4,Umu); + SU::HotConfiguration(pRNG4,Umu); /* for(int mu=1;mu<4;mu++){ diff --git a/tests/core/Test_staggered_naive.cc b/tests/core/Test_staggered_naive.cc index 9fe35a54..f41d723d 100644 --- a/tests/core/Test_staggered_naive.cc +++ b/tests/core/Test_staggered_naive.cc @@ -64,7 +64,7 @@ int main (int argc, char ** argv) FermionField err(&Grid); tmp=Zero(); FermionField phi (&Grid); random(pRNG,phi); FermionField chi (&Grid); random(pRNG,chi); - LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); + LatticeGaugeField Umu(&Grid); SU::HotConfiguration(pRNG,Umu); std::vector U(4,&Grid); diff --git a/tests/core/Test_wilson_clover.cc b/tests/core/Test_wilson_clover.cc index 3e31f7f6..642c30a8 100644 --- a/tests/core/Test_wilson_clover.cc +++ b/tests/core/Test_wilson_clover.cc @@ -74,7 +74,7 @@ int main(int argc, char **argv) FermionField chi(&Grid); random(pRNG, chi); LatticeGaugeField Umu(&Grid); - SU3::HotConfiguration(pRNG, Umu); + SU::HotConfiguration(pRNG, Umu); std::vector U(4, &Grid); double volume = 1; diff --git a/tests/core/Test_wilson_even_odd.cc b/tests/core/Test_wilson_even_odd.cc index dc49cf81..e7733a79 100644 --- a/tests/core/Test_wilson_even_odd.cc +++ b/tests/core/Test_wilson_even_odd.cc @@ -70,7 +70,7 @@ int main (int argc, char ** argv) LatticeFermion tmp(&Grid); tmp=Zero(); LatticeFermion err(&Grid); tmp=Zero(); LatticeGaugeField Umu(&Grid); - SU3::HotConfiguration(pRNG,Umu); + SU::HotConfiguration(pRNG,Umu); std::vector U(4,&Grid); double volume=1; diff --git a/tests/core/Test_wilson_twisted_mass_even_odd.cc b/tests/core/Test_wilson_twisted_mass_even_odd.cc index ba80fd0e..e0f73456 100644 --- a/tests/core/Test_wilson_twisted_mass_even_odd.cc +++ b/tests/core/Test_wilson_twisted_mass_even_odd.cc @@ -71,7 +71,7 @@ int main (int argc, char ** argv) LatticeFermion ref(&Grid); ref=Zero(); LatticeFermion tmp(&Grid); tmp=Zero(); LatticeFermion err(&Grid); tmp=Zero(); - LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); + LatticeGaugeField Umu(&Grid); SU::HotConfiguration(pRNG,Umu); std::vector U(4,&Grid); double volume=1; diff --git a/tests/debug/Test_cayley_cg.cc b/tests/debug/Test_cayley_cg.cc index 5a9c696f..5418a8af 100644 --- a/tests/debug/Test_cayley_cg.cc +++ b/tests/debug/Test_cayley_cg.cc @@ -116,7 +116,7 @@ int main (int argc, char ** argv) LatticeGaugeField Umu(UGrid); LatticeGaugeFieldF UmuF(UGridF); - SU3::HotConfiguration(RNG4,Umu); + SU::HotConfiguration(RNG4,Umu); precisionChange(UmuF,Umu); std::vector U(4,UGrid); diff --git a/tests/debug/Test_cayley_coarsen_support.cc b/tests/debug/Test_cayley_coarsen_support.cc index e91b3070..b2f691d7 100644 --- a/tests/debug/Test_cayley_coarsen_support.cc +++ b/tests/debug/Test_cayley_coarsen_support.cc @@ -77,7 +77,7 @@ int main (int argc, char ** argv) LatticeFermion ref(FGrid); ref=Zero(); LatticeFermion tmp(FGrid); LatticeFermion err(FGrid); - LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu); + LatticeGaugeField Umu(UGrid); SU::HotConfiguration(RNG4,Umu); #if 0 std::vector U(4,UGrid); diff --git a/tests/debug/Test_cayley_even_odd.cc b/tests/debug/Test_cayley_even_odd.cc index 433f0722..5e800b26 100644 --- a/tests/debug/Test_cayley_even_odd.cc +++ b/tests/debug/Test_cayley_even_odd.cc @@ -70,7 +70,7 @@ int main (int argc, char ** argv) GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); - LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu); + LatticeGaugeField Umu(UGrid); SU::HotConfiguration(RNG4,Umu); std::vector U(4,UGrid); RealD mass=0.1; diff --git a/tests/debug/Test_cayley_ldop_cr.cc b/tests/debug/Test_cayley_ldop_cr.cc index 82f388ab..416017e5 100644 --- a/tests/debug/Test_cayley_ldop_cr.cc +++ b/tests/debug/Test_cayley_ldop_cr.cc @@ -71,9 +71,9 @@ int main (int argc, char ** argv) std::string file("./ckpoint_lat.400"); NerscIO::readConfiguration(Umu,header,file); - // SU3::ColdConfiguration(RNG4,Umu); - // SU3::TepidConfiguration(RNG4,Umu); - // SU3::HotConfiguration(RNG4,Umu); + // SU::ColdConfiguration(RNG4,Umu); + // SU::TepidConfiguration(RNG4,Umu); + // SU::HotConfiguration(RNG4,Umu); // Umu=Zero(); RealD mass=0.1; diff --git a/tests/debug/Test_cayley_mres.cc b/tests/debug/Test_cayley_mres.cc index 2ad605b8..2e56fa81 100644 --- a/tests/debug/Test_cayley_mres.cc +++ b/tests/debug/Test_cayley_mres.cc @@ -108,8 +108,8 @@ int main (int argc, char ** argv) GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); LatticeGaugeField Umu(UGrid); - SU3::ColdConfiguration(Umu); - // SU3::HotConfiguration(RNG4,Umu); + SU::ColdConfiguration(Umu); + // SU::HotConfiguration(RNG4,Umu); RealD mass=0.3; RealD M5 =1.0; diff --git a/tests/debug/Test_heatbath_dwf_eofa.cc b/tests/debug/Test_heatbath_dwf_eofa.cc index 1e64a568..9d453a96 100644 --- a/tests/debug/Test_heatbath_dwf_eofa.cc +++ b/tests/debug/Test_heatbath_dwf_eofa.cc @@ -73,7 +73,7 @@ int main(int argc, char** argv) // Random gauge field LatticeGaugeField Umu(UGrid); - SU3::HotConfiguration(RNG4, Umu); + SU::HotConfiguration(RNG4, Umu); DomainWallEOFAFermionR Lop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mpv, 0.0, -1, M5); DomainWallEOFAFermionR Rop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mpv, mf, mpv, -1.0, 1, M5); diff --git a/tests/debug/Test_heatbath_dwf_eofa_gparity.cc b/tests/debug/Test_heatbath_dwf_eofa_gparity.cc index cc118d1d..22cc1e90 100644 --- a/tests/debug/Test_heatbath_dwf_eofa_gparity.cc +++ b/tests/debug/Test_heatbath_dwf_eofa_gparity.cc @@ -77,7 +77,7 @@ int main(int argc, char** argv) // Random gauge field LatticeGaugeField Umu(UGrid); - SU3::HotConfiguration(RNG4, Umu); + SU::HotConfiguration(RNG4, Umu); // GparityDomainWallFermionR::ImplParams params; FermionAction::ImplParams params; diff --git a/tests/debug/Test_heatbath_mobius_eofa.cc b/tests/debug/Test_heatbath_mobius_eofa.cc index 95ab935e..4cf4bf53 100644 --- a/tests/debug/Test_heatbath_mobius_eofa.cc +++ b/tests/debug/Test_heatbath_mobius_eofa.cc @@ -75,7 +75,7 @@ int main(int argc, char** argv) // Random gauge field LatticeGaugeField Umu(UGrid); - SU3::HotConfiguration(RNG4, Umu); + SU::HotConfiguration(RNG4, Umu); MobiusEOFAFermionR Lop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mpv, 0.0, -1, M5, b, c); MobiusEOFAFermionR Rop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mpv, mf, mpv, -1.0, 1, M5, b, c); diff --git a/tests/debug/Test_heatbath_mobius_eofa_gparity.cc b/tests/debug/Test_heatbath_mobius_eofa_gparity.cc index 7ed3a308..2fcb4b9f 100644 --- a/tests/debug/Test_heatbath_mobius_eofa_gparity.cc +++ b/tests/debug/Test_heatbath_mobius_eofa_gparity.cc @@ -79,7 +79,7 @@ int main(int argc, char** argv) // Random gauge field LatticeGaugeField Umu(UGrid); - SU3::HotConfiguration(RNG4, Umu); + SU::HotConfiguration(RNG4, Umu); FermionAction::ImplParams params; FermionAction Lop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mpv, 0.0, -1, M5, b, c, params); diff --git a/tests/debug/Test_reweight_dwf_eofa.cc b/tests/debug/Test_reweight_dwf_eofa.cc index 728fbf78..a150b18f 100644 --- a/tests/debug/Test_reweight_dwf_eofa.cc +++ b/tests/debug/Test_reweight_dwf_eofa.cc @@ -102,7 +102,7 @@ int main(int argc, char **argv) // Random gauge field LatticeGaugeField Umu(UGrid); - SU3::HotConfiguration(RNG4, Umu); + SU::HotConfiguration(RNG4, Umu); // Initialize RHMC fermion operators DomainWallFermionR Ddwf_f(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, M5); diff --git a/tests/debug/Test_reweight_dwf_eofa_gparity.cc b/tests/debug/Test_reweight_dwf_eofa_gparity.cc index fcc01b8d..df2d95a0 100644 --- a/tests/debug/Test_reweight_dwf_eofa_gparity.cc +++ b/tests/debug/Test_reweight_dwf_eofa_gparity.cc @@ -104,7 +104,7 @@ int main(int argc, char **argv) // Random gauge field LatticeGaugeField Umu(UGrid); - SU3::HotConfiguration(RNG4, Umu); + SU::HotConfiguration(RNG4, Umu); // Initialize RHMC fermion operators GparityDomainWallFermionR::ImplParams params; diff --git a/tests/debug/Test_reweight_mobius_eofa.cc b/tests/debug/Test_reweight_mobius_eofa.cc index c5e46bcf..88ecab7d 100644 --- a/tests/debug/Test_reweight_mobius_eofa.cc +++ b/tests/debug/Test_reweight_mobius_eofa.cc @@ -104,7 +104,7 @@ int main(int argc, char **argv) // Random gauge field LatticeGaugeField Umu(UGrid); - SU3::HotConfiguration(RNG4, Umu); + SU::HotConfiguration(RNG4, Umu); // Initialize RHMC fermion operators MobiusFermionR Ddwf_f(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, M5, b, c); diff --git a/tests/debug/Test_reweight_mobius_eofa_gparity.cc b/tests/debug/Test_reweight_mobius_eofa_gparity.cc index bfc7543a..31708265 100644 --- a/tests/debug/Test_reweight_mobius_eofa_gparity.cc +++ b/tests/debug/Test_reweight_mobius_eofa_gparity.cc @@ -106,7 +106,7 @@ int main(int argc, char **argv) // Random gauge field LatticeGaugeField Umu(UGrid); - SU3::HotConfiguration(RNG4, Umu); + SU::HotConfiguration(RNG4, Umu); // Initialize RHMC fermion operators GparityDomainWallFermionR::ImplParams params; diff --git a/tests/forces/Test_contfrac_force.cc b/tests/forces/Test_contfrac_force.cc index cb30faad..dc9eedce 100644 --- a/tests/forces/Test_contfrac_force.cc +++ b/tests/forces/Test_contfrac_force.cc @@ -59,7 +59,7 @@ int main (int argc, char ** argv) LatticeGaugeField U(UGrid); - SU3::HotConfiguration(RNG4,U); + SU::HotConfiguration(RNG4,U); //////////////////////////////////// // Unmodified matrix element @@ -93,7 +93,7 @@ int main (int argc, char ** argv) for(int mu=0;mu::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg PokeIndex(mom,mommu,mu); diff --git a/tests/forces/Test_dwf_force.cc b/tests/forces/Test_dwf_force.cc index 81a1b8c4..e7d17347 100644 --- a/tests/forces/Test_dwf_force.cc +++ b/tests/forces/Test_dwf_force.cc @@ -60,7 +60,7 @@ int main (int argc, char ** argv) LatticeGaugeField U(UGrid); - SU3::HotConfiguration(RNG4,U); + SU::HotConfiguration(RNG4,U); //////////////////////////////////// // Unmodified matrix element @@ -94,7 +94,7 @@ int main (int argc, char ** argv) for(int mu=0;mu::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg PokeIndex(mom,mommu,mu); diff --git a/tests/forces/Test_dwf_force_eofa.cc b/tests/forces/Test_dwf_force_eofa.cc index 0b0ba346..80d36934 100644 --- a/tests/forces/Test_dwf_force_eofa.cc +++ b/tests/forces/Test_dwf_force_eofa.cc @@ -72,7 +72,7 @@ int main (int argc, char** argv) LatticeFermion MphiPrime (FGrid); LatticeGaugeField U(UGrid); - SU3::HotConfiguration(RNG4,U); + SU::HotConfiguration(RNG4,U); //////////////////////////////////// // Unmodified matrix element @@ -105,7 +105,7 @@ int main (int argc, char** argv) for(int mu=0; mu::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg PokeIndex(mom, mommu, mu); diff --git a/tests/forces/Test_dwf_gpforce.cc b/tests/forces/Test_dwf_gpforce.cc index b39fdd14..28133cc6 100644 --- a/tests/forces/Test_dwf_gpforce.cc +++ b/tests/forces/Test_dwf_gpforce.cc @@ -63,8 +63,8 @@ int main (int argc, char ** argv) LatticeGaugeField U(UGrid); - SU3::HotConfiguration(RNG4,U); - // SU3::ColdConfiguration(pRNG,U); + SU::HotConfiguration(RNG4,U); + // SU::ColdConfiguration(pRNG,U); //////////////////////////////////// // Unmodified matrix element @@ -112,7 +112,7 @@ int main (int argc, char ** argv) for(int mu=0;mu::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg Hmom -= real(sum(trace(mommu*mommu))); diff --git a/tests/forces/Test_dwf_gpforce_eofa.cc b/tests/forces/Test_dwf_gpforce_eofa.cc index 58258a5e..7e480e7a 100644 --- a/tests/forces/Test_dwf_gpforce_eofa.cc +++ b/tests/forces/Test_dwf_gpforce_eofa.cc @@ -75,7 +75,7 @@ int main (int argc, char** argv) FermionField MphiPrime (FGrid); LatticeGaugeField U(UGrid); - SU3::HotConfiguration(RNG4,U); + SU::HotConfiguration(RNG4,U); //////////////////////////////////// // Unmodified matrix element @@ -109,7 +109,7 @@ int main (int argc, char** argv) for(int mu=0; mu::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg PokeIndex(mom, mommu, mu); diff --git a/tests/forces/Test_gp_plaq_force.cc b/tests/forces/Test_gp_plaq_force.cc index 21f0b9d0..bc2b5b26 100644 --- a/tests/forces/Test_gp_plaq_force.cc +++ b/tests/forces/Test_gp_plaq_force.cc @@ -51,7 +51,7 @@ int main (int argc, char ** argv) LatticeGaugeField U(&Grid); - SU3::HotConfiguration(pRNG,U); + SU::HotConfiguration(pRNG,U); double beta = 1.0; ConjugateWilsonGaugeActionR Action(beta); @@ -80,7 +80,7 @@ int main (int argc, char ** argv) for(int mu=0;mu::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg PokeIndex(mom,mommu,mu); diff --git a/tests/forces/Test_gp_rect_force.cc b/tests/forces/Test_gp_rect_force.cc index bb4ea6de..98ebb2fa 100644 --- a/tests/forces/Test_gp_rect_force.cc +++ b/tests/forces/Test_gp_rect_force.cc @@ -54,7 +54,7 @@ int main (int argc, char ** argv) LatticeGaugeField U(&Grid); - SU3::HotConfiguration(pRNG,U); + SU::HotConfiguration(pRNG,U); double beta = 1.0; double c1 = 0.331; @@ -82,7 +82,7 @@ int main (int argc, char ** argv) for(int mu=0;mu::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg PokeIndex(mom,mommu,mu); diff --git a/tests/forces/Test_gpdwf_force.cc b/tests/forces/Test_gpdwf_force.cc index bdc332d9..d6744080 100644 --- a/tests/forces/Test_gpdwf_force.cc +++ b/tests/forces/Test_gpdwf_force.cc @@ -63,7 +63,7 @@ int main (int argc, char ** argv) LatticeGaugeField U(UGrid); - SU3::HotConfiguration(RNG4,U); + SU::HotConfiguration(RNG4,U); //////////////////////////////////// // Unmodified matrix element @@ -100,7 +100,7 @@ int main (int argc, char ** argv) for(int mu=0;mu::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg PokeIndex(mom,mommu,mu); diff --git a/tests/forces/Test_gpwilson_force.cc b/tests/forces/Test_gpwilson_force.cc index 1c85a5d9..d731f27a 100644 --- a/tests/forces/Test_gpwilson_force.cc +++ b/tests/forces/Test_gpwilson_force.cc @@ -57,7 +57,7 @@ int main (int argc, char ** argv) LatticeGaugeField U(UGrid); - SU3::HotConfiguration(RNG4,U); + SU::HotConfiguration(RNG4,U); //////////////////////////////////// // Unmodified matrix element @@ -94,7 +94,7 @@ int main (int argc, char ** argv) for(int mu=0;mu::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); PokeIndex(mom,mommu,mu); diff --git a/tests/forces/Test_laplacian_force.cc b/tests/forces/Test_laplacian_force.cc index 639378dc..18508860 100644 --- a/tests/forces/Test_laplacian_force.cc +++ b/tests/forces/Test_laplacian_force.cc @@ -58,7 +58,7 @@ int main (int argc, char ** argv) PokeIndex(P, P_mu, mu); } - SU3::HotConfiguration(pRNG,U); + SU::HotConfiguration(pRNG,U); ConjugateGradient CG(1.0e-8, 10000); @@ -95,7 +95,7 @@ int main (int argc, char ** argv) std::cout << GridLogMessage << "Update the U " << std::endl; for(int mu=0;mu::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); auto Umu = PeekIndex(U, mu); PokeIndex(mom,mommu,mu); Umu = expMat(mommu, dt, 12) * Umu; diff --git a/tests/forces/Test_mobius_force.cc b/tests/forces/Test_mobius_force.cc index 11e69652..ba7bc363 100644 --- a/tests/forces/Test_mobius_force.cc +++ b/tests/forces/Test_mobius_force.cc @@ -60,7 +60,7 @@ int main (int argc, char ** argv) LatticeGaugeField U(UGrid); - SU3::HotConfiguration(RNG4,U); + SU::HotConfiguration(RNG4,U); //////////////////////////////////// // Unmodified matrix element @@ -96,7 +96,7 @@ int main (int argc, char ** argv) for(int mu=0;mu::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg PokeIndex(mom,mommu,mu); diff --git a/tests/forces/Test_mobius_force_eofa.cc b/tests/forces/Test_mobius_force_eofa.cc index f85501fa..28523e9c 100644 --- a/tests/forces/Test_mobius_force_eofa.cc +++ b/tests/forces/Test_mobius_force_eofa.cc @@ -72,7 +72,7 @@ int main (int argc, char** argv) LatticeFermion MphiPrime (FGrid); LatticeGaugeField U(UGrid); - SU3::HotConfiguration(RNG4,U); + SU::HotConfiguration(RNG4,U); //////////////////////////////////// // Unmodified matrix element @@ -107,7 +107,7 @@ int main (int argc, char** argv) for(int mu=0; mu::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg PokeIndex(mom, mommu, mu); diff --git a/tests/forces/Test_mobius_gpforce_eofa.cc b/tests/forces/Test_mobius_gpforce_eofa.cc index 68163e63..9c80b2aa 100644 --- a/tests/forces/Test_mobius_gpforce_eofa.cc +++ b/tests/forces/Test_mobius_gpforce_eofa.cc @@ -76,7 +76,7 @@ int main (int argc, char** argv) FermionField MphiPrime (FGrid); LatticeGaugeField U(UGrid); - SU3::HotConfiguration(RNG4,U); + SU::HotConfiguration(RNG4,U); //////////////////////////////////// // Unmodified matrix element @@ -112,7 +112,7 @@ int main (int argc, char** argv) for(int mu=0; mu::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg PokeIndex(mom, mommu, mu); autoView( U_v , U, CpuRead); diff --git a/tests/forces/Test_partfrac_force.cc b/tests/forces/Test_partfrac_force.cc index 17dce530..33f7b5fd 100644 --- a/tests/forces/Test_partfrac_force.cc +++ b/tests/forces/Test_partfrac_force.cc @@ -62,7 +62,7 @@ int main (int argc, char ** argv) LatticeGaugeField U(UGrid); - SU3::HotConfiguration(RNG4,U); + SU::HotConfiguration(RNG4,U); //////////////////////////////////// // Unmodified matrix element @@ -96,7 +96,7 @@ int main (int argc, char ** argv) for(int mu=0;mu::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg PokeIndex(mom,mommu,mu); diff --git a/tests/forces/Test_rect_force.cc b/tests/forces/Test_rect_force.cc index ed72f2c0..c9326f8d 100644 --- a/tests/forces/Test_rect_force.cc +++ b/tests/forces/Test_rect_force.cc @@ -54,7 +54,7 @@ int main (int argc, char ** argv) LatticeGaugeField U(&Grid); - SU3::HotConfiguration(pRNG,U); + SU::HotConfiguration(pRNG,U); double beta = 1.0; double c1 = -0.331; @@ -82,7 +82,7 @@ int main (int argc, char ** argv) for(int mu=0;mu::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg PokeIndex(mom,mommu,mu); diff --git a/tests/forces/Test_wilson_force.cc b/tests/forces/Test_wilson_force.cc index c8b3a7f4..b7bf1268 100644 --- a/tests/forces/Test_wilson_force.cc +++ b/tests/forces/Test_wilson_force.cc @@ -61,7 +61,7 @@ int main (int argc, char ** argv) LatticeGaugeField U(&Grid); //SU2::HotConfiguration(pRNG,U); - SU3::ColdConfiguration(pRNG,U); + SU::ColdConfiguration(pRNG,U); //////////////////////////////////// // Unmodified matrix element @@ -98,7 +98,7 @@ int main (int argc, char ** argv) for(int mu=0;mu::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); Hmom -= real(sum(trace(mommu*mommu))); diff --git a/tests/forces/Test_wilsonclover_force.cc b/tests/forces/Test_wilsonclover_force.cc index f26f0ac9..6a28e4e2 100644 --- a/tests/forces/Test_wilsonclover_force.cc +++ b/tests/forces/Test_wilsonclover_force.cc @@ -62,8 +62,8 @@ int main(int argc, char **argv) LatticeGaugeField U(&Grid); - SU3::HotConfiguration(pRNG, U); - //SU3::ColdConfiguration(pRNG, U);// Clover term Zero() + SU::HotConfiguration(pRNG, U); + //SU::ColdConfiguration(pRNG, U);// Clover term Zero() //////////////////////////////////// // Unmodified matrix element @@ -101,7 +101,7 @@ int main(int argc, char **argv) for (int mu = 0; mu < Nd; mu++) { // Traceless antihermitian momentum; gaussian in lie alg - SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); + SU::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); Hmom -= real(sum(trace(mommu * mommu))); PokeIndex(mom, mommu, mu); diff --git a/tests/forces/Test_zmobius_force.cc b/tests/forces/Test_zmobius_force.cc index e24ae601..89673bc7 100644 --- a/tests/forces/Test_zmobius_force.cc +++ b/tests/forces/Test_zmobius_force.cc @@ -59,7 +59,7 @@ int main (int argc, char ** argv) LatticeGaugeField U(UGrid); - SU3::HotConfiguration(RNG4,U); + SU::HotConfiguration(RNG4,U); //////////////////////////////////// // Unmodified matrix element @@ -109,7 +109,7 @@ int main (int argc, char ** argv) for(int mu=0;mu::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg PokeIndex(mom,mommu,mu); diff --git a/tests/lanczos/Test_dwf_compressed_lanczos_reorg_synthetic.cc b/tests/lanczos/Test_dwf_compressed_lanczos_reorg_synthetic.cc index d9249e0d..3766e069 100644 --- a/tests/lanczos/Test_dwf_compressed_lanczos_reorg_synthetic.cc +++ b/tests/lanczos/Test_dwf_compressed_lanczos_reorg_synthetic.cc @@ -293,7 +293,7 @@ int main (int argc, char ** argv) { { std::vector seeds4({1,2,3,4}); GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); - SU3::HotConfiguration(RNG4, Umu); + SU::HotConfiguration(RNG4, Umu); } std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt() << " Ls: " << Ls << std::endl; diff --git a/tests/lanczos/Test_dwf_lanczos.cc b/tests/lanczos/Test_dwf_lanczos.cc index 12283921..00d29ec0 100644 --- a/tests/lanczos/Test_dwf_lanczos.cc +++ b/tests/lanczos/Test_dwf_lanczos.cc @@ -54,7 +54,7 @@ int main (int argc, char ** argv) GridParallelRNG RNG5rb(FrbGrid); RNG5.SeedFixedIntegers(seeds5); LatticeGaugeField Umu(UGrid); - SU3::HotConfiguration(RNG4, Umu); + SU::HotConfiguration(RNG4, Umu); std::vector U(4,UGrid); for(int mu=0;mu::HotConfiguration(RNG4, Umu); /* std::vector U(4, UGrid); diff --git a/tests/qdpxx/Test_qdpxx_baryon.cc b/tests/qdpxx/Test_qdpxx_baryon.cc index a1d8f738..d8225f82 100644 --- a/tests/qdpxx/Test_qdpxx_baryon.cc +++ b/tests/qdpxx/Test_qdpxx_baryon.cc @@ -280,7 +280,7 @@ void make_gauge(GaugeField &Umu, Grid::LatticePropagator &q1,Grid::LatticePropag Grid::GridCartesian *UGrid = (Grid::GridCartesian *)Umu.Grid(); Grid::GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); - Grid::SU3::HotConfiguration(RNG4, Umu); + Grid::SU::HotConfiguration(RNG4, Umu); // Propagator Grid::gaussian(RNG4, q1); diff --git a/tests/qdpxx/Test_qdpxx_loops_staples.cc b/tests/qdpxx/Test_qdpxx_loops_staples.cc index bbb41f4e..33057eeb 100644 --- a/tests/qdpxx/Test_qdpxx_loops_staples.cc +++ b/tests/qdpxx/Test_qdpxx_loops_staples.cc @@ -277,7 +277,7 @@ double calc_grid_p(Grid::LatticeGaugeField & Umu) Grid::GridCartesian * UGrid = (Grid::GridCartesian *) Umu.Grid(); Grid::GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); - Grid::SU3::HotConfiguration(RNG4,Umu); + Grid::SU::HotConfiguration(RNG4,Umu); Grid::LatticeColourMatrix tmp(UGrid); tmp = Grid::zero; diff --git a/tests/qdpxx/Test_qdpxx_munprec.cc b/tests/qdpxx/Test_qdpxx_munprec.cc index fbc1ec82..82874546 100644 --- a/tests/qdpxx/Test_qdpxx_munprec.cc +++ b/tests/qdpxx/Test_qdpxx_munprec.cc @@ -502,7 +502,7 @@ void calc_grid(ChromaAction action,Grid::LatticeGaugeField & Umu, Grid::LatticeF Grid::gaussian(RNG5,src); Grid::gaussian(RNG5,res); - Grid::SU3::HotConfiguration(RNG4,Umu); + Grid::SU::HotConfiguration(RNG4,Umu); /* Grid::LatticeColourMatrix U(UGrid); diff --git a/tests/qdpxx/Test_qdpxx_stag.cc b/tests/qdpxx/Test_qdpxx_stag.cc index f283d5a9..8f81fa99 100644 --- a/tests/qdpxx/Test_qdpxx_stag.cc +++ b/tests/qdpxx/Test_qdpxx_stag.cc @@ -333,7 +333,7 @@ void make_gauge(GaugeField & Umu,FermionField &src) Grid::GridCartesian * UGrid = (Grid::GridCartesian *) Umu.Grid(); Grid::GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); - Grid::SU3::HotConfiguration(RNG4,Umu); + Grid::SU::HotConfiguration(RNG4,Umu); Grid::gaussian(RNG4,src); } diff --git a/tests/qdpxx/Test_qdpxx_wilson.cc b/tests/qdpxx/Test_qdpxx_wilson.cc index fdf59982..8ce28dca 100644 --- a/tests/qdpxx/Test_qdpxx_wilson.cc +++ b/tests/qdpxx/Test_qdpxx_wilson.cc @@ -348,7 +348,7 @@ void make_gauge(GaugeField &Umu, FermionField &src) Grid::GridCartesian *UGrid = (Grid::GridCartesian *)Umu._grid; Grid::GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); - Grid::SU3::HotConfiguration(RNG4, Umu); + Grid::SU::HotConfiguration(RNG4, Umu); // Fermion field Grid::gaussian(RNG4, src); diff --git a/tests/smearing/Test_smearing.cc b/tests/smearing/Test_smearing.cc index c1c7c457..adab1c6e 100644 --- a/tests/smearing/Test_smearing.cc +++ b/tests/smearing/Test_smearing.cc @@ -47,8 +47,8 @@ int main (int argc, char ** argv) RealD nrm = norm2(src); LatticeFermion result(&Grid); result=Zero(); LatticeGaugeField Umu(&Grid); - // SU3::HotConfiguration(pRNG,Umu); - SU3::ColdConfiguration(Umu); + // SU::HotConfiguration(pRNG,Umu); + SU::ColdConfiguration(Umu); std::vector U(4,&Grid); for(int mu=0;mu::HotConfiguration(RNG4,Umu); std::vector U(4,UGrid); for(int mu=0;mu::HotConfiguration(RNG4,Umu); std::vector U(4,UGrid); RealD mass=0.1; diff --git a/tests/solver/Test_dwf_cg_prec.cc b/tests/solver/Test_dwf_cg_prec.cc index cb53894f..debb736a 100644 --- a/tests/solver/Test_dwf_cg_prec.cc +++ b/tests/solver/Test_dwf_cg_prec.cc @@ -67,7 +67,7 @@ int main(int argc, char** argv) { result = Zero(); LatticeGaugeField Umu(UGrid); - SU3::HotConfiguration(RNG4, Umu); + SU::HotConfiguration(RNG4, Umu); std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt() << " Ls: " << Ls << std::endl; diff --git a/tests/solver/Test_dwf_cg_schur.cc b/tests/solver/Test_dwf_cg_schur.cc index 6216c366..6541e73d 100644 --- a/tests/solver/Test_dwf_cg_schur.cc +++ b/tests/solver/Test_dwf_cg_schur.cc @@ -61,7 +61,7 @@ int main (int argc, char ** argv) LatticeFermion src(FGrid); random(RNG5,src); LatticeFermion result(FGrid); result=Zero(); - LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu); + LatticeGaugeField Umu(UGrid); SU::HotConfiguration(RNG4,Umu); std::vector U(4,UGrid); for(int mu=0;mu::HotConfiguration(RNG4,Umu); std::vector U(4,UGrid); for(int mu=0;mu::HotConfiguration(RNG4,Umu); std::vector U(4,UGrid); diff --git a/tests/solver/Test_dwf_fpgcr.cc b/tests/solver/Test_dwf_fpgcr.cc index 156f678a..42cc8de1 100644 --- a/tests/solver/Test_dwf_fpgcr.cc +++ b/tests/solver/Test_dwf_fpgcr.cc @@ -68,7 +68,7 @@ int main (int argc, char ** argv) LatticeFermion result(FGrid); result=Zero(); LatticeGaugeField Umu(UGrid); - SU3::HotConfiguration(RNG4,Umu); + SU::HotConfiguration(RNG4,Umu); ConjugateResidual CR(1.0e-6,10000); diff --git a/tests/solver/Test_dwf_mrhs_cg.cc b/tests/solver/Test_dwf_mrhs_cg.cc index 982a8247..b912ba4f 100644 --- a/tests/solver/Test_dwf_mrhs_cg.cc +++ b/tests/solver/Test_dwf_mrhs_cg.cc @@ -93,7 +93,7 @@ int main (int argc, char ** argv) for(int s=0;s::HotConfiguration(pRNG,Umu); /////////////////////////////////////////////////////////////// // Bounce these fields to disk diff --git a/tests/solver/Test_dwf_mrhs_cg_mpi.cc b/tests/solver/Test_dwf_mrhs_cg_mpi.cc index 8ace9b43..d0a32460 100644 --- a/tests/solver/Test_dwf_mrhs_cg_mpi.cc +++ b/tests/solver/Test_dwf_mrhs_cg_mpi.cc @@ -136,11 +136,11 @@ int main (int argc, char ** argv) std::cout << GridLogMessage << "Intialising 4D RNG "<::HotConfiguration(pRNG,Umu); std::cout << GridLogMessage << "Intialised the HOT Gauge Field"<::ColdConfiguration(Umu); std::cout << GridLogMessage << "Intialised the COLD Gauge Field"<::HotConfiguration(pRNG,Umu); ///////////////// // MPI only sends diff --git a/tests/solver/Test_dwf_qmr_unprec.cc b/tests/solver/Test_dwf_qmr_unprec.cc index ba44ee93..370e7409 100644 --- a/tests/solver/Test_dwf_qmr_unprec.cc +++ b/tests/solver/Test_dwf_qmr_unprec.cc @@ -51,7 +51,7 @@ int main (int argc, char ** argv) LatticeFermion src(FGrid); random(RNG5,src); LatticeFermion result(FGrid); result=Zero(); - LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu); + LatticeGaugeField Umu(UGrid); SU::HotConfiguration(RNG4,Umu); std::vector U(4,UGrid); diff --git a/tests/solver/Test_mobius_bcg.cc b/tests/solver/Test_mobius_bcg.cc index 8b34a6a5..8092d61c 100644 --- a/tests/solver/Test_mobius_bcg.cc +++ b/tests/solver/Test_mobius_bcg.cc @@ -128,7 +128,7 @@ int main (int argc, char ** argv) std::cout << GridLogMessage << "Intialising 4D RNG "<::HotConfiguration(pRNG,Umu); std::cout << GridLogMessage << "Intialised the HOT Gauge Field"<::HotConfiguration(pRNG,Umu); std::cout << GridLogMessage << "Intialised the HOT Gauge Field"<::ColdConfiguration(Umu); std::cout << GridLogMessage << "Intialised the COLD Gauge Field"<::HotConfiguration(pRNG,Umu); std::cout << GridLogMessage << "Intialised the HOT Gauge Field"<::ColdConfiguration(Umu); std::cout << GridLogMessage << "Intialised the COLD Gauge Field"<::HotConfiguration(pRNG,Umu); std::cout << GridLogMessage << "Intialised the HOT Gauge Field"<::ColdConfiguration(Umu); std::cout << GridLogMessage << "Intialised the COLD Gauge Field"<::HotConfiguration(pRNG,Umu); ///////////////// // MPI only sends diff --git a/tests/solver/Test_staggered_block_cg_prec.cc b/tests/solver/Test_staggered_block_cg_prec.cc index 2499fc8a..c5306e85 100644 --- a/tests/solver/Test_staggered_block_cg_prec.cc +++ b/tests/solver/Test_staggered_block_cg_prec.cc @@ -87,7 +87,7 @@ int main (int argc, char ** argv) FermionField result_o(FrbGrid); result_o=Zero(); RealD nrm = norm2(src); - LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu); + LatticeGaugeField Umu(UGrid); SU::HotConfiguration(pRNG,Umu); double volume=1; for(int mu=0;mu::HotConfiguration(pRNG,Umu); RealD mass=0.003; RealD c1=9.0/8.0; diff --git a/tests/solver/Test_staggered_cagmres_unprec.cc b/tests/solver/Test_staggered_cagmres_unprec.cc index 8121c90d..1b7a2f56 100644 --- a/tests/solver/Test_staggered_cagmres_unprec.cc +++ b/tests/solver/Test_staggered_cagmres_unprec.cc @@ -51,7 +51,7 @@ int main (int argc, char ** argv) FermionField src(&Grid); random(pRNG,src); RealD nrm = norm2(src); FermionField result(&Grid); result=Zero(); - LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); + LatticeGaugeField Umu(&Grid); SU::HotConfiguration(pRNG,Umu); double volume=1; for(int mu=0;mu::HotConfiguration(pRNG,Umu); double volume=1; for(int mu=0;mu seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); - LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); + LatticeGaugeField Umu(&Grid); SU::HotConfiguration(pRNG,Umu); FermionField src(&Grid); random(pRNG,src); FermionField result(&Grid); result=Zero(); diff --git a/tests/solver/Test_staggered_cg_unprec.cc b/tests/solver/Test_staggered_cg_unprec.cc index 9625a9c8..e023b910 100644 --- a/tests/solver/Test_staggered_cg_unprec.cc +++ b/tests/solver/Test_staggered_cg_unprec.cc @@ -65,7 +65,7 @@ int main (int argc, char ** argv) FermionField src(&Grid); random(pRNG,src); RealD nrm = norm2(src); FermionField result(&Grid); result=Zero(); - LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); + LatticeGaugeField Umu(&Grid); SU::HotConfiguration(pRNG,Umu); double volume=1; for(int mu=0;mu::HotConfiguration(pRNG,Umu); double volume=1; for(int mu=0;mu::HotConfiguration(pRNG,Umu); double volume=1; for(int mu=0;mu::HotConfiguration(pRNG,Umu); double volume=1; for(int mu=0;mu::HotConfiguration(pRNG,Umu); double volume=1; for(int mu=0;mu::HotConfiguration(pRNG,Umu); double volume=1; for(int mu=0;mu::HotConfiguration(pRNG,Umu); double volume=1; for(int mu=0;mu::HotConfiguration(pRNG,Umu); std::vector U(4,&Grid); diff --git a/tests/solver/Test_wilson_cg_schur.cc b/tests/solver/Test_wilson_cg_schur.cc index 23383032..97482131 100644 --- a/tests/solver/Test_wilson_cg_schur.cc +++ b/tests/solver/Test_wilson_cg_schur.cc @@ -57,7 +57,7 @@ int main (int argc, char ** argv) std::vector seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); - LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); + LatticeGaugeField Umu(&Grid); SU::HotConfiguration(pRNG,Umu); LatticeFermion src(&Grid); random(pRNG,src); LatticeFermion result(&Grid); result=Zero(); diff --git a/tests/solver/Test_wilson_cg_unprec.cc b/tests/solver/Test_wilson_cg_unprec.cc index f3335d45..07f6ba7b 100644 --- a/tests/solver/Test_wilson_cg_unprec.cc +++ b/tests/solver/Test_wilson_cg_unprec.cc @@ -60,7 +60,7 @@ int main (int argc, char ** argv) LatticeFermion src(&Grid); random(pRNG,src); RealD nrm = norm2(src); LatticeFermion result(&Grid); result=Zero(); - LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); + LatticeGaugeField Umu(&Grid); SU::HotConfiguration(pRNG,Umu); double volume=1; for(int mu=0;mu::HotConfiguration(pRNG,Umu); std::vector U(4,&Grid); diff --git a/tests/solver/Test_wilson_fcagmres_prec.cc b/tests/solver/Test_wilson_fcagmres_prec.cc index b821a25f..d2a1acf4 100644 --- a/tests/solver/Test_wilson_fcagmres_prec.cc +++ b/tests/solver/Test_wilson_fcagmres_prec.cc @@ -47,7 +47,7 @@ int main (int argc, char ** argv) LatticeFermion src(&Grid); random(pRNG,src); RealD nrm = norm2(src); LatticeFermion result(&Grid); result=Zero(); - LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); + LatticeGaugeField Umu(&Grid); SU::HotConfiguration(pRNG,Umu); double volume=1; for(int mu=0;mu::HotConfiguration(pRNG,Umu); double volume=1; for(int mu=0;mu::HotConfiguration(pRNG,Umu); double volume=1; for(int mu=0;mu::HotConfiguration(fPRNG, Umu); // clang-format on RealD mass = -0.25; diff --git a/tests/solver/Test_wilson_mg_mp.cc b/tests/solver/Test_wilson_mg_mp.cc index e631cd15..89bbbf74 100644 --- a/tests/solver/Test_wilson_mg_mp.cc +++ b/tests/solver/Test_wilson_mg_mp.cc @@ -52,7 +52,7 @@ int main(int argc, char **argv) { LatticeFermionD src_d(FGrid_d); gaussian(fPRNG, src_d); LatticeFermionD resultMGD_d(FGrid_d); resultMGD_d = Zero(); LatticeFermionD resultMGF_d(FGrid_d); resultMGF_d = Zero(); - LatticeGaugeFieldD Umu_d(FGrid_d); SU3::HotConfiguration(fPRNG, Umu_d); + LatticeGaugeFieldD Umu_d(FGrid_d); SU::HotConfiguration(fPRNG, Umu_d); LatticeGaugeFieldF Umu_f(FGrid_f); precisionChange(Umu_f, Umu_d); // clang-format on diff --git a/tests/solver/Test_wilson_mr_unprec.cc b/tests/solver/Test_wilson_mr_unprec.cc index 1cc1f418..fef83794 100644 --- a/tests/solver/Test_wilson_mr_unprec.cc +++ b/tests/solver/Test_wilson_mr_unprec.cc @@ -47,7 +47,7 @@ int main (int argc, char ** argv) LatticeFermion src(&Grid); random(pRNG,src); RealD nrm = norm2(src); LatticeFermion result(&Grid); result=Zero(); - LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); + LatticeGaugeField Umu(&Grid); SU::HotConfiguration(pRNG,Umu); double volume=1; for(int mu=0;mu::HotConfiguration(RNG4,Umu); std::vector U(4,Grid); diff --git a/tests/solver/Test_wilsonclover_bicgstab_prec.cc b/tests/solver/Test_wilsonclover_bicgstab_prec.cc index c1905400..b382b1bb 100644 --- a/tests/solver/Test_wilsonclover_bicgstab_prec.cc +++ b/tests/solver/Test_wilsonclover_bicgstab_prec.cc @@ -60,7 +60,7 @@ int main (int argc, char ** argv) LatticeFermion src(&Grid); random(pRNG,src); RealD nrm = norm2(src); LatticeFermion result(&Grid); result=Zero(); - LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); + LatticeGaugeField Umu(&Grid); SU::HotConfiguration(pRNG,Umu); double volume=1; for(int mu=0;mu::HotConfiguration(pRNG,Umu); double volume=1; for(int mu=0;mu::HotConfiguration(pRNG,Umu); double volume=1; for(int mu=0;mu::HotConfiguration(pRNG,Umu); double volume=1; for(int mu=0;mu::HotConfiguration(pRNG,Umu); std::vector U(4,&Grid); diff --git a/tests/solver/Test_wilsonclover_cg_schur.cc b/tests/solver/Test_wilsonclover_cg_schur.cc index eaae24b3..567a8283 100644 --- a/tests/solver/Test_wilsonclover_cg_schur.cc +++ b/tests/solver/Test_wilsonclover_cg_schur.cc @@ -57,7 +57,7 @@ int main (int argc, char ** argv) std::vector seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); - LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); + LatticeGaugeField Umu(&Grid); SU::HotConfiguration(pRNG,Umu); LatticeFermion src(&Grid); random(pRNG,src); LatticeFermion result(&Grid); result=Zero(); diff --git a/tests/solver/Test_wilsonclover_cg_unprec.cc b/tests/solver/Test_wilsonclover_cg_unprec.cc index 49c52cdf..755d80e1 100644 --- a/tests/solver/Test_wilsonclover_cg_unprec.cc +++ b/tests/solver/Test_wilsonclover_cg_unprec.cc @@ -60,7 +60,7 @@ int main (int argc, char ** argv) LatticeFermion src(&Grid); random(pRNG,src); RealD nrm = norm2(src); LatticeFermion result(&Grid); result=Zero(); - LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); + LatticeGaugeField Umu(&Grid); SU::HotConfiguration(pRNG,Umu); double volume=1; for(int mu=0;mu::HotConfiguration(pRNG,Umu); double volume=1; for(int mu=0;mu::HotConfiguration(pRNG,Umu); double volume=1; for(int mu=0;mu::HotConfiguration(pRNG,Umu); double volume=1; for(int mu=0;mu::HotConfiguration(fPRNG, Umu); // clang-format on RealD mass = -0.25; diff --git a/tests/solver/Test_wilsonclover_mg_lime.cc b/tests/solver/Test_wilsonclover_mg_lime.cc index bd2990d4..0a29c034 100644 --- a/tests/solver/Test_wilsonclover_mg_lime.cc +++ b/tests/solver/Test_wilsonclover_mg_lime.cc @@ -75,7 +75,7 @@ int main(int argc, char **argv) { NerscIO::readConfiguration(Umu_d,header,file); } #endif - // SU3::HotConfiguration(fPRNG, Umu_d); + // SU::HotConfiguration(fPRNG, Umu_d); LatticeGaugeFieldF Umu_f(FGrid_f); precisionChange(Umu_f, Umu_d); // clang-format on diff --git a/tests/solver/Test_wilsonclover_mg_mp.cc b/tests/solver/Test_wilsonclover_mg_mp.cc index b5178d2e..2efe5f08 100644 --- a/tests/solver/Test_wilsonclover_mg_mp.cc +++ b/tests/solver/Test_wilsonclover_mg_mp.cc @@ -52,7 +52,7 @@ int main(int argc, char **argv) { LatticeFermionD src_d(FGrid_d); gaussian(fPRNG, src_d); LatticeFermionD resultMGD_d(FGrid_d); resultMGD_d = zero; LatticeFermionD resultMGF_d(FGrid_d); resultMGF_d = zero; - LatticeGaugeFieldD Umu_d(FGrid_d); SU3::HotConfiguration(fPRNG, Umu_d); + LatticeGaugeFieldD Umu_d(FGrid_d); SU::HotConfiguration(fPRNG, Umu_d); LatticeGaugeFieldF Umu_f(FGrid_f); precisionChange(Umu_f, Umu_d); // clang-format on diff --git a/tests/solver/Test_wilsonclover_mixedbicgstab_prec.cc b/tests/solver/Test_wilsonclover_mixedbicgstab_prec.cc index 0af83f8b..d47dac2a 100644 --- a/tests/solver/Test_wilsonclover_mixedbicgstab_prec.cc +++ b/tests/solver/Test_wilsonclover_mixedbicgstab_prec.cc @@ -61,7 +61,7 @@ int main (int argc, char ** argv) // clang-format off LatticeFermionD src(FGrid_d); gaussian(fPRNG, src); LatticeFermionD result(FGrid_d); result = Zero(); - LatticeGaugeFieldD Umu_d(FGrid_d); SU3::HotConfiguration(fPRNG, Umu_d); + LatticeGaugeFieldD Umu_d(FGrid_d); SU::HotConfiguration(fPRNG, Umu_d); LatticeGaugeFieldF Umu_f(FGrid_f); precisionChange(Umu_f, Umu_d); // clang-format on diff --git a/tests/solver/Test_wilsonclover_mixedcg_prec.cc b/tests/solver/Test_wilsonclover_mixedcg_prec.cc index 8af9036f..95590004 100644 --- a/tests/solver/Test_wilsonclover_mixedcg_prec.cc +++ b/tests/solver/Test_wilsonclover_mixedcg_prec.cc @@ -61,7 +61,7 @@ int main (int argc, char ** argv) // clang-format off LatticeFermionD src(FGrid_d); gaussian(fPRNG, src); LatticeFermionD result(FGrid_d); result = Zero(); - LatticeGaugeFieldD Umu_d(FGrid_d); SU3::HotConfiguration(fPRNG, Umu_d); + LatticeGaugeFieldD Umu_d(FGrid_d); SU::HotConfiguration(fPRNG, Umu_d); LatticeGaugeFieldF Umu_f(FGrid_f); precisionChange(Umu_f, Umu_d); // clang-format on diff --git a/tests/solver/Test_wilsonclover_mr_unprec.cc b/tests/solver/Test_wilsonclover_mr_unprec.cc index c7b5ecfe..ab49ec1f 100644 --- a/tests/solver/Test_wilsonclover_mr_unprec.cc +++ b/tests/solver/Test_wilsonclover_mr_unprec.cc @@ -51,7 +51,7 @@ int main (int argc, char ** argv) FermionField src(&Grid); random(pRNG,src); RealD nrm = norm2(src); FermionField result(&Grid); result=Zero(); - LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); + LatticeGaugeField Umu(&Grid); SU::HotConfiguration(pRNG,Umu); double volume=1; for(int mu=0;mu::HotConfiguration(RNG4, Umu); } std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt() diff --git a/tests/solver/Test_zmobius_cg_prec.cc b/tests/solver/Test_zmobius_cg_prec.cc index fb57cff1..6b007afc 100644 --- a/tests/solver/Test_zmobius_cg_prec.cc +++ b/tests/solver/Test_zmobius_cg_prec.cc @@ -67,7 +67,7 @@ int main(int argc, char** argv) { result = Zero(); LatticeGaugeField Umu(UGrid); - SU3::HotConfiguration(RNG4, Umu); + SU::HotConfiguration(RNG4, Umu); std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt() << " Ls: " << Ls << std::endl; From 6b1486e89b8d97470e8b50657e158618a67fb392 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 8 Oct 2020 16:31:24 +0100 Subject: [PATCH 07/24] fixing number of colours defaulting to 4 in most cases --- configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index c1dc09d4..cee2a84c 100644 --- a/configure.ac +++ b/configure.ac @@ -130,9 +130,9 @@ AC_ARG_ENABLE([Nc], case ${ac_Nc} in 2) - AC_DEFINE([Config_Nc],[4],[Gauge group Nc]);; + AC_DEFINE([Config_Nc],[2],[Gauge group Nc]);; 3) - AC_DEFINE([Config_Nc],[4],[Gauge group Nc]);; + AC_DEFINE([Config_Nc],[3],[Gauge group Nc]);; 4) AC_DEFINE([Config_Nc],[4],[Gauge group Nc]);; 5) From 5f0fe029d213bbab483cb3d48076ecce22790751 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 8 Oct 2020 19:51:28 -0400 Subject: [PATCH 08/24] Improve meemory benchmarks for GPU (avoid host mem ping pong) --- benchmarks/Benchmark_ITT.cc | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/benchmarks/Benchmark_ITT.cc b/benchmarks/Benchmark_ITT.cc index df5427c1..0ddf5068 100644 --- a/benchmarks/Benchmark_ITT.cc +++ b/benchmarks/Benchmark_ITT.cc @@ -125,7 +125,7 @@ public: lat*mpi_layout[1], lat*mpi_layout[2], lat*mpi_layout[3]}); - std::cout << GridLogMessage<< latt_size <({45,12,81,9})); for(int lat=8;lat<=lmax;lat+=8){ @@ -249,11 +249,6 @@ public: double start=usecond(); for(int i=0;i({45,12,81,9})); for(int lat=8;lat<=lmax;lat+=8){ @@ -309,11 +304,6 @@ public: double start=usecond(); for(int i=0;i Date: Thu, 8 Oct 2020 19:52:08 -0400 Subject: [PATCH 09/24] Single prec benchmark in double prec compile --- benchmarks/Benchmark_dwf_fp32.cc | 364 +++++++++++++++++++++++++++++++ 1 file changed, 364 insertions(+) create mode 100644 benchmarks/Benchmark_dwf_fp32.cc diff --git a/benchmarks/Benchmark_dwf_fp32.cc b/benchmarks/Benchmark_dwf_fp32.cc new file mode 100644 index 00000000..cb86177e --- /dev/null +++ b/benchmarks/Benchmark_dwf_fp32.cc @@ -0,0 +1,364 @@ + /************************************************************************************* + Grid physics library, www.github.com/paboyle/Grid + Source file: ./benchmarks/Benchmark_dwf.cc + Copyright (C) 2015 + + Author: Peter Boyle + Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#ifdef GRID_CUDA +#define CUDA_PROFILE +#endif + +#ifdef CUDA_PROFILE +#include +#endif + +using namespace std; +using namespace Grid; + +template +struct scal { + d internal; +}; + + Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT + }; + + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + + int threads = GridThread::GetThreads(); + + Coordinate latt4 = GridDefaultLatt(); + int Ls=8; + for(int i=0;i> Ls; + } + + GridLogLayout(); + + long unsigned int single_site_flops = 8*Nc*(7+16*Nc); + + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + std::cout << GridLogMessage << "Making s innermost grids"< seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + + std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl; + GridParallelRNG RNG4(UGrid); RNG4.SeedUniqueString(std::string("The 4D RNG")); + std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl; + GridParallelRNG RNG5(FGrid); RNG5.SeedUniqueString(std::string("The 5D RNG")); + std::cout << GridLogMessage << "Initialised RNGs" << std::endl; + + LatticeFermionF src (FGrid); random(RNG5,src); +#if 0 + src = Zero(); + { + Coordinate origin({0,0,0,latt4[2]-1,0}); + SpinColourVectorF tmp; + tmp=Zero(); + tmp()(0)(0)=Complex(-2.0,0.0); + std::cout << " source site 0 " << tmp<::HotConfiguration(RNG4,Umu); + std::cout << GridLogMessage << "Random gauge initialised " << std::endl; +#if 0 + Umu=1.0; + for(int mu=0;mu(Umu,mu); + // if (mu !=2 ) ttmp = 0; + // ttmp = ttmp* pow(10.0,mu); + PokeIndex(Umu,ttmp,mu); + } + std::cout << GridLogMessage << "Forced to diagonal " << std::endl; +#endif + + //////////////////////////////////// + // Naive wilson implementation + //////////////////////////////////// + // replicate across fifth dimension + LatticeGaugeFieldF Umu5d(FGrid); + std::vector U(4,FGrid); + { + autoView( Umu5d_v, Umu5d, CpuWrite); + autoView( Umu_v , Umu , CpuRead); + for(int ss=0;ssoSites();ss++){ + for(int s=0;s(Umu5d,mu); + } + std::cout << GridLogMessage << "Setting up Cshift based reference " << std::endl; + + if (1) + { + ref = Zero(); + for(int mu=0;mu_Nprocessors; + RealD NN = UGrid->NodeCount(); + + std::cout << GridLogMessage<< "*****************************************************************" <Barrier(); + Dw.ZeroCounters(); + Dw.Dhop(src,result,0); + std::cout<Barrier(); + + double volume=Ls; for(int mu=0;mu1.0e-4) ) { + /* + std::cout << "RESULT\n " << result<Barrier(); + exit(-1); + } + assert (norm2(err)< 1.0e-4 ); + Dw.Report(); + } + + if (1) + { // Naive wilson dag implementation + ref = Zero(); + for(int mu=0;mu1.0e-4)){ +/* + std::cout<< "DAG RESULT\n " <Barrier(); + Dw.DhopEO(src_o,r_e,DaggerNo); + double t0=usecond(); + for(int i=0;iBarrier(); + + double volume=Ls; for(int mu=0;mu1.0e-4)){ + /* + std::cout<< "Deo RESULT\n " < Date: Thu, 8 Oct 2020 22:19:20 -0400 Subject: [PATCH 10/24] more runtime --- benchmarks/Benchmark_ITT.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/Benchmark_ITT.cc b/benchmarks/Benchmark_ITT.cc index 0ddf5068..eb275728 100644 --- a/benchmarks/Benchmark_ITT.cc +++ b/benchmarks/Benchmark_ITT.cc @@ -422,7 +422,7 @@ public: } FGrid->Barrier(); double t1=usecond(); - uint64_t ncall = 50; + uint64_t ncall = 500; FGrid->Broadcast(0,&ncall,sizeof(ncall)); From b24a504d7c8f201c7689f4d05ece21e20eced345 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Fri, 9 Oct 2020 20:28:54 +0100 Subject: [PATCH 11/24] hook to access last parallel I/O performance measurement --- Grid/parallelIO/BinaryIO.cc | 3 ++- Grid/parallelIO/BinaryIO.h | 14 ++++++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/Grid/parallelIO/BinaryIO.cc b/Grid/parallelIO/BinaryIO.cc index 221a7fe8..ef1b6683 100644 --- a/Grid/parallelIO/BinaryIO.cc +++ b/Grid/parallelIO/BinaryIO.cc @@ -1,3 +1,4 @@ #include -int Grid::BinaryIO::latticeWriteMaxRetry = -1; +int Grid::BinaryIO::latticeWriteMaxRetry = -1; +Grid::BinaryIO::IoPerf Grid::BinaryIO::lastPerf; diff --git a/Grid/parallelIO/BinaryIO.h b/Grid/parallelIO/BinaryIO.h index 1f11add9..e390b575 100644 --- a/Grid/parallelIO/BinaryIO.h +++ b/Grid/parallelIO/BinaryIO.h @@ -79,6 +79,13 @@ inline void removeWhitespace(std::string &key) /////////////////////////////////////////////////////////////////////////////////////////////////// class BinaryIO { public: + struct IoPerf + { + uint64_t size{0},time{0}; + double mbytesPerSecond{0.}; + }; + + static IoPerf lastPerf; static int latticeWriteMaxRetry; ///////////////////////////////////////////////////////////////////////////// @@ -502,12 +509,15 @@ class BinaryIO { timer.Stop(); } + lastPerf.size = sizeof(fobj)*iodata.size()*nrank; + lastPerf.time = timer.useconds(); + lastPerf.mbytesPerSecond = lastPerf.size/1024./1024./(lastPerf.time/1.0e6); std::cout< Date: Fri, 9 Oct 2020 20:29:40 +0100 Subject: [PATCH 12/24] multi-pass I/O benchmark, with statistic and robustness summary --- benchmarks/Benchmark_IO.cc | 202 ++++++++++++++++++++++++++++-------- benchmarks/Benchmark_IO.hpp | 54 +++++++--- 2 files changed, 198 insertions(+), 58 deletions(-) diff --git a/benchmarks/Benchmark_IO.cc b/benchmarks/Benchmark_IO.cc index 5e4cef9f..0393257d 100644 --- a/benchmarks/Benchmark_IO.cc +++ b/benchmarks/Benchmark_IO.cc @@ -1,8 +1,16 @@ #include "Benchmark_IO.hpp" +#ifndef BENCH_IO_LMIN +#define BENCH_IO_LMIN 20 +#endif + #ifndef BENCH_IO_LMAX -#define BENCH_IO_LMAX 40 +#define BENCH_IO_LMAX 30 +#endif + +#ifndef BENCH_IO_NPASS +#define BENCH_IO_NPASS 10 #endif using namespace Grid; @@ -12,62 +20,174 @@ std::string filestem(const int l) return "iobench_l" + std::to_string(l); } +int vol(const int i) +{ + return BENCH_IO_LMIN + 2*i; +} + +int volInd(const int l) +{ + return (l - BENCH_IO_LMIN)/2; +} + +template +void stats(Mat &mean, Mat &stdDev, const std::vector &data) +{ + auto nr = data[0].rows(), nc = data[0].cols(); + Eigen::MatrixXd sqSum(nr, nc); + double n = static_cast(data.size()); + + assert(n > 1.); + mean = Mat::Zero(nr, nc); + sqSum = Mat::Zero(nr, nc); + for (auto &d: data) + { + mean += d; + sqSum += d.cwiseProduct(d); + } + stdDev = ((sqSum - mean.cwiseProduct(mean)/n)/(n - 1.)).cwiseSqrt(); + mean /= n; +} + +#define grid_printf(...) \ +MSG << "";\ +printf(__VA_ARGS__); + +enum {sRead = 0, sWrite = 1, gRead = 2, gWrite = 3}; + int main (int argc, char ** argv) { Grid_init(&argc,&argv); - int64_t threads = GridThread::GetThreads(); - auto mpi = GridDefaultMpi(); - std::vector latt; + int64_t threads = GridThread::GetThreads(); + auto mpi = GridDefaultMpi(); + unsigned int nVol = (BENCH_IO_LMAX - BENCH_IO_LMIN)/2 + 1; + unsigned int nRelVol = (BENCH_IO_LMAX - 24)/2 + 1; + std::vector perf(BENCH_IO_NPASS, Eigen::MatrixXd::Zero(nVol, 4)); + std::vector avPerf(BENCH_IO_NPASS, Eigen::VectorXd::Zero(4)); + std::vector latt; MSG << "Grid is setup to use " << threads << " threads" << std::endl; MSG << "MPI partition " << mpi << std::endl; - - MSG << SEP << std::endl; - MSG << "Benchmark std write" << std::endl; - MSG << SEP << std::endl; - for (int l = 4; l <= BENCH_IO_LMAX; l += 2) + for (unsigned int i = 0; i < BENCH_IO_NPASS; ++i) { - latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]}; + MSG << BIGSEP << std::endl; + MSG << "Pass " << i + 1 << "/" << BENCH_IO_NPASS << std::endl; + MSG << BIGSEP << std::endl; + MSG << SEP << std::endl; + MSG << "Benchmark std write" << std::endl; + MSG << SEP << std::endl; + for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2) + { + latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]}; - MSG << "-- Local volume " << l << "^4" << std::endl; - writeBenchmark(latt, filestem(l), stdWrite); - } + MSG << "-- Local volume " << l << "^4" << std::endl; + writeBenchmark(latt, filestem(l), stdWrite); + perf[i](volInd(l), sWrite) = BinaryIO::lastPerf.mbytesPerSecond; + } - MSG << SEP << std::endl; - MSG << "Benchmark std read" << std::endl; - MSG << SEP << std::endl; - for (int l = 4; l <= BENCH_IO_LMAX; l += 2) - { - latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]}; + MSG << SEP << std::endl; + MSG << "Benchmark std read" << std::endl; + MSG << SEP << std::endl; + for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2) + { + latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]}; - MSG << "-- Local volume " << l << "^4" << std::endl; - readBenchmark(latt, filestem(l), stdRead); - } + MSG << "-- Local volume " << l << "^4" << std::endl; + readBenchmark(latt, filestem(l), stdRead); + perf[i](volInd(l), sRead) = BinaryIO::lastPerf.mbytesPerSecond; + } -#ifdef HAVE_LIME - MSG << SEP << std::endl; - MSG << "Benchmark Grid C-Lime write" << std::endl; - MSG << SEP << std::endl; - for (int l = 4; l <= BENCH_IO_LMAX; l += 2) - { - latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]}; + #ifdef HAVE_LIME + MSG << SEP << std::endl; + MSG << "Benchmark Grid C-Lime write" << std::endl; + MSG << SEP << std::endl; + for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2) + { + latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]}; - MSG << "-- Local volume " << l << "^4" << std::endl; - writeBenchmark(latt, filestem(l), limeWrite); - } + MSG << "-- Local volume " << l << "^4" << std::endl; + writeBenchmark(latt, filestem(l), limeWrite); + perf[i](volInd(l), gWrite) = BinaryIO::lastPerf.mbytesPerSecond; + } - MSG << SEP << std::endl; - MSG << "Benchmark Grid C-Lime read" << std::endl; - MSG << SEP << std::endl; - for (int l = 4; l <= BENCH_IO_LMAX; l += 2) - { - latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]}; + MSG << SEP << std::endl; + MSG << "Benchmark Grid C-Lime read" << std::endl; + MSG << SEP << std::endl; + for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2) + { + latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]}; - MSG << "-- Local volume " << l << "^4" << std::endl; - readBenchmark(latt, filestem(l), limeRead); - } + MSG << "-- Local volume " << l << "^4" << std::endl; + readBenchmark(latt, filestem(l), limeRead); + perf[i](volInd(l), gRead) = BinaryIO::lastPerf.mbytesPerSecond; + } #endif + avPerf[i].fill(0.); + for (int f = 0; f < 4; ++f) + for (int l = 24; l <= BENCH_IO_LMAX; l += 2) + { + avPerf[i](f) += perf[i](volInd(l), f); + } + avPerf[i] /= nRelVol; + } + + Eigen::MatrixXd mean(nVol, 4), stdDev(nVol, 4), rob(nVol, 4); + Eigen::VectorXd avMean(4), avStdDev(4), avRob(4); + double n = BENCH_IO_NPASS; + + stats(mean, stdDev, perf); + stats(avMean, avStdDev, avPerf); + rob.fill(100.); + rob -= 100.*stdDev.cwiseQuotient(mean.cwiseAbs()); + avRob.fill(100.); + avRob -= 100.*avStdDev.cwiseQuotient(avMean.cwiseAbs()); + + MSG << BIGSEP << std::endl; + MSG << "SUMMARY" << std::endl; + MSG << BIGSEP << std::endl; + MSG << "Summary of individual results (all results in MB/s)." << std::endl; + MSG << "Every second colum gives the standard deviation of the previous column." << std::endl; + MSG << std::endl; + grid_printf("%4s %12s %12s %12s %12s %12s %12s %12s %12s\n", + "L", "std read", "std dev", "std write", "std dev", + "Grid read", "std dev", "Grid write", "std dev"); + for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2) + { + grid_printf("%4d %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n", + l, mean(volInd(l), sRead), stdDev(volInd(l), sRead), + mean(volInd(l), sWrite), stdDev(volInd(l), sWrite), + mean(volInd(l), gRead), stdDev(volInd(l), gRead), + mean(volInd(l), gWrite), stdDev(volInd(l), gWrite)); + } + MSG << std::endl; + MSG << "Robustness of individual results, in \%. (rob = 100\% - std dev / mean)" << std::endl; + MSG << std::endl; + grid_printf("%4s %12s %12s %12s %12s\n", + "L", "std read", "std write", "Grid read", "Grid write"); + for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2) + { + grid_printf("%4d %12.1f %12.1f %12.1f %12.1f\n", + l, rob(volInd(l), sRead), rob(volInd(l), sWrite), + rob(volInd(l), gRead), rob(volInd(l), gWrite)); + } + MSG << std::endl; + MSG << "Summary of results averaged over local volumes 24^4-" << BENCH_IO_LMAX << "^4 (all results in MB/s)." << std::endl; + MSG << "Every second colum gives the standard deviation of the previous column." << std::endl; + MSG << std::endl; + grid_printf("%12s %12s %12s %12s %12s %12s %12s %12s\n", + "std read", "std dev", "std write", "std dev", + "Grid read", "std dev", "Grid write", "std dev"); + grid_printf("%12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n", + avMean(sRead), avStdDev(sRead), avMean(sWrite), avStdDev(sWrite), + avMean(gRead), avStdDev(gRead), avMean(gWrite), avStdDev(gWrite)); + MSG << std::endl; + MSG << "Robustness of volume-averaged results, in \%. (rob = 100\% - std dev / mean)" << std::endl; + MSG << std::endl; + grid_printf("%12s %12s %12s %12s\n", + "std read", "std write", "Grid read", "Grid write"); + grid_printf("%12.1f %12.1f %12.1f %12.1f\n", + avRob(sRead), avRob(sWrite), avRob(gRead), avRob(gWrite)); Grid_finalize(); diff --git a/benchmarks/Benchmark_IO.hpp b/benchmarks/Benchmark_IO.hpp index 39af14ba..c4a6ca58 100644 --- a/benchmarks/Benchmark_IO.hpp +++ b/benchmarks/Benchmark_IO.hpp @@ -5,6 +5,8 @@ #ifdef HAVE_LIME #define MSG std::cout << GridLogMessage #define SEP \ +"-----------------------------------------------------------------------------" +#define BIGSEP \ "=============================================================================" namespace Grid { @@ -37,9 +39,12 @@ using ReaderFn = std::function; // ioWatch.Stop(); // std::fclose(file); // size *= vec.Grid()->ProcessorCount(); -// MSG << "Std I/O write: Wrote " << size << " bytes in " << ioWatch.Elapsed() -// << ", performance " << size/1024./1024./(ioWatch.useconds()/1.e6) -// << " MB/s" << std::endl; +// auto &p = BinaryIO::lastPerf; +// p.size = size; +// p.time = ioWatch.useconds(); +// p.mbytesPerSecond = size/1024./1024./(ioWatch.useconds()/1.e6); +// MSG << "Std I/O write: Wrote " << p.size << " bytes in " << ioWatch.Elapsed() +// << ", " << p.mbytesPerSecond << " MB/s" << std::endl; // MSG << "Std I/O write: checksum overhead " << crcWatch.Elapsed() << std::endl; // } // @@ -72,9 +77,12 @@ using ReaderFn = std::function; // MSG << "Std I/O read: Data CRC32 " << std::hex << crcData << std::dec << std::endl; // assert(crcData == crcRead); // size *= vec.Grid()->ProcessorCount(); -// MSG << "Std I/O read: Read " << size << " bytes in " << ioWatch.Elapsed() -// << ", performance " << size/1024./1024./(ioWatch.useconds()/1.e6) -// << " MB/s" << std::endl; +// auto &p = BinaryIO::lastPerf; +// p.size = size; +// p.time = ioWatch.useconds(); +// p.mbytesPerSecond = size/1024./1024./(ioWatch.useconds()/1.e6); +// MSG << "Std I/O read: Read " << p.size << " bytes in " << ioWatch.Elapsed() +// << ", " << p.mbytesPerSecond << " MB/s" << std::endl; // MSG << "Std I/O read: checksum overhead " << crcWatch.Elapsed() << std::endl; // } @@ -100,9 +108,12 @@ void stdWrite(const std::string filestem, Field &vec) file.flush(); ioWatch.Stop(); size *= vec.Grid()->ProcessorCount(); - MSG << "Std I/O write: Wrote " << size << " bytes in " << ioWatch.Elapsed() - << ", " << size/1024./1024./(ioWatch.useconds()/1.e6) - << " MB/s" << std::endl; + auto &p = BinaryIO::lastPerf; + p.size = size; + p.time = ioWatch.useconds(); + p.mbytesPerSecond = size/1024./1024./(ioWatch.useconds()/1.e6); + MSG << "Std I/O write: Wrote " << p.size << " bytes in " << ioWatch.Elapsed() + << ", " << p.mbytesPerSecond << " MB/s" << std::endl; MSG << "Std I/O write: checksum overhead " << crcWatch.Elapsed() << std::endl; } @@ -135,9 +146,12 @@ void stdRead(Field &vec, const std::string filestem) MSG << "Std I/O read: Data CRC32 " << std::hex << crcData << std::dec << std::endl; assert(crcData == crcRead); size *= vec.Grid()->ProcessorCount(); - MSG << "Std I/O read: Read " << size << " bytes in " << ioWatch.Elapsed() - << ", " << size/1024./1024./(ioWatch.useconds()/1.e6) - << " MB/s" << std::endl; + auto &p = BinaryIO::lastPerf; + p.size = size; + p.time = ioWatch.useconds(); + p.mbytesPerSecond = size/1024./1024./(ioWatch.useconds()/1.e6); + MSG << "Std I/O read: Read " << p.size << " bytes in " << ioWatch.Elapsed() + << ", " << p.mbytesPerSecond << " MB/s" << std::endl; MSG << "Std I/O read: checksum overhead " << crcWatch.Elapsed() << std::endl; } @@ -200,12 +214,18 @@ void writeBenchmark(const Coordinate &latt, const std::string filename, auto simd = GridDefaultSimd(latt.size(), Field::vector_type::Nsimd()); std::shared_ptr gBasePt(SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi)); std::shared_ptr gPt; + std::random_device rd; makeGrid(gPt, gBasePt, Ls, rb); - GridBase *g = gPt.get(); - GridParallelRNG rng(g); - Field vec(g); + GridBase *g = gPt.get(); + GridParallelRNG rng(g); + Field vec(g); + + rng.SeedFixedIntegers({static_cast(rd()), static_cast(rd()), + static_cast(rd()), static_cast(rd()), + static_cast(rd()), static_cast(rd()), + static_cast(rd()), static_cast(rd())}); random(rng, vec); write(filename, vec); @@ -223,8 +243,8 @@ void readBenchmark(const Coordinate &latt, const std::string filename, makeGrid(gPt, gBasePt, Ls, rb); - GridBase *g = gPt.get(); - Field vec(g); + GridBase *g = gPt.get(); + Field vec(g); read(vec, filename); } From 0e17bd6597cb6e62af58394e4eac726910caa477 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Fri, 9 Oct 2020 20:29:57 +0100 Subject: [PATCH 13/24] I/O benchmark cleanup --- benchmarks/Benchmark_IO_vs_dir.cc | 4 -- benchmarks/benchmark-io-csv.sh | 76 ------------------------------- 2 files changed, 80 deletions(-) delete mode 100755 benchmarks/benchmark-io-csv.sh diff --git a/benchmarks/Benchmark_IO_vs_dir.cc b/benchmarks/Benchmark_IO_vs_dir.cc index 9ccfd554..e030bc39 100644 --- a/benchmarks/Benchmark_IO_vs_dir.cc +++ b/benchmarks/Benchmark_IO_vs_dir.cc @@ -1,9 +1,5 @@ #include "Benchmark_IO.hpp" -#define MSG std::cout << GridLogMessage -#define SEP \ -"=============================================================================" - using namespace Grid; int main (int argc, char ** argv) diff --git a/benchmarks/benchmark-io-csv.sh b/benchmarks/benchmark-io-csv.sh deleted file mode 100755 index cc61b006..00000000 --- a/benchmarks/benchmark-io-csv.sh +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env bash - -awkscript=' -BEGIN{ - i = 0; - print "local L,std read (MB/s),std write (MB/s),Grid Lime read (MB/s),Grid Lime write (MB/s)" -} - -/Benchmark std write/{ - i = 0; - mode = "stdWrite"; -} - -/Benchmark std read/{ - i = 0; - mode = "stdRead" -} - -/Benchmark Grid C-Lime write/{ - i = 0; - mode = "gridWrite"; -} - -/Benchmark Grid C-Lime read/{ - i = 0; - mode = "gridRead"; -} - -/Local volume/{ - match($0, "[0-9]+\\^4"); - l[i] = substr($0, RSTART, RLENGTH-2); -} - -/MB\/s/{ - match($0, "[0-9.eE]+ MB/s"); - p = substr($0, RSTART, RLENGTH-5); - if (mode == "stdWrite") - { - sw[i] = p; - } - else if (mode == "stdRead") - { - sr[i] = p; - } - else if (mode == "gridWrite") - { - gw[i] = p; - } - else if (mode == "gridRead") - { - gr[i] = p; - } - i++; -} - -END{ - s = 0 - for (a in l) - { - s++; - } - for (j = 0; j < s; j++) - { - printf("%s,%s,%s,%s,%s\n", l[j], sr[j], sw[j], gr[j], gw[j]); - } - printf("\n"); -} -' - -if (( $# != 1 )); then - echo "usage: `basename $0` " 1>&2 - exit 1 -fi -LOG=$1 - -awk "${awkscript}" ${LOG} From 5f893bf9aff17781ce363512cccdd63b2e126b1a Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Fri, 9 Oct 2020 21:31:59 +0100 Subject: [PATCH 14/24] Benchmark_IO procurement sizes --- benchmarks/Benchmark_IO.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/Benchmark_IO.cc b/benchmarks/Benchmark_IO.cc index 0393257d..04e3a735 100644 --- a/benchmarks/Benchmark_IO.cc +++ b/benchmarks/Benchmark_IO.cc @@ -2,11 +2,11 @@ #include "Benchmark_IO.hpp" #ifndef BENCH_IO_LMIN -#define BENCH_IO_LMIN 20 +#define BENCH_IO_LMIN 8 #endif #ifndef BENCH_IO_LMAX -#define BENCH_IO_LMAX 30 +#define BENCH_IO_LMAX 48 #endif #ifndef BENCH_IO_NPASS From b0d61b9687d441bd651622970aadbf6200f16c22 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Fri, 9 Oct 2020 21:46:45 +0100 Subject: [PATCH 15/24] Benchmark_IO cleaner output --- benchmarks/Benchmark_IO.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/benchmarks/Benchmark_IO.cc b/benchmarks/Benchmark_IO.cc index 04e3a735..76a2375f 100644 --- a/benchmarks/Benchmark_IO.cc +++ b/benchmarks/Benchmark_IO.cc @@ -50,8 +50,11 @@ void stats(Mat &mean, Mat &stdDev, const std::vector &data) } #define grid_printf(...) \ -MSG << "";\ -printf(__VA_ARGS__); +{\ + char _buf[1024];\ + sprintf(_buf, __VA_ARGS__);\ + MSG << _buf;\ +} enum {sRead = 0, sWrite = 1, gRead = 2, gWrite = 3}; From c2b688abc949d67cff19ba296dbd3d02ea3ec8de Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Sat, 10 Oct 2020 16:52:56 +0100 Subject: [PATCH 16/24] Benchmark_IO: reducing max local volume to 32^4 --- benchmarks/Benchmark_IO.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/Benchmark_IO.cc b/benchmarks/Benchmark_IO.cc index 76a2375f..0d80d425 100644 --- a/benchmarks/Benchmark_IO.cc +++ b/benchmarks/Benchmark_IO.cc @@ -6,7 +6,7 @@ #endif #ifndef BENCH_IO_LMAX -#define BENCH_IO_LMAX 48 +#define BENCH_IO_LMAX 32 #endif #ifndef BENCH_IO_NPASS From d55cc5b3809986be1a194dba56b22544c797b2a5 Mon Sep 17 00:00:00 2001 From: Sam Mangham Date: Mon, 12 Oct 2020 12:33:13 +0100 Subject: [PATCH 17/24] Fixed typo on --enable-comm, removed all references to --enable-precision except for config options, where it is listed as deprecated. Removed travis test for single precision. --- .travis.yml | 7 +------ README | 33 +++++++++++------------------ README.md | 33 +++++++++++------------------ SVE_README.txt | 24 ++++++++++----------- documentation/GridXcode/readme.md | 8 +++---- documentation/manual.rst | 35 ++++++++++--------------------- 6 files changed, 52 insertions(+), 88 deletions(-) diff --git a/.travis.yml b/.travis.yml index 129fd582..3a0e1e35 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,11 +9,6 @@ matrix: - os: osx osx_image: xcode8.3 compiler: clang - env: PREC=single - - os: osx - osx_image: xcode8.3 - compiler: clang - env: PREC=double before_install: - export GRIDDIR=`pwd` @@ -55,7 +50,7 @@ script: - make -j4 - make install - cd $CWD/build - - ../configure --enable-precision=$PREC --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install ${EXTRACONF} + - ../configure --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install ${EXTRACONF} - make -j4 - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals - make check diff --git a/README b/README index 86506f52..0beabff3 100644 --- a/README +++ b/README @@ -111,11 +111,10 @@ Now you can execute the `configure` script to generate makefiles (here from a bu ``` bash mkdir build; cd build -../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi-auto --prefix= +../configure --enable-simd=AVX --enable-comms=mpi-auto --prefix= ``` -where `--enable-precision=` set the default precision, -`--enable-simd=` set the SIMD type, `--enable- +where `--enable-simd=` set the SIMD type, `--enable- comms=`, and `` should be replaced by the prefix path where you want to install Grid. Other options are detailed in the next section, you can also use `configure --help` to display them. Like with any other program using GNU autotool, the @@ -146,8 +145,8 @@ If you want to build all the tests at once just use `make tests`. - `--enable-numa`: enable NUMA first touch optimisation - `--enable-simd=`: setup Grid for the SIMD target `` (default: `GEN`). A list of possible SIMD targets is detailed in a section below. - `--enable-gen-simd-width=`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes). -- `--enable-precision={single|double}`: set the default precision (default: `double`). -- `--enable-precision=`: Use `` for message passing (default: `none`). A list of possible SIMD targets is detailed in a section below. +- `--enable-precision={single|double}`: set the default precision (default: `double`). **Deprecated option** +- `--enable-comms=`: Use `` for message passing (default: `none`). A list of possible SIMD targets is detailed in a section below. - `--enable-rng={sitmo|ranlux48|mt19937}`: choose the RNG (default: `sitmo `). - `--disable-timers`: disable system dependent high-resolution timers. - `--enable-chroma`: enable Chroma regression tests. @@ -201,8 +200,7 @@ Alternatively, some CPU codenames can be directly used: The following configuration is recommended for the Intel Knights Landing platform: ``` bash -../configure --enable-precision=double\ - --enable-simd=KNL \ +../configure --enable-simd=KNL \ --enable-comms=mpi-auto \ --enable-mkl \ CXX=icpc MPICXX=mpiicpc @@ -212,8 +210,7 @@ The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library. If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: ``` bash -../configure --enable-precision=double\ - --enable-simd=KNL \ +../configure --enable-simd=KNL \ --enable-comms=mpi \ --enable-mkl \ CXX=CC CC=cc @@ -232,8 +229,7 @@ for interior communication. This is the mpi3 communications implementation. We recommend four ranks per node for best performance, but optimum is local volume dependent. ``` bash -../configure --enable-precision=double\ - --enable-simd=KNL \ +../configure --enable-simd=KNL \ --enable-comms=mpi3-auto \ --enable-mkl \ CC=icpc MPICXX=mpiicpc @@ -244,8 +240,7 @@ We recommend four ranks per node for best performance, but optimum is local volu The following configuration is recommended for the Intel Haswell platform: ``` bash -../configure --enable-precision=double\ - --enable-simd=AVX2 \ +../configure --enable-simd=AVX2 \ --enable-comms=mpi3-auto \ --enable-mkl \ CXX=icpc MPICXX=mpiicpc @@ -262,8 +257,7 @@ where `` is the UNIX prefix where GMP and MPFR are installed. If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: ``` bash -../configure --enable-precision=double\ - --enable-simd=AVX2 \ +../configure --enable-simd=AVX2 \ --enable-comms=mpi3 \ --enable-mkl \ CXX=CC CC=cc @@ -280,8 +274,7 @@ This is the default. The following configuration is recommended for the Intel Skylake platform: ``` bash -../configure --enable-precision=double\ - --enable-simd=AVX512 \ +../configure --enable-simd=AVX512 \ --enable-comms=mpi3 \ --enable-mkl \ CXX=mpiicpc @@ -298,8 +291,7 @@ where `` is the UNIX prefix where GMP and MPFR are installed. If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: ``` bash -../configure --enable-precision=double\ - --enable-simd=AVX512 \ +../configure --enable-simd=AVX512 \ --enable-comms=mpi3 \ --enable-mkl \ CXX=CC CC=cc @@ -330,8 +322,7 @@ and 8 threads per rank. The following configuration is recommended for the AMD EPYC platform. ``` bash -../configure --enable-precision=double\ - --enable-simd=AVX2 \ +../configure --enable-simd=AVX2 \ --enable-comms=mpi3 \ CXX=mpicxx ``` diff --git a/README.md b/README.md index 9f690ce0..4cbae720 100644 --- a/README.md +++ b/README.md @@ -115,11 +115,10 @@ Now you can execute the `configure` script to generate makefiles (here from a bu ``` bash mkdir build; cd build -../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi-auto --prefix= +../configure --enable-simd=AVX --enable-comms=mpi-auto --prefix= ``` -where `--enable-precision=` set the default precision, -`--enable-simd=` set the SIMD type, `--enable- +where `--enable-simd=` set the SIMD type, `--enable- comms=`, and `` should be replaced by the prefix path where you want to install Grid. Other options are detailed in the next section, you can also use `configure --help` to display them. Like with any other program using GNU autotool, the @@ -150,8 +149,8 @@ If you want to build all the tests at once just use `make tests`. - `--enable-numa`: enable NUMA first touch optimisation - `--enable-simd=`: setup Grid for the SIMD target `` (default: `GEN`). A list of possible SIMD targets is detailed in a section below. - `--enable-gen-simd-width=`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes). -- `--enable-precision={single|double}`: set the default precision (default: `double`). -- `--enable-precision=`: Use `` for message passing (default: `none`). A list of possible SIMD targets is detailed in a section below. +- `--enable-precision={single|double}`: set the default precision (default: `double`). **Deprecated option** +- `--enable-comms=`: Use `` for message passing (default: `none`). A list of possible SIMD targets is detailed in a section below. - `--enable-rng={sitmo|ranlux48|mt19937}`: choose the RNG (default: `sitmo `). - `--disable-timers`: disable system dependent high-resolution timers. - `--enable-chroma`: enable Chroma regression tests. @@ -205,8 +204,7 @@ Alternatively, some CPU codenames can be directly used: The following configuration is recommended for the Intel Knights Landing platform: ``` bash -../configure --enable-precision=double\ - --enable-simd=KNL \ +../configure --enable-simd=KNL \ --enable-comms=mpi-auto \ --enable-mkl \ CXX=icpc MPICXX=mpiicpc @@ -216,8 +214,7 @@ The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library. If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: ``` bash -../configure --enable-precision=double\ - --enable-simd=KNL \ +../configure --enable-simd=KNL \ --enable-comms=mpi \ --enable-mkl \ CXX=CC CC=cc @@ -236,8 +233,7 @@ for interior communication. This is the mpi3 communications implementation. We recommend four ranks per node for best performance, but optimum is local volume dependent. ``` bash -../configure --enable-precision=double\ - --enable-simd=KNL \ +../configure --enable-simd=KNL \ --enable-comms=mpi3-auto \ --enable-mkl \ CC=icpc MPICXX=mpiicpc @@ -248,8 +244,7 @@ We recommend four ranks per node for best performance, but optimum is local volu The following configuration is recommended for the Intel Haswell platform: ``` bash -../configure --enable-precision=double\ - --enable-simd=AVX2 \ +../configure --enable-simd=AVX2 \ --enable-comms=mpi3-auto \ --enable-mkl \ CXX=icpc MPICXX=mpiicpc @@ -266,8 +261,7 @@ where `` is the UNIX prefix where GMP and MPFR are installed. If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: ``` bash -../configure --enable-precision=double\ - --enable-simd=AVX2 \ +../configure --enable-simd=AVX2 \ --enable-comms=mpi3 \ --enable-mkl \ CXX=CC CC=cc @@ -284,8 +278,7 @@ This is the default. The following configuration is recommended for the Intel Skylake platform: ``` bash -../configure --enable-precision=double\ - --enable-simd=AVX512 \ +../configure --enable-simd=AVX512 \ --enable-comms=mpi3 \ --enable-mkl \ CXX=mpiicpc @@ -302,8 +295,7 @@ where `` is the UNIX prefix where GMP and MPFR are installed. If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: ``` bash -../configure --enable-precision=double\ - --enable-simd=AVX512 \ +../configure --enable-simd=AVX512 \ --enable-comms=mpi3 \ --enable-mkl \ CXX=CC CC=cc @@ -334,8 +326,7 @@ and 8 threads per rank. The following configuration is recommended for the AMD EPYC platform. ``` bash -../configure --enable-precision=double\ - --enable-simd=AVX2 \ +../configure --enable-simd=AVX2 \ --enable-comms=mpi3 \ CXX=mpicxx ``` diff --git a/SVE_README.txt b/SVE_README.txt index 0c167c4a..cefec4be 100644 --- a/SVE_README.txt +++ b/SVE_README.txt @@ -12,31 +12,31 @@ module load mpi/openmpi-aarch64 scl enable gcc-toolset-10 bash -../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=g++ CC=gcc CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN" +../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=g++ CC=gcc CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN" * gcc 10.1 prebuild w/ MPI, QPACE4 interactive login scl enable gcc-toolset-10 bash module load mpi/openmpi-aarch64 -../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=mpi-auto --enable-shm=shmget --enable-openmp CXX=mpicxx CC=mpicc CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN" +../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=mpi-auto --enable-shm=shmget --enable-openmp CXX=mpicxx CC=mpicc CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN" ------------------------------------------------------------------------------ * armclang 20.2 (qp4) -../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DA64FX -DARMCLANGCOMPAT -DA64FXASM -DDSLASHINTRIN" +../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DA64FX -DARMCLANGCOMPAT -DA64FXASM -DDSLASHINTRIN" ------------------------------------------------------------------------------ * gcc 10.0.1 VLA (merlin) -../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=g++-10.0.1 CC=gcc-10.0.1 CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static +../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=g++-10.0.1 CC=gcc-10.0.1 CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static * gcc 10.0.1 fixed-size ACLE (merlin) -../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=g++-10.0.1 CC=gcc-10.0.1 CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN" +../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=g++-10.0.1 CC=gcc-10.0.1 CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN" * gcc 10.0.1 fixed-size ACLE (fjt) w/ MPI @@ -46,34 +46,34 @@ export OMPI_CXX=g++-10.0.1 export MPICH_CC=gcc-10.0.1 export MPICH_CXX=g++-10.0.1 -$ ../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=mpi3 --enable-openmp CXX=mpiFCC CC=mpifcc CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN -DTOFU -I/opt/FJSVxtclanga/tcsds-1.2.25/include/mpi/fujitsu -lrt" LDFLAGS="-L/opt/FJSVxtclanga/tcsds-1.2.25/lib64 -lrt" +$ ../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=mpi3 --enable-openmp CXX=mpiFCC CC=mpifcc CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN -DTOFU -I/opt/FJSVxtclanga/tcsds-1.2.25/include/mpi/fujitsu -lrt" LDFLAGS="-L/opt/FJSVxtclanga/tcsds-1.2.25/lib64 -lrt" -------------------------------------------------------- * armclang 20.0 VLA (merlin) -../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -fno-unroll-loops -mllvm -vectorizer-min-trip-count=2 -march=armv8-a+sve -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static +../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -fno-unroll-loops -mllvm -vectorizer-min-trip-count=2 -march=armv8-a+sve -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static TODO check ARMCLANGCOMPAT * armclang 20.1 VLA (merlin) -../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static +../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static TODO check ARMCLANGCOMPAT * armclang 20.1 VLA (fjt cluster) -../configure --with-lime=$HOME/local --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN -DTOFU" +../configure --with-lime=$HOME/local --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN -DTOFU" TODO check ARMCLANGCOMPAT * armclang 20.1 VLA w/MPI (fjt cluster) -../configure --with-lime=$HOME/local --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=mpi3 --enable-openmp CXX=mpiFCC CC=mpifcc CXXFLAGS="-std=c++11 -mcpu=a64fx -DA64FX -DA64FXASM -DDSLASHINTRIN -DTOFU -I/opt/FJSVxtclanga/tcsds-1.2.25/include/mpi/fujitsu -lrt" LDFLAGS="-L/opt/FJSVxtclanga/tcsds-1.2.25/lib64" +../configure --with-lime=$HOME/local --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=mpi3 --enable-openmp CXX=mpiFCC CC=mpifcc CXXFLAGS="-std=c++11 -mcpu=a64fx -DA64FX -DA64FXASM -DDSLASHINTRIN -DTOFU -I/opt/FJSVxtclanga/tcsds-1.2.25/include/mpi/fujitsu -lrt" LDFLAGS="-L/opt/FJSVxtclanga/tcsds-1.2.25/lib64" No ARMCLANGCOMPAT -> still correct ? @@ -81,9 +81,9 @@ No ARMCLANGCOMPAT -> still correct ? * Fujitsu fcc -../configure --with-lime=$HOME/grid-a64fx/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp --with-mpfr=/home/users/gre/gre-1/grid-a64fx/mpfr-build/install CXX=FCC CC=fcc CXXFLAGS="-Nclang -Kfast -DA64FX -DA64FXASM -DDSLASHINTRIN" +../configure --with-lime=$HOME/grid-a64fx/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp --with-mpfr=/home/users/gre/gre-1/grid-a64fx/mpfr-build/install CXX=FCC CC=fcc CXXFLAGS="-Nclang -Kfast -DA64FX -DA64FXASM -DDSLASHINTRIN" * Fujitsu fcc w/ MPI -../configure --with-lime=$HOME/grid-a64fx/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=mpi --enable-openmp --with-mpfr=/home/users/gre/gre-1/grid-a64fx/mpfr-build/install CXX=mpiFCC CC=mpifcc CXXFLAGS="-Nclang -Kfast -DA64FX -DA64FXASM -DDSLASHINTRIN -DTOFU" +../configure --with-lime=$HOME/grid-a64fx/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=mpi --enable-openmp --with-mpfr=/home/users/gre/gre-1/grid-a64fx/mpfr-build/install CXX=mpiFCC CC=mpifcc CXXFLAGS="-Nclang -Kfast -DA64FX -DA64FXASM -DDSLASHINTRIN -DTOFU" diff --git a/documentation/GridXcode/readme.md b/documentation/GridXcode/readme.md index b8342828..3bd6fc30 100644 --- a/documentation/GridXcode/readme.md +++ b/documentation/GridXcode/readme.md @@ -184,19 +184,19 @@ Below are shown the `configure` script invocations for three recommended configu This is the build for every day developing and debugging with Xcode. It uses the Xcode clang c++ compiler, without MPI, and defaults to double-precision. Xcode builds the `Debug` configuration with debug symbols for full debugging: - ../configure CXX=clang++ CXXFLAGS="-I$GridPkg/include/libomp -Xpreprocessor -fopenmp -std=c++11" LDFLAGS="-L$GridPkg/lib/libomp" LIBS="-lomp" --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-comms=none --enable-precision=double --prefix=$GridPre/Debug + ../configure CXX=clang++ CXXFLAGS="-I$GridPkg/include/libomp -Xpreprocessor -fopenmp -std=c++11" LDFLAGS="-L$GridPkg/lib/libomp" LIBS="-lomp" --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-comms=none --prefix=$GridPre/Debug #### 2. `Release` -Since Grid itself doesn't really have debug configurations, the release build is recommended to be the same as `Debug`, except using single-precision (handy for validation): +Since Grid itself doesn't really have debug configurations, the release build is recommended to be the same as `Debug`: - ../configure CXX=clang++ CXXFLAGS="-I$GridPkg/include/libomp -Xpreprocessor -fopenmp -std=c++11" LDFLAGS="-L$GridPkg/lib/libomp" LIBS="-lomp" --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-comms=none --enable-precision=single --prefix=$GridPre/Release + ../configure CXX=clang++ CXXFLAGS="-I$GridPkg/include/libomp -Xpreprocessor -fopenmp -std=c++11" LDFLAGS="-L$GridPkg/lib/libomp" LIBS="-lomp" --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-comms=none --prefix=$GridPre/Release #### 3. `MPIDebug` Debug configuration with MPI: - ../configure CXX=clang++ CXXFLAGS="-I$GridPkg/include/libomp -Xpreprocessor -fopenmp -std=c++11" LDFLAGS="-L$GridPkg/lib/libomp" LIBS="-lomp" --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-comms=mpi-auto MPICXX=$GridPre/bin/mpicxx --enable-precision=double --prefix=$GridPre/MPIDebug + ../configure CXX=clang++ CXXFLAGS="-I$GridPkg/include/libomp -Xpreprocessor -fopenmp -std=c++11" LDFLAGS="-L$GridPkg/lib/libomp" LIBS="-lomp" --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-comms=mpi-auto MPICXX=$GridPre/bin/mpicxx --prefix=$GridPre/MPIDebug ### 5.3 Build Grid diff --git a/documentation/manual.rst b/documentation/manual.rst index 1596de5e..d51f07c1 100644 --- a/documentation/manual.rst +++ b/documentation/manual.rst @@ -178,15 +178,10 @@ Then enter the cloned directory and set up the build system:: Now you can execute the `configure` script to generate makefiles (here from a build directory):: mkdir build; cd build - ../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi-auto \ + ../configure --enable-simd=AVX --enable-comms=mpi-auto \ --prefix= -where:: - - --enable-precision=single|double - -sets the **default precision**. Since this is largely a benchmarking convenience, it is anticipated that the default precision may be removed in future implementations, -and that explicit type selection be made at all points. Naturally, most code will be type templated in any case.:: +:: --enable-simd=GEN|SSE4|AVX|AVXFMA|AVXFMA4|AVX2|AVX512|NEONv8|QPX @@ -236,7 +231,7 @@ Detailed build configuration options --enable-mkl[=path] use Intel MKL for FFT (and LAPACK if enabled) routines. A UNIX prefix containing the library can be specified (optional). --enable-simd=code setup Grid for the SIMD target ``(default: `GEN`). A list of possible SIMD targets is detailed in a section below. --enable-gen-simd-width=size select the size (in bytes) of the generic SIMD vector type (default: 32 bytes). E.g. SSE 128 bit corresponds to 16 bytes. - --enable-precision=single|double set the default precision (default: `double`). + --enable-precision=single|double set the default precision (default: `double`). **Deprecated option** --enable-comms=mpi|none use `` for message passing (default: `none`). --enable-rng=sitmo|ranlux48|mt19937 choose the RNG (default: `sitmo`). --disable-timers disable system dependent high-resolution timers. @@ -304,8 +299,7 @@ Build setup for Intel Knights Landing platform The following configuration is recommended for the Intel Knights Landing platform:: - ../configure --enable-precision=double\ - --enable-simd=KNL \ + ../configure --enable-simd=KNL \ --enable-comms=mpi-auto \ --enable-mkl \ CXX=icpc MPICXX=mpiicpc @@ -314,8 +308,7 @@ The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library. If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:: - ../configure --enable-precision=double\ - --enable-simd=KNL \ + ../configure --enable-simd=KNL \ --enable-comms=mpi \ --enable-mkl \ CXX=CC CC=cc @@ -332,8 +325,7 @@ presently performs better with use of more than one rank per node, using shared for interior communication. We recommend four ranks per node for best performance, but optimum is local volume dependent. :: - ../configure --enable-precision=double\ - --enable-simd=KNL \ + ../configure --enable-simd=KNL \ --enable-comms=mpi-auto \ --enable-mkl \ CC=icpc MPICXX=mpiicpc @@ -343,8 +335,7 @@ Build setup for Intel Haswell Xeon platform The following configuration is recommended for the Intel Haswell platform:: - ../configure --enable-precision=double\ - --enable-simd=AVX2 \ + ../configure --enable-simd=AVX2 \ --enable-comms=mpi-auto \ --enable-mkl \ CXX=icpc MPICXX=mpiicpc @@ -360,8 +351,7 @@ where `` is the UNIX prefix where GMP and MPFR are installed. If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:: - ../configure --enable-precision=double\ - --enable-simd=AVX2 \ + ../configure --enable-simd=AVX2 \ --enable-comms=mpi \ --enable-mkl \ CXX=CC CC=cc @@ -379,8 +369,7 @@ Build setup for Intel Skylake Xeon platform The following configuration is recommended for the Intel Skylake platform:: - ../configure --enable-precision=double\ - --enable-simd=AVX512 \ + ../configure --enable-simd=AVX512 \ --enable-comms=mpi \ --enable-mkl \ CXX=mpiicpc @@ -396,8 +385,7 @@ where `` is the UNIX prefix where GMP and MPFR are installed. If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:: - ../configure --enable-precision=double\ - --enable-simd=AVX512 \ + ../configure --enable-simd=AVX512 \ --enable-comms=mpi \ --enable-mkl \ CXX=CC CC=cc @@ -422,8 +410,7 @@ and 8 threads per rank. The following configuration is recommended for the AMD EPYC platform:: - ../configure --enable-precision=double\ - --enable-simd=AVX2 \ + ../configure --enable-simd=AVX2 \ --enable-comms=mpi \ CXX=mpicxx From 3f0620972061a62fe8802e25ca43d896d9172f09 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 13 Oct 2020 22:18:51 -0400 Subject: [PATCH 18/24] Pretty print --- benchmarks/Benchmark_ITT.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/benchmarks/Benchmark_ITT.cc b/benchmarks/Benchmark_ITT.cc index eb275728..54fe1ab0 100644 --- a/benchmarks/Benchmark_ITT.cc +++ b/benchmarks/Benchmark_ITT.cc @@ -62,7 +62,7 @@ struct time_statistics{ void comms_header(){ std::cout < Date: Tue, 13 Oct 2020 22:23:57 -0400 Subject: [PATCH 19/24] Reality forced included --- Grid/lattice/Lattice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Grid/lattice/Lattice.h b/Grid/lattice/Lattice.h index 28ea0294..9f5f1da7 100644 --- a/Grid/lattice/Lattice.h +++ b/Grid/lattice/Lattice.h @@ -36,7 +36,7 @@ Author: Peter Boyle #include #include #include -//#include +#include #include #include #include From 9945399e609945bae2c01492cbb4ab56a9246ec8 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 13 Oct 2020 22:24:32 -0400 Subject: [PATCH 20/24] Reaality issues fix by drop from ET --- Grid/lattice/Lattice_ET.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/Grid/lattice/Lattice_ET.h b/Grid/lattice/Lattice_ET.h index c43844f8..8d9f4744 100644 --- a/Grid/lattice/Lattice_ET.h +++ b/Grid/lattice/Lattice_ET.h @@ -342,14 +342,10 @@ inline void ExpressionViewClose(LatticeTrinaryExpression &expr) GridUnopClass(UnarySub, -a); GridUnopClass(UnaryNot, Not(a)); -GridUnopClass(UnaryAdj, adj(a)); -GridUnopClass(UnaryConj, conjugate(a)); GridUnopClass(UnaryTrace, trace(a)); GridUnopClass(UnaryTranspose, transpose(a)); GridUnopClass(UnaryTa, Ta(a)); GridUnopClass(UnaryProjectOnGroup, ProjectOnGroup(a)); -GridUnopClass(UnaryToReal, toReal(a)); -GridUnopClass(UnaryToComplex, toComplex(a)); GridUnopClass(UnaryTimesI, timesI(a)); GridUnopClass(UnaryTimesMinusI, timesMinusI(a)); GridUnopClass(UnaryAbs, abs(a)); @@ -456,14 +452,12 @@ GridTrinOpClass(TrinaryWhere, GRID_DEF_UNOP(operator-, UnarySub); GRID_DEF_UNOP(Not, UnaryNot); GRID_DEF_UNOP(operator!, UnaryNot); -GRID_DEF_UNOP(adj, UnaryAdj); -GRID_DEF_UNOP(conjugate, UnaryConj); +//GRID_DEF_UNOP(adj, UnaryAdj); +//GRID_DEF_UNOP(conjugate, UnaryConj); GRID_DEF_UNOP(trace, UnaryTrace); GRID_DEF_UNOP(transpose, UnaryTranspose); GRID_DEF_UNOP(Ta, UnaryTa); GRID_DEF_UNOP(ProjectOnGroup, UnaryProjectOnGroup); -GRID_DEF_UNOP(toReal, UnaryToReal); -GRID_DEF_UNOP(toComplex, UnaryToComplex); GRID_DEF_UNOP(timesI, UnaryTimesI); GRID_DEF_UNOP(timesMinusI, UnaryTimesMinusI); GRID_DEF_UNOP(abs, UnaryAbs); // abs overloaded in cmath C++98; DON'T do the From aa135412f554b5712a62164fdd3136f7e38e16c5 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 13 Oct 2020 22:25:01 -0400 Subject: [PATCH 21/24] toComplex, toReal --- Grid/lattice/Lattice_reality.h | 37 ++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/Grid/lattice/Lattice_reality.h b/Grid/lattice/Lattice_reality.h index 61491d6b..e07dd545 100644 --- a/Grid/lattice/Lattice_reality.h +++ b/Grid/lattice/Lattice_reality.h @@ -64,6 +64,43 @@ template inline Lattice conjugate(const Lattice &lhs){ return ret; }; +template inline Lattice toComplex(const Lattice &lhs){ + Lattice ret(lhs.Grid()); + + autoView( lhs_v, lhs, AcceleratorRead); + autoView( ret_v, ret, AcceleratorWrite); + + ret.Checkerboard() = lhs.Checkerboard(); + accelerator_for( ss, lhs_v.size(), 1, { + ret_v[ss] = toComplex(lhs_v[ss]); + }); + return ret; +}; +template inline Lattice toReal(const Lattice &lhs){ + Lattice ret(lhs.Grid()); + + autoView( lhs_v, lhs, AcceleratorRead); + autoView( ret_v, ret, AcceleratorWrite); + + ret.Checkerboard() = lhs.Checkerboard(); + accelerator_for( ss, lhs_v.size(), 1, { + ret_v[ss] = toReal(lhs_v[ss]); + }); + return ret; +}; + + +template::value,void>::type * = nullptr> +auto toComplex(const Expression &expr) -> decltype(closure(expr)) +{ + return toComplex(closure(expr)); +} +template::value,void>::type * = nullptr> +auto toReal(const Expression &expr) -> decltype(closure(expr)) +{ + return toReal(closure(expr)); +} + NAMESPACE_END(Grid); #endif From a88b3ceca57b616b21e88896a02d5bf224de7242 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 14 Oct 2020 21:33:51 -0400 Subject: [PATCH 22/24] Closure cases --- Grid/lattice/Lattice_reality.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Grid/lattice/Lattice_reality.h b/Grid/lattice/Lattice_reality.h index e07dd545..2e80ce4a 100644 --- a/Grid/lattice/Lattice_reality.h +++ b/Grid/lattice/Lattice_reality.h @@ -100,6 +100,16 @@ auto toReal(const Expression &expr) -> decltype(closure(expr)) { return toReal(closure(expr)); } +template::value,void>::type * = nullptr> +auto adj(const Expression &expr) -> decltype(closure(expr)) +{ + return adj(closure(expr)); +} +template::value,void>::type * = nullptr> +auto conjugate(const Expression &expr) -> decltype(closure(expr)) +{ + return conjugate(closure(expr)); +} NAMESPACE_END(Grid); From bf3c9857e0409036ddb9922775e3c7c8d7e331af Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 14 Oct 2020 21:37:14 -0400 Subject: [PATCH 23/24] Closure changes --- .../implementation/WilsonCloverFermionImplementation.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h index df1bce7c..e721c20d 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h @@ -133,14 +133,14 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) pickCheckerboard(Even, CloverTermEven, CloverTerm); pickCheckerboard(Odd, CloverTermOdd, CloverTerm); - pickCheckerboard(Even, CloverTermDagEven, closure(adj(CloverTerm))); - pickCheckerboard(Odd, CloverTermDagOdd, closure(adj(CloverTerm))); + pickCheckerboard(Even, CloverTermDagEven, adj(CloverTerm)); + pickCheckerboard(Odd, CloverTermDagOdd, adj(CloverTerm)); pickCheckerboard(Even, CloverTermInvEven, CloverTermInv); pickCheckerboard(Odd, CloverTermInvOdd, CloverTermInv); - pickCheckerboard(Even, CloverTermInvDagEven, closure(adj(CloverTermInv))); - pickCheckerboard(Odd, CloverTermInvDagOdd, closure(adj(CloverTermInv))); + pickCheckerboard(Even, CloverTermInvDagEven, adj(CloverTermInv)); + pickCheckerboard(Odd, CloverTermInvDagOdd, adj(CloverTermInv)); } template From 3362f8dfa0b9e1122a5923d9d13becdab534e54a Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 14 Oct 2020 22:59:41 -0400 Subject: [PATCH 24/24] happy compile --- Grid/lattice/Lattice_ET.h | 16 ++--- Grid/lattice/Lattice_reality.h | 4 +- Grid/qcd/utils/SUn.h | 3 +- tests/core/Test_lie_generators.cc | 112 +++++++++++++++--------------- 4 files changed, 68 insertions(+), 67 deletions(-) diff --git a/Grid/lattice/Lattice_ET.h b/Grid/lattice/Lattice_ET.h index 8d9f4744..f828ef30 100644 --- a/Grid/lattice/Lattice_ET.h +++ b/Grid/lattice/Lattice_ET.h @@ -488,27 +488,27 @@ GRID_DEF_TRINOP(where, TrinaryWhere); ///////////////////////////////////////////////////////////// template auto closure(const LatticeUnaryExpression &expr) - -> Lattice + -> Lattice::type > { - Lattice ret(expr); + Lattice::type > ret(expr); return ret; } template auto closure(const LatticeBinaryExpression &expr) - -> Lattice + -> Lattice::type > { - Lattice ret(expr); + Lattice::type > ret(expr); return ret; } template auto closure(const LatticeTrinaryExpression &expr) - -> Lattice Lattice + vecEval(0, expr.arg3)))>::type > { - Lattice ret(expr); + vecEval(0, expr.arg3)))>::type > ret(expr); return ret; } #define EXPRESSION_CLOSURE(function) \ diff --git a/Grid/lattice/Lattice_reality.h b/Grid/lattice/Lattice_reality.h index 2e80ce4a..51deeb01 100644 --- a/Grid/lattice/Lattice_reality.h +++ b/Grid/lattice/Lattice_reality.h @@ -45,8 +45,8 @@ template inline Lattice adj(const Lattice &lhs){ autoView( ret_v, ret, AcceleratorWrite); ret.Checkerboard()=lhs.Checkerboard(); - accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), { - coalescedWrite(ret_v[ss], adj(lhs_v(ss))); + accelerator_for( ss, lhs_v.size(), 1, { + ret_v[ss] = adj(lhs_v[ss]); }); return ret; }; diff --git a/Grid/qcd/utils/SUn.h b/Grid/qcd/utils/SUn.h index 0cc0cc1a..7ac53246 100644 --- a/Grid/qcd/utils/SUn.h +++ b/Grid/qcd/utils/SUn.h @@ -449,7 +449,8 @@ public: LatticeReal alpha(grid); // std::cout<::printGenerators(); - std::cout << "Dimension of adjoint representation: "<< SUAdjoint::Dimension << std::endl; - SUAdjoint::printGenerators(); - SU::testGenerators(); - SUAdjoint::testGenerators(); + SU3::printGenerators(); + std::cout << "Dimension of adjoint representation: "<< SU3Adjoint::Dimension << std::endl; + SU3Adjoint::printGenerators(); + SU3::testGenerators(); + SU3Adjoint::testGenerators(); std::cout<({45,12,81,9})); - SUAdjoint::LatticeAdjMatrix Gauss(grid); - SU::LatticeAlgebraVector ha(grid); - SU::LatticeAlgebraVector hb(grid); + SU3Adjoint::LatticeAdjMatrix Gauss(grid); + SU3::LatticeAlgebraVector ha(grid); + SU3::LatticeAlgebraVector hb(grid); random(gridRNG,Gauss); std::cout << GridLogMessage << "Start projectOnAlgebra" << std::endl; - SUAdjoint::projectOnAlgebra(ha, Gauss); + SU3Adjoint::projectOnAlgebra(ha, Gauss); std::cout << GridLogMessage << "end projectOnAlgebra" << std::endl; std::cout << GridLogMessage << "Start projector" << std::endl; - SUAdjoint::projector(hb, Gauss); + SU3Adjoint::projector(hb, Gauss); std::cout << GridLogMessage << "end projector" << std::endl; std::cout << GridLogMessage << "ReStart projector" << std::endl; - SUAdjoint::projector(hb, Gauss); + SU3Adjoint::projector(hb, Gauss); std::cout << GridLogMessage << "end projector" << std::endl; - SU::LatticeAlgebraVector diff = ha -hb; + SU3::LatticeAlgebraVector diff = ha -hb; std::cout << GridLogMessage << "Difference: " << norm2(diff) << std::endl; @@ -114,8 +114,8 @@ int main(int argc, char** argv) { LatticeGaugeField U(grid), V(grid); - SU::HotConfiguration(gridRNG, U); - SU::HotConfiguration(gridRNG, V); + SU3::HotConfiguration(gridRNG, U); + SU3::HotConfiguration(gridRNG, V); // Adjoint representation // Test group structure @@ -123,8 +123,8 @@ int main(int argc, char** argv) { LatticeGaugeField UV(grid); UV = Zero(); for (int mu = 0; mu < Nd; mu++) { - SU::LatticeMatrix Umu = peekLorentz(U,mu); - SU::LatticeMatrix Vmu = peekLorentz(V,mu); + SU3::LatticeMatrix Umu = peekLorentz(U,mu); + SU3::LatticeMatrix Vmu = peekLorentz(V,mu); pokeLorentz(UV,Umu*Vmu, mu); } @@ -151,16 +151,16 @@ int main(int argc, char** argv) { // Check correspondence of algebra and group transformations // Create a random vector - SU::LatticeAlgebraVector h_adj(grid); + SU3::LatticeAlgebraVector h_adj(grid); typename AdjointRep::LatticeMatrix Ar(grid); random(gridRNG,h_adj); h_adj = real(h_adj); SU_Adjoint::AdjointLieAlgebraMatrix(h_adj,Ar); // Re-extract h_adj - SU::LatticeAlgebraVector h_adj2(grid); + SU3::LatticeAlgebraVector h_adj2(grid); SU_Adjoint::projectOnAlgebra(h_adj2, Ar); - SU::LatticeAlgebraVector h_diff = h_adj - h_adj2; + SU3::LatticeAlgebraVector h_diff = h_adj - h_adj2; std::cout << GridLogMessage << "Projections structure check vector difference (Adjoint representation) : " << norm2(h_diff) << std::endl; // Exponentiate @@ -183,14 +183,14 @@ int main(int argc, char** argv) { // Construct the fundamental matrix in the group - SU::LatticeMatrix Af(grid); - SU::FundamentalLieAlgebraMatrix(h_adj,Af); - SU::LatticeMatrix Ufund(grid); + SU3::LatticeMatrix Af(grid); + SU3::FundamentalLieAlgebraMatrix(h_adj,Af); + SU3::LatticeMatrix Ufund(grid); Ufund = expMat(Af, 1.0, 16); // Check unitarity - SU::LatticeMatrix uno_f(grid); + SU3::LatticeMatrix uno_f(grid); uno_f = 1.0; - SU::LatticeMatrix UnitCheck(grid); + SU3::LatticeMatrix UnitCheck(grid); UnitCheck = Ufund * adj(Ufund) - uno_f; std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck) << std::endl; @@ -260,20 +260,20 @@ int main(int argc, char** argv) { std::cout << GridLogMessage << "Test for the Two Index Symmetric projectors" << std::endl; // Projectors - SUTwoIndexSymm::LatticeTwoIndexMatrix Gauss2(grid); + SU3TwoIndexSymm::LatticeTwoIndexMatrix Gauss2(grid); random(gridRNG,Gauss2); std::cout << GridLogMessage << "Start projectOnAlgebra" << std::endl; - SUTwoIndexSymm::projectOnAlgebra(ha, Gauss2); + SU3TwoIndexSymm::projectOnAlgebra(ha, Gauss2); std::cout << GridLogMessage << "end projectOnAlgebra" << std::endl; std::cout << GridLogMessage << "Start projector" << std::endl; - SUTwoIndexSymm::projector(hb, Gauss2); + SU3TwoIndexSymm::projector(hb, Gauss2); std::cout << GridLogMessage << "end projector" << std::endl; std::cout << GridLogMessage << "ReStart projector" << std::endl; - SUTwoIndexSymm::projector(hb, Gauss2); + SU3TwoIndexSymm::projector(hb, Gauss2); std::cout << GridLogMessage << "end projector" << std::endl; - SU::LatticeAlgebraVector diff2 = ha - hb; + SU3::LatticeAlgebraVector diff2 = ha - hb; std::cout << GridLogMessage << "Difference: " << norm2(diff) << std::endl; std::cout << GridLogMessage << "*********************************************" << std::endl; @@ -284,20 +284,20 @@ int main(int argc, char** argv) { std::cout << GridLogMessage << "Test for the Two index anti-Symmetric projectors" << std::endl; // Projectors - SUTwoIndexAntiSymm::LatticeTwoIndexMatrix Gauss2a(grid); + SU3TwoIndexAntiSymm::LatticeTwoIndexMatrix Gauss2a(grid); random(gridRNG,Gauss2a); std::cout << GridLogMessage << "Start projectOnAlgebra" << std::endl; - SUTwoIndexAntiSymm::projectOnAlgebra(ha, Gauss2a); + SU3TwoIndexAntiSymm::projectOnAlgebra(ha, Gauss2a); std::cout << GridLogMessage << "end projectOnAlgebra" << std::endl; std::cout << GridLogMessage << "Start projector" << std::endl; - SUTwoIndexAntiSymm::projector(hb, Gauss2a); + SU3TwoIndexAntiSymm::projector(hb, Gauss2a); std::cout << GridLogMessage << "end projector" << std::endl; std::cout << GridLogMessage << "ReStart projector" << std::endl; - SUTwoIndexAntiSymm::projector(hb, Gauss2a); + SU3TwoIndexAntiSymm::projector(hb, Gauss2a); std::cout << GridLogMessage << "end projector" << std::endl; - SU::LatticeAlgebraVector diff2a = ha - hb; + SU3::LatticeAlgebraVector diff2a = ha - hb; std::cout << GridLogMessage << "Difference: " << norm2(diff2a) << std::endl; std::cout << GridLogMessage << "*********************************************" << std::endl; @@ -311,14 +311,14 @@ int main(int argc, char** argv) { // Test group structure // (U_f * V_f)_r = U_r * V_r LatticeGaugeField U2(grid), V2(grid); - SU::HotConfiguration(gridRNG, U2); - SU::HotConfiguration(gridRNG, V2); + SU3::HotConfiguration(gridRNG, U2); + SU3::HotConfiguration(gridRNG, V2); LatticeGaugeField UV2(grid); UV2 = Zero(); for (int mu = 0; mu < Nd; mu++) { - SU::LatticeMatrix Umu2 = peekLorentz(U2,mu); - SU::LatticeMatrix Vmu2 = peekLorentz(V2,mu); + SU3::LatticeMatrix Umu2 = peekLorentz(U2,mu); + SU3::LatticeMatrix Vmu2 = peekLorentz(V2,mu); pokeLorentz(UV2,Umu2*Vmu2, mu); } @@ -345,16 +345,16 @@ int main(int argc, char** argv) { // Check correspondence of algebra and group transformations // Create a random vector - SU::LatticeAlgebraVector h_sym(grid); + SU3::LatticeAlgebraVector h_sym(grid); typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix Ar_sym(grid); random(gridRNG,h_sym); h_sym = real(h_sym); SU_TwoIndex::TwoIndexLieAlgebraMatrix(h_sym,Ar_sym); // Re-extract h_sym - SU::LatticeAlgebraVector h_sym2(grid); + SU3::LatticeAlgebraVector h_sym2(grid); SU_TwoIndex< Nc, Symmetric>::projectOnAlgebra(h_sym2, Ar_sym); - SU::LatticeAlgebraVector h_diff_sym = h_sym - h_sym2; + SU3::LatticeAlgebraVector h_diff_sym = h_sym - h_sym2; std::cout << GridLogMessage << "Projections structure check vector difference (Two Index Symmetric): " << norm2(h_diff_sym) << std::endl; @@ -379,11 +379,11 @@ int main(int argc, char** argv) { // Construct the fundamental matrix in the group - SU::LatticeMatrix Af_sym(grid); - SU::FundamentalLieAlgebraMatrix(h_sym,Af_sym); - SU::LatticeMatrix Ufund2(grid); + SU3::LatticeMatrix Af_sym(grid); + SU3::FundamentalLieAlgebraMatrix(h_sym,Af_sym); + SU3::LatticeMatrix Ufund2(grid); Ufund2 = expMat(Af_sym, 1.0, 16); - SU::LatticeMatrix UnitCheck2(grid); + SU3::LatticeMatrix UnitCheck2(grid); UnitCheck2 = Ufund2 * adj(Ufund2) - uno_f; std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck2) << std::endl; @@ -421,14 +421,14 @@ int main(int argc, char** argv) { // Test group structure // (U_f * V_f)_r = U_r * V_r LatticeGaugeField U2A(grid), V2A(grid); - SU::HotConfiguration(gridRNG, U2A); - SU::HotConfiguration(gridRNG, V2A); + SU3::HotConfiguration(gridRNG, U2A); + SU3::HotConfiguration(gridRNG, V2A); LatticeGaugeField UV2A(grid); UV2A = Zero(); for (int mu = 0; mu < Nd; mu++) { - SU::LatticeMatrix Umu2A = peekLorentz(U2,mu); - SU::LatticeMatrix Vmu2A = peekLorentz(V2,mu); + SU3::LatticeMatrix Umu2A = peekLorentz(U2,mu); + SU3::LatticeMatrix Vmu2A = peekLorentz(V2,mu); pokeLorentz(UV2A,Umu2A*Vmu2A, mu); } @@ -455,16 +455,16 @@ int main(int argc, char** argv) { // Check correspondence of algebra and group transformations // Create a random vector - SU::LatticeAlgebraVector h_Asym(grid); + SU3::LatticeAlgebraVector h_Asym(grid); typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Ar_Asym(grid); random(gridRNG,h_Asym); h_Asym = real(h_Asym); SU_TwoIndex< Nc, AntiSymmetric>::TwoIndexLieAlgebraMatrix(h_Asym,Ar_Asym); // Re-extract h_sym - SU::LatticeAlgebraVector h_Asym2(grid); + SU3::LatticeAlgebraVector h_Asym2(grid); SU_TwoIndex< Nc, AntiSymmetric>::projectOnAlgebra(h_Asym2, Ar_Asym); - SU::LatticeAlgebraVector h_diff_Asym = h_Asym - h_Asym2; + SU3::LatticeAlgebraVector h_diff_Asym = h_Asym - h_Asym2; std::cout << GridLogMessage << "Projections structure check vector difference (Two Index anti-Symmetric): " << norm2(h_diff_Asym) << std::endl; @@ -489,11 +489,11 @@ int main(int argc, char** argv) { // Construct the fundamental matrix in the group - SU::LatticeMatrix Af_Asym(grid); - SU::FundamentalLieAlgebraMatrix(h_Asym,Af_Asym); - SU::LatticeMatrix Ufund2A(grid); + SU3::LatticeMatrix Af_Asym(grid); + SU3::FundamentalLieAlgebraMatrix(h_Asym,Af_Asym); + SU3::LatticeMatrix Ufund2A(grid); Ufund2A = expMat(Af_Asym, 1.0, 16); - SU::LatticeMatrix UnitCheck2A(grid); + SU3::LatticeMatrix UnitCheck2A(grid); UnitCheck2A = Ufund2A * adj(Ufund2A) - uno_f; std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck2A) << std::endl;