From 9015c229dc194fdf2b6552e6137b7ffbe23c6e78 Mon Sep 17 00:00:00 2001 From: david clarke Date: Tue, 27 Jun 2023 21:28:26 -0600 Subject: [PATCH] add benchmark to see whether matrix multiplication is slower than read from object --- Grid/qcd/smearing/HISQSmearing.h | 149 +++------------------- benchmarks/Benchmark_su3mult_vs_lookup.cc | 125 ++++++++++++++++++ tests/smearing/Test_fatLinks.cc | 78 ++++++----- 3 files changed, 188 insertions(+), 164 deletions(-) create mode 100644 benchmarks/Benchmark_su3mult_vs_lookup.cc diff --git a/Grid/qcd/smearing/HISQSmearing.h b/Grid/qcd/smearing/HISQSmearing.h index e8587c9b..0c11bde4 100644 --- a/Grid/qcd/smearing/HISQSmearing.h +++ b/Grid/qcd/smearing/HISQSmearing.h @@ -2,9 +2,9 @@ Grid physics library, www.github.com/paboyle/Grid -Source file: ./lib/qcd/smearing/StoutSmearing.h +Source file: ./lib/qcd/smearing/HISQSmearing.h -Copyright (C) 2019 +Copyright (C) 2023 Author: D. A. Clarke @@ -56,7 +56,7 @@ template void gpermute(vobj & inout,int perm) { /*! @brief signals that you want to go backwards in direction dir */ inline int Back(const int dir) { // generalShift will use BACKWARD_CONST to determine whether we step forward or - // backward. Should work as long as BACKWARD_CONST > Nd. + // backward. Should work as long as BACKWARD_CONST > Nd. Trick inspired by SIMULATeQCD. return dir + BACKWARD_CONST; } @@ -191,45 +191,33 @@ public: for(int mu=0;mu_offset; - int o1 = SE1->_offset; - int o2 = SE2->_offset; - int o3 = SE3->_offset; - int o4 = SE4->_offset; + auto SE0 = gStencil.GetEntry(s+0,ss); int x_p_mu = SE0->_offset; + auto SE1 = gStencil.GetEntry(s+1,ss); int x_p_nu = SE1->_offset; + auto SE2 = gStencil.GetEntry(s+2,ss); int x = SE2->_offset; + auto SE3 = gStencil.GetEntry(s+3,ss); int x_p_mu_m_nu = SE3->_offset; + auto SE4 = gStencil.GetEntry(s+4,ss); int x_m_nu = SE4->_offset; // When you're deciding whether to take an adjoint, the question is: how is the // stored link oriented compared to the one you want? If I imagine myself travelling // with the to-be-updated link, I have two possible, alternative 3-link paths I can // take, one starting by going to the left, the other starting by going to the right. - auto U0 = adj(U_v[o0](nu)); - auto U1 = U_v[o1](mu); - auto U2 = U_v[o2](nu); + auto U0 = adj(U_v[x_p_mu](nu)); + auto U1 = U_v[x_p_nu](mu) ; + auto U2 = U_v[x ](nu) ; gpermute(U0,SE0->_permute); gpermute(U1,SE1->_permute); gpermute(U2,SE2->_permute); - auto U3 = U_v[o3](nu); - auto U4 = U_v[o4](mu); - auto U5 = adj(U_v[o4](nu)); + auto U3 = U_v[x_p_mu_m_nu](nu) ; + auto U4 = U_v[x_m_nu ](mu) ; + auto U5 = adj(U_v[x_m_nu ](nu)); gpermute(U3,SE3->_permute); gpermute(U4,SE4->_permute); - gpermute(U4,SE4->_permute); // "left" "right" auto W = U2*U1*U0 + U5*U4*U3; @@ -265,111 +253,8 @@ public: ~Smear_HISQ_Naik() {} - void smear(LGF& u_smr, const LGF& U) const { - - int depth = 1; - PaddedCell Ghost(depth,this->_grid); - LGF Ughost = Ghost.Exchange(u_smr); - - GridBase *GhostGrid = Ughost.Grid(); - LatticeComplex gplaq(GhostGrid); - - LGF Ughost_naik(Ughost.Grid()); - - std::vector shifts; - for(int mu=0;mu_offset; - int o1 = SE1->_offset; - int o2 = SE2->_offset; - int o3 = SE3->_offset; - int o4 = SE4->_offset; - int o5 = SE5->_offset; - - auto U0 = U_v[o0](nu); - auto U1 = adj(U_v[o1](mu)); - auto U2 = adj(U_v[o2](nu)); - - gpermute(U0,SE0->_permute); - gpermute(U1,SE1->_permute); - gpermute(U2,SE2->_permute); - - auto U3 = adj(U_v[o3](nu)); - auto U4 = adj(U_v[o4](mu)); - auto U5 = U_v[o5](nu); - - gpermute(U3,SE3->_permute); - gpermute(U4,SE4->_permute); - gpermute(U5,SE5->_permute); - - // Forward contribution from this orientation - auto W = U0*U1*U2; - U_naik_v[ss](mu) = U_naik_v[ss](mu) + W; - - // Backward contribution from this orientation - W = U3*U4*U5; - U_naik_v[ss](mu) = U_naik_v[ss](mu) + W; - - s=s+6; - } - } - } - - // Here is my understanding of this part: The padded cell has its own periodic BCs, so - // if I take a step to the right at the right-most side of the cell, I end up on the - // left-most side. This means that the plaquettes in the padding are wrong. Luckily - // all we care about are the plaquettes in the cell, which we obtain from Extract. - u_smr = Ghost.Extract(Ughost_naik); - }; +// void smear(LGF& u_smr, const LGF& U) const { +// }; // void derivative(const GaugeField& Gauge) const { // }; diff --git a/benchmarks/Benchmark_su3mult_vs_lookup.cc b/benchmarks/Benchmark_su3mult_vs_lookup.cc new file mode 100644 index 00000000..5f9c98ba --- /dev/null +++ b/benchmarks/Benchmark_su3mult_vs_lookup.cc @@ -0,0 +1,125 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./benchmarks/Benchmark_su3mult_vs_lookup.cc + + Copyright (C) 2023 + + Author: D. A. Clarke + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* + @file Benchmark_su3mult_vs_lookup.cc + @brief check to see whether su3 multiplication or lookup tables is faster +*/ + +#include +using namespace Grid; + +/*! @brief make the logger work like python print */ +template +inline std::string sjoin(Args&&... args) noexcept { + std::ostringstream msg; + (msg << ... << args); + return msg.str(); +} +template +inline void Grid_log(Args&&... args) { + std::string msg = sjoin(std::forward(args)...); + std::cout << GridLogMessage << msg << std::endl; +} + +/*! @brief parameter file to easily adjust Nloop */ +struct ConfParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS( + ConfParameters, + int, Nloop); + + template + ConfParameters(Reader& Reader){ + read(Reader, "parameters", *this); + } +}; + +int main (int argc, char** argv) { + + // Params for the test. + int Ns = 8; + int Nt = 4; + int threads = GridThread::GetThreads(); + std::string conf_in = "nersc.l8t4b3360"; + Coordinate latt_size(Nd,0); latt_size[0]=Ns; latt_size[1]=Ns; latt_size[2]=Ns; latt_size[3]=Nt; + + Grid_init(&argc,&argv); + + Coordinate simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd()); + Coordinate mpi_layout = GridDefaultMpi(); + GridCartesian GRID(latt_size,simd_layout,mpi_layout); + + Grid_log(" mpi = ",mpi_layout); + Grid_log(" simd = ",simd_layout); + Grid_log(" latt = ",latt_size); + Grid_log("threads = ",threads); + + XmlReader Reader("mult_vs_lookup.xml",false, "grid"); + ConfParameters param(Reader); + Grid_log(" Nloop = ",param.Nloop); + + // Gauge field and accessor + LatticeGaugeField Umu(&GRID); + autoView(U_v, Umu, CpuRead); + + // Read the configuration into Umu + FieldMetaData header; + NerscIO::readConfiguration(Umu, header, conf_in); + + // Read in lattice sequentially, Nloop times + double lookupTime = 0.; + for(int i=0;i + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* + @file Test_fatLinks.cc + @brief test of the HISQ smearing +*/ + #include #include @@ -14,7 +38,7 @@ using namespace Grid; -// Make the logger work like Python print() +/*! @brief make the logger work like python print */ template inline std::string sjoin(Args&&... args) noexcept { std::ostringstream msg; @@ -27,32 +51,26 @@ inline void Grid_log(Args&&... args) { std::cout << GridLogMessage << msg << std::endl; } -struct fatParams: Serializable { - GRID_SERIALIZABLE_CLASS_MEMBERS( - fatParams, - std::string, conf_in, - std::string, conf_out); - - template - fatParams(Reader& Reader){ - read(Reader, "parameters", *this); - } -}; - // // one method: input --> fat // another : input --> long (naik) // another : input --> unitarize // -int main (int argc, char **argv) -{ +int main (int argc, char** argv) { + + // Params for the test. + int Ns = 8; + int Nt = 4; + Coordinate latt_size(Nd,0); latt_size[0]=Ns; latt_size[1]=Ns; latt_size[2]=Ns; latt_size[3]=Nt; + std::string conf_in = "nersc.l8t4b3360"; + std::string conf_out = "nersc.l8t4b3360.3link"; + // Initialize the Grid Grid_init(&argc,&argv); - Coordinate latt_size = GridDefaultLatt(); Coordinate simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd()); Coordinate mpi_layout = GridDefaultMpi(); - Grid_log("mpi = ",mpi_layout); + Grid_log(" mpi = ",mpi_layout); Grid_log("simd = ",simd_layout); Grid_log("latt = ",latt_size); GridCartesian GRID(latt_size,simd_layout,mpi_layout); @@ -61,19 +79,15 @@ int main (int argc, char **argv) LatticeGaugeField Umu(&GRID); LatticeGaugeField U_smr(&GRID); - // Read in the parameter file - XmlReader Reader("fatParams.xml",false, "grid"); - fatParams param(Reader); - FieldMetaData header; - // Read the configuration into Umu - NerscIO::readConfiguration(Umu, header, param.conf_in); + FieldMetaData header; + NerscIO::readConfiguration(Umu, header, conf_in); // Smear Umu and store result in U_smr Smear_HISQ_fat hisq_fat(&GRID,1/8.,0.,1/16.,1/64.,1/384.,0.); hisq_fat.smear(U_smr,Umu); - NerscIO::writeConfiguration(U_smr,param.conf_out,"HISQ"); + NerscIO::writeConfiguration(U_smr,conf_out,"HISQ"); // Test a C-style instantiation double path_coeff[6] = {1, 2, 3, 4, 5, 6};