Merge pull request #424 from giltirn/feature/dirichlet-precchange

Precision change implementation
2026-05-06 02:04:33 +01:00 · 2023-03-22 23:04:52 -04:00
parent 52384e34cf 83d86943db
commit 23298acb81
9 changed files with 744 additions and 19 deletions
@@ -0,0 +1,189 @@
+/*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./benchmarks/Benchmark_prec_change.cc
+
+    Copyright (C) 2015
+
+Author: Christopher Kelly <ckelly@bnl.gov>
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace std;
+using namespace Grid;
+
+int main (int argc, char ** argv)
+{ // Benchmark driver: times several double<->single precision-change routines on fermion fields living on the five-dimensional grids.
+  Grid_init(&argc,&argv);
+  // Each test below averages N timed calls per direction and prints the mean in microseconds.
+  int Ls = 12; // passed as the first argument to the five-dimensional grid factories below
+  Coordinate latt4 = GridDefaultLatt(); // NOTE(review): latt4 is not referenced again in this function
+  // Grids built with the double-precision SIMD layout (vComplexD::Nsimd())
+  GridCartesian         * UGridD   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
+  GridRedBlackCartesian * UrbGridD = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridD);
+  GridCartesian         * FGridD   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridD);
+  GridRedBlackCartesian * FrbGridD = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridD);
+  // Same set of grids, built with the single-precision SIMD layout (vComplexF::Nsimd())
+  GridCartesian         * UGridF   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
+  GridRedBlackCartesian * UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF);
+  GridCartesian         * FGridF   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridF);
+  GridRedBlackCartesian * FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridF);
+  // NOTE(review): the four red-black grids above are created but not referenced again in this function
+  
+  std::vector<int> seeds4({1,2,3,4});
+  std::vector<int> seeds5({5,6,7,8});
+  
+  std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl;
+  GridParallelRNG          RNG4(UGridD);  RNG4.SeedFixedIntegers(seeds4); // NOTE(review): RNG4 is seeded but not used afterwards in this function
+  std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl;
+  GridParallelRNG          RNG5(FGridD);  RNG5.SeedFixedIntegers(seeds5);
+  std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
+  // Double-precision reference field filled from RNG5; tmp_d keeps a copy that is assigned back to field_d before each timed call
+  LatticeFermionD field_d(FGridD), tmp_d(FGridD);
+  random(RNG5,field_d); tmp_d = field_d;
+  // LatticeFermionD2 field, allocated on the single-precision-layout grid FGridF and initialised from field_d
+  LatticeFermionD2 field_d2(FGridF), tmp_d2(FGridF);
+  precisionChange(field_d2, field_d); tmp_d2 = field_d2;
+  // Single-precision field initialised from field_d, with its own backup copy
+  LatticeFermionF field_f(FGridF), tmp_f(FGridF);
+  precisionChange(field_f, field_d); tmp_f = field_f;
+  // Number of timed repetitions per test and per direction
+  int N = 500;
+  // Accumulated wall-clock times from usecond(): time_ds for double->single, time_sd for single->double
+  double time_ds = 0, time_sd = 0;
+
+  std::cout<<GridLogMessage << "Benchmarking single<->double original implementation (fields initially device-resident)" << std::endl;
+  for(int i=0;i<N;i++){
+    //We want to benchmark the typical scenario of both fields being device resident
+    //To do this, invoke an operation that will open a device view and touch all sites
+    //with a write operation that invalidates the CPU copy
+    field_d = tmp_d;
+    field_f = tmp_f;
+
+    double start=usecond();
+    precisionChangeOrig(field_d,field_f); // single -> double (output field first, input field second)
+    double stop=usecond();
+    time_sd += stop - start;
+
+    field_d = tmp_d;
+    field_f = tmp_f;
+
+    start=usecond();
+    precisionChangeOrig(field_f,field_d); // double -> single
+    stop=usecond();
+    time_ds += stop - start;   
+  }
+  std::cout << "d->s " << time_ds/N << "us" << " s->d " << time_sd/N << "us" << std::endl;
+
+  // Workspaces are built once here and reused by every call in the next test; the grids are passed in the same (output, input) order as the fields
+  precisionChangeWorkspace wk_sp_to_dp(field_d.Grid(),field_f.Grid());
+  precisionChangeWorkspace wk_dp_to_sp(field_f.Grid(),field_d.Grid());
+  
+  std::cout<<GridLogMessage << "Benchmarking single<->double with pregenerated workspace(fields initially device-resident)" << std::endl;
+  time_sd = time_ds = 0;
+  for(int i=0;i<N;i++){
+    field_d = tmp_d;
+    field_f = tmp_f;
+
+    double start=usecond();
+    precisionChange(field_d,field_f, wk_sp_to_dp);
+    double stop=usecond();
+    time_sd += stop - start;
+
+    field_d = tmp_d;
+    field_f = tmp_f;
+
+    start=usecond();
+    precisionChange(field_f,field_d, wk_dp_to_sp);
+    stop=usecond();
+    time_ds += stop - start;   
+  }
+  std::cout << "d->s " << time_ds/N << "us" << " s->d " << time_sd/N << "us" << std::endl;
+  // Same conversions, but calling precisionChange without a pre-built workspace argument
+  std::cout<<GridLogMessage << "Benchmarking single<->double with workspace generated on-the-fly (fields initially device-resident)" << std::endl;
+  time_sd = time_ds = 0;
+  for(int i=0;i<N;i++){
+    field_d = tmp_d;
+    field_f = tmp_f;
+
+    double start=usecond();
+    precisionChange(field_d,field_f);
+    double stop=usecond();
+    time_sd += stop - start;
+
+    field_d = tmp_d;
+    field_f = tmp_f;
+
+    start=usecond();
+    precisionChange(field_f,field_d);
+    stop=usecond();
+    time_ds += stop - start;
+
+  }
+  std::cout << "d->s " << time_ds/N << "us" << " s->d " << time_sd/N << "us" << std::endl;
+
+  // Direct calls to precisionChangeFast between the LatticeFermionD2 field and the single-precision field
+  std::cout<<GridLogMessage << "Benchmarking single<->double2 (fields initially device-resident)" << std::endl;
+  time_sd = time_ds = 0;
+  for(int i=0;i<N;i++){
+    field_d2 = tmp_d2;
+    field_f = tmp_f;
+
+    double start=usecond();
+    precisionChangeFast(field_d2,field_f);
+    double stop=usecond();
+    time_sd += stop - start;
+
+    field_d2 = tmp_d2;
+    field_f = tmp_f;
+
+    start=usecond();
+    precisionChangeFast(field_f,field_d2);
+    stop=usecond();
+    time_ds += stop - start;
+  }
+  std::cout << "d->s " << time_ds/N << "us" << " s->d " << time_sd/N << "us" << std::endl;
+
+  // Same field pair through the generic precisionChange call; per the log message the timings are expected to match the previous test
+  std::cout<<GridLogMessage << "Benchmarking single<->double2 through standard precisionChange call(fields initially device-resident) [NB: perf should be the same as the previous test!]" << std::endl;
+  time_sd = time_ds = 0;
+  for(int i=0;i<N;i++){
+    field_d2 = tmp_d2;
+    field_f = tmp_f;
+
+    double start=usecond();
+    precisionChange(field_d2,field_f);
+    double stop=usecond();
+    time_sd += stop - start;
+
+    field_d2 = tmp_d2;
+    field_f = tmp_f;
+
+    start=usecond();
+    precisionChange(field_f,field_d2);
+    stop=usecond();
+    time_ds += stop - start;
+  }
+  std::cout << "d->s " << time_ds/N << "us" << " s->d " << time_sd/N << "us" << std::endl;
+
+  Grid_finalize();
+}