mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	Merge pull request #424 from giltirn/feature/dirichlet-precchange
Precision change implementation
This commit is contained in:
		
							
								
								
									
										189
									
								
								benchmarks/Benchmark_prec_change.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										189
									
								
								benchmarks/Benchmark_prec_change.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,189 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./benchmarks/Benchmark_prec_change.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Christopher Kelly <ckelly@bnl.gov>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
 | 
			
		||||
int main (int argc, char ** argv)
 | 
			
		||||
{
 | 
			
		||||
  Grid_init(&argc,&argv);
 | 
			
		||||
 | 
			
		||||
  int Ls = 12;
 | 
			
		||||
  Coordinate latt4 = GridDefaultLatt();
 | 
			
		||||
 | 
			
		||||
  GridCartesian         * UGridD   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
 | 
			
		||||
  GridRedBlackCartesian * UrbGridD = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridD);
 | 
			
		||||
  GridCartesian         * FGridD   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridD);
 | 
			
		||||
  GridRedBlackCartesian * FrbGridD = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridD);
 | 
			
		||||
 | 
			
		||||
  GridCartesian         * UGridF   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
 | 
			
		||||
  GridRedBlackCartesian * UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF);
 | 
			
		||||
  GridCartesian         * FGridF   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridF);
 | 
			
		||||
  GridRedBlackCartesian * FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridF);
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
  std::vector<int> seeds4({1,2,3,4});
 | 
			
		||||
  std::vector<int> seeds5({5,6,7,8});
 | 
			
		||||
  
 | 
			
		||||
  std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl;
 | 
			
		||||
  GridParallelRNG          RNG4(UGridD);  RNG4.SeedFixedIntegers(seeds4);
 | 
			
		||||
  std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl;
 | 
			
		||||
  GridParallelRNG          RNG5(FGridD);  RNG5.SeedFixedIntegers(seeds5);
 | 
			
		||||
  std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
 | 
			
		||||
 | 
			
		||||
  LatticeFermionD field_d(FGridD), tmp_d(FGridD);
 | 
			
		||||
  random(RNG5,field_d); tmp_d = field_d;
 | 
			
		||||
 | 
			
		||||
  LatticeFermionD2 field_d2(FGridF), tmp_d2(FGridF);
 | 
			
		||||
  precisionChange(field_d2, field_d); tmp_d2 = field_d2;
 | 
			
		||||
 | 
			
		||||
  LatticeFermionF field_f(FGridF), tmp_f(FGridF);
 | 
			
		||||
  precisionChange(field_f, field_d); tmp_f = field_f;
 | 
			
		||||
 | 
			
		||||
  int N = 500;
 | 
			
		||||
 | 
			
		||||
  double time_ds = 0, time_sd = 0;
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "Benchmarking single<->double original implementation (fields initially device-resident)" << std::endl;
 | 
			
		||||
  for(int i=0;i<N;i++){
 | 
			
		||||
    //We want to benchmark the typical scenario of both fields being device resident
 | 
			
		||||
    //To do this, invoke an operation that will open a device view and touch all sites
 | 
			
		||||
    //with a write operation that invalidates the CPU copy
 | 
			
		||||
    field_d = tmp_d;
 | 
			
		||||
    field_f = tmp_f;
 | 
			
		||||
 | 
			
		||||
    double start=usecond();
 | 
			
		||||
    precisionChangeOrig(field_d,field_f);
 | 
			
		||||
    double stop=usecond();
 | 
			
		||||
    time_sd += stop - start;
 | 
			
		||||
 | 
			
		||||
    field_d = tmp_d;
 | 
			
		||||
    field_f = tmp_f;
 | 
			
		||||
 | 
			
		||||
    start=usecond();
 | 
			
		||||
    precisionChangeOrig(field_f,field_d);
 | 
			
		||||
    stop=usecond();
 | 
			
		||||
    time_ds += stop - start;   
 | 
			
		||||
  }
 | 
			
		||||
  std::cout << "d->s " << time_ds/N << "us" << " s->d " << time_sd/N << "us" << std::endl;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  precisionChangeWorkspace wk_sp_to_dp(field_d.Grid(),field_f.Grid());
 | 
			
		||||
  precisionChangeWorkspace wk_dp_to_sp(field_f.Grid(),field_d.Grid());
 | 
			
		||||
  
 | 
			
		||||
  std::cout<<GridLogMessage << "Benchmarking single<->double with pregenerated workspace(fields initially device-resident)" << std::endl;
 | 
			
		||||
  time_sd = time_ds = 0;
 | 
			
		||||
  for(int i=0;i<N;i++){
 | 
			
		||||
    field_d = tmp_d;
 | 
			
		||||
    field_f = tmp_f;
 | 
			
		||||
 | 
			
		||||
    double start=usecond();
 | 
			
		||||
    precisionChange(field_d,field_f, wk_sp_to_dp);
 | 
			
		||||
    double stop=usecond();
 | 
			
		||||
    time_sd += stop - start;
 | 
			
		||||
 | 
			
		||||
    field_d = tmp_d;
 | 
			
		||||
    field_f = tmp_f;
 | 
			
		||||
 | 
			
		||||
    start=usecond();
 | 
			
		||||
    precisionChange(field_f,field_d, wk_dp_to_sp);
 | 
			
		||||
    stop=usecond();
 | 
			
		||||
    time_ds += stop - start;   
 | 
			
		||||
  }
 | 
			
		||||
  std::cout << "d->s " << time_ds/N << "us" << " s->d " << time_sd/N << "us" << std::endl;
 | 
			
		||||
  
 | 
			
		||||
  std::cout<<GridLogMessage << "Benchmarking single<->double with workspace generated on-the-fly (fields initially device-resident)" << std::endl;
 | 
			
		||||
  time_sd = time_ds = 0;
 | 
			
		||||
  for(int i=0;i<N;i++){
 | 
			
		||||
    field_d = tmp_d;
 | 
			
		||||
    field_f = tmp_f;
 | 
			
		||||
 | 
			
		||||
    double start=usecond();
 | 
			
		||||
    precisionChange(field_d,field_f);
 | 
			
		||||
    double stop=usecond();
 | 
			
		||||
    time_sd += stop - start;
 | 
			
		||||
 | 
			
		||||
    field_d = tmp_d;
 | 
			
		||||
    field_f = tmp_f;
 | 
			
		||||
 | 
			
		||||
    start=usecond();
 | 
			
		||||
    precisionChange(field_f,field_d);
 | 
			
		||||
    stop=usecond();
 | 
			
		||||
    time_ds += stop - start;
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
  std::cout << "d->s " << time_ds/N << "us" << " s->d " << time_sd/N << "us" << std::endl;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "Benchmarking single<->double2 (fields initially device-resident)" << std::endl;
 | 
			
		||||
  time_sd = time_ds = 0;
 | 
			
		||||
  for(int i=0;i<N;i++){
 | 
			
		||||
    field_d2 = tmp_d2;
 | 
			
		||||
    field_f = tmp_f;
 | 
			
		||||
 | 
			
		||||
    double start=usecond();
 | 
			
		||||
    precisionChangeFast(field_d2,field_f);
 | 
			
		||||
    double stop=usecond();
 | 
			
		||||
    time_sd += stop - start;
 | 
			
		||||
 | 
			
		||||
    field_d2 = tmp_d2;
 | 
			
		||||
    field_f = tmp_f;
 | 
			
		||||
 | 
			
		||||
    start=usecond();
 | 
			
		||||
    precisionChangeFast(field_f,field_d2);
 | 
			
		||||
    stop=usecond();
 | 
			
		||||
    time_ds += stop - start;
 | 
			
		||||
  }
 | 
			
		||||
  std::cout << "d->s " << time_ds/N << "us" << " s->d " << time_sd/N << "us" << std::endl;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "Benchmarking single<->double2 through standard precisionChange call(fields initially device-resident) [NB: perf should be the same as the previous test!]" << std::endl;
 | 
			
		||||
  time_sd = time_ds = 0;
 | 
			
		||||
  for(int i=0;i<N;i++){
 | 
			
		||||
    field_d2 = tmp_d2;
 | 
			
		||||
    field_f = tmp_f;
 | 
			
		||||
 | 
			
		||||
    double start=usecond();
 | 
			
		||||
    precisionChange(field_d2,field_f);
 | 
			
		||||
    double stop=usecond();
 | 
			
		||||
    time_sd += stop - start;
 | 
			
		||||
 | 
			
		||||
    field_d2 = tmp_d2;
 | 
			
		||||
    field_f = tmp_f;
 | 
			
		||||
 | 
			
		||||
    start=usecond();
 | 
			
		||||
    precisionChange(field_f,field_d2);
 | 
			
		||||
    stop=usecond();
 | 
			
		||||
    time_ds += stop - start;
 | 
			
		||||
  }
 | 
			
		||||
  std::cout << "d->s " << time_ds/N << "us" << " s->d " << time_sd/N << "us" << std::endl;
 | 
			
		||||
 | 
			
		||||
  Grid_finalize();
 | 
			
		||||
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user