1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-13 04:37:05 +01:00

Precision change improvements

Added a new, much faster implementation of precision change that uses (optionally) a precomputed workspace containing pointer offsets that is device resident, such that all lattice copying occurs only on the device and no host<->device transfer is required, other than the pointer table. It also avoids the need to unpack and repack the fields using explicit lane copying. When this new precisionChange is called without a workspace, one will be computed on-the-fly; however it is still considerably faster than the original implementation.

In the special case of using double2 and when the Grids are the same, calls to the new precisionChange will automatically use precisionChangeFast, such that there is a single API call for all precision changes.

Reliable update and mixed-prec multishift have been modified to precompute precision change workspaces

Renamed the original precisionChange as precisionChangeOrig

Fixed incorrect pointer offset bug in copyLane

Added a test and a benchmark for precisionChange

Added a test for reliable update CG
This commit is contained in:
Christopher Kelly
2023-02-21 10:52:42 -05:00
parent 472ed2dd5c
commit 1db58a8acc
7 changed files with 616 additions and 16 deletions

View File

@ -0,0 +1,124 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/core/Test_prec_change.cc
Copyright (C) 2015
Author: Christopher Kelly <ckelly@bnl.gov>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
int Ls = 12;
Coordinate latt4 = GridDefaultLatt();
GridCartesian * UGridD = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGridD = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridD);
GridCartesian * FGridD = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridD);
GridRedBlackCartesian * FrbGridD = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridD);
GridCartesian * UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF);
GridCartesian * FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridF);
GridRedBlackCartesian * FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridF);
std::vector<int> seeds4({1,2,3,4});
std::vector<int> seeds5({5,6,7,8});
std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl;
GridParallelRNG RNG5(FGridD); RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG5F(FGridF); RNG5F.SeedFixedIntegers(seeds5);
std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
LatticeFermionD field_d(FGridD), tmp_d(FGridD);
random(RNG5,field_d);
RealD norm2_field_d = norm2(field_d);
LatticeFermionD2 field_d2(FGridF), tmp_d2(FGridF);
random(RNG5F,field_d2);
RealD norm2_field_d2 = norm2(field_d2);
LatticeFermionF field_f(FGridF);
//Test original implementation
{
std::cout << GridLogMessage << "Testing original implementation" << std::endl;
field_f = Zero();
precisionChangeOrig(field_f,field_d);
RealD Ndiff = (norm2_field_d - norm2(field_f))/norm2_field_d;
std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of single and double prec fields differs by " << Ndiff << std::endl;
tmp_d = Zero();
precisionChangeOrig(tmp_d, field_f);
Ndiff = norm2( LatticeFermionD(tmp_d-field_d) ) / norm2_field_d;
std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of back-converted and original double prec fields differs by " << Ndiff << std::endl;
}
//Test new implementation with pregenerated workspace
{
std::cout << GridLogMessage << "Testing new implementation with pregenerated workspace" << std::endl;
precisionChangeWorkspace wk_sp_to_dp(field_d.Grid(),field_f.Grid());
precisionChangeWorkspace wk_dp_to_sp(field_f.Grid(),field_d.Grid());
field_f = Zero();
precisionChange(field_f,field_d,wk_dp_to_sp);
RealD Ndiff = (norm2_field_d - norm2(field_f))/norm2_field_d;
std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of single and double prec fields differs by " << Ndiff << std::endl;
tmp_d = Zero();
precisionChange(tmp_d, field_f,wk_sp_to_dp);
Ndiff = norm2( LatticeFermionD(tmp_d-field_d) ) / norm2_field_d;
std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of back-converted and original double prec fields differs by " << Ndiff << std::endl;
}
//Test new implementation without pregenerated workspace
{
std::cout << GridLogMessage << "Testing new implementation without pregenerated workspace" << std::endl;
field_f = Zero();
precisionChange(field_f,field_d);
RealD Ndiff = (norm2_field_d - norm2(field_f))/norm2_field_d;
std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of single and double prec fields differs by " << Ndiff << std::endl;
tmp_d = Zero();
precisionChange(tmp_d, field_f);
Ndiff = norm2( LatticeFermionD(tmp_d-field_d) ) / norm2_field_d;
std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of back-converted and original double prec fields differs by " << Ndiff << std::endl;
}
//Test fast implementation
{
std::cout << GridLogMessage << "Testing fast (double2) implementation" << std::endl;
field_f = Zero();
precisionChangeFast(field_f,field_d2);
RealD Ndiff = (norm2_field_d2 - norm2(field_f))/norm2_field_d2;
std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of single and double prec fields differs by " << Ndiff << std::endl;
tmp_d2 = Zero();
precisionChangeFast(tmp_d2, field_f);
Ndiff = norm2( LatticeFermionD2(tmp_d2-field_d2) ) / norm2_field_d2;
std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of back-converted and original double prec fields differs by " << Ndiff << std::endl;
}
std::cout << "Done" << std::endl;
Grid_finalize();
}

View File

@ -0,0 +1,143 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/solver/Test_dwf_relupcg_prec.cc
Copyright (C) 2015
Author: Christopher Kelly <ckelly@bnl.gov>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
double relup_delta = 0.2;
for(int i=1;i<argc-1;i++){
std::string sarg = argv[i];
if(sarg == "--relup_delta"){
std::stringstream ss; ss << argv[i+1]; ss >> relup_delta;
std::cout << GridLogMessage << "Set reliable update Delta to " << relup_delta << std::endl;
}
}
const int Ls=12;
{
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
GridCartesian * UGrid_f = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid_f = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_f);
GridCartesian * FGrid_f = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid_f);
GridRedBlackCartesian * FrbGrid_f = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid_f);
std::vector<int> seeds4({1,2,3,4});
std::vector<int> seeds5({5,6,7,8});
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
LatticeFermionD src(FGrid); random(RNG5,src);
LatticeFermionD result(FGrid); result=Zero();
LatticeGaugeFieldD Umu(UGrid);
LatticeGaugeFieldF Umu_f(UGrid_f);
SU<Nc>::HotConfiguration(RNG4,Umu);
precisionChange(Umu_f,Umu);
RealD mass=0.1;
RealD M5=1.8;
DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
DomainWallFermionF Ddwf_f(Umu_f,*FGrid_f,*FrbGrid_f,*UGrid_f,*UrbGrid_f,mass,M5);
LatticeFermionD src_o(FrbGrid);
LatticeFermionD result_o(FrbGrid);
LatticeFermionD result_o_2(FrbGrid);
pickCheckerboard(Odd,src_o,src);
result_o.Checkerboard() = Odd;
result_o = Zero();
result_o_2.Checkerboard() = Odd;
result_o_2 = Zero();
SchurDiagMooeeOperator<DomainWallFermionD,LatticeFermionD> HermOpEO(Ddwf);
SchurDiagMooeeOperator<DomainWallFermionF,LatticeFermionF> HermOpEO_f(Ddwf_f);
std::cout << GridLogMessage << "::::::::::::: Starting mixed CG" << std::endl;
ConjugateGradientReliableUpdate<LatticeFermionD,LatticeFermionF> mCG(1e-8, 10000, relup_delta, FrbGrid_f, HermOpEO_f, HermOpEO);
double t1,t2,flops;
double MdagMsiteflops = 1452; // Mobius (real coeffs)
// CG overhead: 8 inner product, 4+8 axpy_norm, 4+4 linear comb (2 of)
double CGsiteflops = (8+4+8+4+4)*Nc*Ns ;
std:: cout << " MdagM site flops = "<< 4*MdagMsiteflops<<std::endl;
std:: cout << " CG site flops = "<< CGsiteflops <<std::endl;
int iters, iters_cleanup, relups, tot_iters;
for(int i=0;i<10;i++){
result_o = Zero();
t1=usecond();
mCG(src_o,result_o);
t2=usecond();
iters = mCG.IterationsToComplete; //Number of single prec CG iterations
iters_cleanup = mCG.IterationsToCleanup;
relups = mCG.ReliableUpdatesPerformed;
tot_iters = iters + iters_cleanup + relups; //relup cost MdagM application in double
flops = MdagMsiteflops*4*FrbGrid->gSites()*tot_iters;
flops+= CGsiteflops*FrbGrid->gSites()*tot_iters;
std::cout << " SinglePrecision single prec iterations/sec "<< iters/(t2-t1)*1000.*1000.<<std::endl;
std::cout << " SinglePrecision double prec cleanup iterations/sec "<< iters_cleanup/(t2-t1)*1000.*1000.<<std::endl;
std::cout << " SinglePrecision reliable updates/sec "<< relups/(t2-t1)*1000.*1000.<<std::endl;
std::cout << " SinglePrecision GF/s "<< flops/(t2-t1)/1000.<<std::endl;
}
std::cout << GridLogMessage << "::::::::::::: Starting regular CG" << std::endl;
ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
for(int i=0;i<1;i++){
result_o_2 = Zero();
t1=usecond();
CG(HermOpEO,src_o,result_o_2);
t2=usecond();
iters = CG.IterationsToComplete;
flops = MdagMsiteflops*4*FrbGrid->gSites()*iters;
flops+= CGsiteflops*FrbGrid->gSites()*iters;
std::cout << " DoublePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.<<std::endl;
std::cout << " DoublePrecision GF/s "<< flops/(t2-t1)/1000.<<std::endl;
}
// MemoryManager::Print();
LatticeFermionD diff_o(FrbGrid);
RealD diff = axpy_norm(diff_o, -1.0, result_o, result_o_2);
std::cout << GridLogMessage << "::::::::::::: Diff between mixed and regular CG: " << diff << std::endl;
}
MemoryManager::Print();
Grid_finalize();
}