1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-13 01:05:36 +00:00

Added some instrumentation to benchmark the force computation

This commit is contained in:
Guido Cossu 2016-10-06 17:52:45 +01:00
parent 4089984431
commit 2e453dfbf5
9 changed files with 377 additions and 298 deletions

View File

@ -86,18 +86,6 @@ int main (int argc, char ** argv)
LatticeFermion tmp(FGrid);
LatticeFermion err(FGrid);
/* src=zero;
std::vector<int> origin(5,0);
SpinColourVector f=zero;
for(int sp=0;sp<4;sp++){
for(int co=0;co<3;co++){
f()(sp)(co)=Complex(1.0,0.0);
}}
pokeSite(f,src,origin);
*/
ColourMatrix cm = Complex(1.0,0.0);
LatticeGaugeField Umu(UGrid);
random(RNG4,Umu);
@ -144,6 +132,7 @@ int main (int argc, char ** argv)
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
std::cout<<GridLogMessage << "Naive wilson implementation "<<std::endl;
std::cout << GridLogMessage<< "Calling Dw"<<std::endl;
int ncall =100;
if (1) {
@ -189,7 +178,7 @@ int main (int argc, char ** argv)
peekSite(tmp,src,site);
pokeSite(tmp,ssrc,site);
}}}}}
std::cout<<"src norms "<< norm2(src)<<" " <<norm2(ssrc)<<std::endl;
std::cout<<GridLogMessage<< "src norms "<< norm2(src)<<" " <<norm2(ssrc)<<std::endl;
double t0=usecond();
sDw.ZeroCounters();
for(int i=0;i<ncall;i++){
@ -201,7 +190,7 @@ int main (int argc, char ** argv)
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=1344*volume*ncall;
std::cout<<GridLogMessage << "Called Dw sinner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NP<<std::endl;
sDw.Report();
@ -217,7 +206,7 @@ int main (int argc, char ** argv)
}
}
std::cout<<"res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
RealF sum=0;
@ -237,7 +226,7 @@ int main (int argc, char ** argv)
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" simd "<<simd<<std::endl;
}
}}}}}
std::cout<<" difference between normal and simd is "<<sum<<std::endl;
std::cout<<GridLogMessage<<" difference between normal and simd is "<<sum<<std::endl;
if (1) {
@ -310,6 +299,7 @@ int main (int argc, char ** argv)
ref = -0.5*ref;
}
Dw.Dhop(src,result,1);
std::cout << GridLogMessage << "Naive wilson implementation Dag" << std::endl;
std::cout<<GridLogMessage << "Called DwDag"<<std::endl;
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;

View File

@ -106,7 +106,6 @@
#define SERIAL_SENDS
void AddPacket(void *xmit,void * rcv, Integer to,Integer from,Integer bytes){
comms_bytes+=2.0*bytes;
#ifdef SEND_IMMEDIATE
commtime-=usecond();
_grid->SendToRecvFrom(xmit,to,rcv,from,bytes);
@ -304,23 +303,23 @@
double calls;
void ZeroCounters(void) {
gathertime=0;
jointime=0;
commtime=0;
halogtime=0;
mergetime=0;
spintime=0;
gathermtime=0;
splicetime=0;
nosplicetime=0;
comms_bytes=0;
calls=0;
gathertime = 0.;
jointime = 0.;
commtime = 0.;
halogtime = 0.;
mergetime = 0.;
spintime = 0.;
gathermtime = 0.;
splicetime = 0.;
nosplicetime = 0.;
comms_bytes = 0.;
calls = 0.;
};
void Report(void) {
#define PRINTIT(A) \
std::cout << GridLogMessage << " Stencil " << #A << " "<< A/calls<<std::endl;
if ( calls > 0 ) {
if ( calls > 0. ) {
std::cout << GridLogMessage << " Stencil calls "<<calls<<std::endl;
PRINTIT(jointime);
PRINTIT(gathertime);

View File

@ -24,7 +24,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_CONJUGATE_GRADIENT_H
@ -40,15 +41,17 @@ namespace Grid {
template <class Field>
class ConjugateGradient : public OperatorFunction<Field> {
public:
bool ErrorOnNoConverge; //throw an assert when the CG fails to converge. Defaults true.
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
// Defaults true.
RealD Tolerance;
Integer MaxIterations;
ConjugateGradient(RealD tol,Integer maxit, bool err_on_no_conv = true) : Tolerance(tol), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv){
};
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
: Tolerance(tol),
MaxIterations(maxit),
ErrorOnNoConverge(err_on_no_conv){};
void operator()(LinearOperatorBase<Field> &Linop, const Field &src,
Field &psi) {
psi.checkerboard = src.checkerboard;
conformable(psi, src);
@ -62,8 +65,10 @@ public:
RealD guess = norm2(psi);
assert(std::isnan(guess) == 0);
Linop.HermOpAndNorm(psi, mmp, d, b);
r = src - mmp;
p = r;
@ -71,12 +76,18 @@ public:
cp = a;
ssq = norm2(src);
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: guess "<<guess<<std::endl;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: src "<<ssq <<std::endl;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: mp "<<d <<std::endl;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: mmp "<<b <<std::endl;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: cp,r "<<cp <<std::endl;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: p "<<a <<std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: guess " << guess << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: src " << ssq << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: mp " << d << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: mmp " << b << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: cp,r " << cp << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: p " << a << std::endl;
RealD rsq = Tolerance * Tolerance * ssq;
@ -85,7 +96,9 @@ public:
return;
}
std::cout<<GridLogIterative << std::setprecision(4)<< "ConjugateGradient: k=0 residual "<<cp<<" target "<<rsq<<std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq
<< std::endl;
GridStopWatch LinalgTimer;
GridStopWatch MatrixTimer;
@ -94,7 +107,6 @@ public:
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
c = cp;
MatrixTimer.Start();
@ -116,11 +128,11 @@ public:
p = p * b + r;
LinalgTimer.Stop();
std::cout<<GridLogIterative<<"ConjugateGradient: Iteration " <<k<<" residual "<<cp<< " target "<< rsq<<std::endl;
std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
<< " residual " << cp << " target " << rsq << std::endl;
// Stopping condition
if (cp <= rsq) {
SolverTimer.Stop();
Linop.HermOpAndNorm(psi, mmp, d, qq);
p = mmp - src;
@ -131,22 +143,25 @@ public:
RealD resnorm = sqrt(norm2(p));
RealD true_residual = resnorm / srcnorm;
std::cout<<GridLogMessage<<"ConjugateGradient: Converged on iteration " <<k
<<" computed residual "<<sqrt(cp/ssq)
<<" true residual " <<true_residual
<<" target "<<Tolerance<<std::endl;
std::cout<<GridLogMessage<<"Time elapsed: Total "<< SolverTimer.Elapsed() << " Matrix "<<MatrixTimer.Elapsed() << " Linalg "<<LinalgTimer.Elapsed();
std::cout << GridLogMessage
<< "ConjugateGradient: Converged on iteration " << k << std::endl;
std::cout << GridLogMessage << "Computed residual " << sqrt(cp / ssq)
<< " true residual " << true_residual << " target "
<< Tolerance << std::endl;
std::cout << GridLogMessage << "Time elapsed: Iterations "
<< SolverTimer.Elapsed() << " Matrix "
<< MatrixTimer.Elapsed() << " Linalg "
<< LinalgTimer.Elapsed();
std::cout << std::endl;
if(ErrorOnNoConverge)
assert(true_residual/Tolerance < 1000.0);
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 1000.0);
return;
}
}
std::cout<<GridLogMessage<<"ConjugateGradient did NOT converge"<<std::endl;
if(ErrorOnNoConverge)
assert(0);
std::cout << GridLogMessage << "ConjugateGradient did NOT converge"
<< std::endl;
if (ErrorOnNoConverge) assert(0);
}
};
}

View File

@ -178,26 +178,64 @@ WilsonFermion5D<Impl>::WilsonFermion5D(int simd,GaugeField &_Umu,
template<class Impl>
void WilsonFermion5D<Impl>::Report(void)
{
if ( Calls > 0 ) {
std::cout << GridLogMessage << "WilsonFermion5D Dhop Calls " <<Calls <<std::endl;
std::cout << GridLogMessage << "WilsonFermion5D CommTime " <<CommTime/Calls<<" us" <<std::endl;
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime " <<ComputeTime/Calls<<" us" <<std::endl;
std::vector<int> latt = GridDefaultLatt();
RealD volume = Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
RealD NP = _FourDimGrid->_Nprocessors;
std::cout << GridLogMessage << "WilsonFermion5D Stencil"<<std::endl;
Stencil.Report();
if ( DhopCalls > 0 ) {
std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Number of Dhop Calls : " << DhopCalls << std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " << DhopCommTime
<< " us" << std::endl;
std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls : "
<< DhopCommTime / DhopCalls << " us" << std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Total Compute time : "
<< DhopComputeTime << " us" << std::endl;
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls : "
<< DhopComputeTime / DhopCalls << " us" << std::endl;
std::cout << GridLogMessage << "WilsonFermion5D StencilEven"<<std::endl;
StencilEven.Report();
RealD mflops = 1344*volume*DhopCalls/DhopComputeTime;
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NP << std::endl;
std::cout << GridLogMessage << "WilsonFermion5D StencilOdd"<<std::endl;
StencilOdd.Report();
}
if ( DerivCalls > 0 ) {
std::cout << GridLogMessage << "#### Deriv calls report "<< std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Number of Deriv Calls : " <<DerivCalls <<std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " <<DerivCommTime <<" us"<<std::endl;
std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls : " <<DerivCommTime/DerivCalls<<" us" <<std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Total Compute time : " <<DerivComputeTime <<" us"<<std::endl;
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls : " <<DerivComputeTime/DerivCalls<<" us" <<std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Total Dhop Compute time : " <<DerivDhopComputeTime <<" us"<<std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Dhop ComputeTime/Calls : " <<DerivDhopComputeTime/DerivCalls<<" us" <<std::endl;
RealD mflops = 144*volume*DerivCalls/DerivDhopComputeTime;
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NP << std::endl;
}
if (DerivCalls > 0 || DhopCalls > 0){
std::cout << GridLogMessage << "WilsonFermion5D Stencil"<<std::endl; Stencil.Report();
std::cout << GridLogMessage << "WilsonFermion5D StencilEven"<<std::endl; StencilEven.Report();
std::cout << GridLogMessage << "WilsonFermion5D StencilOdd"<<std::endl; StencilOdd.Report();
}
}
template<class Impl>
void WilsonFermion5D<Impl>::ZeroCounters(void) {
Calls=0;
CommTime=0;
ComputeTime=0;
DhopCalls = 0;
DhopCommTime = 0;
DhopComputeTime = 0;
DerivCalls = 0;
DerivCommTime = 0;
DerivComputeTime = 0;
DerivDhopComputeTime = 0;
Stencil.ZeroCounters();
StencilEven.ZeroCounters();
StencilOdd.ZeroCounters();
@ -250,6 +288,7 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
const FermionField &B,
int dag)
{
DerivCalls++;
assert((dag==DaggerNo) ||(dag==DaggerYes));
conformable(st._grid,A._grid);
@ -260,12 +299,14 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
FermionField Btilde(B._grid);
FermionField Atilde(B._grid);
DerivCommTime-=usecond();
st.HaloExchange(B,compressor);
DerivCommTime+=usecond();
Atilde=A;
DerivComputeTime-=usecond();
for (int mu = 0; mu < Nd; mu++) {
////////////////////////////////////////////////////////////////////////
// Flip gamma if dag
////////////////////////////////////////////////////////////////////////
@ -276,6 +317,7 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
// Call the single hop
////////////////////////
DerivDhopComputeTime -= usecond();
PARALLEL_FOR_LOOP
for (int sss = 0; sss < U._grid->oSites(); sss++) {
for (int s = 0; s < Ls; s++) {
@ -285,19 +327,18 @@ PARALLEL_FOR_LOOP
assert(sF < B._grid->oSites());
assert(sU < U._grid->oSites());
Kernels::DiracOptDhopDir(st,U,st.comm_buf,sF,sU,B,Btilde,mu,gamma);
Kernels::DiracOptDhopDir(st, U, st.comm_buf, sF, sU, B, Btilde, mu,
gamma);
////////////////////////////
// spin trace outer product
////////////////////////////
}
}
DerivDhopComputeTime += usecond();
Impl::InsertForce5D(mat, Btilde, Atilde, mu);
}
DerivComputeTime += usecond();
}
template<class Impl>
@ -355,34 +396,36 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
DoubledGaugeField & U,
const FermionField &in, FermionField &out,int dag)
{
Calls++;
DhopCalls++;
// assert((dag==DaggerNo) ||(dag==DaggerYes));
Compressor compressor(dag);
int LLs = in._grid->_rdimensions[0];
CommTime-=usecond();
DhopCommTime-=usecond();
st.HaloExchange(in,compressor);
CommTime+=usecond();
DhopCommTime+=usecond();
ComputeTime-=usecond();
DhopComputeTime-=usecond();
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
if (dag == DaggerYes) {
PARALLEL_FOR_LOOP
for (int ss = 0; ss < U._grid->oSites(); ss++) {
int sU = ss;
int sF = LLs * sU;
Kernels::DiracOptDhopSiteDag(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out);
Kernels::DiracOptDhopSiteDag(st, lo, U, st.comm_buf, sF, sU, LLs, 1, in,
out);
}
} else {
PARALLEL_FOR_LOOP
for (int ss = 0; ss < U._grid->oSites(); ss++) {
int sU = ss;
int sF = LLs * sU;
Kernels::DiracOptDhopSite(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out);
Kernels::DiracOptDhopSite(st, lo, U, st.comm_buf, sF, sU, LLs, 1, in,
out);
}
}
ComputeTime+=usecond();
DhopComputeTime+=usecond();
}

View File

@ -63,9 +63,14 @@ namespace Grid {
void Report(void);
void ZeroCounters(void);
double Calls;
double CommTime;
double ComputeTime;
double DhopCalls;
double DhopCommTime;
double DhopComputeTime;
double DerivCalls;
double DerivCommTime;
double DerivComputeTime;
double DerivDhopComputeTime;
///////////////////////////////////////////////////////////////
// Implement the abstract base

View File

@ -131,9 +131,11 @@ namespace Grid{
Vpc.MpcDag(PhiOdd,Y); // Y= Vdag phi
X=zero;
ActionSolver(Mpc,Y,X); // X= (MdagM)^-1 Vdag phi
Mpc.Mpc(X,Y); // Y= Mdag^-1 Vdag phi
//Mpc.Mpc(X,Y); // Y= Mdag^-1 Vdag phi
// Multiply by Ydag
RealD action = real(innerProduct(Y,X));
RealD action = norm2(Y);
//RealD action = norm2(Y);
// The EE factorised block; normally can replace with zero if det is constant (gauge field indept)
// Only really clover term that creates this. Leave the EE portion as a future to do to make most

View File

@ -76,6 +76,12 @@ public:
TheAction.push_back(Level1);
Run(argc,argv);
std::cout << GridLogMessage << "Numerator report, Pauli-Villars term : " << std::endl;
NumOp.Report();
std::cout << GridLogMessage << "Denominator report, Dw(m) term (includes CG) : " << std::endl;
DenOp.Report();
};
};

View File

@ -22,7 +22,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
@ -36,41 +37,47 @@ struct scal {
d internal;
};
Gamma::GammaMatrix Gmu [] = {
Gamma::GammaX,
Gamma::GammaY,
Gamma::GammaZ,
Gamma::GammaT
};
Gamma::GammaMatrix Gmu[] = {Gamma::GammaX, Gamma::GammaY, Gamma::GammaZ,
Gamma::GammaT};
int main (int argc, char ** argv)
{
int main(int argc, char** argv) {
Grid_init(&argc, &argv);
const int Ls=8;
const int Ls = 16;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(
GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()),
GridDefaultMpi());
GridRedBlackCartesian* UrbGrid =
SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian* FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
GridRedBlackCartesian* FrbGrid =
SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
std::vector<int> seeds4({1, 2, 3, 4});
std::vector<int> seeds5({5, 6, 7, 8});
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
GridParallelRNG RNG5(FGrid);
RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid);
RNG4.SeedFixedIntegers(seeds4);
LatticeFermion src(FGrid); random(RNG5,src);
LatticeFermion result(FGrid); result=zero;
LatticeFermion src(FGrid);
random(RNG5, src);
LatticeFermion result(FGrid);
result = zero;
LatticeGaugeField Umu(UGrid);
SU3::HotConfiguration(RNG4, Umu);
std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt()
<< " Ls: " << Ls << std::endl;
std::vector<LatticeColourMatrix> U(4, UGrid);
for (int mu = 0; mu < Nd; mu++) {
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
}
RealD mass=0.1;
RealD mass = 0.01;
RealD M5 = 1.8;
DomainWallFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5);
@ -79,9 +86,20 @@ int main (int argc, char ** argv)
pickCheckerboard(Odd, src_o, src);
result_o = zero;
GridStopWatch CGTimer;
SchurDiagMooeeOperator<DomainWallFermionR, LatticeFermion> HermOpEO(Ddwf);
ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
ConjugateGradient<LatticeFermion> CG(1.0e-8, 10000, 0);// switch off the assert
CGTimer.Start();
CG(HermOpEO, src_o, result_o);
CGTimer.Stop();
std::cout << GridLogMessage << "Total CG time : " << CGTimer.Elapsed()
<< std::endl;
std::cout << GridLogMessage << "######## Dhop calls summary" << std::endl;
Ddwf.Report();
Grid_finalize();
}

View File

@ -84,5 +84,6 @@ int main (int argc, char ** argv)
ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
CG(HermOpEO,src_o,result_o);
Grid_finalize();
}