1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-09 23:45:36 +00:00

Timing hooks

This commit is contained in:
paboyle 2016-10-06 09:25:12 +01:00
parent 98439847cf
commit 4089984431
4 changed files with 82 additions and 14 deletions

View File

@ -148,6 +148,7 @@ int main (int argc, char ** argv)
int ncall =100;
if (1) {
Dw.ZeroCounters();
double t0=usecond();
for(int i=0;i<ncall;i++){
__SSC_START;
@ -166,7 +167,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NP<<std::endl;
err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
// Dw.Report();
Dw.Report();
}
if (1)
@ -190,6 +191,7 @@ int main (int argc, char ** argv)
}}}}}
std::cout<<"src norms "<< norm2(src)<<" " <<norm2(ssrc)<<std::endl;
double t0=usecond();
sDw.ZeroCounters();
for(int i=0;i<ncall;i++){
__SSC_START;
sDw.Dhop(ssrc,sresult,0);
@ -202,7 +204,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Called Dw sinner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NP<<std::endl;
// sDw.Report();
sDw.Report();
if(0){
for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
@ -259,6 +261,7 @@ int main (int argc, char ** argv)
sr_e = zero;
sr_o = zero;
sDw.ZeroCounters();
double t0=usecond();
for(int i=0;i<ncall;i++){
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
@ -270,6 +273,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "sDeo mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "sDeo mflop/s per node "<< flops/(t1-t0)/NP<<std::endl;
sDw.Report();
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
sDw.DhopOE(ssrc_e,sr_o,DaggerNo);
@ -327,6 +331,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "src_o"<<norm2(src_o)<<std::endl;
{
Dw.ZeroCounters();
double t0=usecond();
for(int i=0;i<ncall;i++){
Dw.DhopEO(src_o,r_e,DaggerNo);
@ -338,6 +343,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "Deo mflop/s per node "<< flops/(t1-t0)/NP<<std::endl;
Dw.Report();
}
Dw.DhopEO(src_o,r_e,DaggerNo);
Dw.DhopOE(src_e,r_o,DaggerNo);

View File

@ -301,6 +301,39 @@
double gathermtime;
double splicetime;
double nosplicetime;
double calls;
// Reset every stencil timing/traffic counter, and the call count, to zero.
// Report() divides the accumulated counters by `calls`, so invoke this
// immediately before the region you want to measure.
void ZeroCounters(void) {
  // Accumulated time counters.
  jointime    = gathertime  = 0;
  commtime    = halogtime   = 0;
  mergetime   = spintime    = 0;
  gathermtime = 0;
  splicetime  = nosplicetime = 0;

  // Communication volume counter.
  comms_bytes = 0;

  // Number of counted calls (incremented in HaloExchange).
  calls = 0;
};
// Log the stencil counters averaged over the number of counted calls.
//
// Each counter is printed as "<name> <counter/calls>" through GridLogMessage.
// Nothing is printed when `calls` is not positive, which also keeps the
// division below from dividing by zero.
void Report(void) {
  // PRINTIT(A) logs the counter's own name (stringified) followed by its
  // per-call average. The argument is parenthesised so that any expression
  // passed in is divided as a whole.
#define PRINTIT(A) \
  std::cout << GridLogMessage << " Stencil " << #A << " "<< (A)/calls<<std::endl;
  if ( calls > 0 ) {
    std::cout << GridLogMessage << " Stencil calls "<<calls<<std::endl;
    PRINTIT(jointime);
    PRINTIT(gathertime);
    PRINTIT(commtime);
    PRINTIT(halogtime);
    PRINTIT(mergetime);
    PRINTIT(spintime);
    PRINTIT(comms_bytes);
    PRINTIT(gathermtime);
    PRINTIT(splicetime);
    PRINTIT(nosplicetime);
  }
  // The helper is private to this function: remove it so the macro does not
  // leak into every translation unit that includes this header.
#undef PRINTIT
};
#endif
CartesianStencil(GridBase *grid,
@ -310,18 +343,6 @@
const std::vector<int> &distances)
: _permute_type(npoints), _comm_buf_size(npoints)
{
#ifdef TIMING_HACK
gathertime=0;
jointime=0;
commtime=0;
halogtime=0;
mergetime=0;
spintime=0;
gathermtime=0;
splicetime=0;
nosplicetime=0;
comms_bytes=0;
#endif
_npoints = npoints;
_grid = grid;
_directions = directions;
@ -623,6 +644,7 @@
template<class compressor>
void HaloExchange(const Lattice<vobj> &source,compressor &compress)
{
calls++;
Mergers.resize(0);
Packets.resize(0);
HaloGather(source,compress);

View File

@ -175,6 +175,35 @@ WilsonFermion5D<Impl>::WilsonFermion5D(int simd,GaugeField &_Umu,
}
*/
// Log the per-call averages of the Dhop timing counters, followed by the
// reports of the three stencils (full, even, odd).
// Prints nothing unless at least one call has been counted.
template<class Impl>
void WilsonFermion5D<Impl>::Report(void)
{
  // Guard: with no counted calls there is nothing to average.
  if ( !(Calls > 0) ) return;

  const char *tag = "WilsonFermion5D ";

  std::cout << GridLogMessage << tag << "Dhop Calls "   << Calls                << std::endl;
  std::cout << GridLogMessage << tag << "CommTime "     << CommTime/Calls       << " us" << std::endl;
  std::cout << GridLogMessage << tag << "ComputeTime "  << ComputeTime/Calls    << " us" << std::endl;

  std::cout << GridLogMessage << tag << "Stencil" << std::endl;
  Stencil.Report();

  std::cout << GridLogMessage << tag << "StencilEven" << std::endl;
  StencilEven.Report();

  std::cout << GridLogMessage << tag << "StencilOdd" << std::endl;
  StencilOdd.Report();
}
// Reset this operator's call/time counters and those of its three stencils,
// so that a following Report() covers only the work done after this point.
template<class Impl>
void WilsonFermion5D<Impl>::ZeroCounters(void)
{
  // Operator-level counters.
  Calls = CommTime = ComputeTime = 0;

  // Stencil-level counters (full, even and odd).
  Stencil.ZeroCounters();
  StencilEven.ZeroCounters();
  StencilOdd.ZeroCounters();
}
template<class Impl>
void WilsonFermion5D<Impl>::ImportGauge(const GaugeField &_Umu)
{
@ -326,13 +355,17 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
DoubledGaugeField & U,
const FermionField &in, FermionField &out,int dag)
{
Calls++;
// assert((dag==DaggerNo) ||(dag==DaggerYes));
Compressor compressor(dag);
int LLs = in._grid->_rdimensions[0];
CommTime-=usecond();
st.HaloExchange(in,compressor);
CommTime+=usecond();
ComputeTime-=usecond();
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
if ( dag == DaggerYes ) {
PARALLEL_FOR_LOOP
@ -349,6 +382,7 @@ PARALLEL_FOR_LOOP
Kernels::DiracOptDhopSite(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out);
}
}
ComputeTime+=usecond();
}

View File

@ -61,6 +61,12 @@ namespace Grid {
INHERIT_IMPL_TYPES(Impl);
typedef WilsonKernels<Impl> Kernels;
void Report(void);
void ZeroCounters(void);
double Calls;
double CommTime;
double ComputeTime;
///////////////////////////////////////////////////////////////
// Implement the abstract base
///////////////////////////////////////////////////////////////