1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-04 19:25:56 +01:00

More timing. Think I should start to use nvtx and rocmtx ??

This commit is contained in:
Peter Boyle 2022-08-04 13:37:16 -04:00
parent 7ba4788715
commit de40395773

View File

@ -233,10 +233,10 @@ void WilsonFermion5D<Impl>::ImportGauge(const GaugeField &_Umu)
GaugeField HUmu(_Umu.Grid()); GaugeField HUmu(_Umu.Grid());
HUmu = _Umu*(-0.5); HUmu = _Umu*(-0.5);
if ( Dirichlet ) { if ( Dirichlet ) {
std::cout << GridLogMessage << " Dirichlet BCs 5d " <<Block<<std::endl; std::cout << GridLogDslash << " Dirichlet BCs 5d " <<Block<<std::endl;
Coordinate GaugeBlock(Nd); Coordinate GaugeBlock(Nd);
for(int d=0;d<Nd;d++) GaugeBlock[d] = Block[d+1]; for(int d=0;d<Nd;d++) GaugeBlock[d] = Block[d+1];
std::cout << GridLogMessage << " Dirichlet BCs 4d " <<GaugeBlock<<std::endl; std::cout << GridLogDslash << " Dirichlet BCs 4d " <<GaugeBlock<<std::endl;
DirichletFilter<GaugeField> Filter(GaugeBlock); DirichletFilter<GaugeField> Filter(GaugeBlock);
Filter.applyFilter(HUmu); Filter.applyFilter(HUmu);
} }
@ -382,12 +382,14 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
DoubledGaugeField & U, DoubledGaugeField & U,
const FermionField &in, FermionField &out,int dag) const FermionField &in, FermionField &out,int dag)
{ {
DhopTotalTime-=usecond(); // std::cout << GridLogDslash<<"Dhop internal"<<std::endl;
DhopTotalTime=-usecond();
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute )
DhopInternalOverlappedComms(st,lo,U,in,out,dag); DhopInternalOverlappedComms(st,lo,U,in,out,dag);
else else
DhopInternalSerialComms(st,lo,U,in,out,dag); DhopInternalSerialComms(st,lo,U,in,out,dag);
DhopTotalTime+=usecond(); DhopTotalTime+=usecond();
// std::cout << GridLogDslash<<"Dhop took"<<DhopTotalTime<<std::endl;
} }
@ -404,53 +406,59 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
///////////////////////////// /////////////////////////////
// Start comms // Gather intranode and extra node differentiated?? // Start comms // Gather intranode and extra node differentiated??
///////////////////////////// /////////////////////////////
DhopFaceTime-=usecond(); DhopFaceTime=-usecond();
st.HaloExchangeOptGather(in,compressor); st.HaloExchangeOptGather(in,compressor);
DhopFaceTime+=usecond(); DhopFaceTime+=usecond();
// std::cout << GridLogDslash<< " Dhop Gather end "<< DhopFaceTime<<" us " <<std::endl;
DhopCommTime -=usecond(); DhopCommTime =-usecond();
std::vector<std::vector<CommsRequest_t> > requests; std::vector<std::vector<CommsRequest_t> > requests;
st.CommunicateBegin(requests); st.CommunicateBegin(requests);
///////////////////////////// /////////////////////////////
// Overlap with comms // Overlap with comms
///////////////////////////// /////////////////////////////
DhopFaceTime-=usecond(); DhopFaceTime=-usecond();
st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
DhopFaceTime+=usecond(); DhopFaceTime+=usecond();
// std::cout << GridLogDslash<< " Dhop Commsmerge end "<<DhopFaceTime<< " us "<<std::endl;
///////////////////////////// /////////////////////////////
// do the compute interior // do the compute interior
///////////////////////////// /////////////////////////////
int Opt = WilsonKernelsStatic::Opt; // Why pass this. Kernels should know int Opt = WilsonKernelsStatic::Opt; // Why pass this. Kernels should know
DhopComputeTime-=usecond(); DhopComputeTime=-usecond();
if (dag == DaggerYes) { if (dag == DaggerYes) {
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0); Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
} else { } else {
Kernels::DhopKernel (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0); Kernels::DhopKernel (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
} }
DhopComputeTime+=usecond(); DhopComputeTime+=usecond();
// std::cout << GridLogDslash<< " Dhop Compute 1 end "<< DhopComputeTime<<" us" <<std::endl;
///////////////////////////// /////////////////////////////
// Complete comms // Complete comms
///////////////////////////// /////////////////////////////
st.CommunicateComplete(requests); st.CommunicateComplete(requests);
DhopCommTime +=usecond(); DhopCommTime +=usecond();
// std::cout << GridLogDslash<< " Dhop Comunicate end "<< DhopCommTime << " us" <<std::endl;
///////////////////////////// /////////////////////////////
// do the compute exterior // do the compute exterior
///////////////////////////// /////////////////////////////
DhopFaceTime-=usecond(); DhopFaceTime=-usecond();
st.CommsMerge(compressor); st.CommsMerge(compressor);
DhopFaceTime+=usecond(); DhopFaceTime+=usecond();
// std::cout << GridLogDslash<< " Dhop CommsMerge2 end "<<DhopFaceTime << " us "<<std::endl;
DhopComputeTime2-=usecond(); DhopComputeTime2=-usecond();
if (dag == DaggerYes) { if (dag == DaggerYes) {
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1); Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
} else { } else {
Kernels::DhopKernel (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1); Kernels::DhopKernel (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
} }
DhopComputeTime2+=usecond(); DhopComputeTime2+=usecond();
// std::cout << GridLogDslash<< " Dhop Ext end "<<DhopComputeTime2 <<"us "<<std::endl;
} }
@ -463,12 +471,14 @@ void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOr
Compressor compressor(dag); Compressor compressor(dag);
int LLs = in.Grid()->_rdimensions[0]; int LLs = in.Grid()->_rdimensions[0];
DhopCommTime-=usecond(); // std::cout << GridLogDslash<< " Dhop Halo exchange begine " <<std::endl;
DhopCommTime=-usecond();
st.HaloExchangeOpt(in,compressor); st.HaloExchangeOpt(in,compressor);
DhopCommTime+=usecond(); DhopCommTime+=usecond();
// std::cout << GridLogDslash<< " Dhop Comms end "<<DhopCommTime<<" us"<<std::endl;
DhopComputeTime-=usecond(); DhopComputeTime=-usecond();
int Opt = WilsonKernelsStatic::Opt; int Opt = WilsonKernelsStatic::Opt;
if (dag == DaggerYes) { if (dag == DaggerYes) {
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out); Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out);
@ -476,6 +486,7 @@ void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOr
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out); Kernels::DhopKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out);
} }
DhopComputeTime+=usecond(); DhopComputeTime+=usecond();
// std::cout << GridLogDslash<< " Dhop Compute end "<<DhopComputeTime<<" us" <<std::endl;
} }