mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-04 19:25:56 +01:00
More timing. Think I should start to use nvtx and rocmtx ??
This commit is contained in:
parent
7ba4788715
commit
de40395773
@ -233,10 +233,10 @@ void WilsonFermion5D<Impl>::ImportGauge(const GaugeField &_Umu)
|
|||||||
GaugeField HUmu(_Umu.Grid());
|
GaugeField HUmu(_Umu.Grid());
|
||||||
HUmu = _Umu*(-0.5);
|
HUmu = _Umu*(-0.5);
|
||||||
if ( Dirichlet ) {
|
if ( Dirichlet ) {
|
||||||
std::cout << GridLogMessage << " Dirichlet BCs 5d " <<Block<<std::endl;
|
std::cout << GridLogDslash << " Dirichlet BCs 5d " <<Block<<std::endl;
|
||||||
Coordinate GaugeBlock(Nd);
|
Coordinate GaugeBlock(Nd);
|
||||||
for(int d=0;d<Nd;d++) GaugeBlock[d] = Block[d+1];
|
for(int d=0;d<Nd;d++) GaugeBlock[d] = Block[d+1];
|
||||||
std::cout << GridLogMessage << " Dirichlet BCs 4d " <<GaugeBlock<<std::endl;
|
std::cout << GridLogDslash << " Dirichlet BCs 4d " <<GaugeBlock<<std::endl;
|
||||||
DirichletFilter<GaugeField> Filter(GaugeBlock);
|
DirichletFilter<GaugeField> Filter(GaugeBlock);
|
||||||
Filter.applyFilter(HUmu);
|
Filter.applyFilter(HUmu);
|
||||||
}
|
}
|
||||||
@ -382,12 +382,14 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
|
|||||||
DoubledGaugeField & U,
|
DoubledGaugeField & U,
|
||||||
const FermionField &in, FermionField &out,int dag)
|
const FermionField &in, FermionField &out,int dag)
|
||||||
{
|
{
|
||||||
DhopTotalTime-=usecond();
|
// std::cout << GridLogDslash<<"Dhop internal"<<std::endl;
|
||||||
|
DhopTotalTime=-usecond();
|
||||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute )
|
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute )
|
||||||
DhopInternalOverlappedComms(st,lo,U,in,out,dag);
|
DhopInternalOverlappedComms(st,lo,U,in,out,dag);
|
||||||
else
|
else
|
||||||
DhopInternalSerialComms(st,lo,U,in,out,dag);
|
DhopInternalSerialComms(st,lo,U,in,out,dag);
|
||||||
DhopTotalTime+=usecond();
|
DhopTotalTime+=usecond();
|
||||||
|
// std::cout << GridLogDslash<<"Dhop took"<<DhopTotalTime<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -404,53 +406,59 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
|
|||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Start comms // Gather intranode and extra node differentiated??
|
// Start comms // Gather intranode and extra node differentiated??
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
DhopFaceTime-=usecond();
|
DhopFaceTime=-usecond();
|
||||||
st.HaloExchangeOptGather(in,compressor);
|
st.HaloExchangeOptGather(in,compressor);
|
||||||
DhopFaceTime+=usecond();
|
DhopFaceTime+=usecond();
|
||||||
|
// std::cout << GridLogDslash<< " Dhop Gather end "<< DhopFaceTime<<" us " <<std::endl;
|
||||||
|
|
||||||
DhopCommTime -=usecond();
|
DhopCommTime =-usecond();
|
||||||
std::vector<std::vector<CommsRequest_t> > requests;
|
std::vector<std::vector<CommsRequest_t> > requests;
|
||||||
st.CommunicateBegin(requests);
|
st.CommunicateBegin(requests);
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Overlap with comms
|
// Overlap with comms
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
DhopFaceTime-=usecond();
|
DhopFaceTime=-usecond();
|
||||||
st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
|
st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
|
||||||
DhopFaceTime+=usecond();
|
DhopFaceTime+=usecond();
|
||||||
|
// std::cout << GridLogDslash<< " Dhop Commsmerge end "<<DhopFaceTime<< " us "<<std::endl;
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// do the compute interior
|
// do the compute interior
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
int Opt = WilsonKernelsStatic::Opt; // Why pass this. Kernels should know
|
int Opt = WilsonKernelsStatic::Opt; // Why pass this. Kernels should know
|
||||||
DhopComputeTime-=usecond();
|
DhopComputeTime=-usecond();
|
||||||
if (dag == DaggerYes) {
|
if (dag == DaggerYes) {
|
||||||
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
|
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
|
||||||
} else {
|
} else {
|
||||||
Kernels::DhopKernel (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
|
Kernels::DhopKernel (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
|
||||||
}
|
}
|
||||||
DhopComputeTime+=usecond();
|
DhopComputeTime+=usecond();
|
||||||
|
// std::cout << GridLogDslash<< " Dhop Compute 1 end "<< DhopComputeTime<<" us" <<std::endl;
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Complete comms
|
// Complete comms
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
st.CommunicateComplete(requests);
|
st.CommunicateComplete(requests);
|
||||||
DhopCommTime +=usecond();
|
DhopCommTime +=usecond();
|
||||||
|
// std::cout << GridLogDslash<< " Dhop Comunicate end "<< DhopCommTime << " us" <<std::endl;
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// do the compute exterior
|
// do the compute exterior
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
DhopFaceTime-=usecond();
|
DhopFaceTime=-usecond();
|
||||||
st.CommsMerge(compressor);
|
st.CommsMerge(compressor);
|
||||||
DhopFaceTime+=usecond();
|
DhopFaceTime+=usecond();
|
||||||
|
// std::cout << GridLogDslash<< " Dhop CommsMerge2 end "<<DhopFaceTime << " us "<<std::endl;
|
||||||
|
|
||||||
DhopComputeTime2-=usecond();
|
DhopComputeTime2=-usecond();
|
||||||
if (dag == DaggerYes) {
|
if (dag == DaggerYes) {
|
||||||
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
|
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
|
||||||
} else {
|
} else {
|
||||||
Kernels::DhopKernel (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
|
Kernels::DhopKernel (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
|
||||||
}
|
}
|
||||||
DhopComputeTime2+=usecond();
|
DhopComputeTime2+=usecond();
|
||||||
|
// std::cout << GridLogDslash<< " Dhop Ext end "<<DhopComputeTime2 <<"us "<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -463,12 +471,14 @@ void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOr
|
|||||||
Compressor compressor(dag);
|
Compressor compressor(dag);
|
||||||
|
|
||||||
int LLs = in.Grid()->_rdimensions[0];
|
int LLs = in.Grid()->_rdimensions[0];
|
||||||
|
|
||||||
DhopCommTime-=usecond();
|
// std::cout << GridLogDslash<< " Dhop Halo exchange begine " <<std::endl;
|
||||||
|
DhopCommTime=-usecond();
|
||||||
st.HaloExchangeOpt(in,compressor);
|
st.HaloExchangeOpt(in,compressor);
|
||||||
DhopCommTime+=usecond();
|
DhopCommTime+=usecond();
|
||||||
|
// std::cout << GridLogDslash<< " Dhop Comms end "<<DhopCommTime<<" us"<<std::endl;
|
||||||
|
|
||||||
DhopComputeTime-=usecond();
|
DhopComputeTime=-usecond();
|
||||||
int Opt = WilsonKernelsStatic::Opt;
|
int Opt = WilsonKernelsStatic::Opt;
|
||||||
if (dag == DaggerYes) {
|
if (dag == DaggerYes) {
|
||||||
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out);
|
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out);
|
||||||
@ -476,6 +486,7 @@ void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOr
|
|||||||
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out);
|
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out);
|
||||||
}
|
}
|
||||||
DhopComputeTime+=usecond();
|
DhopComputeTime+=usecond();
|
||||||
|
// std::cout << GridLogDslash<< " Dhop Compute end "<<DhopComputeTime<<" us" <<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user