mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-27 14:15:55 +01:00
Provide info if EE term is trivial (m^2 factor)
Better timing in staggered 4d case
This commit is contained in:
parent
31d83ee046
commit
2cbb72a81c
@ -330,6 +330,7 @@ void ImprovedStaggeredFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionF
|
|||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag) {
|
void ImprovedStaggeredFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag) {
|
||||||
|
DhopCalls+=2;
|
||||||
conformable(in._grid, _grid); // verifies full grid
|
conformable(in._grid, _grid); // verifies full grid
|
||||||
conformable(in._grid, out._grid);
|
conformable(in._grid, out._grid);
|
||||||
|
|
||||||
@ -340,6 +341,7 @@ void ImprovedStaggeredFermion<Impl>::Dhop(const FermionField &in, FermionField &
|
|||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag) {
|
void ImprovedStaggeredFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag) {
|
||||||
|
DhopCalls+=1;
|
||||||
conformable(in._grid, _cbgrid); // verifies half grid
|
conformable(in._grid, _cbgrid); // verifies half grid
|
||||||
conformable(in._grid, out._grid); // drops the cb check
|
conformable(in._grid, out._grid); // drops the cb check
|
||||||
|
|
||||||
@ -351,6 +353,7 @@ void ImprovedStaggeredFermion<Impl>::DhopOE(const FermionField &in, FermionField
|
|||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion<Impl>::DhopEO(const FermionField &in, FermionField &out, int dag) {
|
void ImprovedStaggeredFermion<Impl>::DhopEO(const FermionField &in, FermionField &out, int dag) {
|
||||||
|
DhopCalls+=1;
|
||||||
conformable(in._grid, _cbgrid); // verifies half grid
|
conformable(in._grid, _cbgrid); // verifies half grid
|
||||||
conformable(in._grid, out._grid); // drops the cb check
|
conformable(in._grid, out._grid); // drops the cb check
|
||||||
|
|
||||||
@ -403,13 +406,18 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st
|
|||||||
int len = U._grid->oSites();
|
int len = U._grid->oSites();
|
||||||
const int LLs = 1;
|
const int LLs = 1;
|
||||||
|
|
||||||
|
DhopTotalTime -= usecond();
|
||||||
|
|
||||||
|
DhopFaceTime -= usecond();
|
||||||
st.Prepare();
|
st.Prepare();
|
||||||
st.HaloGather(in,compressor);
|
st.HaloGather(in,compressor);
|
||||||
st.CommsMergeSHM(compressor);
|
st.CommsMergeSHM(compressor);
|
||||||
|
DhopFaceTime += usecond();
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Ugly explicit thread mapping introduced for OPA reasons.
|
// Ugly explicit thread mapping introduced for OPA reasons.
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
DhopComputeTime -= usecond();
|
||||||
#pragma omp parallel
|
#pragma omp parallel
|
||||||
{
|
{
|
||||||
int tid = omp_get_thread_num();
|
int tid = omp_get_thread_num();
|
||||||
@ -451,10 +459,14 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st
|
|||||||
st.CommunicateThreaded();
|
st.CommunicateThreaded();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
DhopComputeTime += usecond();
|
||||||
|
|
||||||
// First to enter, last to leave timing
|
// First to enter, last to leave timing
|
||||||
|
DhopFaceTime -= usecond();
|
||||||
st.CommsMerge(compressor);
|
st.CommsMerge(compressor);
|
||||||
|
DhopFaceTime -= usecond();
|
||||||
|
|
||||||
|
DhopComputeTime2 -= usecond();
|
||||||
if (dag == DaggerYes) {
|
if (dag == DaggerYes) {
|
||||||
int sz=st.surface_list.size();
|
int sz=st.surface_list.size();
|
||||||
parallel_for (int ss = 0; ss < sz; ss++) {
|
parallel_for (int ss = 0; ss < sz; ss++) {
|
||||||
@ -468,6 +480,7 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st
|
|||||||
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),1,sU,in,out,0,1);
|
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),1,sU,in,out,0,1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
DhopComputeTime2 += usecond();
|
||||||
#else
|
#else
|
||||||
assert(0);
|
assert(0);
|
||||||
#endif
|
#endif
|
||||||
@ -483,9 +496,14 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalSerialComms(StencilImpl &st, Le
|
|||||||
{
|
{
|
||||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||||
|
|
||||||
|
DhopTotalTime -= usecond();
|
||||||
|
|
||||||
|
DhopCommTime -= usecond();
|
||||||
Compressor compressor;
|
Compressor compressor;
|
||||||
st.HaloExchange(in, compressor);
|
st.HaloExchange(in, compressor);
|
||||||
|
DhopCommTime += usecond();
|
||||||
|
|
||||||
|
DhopComputeTime -= usecond();
|
||||||
if (dag == DaggerYes) {
|
if (dag == DaggerYes) {
|
||||||
parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||||
Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out);
|
Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out);
|
||||||
@ -495,8 +513,65 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalSerialComms(StencilImpl &st, Le
|
|||||||
Kernels::DhopSite(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out);
|
Kernels::DhopSite(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
DhopComputeTime += usecond();
|
||||||
|
DhopTotalTime += usecond();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////
|
||||||
|
// Reporting
|
||||||
|
////////////////////////////////////////////////////////////////
|
||||||
|
template<class Impl>
|
||||||
|
void ImprovedStaggeredFermion<Impl>::Report(void)
|
||||||
|
{
|
||||||
|
std::vector<int> latt = GridDefaultLatt();
|
||||||
|
RealD volume = 1; for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
|
||||||
|
RealD NP = _grid->_Nprocessors;
|
||||||
|
RealD NN = _grid->NodeCount();
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "ImprovedStaggeredFermion Number of DhopEO Calls : "
|
||||||
|
<< DhopCalls << std::endl;
|
||||||
|
std::cout << GridLogMessage << "ImprovedStaggeredFermion TotalTime /Calls : "
|
||||||
|
<< DhopTotalTime / DhopCalls << " us" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "ImprovedStaggeredFermion CommTime /Calls : "
|
||||||
|
<< DhopCommTime / DhopCalls << " us" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "ImprovedStaggeredFermion ComputeTime/Calls : "
|
||||||
|
<< DhopComputeTime / DhopCalls << " us" << std::endl;
|
||||||
|
|
||||||
|
// Average the compute time
|
||||||
|
_grid->GlobalSum(DhopComputeTime);
|
||||||
|
DhopComputeTime/=NP;
|
||||||
|
|
||||||
|
RealD mflops = 1154*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl;
|
||||||
|
|
||||||
|
RealD Fullmflops = 1154*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "ImprovedStaggeredFermion Stencil" <<std::endl; Stencil.Report();
|
||||||
|
std::cout << GridLogMessage << "ImprovedStaggeredFermion StencilEven"<<std::endl; StencilEven.Report();
|
||||||
|
std::cout << GridLogMessage << "ImprovedStaggeredFermion StencilOdd" <<std::endl; StencilOdd.Report();
|
||||||
|
}
|
||||||
|
template<class Impl>
|
||||||
|
void ImprovedStaggeredFermion<Impl>::ZeroCounters(void)
|
||||||
|
{
|
||||||
|
DhopCalls = 0;
|
||||||
|
DhopTotalTime = 0;
|
||||||
|
DhopCommTime = 0;
|
||||||
|
DhopComputeTime = 0;
|
||||||
|
DhopFaceTime = 0;
|
||||||
|
|
||||||
|
Stencil.ZeroCounters();
|
||||||
|
StencilEven.ZeroCounters();
|
||||||
|
StencilOdd.ZeroCounters();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
FermOpStaggeredTemplateInstantiate(ImprovedStaggeredFermion);
|
FermOpStaggeredTemplateInstantiate(ImprovedStaggeredFermion);
|
||||||
|
|
||||||
//AdjointFermOpTemplateInstantiate(ImprovedStaggeredFermion);
|
//AdjointFermOpTemplateInstantiate(ImprovedStaggeredFermion);
|
||||||
|
@ -49,6 +49,18 @@ class ImprovedStaggeredFermion : public StaggeredKernels<Impl>, public ImprovedS
|
|||||||
FermionField _tmp;
|
FermionField _tmp;
|
||||||
FermionField &tmp(void) { return _tmp; }
|
FermionField &tmp(void) { return _tmp; }
|
||||||
|
|
||||||
|
////////////////////////////////////////
|
||||||
|
// Performance monitoring
|
||||||
|
////////////////////////////////////////
|
||||||
|
void Report(void);
|
||||||
|
void ZeroCounters(void);
|
||||||
|
double DhopTotalTime;
|
||||||
|
double DhopCalls;
|
||||||
|
double DhopCommTime;
|
||||||
|
double DhopComputeTime;
|
||||||
|
double DhopComputeTime2;
|
||||||
|
double DhopFaceTime;
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
// Implement the abstract base
|
// Implement the abstract base
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
@ -142,7 +154,8 @@ class ImprovedStaggeredFermion : public StaggeredKernels<Impl>, public ImprovedS
|
|||||||
// protected:
|
// protected:
|
||||||
public:
|
public:
|
||||||
// any other parameters of action ???
|
// any other parameters of action ???
|
||||||
|
virtual int isTrivialEE(void) { return 1; };
|
||||||
|
virtual RealD Mass(void) { return mass; }
|
||||||
RealD mass;
|
RealD mass;
|
||||||
RealD u0;
|
RealD u0;
|
||||||
RealD c1;
|
RealD c1;
|
||||||
|
@ -291,14 +291,12 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOr
|
|||||||
DoubledGaugeField & U,DoubledGaugeField & UUU,
|
DoubledGaugeField & U,DoubledGaugeField & UUU,
|
||||||
const FermionField &in, FermionField &out,int dag)
|
const FermionField &in, FermionField &out,int dag)
|
||||||
{
|
{
|
||||||
DhopTotalTime-=usecond();
|
|
||||||
#ifdef GRID_OMP
|
#ifdef GRID_OMP
|
||||||
if ( StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsAndCompute )
|
if ( StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsAndCompute )
|
||||||
DhopInternalOverlappedComms(st,lo,U,UUU,in,out,dag);
|
DhopInternalOverlappedComms(st,lo,U,UUU,in,out,dag);
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
DhopInternalSerialComms(st,lo,U,UUU,in,out,dag);
|
DhopInternalSerialComms(st,lo,U,UUU,in,out,dag);
|
||||||
DhopTotalTime+=usecond();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -412,6 +410,7 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st,
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//double t1=usecond();
|
||||||
DhopTotalTime -= usecond();
|
DhopTotalTime -= usecond();
|
||||||
DhopCommTime -= usecond();
|
DhopCommTime -= usecond();
|
||||||
st.HaloExchange(in,compressor);
|
st.HaloExchange(in,compressor);
|
||||||
@ -432,6 +431,12 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st,
|
|||||||
}
|
}
|
||||||
DhopComputeTime += usecond();
|
DhopComputeTime += usecond();
|
||||||
DhopTotalTime += usecond();
|
DhopTotalTime += usecond();
|
||||||
|
//double t2=usecond();
|
||||||
|
//std::cout << __FILE__ << " " << __func__ << " Total Time " << DhopTotalTime << std::endl;
|
||||||
|
//std::cout << __FILE__ << " " << __func__ << " Total Time Org " << t2-t1 << std::endl;
|
||||||
|
//std::cout << __FILE__ << " " << __func__ << " Comml Time " << DhopCommTime << std::endl;
|
||||||
|
//std::cout << __FILE__ << " " << __func__ << " Compute Time " << DhopComputeTime << std::endl;
|
||||||
|
|
||||||
}
|
}
|
||||||
/*CHANGE END*/
|
/*CHANGE END*/
|
||||||
|
|
||||||
|
@ -179,6 +179,9 @@ namespace QCD {
|
|||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
virtual int isTrivialEE(void) { return 1; };
|
||||||
|
virtual RealD Mass(void) { return mass; }
|
||||||
|
|
||||||
GridBase *_FourDimGrid;
|
GridBase *_FourDimGrid;
|
||||||
GridBase *_FourDimRedBlackGrid;
|
GridBase *_FourDimRedBlackGrid;
|
||||||
GridBase *_FiveDimGrid;
|
GridBase *_FiveDimGrid;
|
||||||
|
@ -135,6 +135,8 @@ class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic {
|
|||||||
|
|
||||||
// protected:
|
// protected:
|
||||||
public:
|
public:
|
||||||
|
virtual RealD Mass(void) { return mass; }
|
||||||
|
virtual int isTrivialEE(void) { return 1; };
|
||||||
RealD mass;
|
RealD mass;
|
||||||
|
|
||||||
GridBase *_grid;
|
GridBase *_grid;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user