From 4fca66a7c6b0423eab7626ebd5d5b428ccc2366e Mon Sep 17 00:00:00 2001
From: Peter Boyle
Date: Thu, 6 May 2021 23:44:45 +0200
Subject: [PATCH] Implement Dirichlet Comms option

Add a third WilsonKernelsStatic::Comms mode, CommsDirichlet.

When it is selected, DhopInternal in WilsonFermion and WilsonFermion5D
dispatches to a new routine (DhopInternalDirichlet and
DhopInternalDirichletComms respectively). The new routines make a single
Dhop/DhopDag kernel call over U.oSites() with trailing arguments 1,0
(per the in-code comment: "do the compute interior") and contain no
halo-exchange call.

DhopInternal now asserts that Comms is one of the three known modes and
selects the routine with one braced `if` per mode. In WilsonFermion the
GRID_OMP guard around the overlapped-comms dispatch is removed.

The 4D routine uses the single name DhopInternalDirichlet for its
declaration, its definition and its call site.

---
 Grid/qcd/action/fermion/WilsonFermion.h       |  3 ++
 Grid/qcd/action/fermion/WilsonFermion5D.h     |  9 ++++-
 Grid/qcd/action/fermion/WilsonKernels.h       |  2 +-
 .../WilsonFermion5DImplementation.h           | 39 +++++++++++++++++-
 .../WilsonFermionImplementation.h             | 40 +++++++++++++++++--
 5 files changed, 85 insertions(+), 8 deletions(-)

diff --git a/Grid/qcd/action/fermion/WilsonFermion.h b/Grid/qcd/action/fermion/WilsonFermion.h
index bf8926d0..e23d7092 100644
--- a/Grid/qcd/action/fermion/WilsonFermion.h
+++ b/Grid/qcd/action/fermion/WilsonFermion.h
@@ -146,6 +146,9 @@ public:
   void DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
                           const FermionField &in, FermionField &out, int dag);
 
+  void DhopInternalDirichlet(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
+                             const FermionField &in, FermionField &out, int dag);
+
   void DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
                                    const FermionField &in, FermionField &out, int dag);
 
diff --git a/Grid/qcd/action/fermion/WilsonFermion5D.h b/Grid/qcd/action/fermion/WilsonFermion5D.h
index 80231bb4..859af04e 100644
--- a/Grid/qcd/action/fermion/WilsonFermion5D.h
+++ b/Grid/qcd/action/fermion/WilsonFermion5D.h
@@ -165,7 +165,14 @@ public:
                                const FermionField &in,
                                FermionField &out,
                                int dag);
-    
+
+  void DhopInternalDirichletComms(StencilImpl & st,
+                                  LebesgueOrder &lo,
+                                  DoubledGaugeField &U,
+                                  const FermionField &in,
+                                  FermionField &out,
+                                  int dag);
+
   // Constructors
   WilsonFermion5D(GaugeField &_Umu,
                   GridCartesian &FiveDimGrid,
diff --git a/Grid/qcd/action/fermion/WilsonKernels.h b/Grid/qcd/action/fermion/WilsonKernels.h
index 68422f28..adacd8a9 100644
--- a/Grid/qcd/action/fermion/WilsonKernels.h
+++ b/Grid/qcd/action/fermion/WilsonKernels.h
@@ -39,7 +39,7 @@ NAMESPACE_BEGIN(Grid);
 class WilsonKernelsStatic {
 public:
   enum { OptGeneric, OptHandUnroll, OptInlineAsm };
-  enum { CommsAndCompute, CommsThenCompute };
+  enum { CommsAndCompute, CommsThenCompute, CommsDirichlet };
   static int Opt;
   static int Comms;
 };
diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h
index 2cc308cc..6863cf62 100644
--- a/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h
+++ b/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h
@@ -361,10 +361,21 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
                                          const FermionField &in, FermionField &out,int dag)
 {
   DhopTotalTime-=usecond();
-  if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute )
+
+  assert(  (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute)
+         ||(WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute)
+         ||(WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsDirichlet) );
+
+  if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) {
     DhopInternalOverlappedComms(st,lo,U,in,out,dag);
-  else
+  }
+  if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute ) {
     DhopInternalSerialComms(st,lo,U,in,out,dag);
+  }
+  if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsDirichlet ) {
+    DhopInternalDirichletComms(st,lo,U,in,out,dag);
+  }
+
   DhopTotalTime+=usecond();
 }
 
@@ -431,6 +442,30 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
   DhopComputeTime2+=usecond();
 }
 
+template<class Impl>
+void WilsonFermion5D<Impl>::DhopInternalDirichletComms(StencilImpl & st, LebesgueOrder &lo,
+                                                       DoubledGaugeField & U,
+                                                       const FermionField &in, FermionField &out,int dag)
+{
+  Compressor compressor(dag);
+
+  int LLs = in.Grid()->_rdimensions[0];
+  int len = U.Grid()->oSites();
+
+  /////////////////////////////
+  // do the compute interior
+  /////////////////////////////
+  int Opt = WilsonKernelsStatic::Opt; // Why pass this. Kernels should know
+  DhopComputeTime-=usecond();
+  if (dag == DaggerYes) {
+    Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
+  } else {
+    Kernels::DhopKernel   (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
+  }
+  DhopComputeTime+=usecond();
+
+}
+
 
 template<class Impl>
 void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOrder &lo,
diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h
index 84ac25c1..a9460dd6 100644
--- a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h
+++ b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h
@@ -488,12 +488,21 @@ void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
                                        FermionField &out, int dag)
 {
   DhopTotalTime-=usecond();
-#ifdef GRID_OMP
-  if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute )
+
+  assert(  (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute)
+         ||(WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute)
+         ||(WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsDirichlet) );
+
+
+  if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) {
     DhopInternalOverlappedComms(st,lo,U,in,out,dag);
-  else
-#endif
+  }
+  if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute ) {
     DhopInternalSerial(st,lo,U,in,out,dag);
+  }
+  if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsDirichlet ) {
+    DhopInternalDirichlet(st,lo,U,in,out,dag);
+  }
   DhopTotalTime+=usecond();
 }
 
@@ -562,6 +571,29 @@ void WilsonFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st, LebesgueO
   DhopComputeTime2+=usecond();
 };
 
+template <class Impl>
+void WilsonFermion<Impl>::DhopInternalDirichlet(StencilImpl &st, LebesgueOrder &lo,
+                                                DoubledGaugeField &U,
+                                                const FermionField &in,
+                                                FermionField &out, int dag)
+{
+  assert((dag == DaggerNo) || (dag == DaggerYes));
+
+  Compressor compressor(dag);
+  int len = U.Grid()->oSites();
+
+  /////////////////////////////
+  // do the compute interior
+  /////////////////////////////
+  int Opt = WilsonKernelsStatic::Opt;
+  DhopComputeTime-=usecond();
+  if (dag == DaggerYes) {
+    Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0);
+  } else {
+    Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0);
+  }
+  DhopComputeTime+=usecond();
+};
 
 template <class Impl>
 void WilsonFermion<Impl>::DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo,