diff --git a/lib/qcd/action/fermion/FermionOperatorImpl.h b/lib/qcd/action/fermion/FermionOperatorImpl.h index 1d90cace..580f5e30 100644 --- a/lib/qcd/action/fermion/FermionOperatorImpl.h +++ b/lib/qcd/action/fermion/FermionOperatorImpl.h @@ -26,7 +26,7 @@ namespace Grid { // and Methods: // void ImportGauge(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) // void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) - // void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,CartesianStencil &St) + // void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St) // void InsertForce4D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu) // void InsertForce5D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu) // @@ -101,6 +101,7 @@ namespace Grid { typedef typename Impl::SiteSpinor SiteSpinor; \ typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \ typedef typename Impl::Compressor Compressor; \ + typedef typename Impl::StencilImpl StencilImpl; \ typedef typename Impl::ImplParams ImplParams; /////// @@ -112,7 +113,6 @@ namespace Grid { typedef ImplGauge<S,Nrepresentation> Gimpl; - INHERIT_GIMPL_TYPES(Gimpl); template<typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >; @@ -128,10 +128,11 @@ namespace Grid { typedef WilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor; typedef WilsonImplParams ImplParams; + typedef CartesianStencil<SiteSpinor,SiteHalfSpinor,Compressor> StencilImpl; ImplParams Params; WilsonImpl(const ImplParams &p= ImplParams()) : Params(p) {}; - inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,CartesianStencil &St){ + inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St){ mult(&phi(),&U(mu),&chi()); } @@ -198,13 +199,15 @@ PARALLEL_FOR_LOOP typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField; typedef WilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor; + typedef CartesianStencil<SiteSpinor,SiteHalfSpinor,Compressor> StencilImpl; typedef GparityWilsonImplParams ImplParams; ImplParams Params; GparityWilsonImpl(const ImplParams &p= ImplParams()) : Params(p) {}; + // provide the multiply by link that is differentiated between Gparity (with flavour index) and non-Gparity - inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,CartesianStencil &St){ + inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St){ typedef SiteHalfSpinor vobj; typedef typename SiteHalfSpinor::scalar_object sobj; diff --git a/lib/qcd/action/fermion/WilsonFermion.cc b/lib/qcd/action/fermion/WilsonFermion.cc index 053526cd..5e37238c 100644 --- a/lib/qcd/action/fermion/WilsonFermion.cc +++ b/lib/qcd/action/fermion/WilsonFermion.cc @@ -109,7 +109,7 @@ namespace QCD { /////////////////////////////////// template<class Impl> - void WilsonFermion<Impl>::DerivInternal(CartesianStencil & st, + void WilsonFermion<Impl>::DerivInternal(StencilImpl & st, DoubledGaugeField & U, GaugeField &mat, const FermionField &A, @@ -123,7 +123,7 @@ namespace QCD { FermionField Atilde(B._grid); Atilde = A; - st.HaloExchange<SiteSpinor,SiteHalfSpinor,Compressor>(B,comm_buf,compressor); + st.HaloExchange(B,comm_buf,compressor); for(int mu=0;mu<Nd;mu++){ @@ -242,7 +242,7 @@ PARALLEL_FOR_LOOP Compressor compressor(dag); - Stencil.HaloExchange<SiteSpinor,SiteHalfSpinor,Compressor>(in,comm_buf,compressor); + Stencil.HaloExchange(in,comm_buf,compressor); PARALLEL_FOR_LOOP for(int sss=0;sss<in._grid->oSites();sss++){ @@ -253,13 +253,13 @@ PARALLEL_FOR_LOOP template<class Impl> - void WilsonFermion<Impl>::DhopInternal(CartesianStencil & st,DoubledGaugeField & U, + void WilsonFermion<Impl>::DhopInternal(StencilImpl & st,DoubledGaugeField & U, const FermionField &in, FermionField &out,int dag) { assert((dag==DaggerNo) ||(dag==DaggerYes)); Compressor compressor(dag); - st.HaloExchange<SiteSpinor,SiteHalfSpinor,Compressor>(in,comm_buf,compressor); + st.HaloExchange(in,comm_buf,compressor); if ( dag == DaggerYes ) { if( HandOptDslash ) { diff --git a/lib/qcd/action/fermion/WilsonFermion.h b/lib/qcd/action/fermion/WilsonFermion.h index bab8ff15..9a8c848a 100644 --- a/lib/qcd/action/fermion/WilsonFermion.h +++ b/lib/qcd/action/fermion/WilsonFermion.h @@ -73,14 +73,14 @@ namespace Grid { /////////////////////////////////////////////////////////////// // Extra methods added by derived /////////////////////////////////////////////////////////////// - void DerivInternal(CartesianStencil & st, + void DerivInternal(StencilImpl & st, DoubledGaugeField & U, GaugeField &mat, const FermionField &A, const FermionField &B, int dag); - void DhopInternal(CartesianStencil & st,DoubledGaugeField & U, + void DhopInternal(StencilImpl & st,DoubledGaugeField & U, const FermionField &in, FermionField &out,int dag) ; @@ -108,9 +108,9 @@ namespace Grid { GridBase * _cbgrid; //Defines the stencils for even and odd - CartesianStencil Stencil; - CartesianStencil StencilEven; - CartesianStencil StencilOdd; + StencilImpl Stencil; + StencilImpl StencilEven; + StencilImpl StencilOdd; // Copy of the gauge field , with even and odd subsets DoubledGaugeField Umu; diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index 66ca67d5..8f22bc32 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -68,6 +68,8 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu, comm_buf.resize(Stencil._unified_buffer_size); // this is always big enough to contain EO ImportGauge(_Umu); + commtime=0; + dslashtime=0; } template<class Impl> void WilsonFermion5D<Impl>::ImportGauge(const GaugeField &_Umu) @@ -85,7 +87,7 @@ void WilsonFermion5D<Impl>::DhopDir(const FermionField &in, FermionField &out,in // assert( (dir>=0)&&(dir<4) ); //must do x,y,z or t; Compressor compressor(DaggerNo); - Stencil.HaloExchange<SiteSpinor,SiteHalfSpinor,Compressor>(in,comm_buf,compressor); + Stencil.HaloExchange(in,comm_buf,compressor); int skip = (disp==1) ? 0 : 1; @@ -105,7 +107,7 @@ PARALLEL_FOR_LOOP }; template<class Impl> -void WilsonFermion5D<Impl>::DerivInternal(CartesianStencil & st, +void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st, DoubledGaugeField & U, GaugeField &mat, const FermionField &A, @@ -122,7 +124,7 @@ void WilsonFermion5D<Impl>::DerivInternal(CartesianStencil & st, FermionField Btilde(B._grid); FermionField Atilde(B._grid); - st.HaloExchange<SiteSpinor,SiteHalfSpinor,Compressor>(B,comm_buf,compressor); + st.HaloExchange(B,comm_buf,compressor); Atilde=A; @@ -194,6 +196,27 @@ void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat, DerivInternal(StencilOdd,UmuEven,mat,A,B,dag); } + +template<class Impl> +void WilsonFermion5D<Impl>::Report(void) +{ + std::cout<<GridLogMessage << "********************"<<std::endl; + std::cout<<GridLogMessage << "Halo time "<<commtime <<" us"<<std::endl; + std::cout<<GridLogMessage << "Dslash time "<<dslashtime<<" us"<<std::endl; + std::cout<<GridLogMessage << "Stencil All time "<<Stencil.halotime<<" us"<<std::endl; + std::cout<<GridLogMessage << "********************"<<std::endl; + std::cout<<GridLogMessage << "Stencil nosplice time "<<Stencil.nosplicetime<<" us"<<std::endl; + std::cout<<GridLogMessage << "Stencil gather time "<<Stencil.gathertime<<" us"<<std::endl; + std::cout<<GridLogMessage << "Stencil comm time "<<Stencil.commtime<<" us"<<std::endl; + std::cout<<GridLogMessage << "Stencil scattertime "<<Stencil.scattertime<<" us"<<std::endl; + std::cout<<GridLogMessage << "********************"<<std::endl; + std::cout<<GridLogMessage << "Stencil splice time "<<Stencil.splicetime<<" us"<<std::endl; + std::cout<<GridLogMessage << "Stencil comm time "<<Stencil.commstime<<" us"<<std::endl; + std::cout<<GridLogMessage << "Stencil gathremtime "<<Stencil.gathermtime<<" us"<<std::endl; + std::cout<<GridLogMessage << "Stencil merge time "<<Stencil.mergetime<<" us"<<std::endl; + std::cout<<GridLogMessage << "Stencil buf time "<<Stencil.buftime<<" us"<<std::endl; + std::cout<<GridLogMessage << "********************"<<std::endl; +} template<class Impl> void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &A, @@ -212,7 +235,7 @@ void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat, } template<class Impl> -void WilsonFermion5D<Impl>::DhopInternal(CartesianStencil & st, LebesgueOrder &lo, +void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo, DoubledGaugeField & U, const FermionField &in, FermionField &out,int dag) { @@ -220,13 +243,16 @@ void WilsonFermion5D<Impl>::DhopInternal(CartesianStencil & st, LebesgueOrder &l Compressor compressor(dag); - st.HaloExchange<SiteSpinor,SiteHalfSpinor,Compressor>(in,comm_buf,compressor); + commtime -=usecond(); + st.HaloExchange(in,comm_buf,compressor); + commtime +=usecond(); // Dhop takes the 4d grid from U, and makes a 5d index for fermion // Not loop ordering and data layout. // Designed to create // - per thread reuse in L1 cache for U // - 8 linear access unit stride streams per thread for Fermion for hw prefetchable. + dslashtime -=usecond(); if ( dag == DaggerYes ) { if( this->HandOptDslash ) { PARALLEL_FOR_LOOP @@ -274,6 +300,7 @@ PARALLEL_FOR_LOOP } } } + dslashtime +=usecond(); } template<class Impl> void WilsonFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag) diff --git a/lib/qcd/action/fermion/WilsonFermion5D.h b/lib/qcd/action/fermion/WilsonFermion5D.h index b0d25309..e95fc2b3 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.h +++ b/lib/qcd/action/fermion/WilsonFermion5D.h @@ -31,7 +31,8 @@ namespace Grid { public: INHERIT_IMPL_TYPES(Impl); typedef WilsonKernels<Impl> Kernels; - + double commtime; + double dslashtime; /////////////////////////////////////////////////////////////// // Implement the abstract base /////////////////////////////////////////////////////////////// @@ -72,14 +73,14 @@ namespace Grid { /////////////////////////////////////////////////////////////// // New methods added /////////////////////////////////////////////////////////////// - void DerivInternal(CartesianStencil & st, + void DerivInternal(StencilImpl & st, DoubledGaugeField & U, GaugeField &mat, const FermionField &A, const FermionField &B, int dag); - void DhopInternal(CartesianStencil & st, + void DhopInternal(StencilImpl & st, LebesgueOrder &lo, DoubledGaugeField &U, const FermionField &in, @@ -97,6 +98,7 @@ namespace Grid { // DoubleStore void ImportGauge(const GaugeField &_Umu); + void Report(void); /////////////////////////////////////////////////////////////// // Data members require to support the functionality /////////////////////////////////////////////////////////////// @@ -112,9 +114,9 @@ namespace Grid { int Ls; //Defines the stencils for even and odd - CartesianStencil Stencil; - CartesianStencil StencilEven; - CartesianStencil StencilOdd; + StencilImpl Stencil; + StencilImpl StencilEven; + StencilImpl StencilOdd; // Copy of the gauge field , with even and odd subsets DoubledGaugeField Umu; diff --git a/lib/qcd/action/fermion/WilsonKernels.cc b/lib/qcd/action/fermion/WilsonKernels.cc index a897921a..a37e7f9e 100644 --- a/lib/qcd/action/fermion/WilsonKernels.cc +++ b/lib/qcd/action/fermion/WilsonKernels.cc @@ -3,7 +3,7 @@ namespace Grid { namespace QCD { template<class Impl> -void WilsonKernels<Impl>::DiracOptDhopSite(CartesianStencil &st,DoubledGaugeField &U, +void WilsonKernels<Impl>::DiracOptDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, int sF,int sU,const FermionField &in, FermionField &out) { @@ -122,7 +122,7 @@ void WilsonKernels<Impl>::DiracOptDhopSite(CartesianStencil &st,DoubledGaugeFiel }; template<class Impl> -void WilsonKernels<Impl>::DiracOptDhopSiteDag(CartesianStencil &st,DoubledGaugeField &U, +void WilsonKernels<Impl>::DiracOptDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, int sF,int sU,const FermionField &in, FermionField &out) { @@ -241,7 +241,7 @@ void WilsonKernels<Impl>::DiracOptDhopSiteDag(CartesianStencil &st,DoubledGaugeF } template<class Impl> -void WilsonKernels<Impl>::DiracOptDhopDir(CartesianStencil &st,DoubledGaugeField &U, +void WilsonKernels<Impl>::DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, int sF,int sU,const FermionField &in, FermionField &out,int dir,int gamma) { diff --git a/lib/qcd/action/fermion/WilsonKernels.h b/lib/qcd/action/fermion/WilsonKernels.h index 78d7c6a0..9696fad5 100644 --- a/lib/qcd/action/fermion/WilsonKernels.h +++ b/lib/qcd/action/fermion/WilsonKernels.h @@ -17,36 +17,36 @@ namespace Grid { typedef FermionOperator<Impl> Base; public: - void DiracOptDhopSite(CartesianStencil &st,DoubledGaugeField &U, + void DiracOptDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, int sF,int sU,const FermionField &in, FermionField &out); - void DiracOptDhopSiteDag(CartesianStencil &st,DoubledGaugeField &U, + void DiracOptDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, int sF,int sU,const FermionField &in,FermionField &out); - void DiracOptDhopDir(CartesianStencil &st,DoubledGaugeField &U, + void DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, int sF,int sU,const FermionField &in, FermionField &out,int dirdisp,int gamma); #define HANDOPT #ifdef HANDOPT - void DiracOptHandDhopSite(CartesianStencil &st,DoubledGaugeField &U, + void DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, int sF,int sU,const FermionField &in, FermionField &out); - void DiracOptHandDhopSiteDag(CartesianStencil &st,DoubledGaugeField &U, + void DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, int sF,int sU,const FermionField &in, FermionField &out); #else - void DiracOptHandDhopSite(CartesianStencil &st,DoubledGaugeField &U, + void DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, int sF,int sU,const FermionField &in, FermionField &out) { DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3 } - void DiracOptHandDhopSiteDag(CartesianStencil &st,DoubledGaugeField &U, + void DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, int sF,int sU,const FermionField &in, FermionField &out) { diff --git a/lib/qcd/action/fermion/WilsonKernelsAsm.cc b/lib/qcd/action/fermion/WilsonKernelsAsm.cc index 9ccd18c6..fedfdd00 100644 --- a/lib/qcd/action/fermion/WilsonKernelsAsm.cc +++ b/lib/qcd/action/fermion/WilsonKernelsAsm.cc @@ -73,7 +73,7 @@ namespace Grid { namespace QCD { template<class Impl> -void WilsonKernels<Impl >::DiracOptAsmDhopSite(CartesianStencil &st,DoubledGaugeField &U, +void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, int ss,int sU,const FermionField &in, FermionField &out,uint64_t *timers) { diff --git a/lib/qcd/action/fermion/WilsonKernelsHand.cc b/lib/qcd/action/fermion/WilsonKernelsHand.cc index fee1b0fb..b263ea83 100644 --- a/lib/qcd/action/fermion/WilsonKernelsHand.cc +++ b/lib/qcd/action/fermion/WilsonKernelsHand.cc @@ -282,7 +282,7 @@ namespace QCD { #ifdef HANDOPT template<class Impl> -void WilsonKernels<Impl >::DiracOptHandDhopSite(CartesianStencil &st,DoubledGaugeField &U, +void WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, int ss,int sU,const FermionField &in, FermionField &out) { @@ -526,7 +526,7 @@ void WilsonKernels<Impl >::DiracOptHandDhopSite(CartesianStencil &st,DoubledGaug } template<class Impl> -void WilsonKernels<Impl >::DiracOptHandDhopSiteDag(CartesianStencil &st,DoubledGaugeField &U, +void WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, int ss,int sU,const FermionField &in, FermionField &out) {