/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid Source file: ./lib/qcd/action/fermion/WilsonKernels.h Copyright (C) 2015 Author: Peter Boyle Author: Peter Boyle Author: paboyle This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ #ifndef GRID_QCD_DHOP_H #define GRID_QCD_DHOP_H NAMESPACE_BEGIN(Grid); //////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Helper routines that implement Wilson stencil for a single site. // Common to both the WilsonFermion and WilsonFermion5D //////////////////////////////////////////////////////////////////////////////////////////////////////////////// class WilsonKernelsStatic { public: enum { OptGeneric, OptHandUnroll, OptInlineAsm, OptGpu }; enum { CommsAndCompute, CommsThenCompute }; static int Opt; static int Comms; }; template class WilsonKernels { public: INHERIT_IMPL_TYPES(Impl); public: static void Dhop(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf, int Ls, int Nsite, const FermionField &in, FermionField &out, int interior=1,int exterior=1) { auto U_v = U.View(); auto in_v = in.View(); auto out_v = out.View(); auto st_v = st.View(); if ( (Opt == WilsonKernelsStatic::OptGpu) && interior && exterior ) { const uint64_t nsimd = Simd::Nsimd(); const uint64_t NN = Nsite*Ls*nsimd; accelerator_loopN( sss, NN, { uint64_t cur = sss; // uint64_t lane = cur % nsimd; cur = cur / nsimd; uint64_t sF = cur; cur = cur / Ls; uint64_t sU = cur; WilsonKernels::GpuDhopSite(st_v,U_v,buf,sF,sU,in_v,out_v); }); } else { accelerator_loop( ss, U_v, { int sU = ss; int sF = Ls * sU; DhopSite(Opt,st_v,U_v,st.CommBuf(),sF,sU,Ls,1,in_v,out_v); }); } } static void DhopDag(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf, int Ls, int Nsite, const FermionField &in, FermionField &out, int interior=1,int exterior=1) { auto U_v = U.View(); auto in_v = in.View(); auto out_v = out.View(); auto st_v = st.View(); if ( (Opt == WilsonKernelsStatic::OptGpu) && interior && exterior ) { const uint64_t nsimd = Simd::Nsimd(); const uint64_t NN = Nsite*Ls*nsimd; accelerator_loopN( sss, NN, { uint64_t cur = sss; // uint64_t lane = cur % nsimd; cur = cur / nsimd; uint64_t sF = cur; cur = cur / Ls; uint64_t sU = cur; WilsonKernels::GpuDhopSiteDag(st_v,U_v,buf,sF,sU,in_v,out_v); }); } else { accelerator_loop( ss, U_v, { int sU = ss; int sF = Ls * sU; DhopSiteDag(Opt,st,U_v,st.CommBuf(),sF,sU,Ls,1,in_v,out_v); }); } } template static accelerator typename std::enable_if::type DhopSite(int Opt,StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1) { //bgq_l1p_optimisation(1); switch(Opt) { #if defined(AVX512) || defined (QPX) case WilsonKernelsStatic::OptInlineAsm: if(interior&&exterior) WilsonKernels::AsmDhopSite (st,U,buf,sF,sU,Ls,Nsite,in,out); else if (interior) WilsonKernels::AsmDhopSiteInt(st,U,buf,sF,sU,Ls,Nsite,in,out); else if (exterior) WilsonKernels::AsmDhopSiteExt(st,U,buf,sF,sU,Ls,Nsite,in,out); else assert(0); break; #endif case WilsonKernelsStatic::OptHandUnroll: for (int site = 0; site < Nsite; site++) { for (int s = 0; s < Ls; s++) { if(interior&&exterior) WilsonKernels::HandDhopSite(st,U,buf,sF,sU,in,out); else if (interior) WilsonKernels::HandDhopSiteInt(st,U,buf,sF,sU,in,out); else if (exterior) WilsonKernels::HandDhopSiteExt(st,U,buf,sF,sU,in,out); sF++; } sU++; } break; case WilsonKernelsStatic::OptGpu: case WilsonKernelsStatic::OptGeneric: for (int site = 0; site < Nsite; site++) { for (int s = 0; s < Ls; s++) { if(interior&&exterior) WilsonKernels::GenericDhopSite(st,U,buf,sF,sU,in,out); else if (interior) WilsonKernels::GenericDhopSiteInt(st,U,buf,sF,sU,in,out); else if (exterior) WilsonKernels::GenericDhopSiteExt(st,U,buf,sF,sU,in,out); else assert(0); sF++; } sU++; } break; default: assert(0); } //bgq_l1p_optimisation(0); } template static accelerator typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type DhopSite(int Opt, StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1 ) { // no kernel choice for (int site = 0; site < Nsite; site++) { for (int s = 0; s < Ls; s++) { if(interior&&exterior) WilsonKernels::GenericDhopSite(st,U,buf,sF,sU,in,out); else if (interior) WilsonKernels::GenericDhopSiteInt(st,U,buf,sF,sU,in,out); else if (exterior) WilsonKernels::GenericDhopSiteExt(st,U,buf,sF,sU,in,out); else assert(0); sF++; } sU++; } } template static accelerator typename std::enable_if::type DhopSiteDag(int Opt, StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1) { //bgq_l1p_optimisation(1); switch(Opt) { #if defined(AVX512) || defined (QPX) case WilsonKernelsStatic::OptInlineAsm: if(interior&&exterior) WilsonKernels::AsmDhopSiteDag (st,U,buf,sF,sU,Ls,Nsite,in,out); else if (interior) WilsonKernels::AsmDhopSiteDagInt(st,U,buf,sF,sU,Ls,Nsite,in,out); else if (exterior) WilsonKernels::AsmDhopSiteDagExt(st,U,buf,sF,sU,Ls,Nsite,in,out); else assert(0); break; #endif case WilsonKernelsStatic::OptHandUnroll: for (int site = 0; site < Nsite; site++) { for (int s = 0; s < Ls; s++) { if(interior&&exterior) WilsonKernels::HandDhopSiteDag(st,U,buf,sF,sU,in,out); else if (interior) WilsonKernels::HandDhopSiteDagInt(st,U,buf,sF,sU,in,out); else if (exterior) WilsonKernels::HandDhopSiteDagExt(st,U,buf,sF,sU,in,out); else assert(0); sF++; } sU++; } break; case WilsonKernelsStatic::OptGpu: case WilsonKernelsStatic::OptGeneric: for (int site = 0; site < Nsite; site++) { for (int s = 0; s < Ls; s++) { if(interior&&exterior) WilsonKernels::GenericDhopSiteDag(st,U,buf,sF,sU,in,out); else if (interior) WilsonKernels::GenericDhopSiteDagInt(st,U,buf,sF,sU,in,out); else if (exterior) WilsonKernels::GenericDhopSiteDagExt(st,U,buf,sF,sU,in,out); else assert(0); sF++; } sU++; } break; default: assert(0); } //bgq_l1p_optimisation(0); } template static accelerator typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,void>::type DhopSiteDag(int Opt,StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf, int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1) { for (int site = 0; site < Nsite; site++) { for (int s = 0; s < Ls; s++) { if(interior&&exterior) WilsonKernels::GenericDhopSiteDag(st,U,buf,sF,sU,in,out); else if (interior) WilsonKernels::GenericDhopSiteDagInt(st,U,buf,sF,sU,in,out); else if (exterior) WilsonKernels::GenericDhopSiteDagExt(st,U,buf,sF,sU,in,out); else assert(0); sF++; } sU++; } } static accelerator void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma); ////////////////////////////////////////////////////////////////////////////// // Utilities for inserting Wilson conserved current. ////////////////////////////////////////////////////////////////////////////// static void ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1, const SitePropagator &q_in_2, SitePropagator &q_out, DoubledGaugeFieldView &U, unsigned int sU, unsigned int mu, bool switch_sign = false); static void ContractConservedCurrentSiteBwd(const SitePropagator &q_in_1, const SitePropagator &q_in_2, SitePropagator &q_out, DoubledGaugeFieldView &U, unsigned int sU, unsigned int mu, bool switch_sign = false); static void SeqConservedCurrentSiteFwd(const SitePropagator &q_in, SitePropagator &q_out, DoubledGaugeFieldView &U, unsigned int sU, unsigned int mu, vInteger t_mask, bool switch_sign = false); static void SeqConservedCurrentSiteBwd(const SitePropagator &q_in, SitePropagator &q_out, DoubledGaugeFieldView &U, unsigned int sU, unsigned int mu, vInteger t_mask, bool switch_sign = false); private: // Specialised variants static accelerator void GpuDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out); static accelerator void GpuDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out); static accelerator void GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out); static accelerator void GenericDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out); static accelerator void GenericDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out); static accelerator void GenericDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out); static accelerator void GenericDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out); static accelerator void GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out); static accelerator void AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out); static accelerator void AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out); static accelerator void AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out); static accelerator void AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out); static accelerator void AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out); static accelerator void AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out); static accelerator void HandDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out); static accelerator void HandDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out); static accelerator void HandDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out); static accelerator void HandDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out); static accelerator void HandDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out); static accelerator void HandDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out); }; NAMESPACE_END(Grid); #endif