From eb7d34a4cc1e3627f98694a1becae732bd330693 Mon Sep 17 00:00:00 2001
From: Peter Boyle
Date: Mon, 14 May 2018 19:41:47 -0400
Subject: [PATCH] GPU version

---
Review notes (text between the "---" marker and the diffstat is not part of
the commit message):
 * The second OptGpu case (the one dispatching the *Dag kernels) sent the
   interior&&exterior path to GenericDhopSiteDag, which left the
   GpuDhopSiteDag declaration added at the end of this patch unused. It now
   calls GpuDhopSiteDag, mirroring the first OptGpu case (GpuDhopSite).
   The blob id on the "index" line predates this correction.
 * Line breaks, indentation and the position of blank context lines were
   reconstructed from a whitespace-collapsed copy of this patch -- TODO
   confirm against the target file; applying may need
   "git apply --ignore-whitespace".

 lib/qcd/action/fermion/WilsonKernels.h | 33 +++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/lib/qcd/action/fermion/WilsonKernels.h b/lib/qcd/action/fermion/WilsonKernels.h
index a12c7101..79b08cf2 100644
--- a/lib/qcd/action/fermion/WilsonKernels.h
+++ b/lib/qcd/action/fermion/WilsonKernels.h
@@ -39,7 +39,7 @@ NAMESPACE_BEGIN(Grid);
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 class WilsonKernelsStatic {
 public:
-  enum { OptGeneric, OptHandUnroll, OptInlineAsm };
+  enum { OptGeneric, OptHandUnroll, OptInlineAsm, OptGpu };
   enum { CommsAndCompute, CommsThenCompute };
   static int Opt;
   static int Comms;
@@ -59,6 +59,7 @@ public:
   {
     //bgq_l1p_optimisation(1);
     switch(Opt) {
+
 #if defined(AVX512) || defined (QPX)
     case WilsonKernelsStatic::OptInlineAsm:
       if(interior&&exterior) WilsonKernels::AsmDhopSite (st,U,buf,sF,sU,Ls,Nsite,in,out);
@@ -67,6 +68,18 @@ public:
       else assert(0);
       break;
 #endif
+    case WilsonKernelsStatic::OptGpu:
+      for (int site = 0; site < Nsite; site++) {
+        for (int s = 0; s < Ls; s++) {
+          if(interior&&exterior) WilsonKernels::GpuDhopSite(st,U,buf,sF,sU,in,out);
+          else if (interior) WilsonKernels::GenericDhopSiteInt(st,U,buf,sF,sU,in,out);
+          else if (exterior) WilsonKernels::GenericDhopSiteExt(st,U,buf,sF,sU,in,out);
+          else assert(0);
+          sF++;
+        }
+        sU++;
+      }
+      break;
     case WilsonKernelsStatic::OptHandUnroll:
       for (int site = 0; site < Nsite; site++) {
         for (int s = 0; s < Ls; s++) {
@@ -128,6 +141,18 @@ public:
       else assert(0);
       break;
 #endif
+    case WilsonKernelsStatic::OptGpu:
+      for (int site = 0; site < Nsite; site++) {
+        for (int s = 0; s < Ls; s++) {
+          if(interior&&exterior) WilsonKernels::GpuDhopSiteDag(st,U,buf,sF,sU,in,out);
+          else if (interior) WilsonKernels::GenericDhopSiteDagInt(st,U,buf,sF,sU,in,out);
+          else if (exterior) WilsonKernels::GenericDhopSiteDagExt(st,U,buf,sF,sU,in,out);
+          else assert(0);
+          sF++;
+        }
+        sU++;
+      }
+      break;
     case WilsonKernelsStatic::OptHandUnroll:
       for (int site = 0; site < Nsite; site++) {
         for (int s = 0; s < Ls; s++) {
@@ -215,6 +240,12 @@ public:
 
 private:
   // Specialised variants
+  static accelerator void GpuDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
+                                      int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
+
+  static accelerator void GpuDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
+                                         int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
+
   static accelerator void GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
                                           int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
 