diff --git a/Grid/qcd/action/fermion/implementation/StaggeredKernelsAsm.h b/Grid/qcd/action/fermion/implementation/StaggeredKernelsAsm.h index b96a8a95..190caecd 100644 --- a/Grid/qcd/action/fermion/implementation/StaggeredKernelsAsm.h +++ b/Grid/qcd/action/fermion/implementation/StaggeredKernelsAsm.h @@ -958,12 +958,26 @@ template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, #endif } -#define KERNEL_INSTANTIATE(CLASS,FUNC,IMPL) \ - template void CLASS::FUNC(StencilImpl &st, LebesgueOrder &lo, \ - DoubledGaugeFieldView &U, \ - DoubledGaugeFieldView &UUU, \ - SiteSpinor *buf, int LLs, \ - int sU, const FermionFieldView &in, FermionFieldView &out,int dag); +extern template void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeFieldView &U, + DoubledGaugeFieldView &UUU, + SiteSpinor *buf, int LLs, + int sU, const FermionFieldView &in, FermionFieldView &out,int dag); +extern template void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeFieldView &U, + DoubledGaugeFieldView &UUU, + SiteSpinor *buf, int LLs, + int sU, const FermionFieldView &in, FermionFieldView &out,int dag); +extern template void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeFieldView &U, + DoubledGaugeFieldView &UUU, + SiteSpinor *buf, int LLs, + int sU, const FermionFieldView &in, FermionFieldView &out,int dag); +extern template void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeFieldView &U, + DoubledGaugeFieldView &UUU, + SiteSpinor *buf, int LLs, + int sU, const FermionFieldView &in, FermionFieldView &out,int dag); NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/StaggeredKernelsHand.h b/Grid/qcd/action/fermion/implementation/StaggeredKernelsHand.h index f04d129c..e1685957 100644 --- a/Grid/qcd/action/fermion/implementation/StaggeredKernelsHand.h +++ b/Grid/qcd/action/fermion/implementation/StaggeredKernelsHand.h @@ -370,7 +370,6 @@ void StaggeredKernels::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, } } - #define DHOP_SITE_HAND_INSTANTIATE(IMPL) \ template void StaggeredKernels::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, \ DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \ @@ -387,6 +386,8 @@ void StaggeredKernels::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, SiteSpinor *buf, int LLs, int sU, \ const FermionFieldView &in, FermionFieldView &out, int dag); \ +#undef LOAD_CHI + NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/WilsonKernelsAsmAvx512.h b/Grid/qcd/action/fermion/implementation/WilsonKernelsAsmAvx512.h index 2b1bbfde..4aed13bf 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonKernelsAsmAvx512.h +++ b/Grid/qcd/action/fermion/implementation/WilsonKernelsAsmAvx512.h @@ -36,7 +36,10 @@ Author: paboyle /////////////////////////////////////////////////////////// #include #include - + +/// Switch off the 5d vectorised code optimisations +#undef DWFVEC5D + static Vector signsF; template @@ -209,6 +212,9 @@ WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFie ///////////////////////////////////////////////////////////////// // Ls vectorised, undag Kernel, single ///////////////////////////////////////////////////////////////// + +#ifdef DWFVEC5D + #undef KERNEL_DAG #define INTERIOR_AND_EXTERIOR #undef INTERIOR @@ -344,6 +350,8 @@ WilsonKernels::AsmDhopSiteDagExt(StencilView &st, Double int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) #include +#endif // VEC 5D + #undef COMPLEX_SIGNS #undef MAYBEPERM #undef MULT_2SPIN @@ -510,6 +518,8 @@ WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFie ///////////////////////////////////////////////////////////////// // Ls vectorised, undag Kernel, single ///////////////////////////////////////////////////////////////// +#ifdef DWFVEC5D + #undef KERNEL_DAG #define INTERIOR_AND_EXTERIOR #undef INTERIOR @@ -625,6 +635,7 @@ WilsonKernels::AsmDhopSiteDagInt(StencilView &st, Double #undef INTERIOR_AND_EXTERIOR #undef INTERIOR #define EXTERIOR + template<> void WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) @@ -643,8 +654,63 @@ WilsonKernels::AsmDhopSiteDagExt(StencilView &st, Double int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) #include +#endif // VEC 5D + #undef COMPLEX_SIGNS #undef MAYBEPERM #undef MULT_2SPIN +#undef Chi_00 +#undef Chi_01 +#undef Chi_02 +#undef Chi_10 +#undef Chi_11 +#undef Chi_12 +#undef Chi_20 +#undef Chi_21 +#undef Chi_22 +#undef Chi_30 +#undef Chi_31 +#undef Chi_32 + +#undef UChi_00 +#undef UChi_01 +#undef UChi_02 +#undef UChi_10 +#undef UChi_11 +#undef UChi_12 +#undef UChi_20 +#undef UChi_21 +#undef UChi_22 +#undef UChi_30 +#undef UChi_31 +#undef UChi_32 + +#undef Psi_00 +#undef Psi_01 +#undef Psi_02 +#undef Psi_10 +#undef Psi_11 +#undef Psi_12 +#undef Psi_20 +#undef Psi_21 +#undef Psi_22 +#undef Psi_30 +#undef Psi_31 +#undef Psi_32 + +#undef Phi_00 +#undef Phi_01 +#undef Phi_02 +#undef Phi_10 +#undef Phi_11 +#undef Phi_12 +#undef Phi_20 +#undef Phi_21 +#undef Phi_22 +#undef Phi_30 +#undef Phi_31 +#undef Phi_32 + + #endif //AVX512 diff --git a/Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h index 743960da..0c956f7e 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h @@ -38,52 +38,49 @@ NAMESPACE_BEGIN(Grid); /////////////////////////////////////////////////////////// // Default to no assembler implementation +// Will specialise to /////////////////////////////////////////////////////////// template void WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) { assert(0); } template void WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) { assert(0); } template void WilsonKernels::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) { assert(0); } template void WilsonKernels::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) { assert(0); } template void WilsonKernels::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) { assert(0); } template void WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) { assert(0); } -#include -#include - - NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h index 8abad0fd..f7b018fa 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h @@ -31,6 +31,49 @@ Author: paboyle #include + +#undef LOAD_CHIMU +#undef LOAD_CHI +#undef MULT_2SPIN +#undef PERMUTE_DIR +#undef XP_PROJ +#undef YP_PROJ +#undef ZP_PROJ +#undef TP_PROJ +#undef XM_PROJ +#undef YM_PROJ +#undef ZM_PROJ +#undef TM_PROJ +#undef XP_RECON +#undef XP_RECON_ACCUM +#undef XM_RECON +#undef XM_RECON_ACCUM +#undef YP_RECON_ACCUM +#undef YM_RECON_ACCUM +#undef ZP_RECON_ACCUM +#undef ZM_RECON_ACCUM +#undef TP_RECON_ACCUM +#undef TM_RECON_ACCUM +#undef ZERO_RESULT +#undef Chimu_00 +#undef Chimu_01 +#undef Chimu_02 +#undef Chimu_10 +#undef Chimu_11 +#undef Chimu_12 +#undef Chimu_20 +#undef Chimu_21 +#undef Chimu_22 +#undef Chimu_30 +#undef Chimu_31 +#undef Chimu_32 +#undef HAND_STENCIL_LEG +#undef HAND_STENCIL_LEG_INT +#undef HAND_STENCIL_LEG_EXT +#undef HAND_RESULT +#undef HAND_RESULT_INT +#undef HAND_RESULT_EXT + #define REGISTER #define LOAD_CHIMU \ diff --git a/Grid/qcd/action/fermion/instantiation/StaggeredKernelsInstantiation.cc.master b/Grid/qcd/action/fermion/instantiation/StaggeredKernelsInstantiation.cc.master index c664a5cd..7af909d3 100644 --- a/Grid/qcd/action/fermion/instantiation/StaggeredKernelsInstantiation.cc.master +++ b/Grid/qcd/action/fermion/instantiation/StaggeredKernelsInstantiation.cc.master @@ -29,7 +29,7 @@ directory #include #include #include - +#include NAMESPACE_BEGIN(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonKernelsInstantiation.cc.master b/Grid/qcd/action/fermion/instantiation/WilsonKernelsInstantiation.cc.master index e1e27eb6..02174b16 100644 --- a/Grid/qcd/action/fermion/instantiation/WilsonKernelsInstantiation.cc.master +++ b/Grid/qcd/action/fermion/instantiation/WilsonKernelsInstantiation.cc.master @@ -30,16 +30,11 @@ directory /* END LEGAL */ #include #include -#include #include #include -// Do the specialisation only once -//#include NAMESPACE_BEGIN(Grid); -// Move these - #include "impl.h" template class WilsonKernels; diff --git a/Grid/qcd/action/fermion/instantiation/WilsonKernelsInstantiationAsm.cc b/Grid/qcd/action/fermion/instantiation/WilsonKernelsInstantiationAsm.cc new file mode 100644 index 00000000..f6f235c8 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonKernelsInstantiationAsm.cc @@ -0,0 +1,43 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +//////////////////////////////////////////////////////////////////////// +// Include the specialisations for ASM kernels +//////////////////////////////////////////////////////////////////////// +NAMESPACE_BEGIN(Grid); +#include +#include +NAMESPACE_END(Grid); + +