diff --git a/Grid/qcd/action/fermion/implementation/StaggeredKernelsHand.h b/Grid/qcd/action/fermion/implementation/StaggeredKernelsHand.h new file mode 100644 index 00000000..f04d129c --- /dev/null +++ b/Grid/qcd/action/fermion/implementation/StaggeredKernelsHand.h @@ -0,0 +1,392 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/StaggerdKernelsHand.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +#pragma once + +NAMESPACE_BEGIN(Grid); + +#define LOAD_CHI(b) \ + const SiteSpinor & ref (b[offset]); \ + Chi_0=ref()()(0);\ + Chi_1=ref()()(1);\ + Chi_2=ref()()(2); + + +// To splat or not to splat depends on the implementation +#define MULT(A,UChi) \ + auto & ref(U[sU](A)); \ + Impl::loadLinkElement(U_00,ref()(0,0)); \ + Impl::loadLinkElement(U_10,ref()(1,0)); \ + Impl::loadLinkElement(U_20,ref()(2,0)); \ + Impl::loadLinkElement(U_01,ref()(0,1)); \ + Impl::loadLinkElement(U_11,ref()(1,1)); \ + Impl::loadLinkElement(U_21,ref()(2,1)); \ + Impl::loadLinkElement(U_02,ref()(0,2)); \ + Impl::loadLinkElement(U_12,ref()(1,2)); \ + Impl::loadLinkElement(U_22,ref()(2,2)); \ + UChi ## _0 = U_00*Chi_0; \ + UChi ## _1 = U_10*Chi_0;\ + UChi ## _2 = U_20*Chi_0;\ + UChi ## _0 += U_01*Chi_1;\ + UChi ## _1 += U_11*Chi_1;\ + UChi ## _2 += U_21*Chi_1;\ + UChi ## _0 += U_02*Chi_2;\ + UChi ## _1 += U_12*Chi_2;\ + UChi ## _2 += U_22*Chi_2; + +#define MULT_ADD(U,A,UChi) \ + auto & ref(U[sU](A)); \ + Impl::loadLinkElement(U_00,ref()(0,0)); \ + Impl::loadLinkElement(U_10,ref()(1,0)); \ + Impl::loadLinkElement(U_20,ref()(2,0)); \ + Impl::loadLinkElement(U_01,ref()(0,1)); \ + Impl::loadLinkElement(U_11,ref()(1,1)); \ + Impl::loadLinkElement(U_21,ref()(2,1)); \ + Impl::loadLinkElement(U_02,ref()(0,2)); \ + Impl::loadLinkElement(U_12,ref()(1,2)); \ + Impl::loadLinkElement(U_22,ref()(2,2)); \ + UChi ## _0 += U_00*Chi_0; \ + UChi ## _1 += U_10*Chi_0;\ + UChi ## _2 += U_20*Chi_0;\ + UChi ## _0 += U_01*Chi_1;\ + UChi ## _1 += U_11*Chi_1;\ + UChi ## _2 += U_21*Chi_1;\ + UChi ## _0 += U_02*Chi_2;\ + UChi ## _1 += U_12*Chi_2;\ + UChi ## _2 += U_22*Chi_2; + + +#define PERMUTE_DIR(dir) \ + permute##dir(Chi_0,Chi_0); \ + permute##dir(Chi_1,Chi_1); \ + permute##dir(Chi_2,Chi_2); + + +#define HAND_STENCIL_LEG_BASE(Dir,Perm,skew) \ + SE=st.GetEntry(ptype,Dir+skew,sF); \ + offset = SE->_offset; \ + local = SE->_is_local; \ + perm = SE->_permute; \ + if ( local ) { \ + LOAD_CHI(in); \ + if ( perm) { \ + PERMUTE_DIR(Perm); \ + } \ + } else { \ + LOAD_CHI(buf); \ + } + +#define HAND_STENCIL_LEG_BEGIN(Dir,Perm,skew,even) \ + HAND_STENCIL_LEG_BASE(Dir,Perm,skew) \ + { \ + MULT(Dir,even); \ + } + +#define HAND_STENCIL_LEG(U,Dir,Perm,skew,even) \ + HAND_STENCIL_LEG_BASE(Dir,Perm,skew) \ + { \ + MULT_ADD(U,Dir,even); \ + } + + + +#define HAND_STENCIL_LEG_INT(U,Dir,Perm,skew,even) \ + SE=st.GetEntry(ptype,Dir+skew,sF); \ + offset = SE->_offset; \ + local = SE->_is_local; \ + perm = SE->_permute; \ + if ( local ) { \ + LOAD_CHI(in); \ + if ( perm) { \ + PERMUTE_DIR(Perm); \ + } \ + } else if ( st.same_node[Dir] ) { \ + LOAD_CHI(buf); \ + } \ + if (local || st.same_node[Dir] ) { \ + MULT_ADD(U,Dir,even); \ + } + +#define HAND_STENCIL_LEG_EXT(U,Dir,Perm,skew,even) \ + SE=st.GetEntry(ptype,Dir+skew,sF); \ + offset = SE->_offset; \ + local = SE->_is_local; \ + if ((!local) && (!st.same_node[Dir]) ) { \ + nmu++; \ + { LOAD_CHI(buf); } \ + { MULT_ADD(U,Dir,even); } \ + } + + +template +void StaggeredKernels::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, + SiteSpinor *buf, int LLs, int sU, + const FermionFieldView &in, FermionFieldView &out,int dag) +{ + typedef typename Simd::scalar_type S; + typedef typename Simd::vector_type V; + + Simd even_0; // 12 regs on knc + Simd even_1; + Simd even_2; + Simd odd_0; // 12 regs on knc + Simd odd_1; + Simd odd_2; + + Simd Chi_0; // two spinor; 6 regs + Simd Chi_1; + Simd Chi_2; + + Simd U_00; // two rows of U matrix + Simd U_10; + Simd U_20; + Simd U_01; + Simd U_11; + Simd U_21; // 2 reg left. + Simd U_02; + Simd U_12; + Simd U_22; + + SiteSpinor result; + int offset,local,perm, ptype; + + StencilEntry *SE; + int skew; + + for(int s=0;s +void StaggeredKernels::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, + SiteSpinor *buf, int LLs, int sU, + const FermionFieldView &in, FermionFieldView &out,int dag) +{ + typedef typename Simd::scalar_type S; + typedef typename Simd::vector_type V; + + Simd even_0; // 12 regs on knc + Simd even_1; + Simd even_2; + Simd odd_0; // 12 regs on knc + Simd odd_1; + Simd odd_2; + + Simd Chi_0; // two spinor; 6 regs + Simd Chi_1; + Simd Chi_2; + + Simd U_00; // two rows of U matrix + Simd U_10; + Simd U_20; + Simd U_01; + Simd U_11; + Simd U_21; // 2 reg left. + Simd U_02; + Simd U_12; + Simd U_22; + + SiteSpinor result; + int offset, ptype, local, perm; + + StencilEntry *SE; + int skew; + + for(int s=0;s +void StaggeredKernels::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, + SiteSpinor *buf, int LLs, int sU, + const FermionFieldView &in, FermionFieldView &out,int dag) +{ + typedef typename Simd::scalar_type S; + typedef typename Simd::vector_type V; + + Simd even_0; // 12 regs on knc + Simd even_1; + Simd even_2; + Simd odd_0; // 12 regs on knc + Simd odd_1; + Simd odd_2; + + Simd Chi_0; // two spinor; 6 regs + Simd Chi_1; + Simd Chi_2; + + Simd U_00; // two rows of U matrix + Simd U_10; + Simd U_20; + Simd U_01; + Simd U_11; + Simd U_21; // 2 reg left. + Simd U_02; + Simd U_12; + Simd U_22; + + SiteSpinor result; + int offset, ptype, local; + + StencilEntry *SE; + int skew; + + for(int s=0;s::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, \ + DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \ + SiteSpinor *buf, int LLs, int sU, \ + const FermionFieldView &in, FermionFieldView &out, int dag); \ + \ + template void StaggeredKernels::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, \ + DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \ + SiteSpinor *buf, int LLs, int sU, \ + const FermionFieldView &in, FermionFieldView &out, int dag); \ + \ + template void StaggeredKernels::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, \ + DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \ + SiteSpinor *buf, int LLs, int sU, \ + const FermionFieldView &in, FermionFieldView &out, int dag); \ + +NAMESPACE_END(Grid); + +