/*************************************************************************************

Grid physics library, www.github.com/paboyle/Grid

Source file: ./lib/qcd/action/fermion/WilsonKernelsAsm.cc

Copyright (C) 2015

Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
|
2016-03-31 14:51:32 +01:00
|
|
|
|
2016-06-03 11:24:26 +01:00
|
|
|
#include <Grid.h>
|
2015-11-04 11:15:08 +00:00
|
|
|
|
2016-10-04 15:37:32 +01:00
|
|
|
|
2015-11-04 11:15:08 +00:00
|
|
|
namespace Grid {
|
2016-10-22 18:14:27 +01:00
|
|
|
namespace QCD {
|
2016-09-09 11:34:25 +01:00
|
|
|
|
2016-10-22 18:14:27 +01:00
|
|
|
///////////////////////////////////////////////////////////
// Default to no assembler implementation
///////////////////////////////////////////////////////////
|
|
|
|
template<class Impl> void
|
|
|
|
WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
|
|
|
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
|
|
|
{
|
|
|
|
assert(0);
|
|
|
|
}
|
2016-10-04 15:37:32 +01:00
|
|
|
|
2016-10-22 18:14:27 +01:00
|
|
|
template<class Impl> void
|
|
|
|
WilsonKernels<Impl >::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
|
|
|
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
|
|
|
{
|
|
|
|
assert(0);
|
|
|
|
}
|
2016-10-04 15:37:32 +01:00
|
|
|
|
2016-06-03 11:24:26 +01:00
|
|
|
#if defined(AVX512)
|
2016-10-28 17:20:04 +01:00
|
|
|
#include <simd/Intel512wilson.h>
|
|
|
|
|
2016-09-09 11:34:25 +01:00
|
|
|
///////////////////////////////////////////////////////////
// If we are AVX512 specialise the single precision routine
///////////////////////////////////////////////////////////
|
2016-10-28 17:20:04 +01:00
|
|
|
|
2016-06-03 11:24:26 +01:00
|
|
|
#include <simd/Intel512single.h>
|
2016-09-09 11:34:25 +01:00
|
|
|
|
2016-10-30 12:04:06 +00:00
|
|
|
// File-local table for the vComplexF kernels. It starts out empty and is
// filled by setupSigns(signsF), which runs as the initialiser of signInitF.
static Vector<vComplexF> signsF;
|
|
|
|
|
|
|
|
template<typename vtype>
|
|
|
|
int setupSigns(Vector<vtype>& signs ){
|
|
|
|
Vector<vtype> bother(2);
|
2016-10-22 18:14:27 +01:00
|
|
|
signs = bother;
|
|
|
|
vrsign(signs[0]);
|
|
|
|
visign(signs[1]);
|
|
|
|
return 1;
|
|
|
|
}
|
2016-10-30 12:04:06 +00:00
|
|
|
|
|
|
|
// Dummy static: its initialiser is what actually runs setupSigns(signsF)
// during static initialisation of this translation unit. The stored value is
// just setupSigns' return value (1).
static int signInitF = setupSigns(signsF);
|
2016-09-09 11:34:25 +01:00
|
|
|
|
2016-06-25 19:08:05 +01:00
|
|
|
// ---- Macro set in force for the WilsonImplF expansion of WilsonKernelsAsmBody.h ----
// (presumably these are the knobs the body header reads; the header is not visible here)

// label(X) produces assembler text defining a label X. Going through ilabel
// lets X be macro-expanded (e.g. an FX(...) name) before it is stringified.
// NOTE(review): the emitted ".globl" directive has no symbol operand -- confirm this is intended.
#define label(A)  ilabel(A)
#define ilabel(A) ".globl\n" #A ":\n"

// Executes statement A only when `perm` is non-zero.
#define MAYBEPERM(A,perm)  if (perm) { A ; }

// Forwards to MULT_ADDSUB_2SPIN (defined elsewhere).
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)

// Prefixes an identifier with WILSONASM_.
#define FX(A)              WILSONASM_ ##A

// Complex SIMD type for this pass.
#define COMPLEX_TYPE       vComplexF

// Redirects the bare name `signs` to the single-precision table.
#define signs              signsF
|
2016-09-09 11:34:25 +01:00
|
|
|
|
2016-08-15 22:31:29 +01:00
|
|
|
// WilsonImplF specialisation of DiracOptAsmDhopSite.
// The function body is not written out here: it is whatever
// WilsonKernelsAsmBody.h expands to under the macros currently defined.
// KERNEL_DAG is explicitly undefined for this expansion.
#undef KERNEL_DAG
template <>
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,
                                                     LebesgueOrder &lo,
                                                     DoubledGaugeField &U,
                                                     SiteHalfSpinor *buf,
                                                     int ss,
                                                     int ssU,
                                                     int Ls,
                                                     int Ns,
                                                     const FermionField &in,
                                                     FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
2016-09-09 11:34:25 +01:00
|
|
|
|
2016-08-15 22:31:29 +01:00
|
|
|
// WilsonImplF specialisation of DiracOptAsmDhopSiteDag.
// The function body is not written out here: it is whatever
// WilsonKernelsAsmBody.h expands to under the macros currently defined.
// KERNEL_DAG is defined for this expansion.
#define KERNEL_DAG
template <>
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,
                                                        LebesgueOrder &lo,
                                                        DoubledGaugeField &U,
                                                        SiteHalfSpinor *buf,
                                                        int ss,
                                                        int ssU,
                                                        int Ls,
                                                        int Ns,
                                                        const FermionField &in,
                                                        FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
2016-09-09 11:34:25 +01:00
|
|
|
|
2016-06-03 11:24:26 +01:00
|
|
|
// ---- Re-target the macro set for the DomainWallVec5dImplF expansions ----

// Identifier prefix changes from WILSONASM_ to DWFASM_.
#undef FX
#define FX(A) DWFASM_ ## A

// MAYBEPERM now expands to nothing, so the guarded statement is dropped entirely.
#undef MAYBEPERM
#define MAYBEPERM(A,B)

// Multiply step now forwards to the _LS variant (defined elsewhere).
#undef MULT_2SPIN
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)

// VMOVIDUP / VMOVRDUP are removed here; the single-precision broadcast
// replacements below are left disabled.
#undef VMOVIDUP
#undef VMOVRDUP
//#define VMOVIDUP(A,B,C) VBCASTIDUPf(A,B,C)
//#define VMOVRDUP(A,B,C) VBCASTRDUPf(A,B,C)
|
2016-09-09 11:34:25 +01:00
|
|
|
|
2016-08-15 22:31:29 +01:00
|
|
|
// DomainWallVec5dImplF specialisation of DiracOptAsmDhopSite.
// The function body is not written out here: it is whatever
// WilsonKernelsAsmBody.h expands to under the macros currently defined.
// KERNEL_DAG is explicitly undefined for this expansion.
#undef KERNEL_DAG
template <>
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,
                                                              LebesgueOrder &lo,
                                                              DoubledGaugeField &U,
                                                              SiteHalfSpinor *buf,
                                                              int ss,
                                                              int ssU,
                                                              int Ls,
                                                              int Ns,
                                                              const FermionField &in,
                                                              FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
2016-09-09 11:34:25 +01:00
|
|
|
|
2016-08-15 22:31:29 +01:00
|
|
|
// DomainWallVec5dImplF specialisation of DiracOptAsmDhopSiteDag.
// The function body is not written out here: it is whatever
// WilsonKernelsAsmBody.h expands to under the macros currently defined.
// KERNEL_DAG is defined for this expansion.
#define KERNEL_DAG
template <>
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,
                                                                 LebesgueOrder &lo,
                                                                 DoubledGaugeField &U,
                                                                 SiteHalfSpinor *buf,
                                                                 int ss,
                                                                 int ssU,
                                                                 int Ls,
                                                                 int Ns,
                                                                 const FermionField &in,
                                                                 FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
2016-10-28 17:20:04 +01:00
|
|
|
// ---- Tear down the single-precision macro set ----
// Type selection and the `signs` alias.
#undef COMPLEX_TYPE
#undef signs

// Instruction / helper macros.
#undef VMOVRDUP
#undef MAYBEPERM
#undef MULT_2SPIN
#undef FX
|
2016-10-28 17:20:04 +01:00
|
|
|
|
2016-10-30 12:04:06 +00:00
|
|
|
///////////////////////////////////////////////////////////
// If we are AVX512 specialise the double precision routine
///////////////////////////////////////////////////////////
|
2016-10-28 17:20:04 +01:00
|
|
|
|
|
|
|
#include <simd/Intel512double.h>
|
|
|
|
|
2016-10-30 12:04:06 +00:00
|
|
|
static Vector<vComplexD> signsD;
|
|
|
|
#define signs signsD
|
|
|
|
static int signInitD = setupSigns(signsD);
|
2016-10-28 17:20:04 +01:00
|
|
|
|
|
|
|
#define MAYBEPERM(A,perm) if (perm) { A ; }
|
|
|
|
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
|
|
|
|
#define FX(A) WILSONASM_ ##A
|
|
|
|
#define COMPLEX_TYPE vComplexD
|
|
|
|
|
|
|
|
// WilsonImplD specialisation of DiracOptAsmDhopSite.
// The function body is not written out here: it is whatever
// WilsonKernelsAsmBody.h expands to under the macros currently defined.
// KERNEL_DAG is explicitly undefined for this expansion.
#undef KERNEL_DAG
template <>
void WilsonKernels<WilsonImplD>::DiracOptAsmDhopSite(StencilImpl &st,
                                                     LebesgueOrder &lo,
                                                     DoubledGaugeField &U,
                                                     SiteHalfSpinor *buf,
                                                     int ss,
                                                     int ssU,
                                                     int Ls,
                                                     int Ns,
                                                     const FermionField &in,
                                                     FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
|
|
|
|
|
|
|
// WilsonImplD specialisation of DiracOptAsmDhopSiteDag.
// The function body is not written out here: it is whatever
// WilsonKernelsAsmBody.h expands to under the macros currently defined.
// KERNEL_DAG is defined for this expansion.
#define KERNEL_DAG
template <>
void WilsonKernels<WilsonImplD>::DiracOptAsmDhopSiteDag(StencilImpl &st,
                                                        LebesgueOrder &lo,
                                                        DoubledGaugeField &U,
                                                        SiteHalfSpinor *buf,
                                                        int ss,
                                                        int ssU,
                                                        int Ls,
                                                        int Ns,
                                                        const FermionField &in,
                                                        FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
2016-09-09 11:34:25 +01:00
|
|
|
|
2016-10-28 17:20:04 +01:00
|
|
|
// ---- Re-target the macro set for the DomainWallVec5dImplD expansions ----

// Identifier prefix changes from WILSONASM_ to DWFASM_.
#undef FX
#define FX(A) DWFASM_ ## A

// MAYBEPERM now expands to nothing, so the guarded statement is dropped entirely.
#undef MAYBEPERM
#define MAYBEPERM(A,B)

// Multiply step now forwards to the _LS variant (defined elsewhere).
#undef MULT_2SPIN
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)

// VMOVIDUP / VMOVRDUP are removed here; the double-precision broadcast
// replacements below are left disabled.
#undef VMOVIDUP
#undef VMOVRDUP
//#define VMOVIDUP(A,B,C) VBCASTIDUPd(A,B,C)
//#define VMOVRDUP(A,B,C) VBCASTRDUPd(A,B,C)
|
|
|
|
|
|
|
|
// DomainWallVec5dImplD specialisation of DiracOptAsmDhopSite.
// The function body is not written out here: it is whatever
// WilsonKernelsAsmBody.h expands to under the macros currently defined.
// KERNEL_DAG is explicitly undefined for this expansion.
#undef KERNEL_DAG
template <>
void WilsonKernels<DomainWallVec5dImplD>::DiracOptAsmDhopSite(StencilImpl &st,
                                                              LebesgueOrder &lo,
                                                              DoubledGaugeField &U,
                                                              SiteHalfSpinor *buf,
                                                              int ss,
                                                              int ssU,
                                                              int Ls,
                                                              int Ns,
                                                              const FermionField &in,
                                                              FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
|
|
|
|
|
|
|
// DomainWallVec5dImplD specialisation of DiracOptAsmDhopSiteDag.
// The function body is not written out here: it is whatever
// WilsonKernelsAsmBody.h expands to under the macros currently defined.
// KERNEL_DAG is defined for this expansion.
#define KERNEL_DAG
template <>
void WilsonKernels<DomainWallVec5dImplD>::DiracOptAsmDhopSiteDag(StencilImpl &st,
                                                                 LebesgueOrder &lo,
                                                                 DoubledGaugeField &U,
                                                                 SiteHalfSpinor *buf,
                                                                 int ss,
                                                                 int ssU,
                                                                 int Ls,
                                                                 int Ns,
                                                                 const FermionField &in,
                                                                 FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
|
|
|
|
|
|
|
// ---- Final teardown of the assembler-kernel macro set ----
// Nothing after this point includes WilsonKernelsAsmBody.h again, so every
// helper macro introduced for it is removed here rather than left defined for
// the remainder of the translation unit.

// Type selection and the `signs` alias.
#undef COMPLEX_TYPE
#undef signs

// Instruction / helper macros. #undef of a name that is not currently
// defined is a no-op, so listing VMOVIDUP alongside VMOVRDUP is harmless.
#undef VMOVIDUP
#undef VMOVRDUP
#undef MAYBEPERM
#undef MULT_2SPIN
#undef FX

// Variant selector left defined by the last Dag expansion, and the label
// helpers that were defined once for the single-precision pass.
#undef KERNEL_DAG
#undef label
#undef ilabel
|
2016-10-28 17:20:04 +01:00
|
|
|
|
|
|
|
#endif //AVX512
|
2016-10-04 15:37:32 +01:00
|
|
|
|
|
|
|
// INSTANTIATE_ASM(A): explicitly instantiates WilsonKernels<A>::DiracOptAsmDhopSite
// and WilsonKernels<A>::DiracOptAsmDhopSiteDag.
//
// Usage: INSTANTIATE_ASM(SomeImpl);
// The caller's ';' terminates the second declaration, so the macro body does
// not end in one itself (avoids an empty ';' declaration per use). The final
// line also carries no continuation backslash, so the definition ends exactly
// where it appears to and cannot absorb the following source line.
#define INSTANTIATE_ASM(A)\
template void WilsonKernels<A>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
                                                    int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
\
template void WilsonKernels<A>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
                                                       int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
|
|
|
|
|
|
|
// Explicit instantiations of the two assembler entry points for every Impl.
// An explicit instantiation of a member that already has an explicit
// specialisation has no effect, so when AVX512 is defined the four Impls
// specialised above keep their specialised bodies; all remaining Impls get
// the generic assert(0) stub.

// Wilson
INSTANTIATE_ASM(WilsonImplF);
INSTANTIATE_ASM(WilsonImplD);

// ZWilson
INSTANTIATE_ASM(ZWilsonImplF);
INSTANTIATE_ASM(ZWilsonImplD);

// GparityWilson
INSTANTIATE_ASM(GparityWilsonImplF);
INSTANTIATE_ASM(GparityWilsonImplD);

// DomainWallVec5d
INSTANTIATE_ASM(DomainWallVec5dImplF);
INSTANTIATE_ASM(DomainWallVec5dImplD);

// ZDomainWallVec5d
INSTANTIATE_ASM(ZDomainWallVec5dImplF);
INSTANTIATE_ASM(ZDomainWallVec5dImplD);
|
2016-10-22 18:14:27 +01:00
|
|
|
|
|
|
|
}}
|
2016-06-03 11:24:26 +01:00
|
|
|
|