mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-11 03:46:55 +01:00
BGQ performance ASM
This commit is contained in:
162
lib/qcd/action/fermion/WilsonKernelsAsmAvx512.h
Normal file
162
lib/qcd/action/fermion/WilsonKernelsAsmAvx512.h
Normal file
@ -0,0 +1,162 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/WilsonKernelsAsmAvx512.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
|
||||
#if defined(AVX512)
|
||||
///////////////////////////////////////////////////////////
|
||||
// If we are AVX512 specialise the single precision routine
|
||||
///////////////////////////////////////////////////////////
|
||||
#include <simd/Intel512wilson.h>
|
||||
#include <simd/Intel512single.h>
|
||||
|
||||
static Vector<vComplexF> signsF;
|
||||
|
||||
template<typename vtype>
|
||||
int setupSigns(Vector<vtype>& signs ){
|
||||
Vector<vtype> bother(2);
|
||||
signs = bother;
|
||||
vrsign(signs[0]);
|
||||
visign(signs[1]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int signInitF = setupSigns(signsF);
|
||||
#define MAYBEPERM(A,perm) if (perm) { A ; }
|
||||
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
|
||||
#define COMPLEX_SIGNS(isigns) vComplexF *isigns = &signsF[0];
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// XYZT vectorised, undag Kernel, single
|
||||
/////////////////////////////////////////////////////////////////
|
||||
#undef KERNEL_DAG
// WilsonImplF specialisation. No body is written here: the #include directly
// after the signature supplies it, so the included text is compiled with
// KERNEL_DAG undefined and with the MAYBEPERM / MULT_2SPIN / COMPLEX_SIGNS
// definitions currently in force.
template <>
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(
    StencilImpl &st,
    LebesgueOrder &lo,
    DoubledGaugeField &U,
    SiteHalfSpinor *buf,
    int ss,
    int ssU,
    int Ls,
    int Ns,
    const FermionField &in,
    FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// XYZT vectorised, dag Kernel, single
|
||||
/////////////////////////////////////////////////////////////////
|
||||
#define KERNEL_DAG
// WilsonImplF specialisation of the Dag entry point. No body is written here:
// the #include directly after the signature supplies it, so the included text
// is compiled with KERNEL_DAG defined and with the MAYBEPERM / MULT_2SPIN /
// COMPLEX_SIGNS definitions currently in force.
template <>
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(
    StencilImpl &st,
    LebesgueOrder &lo,
    DoubledGaugeField &U,
    SiteHalfSpinor *buf,
    int ss,
    int ssU,
    int Ls,
    int Ns,
    const FermionField &in,
    FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
|
||||
#undef MAYBEPERM
|
||||
#undef MULT_2SPIN
|
||||
#define MAYBEPERM(A,B)
|
||||
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Ls vectorised, undag Kernel, single
|
||||
/////////////////////////////////////////////////////////////////
|
||||
#undef KERNEL_DAG
// DomainWallVec5dImplF specialisation. No body is written here: the #include
// directly after the signature supplies it, so the included text is compiled
// with KERNEL_DAG undefined and with the MAYBEPERM / MULT_2SPIN /
// COMPLEX_SIGNS definitions currently in force.
template <>
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(
    StencilImpl &st,
    LebesgueOrder &lo,
    DoubledGaugeField &U,
    SiteHalfSpinor *buf,
    int ss,
    int ssU,
    int Ls,
    int Ns,
    const FermionField &in,
    FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Ls vectorised, dag Kernel, single
|
||||
/////////////////////////////////////////////////////////////////
|
||||
#define KERNEL_DAG
// DomainWallVec5dImplF specialisation of the Dag entry point. No body is
// written here: the #include directly after the signature supplies it, so the
// included text is compiled with KERNEL_DAG defined and with the MAYBEPERM /
// MULT_2SPIN / COMPLEX_SIGNS definitions currently in force.
template <>
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(
    StencilImpl &st,
    LebesgueOrder &lo,
    DoubledGaugeField &U,
    SiteHalfSpinor *buf,
    int ss,
    int ssU,
    int Ls,
    int Ns,
    const FermionField &in,
    FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
#undef COMPLEX_SIGNS
|
||||
#undef MAYBEPERM
|
||||
#undef MULT_2SPIN
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
// If we are AVX512 specialise the double precision routine
|
||||
///////////////////////////////////////////////////////////
|
||||
|
||||
#include <simd/Intel512double.h>
|
||||
|
||||
static Vector<vComplexD> signsD;
|
||||
static int signInitD = setupSigns(signsD);
|
||||
|
||||
#define MAYBEPERM(A,perm) if (perm) { A ; }
|
||||
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
|
||||
#define COMPLEX_SIGNS(isigns) vComplexD *isigns = &signsD[0];
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// XYZT Vectorised, undag Kernel, double
|
||||
/////////////////////////////////////////////////////////////////
|
||||
#undef KERNEL_DAG
// WilsonImplD specialisation. No body is written here: the #include directly
// after the signature supplies it, so the included text is compiled with
// KERNEL_DAG undefined and with the MAYBEPERM / MULT_2SPIN / COMPLEX_SIGNS
// definitions currently in force.
template <>
void WilsonKernels<WilsonImplD>::DiracOptAsmDhopSite(
    StencilImpl &st,
    LebesgueOrder &lo,
    DoubledGaugeField &U,
    SiteHalfSpinor *buf,
    int ss,
    int ssU,
    int Ls,
    int Ns,
    const FermionField &in,
    FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
/////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// XYZT Vectorised, dag Kernel, double
|
||||
/////////////////////////////////////////////////////////////////
|
||||
#define KERNEL_DAG
// WilsonImplD specialisation of the Dag entry point. No body is written here:
// the #include directly after the signature supplies it, so the included text
// is compiled with KERNEL_DAG defined and with the MAYBEPERM / MULT_2SPIN /
// COMPLEX_SIGNS definitions currently in force.
template <>
void WilsonKernels<WilsonImplD>::DiracOptAsmDhopSiteDag(
    StencilImpl &st,
    LebesgueOrder &lo,
    DoubledGaugeField &U,
    SiteHalfSpinor *buf,
    int ss,
    int ssU,
    int Ls,
    int Ns,
    const FermionField &in,
    FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
/////////////////////////////////////////////////////////////////
|
||||
|
||||
#undef MAYBEPERM
|
||||
#undef MULT_2SPIN
|
||||
#define MAYBEPERM(A,B)
|
||||
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Ls vectorised, undag Kernel, double
|
||||
/////////////////////////////////////////////////////////////////
|
||||
#undef KERNEL_DAG
// DomainWallVec5dImplD specialisation. No body is written here: the #include
// directly after the signature supplies it, so the included text is compiled
// with KERNEL_DAG undefined and with the MAYBEPERM / MULT_2SPIN /
// COMPLEX_SIGNS definitions currently in force.
template <>
void WilsonKernels<DomainWallVec5dImplD>::DiracOptAsmDhopSite(
    StencilImpl &st,
    LebesgueOrder &lo,
    DoubledGaugeField &U,
    SiteHalfSpinor *buf,
    int ss,
    int ssU,
    int Ls,
    int Ns,
    const FermionField &in,
    FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
/////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Ls vectorised, dag Kernel, double
|
||||
/////////////////////////////////////////////////////////////////
|
||||
#define KERNEL_DAG
// DomainWallVec5dImplD specialisation of the Dag entry point. No body is
// written here: the #include directly after the signature supplies it, so the
// included text is compiled with KERNEL_DAG defined and with the MAYBEPERM /
// MULT_2SPIN / COMPLEX_SIGNS definitions currently in force.
template <>
void WilsonKernels<DomainWallVec5dImplD>::DiracOptAsmDhopSiteDag(
    StencilImpl &st,
    LebesgueOrder &lo,
    DoubledGaugeField &U,
    SiteHalfSpinor *buf,
    int ss,
    int ssU,
    int Ls,
    int Ns,
    const FermionField &in,
    FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
/////////////////////////////////////////////////////////////////
|
||||
|
||||
#undef COMPLEX_SIGNS
|
||||
#undef MAYBEPERM
|
||||
#undef MULT_2SPIN
|
||||
|
||||
#endif //AVX512
|
@ -7,12 +7,15 @@
|
||||
// vComplexF isigns[2] = { signs[0], signs[1] };
|
||||
//COMPLEX_TYPE is vComplexF or vComplexD depending
|
||||
//on the chosen precision
|
||||
COMPLEX_TYPE *isigns = &signs[0];
|
||||
|
||||
COMPLEX_SIGNS(isigns);
|
||||
MASK_REGS;
|
||||
int nmax=U._grid->oSites();
|
||||
for(int site=0;site<Ns;site++) {
|
||||
int sU =lo.Reorder(ssU);
|
||||
|
||||
LOCK_GAUGE(0);
|
||||
|
||||
|
||||
int ssn=ssU+1;
|
||||
if(ssn>=nmax) ssn=0;
|
||||
int sUn=lo.Reorder(ssn);
|
||||
@ -251,5 +254,6 @@
|
||||
|
||||
}
|
||||
ssU++;
|
||||
UNLOCK_GAUGE(0);
|
||||
}
|
||||
}
|
||||
|
146
lib/qcd/action/fermion/WilsonKernelsAsmQPX.h
Normal file
146
lib/qcd/action/fermion/WilsonKernelsAsmQPX.h
Normal file
@ -0,0 +1,146 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/WilsonKernelsAsmQPX.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
|
||||
#if defined(QPX)
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
// If we are QPX specialise the single precision routine
|
||||
///////////////////////////////////////////////////////////
|
||||
|
||||
#include <simd/IBM_qpx.h>
|
||||
#include <simd/IBM_qpx_single.h>
|
||||
|
||||
#define MAYBEPERM(A,perm) if (perm) { A ; }
|
||||
#define MULT_2SPIN(ptr,pf) MULT_2SPIN_QPX(ptr,pf)
|
||||
#define COMPLEX_SIGNS(isigns)
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// XYZT vectorised, undag Kernel, single
|
||||
/////////////////////////////////////////////////////////////////
|
||||
#undef KERNEL_DAG
// WilsonImplF specialisation (QPX build). No body is written here: the
// #include directly after the signature supplies it, so the included text is
// compiled with KERNEL_DAG undefined and with the MAYBEPERM / MULT_2SPIN /
// COMPLEX_SIGNS definitions currently in force.
template <>
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(
    StencilImpl &st,
    LebesgueOrder &lo,
    DoubledGaugeField &U,
    SiteHalfSpinor *buf,
    int ss,
    int ssU,
    int Ls,
    int Ns,
    const FermionField &in,
    FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// XYZT vectorised, dag Kernel, single
|
||||
/////////////////////////////////////////////////////////////////
|
||||
#define KERNEL_DAG
// WilsonImplF specialisation of the Dag entry point (QPX build). No body is
// written here: the #include directly after the signature supplies it, so the
// included text is compiled with KERNEL_DAG defined and with the MAYBEPERM /
// MULT_2SPIN / COMPLEX_SIGNS definitions currently in force.
template <>
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(
    StencilImpl &st,
    LebesgueOrder &lo,
    DoubledGaugeField &U,
    SiteHalfSpinor *buf,
    int ss,
    int ssU,
    int Ls,
    int Ns,
    const FermionField &in,
    FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
|
||||
#undef MAYBEPERM
|
||||
#undef MULT_2SPIN
|
||||
#define MAYBEPERM(A,B)
|
||||
#define MULT_2SPIN(ptr,pf) MULT_2SPIN_QPX_LS(ptr,pf)
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Ls vectorised, undag Kernel, single
|
||||
/////////////////////////////////////////////////////////////////
|
||||
#undef KERNEL_DAG
// DomainWallVec5dImplF specialisation (QPX build). No body is written here:
// the #include directly after the signature supplies it, so the included text
// is compiled with KERNEL_DAG undefined and with the MAYBEPERM / MULT_2SPIN /
// COMPLEX_SIGNS definitions currently in force.
template <>
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(
    StencilImpl &st,
    LebesgueOrder &lo,
    DoubledGaugeField &U,
    SiteHalfSpinor *buf,
    int ss,
    int ssU,
    int Ls,
    int Ns,
    const FermionField &in,
    FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Ls vectorised, dag Kernel, single
|
||||
/////////////////////////////////////////////////////////////////
|
||||
#define KERNEL_DAG
// DomainWallVec5dImplF specialisation of the Dag entry point (QPX build). No
// body is written here: the #include directly after the signature supplies it,
// so the included text is compiled with KERNEL_DAG defined and with the
// MAYBEPERM / MULT_2SPIN / COMPLEX_SIGNS definitions currently in force.
template <>
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(
    StencilImpl &st,
    LebesgueOrder &lo,
    DoubledGaugeField &U,
    SiteHalfSpinor *buf,
    int ss,
    int ssU,
    int Ls,
    int Ns,
    const FermionField &in,
    FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
#undef MAYBEPERM
|
||||
#undef MULT_2SPIN
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
// DP routines
|
||||
///////////////////////////////////////////////////////////
|
||||
|
||||
#include <simd/IBM_qpx_double.h>
|
||||
|
||||
#define MAYBEPERM(A,perm) if (perm) { A ; }
|
||||
#define MULT_2SPIN(ptr,pf) MULT_2SPIN_QPX(ptr,pf)
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// XYZT Vectorised, undag Kernel, double
|
||||
/////////////////////////////////////////////////////////////////
|
||||
#undef KERNEL_DAG
// WilsonImplD specialisation (QPX build). No body is written here: the
// #include directly after the signature supplies it, so the included text is
// compiled with KERNEL_DAG undefined and with the MAYBEPERM / MULT_2SPIN /
// COMPLEX_SIGNS definitions currently in force.
template <>
void WilsonKernels<WilsonImplD>::DiracOptAsmDhopSite(
    StencilImpl &st,
    LebesgueOrder &lo,
    DoubledGaugeField &U,
    SiteHalfSpinor *buf,
    int ss,
    int ssU,
    int Ls,
    int Ns,
    const FermionField &in,
    FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
/////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// XYZT Vectorised, dag Kernel, double
|
||||
/////////////////////////////////////////////////////////////////
|
||||
#define KERNEL_DAG
// WilsonImplD specialisation of the Dag entry point (QPX build). No body is
// written here: the #include directly after the signature supplies it, so the
// included text is compiled with KERNEL_DAG defined and with the MAYBEPERM /
// MULT_2SPIN / COMPLEX_SIGNS definitions currently in force.
template <>
void WilsonKernels<WilsonImplD>::DiracOptAsmDhopSiteDag(
    StencilImpl &st,
    LebesgueOrder &lo,
    DoubledGaugeField &U,
    SiteHalfSpinor *buf,
    int ss,
    int ssU,
    int Ls,
    int Ns,
    const FermionField &in,
    FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
/////////////////////////////////////////////////////////////////
|
||||
|
||||
#undef MAYBEPERM
|
||||
#undef MULT_2SPIN
|
||||
#define MAYBEPERM(A,B)
|
||||
#define MULT_2SPIN(ptr,pf) MULT_2SPIN_QPX_LS(ptr,pf)
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Ls vectorised, undag Kernel, double
|
||||
/////////////////////////////////////////////////////////////////
|
||||
#undef KERNEL_DAG
// DomainWallVec5dImplD specialisation (QPX build). No body is written here:
// the #include directly after the signature supplies it, so the included text
// is compiled with KERNEL_DAG undefined and with the MAYBEPERM / MULT_2SPIN /
// COMPLEX_SIGNS definitions currently in force.
template <>
void WilsonKernels<DomainWallVec5dImplD>::DiracOptAsmDhopSite(
    StencilImpl &st,
    LebesgueOrder &lo,
    DoubledGaugeField &U,
    SiteHalfSpinor *buf,
    int ss,
    int ssU,
    int Ls,
    int Ns,
    const FermionField &in,
    FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
/////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Ls vectorised, dag Kernel, double
|
||||
/////////////////////////////////////////////////////////////////
|
||||
#define KERNEL_DAG
// DomainWallVec5dImplD specialisation of the Dag entry point (QPX build). No
// body is written here: the #include directly after the signature supplies it,
// so the included text is compiled with KERNEL_DAG defined and with the
// MAYBEPERM / MULT_2SPIN / COMPLEX_SIGNS definitions currently in force.
template <>
void WilsonKernels<DomainWallVec5dImplD>::DiracOptAsmDhopSiteDag(
    StencilImpl &st,
    LebesgueOrder &lo,
    DoubledGaugeField &U,
    SiteHalfSpinor *buf,
    int ss,
    int ssU,
    int Ls,
    int Ns,
    const FermionField &in,
    FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
/////////////////////////////////////////////////////////////////
|
||||
|
||||
// Tear down every kernel-configuration macro this header defined so that none
// of them leaks into code that includes it. COMPLEX_SIGNS is defined near the
// top of the QPX section and stays in force for both precisions, so it has to
// be removed here together with the two per-variant macros.
#undef COMPLEX_SIGNS
#undef MAYBEPERM
#undef MULT_2SPIN
|
||||
|
||||
#endif
|
Reference in New Issue
Block a user