1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-08-15 18:51:53 +01:00

merge develop

This commit is contained in:
2021-02-23 14:54:46 +00:00
88 changed files with 8040 additions and 5694 deletions

View File

@@ -80,6 +80,13 @@ template<typename T> struct isSpinor {
template <typename T> using IfSpinor = Invoke<std::enable_if< isSpinor<T>::value,int> > ;
template <typename T> using IfNotSpinor = Invoke<std::enable_if<!isSpinor<T>::value,int> > ;
const int CoarseIndex = 4;
template<typename T> struct isCoarsened {
static constexpr bool value = (CoarseIndex<=T::TensorLevel);
};
template <typename T> using IfCoarsened = Invoke<std::enable_if< isCoarsened<T>::value,int> > ;
template <typename T> using IfNotCoarsened = Invoke<std::enable_if<!isCoarsened<T>::value,int> > ;
// ChrisK very keen to add extra space for Gparity doubling.
//
// Also add domain wall index, in a way where Wilson operator

View File

@@ -97,42 +97,30 @@ public:
Coordinate icoor;
#ifdef GRID_SIMT
_Spinor tmp;
const int Nsimd =SiteDoubledGaugeField::Nsimd();
int s = acceleratorSIMTlane(Nsimd);
St.iCoorFromIindex(icoor,s);
int mmu = mu % Nd;
if ( SE->_around_the_world && St.parameters.twists[mmu] ) {
int permute_lane = (sl==1)
|| ((distance== 1)&&(icoor[direction]==1))
|| ((distance==-1)&&(icoor[direction]==0));
if ( permute_lane ) {
tmp(0) = chi(1);
tmp(1) = chi(0);
} else {
tmp(0) = chi(0);
tmp(1) = chi(1);
}
auto UU0=coalescedRead(U(0)(mu));
auto UU1=coalescedRead(U(1)(mu));
//Decide whether we do a G-parity flavor twist
//Note: this assumes (but does not check) that sl==1 || sl==2 i.e. max 2 SIMD lanes in G-parity dir
//It also assumes (but does not check) that abs(distance) == 1
int permute_lane = (sl==1)
|| ((distance== 1)&&(icoor[direction]==1))
|| ((distance==-1)&&(icoor[direction]==0));
auto UU0=coalescedRead(U(0)(mu));
auto UU1=coalescedRead(U(1)(mu));
permute_lane = permute_lane && SE->_around_the_world && St.parameters.twists[mmu]; //only if we are going around the world
mult(&phi(0),&UU0,&tmp(0));
mult(&phi(1),&UU1,&tmp(1));
//Apply the links
int f_upper = permute_lane ? 1 : 0;
int f_lower = !f_upper;
} else {
auto UU0=coalescedRead(U(0)(mu));
auto UU1=coalescedRead(U(1)(mu));
mult(&phi(0),&UU0,&chi(0));
mult(&phi(1),&UU1,&chi(1));
}
mult(&phi(0),&UU0,&chi(f_upper));
mult(&phi(1),&UU1,&chi(f_lower));
#else
typedef _Spinor vobj;

View File

@@ -85,7 +85,7 @@ class MADWF
maxiter =_maxiter;
};
void operator() (const FermionFieldo &src4,FermionFieldo &sol5)
void operator() (const FermionFieldo &src,FermionFieldo &sol5)
{
std::cout << GridLogMessage<< " ************************************************" << std::endl;
std::cout << GridLogMessage<< " MADWF-like algorithm " << std::endl;
@@ -114,8 +114,16 @@ class MADWF
///////////////////////////////////////
//Import source, include Dminus factors
///////////////////////////////////////
Mato.ImportPhysicalFermionSource(src4,b);
std::cout << GridLogMessage << " src4 " <<norm2(src4)<<std::endl;
GridBase *src_grid = src.Grid();
assert( (src_grid == Mato.GaugeGrid()) || (src_grid == Mato.FermionGrid()));
if ( src_grid == Mato.GaugeGrid() ) {
Mato.ImportPhysicalFermionSource(src,b);
} else {
b=src;
}
std::cout << GridLogMessage << " src " <<norm2(src)<<std::endl;
std::cout << GridLogMessage << " b " <<norm2(b)<<std::endl;
defect = b;

View File

@@ -106,11 +106,15 @@ public:
const _SpinorField & phi,
int mu)
{
const int Nsimd = SiteHalfSpinor::Nsimd();
autoView( out_v, out, AcceleratorWrite);
autoView( phi_v, phi, AcceleratorRead);
autoView( Umu_v, Umu, AcceleratorRead);
accelerator_for(sss,out.Grid()->oSites(),1,{
multLink(out_v[sss],Umu_v[sss],phi_v[sss],mu);
typedef decltype(coalescedRead(out_v[0])) calcSpinor;
accelerator_for(sss,out.Grid()->oSites(),Nsimd,{
calcSpinor tmp;
multLink(tmp,Umu_v[sss],phi_v(sss),mu);
coalescedWrite(out_v[sss],tmp);
});
}

View File

@@ -642,7 +642,7 @@ void CayleyFermion5D<Impl>::ContractConservedCurrent( PropagatorField &q_in_1,
Current curr_type,
unsigned int mu)
{
#if (!defined(GRID_CUDA)) && (!defined(GRID_HIP))
#if (!defined(GRID_HIP))
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
@@ -826,7 +826,7 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
}
#endif
#if (!defined(GRID_CUDA)) && (!defined(GRID_HIP))
#if (!defined(GRID_HIP))
int tshift = (mu == Nd-1) ? 1 : 0;
////////////////////////////////////////////////
// GENERAL CAYLEY CASE

View File

@@ -92,20 +92,16 @@ void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
int lvol = _Umu.Grid()->lSites();
int DimRep = Impl::Dimension;
Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
Coordinate lcoor;
typename SiteCloverType::scalar_object Qx = Zero(), Qxinv = Zero();
{
autoView(CTv,CloverTerm,CpuRead);
autoView(CTIv,CloverTermInv,CpuWrite);
for (int site = 0; site < lvol; site++) {
thread_for(site, lvol, {
Coordinate lcoor;
grid->LocalIndexToLocalCoor(site, lcoor);
EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
typename SiteCloverType::scalar_object Qx = Zero(), Qxinv = Zero();
peekLocalSite(Qx, CTv, lcoor);
Qxinv = Zero();
//if (csw!=0){
for (int j = 0; j < Ns; j++)
for (int k = 0; k < Ns; k++)
@@ -126,7 +122,7 @@ void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
// if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl;
// }
pokeLocalSite(Qxinv, CTIv, lcoor);
}
});
}
// Separate the even and odd parts

View File

@@ -397,6 +397,7 @@ void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, co
template <class Impl>
void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag)
{
DhopCalls+=2;
conformable(in.Grid(), _grid); // verifies full grid
conformable(in.Grid(), out.Grid());
@@ -408,6 +409,7 @@ void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int da
template <class Impl>
void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag)
{
DhopCalls++;
conformable(in.Grid(), _cbgrid); // verifies half grid
conformable(in.Grid(), out.Grid()); // drops the cb check
@@ -420,6 +422,7 @@ void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int
template <class Impl>
void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
{
DhopCalls++;
conformable(in.Grid(), _cbgrid); // verifies half grid
conformable(in.Grid(), out.Grid()); // drops the cb check

View File

@@ -38,9 +38,6 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
// undefine everything related to kernels
#include <simd/Fujitsu_A64FX_undef.h>
// enable A64FX body
#define WILSONKERNELSASMBODYA64FX
//#pragma message("A64FX Dslash: WilsonKernelsAsmBodyA64FX.h")
///////////////////////////////////////////////////////////
// If we are A64FX specialise the single precision routine
@@ -63,119 +60,89 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
/////////////////////////////////////////////////////////////////
@@ -185,119 +152,89 @@ WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldV
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
// undefine
@@ -330,119 +267,89 @@ WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFie
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
/////////////////////////////////////////////////////////////////
// XYZT vectorised, dag Kernel, double
@@ -451,124 +358,93 @@ WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldV
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#else
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif
// undefs
#undef WILSONKERNELSASMBODYA64FX
#include <simd/Fujitsu_A64FX_undef.h>
#endif //A64FXASM

View File

@@ -25,6 +25,11 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
// GCC 10 messes up SVE instruction scheduling using -O3, but
// -O3 -fno-schedule-insns -fno-schedule-insns2 does wonders
// performance now is better than armclang 20.2
#ifdef KERNEL_DAG
#define DIR0_PROJ XP_PROJ
#define DIR1_PROJ YP_PROJ
@@ -97,7 +102,7 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
PROJ; \
MAYBEPERM(PERMUTE_DIR,perm); \
} else { \
LOAD_CHI(base); \
LOAD_CHI(base); \
} \
base = st.GetInfo(ptype,local,perm,NxtDir,ent,plocal); ent++; \
MULT_2SPIN_1(Dir); \
@@ -110,6 +115,11 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
} \
RECON; \
/*
NB: picking PREFETCH_GAUGE_L2(Dir+4); here results in performance penalty
though I expected that it would improve on performance
*/
#define ASM_LEG_XP(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
PREFETCH1_CHIMU(base); \
@@ -126,73 +136,63 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
#define ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
basep = st.GetPFInfo(nent,plocal); nent++; \
if ( local ) { \
LOAD_CHIMU(base); \
LOAD_TABLE(PERMUTE_DIR); \
PROJ; \
MAYBEPERM(PERMUTE_DIR,perm); \
}else if ( st.same_node[Dir] ) {LOAD_CHI(base);} \
base = st.GetInfo(ptype,local,perm,NxtDir,ent,plocal); ent++; \
if ( local || st.same_node[Dir] ) { \
MULT_2SPIN_1(Dir); \
PREFETCH_CHIMU(base); \
/* PREFETCH_GAUGE_L1(NxtDir); */ \
MULT_2SPIN_2; \
if (s == 0) { \
if ((Dir == 0) || (Dir == 4)) { PREFETCH_GAUGE_L2(Dir); } \
} \
RECON; \
PREFETCH_CHIMU_L2(basep); \
} else { PREFETCH_CHIMU(base); } \
if ( local ) { \
LOAD_CHIMU(base); \
LOAD_TABLE(PERMUTE_DIR); \
PROJ; \
MAYBEPERM(PERMUTE_DIR,perm); \
}else if ( st.same_node[Dir] ) {LOAD_CHI(base);} \
if ( local || st.same_node[Dir] ) { \
MULT_2SPIN_1(Dir); \
MULT_2SPIN_2; \
RECON; \
} \
base = st.GetInfo(ptype,local,perm,NxtDir,ent,plocal); ent++; \
PREFETCH_CHIMU(base); \
PREFETCH_CHIMU_L2(basep); \
#define ASM_LEG_XP(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
PREFETCH1_CHIMU(base); \
{ ZERO_PSI; } \
ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON)
#define RESULT(base,basep) SAVE_RESULT(base,basep);
#endif
////////////////////////////////////////////////////////////////////////////////
// Post comms kernel
////////////////////////////////////////////////////////////////////////////////
#ifdef EXTERIOR
#define ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
if((!local)&&(!st.same_node[Dir]) ) { \
LOAD_CHI(base); \
base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
if((!local)&&(!st.same_node[Dir]) ) { \
LOAD_CHI(base); \
MULT_2SPIN_1(Dir); \
PREFETCH_CHIMU(base); \
/* PREFETCH_GAUGE_L1(NxtDir); */ \
MULT_2SPIN_2; \
if (s == 0) { \
if ((Dir == 0) || (Dir == 4)) { PREFETCH_GAUGE_L2(Dir); } \
} \
RECON; \
nmu++; \
RECON; \
nmu++; \
}
#define ASM_LEG_XP(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
nmu=0; \
base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++;\
if((!local)&&(!st.same_node[Dir]) ) { \
LOAD_CHI(base); \
#define ASM_LEG_XP(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
nmu=0; \
{ ZERO_PSI;} \
base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
if((!local)&&(!st.same_node[Dir]) ) { \
LOAD_CHI(base); \
MULT_2SPIN_1(Dir); \
PREFETCH_CHIMU(base); \
/* PREFETCH_GAUGE_L1(NxtDir); */ \
MULT_2SPIN_2; \
if (s == 0) { \
if ((Dir == 0) || (Dir == 4)) { PREFETCH_GAUGE_L2(Dir); } \
} \
RECON; \
nmu++; \
RECON; \
nmu++; \
}
#define RESULT(base,basep) if (nmu){ ADD_RESULT(base,base);}
#endif
{
int nmu;
int local,perm, ptype;
@@ -209,7 +209,6 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
int ssn=ssU+1; if(ssn>=nmax) ssn=0;
// int sUn=lo.Reorder(ssn);
int sUn=ssn;
LOCK_GAUGE(0);
#else
int sU =ssU;
int ssn=ssU+1; if(ssn>=nmax) ssn=0;
@@ -295,6 +294,11 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
std::cout << "----------------------------------------------------" << std::endl;
#endif
// DC ZVA test
// { uint64_t basestore = (uint64_t)&out[ss];
// PREFETCH_RESULT_L2_STORE(basestore); }
ASM_LEG(Ym,Zm,PERMUTE_DIR2,DIR5_PROJ,DIR5_RECON);
#ifdef SHOW
@@ -308,6 +312,11 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
std::cout << "----------------------------------------------------" << std::endl;
#endif
// DC ZVA test
//{ uint64_t basestore = (uint64_t)&out[ss];
// PREFETCH_RESULT_L2_STORE(basestore); }
ASM_LEG(Zm,Tm,PERMUTE_DIR1,DIR6_PROJ,DIR6_RECON);
#ifdef SHOW
@@ -321,6 +330,11 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
std::cout << "----------------------------------------------------" << std::endl;
#endif
// DC ZVA test
//{ uint64_t basestore = (uint64_t)&out[ss];
// PREFETCH_RESULT_L2_STORE(basestore); }
ASM_LEG(Tm,Xp,PERMUTE_DIR0,DIR7_PROJ,DIR7_RECON);
#ifdef SHOW
@@ -341,6 +355,7 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
base = (uint64_t) &out[ss];
basep= st.GetPFInfo(nent,plocal); ent++;
basep = (uint64_t) &out[ssn];
//PREFETCH_RESULT_L1_STORE(base);
RESULT(base,basep);
#ifdef SHOW

View File

@@ -0,0 +1,38 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/gauge/Gauge.cc
Copyright (C) 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
NAMESPACE_BEGIN(Grid);
std::vector<int> ConjugateGaugeImplBase::_conjDirs;
NAMESPACE_END(Grid);

View File

@@ -154,6 +154,10 @@ public:
return Hsum.real();
}
static inline void Project(Field &U) {
ProjectSUn(U);
}
static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) {
SU<Nc>::HotConfiguration(pRNG, U);
}

View File

@@ -59,14 +59,14 @@ public:
}
static inline GaugeLinkField
CovShiftIdentityBackward(const GaugeLinkField &Link, int mu) {
return Cshift(adj(Link), mu, -1);
return PeriodicBC::CovShiftIdentityBackward(Link, mu);
}
static inline GaugeLinkField
CovShiftIdentityForward(const GaugeLinkField &Link, int mu) {
return Link;
return PeriodicBC::CovShiftIdentityForward(Link,mu);
}
static inline GaugeLinkField ShiftStaple(const GaugeLinkField &Link, int mu) {
return Cshift(Link, mu, 1);
return PeriodicBC::ShiftStaple(Link,mu);
}
static inline bool isPeriodicGaugeField(void) { return true; }
@@ -74,7 +74,13 @@ public:
// Composition with smeared link, bc's etc.. probably need multiple inheritance
// Variable precision "S" and variable Nc
template <class GimplTypes> class ConjugateGaugeImpl : public GimplTypes {
class ConjugateGaugeImplBase {
protected:
static std::vector<int> _conjDirs;
};
template <class GimplTypes> class ConjugateGaugeImpl : public GimplTypes, ConjugateGaugeImplBase {
private:
public:
INHERIT_GIMPL_TYPES(GimplTypes);
@@ -84,47 +90,56 @@ public:
////////////////////////////////////////////////////////////////////////////////////////////////////////////
template <class covariant>
static Lattice<covariant> CovShiftForward(const GaugeLinkField &Link, int mu,
const Lattice<covariant> &field) {
return ConjugateBC::CovShiftForward(Link, mu, field);
const Lattice<covariant> &field)
{
assert(_conjDirs.size() == Nd);
if(_conjDirs[mu])
return ConjugateBC::CovShiftForward(Link, mu, field);
else
return PeriodicBC::CovShiftForward(Link, mu, field);
}
template <class covariant>
static Lattice<covariant> CovShiftBackward(const GaugeLinkField &Link, int mu,
const Lattice<covariant> &field) {
return ConjugateBC::CovShiftBackward(Link, mu, field);
const Lattice<covariant> &field)
{
assert(_conjDirs.size() == Nd);
if(_conjDirs[mu])
return ConjugateBC::CovShiftBackward(Link, mu, field);
else
return PeriodicBC::CovShiftBackward(Link, mu, field);
}
static inline GaugeLinkField
CovShiftIdentityBackward(const GaugeLinkField &Link, int mu) {
GridBase *grid = Link.Grid();
int Lmu = grid->GlobalDimensions()[mu] - 1;
Lattice<iScalar<vInteger>> coor(grid);
LatticeCoordinate(coor, mu);
GaugeLinkField tmp(grid);
tmp = adj(Link);
tmp = where(coor == Lmu, conjugate(tmp), tmp);
return Cshift(tmp, mu, -1); // moves towards positive mu
CovShiftIdentityBackward(const GaugeLinkField &Link, int mu)
{
assert(_conjDirs.size() == Nd);
if(_conjDirs[mu])
return ConjugateBC::CovShiftIdentityBackward(Link, mu);
else
return PeriodicBC::CovShiftIdentityBackward(Link, mu);
}
static inline GaugeLinkField
CovShiftIdentityForward(const GaugeLinkField &Link, int mu) {
return Link;
CovShiftIdentityForward(const GaugeLinkField &Link, int mu)
{
assert(_conjDirs.size() == Nd);
if(_conjDirs[mu])
return ConjugateBC::CovShiftIdentityForward(Link,mu);
else
return PeriodicBC::CovShiftIdentityForward(Link,mu);
}
static inline GaugeLinkField ShiftStaple(const GaugeLinkField &Link, int mu) {
GridBase *grid = Link.Grid();
int Lmu = grid->GlobalDimensions()[mu] - 1;
Lattice<iScalar<vInteger>> coor(grid);
LatticeCoordinate(coor, mu);
GaugeLinkField tmp(grid);
tmp = Cshift(Link, mu, 1);
tmp = where(coor == Lmu, conjugate(tmp), tmp);
return tmp;
static inline GaugeLinkField ShiftStaple(const GaugeLinkField &Link, int mu)
{
assert(_conjDirs.size() == Nd);
if(_conjDirs[mu])
return ConjugateBC::ShiftStaple(Link,mu);
else
return PeriodicBC::ShiftStaple(Link,mu);
}
static inline void setDirections(std::vector<int> &conjDirs) { _conjDirs=conjDirs; }
static inline std::vector<int> getDirections(void) { return _conjDirs; }
static inline bool isPeriodicGaugeField(void) { return false; }
};

View File

@@ -54,6 +54,10 @@ public:
static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) {
U = 1.0;
}
static inline void Project(Field &U) {
return;
}
static void MomentumSpacePropagator(Field &out, RealD m)
{
@@ -234,6 +238,10 @@ public:
#endif //USE_FFT_ACCELERATION
}
static inline void Project(Field &U) {
return;
}
static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) {
Group::GaussianFundamentalLieAlgebraMatrix(pRNG, U);
}

View File

@@ -159,6 +159,13 @@ private:
Resources.GetCheckPointer()->CheckpointRestore(Parameters.StartTrajectory, U,
Resources.GetSerialRNG(),
Resources.GetParallelRNG());
} else {
// others
std::cout << GridLogError << "Unrecognized StartingType\n";
std::cout
<< GridLogError
<< "Valid [HotStart, ColdStart, TepidStart, CheckpointStart]\n";
exit(1);
}
Smearing.set_Field(U);

View File

@@ -95,7 +95,7 @@ private:
typedef typename IntegratorType::Field Field;
typedef std::vector< HmcObservable<Field> * > ObsListType;
//pass these from the resource manager
GridSerialRNG &sRNG;
GridParallelRNG &pRNG;

View File

@@ -74,7 +74,7 @@ public:
conf_file = os.str();
}
}
virtual ~BaseHmcCheckpointer(){};
void check_filename(const std::string &filename){
std::ifstream f(filename.c_str());
if(!f.good()){
@@ -82,7 +82,6 @@ public:
abort();
};
}
virtual void initialize(const CheckpointerParameters &Params) = 0;
virtual void CheckpointRestore(int traj, typename Impl::Field &U,

View File

@@ -45,6 +45,7 @@ private:
public:
INHERIT_GIMPL_TYPES(Implementation);
typedef GaugeStatistics<Implementation> GaugeStats;
ILDGHmcCheckpointer(const CheckpointerParameters &Params_) { initialize(Params_); }
@@ -78,7 +79,7 @@ public:
BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
IldgWriter _IldgWriter(grid->IsBoss());
_IldgWriter.open(config);
_IldgWriter.writeConfiguration(U, traj, config, config);
_IldgWriter.writeConfiguration<GaugeStats>(U, traj, config, config);
_IldgWriter.close();
std::cout << GridLogMessage << "Written ILDG Configuration on " << config
@@ -105,7 +106,7 @@ public:
FieldMetaData header;
IldgReader _IldgReader;
_IldgReader.open(config);
_IldgReader.readConfiguration(U,header); // format from the header
_IldgReader.readConfiguration<GaugeStats>(U,header); // format from the header
_IldgReader.close();
std::cout << GridLogMessage << "Read ILDG Configuration from " << config

View File

@@ -43,7 +43,8 @@ private:
public:
INHERIT_GIMPL_TYPES(Gimpl); // only for gauge configurations
typedef GaugeStatistics<Gimpl> GaugeStats;
NerscHmcCheckpointer(const CheckpointerParameters &Params_) { initialize(Params_); }
void initialize(const CheckpointerParameters &Params_) {
@@ -60,7 +61,7 @@ public:
int precision32 = 1;
int tworow = 0;
NerscIO::writeRNGState(sRNG, pRNG, rng);
NerscIO::writeConfiguration(U, config, tworow, precision32);
NerscIO::writeConfiguration<GaugeStats>(U, config, tworow, precision32);
}
};
@@ -74,7 +75,7 @@ public:
FieldMetaData header;
NerscIO::readRNGState(sRNG, pRNG, header, rng);
NerscIO::readConfiguration(U, header, config);
NerscIO::readConfiguration<GaugeStats>(U, header, config);
};
};

View File

@@ -313,6 +313,8 @@ public:
std::cout << GridLogIntegrator << " times[" << level << "]= " << t_P[level] << " " << t_U << std::endl;
}
FieldImplementation::Project(U);
// and that we indeed got to the end of the trajectory
assert(fabs(t_U - Params.trajL) < 1.0e-6);

View File

@@ -99,7 +99,7 @@ public:
virtual Prod* getPtr() = 0;
// add a getReference?
virtual ~HMCModuleBase(){};
virtual void print_parameters(){}; // default to nothing
};

View File

@@ -128,7 +128,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProjTm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
hspin(0)=fspin(0)-fspin(2);
hspin(1)=fspin(1)-fspin(3);
}
@@ -138,40 +137,50 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
* 0 0 -1 0
* 0 0 0 -1
*/
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5p (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
hspin(0)=fspin(0);
hspin(1)=fspin(1);
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5m (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
hspin(0)=fspin(2);
hspin(1)=fspin(3);
}
// template<class vtype> accelerator_inline void fspProj5p (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5p (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
rfspin(0)=fspin(0);
rfspin(1)=fspin(1);
rfspin(2)=Zero();
rfspin(3)=Zero();
}
// template<class vtype> accelerator_inline void fspProj5m (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5m (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
rfspin(0)=Zero();
rfspin(1)=Zero();
rfspin(2)=fspin(2);
rfspin(3)=fspin(3);
}
template<class vtype,int N,IfCoarsened<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<vtype,N> &rfspin,const iVector<vtype,N> &fspin)
{
const int hN = N>>1;
for(int s=0;s<hN;s++){
rfspin(s)=fspin(s);
rfspin(s+hN)=Zero();
}
}
template<class vtype,int N,IfCoarsened<iVector<vtype,N> > = 0> accelerator_inline void spProj5m (iVector<vtype,N> &rfspin,const iVector<vtype,N> &fspin)
{
const int hN = N>>1;
for(int s=0;s<hN;s++){
rfspin(s)=Zero();
rfspin(s+hN)=fspin(s+hN);
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Reconstruction routines to move back again to four spin
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -183,7 +192,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
*/
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)=hspin(0);
fspin(1)=hspin(1);
fspin(2)=timesMinusI(hspin(1));
@@ -191,7 +199,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)=hspin(0);
fspin(1)=hspin(1);
fspin(2)=timesI(hspin(1));
@@ -199,7 +206,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)+=hspin(0);
fspin(1)+=hspin(1);
fspin(2)-=timesI(hspin(1));
@@ -207,7 +213,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)+=hspin(0);
fspin(1)+=hspin(1);
fspin(2)+=timesI(hspin(1));
@@ -221,7 +226,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)=hspin(0);
fspin(1)=hspin(1);
fspin(2)= hspin(1);
@@ -229,7 +233,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)=hspin(0);
fspin(1)=hspin(1);
fspin(2)=-hspin(1);
@@ -237,7 +240,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)+=hspin(0);
fspin(1)+=hspin(1);
fspin(2)+=hspin(1);
@@ -245,7 +247,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)+=hspin(0);
fspin(1)+=hspin(1);
fspin(2)-=hspin(1);
@@ -260,7 +261,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
*/
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)=hspin(0);
fspin(1)=hspin(1);
fspin(2)=timesMinusI(hspin(0));
@@ -268,7 +268,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)=hspin(0);
fspin(1)=hspin(1);
fspin(2)= timesI(hspin(0));
@@ -276,7 +275,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)+=hspin(0);
fspin(1)+=hspin(1);
fspin(2)-=timesI(hspin(0));
@@ -284,7 +282,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)+=hspin(0);
fspin(1)+=hspin(1);
fspin(2)+=timesI(hspin(0));
@@ -298,7 +295,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
*/
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)=hspin(0);
fspin(1)=hspin(1);
fspin(2)=hspin(0);
@@ -306,7 +302,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)=hspin(0);
fspin(1)=hspin(1);
fspin(2)=-hspin(0);
@@ -314,7 +309,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)+=hspin(0);
fspin(1)+=hspin(1);
fspin(2)+=hspin(0);
@@ -322,7 +316,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)+=hspin(0);
fspin(1)+=hspin(1);
fspin(2)-=hspin(0);
@@ -336,7 +329,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
*/
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)=hspin(0)+hspin(0); // add is lower latency than mul
fspin(1)=hspin(1)+hspin(1); // probably no measurable diffence though
fspin(2)=Zero();
@@ -344,7 +336,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spRecon5m (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)=Zero();
fspin(1)=Zero();
fspin(2)=hspin(0)+hspin(0);
@@ -352,7 +343,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)+=hspin(0)+hspin(0);
fspin(1)+=hspin(1)+hspin(1);
}
@@ -372,7 +362,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
//////////
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjXp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spProjXp(hspin._internal[i],fspin._internal[i]);
}
@@ -426,26 +415,21 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconXp (iM
}}
}
////////
// Xm
////////
template<class rtype,class vtype> accelerator_inline void spProjXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spProjXm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spProjXm(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spProjXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProjXm(hspin._internal[i][j],fspin._internal[i][j]);
@@ -455,19 +439,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjXm (iMatri
template<class rtype,class vtype> accelerator_inline void spReconXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spReconXm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spReconXm(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spReconXm(hspin._internal[i][j],fspin._internal[i][j]);
@@ -476,45 +457,37 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconXm (iMatr
template<class rtype,class vtype> accelerator_inline void accumReconXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
accumReconXm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
accumReconXm(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumReconXm(hspin._internal[i][j],fspin._internal[i][j]);
}}
}
////////
// Yp
////////
template<class rtype,class vtype> accelerator_inline void spProjYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spProjYp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spProjYp(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spProjYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProjYp(hspin._internal[i][j],fspin._internal[i][j]);
@@ -524,19 +497,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjYp (iMatri
template<class rtype,class vtype> accelerator_inline void spReconYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spReconYp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spReconYp(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spReconYp(hspin._internal[i][j],fspin._internal[i][j]);
@@ -545,66 +515,55 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconYp (iMatr
template<class rtype,class vtype> accelerator_inline void accumReconYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
accumReconYp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
accumReconYp(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumReconYp(hspin._internal[i][j],fspin._internal[i][j]);
}}
}
////////
// Ym
////////
template<class rtype,class vtype> accelerator_inline void spProjYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spProjYm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spProjYm(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spProjYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProjYm(hspin._internal[i][j],fspin._internal[i][j]);
}}
}
template<class rtype,class vtype> accelerator_inline void spReconYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spReconYm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,const iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spReconYm(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spReconYm(hspin._internal[i][j],fspin._internal[i][j]);
@@ -613,19 +572,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconYm (iMatr
template<class rtype,class vtype> accelerator_inline void accumReconYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
accumReconYm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
accumReconYm(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumReconYm(hspin._internal[i][j],fspin._internal[i][j]);
@@ -638,66 +594,57 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconYm (iM
////////
template<class rtype,class vtype> accelerator_inline void spProjZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spProjZp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spProjZp(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spProjZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProjZp(hspin._internal[i][j],fspin._internal[i][j]);
}}
}}
}
template<class rtype,class vtype> accelerator_inline void spReconZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spReconZp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spReconZp(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spReconZp(hspin._internal[i][j],fspin._internal[i][j]);
}}
}}
}
template<class rtype,class vtype> accelerator_inline void accumReconZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
accumReconZp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
accumReconZp(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumReconZp(hspin._internal[i][j],fspin._internal[i][j]);
}}
}}
}
@@ -706,62 +653,53 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconZp (iM
////////
template<class rtype,class vtype> accelerator_inline void spProjZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spProjZm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spProjZm(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spProjZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProjZm(hspin._internal[i][j],fspin._internal[i][j]);
}}
}}
}
template<class rtype,class vtype> accelerator_inline void spReconZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spReconZm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spReconZm(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spReconZm(hspin._internal[i][j],fspin._internal[i][j]);
}}
}}
}
template<class rtype,class vtype> accelerator_inline void accumReconZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
accumReconZm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
accumReconZm(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumReconZm(hspin._internal[i][j],fspin._internal[i][j]);
@@ -774,41 +712,35 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconZm (iM
////////
template<class rtype,class vtype> accelerator_inline void spProjTp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spProjTp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjTp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spProjTp(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spProjTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProjTp(hspin._internal[i][j],fspin._internal[i][j]);
}}
}}
}
template<class rtype,class vtype> accelerator_inline void spReconTp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spReconTp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconTp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spReconTp(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spReconTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spReconTp(hspin._internal[i][j],fspin._internal[i][j]);
@@ -817,44 +749,37 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconTp (iMatr
template<class rtype,class vtype> accelerator_inline void accumReconTp (iScalar<rtype> &hspin, iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
accumReconTp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconTp (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
accumReconTp(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconTp (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumReconTp(hspin._internal[i][j],fspin._internal[i][j]);
}}
}
////////
// Tm
////////
template<class rtype,class vtype> accelerator_inline void spProjTm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spProjTm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjTm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spProjTm(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spProjTm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProjTm(hspin._internal[i][j],fspin._internal[i][j]);
@@ -864,19 +789,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjTm (iMatri
template<class rtype,class vtype> accelerator_inline void spReconTm (iScalar<rtype> &hspin, const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spReconTm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconTm (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spReconTm(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spReconTm(hspin._internal[i][j],fspin._internal[i][j]);
@@ -885,44 +807,37 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconTm (iMatr
template<class rtype,class vtype> accelerator_inline void accumReconTm (iScalar<rtype> &hspin, const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
accumReconTm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconTm (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
accumReconTm(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumReconTm(hspin._internal[i][j],fspin._internal[i][j]);
}}
}
////////
// 5p
////////
template<class rtype,class vtype> accelerator_inline void spProj5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
template<class rtype,class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spProj5p(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spProj5p(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spProj5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
template<class rtype,class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProj5p(hspin._internal[i][j],fspin._internal[i][j]);
@@ -931,19 +846,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProj5p (iMatri
template<class rtype,class vtype> accelerator_inline void spRecon5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spRecon5p(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spRecon5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spRecon5p(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spRecon5p(hspin._internal[i][j],fspin._internal[i][j]);
@@ -952,19 +864,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spRecon5p (iMatr
template<class rtype,class vtype> accelerator_inline void accumRecon5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
accumRecon5p(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumRecon5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
accumRecon5p(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void accumRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumRecon5p(hspin._internal[i][j],fspin._internal[i][j]);
@@ -972,24 +881,18 @@ template<class rtype,class vtype,int N> accelerator_inline void accumRecon5p (iM
}
// four spinor projectors for chiral proj
// template<class vtype> accelerator_inline void fspProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
template<class vtype> accelerator_inline void spProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
template<class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spProj5p(hspin._internal,fspin._internal);
}
// template<class vtype,int N> accelerator_inline void fspProj5p (iVector<vtype,N> &hspin,iVector<vtype,N> &fspin)
template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spProj5p(hspin._internal[i],fspin._internal[i]);
}
}
// template<class vtype,int N> accelerator_inline void fspProj5p (iMatrix<vtype,N> &hspin,iMatrix<vtype,N> &fspin)
template<class vtype,int N> accelerator_inline void spProj5p (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
template<class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProj5p(hspin._internal[i][j],fspin._internal[i][j]);
@@ -1001,17 +904,17 @@ template<class vtype,int N> accelerator_inline void spProj5p (iMatrix<vtype,N> &
// 5m
////////
template<class rtype,class vtype> accelerator_inline void spProj5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
template<class rtype,class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
spProj5m(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<rtype,N> > = 0> accelerator_inline void spProj5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
template<class rtype,class vtype,int N,IfNotSpinor<iVector<rtype,N> > = 0,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
for(int i=0;i<N;i++) {
spProj5m(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spProj5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
template<class rtype,class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
@@ -1021,40 +924,34 @@ template<class rtype,class vtype,int N> accelerator_inline void spProj5m (iMatri
template<class rtype,class vtype> accelerator_inline void spRecon5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spRecon5m(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spRecon5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spRecon5m(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void spRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spRecon5m(hspin._internal[i][j],fspin._internal[i][j]);
}}
}}
}
template<class rtype,class vtype> accelerator_inline void accumRecon5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
accumRecon5m(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumRecon5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
accumRecon5m(hspin._internal[i],fspin._internal[i]);
}
}
template<class rtype,class vtype,int N> accelerator_inline void accumRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumRecon5m(hspin._internal[i][j],fspin._internal[i][j]);
@@ -1063,24 +960,18 @@ template<class rtype,class vtype,int N> accelerator_inline void accumRecon5m (iM
// four spinor projectors for chiral proj
// template<class vtype> accelerator_inline void fspProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
template<class vtype> accelerator_inline void spProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
template<class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
spProj5m(hspin._internal,fspin._internal);
}
// template<class vtype,int N> accelerator_inline void fspProj5m (iVector<vtype,N> &hspin,iVector<vtype,N> &fspin)
template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5m (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
for(int i=0;i<N;i++) {
spProj5m(hspin._internal[i],fspin._internal[i]);
}
}
// template<class vtype,int N> accelerator_inline void fspProj5m (iMatrix<vtype,N> &hspin,iMatrix<vtype,N> &fspin)
template<class vtype,int N> accelerator_inline void spProj5m (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
template<class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProj5m(hspin._internal[i][j],fspin._internal[i][j]);

File diff suppressed because it is too large Load Diff

View File

@@ -53,6 +53,24 @@ namespace PeriodicBC {
return Cshift(tmp,mu,-1);// moves towards positive mu
}
template<class gauge> Lattice<gauge>
CovShiftIdentityBackward(const Lattice<gauge> &Link, int mu)
{
return Cshift(adj(Link), mu, -1);
}
template<class gauge> Lattice<gauge>
CovShiftIdentityForward(const Lattice<gauge> &Link, int mu)
{
return Link;
}
template<class gauge> Lattice<gauge>
ShiftStaple(const Lattice<gauge> &Link, int mu)
{
return Cshift(Link, mu, 1);
}
template<class gauge,class Expr,typename std::enable_if<is_lattice_expr<Expr>::value,void>::type * = nullptr>
auto CovShiftForward(const Lattice<gauge> &Link,
int mu,
@@ -70,6 +88,7 @@ namespace PeriodicBC {
return CovShiftBackward(Link,mu,arg);
}
}
@@ -139,6 +158,38 @@ namespace ConjugateBC {
// std::cout<<"Gparity::CovCshiftBackward mu="<<mu<<std::endl;
return Cshift(tmp,mu,-1);// moves towards positive mu
}
template<class gauge> Lattice<gauge>
CovShiftIdentityBackward(const Lattice<gauge> &Link, int mu) {
GridBase *grid = Link.Grid();
int Lmu = grid->GlobalDimensions()[mu] - 1;
Lattice<iScalar<vInteger>> coor(grid);
LatticeCoordinate(coor, mu);
Lattice<gauge> tmp(grid);
tmp = adj(Link);
tmp = where(coor == Lmu, conjugate(tmp), tmp);
return Cshift(tmp, mu, -1); // moves towards positive mu
}
template<class gauge> Lattice<gauge>
CovShiftIdentityForward(const Lattice<gauge> &Link, int mu) {
return Link;
}
template<class gauge> Lattice<gauge>
ShiftStaple(const Lattice<gauge> &Link, int mu)
{
GridBase *grid = Link.Grid();
int Lmu = grid->GlobalDimensions()[mu] - 1;
Lattice<iScalar<vInteger>> coor(grid);
LatticeCoordinate(coor, mu);
Lattice<gauge> tmp(grid);
tmp = Cshift(Link, mu, 1);
tmp = where(coor == Lmu, conjugate(tmp), tmp);
return tmp;
}
template<class gauge,class Expr,typename std::enable_if<is_lattice_expr<Expr>::value,void>::type * = nullptr>
auto CovShiftForward(const Lattice<gauge> &Link,

View File

@@ -154,8 +154,8 @@ void axpby_ssp_pminus(Lattice<vobj> &z,Coeff a,const Lattice<vobj> &x,Coeff b,co
accelerator_for(sss,nloop,vobj::Nsimd(),{
uint64_t ss = sss*Ls;
decltype(coalescedRead(y_v[ss+sp])) tmp;
spProj5m(tmp,y_v(ss+sp));
tmp = a*x_v(ss+s)+b*tmp;
spProj5m(tmp,y_v(ss+sp));
tmp = a*x_v(ss+s)+b*tmp;
coalescedWrite(z_v[ss+s],tmp);
});
}
@@ -188,7 +188,6 @@ void G5R5(Lattice<vobj> &z,const Lattice<vobj> &x)
z.Checkerboard() = x.Checkerboard();
conformable(x,z);
int Ls = grid->_rdimensions[0];
Gamma G5(Gamma::Algebra::Gamma5);
autoView( x_v, x, AcceleratorRead);
autoView( z_v, z, AcceleratorWrite);
uint64_t nloop = grid->oSites()/Ls;
@@ -196,7 +195,13 @@ void G5R5(Lattice<vobj> &z,const Lattice<vobj> &x)
uint64_t ss = sss*Ls;
for(int s=0;s<Ls;s++){
int sp = Ls-1-s;
coalescedWrite(z_v[ss+sp],G5*x_v(ss+s));
auto tmp = x_v(ss+s);
decltype(tmp) tmp_p;
decltype(tmp) tmp_m;
spProj5p(tmp_p,tmp);
spProj5m(tmp_m,tmp);
// Use of spProj5m, 5p captures the coarse space too
coalescedWrite(z_v[ss+sp],tmp_p - tmp_m);
}
});
}
@@ -208,10 +213,20 @@ void G5C(Lattice<vobj> &z, const Lattice<vobj> &x)
z.Checkerboard() = x.Checkerboard();
conformable(x, z);
Gamma G5(Gamma::Algebra::Gamma5);
z = G5 * x;
autoView( x_v, x, AcceleratorRead);
autoView( z_v, z, AcceleratorWrite);
uint64_t nloop = grid->oSites();
accelerator_for(ss,nloop,vobj::Nsimd(),{
auto tmp = x_v(ss);
decltype(tmp) tmp_p;
decltype(tmp) tmp_m;
spProj5p(tmp_p,tmp);
spProj5m(tmp_m,tmp);
coalescedWrite(z_v[ss],tmp_p - tmp_m);
});
}
/*
template<class CComplex, int nbasis>
void G5C(Lattice<iVector<CComplex, nbasis>> &z, const Lattice<iVector<CComplex, nbasis>> &x)
{
@@ -234,6 +249,7 @@ void G5C(Lattice<iVector<CComplex, nbasis>> &z, const Lattice<iVector<CComplex,
}
});
}
*/
NAMESPACE_END(Grid);

View File

@@ -735,7 +735,6 @@ public:
}
}
template <typename GaugeField>
static void HotConfiguration(GridParallelRNG &pRNG, GaugeField &out) {
typedef typename GaugeField::vector_type vector_type;
@@ -800,6 +799,88 @@ public:
}
};
template<int N>
LatticeComplexD Determinant(const Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > &Umu)
{
GridBase *grid=Umu.Grid();
auto lvol = grid->lSites();
LatticeComplexD ret(grid);
autoView(Umu_v,Umu,CpuRead);
autoView(ret_v,ret,CpuWrite);
thread_for(site,lvol,{
Eigen::MatrixXcd EigenU = Eigen::MatrixXcd::Zero(N,N);
Coordinate lcoor;
grid->LocalIndexToLocalCoor(site, lcoor);
iScalar<iScalar<iMatrix<ComplexD, N> > > Us;
peekLocalSite(Us, Umu_v, lcoor);
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
EigenU(i,j) = Us()()(i,j);
}}
ComplexD det = EigenU.determinant();
pokeLocalSite(det,ret_v,lcoor);
});
return ret;
}
template<int N>
static void ProjectSUn(Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > &Umu)
{
Umu = ProjectOnGroup(Umu);
auto det = Determinant(Umu);
det = conjugate(det);
for(int i=0;i<N;i++){
auto element = PeekIndex<ColourIndex>(Umu,N-1,i);
element = element * det;
PokeIndex<ColourIndex>(Umu,element,Nc-1,i);
}
}
template<int N>
static void ProjectSUn(Lattice<iVector<iScalar<iMatrix<vComplexD, N> >,Nd> > &U)
{
GridBase *grid=U.Grid();
// Reunitarise
for(int mu=0;mu<Nd;mu++){
auto Umu = PeekIndex<LorentzIndex>(U,mu);
Umu = ProjectOnGroup(Umu);
ProjectSUn(Umu);
PokeIndex<LorentzIndex>(U,Umu,mu);
}
}
// Explicit specialisation for SU(3).
// Explicit specialisation for SU(3).
static void
ProjectSU3 (Lattice<iScalar<iScalar<iMatrix<vComplexD, 3> > > > &Umu)
{
GridBase *grid=Umu.Grid();
const int x=0;
const int y=1;
const int z=2;
// Reunitarise
Umu = ProjectOnGroup(Umu);
autoView(Umu_v,Umu,CpuWrite);
thread_for(ss,grid->oSites(),{
auto cm = Umu_v[ss];
cm()()(2,x) = adj(cm()()(0,y)*cm()()(1,z)-cm()()(0,z)*cm()()(1,y)); //x= yz-zy
cm()()(2,y) = adj(cm()()(0,z)*cm()()(1,x)-cm()()(0,x)*cm()()(1,z)); //y= zx-xz
cm()()(2,z) = adj(cm()()(0,x)*cm()()(1,y)-cm()()(0,y)*cm()()(1,x)); //z= xy-yx
Umu_v[ss]=cm;
});
}
static void ProjectSU3(Lattice<iVector<iScalar<iMatrix<vComplexD, 3> >,Nd> > &U)
{
GridBase *grid=U.Grid();
// Reunitarise
for(int mu=0;mu<Nd;mu++){
auto Umu = PeekIndex<LorentzIndex>(U,mu);
Umu = ProjectOnGroup(Umu);
ProjectSU3(Umu);
PokeIndex<LorentzIndex>(U,Umu,mu);
}
}
typedef SU<2> SU2;
typedef SU<3> SU3;
typedef SU<4> SU4;