mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Staggereed move to accelerator
This commit is contained in:
parent
cf2938688a
commit
006cc8a8f1
@ -57,6 +57,7 @@ NAMESPACE_CHECK(WilsonClover);
|
|||||||
#include <Grid/qcd/action/fermion/WilsonFermion5D.h> // 5d base used by all 5d overlap types
|
#include <Grid/qcd/action/fermion/WilsonFermion5D.h> // 5d base used by all 5d overlap types
|
||||||
NAMESPACE_CHECK(Wilson5D);
|
NAMESPACE_CHECK(Wilson5D);
|
||||||
|
|
||||||
|
#include <Grid/qcd/action/fermion/NaiveStaggeredFermion.h>
|
||||||
#include <Grid/qcd/action/fermion/ImprovedStaggeredFermion.h>
|
#include <Grid/qcd/action/fermion/ImprovedStaggeredFermion.h>
|
||||||
#include <Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h>
|
#include <Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h>
|
||||||
NAMESPACE_CHECK(Staggered);
|
NAMESPACE_CHECK(Staggered);
|
||||||
@ -282,6 +283,10 @@ typedef ImprovedStaggeredFermion<StaggeredImplR> ImprovedStaggeredFermionR;
|
|||||||
typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF;
|
typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF;
|
||||||
typedef ImprovedStaggeredFermion<StaggeredImplD> ImprovedStaggeredFermionD;
|
typedef ImprovedStaggeredFermion<StaggeredImplD> ImprovedStaggeredFermionD;
|
||||||
|
|
||||||
|
typedef NaiveStaggeredFermion<StaggeredImplR> NaiveStaggeredFermionR;
|
||||||
|
typedef NaiveStaggeredFermion<StaggeredImplF> NaiveStaggeredFermionF;
|
||||||
|
typedef NaiveStaggeredFermion<StaggeredImplD> NaiveStaggeredFermionD;
|
||||||
|
|
||||||
typedef ImprovedStaggeredFermion5D<StaggeredImplR> ImprovedStaggeredFermion5DR;
|
typedef ImprovedStaggeredFermion5D<StaggeredImplR> ImprovedStaggeredFermion5DR;
|
||||||
typedef ImprovedStaggeredFermion5D<StaggeredImplF> ImprovedStaggeredFermion5DF;
|
typedef ImprovedStaggeredFermion5D<StaggeredImplF> ImprovedStaggeredFermion5DF;
|
||||||
typedef ImprovedStaggeredFermion5D<StaggeredImplD> ImprovedStaggeredFermion5DD;
|
typedef ImprovedStaggeredFermion5D<StaggeredImplD> ImprovedStaggeredFermion5DD;
|
||||||
|
@ -62,8 +62,8 @@ public:
|
|||||||
double DhopCalls;
|
double DhopCalls;
|
||||||
double DhopCommTime;
|
double DhopCommTime;
|
||||||
double DhopComputeTime;
|
double DhopComputeTime;
|
||||||
double DhopComputeTime2;
|
double DhopComputeTime2;
|
||||||
double DhopFaceTime;
|
double DhopFaceTime;
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
// Implement the abstract base
|
// Implement the abstract base
|
||||||
|
194
Grid/qcd/action/fermion/NaiveStaggeredFermion.h
Normal file
194
Grid/qcd/action/fermion/NaiveStaggeredFermion.h
Normal file
@ -0,0 +1,194 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/qcd/action/fermion/ImprovedStaggered.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi, Peter Boyle
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
|
directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#ifndef GRID_QCD_NAIVE_STAG_FERMION_H
|
||||||
|
#define GRID_QCD_NAIVE_STAG_FERMION_H
|
||||||
|
|
||||||
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
|
class NaiveStaggeredFermionStatic {
|
||||||
|
public:
|
||||||
|
static const std::vector<int> directions;
|
||||||
|
static const std::vector<int> displacements;
|
||||||
|
static const int npoint = 8;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
class NaiveStaggeredFermion : public StaggeredKernels<Impl>, public NaiveStaggeredFermionStatic {
|
||||||
|
public:
|
||||||
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
|
typedef StaggeredKernels<Impl> Kernels;
|
||||||
|
|
||||||
|
FermionField _tmp;
|
||||||
|
FermionField &tmp(void) { return _tmp; }
|
||||||
|
|
||||||
|
////////////////////////////////////////
|
||||||
|
// Performance monitoring
|
||||||
|
////////////////////////////////////////
|
||||||
|
void Report(void);
|
||||||
|
void ZeroCounters(void);
|
||||||
|
double DhopTotalTime;
|
||||||
|
double DhopCalls;
|
||||||
|
double DhopCommTime;
|
||||||
|
double DhopComputeTime;
|
||||||
|
double DhopComputeTime2;
|
||||||
|
double DhopFaceTime;
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
// Implement the abstract base
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
GridBase *GaugeGrid(void) { return _grid; }
|
||||||
|
GridBase *GaugeRedBlackGrid(void) { return _cbgrid; }
|
||||||
|
GridBase *FermionGrid(void) { return _grid; }
|
||||||
|
GridBase *FermionRedBlackGrid(void) { return _cbgrid; }
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////
|
||||||
|
// override multiply; cut number routines if pass dagger argument
|
||||||
|
// and also make interface more uniformly consistent
|
||||||
|
//////////////////////////////////////////////////////////////////
|
||||||
|
RealD M(const FermionField &in, FermionField &out);
|
||||||
|
RealD Mdag(const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////
|
||||||
|
// half checkerboard operations
|
||||||
|
/////////////////////////////////////////////////////////
|
||||||
|
void Meooe(const FermionField &in, FermionField &out);
|
||||||
|
void MeooeDag(const FermionField &in, FermionField &out);
|
||||||
|
void Mooee(const FermionField &in, FermionField &out);
|
||||||
|
void MooeeDag(const FermionField &in, FermionField &out);
|
||||||
|
void MooeeInv(const FermionField &in, FermionField &out);
|
||||||
|
void MooeeInvDag(const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
////////////////////////
|
||||||
|
// Derivative interface
|
||||||
|
////////////////////////
|
||||||
|
// Interface calls an internal routine
|
||||||
|
void DhopDeriv (GaugeField &mat, const FermionField &U, const FermionField &V, int dag);
|
||||||
|
void DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag);
|
||||||
|
void DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
// non-hermitian hopping term; half cb or both
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
void Dhop (const FermionField &in, FermionField &out, int dag);
|
||||||
|
void DhopOE(const FermionField &in, FermionField &out, int dag);
|
||||||
|
void DhopEO(const FermionField &in, FermionField &out, int dag);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
// Multigrid assistance; force term uses too
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
void Mdir(const FermionField &in, FermionField &out, int dir, int disp);
|
||||||
|
void MdirAll(const FermionField &in, std::vector<FermionField> &out);
|
||||||
|
void DhopDir(const FermionField &in, FermionField &out, int dir, int disp);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
// Extra methods added by derived
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
void DerivInternal(StencilImpl &st,
|
||||||
|
DoubledGaugeField &U,
|
||||||
|
GaugeField &mat,
|
||||||
|
const FermionField &A, const FermionField &B, int dag);
|
||||||
|
|
||||||
|
void DhopInternal(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||||
|
const FermionField &in, FermionField &out, int dag);
|
||||||
|
void DhopInternalSerialComms(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||||
|
const FermionField &in, FermionField &out, int dag);
|
||||||
|
void DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||||
|
const FermionField &in, FermionField &out, int dag);
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
// Grid own interface Constructor
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
NaiveStaggeredFermion(GaugeField &_U, GridCartesian &Fgrid,
|
||||||
|
GridRedBlackCartesian &Hgrid, RealD _mass,
|
||||||
|
RealD _c1, RealD _u0,
|
||||||
|
const ImplParams &p = ImplParams());
|
||||||
|
NaiveStaggeredFermion(GridCartesian &Fgrid,
|
||||||
|
GridRedBlackCartesian &Hgrid, RealD _mass,
|
||||||
|
RealD _c1, RealD _u0,
|
||||||
|
const ImplParams &p = ImplParams());
|
||||||
|
|
||||||
|
// DoubleStore impl dependent
|
||||||
|
void ImportGauge (const GaugeField &_U );
|
||||||
|
DoubledGaugeField &GetU(void) { return Umu ; } ;
|
||||||
|
void CopyGaugeCheckerboards(void);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
// Data members require to support the functionality
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// protected:
|
||||||
|
public:
|
||||||
|
// any other parameters of action ???
|
||||||
|
virtual int isTrivialEE(void) { return 1; };
|
||||||
|
virtual RealD Mass(void) { return mass; }
|
||||||
|
RealD mass;
|
||||||
|
RealD u0;
|
||||||
|
RealD c1;
|
||||||
|
|
||||||
|
GridBase *_grid;
|
||||||
|
GridBase *_cbgrid;
|
||||||
|
|
||||||
|
// Defines the stencils for even and odd
|
||||||
|
StencilImpl Stencil;
|
||||||
|
StencilImpl StencilEven;
|
||||||
|
StencilImpl StencilOdd;
|
||||||
|
|
||||||
|
// Copy of the gauge field , with even and odd subsets
|
||||||
|
DoubledGaugeField Umu;
|
||||||
|
DoubledGaugeField UmuEven;
|
||||||
|
DoubledGaugeField UmuOdd;
|
||||||
|
|
||||||
|
LebesgueOrder Lebesgue;
|
||||||
|
LebesgueOrder LebesgueEvenOdd;
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
// Conserved current utilities
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
void ContractConservedCurrent(PropagatorField &q_in_1,
|
||||||
|
PropagatorField &q_in_2,
|
||||||
|
PropagatorField &q_out,
|
||||||
|
PropagatorField &src,
|
||||||
|
Current curr_type,
|
||||||
|
unsigned int mu);
|
||||||
|
void SeqConservedCurrent(PropagatorField &q_in,
|
||||||
|
PropagatorField &q_out,
|
||||||
|
PropagatorField &srct,
|
||||||
|
Current curr_type,
|
||||||
|
unsigned int mu,
|
||||||
|
unsigned int tmin,
|
||||||
|
unsigned int tmax,
|
||||||
|
ComplexField &lattice_cmplx);
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef NaiveStaggeredFermion<StaggeredImplF> NaiveStaggeredFermionF;
|
||||||
|
typedef NaiveStaggeredFermion<StaggeredImplD> NaiveStaggeredFermionD;
|
||||||
|
|
||||||
|
NAMESPACE_END(Grid);
|
||||||
|
|
||||||
|
#endif
|
@ -47,23 +47,34 @@ template<class Impl> class StaggeredKernels : public FermionOperator<Impl> , pub
|
|||||||
INHERIT_IMPL_TYPES(Impl);
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
typedef FermionOperator<Impl> Base;
|
typedef FermionOperator<Impl> Base;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
void DhopDirKernel(StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf,
|
void DhopImproved(StencilImpl &st, LebesgueOrder &lo,
|
||||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dir,int disp);
|
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||||
|
const FermionField &in, FermionField &out, int dag, int interior,int exterior);
|
||||||
|
void DhopNaive(StencilImpl &st, LebesgueOrder &lo,
|
||||||
|
DoubledGaugeField &U,
|
||||||
|
const FermionField &in, FermionField &out, int dag, int interior,int exterior);
|
||||||
|
|
||||||
|
void DhopDirKernel(StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf,
|
||||||
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dir,int disp);
|
||||||
|
protected:
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Generic Nc kernels
|
// Generic Nc kernels
|
||||||
///////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////
|
||||||
void DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo,
|
template<int Naik>
|
||||||
|
void DhopSiteGeneric(StencilView &st,
|
||||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor * buf, int LLs, int sU,
|
SiteSpinor * buf, int LLs, int sU,
|
||||||
const FermionFieldView &in, FermionFieldView &out,int dag);
|
const FermionFieldView &in, FermionFieldView &out,int dag);
|
||||||
void DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &lo,
|
template<int Naik>
|
||||||
|
void DhopSiteGenericInt(StencilView &st,
|
||||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor * buf, int LLs, int sU,
|
SiteSpinor * buf, int LLs, int sU,
|
||||||
const FermionFieldView &in, FermionFieldView &out,int dag);
|
const FermionFieldView &in, FermionFieldView &out,int dag);
|
||||||
void DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &lo,
|
template<int Naik>
|
||||||
|
void DhopSiteGenericExt(StencilView &st,
|
||||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor * buf, int LLs, int sU,
|
SiteSpinor * buf, int LLs, int sU,
|
||||||
const FermionFieldView &in, FermionFieldView &out,int dag);
|
const FermionFieldView &in, FermionFieldView &out,int dag);
|
||||||
@ -71,15 +82,18 @@ public:
|
|||||||
///////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Nc=3 specific kernels
|
// Nc=3 specific kernels
|
||||||
///////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////
|
||||||
void DhopSiteHand(StencilImpl &st, LebesgueOrder &lo,
|
template<int Naik>
|
||||||
|
void DhopSiteHand(StencilView &st,
|
||||||
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
|
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor * buf, int LLs, int sU,
|
SiteSpinor * buf, int LLs, int sU,
|
||||||
const FermionFieldView &in, FermionFieldView &out,int dag);
|
const FermionFieldView &in, FermionFieldView &out,int dag);
|
||||||
void DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
|
template<int Naik>
|
||||||
|
void DhopSiteHandInt(StencilView &st,
|
||||||
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
|
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor * buf, int LLs, int sU,
|
SiteSpinor * buf, int LLs, int sU,
|
||||||
const FermionFieldView &in, FermionFieldView &out,int dag);
|
const FermionFieldView &in, FermionFieldView &out,int dag);
|
||||||
void DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
|
template<int Naik>
|
||||||
|
void DhopSiteHandExt(StencilView &st,
|
||||||
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
|
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor * buf, int LLs, int sU,
|
SiteSpinor * buf, int LLs, int sU,
|
||||||
const FermionFieldView &in, FermionFieldView &out,int dag);
|
const FermionFieldView &in, FermionFieldView &out,int dag);
|
||||||
@ -87,27 +101,10 @@ public:
|
|||||||
///////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Asm Nc=3 specific kernels
|
// Asm Nc=3 specific kernels
|
||||||
///////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////
|
||||||
void DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
void DhopSiteAsm(StencilView &st,
|
||||||
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
|
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor * buf, int LLs, int sU,
|
SiteSpinor * buf, int LLs, int sU,
|
||||||
const FermionFieldView &in, FermionFieldView &out,int dag);
|
const FermionFieldView &in, FermionFieldView &out,int dag);
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Generic interface; fan out to right routine
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
void DhopSite(StencilImpl &st, LebesgueOrder &lo,
|
|
||||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
|
||||||
SiteSpinor * buf, int LLs, int sU,
|
|
||||||
const FermionFieldView &in, FermionFieldView &out, int interior=1,int exterior=1);
|
|
||||||
|
|
||||||
void DhopSiteDag(StencilImpl &st, LebesgueOrder &lo,
|
|
||||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
|
||||||
SiteSpinor * buf, int LLs, int sU,
|
|
||||||
const FermionFieldView &in, FermionFieldView &out, int interior=1,int exterior=1);
|
|
||||||
|
|
||||||
void DhopSite(StencilImpl &st, LebesgueOrder &lo,
|
|
||||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
|
||||||
SiteSpinor * buf, int LLs, int sU,
|
|
||||||
const FermionFieldView &in, FermionFieldView &out, int dag, int interior,int exterior);
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
@ -281,11 +281,9 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOr
|
|||||||
DoubledGaugeField & U,DoubledGaugeField & UUU,
|
DoubledGaugeField & U,DoubledGaugeField & UUU,
|
||||||
const FermionField &in, FermionField &out,int dag)
|
const FermionField &in, FermionField &out,int dag)
|
||||||
{
|
{
|
||||||
#ifdef GRID_OMP
|
|
||||||
if ( StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsAndCompute )
|
if ( StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsAndCompute )
|
||||||
DhopInternalOverlappedComms(st,lo,U,UUU,in,out,dag);
|
DhopInternalOverlappedComms(st,lo,U,UUU,in,out,dag);
|
||||||
else
|
else
|
||||||
#endif
|
|
||||||
DhopInternalSerialComms(st,lo,U,UUU,in,out,dag);
|
DhopInternalSerialComms(st,lo,U,UUU,in,out,dag);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -294,9 +292,7 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl &
|
|||||||
DoubledGaugeField & U,DoubledGaugeField & UUU,
|
DoubledGaugeField & U,DoubledGaugeField & UUU,
|
||||||
const FermionField &in, FermionField &out,int dag)
|
const FermionField &in, FermionField &out,int dag)
|
||||||
{
|
{
|
||||||
#ifdef GRID_OMP
|
|
||||||
// assert((dag==DaggerNo) ||(dag==DaggerYes));
|
// assert((dag==DaggerNo) ||(dag==DaggerYes));
|
||||||
|
|
||||||
Compressor compressor;
|
Compressor compressor;
|
||||||
|
|
||||||
int LLs = in.Grid()->_rdimensions[0];
|
int LLs = in.Grid()->_rdimensions[0];
|
||||||
@ -305,99 +301,42 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl &
|
|||||||
DhopFaceTime-=usecond();
|
DhopFaceTime-=usecond();
|
||||||
st.Prepare();
|
st.Prepare();
|
||||||
st.HaloGather(in,compressor);
|
st.HaloGather(in,compressor);
|
||||||
|
DhopFaceTime+=usecond();
|
||||||
|
|
||||||
|
DhopCommTime -=usecond();
|
||||||
|
std::vector<std::vector<CommsRequest_t> > requests;
|
||||||
|
st.CommunicateBegin(requests);
|
||||||
|
|
||||||
// st.HaloExchangeOptGather(in,compressor); // Wilson compressor
|
// st.HaloExchangeOptGather(in,compressor); // Wilson compressor
|
||||||
|
DhopFaceTime-=usecond();
|
||||||
st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
|
st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
|
||||||
DhopFaceTime+=usecond();
|
DhopFaceTime+=usecond();
|
||||||
|
|
||||||
double ctime=0;
|
|
||||||
double ptime=0;
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Ugly explicit thread mapping introduced for OPA reasons.
|
// Remove explicit thread mapping introduced for OPA reasons.
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
#pragma omp parallel reduction(max:ctime) reduction(max:ptime)
|
DhopComputeTime-=usecond();
|
||||||
{
|
{
|
||||||
int tid = omp_get_thread_num();
|
int interior=1;
|
||||||
int nthreads = omp_get_num_threads();
|
int exterior=0;
|
||||||
int ncomms = CartesianCommunicator::nCommThreads;
|
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
||||||
if (ncomms == -1) ncomms = 1;
|
|
||||||
assert(nthreads > ncomms);
|
|
||||||
if (tid >= ncomms) {
|
|
||||||
double start = usecond();
|
|
||||||
nthreads -= ncomms;
|
|
||||||
int ttid = tid - ncomms;
|
|
||||||
int n = U.Grid()->oSites(); // 4d vol
|
|
||||||
int chunk = n / nthreads;
|
|
||||||
int rem = n % nthreads;
|
|
||||||
int myblock, myn;
|
|
||||||
if (ttid < rem) {
|
|
||||||
myblock = ttid * chunk + ttid;
|
|
||||||
myn = chunk+1;
|
|
||||||
} else {
|
|
||||||
myblock = ttid*chunk + rem;
|
|
||||||
myn = chunk;
|
|
||||||
}
|
|
||||||
|
|
||||||
// do the compute
|
|
||||||
auto U_v = U.View(CpuRead);
|
|
||||||
auto UUU_v = UUU.View(CpuRead);
|
|
||||||
auto in_v = in.View(CpuRead);
|
|
||||||
auto out_v = out.View(CpuWrite);
|
|
||||||
|
|
||||||
if (dag == DaggerYes) {
|
|
||||||
for (int ss = myblock; ss < myblock+myn; ++ss) {
|
|
||||||
int sU = ss;
|
|
||||||
// Interior = 1; Exterior = 0; must implement for staggered
|
|
||||||
Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,1,0); //<---------
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (int ss = myblock; ss < myblock+myn; ++ss) {
|
|
||||||
// Interior = 1; Exterior = 0;
|
|
||||||
int sU = ss;
|
|
||||||
Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,1,0); //<------------
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ptime = usecond() - start;
|
|
||||||
} else {
|
|
||||||
double start = usecond();
|
|
||||||
st.CommunicateThreaded();
|
|
||||||
ctime = usecond() - start;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
DhopCommTime += ctime;
|
DhopComputeTime+=usecond();
|
||||||
DhopComputeTime+=ptime;
|
|
||||||
|
|
||||||
// First to enter, last to leave timing
|
|
||||||
st.CollateThreads();
|
|
||||||
|
|
||||||
DhopFaceTime-=usecond();
|
DhopFaceTime-=usecond();
|
||||||
st.CommsMerge(compressor);
|
st.CommsMerge(compressor);
|
||||||
DhopFaceTime+=usecond();
|
DhopFaceTime+=usecond();
|
||||||
|
|
||||||
DhopComputeTime2-=usecond();
|
st.CommunicateComplete(requests);
|
||||||
|
DhopCommTime +=usecond();
|
||||||
|
|
||||||
auto U_v = U.View(CpuRead);
|
DhopComputeTime2-=usecond();
|
||||||
auto UUU_v = UUU.View(CpuRead);
|
{
|
||||||
auto in_v = in.View(CpuRead);
|
int interior=0;
|
||||||
auto out_v = out.View(CpuWrite);
|
int exterior=1;
|
||||||
if (dag == DaggerYes) {
|
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
||||||
int sz=st.surface_list.size();
|
|
||||||
thread_for( ss,sz,{
|
|
||||||
int sU = st.surface_list[ss];
|
|
||||||
Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,0,1); //<----------
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
int sz=st.surface_list.size();
|
|
||||||
thread_for( ss,sz,{
|
|
||||||
int sU = st.surface_list[ss];
|
|
||||||
Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,0,1);//<----------
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
DhopComputeTime2+=usecond();
|
DhopComputeTime2+=usecond();
|
||||||
#else
|
|
||||||
assert(0);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -408,8 +347,6 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st,
|
|||||||
Compressor compressor;
|
Compressor compressor;
|
||||||
int LLs = in.Grid()->_rdimensions[0];
|
int LLs = in.Grid()->_rdimensions[0];
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//double t1=usecond();
|
//double t1=usecond();
|
||||||
DhopTotalTime -= usecond();
|
DhopTotalTime -= usecond();
|
||||||
DhopCommTime -= usecond();
|
DhopCommTime -= usecond();
|
||||||
@ -418,28 +355,13 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st,
|
|||||||
|
|
||||||
DhopComputeTime -= usecond();
|
DhopComputeTime -= usecond();
|
||||||
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
||||||
auto U_v = U.View(CpuRead);
|
{
|
||||||
auto UUU_v = UUU.View(CpuRead);
|
int interior=1;
|
||||||
auto in_v = in.View(CpuRead);
|
int exterior=1;
|
||||||
auto out_v = out.View(CpuWrite);
|
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
||||||
if (dag == DaggerYes) {
|
|
||||||
thread_for( ss,U.Grid()->oSites(),{
|
|
||||||
int sU=ss;
|
|
||||||
Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), LLs, sU,in_v, out_v);
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
thread_for( ss,U.Grid()->oSites(),{
|
|
||||||
int sU=ss;
|
|
||||||
Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v);
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
DhopComputeTime += usecond();
|
DhopComputeTime += usecond();
|
||||||
DhopTotalTime += usecond();
|
DhopTotalTime += usecond();
|
||||||
//double t2=usecond();
|
|
||||||
//std::cout << __FILE__ << " " << __func__ << " Total Time " << DhopTotalTime << std::endl;
|
|
||||||
//std::cout << __FILE__ << " " << __func__ << " Total Time Org " << t2-t1 << std::endl;
|
|
||||||
//std::cout << __FILE__ << " " << __func__ << " Comml Time " << DhopCommTime << std::endl;
|
|
||||||
//std::cout << __FILE__ << " " << __func__ << " Compute Time " << DhopComputeTime << std::endl;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
/*CHANGE END*/
|
/*CHANGE END*/
|
||||||
|
@ -395,11 +395,9 @@ void ImprovedStaggeredFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder
|
|||||||
const FermionField &in,
|
const FermionField &in,
|
||||||
FermionField &out, int dag)
|
FermionField &out, int dag)
|
||||||
{
|
{
|
||||||
#ifdef GRID_OMP
|
|
||||||
if ( StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsAndCompute )
|
if ( StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsAndCompute )
|
||||||
DhopInternalOverlappedComms(st,lo,U,UUU,in,out,dag);
|
DhopInternalOverlappedComms(st,lo,U,UUU,in,out,dag);
|
||||||
else
|
else
|
||||||
#endif
|
|
||||||
DhopInternalSerialComms(st,lo,U,UUU,in,out,dag);
|
DhopInternalSerialComms(st,lo,U,UUU,in,out,dag);
|
||||||
}
|
}
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
@ -409,7 +407,6 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st
|
|||||||
const FermionField &in,
|
const FermionField &in,
|
||||||
FermionField &out, int dag)
|
FermionField &out, int dag)
|
||||||
{
|
{
|
||||||
#ifdef GRID_OMP
|
|
||||||
Compressor compressor;
|
Compressor compressor;
|
||||||
int len = U.Grid()->oSites();
|
int len = U.Grid()->oSites();
|
||||||
|
|
||||||
@ -418,60 +415,30 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st
|
|||||||
DhopFaceTime -= usecond();
|
DhopFaceTime -= usecond();
|
||||||
st.Prepare();
|
st.Prepare();
|
||||||
st.HaloGather(in,compressor);
|
st.HaloGather(in,compressor);
|
||||||
st.CommsMergeSHM(compressor);
|
|
||||||
DhopFaceTime += usecond();
|
DhopFaceTime += usecond();
|
||||||
|
|
||||||
|
DhopCommTime -=usecond();
|
||||||
|
std::vector<std::vector<CommsRequest_t> > requests;
|
||||||
|
st.CommunicateBegin(requests);
|
||||||
|
|
||||||
|
DhopFaceTime-=usecond();
|
||||||
|
st.CommsMergeSHM(compressor);
|
||||||
|
DhopFaceTime+= usecond();
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Ugly explicit thread mapping introduced for OPA reasons.
|
// Removed explicit thread comms
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
DhopComputeTime -= usecond();
|
DhopComputeTime -= usecond();
|
||||||
#pragma omp parallel
|
|
||||||
{
|
{
|
||||||
int tid = omp_get_thread_num();
|
int interior=1;
|
||||||
int nthreads = omp_get_num_threads();
|
int exterior=0;
|
||||||
int ncomms = CartesianCommunicator::nCommThreads;
|
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
||||||
if (ncomms == -1) ncomms = 1;
|
|
||||||
assert(nthreads > ncomms);
|
|
||||||
|
|
||||||
if (tid >= ncomms) {
|
|
||||||
nthreads -= ncomms;
|
|
||||||
int ttid = tid - ncomms;
|
|
||||||
int n = len;
|
|
||||||
int chunk = n / nthreads;
|
|
||||||
int rem = n % nthreads;
|
|
||||||
int myblock, myn;
|
|
||||||
if (ttid < rem) {
|
|
||||||
myblock = ttid * chunk + ttid;
|
|
||||||
myn = chunk+1;
|
|
||||||
} else {
|
|
||||||
myblock = ttid*chunk + rem;
|
|
||||||
myn = chunk;
|
|
||||||
}
|
|
||||||
|
|
||||||
// do the compute
|
|
||||||
auto U_v = U.View(CpuRead);
|
|
||||||
auto UUU_v = UUU.View(CpuRead);
|
|
||||||
auto in_v = in.View(CpuRead);
|
|
||||||
auto out_v = out.View(CpuWrite);
|
|
||||||
if (dag == DaggerYes) {
|
|
||||||
for (int ss = myblock; ss < myblock+myn; ++ss) {
|
|
||||||
int sU = ss;
|
|
||||||
// Interior = 1; Exterior = 0; must implement for staggered
|
|
||||||
Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,1,0);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (int ss = myblock; ss < myblock+myn; ++ss) {
|
|
||||||
// Interior = 1; Exterior = 0;
|
|
||||||
int sU = ss;
|
|
||||||
Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,1,0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
st.CommunicateThreaded();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
DhopComputeTime += usecond();
|
DhopComputeTime += usecond();
|
||||||
|
|
||||||
|
st.CommunicateComplete(requests);
|
||||||
|
DhopCommTime +=usecond();
|
||||||
|
|
||||||
// First to enter, last to leave timing
|
// First to enter, last to leave timing
|
||||||
DhopFaceTime -= usecond();
|
DhopFaceTime -= usecond();
|
||||||
st.CommsMerge(compressor);
|
st.CommsMerge(compressor);
|
||||||
@ -479,28 +446,11 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st
|
|||||||
|
|
||||||
DhopComputeTime2 -= usecond();
|
DhopComputeTime2 -= usecond();
|
||||||
{
|
{
|
||||||
auto U_v = U.View(CpuRead);
|
int interior=0;
|
||||||
auto UUU_v = UUU.View(CpuRead);
|
int exterior=1;
|
||||||
auto in_v = in.View(CpuRead);
|
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
||||||
auto out_v = out.View(CpuWrite);
|
|
||||||
if (dag == DaggerYes) {
|
|
||||||
int sz=st.surface_list.size();
|
|
||||||
thread_for(ss,sz,{
|
|
||||||
int sU = st.surface_list[ss];
|
|
||||||
Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,0,1);
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
int sz=st.surface_list.size();
|
|
||||||
thread_for(ss,sz,{
|
|
||||||
int sU = st.surface_list[ss];
|
|
||||||
Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,0,1);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
DhopComputeTime2 += usecond();
|
DhopComputeTime2 += usecond();
|
||||||
#else
|
|
||||||
assert(0);
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -520,19 +470,11 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalSerialComms(StencilImpl &st, Le
|
|||||||
st.HaloExchange(in, compressor);
|
st.HaloExchange(in, compressor);
|
||||||
DhopCommTime += usecond();
|
DhopCommTime += usecond();
|
||||||
|
|
||||||
auto U_v = U.View(CpuRead);
|
|
||||||
auto UUU_v = UUU.View(CpuRead);
|
|
||||||
auto in_v = in.View(CpuRead);
|
|
||||||
auto out_v = out.View(CpuWrite);
|
|
||||||
DhopComputeTime -= usecond();
|
DhopComputeTime -= usecond();
|
||||||
if (dag == DaggerYes) {
|
{
|
||||||
thread_for(sss, in.Grid()->oSites(),{
|
int interior=1;
|
||||||
Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), 1, sss, in_v, out_v);
|
int exterior=1;
|
||||||
});
|
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
||||||
} else {
|
|
||||||
thread_for(sss, in.Grid()->oSites(),{
|
|
||||||
Kernels::DhopSite(st, lo, U_v, UUU_v, st.CommBuf(), 1, sss, in_v, out_v);
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
DhopComputeTime += usecond();
|
DhopComputeTime += usecond();
|
||||||
DhopTotalTime += usecond();
|
DhopTotalTime += usecond();
|
||||||
|
@ -0,0 +1,499 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi, Peter Boyle
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
|
directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
|
/////////////////////////////////
|
||||||
|
// Constructor and gauge import
|
||||||
|
/////////////////////////////////
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
NaiveStaggeredFermion<Impl>::NaiveStaggeredFermion(GridCartesian &Fgrid, GridRedBlackCartesian &Hgrid,
|
||||||
|
RealD _mass,
|
||||||
|
RealD _c1, RealD _u0,
|
||||||
|
const ImplParams &p)
|
||||||
|
: Kernels(p),
|
||||||
|
_grid(&Fgrid),
|
||||||
|
_cbgrid(&Hgrid),
|
||||||
|
Stencil(&Fgrid, npoint, Even, directions, displacements,p),
|
||||||
|
StencilEven(&Hgrid, npoint, Even, directions, displacements,p), // source is Even
|
||||||
|
StencilOdd(&Hgrid, npoint, Odd, directions, displacements,p), // source is Odd
|
||||||
|
mass(_mass),
|
||||||
|
Lebesgue(_grid),
|
||||||
|
LebesgueEvenOdd(_cbgrid),
|
||||||
|
Umu(&Fgrid),
|
||||||
|
UmuEven(&Hgrid),
|
||||||
|
UmuOdd(&Hgrid),
|
||||||
|
_tmp(&Hgrid)
|
||||||
|
{
|
||||||
|
int vol4;
|
||||||
|
int LLs=1;
|
||||||
|
c1=_c1;
|
||||||
|
u0=_u0;
|
||||||
|
vol4= _grid->oSites();
|
||||||
|
Stencil.BuildSurfaceList(LLs,vol4);
|
||||||
|
vol4= _cbgrid->oSites();
|
||||||
|
StencilEven.BuildSurfaceList(LLs,vol4);
|
||||||
|
StencilOdd.BuildSurfaceList(LLs,vol4);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
NaiveStaggeredFermion<Impl>::NaiveStaggeredFermion(GaugeField &_U, GridCartesian &Fgrid,
|
||||||
|
GridRedBlackCartesian &Hgrid, RealD _mass,
|
||||||
|
RealD _c1, RealD _u0,
|
||||||
|
const ImplParams &p)
|
||||||
|
: NaiveStaggeredFermion(Fgrid,Hgrid,_mass,_c1,_u0,p)
|
||||||
|
{
|
||||||
|
ImportGauge(_U);
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
// Momentum space propagator should be
|
||||||
|
// https://arxiv.org/pdf/hep-lat/9712010.pdf
|
||||||
|
//
|
||||||
|
// mom space action.
|
||||||
|
// gamma_mu i ( c1 sin pmu + c2 sin 3 pmu ) + m
|
||||||
|
//
|
||||||
|
// must track through staggered flavour/spin reduction in literature to
|
||||||
|
// turn to free propagator for the one component chi field, a la page 4/5
|
||||||
|
// of above link to implmement fourier based solver.
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void NaiveStaggeredFermion<Impl>::CopyGaugeCheckerboards(void)
|
||||||
|
{
|
||||||
|
pickCheckerboard(Even, UmuEven, Umu);
|
||||||
|
pickCheckerboard(Odd, UmuOdd , Umu);
|
||||||
|
}
|
||||||
|
template <class Impl>
|
||||||
|
void NaiveStaggeredFermion<Impl>::ImportGauge(const GaugeField &_U)
|
||||||
|
{
|
||||||
|
GaugeLinkField U(GaugeGrid());
|
||||||
|
DoubledGaugeField _UUU(GaugeGrid());
|
||||||
|
////////////////////////////////////////////////////////
|
||||||
|
// Double Store should take two fields for Naik and one hop separately.
|
||||||
|
// Discard teh Naik as Naive
|
||||||
|
////////////////////////////////////////////////////////
|
||||||
|
Impl::DoubleStore(GaugeGrid(), _UUU, Umu, _U, _U );
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////
|
||||||
|
// Apply scale factors to get the right fermion Kinetic term
|
||||||
|
// Could pass coeffs into the double store to save work.
|
||||||
|
// 0.5 ( U p(x+mu) - Udag(x-mu) p(x-mu) )
|
||||||
|
////////////////////////////////////////////////////////
|
||||||
|
for (int mu = 0; mu < Nd; mu++) {
|
||||||
|
|
||||||
|
U = PeekIndex<LorentzIndex>(Umu, mu);
|
||||||
|
PokeIndex<LorentzIndex>(Umu, U*( 0.5*c1/u0), mu );
|
||||||
|
|
||||||
|
U = PeekIndex<LorentzIndex>(Umu, mu+4);
|
||||||
|
PokeIndex<LorentzIndex>(Umu, U*(-0.5*c1/u0), mu+4);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
CopyGaugeCheckerboards();
|
||||||
|
}
|
||||||
|
|
||||||
|
/////////////////////////////
|
||||||
|
// Implement the interface
|
||||||
|
/////////////////////////////
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
RealD NaiveStaggeredFermion<Impl>::M(const FermionField &in, FermionField &out) {
|
||||||
|
out.Checkerboard() = in.Checkerboard();
|
||||||
|
Dhop(in, out, DaggerNo);
|
||||||
|
return axpy_norm(out, mass, in, out);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
RealD NaiveStaggeredFermion<Impl>::Mdag(const FermionField &in, FermionField &out) {
|
||||||
|
out.Checkerboard() = in.Checkerboard();
|
||||||
|
Dhop(in, out, DaggerYes);
|
||||||
|
return axpy_norm(out, mass, in, out);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void NaiveStaggeredFermion<Impl>::Meooe(const FermionField &in, FermionField &out) {
|
||||||
|
if (in.Checkerboard() == Odd) {
|
||||||
|
DhopEO(in, out, DaggerNo);
|
||||||
|
} else {
|
||||||
|
DhopOE(in, out, DaggerNo);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
template <class Impl>
|
||||||
|
void NaiveStaggeredFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
|
||||||
|
if (in.Checkerboard() == Odd) {
|
||||||
|
DhopEO(in, out, DaggerYes);
|
||||||
|
} else {
|
||||||
|
DhopOE(in, out, DaggerYes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void NaiveStaggeredFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
|
||||||
|
out.Checkerboard() = in.Checkerboard();
|
||||||
|
typename FermionField::scalar_type scal(mass);
|
||||||
|
out = scal * in;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void NaiveStaggeredFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
|
||||||
|
out.Checkerboard() = in.Checkerboard();
|
||||||
|
Mooee(in, out);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void NaiveStaggeredFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
|
||||||
|
out.Checkerboard() = in.Checkerboard();
|
||||||
|
out = (1.0 / (mass)) * in;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void NaiveStaggeredFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
|
out.Checkerboard() = in.Checkerboard();
|
||||||
|
MooeeInv(in, out);
|
||||||
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////
|
||||||
|
// Internal
|
||||||
|
///////////////////////////////////
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void NaiveStaggeredFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
|
||||||
|
GaugeField & mat,
|
||||||
|
const FermionField &A, const FermionField &B, int dag)
|
||||||
|
{
|
||||||
|
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||||
|
|
||||||
|
Compressor compressor;
|
||||||
|
|
||||||
|
FermionField Btilde(B.Grid());
|
||||||
|
FermionField Atilde(B.Grid());
|
||||||
|
Atilde = A;
|
||||||
|
|
||||||
|
st.HaloExchange(B, compressor);
|
||||||
|
|
||||||
|
for (int mu = 0; mu < Nd; mu++) {
|
||||||
|
|
||||||
|
////////////////////////
|
||||||
|
// Call the single hop
|
||||||
|
////////////////////////
|
||||||
|
auto U_v = U.View(CpuRead);
|
||||||
|
auto B_v = B.View(CpuWrite);
|
||||||
|
auto Btilde_v = Btilde.View(CpuWrite);
|
||||||
|
thread_for(sss,B.Grid()->oSites(),{
|
||||||
|
Kernels::DhopDirKernel(st, U_v, U_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu,1);
|
||||||
|
});
|
||||||
|
|
||||||
|
assert(0);// need to figure out the force interface with a blasted three link term.
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void NaiveStaggeredFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
|
||||||
|
|
||||||
|
conformable(U.Grid(), _grid);
|
||||||
|
conformable(U.Grid(), V.Grid());
|
||||||
|
conformable(U.Grid(), mat.Grid());
|
||||||
|
|
||||||
|
mat.Checkerboard() = U.Checkerboard();
|
||||||
|
|
||||||
|
DerivInternal(Stencil, Umu, mat, U, V, dag);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void NaiveStaggeredFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
|
||||||
|
|
||||||
|
conformable(U.Grid(), _cbgrid);
|
||||||
|
conformable(U.Grid(), V.Grid());
|
||||||
|
conformable(U.Grid(), mat.Grid());
|
||||||
|
|
||||||
|
assert(V.Checkerboard() == Even);
|
||||||
|
assert(U.Checkerboard() == Odd);
|
||||||
|
mat.Checkerboard() = Odd;
|
||||||
|
|
||||||
|
DerivInternal(StencilEven, UmuOdd, mat, U, V, dag);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void NaiveStaggeredFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
|
||||||
|
|
||||||
|
conformable(U.Grid(), _cbgrid);
|
||||||
|
conformable(U.Grid(), V.Grid());
|
||||||
|
conformable(U.Grid(), mat.Grid());
|
||||||
|
|
||||||
|
assert(V.Checkerboard() == Odd);
|
||||||
|
assert(U.Checkerboard() == Even);
|
||||||
|
mat.Checkerboard() = Even;
|
||||||
|
|
||||||
|
DerivInternal(StencilOdd, UmuEven, mat, U, V, dag);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void NaiveStaggeredFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag)
|
||||||
|
{
|
||||||
|
DhopCalls+=2;
|
||||||
|
conformable(in.Grid(), _grid); // verifies full grid
|
||||||
|
conformable(in.Grid(), out.Grid());
|
||||||
|
|
||||||
|
out.Checkerboard() = in.Checkerboard();
|
||||||
|
|
||||||
|
DhopInternal(Stencil, Lebesgue, Umu, in, out, dag);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void NaiveStaggeredFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag)
|
||||||
|
{
|
||||||
|
DhopCalls+=1;
|
||||||
|
conformable(in.Grid(), _cbgrid); // verifies half grid
|
||||||
|
conformable(in.Grid(), out.Grid()); // drops the cb check
|
||||||
|
|
||||||
|
assert(in.Checkerboard() == Even);
|
||||||
|
out.Checkerboard() = Odd;
|
||||||
|
|
||||||
|
DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, in, out, dag);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void NaiveStaggeredFermion<Impl>::DhopEO(const FermionField &in, FermionField &out, int dag)
|
||||||
|
{
|
||||||
|
DhopCalls+=1;
|
||||||
|
conformable(in.Grid(), _cbgrid); // verifies half grid
|
||||||
|
conformable(in.Grid(), out.Grid()); // drops the cb check
|
||||||
|
|
||||||
|
assert(in.Checkerboard() == Odd);
|
||||||
|
out.Checkerboard() = Even;
|
||||||
|
|
||||||
|
DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, in, out, dag);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void NaiveStaggeredFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp)
|
||||||
|
{
|
||||||
|
DhopDir(in, out, dir, disp);
|
||||||
|
}
|
||||||
|
template <class Impl>
|
||||||
|
void NaiveStaggeredFermion<Impl>::MdirAll(const FermionField &in, std::vector<FermionField> &out)
|
||||||
|
{
|
||||||
|
assert(0); // Not implemented yet
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void NaiveStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp)
|
||||||
|
{
|
||||||
|
|
||||||
|
Compressor compressor;
|
||||||
|
Stencil.HaloExchange(in, compressor);
|
||||||
|
auto Umu_v = Umu.View(CpuRead);
|
||||||
|
auto in_v = in.View(CpuRead);
|
||||||
|
auto out_v = out.View(CpuWrite);
|
||||||
|
// thread_for( sss, in.Grid()->oSites(),{
|
||||||
|
// Kernels::DhopDirKernel(Stencil, Umu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dir, disp);
|
||||||
|
// });
|
||||||
|
assert(0);
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void NaiveStaggeredFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
|
||||||
|
DoubledGaugeField &U,
|
||||||
|
const FermionField &in,
|
||||||
|
FermionField &out, int dag)
|
||||||
|
{
|
||||||
|
if ( StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsAndCompute )
|
||||||
|
DhopInternalOverlappedComms(st,lo,U,in,out,dag);
|
||||||
|
else
|
||||||
|
DhopInternalSerialComms(st,lo,U,in,out,dag);
|
||||||
|
}
|
||||||
|
template <class Impl>
|
||||||
|
void NaiveStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo,
|
||||||
|
DoubledGaugeField &U,
|
||||||
|
const FermionField &in,
|
||||||
|
FermionField &out, int dag)
|
||||||
|
{
|
||||||
|
Compressor compressor;
|
||||||
|
int len = U.Grid()->oSites();
|
||||||
|
|
||||||
|
DhopTotalTime -= usecond();
|
||||||
|
|
||||||
|
DhopFaceTime -= usecond();
|
||||||
|
st.Prepare();
|
||||||
|
st.HaloGather(in,compressor);
|
||||||
|
DhopFaceTime += usecond();
|
||||||
|
|
||||||
|
DhopCommTime -=usecond();
|
||||||
|
std::vector<std::vector<CommsRequest_t> > requests;
|
||||||
|
st.CommunicateBegin(requests);
|
||||||
|
|
||||||
|
DhopFaceTime-=usecond();
|
||||||
|
st.CommsMergeSHM(compressor);
|
||||||
|
DhopFaceTime+= usecond();
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Removed explicit thread comms
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
DhopComputeTime -= usecond();
|
||||||
|
{
|
||||||
|
int interior=1;
|
||||||
|
int exterior=0;
|
||||||
|
Kernels::DhopNaive(st,lo,U,in,out,dag,interior,exterior);
|
||||||
|
}
|
||||||
|
DhopComputeTime += usecond();
|
||||||
|
|
||||||
|
st.CommunicateComplete(requests);
|
||||||
|
DhopCommTime +=usecond();
|
||||||
|
|
||||||
|
// First to enter, last to leave timing
|
||||||
|
DhopFaceTime -= usecond();
|
||||||
|
st.CommsMerge(compressor);
|
||||||
|
DhopFaceTime -= usecond();
|
||||||
|
|
||||||
|
DhopComputeTime2 -= usecond();
|
||||||
|
{
|
||||||
|
int interior=0;
|
||||||
|
int exterior=1;
|
||||||
|
Kernels::DhopNaive(st,lo,U,in,out,dag,interior,exterior);
|
||||||
|
}
|
||||||
|
DhopComputeTime2 += usecond();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void NaiveStaggeredFermion<Impl>::DhopInternalSerialComms(StencilImpl &st, LebesgueOrder &lo,
|
||||||
|
DoubledGaugeField &U,
|
||||||
|
const FermionField &in,
|
||||||
|
FermionField &out, int dag)
|
||||||
|
{
|
||||||
|
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||||
|
|
||||||
|
DhopTotalTime -= usecond();
|
||||||
|
|
||||||
|
DhopCommTime -= usecond();
|
||||||
|
Compressor compressor;
|
||||||
|
st.HaloExchange(in, compressor);
|
||||||
|
DhopCommTime += usecond();
|
||||||
|
|
||||||
|
DhopComputeTime -= usecond();
|
||||||
|
{
|
||||||
|
int interior=1;
|
||||||
|
int exterior=1;
|
||||||
|
Kernels::DhopNaive(st,lo,U,in,out,dag,interior,exterior);
|
||||||
|
}
|
||||||
|
DhopComputeTime += usecond();
|
||||||
|
DhopTotalTime += usecond();
|
||||||
|
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////
|
||||||
|
// Reporting
|
||||||
|
////////////////////////////////////////////////////////////////
|
||||||
|
template<class Impl>
|
||||||
|
void NaiveStaggeredFermion<Impl>::Report(void)
|
||||||
|
{
|
||||||
|
Coordinate latt = _grid->GlobalDimensions();
|
||||||
|
RealD volume = 1; for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
|
||||||
|
RealD NP = _grid->_Nprocessors;
|
||||||
|
RealD NN = _grid->NodeCount();
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "NaiveStaggeredFermion Number of DhopEO Calls : "
|
||||||
|
<< DhopCalls << std::endl;
|
||||||
|
std::cout << GridLogMessage << "NaiveStaggeredFermion TotalTime /Calls : "
|
||||||
|
<< DhopTotalTime / DhopCalls << " us" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "NaiveStaggeredFermion CommTime /Calls : "
|
||||||
|
<< DhopCommTime / DhopCalls << " us" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "NaiveStaggeredFermion ComputeTime/Calls : "
|
||||||
|
<< DhopComputeTime / DhopCalls << " us" << std::endl;
|
||||||
|
|
||||||
|
// Average the compute time
|
||||||
|
_grid->GlobalSum(DhopComputeTime);
|
||||||
|
DhopComputeTime/=NP;
|
||||||
|
|
||||||
|
RealD mflops = 1154*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl;
|
||||||
|
|
||||||
|
RealD Fullmflops = 1154*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "NaiveStaggeredFermion Stencil" <<std::endl; Stencil.Report();
|
||||||
|
std::cout << GridLogMessage << "NaiveStaggeredFermion StencilEven"<<std::endl; StencilEven.Report();
|
||||||
|
std::cout << GridLogMessage << "NaiveStaggeredFermion StencilOdd" <<std::endl; StencilOdd.Report();
|
||||||
|
}
|
||||||
|
template<class Impl>
|
||||||
|
void NaiveStaggeredFermion<Impl>::ZeroCounters(void)
|
||||||
|
{
|
||||||
|
DhopCalls = 0;
|
||||||
|
DhopTotalTime = 0;
|
||||||
|
DhopCommTime = 0;
|
||||||
|
DhopComputeTime = 0;
|
||||||
|
DhopFaceTime = 0;
|
||||||
|
|
||||||
|
Stencil.ZeroCounters();
|
||||||
|
StencilEven.ZeroCounters();
|
||||||
|
StencilOdd.ZeroCounters();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////
|
||||||
|
// Conserved current - not yet implemented.
|
||||||
|
////////////////////////////////////////////////////////
|
||||||
|
template <class Impl>
|
||||||
|
void NaiveStaggeredFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
|
||||||
|
PropagatorField &q_in_2,
|
||||||
|
PropagatorField &q_out,
|
||||||
|
PropagatorField &src,
|
||||||
|
Current curr_type,
|
||||||
|
unsigned int mu)
|
||||||
|
{
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void NaiveStaggeredFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
||||||
|
PropagatorField &q_out,
|
||||||
|
PropagatorField &src,
|
||||||
|
Current curr_type,
|
||||||
|
unsigned int mu,
|
||||||
|
unsigned int tmin,
|
||||||
|
unsigned int tmax,
|
||||||
|
ComplexField &lattice_cmplx)
|
||||||
|
{
|
||||||
|
assert(0);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
NAMESPACE_END(Grid);
|
@ -618,10 +618,10 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
void StaggeredKernels<Impl>::DhopSiteAsm(StencilView &st,
|
||||||
DoubledGaugeFieldView &U,
|
DoubledGaugeFieldView &U,
|
||||||
DoubledGaugeFieldView &UUU,
|
DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor *buf, int LLs,
|
SiteSpinor *buf, int sF,
|
||||||
int sU, const FermionFieldView &in, FermionFieldView &out,int dag)
|
int sU, const FermionFieldView &in, FermionFieldView &out,int dag)
|
||||||
{
|
{
|
||||||
assert(0);
|
assert(0);
|
||||||
@ -680,12 +680,13 @@ void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
gauge2 =(uint64_t)&UU[sU]( Z ); \
|
gauge2 =(uint64_t)&UU[sU]( Z ); \
|
||||||
gauge3 =(uint64_t)&UU[sU]( T );
|
gauge3 =(uint64_t)&UU[sU]( T );
|
||||||
|
|
||||||
|
|
||||||
// This is the single precision 5th direction vectorised kernel
|
// This is the single precision 5th direction vectorised kernel
|
||||||
#include <Grid/simd/Intel512single.h>
|
#include <Grid/simd/Intel512single.h>
|
||||||
template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilView &st,
|
||||||
DoubledGaugeFieldView &U,
|
DoubledGaugeFieldView &U,
|
||||||
DoubledGaugeFieldView &UUU,
|
DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor *buf, int LLs,
|
SiteSpinor *buf, int sF,
|
||||||
int sU, const FermionFieldView &in, FermionFieldView &out,int dag)
|
int sU, const FermionFieldView &in, FermionFieldView &out,int dag)
|
||||||
{
|
{
|
||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
@ -702,9 +703,10 @@ template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl
|
|||||||
StencilEntry *SE2;
|
StencilEntry *SE2;
|
||||||
StencilEntry *SE3;
|
StencilEntry *SE3;
|
||||||
|
|
||||||
for(int s=0;s<LLs;s++){
|
// for(int s=0;s<LLs;s++){
|
||||||
|
|
||||||
int sF=s+LLs*sU;
|
// int sF=s+LLs*sU;
|
||||||
|
{
|
||||||
// Xp, Yp, Zp, Tp
|
// Xp, Yp, Zp, Tp
|
||||||
PREPARE(Xp,Yp,Zp,Tp,0,U);
|
PREPARE(Xp,Yp,Zp,Tp,0,U);
|
||||||
LOAD_CHI(addr0,addr1,addr2,addr3);
|
LOAD_CHI(addr0,addr1,addr2,addr3);
|
||||||
@ -736,10 +738,10 @@ template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl
|
|||||||
}
|
}
|
||||||
|
|
||||||
#include <Grid/simd/Intel512double.h>
|
#include <Grid/simd/Intel512double.h>
|
||||||
template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilView &st,
|
||||||
DoubledGaugeFieldView &U,
|
DoubledGaugeFieldView &U,
|
||||||
DoubledGaugeFieldView &UUU,
|
DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor *buf, int LLs,
|
SiteSpinor *buf, int sF,
|
||||||
int sU, const FermionFieldView &in, FermionFieldView &out, int dag)
|
int sU, const FermionFieldView &in, FermionFieldView &out, int dag)
|
||||||
{
|
{
|
||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
@ -756,8 +758,9 @@ template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl
|
|||||||
StencilEntry *SE2;
|
StencilEntry *SE2;
|
||||||
StencilEntry *SE3;
|
StencilEntry *SE3;
|
||||||
|
|
||||||
for(int s=0;s<LLs;s++){
|
// for(int s=0;s<LLs;s++){
|
||||||
int sF=s+LLs*sU;
|
// int sF=s+LLs*sU;
|
||||||
|
{
|
||||||
// Xp, Yp, Zp, Tp
|
// Xp, Yp, Zp, Tp
|
||||||
PREPARE(Xp,Yp,Zp,Tp,0,U);
|
PREPARE(Xp,Yp,Zp,Tp,0,U);
|
||||||
LOAD_CHI(addr0,addr1,addr2,addr3);
|
LOAD_CHI(addr0,addr1,addr2,addr3);
|
||||||
@ -821,10 +824,10 @@ template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl
|
|||||||
// This is the single precision 5th direction vectorised kernel
|
// This is the single precision 5th direction vectorised kernel
|
||||||
|
|
||||||
#include <Grid/simd/Intel512single.h>
|
#include <Grid/simd/Intel512single.h>
|
||||||
template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilView &st,
|
||||||
DoubledGaugeFieldView &U,
|
DoubledGaugeFieldView &U,
|
||||||
DoubledGaugeFieldView &UUU,
|
DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor *buf, int LLs,
|
SiteSpinor *buf, int sF,
|
||||||
int sU, const FermionFieldView &in, FermionFieldView &out,int dag)
|
int sU, const FermionFieldView &in, FermionFieldView &out,int dag)
|
||||||
{
|
{
|
||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
@ -841,9 +844,9 @@ template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st,
|
|||||||
StencilEntry *SE2;
|
StencilEntry *SE2;
|
||||||
StencilEntry *SE3;
|
StencilEntry *SE3;
|
||||||
|
|
||||||
for(int s=0;s<LLs;s++){
|
// for(int s=0;s<LLs;s++){
|
||||||
|
// int sF=s+LLs*sU;
|
||||||
int sF=s+LLs*sU;
|
{
|
||||||
// Xp, Yp, Zp, Tp
|
// Xp, Yp, Zp, Tp
|
||||||
PREPARE(Xp,Yp,Zp,Tp,0,U);
|
PREPARE(Xp,Yp,Zp,Tp,0,U);
|
||||||
LOAD_CHIa(addr0,addr1);
|
LOAD_CHIa(addr0,addr1);
|
||||||
@ -890,10 +893,10 @@ template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#include <Grid/simd/Intel512double.h>
|
#include <Grid/simd/Intel512double.h>
|
||||||
template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilView &st,
|
||||||
DoubledGaugeFieldView &U,
|
DoubledGaugeFieldView &U,
|
||||||
DoubledGaugeFieldView &UUU,
|
DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor *buf, int LLs,
|
SiteSpinor *buf, int sF,
|
||||||
int sU, const FermionFieldView &in, FermionFieldView &out,int dag)
|
int sU, const FermionFieldView &in, FermionFieldView &out,int dag)
|
||||||
{
|
{
|
||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
@ -910,9 +913,9 @@ template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st,
|
|||||||
StencilEntry *SE2;
|
StencilEntry *SE2;
|
||||||
StencilEntry *SE3;
|
StencilEntry *SE3;
|
||||||
|
|
||||||
for(int s=0;s<LLs;s++){
|
// for(int s=0;s<LLs;s++){
|
||||||
|
// int sF=s+LLs*sU;
|
||||||
int sF=s+LLs*sU;
|
{
|
||||||
// Xp, Yp, Zp, Tp
|
// Xp, Yp, Zp, Tp
|
||||||
PREPARE(Xp,Yp,Zp,Tp,0,U);
|
PREPARE(Xp,Yp,Zp,Tp,0,U);
|
||||||
LOAD_CHIa(addr0,addr1);
|
LOAD_CHIa(addr0,addr1);
|
||||||
|
@ -146,9 +146,10 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
|
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo,
|
template <int Naik>
|
||||||
|
void StaggeredKernels<Impl>::DhopSiteHand(StencilView &st,
|
||||||
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
|
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor *buf, int LLs, int sU,
|
SiteSpinor *buf, int sF, int sU,
|
||||||
const FermionFieldView &in, FermionFieldView &out,int dag)
|
const FermionFieldView &in, FermionFieldView &out,int dag)
|
||||||
{
|
{
|
||||||
typedef typename Simd::scalar_type S;
|
typedef typename Simd::scalar_type S;
|
||||||
@ -181,8 +182,9 @@ void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
int skew;
|
int skew;
|
||||||
|
|
||||||
for(int s=0;s<LLs;s++){
|
// for(int s=0;s<LLs;s++){
|
||||||
int sF=s+LLs*sU;
|
// int sF=s+LLs*sU;
|
||||||
|
{
|
||||||
|
|
||||||
skew = 0;
|
skew = 0;
|
||||||
HAND_STENCIL_LEG_BEGIN(Xp,3,skew,even);
|
HAND_STENCIL_LEG_BEGIN(Xp,3,skew,even);
|
||||||
@ -193,6 +195,7 @@ void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
HAND_STENCIL_LEG (U,Ym,2,skew,odd);
|
HAND_STENCIL_LEG (U,Ym,2,skew,odd);
|
||||||
HAND_STENCIL_LEG (U,Zm,1,skew,even);
|
HAND_STENCIL_LEG (U,Zm,1,skew,even);
|
||||||
HAND_STENCIL_LEG (U,Tm,0,skew,odd);
|
HAND_STENCIL_LEG (U,Tm,0,skew,odd);
|
||||||
|
if (Naik) {
|
||||||
skew = 8;
|
skew = 8;
|
||||||
HAND_STENCIL_LEG(UUU,Xp,3,skew,even);
|
HAND_STENCIL_LEG(UUU,Xp,3,skew,even);
|
||||||
HAND_STENCIL_LEG(UUU,Yp,2,skew,odd);
|
HAND_STENCIL_LEG(UUU,Yp,2,skew,odd);
|
||||||
@ -202,7 +205,7 @@ void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
HAND_STENCIL_LEG(UUU,Ym,2,skew,odd);
|
HAND_STENCIL_LEG(UUU,Ym,2,skew,odd);
|
||||||
HAND_STENCIL_LEG(UUU,Zm,1,skew,even);
|
HAND_STENCIL_LEG(UUU,Zm,1,skew,even);
|
||||||
HAND_STENCIL_LEG(UUU,Tm,0,skew,odd);
|
HAND_STENCIL_LEG(UUU,Tm,0,skew,odd);
|
||||||
|
}
|
||||||
if ( dag ) {
|
if ( dag ) {
|
||||||
result()()(0) = - even_0 - odd_0;
|
result()()(0) = - even_0 - odd_0;
|
||||||
result()()(1) = - even_1 - odd_1;
|
result()()(1) = - even_1 - odd_1;
|
||||||
@ -218,9 +221,10 @@ void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
|
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void StaggeredKernels<Impl>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
|
template <int Naik>
|
||||||
|
void StaggeredKernels<Impl>::DhopSiteHandInt(StencilView &st,
|
||||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor *buf, int LLs, int sU,
|
SiteSpinor *buf, int sF, int sU,
|
||||||
const FermionFieldView &in, FermionFieldView &out,int dag)
|
const FermionFieldView &in, FermionFieldView &out,int dag)
|
||||||
{
|
{
|
||||||
typedef typename Simd::scalar_type S;
|
typedef typename Simd::scalar_type S;
|
||||||
@ -253,8 +257,9 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
int skew;
|
int skew;
|
||||||
|
|
||||||
for(int s=0;s<LLs;s++){
|
// for(int s=0;s<LLs;s++){
|
||||||
int sF=s+LLs*sU;
|
// int sF=s+LLs*sU;
|
||||||
|
{
|
||||||
|
|
||||||
even_0 = Zero(); even_1 = Zero(); even_2 = Zero();
|
even_0 = Zero(); even_1 = Zero(); even_2 = Zero();
|
||||||
odd_0 = Zero(); odd_1 = Zero(); odd_2 = Zero();
|
odd_0 = Zero(); odd_1 = Zero(); odd_2 = Zero();
|
||||||
@ -268,6 +273,7 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
HAND_STENCIL_LEG_INT(U,Ym,2,skew,odd);
|
HAND_STENCIL_LEG_INT(U,Ym,2,skew,odd);
|
||||||
HAND_STENCIL_LEG_INT(U,Zm,1,skew,even);
|
HAND_STENCIL_LEG_INT(U,Zm,1,skew,even);
|
||||||
HAND_STENCIL_LEG_INT(U,Tm,0,skew,odd);
|
HAND_STENCIL_LEG_INT(U,Tm,0,skew,odd);
|
||||||
|
if (Naik) {
|
||||||
skew = 8;
|
skew = 8;
|
||||||
HAND_STENCIL_LEG_INT(UUU,Xp,3,skew,even);
|
HAND_STENCIL_LEG_INT(UUU,Xp,3,skew,even);
|
||||||
HAND_STENCIL_LEG_INT(UUU,Yp,2,skew,odd);
|
HAND_STENCIL_LEG_INT(UUU,Yp,2,skew,odd);
|
||||||
@ -277,7 +283,7 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
HAND_STENCIL_LEG_INT(UUU,Ym,2,skew,odd);
|
HAND_STENCIL_LEG_INT(UUU,Ym,2,skew,odd);
|
||||||
HAND_STENCIL_LEG_INT(UUU,Zm,1,skew,even);
|
HAND_STENCIL_LEG_INT(UUU,Zm,1,skew,even);
|
||||||
HAND_STENCIL_LEG_INT(UUU,Tm,0,skew,odd);
|
HAND_STENCIL_LEG_INT(UUU,Tm,0,skew,odd);
|
||||||
|
}
|
||||||
// Assume every site must be connected to at least one interior point. No 1^4 subvols.
|
// Assume every site must be connected to at least one interior point. No 1^4 subvols.
|
||||||
if ( dag ) {
|
if ( dag ) {
|
||||||
result()()(0) = - even_0 - odd_0;
|
result()()(0) = - even_0 - odd_0;
|
||||||
@ -294,9 +300,10 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
|
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void StaggeredKernels<Impl>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
|
template <int Naik>
|
||||||
|
void StaggeredKernels<Impl>::DhopSiteHandExt(StencilView &st,
|
||||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor *buf, int LLs, int sU,
|
SiteSpinor *buf, int sF, int sU,
|
||||||
const FermionFieldView &in, FermionFieldView &out,int dag)
|
const FermionFieldView &in, FermionFieldView &out,int dag)
|
||||||
{
|
{
|
||||||
typedef typename Simd::scalar_type S;
|
typedef typename Simd::scalar_type S;
|
||||||
@ -329,8 +336,9 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
int skew;
|
int skew;
|
||||||
|
|
||||||
for(int s=0;s<LLs;s++){
|
// for(int s=0;s<LLs;s++){
|
||||||
int sF=s+LLs*sU;
|
// int sF=s+LLs*sU;
|
||||||
|
{
|
||||||
|
|
||||||
even_0 = Zero(); even_1 = Zero(); even_2 = Zero();
|
even_0 = Zero(); even_1 = Zero(); even_2 = Zero();
|
||||||
odd_0 = Zero(); odd_1 = Zero(); odd_2 = Zero();
|
odd_0 = Zero(); odd_1 = Zero(); odd_2 = Zero();
|
||||||
@ -344,6 +352,7 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
HAND_STENCIL_LEG_EXT(U,Ym,2,skew,odd);
|
HAND_STENCIL_LEG_EXT(U,Ym,2,skew,odd);
|
||||||
HAND_STENCIL_LEG_EXT(U,Zm,1,skew,even);
|
HAND_STENCIL_LEG_EXT(U,Zm,1,skew,even);
|
||||||
HAND_STENCIL_LEG_EXT(U,Tm,0,skew,odd);
|
HAND_STENCIL_LEG_EXT(U,Tm,0,skew,odd);
|
||||||
|
if (Naik) {
|
||||||
skew = 8;
|
skew = 8;
|
||||||
HAND_STENCIL_LEG_EXT(UUU,Xp,3,skew,even);
|
HAND_STENCIL_LEG_EXT(UUU,Xp,3,skew,even);
|
||||||
HAND_STENCIL_LEG_EXT(UUU,Yp,2,skew,odd);
|
HAND_STENCIL_LEG_EXT(UUU,Yp,2,skew,odd);
|
||||||
@ -353,7 +362,7 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
HAND_STENCIL_LEG_EXT(UUU,Ym,2,skew,odd);
|
HAND_STENCIL_LEG_EXT(UUU,Ym,2,skew,odd);
|
||||||
HAND_STENCIL_LEG_EXT(UUU,Zm,1,skew,even);
|
HAND_STENCIL_LEG_EXT(UUU,Zm,1,skew,even);
|
||||||
HAND_STENCIL_LEG_EXT(UUU,Tm,0,skew,odd);
|
HAND_STENCIL_LEG_EXT(UUU,Tm,0,skew,odd);
|
||||||
|
}
|
||||||
// Add sum of all exterior connected stencil legs
|
// Add sum of all exterior connected stencil legs
|
||||||
if ( nmu ) {
|
if ( nmu ) {
|
||||||
if ( dag ) {
|
if ( dag ) {
|
||||||
@ -370,6 +379,7 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
#define DHOP_SITE_HAND_INSTANTIATE(IMPL) \
|
#define DHOP_SITE_HAND_INSTANTIATE(IMPL) \
|
||||||
template void StaggeredKernels<IMPL>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, \
|
template void StaggeredKernels<IMPL>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, \
|
||||||
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \
|
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \
|
||||||
@ -385,7 +395,7 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \
|
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \
|
||||||
SiteSpinor *buf, int LLs, int sU, \
|
SiteSpinor *buf, int LLs, int sU, \
|
||||||
const FermionFieldView &in, FermionFieldView &out, int dag); \
|
const FermionFieldView &in, FermionFieldView &out, int dag); \
|
||||||
|
*/
|
||||||
#undef LOAD_CHI
|
#undef LOAD_CHI
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
@ -37,9 +37,9 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
if (SE->_is_local ) { \
|
if (SE->_is_local ) { \
|
||||||
if (SE->_permute) { \
|
if (SE->_permute) { \
|
||||||
chi_p = χ \
|
chi_p = χ \
|
||||||
permute(chi, in[SE->_offset], ptype); \
|
permute(chi, in[SE->_offset], ptype); \
|
||||||
} else { \
|
} else { \
|
||||||
chi_p = &in[SE->_offset]; \
|
chi_p = &in[SE->_offset]; \
|
||||||
} \
|
} \
|
||||||
} else { \
|
} else { \
|
||||||
chi_p = &buf[SE->_offset]; \
|
chi_p = &buf[SE->_offset]; \
|
||||||
@ -51,15 +51,15 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
if (SE->_is_local ) { \
|
if (SE->_is_local ) { \
|
||||||
if (SE->_permute) { \
|
if (SE->_permute) { \
|
||||||
chi_p = χ \
|
chi_p = χ \
|
||||||
permute(chi, in[SE->_offset], ptype); \
|
permute(chi, in[SE->_offset], ptype); \
|
||||||
} else { \
|
} else { \
|
||||||
chi_p = &in[SE->_offset]; \
|
chi_p = &in[SE->_offset]; \
|
||||||
} \
|
} \
|
||||||
} else if ( st.same_node[Dir] ) { \
|
} else if ( st.same_node[Dir] ) { \
|
||||||
chi_p = &buf[SE->_offset]; \
|
chi_p = &buf[SE->_offset]; \
|
||||||
} \
|
} \
|
||||||
if (SE->_is_local || st.same_node[Dir] ) { \
|
if (SE->_is_local || st.same_node[Dir] ) { \
|
||||||
multLink(Uchi, U[sU], *chi_p, Dir); \
|
multLink(Uchi, U[sU], *chi_p, Dir); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define GENERIC_STENCIL_LEG_EXT(U,Dir,skew,multLink) \
|
#define GENERIC_STENCIL_LEG_EXT(U,Dir,skew,multLink) \
|
||||||
@ -67,7 +67,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
|
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
|
||||||
nmu++; \
|
nmu++; \
|
||||||
chi_p = &buf[SE->_offset]; \
|
chi_p = &buf[SE->_offset]; \
|
||||||
multLink(Uchi, U[sU], *chi_p, Dir); \
|
multLink(Uchi, U[sU], *chi_p, Dir); \
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
@ -78,10 +78,12 @@ StaggeredKernels<Impl>::StaggeredKernels(const ImplParams &p) : Base(p){};
|
|||||||
// Int, Ext, Int+Ext cases for comms overlap
|
// Int, Ext, Int+Ext cases for comms overlap
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void StaggeredKernels<Impl>::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo,
|
template <int Naik>
|
||||||
|
void StaggeredKernels<Impl>::DhopSiteGeneric(StencilView &st,
|
||||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor *buf, int LLs, int sU,
|
SiteSpinor *buf, int sF, int sU,
|
||||||
const FermionFieldView &in, FermionFieldView &out, int dag) {
|
const FermionFieldView &in, FermionFieldView &out, int dag)
|
||||||
|
{
|
||||||
const SiteSpinor *chi_p;
|
const SiteSpinor *chi_p;
|
||||||
SiteSpinor chi;
|
SiteSpinor chi;
|
||||||
SiteSpinor Uchi;
|
SiteSpinor Uchi;
|
||||||
@ -89,8 +91,10 @@ void StaggeredKernels<Impl>::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
int ptype;
|
int ptype;
|
||||||
int skew;
|
int skew;
|
||||||
|
|
||||||
for(int s=0;s<LLs;s++){
|
// for(int s=0;s<LLs;s++){
|
||||||
int sF=LLs*sU+s;
|
//
|
||||||
|
// int sF=LLs*sU+s;
|
||||||
|
{
|
||||||
skew = 0;
|
skew = 0;
|
||||||
GENERIC_STENCIL_LEG(U,Xp,skew,Impl::multLink);
|
GENERIC_STENCIL_LEG(U,Xp,skew,Impl::multLink);
|
||||||
GENERIC_STENCIL_LEG(U,Yp,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG(U,Yp,skew,Impl::multLinkAdd);
|
||||||
@ -100,6 +104,7 @@ void StaggeredKernels<Impl>::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
GENERIC_STENCIL_LEG(U,Ym,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG(U,Ym,skew,Impl::multLinkAdd);
|
||||||
GENERIC_STENCIL_LEG(U,Zm,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG(U,Zm,skew,Impl::multLinkAdd);
|
||||||
GENERIC_STENCIL_LEG(U,Tm,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG(U,Tm,skew,Impl::multLinkAdd);
|
||||||
|
if ( Naik ) {
|
||||||
skew=8;
|
skew=8;
|
||||||
GENERIC_STENCIL_LEG(UUU,Xp,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG(UUU,Xp,skew,Impl::multLinkAdd);
|
||||||
GENERIC_STENCIL_LEG(UUU,Yp,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG(UUU,Yp,skew,Impl::multLinkAdd);
|
||||||
@ -109,6 +114,7 @@ void StaggeredKernels<Impl>::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
GENERIC_STENCIL_LEG(UUU,Ym,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG(UUU,Ym,skew,Impl::multLinkAdd);
|
||||||
GENERIC_STENCIL_LEG(UUU,Zm,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG(UUU,Zm,skew,Impl::multLinkAdd);
|
||||||
GENERIC_STENCIL_LEG(UUU,Tm,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG(UUU,Tm,skew,Impl::multLinkAdd);
|
||||||
|
}
|
||||||
if ( dag ) {
|
if ( dag ) {
|
||||||
Uchi = - Uchi;
|
Uchi = - Uchi;
|
||||||
}
|
}
|
||||||
@ -120,9 +126,10 @@ void StaggeredKernels<Impl>::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
// Only contributions from interior of our node
|
// Only contributions from interior of our node
|
||||||
///////////////////////////////////////////////////
|
///////////////////////////////////////////////////
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &lo,
|
template <int Naik>
|
||||||
|
void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilView &st,
|
||||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor *buf, int LLs, int sU,
|
SiteSpinor *buf, int sF, int sU,
|
||||||
const FermionFieldView &in, FermionFieldView &out,int dag) {
|
const FermionFieldView &in, FermionFieldView &out,int dag) {
|
||||||
const SiteSpinor *chi_p;
|
const SiteSpinor *chi_p;
|
||||||
SiteSpinor chi;
|
SiteSpinor chi;
|
||||||
@ -131,8 +138,9 @@ void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &
|
|||||||
int ptype;
|
int ptype;
|
||||||
int skew ;
|
int skew ;
|
||||||
|
|
||||||
for(int s=0;s<LLs;s++){
|
// for(int s=0;s<LLs;s++){
|
||||||
int sF=LLs*sU+s;
|
// int sF=LLs*sU+s;
|
||||||
|
{
|
||||||
skew = 0;
|
skew = 0;
|
||||||
Uchi=Zero();
|
Uchi=Zero();
|
||||||
GENERIC_STENCIL_LEG_INT(U,Xp,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG_INT(U,Xp,skew,Impl::multLinkAdd);
|
||||||
@ -143,6 +151,7 @@ void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &
|
|||||||
GENERIC_STENCIL_LEG_INT(U,Ym,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG_INT(U,Ym,skew,Impl::multLinkAdd);
|
||||||
GENERIC_STENCIL_LEG_INT(U,Zm,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG_INT(U,Zm,skew,Impl::multLinkAdd);
|
||||||
GENERIC_STENCIL_LEG_INT(U,Tm,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG_INT(U,Tm,skew,Impl::multLinkAdd);
|
||||||
|
if ( Naik ) {
|
||||||
skew=8;
|
skew=8;
|
||||||
GENERIC_STENCIL_LEG_INT(UUU,Xp,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG_INT(UUU,Xp,skew,Impl::multLinkAdd);
|
||||||
GENERIC_STENCIL_LEG_INT(UUU,Yp,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG_INT(UUU,Yp,skew,Impl::multLinkAdd);
|
||||||
@ -152,6 +161,7 @@ void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &
|
|||||||
GENERIC_STENCIL_LEG_INT(UUU,Ym,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG_INT(UUU,Ym,skew,Impl::multLinkAdd);
|
||||||
GENERIC_STENCIL_LEG_INT(UUU,Zm,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG_INT(UUU,Zm,skew,Impl::multLinkAdd);
|
||||||
GENERIC_STENCIL_LEG_INT(UUU,Tm,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG_INT(UUU,Tm,skew,Impl::multLinkAdd);
|
||||||
|
}
|
||||||
if ( dag ) {
|
if ( dag ) {
|
||||||
Uchi = - Uchi;
|
Uchi = - Uchi;
|
||||||
}
|
}
|
||||||
@ -164,9 +174,10 @@ void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &
|
|||||||
// Only contributions from exterior of our node
|
// Only contributions from exterior of our node
|
||||||
///////////////////////////////////////////////////
|
///////////////////////////////////////////////////
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &lo,
|
template <int Naik>
|
||||||
|
void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilView &st,
|
||||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor *buf, int LLs, int sU,
|
SiteSpinor *buf, int sF, int sU,
|
||||||
const FermionFieldView &in, FermionFieldView &out,int dag) {
|
const FermionFieldView &in, FermionFieldView &out,int dag) {
|
||||||
const SiteSpinor *chi_p;
|
const SiteSpinor *chi_p;
|
||||||
// SiteSpinor chi;
|
// SiteSpinor chi;
|
||||||
@ -176,8 +187,9 @@ void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &
|
|||||||
int nmu=0;
|
int nmu=0;
|
||||||
int skew ;
|
int skew ;
|
||||||
|
|
||||||
for(int s=0;s<LLs;s++){
|
// for(int s=0;s<LLs;s++){
|
||||||
int sF=LLs*sU+s;
|
// int sF=LLs*sU+s;
|
||||||
|
{
|
||||||
skew = 0;
|
skew = 0;
|
||||||
Uchi=Zero();
|
Uchi=Zero();
|
||||||
GENERIC_STENCIL_LEG_EXT(U,Xp,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG_EXT(U,Xp,skew,Impl::multLinkAdd);
|
||||||
@ -188,6 +200,7 @@ void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &
|
|||||||
GENERIC_STENCIL_LEG_EXT(U,Ym,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG_EXT(U,Ym,skew,Impl::multLinkAdd);
|
||||||
GENERIC_STENCIL_LEG_EXT(U,Zm,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG_EXT(U,Zm,skew,Impl::multLinkAdd);
|
||||||
GENERIC_STENCIL_LEG_EXT(U,Tm,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG_EXT(U,Tm,skew,Impl::multLinkAdd);
|
||||||
|
if ( Naik ) {
|
||||||
skew=8;
|
skew=8;
|
||||||
GENERIC_STENCIL_LEG_EXT(UUU,Xp,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG_EXT(UUU,Xp,skew,Impl::multLinkAdd);
|
||||||
GENERIC_STENCIL_LEG_EXT(UUU,Yp,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG_EXT(UUU,Yp,skew,Impl::multLinkAdd);
|
||||||
@ -197,7 +210,7 @@ void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &
|
|||||||
GENERIC_STENCIL_LEG_EXT(UUU,Ym,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG_EXT(UUU,Ym,skew,Impl::multLinkAdd);
|
||||||
GENERIC_STENCIL_LEG_EXT(UUU,Zm,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG_EXT(UUU,Zm,skew,Impl::multLinkAdd);
|
||||||
GENERIC_STENCIL_LEG_EXT(UUU,Tm,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG_EXT(UUU,Tm,skew,Impl::multLinkAdd);
|
||||||
|
}
|
||||||
if ( nmu ) {
|
if ( nmu ) {
|
||||||
if ( dag ) {
|
if ( dag ) {
|
||||||
out[sF] = out[sF] - Uchi;
|
out[sF] = out[sF] - Uchi;
|
||||||
@ -211,72 +224,9 @@ void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &
|
|||||||
////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Driving / wrapping routine to select right kernel
|
// Driving / wrapping routine to select right kernel
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
void StaggeredKernels<Impl>::DhopDirKernel(StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf,
|
||||||
SiteSpinor *buf, int LLs, int sU,
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dir,int disp)
|
||||||
const FermionFieldView &in, FermionFieldView &out,
|
|
||||||
int interior,int exterior)
|
|
||||||
{
|
|
||||||
int dag=1;
|
|
||||||
DhopSite(st,lo,U,UUU,buf,LLs,sU,in,out,dag,interior,exterior);
|
|
||||||
};
|
|
||||||
|
|
||||||
template <class Impl>
|
|
||||||
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
|
||||||
SiteSpinor *buf, int LLs, int sU,
|
|
||||||
const FermionFieldView &in, FermionFieldView &out,
|
|
||||||
int interior,int exterior)
|
|
||||||
{
|
|
||||||
int dag=0;
|
|
||||||
DhopSite(st,lo,U,UUU,buf,LLs,sU,in,out,dag,interior,exterior);
|
|
||||||
};
|
|
||||||
|
|
||||||
template <class Impl>
|
|
||||||
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
|
||||||
SiteSpinor *buf, int LLs,
|
|
||||||
int sU, const FermionFieldView &in, FermionFieldView &out,
|
|
||||||
int dag,int interior,int exterior)
|
|
||||||
{
|
|
||||||
switch(Opt) {
|
|
||||||
#ifdef AVX512
|
|
||||||
case OptInlineAsm:
|
|
||||||
if ( interior && exterior ) {
|
|
||||||
DhopSiteAsm(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
|
||||||
} else {
|
|
||||||
std::cout << GridLogError << "Cannot overlap comms and compute with Staggered assembly"<<std::endl;
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
#endif
|
|
||||||
case OptHandUnroll:
|
|
||||||
if ( interior && exterior ) {
|
|
||||||
DhopSiteHand (st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
|
||||||
} else if ( interior ) {
|
|
||||||
DhopSiteHandInt(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
|
||||||
} else if ( exterior ) {
|
|
||||||
DhopSiteHandExt(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case OptGeneric:
|
|
||||||
if ( interior && exterior ) {
|
|
||||||
DhopSiteGeneric (st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
|
||||||
} else if ( interior ) {
|
|
||||||
DhopSiteGenericInt(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
|
||||||
} else if ( exterior ) {
|
|
||||||
DhopSiteGenericExt(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
std::cout<<"Oops Opt = "<<Opt<<std::endl;
|
|
||||||
assert(0);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template <class Impl>
|
|
||||||
void StaggeredKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int sF,
|
|
||||||
int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int disp)
|
|
||||||
{
|
{
|
||||||
// Disp should be either +1,-1,+3,-3
|
// Disp should be either +1,-1,+3,-3
|
||||||
// What about "dag" ?
|
// What about "dag" ?
|
||||||
@ -285,6 +235,108 @@ void StaggeredKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeFieldVi
|
|||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define KERNEL_CALLNB(A,improved) \
|
||||||
|
const uint64_t NN = Nsite*Ls; \
|
||||||
|
accelerator_forNB( ss, NN, Simd::Nsimd(), { \
|
||||||
|
int sF = ss; \
|
||||||
|
int sU = ss/Ls; \
|
||||||
|
ThisKernel:: template A<improved>(st_v,U_v,UUU_v,buf,sF,sU,in_v,out_v,dag); \
|
||||||
|
});
|
||||||
|
|
||||||
|
#define KERNEL_CALL(A,improved) KERNEL_CALLNB(A,improved); accelerator_barrier();
|
||||||
|
|
||||||
|
#define ASM_CALL(A) \
|
||||||
|
const uint64_t NN = Nsite*Ls; \
|
||||||
|
thread_for( ss, NN, { \
|
||||||
|
int sF = ss; \
|
||||||
|
int sU = ss/Ls; \
|
||||||
|
ThisKernel::A(st_v,U_v,UUU_v,buf,sF,sU,in_v,out_v,dag); \
|
||||||
|
});
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void StaggeredKernels<Impl>::DhopImproved(StencilImpl &st, LebesgueOrder &lo,
|
||||||
|
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||||
|
const FermionField &in, FermionField &out, int dag, int interior,int exterior)
|
||||||
|
{
|
||||||
|
GridBase *FGrid=in.Grid();
|
||||||
|
GridBase *UGrid=U.Grid();
|
||||||
|
typedef StaggeredKernels<Impl> ThisKernel;
|
||||||
|
auto UUU_v = UUU.View(AcceleratorRead);
|
||||||
|
auto U_v = U.View(AcceleratorRead);
|
||||||
|
auto in_v = in.View(AcceleratorRead);
|
||||||
|
auto out_v = out.View(AcceleratorWrite);
|
||||||
|
auto st_v = st.View(AcceleratorRead);
|
||||||
|
SiteSpinor * buf = st.CommBuf();
|
||||||
|
|
||||||
|
int Ls=1;
|
||||||
|
if(FGrid->Nd()==UGrid->Nd()+1){
|
||||||
|
Ls = FGrid->_rdimensions[0];
|
||||||
|
}
|
||||||
|
int Nsite = UGrid->oSites();
|
||||||
|
|
||||||
|
if( interior && exterior ) {
|
||||||
|
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGeneric,1); return;}
|
||||||
|
#ifndef GRID_CUDA
|
||||||
|
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHand,1); return;}
|
||||||
|
if (Opt == OptInlineAsm ) { ASM_CALL(DhopSiteAsm); return;}
|
||||||
|
#endif
|
||||||
|
} else if( interior ) {
|
||||||
|
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericInt,1); return;}
|
||||||
|
#ifndef GRID_CUDA
|
||||||
|
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandInt,1); return;}
|
||||||
|
#endif
|
||||||
|
} else if( exterior ) {
|
||||||
|
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericExt,1); return;}
|
||||||
|
#ifndef GRID_CUDA
|
||||||
|
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandExt,1); return;}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
assert(0 && " Kernel optimisation case not covered ");
|
||||||
|
}
|
||||||
|
template <class Impl>
|
||||||
|
void StaggeredKernels<Impl>::DhopNaive(StencilImpl &st, LebesgueOrder &lo,
|
||||||
|
DoubledGaugeField &U,
|
||||||
|
const FermionField &in, FermionField &out, int dag, int interior,int exterior)
|
||||||
|
{
|
||||||
|
GridBase *FGrid=in.Grid();
|
||||||
|
GridBase *UGrid=U.Grid();
|
||||||
|
typedef StaggeredKernels<Impl> ThisKernel;
|
||||||
|
auto UUU_v= U.View(AcceleratorRead);
|
||||||
|
auto U_v = U.View(AcceleratorRead);
|
||||||
|
auto in_v = in.View(AcceleratorRead);
|
||||||
|
auto out_v = out.View(AcceleratorWrite);
|
||||||
|
auto st_v = st.View(AcceleratorRead);
|
||||||
|
SiteSpinor * buf = st.CommBuf();
|
||||||
|
|
||||||
|
int Ls=1;
|
||||||
|
if(FGrid->Nd()==UGrid->Nd()+1){
|
||||||
|
Ls = FGrid->_rdimensions[0];
|
||||||
|
}
|
||||||
|
int Nsite = UGrid->oSites();
|
||||||
|
|
||||||
|
if( interior && exterior ) {
|
||||||
|
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGeneric,0); return;}
|
||||||
|
#ifndef GRID_CUDA
|
||||||
|
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHand,0); return;}
|
||||||
|
#endif
|
||||||
|
} else if( interior ) {
|
||||||
|
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericInt,0); return;}
|
||||||
|
#ifndef GRID_CUDA
|
||||||
|
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandInt,0); return;}
|
||||||
|
#endif
|
||||||
|
} else if( exterior ) {
|
||||||
|
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericExt,0); return;}
|
||||||
|
#ifndef GRID_CUDA
|
||||||
|
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandExt,0); return;}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#undef KERNEL_CALLNB
|
||||||
|
#undef KERNEL_CALL
|
||||||
|
#undef ASM_CALL
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
|
||||||
|
|
||||||
|
@ -496,5 +496,9 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
|
|||||||
assert(0 && " Kernel optimisation case not covered ");
|
assert(0 && " Kernel optimisation case not covered ");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#undef KERNEL_CALLNB
|
||||||
|
#undef KERNEL_CALL
|
||||||
|
#undef ASM_CALL
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
|
||||||
|
@ -0,0 +1,36 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi, Peter Boyle
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
|
directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
|
const std::vector<int> NaiveStaggeredFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, 3});
|
||||||
|
const std::vector<int> NaiveStaggeredFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1});
|
||||||
|
|
||||||
|
NAMESPACE_END(Grid);
|
@ -0,0 +1,37 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/qcd/action/fermion/NaiveStaggeredFermion.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi, Peter Boyle
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
|
directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
#include <Grid/qcd/action/fermion/implementation/NaiveStaggeredFermionImplementation.h>
|
||||||
|
|
||||||
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
|
#include "impl.h"
|
||||||
|
template class NaiveStaggeredFermion<IMPLEMENTATION>;
|
||||||
|
|
||||||
|
NAMESPACE_END(Grid);
|
@ -0,0 +1 @@
|
|||||||
|
../NaiveStaggeredFermionInstantiation.cc.master
|
@ -0,0 +1 @@
|
|||||||
|
../NaiveStaggeredFermionInstantiation.cc.master
|
@ -88,6 +88,7 @@ done
|
|||||||
CC_LIST=" \
|
CC_LIST=" \
|
||||||
ImprovedStaggeredFermion5DInstantiation \
|
ImprovedStaggeredFermion5DInstantiation \
|
||||||
ImprovedStaggeredFermionInstantiation \
|
ImprovedStaggeredFermionInstantiation \
|
||||||
|
NaiveStaggeredFermionInstantiation \
|
||||||
StaggeredKernelsInstantiation "
|
StaggeredKernelsInstantiation "
|
||||||
|
|
||||||
for impl in $STAG_IMPL_LIST
|
for impl in $STAG_IMPL_LIST
|
||||||
|
@ -87,26 +87,6 @@ int main (int argc, char ** argv)
|
|||||||
for(int mu=0;mu<Nd;mu++){
|
for(int mu=0;mu<Nd;mu++){
|
||||||
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||||
}
|
}
|
||||||
ref = Zero();
|
|
||||||
/*
|
|
||||||
{ // Naive wilson implementation
|
|
||||||
ref = Zero();
|
|
||||||
for(int mu=0;mu<Nd;mu++){
|
|
||||||
// ref = src + Gamma(Gamma::GammaX)* src ; // 1-gamma_x
|
|
||||||
tmp = U[mu]*Cshift(src,mu,1);
|
|
||||||
for(int i=0;i<ref._odata.size();i++){
|
|
||||||
ref[i]+= tmp[i] - Gamma(Gmu[mu])*tmp[i]; ;
|
|
||||||
}
|
|
||||||
|
|
||||||
tmp =adj(U[mu])*src;
|
|
||||||
tmp =Cshift(tmp,mu,-1);
|
|
||||||
for(int i=0;i<ref._odata.size();i++){
|
|
||||||
ref[i]+= tmp[i] + Gamma(Gmu[mu])*tmp[i]; ;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ref = -0.5*ref;
|
|
||||||
*/
|
|
||||||
|
|
||||||
RealD mass=0.1;
|
RealD mass=0.1;
|
||||||
RealD c1=9.0/8.0;
|
RealD c1=9.0/8.0;
|
||||||
@ -125,10 +105,7 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
std::cout<<GridLogMessage << "Called Ds"<<std::endl;
|
std::cout<<GridLogMessage << "Called Ds"<<std::endl;
|
||||||
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||||
err = ref-result;
|
|
||||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
|
||||||
|
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
}
|
}
|
||||||
|
282
tests/core/Test_staggered_naive.cc
Normal file
282
tests/core/Test_staggered_naive.cc
Normal file
@ -0,0 +1,282 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./benchmarks/Benchmark_wilson.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
;
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
Coordinate latt_size = GridDefaultLatt();
|
||||||
|
Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
|
||||||
|
Coordinate mpi_layout = GridDefaultMpi();
|
||||||
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
GridRedBlackCartesian RBGrid(&Grid);
|
||||||
|
|
||||||
|
int threads = GridThread::GetThreads();
|
||||||
|
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Grid floating point word size is REALF"<< sizeof(RealF)<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Grid floating point word size is REALD"<< sizeof(RealD)<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Grid floating point word size is REAL"<< sizeof(Real)<<std::endl;
|
||||||
|
|
||||||
|
std::vector<int> seeds({1,2,3,4});
|
||||||
|
GridParallelRNG pRNG(&Grid);
|
||||||
|
pRNG.SeedFixedIntegers(seeds);
|
||||||
|
// pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
|
||||||
|
|
||||||
|
typedef typename NaiveStaggeredFermionR::FermionField FermionField;
|
||||||
|
typedef typename NaiveStaggeredFermionR::ComplexField ComplexField;
|
||||||
|
typename NaiveStaggeredFermionR::ImplParams params;
|
||||||
|
|
||||||
|
FermionField src (&Grid); random(pRNG,src);
|
||||||
|
FermionField result(&Grid); result=Zero();
|
||||||
|
FermionField ref(&Grid); ref=Zero();
|
||||||
|
FermionField tmp(&Grid); tmp=Zero();
|
||||||
|
FermionField err(&Grid); tmp=Zero();
|
||||||
|
FermionField phi (&Grid); random(pRNG,phi);
|
||||||
|
FermionField chi (&Grid); random(pRNG,chi);
|
||||||
|
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
|
||||||
|
std::vector<LatticeColourMatrix> U(4,&Grid);
|
||||||
|
|
||||||
|
|
||||||
|
double volume=1;
|
||||||
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
volume=volume*latt_size[mu];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only one non-zero (y)
|
||||||
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||||
|
/* Debug force unit
|
||||||
|
U[mu] = 1.0;
|
||||||
|
PokeIndex<LorentzIndex>(Umu,U[mu],mu);
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
|
ref = Zero();
|
||||||
|
|
||||||
|
RealD mass=0.1;
|
||||||
|
RealD c1=9.0/8.0;
|
||||||
|
RealD u0=1.0;
|
||||||
|
|
||||||
|
{ // Simple improved staggered implementation
|
||||||
|
ref = Zero();
|
||||||
|
RealD c1tad = 0.5*c1/u0;
|
||||||
|
|
||||||
|
Lattice<iScalar<vInteger> > coor(&Grid);
|
||||||
|
|
||||||
|
Lattice<iScalar<vInteger> > x(&Grid); LatticeCoordinate(x,0);
|
||||||
|
Lattice<iScalar<vInteger> > y(&Grid); LatticeCoordinate(y,1);
|
||||||
|
Lattice<iScalar<vInteger> > z(&Grid); LatticeCoordinate(z,2);
|
||||||
|
Lattice<iScalar<vInteger> > t(&Grid); LatticeCoordinate(t,3);
|
||||||
|
|
||||||
|
Lattice<iScalar<vInteger> > lin_z(&Grid); lin_z=x+y;
|
||||||
|
Lattice<iScalar<vInteger> > lin_t(&Grid); lin_t=x+y+z;
|
||||||
|
|
||||||
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
|
||||||
|
// Staggered Phase.
|
||||||
|
ComplexField phases(&Grid); phases=1.0;
|
||||||
|
|
||||||
|
if ( mu == 1 ) phases = where( mod(x ,2)==(Integer)0, phases,-phases);
|
||||||
|
if ( mu == 2 ) phases = where( mod(lin_z,2)==(Integer)0, phases,-phases);
|
||||||
|
if ( mu == 3 ) phases = where( mod(lin_t,2)==(Integer)0, phases,-phases);
|
||||||
|
|
||||||
|
tmp = PeriodicBC::CovShiftForward(U[mu],mu,src);
|
||||||
|
ref = ref +c1tad*tmp*phases; // Forward 1 hop
|
||||||
|
|
||||||
|
tmp = PeriodicBC::CovShiftBackward(U[mu],mu,src);
|
||||||
|
ref = ref -c1tad*tmp*phases; // Backward 1 hop
|
||||||
|
|
||||||
|
}
|
||||||
|
// ref = ref + mass * src;
|
||||||
|
}
|
||||||
|
|
||||||
|
NaiveStaggeredFermionR Ds(Umu,Grid,RBGrid,mass,c1,u0,params);
|
||||||
|
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"= Testing Dhop against cshift implementation "<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "Calling Ds"<<std::endl;
|
||||||
|
int ncall=1000;
|
||||||
|
double t0=usecond();
|
||||||
|
for(int i=0;i<ncall;i++){
|
||||||
|
Ds.Dhop(src,result,0);
|
||||||
|
}
|
||||||
|
double t1=usecond();
|
||||||
|
double t2;
|
||||||
|
double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // == 66*16 + == 1146
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "Called Ds"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||||
|
|
||||||
|
err = ref-result;
|
||||||
|
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"= Testing that Deo + Doe = Dunprec "<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
|
||||||
|
|
||||||
|
FermionField src_e (&RBGrid);
|
||||||
|
FermionField src_o (&RBGrid);
|
||||||
|
FermionField r_e (&RBGrid);
|
||||||
|
FermionField r_o (&RBGrid);
|
||||||
|
FermionField r_eo (&Grid);
|
||||||
|
pickCheckerboard(Even,src_e,src);
|
||||||
|
pickCheckerboard(Odd,src_o,src);
|
||||||
|
|
||||||
|
Ds.Meooe(src_e,r_o); std::cout<<GridLogMessage<<"Applied Meo"<<std::endl;
|
||||||
|
Ds.Meooe(src_o,r_e); std::cout<<GridLogMessage<<"Applied Moe"<<std::endl;
|
||||||
|
Ds.Dhop (src,ref,DaggerNo);
|
||||||
|
|
||||||
|
setCheckerboard(r_eo,r_o);
|
||||||
|
setCheckerboard(r_eo,r_e);
|
||||||
|
|
||||||
|
err= ref - r_eo;
|
||||||
|
std::cout<<GridLogMessage << "EO norm diff "<< norm2(err)<< " "<<norm2(ref)<< " " << norm2(r_eo) <<std::endl;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"= Test Ddagger is the dagger of D by requiring "<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"= < phi | Deo | chi > * = < chi | Deo^dag| phi> "<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||||
|
|
||||||
|
FermionField chi_e (&RBGrid);
|
||||||
|
FermionField chi_o (&RBGrid);
|
||||||
|
|
||||||
|
FermionField dchi_e (&RBGrid);
|
||||||
|
FermionField dchi_o (&RBGrid);
|
||||||
|
|
||||||
|
FermionField phi_e (&RBGrid);
|
||||||
|
FermionField phi_o (&RBGrid);
|
||||||
|
|
||||||
|
FermionField dphi_e (&RBGrid);
|
||||||
|
FermionField dphi_o (&RBGrid);
|
||||||
|
|
||||||
|
pickCheckerboard(Even,chi_e,chi);
|
||||||
|
pickCheckerboard(Odd ,chi_o,chi);
|
||||||
|
pickCheckerboard(Even,phi_e,phi);
|
||||||
|
pickCheckerboard(Odd ,phi_o,phi);
|
||||||
|
|
||||||
|
Ds.Meooe(chi_e,dchi_o);
|
||||||
|
Ds.Meooe(chi_o,dchi_e);
|
||||||
|
Ds.MeooeDag(phi_e,dphi_o);
|
||||||
|
Ds.MeooeDag(phi_o,dphi_e);
|
||||||
|
|
||||||
|
ComplexD pDce = innerProduct(phi_e,dchi_e);
|
||||||
|
ComplexD pDco = innerProduct(phi_o,dchi_o);
|
||||||
|
ComplexD cDpe = innerProduct(chi_e,dphi_e);
|
||||||
|
ComplexD cDpo = innerProduct(chi_o,dphi_o);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage <<"e "<<pDce<<" "<<cDpe <<std::endl;
|
||||||
|
std::cout<<GridLogMessage <<"o "<<pDco<<" "<<cDpo <<std::endl;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage <<"pDce - conj(cDpo) "<< pDce-conj(cDpo) <<std::endl;
|
||||||
|
std::cout<<GridLogMessage <<"pDco - conj(cDpe) "<< pDco-conj(cDpe) <<std::endl;
|
||||||
|
std::cout<<GridLogMessage <<"e "<<pDce<<" "<<cDpe <<std::endl;
|
||||||
|
std::cout<<GridLogMessage <<"o "<<pDco<<" "<<cDpo <<std::endl;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage <<"pDce - conj(cDpo) "<< pDce-conj(cDpo) <<std::endl;
|
||||||
|
std::cout<<GridLogMessage <<"pDco - conj(cDpe) "<< pDco-conj(cDpe) <<std::endl;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"= Test MeeInv Mee = 1 "<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||||
|
|
||||||
|
pickCheckerboard(Even,chi_e,chi);
|
||||||
|
pickCheckerboard(Odd ,chi_o,chi);
|
||||||
|
|
||||||
|
Ds.Mooee(chi_e,src_e);
|
||||||
|
Ds.MooeeInv(src_e,phi_e);
|
||||||
|
|
||||||
|
Ds.Mooee(chi_o,src_o);
|
||||||
|
Ds.MooeeInv(src_o,phi_o);
|
||||||
|
|
||||||
|
setCheckerboard(phi,phi_e);
|
||||||
|
setCheckerboard(phi,phi_o);
|
||||||
|
|
||||||
|
err = phi-chi;
|
||||||
|
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<< std::endl;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"= Test MeeInvDag MeeDag = 1 "<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||||
|
|
||||||
|
pickCheckerboard(Even,chi_e,chi);
|
||||||
|
pickCheckerboard(Odd ,chi_o,chi);
|
||||||
|
|
||||||
|
Ds.MooeeDag(chi_e,src_e);
|
||||||
|
Ds.MooeeInvDag(src_e,phi_e);
|
||||||
|
|
||||||
|
Ds.MooeeDag(chi_o,src_o);
|
||||||
|
Ds.MooeeInvDag(src_o,phi_o);
|
||||||
|
|
||||||
|
setCheckerboard(phi,phi_e);
|
||||||
|
setCheckerboard(phi,phi_o);
|
||||||
|
|
||||||
|
err = phi-chi;
|
||||||
|
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<< std::endl;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"= Test MpcDagMpc is Hermitian "<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||||
|
|
||||||
|
random(pRNG,phi);
|
||||||
|
random(pRNG,chi);
|
||||||
|
pickCheckerboard(Even,chi_e,chi);
|
||||||
|
pickCheckerboard(Odd ,chi_o,chi);
|
||||||
|
pickCheckerboard(Even,phi_e,phi);
|
||||||
|
pickCheckerboard(Odd ,phi_o,phi);
|
||||||
|
|
||||||
|
SchurDiagMooeeOperator<NaiveStaggeredFermionR,FermionField> HermOpEO(Ds);
|
||||||
|
HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2);
|
||||||
|
HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2);
|
||||||
|
|
||||||
|
HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2);
|
||||||
|
HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2);
|
||||||
|
|
||||||
|
pDce = innerProduct(phi_e,dchi_e);
|
||||||
|
pDco = innerProduct(phi_o,dchi_o);
|
||||||
|
cDpe = innerProduct(chi_e,dphi_e);
|
||||||
|
cDpo = innerProduct(chi_o,dphi_o);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage <<"e "<<pDce<<" "<<cDpe <<std::endl;
|
||||||
|
std::cout<<GridLogMessage <<"o "<<pDco<<" "<<cDpo <<std::endl;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage <<"pDce - conj(cDpo) "<< pDco-conj(cDpo) <<std::endl;
|
||||||
|
std::cout<<GridLogMessage <<"pDco - conj(cDpe) "<< pDce-conj(cDpe) <<std::endl;
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user