mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Staggereed move to accelerator
This commit is contained in:
parent
cf2938688a
commit
006cc8a8f1
@ -57,6 +57,7 @@ NAMESPACE_CHECK(WilsonClover);
|
||||
#include <Grid/qcd/action/fermion/WilsonFermion5D.h> // 5d base used by all 5d overlap types
|
||||
NAMESPACE_CHECK(Wilson5D);
|
||||
|
||||
#include <Grid/qcd/action/fermion/NaiveStaggeredFermion.h>
|
||||
#include <Grid/qcd/action/fermion/ImprovedStaggeredFermion.h>
|
||||
#include <Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h>
|
||||
NAMESPACE_CHECK(Staggered);
|
||||
@ -282,6 +283,10 @@ typedef ImprovedStaggeredFermion<StaggeredImplR> ImprovedStaggeredFermionR;
|
||||
typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF;
|
||||
typedef ImprovedStaggeredFermion<StaggeredImplD> ImprovedStaggeredFermionD;
|
||||
|
||||
typedef NaiveStaggeredFermion<StaggeredImplR> NaiveStaggeredFermionR;
|
||||
typedef NaiveStaggeredFermion<StaggeredImplF> NaiveStaggeredFermionF;
|
||||
typedef NaiveStaggeredFermion<StaggeredImplD> NaiveStaggeredFermionD;
|
||||
|
||||
typedef ImprovedStaggeredFermion5D<StaggeredImplR> ImprovedStaggeredFermion5DR;
|
||||
typedef ImprovedStaggeredFermion5D<StaggeredImplF> ImprovedStaggeredFermion5DF;
|
||||
typedef ImprovedStaggeredFermion5D<StaggeredImplD> ImprovedStaggeredFermion5DD;
|
||||
|
194
Grid/qcd/action/fermion/NaiveStaggeredFermion.h
Normal file
194
Grid/qcd/action/fermion/NaiveStaggeredFermion.h
Normal file
@ -0,0 +1,194 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/ImprovedStaggered.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi, Peter Boyle
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_QCD_NAIVE_STAG_FERMION_H
|
||||
#define GRID_QCD_NAIVE_STAG_FERMION_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
class NaiveStaggeredFermionStatic {
|
||||
public:
|
||||
static const std::vector<int> directions;
|
||||
static const std::vector<int> displacements;
|
||||
static const int npoint = 8;
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
class NaiveStaggeredFermion : public StaggeredKernels<Impl>, public NaiveStaggeredFermionStatic {
|
||||
public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
typedef StaggeredKernels<Impl> Kernels;
|
||||
|
||||
FermionField _tmp;
|
||||
FermionField &tmp(void) { return _tmp; }
|
||||
|
||||
////////////////////////////////////////
|
||||
// Performance monitoring
|
||||
////////////////////////////////////////
|
||||
void Report(void);
|
||||
void ZeroCounters(void);
|
||||
double DhopTotalTime;
|
||||
double DhopCalls;
|
||||
double DhopCommTime;
|
||||
double DhopComputeTime;
|
||||
double DhopComputeTime2;
|
||||
double DhopFaceTime;
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Implement the abstract base
|
||||
///////////////////////////////////////////////////////////////
|
||||
GridBase *GaugeGrid(void) { return _grid; }
|
||||
GridBase *GaugeRedBlackGrid(void) { return _cbgrid; }
|
||||
GridBase *FermionGrid(void) { return _grid; }
|
||||
GridBase *FermionRedBlackGrid(void) { return _cbgrid; }
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// override multiply; cut number routines if pass dagger argument
|
||||
// and also make interface more uniformly consistent
|
||||
//////////////////////////////////////////////////////////////////
|
||||
RealD M(const FermionField &in, FermionField &out);
|
||||
RealD Mdag(const FermionField &in, FermionField &out);
|
||||
|
||||
/////////////////////////////////////////////////////////
|
||||
// half checkerboard operations
|
||||
/////////////////////////////////////////////////////////
|
||||
void Meooe(const FermionField &in, FermionField &out);
|
||||
void MeooeDag(const FermionField &in, FermionField &out);
|
||||
void Mooee(const FermionField &in, FermionField &out);
|
||||
void MooeeDag(const FermionField &in, FermionField &out);
|
||||
void MooeeInv(const FermionField &in, FermionField &out);
|
||||
void MooeeInvDag(const FermionField &in, FermionField &out);
|
||||
|
||||
////////////////////////
|
||||
// Derivative interface
|
||||
////////////////////////
|
||||
// Interface calls an internal routine
|
||||
void DhopDeriv (GaugeField &mat, const FermionField &U, const FermionField &V, int dag);
|
||||
void DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag);
|
||||
void DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// non-hermitian hopping term; half cb or both
|
||||
///////////////////////////////////////////////////////////////
|
||||
void Dhop (const FermionField &in, FermionField &out, int dag);
|
||||
void DhopOE(const FermionField &in, FermionField &out, int dag);
|
||||
void DhopEO(const FermionField &in, FermionField &out, int dag);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Multigrid assistance; force term uses too
|
||||
///////////////////////////////////////////////////////////////
|
||||
void Mdir(const FermionField &in, FermionField &out, int dir, int disp);
|
||||
void MdirAll(const FermionField &in, std::vector<FermionField> &out);
|
||||
void DhopDir(const FermionField &in, FermionField &out, int dir, int disp);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Extra methods added by derived
|
||||
///////////////////////////////////////////////////////////////
|
||||
void DerivInternal(StencilImpl &st,
|
||||
DoubledGaugeField &U,
|
||||
GaugeField &mat,
|
||||
const FermionField &A, const FermionField &B, int dag);
|
||||
|
||||
void DhopInternal(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
const FermionField &in, FermionField &out, int dag);
|
||||
void DhopInternalSerialComms(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
const FermionField &in, FermionField &out, int dag);
|
||||
void DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
const FermionField &in, FermionField &out, int dag);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Grid own interface Constructor
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
NaiveStaggeredFermion(GaugeField &_U, GridCartesian &Fgrid,
|
||||
GridRedBlackCartesian &Hgrid, RealD _mass,
|
||||
RealD _c1, RealD _u0,
|
||||
const ImplParams &p = ImplParams());
|
||||
NaiveStaggeredFermion(GridCartesian &Fgrid,
|
||||
GridRedBlackCartesian &Hgrid, RealD _mass,
|
||||
RealD _c1, RealD _u0,
|
||||
const ImplParams &p = ImplParams());
|
||||
|
||||
// DoubleStore impl dependent
|
||||
void ImportGauge (const GaugeField &_U );
|
||||
DoubledGaugeField &GetU(void) { return Umu ; } ;
|
||||
void CopyGaugeCheckerboards(void);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Data members require to support the functionality
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
||||
// protected:
|
||||
public:
|
||||
// any other parameters of action ???
|
||||
virtual int isTrivialEE(void) { return 1; };
|
||||
virtual RealD Mass(void) { return mass; }
|
||||
RealD mass;
|
||||
RealD u0;
|
||||
RealD c1;
|
||||
|
||||
GridBase *_grid;
|
||||
GridBase *_cbgrid;
|
||||
|
||||
// Defines the stencils for even and odd
|
||||
StencilImpl Stencil;
|
||||
StencilImpl StencilEven;
|
||||
StencilImpl StencilOdd;
|
||||
|
||||
// Copy of the gauge field , with even and odd subsets
|
||||
DoubledGaugeField Umu;
|
||||
DoubledGaugeField UmuEven;
|
||||
DoubledGaugeField UmuOdd;
|
||||
|
||||
LebesgueOrder Lebesgue;
|
||||
LebesgueOrder LebesgueEvenOdd;
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Conserved current utilities
|
||||
///////////////////////////////////////////////////////////////
|
||||
void ContractConservedCurrent(PropagatorField &q_in_1,
|
||||
PropagatorField &q_in_2,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &src,
|
||||
Current curr_type,
|
||||
unsigned int mu);
|
||||
void SeqConservedCurrent(PropagatorField &q_in,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &srct,
|
||||
Current curr_type,
|
||||
unsigned int mu,
|
||||
unsigned int tmin,
|
||||
unsigned int tmax,
|
||||
ComplexField &lattice_cmplx);
|
||||
};
|
||||
|
||||
typedef NaiveStaggeredFermion<StaggeredImplF> NaiveStaggeredFermionF;
|
||||
typedef NaiveStaggeredFermion<StaggeredImplD> NaiveStaggeredFermionD;
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
@ -49,21 +49,32 @@ template<class Impl> class StaggeredKernels : public FermionOperator<Impl> , pub
|
||||
|
||||
public:
|
||||
|
||||
void DhopImproved(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
const FermionField &in, FermionField &out, int dag, int interior,int exterior);
|
||||
void DhopNaive(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
const FermionField &in, FermionField &out, int dag, int interior,int exterior);
|
||||
|
||||
void DhopDirKernel(StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf,
|
||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dir,int disp);
|
||||
protected:
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
// Generic Nc kernels
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
void DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo,
|
||||
template<int Naik>
|
||||
void DhopSiteGeneric(StencilView &st,
|
||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionFieldView &in, FermionFieldView &out,int dag);
|
||||
void DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &lo,
|
||||
template<int Naik>
|
||||
void DhopSiteGenericInt(StencilView &st,
|
||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionFieldView &in, FermionFieldView &out,int dag);
|
||||
void DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &lo,
|
||||
template<int Naik>
|
||||
void DhopSiteGenericExt(StencilView &st,
|
||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionFieldView &in, FermionFieldView &out,int dag);
|
||||
@ -71,15 +82,18 @@ public:
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
// Nc=3 specific kernels
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
void DhopSiteHand(StencilImpl &st, LebesgueOrder &lo,
|
||||
template<int Naik>
|
||||
void DhopSiteHand(StencilView &st,
|
||||
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionFieldView &in, FermionFieldView &out,int dag);
|
||||
void DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
|
||||
template<int Naik>
|
||||
void DhopSiteHandInt(StencilView &st,
|
||||
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionFieldView &in, FermionFieldView &out,int dag);
|
||||
void DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
|
||||
template<int Naik>
|
||||
void DhopSiteHandExt(StencilView &st,
|
||||
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionFieldView &in, FermionFieldView &out,int dag);
|
||||
@ -87,27 +101,10 @@ public:
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
// Asm Nc=3 specific kernels
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
void DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
void DhopSiteAsm(StencilView &st,
|
||||
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionFieldView &in, FermionFieldView &out,int dag);
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Generic interface; fan out to right routine
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void DhopSite(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionFieldView &in, FermionFieldView &out, int interior=1,int exterior=1);
|
||||
|
||||
void DhopSiteDag(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionFieldView &in, FermionFieldView &out, int interior=1,int exterior=1);
|
||||
|
||||
void DhopSite(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionFieldView &in, FermionFieldView &out, int dag, int interior,int exterior);
|
||||
|
||||
public:
|
||||
|
||||
|
@ -281,11 +281,9 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOr
|
||||
DoubledGaugeField & U,DoubledGaugeField & UUU,
|
||||
const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
#ifdef GRID_OMP
|
||||
if ( StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsAndCompute )
|
||||
DhopInternalOverlappedComms(st,lo,U,UUU,in,out,dag);
|
||||
else
|
||||
#endif
|
||||
DhopInternalSerialComms(st,lo,U,UUU,in,out,dag);
|
||||
}
|
||||
|
||||
@ -294,9 +292,7 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl &
|
||||
DoubledGaugeField & U,DoubledGaugeField & UUU,
|
||||
const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
#ifdef GRID_OMP
|
||||
// assert((dag==DaggerNo) ||(dag==DaggerYes));
|
||||
|
||||
Compressor compressor;
|
||||
|
||||
int LLs = in.Grid()->_rdimensions[0];
|
||||
@ -305,99 +301,42 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl &
|
||||
DhopFaceTime-=usecond();
|
||||
st.Prepare();
|
||||
st.HaloGather(in,compressor);
|
||||
DhopFaceTime+=usecond();
|
||||
|
||||
DhopCommTime -=usecond();
|
||||
std::vector<std::vector<CommsRequest_t> > requests;
|
||||
st.CommunicateBegin(requests);
|
||||
|
||||
// st.HaloExchangeOptGather(in,compressor); // Wilson compressor
|
||||
DhopFaceTime-=usecond();
|
||||
st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
|
||||
DhopFaceTime+=usecond();
|
||||
|
||||
double ctime=0;
|
||||
double ptime=0;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Ugly explicit thread mapping introduced for OPA reasons.
|
||||
// Remove explicit thread mapping introduced for OPA reasons.
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#pragma omp parallel reduction(max:ctime) reduction(max:ptime)
|
||||
DhopComputeTime-=usecond();
|
||||
{
|
||||
int tid = omp_get_thread_num();
|
||||
int nthreads = omp_get_num_threads();
|
||||
int ncomms = CartesianCommunicator::nCommThreads;
|
||||
if (ncomms == -1) ncomms = 1;
|
||||
assert(nthreads > ncomms);
|
||||
if (tid >= ncomms) {
|
||||
double start = usecond();
|
||||
nthreads -= ncomms;
|
||||
int ttid = tid - ncomms;
|
||||
int n = U.Grid()->oSites(); // 4d vol
|
||||
int chunk = n / nthreads;
|
||||
int rem = n % nthreads;
|
||||
int myblock, myn;
|
||||
if (ttid < rem) {
|
||||
myblock = ttid * chunk + ttid;
|
||||
myn = chunk+1;
|
||||
} else {
|
||||
myblock = ttid*chunk + rem;
|
||||
myn = chunk;
|
||||
int interior=1;
|
||||
int exterior=0;
|
||||
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
||||
}
|
||||
|
||||
// do the compute
|
||||
auto U_v = U.View(CpuRead);
|
||||
auto UUU_v = UUU.View(CpuRead);
|
||||
auto in_v = in.View(CpuRead);
|
||||
auto out_v = out.View(CpuWrite);
|
||||
|
||||
if (dag == DaggerYes) {
|
||||
for (int ss = myblock; ss < myblock+myn; ++ss) {
|
||||
int sU = ss;
|
||||
// Interior = 1; Exterior = 0; must implement for staggered
|
||||
Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,1,0); //<---------
|
||||
}
|
||||
} else {
|
||||
for (int ss = myblock; ss < myblock+myn; ++ss) {
|
||||
// Interior = 1; Exterior = 0;
|
||||
int sU = ss;
|
||||
Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,1,0); //<------------
|
||||
}
|
||||
}
|
||||
ptime = usecond() - start;
|
||||
} else {
|
||||
double start = usecond();
|
||||
st.CommunicateThreaded();
|
||||
ctime = usecond() - start;
|
||||
}
|
||||
}
|
||||
DhopCommTime += ctime;
|
||||
DhopComputeTime+=ptime;
|
||||
|
||||
// First to enter, last to leave timing
|
||||
st.CollateThreads();
|
||||
DhopComputeTime+=usecond();
|
||||
|
||||
DhopFaceTime-=usecond();
|
||||
st.CommsMerge(compressor);
|
||||
DhopFaceTime+=usecond();
|
||||
|
||||
DhopComputeTime2-=usecond();
|
||||
st.CommunicateComplete(requests);
|
||||
DhopCommTime +=usecond();
|
||||
|
||||
auto U_v = U.View(CpuRead);
|
||||
auto UUU_v = UUU.View(CpuRead);
|
||||
auto in_v = in.View(CpuRead);
|
||||
auto out_v = out.View(CpuWrite);
|
||||
if (dag == DaggerYes) {
|
||||
int sz=st.surface_list.size();
|
||||
thread_for( ss,sz,{
|
||||
int sU = st.surface_list[ss];
|
||||
Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,0,1); //<----------
|
||||
});
|
||||
} else {
|
||||
int sz=st.surface_list.size();
|
||||
thread_for( ss,sz,{
|
||||
int sU = st.surface_list[ss];
|
||||
Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,0,1);//<----------
|
||||
});
|
||||
DhopComputeTime2-=usecond();
|
||||
{
|
||||
int interior=0;
|
||||
int exterior=1;
|
||||
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
||||
}
|
||||
DhopComputeTime2+=usecond();
|
||||
#else
|
||||
assert(0);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
@ -408,8 +347,6 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st,
|
||||
Compressor compressor;
|
||||
int LLs = in.Grid()->_rdimensions[0];
|
||||
|
||||
|
||||
|
||||
//double t1=usecond();
|
||||
DhopTotalTime -= usecond();
|
||||
DhopCommTime -= usecond();
|
||||
@ -418,28 +355,13 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st,
|
||||
|
||||
DhopComputeTime -= usecond();
|
||||
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
||||
auto U_v = U.View(CpuRead);
|
||||
auto UUU_v = UUU.View(CpuRead);
|
||||
auto in_v = in.View(CpuRead);
|
||||
auto out_v = out.View(CpuWrite);
|
||||
if (dag == DaggerYes) {
|
||||
thread_for( ss,U.Grid()->oSites(),{
|
||||
int sU=ss;
|
||||
Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), LLs, sU,in_v, out_v);
|
||||
});
|
||||
} else {
|
||||
thread_for( ss,U.Grid()->oSites(),{
|
||||
int sU=ss;
|
||||
Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v);
|
||||
});
|
||||
{
|
||||
int interior=1;
|
||||
int exterior=1;
|
||||
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
||||
}
|
||||
DhopComputeTime += usecond();
|
||||
DhopTotalTime += usecond();
|
||||
//double t2=usecond();
|
||||
//std::cout << __FILE__ << " " << __func__ << " Total Time " << DhopTotalTime << std::endl;
|
||||
//std::cout << __FILE__ << " " << __func__ << " Total Time Org " << t2-t1 << std::endl;
|
||||
//std::cout << __FILE__ << " " << __func__ << " Comml Time " << DhopCommTime << std::endl;
|
||||
//std::cout << __FILE__ << " " << __func__ << " Compute Time " << DhopComputeTime << std::endl;
|
||||
|
||||
}
|
||||
/*CHANGE END*/
|
||||
|
@ -395,11 +395,9 @@ void ImprovedStaggeredFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder
|
||||
const FermionField &in,
|
||||
FermionField &out, int dag)
|
||||
{
|
||||
#ifdef GRID_OMP
|
||||
if ( StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsAndCompute )
|
||||
DhopInternalOverlappedComms(st,lo,U,UUU,in,out,dag);
|
||||
else
|
||||
#endif
|
||||
DhopInternalSerialComms(st,lo,U,UUU,in,out,dag);
|
||||
}
|
||||
template <class Impl>
|
||||
@ -409,7 +407,6 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st
|
||||
const FermionField &in,
|
||||
FermionField &out, int dag)
|
||||
{
|
||||
#ifdef GRID_OMP
|
||||
Compressor compressor;
|
||||
int len = U.Grid()->oSites();
|
||||
|
||||
@ -418,60 +415,30 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st
|
||||
DhopFaceTime -= usecond();
|
||||
st.Prepare();
|
||||
st.HaloGather(in,compressor);
|
||||
DhopFaceTime += usecond();
|
||||
|
||||
DhopCommTime -=usecond();
|
||||
std::vector<std::vector<CommsRequest_t> > requests;
|
||||
st.CommunicateBegin(requests);
|
||||
|
||||
DhopFaceTime-=usecond();
|
||||
st.CommsMergeSHM(compressor);
|
||||
DhopFaceTime+= usecond();
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Ugly explicit thread mapping introduced for OPA reasons.
|
||||
// Removed explicit thread comms
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
DhopComputeTime -= usecond();
|
||||
#pragma omp parallel
|
||||
{
|
||||
int tid = omp_get_thread_num();
|
||||
int nthreads = omp_get_num_threads();
|
||||
int ncomms = CartesianCommunicator::nCommThreads;
|
||||
if (ncomms == -1) ncomms = 1;
|
||||
assert(nthreads > ncomms);
|
||||
|
||||
if (tid >= ncomms) {
|
||||
nthreads -= ncomms;
|
||||
int ttid = tid - ncomms;
|
||||
int n = len;
|
||||
int chunk = n / nthreads;
|
||||
int rem = n % nthreads;
|
||||
int myblock, myn;
|
||||
if (ttid < rem) {
|
||||
myblock = ttid * chunk + ttid;
|
||||
myn = chunk+1;
|
||||
} else {
|
||||
myblock = ttid*chunk + rem;
|
||||
myn = chunk;
|
||||
}
|
||||
|
||||
// do the compute
|
||||
auto U_v = U.View(CpuRead);
|
||||
auto UUU_v = UUU.View(CpuRead);
|
||||
auto in_v = in.View(CpuRead);
|
||||
auto out_v = out.View(CpuWrite);
|
||||
if (dag == DaggerYes) {
|
||||
for (int ss = myblock; ss < myblock+myn; ++ss) {
|
||||
int sU = ss;
|
||||
// Interior = 1; Exterior = 0; must implement for staggered
|
||||
Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,1,0);
|
||||
}
|
||||
} else {
|
||||
for (int ss = myblock; ss < myblock+myn; ++ss) {
|
||||
// Interior = 1; Exterior = 0;
|
||||
int sU = ss;
|
||||
Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,1,0);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
st.CommunicateThreaded();
|
||||
}
|
||||
int interior=1;
|
||||
int exterior=0;
|
||||
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
||||
}
|
||||
DhopComputeTime += usecond();
|
||||
|
||||
st.CommunicateComplete(requests);
|
||||
DhopCommTime +=usecond();
|
||||
|
||||
// First to enter, last to leave timing
|
||||
DhopFaceTime -= usecond();
|
||||
st.CommsMerge(compressor);
|
||||
@ -479,28 +446,11 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st
|
||||
|
||||
DhopComputeTime2 -= usecond();
|
||||
{
|
||||
auto U_v = U.View(CpuRead);
|
||||
auto UUU_v = UUU.View(CpuRead);
|
||||
auto in_v = in.View(CpuRead);
|
||||
auto out_v = out.View(CpuWrite);
|
||||
if (dag == DaggerYes) {
|
||||
int sz=st.surface_list.size();
|
||||
thread_for(ss,sz,{
|
||||
int sU = st.surface_list[ss];
|
||||
Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,0,1);
|
||||
});
|
||||
} else {
|
||||
int sz=st.surface_list.size();
|
||||
thread_for(ss,sz,{
|
||||
int sU = st.surface_list[ss];
|
||||
Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,0,1);
|
||||
});
|
||||
}
|
||||
int interior=0;
|
||||
int exterior=1;
|
||||
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
||||
}
|
||||
DhopComputeTime2 += usecond();
|
||||
#else
|
||||
assert(0);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -520,19 +470,11 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalSerialComms(StencilImpl &st, Le
|
||||
st.HaloExchange(in, compressor);
|
||||
DhopCommTime += usecond();
|
||||
|
||||
auto U_v = U.View(CpuRead);
|
||||
auto UUU_v = UUU.View(CpuRead);
|
||||
auto in_v = in.View(CpuRead);
|
||||
auto out_v = out.View(CpuWrite);
|
||||
DhopComputeTime -= usecond();
|
||||
if (dag == DaggerYes) {
|
||||
thread_for(sss, in.Grid()->oSites(),{
|
||||
Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), 1, sss, in_v, out_v);
|
||||
});
|
||||
} else {
|
||||
thread_for(sss, in.Grid()->oSites(),{
|
||||
Kernels::DhopSite(st, lo, U_v, UUU_v, st.CommBuf(), 1, sss, in_v, out_v);
|
||||
});
|
||||
{
|
||||
int interior=1;
|
||||
int exterior=1;
|
||||
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
||||
}
|
||||
DhopComputeTime += usecond();
|
||||
DhopTotalTime += usecond();
|
||||
|
@ -0,0 +1,499 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi, Peter Boyle
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
/////////////////////////////////
|
||||
// Constructor and gauge import
|
||||
/////////////////////////////////
|
||||
|
||||
template <class Impl>
|
||||
NaiveStaggeredFermion<Impl>::NaiveStaggeredFermion(GridCartesian &Fgrid, GridRedBlackCartesian &Hgrid,
|
||||
RealD _mass,
|
||||
RealD _c1, RealD _u0,
|
||||
const ImplParams &p)
|
||||
: Kernels(p),
|
||||
_grid(&Fgrid),
|
||||
_cbgrid(&Hgrid),
|
||||
Stencil(&Fgrid, npoint, Even, directions, displacements,p),
|
||||
StencilEven(&Hgrid, npoint, Even, directions, displacements,p), // source is Even
|
||||
StencilOdd(&Hgrid, npoint, Odd, directions, displacements,p), // source is Odd
|
||||
mass(_mass),
|
||||
Lebesgue(_grid),
|
||||
LebesgueEvenOdd(_cbgrid),
|
||||
Umu(&Fgrid),
|
||||
UmuEven(&Hgrid),
|
||||
UmuOdd(&Hgrid),
|
||||
_tmp(&Hgrid)
|
||||
{
|
||||
int vol4;
|
||||
int LLs=1;
|
||||
c1=_c1;
|
||||
u0=_u0;
|
||||
vol4= _grid->oSites();
|
||||
Stencil.BuildSurfaceList(LLs,vol4);
|
||||
vol4= _cbgrid->oSites();
|
||||
StencilEven.BuildSurfaceList(LLs,vol4);
|
||||
StencilOdd.BuildSurfaceList(LLs,vol4);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
NaiveStaggeredFermion<Impl>::NaiveStaggeredFermion(GaugeField &_U, GridCartesian &Fgrid,
|
||||
GridRedBlackCartesian &Hgrid, RealD _mass,
|
||||
RealD _c1, RealD _u0,
|
||||
const ImplParams &p)
|
||||
: NaiveStaggeredFermion(Fgrid,Hgrid,_mass,_c1,_u0,p)
|
||||
{
|
||||
ImportGauge(_U);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Momentum space propagator should be
|
||||
// https://arxiv.org/pdf/hep-lat/9712010.pdf
|
||||
//
|
||||
// mom space action.
|
||||
// gamma_mu i ( c1 sin pmu + c2 sin 3 pmu ) + m
|
||||
//
|
||||
// must track through staggered flavour/spin reduction in literature to
|
||||
// turn to free propagator for the one component chi field, a la page 4/5
|
||||
// of above link to implmement fourier based solver.
|
||||
////////////////////////////////////////////////////////////
|
||||
|
||||
template <class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::CopyGaugeCheckerboards(void)
|
||||
{
|
||||
pickCheckerboard(Even, UmuEven, Umu);
|
||||
pickCheckerboard(Odd, UmuOdd , Umu);
|
||||
}
|
||||
template <class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::ImportGauge(const GaugeField &_U)
|
||||
{
|
||||
GaugeLinkField U(GaugeGrid());
|
||||
DoubledGaugeField _UUU(GaugeGrid());
|
||||
////////////////////////////////////////////////////////
|
||||
// Double Store should take two fields for Naik and one hop separately.
|
||||
// Discard teh Naik as Naive
|
||||
////////////////////////////////////////////////////////
|
||||
Impl::DoubleStore(GaugeGrid(), _UUU, Umu, _U, _U );
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Apply scale factors to get the right fermion Kinetic term
|
||||
// Could pass coeffs into the double store to save work.
|
||||
// 0.5 ( U p(x+mu) - Udag(x-mu) p(x-mu) )
|
||||
////////////////////////////////////////////////////////
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
|
||||
U = PeekIndex<LorentzIndex>(Umu, mu);
|
||||
PokeIndex<LorentzIndex>(Umu, U*( 0.5*c1/u0), mu );
|
||||
|
||||
U = PeekIndex<LorentzIndex>(Umu, mu+4);
|
||||
PokeIndex<LorentzIndex>(Umu, U*(-0.5*c1/u0), mu+4);
|
||||
|
||||
}
|
||||
|
||||
CopyGaugeCheckerboards();
|
||||
}
|
||||
|
||||
/////////////////////////////
|
||||
// Implement the interface
|
||||
/////////////////////////////
|
||||
|
||||
template <class Impl>
|
||||
RealD NaiveStaggeredFermion<Impl>::M(const FermionField &in, FermionField &out) {
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
Dhop(in, out, DaggerNo);
|
||||
return axpy_norm(out, mass, in, out);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
RealD NaiveStaggeredFermion<Impl>::Mdag(const FermionField &in, FermionField &out) {
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
Dhop(in, out, DaggerYes);
|
||||
return axpy_norm(out, mass, in, out);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::Meooe(const FermionField &in, FermionField &out) {
|
||||
if (in.Checkerboard() == Odd) {
|
||||
DhopEO(in, out, DaggerNo);
|
||||
} else {
|
||||
DhopOE(in, out, DaggerNo);
|
||||
}
|
||||
}
|
||||
template <class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
|
||||
if (in.Checkerboard() == Odd) {
|
||||
DhopEO(in, out, DaggerYes);
|
||||
} else {
|
||||
DhopOE(in, out, DaggerYes);
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
typename FermionField::scalar_type scal(mass);
|
||||
out = scal * in;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
Mooee(in, out);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
out = (1.0 / (mass)) * in;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
MooeeInv(in, out);
|
||||
}
|
||||
|
||||
///////////////////////////////////
|
||||
// Internal
|
||||
///////////////////////////////////
|
||||
|
||||
template <class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
|
||||
GaugeField & mat,
|
||||
const FermionField &A, const FermionField &B, int dag)
|
||||
{
|
||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||
|
||||
Compressor compressor;
|
||||
|
||||
FermionField Btilde(B.Grid());
|
||||
FermionField Atilde(B.Grid());
|
||||
Atilde = A;
|
||||
|
||||
st.HaloExchange(B, compressor);
|
||||
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
|
||||
////////////////////////
|
||||
// Call the single hop
|
||||
////////////////////////
|
||||
auto U_v = U.View(CpuRead);
|
||||
auto B_v = B.View(CpuWrite);
|
||||
auto Btilde_v = Btilde.View(CpuWrite);
|
||||
thread_for(sss,B.Grid()->oSites(),{
|
||||
Kernels::DhopDirKernel(st, U_v, U_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu,1);
|
||||
});
|
||||
|
||||
assert(0);// need to figure out the force interface with a blasted three link term.
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
|
||||
|
||||
conformable(U.Grid(), _grid);
|
||||
conformable(U.Grid(), V.Grid());
|
||||
conformable(U.Grid(), mat.Grid());
|
||||
|
||||
mat.Checkerboard() = U.Checkerboard();
|
||||
|
||||
DerivInternal(Stencil, Umu, mat, U, V, dag);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
|
||||
|
||||
conformable(U.Grid(), _cbgrid);
|
||||
conformable(U.Grid(), V.Grid());
|
||||
conformable(U.Grid(), mat.Grid());
|
||||
|
||||
assert(V.Checkerboard() == Even);
|
||||
assert(U.Checkerboard() == Odd);
|
||||
mat.Checkerboard() = Odd;
|
||||
|
||||
DerivInternal(StencilEven, UmuOdd, mat, U, V, dag);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) {
|
||||
|
||||
conformable(U.Grid(), _cbgrid);
|
||||
conformable(U.Grid(), V.Grid());
|
||||
conformable(U.Grid(), mat.Grid());
|
||||
|
||||
assert(V.Checkerboard() == Odd);
|
||||
assert(U.Checkerboard() == Even);
|
||||
mat.Checkerboard() = Even;
|
||||
|
||||
DerivInternal(StencilOdd, UmuEven, mat, U, V, dag);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag)
|
||||
{
|
||||
DhopCalls+=2;
|
||||
conformable(in.Grid(), _grid); // verifies full grid
|
||||
conformable(in.Grid(), out.Grid());
|
||||
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
|
||||
DhopInternal(Stencil, Lebesgue, Umu, in, out, dag);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag)
|
||||
{
|
||||
DhopCalls+=1;
|
||||
conformable(in.Grid(), _cbgrid); // verifies half grid
|
||||
conformable(in.Grid(), out.Grid()); // drops the cb check
|
||||
|
||||
assert(in.Checkerboard() == Even);
|
||||
out.Checkerboard() = Odd;
|
||||
|
||||
DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, in, out, dag);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::DhopEO(const FermionField &in, FermionField &out, int dag)
|
||||
{
|
||||
DhopCalls+=1;
|
||||
conformable(in.Grid(), _cbgrid); // verifies half grid
|
||||
conformable(in.Grid(), out.Grid()); // drops the cb check
|
||||
|
||||
assert(in.Checkerboard() == Odd);
|
||||
out.Checkerboard() = Even;
|
||||
|
||||
DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, in, out, dag);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp)
|
||||
{
|
||||
DhopDir(in, out, dir, disp);
|
||||
}
|
||||
template <class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::MdirAll(const FermionField &in, std::vector<FermionField> &out)
|
||||
{
|
||||
assert(0); // Not implemented yet
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp)
|
||||
{
|
||||
|
||||
Compressor compressor;
|
||||
Stencil.HaloExchange(in, compressor);
|
||||
auto Umu_v = Umu.View(CpuRead);
|
||||
auto in_v = in.View(CpuRead);
|
||||
auto out_v = out.View(CpuWrite);
|
||||
// thread_for( sss, in.Grid()->oSites(),{
|
||||
// Kernels::DhopDirKernel(Stencil, Umu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dir, disp);
|
||||
// });
|
||||
assert(0);
|
||||
};
|
||||
|
||||
|
||||
template <class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
const FermionField &in,
|
||||
FermionField &out, int dag)
|
||||
{
|
||||
if ( StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsAndCompute )
|
||||
DhopInternalOverlappedComms(st,lo,U,in,out,dag);
|
||||
else
|
||||
DhopInternalSerialComms(st,lo,U,in,out,dag);
|
||||
}
|
||||
template <class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
const FermionField &in,
|
||||
FermionField &out, int dag)
|
||||
{
|
||||
Compressor compressor;
|
||||
int len = U.Grid()->oSites();
|
||||
|
||||
DhopTotalTime -= usecond();
|
||||
|
||||
DhopFaceTime -= usecond();
|
||||
st.Prepare();
|
||||
st.HaloGather(in,compressor);
|
||||
DhopFaceTime += usecond();
|
||||
|
||||
DhopCommTime -=usecond();
|
||||
std::vector<std::vector<CommsRequest_t> > requests;
|
||||
st.CommunicateBegin(requests);
|
||||
|
||||
DhopFaceTime-=usecond();
|
||||
st.CommsMergeSHM(compressor);
|
||||
DhopFaceTime+= usecond();
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Removed explicit thread comms
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
DhopComputeTime -= usecond();
|
||||
{
|
||||
int interior=1;
|
||||
int exterior=0;
|
||||
Kernels::DhopNaive(st,lo,U,in,out,dag,interior,exterior);
|
||||
}
|
||||
DhopComputeTime += usecond();
|
||||
|
||||
st.CommunicateComplete(requests);
|
||||
DhopCommTime +=usecond();
|
||||
|
||||
// First to enter, last to leave timing
|
||||
DhopFaceTime -= usecond();
|
||||
st.CommsMerge(compressor);
|
||||
DhopFaceTime -= usecond();
|
||||
|
||||
DhopComputeTime2 -= usecond();
|
||||
{
|
||||
int interior=0;
|
||||
int exterior=1;
|
||||
Kernels::DhopNaive(st,lo,U,in,out,dag,interior,exterior);
|
||||
}
|
||||
DhopComputeTime2 += usecond();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::DhopInternalSerialComms(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
const FermionField &in,
|
||||
FermionField &out, int dag)
|
||||
{
|
||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||
|
||||
DhopTotalTime -= usecond();
|
||||
|
||||
DhopCommTime -= usecond();
|
||||
Compressor compressor;
|
||||
st.HaloExchange(in, compressor);
|
||||
DhopCommTime += usecond();
|
||||
|
||||
DhopComputeTime -= usecond();
|
||||
{
|
||||
int interior=1;
|
||||
int exterior=1;
|
||||
Kernels::DhopNaive(st,lo,U,in,out,dag,interior,exterior);
|
||||
}
|
||||
DhopComputeTime += usecond();
|
||||
DhopTotalTime += usecond();
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Reporting
|
||||
////////////////////////////////////////////////////////////////
|
||||
template<class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::Report(void)
|
||||
{
|
||||
Coordinate latt = _grid->GlobalDimensions();
|
||||
RealD volume = 1; for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
|
||||
RealD NP = _grid->_Nprocessors;
|
||||
RealD NN = _grid->NodeCount();
|
||||
|
||||
std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "NaiveStaggeredFermion Number of DhopEO Calls : "
|
||||
<< DhopCalls << std::endl;
|
||||
std::cout << GridLogMessage << "NaiveStaggeredFermion TotalTime /Calls : "
|
||||
<< DhopTotalTime / DhopCalls << " us" << std::endl;
|
||||
std::cout << GridLogMessage << "NaiveStaggeredFermion CommTime /Calls : "
|
||||
<< DhopCommTime / DhopCalls << " us" << std::endl;
|
||||
std::cout << GridLogMessage << "NaiveStaggeredFermion ComputeTime/Calls : "
|
||||
<< DhopComputeTime / DhopCalls << " us" << std::endl;
|
||||
|
||||
// Average the compute time
|
||||
_grid->GlobalSum(DhopComputeTime);
|
||||
DhopComputeTime/=NP;
|
||||
|
||||
RealD mflops = 1154*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting
|
||||
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl;
|
||||
|
||||
RealD Fullmflops = 1154*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting
|
||||
std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "NaiveStaggeredFermion Stencil" <<std::endl; Stencil.Report();
|
||||
std::cout << GridLogMessage << "NaiveStaggeredFermion StencilEven"<<std::endl; StencilEven.Report();
|
||||
std::cout << GridLogMessage << "NaiveStaggeredFermion StencilOdd" <<std::endl; StencilOdd.Report();
|
||||
}
|
||||
template<class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::ZeroCounters(void)
|
||||
{
|
||||
DhopCalls = 0;
|
||||
DhopTotalTime = 0;
|
||||
DhopCommTime = 0;
|
||||
DhopComputeTime = 0;
|
||||
DhopFaceTime = 0;
|
||||
|
||||
Stencil.ZeroCounters();
|
||||
StencilEven.ZeroCounters();
|
||||
StencilOdd.ZeroCounters();
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Conserved current - not yet implemented.
|
||||
////////////////////////////////////////////////////////
|
||||
template <class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
|
||||
PropagatorField &q_in_2,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &src,
|
||||
Current curr_type,
|
||||
unsigned int mu)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &src,
|
||||
Current curr_type,
|
||||
unsigned int mu,
|
||||
unsigned int tmin,
|
||||
unsigned int tmax,
|
||||
ComplexField &lattice_cmplx)
|
||||
{
|
||||
assert(0);
|
||||
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
@ -618,10 +618,10 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
void StaggeredKernels<Impl>::DhopSiteAsm(StencilView &st,
|
||||
DoubledGaugeFieldView &U,
|
||||
DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
SiteSpinor *buf, int sF,
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out,int dag)
|
||||
{
|
||||
assert(0);
|
||||
@ -680,12 +680,13 @@ void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
gauge2 =(uint64_t)&UU[sU]( Z ); \
|
||||
gauge3 =(uint64_t)&UU[sU]( T );
|
||||
|
||||
|
||||
// This is the single precision 5th direction vectorised kernel
|
||||
#include <Grid/simd/Intel512single.h>
|
||||
template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilView &st,
|
||||
DoubledGaugeFieldView &U,
|
||||
DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
SiteSpinor *buf, int sF,
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out,int dag)
|
||||
{
|
||||
#ifdef AVX512
|
||||
@ -702,9 +703,10 @@ template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl
|
||||
StencilEntry *SE2;
|
||||
StencilEntry *SE3;
|
||||
|
||||
for(int s=0;s<LLs;s++){
|
||||
// for(int s=0;s<LLs;s++){
|
||||
|
||||
int sF=s+LLs*sU;
|
||||
// int sF=s+LLs*sU;
|
||||
{
|
||||
// Xp, Yp, Zp, Tp
|
||||
PREPARE(Xp,Yp,Zp,Tp,0,U);
|
||||
LOAD_CHI(addr0,addr1,addr2,addr3);
|
||||
@ -736,10 +738,10 @@ template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl
|
||||
}
|
||||
|
||||
#include <Grid/simd/Intel512double.h>
|
||||
template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilView &st,
|
||||
DoubledGaugeFieldView &U,
|
||||
DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
SiteSpinor *buf, int sF,
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out, int dag)
|
||||
{
|
||||
#ifdef AVX512
|
||||
@ -756,8 +758,9 @@ template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl
|
||||
StencilEntry *SE2;
|
||||
StencilEntry *SE3;
|
||||
|
||||
for(int s=0;s<LLs;s++){
|
||||
int sF=s+LLs*sU;
|
||||
// for(int s=0;s<LLs;s++){
|
||||
// int sF=s+LLs*sU;
|
||||
{
|
||||
// Xp, Yp, Zp, Tp
|
||||
PREPARE(Xp,Yp,Zp,Tp,0,U);
|
||||
LOAD_CHI(addr0,addr1,addr2,addr3);
|
||||
@ -821,10 +824,10 @@ template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl
|
||||
// This is the single precision 5th direction vectorised kernel
|
||||
|
||||
#include <Grid/simd/Intel512single.h>
|
||||
template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilView &st,
|
||||
DoubledGaugeFieldView &U,
|
||||
DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
SiteSpinor *buf, int sF,
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out,int dag)
|
||||
{
|
||||
#ifdef AVX512
|
||||
@ -841,9 +844,9 @@ template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st,
|
||||
StencilEntry *SE2;
|
||||
StencilEntry *SE3;
|
||||
|
||||
for(int s=0;s<LLs;s++){
|
||||
|
||||
int sF=s+LLs*sU;
|
||||
// for(int s=0;s<LLs;s++){
|
||||
// int sF=s+LLs*sU;
|
||||
{
|
||||
// Xp, Yp, Zp, Tp
|
||||
PREPARE(Xp,Yp,Zp,Tp,0,U);
|
||||
LOAD_CHIa(addr0,addr1);
|
||||
@ -890,10 +893,10 @@ template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st,
|
||||
}
|
||||
|
||||
#include <Grid/simd/Intel512double.h>
|
||||
template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilView &st,
|
||||
DoubledGaugeFieldView &U,
|
||||
DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
SiteSpinor *buf, int sF,
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out,int dag)
|
||||
{
|
||||
#ifdef AVX512
|
||||
@ -910,9 +913,9 @@ template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st,
|
||||
StencilEntry *SE2;
|
||||
StencilEntry *SE3;
|
||||
|
||||
for(int s=0;s<LLs;s++){
|
||||
|
||||
int sF=s+LLs*sU;
|
||||
// for(int s=0;s<LLs;s++){
|
||||
// int sF=s+LLs*sU;
|
||||
{
|
||||
// Xp, Yp, Zp, Tp
|
||||
PREPARE(Xp,Yp,Zp,Tp,0,U);
|
||||
LOAD_CHIa(addr0,addr1);
|
||||
|
@ -146,9 +146,10 @@ NAMESPACE_BEGIN(Grid);
|
||||
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo,
|
||||
template <int Naik>
|
||||
void StaggeredKernels<Impl>::DhopSiteHand(StencilView &st,
|
||||
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
SiteSpinor *buf, int sF, int sU,
|
||||
const FermionFieldView &in, FermionFieldView &out,int dag)
|
||||
{
|
||||
typedef typename Simd::scalar_type S;
|
||||
@ -181,8 +182,9 @@ void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo,
|
||||
StencilEntry *SE;
|
||||
int skew;
|
||||
|
||||
for(int s=0;s<LLs;s++){
|
||||
int sF=s+LLs*sU;
|
||||
// for(int s=0;s<LLs;s++){
|
||||
// int sF=s+LLs*sU;
|
||||
{
|
||||
|
||||
skew = 0;
|
||||
HAND_STENCIL_LEG_BEGIN(Xp,3,skew,even);
|
||||
@ -193,6 +195,7 @@ void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo,
|
||||
HAND_STENCIL_LEG (U,Ym,2,skew,odd);
|
||||
HAND_STENCIL_LEG (U,Zm,1,skew,even);
|
||||
HAND_STENCIL_LEG (U,Tm,0,skew,odd);
|
||||
if (Naik) {
|
||||
skew = 8;
|
||||
HAND_STENCIL_LEG(UUU,Xp,3,skew,even);
|
||||
HAND_STENCIL_LEG(UUU,Yp,2,skew,odd);
|
||||
@ -202,7 +205,7 @@ void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo,
|
||||
HAND_STENCIL_LEG(UUU,Ym,2,skew,odd);
|
||||
HAND_STENCIL_LEG(UUU,Zm,1,skew,even);
|
||||
HAND_STENCIL_LEG(UUU,Tm,0,skew,odd);
|
||||
|
||||
}
|
||||
if ( dag ) {
|
||||
result()()(0) = - even_0 - odd_0;
|
||||
result()()(1) = - even_1 - odd_1;
|
||||
@ -218,9 +221,10 @@ void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo,
|
||||
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
|
||||
template <int Naik>
|
||||
void StaggeredKernels<Impl>::DhopSiteHandInt(StencilView &st,
|
||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
SiteSpinor *buf, int sF, int sU,
|
||||
const FermionFieldView &in, FermionFieldView &out,int dag)
|
||||
{
|
||||
typedef typename Simd::scalar_type S;
|
||||
@ -253,8 +257,9 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
|
||||
StencilEntry *SE;
|
||||
int skew;
|
||||
|
||||
for(int s=0;s<LLs;s++){
|
||||
int sF=s+LLs*sU;
|
||||
// for(int s=0;s<LLs;s++){
|
||||
// int sF=s+LLs*sU;
|
||||
{
|
||||
|
||||
even_0 = Zero(); even_1 = Zero(); even_2 = Zero();
|
||||
odd_0 = Zero(); odd_1 = Zero(); odd_2 = Zero();
|
||||
@ -268,6 +273,7 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
|
||||
HAND_STENCIL_LEG_INT(U,Ym,2,skew,odd);
|
||||
HAND_STENCIL_LEG_INT(U,Zm,1,skew,even);
|
||||
HAND_STENCIL_LEG_INT(U,Tm,0,skew,odd);
|
||||
if (Naik) {
|
||||
skew = 8;
|
||||
HAND_STENCIL_LEG_INT(UUU,Xp,3,skew,even);
|
||||
HAND_STENCIL_LEG_INT(UUU,Yp,2,skew,odd);
|
||||
@ -277,7 +283,7 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
|
||||
HAND_STENCIL_LEG_INT(UUU,Ym,2,skew,odd);
|
||||
HAND_STENCIL_LEG_INT(UUU,Zm,1,skew,even);
|
||||
HAND_STENCIL_LEG_INT(UUU,Tm,0,skew,odd);
|
||||
|
||||
}
|
||||
// Assume every site must be connected to at least one interior point. No 1^4 subvols.
|
||||
if ( dag ) {
|
||||
result()()(0) = - even_0 - odd_0;
|
||||
@ -294,9 +300,10 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
|
||||
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
|
||||
template <int Naik>
|
||||
void StaggeredKernels<Impl>::DhopSiteHandExt(StencilView &st,
|
||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
SiteSpinor *buf, int sF, int sU,
|
||||
const FermionFieldView &in, FermionFieldView &out,int dag)
|
||||
{
|
||||
typedef typename Simd::scalar_type S;
|
||||
@ -329,8 +336,9 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
|
||||
StencilEntry *SE;
|
||||
int skew;
|
||||
|
||||
for(int s=0;s<LLs;s++){
|
||||
int sF=s+LLs*sU;
|
||||
// for(int s=0;s<LLs;s++){
|
||||
// int sF=s+LLs*sU;
|
||||
{
|
||||
|
||||
even_0 = Zero(); even_1 = Zero(); even_2 = Zero();
|
||||
odd_0 = Zero(); odd_1 = Zero(); odd_2 = Zero();
|
||||
@ -344,6 +352,7 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
|
||||
HAND_STENCIL_LEG_EXT(U,Ym,2,skew,odd);
|
||||
HAND_STENCIL_LEG_EXT(U,Zm,1,skew,even);
|
||||
HAND_STENCIL_LEG_EXT(U,Tm,0,skew,odd);
|
||||
if (Naik) {
|
||||
skew = 8;
|
||||
HAND_STENCIL_LEG_EXT(UUU,Xp,3,skew,even);
|
||||
HAND_STENCIL_LEG_EXT(UUU,Yp,2,skew,odd);
|
||||
@ -353,7 +362,7 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
|
||||
HAND_STENCIL_LEG_EXT(UUU,Ym,2,skew,odd);
|
||||
HAND_STENCIL_LEG_EXT(UUU,Zm,1,skew,even);
|
||||
HAND_STENCIL_LEG_EXT(UUU,Tm,0,skew,odd);
|
||||
|
||||
}
|
||||
// Add sum of all exterior connected stencil legs
|
||||
if ( nmu ) {
|
||||
if ( dag ) {
|
||||
@ -370,6 +379,7 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
#define DHOP_SITE_HAND_INSTANTIATE(IMPL) \
|
||||
template void StaggeredKernels<IMPL>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, \
|
||||
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \
|
||||
@ -385,7 +395,7 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \
|
||||
SiteSpinor *buf, int LLs, int sU, \
|
||||
const FermionFieldView &in, FermionFieldView &out, int dag); \
|
||||
|
||||
*/
|
||||
#undef LOAD_CHI
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -78,10 +78,12 @@ StaggeredKernels<Impl>::StaggeredKernels(const ImplParams &p) : Base(p){};
|
||||
// Int, Ext, Int+Ext cases for comms overlap
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo,
|
||||
template <int Naik>
|
||||
void StaggeredKernels<Impl>::DhopSiteGeneric(StencilView &st,
|
||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
const FermionFieldView &in, FermionFieldView &out, int dag) {
|
||||
SiteSpinor *buf, int sF, int sU,
|
||||
const FermionFieldView &in, FermionFieldView &out, int dag)
|
||||
{
|
||||
const SiteSpinor *chi_p;
|
||||
SiteSpinor chi;
|
||||
SiteSpinor Uchi;
|
||||
@ -89,8 +91,10 @@ void StaggeredKernels<Impl>::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo,
|
||||
int ptype;
|
||||
int skew;
|
||||
|
||||
for(int s=0;s<LLs;s++){
|
||||
int sF=LLs*sU+s;
|
||||
// for(int s=0;s<LLs;s++){
|
||||
//
|
||||
// int sF=LLs*sU+s;
|
||||
{
|
||||
skew = 0;
|
||||
GENERIC_STENCIL_LEG(U,Xp,skew,Impl::multLink);
|
||||
GENERIC_STENCIL_LEG(U,Yp,skew,Impl::multLinkAdd);
|
||||
@ -100,6 +104,7 @@ void StaggeredKernels<Impl>::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo,
|
||||
GENERIC_STENCIL_LEG(U,Ym,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(U,Zm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(U,Tm,skew,Impl::multLinkAdd);
|
||||
if ( Naik ) {
|
||||
skew=8;
|
||||
GENERIC_STENCIL_LEG(UUU,Xp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(UUU,Yp,skew,Impl::multLinkAdd);
|
||||
@ -109,6 +114,7 @@ void StaggeredKernels<Impl>::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo,
|
||||
GENERIC_STENCIL_LEG(UUU,Ym,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(UUU,Zm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(UUU,Tm,skew,Impl::multLinkAdd);
|
||||
}
|
||||
if ( dag ) {
|
||||
Uchi = - Uchi;
|
||||
}
|
||||
@ -120,9 +126,10 @@ void StaggeredKernels<Impl>::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo,
|
||||
// Only contributions from interior of our node
|
||||
///////////////////////////////////////////////////
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &lo,
|
||||
template <int Naik>
|
||||
void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilView &st,
|
||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
SiteSpinor *buf, int sF, int sU,
|
||||
const FermionFieldView &in, FermionFieldView &out,int dag) {
|
||||
const SiteSpinor *chi_p;
|
||||
SiteSpinor chi;
|
||||
@ -131,8 +138,9 @@ void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &
|
||||
int ptype;
|
||||
int skew ;
|
||||
|
||||
for(int s=0;s<LLs;s++){
|
||||
int sF=LLs*sU+s;
|
||||
// for(int s=0;s<LLs;s++){
|
||||
// int sF=LLs*sU+s;
|
||||
{
|
||||
skew = 0;
|
||||
Uchi=Zero();
|
||||
GENERIC_STENCIL_LEG_INT(U,Xp,skew,Impl::multLinkAdd);
|
||||
@ -143,6 +151,7 @@ void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &
|
||||
GENERIC_STENCIL_LEG_INT(U,Ym,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(U,Zm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(U,Tm,skew,Impl::multLinkAdd);
|
||||
if ( Naik ) {
|
||||
skew=8;
|
||||
GENERIC_STENCIL_LEG_INT(UUU,Xp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(UUU,Yp,skew,Impl::multLinkAdd);
|
||||
@ -152,6 +161,7 @@ void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &
|
||||
GENERIC_STENCIL_LEG_INT(UUU,Ym,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(UUU,Zm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(UUU,Tm,skew,Impl::multLinkAdd);
|
||||
}
|
||||
if ( dag ) {
|
||||
Uchi = - Uchi;
|
||||
}
|
||||
@ -164,9 +174,10 @@ void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &
|
||||
// Only contributions from exterior of our node
|
||||
///////////////////////////////////////////////////
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &lo,
|
||||
template <int Naik>
|
||||
void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilView &st,
|
||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
SiteSpinor *buf, int sF, int sU,
|
||||
const FermionFieldView &in, FermionFieldView &out,int dag) {
|
||||
const SiteSpinor *chi_p;
|
||||
// SiteSpinor chi;
|
||||
@ -176,8 +187,9 @@ void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &
|
||||
int nmu=0;
|
||||
int skew ;
|
||||
|
||||
for(int s=0;s<LLs;s++){
|
||||
int sF=LLs*sU+s;
|
||||
// for(int s=0;s<LLs;s++){
|
||||
// int sF=LLs*sU+s;
|
||||
{
|
||||
skew = 0;
|
||||
Uchi=Zero();
|
||||
GENERIC_STENCIL_LEG_EXT(U,Xp,skew,Impl::multLinkAdd);
|
||||
@ -188,6 +200,7 @@ void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &
|
||||
GENERIC_STENCIL_LEG_EXT(U,Ym,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(U,Zm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(U,Tm,skew,Impl::multLinkAdd);
|
||||
if ( Naik ) {
|
||||
skew=8;
|
||||
GENERIC_STENCIL_LEG_EXT(UUU,Xp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(UUU,Yp,skew,Impl::multLinkAdd);
|
||||
@ -197,7 +210,7 @@ void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &
|
||||
GENERIC_STENCIL_LEG_EXT(UUU,Ym,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(UUU,Zm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(UUU,Tm,skew,Impl::multLinkAdd);
|
||||
|
||||
}
|
||||
if ( nmu ) {
|
||||
if ( dag ) {
|
||||
out[sF] = out[sF] - Uchi;
|
||||
@ -211,72 +224,9 @@ void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
// Driving / wrapping routine to select right kernel
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
const FermionFieldView &in, FermionFieldView &out,
|
||||
int interior,int exterior)
|
||||
{
|
||||
int dag=1;
|
||||
DhopSite(st,lo,U,UUU,buf,LLs,sU,in,out,dag,interior,exterior);
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
const FermionFieldView &in, FermionFieldView &out,
|
||||
int interior,int exterior)
|
||||
{
|
||||
int dag=0;
|
||||
DhopSite(st,lo,U,UUU,buf,LLs,sU,in,out,dag,interior,exterior);
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out,
|
||||
int dag,int interior,int exterior)
|
||||
{
|
||||
switch(Opt) {
|
||||
#ifdef AVX512
|
||||
case OptInlineAsm:
|
||||
if ( interior && exterior ) {
|
||||
DhopSiteAsm(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
||||
} else {
|
||||
std::cout << GridLogError << "Cannot overlap comms and compute with Staggered assembly"<<std::endl;
|
||||
assert(0);
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
case OptHandUnroll:
|
||||
if ( interior && exterior ) {
|
||||
DhopSiteHand (st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
||||
} else if ( interior ) {
|
||||
DhopSiteHandInt(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
||||
} else if ( exterior ) {
|
||||
DhopSiteHandExt(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
||||
}
|
||||
break;
|
||||
case OptGeneric:
|
||||
if ( interior && exterior ) {
|
||||
DhopSiteGeneric (st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
||||
} else if ( interior ) {
|
||||
DhopSiteGenericInt(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
||||
} else if ( exterior ) {
|
||||
DhopSiteGenericExt(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
std::cout<<"Oops Opt = "<<Opt<<std::endl;
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int sF,
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int disp)
|
||||
void StaggeredKernels<Impl>::DhopDirKernel(StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf,
|
||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dir,int disp)
|
||||
{
|
||||
// Disp should be either +1,-1,+3,-3
|
||||
// What about "dag" ?
|
||||
@ -285,6 +235,108 @@ void StaggeredKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeFieldVi
|
||||
assert(0);
|
||||
}
|
||||
|
||||
#define KERNEL_CALLNB(A,improved) \
|
||||
const uint64_t NN = Nsite*Ls; \
|
||||
accelerator_forNB( ss, NN, Simd::Nsimd(), { \
|
||||
int sF = ss; \
|
||||
int sU = ss/Ls; \
|
||||
ThisKernel:: template A<improved>(st_v,U_v,UUU_v,buf,sF,sU,in_v,out_v,dag); \
|
||||
});
|
||||
|
||||
#define KERNEL_CALL(A,improved) KERNEL_CALLNB(A,improved); accelerator_barrier();
|
||||
|
||||
#define ASM_CALL(A) \
|
||||
const uint64_t NN = Nsite*Ls; \
|
||||
thread_for( ss, NN, { \
|
||||
int sF = ss; \
|
||||
int sU = ss/Ls; \
|
||||
ThisKernel::A(st_v,U_v,UUU_v,buf,sF,sU,in_v,out_v,dag); \
|
||||
});
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopImproved(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
const FermionField &in, FermionField &out, int dag, int interior,int exterior)
|
||||
{
|
||||
GridBase *FGrid=in.Grid();
|
||||
GridBase *UGrid=U.Grid();
|
||||
typedef StaggeredKernels<Impl> ThisKernel;
|
||||
auto UUU_v = UUU.View(AcceleratorRead);
|
||||
auto U_v = U.View(AcceleratorRead);
|
||||
auto in_v = in.View(AcceleratorRead);
|
||||
auto out_v = out.View(AcceleratorWrite);
|
||||
auto st_v = st.View(AcceleratorRead);
|
||||
SiteSpinor * buf = st.CommBuf();
|
||||
|
||||
int Ls=1;
|
||||
if(FGrid->Nd()==UGrid->Nd()+1){
|
||||
Ls = FGrid->_rdimensions[0];
|
||||
}
|
||||
int Nsite = UGrid->oSites();
|
||||
|
||||
if( interior && exterior ) {
|
||||
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGeneric,1); return;}
|
||||
#ifndef GRID_CUDA
|
||||
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHand,1); return;}
|
||||
if (Opt == OptInlineAsm ) { ASM_CALL(DhopSiteAsm); return;}
|
||||
#endif
|
||||
} else if( interior ) {
|
||||
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericInt,1); return;}
|
||||
#ifndef GRID_CUDA
|
||||
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandInt,1); return;}
|
||||
#endif
|
||||
} else if( exterior ) {
|
||||
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericExt,1); return;}
|
||||
#ifndef GRID_CUDA
|
||||
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandExt,1); return;}
|
||||
#endif
|
||||
}
|
||||
assert(0 && " Kernel optimisation case not covered ");
|
||||
}
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopNaive(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
const FermionField &in, FermionField &out, int dag, int interior,int exterior)
|
||||
{
|
||||
GridBase *FGrid=in.Grid();
|
||||
GridBase *UGrid=U.Grid();
|
||||
typedef StaggeredKernels<Impl> ThisKernel;
|
||||
auto UUU_v= U.View(AcceleratorRead);
|
||||
auto U_v = U.View(AcceleratorRead);
|
||||
auto in_v = in.View(AcceleratorRead);
|
||||
auto out_v = out.View(AcceleratorWrite);
|
||||
auto st_v = st.View(AcceleratorRead);
|
||||
SiteSpinor * buf = st.CommBuf();
|
||||
|
||||
int Ls=1;
|
||||
if(FGrid->Nd()==UGrid->Nd()+1){
|
||||
Ls = FGrid->_rdimensions[0];
|
||||
}
|
||||
int Nsite = UGrid->oSites();
|
||||
|
||||
if( interior && exterior ) {
|
||||
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGeneric,0); return;}
|
||||
#ifndef GRID_CUDA
|
||||
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHand,0); return;}
|
||||
#endif
|
||||
} else if( interior ) {
|
||||
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericInt,0); return;}
|
||||
#ifndef GRID_CUDA
|
||||
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandInt,0); return;}
|
||||
#endif
|
||||
} else if( exterior ) {
|
||||
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericExt,0); return;}
|
||||
#ifndef GRID_CUDA
|
||||
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandExt,0); return;}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#undef KERNEL_CALLNB
|
||||
#undef KERNEL_CALL
|
||||
#undef ASM_CALL
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
||||
|
@ -496,5 +496,9 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
|
||||
assert(0 && " Kernel optimisation case not covered ");
|
||||
}
|
||||
|
||||
#undef KERNEL_CALLNB
|
||||
#undef KERNEL_CALL
|
||||
#undef ASM_CALL
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
@ -0,0 +1,36 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi, Peter Boyle
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
const std::vector<int> NaiveStaggeredFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, 3});
|
||||
const std::vector<int> NaiveStaggeredFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1});
|
||||
|
||||
NAMESPACE_END(Grid);
|
@ -0,0 +1,37 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/NaiveStaggeredFermion.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi, Peter Boyle
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/Grid.h>
|
||||
#include <Grid/qcd/action/fermion/implementation/NaiveStaggeredFermionImplementation.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
#include "impl.h"
|
||||
template class NaiveStaggeredFermion<IMPLEMENTATION>;
|
||||
|
||||
NAMESPACE_END(Grid);
|
@ -0,0 +1 @@
|
||||
../NaiveStaggeredFermionInstantiation.cc.master
|
@ -0,0 +1 @@
|
||||
../NaiveStaggeredFermionInstantiation.cc.master
|
@ -88,6 +88,7 @@ done
|
||||
CC_LIST=" \
|
||||
ImprovedStaggeredFermion5DInstantiation \
|
||||
ImprovedStaggeredFermionInstantiation \
|
||||
NaiveStaggeredFermionInstantiation \
|
||||
StaggeredKernelsInstantiation "
|
||||
|
||||
for impl in $STAG_IMPL_LIST
|
||||
|
@ -87,26 +87,6 @@ int main (int argc, char ** argv)
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||
}
|
||||
ref = Zero();
|
||||
/*
|
||||
{ // Naive wilson implementation
|
||||
ref = Zero();
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
// ref = src + Gamma(Gamma::GammaX)* src ; // 1-gamma_x
|
||||
tmp = U[mu]*Cshift(src,mu,1);
|
||||
for(int i=0;i<ref._odata.size();i++){
|
||||
ref[i]+= tmp[i] - Gamma(Gmu[mu])*tmp[i]; ;
|
||||
}
|
||||
|
||||
tmp =adj(U[mu])*src;
|
||||
tmp =Cshift(tmp,mu,-1);
|
||||
for(int i=0;i<ref._odata.size();i++){
|
||||
ref[i]+= tmp[i] + Gamma(Gmu[mu])*tmp[i]; ;
|
||||
}
|
||||
}
|
||||
}
|
||||
ref = -0.5*ref;
|
||||
*/
|
||||
|
||||
RealD mass=0.1;
|
||||
RealD c1=9.0/8.0;
|
||||
@ -125,10 +105,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
std::cout<<GridLogMessage << "Called Ds"<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||
err = ref-result;
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||
|
||||
Grid_finalize();
|
||||
}
|
||||
|
282
tests/core/Test_staggered_naive.cc
Normal file
282
tests/core/Test_staggered_naive.cc
Normal file
@ -0,0 +1,282 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./benchmarks/Benchmark_wilson.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
;
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
Coordinate latt_size = GridDefaultLatt();
|
||||
Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
|
||||
Coordinate mpi_layout = GridDefaultMpi();
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
GridRedBlackCartesian RBGrid(&Grid);
|
||||
|
||||
int threads = GridThread::GetThreads();
|
||||
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||
std::cout<<GridLogMessage << "Grid floating point word size is REALF"<< sizeof(RealF)<<std::endl;
|
||||
std::cout<<GridLogMessage << "Grid floating point word size is REALD"<< sizeof(RealD)<<std::endl;
|
||||
std::cout<<GridLogMessage << "Grid floating point word size is REAL"<< sizeof(Real)<<std::endl;
|
||||
|
||||
std::vector<int> seeds({1,2,3,4});
|
||||
GridParallelRNG pRNG(&Grid);
|
||||
pRNG.SeedFixedIntegers(seeds);
|
||||
// pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
|
||||
|
||||
typedef typename NaiveStaggeredFermionR::FermionField FermionField;
|
||||
typedef typename NaiveStaggeredFermionR::ComplexField ComplexField;
|
||||
typename NaiveStaggeredFermionR::ImplParams params;
|
||||
|
||||
FermionField src (&Grid); random(pRNG,src);
|
||||
FermionField result(&Grid); result=Zero();
|
||||
FermionField ref(&Grid); ref=Zero();
|
||||
FermionField tmp(&Grid); tmp=Zero();
|
||||
FermionField err(&Grid); tmp=Zero();
|
||||
FermionField phi (&Grid); random(pRNG,phi);
|
||||
FermionField chi (&Grid); random(pRNG,chi);
|
||||
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,&Grid);
|
||||
|
||||
|
||||
double volume=1;
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
volume=volume*latt_size[mu];
|
||||
}
|
||||
|
||||
// Only one non-zero (y)
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||
/* Debug force unit
|
||||
U[mu] = 1.0;
|
||||
PokeIndex<LorentzIndex>(Umu,U[mu],mu);
|
||||
*/
|
||||
}
|
||||
|
||||
ref = Zero();
|
||||
|
||||
RealD mass=0.1;
|
||||
RealD c1=9.0/8.0;
|
||||
RealD u0=1.0;
|
||||
|
||||
{ // Simple improved staggered implementation
|
||||
ref = Zero();
|
||||
RealD c1tad = 0.5*c1/u0;
|
||||
|
||||
Lattice<iScalar<vInteger> > coor(&Grid);
|
||||
|
||||
Lattice<iScalar<vInteger> > x(&Grid); LatticeCoordinate(x,0);
|
||||
Lattice<iScalar<vInteger> > y(&Grid); LatticeCoordinate(y,1);
|
||||
Lattice<iScalar<vInteger> > z(&Grid); LatticeCoordinate(z,2);
|
||||
Lattice<iScalar<vInteger> > t(&Grid); LatticeCoordinate(t,3);
|
||||
|
||||
Lattice<iScalar<vInteger> > lin_z(&Grid); lin_z=x+y;
|
||||
Lattice<iScalar<vInteger> > lin_t(&Grid); lin_t=x+y+z;
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
||||
// Staggered Phase.
|
||||
ComplexField phases(&Grid); phases=1.0;
|
||||
|
||||
if ( mu == 1 ) phases = where( mod(x ,2)==(Integer)0, phases,-phases);
|
||||
if ( mu == 2 ) phases = where( mod(lin_z,2)==(Integer)0, phases,-phases);
|
||||
if ( mu == 3 ) phases = where( mod(lin_t,2)==(Integer)0, phases,-phases);
|
||||
|
||||
tmp = PeriodicBC::CovShiftForward(U[mu],mu,src);
|
||||
ref = ref +c1tad*tmp*phases; // Forward 1 hop
|
||||
|
||||
tmp = PeriodicBC::CovShiftBackward(U[mu],mu,src);
|
||||
ref = ref -c1tad*tmp*phases; // Backward 1 hop
|
||||
|
||||
}
|
||||
// ref = ref + mass * src;
|
||||
}
|
||||
|
||||
NaiveStaggeredFermionR Ds(Umu,Grid,RBGrid,mass,c1,u0,params);
|
||||
|
||||
|
||||
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage<<"= Testing Dhop against cshift implementation "<<std::endl;
|
||||
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage << "Calling Ds"<<std::endl;
|
||||
int ncall=1000;
|
||||
double t0=usecond();
|
||||
for(int i=0;i<ncall;i++){
|
||||
Ds.Dhop(src,result,0);
|
||||
}
|
||||
double t1=usecond();
|
||||
double t2;
|
||||
double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // == 66*16 + == 1146
|
||||
|
||||
std::cout<<GridLogMessage << "Called Ds"<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||
|
||||
err = ref-result;
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage<<"= Testing that Deo + Doe = Dunprec "<<std::endl;
|
||||
std::cout<<GridLogMessage<<"=========================================================="<<std::endl;
|
||||
|
||||
FermionField src_e (&RBGrid);
|
||||
FermionField src_o (&RBGrid);
|
||||
FermionField r_e (&RBGrid);
|
||||
FermionField r_o (&RBGrid);
|
||||
FermionField r_eo (&Grid);
|
||||
pickCheckerboard(Even,src_e,src);
|
||||
pickCheckerboard(Odd,src_o,src);
|
||||
|
||||
Ds.Meooe(src_e,r_o); std::cout<<GridLogMessage<<"Applied Meo"<<std::endl;
|
||||
Ds.Meooe(src_o,r_e); std::cout<<GridLogMessage<<"Applied Moe"<<std::endl;
|
||||
Ds.Dhop (src,ref,DaggerNo);
|
||||
|
||||
setCheckerboard(r_eo,r_o);
|
||||
setCheckerboard(r_eo,r_e);
|
||||
|
||||
err= ref - r_eo;
|
||||
std::cout<<GridLogMessage << "EO norm diff "<< norm2(err)<< " "<<norm2(ref)<< " " << norm2(r_eo) <<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage<<"= Test Ddagger is the dagger of D by requiring "<<std::endl;
|
||||
std::cout<<GridLogMessage<<"= < phi | Deo | chi > * = < chi | Deo^dag| phi> "<<std::endl;
|
||||
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||
|
||||
FermionField chi_e (&RBGrid);
|
||||
FermionField chi_o (&RBGrid);
|
||||
|
||||
FermionField dchi_e (&RBGrid);
|
||||
FermionField dchi_o (&RBGrid);
|
||||
|
||||
FermionField phi_e (&RBGrid);
|
||||
FermionField phi_o (&RBGrid);
|
||||
|
||||
FermionField dphi_e (&RBGrid);
|
||||
FermionField dphi_o (&RBGrid);
|
||||
|
||||
pickCheckerboard(Even,chi_e,chi);
|
||||
pickCheckerboard(Odd ,chi_o,chi);
|
||||
pickCheckerboard(Even,phi_e,phi);
|
||||
pickCheckerboard(Odd ,phi_o,phi);
|
||||
|
||||
Ds.Meooe(chi_e,dchi_o);
|
||||
Ds.Meooe(chi_o,dchi_e);
|
||||
Ds.MeooeDag(phi_e,dphi_o);
|
||||
Ds.MeooeDag(phi_o,dphi_e);
|
||||
|
||||
ComplexD pDce = innerProduct(phi_e,dchi_e);
|
||||
ComplexD pDco = innerProduct(phi_o,dchi_o);
|
||||
ComplexD cDpe = innerProduct(chi_e,dphi_e);
|
||||
ComplexD cDpo = innerProduct(chi_o,dphi_o);
|
||||
|
||||
std::cout<<GridLogMessage <<"e "<<pDce<<" "<<cDpe <<std::endl;
|
||||
std::cout<<GridLogMessage <<"o "<<pDco<<" "<<cDpo <<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage <<"pDce - conj(cDpo) "<< pDce-conj(cDpo) <<std::endl;
|
||||
std::cout<<GridLogMessage <<"pDco - conj(cDpe) "<< pDco-conj(cDpe) <<std::endl;
|
||||
std::cout<<GridLogMessage <<"e "<<pDce<<" "<<cDpe <<std::endl;
|
||||
std::cout<<GridLogMessage <<"o "<<pDco<<" "<<cDpo <<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage <<"pDce - conj(cDpo) "<< pDce-conj(cDpo) <<std::endl;
|
||||
std::cout<<GridLogMessage <<"pDco - conj(cDpe) "<< pDco-conj(cDpe) <<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage<<"= Test MeeInv Mee = 1 "<<std::endl;
|
||||
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||
|
||||
pickCheckerboard(Even,chi_e,chi);
|
||||
pickCheckerboard(Odd ,chi_o,chi);
|
||||
|
||||
Ds.Mooee(chi_e,src_e);
|
||||
Ds.MooeeInv(src_e,phi_e);
|
||||
|
||||
Ds.Mooee(chi_o,src_o);
|
||||
Ds.MooeeInv(src_o,phi_o);
|
||||
|
||||
setCheckerboard(phi,phi_e);
|
||||
setCheckerboard(phi,phi_o);
|
||||
|
||||
err = phi-chi;
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<< std::endl;
|
||||
|
||||
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage<<"= Test MeeInvDag MeeDag = 1 "<<std::endl;
|
||||
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||
|
||||
pickCheckerboard(Even,chi_e,chi);
|
||||
pickCheckerboard(Odd ,chi_o,chi);
|
||||
|
||||
Ds.MooeeDag(chi_e,src_e);
|
||||
Ds.MooeeInvDag(src_e,phi_e);
|
||||
|
||||
Ds.MooeeDag(chi_o,src_o);
|
||||
Ds.MooeeInvDag(src_o,phi_o);
|
||||
|
||||
setCheckerboard(phi,phi_e);
|
||||
setCheckerboard(phi,phi_o);
|
||||
|
||||
err = phi-chi;
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<< std::endl;
|
||||
|
||||
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage<<"= Test MpcDagMpc is Hermitian "<<std::endl;
|
||||
std::cout<<GridLogMessage<<"=============================================================="<<std::endl;
|
||||
|
||||
random(pRNG,phi);
|
||||
random(pRNG,chi);
|
||||
pickCheckerboard(Even,chi_e,chi);
|
||||
pickCheckerboard(Odd ,chi_o,chi);
|
||||
pickCheckerboard(Even,phi_e,phi);
|
||||
pickCheckerboard(Odd ,phi_o,phi);
|
||||
|
||||
SchurDiagMooeeOperator<NaiveStaggeredFermionR,FermionField> HermOpEO(Ds);
|
||||
HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2);
|
||||
HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2);
|
||||
|
||||
HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2);
|
||||
HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2);
|
||||
|
||||
pDce = innerProduct(phi_e,dchi_e);
|
||||
pDco = innerProduct(phi_o,dchi_o);
|
||||
cDpe = innerProduct(chi_e,dphi_e);
|
||||
cDpo = innerProduct(chi_o,dphi_o);
|
||||
|
||||
std::cout<<GridLogMessage <<"e "<<pDce<<" "<<cDpe <<std::endl;
|
||||
std::cout<<GridLogMessage <<"o "<<pDco<<" "<<cDpo <<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage <<"pDce - conj(cDpo) "<< pDco-conj(cDpo) <<std::endl;
|
||||
std::cout<<GridLogMessage <<"pDco - conj(cDpe) "<< pDce-conj(cDpe) <<std::endl;
|
||||
|
||||
Grid_finalize();
|
||||
}
|
Loading…
Reference in New Issue
Block a user