mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-14 01:35:36 +00:00
602 lines
18 KiB
C++
602 lines
18 KiB
C++
/*************************************************************************************
|
|
|
|
Grid physics library, www.github.com/paboyle/Grid
|
|
|
|
Source file: ./lib/qcd/action/fermion/WilsonFermion5D.cc
|
|
|
|
Copyright (C) 2015
|
|
|
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
|
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License along
|
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
|
|
See the full license in the file "LICENSE" in the top level distribution directory
|
|
*************************************************************************************/
|
|
/* END LEGAL */
|
|
#include <Grid.h>
|
|
#include <PerfCount.h>
|
|
|
|
namespace Grid {
|
|
namespace QCD {
|
|
|
|
// S-direction is INNERMOST and takes no part in the parity.
|
|
const std::vector<int> WilsonFermion5DStatic::directions ({1,2,3,4, 1, 2, 3, 4});
|
|
const std::vector<int> WilsonFermion5DStatic::displacements({1,1,1,1,-1,-1,-1,-1});
|
|
int WilsonFermion5DStatic::HandOptDslash;
|
|
int WilsonFermion5DStatic::AsmOptDslash;
|
|
|
|
// 5d lattice for DWF.
|
|
template<class Impl>
|
|
WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
|
|
GridCartesian &FiveDimGrid,
|
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
|
GridCartesian &FourDimGrid,
|
|
GridRedBlackCartesian &FourDimRedBlackGrid,
|
|
RealD _M5,const ImplParams &p) :
|
|
Kernels(p),
|
|
_FiveDimGrid(&FiveDimGrid),
|
|
_FiveDimRedBlackGrid(&FiveDimRedBlackGrid),
|
|
_FourDimGrid(&FourDimGrid),
|
|
_FourDimRedBlackGrid(&FourDimRedBlackGrid),
|
|
Stencil (_FiveDimGrid,npoint,Even,directions,displacements),
|
|
StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even
|
|
StencilOdd (_FiveDimRedBlackGrid,npoint,Odd ,directions,displacements), // source is Odd
|
|
M5(_M5),
|
|
Umu(_FourDimGrid),
|
|
UmuEven(_FourDimRedBlackGrid),
|
|
UmuOdd (_FourDimRedBlackGrid),
|
|
Lebesgue(_FourDimGrid),
|
|
LebesgueEvenOdd(_FourDimRedBlackGrid)
|
|
{
|
|
// some assertions
|
|
assert(FiveDimGrid._ndimension==5);
|
|
assert(FourDimGrid._ndimension==4);
|
|
|
|
assert(FiveDimRedBlackGrid._ndimension==5);
|
|
assert(FourDimRedBlackGrid._ndimension==4);
|
|
|
|
assert(FiveDimRedBlackGrid._checker_dim==1);
|
|
|
|
// Dimension zero of the five-d is the Ls direction
|
|
Ls=FiveDimGrid._fdimensions[0];
|
|
assert(FiveDimRedBlackGrid._fdimensions[0]==Ls);
|
|
assert(FiveDimRedBlackGrid._processors[0] ==1);
|
|
assert(FiveDimRedBlackGrid._simd_layout[0]==1);
|
|
assert(FiveDimGrid._processors[0] ==1);
|
|
assert(FiveDimGrid._simd_layout[0] ==1);
|
|
|
|
// Other dimensions must match the decomposition of the four-D fields
|
|
for(int d=0;d<4;d++){
|
|
assert(FourDimRedBlackGrid._fdimensions[d] ==FourDimGrid._fdimensions[d]);
|
|
assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]);
|
|
|
|
assert(FourDimRedBlackGrid._processors[d] ==FourDimGrid._processors[d]);
|
|
assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
|
|
|
assert(FourDimRedBlackGrid._simd_layout[d] ==FourDimGrid._simd_layout[d]);
|
|
assert(FiveDimRedBlackGrid._simd_layout[d+1]==FourDimGrid._simd_layout[d]);
|
|
|
|
assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]);
|
|
assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
|
assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]);
|
|
}
|
|
|
|
// Allocate the required comms buffer
|
|
ImportGauge(_Umu);
|
|
alltime=0;
|
|
commtime=0;
|
|
jointime=0;
|
|
dslashtime=0;
|
|
dslash1time=0;
|
|
}
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::ImportGauge(const GaugeField &_Umu)
|
|
{
|
|
GaugeField HUmu(_Umu._grid);
|
|
HUmu = _Umu*(-0.5);
|
|
Impl::DoubleStore(GaugeGrid(),Umu,HUmu);
|
|
pickCheckerboard(Even,UmuEven,Umu);
|
|
pickCheckerboard(Odd ,UmuOdd,Umu);
|
|
}
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::DhopDir(const FermionField &in, FermionField &out,int dir5,int disp)
|
|
{
|
|
int dir = dir5-1; // Maps to the ordering above in "directions" that is passed to stencil
|
|
// we drop off the innermost fifth dimension
|
|
// assert( (disp==1)||(disp==-1) );
|
|
// assert( (dir>=0)&&(dir<4) ); //must do x,y,z or t;
|
|
|
|
Compressor compressor(DaggerNo);
|
|
Stencil.HaloExchange(in,compressor);
|
|
|
|
int skip = (disp==1) ? 0 : 1;
|
|
|
|
int dirdisp = dir+skip*4;
|
|
int gamma = dir+(1-skip)*4;
|
|
|
|
assert(dirdisp<=7);
|
|
assert(dirdisp>=0);
|
|
|
|
PARALLEL_FOR_LOOP
|
|
for(int ss=0;ss<Umu._grid->oSites();ss++){
|
|
for(int s=0;s<Ls;s++){
|
|
int sU=ss;
|
|
int sF = s+Ls*sU;
|
|
Kernels::DiracOptDhopDir(Stencil,Umu,Stencil.comm_buf,sF,sU,in,out,dirdisp,gamma);
|
|
}
|
|
}
|
|
};
|
|
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
|
DoubledGaugeField & U,
|
|
GaugeField &mat,
|
|
const FermionField &A,
|
|
const FermionField &B,
|
|
int dag)
|
|
{
|
|
assert((dag==DaggerNo) ||(dag==DaggerYes));
|
|
|
|
conformable(st._grid,A._grid);
|
|
conformable(st._grid,B._grid);
|
|
|
|
Compressor compressor(dag);
|
|
|
|
FermionField Btilde(B._grid);
|
|
FermionField Atilde(B._grid);
|
|
|
|
st.HaloExchange(B,compressor);
|
|
|
|
Atilde=A;
|
|
|
|
for(int mu=0;mu<Nd;mu++){
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
// Flip gamma if dag
|
|
////////////////////////////////////////////////////////////////////////
|
|
int gamma = mu;
|
|
if ( !dag ) gamma+= Nd;
|
|
|
|
////////////////////////
|
|
// Call the single hop
|
|
////////////////////////
|
|
|
|
PARALLEL_FOR_LOOP
|
|
for(int sss=0;sss<U._grid->oSites();sss++){
|
|
for(int s=0;s<Ls;s++){
|
|
int sU=sss;
|
|
int sF = s+Ls*sU;
|
|
|
|
assert ( sF< B._grid->oSites());
|
|
assert ( sU< U._grid->oSites());
|
|
|
|
Kernels::DiracOptDhopDir(st,U,st.comm_buf,sF,sU,B,Btilde,mu,gamma);
|
|
|
|
////////////////////////////
|
|
// spin trace outer product
|
|
////////////////////////////
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Impl::InsertForce5D(mat,Btilde,Atilde,mu);
|
|
|
|
}
|
|
}
|
|
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::DhopDeriv( GaugeField &mat,
|
|
const FermionField &A,
|
|
const FermionField &B,
|
|
int dag)
|
|
{
|
|
conformable(A._grid,FermionGrid());
|
|
conformable(A._grid,B._grid);
|
|
conformable(GaugeGrid(),mat._grid);
|
|
|
|
mat.checkerboard = A.checkerboard;
|
|
|
|
DerivInternal(Stencil,Umu,mat,A,B,dag);
|
|
}
|
|
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
|
|
const FermionField &A,
|
|
const FermionField &B,
|
|
int dag)
|
|
{
|
|
conformable(A._grid,FermionRedBlackGrid());
|
|
conformable(GaugeRedBlackGrid(),mat._grid);
|
|
conformable(A._grid,B._grid);
|
|
|
|
assert(B.checkerboard==Odd);
|
|
assert(A.checkerboard==Even);
|
|
mat.checkerboard = Even;
|
|
|
|
DerivInternal(StencilOdd,UmuEven,mat,A,B,dag);
|
|
}
|
|
|
|
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::Report(void)
|
|
{
|
|
std::cout<<GridLogMessage << "******************** WilsonFermion"<<std::endl;
|
|
std::cout<<GridLogMessage << "Wilson5d time "<<alltime <<" us"<<std::endl;
|
|
std::cout<<GridLogMessage << "HaloBegin time "<<commtime <<" us"<<std::endl;
|
|
std::cout<<GridLogMessage << "Dslash time "<<dslashtime<<" us"<<std::endl;
|
|
std::cout<<GridLogMessage << "Dslash1 time "<<dslash1time<<" us"<<std::endl;
|
|
std::cout<<GridLogMessage << "HaloComplete time "<<jointime<<" us"<<std::endl;
|
|
std::cout<<GridLogMessage << "******************** Stencil"<<std::endl;
|
|
std::cout<<GridLogMessage << "Stencil all gather time "<<Stencil.halogtime<<" us"<<std::endl;
|
|
std::cout<<GridLogMessage << "Stencil nosplice gather time "<<Stencil.nosplicetime<<" us"<<std::endl;
|
|
std::cout<<GridLogMessage << "Stencil splice gather time "<<Stencil.splicetime<<" us"<<std::endl;
|
|
std::cout<<GridLogMessage << "********************"<<std::endl;
|
|
std::cout<<GridLogMessage << "Stencil gather "<<Stencil.gathertime<<" us"<<std::endl;
|
|
std::cout<<GridLogMessage << "Stencil gather simd "<<Stencil.gathermtime<<" us"<<std::endl;
|
|
std::cout<<GridLogMessage << "Stencil merge simd "<<Stencil.mergetime<<" us"<<std::endl;
|
|
std::cout<<GridLogMessage << "Stencil spin simd "<<Stencil.spintime<<" us"<<std::endl;
|
|
std::cout<<GridLogMessage << "********************"<<std::endl;
|
|
std::cout<<GridLogMessage << "Stencil MB/s "<<(double)Stencil.comms_bytes/Stencil.commtime<<std::endl;
|
|
std::cout<<GridLogMessage << "Stencil comm time "<<Stencil.commtime<<" us"<<std::endl;
|
|
std::cout<<GridLogMessage << "Stencil join time "<<Stencil.jointime<<" us"<<std::endl;
|
|
std::cout<<GridLogMessage << "********************"<<std::endl;
|
|
}
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
|
|
const FermionField &A,
|
|
const FermionField &B,
|
|
int dag)
|
|
{
|
|
conformable(A._grid,FermionRedBlackGrid());
|
|
conformable(GaugeRedBlackGrid(),mat._grid);
|
|
conformable(A._grid,B._grid);
|
|
|
|
assert(B.checkerboard==Even);
|
|
assert(A.checkerboard==Odd);
|
|
mat.checkerboard = Odd;
|
|
|
|
DerivInternal(StencilEven,UmuOdd,mat,A,B,dag);
|
|
}
|
|
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
|
|
DoubledGaugeField & U,
|
|
const FermionField &in, FermionField &out,int dag)
|
|
{
|
|
// if ( Impl::overlapCommsCompute () ) {
|
|
// DhopInternalCommsOverlapCompute(st,lo,U,in,out,dag);
|
|
// } else {
|
|
DhopInternalCommsThenCompute(st,lo,U,in,out,dag);
|
|
// }
|
|
}
|
|
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::DhopInternalCommsThenCompute(StencilImpl & st, LebesgueOrder &lo,
|
|
DoubledGaugeField & U,
|
|
const FermionField &in, FermionField &out,int dag)
|
|
{
|
|
// assert((dag==DaggerNo) ||(dag==DaggerYes));
|
|
alltime-=usecond();
|
|
Compressor compressor(dag);
|
|
|
|
// Assume balanced KMP_AFFINITY; this is forced in GridThread.h
|
|
|
|
int threads = GridThread::GetThreads();
|
|
int HT = GridThread::GetHyperThreads();
|
|
int cores = GridThread::GetCores();
|
|
int nwork = U._grid->oSites();
|
|
|
|
commtime -=usecond();
|
|
auto handle = st.HaloExchangeOptBegin(in,compressor);
|
|
st.HaloExchangeOptComplete(handle);
|
|
commtime +=usecond();
|
|
|
|
jointime -=usecond();
|
|
jointime +=usecond();
|
|
|
|
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
|
// Not loop ordering and data layout.
|
|
// Designed to create
|
|
// - per thread reuse in L1 cache for U
|
|
// - 8 linear access unit stride streams per thread for Fermion for hw prefetchable.
|
|
dslashtime -=usecond();
|
|
if ( dag == DaggerYes ) {
|
|
if( this->HandOptDslash ) {
|
|
PARALLEL_FOR_LOOP
|
|
for(int ss=0;ss<U._grid->oSites();ss++){
|
|
int sU=ss;
|
|
for(int s=0;s<Ls;s++){
|
|
int sF = s+Ls*sU;
|
|
Kernels::DiracOptHandDhopSiteDag(st,U,st.comm_buf,sF,sU,in,out);
|
|
}
|
|
}
|
|
} else {
|
|
PARALLEL_FOR_LOOP
|
|
for(int ss=0;ss<U._grid->oSites();ss++){
|
|
{
|
|
int sd;
|
|
for(sd=0;sd<Ls;sd++){
|
|
int sU=ss;
|
|
int sF = sd+Ls*sU;
|
|
Kernels::DiracOptDhopSiteDag(st,U,st.comm_buf,sF,sU,in,out);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
if( this->AsmOptDslash ) {
|
|
// for(int i=0;i<1;i++){
|
|
// for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
|
|
// PerformanceCounter Counter(i);
|
|
// Counter.Start();
|
|
|
|
#pragma omp parallel for
|
|
for(int t=0;t<threads;t++){
|
|
|
|
int hyperthread = t%HT;
|
|
int core = t/HT;
|
|
|
|
int sswork, swork,soff,ssoff, sU,sF;
|
|
|
|
GridThread::GetWork(nwork,core,sswork,ssoff,cores);
|
|
GridThread::GetWork(Ls , hyperthread, swork, soff,HT);
|
|
|
|
for(int ss=0;ss<sswork;ss++){
|
|
for(int s=soff;s<soff+swork;s++){
|
|
|
|
sU=ss+ ssoff;
|
|
|
|
if ( LebesgueOrder::UseLebesgueOrder ) {
|
|
sU = lo.Reorder(sU);
|
|
}
|
|
sF = s+Ls*sU;
|
|
Kernels::DiracOptAsmDhopSite(st,U,st.comm_buf,sF,sU,in,out,(uint64_t *)0);// &buf[0]
|
|
}
|
|
}
|
|
}
|
|
// Counter.Stop();
|
|
// Counter.Report();
|
|
// }
|
|
} else if( this->HandOptDslash ) {
|
|
/*
|
|
|
|
#pragma omp parallel for schedule(static)
|
|
for(int t=0;t<threads;t++){
|
|
|
|
int hyperthread = t%HT;
|
|
int core = t/HT;
|
|
|
|
int sswork, swork,soff,ssoff, sU,sF;
|
|
|
|
GridThread::GetWork(nwork,core,sswork,ssoff,cores);
|
|
GridThread::GetWork(Ls , hyperthread, swork, soff,HT);
|
|
|
|
for(int ss=0;ss<sswork;ss++){
|
|
sU=ss+ ssoff;
|
|
for(int s=soff;s<soff+swork;s++){
|
|
sF = s+Ls*sU;
|
|
Kernels::DiracOptHandDhopSite(st,U,st.comm_buf,sF,sU,in,out);
|
|
}
|
|
}
|
|
}
|
|
*/
|
|
|
|
PARALLEL_FOR_LOOP
|
|
for(int ss=0;ss<U._grid->oSites();ss++){
|
|
int sU=ss;
|
|
for(int s=0;s<Ls;s++){
|
|
int sF = s+Ls*sU;
|
|
Kernels::DiracOptHandDhopSite(st,U,st.comm_buf,sF,sU,in,out);
|
|
}
|
|
}
|
|
} else {
|
|
PARALLEL_FOR_LOOP
|
|
for(int ss=0;ss<U._grid->oSites();ss++){
|
|
int sU=ss;
|
|
for(int s=0;s<Ls;s++){
|
|
int sF = s+Ls*sU;
|
|
Kernels::DiracOptDhopSite(st,U,st.comm_buf,sF,sU,in,out);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
dslashtime +=usecond();
|
|
alltime+=usecond();
|
|
}
|
|
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::DhopInternalCommsOverlapCompute(StencilImpl & st, LebesgueOrder &lo,
|
|
DoubledGaugeField & U,
|
|
const FermionField &in, FermionField &out,int dag)
|
|
{
|
|
assert(0);
|
|
// assert((dag==DaggerNo) ||(dag==DaggerYes));
|
|
alltime-=usecond();
|
|
|
|
Compressor compressor(dag);
|
|
|
|
// Assume balanced KMP_AFFINITY; this is forced in GridThread.h
|
|
|
|
int threads = GridThread::GetThreads();
|
|
int HT = GridThread::GetHyperThreads();
|
|
int cores = GridThread::GetCores();
|
|
int nwork = U._grid->oSites();
|
|
|
|
commtime -=usecond();
|
|
auto handle = st.HaloExchangeOptBegin(in,compressor);
|
|
commtime +=usecond();
|
|
|
|
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
|
// Not loop ordering and data layout.
|
|
// Designed to create
|
|
// - per thread reuse in L1 cache for U
|
|
// - 8 linear access unit stride streams per thread for Fermion for hw prefetchable.
|
|
bool local = true;
|
|
bool nonlocal = false;
|
|
dslashtime -=usecond();
|
|
if ( dag == DaggerYes ) {
|
|
if( this->HandOptDslash ) {
|
|
PARALLEL_FOR_LOOP
|
|
for(int ss=0;ss<U._grid->oSites();ss++){
|
|
int sU=ss;
|
|
for(int s=0;s<Ls;s++){
|
|
int sF = s+Ls*sU;
|
|
Kernels::DiracOptHandDhopSiteDag(st,U,st.comm_buf,sF,sU,in,out,local,nonlocal);
|
|
}
|
|
}
|
|
} else {
|
|
PARALLEL_FOR_LOOP
|
|
for(int ss=0;ss<U._grid->oSites();ss++){
|
|
{
|
|
int sd;
|
|
for(sd=0;sd<Ls;sd++){
|
|
int sU=ss;
|
|
int sF = sd+Ls*sU;
|
|
Kernels::DiracOptDhopSiteDag(st,U,st.comm_buf,sF,sU,in,out,local,nonlocal);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
if( this->HandOptDslash ) {
|
|
PARALLEL_FOR_LOOP
|
|
for(int ss=0;ss<U._grid->oSites();ss++){
|
|
int sU=ss;
|
|
for(int s=0;s<Ls;s++){
|
|
int sF = s+Ls*sU;
|
|
Kernels::DiracOptHandDhopSite(st,U,st.comm_buf,sF,sU,in,out,local,nonlocal);
|
|
}
|
|
}
|
|
} else {
|
|
PARALLEL_FOR_LOOP
|
|
for(int ss=0;ss<U._grid->oSites();ss++){
|
|
int sU=ss;
|
|
for(int s=0;s<Ls;s++){
|
|
int sF = s+Ls*sU;
|
|
Kernels::DiracOptDhopSite(st,U,st.comm_buf,sF,sU,in,out,local,nonlocal);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
dslashtime +=usecond();
|
|
|
|
jointime -=usecond();
|
|
st.HaloExchangeOptComplete(handle);
|
|
jointime +=usecond();
|
|
|
|
local = false;
|
|
nonlocal = true;
|
|
dslash1time -=usecond();
|
|
if ( dag == DaggerYes ) {
|
|
if( this->HandOptDslash ) {
|
|
PARALLEL_FOR_LOOP
|
|
for(int ss=0;ss<U._grid->oSites();ss++){
|
|
int sU=ss;
|
|
for(int s=0;s<Ls;s++){
|
|
int sF = s+Ls*sU;
|
|
Kernels::DiracOptHandDhopSiteDag(st,U,st.comm_buf,sF,sU,in,out,local,nonlocal);
|
|
}
|
|
}
|
|
} else {
|
|
PARALLEL_FOR_LOOP
|
|
for(int ss=0;ss<U._grid->oSites();ss++){
|
|
{
|
|
int sd;
|
|
for(sd=0;sd<Ls;sd++){
|
|
int sU=ss;
|
|
int sF = sd+Ls*sU;
|
|
Kernels::DiracOptDhopSiteDag(st,U,st.comm_buf,sF,sU,in,out,local,nonlocal);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
if( this->HandOptDslash ) {
|
|
PARALLEL_FOR_LOOP
|
|
for(int ss=0;ss<U._grid->oSites();ss++){
|
|
int sU=ss;
|
|
for(int s=0;s<Ls;s++){
|
|
int sF = s+Ls*sU;
|
|
Kernels::DiracOptHandDhopSite(st,U,st.comm_buf,sF,sU,in,out,local,nonlocal);
|
|
}
|
|
}
|
|
} else {
|
|
PARALLEL_FOR_LOOP
|
|
for(int ss=0;ss<U._grid->oSites();ss++){
|
|
int sU=ss;
|
|
for(int s=0;s<Ls;s++){
|
|
int sF = s+Ls*sU;
|
|
Kernels::DiracOptDhopSite(st,U,st.comm_buf,sF,sU,in,out,local,nonlocal);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
dslash1time +=usecond();
|
|
alltime+=usecond();
|
|
|
|
}
|
|
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
|
|
{
|
|
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
|
|
conformable(in._grid,out._grid); // drops the cb check
|
|
|
|
assert(in.checkerboard==Even);
|
|
out.checkerboard = Odd;
|
|
|
|
DhopInternal(StencilEven,LebesgueEvenOdd,UmuOdd,in,out,dag);
|
|
}
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
|
|
{
|
|
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
|
|
conformable(in._grid,out._grid); // drops the cb check
|
|
|
|
assert(in.checkerboard==Odd);
|
|
out.checkerboard = Even;
|
|
|
|
DhopInternal(StencilOdd,LebesgueEvenOdd,UmuEven,in,out,dag);
|
|
}
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::Dhop(const FermionField &in, FermionField &out,int dag)
|
|
{
|
|
conformable(in._grid,FermionGrid()); // verifies full grid
|
|
conformable(in._grid,out._grid);
|
|
|
|
out.checkerboard = in.checkerboard;
|
|
|
|
DhopInternal(Stencil,Lebesgue,Umu,in,out,dag);
|
|
}
|
|
template<class Impl>
|
|
void WilsonFermion5D<Impl>::DW(const FermionField &in, FermionField &out,int dag)
|
|
{
|
|
out.checkerboard=in.checkerboard;
|
|
Dhop(in,out,dag); // -0.5 is included
|
|
axpy(out,4.0-M5,in,out);
|
|
}
|
|
|
|
FermOpTemplateInstantiate(WilsonFermion5D);
|
|
|
|
}}
|
|
|
|
|
|
|