mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
include counters in WilsonFermionImplementation.h
This commit is contained in:
parent
f013979791
commit
38164f8480
@ -43,7 +43,7 @@ WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
|
|||||||
GridRedBlackCartesian &Hgrid, RealD _mass,
|
GridRedBlackCartesian &Hgrid, RealD _mass,
|
||||||
const ImplParams &p,
|
const ImplParams &p,
|
||||||
const WilsonAnisotropyCoefficients &anis)
|
const WilsonAnisotropyCoefficients &anis)
|
||||||
:
|
:
|
||||||
Kernels(p),
|
Kernels(p),
|
||||||
_grid(&Fgrid),
|
_grid(&Fgrid),
|
||||||
_cbgrid(&Hgrid),
|
_cbgrid(&Hgrid),
|
||||||
@ -70,8 +70,91 @@ WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void WilsonFermion<Impl>::Report(void)
|
||||||
|
{
|
||||||
|
RealD NP = _FourDimGrid->_Nprocessors;
|
||||||
|
RealD NN = _FourDimGrid->NodeCount();
|
||||||
|
RealD volume = Ls;
|
||||||
|
Coordinate latt = _FourDimGrid->GlobalDimensions();
|
||||||
|
for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
|
||||||
|
|
||||||
|
if ( DhopCalls > 0 ) {
|
||||||
|
std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion Number of DhopEO Calls : " << DhopCalls << std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion TotalTime /Calls : " << DhopTotalTime / DhopCalls << " us" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion CommTime /Calls : " << DhopCommTime / DhopCalls << " us" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion FaceTime /Calls : " << DhopFaceTime / DhopCalls << " us" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion ComputeTime1/Calls : " << DhopComputeTime / DhopCalls << " us" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion ComputeTime2/Calls : " << DhopComputeTime2/ DhopCalls << " us" << std::endl;
|
||||||
|
|
||||||
|
// Average the compute time
|
||||||
|
_FourDimGrid->GlobalSum(DhopComputeTime);
|
||||||
|
DhopComputeTime/=NP;
|
||||||
|
RealD mflops = 1344*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl;
|
||||||
|
|
||||||
|
RealD Fullmflops = 1344*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( DerivCalls > 0 ) {
|
||||||
|
std::cout << GridLogMessage << "#### Deriv calls report "<< std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion Number of Deriv Calls : " <<DerivCalls <<std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion CommTime/Calls : " <<DerivCommTime/DerivCalls<<" us" <<std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion ComputeTime/Calls : " <<DerivComputeTime/DerivCalls<<" us" <<std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion Dhop ComputeTime/Calls : " <<DerivDhopComputeTime/DerivCalls<<" us" <<std::endl;
|
||||||
|
|
||||||
|
RealD mflops = 144*volume*DerivCalls/DerivDhopComputeTime;
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NP << std::endl;
|
||||||
|
|
||||||
|
RealD Fullmflops = 144*volume*DerivCalls/(DerivDhopComputeTime+DerivCommTime)/2; // 2 for red black counting
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NP << std::endl; }
|
||||||
|
|
||||||
|
if (DerivCalls > 0 || DhopCalls > 0){
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion Stencil" <<std::endl; Stencil.Report();
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion StencilEven"<<std::endl; StencilEven.Report();
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion StencilOdd" <<std::endl; StencilOdd.Report();
|
||||||
|
}
|
||||||
|
if ( DhopCalls > 0){
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion Stencil Reporti()" <<std::endl; Stencil.Reporti(DhopCalls);
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion StencilEven Reporti()"<<std::endl; StencilEven.Reporti(DhopCalls);
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion StencilOdd Reporti()" <<std::endl; StencilOdd.Reporti(DhopCalls);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void WilsonFermion<Impl>::ZeroCounters(void) {
|
||||||
|
DhopCalls = 0; // ok
|
||||||
|
DhopCommTime = 0;
|
||||||
|
DhopComputeTime = 0;
|
||||||
|
DhopComputeTime2= 0;
|
||||||
|
DhopFaceTime = 0;
|
||||||
|
DhopTotalTime = 0;
|
||||||
|
|
||||||
|
DerivCalls = 0; // ok
|
||||||
|
DerivCommTime = 0;
|
||||||
|
DerivComputeTime = 0;
|
||||||
|
DerivDhopComputeTime = 0;
|
||||||
|
|
||||||
|
Stencil.ZeroCounters();
|
||||||
|
StencilEven.ZeroCounters();
|
||||||
|
StencilOdd.ZeroCounters();
|
||||||
|
Stencil.ZeroCountersi();
|
||||||
|
StencilEven.ZeroCountersi();
|
||||||
|
StencilOdd.ZeroCountersi();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu)
|
void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu)
|
||||||
{
|
{
|
||||||
GaugeField HUmu(_Umu.Grid());
|
GaugeField HUmu(_Umu.Grid());
|
||||||
|
|
||||||
@ -132,7 +215,7 @@ void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
|
|||||||
DhopOE(in, out, DaggerYes);
|
DhopOE(in, out, DaggerYes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
|
void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
|
||||||
out.Checkerboard() = in.Checkerboard();
|
out.Checkerboard() = in.Checkerboard();
|
||||||
@ -151,7 +234,7 @@ void WilsonFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
|
|||||||
out.Checkerboard() = in.Checkerboard();
|
out.Checkerboard() = in.Checkerboard();
|
||||||
out = (1.0/(diag_mass))*in;
|
out = (1.0/(diag_mass))*in;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out) {
|
void WilsonFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out) {
|
||||||
out.Checkerboard() = in.Checkerboard();
|
out.Checkerboard() = in.Checkerboard();
|
||||||
@ -159,59 +242,59 @@ void WilsonFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out)
|
|||||||
}
|
}
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion<Impl>::MomentumSpacePropagator(FermionField &out, const FermionField &in,RealD _m,std::vector<double> twist)
|
void WilsonFermion<Impl>::MomentumSpacePropagator(FermionField &out, const FermionField &in,RealD _m,std::vector<double> twist)
|
||||||
{
|
{
|
||||||
typedef typename FermionField::vector_type vector_type;
|
typedef typename FermionField::vector_type vector_type;
|
||||||
typedef typename FermionField::scalar_type ScalComplex;
|
typedef typename FermionField::scalar_type ScalComplex;
|
||||||
typedef Lattice<iSinglet<vector_type> > LatComplex;
|
typedef Lattice<iSinglet<vector_type> > LatComplex;
|
||||||
|
|
||||||
// what type LatticeComplex
|
// what type LatticeComplex
|
||||||
conformable(_grid,out.Grid());
|
conformable(_grid,out.Grid());
|
||||||
|
|
||||||
Gamma::Algebra Gmu [] = {
|
Gamma::Algebra Gmu [] = {
|
||||||
Gamma::Algebra::GammaX,
|
Gamma::Algebra::GammaX,
|
||||||
Gamma::Algebra::GammaY,
|
Gamma::Algebra::GammaY,
|
||||||
Gamma::Algebra::GammaZ,
|
Gamma::Algebra::GammaZ,
|
||||||
Gamma::Algebra::GammaT
|
Gamma::Algebra::GammaT
|
||||||
};
|
};
|
||||||
|
|
||||||
Coordinate latt_size = _grid->_fdimensions;
|
Coordinate latt_size = _grid->_fdimensions;
|
||||||
|
|
||||||
FermionField num (_grid); num = Zero();
|
FermionField num (_grid); num = Zero();
|
||||||
LatComplex wilson(_grid); wilson= Zero();
|
LatComplex wilson(_grid); wilson= Zero();
|
||||||
LatComplex one (_grid); one = ScalComplex(1.0,0.0);
|
LatComplex one (_grid); one = ScalComplex(1.0,0.0);
|
||||||
|
|
||||||
LatComplex denom(_grid); denom= Zero();
|
LatComplex denom(_grid); denom= Zero();
|
||||||
LatComplex kmu(_grid);
|
LatComplex kmu(_grid);
|
||||||
ScalComplex ci(0.0,1.0);
|
ScalComplex ci(0.0,1.0);
|
||||||
// momphase = n * 2pi / L
|
// momphase = n * 2pi / L
|
||||||
for(int mu=0;mu<Nd;mu++) {
|
for(int mu=0;mu<Nd;mu++) {
|
||||||
|
|
||||||
LatticeCoordinate(kmu,mu);
|
LatticeCoordinate(kmu,mu);
|
||||||
|
|
||||||
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
|
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
|
||||||
|
|
||||||
kmu = TwoPiL * kmu;
|
kmu = TwoPiL * kmu;
|
||||||
kmu = kmu + TwoPiL * one * twist[mu];//momentum for twisted boundary conditions
|
kmu = kmu + TwoPiL * one * twist[mu];//momentum for twisted boundary conditions
|
||||||
|
|
||||||
wilson = wilson + 2.0*sin(kmu*0.5)*sin(kmu*0.5); // Wilson term
|
wilson = wilson + 2.0*sin(kmu*0.5)*sin(kmu*0.5); // Wilson term
|
||||||
|
|
||||||
num = num - sin(kmu)*ci*(Gamma(Gmu[mu])*in); // derivative term
|
num = num - sin(kmu)*ci*(Gamma(Gmu[mu])*in); // derivative term
|
||||||
|
|
||||||
denom=denom + sin(kmu)*sin(kmu);
|
denom=denom + sin(kmu)*sin(kmu);
|
||||||
}
|
}
|
||||||
|
|
||||||
wilson = wilson + _m; // 2 sin^2 k/2 + m
|
wilson = wilson + _m; // 2 sin^2 k/2 + m
|
||||||
|
|
||||||
num = num + wilson*in; // -i gmu sin k + 2 sin^2 k/2 + m
|
num = num + wilson*in; // -i gmu sin k + 2 sin^2 k/2 + m
|
||||||
|
|
||||||
denom= denom+wilson*wilson; // sin^2 k + (2 sin^2 k/2 + m)^2
|
denom= denom+wilson*wilson; // sin^2 k + (2 sin^2 k/2 + m)^2
|
||||||
|
|
||||||
denom= one/denom;
|
denom= one/denom;
|
||||||
|
|
||||||
out = num*denom; // [ -i gmu sin k + 2 sin^2 k/2 + m] / [ sin^2 k + (2 sin^2 k/2 + m)^2 ]
|
out = num*denom; // [ -i gmu sin k + 2 sin^2 k/2 + m] / [ sin^2 k + (2 sin^2 k/2 + m)^2 ]
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////
|
///////////////////////////////////
|
||||||
// Internal
|
// Internal
|
||||||
@ -221,6 +304,7 @@ template <class Impl>
|
|||||||
void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
|
void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
|
||||||
GaugeField &mat, const FermionField &A,
|
GaugeField &mat, const FermionField &A,
|
||||||
const FermionField &B, int dag) {
|
const FermionField &B, int dag) {
|
||||||
|
DerivCalls++;
|
||||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||||
|
|
||||||
Compressor compressor(dag);
|
Compressor compressor(dag);
|
||||||
@ -229,8 +313,11 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
|
|||||||
FermionField Atilde(B.Grid());
|
FermionField Atilde(B.Grid());
|
||||||
Atilde = A;
|
Atilde = A;
|
||||||
|
|
||||||
|
DerivCommTime-=usecond();
|
||||||
st.HaloExchange(B, compressor);
|
st.HaloExchange(B, compressor);
|
||||||
|
DerivCommTime+=usecond();
|
||||||
|
|
||||||
|
DerivComputeTime-=usecond();
|
||||||
for (int mu = 0; mu < Nd; mu++) {
|
for (int mu = 0; mu < Nd; mu++) {
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// Flip gamma (1+g)<->(1-g) if dag
|
// Flip gamma (1+g)<->(1-g) if dag
|
||||||
@ -238,6 +325,7 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
|
|||||||
int gamma = mu;
|
int gamma = mu;
|
||||||
if (!dag) gamma += Nd;
|
if (!dag) gamma += Nd;
|
||||||
|
|
||||||
|
DerivDhopComputeTime -= usecond();
|
||||||
int Ls=1;
|
int Ls=1;
|
||||||
Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, B.Grid()->oSites(), B, Btilde, mu, gamma);
|
Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, B.Grid()->oSites(), B, Btilde, mu, gamma);
|
||||||
|
|
||||||
@ -245,7 +333,9 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
|
|||||||
// spin trace outer product
|
// spin trace outer product
|
||||||
//////////////////////////////////////////////////
|
//////////////////////////////////////////////////
|
||||||
Impl::InsertForce4D(mat, Btilde, Atilde, mu);
|
Impl::InsertForce4D(mat, Btilde, Atilde, mu);
|
||||||
|
DerivDhopComputeTime += usecond();
|
||||||
}
|
}
|
||||||
|
DerivComputeTime += usecond();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
@ -265,7 +355,7 @@ void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, co
|
|||||||
conformable(U.Grid(), V.Grid());
|
conformable(U.Grid(), V.Grid());
|
||||||
//conformable(U.Grid(), mat.Grid()); not general, leaving as a comment (Guido)
|
//conformable(U.Grid(), mat.Grid()); not general, leaving as a comment (Guido)
|
||||||
// Motivation: look at the SchurDiff operator
|
// Motivation: look at the SchurDiff operator
|
||||||
|
|
||||||
assert(V.Checkerboard() == Even);
|
assert(V.Checkerboard() == Even);
|
||||||
assert(U.Checkerboard() == Odd);
|
assert(U.Checkerboard() == Odd);
|
||||||
mat.Checkerboard() = Odd;
|
mat.Checkerboard() = Odd;
|
||||||
@ -288,6 +378,7 @@ void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, co
|
|||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag) {
|
void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag) {
|
||||||
|
DhopCalls+=2;
|
||||||
conformable(in.Grid(), _grid); // verifies full grid
|
conformable(in.Grid(), _grid); // verifies full grid
|
||||||
conformable(in.Grid(), out.Grid());
|
conformable(in.Grid(), out.Grid());
|
||||||
|
|
||||||
@ -298,6 +389,7 @@ void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int da
|
|||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag) {
|
void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag) {
|
||||||
|
DhopCalls+=1;
|
||||||
conformable(in.Grid(), _cbgrid); // verifies half grid
|
conformable(in.Grid(), _cbgrid); // verifies half grid
|
||||||
conformable(in.Grid(), out.Grid()); // drops the cb check
|
conformable(in.Grid(), out.Grid()); // drops the cb check
|
||||||
|
|
||||||
@ -309,6 +401,7 @@ void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int
|
|||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag) {
|
void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag) {
|
||||||
|
DhopCalls+=1;
|
||||||
conformable(in.Grid(), _cbgrid); // verifies half grid
|
conformable(in.Grid(), _cbgrid); // verifies half grid
|
||||||
conformable(in.Grid(), out.Grid()); // drops the cb check
|
conformable(in.Grid(), out.Grid()); // drops the cb check
|
||||||
|
|
||||||
@ -319,18 +412,18 @@ void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int d
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp)
|
void WilsonFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp)
|
||||||
{
|
{
|
||||||
DhopDir(in, out, dir, disp);
|
DhopDir(in, out, dir, disp);
|
||||||
}
|
}
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::MdirAll(const FermionField &in, std::vector<FermionField> &out)
|
void WilsonFermion<Impl>::MdirAll(const FermionField &in, std::vector<FermionField> &out)
|
||||||
{
|
{
|
||||||
DhopDirAll(in, out);
|
DhopDirAll(in, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp)
|
void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp)
|
||||||
{
|
{
|
||||||
Compressor compressor(DaggerNo);
|
Compressor compressor(DaggerNo);
|
||||||
Stencil.HaloExchange(in, compressor);
|
Stencil.HaloExchange(in, compressor);
|
||||||
@ -342,12 +435,12 @@ void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int
|
|||||||
DhopDirCalc(in, out, dirdisp, gamma, DaggerNo);
|
DhopDirCalc(in, out, dirdisp, gamma, DaggerNo);
|
||||||
};
|
};
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::DhopDirAll(const FermionField &in, std::vector<FermionField> &out)
|
void WilsonFermion<Impl>::DhopDirAll(const FermionField &in, std::vector<FermionField> &out)
|
||||||
{
|
{
|
||||||
Compressor compressor(DaggerNo);
|
Compressor compressor(DaggerNo);
|
||||||
Stencil.HaloExchange(in, compressor);
|
Stencil.HaloExchange(in, compressor);
|
||||||
|
|
||||||
assert((out.size()==8)||(out.size()==9));
|
assert((out.size()==8)||(out.size()==9));
|
||||||
for(int dir=0;dir<Nd;dir++){
|
for(int dir=0;dir<Nd;dir++){
|
||||||
for(int disp=-1;disp<=1;disp+=2){
|
for(int disp=-1;disp<=1;disp+=2){
|
||||||
|
|
||||||
@ -360,7 +453,7 @@ void WilsonFermion<Impl>::DhopDirAll(const FermionField &in, std::vector<Fermion
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::DhopDirCalc(const FermionField &in, FermionField &out,int dirdisp, int gamma, int dag)
|
void WilsonFermion<Impl>::DhopDirCalc(const FermionField &in, FermionField &out,int dirdisp, int gamma, int dag)
|
||||||
{
|
{
|
||||||
int Ls=1;
|
int Ls=1;
|
||||||
uint64_t Nsite=in.oSites();
|
uint64_t Nsite=in.oSites();
|
||||||
@ -371,15 +464,16 @@ template <class Impl>
|
|||||||
// Dispatch one hopping-term application to the overlapped-comms or the
// serial implementation, charging the elapsed time to DhopTotalTime.
// The overlapped path is only compiled in under GRID_OMP, and is taken when
// WilsonKernelsStatic::Comms is set to CommsAndCompute.
void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
                                       DoubledGaugeField &U,
                                       const FermionField &in,
                                       FermionField &out, int dag)
{
  DhopTotalTime -= usecond();

#ifdef GRID_OMP
  const bool overlapComms =
    ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute );
  if ( overlapComms ) {
    DhopInternalOverlappedComms(st,lo,U,in,out,dag);
  } else {
    DhopInternalSerial(st,lo,U,in,out,dag);
  }
#else
  DhopInternalSerial(st,lo,U,in,out,dag);
#endif

  DhopTotalTime += usecond();
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
@ -397,38 +491,53 @@ void WilsonFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st, LebesgueO
|
|||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
std::vector<std::vector<CommsRequest_t> > requests;
|
std::vector<std::vector<CommsRequest_t> > requests;
|
||||||
st.Prepare();
|
st.Prepare();
|
||||||
|
DhopFaceTime-=usecond();
|
||||||
st.HaloGather(in,compressor);
|
st.HaloGather(in,compressor);
|
||||||
|
DhopFaceTime+=usecond();
|
||||||
|
|
||||||
|
DhopCommTime -=usecond();
|
||||||
st.CommunicateBegin(requests);
|
st.CommunicateBegin(requests);
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Overlap with comms
|
// Overlap with comms
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
|
DhopFaceTime-=usecond();
|
||||||
st.CommsMergeSHM(compressor);
|
st.CommsMergeSHM(compressor);
|
||||||
|
DhopFaceTime+=usecond();
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// do the compute interior
|
// do the compute interior
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
int Opt = WilsonKernelsStatic::Opt;
|
int Opt = WilsonKernelsStatic::Opt;
|
||||||
|
DhopComputeTime-=usecond();
|
||||||
if (dag == DaggerYes) {
|
if (dag == DaggerYes) {
|
||||||
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0);
|
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0);
|
||||||
} else {
|
} else {
|
||||||
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0);
|
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0);
|
||||||
}
|
}
|
||||||
|
DhopComputeTime+=usecond();
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Complete comms
|
// Complete comms
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
st.CommunicateComplete(requests);
|
st.CommunicateComplete(requests);
|
||||||
|
DhopCommTime +=usecond();
|
||||||
|
|
||||||
|
DhopFaceTime-=usecond();
|
||||||
st.CommsMerge(compressor);
|
st.CommsMerge(compressor);
|
||||||
|
DhopFaceTime+=usecond();
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// do the compute exterior
|
// do the compute exterior
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
|
|
||||||
|
DhopComputeTime2-=usecond();
|
||||||
if (dag == DaggerYes) {
|
if (dag == DaggerYes) {
|
||||||
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1);
|
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1);
|
||||||
} else {
|
} else {
|
||||||
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1);
|
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1);
|
||||||
}
|
}
|
||||||
|
DhopComputeTime2+=usecond();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -439,20 +548,24 @@ void WilsonFermion<Impl>::DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
FermionField &out, int dag) {
|
FermionField &out, int dag) {
|
||||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||||
Compressor compressor(dag);
|
Compressor compressor(dag);
|
||||||
|
DhopCommTime-=usecond();
|
||||||
st.HaloExchange(in, compressor);
|
st.HaloExchange(in, compressor);
|
||||||
|
DhopCommTime+=usecond();
|
||||||
|
|
||||||
|
DhopComputeTime-=usecond();
|
||||||
int Opt = WilsonKernelsStatic::Opt;
|
int Opt = WilsonKernelsStatic::Opt;
|
||||||
if (dag == DaggerYes) {
|
if (dag == DaggerYes) {
|
||||||
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out);
|
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out);
|
||||||
} else {
|
} else {
|
||||||
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out);
|
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out);
|
||||||
}
|
}
|
||||||
|
DhopComputeTime+=usecond();
|
||||||
};
|
};
|
||||||
/*Change ends */
|
/*Change ends */
|
||||||
|
|
||||||
/*******************************************************************************
|
/*******************************************************************************
|
||||||
* Conserved current utilities for Wilson fermions, for contracting propagators
|
* Conserved current utilities for Wilson fermions, for contracting propagators
|
||||||
* to make a conserved current sink or inserting the conserved current
|
* to make a conserved current sink or inserting the conserved current
|
||||||
* sequentially.
|
* sequentially.
|
||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
@ -493,11 +606,11 @@ void WilsonFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
|
|||||||
|
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
||||||
PropagatorField &q_out,
|
PropagatorField &q_out,
|
||||||
Current curr_type,
|
Current curr_type,
|
||||||
unsigned int mu,
|
unsigned int mu,
|
||||||
unsigned int tmin,
|
unsigned int tmin,
|
||||||
unsigned int tmax,
|
unsigned int tmax,
|
||||||
ComplexField &lattice_cmplx)
|
ComplexField &lattice_cmplx)
|
||||||
{
|
{
|
||||||
@ -535,24 +648,24 @@ void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
|||||||
Integer timeSlices = Reduce(t_mask());
|
Integer timeSlices = Reduce(t_mask());
|
||||||
|
|
||||||
if (timeSlices > 0) {
|
if (timeSlices > 0) {
|
||||||
Kernels::SeqConservedCurrentSiteFwd(tmpFwd_v[sU],
|
Kernels::SeqConservedCurrentSiteFwd(tmpFwd_v[sU],
|
||||||
q_out_v[sU],
|
q_out_v[sU],
|
||||||
Umu_v, sU, mu, t_mask);
|
Umu_v, sU, mu, t_mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Repeat for backward direction.
|
// Repeat for backward direction.
|
||||||
t_mask() = ((coords_v[sU] >= (tmin + tshift)) &&
|
t_mask() = ((coords_v[sU] >= (tmin + tshift)) &&
|
||||||
(coords_v[sU] <= (tmax + tshift)));
|
(coords_v[sU] <= (tmax + tshift)));
|
||||||
|
|
||||||
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
|
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
|
||||||
unsigned int t0 = 0;
|
unsigned int t0 = 0;
|
||||||
if((tmax==LLt-1) && (tshift==1)) t_mask() = (t_mask() || (coords_v[sU] == t0 ));
|
if((tmax==LLt-1) && (tshift==1)) t_mask() = (t_mask() || (coords_v[sU] == t0 ));
|
||||||
|
|
||||||
timeSlices = Reduce(t_mask());
|
timeSlices = Reduce(t_mask());
|
||||||
|
|
||||||
if (timeSlices > 0) {
|
if (timeSlices > 0) {
|
||||||
Kernels::SeqConservedCurrentSiteBwd(tmpBwd_v[sU],
|
Kernels::SeqConservedCurrentSiteBwd(tmpBwd_v[sU],
|
||||||
q_out_v[sU],
|
q_out_v[sU],
|
||||||
Umu_v, sU, mu, t_mask);
|
Umu_v, sU, mu, t_mask);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
Loading…
Reference in New Issue
Block a user