/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid Source file: ./lib/qcd/action/fermion/WilsonFermion.cc Copyright (C) 2015 Author: Peter Boyle Author: Peter Boyle Author: Peter Boyle Author: paboyle This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ #include #include NAMESPACE_BEGIN(Grid); ///////////////////////////////// // Constructor and gauge import ///////////////////////////////// template WilsonFermion::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid, GridRedBlackCartesian &Hgrid, RealD _mass, const ImplParams &p, const WilsonAnisotropyCoefficients &anis) : Kernels(p), _grid(&Fgrid), _cbgrid(&Hgrid), Stencil(&Fgrid, npoint, Even, directions, displacements,p), StencilEven(&Hgrid, npoint, Even, directions,displacements,p), // source is Even StencilOdd(&Hgrid, npoint, Odd, directions,displacements,p), // source is Odd mass(_mass), Lebesgue(_grid), LebesgueEvenOdd(_cbgrid), Umu(&Fgrid), UmuEven(&Hgrid), UmuOdd(&Hgrid), _tmp(&Hgrid), anisotropyCoeff(anis) { // Allocate the required comms buffer ImportGauge(_Umu); if (anisotropyCoeff.isAnisotropic){ diag_mass = mass + 1.0 + (Nd-1)*(anisotropyCoeff.nu / anisotropyCoeff.xi_0); } else { diag_mass = 4.0 + mass; } int vol4; vol4=Fgrid.oSites(); Stencil.BuildSurfaceList(1,vol4); vol4=Hgrid.oSites(); StencilEven.BuildSurfaceList(1,vol4); StencilOdd.BuildSurfaceList(1,vol4); } template void WilsonFermion::Report(void) { RealD NP = _grid->_Nprocessors; RealD NN = _grid->NodeCount(); RealD volume = 1; Coordinate latt = _grid->GlobalDimensions(); for(int mu=0;mu 0 ) { std::cout << GridLogMessage << "#### Dhop calls report " << std::endl; std::cout << GridLogMessage << "WilsonFermion Number of DhopEO Calls : " << DhopCalls << std::endl; std::cout << GridLogMessage << "WilsonFermion TotalTime /Calls : " << DhopTotalTime / DhopCalls << " us" << std::endl; std::cout << GridLogMessage << "WilsonFermion CommTime /Calls : " << DhopCommTime / DhopCalls << " us" << std::endl; std::cout << GridLogMessage << "WilsonFermion FaceTime /Calls : " << DhopFaceTime / DhopCalls << " us" << std::endl; std::cout << GridLogMessage << "WilsonFermion ComputeTime1/Calls : " << DhopComputeTime / DhopCalls << " us" << std::endl; std::cout << GridLogMessage << "WilsonFermion ComputeTime2/Calls : " << DhopComputeTime2/ DhopCalls << " us" << std::endl; // Average the compute time _grid->GlobalSum(DhopComputeTime); DhopComputeTime/=NP; RealD mflops = 1320*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl; std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl; std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl; RealD Fullmflops = 1320*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl; std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl; std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl; } if ( DerivCalls > 0 ) { std::cout << GridLogMessage << "#### Deriv calls report "<< std::endl; std::cout << GridLogMessage << "WilsonFermion Number of Deriv Calls : " < 0 || DhopCalls > 0){ std::cout << GridLogMessage << "WilsonFermion Stencil" < 0){ std::cout << GridLogMessage << "WilsonFermion Stencil Reporti()" < void WilsonFermion::ZeroCounters(void) { DhopCalls = 0; // ok DhopCommTime = 0; DhopComputeTime = 0; DhopComputeTime2= 0; DhopFaceTime = 0; DhopTotalTime = 0; DerivCalls = 0; // ok DerivCommTime = 0; DerivComputeTime = 0; DerivDhopComputeTime = 0; Stencil.ZeroCounters(); StencilEven.ZeroCounters(); StencilOdd.ZeroCounters(); Stencil.ZeroCountersi(); StencilEven.ZeroCountersi(); StencilOdd.ZeroCountersi(); } template void WilsonFermion::ImportGauge(const GaugeField &_Umu) { GaugeField HUmu(_Umu.Grid()); //Here multiply the anisotropy coefficients if (anisotropyCoeff.isAnisotropic) { for (int mu = 0; mu < Nd; mu++) { GaugeLinkField U_dir = (-0.5)*PeekIndex(_Umu, mu); if (mu != anisotropyCoeff.t_direction) U_dir *= (anisotropyCoeff.nu / anisotropyCoeff.xi_0); PokeIndex(HUmu, U_dir, mu); } } else { HUmu = _Umu * (-0.5); } Impl::DoubleStore(GaugeGrid(), Umu, HUmu); pickCheckerboard(Even, UmuEven, Umu); pickCheckerboard(Odd, UmuOdd, Umu); } ///////////////////////////// // Implement the interface ///////////////////////////// template void WilsonFermion::M(const FermionField &in, FermionField &out) { out.Checkerboard() = in.Checkerboard(); Dhop(in, out, DaggerNo); axpy(out, diag_mass, in, out); } template void WilsonFermion::Mdag(const FermionField &in, FermionField &out) { out.Checkerboard() = in.Checkerboard(); Dhop(in, out, DaggerYes); axpy(out, diag_mass, in, out); } template void WilsonFermion::Meooe(const FermionField &in, FermionField &out) { if (in.Checkerboard() == Odd) { DhopEO(in, out, DaggerNo); } else { DhopOE(in, out, DaggerNo); } } template void WilsonFermion::MeooeDag(const FermionField &in, FermionField &out) { if (in.Checkerboard() == Odd) { DhopEO(in, out, DaggerYes); } else { DhopOE(in, out, DaggerYes); } } template void WilsonFermion::Mooee(const FermionField &in, FermionField &out) { out.Checkerboard() = in.Checkerboard(); typename FermionField::scalar_type scal(diag_mass); out = scal * in; } template void WilsonFermion::MooeeDag(const FermionField &in, FermionField &out) { out.Checkerboard() = in.Checkerboard(); Mooee(in, out); } template void WilsonFermion::MooeeInv(const FermionField &in, FermionField &out) { out.Checkerboard() = in.Checkerboard(); out = (1.0/(diag_mass))*in; } template void WilsonFermion::MooeeInvDag(const FermionField &in, FermionField &out) { out.Checkerboard() = in.Checkerboard(); MooeeInv(in,out); } template void WilsonFermion::MomentumSpacePropagator(FermionField &out, const FermionField &in,RealD _m,std::vector twist) { typedef typename FermionField::vector_type vector_type; typedef typename FermionField::scalar_type ScalComplex; typedef Lattice > LatComplex; // what type LatticeComplex conformable(_grid,out.Grid()); Gamma::Algebra Gmu [] = { Gamma::Algebra::GammaX, Gamma::Algebra::GammaY, Gamma::Algebra::GammaZ, Gamma::Algebra::GammaT }; Coordinate latt_size = _grid->_fdimensions; FermionField num (_grid); num = Zero(); LatComplex wilson(_grid); wilson= Zero(); LatComplex one (_grid); one = ScalComplex(1.0,0.0); LatComplex denom(_grid); denom= Zero(); LatComplex kmu(_grid); ScalComplex ci(0.0,1.0); // momphase = n * 2pi / L for(int mu=0;mu void WilsonFermion::DerivInternal(StencilImpl &st, DoubledGaugeField &U, GaugeField &mat, const FermionField &A, const FermionField &B, int dag) { DerivCalls++; assert((dag == DaggerNo) || (dag == DaggerYes)); Compressor compressor(dag); FermionField Btilde(B.Grid()); FermionField Atilde(B.Grid()); Atilde = A; DerivCommTime-=usecond(); st.HaloExchange(B, compressor); DerivCommTime+=usecond(); DerivComputeTime-=usecond(); for (int mu = 0; mu < Nd; mu++) { //////////////////////////////////////////////////////////////////////// // Flip gamma (1+g)<->(1-g) if dag //////////////////////////////////////////////////////////////////////// int gamma = mu; if (!dag) gamma += Nd; DerivDhopComputeTime -= usecond(); int Ls=1; Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, B.Grid()->oSites(), B, Btilde, mu, gamma); ////////////////////////////////////////////////// // spin trace outer product ////////////////////////////////////////////////// Impl::InsertForce4D(mat, Btilde, Atilde, mu); DerivDhopComputeTime += usecond(); } DerivComputeTime += usecond(); } template void WilsonFermion::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { conformable(U.Grid(), _grid); conformable(U.Grid(), V.Grid()); conformable(U.Grid(), mat.Grid()); mat.Checkerboard() = U.Checkerboard(); DerivInternal(Stencil, Umu, mat, U, V, dag); } template void WilsonFermion::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { conformable(U.Grid(), _cbgrid); conformable(U.Grid(), V.Grid()); //conformable(U.Grid(), mat.Grid()); not general, leaving as a comment (Guido) // Motivation: look at the SchurDiff operator assert(V.Checkerboard() == Even); assert(U.Checkerboard() == Odd); mat.Checkerboard() = Odd; DerivInternal(StencilEven, UmuOdd, mat, U, V, dag); } template void WilsonFermion::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { conformable(U.Grid(), _cbgrid); conformable(U.Grid(), V.Grid()); //conformable(U.Grid(), mat.Grid()); assert(V.Checkerboard() == Odd); assert(U.Checkerboard() == Even); mat.Checkerboard() = Even; DerivInternal(StencilOdd, UmuEven, mat, U, V, dag); } template void WilsonFermion::Dhop(const FermionField &in, FermionField &out, int dag) { DhopCalls+=2; conformable(in.Grid(), _grid); // verifies full grid conformable(in.Grid(), out.Grid()); out.Checkerboard() = in.Checkerboard(); DhopInternal(Stencil, Lebesgue, Umu, in, out, dag); } template void WilsonFermion::DhopOE(const FermionField &in, FermionField &out, int dag) { DhopCalls++; conformable(in.Grid(), _cbgrid); // verifies half grid conformable(in.Grid(), out.Grid()); // drops the cb check assert(in.Checkerboard() == Even); out.Checkerboard() = Odd; DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, in, out, dag); } template void WilsonFermion::DhopEO(const FermionField &in, FermionField &out,int dag) { DhopCalls++; conformable(in.Grid(), _cbgrid); // verifies half grid conformable(in.Grid(), out.Grid()); // drops the cb check assert(in.Checkerboard() == Odd); out.Checkerboard() = Even; DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, in, out, dag); } template void WilsonFermion::Mdir(const FermionField &in, FermionField &out, int dir, int disp) { DhopDir(in, out, dir, disp); } template void WilsonFermion::MdirAll(const FermionField &in, std::vector &out) { DhopDirAll(in, out); } template void WilsonFermion::DhopDir(const FermionField &in, FermionField &out, int dir, int disp) { Compressor compressor(DaggerNo); Stencil.HaloExchange(in, compressor); int skip = (disp == 1) ? 0 : 1; int dirdisp = dir + skip * 4; int gamma = dir + (1 - skip) * 4; DhopDirCalc(in, out, dirdisp, gamma, DaggerNo); }; template void WilsonFermion::DhopDirAll(const FermionField &in, std::vector &out) { Compressor compressor(DaggerNo); Stencil.HaloExchange(in, compressor); assert((out.size()==8)||(out.size()==9)); for(int dir=0;dir void WilsonFermion::DhopDirCalc(const FermionField &in, FermionField &out,int dirdisp, int gamma, int dag) { int Ls=1; uint64_t Nsite=in.oSites(); Kernels::DhopDirKernel(Stencil, Umu, Stencil.CommBuf(), Ls, Nsite, in, out, dirdisp, gamma); }; template void WilsonFermion::DhopInternal(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, const FermionField &in, FermionField &out, int dag) { DhopTotalTime-=usecond(); #ifdef GRID_OMP if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) DhopInternalOverlappedComms(st,lo,U,in,out,dag); else #endif DhopInternalSerial(st,lo,U,in,out,dag); DhopTotalTime+=usecond(); } template void WilsonFermion::DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, const FermionField &in, FermionField &out, int dag) { assert((dag == DaggerNo) || (dag == DaggerYes)); Compressor compressor(dag); int len = U.Grid()->oSites(); ///////////////////////////// // Start comms // Gather intranode and extra node differentiated?? ///////////////////////////// std::vector > requests; st.Prepare(); DhopFaceTime-=usecond(); st.HaloGather(in,compressor); DhopFaceTime+=usecond(); DhopCommTime -=usecond(); st.CommunicateBegin(requests); ///////////////////////////// // Overlap with comms ///////////////////////////// DhopFaceTime-=usecond(); st.CommsMergeSHM(compressor); DhopFaceTime+=usecond(); ///////////////////////////// // do the compute interior ///////////////////////////// int Opt = WilsonKernelsStatic::Opt; DhopComputeTime-=usecond(); if (dag == DaggerYes) { Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0); } else { Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0); } DhopComputeTime+=usecond(); ///////////////////////////// // Complete comms ///////////////////////////// st.CommunicateComplete(requests); DhopCommTime +=usecond(); DhopFaceTime-=usecond(); st.CommsMerge(compressor); DhopFaceTime+=usecond(); ///////////////////////////// // do the compute exterior ///////////////////////////// DhopComputeTime2-=usecond(); if (dag == DaggerYes) { Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1); } else { Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1); } DhopComputeTime2+=usecond(); }; template void WilsonFermion::DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, const FermionField &in, FermionField &out, int dag) { assert((dag == DaggerNo) || (dag == DaggerYes)); Compressor compressor(dag); DhopCommTime-=usecond(); st.HaloExchange(in, compressor); DhopCommTime+=usecond(); DhopComputeTime-=usecond(); int Opt = WilsonKernelsStatic::Opt; if (dag == DaggerYes) { Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out); } else { Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out); } DhopComputeTime+=usecond(); }; /*Change ends */ /******************************************************************************* * Conserved current utilities for Wilson fermions, for contracting propagators * to make a conserved current sink or inserting the conserved current * sequentially. ******************************************************************************/ template void WilsonFermion::ContractConservedCurrent(PropagatorField &q_in_1, PropagatorField &q_in_2, PropagatorField &q_out, PropagatorField &src, Current curr_type, unsigned int mu) { Gamma g5(Gamma::Algebra::Gamma5); conformable(_grid, q_in_1.Grid()); conformable(_grid, q_in_2.Grid()); conformable(_grid, q_out.Grid()); assert(0); } template void WilsonFermion::SeqConservedCurrent(PropagatorField &q_in, PropagatorField &q_out, PropagatorField &src, Current curr_type, unsigned int mu, unsigned int tmin, unsigned int tmax, ComplexField &lattice_cmplx) { conformable(_grid, q_in.Grid()); conformable(_grid, q_out.Grid()); assert(0); } NAMESPACE_END(Grid);