mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-14 01:35:36 +00:00
Added some instrumentation to benchmark the force computation
This commit is contained in:
parent
4089984431
commit
2e453dfbf5
@ -86,18 +86,6 @@ int main (int argc, char ** argv)
|
|||||||
LatticeFermion tmp(FGrid);
|
LatticeFermion tmp(FGrid);
|
||||||
LatticeFermion err(FGrid);
|
LatticeFermion err(FGrid);
|
||||||
|
|
||||||
/* src=zero;
|
|
||||||
std::vector<int> origin(5,0);
|
|
||||||
SpinColourVector f=zero;
|
|
||||||
for(int sp=0;sp<4;sp++){
|
|
||||||
for(int co=0;co<3;co++){
|
|
||||||
f()(sp)(co)=Complex(1.0,0.0);
|
|
||||||
}}
|
|
||||||
pokeSite(f,src,origin);
|
|
||||||
*/
|
|
||||||
|
|
||||||
ColourMatrix cm = Complex(1.0,0.0);
|
|
||||||
|
|
||||||
LatticeGaugeField Umu(UGrid);
|
LatticeGaugeField Umu(UGrid);
|
||||||
random(RNG4,Umu);
|
random(RNG4,Umu);
|
||||||
|
|
||||||
@ -144,6 +132,7 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "Naive wilson implementation "<<std::endl;
|
||||||
std::cout << GridLogMessage<< "Calling Dw"<<std::endl;
|
std::cout << GridLogMessage<< "Calling Dw"<<std::endl;
|
||||||
int ncall =100;
|
int ncall =100;
|
||||||
if (1) {
|
if (1) {
|
||||||
@ -189,7 +178,7 @@ int main (int argc, char ** argv)
|
|||||||
peekSite(tmp,src,site);
|
peekSite(tmp,src,site);
|
||||||
pokeSite(tmp,ssrc,site);
|
pokeSite(tmp,ssrc,site);
|
||||||
}}}}}
|
}}}}}
|
||||||
std::cout<<"src norms "<< norm2(src)<<" " <<norm2(ssrc)<<std::endl;
|
std::cout<<GridLogMessage<< "src norms "<< norm2(src)<<" " <<norm2(ssrc)<<std::endl;
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
sDw.ZeroCounters();
|
sDw.ZeroCounters();
|
||||||
for(int i=0;i<ncall;i++){
|
for(int i=0;i<ncall;i++){
|
||||||
@ -201,7 +190,7 @@ int main (int argc, char ** argv)
|
|||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
double flops=1344*volume*ncall;
|
double flops=1344*volume*ncall;
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Called Dw sinner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
|
std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
|
||||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NP<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NP<<std::endl;
|
||||||
sDw.Report();
|
sDw.Report();
|
||||||
@ -217,7 +206,7 @@ int main (int argc, char ** argv)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout<<"res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
|
std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
|
||||||
|
|
||||||
|
|
||||||
RealF sum=0;
|
RealF sum=0;
|
||||||
@ -237,7 +226,7 @@ int main (int argc, char ** argv)
|
|||||||
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" simd "<<simd<<std::endl;
|
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" simd "<<simd<<std::endl;
|
||||||
}
|
}
|
||||||
}}}}}
|
}}}}}
|
||||||
std::cout<<" difference between normal and simd is "<<sum<<std::endl;
|
std::cout<<GridLogMessage<<" difference between normal and simd is "<<sum<<std::endl;
|
||||||
|
|
||||||
|
|
||||||
if (1) {
|
if (1) {
|
||||||
@ -310,6 +299,7 @@ int main (int argc, char ** argv)
|
|||||||
ref = -0.5*ref;
|
ref = -0.5*ref;
|
||||||
}
|
}
|
||||||
Dw.Dhop(src,result,1);
|
Dw.Dhop(src,result,1);
|
||||||
|
std::cout << GridLogMessage << "Naive wilson implementation Dag" << std::endl;
|
||||||
std::cout<<GridLogMessage << "Called DwDag"<<std::endl;
|
std::cout<<GridLogMessage << "Called DwDag"<<std::endl;
|
||||||
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||||
|
@ -106,7 +106,6 @@
|
|||||||
#define SERIAL_SENDS
|
#define SERIAL_SENDS
|
||||||
|
|
||||||
void AddPacket(void *xmit,void * rcv, Integer to,Integer from,Integer bytes){
|
void AddPacket(void *xmit,void * rcv, Integer to,Integer from,Integer bytes){
|
||||||
comms_bytes+=2.0*bytes;
|
|
||||||
#ifdef SEND_IMMEDIATE
|
#ifdef SEND_IMMEDIATE
|
||||||
commtime-=usecond();
|
commtime-=usecond();
|
||||||
_grid->SendToRecvFrom(xmit,to,rcv,from,bytes);
|
_grid->SendToRecvFrom(xmit,to,rcv,from,bytes);
|
||||||
@ -304,23 +303,23 @@
|
|||||||
double calls;
|
double calls;
|
||||||
|
|
||||||
void ZeroCounters(void) {
|
void ZeroCounters(void) {
|
||||||
gathertime=0;
|
gathertime = 0.;
|
||||||
jointime=0;
|
jointime = 0.;
|
||||||
commtime=0;
|
commtime = 0.;
|
||||||
halogtime=0;
|
halogtime = 0.;
|
||||||
mergetime=0;
|
mergetime = 0.;
|
||||||
spintime=0;
|
spintime = 0.;
|
||||||
gathermtime=0;
|
gathermtime = 0.;
|
||||||
splicetime=0;
|
splicetime = 0.;
|
||||||
nosplicetime=0;
|
nosplicetime = 0.;
|
||||||
comms_bytes=0;
|
comms_bytes = 0.;
|
||||||
calls=0;
|
calls = 0.;
|
||||||
};
|
};
|
||||||
|
|
||||||
void Report(void) {
|
void Report(void) {
|
||||||
#define PRINTIT(A) \
|
#define PRINTIT(A) \
|
||||||
std::cout << GridLogMessage << " Stencil " << #A << " "<< A/calls<<std::endl;
|
std::cout << GridLogMessage << " Stencil " << #A << " "<< A/calls<<std::endl;
|
||||||
if ( calls > 0 ) {
|
if ( calls > 0. ) {
|
||||||
std::cout << GridLogMessage << " Stencil calls "<<calls<<std::endl;
|
std::cout << GridLogMessage << " Stencil calls "<<calls<<std::endl;
|
||||||
PRINTIT(jointime);
|
PRINTIT(jointime);
|
||||||
PRINTIT(gathertime);
|
PRINTIT(gathertime);
|
||||||
|
@ -24,7 +24,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
|
directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#ifndef GRID_CONJUGATE_GRADIENT_H
|
#ifndef GRID_CONJUGATE_GRADIENT_H
|
||||||
@ -40,15 +41,17 @@ namespace Grid {
|
|||||||
template <class Field>
|
template <class Field>
|
||||||
class ConjugateGradient : public OperatorFunction<Field> {
|
class ConjugateGradient : public OperatorFunction<Field> {
|
||||||
public:
|
public:
|
||||||
bool ErrorOnNoConverge; //throw an assert when the CG fails to converge. Defaults true.
|
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
|
||||||
|
// Defaults true.
|
||||||
RealD Tolerance;
|
RealD Tolerance;
|
||||||
Integer MaxIterations;
|
Integer MaxIterations;
|
||||||
ConjugateGradient(RealD tol,Integer maxit, bool err_on_no_conv = true) : Tolerance(tol), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv){
|
ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
|
||||||
};
|
: Tolerance(tol),
|
||||||
|
MaxIterations(maxit),
|
||||||
|
ErrorOnNoConverge(err_on_no_conv){};
|
||||||
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
|
|
||||||
|
|
||||||
|
void operator()(LinearOperatorBase<Field> &Linop, const Field &src,
|
||||||
|
Field &psi) {
|
||||||
psi.checkerboard = src.checkerboard;
|
psi.checkerboard = src.checkerboard;
|
||||||
conformable(psi, src);
|
conformable(psi, src);
|
||||||
|
|
||||||
@ -62,8 +65,10 @@ public:
|
|||||||
RealD guess = norm2(psi);
|
RealD guess = norm2(psi);
|
||||||
assert(std::isnan(guess) == 0);
|
assert(std::isnan(guess) == 0);
|
||||||
|
|
||||||
|
|
||||||
Linop.HermOpAndNorm(psi, mmp, d, b);
|
Linop.HermOpAndNorm(psi, mmp, d, b);
|
||||||
|
|
||||||
|
|
||||||
r = src - mmp;
|
r = src - mmp;
|
||||||
p = r;
|
p = r;
|
||||||
|
|
||||||
@ -71,12 +76,18 @@ public:
|
|||||||
cp = a;
|
cp = a;
|
||||||
ssq = norm2(src);
|
ssq = norm2(src);
|
||||||
|
|
||||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: guess "<<guess<<std::endl;
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: src "<<ssq <<std::endl;
|
<< "ConjugateGradient: guess " << guess << std::endl;
|
||||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: mp "<<d <<std::endl;
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: mmp "<<b <<std::endl;
|
<< "ConjugateGradient: src " << ssq << std::endl;
|
||||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: cp,r "<<cp <<std::endl;
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: p "<<a <<std::endl;
|
<< "ConjugateGradient: mp " << d << std::endl;
|
||||||
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
|
<< "ConjugateGradient: mmp " << b << std::endl;
|
||||||
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
|
<< "ConjugateGradient: cp,r " << cp << std::endl;
|
||||||
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
|
<< "ConjugateGradient: p " << a << std::endl;
|
||||||
|
|
||||||
RealD rsq = Tolerance * Tolerance * ssq;
|
RealD rsq = Tolerance * Tolerance * ssq;
|
||||||
|
|
||||||
@ -85,7 +96,9 @@ public:
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout<<GridLogIterative << std::setprecision(4)<< "ConjugateGradient: k=0 residual "<<cp<<" target "<<rsq<<std::endl;
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
|
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
GridStopWatch LinalgTimer;
|
GridStopWatch LinalgTimer;
|
||||||
GridStopWatch MatrixTimer;
|
GridStopWatch MatrixTimer;
|
||||||
@ -94,7 +107,6 @@ public:
|
|||||||
SolverTimer.Start();
|
SolverTimer.Start();
|
||||||
int k;
|
int k;
|
||||||
for (k = 1; k <= MaxIterations; k++) {
|
for (k = 1; k <= MaxIterations; k++) {
|
||||||
|
|
||||||
c = cp;
|
c = cp;
|
||||||
|
|
||||||
MatrixTimer.Start();
|
MatrixTimer.Start();
|
||||||
@ -116,11 +128,11 @@ public:
|
|||||||
p = p * b + r;
|
p = p * b + r;
|
||||||
|
|
||||||
LinalgTimer.Stop();
|
LinalgTimer.Stop();
|
||||||
std::cout<<GridLogIterative<<"ConjugateGradient: Iteration " <<k<<" residual "<<cp<< " target "<< rsq<<std::endl;
|
std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
|
||||||
|
<< " residual " << cp << " target " << rsq << std::endl;
|
||||||
|
|
||||||
// Stopping condition
|
// Stopping condition
|
||||||
if (cp <= rsq) {
|
if (cp <= rsq) {
|
||||||
|
|
||||||
SolverTimer.Stop();
|
SolverTimer.Stop();
|
||||||
Linop.HermOpAndNorm(psi, mmp, d, qq);
|
Linop.HermOpAndNorm(psi, mmp, d, qq);
|
||||||
p = mmp - src;
|
p = mmp - src;
|
||||||
@ -131,22 +143,25 @@ public:
|
|||||||
RealD resnorm = sqrt(norm2(p));
|
RealD resnorm = sqrt(norm2(p));
|
||||||
RealD true_residual = resnorm / srcnorm;
|
RealD true_residual = resnorm / srcnorm;
|
||||||
|
|
||||||
std::cout<<GridLogMessage<<"ConjugateGradient: Converged on iteration " <<k
|
std::cout << GridLogMessage
|
||||||
<<" computed residual "<<sqrt(cp/ssq)
|
<< "ConjugateGradient: Converged on iteration " << k << std::endl;
|
||||||
<<" true residual " <<true_residual
|
std::cout << GridLogMessage << "Computed residual " << sqrt(cp / ssq)
|
||||||
<<" target "<<Tolerance<<std::endl;
|
<< " true residual " << true_residual << " target "
|
||||||
std::cout<<GridLogMessage<<"Time elapsed: Total "<< SolverTimer.Elapsed() << " Matrix "<<MatrixTimer.Elapsed() << " Linalg "<<LinalgTimer.Elapsed();
|
<< Tolerance << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Time elapsed: Iterations "
|
||||||
|
<< SolverTimer.Elapsed() << " Matrix "
|
||||||
|
<< MatrixTimer.Elapsed() << " Linalg "
|
||||||
|
<< LinalgTimer.Elapsed();
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
|
|
||||||
if(ErrorOnNoConverge)
|
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 1000.0);
|
||||||
assert(true_residual/Tolerance < 1000.0);
|
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout<<GridLogMessage<<"ConjugateGradient did NOT converge"<<std::endl;
|
std::cout << GridLogMessage << "ConjugateGradient did NOT converge"
|
||||||
if(ErrorOnNoConverge)
|
<< std::endl;
|
||||||
assert(0);
|
if (ErrorOnNoConverge) assert(0);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -178,26 +178,64 @@ WilsonFermion5D<Impl>::WilsonFermion5D(int simd,GaugeField &_Umu,
|
|||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion5D<Impl>::Report(void)
|
void WilsonFermion5D<Impl>::Report(void)
|
||||||
{
|
{
|
||||||
if ( Calls > 0 ) {
|
std::vector<int> latt = GridDefaultLatt();
|
||||||
std::cout << GridLogMessage << "WilsonFermion5D Dhop Calls " <<Calls <<std::endl;
|
RealD volume = Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
|
||||||
std::cout << GridLogMessage << "WilsonFermion5D CommTime " <<CommTime/Calls<<" us" <<std::endl;
|
RealD NP = _FourDimGrid->_Nprocessors;
|
||||||
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime " <<ComputeTime/Calls<<" us" <<std::endl;
|
|
||||||
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion5D Stencil"<<std::endl;
|
if ( DhopCalls > 0 ) {
|
||||||
Stencil.Report();
|
std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D Number of Dhop Calls : " << DhopCalls << std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " << DhopCommTime
|
||||||
|
<< " us" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls : "
|
||||||
|
<< DhopCommTime / DhopCalls << " us" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D Total Compute time : "
|
||||||
|
<< DhopComputeTime << " us" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls : "
|
||||||
|
<< DhopComputeTime / DhopCalls << " us" << std::endl;
|
||||||
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion5D StencilEven"<<std::endl;
|
RealD mflops = 1344*volume*DhopCalls/DhopComputeTime;
|
||||||
StencilEven.Report();
|
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NP << std::endl;
|
||||||
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion5D StencilOdd"<<std::endl;
|
}
|
||||||
StencilOdd.Report();
|
|
||||||
|
if ( DerivCalls > 0 ) {
|
||||||
|
std::cout << GridLogMessage << "#### Deriv calls report "<< std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D Number of Deriv Calls : " <<DerivCalls <<std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " <<DerivCommTime <<" us"<<std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls : " <<DerivCommTime/DerivCalls<<" us" <<std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D Total Compute time : " <<DerivComputeTime <<" us"<<std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls : " <<DerivComputeTime/DerivCalls<<" us" <<std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D Total Dhop Compute time : " <<DerivDhopComputeTime <<" us"<<std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D Dhop ComputeTime/Calls : " <<DerivDhopComputeTime/DerivCalls<<" us" <<std::endl;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
RealD mflops = 144*volume*DerivCalls/DerivDhopComputeTime;
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NP << std::endl;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if (DerivCalls > 0 || DhopCalls > 0){
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D Stencil"<<std::endl; Stencil.Report();
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D StencilEven"<<std::endl; StencilEven.Report();
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D StencilOdd"<<std::endl; StencilOdd.Report();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion5D<Impl>::ZeroCounters(void) {
|
void WilsonFermion5D<Impl>::ZeroCounters(void) {
|
||||||
Calls=0;
|
DhopCalls = 0;
|
||||||
CommTime=0;
|
DhopCommTime = 0;
|
||||||
ComputeTime=0;
|
DhopComputeTime = 0;
|
||||||
|
|
||||||
|
DerivCalls = 0;
|
||||||
|
DerivCommTime = 0;
|
||||||
|
DerivComputeTime = 0;
|
||||||
|
DerivDhopComputeTime = 0;
|
||||||
|
|
||||||
Stencil.ZeroCounters();
|
Stencil.ZeroCounters();
|
||||||
StencilEven.ZeroCounters();
|
StencilEven.ZeroCounters();
|
||||||
StencilOdd.ZeroCounters();
|
StencilOdd.ZeroCounters();
|
||||||
@ -250,6 +288,7 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
|||||||
const FermionField &B,
|
const FermionField &B,
|
||||||
int dag)
|
int dag)
|
||||||
{
|
{
|
||||||
|
DerivCalls++;
|
||||||
assert((dag==DaggerNo) ||(dag==DaggerYes));
|
assert((dag==DaggerNo) ||(dag==DaggerYes));
|
||||||
|
|
||||||
conformable(st._grid,A._grid);
|
conformable(st._grid,A._grid);
|
||||||
@ -260,12 +299,14 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
|||||||
FermionField Btilde(B._grid);
|
FermionField Btilde(B._grid);
|
||||||
FermionField Atilde(B._grid);
|
FermionField Atilde(B._grid);
|
||||||
|
|
||||||
|
DerivCommTime-=usecond();
|
||||||
st.HaloExchange(B,compressor);
|
st.HaloExchange(B,compressor);
|
||||||
|
DerivCommTime+=usecond();
|
||||||
|
|
||||||
Atilde=A;
|
Atilde=A;
|
||||||
|
|
||||||
|
DerivComputeTime-=usecond();
|
||||||
for (int mu = 0; mu < Nd; mu++) {
|
for (int mu = 0; mu < Nd; mu++) {
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// Flip gamma if dag
|
// Flip gamma if dag
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
@ -276,6 +317,7 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
|||||||
// Call the single hop
|
// Call the single hop
|
||||||
////////////////////////
|
////////////////////////
|
||||||
|
|
||||||
|
DerivDhopComputeTime -= usecond();
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for (int sss = 0; sss < U._grid->oSites(); sss++) {
|
for (int sss = 0; sss < U._grid->oSites(); sss++) {
|
||||||
for (int s = 0; s < Ls; s++) {
|
for (int s = 0; s < Ls; s++) {
|
||||||
@ -285,19 +327,18 @@ PARALLEL_FOR_LOOP
|
|||||||
assert(sF < B._grid->oSites());
|
assert(sF < B._grid->oSites());
|
||||||
assert(sU < U._grid->oSites());
|
assert(sU < U._grid->oSites());
|
||||||
|
|
||||||
Kernels::DiracOptDhopDir(st,U,st.comm_buf,sF,sU,B,Btilde,mu,gamma);
|
Kernels::DiracOptDhopDir(st, U, st.comm_buf, sF, sU, B, Btilde, mu,
|
||||||
|
gamma);
|
||||||
|
|
||||||
////////////////////////////
|
////////////////////////////
|
||||||
// spin trace outer product
|
// spin trace outer product
|
||||||
////////////////////////////
|
////////////////////////////
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
DerivDhopComputeTime += usecond();
|
||||||
Impl::InsertForce5D(mat, Btilde, Atilde, mu);
|
Impl::InsertForce5D(mat, Btilde, Atilde, mu);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
DerivComputeTime += usecond();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -355,34 +396,36 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
|
|||||||
DoubledGaugeField & U,
|
DoubledGaugeField & U,
|
||||||
const FermionField &in, FermionField &out,int dag)
|
const FermionField &in, FermionField &out,int dag)
|
||||||
{
|
{
|
||||||
Calls++;
|
DhopCalls++;
|
||||||
// assert((dag==DaggerNo) ||(dag==DaggerYes));
|
// assert((dag==DaggerNo) ||(dag==DaggerYes));
|
||||||
Compressor compressor(dag);
|
Compressor compressor(dag);
|
||||||
|
|
||||||
int LLs = in._grid->_rdimensions[0];
|
int LLs = in._grid->_rdimensions[0];
|
||||||
|
|
||||||
CommTime-=usecond();
|
DhopCommTime-=usecond();
|
||||||
st.HaloExchange(in,compressor);
|
st.HaloExchange(in,compressor);
|
||||||
CommTime+=usecond();
|
DhopCommTime+=usecond();
|
||||||
|
|
||||||
ComputeTime-=usecond();
|
DhopComputeTime-=usecond();
|
||||||
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
||||||
if (dag == DaggerYes) {
|
if (dag == DaggerYes) {
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for (int ss = 0; ss < U._grid->oSites(); ss++) {
|
for (int ss = 0; ss < U._grid->oSites(); ss++) {
|
||||||
int sU = ss;
|
int sU = ss;
|
||||||
int sF = LLs * sU;
|
int sF = LLs * sU;
|
||||||
Kernels::DiracOptDhopSiteDag(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out);
|
Kernels::DiracOptDhopSiteDag(st, lo, U, st.comm_buf, sF, sU, LLs, 1, in,
|
||||||
|
out);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for (int ss = 0; ss < U._grid->oSites(); ss++) {
|
for (int ss = 0; ss < U._grid->oSites(); ss++) {
|
||||||
int sU = ss;
|
int sU = ss;
|
||||||
int sF = LLs * sU;
|
int sF = LLs * sU;
|
||||||
Kernels::DiracOptDhopSite(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out);
|
Kernels::DiracOptDhopSite(st, lo, U, st.comm_buf, sF, sU, LLs, 1, in,
|
||||||
|
out);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ComputeTime+=usecond();
|
DhopComputeTime+=usecond();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -63,9 +63,14 @@ namespace Grid {
|
|||||||
|
|
||||||
void Report(void);
|
void Report(void);
|
||||||
void ZeroCounters(void);
|
void ZeroCounters(void);
|
||||||
double Calls;
|
double DhopCalls;
|
||||||
double CommTime;
|
double DhopCommTime;
|
||||||
double ComputeTime;
|
double DhopComputeTime;
|
||||||
|
|
||||||
|
double DerivCalls;
|
||||||
|
double DerivCommTime;
|
||||||
|
double DerivComputeTime;
|
||||||
|
double DerivDhopComputeTime;
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
// Implement the abstract base
|
// Implement the abstract base
|
||||||
|
@ -131,9 +131,11 @@ namespace Grid{
|
|||||||
Vpc.MpcDag(PhiOdd,Y); // Y= Vdag phi
|
Vpc.MpcDag(PhiOdd,Y); // Y= Vdag phi
|
||||||
X=zero;
|
X=zero;
|
||||||
ActionSolver(Mpc,Y,X); // X= (MdagM)^-1 Vdag phi
|
ActionSolver(Mpc,Y,X); // X= (MdagM)^-1 Vdag phi
|
||||||
Mpc.Mpc(X,Y); // Y= Mdag^-1 Vdag phi
|
//Mpc.Mpc(X,Y); // Y= Mdag^-1 Vdag phi
|
||||||
|
// Multiply by Ydag
|
||||||
|
RealD action = real(innerProduct(Y,X));
|
||||||
|
|
||||||
RealD action = norm2(Y);
|
//RealD action = norm2(Y);
|
||||||
|
|
||||||
// The EE factorised block; normally can replace with zero if det is constant (gauge field indept)
|
// The EE factorised block; normally can replace with zero if det is constant (gauge field indept)
|
||||||
// Only really clover term that creates this. Leave the EE portion as a future to do to make most
|
// Only really clover term that creates this. Leave the EE portion as a future to do to make most
|
||||||
|
@ -76,6 +76,12 @@ public:
|
|||||||
TheAction.push_back(Level1);
|
TheAction.push_back(Level1);
|
||||||
|
|
||||||
Run(argc,argv);
|
Run(argc,argv);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "Numerator report, Pauli-Villars term : " << std::endl;
|
||||||
|
NumOp.Report();
|
||||||
|
std::cout << GridLogMessage << "Denominator report, Dw(m) term (includes CG) : " << std::endl;
|
||||||
|
DenOp.Report();
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
};
|
};
|
||||||
|
@ -22,7 +22,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
|
directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#include <Grid/Grid.h>
|
#include <Grid/Grid.h>
|
||||||
@ -36,41 +37,47 @@ struct scal {
|
|||||||
d internal;
|
d internal;
|
||||||
};
|
};
|
||||||
|
|
||||||
Gamma::GammaMatrix Gmu [] = {
|
Gamma::GammaMatrix Gmu[] = {Gamma::GammaX, Gamma::GammaY, Gamma::GammaZ,
|
||||||
Gamma::GammaX,
|
Gamma::GammaT};
|
||||||
Gamma::GammaY,
|
|
||||||
Gamma::GammaZ,
|
|
||||||
Gamma::GammaT
|
|
||||||
};
|
|
||||||
|
|
||||||
int main (int argc, char ** argv)
|
int main(int argc, char** argv) {
|
||||||
{
|
|
||||||
Grid_init(&argc, &argv);
|
Grid_init(&argc, &argv);
|
||||||
|
|
||||||
const int Ls=8;
|
const int Ls = 16;
|
||||||
|
|
||||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(
|
||||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()),
|
||||||
|
GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian* UrbGrid =
|
||||||
|
SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
GridCartesian* FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
|
GridCartesian* FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
|
||||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
GridRedBlackCartesian* FrbGrid =
|
||||||
|
SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
|
||||||
|
|
||||||
std::vector<int> seeds4({1, 2, 3, 4});
|
std::vector<int> seeds4({1, 2, 3, 4});
|
||||||
std::vector<int> seeds5({5, 6, 7, 8});
|
std::vector<int> seeds5({5, 6, 7, 8});
|
||||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
GridParallelRNG RNG5(FGrid);
|
||||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
RNG5.SeedFixedIntegers(seeds5);
|
||||||
|
GridParallelRNG RNG4(UGrid);
|
||||||
|
RNG4.SeedFixedIntegers(seeds4);
|
||||||
|
|
||||||
LatticeFermion src(FGrid); random(RNG5,src);
|
LatticeFermion src(FGrid);
|
||||||
LatticeFermion result(FGrid); result=zero;
|
random(RNG5, src);
|
||||||
|
LatticeFermion result(FGrid);
|
||||||
|
result = zero;
|
||||||
LatticeGaugeField Umu(UGrid);
|
LatticeGaugeField Umu(UGrid);
|
||||||
|
|
||||||
SU3::HotConfiguration(RNG4, Umu);
|
SU3::HotConfiguration(RNG4, Umu);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt()
|
||||||
|
<< " Ls: " << Ls << std::endl;
|
||||||
|
|
||||||
std::vector<LatticeColourMatrix> U(4, UGrid);
|
std::vector<LatticeColourMatrix> U(4, UGrid);
|
||||||
for (int mu = 0; mu < Nd; mu++) {
|
for (int mu = 0; mu < Nd; mu++) {
|
||||||
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
|
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
|
||||||
}
|
}
|
||||||
|
|
||||||
RealD mass=0.1;
|
RealD mass = 0.01;
|
||||||
RealD M5 = 1.8;
|
RealD M5 = 1.8;
|
||||||
DomainWallFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5);
|
DomainWallFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5);
|
||||||
|
|
||||||
@ -79,9 +86,20 @@ int main (int argc, char ** argv)
|
|||||||
pickCheckerboard(Odd, src_o, src);
|
pickCheckerboard(Odd, src_o, src);
|
||||||
result_o = zero;
|
result_o = zero;
|
||||||
|
|
||||||
|
GridStopWatch CGTimer;
|
||||||
|
|
||||||
SchurDiagMooeeOperator<DomainWallFermionR, LatticeFermion> HermOpEO(Ddwf);
|
SchurDiagMooeeOperator<DomainWallFermionR, LatticeFermion> HermOpEO(Ddwf);
|
||||||
ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
|
ConjugateGradient<LatticeFermion> CG(1.0e-8, 10000, 0);// switch off the assert
|
||||||
|
|
||||||
|
CGTimer.Start();
|
||||||
CG(HermOpEO, src_o, result_o);
|
CG(HermOpEO, src_o, result_o);
|
||||||
|
CGTimer.Stop();
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "Total CG time : " << CGTimer.Elapsed()
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "######## Dhop calls summary" << std::endl;
|
||||||
|
Ddwf.Report();
|
||||||
|
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
}
|
}
|
||||||
|
@ -84,5 +84,6 @@ int main (int argc, char ** argv)
|
|||||||
ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
|
ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
|
||||||
CG(HermOpEO,src_o,result_o);
|
CG(HermOpEO,src_o,result_o);
|
||||||
|
|
||||||
|
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user