mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
Hadrons: meson field kernel performance for each block
This commit is contained in:
parent
09001aedca
commit
08c47328ba
@ -247,10 +247,12 @@ void TA2AMesonField<FImpl>::execute(void)
|
|||||||
// Total index is sum of these i+ii+iii etc...
|
// Total index is sum of these i+ii+iii etc...
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
double flops = 0.0;
|
double flops;
|
||||||
double bytes = 0.0;
|
double bytes;
|
||||||
double vol = env().getVolume();
|
double vol = env().getVolume();
|
||||||
double t_contr=0;
|
double t_kernel = 0.0;
|
||||||
|
double nodes = env().getGrid()->NodeCount();
|
||||||
|
double tot_kernel;
|
||||||
|
|
||||||
envGetTmp(Vector<MF_IO_TYPE>, mfBuf);
|
envGetTmp(Vector<MF_IO_TYPE>, mfBuf);
|
||||||
envGetTmp(Vector<Complex>, mfCache);
|
envGetTmp(Vector<Complex>, mfCache);
|
||||||
@ -275,6 +277,8 @@ void TA2AMesonField<FImpl>::execute(void)
|
|||||||
MesonFieldIo mfBlock(mfBuf.data(),nmom,ngamma,nt,N_ii,N_jj);
|
MesonFieldIo mfBlock(mfBuf.data(),nmom,ngamma,nt,N_ii,N_jj);
|
||||||
|
|
||||||
// Series of cache blocked chunks of the contractions within this block
|
// Series of cache blocked chunks of the contractions within this block
|
||||||
|
flops = 0.0;
|
||||||
|
bytes = 0.0;
|
||||||
for(int ii=0;ii<N_ii;ii+=cacheBlock)
|
for(int ii=0;ii<N_ii;ii+=cacheBlock)
|
||||||
for(int jj=0;jj<N_jj;jj+=cacheBlock)
|
for(int jj=0;jj<N_jj;jj+=cacheBlock)
|
||||||
{
|
{
|
||||||
@ -290,7 +294,7 @@ void TA2AMesonField<FImpl>::execute(void)
|
|||||||
// flops for general N_c & N_s
|
// flops for general N_c & N_s
|
||||||
flops += vol * ( 2 * 8.0 + 6.0 + 8.0*nmom) * N_iii*N_jjj*ngamma;
|
flops += vol * ( 2 * 8.0 + 6.0 + 8.0*nmom) * N_iii*N_jjj*ngamma;
|
||||||
bytes += vol * (12.0 * sizeof(Complex) ) * N_iii*N_jjj
|
bytes += vol * (12.0 * sizeof(Complex) ) * N_iii*N_jjj
|
||||||
+ vol * ( 2.0 * sizeof(Complex) *nmom ) * N_iii*N_jjj* ngamma;
|
+ vol * ( 2.0 * sizeof(Complex) *nmom ) * N_iii*N_jjj* ngamma;
|
||||||
|
|
||||||
startTimer("cache copy");
|
startTimer("cache copy");
|
||||||
|
|
||||||
@ -305,6 +309,16 @@ void TA2AMesonField<FImpl>::execute(void)
|
|||||||
stopTimer("cache copy");
|
stopTimer("cache copy");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// perf
|
||||||
|
tot_kernel = getDTimer("contraction: colour trace & mom.")
|
||||||
|
+ getDTimer("contraction: local space sum");
|
||||||
|
t_kernel = tot_kernel - t_kernel;
|
||||||
|
LOG(Message) << "Kernel perf " << flops/t_kernel/1.0e3/nodes
|
||||||
|
<< " Gflop/s/node " << std::endl;
|
||||||
|
LOG(Message) << "Kernel perf " << bytes/t_kernel*1.0e6/1024/1024/1024/nodes
|
||||||
|
<< " GB/s/node " << std::endl;
|
||||||
|
t_kernel = tot_kernel;
|
||||||
|
|
||||||
// IO
|
// IO
|
||||||
if (!par().output.empty())
|
if (!par().output.empty())
|
||||||
{
|
{
|
||||||
@ -374,13 +388,6 @@ void TA2AMesonField<FImpl>::execute(void)
|
|||||||
<< " MB/s)" << std::endl;
|
<< " MB/s)" << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
double nodes = env().getGrid()->NodeCount();
|
|
||||||
double t_kernel = getDTimer("contraction: colour trace & mom.")
|
|
||||||
+ getDTimer("contraction: local space sum");
|
|
||||||
|
|
||||||
LOG(Message) << "Perf " << flops/t_kernel/1.0e3/nodes << " Gflop/s/node " << std::endl;
|
|
||||||
LOG(Message) << "Perf " << bytes/t_kernel/1.0e3/nodes << " GB/s/node " << std::endl;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// IO
|
// IO
|
||||||
|
Loading…
x
Reference in New Issue
Block a user