diff --git a/.travis.yml b/.travis.yml index ae3efda8..bc6dd0ef 100644 --- a/.travis.yml +++ b/.travis.yml @@ -102,5 +102,5 @@ script: - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto - make -j4 - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi - - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then mpirun -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi + diff --git a/benchmarks/Benchmark_comms.cc b/benchmarks/Benchmark_comms.cc index 969a2a42..99ab190b 100644 --- a/benchmarks/Benchmark_comms.cc +++ b/benchmarks/Benchmark_comms.cc @@ -48,9 +48,9 @@ int main (int argc, char ** argv) std::cout< latt_size ({lat*mpi_layout[0], lat*mpi_layout[1], @@ -124,8 +124,8 @@ int main (int argc, char ** argv) std::cout< latt_size ({lat,lat,lat,lat}); @@ -194,14 +194,14 @@ int main (int argc, char ** argv) } - Nloop=100; + Nloop=10; std::cout< latt_size ({lat*mpi_layout[0], lat*mpi_layout[1], @@ -281,8 +281,8 @@ int main (int argc, char ** argv) std::cout< latt_size ({lat*mpi_layout[0], lat*mpi_layout[1], @@ -324,8 +324,8 @@ int main (int argc, char ** argv) (void *)&rbuf[mu][0], recv_from_rank, bytes); - // Grid.StencilSendToRecvFromComplete(requests); - // requests.resize(0); + Grid.StencilSendToRecvFromComplete(requests); + requests.resize(0); comm_proc = mpi_layout[mu]-1; diff --git a/benchmarks/Benchmark_dwf.cc b/benchmarks/Benchmark_dwf.cc index 753cd60f..686d00a1 100644 --- a/benchmarks/Benchmark_dwf.cc +++ b/benchmarks/Benchmark_dwf.cc @@ -48,16 +48,16 @@ typedef WilsonFermion5D WilsonFermion5DR; typedef WilsonFermion5D WilsonFermion5DF; typedef WilsonFermion5D WilsonFermion5DD; - int main (int argc, char ** argv) { Grid_init(&argc,&argv); + int threads = GridThread::GetThreads(); std::cout< latt4 = GridDefaultLatt(); - const int Ls=8; + const int Ls=16; GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), 
GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); @@ -71,35 +71,66 @@ int main (int argc, char ** argv) std::vector seeds4({1,2,3,4}); std::vector seeds5({5,6,7,8}); - + + std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl; GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl; GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + std::cout << GridLogMessage << "Initialised RNGs" << std::endl; LatticeFermion src (FGrid); random(RNG5,src); +#if 0 + src = zero; + { + std::vector origin({0,0,0,latt4[2]-1,0}); + SpinColourVectorF tmp; + tmp=zero; + tmp()(0)(0)=Complex(-2.0,0.0); + std::cout << " source site 0 " << tmp<(Umu,mu); + // if (mu !=2 ) ttmp = 0; + // ttmp = ttmp* pow(10.0,mu); + PokeIndex(Umu,ttmp,mu); + } + std::cout << GridLogMessage << "Forced to diagonal " << std::endl; +#endif + //////////////////////////////////// + // Naive wilson implementation + //////////////////////////////////// // replicate across fifth dimension + LatticeGaugeField Umu5d(FGrid); + std::vector U(4,FGrid); for(int ss=0;ssoSites();ss++){ for(int s=0;s U(4,FGrid); for(int mu=0;mu(Umu5d,mu); } + std::cout << GridLogMessage << "Setting up Cshift based reference " << std::endl; if (1) { @@ -121,6 +152,7 @@ int main (int argc, char ** argv) RealD NP = UGrid->_Nprocessors; + std::cout << GridLogMessage << "Creating action operator " << std::endl; DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); std::cout << GridLogMessage<< "*****************************************************************" <Barrier(); Dw.ZeroCounters(); + Dw.Dhop(src,result,0); double t0=usecond(); for(int i=0;i1.0e-4) ) { + std::cout << "RESULT\n " << result<Barrier(); + exit(-1); + } + */ assert (norm2(err)< 1.0e-4 ); Dw.Report(); } @@ -182,21 +225,13 
@@ int main (int argc, char ** argv) LatticeFermion sresult(sFGrid); WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5); - - for(int x=0;x site({s,x,y,z,t}); - SpinColourVector tmp; - peekSite(tmp,src,site); - pokeSite(tmp,ssrc,site); - }}}}} + + localConvert(src,ssrc); std::cout<Barrier(); - double t0=usecond(); + sDw.Dhop(ssrc,sresult,0); sDw.ZeroCounters(); + double t0=usecond(); for(int i=0;i site({s,x,y,z,t}); - SpinColourVector normal, simd; - peekSite(normal,result,site); - peekSite(simd,sresult,site); - sum=sum+norm2(normal-simd); - if (norm2(normal-simd) > 1.0e-6 ) { - std::cout << "site "< 1.0e-4 ){ + std::cout<< "sD REF\n " < 1.0e-4 ){ + std::cout<< "sD REF\n " <::DhopEO "<::DhopEO "<Barrier(); + sDw.DhopEO(ssrc_o, sr_e, DaggerNo); sDw.ZeroCounters(); - sDw.stat.init("DhopEO"); + // sDw.stat.init("DhopEO"); double t0=usecond(); for (int i = 0; i < ncall; i++) { sDw.DhopEO(ssrc_o, sr_e, DaggerNo); } double t1=usecond(); FGrid->Barrier(); - sDw.stat.print(); + // sDw.stat.print(); double volume=Ls; for(int mu=0;mu1.0e-4) { + + if(( error>1.0e-4) ) { setCheckerboard(ssrc,ssrc_o); setCheckerboard(ssrc,ssrc_e); - std::cout<< ssrc << std::endl; + std::cout<< "DIFF\n " <1.0e-4)){ + std::cout<< "DAG RESULT\n " <Barrier(); + Dw.DhopEO(src_o,r_e,DaggerNo); double t0=usecond(); for(int i=0;i1.0e-4)){ + std::cout<< "Deo RESULT\n " < +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + std::vector latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout); + + int threads = GridThread::GetThreads(); + std::cout< seeds({1,2,3,4}); + GridParallelRNG pRNG(&Grid); + pRNG.SeedFixedIntegers(seeds); + // pRNG.SeedRandomDevice(); + + typedef typename ImprovedStaggeredFermionR::FermionField FermionField; + typename ImprovedStaggeredFermionR::ImplParams params; + + FermionField src (&Grid); random(pRNG,src); + FermionField result(&Grid); result=zero; + FermionField ref(&Grid); ref=zero; + FermionField tmp(&Grid); tmp=zero; + FermionField err(&Grid); tmp=zero; + LatticeGaugeField Umu(&Grid); random(pRNG,Umu); + std::vector U(4,&Grid); + + double volume=1; + for(int mu=0;mu(Umu,U[nn],nn); + } +#endif + + for(int mu=0;mu(Umu,mu); + } + ref = zero; + /* + { // Naive wilson implementation + ref = zero; + for(int mu=0;mu #ifndef GRID_H #define GRID_H -/////////////////// -// Std C++ dependencies -/////////////////// -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/////////////////// -// Grid headers -/////////////////// -#include "Config.h" -#include -#include -#include -#include -#include -#include -#include 
-#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - - +#include +#include +#include +#include +#include #endif diff --git a/lib/GridCore.h b/lib/GridCore.h new file mode 100644 index 00000000..26fbc1b9 --- /dev/null +++ b/lib/GridCore.h @@ -0,0 +1,81 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/Grid.h + + Copyright (C) 2015 + +Author: Peter Boyle +Author: azusayamaguchi +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +// +// Grid.h +// simd +// +// Created by Peter Boyle on 09/05/2014. +// Copyright (c) 2014 University of Edinburgh. All rights reserved. 
+// + +#ifndef GRID_BASE_H +#define GRID_BASE_H + +/////////////////// +// Std C++ dependencies +/////////////////// +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/////////////////// +// Grid headers +/////////////////// +#include "Config.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#endif diff --git a/lib/qcd/hmc/HMC.cc b/lib/GridQCDcore.h similarity index 75% rename from lib/qcd/hmc/HMC.cc rename to lib/GridQCDcore.h index 6386f5bf..7f50761f 100644 --- a/lib/qcd/hmc/HMC.cc +++ b/lib/GridQCDcore.h @@ -2,12 +2,12 @@ Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/qcd/hmc/HMC.cc + Source file: ./lib/Grid.h Copyright (C) 2015 Author: Peter Boyle -Author: neo +Author: azusayamaguchi Author: paboyle This program is free software; you can redistribute it and/or modify @@ -27,10 +27,16 @@ Author: paboyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include +#ifndef GRID_QCD_CORE_H +#define GRID_QCD_CORE_H -namespace Grid{ - namespace QCD{ +///////////////////////// +// Core Grid QCD headers +///////////////////////// +#include +#include +#include +#include +#include - } -} +#endif diff --git a/lib/Old/Endeavour.tgz b/lib/Old/Endeavour.tgz deleted file mode 100644 index 33bfbc01..00000000 Binary files a/lib/Old/Endeavour.tgz and /dev/null differ diff --git a/lib/Old/Tensor_peek.h b/lib/Old/Tensor_peek.h deleted file mode 100644 index eecb3cd5..00000000 --- a/lib/Old/Tensor_peek.h +++ /dev/null @@ -1,154 +0,0 @@ - /************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/Old/Tensor_peek.h - 
- Copyright (C) 2015 - -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_MATH_PEEK_H -#define GRID_MATH_PEEK_H -namespace Grid { - -////////////////////////////////////////////////////////////////////////////// -// Peek on a specific index; returns a scalar in that index, tensor inherits rest -////////////////////////////////////////////////////////////////////////////// -// If we hit the right index, return scalar with no further recursion - -//template inline ComplexF peekIndex(const ComplexF arg) { return arg;} -//template inline ComplexD peekIndex(const ComplexD arg) { return arg;} -//template inline RealF peekIndex(const RealF arg) { return arg;} -//template inline RealD peekIndex(const RealD arg) { return arg;} -#if 0 -// Scalar peek, no indices -template::TensorLevel == Level >::type * =nullptr> inline - auto peekIndex(const iScalar &arg) -> iScalar -{ - return arg; -} -// Vector peek, one index -template::TensorLevel == Level >::type * =nullptr> inline - auto peekIndex(const iVector &arg,int i) -> iScalar // Index matches -{ - iScalar ret; // return scalar - ret._internal = 
arg._internal[i]; - return ret; -} -// Matrix peek, two indices -template::TensorLevel == Level >::type * =nullptr> inline - auto peekIndex(const iMatrix &arg,int i,int j) -> iScalar -{ - iScalar ret; // return scalar - ret._internal = arg._internal[i][j]; - return ret; -} - -///////////// -// No match peek for scalar,vector,matrix must forward on either 0,1,2 args. Must have 9 routines with notvalue -///////////// -// scalar -template::TensorLevel != Level >::type * =nullptr> inline - auto peekIndex(const iScalar &arg) -> iScalar(arg._internal))> -{ - iScalar(arg._internal))> ret; - ret._internal= peekIndex(arg._internal); - return ret; -} -template::TensorLevel != Level >::type * =nullptr> inline - auto peekIndex(const iScalar &arg,int i) -> iScalar(arg._internal,i))> -{ - iScalar(arg._internal,i))> ret; - ret._internal=peekIndex(arg._internal,i); - return ret; -} -template::TensorLevel != Level >::type * =nullptr> inline - auto peekIndex(const iScalar &arg,int i,int j) -> iScalar(arg._internal,i,j))> -{ - iScalar(arg._internal,i,j))> ret; - ret._internal=peekIndex(arg._internal,i,j); - return ret; -} -// vector -template::TensorLevel != Level >::type * =nullptr> inline -auto peekIndex(const iVector &arg) -> iVector(arg._internal[0])),N> -{ - iVector(arg._internal[0])),N> ret; - for(int ii=0;ii(arg._internal[ii]); - } - return ret; -} -template::TensorLevel != Level >::type * =nullptr> inline - auto peekIndex(const iVector &arg,int i) -> iVector(arg._internal[0],i)),N> -{ - iVector(arg._internal[0],i)),N> ret; - for(int ii=0;ii(arg._internal[ii],i); - } - return ret; -} -template::TensorLevel != Level >::type * =nullptr> inline - auto peekIndex(const iVector &arg,int i,int j) -> iVector(arg._internal[0],i,j)),N> -{ - iVector(arg._internal[0],i,j)),N> ret; - for(int ii=0;ii(arg._internal[ii],i,j); - } - return ret; -} - -// matrix -template::TensorLevel != Level >::type * =nullptr> inline -auto peekIndex(const iMatrix &arg) -> iMatrix(arg._internal[0][0])),N> -{ - 
iMatrix(arg._internal[0][0])),N> ret; - for(int ii=0;ii(arg._internal[ii][jj]);// Could avoid this because peeking a scalar is dumb - }} - return ret; -} -template::TensorLevel != Level >::type * =nullptr> inline - auto peekIndex(const iMatrix &arg,int i) -> iMatrix(arg._internal[0][0],i)),N> -{ - iMatrix(arg._internal[0][0],i)),N> ret; - for(int ii=0;ii(arg._internal[ii][jj],i); - }} - return ret; -} -template::TensorLevel != Level >::type * =nullptr> inline - auto peekIndex(const iMatrix &arg,int i,int j) -> iMatrix(arg._internal[0][0],i,j)),N> -{ - iMatrix(arg._internal[0][0],i,j)),N> ret; - for(int ii=0;ii(arg._internal[ii][jj],i,j); - }} - return ret; -} -#endif - - -} -#endif diff --git a/lib/Old/Tensor_poke.h b/lib/Old/Tensor_poke.h deleted file mode 100644 index 83d09cf1..00000000 --- a/lib/Old/Tensor_poke.h +++ /dev/null @@ -1,127 +0,0 @@ - /************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/Old/Tensor_poke.h - - Copyright (C) 2015 - -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_MATH_POKE_H -#define GRID_MATH_POKE_H -namespace Grid { - -////////////////////////////////////////////////////////////////////////////// -// Poke a specific index; -////////////////////////////////////////////////////////////////////////////// -#if 0 -// Scalar poke -template::TensorLevel == Level >::type * =nullptr> inline - void pokeIndex(iScalar &ret, const iScalar &arg) -{ - ret._internal = arg._internal; -} -// Vector poke, one index -template::TensorLevel == Level >::type * =nullptr> inline - void pokeIndex(iVector &ret, const iScalar &arg,int i) -{ - ret._internal[i] = arg._internal; -} -//Matrix poke, two indices -template::TensorLevel == Level >::type * =nullptr> inline - void pokeIndex(iMatrix &ret, const iScalar &arg,int i,int j) -{ - ret._internal[i][j] = arg._internal; -} - -///////////// -// No match poke for scalar,vector,matrix must forward on either 0,1,2 args. 
Must have 9 routines with notvalue -///////////// -// scalar -template::TensorLevel != Level >::type * =nullptr> inline -void pokeIndex(iScalar &ret, const iScalar(ret._internal))> &arg) -{ - pokeIndex(ret._internal,arg._internal); -} -template::TensorLevel != Level >::type * =nullptr> inline - void pokeIndex(iScalar &ret, const iScalar(ret._internal,0))> &arg, int i) - -{ - pokeIndex(ret._internal,arg._internal,i); -} -template::TensorLevel != Level >::type * =nullptr> inline - void pokeIndex(iScalar &ret, const iScalar(ret._internal,0,0))> &arg,int i,int j) -{ - pokeIndex(ret._internal,arg._internal,i,j); -} - -// Vector -template::TensorLevel != Level >::type * =nullptr> inline - void pokeIndex(iVector &ret, iVector(ret._internal)),N> &arg) -{ - for(int ii=0;ii(ret._internal[ii],arg._internal[ii]); - } -} -template::TensorLevel != Level >::type * =nullptr> inline - void pokeIndex(iVector &ret, const iVector(ret._internal,0)),N> &arg,int i) -{ - for(int ii=0;ii(ret._internal[ii],arg._internal[ii],i); - } -} -template::TensorLevel != Level >::type * =nullptr> inline - void pokeIndex(iVector &ret, const iVector(ret._internal,0,0)),N> &arg,int i,int j) -{ - for(int ii=0;ii(ret._internal[ii],arg._internal[ii],i,j); - } -} - -// Matrix -template::TensorLevel != Level >::type * =nullptr> inline - void pokeIndex(iMatrix &ret, const iMatrix(ret._internal)),N> &arg) -{ - for(int ii=0;ii(ret._internal[ii][jj],arg._internal[ii][jj]); - }} -} -template::TensorLevel != Level >::type * =nullptr> inline - void pokeIndex(iMatrix &ret, const iMatrix(ret._internal,0)),N> &arg,int i) -{ - for(int ii=0;ii(ret._internal[ii][jj],arg._internal[ii][jj],i); - }} -} -template::TensorLevel != Level >::type * =nullptr> inline - void pokeIndex(iMatrix &ret, const iMatrix(ret._internal,0,0)),N> &arg, int i,int j) -{ - for(int ii=0;ii(ret._internal[ii][jj],arg._internal[ii][jj],i,j); - }} -} -#endif - -} -#endif diff --git a/lib/Algorithms.h b/lib/algorithms/Algorithms.h similarity index 98% 
rename from lib/Algorithms.h rename to lib/algorithms/Algorithms.h index 67eb11c3..1b82f0ce 100644 --- a/lib/Algorithms.h +++ b/lib/algorithms/Algorithms.h @@ -42,15 +42,14 @@ Author: Peter Boyle #include #include #include - #include #include // Lanczos support #include #include - #include +#include // Eigen/lanczos // EigCg diff --git a/lib/algorithms/CoarsenedMatrix.h b/lib/algorithms/CoarsenedMatrix.h index fd9acc91..73f6baff 100644 --- a/lib/algorithms/CoarsenedMatrix.h +++ b/lib/algorithms/CoarsenedMatrix.h @@ -267,8 +267,7 @@ namespace Grid { SimpleCompressor compressor; Stencil.HaloExchange(in,compressor); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ siteVector res = zero; siteVector nbr; int ptype; @@ -380,8 +379,7 @@ PARALLEL_FOR_LOOP Subspace.ProjectToSubspace(oProj,oblock); // blockProject(iProj,iblock,Subspace.subspace); // blockProject(oProj,oblock,Subspace.subspace); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ for(int j=0;j See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include +#include namespace Grid { double MultiShiftFunction::approx(double x) diff --git a/lib/algorithms/iterative/ConjugateGradient.h b/lib/algorithms/iterative/ConjugateGradient.h index cf3872c8..0f24ae94 100644 --- a/lib/algorithms/iterative/ConjugateGradient.h +++ b/lib/algorithms/iterative/ConjugateGradient.h @@ -45,6 +45,8 @@ class ConjugateGradient : public OperatorFunction { // Defaults true. RealD Tolerance; Integer MaxIterations; + Integer IterationsToComplete; //Number of iterations the CG took to finish. 
Filled in upon completion + ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true) : Tolerance(tol), MaxIterations(maxit), @@ -155,13 +157,14 @@ class ConjugateGradient : public OperatorFunction { std::cout << std::endl; if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0); - + IterationsToComplete = k; return; } } std::cout << GridLogMessage << "ConjugateGradient did NOT converge" << std::endl; if (ErrorOnNoConverge) assert(0); + IterationsToComplete = k; } }; } diff --git a/lib/algorithms/iterative/ConjugateGradientMixedPrec.h b/lib/algorithms/iterative/ConjugateGradientMixedPrec.h index 446465a1..c7332455 100644 --- a/lib/algorithms/iterative/ConjugateGradientMixedPrec.h +++ b/lib/algorithms/iterative/ConjugateGradientMixedPrec.h @@ -35,6 +35,7 @@ namespace Grid { class MixedPrecisionConjugateGradient : public LinearFunction { public: RealD Tolerance; + RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed Integer MaxInnerIterations; Integer MaxOuterIterations; GridBase* SinglePrecGrid; //Grid for single-precision fields @@ -42,12 +43,16 @@ namespace Grid { LinearOperatorBase &Linop_f; LinearOperatorBase &Linop_d; + Integer TotalInnerIterations; //Number of inner CG iterations + Integer TotalOuterIterations; //Number of restarts + Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step + //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess LinearFunction *guesser; MixedPrecisionConjugateGradient(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, LinearOperatorBase &_Linop_f, LinearOperatorBase &_Linop_d) : Linop_f(_Linop_f), Linop_d(_Linop_d), - Tolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid), + Tolerance(tol), InnerTolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid), 
OuterLoopNormMult(100.), guesser(NULL){ }; void useGuesser(LinearFunction &g){ @@ -55,6 +60,8 @@ namespace Grid { } void operator() (const FieldD &src_d_in, FieldD &sol_d){ + TotalInnerIterations = 0; + GridStopWatch TotalTimer; TotalTimer.Start(); @@ -74,7 +81,7 @@ namespace Grid { FieldD src_d(DoublePrecGrid); src_d = src_d_in; //source for next inner iteration, computed from residual during operation - RealD inner_tol = Tolerance; + RealD inner_tol = InnerTolerance; FieldF src_f(SinglePrecGrid); src_f.checkerboard = cb; @@ -89,7 +96,9 @@ namespace Grid { GridStopWatch PrecChangeTimer; - for(Integer outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){ + Integer &outer_iter = TotalOuterIterations; //so it will be equal to the final iteration count + + for(outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){ //Compute double precision rsd and also new RHS vector. Linop_d.HermOp(sol_d, tmp_d); RealD norm = axpy_norm(src_d, -1., tmp_d, src_d_in); //src_d is residual vector @@ -117,6 +126,7 @@ namespace Grid { InnerCGtimer.Start(); CG_f(Linop_f, src_f, sol_f); InnerCGtimer.Stop(); + TotalInnerIterations += CG_f.IterationsToComplete; //Convert sol back to double and add to double prec solution PrecChangeTimer.Start(); @@ -131,9 +141,11 @@ namespace Grid { ConjugateGradient CG_d(Tolerance, MaxInnerIterations); CG_d(Linop_d, src_d_in, sol_d); + TotalFinalStepIterations = CG_d.IterationsToComplete; TotalTimer.Stop(); - std::cout< +#include namespace Grid { @@ -13,9 +13,10 @@ void *PointerCache::Insert(void *ptr,size_t bytes) { if (bytes < 4096 ) return NULL; -#ifdef _OPENMP +#ifdef GRID_OMP assert(omp_in_parallel()==0); #endif + void * ret = NULL; int v = -1; diff --git a/lib/AlignedAllocator.h b/lib/allocator/AlignedAllocator.h similarity index 100% rename from lib/AlignedAllocator.h rename to lib/allocator/AlignedAllocator.h diff --git a/lib/Cartesian.h b/lib/cartesian/Cartesian.h similarity index 100% rename from lib/Cartesian.h rename to 
lib/cartesian/Cartesian.h diff --git a/lib/cartesian/Cartesian_base.h b/lib/cartesian/Cartesian_base.h index 72b21ee3..cea0f3dc 100644 --- a/lib/cartesian/Cartesian_base.h +++ b/lib/cartesian/Cartesian_base.h @@ -52,7 +52,7 @@ public: // Physics Grid information. std::vector _simd_layout;// Which dimensions get relayed out over simd lanes. - std::vector _fdimensions;// Global dimensions of array prior to cb removal + std::vector _fdimensions;// (full) Global dimensions of array prior to cb removal std::vector _gdimensions;// Global dimensions of array after cb removal std::vector _ldimensions;// local dimensions of array with processor images removed std::vector _rdimensions;// Reduced local dimensions with simd lane images and processor images removed @@ -77,7 +77,7 @@ public: // GridCartesian / GridRedBlackCartesian //////////////////////////////////////////////////////////////// virtual int CheckerBoarded(int dim)=0; - virtual int CheckerBoard(std::vector &site)=0; + virtual int CheckerBoard(const std::vector &site)=0; virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0; virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0; virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0; @@ -121,7 +121,6 @@ public: Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions); } - ////////////////////////////////////////////////////////// // SIMD lane addressing ////////////////////////////////////////////////////////// @@ -207,16 +206,16 @@ public: std::vector lcoor; GlobalCoorToProcessorCoorLocalCoor(pcoor,lcoor,gcoor); rank = RankFromProcessorCoor(pcoor); - + /* std::vector cblcoor(lcoor); for(int d=0;dCheckerBoarded(d) ) { cblcoor[d] = lcoor[d]/2; } } - - i_idx= iIndex(cblcoor);// this does not imply divide by 2 on checker dim - o_idx= oIndex(lcoor); // this implies divide by 2 on checkerdim + */ + i_idx= iIndex(lcoor); + o_idx= oIndex(lcoor); } void RankIndexToGlobalCoor(int rank, int o_idx, int i_idx , 
std::vector &gcoor) diff --git a/lib/cartesian/Cartesian_full.h b/lib/cartesian/Cartesian_full.h index b0d20441..7e29d311 100644 --- a/lib/cartesian/Cartesian_full.h +++ b/lib/cartesian/Cartesian_full.h @@ -49,7 +49,7 @@ public: virtual int CheckerBoarded(int dim){ return 0; } - virtual int CheckerBoard(std::vector &site){ + virtual int CheckerBoard(const std::vector &site){ return 0; } virtual int CheckerBoardDestination(int cb,int shift,int dim){ diff --git a/lib/cartesian/Cartesian_red_black.h b/lib/cartesian/Cartesian_red_black.h index 6a4300d7..2f132c19 100644 --- a/lib/cartesian/Cartesian_red_black.h +++ b/lib/cartesian/Cartesian_red_black.h @@ -49,7 +49,7 @@ public: if( dim==_checker_dim) return 1; else return 0; } - virtual int CheckerBoard(std::vector &site){ + virtual int CheckerBoard(const std::vector &site){ int linear=0; assert(site.size()==_ndimension); for(int d=0;d<_ndimension;d++){ diff --git a/lib/Communicator.h b/lib/communicator/Communicator.h similarity index 100% rename from lib/Communicator.h rename to lib/communicator/Communicator.h diff --git a/lib/communicator/Communicator_base.cc b/lib/communicator/Communicator_base.cc index f882d282..98d2abf4 100644 --- a/lib/communicator/Communicator_base.cc +++ b/lib/communicator/Communicator_base.cc @@ -25,7 +25,8 @@ Author: Peter Boyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include +#include + namespace Grid { /////////////////////////////////////////////////////////////// @@ -33,6 +34,7 @@ namespace Grid { /////////////////////////////////////////////////////////////// void * CartesianCommunicator::ShmCommBuf; uint64_t CartesianCommunicator::MAX_MPI_SHM_BYTES = 128*1024*1024; +CartesianCommunicator::CommunicatorPolicy_t CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent; ///////////////////////////////// // 
Alloc, free shmem region @@ -88,7 +90,9 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N) #if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L) -void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, +int CartesianCommunicator::NodeCount(void) { return ProcessorCount();}; + +double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, void *xmit, int xmit_to_rank, void *recv, @@ -96,6 +100,7 @@ void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &waitall) { diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index 94ad1093..e0b9f2c3 100644 --- a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -116,6 +116,12 @@ class CartesianCommunicator { // Implemented in Communicator_base.C ///////////////////////////////// static void * ShmCommBuf; + + // Isend/Irecv/Wait, or Sendrecv blocking + enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential }; + static CommunicatorPolicy_t CommunicatorPolicy; + static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; } + size_t heap_top; size_t heap_bytes; @@ -148,6 +154,7 @@ class CartesianCommunicator { const std::vector & ThisProcessorCoor(void) ; const std::vector & ProcessorGrid(void) ; int ProcessorCount(void) ; + int NodeCount(void) ; //////////////////////////////////////////////////////////////////////////////// // very VERY rarely (Log, serial RNG) we need world without a grid @@ -200,7 +207,7 @@ class CartesianCommunicator { void SendToRecvFromComplete(std::vector &waitall); - void StencilSendToRecvFromBegin(std::vector &list, + double StencilSendToRecvFromBegin(std::vector &list, void *xmit, int xmit_to_rank, void *recv, diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc index 7f29c239..470a06c7 100644 --- a/lib/communicator/Communicator_mpi.cc +++ 
b/lib/communicator/Communicator_mpi.cc @@ -25,7 +25,9 @@ Author: Peter Boyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include +#include +#include +#include #include namespace Grid { @@ -39,9 +41,13 @@ MPI_Comm CartesianCommunicator::communicator_world; // Should error check all MPI calls. void CartesianCommunicator::Init(int *argc, char ***argv) { int flag; + int provided; MPI_Initialized(&flag); // needed to coexist with other libs apparently if ( !flag ) { - MPI_Init(argc,argv); + MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided); + if ( provided != MPI_THREAD_MULTIPLE ) { + QCD::WilsonKernelsStatic::Comms = QCD::WilsonKernelsStatic::CommsThenCompute; + } } MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); ShmInitGeneric(); @@ -152,24 +158,34 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector &lis int from, int bytes) { - MPI_Request xrq; - MPI_Request rrq; - int rank = _processor; + int myrank = _processor; int ierr; - ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq); - ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq); - - assert(ierr==0); + if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) { + MPI_Request xrq; + MPI_Request rrq; - list.push_back(xrq); - list.push_back(rrq); + ierr =MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq); + ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq); + + assert(ierr==0); + list.push_back(xrq); + list.push_back(rrq); + } else { + // Give the CPU to MPI immediately; can use threads to overlap optionally + ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank, + recv,bytes,MPI_CHAR,from, from, + communicator,MPI_STATUS_IGNORE); + assert(ierr==0); + } } void CartesianCommunicator::SendToRecvFromComplete(std::vector &list) { - int nreq=list.size(); - std::vector status(nreq); - int ierr = 
MPI_Waitall(nreq,&list[0],&status[0]); - assert(ierr==0); + if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) { + int nreq=list.size(); + std::vector status(nreq); + int ierr = MPI_Waitall(nreq,&list[0],&status[0]); + assert(ierr==0); + } } void CartesianCommunicator::Barrier(void) diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index 2e17e531..7685768c 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/lib/communicator/Communicator_mpi3.cc @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -25,9 +25,23 @@ Author: Peter Boyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include +#include + #include +#include +#include +#include +#include +#include +#include +#include +#include +//#include +#ifndef SHM_HUGETLB +#define SHM_HUGETLB 04000 +#endif + namespace Grid { /////////////////////////////////////////////////////////////////////////////////////////////////// @@ -50,6 +64,10 @@ std::vector CartesianCommunicator::GroupRanks; std::vector CartesianCommunicator::MyGroup; std::vector CartesianCommunicator::ShmCommBufs; +int CartesianCommunicator::NodeCount(void) { return GroupSize;}; + + +#undef FORCE_COMMS void *CartesianCommunicator::ShmBufferSelf(void) { return ShmCommBufs[ShmRank]; @@ -57,6 +75,9 @@ void *CartesianCommunicator::ShmBufferSelf(void) void *CartesianCommunicator::ShmBuffer(int rank) { int gpeer = GroupRanks[rank]; +#ifdef FORCE_COMMS + return NULL; +#endif if (gpeer == MPI_UNDEFINED){ return NULL; } else { @@ -65,7 +86,13 @@ void *CartesianCommunicator::ShmBuffer(int rank) } void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) { + static int 
count =0; int gpeer = GroupRanks[rank]; + assert(gpeer!=ShmRank); // never send to self + assert(rank!=WorldRank);// never send to self +#ifdef FORCE_COMMS + return NULL; +#endif if (gpeer == MPI_UNDEFINED){ return NULL; } else { @@ -76,16 +103,27 @@ void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) } void CartesianCommunicator::Init(int *argc, char ***argv) { + int flag; + int provided; + // mtrace(); + MPI_Initialized(&flag); // needed to coexist with other libs apparently if ( !flag ) { - MPI_Init(argc,argv); + MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided); + assert (provided == MPI_THREAD_MULTIPLE); } + Grid_quiesce_nodes(); + MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); MPI_Comm_rank(communicator_world,&WorldRank); MPI_Comm_size(communicator_world,&WorldSize); + if ( WorldRank == 0 ) { + std::cout << GridLogMessage<< "Initialising MPI "<< WorldRank <<"/"< - for(uint64_t page=0;page shmids(ShmSize); + + if ( ShmRank == 0 ) { + for(int r=0;r coor = _processor_coor; - + std::vector coor = _processor_coor; // my coord assert(std::abs(shift) <_processors[dim]); coor[dim] = (_processor_coor[dim] + shift + _processors[dim])%_processors[dim]; @@ -242,28 +350,32 @@ void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest coor[dim] = (_processor_coor[dim] - shift + _processors[dim])%_processors[dim]; Lexicographic::IndexFromCoor(coor,dest,_processors); dest = LexicographicToWorldRank[dest]; -} + +}// rank is world rank. 
+ int CartesianCommunicator::RankFromProcessorCoor(std::vector &coor) { int rank; Lexicographic::IndexFromCoor(coor,rank,_processors); rank = LexicographicToWorldRank[rank]; return rank; -} +}// rank is world rank + void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &coor) { - Lexicographic::CoorFromIndex(coor,rank,_processors); - rank = LexicographicToWorldRank[rank]; + int lr=-1; + for(int r=0;r &processors) { int ierr; - communicator=communicator_world; - _ndimension = processors.size(); - + //////////////////////////////////////////////////////////////// // Assert power of two shm_size. //////////////////////////////////////////////////////////////// @@ -275,24 +387,22 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors) } } assert(log2size != -1); - + //////////////////////////////////////////////////////////////// // Identify subblock of ranks on node spreading across dims // in a maximally symmetrical way //////////////////////////////////////////////////////////////// - int dim = 0; - std::vector WorldDims = processors; - ShmDims.resize(_ndimension,1); + ShmDims.resize (_ndimension,1); GroupDims.resize(_ndimension); - - ShmCoor.resize(_ndimension); + ShmCoor.resize (_ndimension); GroupCoor.resize(_ndimension); WorldCoor.resize(_ndimension); + int dim = 0; for(int l2=0;l2 &processors) GroupDims[d] = WorldDims[d]/ShmDims[d]; } + //////////////////////////////////////////////////////////////// + // Verbose + //////////////////////////////////////////////////////////////// +#if 0 + std::cout<< GridLogMessage << "MPI-3 usage "< &processors) //////////////////////////////////////////////////////////////// // Establish mapping between lexico physics coord and WorldRank - // //////////////////////////////////////////////////////////////// - LexicographicToWorldRank.resize(WorldSize,0); Lexicographic::CoorFromIndex(GroupCoor,GroupRank,GroupDims); Lexicographic::CoorFromIndex(ShmCoor,ShmRank,ShmDims); for(int 
d=0;d<_ndimension;d++){ WorldCoor[d] = GroupCoor[d]*ShmDims[d]+ShmCoor[d]; } _processor_coor = WorldCoor; - - int lexico; - Lexicographic::IndexFromCoor(WorldCoor,lexico,WorldDims); - LexicographicToWorldRank[lexico]=WorldRank; - _processor = lexico; + _processor = WorldRank; /////////////////////////////////////////////////////////////////// // global sum Lexico to World mapping /////////////////////////////////////////////////////////////////// + int lexico; + LexicographicToWorldRank.resize(WorldSize,0); + Lexicographic::IndexFromCoor(WorldCoor,lexico,WorldDims); + LexicographicToWorldRank[lexico] = WorldRank; ierr=MPI_Allreduce(MPI_IN_PLACE,&LexicographicToWorldRank[0],WorldSize,MPI_INT,MPI_SUM,communicator); assert(ierr==0); - -}; + for(int i=0;i coor(_ndimension); + ProcessorCoorFromRank(wr,coor); // from world rank + int ck = RankFromProcessorCoor(coor); + assert(ck==wr); + + if ( wr == WorldRank ) { + for(int j=0;j mcoor = coor; + this->Broadcast(0,(void *)&mcoor[0],mcoor.size()*sizeof(int)); + for(int d = 0 ; d< _ndimension; d++) { + assert(coor[d] == mcoor[d]); + } + } +}; void CartesianCommunicator::GlobalSum(uint32_t &u){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); assert(ierr==0); @@ -367,8 +528,6 @@ void CartesianCommunicator::GlobalSumVector(double *d,int N) int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator); assert(ierr==0); } - - // Basic Halo comms primitive void CartesianCommunicator::SendToRecvFrom(void *xmit, int dest, @@ -377,10 +536,14 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit, int bytes) { std::vector reqs(0); + // unsigned long xcrc = crc32(0L, Z_NULL, 0); + // unsigned long rcrc = crc32(0L, Z_NULL, 0); + // xcrc = crc32(xcrc,(unsigned char *)xmit,bytes); SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes); SendToRecvFromComplete(reqs); + // rcrc = crc32(rcrc,(unsigned char *)recv,bytes); + // printf("proc %d SendToRecvFrom %d bytes %lx 
%lx\n",_processor,bytes,xcrc,rcrc); } - void CartesianCommunicator::SendRecvPacket(void *xmit, void *recv, int sender, @@ -397,7 +560,6 @@ void CartesianCommunicator::SendRecvPacket(void *xmit, MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat); } } - // Basic Halo comms primitive void CartesianCommunicator::SendToRecvFromBegin(std::vector &list, void *xmit, @@ -406,95 +568,29 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector &lis int from, int bytes) { -#if 0 - this->StencilBarrier(); - - MPI_Request xrq; - MPI_Request rrq; - - static int sequence; - + int myrank = _processor; int ierr; - int tag; - int check; - assert(dest != _processor); - assert(from != _processor); - - int gdest = GroupRanks[dest]; - int gfrom = GroupRanks[from]; - int gme = GroupRanks[_processor]; + if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) { + MPI_Request xrq; + MPI_Request rrq; - sequence++; - - char *from_ptr = (char *)ShmCommBufs[ShmRank]; - - int small = (bytesStencilBarrier(); - - if (small && (gfrom !=MPI_UNDEFINED) ) { - T *ip = (T *)from_ptr; - T *op = (T *)recv; -PARALLEL_FOR_LOOP - for(int w=0;wStencilBarrier(); - -#else - MPI_Request xrq; - MPI_Request rrq; - int rank = _processor; - int ierr; - ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq); - ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq); - - assert(ierr==0); - - list.push_back(xrq); - list.push_back(rrq); -#endif } -void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, +double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, void *xmit, int dest, void *recv, @@ -505,57 +601,63 @@ void CartesianCommunicator::StencilSendToRecvFromBegin(std::vectorStencilSendToRecvFromComplete(list); + } + + return off_node_bytes; } - - -void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector &list) +void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector &waitall) { - 
SendToRecvFromComplete(list); + SendToRecvFromComplete(waitall); } - void CartesianCommunicator::StencilBarrier(void) { - MPI_Win_sync (ShmWindow); MPI_Barrier (ShmComm); - MPI_Win_sync (ShmWindow); } - void CartesianCommunicator::SendToRecvFromComplete(std::vector &list) { int nreq=list.size(); + + if (nreq==0) return; + std::vector status(nreq); int ierr = MPI_Waitall(nreq,&list[0],&status[0]); assert(ierr==0); + list.resize(0); } - void CartesianCommunicator::Barrier(void) { int ierr = MPI_Barrier(communicator); assert(ierr==0); } - void CartesianCommunicator::Broadcast(int root,void* data, int bytes) { int ierr=MPI_Bcast(data, @@ -565,7 +667,11 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes) communicator); assert(ierr==0); } - +int CartesianCommunicator::RankWorld(void){ + int r; + MPI_Comm_rank(communicator_world,&r); + return r; +} void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) { int ierr= MPI_Bcast(data, diff --git a/lib/communicator/Communicator_mpi3_leader.cc b/lib/communicator/Communicator_mpi3_leader.cc index 71f1a913..6e26bd3e 100644 --- a/lib/communicator/Communicator_mpi3_leader.cc +++ b/lib/communicator/Communicator_mpi3_leader.cc @@ -27,6 +27,7 @@ Author: Peter Boyle /* END LEGAL */ #include "Grid.h" #include +//#include //////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// Workarounds: @@ -42,19 +43,27 @@ Author: Peter Boyle #include #include #include - typedef sem_t *Grid_semaphore; + +#error /*THis is deprecated*/ + +#if 0 #define SEM_INIT(S) S = sem_open(sem_name,0,0600,0); assert ( S != SEM_FAILED ); #define SEM_INIT_EXCL(S) sem_unlink(sem_name); S = sem_open(sem_name,O_CREAT|O_EXCL,0600,0); assert ( S != SEM_FAILED ); #define SEM_POST(S) assert ( sem_post(S) == 0 ); #define SEM_WAIT(S) assert ( sem_wait(S) == 0 ); - +#else +#define SEM_INIT(S) ; +#define SEM_INIT_EXCL(S) ; +#define SEM_POST(S) ; +#define SEM_WAIT(S) ; +#endif 
#include namespace Grid { -enum { COMMAND_ISEND, COMMAND_IRECV, COMMAND_WAITALL }; +enum { COMMAND_ISEND, COMMAND_IRECV, COMMAND_WAITALL, COMMAND_SENDRECV }; struct Descriptor { uint64_t buf; @@ -62,6 +71,12 @@ struct Descriptor { int rank; int tag; int command; + uint64_t xbuf; + uint64_t rbuf; + int xtag; + int rtag; + int src; + int dest; MPI_Request request; }; @@ -94,18 +109,14 @@ public: void SemInit(void) { sprintf(sem_name,"/Grid_mpi3_sem_head_%d",universe_rank); - // printf("SEM_NAME: %s \n",sem_name); SEM_INIT(sem_head); sprintf(sem_name,"/Grid_mpi3_sem_tail_%d",universe_rank); - // printf("SEM_NAME: %s \n",sem_name); SEM_INIT(sem_tail); } void SemInitExcl(void) { sprintf(sem_name,"/Grid_mpi3_sem_head_%d",universe_rank); - // printf("SEM_INIT_EXCL: %s \n",sem_name); SEM_INIT_EXCL(sem_head); sprintf(sem_name,"/Grid_mpi3_sem_tail_%d",universe_rank); - // printf("SEM_INIT_EXCL: %s \n",sem_name); SEM_INIT_EXCL(sem_tail); } void WakeUpDMA(void) { @@ -125,6 +136,13 @@ public: while(1){ WaitForCommand(); // std::cout << "Getting command "<head,0,0); + int s=state->start; + if ( s != state->head ) { + _mm_mwait(0,0); + } +#endif Event(); } } @@ -132,6 +150,7 @@ public: int Event (void) ; uint64_t QueueCommand(int command,void *buf, int bytes, int hashtag, MPI_Comm comm,int u_rank) ; + void QueueSendRecv(void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src) ; void WaitAll() { // std::cout << "Queueing WAIT command "<tail == state->head ); + while ( state->tail != state->head ); } }; @@ -196,6 +215,12 @@ public: // std::cout << "Waking up DMA "<< slave< MPIoffloadEngine::VerticalShmBufs; std::vector > MPIoffloadEngine::UniverseRanks; std::vector MPIoffloadEngine::UserCommunicatorToWorldRanks; +int CartesianCommunicator::NodeCount(void) { return HorizontalSize;}; int MPIoffloadEngine::ShmSetup = 0; void MPIoffloadEngine::CommunicatorInit (MPI_Comm &communicator_world, @@ -370,12 +418,22 @@ void MPIoffloadEngine::CommunicatorInit 
(MPI_Comm &communicator_world, ftruncate(fd, size); VerticalShmBufs[r] = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if ( VerticalShmBufs[r] == MAP_FAILED ) { perror("failed mmap"); assert(0); } + /* + for(uint64_t page=0;pagehead ) { switch ( state->Descrs[s].command ) { case COMMAND_ISEND: - /* - std::cout<< " Send "<Descrs[s].buf<< "["<Descrs[s].bytes<<"]" - << " to " << state->Descrs[s].rank<< " tag" << state->Descrs[s].tag - << " Comm " << MPIoffloadEngine::communicator_universe<< " me " <Descrs[s].buf+base), state->Descrs[s].bytes, MPI_CHAR, @@ -568,11 +623,6 @@ int Slave::Event (void) { break; case COMMAND_IRECV: - /* - std::cout<< " Recv "<Descrs[s].buf<< "["<Descrs[s].bytes<<"]" - << " from " << state->Descrs[s].rank<< " tag" << state->Descrs[s].tag - << " Comm " << MPIoffloadEngine::communicator_universe<< " me "<< universe_rank<< std::endl; - */ ierr=MPI_Irecv((void *)(state->Descrs[s].buf+base), state->Descrs[s].bytes, MPI_CHAR, @@ -588,10 +638,32 @@ int Slave::Event (void) { return 1; break; + case COMMAND_SENDRECV: + + // fprintf(stderr,"Sendrecv ->%d %d : <-%d %d \n",state->Descrs[s].dest, state->Descrs[s].xtag+i*10,state->Descrs[s].src, state->Descrs[s].rtag+i*10); + + ierr=MPI_Sendrecv((void *)(state->Descrs[s].xbuf+base), state->Descrs[s].bytes, MPI_CHAR, state->Descrs[s].dest, state->Descrs[s].xtag+i*10, + (void *)(state->Descrs[s].rbuf+base), state->Descrs[s].bytes, MPI_CHAR, state->Descrs[s].src , state->Descrs[s].rtag+i*10, + MPIoffloadEngine::communicator_universe,MPI_STATUS_IGNORE); + + assert(ierr==0); + + // fprintf(stderr,"Sendrecv done %d %d\n",ierr,i); + // MPI_Barrier(MPIoffloadEngine::HorizontalComm); + // fprintf(stderr,"Barrier\n"); + i++; + + state->start = PERI_PLUS(s); + + return 1; + break; + case COMMAND_WAITALL: for(int t=state->tail;t!=s; t=PERI_PLUS(t) ){ - MPI_Wait((MPI_Request *)&state->Descrs[t].request,MPI_STATUS_IGNORE); + if ( state->Descrs[t].command != COMMAND_SENDRECV ) { + MPI_Wait((MPI_Request 
*)&state->Descrs[t].request,MPI_STATUS_IGNORE); + } }; s=PERI_PLUS(s); state->start = s; @@ -613,6 +685,45 @@ int Slave::Event (void) { // External interaction with the queue ////////////////////////////////////////////////////////////////////////////// +void Slave::QueueSendRecv(void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src) +{ + int head =state->head; + int next = PERI_PLUS(head); + + // Set up descriptor + int worldrank; + int hashtag; + MPI_Comm communicator; + MPI_Request request; + uint64_t relative; + + relative = (uint64_t)xbuf - base; + state->Descrs[head].xbuf = relative; + + relative= (uint64_t)rbuf - base; + state->Descrs[head].rbuf = relative; + + state->Descrs[head].bytes = bytes; + + MPIoffloadEngine::MapCommRankToWorldRank(hashtag,worldrank,xtag,comm,dest); + state->Descrs[head].dest = MPIoffloadEngine::UniverseRanks[worldrank][vertical_rank]; + state->Descrs[head].xtag = hashtag; + + MPIoffloadEngine::MapCommRankToWorldRank(hashtag,worldrank,rtag,comm,src); + state->Descrs[head].src = MPIoffloadEngine::UniverseRanks[worldrank][vertical_rank]; + state->Descrs[head].rtag = hashtag; + + state->Descrs[head].command= COMMAND_SENDRECV; + + // Block until FIFO has space + while( state->tail==next ); + + // Msync on weak order architectures + + // Advance pointer + state->head = next; + +}; uint64_t Slave::QueueCommand(int command,void *buf, int bytes, int tag, MPI_Comm comm,int commrank) { ///////////////////////////////////////// @@ -812,19 +923,22 @@ void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector= shm) && (recv_i+bytes <= shm+MAX_MPI_SHM_BYTES) ); assert(from!=_processor); assert(dest!=_processor); - MPIoffloadEngine::QueueMultiplexedSend(xmit,bytes,_processor,communicator,dest); - MPIoffloadEngine::QueueMultiplexedRecv(recv,bytes,from,communicator,from); -} + MPIoffloadEngine::QueueMultiplexedSendRecv(xmit,recv,bytes,_processor,from,communicator,dest,from); + + 
//MPIoffloadEngine::QueueRoundRobinSendRecv(xmit,recv,bytes,_processor,from,communicator,dest,from); + + //MPIoffloadEngine::QueueMultiplexedSend(xmit,bytes,_processor,communicator,dest); + //MPIoffloadEngine::QueueMultiplexedRecv(recv,bytes,from,communicator,from); +} void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector &list) { MPIoffloadEngine::WaitAll(); + //this->Barrier(); } -void CartesianCommunicator::StencilBarrier(void) -{ -} +void CartesianCommunicator::StencilBarrier(void) { } void CartesianCommunicator::SendToRecvFromComplete(std::vector &list) { diff --git a/lib/communicator/Communicator_none.cc b/lib/communicator/Communicator_none.cc index af55af46..ace2868b 100644 --- a/lib/communicator/Communicator_none.cc +++ b/lib/communicator/Communicator_none.cc @@ -25,7 +25,8 @@ Author: Peter Boyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include +#include + namespace Grid { /////////////////////////////////////////////////////////////////////////////////////////////////// @@ -87,6 +88,7 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector &lis { assert(0); } + void CartesianCommunicator::SendToRecvFromComplete(std::vector &list) { assert(0); @@ -97,7 +99,7 @@ void CartesianCommunicator::Barrier(void){} void CartesianCommunicator::Broadcast(int root,void* data, int bytes) {} void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) { } int CartesianCommunicator::RankFromProcessorCoor(std::vector &coor) { return 0;} -void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &coor){ coor = _processor_coor ;} +void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &coor){ coor = _processor_coor; } void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest) { source =0; diff --git a/lib/communicator/Communicator_shmem.cc 
b/lib/communicator/Communicator_shmem.cc index b7a263a4..3c76c808 100644 --- a/lib/communicator/Communicator_shmem.cc +++ b/lib/communicator/Communicator_shmem.cc @@ -27,6 +27,7 @@ Author: Peter Boyle /* END LEGAL */ #include #include +#include namespace Grid { @@ -51,7 +52,7 @@ typedef struct HandShake_t { } HandShake; std::array make_psync_init(void) { - array ret; + std::array ret; ret.fill(SHMEM_SYNC_VALUE); return ret; } @@ -109,7 +110,7 @@ void CartesianCommunicator::GlobalSum(uint32_t &u){ source = u; dest = 0; - shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync); + shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data()); shmem_barrier_all(); // necessary? u = dest; } @@ -125,7 +126,7 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){ source = u; dest = 0; - shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync); + shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data()); shmem_barrier_all(); // necessary? 
u = dest; } @@ -137,7 +138,8 @@ void CartesianCommunicator::GlobalSum(float &f){ source = f; dest =0.0; - shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync); + shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data()); + shmem_barrier_all(); f = dest; } void CartesianCommunicator::GlobalSumVector(float *f,int N) @@ -148,14 +150,16 @@ void CartesianCommunicator::GlobalSumVector(float *f,int N) static std::array psync = psync_init; if ( shmem_addr_accessible(f,_processor) ){ - shmem_float_sum_to_all(f,f,N,0,0,_Nprocessors,llwrk,psync); + shmem_float_sum_to_all(f,f,N,0,0,_Nprocessors,llwrk,psync.data()); + shmem_barrier_all(); return; } for(int i=0;i &lis SHMEM_VET(recv); // shmem_putmem_nb(recv,xmit,bytes,dest,NULL); shmem_putmem(recv,xmit,bytes,dest); + + if ( CommunicatorPolicy == CommunicatorPolicySequential ) shmem_barrier_all(); } void CartesianCommunicator::SendToRecvFromComplete(std::vector &list) { // shmem_quiet(); // I'm done - shmem_barrier_all();// He's done too + if( CommunicatorPolicy == CommunicatorPolicyConcurrent ) shmem_barrier_all();// He's done too } void CartesianCommunicator::Barrier(void) { @@ -301,13 +310,13 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes) int words = bytes/4; if ( shmem_addr_accessible(data,_processor) ){ - shmem_broadcast32(data,data,words,root,0,0,shmem_n_pes(),psync); + shmem_broadcast32(data,data,words,root,0,0,shmem_n_pes(),psync.data()); return; } for(int w=0;w &rhs,commVector &buffer,int dimen cbmask = 0x3; } - int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane - + int so=plane*rhs._grid->_ostride[dimension]; // base offset for start of plane int e1=rhs._grid->_slice_nblock[dimension]; int e2=rhs._grid->_slice_block[dimension]; int stride=rhs._grid->_slice_stride[dimension]; if ( cbmask == 0x3 ) { -PARALLEL_NESTED_LOOP2 - for(int n=0;nCheckerBoardFromOindexTable(o+b); + int ocb=1<CheckerBoardFromOindex(o+b); if ( ocb &cbmask ) { 
table.push_back(std::pair (bo++,o+b)); } } } -PARALLEL_FOR_LOOP - for(int i=0;i &rhs,std::vector_slice_nblock[dimension]; int e2=rhs._grid->_slice_block[dimension]; int n1=rhs._grid->_slice_stride[dimension]; - int n2=rhs._grid->_slice_block[dimension]; + if ( cbmask ==0x3){ -PARALLEL_NESTED_LOOP2 - for(int n=0;n(temp,pointers,offset); } } } else { - assert(0); //Fixme think this is buggy - - for(int n=0;n_slice_stride[dimension]; + + int o=n*n1; int ocb=1<CheckerBoardFromOindex(o+b); - int offset = b+n*rhs._grid->_slice_block[dimension]; + int offset = b+n*e2; if ( ocb & cbmask ) { cobj temp =compress(rhs._odata[so+o+b]); @@ -171,10 +168,10 @@ template void Scatter_plane_simple (Lattice &rhs,commVector_slice_nblock[dimension]; int e2=rhs._grid->_slice_block[dimension]; + int stride=rhs._grid->_slice_stride[dimension]; if ( cbmask ==0x3 ) { -PARALLEL_NESTED_LOOP2 - for(int n=0;n_slice_stride[dimension]; int bo =n*rhs._grid->_slice_block[dimension]; @@ -182,17 +179,21 @@ PARALLEL_NESTED_LOOP2 } } } else { + std::vector > table; int bo=0; for(int n=0;n_slice_stride[dimension]; - int bo =n*rhs._grid->_slice_block[dimension]; int ocb=1<CheckerBoardFromOindex(o+b);// Could easily be a table lookup if ( ocb & cbmask ) { - rhs._odata[so+o+b]=buffer[bo++]; + table.push_back(std::pair (so+o+b,bo++)); } } } + parallel_for(int i=0;i_slice_block[dimension]; if(cbmask ==0x3 ) { -PARALLEL_NESTED_LOOP2 - for(int n=0;n_slice_stride[dimension]; int offset = b+n*rhs._grid->_slice_block[dimension]; @@ -222,7 +222,11 @@ PARALLEL_NESTED_LOOP2 } } } else { - assert(0); // think this is buggy FIXME + + // Case of SIMD split AND checker dim cannot currently be hit, except in + // Test_cshift_red_black code. 
+ // std::cout << "Scatter_plane merge assert(0); think this is buggy FIXME "<< std::endl;// think this is buggy FIXME + std::cout<<" Unthreaded warning -- buffer is not densely packed ??"<_slice_stride[dimension]; @@ -254,8 +258,7 @@ template void Copy_plane(Lattice& lhs,const Lattice &rhs int e2=rhs._grid->_slice_block[dimension]; int stride = rhs._grid->_slice_stride[dimension]; if(cbmask == 0x3 ){ -PARALLEL_NESTED_LOOP2 - for(int n=0;n void Copy_plane_permute(Lattice& lhs,const Lattice_slice_nblock[dimension]; int e2=rhs._grid->_slice_block [dimension]; int stride = rhs._grid->_slice_stride[dimension]; -PARALLEL_NESTED_LOOP2 - for(int n=0;n Lattice Cshift_local(Lattice &ret,const Lattice // Map to always positive shift modulo global full dimension. shift = (shift+fd)%fd; - ret.checkerboard = grid->CheckerBoardDestination(rhs.checkerboard,shift,dimension); // the permute type + ret.checkerboard = grid->CheckerBoardDestination(rhs.checkerboard,shift,dimension); int permute_dim =grid->PermuteDim(dimension); int permute_type=grid->PermuteType(dimension); int permute_type_dist; @@ -348,7 +350,6 @@ template Lattice Cshift_local(Lattice &ret,const Lattice int o = 0; int bo = x * grid->_ostride[dimension]; - int cb= (cbmask==0x2)? Odd : Even; int sshift = grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb); @@ -361,9 +362,23 @@ template Lattice Cshift_local(Lattice &ret,const Lattice // wrap is whether sshift > rd. // num is sshift mod rd. 
// + // shift 7 + // + // XoXo YcYc + // oXoX cYcY + // XoXo YcYc + // oXoX cYcY + // + // sshift -- + // + // XX YY ; 3 + // XX YY ; 0 + // XX YY ; 3 + // XX YY ; 0 + // int permute_slice=0; if(permute_dim){ - int wrap = sshift/rd; + int wrap = sshift/rd; wrap=wrap % ly; int num = sshift%rd; if ( x< rd-num ) permute_slice=wrap; @@ -375,7 +390,6 @@ template Lattice Cshift_local(Lattice &ret,const Lattice } else { permute_type_dist = permute_type; } - } if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist); diff --git a/lib/cshift/Cshift_mpi.h b/lib/cshift/Cshift_mpi.h index b3c07cd6..b2a44961 100644 --- a/lib/cshift/Cshift_mpi.h +++ b/lib/cshift/Cshift_mpi.h @@ -74,7 +74,6 @@ template void Cshift_comms(Lattice& ret,const Lattice &r sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd); // std::cout << "Cshift_comms dim "< void Cshift_comms(Lattice &ret,const Lattice &r (void *)&recv_buf[0], recv_from_rank, bytes); - - // for(int i=0;iBarrier(); + /* + for(int i=0;i void Cshift_comms_simd(Lattice &ret,const LatticeBarrier(); rpointers[i] = &recv_buf_extract[i][0]; } else { rpointers[i] = &send_buf_extract[nbr_lane][0]; diff --git a/lib/Lattice.h b/lib/lattice/Lattice.h similarity index 100% rename from lib/Lattice.h rename to lib/lattice/Lattice.h diff --git a/lib/lattice/Lattice_arith.h b/lib/lattice/Lattice_arith.h index 6527c487..c3093167 100644 --- a/lib/lattice/Lattice_arith.h +++ b/lib/lattice/Lattice_arith.h @@ -39,8 +39,7 @@ namespace Grid { ret.checkerboard = lhs.checkerboard; conformable(ret,rhs); conformable(lhs,rhs); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ #ifdef STREAMING_STORES obj1 tmp; mult(&tmp,&lhs._odata[ss],&rhs._odata[ss]); @@ -56,8 +55,7 @@ PARALLEL_FOR_LOOP ret.checkerboard = lhs.checkerboard; conformable(ret,rhs); conformable(lhs,rhs); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ + parallel_for(int 
ss=0;ssoSites();ss++){ #ifdef STREAMING_STORES obj1 tmp; mac(&tmp,&lhs._odata[ss],&rhs._odata[ss]); @@ -73,8 +71,7 @@ PARALLEL_FOR_LOOP ret.checkerboard = lhs.checkerboard; conformable(ret,rhs); conformable(lhs,rhs); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ #ifdef STREAMING_STORES obj1 tmp; sub(&tmp,&lhs._odata[ss],&rhs._odata[ss]); @@ -89,8 +86,7 @@ PARALLEL_FOR_LOOP ret.checkerboard = lhs.checkerboard; conformable(ret,rhs); conformable(lhs,rhs); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ #ifdef STREAMING_STORES obj1 tmp; add(&tmp,&lhs._odata[ss],&rhs._odata[ss]); @@ -108,8 +104,7 @@ PARALLEL_FOR_LOOP void mult(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ ret.checkerboard = lhs.checkerboard; conformable(lhs,ret); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ obj1 tmp; mult(&tmp,&lhs._odata[ss],&rhs); vstream(ret._odata[ss],tmp); @@ -120,8 +115,7 @@ PARALLEL_FOR_LOOP void mac(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ ret.checkerboard = lhs.checkerboard; conformable(ret,lhs); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ obj1 tmp; mac(&tmp,&lhs._odata[ss],&rhs); vstream(ret._odata[ss],tmp); @@ -132,8 +126,7 @@ PARALLEL_FOR_LOOP void sub(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ ret.checkerboard = lhs.checkerboard; conformable(ret,lhs); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ #ifdef STREAMING_STORES obj1 tmp; sub(&tmp,&lhs._odata[ss],&rhs); @@ -147,8 +140,7 @@ PARALLEL_FOR_LOOP void add(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ ret.checkerboard = lhs.checkerboard; conformable(lhs,ret); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ #ifdef STREAMING_STORES obj1 tmp; add(&tmp,&lhs._odata[ss],&rhs); @@ -166,8 +158,7 @@ PARALLEL_FOR_LOOP void 
mult(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ ret.checkerboard = rhs.checkerboard; conformable(ret,rhs); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ #ifdef STREAMING_STORES obj1 tmp; mult(&tmp,&lhs,&rhs._odata[ss]); @@ -182,8 +173,7 @@ PARALLEL_FOR_LOOP void mac(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ ret.checkerboard = rhs.checkerboard; conformable(ret,rhs); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ #ifdef STREAMING_STORES obj1 tmp; mac(&tmp,&lhs,&rhs._odata[ss]); @@ -198,8 +188,7 @@ PARALLEL_FOR_LOOP void sub(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ ret.checkerboard = rhs.checkerboard; conformable(ret,rhs); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ #ifdef STREAMING_STORES obj1 tmp; sub(&tmp,&lhs,&rhs._odata[ss]); @@ -213,8 +202,7 @@ PARALLEL_FOR_LOOP void add(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ ret.checkerboard = rhs.checkerboard; conformable(ret,rhs); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ #ifdef STREAMING_STORES obj1 tmp; add(&tmp,&lhs,&rhs._odata[ss]); @@ -230,8 +218,7 @@ PARALLEL_FOR_LOOP ret.checkerboard = x.checkerboard; conformable(ret,x); conformable(x,y); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ #ifdef STREAMING_STORES vobj tmp = a*x._odata[ss]+y._odata[ss]; vstream(ret._odata[ss],tmp); @@ -245,8 +232,7 @@ PARALLEL_FOR_LOOP ret.checkerboard = x.checkerboard; conformable(ret,x); conformable(x,y); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ #ifdef STREAMING_STORES vobj tmp = a*x._odata[ss]+b*y._odata[ss]; vstream(ret._odata[ss],tmp); diff --git a/lib/lattice/Lattice_base.h b/lib/lattice/Lattice_base.h index e4dc1ca8..0c345545 100644 --- a/lib/lattice/Lattice_base.h +++ b/lib/lattice/Lattice_base.h @@ -121,8 
+121,7 @@ public: assert( (cb==Odd) || (cb==Even)); checkerboard=cb; -PARALLEL_FOR_LOOP - for(int ss=0;ss<_grid->oSites();ss++){ + parallel_for(int ss=0;ss<_grid->oSites();ss++){ #ifdef STREAMING_STORES vobj tmp = eval(ss,expr); vstream(_odata[ss] ,tmp); @@ -144,8 +143,7 @@ PARALLEL_FOR_LOOP assert( (cb==Odd) || (cb==Even)); checkerboard=cb; -PARALLEL_FOR_LOOP - for(int ss=0;ss<_grid->oSites();ss++){ + parallel_for(int ss=0;ss<_grid->oSites();ss++){ #ifdef STREAMING_STORES vobj tmp = eval(ss,expr); vstream(_odata[ss] ,tmp); @@ -167,8 +165,7 @@ PARALLEL_FOR_LOOP assert( (cb==Odd) || (cb==Even)); checkerboard=cb; -PARALLEL_FOR_LOOP - for(int ss=0;ss<_grid->oSites();ss++){ + parallel_for(int ss=0;ss<_grid->oSites();ss++){ #ifdef STREAMING_STORES //vobj tmp = eval(ss,expr); vstream(_odata[ss] ,eval(ss,expr)); @@ -191,8 +188,7 @@ PARALLEL_FOR_LOOP checkerboard=cb; _odata.resize(_grid->oSites()); -PARALLEL_FOR_LOOP - for(int ss=0;ss<_grid->oSites();ss++){ + parallel_for(int ss=0;ss<_grid->oSites();ss++){ #ifdef STREAMING_STORES vobj tmp = eval(ss,expr); vstream(_odata[ss] ,tmp); @@ -213,8 +209,7 @@ PARALLEL_FOR_LOOP checkerboard=cb; _odata.resize(_grid->oSites()); -PARALLEL_FOR_LOOP - for(int ss=0;ss<_grid->oSites();ss++){ + parallel_for(int ss=0;ss<_grid->oSites();ss++){ #ifdef STREAMING_STORES vobj tmp = eval(ss,expr); vstream(_odata[ss] ,tmp); @@ -235,8 +230,7 @@ PARALLEL_FOR_LOOP checkerboard=cb; _odata.resize(_grid->oSites()); -PARALLEL_FOR_LOOP - for(int ss=0;ss<_grid->oSites();ss++){ + parallel_for(int ss=0;ss<_grid->oSites();ss++){ vstream(_odata[ss] ,eval(ss,expr)); } }; @@ -258,8 +252,7 @@ PARALLEL_FOR_LOOP _grid = r._grid; checkerboard = r.checkerboard; _odata.resize(_grid->oSites());// essential - PARALLEL_FOR_LOOP - for(int ss=0;ss<_grid->oSites();ss++){ + parallel_for(int ss=0;ss<_grid->oSites();ss++){ _odata[ss]=r._odata[ss]; } } @@ -269,8 +262,7 @@ PARALLEL_FOR_LOOP virtual ~Lattice(void) = default; template strong_inline Lattice & operator = (const sobj 
& r){ -PARALLEL_FOR_LOOP - for(int ss=0;ss<_grid->oSites();ss++){ + parallel_for(int ss=0;ss<_grid->oSites();ss++){ this->_odata[ss]=r; } return *this; @@ -279,8 +271,7 @@ PARALLEL_FOR_LOOP this->checkerboard = r.checkerboard; conformable(*this,r); -PARALLEL_FOR_LOOP - for(int ss=0;ss<_grid->oSites();ss++){ + parallel_for(int ss=0;ss<_grid->oSites();ss++){ this->_odata[ss]=r._odata[ss]; } return *this; diff --git a/lib/lattice/Lattice_comparison.h b/lib/lattice/Lattice_comparison.h index 1b5b0624..9bf1fb2d 100644 --- a/lib/lattice/Lattice_comparison.h +++ b/lib/lattice/Lattice_comparison.h @@ -45,90 +45,87 @@ namespace Grid { ////////////////////////////////////////////////////////////////////////// template inline Lattice LLComparison(vfunctor op,const Lattice &lhs,const Lattice &rhs) - { - Lattice ret(rhs._grid); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites(); ss++){ - ret._odata[ss]=op(lhs._odata[ss],rhs._odata[ss]); - } - return ret; + { + Lattice ret(rhs._grid); + parallel_for(int ss=0;ssoSites(); ss++){ + ret._odata[ss]=op(lhs._odata[ss],rhs._odata[ss]); } + return ret; + } ////////////////////////////////////////////////////////////////////////// // compare lattice to scalar ////////////////////////////////////////////////////////////////////////// - template + template inline Lattice LSComparison(vfunctor op,const Lattice &lhs,const robj &rhs) - { - Lattice ret(lhs._grid); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites(); ss++){ - ret._odata[ss]=op(lhs._odata[ss],rhs); - } - return ret; + { + Lattice ret(lhs._grid); + parallel_for(int ss=0;ssoSites(); ss++){ + ret._odata[ss]=op(lhs._odata[ss],rhs); } + return ret; + } ////////////////////////////////////////////////////////////////////////// // compare scalar to lattice ////////////////////////////////////////////////////////////////////////// - template + template inline Lattice SLComparison(vfunctor op,const lobj &lhs,const Lattice &rhs) - { - Lattice ret(rhs._grid); -PARALLEL_FOR_LOOP - for(int 
ss=0;ssoSites(); ss++){ - ret._odata[ss]=op(lhs._odata[ss],rhs); - } - return ret; + { + Lattice ret(rhs._grid); + parallel_for(int ss=0;ssoSites(); ss++){ + ret._odata[ss]=op(lhs._odata[ss],rhs); } - + return ret; + } + ////////////////////////////////////////////////////////////////////////// // Map to functors ////////////////////////////////////////////////////////////////////////// - // Less than - template - inline Lattice operator < (const Lattice & lhs, const Lattice & rhs) { - return LLComparison(vlt(),lhs,rhs); - } - template - inline Lattice operator < (const Lattice & lhs, const robj & rhs) { - return LSComparison(vlt(),lhs,rhs); - } - template - inline Lattice operator < (const lobj & lhs, const Lattice & rhs) { - return SLComparison(vlt(),lhs,rhs); - } - - // Less than equal - template - inline Lattice operator <= (const Lattice & lhs, const Lattice & rhs) { - return LLComparison(vle(),lhs,rhs); - } - template - inline Lattice operator <= (const Lattice & lhs, const robj & rhs) { - return LSComparison(vle(),lhs,rhs); - } - template - inline Lattice operator <= (const lobj & lhs, const Lattice & rhs) { - return SLComparison(vle(),lhs,rhs); - } - - // Greater than - template - inline Lattice operator > (const Lattice & lhs, const Lattice & rhs) { - return LLComparison(vgt(),lhs,rhs); - } - template - inline Lattice operator > (const Lattice & lhs, const robj & rhs) { - return LSComparison(vgt(),lhs,rhs); - } - template - inline Lattice operator > (const lobj & lhs, const Lattice & rhs) { + // Less than + template + inline Lattice operator < (const Lattice & lhs, const Lattice & rhs) { + return LLComparison(vlt(),lhs,rhs); + } + template + inline Lattice operator < (const Lattice & lhs, const robj & rhs) { + return LSComparison(vlt(),lhs,rhs); + } + template + inline Lattice operator < (const lobj & lhs, const Lattice & rhs) { + return SLComparison(vlt(),lhs,rhs); + } + + // Less than equal + template + inline Lattice operator <= (const Lattice & lhs, 
const Lattice & rhs) { + return LLComparison(vle(),lhs,rhs); + } + template + inline Lattice operator <= (const Lattice & lhs, const robj & rhs) { + return LSComparison(vle(),lhs,rhs); + } + template + inline Lattice operator <= (const lobj & lhs, const Lattice & rhs) { + return SLComparison(vle(),lhs,rhs); + } + + // Greater than + template + inline Lattice operator > (const Lattice & lhs, const Lattice & rhs) { + return LLComparison(vgt(),lhs,rhs); + } + template + inline Lattice operator > (const Lattice & lhs, const robj & rhs) { + return LSComparison(vgt(),lhs,rhs); + } + template + inline Lattice operator > (const lobj & lhs, const Lattice & rhs) { return SLComparison(vgt(),lhs,rhs); - } - - - // Greater than equal + } + + + // Greater than equal template - inline Lattice operator >= (const Lattice & lhs, const Lattice & rhs) { + inline Lattice operator >= (const Lattice & lhs, const Lattice & rhs) { return LLComparison(vge(),lhs,rhs); } template @@ -136,38 +133,37 @@ PARALLEL_FOR_LOOP return LSComparison(vge(),lhs,rhs); } template - inline Lattice operator >= (const lobj & lhs, const Lattice & rhs) { + inline Lattice operator >= (const lobj & lhs, const Lattice & rhs) { return SLComparison(vge(),lhs,rhs); } - + // equal template - inline Lattice operator == (const Lattice & lhs, const Lattice & rhs) { + inline Lattice operator == (const Lattice & lhs, const Lattice & rhs) { return LLComparison(veq(),lhs,rhs); } template - inline Lattice operator == (const Lattice & lhs, const robj & rhs) { + inline Lattice operator == (const Lattice & lhs, const robj & rhs) { return LSComparison(veq(),lhs,rhs); } template - inline Lattice operator == (const lobj & lhs, const Lattice & rhs) { + inline Lattice operator == (const lobj & lhs, const Lattice & rhs) { return SLComparison(veq(),lhs,rhs); } - - + + // not equal template - inline Lattice operator != (const Lattice & lhs, const Lattice & rhs) { + inline Lattice operator != (const Lattice & lhs, const Lattice & rhs) { 
return LLComparison(vne(),lhs,rhs); } template - inline Lattice operator != (const Lattice & lhs, const robj & rhs) { + inline Lattice operator != (const Lattice & lhs, const robj & rhs) { return LSComparison(vne(),lhs,rhs); } template - inline Lattice operator != (const lobj & lhs, const Lattice & rhs) { + inline Lattice operator != (const lobj & lhs, const Lattice & rhs) { return SLComparison(vne(),lhs,rhs); } - } #endif diff --git a/lib/lattice/Lattice_local.h b/lib/lattice/Lattice_local.h index 65d1d929..9dae1cd9 100644 --- a/lib/lattice/Lattice_local.h +++ b/lib/lattice/Lattice_local.h @@ -34,47 +34,42 @@ Author: Peter Boyle namespace Grid { - ///////////////////////////////////////////////////// - // Non site, reduced locally reduced routines - ///////////////////////////////////////////////////// - - // localNorm2, - template + ///////////////////////////////////////////////////// + // Non site, reduced locally reduced routines + ///////////////////////////////////////////////////// + + // localNorm2, + template inline auto localNorm2 (const Lattice &rhs)-> Lattice { Lattice ret(rhs._grid); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites(); ss++){ - ret._odata[ss]=innerProduct(rhs._odata[ss],rhs._odata[ss]); - } - return ret; + parallel_for(int ss=0;ssoSites(); ss++){ + ret._odata[ss]=innerProduct(rhs._odata[ss],rhs._odata[ss]); + } + return ret; } - - // localInnerProduct - template + + // localInnerProduct + template inline auto localInnerProduct (const Lattice &lhs,const Lattice &rhs) -> Lattice { Lattice ret(rhs._grid); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites(); ss++){ + parallel_for(int ss=0;ssoSites(); ss++){ ret._odata[ss]=innerProduct(lhs._odata[ss],rhs._odata[ss]); } return ret; } - - // outerProduct Scalar x Scalar -> Scalar - // Vector x Vector -> Matrix - template + + // outerProduct Scalar x Scalar -> Scalar + // Vector x Vector -> Matrix + template inline auto outerProduct (const Lattice &lhs,const Lattice &rhs) -> Lattice - { - Lattice 
ret(rhs._grid); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites(); ss++){ - ret._odata[ss]=outerProduct(lhs._odata[ss],rhs._odata[ss]); - } - return ret; - } - + { + Lattice ret(rhs._grid); + parallel_for(int ss=0;ssoSites(); ss++){ + ret._odata[ss]=outerProduct(lhs._odata[ss],rhs._odata[ss]); + } + return ret; + } } - #endif diff --git a/lib/lattice/Lattice_overload.h b/lib/lattice/Lattice_overload.h index 2a5d16a1..0906b610 100644 --- a/lib/lattice/Lattice_overload.h +++ b/lib/lattice/Lattice_overload.h @@ -37,8 +37,7 @@ namespace Grid { inline Lattice operator -(const Lattice &r) { Lattice ret(r._grid); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ vstream(ret._odata[ss], -r._odata[ss]); } return ret; @@ -74,8 +73,7 @@ PARALLEL_FOR_LOOP inline auto operator * (const left &lhs,const Lattice &rhs) -> Lattice { Lattice ret(rhs._grid); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites(); ss++){ + parallel_for(int ss=0;ssoSites(); ss++){ decltype(lhs*rhs._odata[0]) tmp=lhs*rhs._odata[ss]; vstream(ret._odata[ss],tmp); // ret._odata[ss]=lhs*rhs._odata[ss]; @@ -86,8 +84,7 @@ PARALLEL_FOR_LOOP inline auto operator + (const left &lhs,const Lattice &rhs) -> Lattice { Lattice ret(rhs._grid); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites(); ss++){ + parallel_for(int ss=0;ssoSites(); ss++){ decltype(lhs+rhs._odata[0]) tmp =lhs-rhs._odata[ss]; vstream(ret._odata[ss],tmp); // ret._odata[ss]=lhs+rhs._odata[ss]; @@ -98,11 +95,9 @@ PARALLEL_FOR_LOOP inline auto operator - (const left &lhs,const Lattice &rhs) -> Lattice { Lattice ret(rhs._grid); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites(); ss++){ + parallel_for(int ss=0;ssoSites(); ss++){ decltype(lhs-rhs._odata[0]) tmp=lhs-rhs._odata[ss]; vstream(ret._odata[ss],tmp); - // ret._odata[ss]=lhs-rhs._odata[ss]; } return ret; } @@ -110,8 +105,7 @@ PARALLEL_FOR_LOOP inline auto operator * (const Lattice &lhs,const right &rhs) -> Lattice { Lattice ret(lhs._grid); -PARALLEL_FOR_LOOP - for(int 
ss=0;ssoSites(); ss++){ + parallel_for(int ss=0;ssoSites(); ss++){ decltype(lhs._odata[0]*rhs) tmp =lhs._odata[ss]*rhs; vstream(ret._odata[ss],tmp); // ret._odata[ss]=lhs._odata[ss]*rhs; @@ -122,8 +116,7 @@ PARALLEL_FOR_LOOP inline auto operator + (const Lattice &lhs,const right &rhs) -> Lattice { Lattice ret(lhs._grid); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites(); ss++){ + parallel_for(int ss=0;ssoSites(); ss++){ decltype(lhs._odata[0]+rhs) tmp=lhs._odata[ss]+rhs; vstream(ret._odata[ss],tmp); // ret._odata[ss]=lhs._odata[ss]+rhs; @@ -134,15 +127,12 @@ PARALLEL_FOR_LOOP inline auto operator - (const Lattice &lhs,const right &rhs) -> Lattice { Lattice ret(lhs._grid); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites(); ss++){ + parallel_for(int ss=0;ssoSites(); ss++){ decltype(lhs._odata[0]-rhs) tmp=lhs._odata[ss]-rhs; vstream(ret._odata[ss],tmp); // ret._odata[ss]=lhs._odata[ss]-rhs; } return ret; } - - } #endif diff --git a/lib/lattice/Lattice_peekpoke.h b/lib/lattice/Lattice_peekpoke.h index 19d349c4..3d6268d2 100644 --- a/lib/lattice/Lattice_peekpoke.h +++ b/lib/lattice/Lattice_peekpoke.h @@ -44,22 +44,20 @@ namespace Grid { { Lattice(lhs._odata[0],i))> ret(lhs._grid); ret.checkerboard=lhs.checkerboard; -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ - ret._odata[ss] = peekIndex(lhs._odata[ss],i); - } - return ret; + parallel_for(int ss=0;ssoSites();ss++){ + ret._odata[ss] = peekIndex(lhs._odata[ss],i); + } + return ret; }; template - auto PeekIndex(const Lattice &lhs,int i,int j) -> Lattice(lhs._odata[0],i,j))> + auto PeekIndex(const Lattice &lhs,int i,int j) -> Lattice(lhs._odata[0],i,j))> { Lattice(lhs._odata[0],i,j))> ret(lhs._grid); ret.checkerboard=lhs.checkerboard; -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ - ret._odata[ss] = peekIndex(lhs._odata[ss],i,j); - } - return ret; + parallel_for(int ss=0;ssoSites();ss++){ + ret._odata[ss] = peekIndex(lhs._odata[ss],i,j); + } + return ret; }; 
//////////////////////////////////////////////////////////////////////////////////////////////////// @@ -68,25 +66,23 @@ PARALLEL_FOR_LOOP template void PokeIndex(Lattice &lhs,const Lattice(lhs._odata[0],0))> & rhs,int i) { -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ - pokeIndex(lhs._odata[ss],rhs._odata[ss],i); - } + parallel_for(int ss=0;ssoSites();ss++){ + pokeIndex(lhs._odata[ss],rhs._odata[ss],i); + } } template void PokeIndex(Lattice &lhs,const Lattice(lhs._odata[0],0,0))> & rhs,int i,int j) { -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ - pokeIndex(lhs._odata[ss],rhs._odata[ss],i,j); - } + parallel_for(int ss=0;ssoSites();ss++){ + pokeIndex(lhs._odata[ss],rhs._odata[ss],i,j); + } } ////////////////////////////////////////////////////// // Poke a scalar object into the SIMD array ////////////////////////////////////////////////////// template - void pokeSite(const sobj &s,Lattice &l,std::vector &site){ + void pokeSite(const sobj &s,Lattice &l,const std::vector &site){ GridBase *grid=l._grid; @@ -120,7 +116,7 @@ PARALLEL_FOR_LOOP // Peek a scalar object from the SIMD array ////////////////////////////////////////////////////////// template - void peekSite(sobj &s,const Lattice &l,std::vector &site){ + void peekSite(sobj &s,const Lattice &l,const std::vector &site){ GridBase *grid=l._grid; @@ -131,9 +127,6 @@ PARALLEL_FOR_LOOP assert( l.checkerboard == l._grid->CheckerBoard(site)); - // FIXME - // assert( sizeof(sobj)*Nsimd == sizeof(vobj)); - int rank,odx,idx; grid->GlobalCoorToRankIndex(rank,odx,idx,site); diff --git a/lib/lattice/Lattice_reality.h b/lib/lattice/Lattice_reality.h index 10add8cd..7e7b2631 100644 --- a/lib/lattice/Lattice_reality.h +++ b/lib/lattice/Lattice_reality.h @@ -40,8 +40,7 @@ namespace Grid { template inline Lattice adj(const Lattice &lhs){ Lattice ret(lhs._grid); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ ret._odata[ss] = adj(lhs._odata[ss]); } return ret; @@ -49,13 
+48,10 @@ PARALLEL_FOR_LOOP template inline Lattice conjugate(const Lattice &lhs){ Lattice ret(lhs._grid); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ - ret._odata[ss] = conjugate(lhs._odata[ss]); + parallel_for(int ss=0;ssoSites();ss++){ + ret._odata[ss] = conjugate(lhs._odata[ss]); } return ret; }; - - } #endif diff --git a/lib/lattice/Lattice_reduction.h b/lib/lattice/Lattice_reduction.h index 2615af48..45a88a64 100644 --- a/lib/lattice/Lattice_reduction.h +++ b/lib/lattice/Lattice_reduction.h @@ -57,8 +57,7 @@ namespace Grid { sumarray[i]=zero; } -PARALLEL_FOR_LOOP - for(int thr=0;thrSumArraySize();thr++){ + parallel_for(int thr=0;thrSumArraySize();thr++){ int nwork, mywork, myoff; GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff); @@ -68,7 +67,7 @@ PARALLEL_FOR_LOOP } sumarray[thr]=TensorRemove(vnrm) ; } - + vector_type vvnrm; vvnrm=zero; // sum across threads for(int i=0;iSumArraySize();i++){ vvnrm = vvnrm+sumarray[i]; @@ -114,18 +113,17 @@ PARALLEL_FOR_LOOP sumarray[i]=zero; } -PARALLEL_FOR_LOOP - for(int thr=0;thrSumArraySize();thr++){ + parallel_for(int thr=0;thrSumArraySize();thr++){ int nwork, mywork, myoff; GridThread::GetWork(grid->oSites(),thr,mywork,myoff); - + vobj vvsum=zero; for(int ss=myoff;ssSumArraySize();i++){ vsum = vsum+sumarray[i]; diff --git a/lib/lattice/Lattice_rng.h b/lib/lattice/Lattice_rng.h index 2caf2de4..3d653d17 100644 --- a/lib/lattice/Lattice_rng.h +++ b/lib/lattice/Lattice_rng.h @@ -34,6 +34,7 @@ Author: paboyle namespace Grid { + //http://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-90Ar1.pdf ? 
////////////////////////////////////////////////////////////// // Allow the RNG state to be less dense than the fine grid @@ -69,6 +70,7 @@ namespace Grid { } // Wrap seed_seq to give common interface with random_device + // Should rather wrap random_device and have a generate class fixedSeed { public: @@ -76,20 +78,31 @@ namespace Grid { std::seed_seq src; - fixedSeed(const std::vector &seeds) : src(seeds.begin(),seeds.end()) {}; - - result_type operator () (void){ - - std::vector list(1); - - src.generate(list.begin(),list.end()); - - return list[0]; + template fixedSeed(const std::vector &seeds) : src(seeds.begin(),seeds.end()) {}; + template< class RandomIt > void generate( RandomIt begin, RandomIt end ) { + src.generate(begin,end); } }; + + class deviceSeed { + public: + + std::random_device rd; + + typedef std::random_device::result_type result_type; + + deviceSeed(void) : rd(){}; + + template< class RandomIt > void generate( RandomIt begin, RandomIt end ) { + for(RandomIt it=begin; it!=end;it++){ + *it = rd(); + } + } + }; + // real scalars are one component template void fillScalar(scalar &s,distribution &dist,generator & gen) { @@ -127,7 +140,7 @@ namespace Grid { std::vector _generators; std::vector> _uniform; std::vector> _gaussian; - std::vector> _bernoulli; + std::vector> _bernoulli; void GetState(std::vector & saved,int gen) { saved.resize(RngStateCount); @@ -155,13 +168,6 @@ namespace Grid { // FIXME ... do we require lockstep draws of randoms // from all nodes keeping seeds consistent. 
// place a barrier/broadcast in the fill routine - template void Seed(source &src) - { - typename source::result_type init = src(); - CartesianCommunicator::BroadcastWorld(0,(void *)&init,sizeof(init)); - _generators[0] = RngEngine(init); - _seeded=1; - } GridSerialRNG() : GridRNGbase() { _generators.resize(1); @@ -244,12 +250,17 @@ namespace Grid { CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); } - + template void Seed(source &src) + { + _generators[0] = RngEngine(src); + _seeded=1; + } void SeedRandomDevice(void){ - std::random_device rd; - Seed(rd); + deviceSeed src; + Seed(src); } void SeedFixedIntegers(const std::vector &seeds){ + CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size()); fixedSeed src(seeds); Seed(src); } @@ -278,46 +289,6 @@ namespace Grid { } - // This loop could be made faster to avoid the Ahmdahl by - // i) seed generators on each timeslice, for x=y=z=0; - // ii) seed generators on each z for x=y=0 - // iii)seed generators on each y,z for x=0 - // iv) seed generators on each y,z,x - // made possible by physical indexing. 
- template void Seed(source &src) - { - std::vector gcoor; - - int gsites = _grid->_gsites; - - typename source::result_type init = src(); - RngEngine pseeder(init); - std::uniform_int_distribution ui; - - for(int gidx=0;gidxGlobalIndexToGlobalCoor(gidx,gcoor); - _grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); - - int l_idx=generator_idx(o_idx,i_idx); - - const int num_rand_seed=16; - std::vector site_seeds(num_rand_seed); - for(int i=0;iBroadcast(0,(void *)&site_seeds[0],sizeof(int)*site_seeds.size()); - - if( rank == _grid->ThisRank() ){ - fixedSeed ssrc(site_seeds); - typename source::result_type sinit = ssrc(); - _generators[l_idx] = RngEngine(sinit); - } - } - _seeded=1; - } //FIXME implement generic IO and create state save/restore //void SaveState(const std::string &file); @@ -336,8 +307,7 @@ namespace Grid { int words=sizeof(scalar_object)/sizeof(scalar_type); -PARALLEL_FOR_LOOP - for(int ss=0;ss buf(Nsimd); for(int m=0;m void Seed(source &src) + { + + typedef typename source::result_type seed_t; + std::uniform_int_distribution uid; + + int numseed=4; + int gsites = _grid->_gsites; + std::vector site_init(numseed); + std::vector gcoor; + + + // Master RngEngine + std::vector master_init(numseed); src.generate(master_init.begin(),master_init.end()); + _grid->Broadcast(0,(void *)&master_init[0],sizeof(seed_t)*numseed); + fixedSeed master_seed(master_init); + RngEngine master_engine(master_seed); + + // Per node RngEngine + std::vector node_init(numseed); + for(int r=0;r<_grid->ProcessorCount();r++) { + + std::vector rank_init(numseed); + for(int i=0;iThisRank() ) { + for(int i=0;iGlobalIndexToGlobalCoor(gidx,gcoor); + _grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); + + if( rank == _grid->ThisRank() ){ + int l_idx=generator_idx(o_idx,i_idx); + for(int i=0;i &seeds){ + CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size()); fixedSeed src(seeds); Seed(src); } diff --git a/lib/lattice/Lattice_trace.h 
b/lib/lattice/Lattice_trace.h index a341ff7c..449c55f8 100644 --- a/lib/lattice/Lattice_trace.h +++ b/lib/lattice/Lattice_trace.h @@ -42,8 +42,7 @@ namespace Grid { -> Lattice { Lattice ret(lhs._grid); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ ret._odata[ss] = trace(lhs._odata[ss]); } return ret; @@ -56,8 +55,7 @@ PARALLEL_FOR_LOOP inline auto TraceIndex(const Lattice &lhs) -> Lattice(lhs._odata[0]))> { Lattice(lhs._odata[0]))> ret(lhs._grid); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ ret._odata[ss] = traceIndex(lhs._odata[ss]); } return ret; diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index 27b4aa7e..8eb75f15 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -51,7 +51,7 @@ inline void subdivides(GridBase *coarse,GridBase *fine) template inline void pickCheckerboard(int cb,Lattice &half,const Lattice &full){ half.checkerboard = cb; int ssh=0; - //PARALLEL_FOR_LOOP + //parallel_for for(int ss=0;ssoSites();ss++){ std::vector coor; int cbos; @@ -68,7 +68,7 @@ inline void subdivides(GridBase *coarse,GridBase *fine) template inline void setCheckerboard(Lattice &full,const Lattice &half){ int cb = half.checkerboard; int ssh=0; - //PARALLEL_FOR_LOOP + //parallel_for for(int ss=0;ssoSites();ss++){ std::vector coor; int cbos; @@ -153,8 +153,7 @@ inline void blockZAXPY(Lattice &fineZ, assert(block_r[d]*coarse->_rdimensions[d]==fine->_rdimensions[d]); } -PARALLEL_FOR_LOOP - for(int sf=0;sfoSites();sf++){ + parallel_for(int sf=0;sfoSites();sf++){ int sc; std::vector coor_c(_ndimension); @@ -186,8 +185,7 @@ template fine_inner = localInnerProduct(fineX,fineY); blockSum(coarse_inner,fine_inner); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ CoarseInner._odata[ss] = coarse_inner._odata[ss]; } } @@ -333,9 +331,6 @@ void localConvert(const Lattice &in,Lattice &out) 
typedef typename vobj::scalar_object sobj; typedef typename vvobj::scalar_object ssobj; - sobj s; - ssobj ss; - GridBase *ig = in._grid; GridBase *og = out._grid; @@ -347,10 +342,13 @@ void localConvert(const Lattice &in,Lattice &out) for(int d=0;d_processors[d] == og->_processors[d]); assert(ig->_ldimensions[d] == og->_ldimensions[d]); + assert(ig->lSites() == og->lSites()); } - //PARALLEL_FOR_LOOP - for(int idx=0;idxlSites();idx++){ + parallel_for(int idx=0;idxlSites();idx++){ + sobj s; + ssobj ss; + std::vector lcoor(ni); ig->LocalIndexToLocalCoor(idx,lcoor); peekLocalSite(s,in,lcoor); @@ -364,7 +362,6 @@ template void InsertSlice(Lattice &lowDim,Lattice & higherDim,int slice, int orthog) { typedef typename vobj::scalar_object sobj; - sobj s; GridBase *lg = lowDim._grid; GridBase *hg = higherDim._grid; @@ -386,17 +383,16 @@ void InsertSlice(Lattice &lowDim,Lattice & higherDim,int slice, int } // the above should guarantee that the operations are local - // Guido: check the threading here - //PARALLEL_FOR_LOOP - for(int idx=0;idxlSites();idx++){ + parallel_for(int idx=0;idxlSites();idx++){ + sobj s; std::vector lcoor(nl); std::vector hcoor(nh); lg->LocalIndexToLocalCoor(idx,lcoor); - dl=0; + int ddl=0; hcoor[orthog] = slice; for(int d=0;d void ExtractSlice(Lattice &lowDim, Lattice & higherDim,int slice, int orthog) { typedef typename vobj::scalar_object sobj; - sobj s; GridBase *lg = lowDim._grid; GridBase *hg = higherDim._grid; @@ -429,16 +424,16 @@ void ExtractSlice(Lattice &lowDim, Lattice & higherDim,int slice, in } } // the above should guarantee that the operations are local - //PARALLEL_FOR_LOOP - for(int idx=0;idxlSites();idx++){ + parallel_for(int idx=0;idxlSites();idx++){ + sobj s; std::vector lcoor(nl); std::vector hcoor(nh); lg->LocalIndexToLocalCoor(idx,lcoor); - dl=0; + int ddl=0; hcoor[orthog] = slice; for(int d=0;d void InsertSliceLocal(Lattice &lowDim, Lattice & higherDim,int slice_lo,int slice_hi, int orthog) { typedef typename 
vobj::scalar_object sobj; - sobj s; GridBase *lg = lowDim._grid; GridBase *hg = higherDim._grid; @@ -469,8 +463,8 @@ void InsertSliceLocal(Lattice &lowDim, Lattice & higherDim,int slice } // the above should guarantee that the operations are local - //PARALLEL_FOR_LOOP - for(int idx=0;idxlSites();idx++){ + parallel_for(int idx=0;idxlSites();idx++){ + sobj s; std::vector lcoor(nl); std::vector hcoor(nh); lg->LocalIndexToLocalCoor(idx,lcoor); @@ -488,7 +482,6 @@ template void ExtractSliceLocal(Lattice &lowDim, Lattice & higherDim,int slice_lo,int slice_hi, int orthog) { typedef typename vobj::scalar_object sobj; - sobj s; GridBase *lg = lowDim._grid; GridBase *hg = higherDim._grid; @@ -505,8 +498,8 @@ void ExtractSliceLocal(Lattice &lowDim, Lattice & higherDim,int slic } // the above should guarantee that the operations are local - //PARALLEL_FOR_LOOP - for(int idx=0;idxlSites();idx++){ + parallel_for(int idx=0;idxlSites();idx++){ + sobj s; std::vector lcoor(nl); std::vector hcoor(nh); lg->LocalIndexToLocalCoor(idx,lcoor); @@ -574,8 +567,7 @@ typename std::enable_if::value && !isSIMDvectorized in_grid->iCoorFromIindex(in_icoor[lane], lane); } -PARALLEL_FOR_LOOP - for(int in_oidx = 0; in_oidx < in_grid->oSites(); in_oidx++){ //loop over outer index + parallel_for(int in_oidx = 0; in_oidx < in_grid->oSites(); in_oidx++){ //loop over outer index //Assemble vector of pointers to output elements std::vector out_ptrs(in_nsimd); @@ -623,8 +615,7 @@ void precisionChange(Lattice &out, const Lattice &in){ std::vector in_slex_conv(in_grid->lSites()); unvectorizeToLexOrdArray(in_slex_conv, in); - PARALLEL_FOR_LOOP - for(int out_oidx=0;out_oidxoSites();out_oidx++){ + parallel_for(int out_oidx=0;out_oidxoSites();out_oidx++){ std::vector out_ocoor(ndim); out_grid->oCoorFromOindex(out_ocoor, out_oidx); @@ -642,10 +633,6 @@ void precisionChange(Lattice &out, const Lattice &in){ merge(out._odata[out_oidx], ptrs, 0); } } - - - - } #endif diff --git a/lib/lattice/Lattice_transpose.h 
b/lib/lattice/Lattice_transpose.h index c8d349a6..0ae7c6b3 100644 --- a/lib/lattice/Lattice_transpose.h +++ b/lib/lattice/Lattice_transpose.h @@ -40,27 +40,24 @@ namespace Grid { //////////////////////////////////////////////////////////////////////////////////////////////////// template inline Lattice transpose(const Lattice &lhs){ - Lattice ret(lhs._grid); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ - ret._odata[ss] = transpose(lhs._odata[ss]); - } - return ret; - }; + Lattice ret(lhs._grid); + parallel_for(int ss=0;ssoSites();ss++){ + ret._odata[ss] = transpose(lhs._odata[ss]); + } + return ret; + }; - //////////////////////////////////////////////////////////////////////////////////////////////////// - // Index level dependent transpose - //////////////////////////////////////////////////////////////////////////////////////////////////// - template + //////////////////////////////////////////////////////////////////////////////////////////////////// + // Index level dependent transpose + //////////////////////////////////////////////////////////////////////////////////////////////////// + template inline auto TransposeIndex(const Lattice &lhs) -> Lattice(lhs._odata[0]))> - { - Lattice(lhs._odata[0]))> ret(lhs._grid); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ - ret._odata[ss] = transposeIndex(lhs._odata[ss]); - } - return ret; - }; - + { + Lattice(lhs._odata[0]))> ret(lhs._grid); + parallel_for(int ss=0;ssoSites();ss++){ + ret._odata[ss] = transposeIndex(lhs._odata[ss]); + } + return ret; + }; } #endif diff --git a/lib/lattice/Lattice_unary.h b/lib/lattice/Lattice_unary.h index f3c54896..f5b324ec 100644 --- a/lib/lattice/Lattice_unary.h +++ b/lib/lattice/Lattice_unary.h @@ -37,8 +37,7 @@ namespace Grid { Lattice ret(rhs._grid); ret.checkerboard = rhs.checkerboard; conformable(ret,rhs); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ ret._odata[ss]=pow(rhs._odata[ss],y); } return ret; @@ -47,8 +46,7 
@@ PARALLEL_FOR_LOOP Lattice ret(rhs._grid); ret.checkerboard = rhs.checkerboard; conformable(ret,rhs); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ ret._odata[ss]=mod(rhs._odata[ss],y); } return ret; @@ -58,8 +56,7 @@ PARALLEL_FOR_LOOP Lattice ret(rhs._grid); ret.checkerboard = rhs.checkerboard; conformable(ret,rhs); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ ret._odata[ss]=div(rhs._odata[ss],y); } return ret; @@ -69,8 +66,7 @@ PARALLEL_FOR_LOOP Lattice ret(rhs._grid); ret.checkerboard = rhs.checkerboard; conformable(ret,rhs); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ ret._odata[ss]=Exponentiate(rhs._odata[ss],alpha, Nexp); } return ret; diff --git a/lib/lattice/Lattice_where.h b/lib/lattice/Lattice_where.h index cff372a0..6686d1b3 100644 --- a/lib/lattice/Lattice_where.h +++ b/lib/lattice/Lattice_where.h @@ -56,8 +56,7 @@ inline void whereWolf(Lattice &ret,const Lattice &predicate,Lattice< std::vector truevals (Nsimd); std::vector falsevals(Nsimd); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites(); ss++){ + parallel_for(int ss=0;ssoSites(); ss++){ extract(iftrue._odata[ss] ,truevals); extract(iffalse._odata[ss] ,falsevals); diff --git a/lib/Log.cc b/lib/log/Log.cc similarity index 98% rename from lib/Log.cc rename to lib/log/Log.cc index 1a7d8a58..320381cb 100644 --- a/lib/Log.cc +++ b/lib/log/Log.cc @@ -29,9 +29,10 @@ See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include +#include #include +#include namespace Grid { diff --git a/lib/Log.h b/lib/log/Log.h similarity index 100% rename from lib/Log.h rename to lib/log/Log.h diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index e2af0545..154567fc 100644 --- a/lib/parallelIO/BinaryIO.h +++ 
b/lib/parallelIO/BinaryIO.h @@ -35,37 +35,27 @@ Author: paboyle #endif #include #include -// 64bit endian swap is a portability pain -#ifndef __has_builtin // Optional of course. -#define __has_builtin(x) 0 // Compatibility with non-clang compilers. -#endif -#if HAVE_DECL_BE64TOH -#undef Grid_ntohll -#define Grid_ntohll be64toh -#endif -#if HAVE_DECL_NTOHLL -#undef Grid_ntohll -#define Grid_ntohll ntohll -#endif - -#ifndef Grid_ntohll +inline uint32_t byte_reverse32(uint32_t f) { + f = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ; + return f; +} +inline uint64_t byte_reverse64(uint64_t f) { + uint64_t g; + g = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ; + g = g << 32; + f = f >> 32; + g|= ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ; + return g; +} #if BYTE_ORDER == BIG_ENDIAN - -#define Grid_ntohll(A) (A) - -#else - -#if __has_builtin(__builtin_bswap64) -#define Grid_ntohll(A) __builtin_bswap64(A) +inline uint64_t Grid_ntohll(uint64_t A) { return A; } #else -#error -#endif - -#endif - +inline uint64_t Grid_ntohll(uint64_t A) { + return byte_reverse64(A); +} #endif namespace Grid { @@ -195,7 +185,7 @@ class BinaryIO { std::vector site({x,y,z,t}); if (grid->IsBoss()) { - fin.read((char *)&file_object, sizeof(file_object)); + fin.read((char *)&file_object, sizeof(file_object));assert( fin.fail()==0); bytes += sizeof(file_object); if (ieee32big) be32toh_v((void *)&file_object, sizeof(file_object)); if (ieee32) le32toh_v((void *)&file_object, sizeof(file_object)); @@ -211,11 +201,13 @@ class BinaryIO { std::cout<Broadcast(0,(void *)&csum,sizeof(csum)); return csum; } template - static inline uint32_t writeObjectSerial(Lattice &Umu,std::string file,munger munge,int offset,const std::string & format) + static inline uint32_t writeObjectSerial(Lattice &Umu,std::string file,munger munge,int offset, + const std::string & format) { typedef typename vobj::scalar_object 
sobj; @@ -231,7 +223,7 @@ class BinaryIO { ////////////////////////////////////////////////// std::cout<< GridLogMessage<< "Serial write I/O "<< file<IsBoss() ) { fout.open(file,std::ios::binary|std::ios::out|std::ios::in); @@ -255,23 +247,24 @@ class BinaryIO { if ( grid->IsBoss() ) { - if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object)); - if(ieee32) htole32_v((void *)&file_object,sizeof(file_object)); - if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object)); - if(ieee64) htole64_v((void *)&file_object,sizeof(file_object)); + if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object)); + if(ieee32) htole32_v((void *)&file_object,sizeof(file_object)); + if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object)); + if(ieee64) htole64_v((void *)&file_object,sizeof(file_object)); - // NB could gather an xstrip as an optimisation. - fout.write((char *)&file_object,sizeof(file_object)); - bytes+=sizeof(file_object); + // NB could gather an xstrip as an optimisation. 
+ fout.write((char *)&file_object,sizeof(file_object));assert( fout.fail()==0); + bytes+=sizeof(file_object); } }}}} timer.Stop(); std::cout<Broadcast(0,(void *)&csum,sizeof(csum)); return csum; } - + static inline uint32_t writeRNGSerial(GridSerialRNG &serial,GridParallelRNG ¶llel,std::string file,int offset) { typedef typename GridSerialRNG::RngStateType RngStateType; @@ -305,23 +298,23 @@ class BinaryIO { int l_idx=parallel.generator_idx(o_idx,i_idx); if( rank == grid->ThisRank() ){ - // std::cout << "rank" << rank<<" Getting state for index "<Broadcast(rank,(void *)&saved[0],bytes); if ( grid->IsBoss() ) { - Uint32Checksum((uint32_t *)&saved[0],bytes,csum); - fout.write((char *)&saved[0],bytes); + Uint32Checksum((uint32_t *)&saved[0],bytes,csum); + fout.write((char *)&saved[0],bytes);assert( fout.fail()==0); } - + } if ( grid->IsBoss() ) { serial.GetState(saved,0); Uint32Checksum((uint32_t *)&saved[0],bytes,csum); - fout.write((char *)&saved[0],bytes); + fout.write((char *)&saved[0],bytes);assert( fout.fail()==0); } grid->Broadcast(0,(void *)&csum,sizeof(csum)); return csum; @@ -355,20 +348,20 @@ class BinaryIO { int l_idx=parallel.generator_idx(o_idx,i_idx); if ( grid->IsBoss() ) { - fin.read((char *)&saved[0],bytes); - Uint32Checksum((uint32_t *)&saved[0],bytes,csum); + fin.read((char *)&saved[0],bytes);assert( fin.fail()==0); + Uint32Checksum((uint32_t *)&saved[0],bytes,csum); } grid->Broadcast(0,(void *)&saved[0],bytes); if( rank == grid->ThisRank() ){ - parallel.SetState(saved,l_idx); + parallel.SetState(saved,l_idx); } } if ( grid->IsBoss() ) { - fin.read((char *)&saved[0],bytes); + fin.read((char *)&saved[0],bytes);assert( fin.fail()==0); serial.SetState(saved,0); Uint32Checksum((uint32_t *)&saved[0],bytes,csum); } @@ -380,7 +373,8 @@ class BinaryIO { template - static inline uint32_t readObjectParallel(Lattice &Umu,std::string file,munger munge,int offset,const std::string &format) + static inline uint32_t readObjectParallel(Lattice &Umu,std::string 
file,munger munge,int offset, + const std::string &format) { typedef typename vobj::scalar_object sobj; @@ -415,15 +409,15 @@ class BinaryIO { if ( d == 0 ) parallel[d] = 0; if (parallel[d]) { - range[d] = grid->_ldimensions[d]; - start[d] = grid->_processor_coor[d]*range[d]; - ioproc[d]= grid->_processor_coor[d]; + range[d] = grid->_ldimensions[d]; + start[d] = grid->_processor_coor[d]*range[d]; + ioproc[d]= grid->_processor_coor[d]; } else { - range[d] = grid->_gdimensions[d]; - start[d] = 0; - ioproc[d]= 0; - - if ( grid->_processor_coor[d] != 0 ) IOnode = 0; + range[d] = grid->_gdimensions[d]; + start[d] = 0; + ioproc[d]= 0; + + if ( grid->_processor_coor[d] != 0 ) IOnode = 0; } slice_vol = slice_vol * range[d]; } @@ -434,9 +428,9 @@ class BinaryIO { std::cout<< std::dec ; std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <_ndimension;d++){ - std::cout<< range[d]; - if( d< grid->_ndimension-1 ) - std::cout<< " x "; + std::cout<< range[d]; + if( d< grid->_ndimension-1 ) + std::cout<< " x "; } std::cout << std::endl; } @@ -472,8 +466,8 @@ class BinaryIO { Lexicographic::CoorFromIndex(tsite,tlex,range); for(int d=0;d_ldimensions[d]; // local site - gsite[d] = tsite[d]+start[d]; // global site + lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site + gsite[d] = tsite[d]+start[d]; // global site } ///////////////////////// @@ -488,28 +482,28 @@ class BinaryIO { //////////////////////////////// if (myrank == iorank) { - fin.seekg(offset+g_idx*sizeof(fileObj)); - fin.read((char *)&fileObj,sizeof(fileObj)); - bytes+=sizeof(fileObj); + fin.seekg(offset+g_idx*sizeof(fileObj)); + fin.read((char *)&fileObj,sizeof(fileObj));assert( fin.fail()==0); + bytes+=sizeof(fileObj); - if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj)); - if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj)); - if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj)); - if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj)); - - munge(fileObj,siteObj,csum); - + 
if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj)); + if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj)); + if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj)); + if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj)); + + munge(fileObj,siteObj,csum); + } - + // Possibly do transport through pt2pt if ( rank != iorank ) { - if ( (myrank == rank) || (myrank==iorank) ) { - grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj)); - } + if ( (myrank == rank) || (myrank==iorank) ) { + grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj)); + } } // Poke at destination if ( myrank == rank ) { - pokeLocalSite(siteObj,Umu,lsite); + pokeLocalSite(siteObj,Umu,lsite); } grid->Barrier(); // necessary? } @@ -520,7 +514,7 @@ class BinaryIO { timer.Stop(); std::cout< - static inline uint32_t writeObjectParallel(Lattice &Umu,std::string file,munger munge,int offset,const std::string & format) + static inline uint32_t writeObjectParallel(Lattice &Umu,std::string file,munger munge,int offset, + const std::string & format) { typedef typename vobj::scalar_object sobj; GridBase *grid = Umu._grid; @@ -558,15 +553,15 @@ class BinaryIO { if ( d!= grid->_ndimension-1 ) parallel[d] = 0; if (parallel[d]) { - range[d] = grid->_ldimensions[d]; - start[d] = grid->_processor_coor[d]*range[d]; - ioproc[d]= grid->_processor_coor[d]; + range[d] = grid->_ldimensions[d]; + start[d] = grid->_processor_coor[d]*range[d]; + ioproc[d]= grid->_processor_coor[d]; } else { - range[d] = grid->_gdimensions[d]; - start[d] = 0; - ioproc[d]= 0; + range[d] = grid->_gdimensions[d]; + start[d] = 0; + ioproc[d]= 0; - if ( grid->_processor_coor[d] != 0 ) IOnode = 0; + if ( grid->_processor_coor[d] != 0 ) IOnode = 0; } slice_vol = slice_vol * range[d]; @@ -577,13 +572,13 @@ class BinaryIO { grid->GlobalSum(tmp); std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <_ndimension;d++){ - std::cout<< range[d]; - if( d< 
grid->_ndimension-1 ) - std::cout<< " x "; + std::cout<< range[d]; + if( d< grid->_ndimension-1 ) + std::cout<< " x "; } std::cout << std::endl; } - + GridStopWatch timer; timer.Start(); uint64_t bytes=0; @@ -619,8 +614,8 @@ class BinaryIO { Lexicographic::CoorFromIndex(tsite,tlex,range); for(int d=0;d_ldimensions[d]; // local site - gsite[d] = tsite[d]+start[d]; // global site + lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site + gsite[d] = tsite[d]+start[d]; // global site } @@ -640,36 +635,36 @@ class BinaryIO { // Pair of nodes may need to do pt2pt send if ( rank != iorank ) { // comms is necessary - if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it - // Send to IOrank - grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj)); - } + if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it + // Send to IOrank + grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj)); + } } grid->Barrier(); // necessary? 
if (myrank == iorank) { - munge(siteObj,fileObj,csum); - - if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj)); - if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj)); - if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj)); - if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj)); - - fout.seekp(offset+g_idx*sizeof(fileObj)); - fout.write((char *)&fileObj,sizeof(fileObj)); - bytes+=sizeof(fileObj); + munge(siteObj,fileObj,csum); + + if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj)); + if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj)); + if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj)); + if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj)); + + fout.seekp(offset+g_idx*sizeof(fileObj)); + fout.write((char *)&fileObj,sizeof(fileObj));assert( fout.fail()==0); + bytes+=sizeof(fileObj); } } - + grid->GlobalSum(csum); grid->GlobalSum(bytes); - + timer.Stop(); std::cout<0) { std::string key=line.substr(0,eq); @@ -322,6 +324,8 @@ static inline int readHeader(std::string file,GridBase *grid, NerscField &field ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Now the meat: the object readers ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#define PARALLEL_READ +#define PARALLEL_WRITE template static inline void readConfiguration(Lattice > &Umu,NerscField& header,std::string file) @@ -345,25 +349,41 @@ static inline void readConfiguration(Lattice > &Umu, // munger is a function of if ( header.data_type == std::string("4D_SU3_GAUGE") ) { if ( ieee32 || ieee32big ) { - // csum=BinaryIO::readObjectSerial, LorentzColour2x3F> - csum=BinaryIO::readObjectParallel, LorentzColour2x3F> +#ifdef PARALLEL_READ + csum=BinaryIO::readObjectParallel, LorentzColour2x3F> (Umu,file,Nersc3x2munger(), offset,format); +#else + csum=BinaryIO::readObjectSerial, LorentzColour2x3F> + (Umu,file,Nersc3x2munger(), offset,format); 
+#endif } if ( ieee64 || ieee64big ) { - //csum=BinaryIO::readObjectSerial, LorentzColour2x3D> +#ifdef PARALLEL_READ csum=BinaryIO::readObjectParallel, LorentzColour2x3D> (Umu,file,Nersc3x2munger(),offset,format); +#else + csum=BinaryIO::readObjectSerial, LorentzColour2x3D> + (Umu,file,Nersc3x2munger(),offset,format); +#endif } } else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) { if ( ieee32 || ieee32big ) { - //csum=BinaryIO::readObjectSerial,LorentzColourMatrixF> +#ifdef PARALLEL_READ csum=BinaryIO::readObjectParallel,LorentzColourMatrixF> (Umu,file,NerscSimpleMunger(),offset,format); +#else + csum=BinaryIO::readObjectSerial,LorentzColourMatrixF> + (Umu,file,NerscSimpleMunger(),offset,format); +#endif } if ( ieee64 || ieee64big ) { - // csum=BinaryIO::readObjectSerial,LorentzColourMatrixD> +#ifdef PARALLEL_READ csum=BinaryIO::readObjectParallel,LorentzColourMatrixD> (Umu,file,NerscSimpleMunger(),offset,format); +#else + csum=BinaryIO::readObjectSerial,LorentzColourMatrixD> + (Umu,file,NerscSimpleMunger(),offset,format); +#endif } } else { assert(0); @@ -371,12 +391,17 @@ static inline void readConfiguration(Lattice > &Umu, NerscStatistics(Umu,clone); + std::cout< @@ -416,19 +441,11 @@ static inline void writeConfiguration(Lattice > &Umu Nersc3x2unmunger munge; BinaryIO::Uint32Checksum(Umu, munge,header.checksum); offset = writeHeader(header,file); +#ifdef PARALLEL_WRITE + csum=BinaryIO::writeObjectParallel(Umu,file,munge,offset,header.floating_point); +#else csum=BinaryIO::writeObjectSerial(Umu,file,munge,offset,header.floating_point); - - std::string file1 = file+"para"; - int offset1 = writeHeader(header,file1); - int csum1=BinaryIO::writeObjectParallel(Umu,file1,munge,offset,header.floating_point); - //int csum1=BinaryIO::writeObjectSerial(Umu,file1,munge,offset,header.floating_point); - - - std::cout << GridLogMessage << " TESTING PARALLEL WRITE offsets " << offset1 << " "<< offset << std::endl; - std::cout << GridLogMessage << " TESTING 
PARALLEL WRITE csums " << csum1 << " "< > &Umu NerscSimpleUnmunger munge; BinaryIO::Uint32Checksum(Umu, munge,header.checksum); offset = writeHeader(header,file); - // csum=BinaryIO::writeObjectSerial(Umu,file,munge,offset,header.floating_point); +#ifdef PARALLEL_WRITE csum=BinaryIO::writeObjectParallel(Umu,file,munge,offset,header.floating_point); +#else + csum=BinaryIO::writeObjectSerial(Umu,file,munge,offset,header.floating_point); +#endif } std::cout< uint32_t csum=BinaryIO::readRNGSerial(serial,parallel,file,offset); - std::cerr<<" Csum "<< csum << " "<< header.checksum < *************************************************************************************/ /* END LEGAL */ -#include -#include +#include +#include namespace Grid { diff --git a/lib/PerfCount.h b/lib/perfmon/PerfCount.h similarity index 98% rename from lib/PerfCount.h rename to lib/perfmon/PerfCount.h index 749441c5..fca80b8d 100644 --- a/lib/PerfCount.h +++ b/lib/perfmon/PerfCount.h @@ -172,7 +172,7 @@ public: const char * name = PerformanceCounterConfigs[PCT].name; fd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. group -1 if (fd == -1) { - fprintf(stderr, "Error opening leader %llx for event %s\n", pe.config,name); + fprintf(stderr, "Error opening leader %llx for event %s\n",(long long) pe.config,name); perror("Error is"); } int norm = PerformanceCounterConfigs[PCT].normalisation; @@ -181,7 +181,7 @@ public: name = PerformanceCounterConfigs[norm].name; cyclefd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. 
group -1 if (cyclefd == -1) { - fprintf(stderr, "Error opening leader %llx for event %s\n", pe.config,name); + fprintf(stderr, "Error opening leader %llx for event %s\n",(long long) pe.config,name); perror("Error is"); } #endif diff --git a/lib/Stat.cc b/lib/perfmon/Stat.cc similarity index 98% rename from lib/Stat.cc rename to lib/perfmon/Stat.cc index e6fa978e..3f47fd83 100644 --- a/lib/Stat.cc +++ b/lib/perfmon/Stat.cc @@ -1,11 +1,9 @@ -#include -#include -#include - +#include +#include +#include namespace Grid { - bool PmuStat::pmu_initialized=false; diff --git a/lib/Stat.h b/lib/perfmon/Stat.h similarity index 100% rename from lib/Stat.h rename to lib/perfmon/Stat.h diff --git a/lib/Timer.h b/lib/perfmon/Timer.h similarity index 100% rename from lib/Timer.h rename to lib/perfmon/Timer.h diff --git a/lib/qcd/QCD.h b/lib/qcd/QCD.h index f434bdd9..6e6144da 100644 --- a/lib/qcd/QCD.h +++ b/lib/qcd/QCD.h @@ -29,8 +29,8 @@ Author: paboyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#ifndef GRID_QCD_H -#define GRID_QCD_H +#ifndef GRID_QCD_BASE_H +#define GRID_QCD_BASE_H namespace Grid{ namespace QCD { @@ -62,7 +62,6 @@ namespace QCD { #define SpinIndex 1 #define LorentzIndex 0 - // Also should make these a named enum type static const int DaggerNo=0; static const int DaggerYes=1; @@ -494,26 +493,5 @@ namespace QCD { } // Grid -#include -#include -#include -#include -#include - -// Include representations -#include -#include -#include -#include - -#include - -#include - -#include -#include -#include - - #endif diff --git a/lib/qcd/action/Action.h b/lib/qcd/action/Action.h new file mode 100644 index 00000000..7272c90d --- /dev/null +++ b/lib/qcd/action/Action.h @@ -0,0 +1,50 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source 
file: ./lib/qcd/action/Actions.h + + Copyright (C) 2015 + +Author: Azusa Yamaguchi +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: neo +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef GRID_QCD_ACTION_H +#define GRID_QCD_ACTION_H + +//////////////////////////////////////////// +// Abstract base interface +//////////////////////////////////////////// +#include +//////////////////////////////////////////////////////////////////////// +// Fermion actions; prevent coupling fermion.cc files to other headers +//////////////////////////////////////////////////////////////////////// +#include +#include +//////////////////////////////////////// +// Pseudo fermion combinations for HMC +//////////////////////////////////////// +#include + +#endif diff --git a/lib/qcd/action/ActionBase.h b/lib/qcd/action/ActionBase.h index 56d6b8e0..2f9fed4b 100644 --- a/lib/qcd/action/ActionBase.h +++ b/lib/qcd/action/ActionBase.h @@ -150,4 +150,5 @@ using ActionSet = std::vector >; } } + #endif diff --git a/lib/qcd/action/ActionCore.h b/lib/qcd/action/ActionCore.h new file mode 100644 index 00000000..839645a3 
--- /dev/null +++ b/lib/qcd/action/ActionCore.h @@ -0,0 +1,45 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/ActionCore.h + +Copyright (C) 2015 + +Author: Peter Boyle +Author: neo + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef QCD_ACTION_CORE +#define QCD_ACTION_CORE + +#include +#include + +//////////////////////////////////////////// +// Gauge Actions +//////////////////////////////////////////// +#include +//////////////////////////////////////////// +// Fermion prereqs +//////////////////////////////////////////// +#include + +#endif diff --git a/lib/qcd/action/ActionParams.h b/lib/qcd/action/ActionParams.h index dcbdfce8..91e94741 100644 --- a/lib/qcd/action/ActionParams.h +++ b/lib/qcd/action/ActionParams.h @@ -45,6 +45,10 @@ namespace QCD { WilsonImplParams() : overlapCommsCompute(false) {}; }; + struct StaggeredImplParams { + StaggeredImplParams() {}; + }; + struct OneFlavourRationalParams { RealD lo; RealD hi; diff --git a/lib/qcd/action/fermion/CayleyFermion5D.cc b/lib/qcd/action/fermion/CayleyFermion5D.cc index 14b4edb6..51677522 100644 --- a/lib/qcd/action/fermion/CayleyFermion5D.cc +++ b/lib/qcd/action/fermion/CayleyFermion5D.cc @@ -30,8 +30,8 @@ Author: paboyle /* END LEGAL */ #include -#include - +#include +#include namespace Grid { namespace QCD { @@ -57,10 +57,23 @@ void CayleyFermion5D::Dminus(const FermionField &psi, FermionField &chi) { int Ls=this->Ls; - this->DW(psi,this->tmp(),DaggerNo); + FermionField tmp_f(this->FermionGrid()); + this->DW(psi,tmp_f,DaggerNo); for(int s=0;stmp(),s,s);// chi = (1-c[s] D_W) psi + axpby_ssp(chi,Coeff_t(1.0),psi,-cs[s],tmp_f,s,s);// chi = (1-c[s] D_W) psi + } +} +template +void CayleyFermion5D::DminusDag(const FermionField &psi, FermionField &chi) +{ + int Ls=this->Ls; + + FermionField tmp_f(this->FermionGrid()); + this->DW(psi,tmp_f,DaggerYes); + + for(int s=0;s void CayleyFermion5D::CayleyZeroCounters(void) } -template -void CayleyFermion5D::DminusDag(const FermionField &psi, FermionField &chi) -{ - int Ls=this->Ls; - - 
this->DW(psi,this->tmp(),DaggerYes); - - for(int s=0;stmp(),s,s);// chi = (1-c[s] D_W) psi - } -} template void CayleyFermion5D::M5D (const FermionField &psi, FermionField &chi) { @@ -168,7 +170,6 @@ void CayleyFermion5D::Mooee (const FermionField &psi, FermionField & lower[0] =-mass*lower[0]; M5D(psi,psi,chi,lower,diag,upper); } - template void CayleyFermion5D::MooeeDag (const FermionField &psi, FermionField &chi) { @@ -190,7 +191,12 @@ void CayleyFermion5D::MooeeDag (const FermionField &psi, FermionField & lower[s]=-cee[s-1]; } } - + // Conjugate the terms + for (int s=0;s::MeooeDag5D (const FermionField &psi, FermionField int Ls=this->Ls; std::vector diag =bs; std::vector upper=cs; - std::vector lower=cs; - upper[Ls-1]=-mass*upper[Ls-1]; - lower[0] =-mass*lower[0]; + std::vector lower=cs; + + for (int s=0;s::MDeriv (GaugeField &mat,const FermionField &U,const this->DhopDeriv(mat,U,Din,dag); } else { // U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call - Meooe5D(U,Din); + MeooeDag5D(U,Din); this->DhopDeriv(mat,Din,V,dag); } }; @@ -315,7 +335,7 @@ void CayleyFermion5D::MoeDeriv(GaugeField &mat,const FermionField &U,const this->DhopDerivOE(mat,U,Din,dag); } else { // U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call - Meooe5D(U,Din); + MeooeDag5D(U,Din); this->DhopDerivOE(mat,Din,V,dag); } }; @@ -330,7 +350,7 @@ void CayleyFermion5D::MeoDeriv(GaugeField &mat,const FermionField &U,const this->DhopDerivEO(mat,U,Din,dag); } else { // U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call - Meooe5D(U,Din); + MeooeDag5D(U,Din); this->DhopDerivEO(mat,Din,V,dag); } }; diff --git a/lib/qcd/action/fermion/CayleyFermion5D.h b/lib/qcd/action/fermion/CayleyFermion5D.h index 86255be6..cce13e12 100644 --- a/lib/qcd/action/fermion/CayleyFermion5D.h +++ b/lib/qcd/action/fermion/CayleyFermion5D.h @@ -29,6 +29,8 @@ Author: Peter Boyle #ifndef GRID_QCD_CAYLEY_FERMION_H #define GRID_QCD_CAYLEY_FERMION_H +#include 
+ namespace Grid { namespace QCD { @@ -192,7 +194,9 @@ template void CayleyFermion5D< A >::M5Ddag(const FermionField &psi,const Fermion template void CayleyFermion5D< A >::MooeeInv (const FermionField &psi, FermionField &chi); \ template void CayleyFermion5D< A >::MooeeInvDag (const FermionField &psi, FermionField &chi); -#define CAYLEY_DPERP_CACHE +#undef CAYLEY_DPERP_DENSE +#define CAYLEY_DPERP_CACHE #undef CAYLEY_DPERP_LINALG +#define CAYLEY_DPERP_VEC #endif diff --git a/lib/qcd/action/fermion/CayleyFermion5Dcache.cc b/lib/qcd/action/fermion/CayleyFermion5Dcache.cc index 84c8b5bf..1f7d4903 100644 --- a/lib/qcd/action/fermion/CayleyFermion5Dcache.cc +++ b/lib/qcd/action/fermion/CayleyFermion5Dcache.cc @@ -29,7 +29,8 @@ Author: paboyle *************************************************************************************/ /* END LEGAL */ -#include +#include +#include namespace Grid { @@ -54,8 +55,8 @@ void CayleyFermion5D::M5D(const FermionField &psi, // Flops = 6.0*(Nc*Ns) *Ls*vol M5Dcalls++; M5Dtime-=usecond(); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss+=Ls){ // adds Ls + + parallel_for(int ss=0;ssoSites();ss+=Ls){ // adds Ls for(int s=0;s::M5Ddag(const FermionField &psi, // Flops = 6.0*(Nc*Ns) *Ls*vol M5Dcalls++; M5Dtime-=usecond(); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss+=Ls){ // adds Ls + + parallel_for(int ss=0;ssoSites();ss+=Ls){ // adds Ls auto tmp = psi._odata[0]; for(int s=0;s::MooeeInv (const FermionField &psi, FermionField & MooeeInvCalls++; MooeeInvTime-=usecond(); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss+=Ls){ // adds Ls + parallel_for(int ss=0;ssoSites();ss+=Ls){ // adds Ls auto tmp = psi._odata[0]; // flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls = 12*Ls * (9) = 108*Ls flops @@ -181,11 +181,22 @@ void CayleyFermion5D::MooeeInvDag (const FermionField &psi, FermionField & assert(psi.checkerboard == psi.checkerboard); chi.checkerboard=psi.checkerboard; + std::vector ueec(Ls); + std::vector deec(Ls); + std::vector leec(Ls); + 
std::vector ueemc(Ls); + std::vector leemc(Ls); + for(int s=0;soSites();ss+=Ls){ // adds Ls + parallel_for(int ss=0;ssoSites();ss+=Ls){ // adds Ls auto tmp = psi._odata[0]; @@ -193,25 +204,25 @@ PARALLEL_FOR_LOOP chi[ss]=psi[ss]; for (int s=1;s=0;s--){ spProj5p(tmp,chi[ss+s+1]); - chi[ss+s] = chi[ss+s] - lee[s]*tmp; + chi[ss+s] = chi[ss+s] - leec[s]*tmp; } } diff --git a/lib/qcd/action/fermion/CayleyFermion5Ddense.cc b/lib/qcd/action/fermion/CayleyFermion5Ddense.cc index 6c79da4b..16fb47bb 100644 --- a/lib/qcd/action/fermion/CayleyFermion5Ddense.cc +++ b/lib/qcd/action/fermion/CayleyFermion5Ddense.cc @@ -30,7 +30,8 @@ Author: paboyle /* END LEGAL */ #include -#include +#include +#include namespace Grid { @@ -38,20 +39,17 @@ namespace QCD { /* * Dense matrix versions of routines */ - - /* template void CayleyFermion5D::MooeeInvDag (const FermionField &psi, FermionField &chi) { this->MooeeInternal(psi,chi,DaggerYes,InverseYes); } - template void CayleyFermion5D::MooeeInv(const FermionField &psi, FermionField &chi) { this->MooeeInternal(psi,chi,DaggerNo,InverseYes); } - */ + template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv) { @@ -125,9 +123,20 @@ void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField } } +#ifdef CAYLEY_DPERP_DENSE +INSTANTIATE_DPERP(GparityWilsonImplF); +INSTANTIATE_DPERP(GparityWilsonImplD); +INSTANTIATE_DPERP(WilsonImplF); +INSTANTIATE_DPERP(WilsonImplD); +INSTANTIATE_DPERP(ZWilsonImplF); +INSTANTIATE_DPERP(ZWilsonImplD); + template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); +template void 
CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); +template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); +#endif }} diff --git a/lib/qcd/action/fermion/CayleyFermion5Dssp.cc b/lib/qcd/action/fermion/CayleyFermion5Dssp.cc index 0224e533..a83b962e 100644 --- a/lib/qcd/action/fermion/CayleyFermion5Dssp.cc +++ b/lib/qcd/action/fermion/CayleyFermion5Dssp.cc @@ -29,7 +29,8 @@ Author: paboyle *************************************************************************************/ /* END LEGAL */ -#include +#include +#include namespace Grid { @@ -47,17 +48,18 @@ void CayleyFermion5D::M5D(const FermionField &psi, std::vector &diag, std::vector &upper) { + Coeff_t one(1.0); int Ls=this->Ls; for(int s=0;s::M5Ddag(const FermionField &psi, std::vector &diag, std::vector &upper) { + Coeff_t one(1.0); int Ls=this->Ls; for(int s=0;s::M5Ddag(const FermionField &psi, template void CayleyFermion5D::MooeeInv (const FermionField &psi, FermionField &chi) { + Coeff_t one(1.0); + Coeff_t czero(0.0); chi.checkerboard=psi.checkerboard; int Ls=this->Ls; // Apply (L^{\prime})^{-1} - axpby_ssp (chi,1.0,psi, 0.0,psi,0,0); // chi[0]=psi[0] + axpby_ssp (chi,one,psi, czero,psi,0,0); // chi[0]=psi[0] for (int s=1;s=0;s--){ - axpby_ssp_pminus (chi,1.0,chi,-uee[s],chi,s,s+1); // chi[Ls] + axpby_ssp_pminus (chi,one,chi,-uee[s],chi,s,s+1); // chi[Ls] } } template void CayleyFermion5D::MooeeInvDag (const FermionField &psi, FermionField &chi) { + Coeff_t one(1.0); + Coeff_t czero(0.0); chi.checkerboard=psi.checkerboard; int Ls=this->Ls; // Apply (U^{\prime})^{-dagger} - axpby_ssp (chi,1.0,psi, 0.0,psi,0,0); // chi[0]=psi[0] + axpby_ssp (chi,one,psi, czero,psi,0,0); // chi[0]=psi[0] for (int s=1;s=0;s--){ - axpby_ssp_pplus (chi,1.0,chi,-lee[s],chi,s,s+1); // chi[Ls] + axpby_ssp_pplus (chi,one,chi,-conjugate(lee[s]),chi,s,s+1); // chi[Ls] } } #ifdef CAYLEY_DPERP_LINALG - INSTANTIATE(WilsonImplF); - 
INSTANTIATE(WilsonImplD); - INSTANTIATE(GparityWilsonImplF); - INSTANTIATE(GparityWilsonImplD); + INSTANTIATE_DPERP(WilsonImplF); + INSTANTIATE_DPERP(WilsonImplD); + INSTANTIATE_DPERP(GparityWilsonImplF); + INSTANTIATE_DPERP(GparityWilsonImplD); + INSTANTIATE_DPERP(ZWilsonImplF); + INSTANTIATE_DPERP(ZWilsonImplD); #endif } diff --git a/lib/qcd/action/fermion/CayleyFermion5Dvec.cc b/lib/qcd/action/fermion/CayleyFermion5Dvec.cc index b99198fa..566624e3 100644 --- a/lib/qcd/action/fermion/CayleyFermion5Dvec.cc +++ b/lib/qcd/action/fermion/CayleyFermion5Dvec.cc @@ -30,11 +30,13 @@ Author: paboyle /* END LEGAL */ -#include +#include +#include namespace Grid { -namespace QCD { /* +namespace QCD { + /* * Dense matrix versions of routines */ template @@ -91,8 +93,7 @@ void CayleyFermion5D::M5D(const FermionField &psi, assert(Nc==3); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss+=LLs){ // adds LLs + parallel_for(int ss=0;ssoSites();ss+=LLs){ // adds LLs #if 0 alignas(64) SiteHalfSpinor hp; alignas(64) SiteHalfSpinor hm; @@ -232,8 +233,7 @@ void CayleyFermion5D::M5Ddag(const FermionField &psi, M5Dcalls++; M5Dtime-=usecond(); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss+=LLs){ // adds LLs + parallel_for(int ss=0;ssoSites();ss+=LLs){ // adds LLs #if 0 alignas(64) SiteHalfSpinor hp; alignas(64) SiteHalfSpinor hm; @@ -792,13 +792,11 @@ void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField MooeeInvTime-=usecond(); if ( switcheroo::iscomplex() ) { - PARALLEL_FOR_LOOP - for(auto site=0;site See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include +#include +#include namespace Grid { namespace QCD { diff --git a/lib/qcd/action/fermion/ContinuedFractionFermion5D.h b/lib/qcd/action/fermion/ContinuedFractionFermion5D.h index 15d44945..e1e50aa5 100644 --- a/lib/qcd/action/fermion/ContinuedFractionFermion5D.h +++ 
b/lib/qcd/action/fermion/ContinuedFractionFermion5D.h @@ -29,6 +29,8 @@ Author: Peter Boyle #ifndef GRID_QCD_CONTINUED_FRACTION_H #define GRID_QCD_CONTINUED_FRACTION_H +#include + namespace Grid { namespace QCD { diff --git a/lib/qcd/action/fermion/DomainWallFermion.h b/lib/qcd/action/fermion/DomainWallFermion.h index c0b6b6aa..ad4bf87e 100644 --- a/lib/qcd/action/fermion/DomainWallFermion.h +++ b/lib/qcd/action/fermion/DomainWallFermion.h @@ -29,7 +29,7 @@ Author: Peter Boyle #ifndef GRID_QCD_DOMAIN_WALL_FERMION_H #define GRID_QCD_DOMAIN_WALL_FERMION_H -#include +#include namespace Grid { diff --git a/lib/qcd/action/Actions.h b/lib/qcd/action/fermion/Fermion.h similarity index 88% rename from lib/qcd/action/Actions.h rename to lib/qcd/action/fermion/Fermion.h index efd6a5bc..8ea41847 100644 --- a/lib/qcd/action/Actions.h +++ b/lib/qcd/action/fermion/Fermion.h @@ -2,16 +2,11 @@ Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/qcd/action/Actions.h + Source file: ./lib/qcd/action/fermion/Fermion_base_aggregate.h Copyright (C) 2015 -Author: Azusa Yamaguchi Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: neo -Author: paboyle This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -30,8 +25,8 @@ Author: paboyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#ifndef GRID_QCD_ACTIONS_H -#define GRID_QCD_ACTIONS_H +#ifndef GRID_QCD_FERMION_H +#define GRID_QCD_FERMION_H // * Linear operators (Hermitian and non-hermitian) .. my LinearOperator // * System solvers (Hermitian and non-hermitian) .. my OperatorFunction @@ -108,36 +103,6 @@ typedef SymanzikGaugeAction ConjugateSymanzikGaugeAction // for EVERY .cc file. 
This define centralises the list and restores global push of impl cases //////////////////////////////////////////////////////////////////////////////////////////////////// - -#define FermOp4dVecTemplateInstantiate(A) \ - template class A; \ - template class A; \ - template class A; \ - template class A; \ - template class A; \ - template class A; - -#define AdjointFermOpTemplateInstantiate(A) \ - template class A; \ - template class A; - -#define TwoIndexFermOpTemplateInstantiate(A) \ - template class A; \ - template class A; - -#define FermOp5dVecTemplateInstantiate(A) \ - template class A; \ - template class A; \ - template class A; \ - template class A; - -#define FermOpTemplateInstantiate(A) \ - FermOp4dVecTemplateInstantiate(A) \ - FermOp5dVecTemplateInstantiate(A) - - -#define GparityFermOpTemplateInstantiate(A) - //////////////////////////////////////////// // Fermion operators / actions //////////////////////////////////////////// @@ -145,9 +110,9 @@ typedef SymanzikGaugeAction ConjugateSymanzikGaugeAction #include // 4d wilson like #include // 4d wilson like #include // 5d base used by all 5d overlap types - //#include - +#include +#include #include // Cayley types #include #include @@ -158,14 +123,16 @@ typedef SymanzikGaugeAction ConjugateSymanzikGaugeAction #include #include #include - #include // Continued fraction #include #include - #include // Partial fraction #include #include +/////////////////////////////////////////////////////////////////////////////// +// G5 herm -- this has to live in QCD since dirac matrix is not in the broader sector of code +/////////////////////////////////////////////////////////////////////////////// +#include //////////////////////////////////////////////////////////////////////////////////////////////////// // More maintainable to maintain the following typedef list centrally, as more "impl" targets @@ -269,23 +236,20 @@ typedef MobiusFermion GparityMobiusFermionR; typedef MobiusFermion GparityMobiusFermionF; typedef 
MobiusFermion GparityMobiusFermionD; +typedef ImprovedStaggeredFermion ImprovedStaggeredFermionR; +typedef ImprovedStaggeredFermion ImprovedStaggeredFermionF; +typedef ImprovedStaggeredFermion ImprovedStaggeredFermionD; + +typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermion5DR; +typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermion5DF; +typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermion5DD; + +typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermionVec5dR; +typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermionVec5dF; +typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermionVec5dD; }} -/////////////////////////////////////////////////////////////////////////////// -// G5 herm -- this has to live in QCD since dirac matrix is not in the broader sector of code -/////////////////////////////////////////////////////////////////////////////// -#include - -//////////////////////////////////////// -// Pseudo fermion combinations for HMC -//////////////////////////////////////// -#include - -#include -#include -#include -#include #include #include diff --git a/lib/qcd/action/fermion/FermionCore.h b/lib/qcd/action/fermion/FermionCore.h new file mode 100644 index 00000000..74d94d67 --- /dev/null +++ b/lib/qcd/action/fermion/FermionCore.h @@ -0,0 +1,80 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/Fermion_base_aggregate.h + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef GRID_QCD_FERMION_CORE_H +#define GRID_QCD_FERMION_CORE_H + +#include +#include +#include + +//////////////////////////////////////////// +// Fermion prereqs +//////////////////////////////////////////// +#include //used by all wilson type fermions +#include +#include +#include //used by all wilson type fermions +#include //used by all wilson type fermions + +#define FermOpStaggeredTemplateInstantiate(A) \ + template class A; \ + template class A; + +#define FermOpStaggeredVec5dTemplateInstantiate(A) \ + template class A; \ + template class A; + +#define FermOp4dVecTemplateInstantiate(A) \ + template class A; \ + template class A; \ + template class A; \ + template class A; \ + template class A; \ + template class A; + +#define AdjointFermOpTemplateInstantiate(A) \ + template class A; \ + template class A; + +#define TwoIndexFermOpTemplateInstantiate(A) \ + template class A; \ + template class A; + +#define FermOp5dVecTemplateInstantiate(A) \ + template class A; \ + template class A; \ + template class A; \ + template class A; + +#define FermOpTemplateInstantiate(A) \ + FermOp4dVecTemplateInstantiate(A) \ + FermOp5dVecTemplateInstantiate(A) + +#define GparityFermOpTemplateInstantiate(A) + +#endif diff --git a/lib/qcd/action/fermion/FermionOperatorImpl.h b/lib/qcd/action/fermion/FermionOperatorImpl.h index 07eee6be..141d808d 100644 --- a/lib/qcd/action/fermion/FermionOperatorImpl.h +++ b/lib/qcd/action/fermion/FermionOperatorImpl.h @@ -194,8 +194,7 @@ namespace QCD { GaugeLinkField tmp(mat._grid); tmp = 
zero; - PARALLEL_FOR_LOOP - for(int sss=0;sssoSites();sss++){ + parallel_for(int sss=0;sssoSites();sss++){ int sU=sss; for(int s=0;s FermionField; typedef Lattice PropagatorField; + + ///////////////////////////////////////////////// // Make the doubled gauge field a *scalar* + ///////////////////////////////////////////////// typedef iImplDoubledGaugeField SiteDoubledGaugeField; // This is a scalar typedef iImplGaugeField SiteScalarGaugeField; // scalar typedef iImplGaugeLink SiteScalarGaugeLink; // scalar - typedef Lattice DoubledGaugeField; typedef WilsonCompressor Compressor; @@ -271,11 +272,11 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres inline void DoubleStore(GridBase *GaugeGrid, DoubledGaugeField &Uds,const GaugeField &Umu) { - SiteScalarGaugeField ScalarUmu; + SiteScalarGaugeField ScalarUmu; SiteDoubledGaugeField ScalarUds; GaugeLinkField U(Umu._grid); - GaugeField Uadj(Umu._grid); + GaugeField Uadj(Umu._grid); for (int mu = 0; mu < Nd; mu++) { U = PeekIndex(Umu, mu); U = adj(Cshift(U, mu, -1)); @@ -333,7 +334,7 @@ class GparityWilsonImpl : public ConjugateGaugeImpl SitePropagator; typedef iImplHalfSpinor SiteHalfSpinor; typedef iImplDoubledGaugeField SiteDoubledGaugeField; - + typedef Lattice FermionField; typedef Lattice PropagatorField; typedef Lattice DoubledGaugeField; @@ -356,7 +357,7 @@ class GparityWilsonImpl : public ConjugateGaugeImpl(outerProduct(Btilde, A)); -PARALLEL_FOR_LOOP - for (auto ss = tmp.begin(); ss < tmp.end(); ss++) { + parallel_for(auto ss = tmp.begin(); ss < tmp.end(); ss++) { link[ss]() = tmp[ss](0, 0) - conjugate(tmp[ss](1, 1)); } PokeIndex(mat, link, mu); @@ -498,8 +495,7 @@ PARALLEL_FOR_LOOP GaugeLinkField tmp(mat._grid); tmp = zero; -PARALLEL_FOR_LOOP - for (int ss = 0; ss < tmp._grid->oSites(); ss++) { + parallel_for(int ss = 0; ss < tmp._grid->oSites(); ss++) { for (int s = 0; s < Ls; s++) { int sF = s + Ls * ss; auto ttmp = traceIndex(outerProduct(Btilde[sF], Atilde[sF])); @@ -512,6 
+508,323 @@ PARALLEL_FOR_LOOP }; + + ///////////////////////////////////////////////////////////////////////////// + // Single flavour one component spinors with colour index + ///////////////////////////////////////////////////////////////////////////// + template + class StaggeredImpl : public PeriodicGaugeImpl > { + + public: + + typedef RealD _Coeff_t ; + static const int Dimension = Representation::Dimension; + typedef PeriodicGaugeImpl > Gimpl; + + //Necessary? + constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;} + + const bool LsVectorised=false; + typedef _Coeff_t Coeff_t; + + INHERIT_GIMPL_TYPES(Gimpl); + + template using iImplScalar = iScalar > >; + template using iImplSpinor = iScalar > >; + template using iImplHalfSpinor = iScalar > >; + template using iImplDoubledGaugeField = iVector >, Nds>; + template using iImplPropagator = iScalar > >; + + typedef iImplScalar SiteComplex; + typedef iImplSpinor SiteSpinor; + typedef iImplHalfSpinor SiteHalfSpinor; + typedef iImplDoubledGaugeField SiteDoubledGaugeField; + typedef iImplPropagator SitePropagator; + + typedef Lattice ComplexField; + typedef Lattice FermionField; + typedef Lattice DoubledGaugeField; + typedef Lattice PropagatorField; + + typedef SimpleCompressor Compressor; + typedef StaggeredImplParams ImplParams; + typedef CartesianStencil StencilImpl; + + ImplParams Params; + + StaggeredImpl(const ImplParams &p = ImplParams()) : Params(p){}; + + inline void multLink(SiteSpinor &phi, + const SiteDoubledGaugeField &U, + const SiteSpinor &chi, + int mu){ + mult(&phi(), &U(mu), &chi()); + } + inline void multLinkAdd(SiteSpinor &phi, + const SiteDoubledGaugeField &U, + const SiteSpinor &chi, + int mu){ + mac(&phi(), &U(mu), &chi()); + } + + template + inline void loadLinkElement(Simd ®, ref &memory) { + reg = memory; + } + + inline void DoubleStore(GridBase *GaugeGrid, + DoubledGaugeField &UUUds, // for Naik term + DoubledGaugeField &Uds, + const GaugeField &Uthin, + const GaugeField 
&Ufat) { + conformable(Uds._grid, GaugeGrid); + conformable(Uthin._grid, GaugeGrid); + conformable(Ufat._grid, GaugeGrid); + GaugeLinkField U(GaugeGrid); + GaugeLinkField UU(GaugeGrid); + GaugeLinkField UUU(GaugeGrid); + GaugeLinkField Udag(GaugeGrid); + GaugeLinkField UUUdag(GaugeGrid); + for (int mu = 0; mu < Nd; mu++) { + + // Staggered Phase. + Lattice > coor(GaugeGrid); + Lattice > x(GaugeGrid); LatticeCoordinate(x,0); + Lattice > y(GaugeGrid); LatticeCoordinate(y,1); + Lattice > z(GaugeGrid); LatticeCoordinate(z,2); + Lattice > t(GaugeGrid); LatticeCoordinate(t,3); + + Lattice > lin_z(GaugeGrid); lin_z=x+y; + Lattice > lin_t(GaugeGrid); lin_t=x+y+z; + + ComplexField phases(GaugeGrid); phases=1.0; + + if ( mu == 1 ) phases = where( mod(x ,2)==(Integer)0, phases,-phases); + if ( mu == 2 ) phases = where( mod(lin_z,2)==(Integer)0, phases,-phases); + if ( mu == 3 ) phases = where( mod(lin_t,2)==(Integer)0, phases,-phases); + + // 1 hop based on fat links + U = PeekIndex(Ufat, mu); + Udag = adj( Cshift(U, mu, -1)); + + U = U *phases; + Udag = Udag *phases; + + PokeIndex(Uds, U, mu); + PokeIndex(Uds, Udag, mu + 4); + + // 3 hop based on thin links. Crazy huh ? + U = PeekIndex(Uthin, mu); + UU = Gimpl::CovShiftForward(U,mu,U); + UUU= Gimpl::CovShiftForward(U,mu,UU); + + UUUdag = adj( Cshift(UUU, mu, -3)); + + UUU = UUU *phases; + UUUdag = UUUdag *phases; + + PokeIndex(UUUds, UUU, mu); + PokeIndex(UUUds, UUUdag, mu+4); + + } + } + + inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){ + GaugeLinkField link(mat._grid); + link = TraceIndex(outerProduct(Btilde,A)); + PokeIndex(mat,link,mu); + } + + inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã,int mu){ + assert (0); + // Must never hit + } + }; + + + + ///////////////////////////////////////////////////////////////////////////// + // Single flavour one component spinors with colour index. 
5d vec + ///////////////////////////////////////////////////////////////////////////// + template + class StaggeredVec5dImpl : public PeriodicGaugeImpl > { + + public: + + typedef RealD _Coeff_t ; + static const int Dimension = Representation::Dimension; + typedef PeriodicGaugeImpl > Gimpl; + + //Necessary? + constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;} + + const bool LsVectorised=true; + + typedef _Coeff_t Coeff_t; + + INHERIT_GIMPL_TYPES(Gimpl); + + template using iImplScalar = iScalar > >; + template using iImplSpinor = iScalar > >; + template using iImplHalfSpinor = iScalar > >; + template using iImplDoubledGaugeField = iVector >, Nds>; + template using iImplGaugeField = iVector >, Nd>; + template using iImplGaugeLink = iScalar > >; + template using iImplPropagator = iScalar > >; + + // Make the doubled gauge field a *scalar* + typedef iImplDoubledGaugeField SiteDoubledGaugeField; // This is a scalar + typedef iImplGaugeField SiteScalarGaugeField; // scalar + typedef iImplGaugeLink SiteScalarGaugeLink; // scalar + typedef iImplPropagator SitePropagator; + + typedef Lattice DoubledGaugeField; + typedef Lattice PropagatorField; + + typedef iImplScalar SiteComplex; + typedef iImplSpinor SiteSpinor; + typedef iImplHalfSpinor SiteHalfSpinor; + + + typedef Lattice ComplexField; + typedef Lattice FermionField; + + typedef SimpleCompressor Compressor; + typedef StaggeredImplParams ImplParams; + typedef CartesianStencil StencilImpl; + + ImplParams Params; + + StaggeredVec5dImpl(const ImplParams &p = ImplParams()) : Params(p){}; + + template + inline void loadLinkElement(Simd ®, ref &memory) { + vsplat(reg, memory); + } + + inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U, + const SiteHalfSpinor &chi, int mu) { + SiteGaugeLink UU; + for (int i = 0; i < Dimension; i++) { + for (int j = 0; j < Dimension; j++) { + vsplat(UU()()(i, j), U(mu)()(i, j)); + } + } + mult(&phi(), &UU(), &chi()); + } + inline void 
multLinkAdd(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U, + const SiteHalfSpinor &chi, int mu) { + SiteGaugeLink UU; + for (int i = 0; i < Dimension; i++) { + for (int j = 0; j < Dimension; j++) { + vsplat(UU()()(i, j), U(mu)()(i, j)); + } + } + mac(&phi(), &UU(), &chi()); + } + + inline void DoubleStore(GridBase *GaugeGrid, + DoubledGaugeField &UUUds, // for Naik term + DoubledGaugeField &Uds, + const GaugeField &Uthin, + const GaugeField &Ufat) + { + + GridBase * InputGrid = Uthin._grid; + conformable(InputGrid,Ufat._grid); + + GaugeLinkField U(InputGrid); + GaugeLinkField UU(InputGrid); + GaugeLinkField UUU(InputGrid); + GaugeLinkField Udag(InputGrid); + GaugeLinkField UUUdag(InputGrid); + + for (int mu = 0; mu < Nd; mu++) { + + // Staggered Phase. + Lattice > coor(InputGrid); + Lattice > x(InputGrid); LatticeCoordinate(x,0); + Lattice > y(InputGrid); LatticeCoordinate(y,1); + Lattice > z(InputGrid); LatticeCoordinate(z,2); + Lattice > t(InputGrid); LatticeCoordinate(t,3); + + Lattice > lin_z(InputGrid); lin_z=x+y; + Lattice > lin_t(InputGrid); lin_t=x+y+z; + + ComplexField phases(InputGrid); phases=1.0; + + if ( mu == 1 ) phases = where( mod(x ,2)==(Integer)0, phases,-phases); + if ( mu == 2 ) phases = where( mod(lin_z,2)==(Integer)0, phases,-phases); + if ( mu == 3 ) phases = where( mod(lin_t,2)==(Integer)0, phases,-phases); + + // 1 hop based on fat links + U = PeekIndex(Ufat, mu); + Udag = adj( Cshift(U, mu, -1)); + + U = U *phases; + Udag = Udag *phases; + + + for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) { + SiteScalarGaugeLink ScalarU; + SiteDoubledGaugeField ScalarUds; + + std::vector lcoor; + GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor); + peekLocalSite(ScalarUds, Uds, lcoor); + + peekLocalSite(ScalarU, U, lcoor); + ScalarUds(mu) = ScalarU(); + + peekLocalSite(ScalarU, Udag, lcoor); + ScalarUds(mu + 4) = ScalarU(); + + pokeLocalSite(ScalarUds, Uds, lcoor); + } + + // 3 hop based on thin links. Crazy huh ? 
+ U = PeekIndex(Uthin, mu); + UU = Gimpl::CovShiftForward(U,mu,U); + UUU= Gimpl::CovShiftForward(U,mu,UU); + + UUUdag = adj( Cshift(UUU, mu, -3)); + + UUU = UUU *phases; + UUUdag = UUUdag *phases; + + for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) { + + SiteScalarGaugeLink ScalarU; + SiteDoubledGaugeField ScalarUds; + + std::vector lcoor; + GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor); + + peekLocalSite(ScalarUds, UUUds, lcoor); + + peekLocalSite(ScalarU, UUU, lcoor); + ScalarUds(mu) = ScalarU(); + + peekLocalSite(ScalarU, UUUdag, lcoor); + ScalarUds(mu + 4) = ScalarU(); + + pokeLocalSite(ScalarUds, UUUds, lcoor); + } + + } + } + + inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){ + assert(0); + } + + inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã,int mu){ + assert (0); + } + }; + + + typedef WilsonImpl WilsonImplR; // Real.. whichever prec typedef WilsonImpl WilsonImplF; // Float typedef WilsonImpl WilsonImplD; // Double @@ -540,6 +853,14 @@ PARALLEL_FOR_LOOP typedef GparityWilsonImpl GparityWilsonImplF; // Float typedef GparityWilsonImpl GparityWilsonImplD; // Double + typedef StaggeredImpl StaggeredImplR; // Real.. whichever prec + typedef StaggeredImpl StaggeredImplF; // Float + typedef StaggeredImpl StaggeredImplD; // Double + + typedef StaggeredVec5dImpl StaggeredVec5dImplR; // Real.. 
whichever prec + typedef StaggeredVec5dImpl StaggeredVec5dImplF; // Float + typedef StaggeredVec5dImpl StaggeredVec5dImplD; // Double + }} #endif diff --git a/lib/qcd/action/fermion/ImprovedStaggeredFermion.cc b/lib/qcd/action/fermion/ImprovedStaggeredFermion.cc new file mode 100644 index 00000000..2ba4f4af --- /dev/null +++ b/lib/qcd/action/fermion/ImprovedStaggeredFermion.cc @@ -0,0 +1,403 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion.cc + +Copyright (C) 2015 + +Author: Azusa Yamaguchi, Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +namespace Grid { +namespace QCD { + +const std::vector +ImprovedStaggeredFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}); +const std::vector +ImprovedStaggeredFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1, 3, 3, 3, 3, -3, -3, -3, -3}); + +///////////////////////////////// +// Constructor and gauge import +///////////////////////////////// + + +template +ImprovedStaggeredFermion::ImprovedStaggeredFermion(GridCartesian &Fgrid, GridRedBlackCartesian &Hgrid, + RealD _mass, + const ImplParams &p) + : Kernels(p), + _grid(&Fgrid), + _cbgrid(&Hgrid), + Stencil(&Fgrid, npoint, Even, directions, displacements), + StencilEven(&Hgrid, npoint, Even, directions, displacements), // source is Even + StencilOdd(&Hgrid, npoint, Odd, directions, displacements), // source is Odd + mass(_mass), + Lebesgue(_grid), + LebesgueEvenOdd(_cbgrid), + Umu(&Fgrid), + UmuEven(&Hgrid), + UmuOdd(&Hgrid), + UUUmu(&Fgrid), + UUUmuEven(&Hgrid), + UUUmuOdd(&Hgrid) , + _tmp(&Hgrid) +{ +} + +template +ImprovedStaggeredFermion::ImprovedStaggeredFermion(GaugeField &_Uthin, GaugeField &_Ufat, GridCartesian &Fgrid, + GridRedBlackCartesian &Hgrid, RealD _mass, + RealD _c1, RealD _c2,RealD _u0, + const ImplParams &p) + : ImprovedStaggeredFermion(Fgrid,Hgrid,_mass,p) +{ + c1=_c1; + c2=_c2; + u0=_u0; + ImportGauge(_Uthin,_Ufat); +} +template +ImprovedStaggeredFermion::ImprovedStaggeredFermion(GaugeField &_Uthin,GaugeField &_Utriple, GaugeField &_Ufat, GridCartesian &Fgrid, + GridRedBlackCartesian &Hgrid, RealD _mass, + const ImplParams &p) + : ImprovedStaggeredFermion(Fgrid,Hgrid,_mass,p) +{ + ImportGaugeSimple(_Utriple,_Ufat); +} + + + //////////////////////////////////////////////////////////// + // Momentum space propagator should be + // 
https://arxiv.org/pdf/hep-lat/9712010.pdf + // + // mom space action. + // gamma_mu i ( c1 sin pmu + c2 sin 3 pmu ) + m + // + // must track through staggered flavour/spin reduction in literature to + // turn to free propagator for the one component chi field, a la page 4/5 + // of above link to implmement fourier based solver. + //////////////////////////////////////////////////////////// +template +void ImprovedStaggeredFermion::ImportGauge(const GaugeField &_Uthin) +{ + ImportGauge(_Uthin,_Uthin); +}; +template +void ImprovedStaggeredFermion::ImportGaugeSimple(const GaugeField &_Utriple,const GaugeField &_Ufat) +{ + ///////////////////////////////////////////////////////////////// + // Trivial import; phases and fattening and such like preapplied + ///////////////////////////////////////////////////////////////// + GaugeLinkField U(GaugeGrid()); + + for (int mu = 0; mu < Nd; mu++) { + + U = PeekIndex(_Utriple, mu); + PokeIndex(UUUmu, U, mu ); + + U = adj( Cshift(U, mu, -3)); + PokeIndex(UUUmu, -U, mu+4 ); + + U = PeekIndex(_Ufat, mu); + PokeIndex(Umu, U, mu); + + U = adj( Cshift(U, mu, -1)); + PokeIndex(Umu, -U, mu+4); + + } + pickCheckerboard(Even, UmuEven, Umu); + pickCheckerboard(Odd, UmuOdd , Umu); + pickCheckerboard(Even, UUUmuEven,UUUmu); + pickCheckerboard(Odd, UUUmuOdd, UUUmu); +} +template +void ImprovedStaggeredFermion::ImportGauge(const GaugeField &_Uthin,const GaugeField &_Ufat) +{ + GaugeLinkField U(GaugeGrid()); + + //////////////////////////////////////////////////////// + // Double Store should take two fields for Naik and one hop separately. + //////////////////////////////////////////////////////// + Impl::DoubleStore(GaugeGrid(), UUUmu, Umu, _Uthin, _Ufat ); + + //////////////////////////////////////////////////////// + // Apply scale factors to get the right fermion Kinetic term + // Could pass coeffs into the double store to save work. 
+ // 0.5 ( U p(x+mu) - Udag(x-mu) p(x-mu) ) + //////////////////////////////////////////////////////// + for (int mu = 0; mu < Nd; mu++) { + + U = PeekIndex(Umu, mu); + PokeIndex(Umu, U*( 0.5*c1/u0), mu ); + + U = PeekIndex(Umu, mu+4); + PokeIndex(Umu, U*(-0.5*c1/u0), mu+4); + + U = PeekIndex(UUUmu, mu); + PokeIndex(UUUmu, U*( 0.5*c2/u0/u0/u0), mu ); + + U = PeekIndex(UUUmu, mu+4); + PokeIndex(UUUmu, U*(-0.5*c2/u0/u0/u0), mu+4); + } + + std::cout << " Umu " << Umu._odata[0]< +RealD ImprovedStaggeredFermion::M(const FermionField &in, FermionField &out) { + out.checkerboard = in.checkerboard; + Dhop(in, out, DaggerNo); + return axpy_norm(out, mass, in, out); +} + +template +RealD ImprovedStaggeredFermion::Mdag(const FermionField &in, FermionField &out) { + out.checkerboard = in.checkerboard; + Dhop(in, out, DaggerYes); + return axpy_norm(out, mass, in, out); +} + +template +void ImprovedStaggeredFermion::Meooe(const FermionField &in, FermionField &out) { + if (in.checkerboard == Odd) { + DhopEO(in, out, DaggerNo); + } else { + DhopOE(in, out, DaggerNo); + } +} +template +void ImprovedStaggeredFermion::MeooeDag(const FermionField &in, FermionField &out) { + if (in.checkerboard == Odd) { + DhopEO(in, out, DaggerYes); + } else { + DhopOE(in, out, DaggerYes); + } +} + +template +void ImprovedStaggeredFermion::Mooee(const FermionField &in, FermionField &out) { + out.checkerboard = in.checkerboard; + typename FermionField::scalar_type scal(mass); + out = scal * in; +} + +template +void ImprovedStaggeredFermion::MooeeDag(const FermionField &in, FermionField &out) { + out.checkerboard = in.checkerboard; + Mooee(in, out); +} + +template +void ImprovedStaggeredFermion::MooeeInv(const FermionField &in, FermionField &out) { + out.checkerboard = in.checkerboard; + out = (1.0 / (mass)) * in; +} + +template +void ImprovedStaggeredFermion::MooeeInvDag(const FermionField &in, + FermionField &out) { + out.checkerboard = in.checkerboard; + MooeeInv(in, out); +} + 
+/////////////////////////////////// +// Internal +/////////////////////////////////// + +template +void ImprovedStaggeredFermion::DerivInternal(StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU, + GaugeField & mat, + const FermionField &A, const FermionField &B, int dag) { + assert((dag == DaggerNo) || (dag == DaggerYes)); + + Compressor compressor; + + FermionField Btilde(B._grid); + FermionField Atilde(B._grid); + Atilde = A; + + st.HaloExchange(B, compressor); + + for (int mu = 0; mu < Nd; mu++) { + + //////////////////////// + // Call the single hop + //////////////////////// + PARALLEL_FOR_LOOP + for (int sss = 0; sss < B._grid->oSites(); sss++) { + Kernels::DhopDir(st, U, UUU, st.CommBuf(), sss, sss, B, Btilde, mu,1); + } + + // Force in three link terms + // + // Impl::InsertForce4D(mat, Btilde, Atilde, mu); + // + // dU_ac(x)/dt = i p_ab U_bc(x) + // + // => dS_f/dt = dS_f/dU_ac(x) . dU_ac(x)/dt = i p_ab U_bc(x) dS_f/dU_ac(x) + // + // One link: form fragments S_f = A U B + // + // write Btilde = U(x) B(x+mu) + // + // mat+= TraceIndex(outerProduct(Btilde,A)); + // + // Three link: form fragments S_f = A UUU B + // + // mat+= outer ( A, UUUB) <-- Best take DhopDeriv with one linke or identity matrix + // mat+= outer ( AU, UUB) <-- and then use covariant cshift? + // mat+= outer ( AUU, UB) <-- Returned from call to DhopDir + + assert(0);// need to figure out the force interface with a blasted three link term. 
+ + } +} + +template +void ImprovedStaggeredFermion::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { + + conformable(U._grid, _grid); + conformable(U._grid, V._grid); + conformable(U._grid, mat._grid); + + mat.checkerboard = U.checkerboard; + + DerivInternal(Stencil, Umu, UUUmu, mat, U, V, dag); +} + +template +void ImprovedStaggeredFermion::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { + + conformable(U._grid, _cbgrid); + conformable(U._grid, V._grid); + conformable(U._grid, mat._grid); + + assert(V.checkerboard == Even); + assert(U.checkerboard == Odd); + mat.checkerboard = Odd; + + DerivInternal(StencilEven, UmuOdd, UUUmuOdd, mat, U, V, dag); +} + +template +void ImprovedStaggeredFermion::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { + + conformable(U._grid, _cbgrid); + conformable(U._grid, V._grid); + conformable(U._grid, mat._grid); + + assert(V.checkerboard == Odd); + assert(U.checkerboard == Even); + mat.checkerboard = Even; + + DerivInternal(StencilOdd, UmuEven, UUUmuEven, mat, U, V, dag); +} + +template +void ImprovedStaggeredFermion::Dhop(const FermionField &in, FermionField &out, int dag) { + conformable(in._grid, _grid); // verifies full grid + conformable(in._grid, out._grid); + + out.checkerboard = in.checkerboard; + + DhopInternal(Stencil, Lebesgue, Umu, UUUmu, in, out, dag); +} + +template +void ImprovedStaggeredFermion::DhopOE(const FermionField &in, FermionField &out, int dag) { + conformable(in._grid, _cbgrid); // verifies half grid + conformable(in._grid, out._grid); // drops the cb check + + assert(in.checkerboard == Even); + out.checkerboard = Odd; + + DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, UUUmuOdd, in, out, dag); +} + +template +void ImprovedStaggeredFermion::DhopEO(const FermionField &in, FermionField &out, int dag) { + conformable(in._grid, _cbgrid); // verifies half grid + conformable(in._grid, out._grid); // 
drops the cb check + + assert(in.checkerboard == Odd); + out.checkerboard = Even; + + DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, UUUmuEven, in, out, dag); +} + +template +void ImprovedStaggeredFermion::Mdir(const FermionField &in, FermionField &out, int dir, int disp) { + DhopDir(in, out, dir, disp); +} + +template +void ImprovedStaggeredFermion::DhopDir(const FermionField &in, FermionField &out, int dir, int disp) { + + Compressor compressor; + Stencil.HaloExchange(in, compressor); + + PARALLEL_FOR_LOOP + for (int sss = 0; sss < in._grid->oSites(); sss++) { + Kernels::DhopDir(Stencil, Umu, UUUmu, Stencil.CommBuf(), sss, sss, in, out, dir, disp); + } +}; + +template +void ImprovedStaggeredFermion::DhopInternal(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeField &U, + DoubledGaugeField &UUU, + const FermionField &in, + FermionField &out, int dag) { + assert((dag == DaggerNo) || (dag == DaggerYes)); + + Compressor compressor; + st.HaloExchange(in, compressor); + + if (dag == DaggerYes) { + PARALLEL_FOR_LOOP + for (int sss = 0; sss < in._grid->oSites(); sss++) { + Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out); + } + } else { + PARALLEL_FOR_LOOP + for (int sss = 0; sss < in._grid->oSites(); sss++) { + Kernels::DhopSite(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out); + } + } +}; + +FermOpStaggeredTemplateInstantiate(ImprovedStaggeredFermion); + + //AdjointFermOpTemplateInstantiate(ImprovedStaggeredFermion); + //TwoIndexFermOpTemplateInstantiate(ImprovedStaggeredFermion); + +}} diff --git a/lib/qcd/action/fermion/ImprovedStaggeredFermion.h b/lib/qcd/action/fermion/ImprovedStaggeredFermion.h new file mode 100644 index 00000000..7d1f2996 --- /dev/null +++ b/lib/qcd/action/fermion/ImprovedStaggeredFermion.h @@ -0,0 +1,167 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/ImprovedStaggered.h + +Copyright (C) 
2015 + +Author: Azusa Yamaguchi, Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef GRID_QCD_IMPR_STAG_FERMION_H +#define GRID_QCD_IMPR_STAG_FERMION_H + +namespace Grid { + +namespace QCD { + +class ImprovedStaggeredFermionStatic { + public: + static const std::vector directions; + static const std::vector displacements; + static const int npoint = 16; +}; + +template +class ImprovedStaggeredFermion : public StaggeredKernels, public ImprovedStaggeredFermionStatic { + public: + INHERIT_IMPL_TYPES(Impl); + typedef StaggeredKernels Kernels; + + FermionField _tmp; + FermionField &tmp(void) { return _tmp; } + + /////////////////////////////////////////////////////////////// + // Implement the abstract base + /////////////////////////////////////////////////////////////// + GridBase *GaugeGrid(void) { return _grid; } + GridBase *GaugeRedBlackGrid(void) { return _cbgrid; } + GridBase *FermionGrid(void) { return _grid; } + GridBase *FermionRedBlackGrid(void) { return _cbgrid; } + + ////////////////////////////////////////////////////////////////// + // override multiply; cut number routines if pass dagger argument + // and also make 
interface more uniformly consistent + ////////////////////////////////////////////////////////////////// + RealD M(const FermionField &in, FermionField &out); + RealD Mdag(const FermionField &in, FermionField &out); + + ///////////////////////////////////////////////////////// + // half checkerboard operations + ///////////////////////////////////////////////////////// + void Meooe(const FermionField &in, FermionField &out); + void MeooeDag(const FermionField &in, FermionField &out); + void Mooee(const FermionField &in, FermionField &out); + void MooeeDag(const FermionField &in, FermionField &out); + void MooeeInv(const FermionField &in, FermionField &out); + void MooeeInvDag(const FermionField &in, FermionField &out); + + //////////////////////// + // Derivative interface + //////////////////////// + // Interface calls an internal routine + void DhopDeriv (GaugeField &mat, const FermionField &U, const FermionField &V, int dag); + void DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag); + void DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag); + + /////////////////////////////////////////////////////////////// + // non-hermitian hopping term; half cb or both + /////////////////////////////////////////////////////////////// + void Dhop (const FermionField &in, FermionField &out, int dag); + void DhopOE(const FermionField &in, FermionField &out, int dag); + void DhopEO(const FermionField &in, FermionField &out, int dag); + + /////////////////////////////////////////////////////////////// + // Multigrid assistance; force term uses too + /////////////////////////////////////////////////////////////// + void Mdir(const FermionField &in, FermionField &out, int dir, int disp); + void DhopDir(const FermionField &in, FermionField &out, int dir, int disp); + + /////////////////////////////////////////////////////////////// + // Extra methods added by derived + 
/////////////////////////////////////////////////////////////// + void DerivInternal(StencilImpl &st, + DoubledGaugeField &U,DoubledGaugeField &UUU, + GaugeField &mat, + const FermionField &A, const FermionField &B, int dag); + + void DhopInternal(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU, + const FermionField &in, FermionField &out, int dag); + + // Constructor + ImprovedStaggeredFermion(GaugeField &_Uthin, GaugeField &_Ufat, GridCartesian &Fgrid, + GridRedBlackCartesian &Hgrid, RealD _mass, + RealD _c1=9.0/8.0, RealD _c2=-1.0/24.0,RealD _u0=1.0, + const ImplParams &p = ImplParams()); + + ImprovedStaggeredFermion(GaugeField &_Uthin, GaugeField &_Utriple, GaugeField &_Ufat, GridCartesian &Fgrid, + GridRedBlackCartesian &Hgrid, RealD _mass, + const ImplParams &p = ImplParams()); + + ImprovedStaggeredFermion(GridCartesian &Fgrid, GridRedBlackCartesian &Hgrid, RealD _mass, + const ImplParams &p = ImplParams()); + + + // DoubleStore impl dependent + void ImportGaugeSimple(const GaugeField &_Utriple, const GaugeField &_Ufat); + void ImportGauge(const GaugeField &_Uthin, const GaugeField &_Ufat); + void ImportGauge(const GaugeField &_Uthin); + + /////////////////////////////////////////////////////////////// + // Data members require to support the functionality + /////////////////////////////////////////////////////////////// + + // protected: + public: + // any other parameters of action ??? 
+ + RealD mass; + RealD u0; + RealD c1; + RealD c2; + + GridBase *_grid; + GridBase *_cbgrid; + + // Defines the stencils for even and odd + StencilImpl Stencil; + StencilImpl StencilEven; + StencilImpl StencilOdd; + + // Copy of the gauge field , with even and odd subsets + DoubledGaugeField Umu; + DoubledGaugeField UmuEven; + DoubledGaugeField UmuOdd; + + DoubledGaugeField UUUmu; + DoubledGaugeField UUUmuEven; + DoubledGaugeField UUUmuOdd; + + LebesgueOrder Lebesgue; + LebesgueOrder LebesgueEvenOdd; +}; + +typedef ImprovedStaggeredFermion ImprovedStaggeredFermionF; +typedef ImprovedStaggeredFermion ImprovedStaggeredFermionD; + +} +} +#endif diff --git a/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc b/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc new file mode 100644 index 00000000..61a3c559 --- /dev/null +++ b/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc @@ -0,0 +1,355 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc + + Copyright (C) 2015 + +Author: Azusa Yamaguchi +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#include +#include + +namespace Grid { +namespace QCD { + +// S-direction is INNERMOST and takes no part in the parity. +const std::vector +ImprovedStaggeredFermion5DStatic::directions({1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4}); +const std::vector +ImprovedStaggeredFermion5DStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1, 3, 3, 3, 3, -3, -3, -3, -3}); + + // 5d lattice for DWF. +template +ImprovedStaggeredFermion5D::ImprovedStaggeredFermion5D(GaugeField &_Uthin,GaugeField &_Ufat, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass, + RealD _c1,RealD _c2, RealD _u0, + const ImplParams &p) : + Kernels(p), + _FiveDimGrid (&FiveDimGrid), + _FiveDimRedBlackGrid(&FiveDimRedBlackGrid), + _FourDimGrid (&FourDimGrid), + _FourDimRedBlackGrid(&FourDimRedBlackGrid), + Stencil (&FiveDimGrid,npoint,Even,directions,displacements), + StencilEven(&FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even + StencilOdd (&FiveDimRedBlackGrid,npoint,Odd ,directions,displacements), // source is Odd + mass(_mass), + c1(_c1), + c2(_c2), + u0(_u0), + Umu(&FourDimGrid), + UmuEven(&FourDimRedBlackGrid), + UmuOdd (&FourDimRedBlackGrid), + UUUmu(&FourDimGrid), + UUUmuEven(&FourDimRedBlackGrid), + UUUmuOdd(&FourDimRedBlackGrid), + Lebesgue(&FourDimGrid), + LebesgueEvenOdd(&FourDimRedBlackGrid), + _tmp(&FiveDimRedBlackGrid) +{ + + // some assertions + assert(FiveDimGrid._ndimension==5); + assert(FourDimGrid._ndimension==4); + assert(FourDimRedBlackGrid._ndimension==4); + assert(FiveDimRedBlackGrid._ndimension==5); + assert(FiveDimRedBlackGrid._checker_dim==1); // Don't checker the s direction + + // extent of fifth dim and not spread out + 
Ls=FiveDimGrid._fdimensions[0]; + assert(FiveDimRedBlackGrid._fdimensions[0]==Ls); + assert(FiveDimGrid._processors[0] ==1); + assert(FiveDimRedBlackGrid._processors[0] ==1); + + // Other dimensions must match the decomposition of the four-D fields + for(int d=0;d<4;d++){ + assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]); + assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]); + assert(FourDimRedBlackGrid._processors[d] ==FourDimGrid._processors[d]); + + assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]); + assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]); + assert(FourDimRedBlackGrid._fdimensions[d] ==FourDimGrid._fdimensions[d]); + + assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]); + assert(FiveDimRedBlackGrid._simd_layout[d+1]==FourDimGrid._simd_layout[d]); + assert(FourDimRedBlackGrid._simd_layout[d] ==FourDimGrid._simd_layout[d]); + } + + if (Impl::LsVectorised) { + + int nsimd = Simd::Nsimd(); + + // Dimension zero of the five-d is the Ls direction + assert(FiveDimGrid._simd_layout[0] ==nsimd); + assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd); + + for(int d=0;d<4;d++){ + assert(FourDimGrid._simd_layout[d]=1); + assert(FourDimRedBlackGrid._simd_layout[d]=1); + assert(FiveDimRedBlackGrid._simd_layout[d+1]==1); + } + + } else { + + // Dimension zero of the five-d is the Ls direction + assert(FiveDimRedBlackGrid._simd_layout[0]==1); + assert(FiveDimGrid._simd_layout[0] ==1); + + } + + // Allocate the required comms buffer + ImportGauge(_Uthin,_Ufat); +} + +template +void ImprovedStaggeredFermion5D::ImportGauge(const GaugeField &_Uthin) +{ + ImportGauge(_Uthin,_Uthin); +}; +template +void ImprovedStaggeredFermion5D::ImportGauge(const GaugeField &_Uthin,const GaugeField &_Ufat) +{ + //////////////////////////////////////////////////////// + // Double Store should take two fields for Naik and one hop separately. 
+ //////////////////////////////////////////////////////// + Impl::DoubleStore(GaugeGrid(), UUUmu, Umu, _Uthin, _Ufat ); + + //////////////////////////////////////////////////////// + // Apply scale factors to get the right fermion Kinetic term + // Could pass coeffs into the double store to save work. + // 0.5 ( U p(x+mu) - Udag(x-mu) p(x-mu) ) + //////////////////////////////////////////////////////// + for (int mu = 0; mu < Nd; mu++) { + + auto U = PeekIndex(Umu, mu); + PokeIndex(Umu, U*( 0.5*c1/u0), mu ); + + U = PeekIndex(Umu, mu+4); + PokeIndex(Umu, U*(-0.5*c1/u0), mu+4); + + U = PeekIndex(UUUmu, mu); + PokeIndex(UUUmu, U*( 0.5*c2/u0/u0/u0), mu ); + + U = PeekIndex(UUUmu, mu+4); + PokeIndex(UUUmu, U*(-0.5*c2/u0/u0/u0), mu+4); + } + + pickCheckerboard(Even, UmuEven, Umu); + pickCheckerboard(Odd, UmuOdd , Umu); + pickCheckerboard(Even, UUUmuEven, UUUmu); + pickCheckerboard(Odd, UUUmuOdd, UUUmu); +} +template +void ImprovedStaggeredFermion5D::DhopDir(const FermionField &in, FermionField &out,int dir5,int disp) +{ + int dir = dir5-1; // Maps to the ordering above in "directions" that is passed to stencil + // we drop off the innermost fifth dimension + + Compressor compressor; + Stencil.HaloExchange(in,compressor); + + parallel_for(int ss=0;ssoSites();ss++){ + for(int s=0;s +void ImprovedStaggeredFermion5D::DerivInternal(StencilImpl & st, + DoubledGaugeField & U, + DoubledGaugeField & UUU, + GaugeField &mat, + const FermionField &A, + const FermionField &B, + int dag) +{ + // No force terms in multi-rhs solver staggered + assert(0); +} + +template +void ImprovedStaggeredFermion5D::DhopDeriv(GaugeField &mat, + const FermionField &A, + const FermionField &B, + int dag) +{ + assert(0); +} + +template +void ImprovedStaggeredFermion5D::DhopDerivEO(GaugeField &mat, + const FermionField &A, + const FermionField &B, + int dag) +{ + assert(0); +} + + +template +void ImprovedStaggeredFermion5D::DhopDerivOE(GaugeField &mat, + const FermionField &A, + const FermionField &B, 
+ int dag) +{ + assert(0); +} + +template +void ImprovedStaggeredFermion5D::DhopInternal(StencilImpl & st, LebesgueOrder &lo, + DoubledGaugeField & U,DoubledGaugeField & UUU, + const FermionField &in, FermionField &out,int dag) +{ + Compressor compressor; + int LLs = in._grid->_rdimensions[0]; + st.HaloExchange(in,compressor); + + // Dhop takes the 4d grid from U, and makes a 5d index for fermion + if (dag == DaggerYes) { + parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) { + int sU=ss; + Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), LLs, sU,in, out); + } + } else { + parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) { + int sU=ss; + Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out); + } + } +} + + +template +void ImprovedStaggeredFermion5D::DhopOE(const FermionField &in, FermionField &out,int dag) +{ + conformable(in._grid,FermionRedBlackGrid()); // verifies half grid + conformable(in._grid,out._grid); // drops the cb check + + assert(in.checkerboard==Even); + out.checkerboard = Odd; + + DhopInternal(StencilEven,LebesgueEvenOdd,UmuOdd,UUUmuOdd,in,out,dag); +} +template +void ImprovedStaggeredFermion5D::DhopEO(const FermionField &in, FermionField &out,int dag) +{ + conformable(in._grid,FermionRedBlackGrid()); // verifies half grid + conformable(in._grid,out._grid); // drops the cb check + + assert(in.checkerboard==Odd); + out.checkerboard = Even; + + DhopInternal(StencilOdd,LebesgueEvenOdd,UmuEven,UUUmuEven,in,out,dag); +} +template +void ImprovedStaggeredFermion5D::Dhop(const FermionField &in, FermionField &out,int dag) +{ + conformable(in._grid,FermionGrid()); // verifies full grid + conformable(in._grid,out._grid); + + out.checkerboard = in.checkerboard; + + DhopInternal(Stencil,Lebesgue,Umu,UUUmu,in,out,dag); +} + + +///////////////////////////////////////////////////////////////////////// +// Implement the general interface. 
Here we use SAME mass on all slices +///////////////////////////////////////////////////////////////////////// +template +void ImprovedStaggeredFermion5D::Mdir(const FermionField &in, FermionField &out, int dir, int disp) { + DhopDir(in, out, dir, disp); +} +template +RealD ImprovedStaggeredFermion5D::M(const FermionField &in, FermionField &out) { + out.checkerboard = in.checkerboard; + Dhop(in, out, DaggerNo); + return axpy_norm(out, mass, in, out); +} + +template +RealD ImprovedStaggeredFermion5D::Mdag(const FermionField &in, FermionField &out) { + out.checkerboard = in.checkerboard; + Dhop(in, out, DaggerYes); + return axpy_norm(out, mass, in, out); +} + +template +void ImprovedStaggeredFermion5D::Meooe(const FermionField &in, FermionField &out) { + if (in.checkerboard == Odd) { + DhopEO(in, out, DaggerNo); + } else { + DhopOE(in, out, DaggerNo); + } +} +template +void ImprovedStaggeredFermion5D::MeooeDag(const FermionField &in, FermionField &out) { + if (in.checkerboard == Odd) { + DhopEO(in, out, DaggerYes); + } else { + DhopOE(in, out, DaggerYes); + } +} + +template +void ImprovedStaggeredFermion5D::Mooee(const FermionField &in, FermionField &out) { + out.checkerboard = in.checkerboard; + typename FermionField::scalar_type scal(mass); + out = scal * in; +} + +template +void ImprovedStaggeredFermion5D::MooeeDag(const FermionField &in, FermionField &out) { + out.checkerboard = in.checkerboard; + Mooee(in, out); +} + +template +void ImprovedStaggeredFermion5D::MooeeInv(const FermionField &in, FermionField &out) { + out.checkerboard = in.checkerboard; + out = (1.0 / (mass)) * in; +} + +template +void ImprovedStaggeredFermion5D::MooeeInvDag(const FermionField &in, + FermionField &out) { + out.checkerboard = in.checkerboard; + MooeeInv(in, out); +} + + +FermOpStaggeredTemplateInstantiate(ImprovedStaggeredFermion5D); +FermOpStaggeredVec5dTemplateInstantiate(ImprovedStaggeredFermion5D); + +}} + + + diff --git a/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.h 
b/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.h new file mode 100644 index 00000000..4961da49 --- /dev/null +++ b/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.h @@ -0,0 +1,167 @@ + + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion5D.h + + Copyright (C) 2015 + +Author: Peter Boyle +Author: AzusaYamaguchi + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef GRID_QCD_IMPROVED_STAGGERED_FERMION_5D_H +#define GRID_QCD_IMPROVED_STAGGERED_FERMION_5D_H + +namespace Grid { +namespace QCD { + + //////////////////////////////////////////////////////////////////////////////// + // This is the 4d red black case appropriate to support + //////////////////////////////////////////////////////////////////////////////// + + class ImprovedStaggeredFermion5DStatic { + public: + // S-direction is INNERMOST and takes no part in the parity. 
+ static const std::vector directions; + static const std::vector displacements; + const int npoint = 16; + }; + + template + class ImprovedStaggeredFermion5D : public StaggeredKernels, public ImprovedStaggeredFermion5DStatic + { + public: + INHERIT_IMPL_TYPES(Impl); + typedef StaggeredKernels Kernels; + + FermionField _tmp; + FermionField &tmp(void) { return _tmp; } + + /////////////////////////////////////////////////////////////// + // Implement the abstract base + /////////////////////////////////////////////////////////////// + GridBase *GaugeGrid(void) { return _FourDimGrid ;} + GridBase *GaugeRedBlackGrid(void) { return _FourDimRedBlackGrid ;} + GridBase *FermionGrid(void) { return _FiveDimGrid;} + GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;} + + // full checkerboard operations; leave unimplemented as abstract for now + RealD M (const FermionField &in, FermionField &out); + RealD Mdag (const FermionField &in, FermionField &out); + + // half checkerboard operations + void Meooe (const FermionField &in, FermionField &out); + void Mooee (const FermionField &in, FermionField &out); + void MooeeInv (const FermionField &in, FermionField &out); + + void MeooeDag (const FermionField &in, FermionField &out); + void MooeeDag (const FermionField &in, FermionField &out); + void MooeeInvDag (const FermionField &in, FermionField &out); + + void Mdir (const FermionField &in, FermionField &out,int dir,int disp); + void DhopDir(const FermionField &in, FermionField &out,int dir,int disp); + + // These can be overridden by fancy 5d chiral action + void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag); + void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); + void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); + + // Implement hopping term non-hermitian hopping term; half cb or both + void Dhop (const FermionField &in, FermionField &out,int dag); + void 
DhopOE(const FermionField &in, FermionField &out,int dag); + void DhopEO(const FermionField &in, FermionField &out,int dag); + + + /////////////////////////////////////////////////////////////// + // New methods added + /////////////////////////////////////////////////////////////// + void DerivInternal(StencilImpl & st, + DoubledGaugeField & U, + DoubledGaugeField & UUU, + GaugeField &mat, + const FermionField &A, + const FermionField &B, + int dag); + + void DhopInternal(StencilImpl & st, + LebesgueOrder &lo, + DoubledGaugeField &U, + DoubledGaugeField &UUU, + const FermionField &in, + FermionField &out, + int dag); + + // Constructors + ImprovedStaggeredFermion5D(GaugeField &_Uthin, + GaugeField &_Ufat, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + double _mass, + RealD _c1=9.0/8.0, RealD _c2=-1.0/24.0,RealD _u0=1.0, + const ImplParams &p= ImplParams()); + + // DoubleStore + void ImportGauge(const GaugeField &_U); + void ImportGauge(const GaugeField &_Uthin,const GaugeField &_Ufat); + + /////////////////////////////////////////////////////////////// + // Data members require to support the functionality + /////////////////////////////////////////////////////////////// + public: + + GridBase *_FourDimGrid; + GridBase *_FourDimRedBlackGrid; + GridBase *_FiveDimGrid; + GridBase *_FiveDimRedBlackGrid; + + RealD mass; + RealD c1; + RealD c2; + RealD u0; + int Ls; + + //Defines the stencils for even and odd + StencilImpl Stencil; + StencilImpl StencilEven; + StencilImpl StencilOdd; + + // Copy of the gauge field , with even and odd subsets + DoubledGaugeField Umu; + DoubledGaugeField UmuEven; + DoubledGaugeField UmuOdd; + + DoubledGaugeField UUUmu; + DoubledGaugeField UUUmuEven; + DoubledGaugeField UUUmuOdd; + + LebesgueOrder Lebesgue; + LebesgueOrder LebesgueEvenOdd; + + // Comms buffer + std::vector > comm_buf; + + }; + +}} + +#endif diff --git 
a/lib/qcd/action/fermion/MobiusFermion.h b/lib/qcd/action/fermion/MobiusFermion.h index ade9ca4d..b61c26d5 100644 --- a/lib/qcd/action/fermion/MobiusFermion.h +++ b/lib/qcd/action/fermion/MobiusFermion.h @@ -29,7 +29,7 @@ Author: Peter Boyle #ifndef GRID_QCD_MOBIUS_FERMION_H #define GRID_QCD_MOBIUS_FERMION_H -#include +#include namespace Grid { diff --git a/lib/qcd/action/fermion/MobiusZolotarevFermion.h b/lib/qcd/action/fermion/MobiusZolotarevFermion.h index 609d5cea..078d4f3e 100644 --- a/lib/qcd/action/fermion/MobiusZolotarevFermion.h +++ b/lib/qcd/action/fermion/MobiusZolotarevFermion.h @@ -29,7 +29,7 @@ Author: Peter Boyle #ifndef GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H #define GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H -#include +#include namespace Grid { diff --git a/lib/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h b/lib/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h index 9cab0e22..f516c5d0 100644 --- a/lib/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h +++ b/lib/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h @@ -29,7 +29,7 @@ Author: Peter Boyle #ifndef OVERLAP_WILSON_CAYLEY_TANH_FERMION_H #define OVERLAP_WILSON_CAYLEY_TANH_FERMION_H -#include +#include namespace Grid { diff --git a/lib/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h b/lib/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h index 048244cc..4f1adbbf 100644 --- a/lib/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h +++ b/lib/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h @@ -29,7 +29,7 @@ Author: Peter Boyle #ifndef OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H #define OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H -#include +#include namespace Grid { diff --git a/lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h b/lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h index bbac735a..38d0fda2 100644 --- a/lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h +++ b/lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h @@ -29,7 +29,7 @@ 
Author: Peter Boyle #ifndef OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H #define OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H -#include +#include namespace Grid { diff --git a/lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h b/lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h index 9da30f65..6773b4d2 100644 --- a/lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h +++ b/lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h @@ -29,7 +29,7 @@ Author: Peter Boyle #ifndef OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H #define OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H -#include +#include namespace Grid { diff --git a/lib/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h b/lib/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h index 3b867174..84c4f597 100644 --- a/lib/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h +++ b/lib/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h @@ -29,7 +29,7 @@ Author: Peter Boyle #ifndef OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H #define OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H -#include +#include namespace Grid { diff --git a/lib/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h b/lib/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h index e1d0763b..dc275852 100644 --- a/lib/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h +++ b/lib/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h @@ -29,7 +29,7 @@ Author: Peter Boyle #ifndef OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H #define OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H -#include +#include namespace Grid { diff --git a/lib/qcd/action/fermion/PartialFractionFermion5D.cc b/lib/qcd/action/fermion/PartialFractionFermion5D.cc index 0c3e9397..3a78e043 100644 --- a/lib/qcd/action/fermion/PartialFractionFermion5D.cc +++ b/lib/qcd/action/fermion/PartialFractionFermion5D.cc @@ -26,7 +26,9 @@ Author: Peter Boyle See the full license in the file "LICENSE" in the 
top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include +#include +#include + namespace Grid { namespace QCD { diff --git a/lib/qcd/action/fermion/PartialFractionFermion5D.h b/lib/qcd/action/fermion/PartialFractionFermion5D.h index 126f3299..0ec72de4 100644 --- a/lib/qcd/action/fermion/PartialFractionFermion5D.h +++ b/lib/qcd/action/fermion/PartialFractionFermion5D.h @@ -29,6 +29,8 @@ Author: Peter Boyle #ifndef GRID_QCD_PARTIAL_FRACTION_H #define GRID_QCD_PARTIAL_FRACTION_H +#include + namespace Grid { namespace QCD { diff --git a/lib/qcd/action/fermion/ScaledShamirFermion.h b/lib/qcd/action/fermion/ScaledShamirFermion.h index f850ee4d..b779b9c0 100644 --- a/lib/qcd/action/fermion/ScaledShamirFermion.h +++ b/lib/qcd/action/fermion/ScaledShamirFermion.h @@ -29,7 +29,7 @@ Author: Peter Boyle #ifndef GRID_QCD_SCALED_SHAMIR_FERMION_H #define GRID_QCD_SCALED_SHAMIR_FERMION_H -#include +#include namespace Grid { diff --git a/lib/qcd/action/fermion/ShamirZolotarevFermion.h b/lib/qcd/action/fermion/ShamirZolotarevFermion.h index 732afa0a..f9397911 100644 --- a/lib/qcd/action/fermion/ShamirZolotarevFermion.h +++ b/lib/qcd/action/fermion/ShamirZolotarevFermion.h @@ -29,7 +29,7 @@ Author: Peter Boyle #ifndef GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H #define GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H -#include +#include namespace Grid { diff --git a/lib/qcd/action/fermion/StaggeredKernels.cc b/lib/qcd/action/fermion/StaggeredKernels.cc new file mode 100644 index 00000000..b6ec14c7 --- /dev/null +++ b/lib/qcd/action/fermion/StaggeredKernels.cc @@ -0,0 +1,276 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/StaggeredKernels.cc + +Copyright (C) 2015 + +Author: Azusa Yamaguchi, Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the 
terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +namespace Grid { +namespace QCD { + +int StaggeredKernelsStatic::Opt= StaggeredKernelsStatic::OptGeneric; + +template +StaggeredKernels::StaggeredKernels(const ImplParams &p) : Base(p){}; + +//////////////////////////////////////////// +// Generic implementation; move to different file? 
+//////////////////////////////////////////// + +template +void StaggeredKernels::DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + SiteSpinor *buf, int sF, + int sU, const FermionField &in, SiteSpinor &out,int threeLink) { + const SiteSpinor *chi_p; + SiteSpinor chi; + SiteSpinor Uchi; + StencilEntry *SE; + int ptype; + int skew = 0; + if (threeLink) skew=8; + /////////////////////////// + // Xp + /////////////////////////// + + SE = st.GetEntry(ptype, Xp+skew, sF); + if (SE->_is_local) { + if (SE->_permute) { + chi_p = &chi; + permute(chi, in._odata[SE->_offset], ptype); + } else { + chi_p = &in._odata[SE->_offset]; + } + } else { + chi_p = &buf[SE->_offset]; + } + Impl::multLink(Uchi, U._odata[sU], *chi_p, Xp); + + /////////////////////////// + // Yp + /////////////////////////// + SE = st.GetEntry(ptype, Yp+skew, sF); + if (SE->_is_local) { + if (SE->_permute) { + chi_p = &chi; + permute(chi, in._odata[SE->_offset], ptype); + } else { + chi_p = &in._odata[SE->_offset]; + } + } else { + chi_p = &buf[SE->_offset]; + } + Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Yp); + + /////////////////////////// + // Zp + /////////////////////////// + SE = st.GetEntry(ptype, Zp+skew, sF); + if (SE->_is_local) { + if (SE->_permute) { + chi_p = &chi; + permute(chi, in._odata[SE->_offset], ptype); + } else { + chi_p = &in._odata[SE->_offset]; + } + } else { + chi_p = &buf[SE->_offset]; + } + Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Zp); + + /////////////////////////// + // Tp + /////////////////////////// + SE = st.GetEntry(ptype, Tp+skew, sF); + if (SE->_is_local) { + if (SE->_permute) { + chi_p = &chi; + permute(chi, in._odata[SE->_offset], ptype); + } else { + chi_p = &in._odata[SE->_offset]; + } + } else { + chi_p = &buf[SE->_offset]; + } + Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Tp); + + /////////////////////////// + // Xm + /////////////////////////// + SE = st.GetEntry(ptype, Xm+skew, sF); + if (SE->_is_local) { + if (SE->_permute) { + chi_p = &chi; + 
permute(chi, in._odata[SE->_offset], ptype); + } else { + chi_p = &in._odata[SE->_offset]; + } + } else { + chi_p = &buf[SE->_offset]; + } + Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Xm); + + /////////////////////////// + // Ym + /////////////////////////// + SE = st.GetEntry(ptype, Ym+skew, sF); + if (SE->_is_local) { + if (SE->_permute) { + chi_p = &chi; + permute(chi, in._odata[SE->_offset], ptype); + } else { + chi_p = &in._odata[SE->_offset]; + } + } else { + chi_p = &buf[SE->_offset]; + } + Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Ym); + + /////////////////////////// + // Zm + /////////////////////////// + SE = st.GetEntry(ptype, Zm+skew, sF); + if (SE->_is_local) { + if (SE->_permute) { + chi_p = &chi; + permute(chi, in._odata[SE->_offset], ptype); + } else { + chi_p = &in._odata[SE->_offset]; + } + } else { + chi_p = &buf[SE->_offset]; + } + Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Zm); + + /////////////////////////// + // Tm + /////////////////////////// + SE = st.GetEntry(ptype, Tm+skew, sF); + if (SE->_is_local) { + if (SE->_permute) { + chi_p = &chi; + permute(chi, in._odata[SE->_offset], ptype); + } else { + chi_p = &in._odata[SE->_offset]; + } + } else { + chi_p = &buf[SE->_offset]; + } + Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Tm); + + vstream(out, Uchi); +}; + +template +void StaggeredKernels::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, + SiteSpinor *buf, int LLs, int sU, + const FermionField &in, FermionField &out) { + SiteSpinor naik; + SiteSpinor naive; + int oneLink =0; + int threeLink=1; + int dag=1; + switch(Opt) { +#ifdef AVX512 + //FIXME; move the sign into the Asm routine + case OptInlineAsm: + DhopSiteAsm(st,lo,U,UUU,buf,LLs,sU,in,out); + for(int s=0;s class StaggeredKernels : public FermionOperator , public StaggeredKernelsStatic { + public: + + INHERIT_IMPL_TYPES(Impl); + typedef FermionOperator Base; + +public: + + void DhopDir(StencilImpl &st, DoubledGaugeField &U, 
DoubledGaugeField &UUU, SiteSpinor * buf, + int sF, int sU, const FermionField &in, FermionField &out, int dir,int disp); + + void DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteSpinor * buf, + int sF, int sU, const FermionField &in, SiteSpinor &out,int threeLink); + + + void DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteSpinor * buf, + int sF, int sU, const FermionField &in, SiteSpinor&out,int threeLink); + + void DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,SiteSpinor * buf, + int LLs, int sU, const FermionField &in, FermionField &out, int dag); + + void DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU, SiteSpinor * buf, + int LLs, int sU, const FermionField &in, FermionField &out); + + void DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor * buf, + int sF, int sU, const FermionField &in, FermionField &out); + + void DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf, + int LLs, int sU, const FermionField &in, FermionField &out); + +public: + + StaggeredKernels(const ImplParams &p = ImplParams()); + +}; + +}} + +#endif diff --git a/lib/qcd/action/fermion/StaggeredKernelsAsm.cc b/lib/qcd/action/fermion/StaggeredKernelsAsm.cc new file mode 100644 index 00000000..fd881716 --- /dev/null +++ b/lib/qcd/action/fermion/StaggeredKernelsAsm.cc @@ -0,0 +1,920 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/StaggeredKernelsAsm.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either 
version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +#ifdef AVX512 +#include +#include +#endif + +// Interleave operations from two directions +// This looks just like a 2 spin multiply and reuse same sequence from the Wilson +// Kernel. But the spin index becomes a mu index instead. +#define Chi_00 %zmm0 +#define Chi_01 %zmm1 +#define Chi_02 %zmm2 +#define Chi_10 %zmm3 +#define Chi_11 %zmm4 +#define Chi_12 %zmm5 +#define Chi_20 %zmm6 +#define Chi_21 %zmm7 +#define Chi_22 %zmm8 +#define Chi_30 %zmm9 +#define Chi_31 %zmm10 +#define Chi_32 %zmm11 + +#define UChi_00 %zmm12 +#define UChi_01 %zmm13 +#define UChi_02 %zmm14 +#define UChi_10 %zmm15 +#define UChi_11 %zmm16 +#define UChi_12 %zmm17 +#define UChi_20 %zmm18 +#define UChi_21 %zmm19 +#define UChi_22 %zmm20 +#define UChi_30 %zmm21 +#define UChi_31 %zmm22 +#define UChi_32 %zmm23 + +#define pChi_00 %%zmm0 +#define pChi_01 %%zmm1 +#define pChi_02 %%zmm2 +#define pChi_10 %%zmm3 +#define pChi_11 %%zmm4 +#define pChi_12 %%zmm5 +#define pChi_20 %%zmm6 +#define pChi_21 %%zmm7 +#define pChi_22 %%zmm8 +#define pChi_30 %%zmm9 +#define pChi_31 %%zmm10 +#define pChi_32 %%zmm11 + +#define pUChi_00 %%zmm12 +#define pUChi_01 %%zmm13 +#define pUChi_02 %%zmm14 +#define pUChi_10 %%zmm15 +#define pUChi_11 %%zmm16 +#define pUChi_12 %%zmm17 +#define pUChi_20 %%zmm18 
+#define pUChi_21 %%zmm19 +#define pUChi_22 %%zmm20 +#define pUChi_30 %%zmm21 +#define pUChi_31 %%zmm22 +#define pUChi_32 %%zmm23 + +#define T0 %zmm24 +#define T1 %zmm25 +#define T2 %zmm26 +#define T3 %zmm27 + +#define Z00 %zmm26 +#define Z10 %zmm27 +#define Z0 Z00 +#define Z1 %zmm28 +#define Z2 %zmm29 + +#define Z3 %zmm30 +#define Z4 %zmm31 +#define Z5 Chi_31 +#define Z6 Chi_32 + +#define MULT_ADD_LS(g0,g1,g2,g3) \ + asm ( "movq %0, %%r8 \n\t" \ + "movq %1, %%r9 \n\t" \ + "movq %2, %%r10 \n\t" \ + "movq %3, %%r11 \n\t" : : "r"(g0), "r"(g1), "r"(g2), "r"(g3) : "%r8","%r9","%r10","%r11" );\ + asm ( \ + VSHUF(Chi_00,T0) VSHUF(Chi_10,T1) \ + VSHUF(Chi_20,T2) VSHUF(Chi_30,T3) \ + VMADDSUBIDUP(0,%r8,T0,UChi_00) VMADDSUBIDUP(0,%r9,T1,UChi_10) \ + VMADDSUBIDUP(3,%r8,T0,UChi_01) VMADDSUBIDUP(3,%r9,T1,UChi_11) \ + VMADDSUBIDUP(6,%r8,T0,UChi_02) VMADDSUBIDUP(6,%r9,T1,UChi_12) \ + VMADDSUBIDUP(0,%r10,T2,UChi_20) VMADDSUBIDUP(0,%r11,T3,UChi_30) \ + VMADDSUBIDUP(3,%r10,T2,UChi_21) VMADDSUBIDUP(3,%r11,T3,UChi_31) \ + VMADDSUBIDUP(6,%r10,T2,UChi_22) VMADDSUBIDUP(6,%r11,T3,UChi_32) \ + VMADDSUBRDUP(0,%r8,Chi_00,UChi_00) VMADDSUBRDUP(0,%r9,Chi_10,UChi_10) \ + VMADDSUBRDUP(3,%r8,Chi_00,UChi_01) VMADDSUBRDUP(3,%r9,Chi_10,UChi_11) \ + VMADDSUBRDUP(6,%r8,Chi_00,UChi_02) VMADDSUBRDUP(6,%r9,Chi_10,UChi_12) \ + VMADDSUBRDUP(0,%r10,Chi_20,UChi_20) VMADDSUBRDUP(0,%r11,Chi_30,UChi_30) \ + VMADDSUBRDUP(3,%r10,Chi_20,UChi_21) VMADDSUBRDUP(3,%r11,Chi_30,UChi_31) \ + VMADDSUBRDUP(6,%r10,Chi_20,UChi_22) VMADDSUBRDUP(6,%r11,Chi_30,UChi_32) \ + VSHUF(Chi_01,T0) VSHUF(Chi_11,T1) \ + VSHUF(Chi_21,T2) VSHUF(Chi_31,T3) \ + VMADDSUBIDUP(1,%r8,T0,UChi_00) VMADDSUBIDUP(1,%r9,T1,UChi_10) \ + VMADDSUBIDUP(4,%r8,T0,UChi_01) VMADDSUBIDUP(4,%r9,T1,UChi_11) \ + VMADDSUBIDUP(7,%r8,T0,UChi_02) VMADDSUBIDUP(7,%r9,T1,UChi_12) \ + VMADDSUBIDUP(1,%r10,T2,UChi_20) VMADDSUBIDUP(1,%r11,T3,UChi_30) \ + VMADDSUBIDUP(4,%r10,T2,UChi_21) VMADDSUBIDUP(4,%r11,T3,UChi_31) \ + VMADDSUBIDUP(7,%r10,T2,UChi_22) 
VMADDSUBIDUP(7,%r11,T3,UChi_32) \ + VMADDSUBRDUP(1,%r8,Chi_01,UChi_00) VMADDSUBRDUP(1,%r9,Chi_11,UChi_10) \ + VMADDSUBRDUP(4,%r8,Chi_01,UChi_01) VMADDSUBRDUP(4,%r9,Chi_11,UChi_11) \ + VMADDSUBRDUP(7,%r8,Chi_01,UChi_02) VMADDSUBRDUP(7,%r9,Chi_11,UChi_12) \ + VMADDSUBRDUP(1,%r10,Chi_21,UChi_20) VMADDSUBRDUP(1,%r11,Chi_31,UChi_30) \ + VMADDSUBRDUP(4,%r10,Chi_21,UChi_21) VMADDSUBRDUP(4,%r11,Chi_31,UChi_31) \ + VMADDSUBRDUP(7,%r10,Chi_21,UChi_22) VMADDSUBRDUP(7,%r11,Chi_31,UChi_32) \ + VSHUF(Chi_02,T0) VSHUF(Chi_12,T1) \ + VSHUF(Chi_22,T2) VSHUF(Chi_32,T3) \ + VMADDSUBIDUP(2,%r8,T0,UChi_00) VMADDSUBIDUP(2,%r9,T1,UChi_10) \ + VMADDSUBIDUP(5,%r8,T0,UChi_01) VMADDSUBIDUP(5,%r9,T1,UChi_11) \ + VMADDSUBIDUP(8,%r8,T0,UChi_02) VMADDSUBIDUP(8,%r9,T1,UChi_12) \ + VMADDSUBIDUP(2,%r10,T2,UChi_20) VMADDSUBIDUP(2,%r11,T3,UChi_30) \ + VMADDSUBIDUP(5,%r10,T2,UChi_21) VMADDSUBIDUP(5,%r11,T3,UChi_31) \ + VMADDSUBIDUP(8,%r10,T2,UChi_22) VMADDSUBIDUP(8,%r11,T3,UChi_32) \ + VMADDSUBRDUP(2,%r8,Chi_02,UChi_00) VMADDSUBRDUP(2,%r9,Chi_12,UChi_10) \ + VMADDSUBRDUP(5,%r8,Chi_02,UChi_01) VMADDSUBRDUP(5,%r9,Chi_12,UChi_11) \ + VMADDSUBRDUP(8,%r8,Chi_02,UChi_02) VMADDSUBRDUP(8,%r9,Chi_12,UChi_12) \ + VMADDSUBRDUP(2,%r10,Chi_22,UChi_20) VMADDSUBRDUP(2,%r11,Chi_32,UChi_30) \ + VMADDSUBRDUP(5,%r10,Chi_22,UChi_21) VMADDSUBRDUP(5,%r11,Chi_32,UChi_31) \ + VMADDSUBRDUP(8,%r10,Chi_22,UChi_22) VMADDSUBRDUP(8,%r11,Chi_32,UChi_32) ); + +#define MULT_LS(g0,g1,g2,g3) \ + asm ( "movq %0, %%r8 \n\t" \ + "movq %1, %%r9 \n\t" \ + "movq %2, %%r10 \n\t" \ + "movq %3, %%r11 \n\t" : : "r"(g0), "r"(g1), "r"(g2), "r"(g3) : "%r8","%r9","%r10","%r11" );\ + asm ( \ + VSHUF(Chi_00,T0) VSHUF(Chi_10,T1) \ + VSHUF(Chi_20,T2) VSHUF(Chi_30,T3) \ + VMULIDUP(0,%r8,T0,UChi_00) VMULIDUP(0,%r9,T1,UChi_10) \ + VMULIDUP(3,%r8,T0,UChi_01) VMULIDUP(3,%r9,T1,UChi_11) \ + VMULIDUP(6,%r8,T0,UChi_02) VMULIDUP(6,%r9,T1,UChi_12) \ + VMULIDUP(0,%r10,T2,UChi_20) VMULIDUP(0,%r11,T3,UChi_30) \ + VMULIDUP(3,%r10,T2,UChi_21) 
VMULIDUP(3,%r11,T3,UChi_31) \ + VMULIDUP(6,%r10,T2,UChi_22) VMULIDUP(6,%r11,T3,UChi_32) \ + VMADDSUBRDUP(0,%r8,Chi_00,UChi_00) VMADDSUBRDUP(0,%r9,Chi_10,UChi_10) \ + VMADDSUBRDUP(3,%r8,Chi_00,UChi_01) VMADDSUBRDUP(3,%r9,Chi_10,UChi_11) \ + VMADDSUBRDUP(6,%r8,Chi_00,UChi_02) VMADDSUBRDUP(6,%r9,Chi_10,UChi_12) \ + VMADDSUBRDUP(0,%r10,Chi_20,UChi_20) VMADDSUBRDUP(0,%r11,Chi_30,UChi_30) \ + VMADDSUBRDUP(3,%r10,Chi_20,UChi_21) VMADDSUBRDUP(3,%r11,Chi_30,UChi_31) \ + VMADDSUBRDUP(6,%r10,Chi_20,UChi_22) VMADDSUBRDUP(6,%r11,Chi_30,UChi_32) \ + VSHUF(Chi_01,T0) VSHUF(Chi_11,T1) \ + VSHUF(Chi_21,T2) VSHUF(Chi_31,T3) \ + VMADDSUBIDUP(1,%r8,T0,UChi_00) VMADDSUBIDUP(1,%r9,T1,UChi_10) \ + VMADDSUBIDUP(4,%r8,T0,UChi_01) VMADDSUBIDUP(4,%r9,T1,UChi_11) \ + VMADDSUBIDUP(7,%r8,T0,UChi_02) VMADDSUBIDUP(7,%r9,T1,UChi_12) \ + VMADDSUBIDUP(1,%r10,T2,UChi_20) VMADDSUBIDUP(1,%r11,T3,UChi_30) \ + VMADDSUBIDUP(4,%r10,T2,UChi_21) VMADDSUBIDUP(4,%r11,T3,UChi_31) \ + VMADDSUBIDUP(7,%r10,T2,UChi_22) VMADDSUBIDUP(7,%r11,T3,UChi_32) \ + VMADDSUBRDUP(1,%r8,Chi_01,UChi_00) VMADDSUBRDUP(1,%r9,Chi_11,UChi_10) \ + VMADDSUBRDUP(4,%r8,Chi_01,UChi_01) VMADDSUBRDUP(4,%r9,Chi_11,UChi_11) \ + VMADDSUBRDUP(7,%r8,Chi_01,UChi_02) VMADDSUBRDUP(7,%r9,Chi_11,UChi_12) \ + VMADDSUBRDUP(1,%r10,Chi_21,UChi_20) VMADDSUBRDUP(1,%r11,Chi_31,UChi_30) \ + VMADDSUBRDUP(4,%r10,Chi_21,UChi_21) VMADDSUBRDUP(4,%r11,Chi_31,UChi_31) \ + VMADDSUBRDUP(7,%r10,Chi_21,UChi_22) VMADDSUBRDUP(7,%r11,Chi_31,UChi_32) \ + VSHUF(Chi_02,T0) VSHUF(Chi_12,T1) \ + VSHUF(Chi_22,T2) VSHUF(Chi_32,T3) \ + VMADDSUBIDUP(2,%r8,T0,UChi_00) VMADDSUBIDUP(2,%r9,T1,UChi_10) \ + VMADDSUBIDUP(5,%r8,T0,UChi_01) VMADDSUBIDUP(5,%r9,T1,UChi_11) \ + VMADDSUBIDUP(8,%r8,T0,UChi_02) VMADDSUBIDUP(8,%r9,T1,UChi_12) \ + VMADDSUBIDUP(2,%r10,T2,UChi_20) VMADDSUBIDUP(2,%r11,T3,UChi_30) \ + VMADDSUBIDUP(5,%r10,T2,UChi_21) VMADDSUBIDUP(5,%r11,T3,UChi_31) \ + VMADDSUBIDUP(8,%r10,T2,UChi_22) VMADDSUBIDUP(8,%r11,T3,UChi_32) \ + VMADDSUBRDUP(2,%r8,Chi_02,UChi_00) 
VMADDSUBRDUP(2,%r9,Chi_12,UChi_10) \ + VMADDSUBRDUP(5,%r8,Chi_02,UChi_01) VMADDSUBRDUP(5,%r9,Chi_12,UChi_11) \ + VMADDSUBRDUP(8,%r8,Chi_02,UChi_02) VMADDSUBRDUP(8,%r9,Chi_12,UChi_12) \ + VMADDSUBRDUP(2,%r10,Chi_22,UChi_20) VMADDSUBRDUP(2,%r11,Chi_32,UChi_30) \ + VMADDSUBRDUP(5,%r10,Chi_22,UChi_21) VMADDSUBRDUP(5,%r11,Chi_32,UChi_31) \ + VMADDSUBRDUP(8,%r10,Chi_22,UChi_22) VMADDSUBRDUP(8,%r11,Chi_32,UChi_32) ); + +#define MULT_ADD_XYZTa(g0,g1) \ + asm ( "movq %0, %%r8 \n\t" \ + "movq %1, %%r9 \n\t" : : "r"(g0), "r"(g1) : "%r8","%r9");\ + __asm__ ( \ + VSHUF(Chi_00,T0) \ + VSHUF(Chi_10,T1) \ + VMOVIDUP(0,%r8,Z0 ) \ + VMOVIDUP(3,%r8,Z1 ) \ + VMOVIDUP(6,%r8,Z2 ) \ + VMADDSUB(Z0,T0,UChi_00) \ + VMADDSUB(Z1,T0,UChi_01) \ + VMADDSUB(Z2,T0,UChi_02) \ + \ + VMOVIDUP(0,%r9,Z0 ) \ + VMOVIDUP(3,%r9,Z1 ) \ + VMOVIDUP(6,%r9,Z2 ) \ + VMADDSUB(Z0,T1,UChi_10) \ + VMADDSUB(Z1,T1,UChi_11) \ + VMADDSUB(Z2,T1,UChi_12) \ + \ + \ + VMOVRDUP(0,%r8,Z3 ) \ + VMOVRDUP(3,%r8,Z4 ) \ + VMOVRDUP(6,%r8,Z5 ) \ + VMADDSUB(Z3,Chi_00,UChi_00)/*rr * ir = ri rr*/ \ + VMADDSUB(Z4,Chi_00,UChi_01) \ + VMADDSUB(Z5,Chi_00,UChi_02) \ + \ + VMOVRDUP(0,%r9,Z3 ) \ + VMOVRDUP(3,%r9,Z4 ) \ + VMOVRDUP(6,%r9,Z5 ) \ + VMADDSUB(Z3,Chi_10,UChi_10) \ + VMADDSUB(Z4,Chi_10,UChi_11)\ + VMADDSUB(Z5,Chi_10,UChi_12) \ + \ + \ + VMOVIDUP(1,%r8,Z0 ) \ + VMOVIDUP(4,%r8,Z1 ) \ + VMOVIDUP(7,%r8,Z2 ) \ + VSHUF(Chi_01,T0) \ + VMADDSUB(Z0,T0,UChi_00) \ + VMADDSUB(Z1,T0,UChi_01) \ + VMADDSUB(Z2,T0,UChi_02) \ + \ + VMOVIDUP(1,%r9,Z0 ) \ + VMOVIDUP(4,%r9,Z1 ) \ + VMOVIDUP(7,%r9,Z2 ) \ + VSHUF(Chi_11,T1) \ + VMADDSUB(Z0,T1,UChi_10) \ + VMADDSUB(Z1,T1,UChi_11) \ + VMADDSUB(Z2,T1,UChi_12) \ + \ + VMOVRDUP(1,%r8,Z3 ) \ + VMOVRDUP(4,%r8,Z4 ) \ + VMOVRDUP(7,%r8,Z5 ) \ + VMADDSUB(Z3,Chi_01,UChi_00) \ + VMADDSUB(Z4,Chi_01,UChi_01) \ + VMADDSUB(Z5,Chi_01,UChi_02) \ + \ + VMOVRDUP(1,%r9,Z3 ) \ + VMOVRDUP(4,%r9,Z4 ) \ + VMOVRDUP(7,%r9,Z5 ) \ + VMADDSUB(Z3,Chi_11,UChi_10) \ + VMADDSUB(Z4,Chi_11,UChi_11) \ + VMADDSUB(Z5,Chi_11,UChi_12) \ + \ + 
VSHUF(Chi_02,T0) \ + VSHUF(Chi_12,T1) \ + VMOVIDUP(2,%r8,Z0 ) \ + VMOVIDUP(5,%r8,Z1 ) \ + VMOVIDUP(8,%r8,Z2 ) \ + VMADDSUB(Z0,T0,UChi_00) \ + VMADDSUB(Z1,T0,UChi_01) \ + VMADDSUB(Z2,T0,UChi_02) \ + VMOVIDUP(2,%r9,Z0 ) \ + VMOVIDUP(5,%r9,Z1 ) \ + VMOVIDUP(8,%r9,Z2 ) \ + VMADDSUB(Z0,T1,UChi_10) \ + VMADDSUB(Z1,T1,UChi_11) \ + VMADDSUB(Z2,T1,UChi_12) \ + /*55*/ \ + VMOVRDUP(2,%r8,Z3 ) \ + VMOVRDUP(5,%r8,Z4 ) \ + VMOVRDUP(8,%r8,Z5 ) \ + VMADDSUB(Z3,Chi_02,UChi_00) \ + VMADDSUB(Z4,Chi_02,UChi_01) \ + VMADDSUB(Z5,Chi_02,UChi_02) \ + VMOVRDUP(2,%r9,Z3 ) \ + VMOVRDUP(5,%r9,Z4 ) \ + VMOVRDUP(8,%r9,Z5 ) \ + VMADDSUB(Z3,Chi_12,UChi_10) \ + VMADDSUB(Z4,Chi_12,UChi_11) \ + VMADDSUB(Z5,Chi_12,UChi_12) \ + /*61 insns*/ ); + +#define MULT_ADD_XYZT(g0,g1) \ + asm ( "movq %0, %%r8 \n\t" \ + "movq %1, %%r9 \n\t" : : "r"(g0), "r"(g1) : "%r8","%r9");\ + __asm__ ( \ + VSHUFMEM(0,%r8,Z00) VSHUFMEM(0,%r9,Z10) \ + VRDUP(Chi_00,T0) VIDUP(Chi_00,Chi_00) \ + VRDUP(Chi_10,T1) VIDUP(Chi_10,Chi_10) \ + VMUL(Z00,Chi_00,Z1) VMUL(Z10,Chi_10,Z2) \ + VSHUFMEM(3,%r8,Z00) VSHUFMEM(3,%r9,Z10) \ + VMUL(Z00,Chi_00,Z3) VMUL(Z10,Chi_10,Z4) \ + VSHUFMEM(6,%r8,Z00) VSHUFMEM(6,%r9,Z10) \ + VMUL(Z00,Chi_00,Z5) VMUL(Z10,Chi_10,Z6) \ + VMADDMEM(0,%r8,T0,UChi_00) VMADDMEM(0,%r9,T1,UChi_10) \ + VMADDMEM(3,%r8,T0,UChi_01) VMADDMEM(3,%r9,T1,UChi_11) \ + VMADDMEM(6,%r8,T0,UChi_02) VMADDMEM(6,%r9,T1,UChi_12) \ + VSHUFMEM(1,%r8,Z00) VSHUFMEM(1,%r9,Z10) \ + VRDUP(Chi_01,T0) VIDUP(Chi_01,Chi_01) \ + VRDUP(Chi_11,T1) VIDUP(Chi_11,Chi_11) \ + VMADD(Z00,Chi_01,Z1) VMADD(Z10,Chi_11,Z2) \ + VSHUFMEM(4,%r8,Z00) VSHUFMEM(4,%r9,Z10) \ + VMADD(Z00,Chi_01,Z3) VMADD(Z10,Chi_11,Z4) \ + VSHUFMEM(7,%r8,Z00) VSHUFMEM(7,%r9,Z10) \ + VMADD(Z00,Chi_01,Z5) VMADD(Z10,Chi_11,Z6) \ + VMADDMEM(1,%r8,T0,UChi_00) VMADDMEM(1,%r9,T1,UChi_10) \ + VMADDMEM(4,%r8,T0,UChi_01) VMADDMEM(4,%r9,T1,UChi_11) \ + VMADDMEM(7,%r8,T0,UChi_02) VMADDMEM(7,%r9,T1,UChi_12) \ + VSHUFMEM(2,%r8,Z00) VSHUFMEM(2,%r9,Z10) \ + VRDUP(Chi_02,T0) VIDUP(Chi_02,Chi_02) \ + 
VRDUP(Chi_12,T1) VIDUP(Chi_12,Chi_12) \ + VMADD(Z00,Chi_02,Z1) VMADD(Z10,Chi_12,Z2) \ + VSHUFMEM(5,%r8,Z00) VSHUFMEM(5,%r9,Z10) \ + VMADD(Z00,Chi_02,Z3) VMADD(Z10,Chi_12,Z4) \ + VSHUFMEM(8,%r8,Z00) VSHUFMEM(8,%r9,Z10) \ + VMADD(Z00,Chi_02,Z5) VMADD(Z10,Chi_12,Z6) \ + VMADDSUBMEM(2,%r8,T0,Z1) VMADDSUBMEM(2,%r9,T1,Z2) \ + VMADDSUBMEM(5,%r8,T0,Z3) VMADDSUBMEM(5,%r9,T1,Z4) \ + VMADDSUBMEM(8,%r8,T0,Z5) VMADDSUBMEM(8,%r9,T1,Z6) \ + VADD(Z1,UChi_00,UChi_00) VADD(Z2,UChi_10,UChi_10) \ + VADD(Z3,UChi_01,UChi_01) VADD(Z4,UChi_11,UChi_11) \ + VADD(Z5,UChi_02,UChi_02) VADD(Z6,UChi_12,UChi_12) ); + +#define MULT_XYZT(g0,g1) \ + asm ( "movq %0, %%r8 \n\t" \ + "movq %1, %%r9 \n\t" : : "r"(g0), "r"(g1) : "%r8","%r9" ); \ + __asm__ ( \ + VSHUF(Chi_00,T0) \ + VSHUF(Chi_10,T1) \ + VMOVIDUP(0,%r8,Z0 ) \ + VMOVIDUP(3,%r8,Z1 ) \ + VMOVIDUP(6,%r8,Z2 ) \ + /*6*/ \ + VMUL(Z0,T0,UChi_00) \ + VMUL(Z1,T0,UChi_01) \ + VMUL(Z2,T0,UChi_02) \ + VMOVIDUP(0,%r9,Z0 ) \ + VMOVIDUP(3,%r9,Z1 ) \ + VMOVIDUP(6,%r9,Z2 ) \ + VMUL(Z0,T1,UChi_10) \ + VMUL(Z1,T1,UChi_11) \ + VMUL(Z2,T1,UChi_12) \ + VMOVRDUP(0,%r8,Z3 ) \ + VMOVRDUP(3,%r8,Z4 ) \ + VMOVRDUP(6,%r8,Z5 ) \ + /*18*/ \ + VMADDSUB(Z3,Chi_00,UChi_00) \ + VMADDSUB(Z4,Chi_00,UChi_01)\ + VMADDSUB(Z5,Chi_00,UChi_02) \ + VMOVRDUP(0,%r9,Z3 ) \ + VMOVRDUP(3,%r9,Z4 ) \ + VMOVRDUP(6,%r9,Z5 ) \ + VMADDSUB(Z3,Chi_10,UChi_10) \ + VMADDSUB(Z4,Chi_10,UChi_11)\ + VMADDSUB(Z5,Chi_10,UChi_12) \ + VMOVIDUP(1,%r8,Z0 ) \ + VMOVIDUP(4,%r8,Z1 ) \ + VMOVIDUP(7,%r8,Z2 ) \ + /*28*/ \ + VSHUF(Chi_01,T0) \ + VMADDSUB(Z0,T0,UChi_00) \ + VMADDSUB(Z1,T0,UChi_01) \ + VMADDSUB(Z2,T0,UChi_02) \ + VMOVIDUP(1,%r9,Z0 ) \ + VMOVIDUP(4,%r9,Z1 ) \ + VMOVIDUP(7,%r9,Z2 ) \ + VSHUF(Chi_11,T1) \ + VMADDSUB(Z0,T1,UChi_10) \ + VMADDSUB(Z1,T1,UChi_11) \ + VMADDSUB(Z2,T1,UChi_12) \ + VMOVRDUP(1,%r8,Z3 ) \ + VMOVRDUP(4,%r8,Z4 ) \ + VMOVRDUP(7,%r8,Z5 ) \ + /*38*/ \ + VMADDSUB(Z3,Chi_01,UChi_00) \ + VMADDSUB(Z4,Chi_01,UChi_01) \ + VMADDSUB(Z5,Chi_01,UChi_02) \ + VMOVRDUP(1,%r9,Z3 ) \ + 
VMOVRDUP(4,%r9,Z4 ) \ + VMOVRDUP(7,%r9,Z5 ) \ + VMADDSUB(Z3,Chi_11,UChi_10) \ + VMADDSUB(Z4,Chi_11,UChi_11) \ + VMADDSUB(Z5,Chi_11,UChi_12) \ + /*48*/ \ + VSHUF(Chi_02,T0) \ + VSHUF(Chi_12,T1) \ + VMOVIDUP(2,%r8,Z0 ) \ + VMOVIDUP(5,%r8,Z1 ) \ + VMOVIDUP(8,%r8,Z2 ) \ + VMADDSUB(Z0,T0,UChi_00) \ + VMADDSUB(Z1,T0,UChi_01) \ + VMADDSUB(Z2,T0,UChi_02) \ + VMOVIDUP(2,%r9,Z0 ) \ + VMOVIDUP(5,%r9,Z1 ) \ + VMOVIDUP(8,%r9,Z2 ) \ + VMADDSUB(Z0,T1,UChi_10) \ + VMADDSUB(Z1,T1,UChi_11) \ + VMADDSUB(Z2,T1,UChi_12) \ + /*55*/ \ + VMOVRDUP(2,%r8,Z3 ) \ + VMOVRDUP(5,%r8,Z4 ) \ + VMOVRDUP(8,%r8,Z5 ) \ + VMADDSUB(Z3,Chi_02,UChi_00) \ + VMADDSUB(Z4,Chi_02,UChi_01) \ + VMADDSUB(Z5,Chi_02,UChi_02) \ + VMOVRDUP(2,%r9,Z3 ) \ + VMOVRDUP(5,%r9,Z4 ) \ + VMOVRDUP(8,%r9,Z5 ) \ + VMADDSUB(Z3,Chi_12,UChi_10) \ + VMADDSUB(Z4,Chi_12,UChi_11) \ + VMADDSUB(Z5,Chi_12,UChi_12) \ + /*61 insns*/ ); + +#define MULT_XYZTa(g0,g1) \ + asm ( "movq %0, %%r8 \n\t" \ + "movq %1, %%r9 \n\t" : : "r"(g0), "r"(g1) : "%r8","%r9" ); \ + __asm__ ( \ + VSHUFMEM(0,%r8,Z00) VSHUFMEM(0,%r9,Z10) \ + VRDUP(Chi_00,T0) VIDUP(Chi_00,Chi_00) \ + VRDUP(Chi_10,T1) VIDUP(Chi_10,Chi_10) \ + VMUL(Z00,Chi_00,Z1) VMUL(Z10,Chi_10,Z2) \ + VSHUFMEM(3,%r8,Z00) VSHUFMEM(3,%r9,Z10) \ + VMUL(Z00,Chi_00,Z3) VMUL(Z10,Chi_10,Z4) \ + VSHUFMEM(6,%r8,Z00) VSHUFMEM(6,%r9,Z10) \ + VMUL(Z00,Chi_00,Z5) VMUL(Z10,Chi_10,Z6) \ + VMULMEM(0,%r8,T0,UChi_00) VMULMEM(0,%r9,T1,UChi_10) \ + VMULMEM(3,%r8,T0,UChi_01) VMULMEM(3,%r9,T1,UChi_11) \ + VMULMEM(6,%r8,T0,UChi_02) VMULMEM(6,%r9,T1,UChi_12) \ + VSHUFMEM(1,%r8,Z00) VSHUFMEM(1,%r9,Z10) \ + VRDUP(Chi_01,T0) VIDUP(Chi_01,Chi_01) \ + VRDUP(Chi_11,T1) VIDUP(Chi_11,Chi_11) \ + VMADD(Z00,Chi_01,Z1) VMADD(Z10,Chi_11,Z2) \ + VSHUFMEM(4,%r8,Z00) VSHUFMEM(4,%r9,Z10) \ + VMADD(Z00,Chi_01,Z3) VMADD(Z10,Chi_11,Z4) \ + VSHUFMEM(7,%r8,Z00) VSHUFMEM(7,%r9,Z10) \ + VMADD(Z00,Chi_01,Z5) VMADD(Z10,Chi_11,Z6) \ + VMADDMEM(1,%r8,T0,UChi_00) VMADDMEM(1,%r9,T1,UChi_10) \ + VMADDMEM(4,%r8,T0,UChi_01) VMADDMEM(4,%r9,T1,UChi_11) \ + 
VMADDMEM(7,%r8,T0,UChi_02) VMADDMEM(7,%r9,T1,UChi_12) \ + VSHUFMEM(2,%r8,Z00) VSHUFMEM(2,%r9,Z10) \ + VRDUP(Chi_02,T0) VIDUP(Chi_02,Chi_02) \ + VRDUP(Chi_12,T1) VIDUP(Chi_12,Chi_12) \ + VMADD(Z00,Chi_02,Z1) VMADD(Z10,Chi_12,Z2) \ + VSHUFMEM(5,%r8,Z00) VSHUFMEM(5,%r9,Z10) \ + VMADD(Z00,Chi_02,Z3) VMADD(Z10,Chi_12,Z4) \ + VSHUFMEM(8,%r8,Z00) VSHUFMEM(8,%r9,Z10) \ + VMADD(Z00,Chi_02,Z5) VMADD(Z10,Chi_12,Z6) \ + VMADDSUBMEM(2,%r8,T0,Z1) VMADDSUBMEM(2,%r9,T1,Z2) \ + VMADDSUBMEM(5,%r8,T0,Z3) VMADDSUBMEM(5,%r9,T1,Z4) \ + VMADDSUBMEM(8,%r8,T0,Z5) VMADDSUBMEM(8,%r9,T1,Z6) \ + VADD(Z1,UChi_00,UChi_00) VADD(Z2,UChi_10,UChi_10) \ + VADD(Z3,UChi_01,UChi_01) VADD(Z4,UChi_11,UChi_11) \ + VADD(Z5,UChi_02,UChi_02) VADD(Z6,UChi_12,UChi_12) ); + + +#define LOAD_CHI(a0,a1,a2,a3) \ + asm ( \ + "movq %0, %%r8 \n\t" \ + VLOAD(0,%%r8,pChi_00) \ + VLOAD(1,%%r8,pChi_01) \ + VLOAD(2,%%r8,pChi_02) \ + : : "r" (a0) : "%r8" ); \ + asm ( \ + "movq %0, %%r8 \n\t" \ + VLOAD(0,%%r8,pChi_10) \ + VLOAD(1,%%r8,pChi_11) \ + VLOAD(2,%%r8,pChi_12) \ + : : "r" (a1) : "%r8" ); \ + asm ( \ + "movq %0, %%r8 \n\t" \ + VLOAD(0,%%r8,pChi_20) \ + VLOAD(1,%%r8,pChi_21) \ + VLOAD(2,%%r8,pChi_22) \ + : : "r" (a2) : "%r8" ); \ + asm ( \ + "movq %0, %%r8 \n\t" \ + VLOAD(0,%%r8,pChi_30) \ + VLOAD(1,%%r8,pChi_31) \ + VLOAD(2,%%r8,pChi_32) \ + : : "r" (a3) : "%r8" ); + +#define LOAD_CHIa(a0,a1) \ + asm ( \ + "movq %0, %%r8 \n\t" \ + VLOAD(0,%%r8,pChi_00) \ + VLOAD(1,%%r8,pChi_01) \ + VLOAD(2,%%r8,pChi_02) \ + : : "r" (a0) : "%r8" ); \ + asm ( \ + "movq %0, %%r8 \n\t" \ + VLOAD(0,%%r8,pChi_10) \ + VLOAD(1,%%r8,pChi_11) \ + VLOAD(2,%%r8,pChi_12) \ + : : "r" (a1) : "%r8" ); + +#define PF_CHI(a0) +#define PF_CHIa(a0) \ + asm ( \ + "movq %0, %%r8 \n\t" \ + VPREFETCH1(0,%%r8) \ + VPREFETCH1(1,%%r8) \ + VPREFETCH1(2,%%r8) \ + : : "r" (a0) : "%r8" ); \ + +#define PF_GAUGE_XYZT(a0) +#define PF_GAUGE_XYZTa(a0) \ + asm ( \ + "movq %0, %%r8 \n\t" \ + VPREFETCH1(0,%%r8) \ + VPREFETCH1(1,%%r8) \ + VPREFETCH1(2,%%r8) \ + 
VPREFETCH1(3,%%r8) \ + VPREFETCH1(4,%%r8) \ + VPREFETCH1(5,%%r8) \ + VPREFETCH1(6,%%r8) \ + VPREFETCH1(7,%%r8) \ + VPREFETCH1(8,%%r8) \ + : : "r" (a0) : "%r8" ); \ + +#define PF_GAUGE_LS(a0) +#define PF_GAUGE_LSa(a0) \ + asm ( \ + "movq %0, %%r8 \n\t" \ + VPREFETCH1(0,%%r8) \ + VPREFETCH1(1,%%r8) \ + : : "r" (a0) : "%r8" ); \ + + +#define REDUCE(out) \ + asm ( \ + VADD(UChi_00,UChi_10,UChi_00) \ + VADD(UChi_01,UChi_11,UChi_01) \ + VADD(UChi_02,UChi_12,UChi_02) \ + VADD(UChi_30,UChi_20,UChi_30) \ + VADD(UChi_31,UChi_21,UChi_31) \ + VADD(UChi_32,UChi_22,UChi_32) \ + VADD(UChi_00,UChi_30,UChi_00) \ + VADD(UChi_01,UChi_31,UChi_01) \ + VADD(UChi_02,UChi_32,UChi_02) ); \ + asm ( \ + VSTORE(0,%0,pUChi_00) \ + VSTORE(1,%0,pUChi_01) \ + VSTORE(2,%0,pUChi_02) \ + : : "r" (out) : "memory" ); + +#define REDUCEa(out) \ + asm ( \ + VADD(UChi_00,UChi_10,UChi_00) \ + VADD(UChi_01,UChi_11,UChi_01) \ + VADD(UChi_02,UChi_12,UChi_02) ); \ + asm ( \ + VSTORE(0,%0,pUChi_00) \ + VSTORE(1,%0,pUChi_01) \ + VSTORE(2,%0,pUChi_02) \ + : : "r" (out) : "memory" ); + +#define PERMUTE_DIR(dir) \ + permute##dir(Chi_0,Chi_0);\ + permute##dir(Chi_1,Chi_1);\ + permute##dir(Chi_2,Chi_2); + +namespace Grid { +namespace QCD { + +template +void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeField &U, + DoubledGaugeField &UUU, + SiteSpinor *buf, int LLs, + int sU, const FermionField &in, FermionField &out) +{ + assert(0); +}; + + +//#define CONDITIONAL_MOVE(l,o,out) if ( l ) { out = (uint64_t) &in._odata[o] ; } else { out =(uint64_t) &buf[o]; } + +#define CONDITIONAL_MOVE(l,o,out) { const SiteSpinor *ptr = l? 
in_p : buf; out = (uint64_t) &ptr[o]; } + +#define PREPARE_XYZT(X,Y,Z,T,skew,UU) \ + PREPARE(X,Y,Z,T,skew,UU); \ + PF_GAUGE_XYZT(gauge0); \ + PF_GAUGE_XYZT(gauge1); \ + PF_GAUGE_XYZT(gauge2); \ + PF_GAUGE_XYZT(gauge3); + +#define PREPARE_LS(X,Y,Z,T,skew,UU) \ + PREPARE(X,Y,Z,T,skew,UU); \ + PF_GAUGE_LS(gauge0); \ + PF_GAUGE_LS(gauge1); \ + PF_GAUGE_LS(gauge2); \ + PF_GAUGE_LS(gauge3); + +#define PREPARE(X,Y,Z,T,skew,UU) \ + SE0=st.GetEntry(ptype,X+skew,sF); \ + o0 = SE0->_offset; \ + l0 = SE0->_is_local; \ + p0 = SE0->_permute; \ + CONDITIONAL_MOVE(l0,o0,addr0); \ + PF_CHI(addr0); \ + \ + SE1=st.GetEntry(ptype,Y+skew,sF); \ + o1 = SE1->_offset; \ + l1 = SE1->_is_local; \ + p1 = SE1->_permute; \ + CONDITIONAL_MOVE(l1,o1,addr1); \ + PF_CHI(addr1); \ + \ + SE2=st.GetEntry(ptype,Z+skew,sF); \ + o2 = SE2->_offset; \ + l2 = SE2->_is_local; \ + p2 = SE2->_permute; \ + CONDITIONAL_MOVE(l2,o2,addr2); \ + PF_CHI(addr2); \ + \ + SE3=st.GetEntry(ptype,T+skew,sF); \ + o3 = SE3->_offset; \ + l3 = SE3->_is_local; \ + p3 = SE3->_permute; \ + CONDITIONAL_MOVE(l3,o3,addr3); \ + PF_CHI(addr3); \ + \ + gauge0 =(uint64_t)&UU._odata[sU]( X ); \ + gauge1 =(uint64_t)&UU._odata[sU]( Y ); \ + gauge2 =(uint64_t)&UU._odata[sU]( Z ); \ + gauge3 =(uint64_t)&UU._odata[sU]( T ); + + // This is the single precision 5th direction vectorised kernel +#include +template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeField &U, + DoubledGaugeField &UUU, + SiteSpinor *buf, int LLs, + int sU, const FermionField &in, FermionField &out) +{ +#ifdef AVX512 + uint64_t gauge0,gauge1,gauge2,gauge3; + uint64_t addr0,addr1,addr2,addr3; + const SiteSpinor *in_p; in_p = &in._odata[0]; + + int o0,o1,o2,o3; // offsets + int l0,l1,l2,l3; // local + int p0,p1,p2,p3; // perm + int ptype; + StencilEntry *SE0; + StencilEntry *SE1; + StencilEntry *SE2; + StencilEntry *SE3; + + for(int s=0;s +template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, + 
DoubledGaugeField &U, + DoubledGaugeField &UUU, + SiteSpinor *buf, int LLs, + int sU, const FermionField &in, FermionField &out) +{ +#ifdef AVX512 + uint64_t gauge0,gauge1,gauge2,gauge3; + uint64_t addr0,addr1,addr2,addr3; + const SiteSpinor *in_p; in_p = &in._odata[0]; + + int o0,o1,o2,o3; // offsets + int l0,l1,l2,l3; // local + int p0,p1,p2,p3; // perm + int ptype; + StencilEntry *SE0; + StencilEntry *SE1; + StencilEntry *SE2; + StencilEntry *SE3; + + for(int s=0;s +template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeField &U, + DoubledGaugeField &UUU, + SiteSpinor *buf, int LLs, + int sU, const FermionField &in, FermionField &out) +{ +#ifdef AVX512 + uint64_t gauge0,gauge1,gauge2,gauge3; + uint64_t addr0,addr1,addr2,addr3; + const SiteSpinor *in_p; in_p = &in._odata[0]; + + int o0,o1,o2,o3; // offsets + int l0,l1,l2,l3; // local + int p0,p1,p2,p3; // perm + int ptype; + StencilEntry *SE0; + StencilEntry *SE1; + StencilEntry *SE2; + StencilEntry *SE3; + + for(int s=0;s +template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeField &U, + DoubledGaugeField &UUU, + SiteSpinor *buf, int LLs, + int sU, const FermionField &in, FermionField &out) +{ +#ifdef AVX512 + uint64_t gauge0,gauge1,gauge2,gauge3; + uint64_t addr0,addr1,addr2,addr3; + const SiteSpinor *in_p; in_p = &in._odata[0]; + + int o0,o1,o2,o3; // offsets + int l0,l1,l2,l3; // local + int p0,p1,p2,p3; // perm + int ptype; + StencilEntry *SE0; + StencilEntry *SE1; + StencilEntry *SE2; + StencilEntry *SE3; + + for(int s=0;s::FUNC(StencilImpl &st, LebesgueOrder &lo, \ + DoubledGaugeField &U, \ + DoubledGaugeField &UUU, \ + SiteSpinor *buf, int LLs, \ + int sU, const FermionField &in, FermionField &out); + +KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplD); +KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplF); +KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredVec5dImplD); 
+KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredVec5dImplF); + +}} + diff --git a/lib/qcd/action/fermion/StaggeredKernelsHand.cc b/lib/qcd/action/fermion/StaggeredKernelsHand.cc new file mode 100644 index 00000000..7de8480c --- /dev/null +++ b/lib/qcd/action/fermion/StaggeredKernelsHand.cc @@ -0,0 +1,322 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/StaggerdKernelsHand.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +#define REGISTER + +#define LOAD_CHI(b) \ + const SiteSpinor & ref (b[offset]); \ + Chi_0=ref()()(0);\ + Chi_1=ref()()(1);\ + Chi_2=ref()()(2); + + +// To splat or not to splat depends on the implementation +#define MULT(A,UChi) \ + auto & ref(U._odata[sU](A)); \ + Impl::loadLinkElement(U_00,ref()(0,0)); \ + Impl::loadLinkElement(U_10,ref()(1,0)); \ + Impl::loadLinkElement(U_20,ref()(2,0)); \ + Impl::loadLinkElement(U_01,ref()(0,1)); \ + Impl::loadLinkElement(U_11,ref()(1,1)); \ + Impl::loadLinkElement(U_21,ref()(2,1)); \ + Impl::loadLinkElement(U_02,ref()(0,2)); \ + Impl::loadLinkElement(U_12,ref()(1,2)); \ + Impl::loadLinkElement(U_22,ref()(2,2)); \ + UChi ## _0 = U_00*Chi_0; \ + UChi ## _1 = U_10*Chi_0;\ + UChi ## _2 = U_20*Chi_0;\ + UChi ## _0 += U_01*Chi_1;\ + UChi ## _1 += U_11*Chi_1;\ + UChi ## _2 += U_21*Chi_1;\ + UChi ## _0 += U_02*Chi_2;\ + UChi ## _1 += U_12*Chi_2;\ + UChi ## _2 += U_22*Chi_2; + +#define MULT_ADD(A,UChi) \ + auto & ref(U._odata[sU](A)); \ + Impl::loadLinkElement(U_00,ref()(0,0)); \ + Impl::loadLinkElement(U_10,ref()(1,0)); \ + Impl::loadLinkElement(U_20,ref()(2,0)); \ + Impl::loadLinkElement(U_01,ref()(0,1)); \ + Impl::loadLinkElement(U_11,ref()(1,1)); \ + Impl::loadLinkElement(U_21,ref()(2,1)); \ + Impl::loadLinkElement(U_02,ref()(0,2)); \ + Impl::loadLinkElement(U_12,ref()(1,2)); \ + Impl::loadLinkElement(U_22,ref()(2,2)); \ + UChi ## _0 += U_00*Chi_0; \ + UChi ## _1 += U_10*Chi_0;\ + UChi ## _2 += U_20*Chi_0;\ + UChi ## _0 += U_01*Chi_1;\ + UChi ## _1 += U_11*Chi_1;\ + UChi ## _2 += U_21*Chi_1;\ + UChi ## _0 += U_02*Chi_2;\ + UChi ## _1 += U_12*Chi_2;\ + UChi ## _2 += U_22*Chi_2; + + +#define PERMUTE_DIR(dir) \ + permute##dir(Chi_0,Chi_0);\ + permute##dir(Chi_1,Chi_1);\ + permute##dir(Chi_2,Chi_2); + +namespace Grid { +namespace 
QCD { + + +template +void StaggeredKernels::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU, + SiteSpinor *buf, int LLs, + int sU, const FermionField &in, FermionField &out, int dag) +{ + SiteSpinor naik; + SiteSpinor naive; + int oneLink =0; + int threeLink=1; + int skew(0); + Real scale(1.0); + + if(dag) scale = -1.0; + + for(int s=0;s +void StaggeredKernels::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + SiteSpinor *buf, int sF, + int sU, const FermionField &in, SiteSpinor &out,int threeLink) +{ + typedef typename Simd::scalar_type S; + typedef typename Simd::vector_type V; + + REGISTER Simd even_0; // 12 regs on knc + REGISTER Simd even_1; + REGISTER Simd even_2; + REGISTER Simd odd_0; // 12 regs on knc + REGISTER Simd odd_1; + REGISTER Simd odd_2; + + REGISTER Simd Chi_0; // two spinor; 6 regs + REGISTER Simd Chi_1; + REGISTER Simd Chi_2; + + REGISTER Simd U_00; // two rows of U matrix + REGISTER Simd U_10; + REGISTER Simd U_20; + REGISTER Simd U_01; + REGISTER Simd U_11; + REGISTER Simd U_21; // 2 reg left. + REGISTER Simd U_02; + REGISTER Simd U_12; + REGISTER Simd U_22; + + int skew = 0; + if (threeLink) skew=8; + + int offset,local,perm, ptype; + StencilEntry *SE; + + // Xp + SE=st.GetEntry(ptype,Xp+skew,sF); + offset = SE->_offset; + local = SE->_is_local; + perm = SE->_permute; + + if ( local ) { + LOAD_CHI(in._odata); + if ( perm) { + PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... + } + } else { + LOAD_CHI(buf); + } + { + MULT(Xp,even); + } + + // Yp + SE=st.GetEntry(ptype,Yp+skew,sF); + offset = SE->_offset; + local = SE->_is_local; + perm = SE->_permute; + + if ( local ) { + LOAD_CHI(in._odata); + if ( perm) { + PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... 
+ } + } else { + LOAD_CHI(buf); + } + { + MULT(Yp,odd); + } + + + // Zp + SE=st.GetEntry(ptype,Zp+skew,sF); + offset = SE->_offset; + local = SE->_is_local; + perm = SE->_permute; + + if ( local ) { + LOAD_CHI(in._odata); + if ( perm) { + PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... + } + } else { + LOAD_CHI(buf); + } + { + MULT_ADD(Zp,even); + } + + // Tp + SE=st.GetEntry(ptype,Tp+skew,sF); + offset = SE->_offset; + local = SE->_is_local; + perm = SE->_permute; + + if ( local ) { + LOAD_CHI(in._odata); + if ( perm) { + PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... + } + } else { + LOAD_CHI(buf); + } + { + MULT_ADD(Tp,odd); + } + + // Xm + SE=st.GetEntry(ptype,Xm+skew,sF); + offset = SE->_offset; + local = SE->_is_local; + perm = SE->_permute; + + if ( local ) { + LOAD_CHI(in._odata); + if ( perm) { + PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... + } + } else { + LOAD_CHI(buf); + } + { + MULT_ADD(Xm,even); + } + + + // Ym + SE=st.GetEntry(ptype,Ym+skew,sF); + offset = SE->_offset; + local = SE->_is_local; + perm = SE->_permute; + + if ( local ) { + LOAD_CHI(in._odata); + if ( perm) { + PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... + } + } else { + LOAD_CHI(buf); + } + { + MULT_ADD(Ym,odd); + } + + // Zm + SE=st.GetEntry(ptype,Zm+skew,sF); + offset = SE->_offset; + local = SE->_is_local; + perm = SE->_permute; + + if ( local ) { + LOAD_CHI(in._odata); + if ( perm) { + PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... + } + } else { + LOAD_CHI(buf); + } + { + MULT_ADD(Zm,even); + } + + // Tm + SE=st.GetEntry(ptype,Tm+skew,sF); + offset = SE->_offset; + local = SE->_is_local; + perm = SE->_permute; + + if ( local ) { + LOAD_CHI(in._odata); + if ( perm) { + PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... 
+ } + } else { + LOAD_CHI(buf); + } + { + MULT_ADD(Tm,odd); + } + + vstream(out()()(0),even_0+odd_0); + vstream(out()()(1),even_1+odd_1); + vstream(out()()(2),even_2+odd_2); + +} + +#define DHOP_SITE_HAND_INSTANTIATE(IMPL) \ + template void StaggeredKernels::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, \ + DoubledGaugeField &U,DoubledGaugeField &UUU, \ + SiteSpinor *buf, int LLs, \ + int sU, const FermionField &in, FermionField &out, int dag); + +#define DHOP_SITE_DEPTH_HAND_INSTANTIATE(IMPL) \ + template void StaggeredKernels::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, \ + SiteSpinor *buf, int sF, \ + int sU, const FermionField &in, SiteSpinor &out,int threeLink) ; +DHOP_SITE_HAND_INSTANTIATE(StaggeredImplD); +DHOP_SITE_HAND_INSTANTIATE(StaggeredImplF); +DHOP_SITE_HAND_INSTANTIATE(StaggeredVec5dImplD); +DHOP_SITE_HAND_INSTANTIATE(StaggeredVec5dImplF); + +DHOP_SITE_DEPTH_HAND_INSTANTIATE(StaggeredImplD); +DHOP_SITE_DEPTH_HAND_INSTANTIATE(StaggeredImplF); +DHOP_SITE_DEPTH_HAND_INSTANTIATE(StaggeredVec5dImplD); +DHOP_SITE_DEPTH_HAND_INSTANTIATE(StaggeredVec5dImplF); + +}} diff --git a/lib/qcd/action/fermion/WilsonCompressor.h b/lib/qcd/action/fermion/WilsonCompressor.h index 41f24e1b..c3b4dffb 100644 --- a/lib/qcd/action/fermion/WilsonCompressor.h +++ b/lib/qcd/action/fermion/WilsonCompressor.h @@ -171,6 +171,8 @@ namespace QCD { class WilsonStencil : public CartesianStencil { public: + typedef CartesianCommunicator::CommsRequest_t CommsRequest_t; + WilsonStencil(GridBase *grid, int npoints, int checkerboard, @@ -179,78 +181,77 @@ namespace QCD { { }; template < class compressor> - std::thread HaloExchangeOptBegin(const Lattice &source,compressor &compress) { - this->Mergers.resize(0); - this->Packets.resize(0); - this->HaloGatherOpt(source,compress); - return std::thread([&] { this->Communicate(); }); + void HaloExchangeOpt(const Lattice &source,compressor &compress) + { + std::vector > reqs; + 
HaloExchangeOptGather(source,compress); + this->CommunicateBegin(reqs); + this->calls++; + this->CommunicateComplete(reqs); + this->CommsMerge(); } template < class compressor> - void HaloExchangeOpt(const Lattice &source,compressor &compress) + void HaloExchangeOptGather(const Lattice &source,compressor &compress) { - auto thr = this->HaloExchangeOptBegin(source,compress); - this->HaloExchangeOptComplete(thr); + this->calls++; + this->Mergers.resize(0); + this->Packets.resize(0); + this->HaloGatherOpt(source,compress); } - void HaloExchangeOptComplete(std::thread &thr) - { - this->CommsMerge(); // spins - this->jointime-=usecond(); - thr.join(); - this->jointime+=usecond(); - } template < class compressor> void HaloGatherOpt(const Lattice &source,compressor &compress) { - // conformable(source._grid,_grid); - assert(source._grid==this->_grid); - this->halogtime-=usecond(); + this->_grid->StencilBarrier(); + // conformable(source._grid,_grid); + assert(source._grid==this->_grid); + this->halogtime-=usecond(); + + this->u_comm_offset=0; + + int dag = compress.dag; + + WilsonXpCompressor XpCompress; + WilsonYpCompressor YpCompress; + WilsonZpCompressor ZpCompress; + WilsonTpCompressor TpCompress; + WilsonXmCompressor XmCompress; + WilsonYmCompressor YmCompress; + WilsonZmCompressor ZmCompress; + WilsonTmCompressor TmCompress; - assert (this->comm_buf.size() == this->_unified_buffer_size ); - this->u_comm_offset=0; - - int dag = compress.dag; - static std::vector dirs(Nd*2); - for(int mu=0;mu XpCompress; - this->HaloGatherDir(source,XpCompress,dirs[0]); - - WilsonYpCompressor YpCompress; - this->HaloGatherDir(source,YpCompress,dirs[1]); - - WilsonZpCompressor ZpCompress; - this->HaloGatherDir(source,ZpCompress,dirs[2]); - - WilsonTpCompressor TpCompress; - this->HaloGatherDir(source,TpCompress,dirs[3]); - - WilsonXmCompressor XmCompress; - this->HaloGatherDir(source,XmCompress,dirs[4]); - - WilsonYmCompressor YmCompress; - 
this->HaloGatherDir(source,YmCompress,dirs[5]); - - WilsonZmCompressor ZmCompress; - this->HaloGatherDir(source,ZmCompress,dirs[6]); - - WilsonTmCompressor TmCompress; - this->HaloGatherDir(source,TmCompress,dirs[7]); - - assert(this->u_comm_offset==this->_unified_buffer_size); - this->halogtime+=usecond(); + // Gather all comms buffers + // for(int point = 0 ; point < _npoints; point++) { + // compress.Point(point); + // HaloGatherDir(source,compress,point,face_idx); + // } + int face_idx=0; + if ( dag ) { + // std::cout << " Optimised Dagger compress " <HaloGatherDir(source,XpCompress,Xp,face_idx); + this->HaloGatherDir(source,YpCompress,Yp,face_idx); + this->HaloGatherDir(source,ZpCompress,Zp,face_idx); + this->HaloGatherDir(source,TpCompress,Tp,face_idx); + this->HaloGatherDir(source,XmCompress,Xm,face_idx); + this->HaloGatherDir(source,YmCompress,Ym,face_idx); + this->HaloGatherDir(source,ZmCompress,Zm,face_idx); + this->HaloGatherDir(source,TmCompress,Tm,face_idx); + } else { + this->HaloGatherDir(source,XmCompress,Xp,face_idx); + this->HaloGatherDir(source,YmCompress,Yp,face_idx); + this->HaloGatherDir(source,ZmCompress,Zp,face_idx); + this->HaloGatherDir(source,TmCompress,Tp,face_idx); + this->HaloGatherDir(source,XpCompress,Xm,face_idx); + this->HaloGatherDir(source,YpCompress,Ym,face_idx); + this->HaloGatherDir(source,ZpCompress,Zm,face_idx); + this->HaloGatherDir(source,TpCompress,Tm,face_idx); } + this->face_table_computed=1; + assert(this->u_comm_offset==this->_unified_buffer_size); + this->halogtime+=usecond(); + } }; diff --git a/lib/qcd/action/fermion/WilsonFermion.cc b/lib/qcd/action/fermion/WilsonFermion.cc index b447f914..32083d5e 100644 --- a/lib/qcd/action/fermion/WilsonFermion.cc +++ b/lib/qcd/action/fermion/WilsonFermion.cc @@ -1,3 +1,4 @@ + /************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -29,15 +30,14 @@ See the full license in the file "LICENSE" in 
the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include +#include +#include namespace Grid { namespace QCD { -const std::vector WilsonFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, - 3}); -const std::vector WilsonFermionStatic::displacements({1, 1, 1, 1, -1, -1, - -1, -1}); +const std::vector WilsonFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, 3}); +const std::vector WilsonFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1}); int WilsonFermionStatic::HandOptDslash; ///////////////////////////////// @@ -52,10 +52,8 @@ WilsonFermion::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid, _grid(&Fgrid), _cbgrid(&Hgrid), Stencil(&Fgrid, npoint, Even, directions, displacements), - StencilEven(&Hgrid, npoint, Even, directions, - displacements), // source is Even - StencilOdd(&Hgrid, npoint, Odd, directions, - displacements), // source is Odd + StencilEven(&Hgrid, npoint, Even, directions,displacements), // source is Even + StencilOdd(&Hgrid, npoint, Odd, directions,displacements), // source is Odd mass(_mass), Lebesgue(_grid), LebesgueEvenOdd(_cbgrid), @@ -113,86 +111,84 @@ void WilsonFermion::MeooeDag(const FermionField &in, FermionField &out) { } } - template - void WilsonFermion::Mooee(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; - typename FermionField::scalar_type scal(4.0 + mass); - out = scal * in; - } +template +void WilsonFermion::Mooee(const FermionField &in, FermionField &out) { + out.checkerboard = in.checkerboard; + typename FermionField::scalar_type scal(4.0 + mass); + out = scal * in; +} - template - void WilsonFermion::MooeeDag(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; - Mooee(in, out); - } +template +void WilsonFermion::MooeeDag(const FermionField &in, FermionField &out) { + out.checkerboard = in.checkerboard; + Mooee(in, out); +} - template - void 
WilsonFermion::MooeeInv(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; - out = (1.0/(4.0+mass))*in; +template +void WilsonFermion::MooeeInv(const FermionField &in, FermionField &out) { + out.checkerboard = in.checkerboard; + out = (1.0/(4.0+mass))*in; +} + +template +void WilsonFermion::MooeeInvDag(const FermionField &in, FermionField &out) { + out.checkerboard = in.checkerboard; + MooeeInv(in,out); +} +template +void WilsonFermion::MomentumSpacePropagator(FermionField &out, const FermionField &in,RealD _m) +{ + typedef typename FermionField::vector_type vector_type; + typedef typename FermionField::scalar_type ScalComplex; + typedef Lattice > LatComplex; + + // what type LatticeComplex + conformable(_grid,out._grid); + + Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT + }; + + std::vector latt_size = _grid->_fdimensions; + + FermionField num (_grid); num = zero; + LatComplex wilson(_grid); wilson= zero; + LatComplex one (_grid); one = ScalComplex(1.0,0.0); + + LatComplex denom(_grid); denom= zero; + LatComplex kmu(_grid); + ScalComplex ci(0.0,1.0); + // momphase = n * 2pi / L + for(int mu=0;mu - void WilsonFermion::MooeeInvDag(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; - MooeeInv(in,out); - } - - template - void WilsonFermion::MomentumSpacePropagator(FermionField &out, const FermionField &in,RealD _m) { - - // what type LatticeComplex - conformable(_grid,out._grid); - - typedef typename FermionField::vector_type vector_type; - typedef typename FermionField::scalar_type ScalComplex; - - typedef Lattice > LatComplex; - - Gamma::Algebra Gmu [] = { - Gamma::Algebra::GammaX, - Gamma::Algebra::GammaY, - Gamma::Algebra::GammaZ, - Gamma::Algebra::GammaT - }; - - std::vector latt_size = _grid->_fdimensions; - - FermionField num (_grid); num = zero; - LatComplex wilson(_grid); wilson= zero; - LatComplex one (_grid); one 
= ScalComplex(1.0,0.0); - - LatComplex denom(_grid); denom= zero; - LatComplex kmu(_grid); - ScalComplex ci(0.0,1.0); - // momphase = n * 2pi / L - for(int mu=0;mu::DerivInternal(StencilImpl &st, DoubledGaugeField &U, //////////////////////// // Call the single hop //////////////////////// - PARALLEL_FOR_LOOP - for (int sss = 0; sss < B._grid->oSites(); sss++) { - Kernels::DiracOptDhopDir(st, U, st.CommBuf(), sss, sss, B, Btilde, mu, - gamma); + parallel_for (int sss = 0; sss < B._grid->oSites(); sss++) { + Kernels::DhopDir(st, U, st.CommBuf(), sss, sss, B, Btilde, mu, gamma); } ////////////////////////////////////////////////// @@ -276,8 +270,7 @@ void WilsonFermion::DhopDerivEO(GaugeField &mat, const FermionField &U, } template -void WilsonFermion::Dhop(const FermionField &in, FermionField &out, - int dag) { +void WilsonFermion::Dhop(const FermionField &in, FermionField &out, int dag) { conformable(in._grid, _grid); // verifies full grid conformable(in._grid, out._grid); @@ -287,8 +280,7 @@ void WilsonFermion::Dhop(const FermionField &in, FermionField &out, } template -void WilsonFermion::DhopOE(const FermionField &in, FermionField &out, - int dag) { +void WilsonFermion::DhopOE(const FermionField &in, FermionField &out, int dag) { conformable(in._grid, _cbgrid); // verifies half grid conformable(in._grid, out._grid); // drops the cb check @@ -299,8 +291,7 @@ void WilsonFermion::DhopOE(const FermionField &in, FermionField &out, } template -void WilsonFermion::DhopEO(const FermionField &in, FermionField &out, - int dag) { +void WilsonFermion::DhopEO(const FermionField &in, FermionField &out,int dag) { conformable(in._grid, _cbgrid); // verifies half grid conformable(in._grid, out._grid); // drops the cb check @@ -311,14 +302,12 @@ void WilsonFermion::DhopEO(const FermionField &in, FermionField &out, } template -void WilsonFermion::Mdir(const FermionField &in, FermionField &out, - int dir, int disp) { +void WilsonFermion::Mdir(const FermionField &in, FermionField 
&out, int dir, int disp) { DhopDir(in, out, dir, disp); } template -void WilsonFermion::DhopDir(const FermionField &in, FermionField &out, - int dir, int disp) { +void WilsonFermion::DhopDir(const FermionField &in, FermionField &out, int dir, int disp) { int skip = (disp == 1) ? 0 : 1; int dirdisp = dir + skip * 4; int gamma = dir + (1 - skip) * 4; @@ -327,16 +316,13 @@ void WilsonFermion::DhopDir(const FermionField &in, FermionField &out, }; template -void WilsonFermion::DhopDirDisp(const FermionField &in, FermionField &out, - int dirdisp, int gamma, int dag) { +void WilsonFermion::DhopDirDisp(const FermionField &in, FermionField &out,int dirdisp, int gamma, int dag) { Compressor compressor(dag); Stencil.HaloExchange(in, compressor); - PARALLEL_FOR_LOOP - for (int sss = 0; sss < in._grid->oSites(); sss++) { - Kernels::DiracOptDhopDir(Stencil, Umu, Stencil.CommBuf(), sss, sss, in, out, - dirdisp, gamma); + parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) { + Kernels::DhopDir(Stencil, Umu, Stencil.CommBuf(), sss, sss, in, out, dirdisp, gamma); } }; @@ -351,16 +337,12 @@ void WilsonFermion::DhopInternal(StencilImpl &st, LebesgueOrder &lo, st.HaloExchange(in, compressor); if (dag == DaggerYes) { - PARALLEL_FOR_LOOP - for (int sss = 0; sss < in._grid->oSites(); sss++) { - Kernels::DiracOptDhopSiteDag(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, - out); + parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) { + Kernels::DhopSiteDag(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out); } } else { - PARALLEL_FOR_LOOP - for (int sss = 0; sss < in._grid->oSites(); sss++) { - Kernels::DiracOptDhopSite(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, - out); + parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) { + Kernels::DhopSite(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out); } } }; diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index a82aaa2d..88bc425a 100644 --- 
a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -29,8 +29,9 @@ Author: paboyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include -#include +#include +#include +#include namespace Grid { namespace QCD { @@ -63,71 +64,55 @@ WilsonFermion5D::WilsonFermion5D(GaugeField &_Umu, LebesgueEvenOdd(_FourDimRedBlackGrid), _tmp(&FiveDimRedBlackGrid) { + // some assertions + assert(FiveDimGrid._ndimension==5); + assert(FourDimGrid._ndimension==4); + assert(FourDimRedBlackGrid._ndimension==4); + assert(FiveDimRedBlackGrid._ndimension==5); + assert(FiveDimRedBlackGrid._checker_dim==1); // Don't checker the s direction + + // extent of fifth dim and not spread out + Ls=FiveDimGrid._fdimensions[0]; + assert(FiveDimRedBlackGrid._fdimensions[0]==Ls); + assert(FiveDimGrid._processors[0] ==1); + assert(FiveDimRedBlackGrid._processors[0] ==1); + + // Other dimensions must match the decomposition of the four-D fields + for(int d=0;d<4;d++){ + + assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]); + assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]); + assert(FourDimRedBlackGrid._processors[d] ==FourDimGrid._processors[d]); + + assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]); + assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]); + assert(FourDimRedBlackGrid._fdimensions[d] ==FourDimGrid._fdimensions[d]); + + assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]); + assert(FiveDimRedBlackGrid._simd_layout[d+1]==FourDimGrid._simd_layout[d]); + assert(FourDimRedBlackGrid._simd_layout[d] ==FourDimGrid._simd_layout[d]); + } + if (Impl::LsVectorised) { int nsimd = Simd::Nsimd(); - // some assertions - assert(FiveDimGrid._ndimension==5); - assert(FiveDimRedBlackGrid._ndimension==5); - 
assert(FiveDimRedBlackGrid._checker_dim==1); // Don't checker the s direction - assert(FourDimGrid._ndimension==4); - // Dimension zero of the five-d is the Ls direction - Ls=FiveDimGrid._fdimensions[0]; - assert(FiveDimGrid._processors[0] ==1); assert(FiveDimGrid._simd_layout[0] ==nsimd); - - assert(FiveDimRedBlackGrid._fdimensions[0]==Ls); - assert(FiveDimRedBlackGrid._processors[0] ==1); assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd); - // Other dimensions must match the decomposition of the four-D fields for(int d=0;d<4;d++){ - assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]); - assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]); - assert(FourDimGrid._simd_layout[d]=1); assert(FourDimRedBlackGrid._simd_layout[d]=1); assert(FiveDimRedBlackGrid._simd_layout[d+1]==1); - - assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]); - assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]); - assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]); } } else { - - // some assertions - assert(FiveDimGrid._ndimension==5); - assert(FourDimGrid._ndimension==4); - assert(FiveDimRedBlackGrid._ndimension==5); - assert(FourDimRedBlackGrid._ndimension==4); - assert(FiveDimRedBlackGrid._checker_dim==1); // Dimension zero of the five-d is the Ls direction - Ls=FiveDimGrid._fdimensions[0]; - assert(FiveDimRedBlackGrid._fdimensions[0]==Ls); - assert(FiveDimRedBlackGrid._processors[0] ==1); assert(FiveDimRedBlackGrid._simd_layout[0]==1); - assert(FiveDimGrid._processors[0] ==1); assert(FiveDimGrid._simd_layout[0] ==1); - - // Other dimensions must match the decomposition of the four-D fields - for(int d=0;d<4;d++){ - assert(FourDimRedBlackGrid._fdimensions[d] ==FourDimGrid._fdimensions[d]); - assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]); - - assert(FourDimRedBlackGrid._processors[d] ==FourDimGrid._processors[d]); - assert(FiveDimRedBlackGrid._processors[d+1] 
==FourDimGrid._processors[d]); - - assert(FourDimRedBlackGrid._simd_layout[d] ==FourDimGrid._simd_layout[d]); - assert(FiveDimRedBlackGrid._simd_layout[d+1]==FourDimGrid._simd_layout[d]); - - assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]); - assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]); - assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]); - } + } // Allocate the required comms buffer @@ -182,34 +167,37 @@ void WilsonFermion5D::Report(void) std::vector latt = GridDefaultLatt(); RealD volume = Ls; for(int mu=0;mu_Nprocessors; + RealD NN = _FourDimGrid->NodeCount(); if ( DhopCalls > 0 ) { std::cout << GridLogMessage << "#### Dhop calls report " << std::endl; - std::cout << GridLogMessage << "WilsonFermion5D Number of Dhop Calls : " << DhopCalls << std::endl; - std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " << DhopCommTime<< " us" << std::endl; - std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls : " << DhopCommTime / DhopCalls << " us" << std::endl; - std::cout << GridLogMessage << "WilsonFermion5D Total Compute time : " << DhopComputeTime << " us" << std::endl; - std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls : " << DhopComputeTime / DhopCalls << " us" << std::endl; + std::cout << GridLogMessage << "WilsonFermion5D Number of DhopEO Calls : " << DhopCalls << std::endl; + std::cout << GridLogMessage << "WilsonFermion5D TotalTime /Calls : " << DhopTotalTime / DhopCalls << " us" << std::endl; + std::cout << GridLogMessage << "WilsonFermion5D CommTime /Calls : " << DhopCommTime / DhopCalls << " us" << std::endl; + std::cout << GridLogMessage << "WilsonFermion5D FaceTime /Calls : " << DhopFaceTime / DhopCalls << " us" << std::endl; + std::cout << GridLogMessage << "WilsonFermion5D ComputeTime1/Calls : " << DhopComputeTime / DhopCalls << " us" << std::endl; + std::cout << GridLogMessage << "WilsonFermion5D ComputeTime2/Calls : " << DhopComputeTime2/ 
DhopCalls << " us" << std::endl; + // Average the compute time + _FourDimGrid->GlobalSum(DhopComputeTime); + DhopComputeTime/=NP; RealD mflops = 1344*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl; std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl; + std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl; - RealD Fullmflops = 1344*volume*DhopCalls/(DhopComputeTime+DhopCommTime)/2; // 2 for red black counting + RealD Fullmflops = 1344*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl; std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl; - + std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl; } if ( DerivCalls > 0 ) { std::cout << GridLogMessage << "#### Deriv calls report "<< std::endl; std::cout << GridLogMessage << "WilsonFermion5D Number of Deriv Calls : " <::ZeroCounters(void) { DhopCalls = 0; DhopCommTime = 0; DhopComputeTime = 0; + DhopComputeTime2= 0; + DhopFaceTime = 0; + DhopTotalTime = 0; DerivCalls = 0; DerivCommTime = 0; @@ -272,12 +263,11 @@ void WilsonFermion5D::DhopDir(const FermionField &in, FermionField &out,in assert(dirdisp<=7); assert(dirdisp>=0); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ for(int s=0;s::DerivInternal(StencilImpl & st, //////////////////////// DerivDhopComputeTime -= usecond(); - PARALLEL_FOR_LOOP - for (int sss = 0; sss < U._grid->oSites(); sss++) { + parallel_for (int sss = 0; sss < U._grid->oSites(); sss++) { for (int s = 0; s < Ls; s++) { int sU = sss; int sF = s + Ls * sU; @@ -329,7 +318,7 @@ void WilsonFermion5D::DerivInternal(StencilImpl & st, assert(sF < 
B._grid->oSites()); assert(sU < U._grid->oSites()); - Kernels::DiracOptDhopDir(st, U, st.CommBuf(), sF, sU, B, Btilde, mu, gamma); + Kernels::DhopDir(st, U, st.CommBuf(), sF, sU, B, Btilde, mu, gamma); //////////////////////////// // spin trace outer product @@ -396,6 +385,86 @@ template void WilsonFermion5D::DhopInternal(StencilImpl & st, LebesgueOrder &lo, DoubledGaugeField & U, const FermionField &in, FermionField &out,int dag) +{ + DhopTotalTime-=usecond(); +#ifdef GRID_OMP + if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) + DhopInternalOverlappedComms(st,lo,U,in,out,dag); + else +#endif + DhopInternalSerialComms(st,lo,U,in,out,dag); + DhopTotalTime+=usecond(); +} + +template +void WilsonFermion5D::DhopInternalOverlappedComms(StencilImpl & st, LebesgueOrder &lo, + DoubledGaugeField & U, + const FermionField &in, FermionField &out,int dag) +{ +#ifdef GRID_OMP + // assert((dag==DaggerNo) ||(dag==DaggerYes)); + typedef CartesianCommunicator::CommsRequest_t CommsRequest_t; + + Compressor compressor(dag); + + int LLs = in._grid->_rdimensions[0]; + int len = U._grid->oSites(); + + DhopFaceTime-=usecond(); + st.HaloExchangeOptGather(in,compressor); + DhopFaceTime+=usecond(); + std::vector > reqs; + +#pragma omp parallel + { + int nthreads = omp_get_num_threads(); + int me = omp_get_thread_num(); + int myoff, mywork; + + GridThread::GetWork(len,me-1,mywork,myoff,nthreads-1); + int sF = LLs * myoff; + + if ( me == 0 ) { + DhopCommTime-=usecond(); + st.CommunicateBegin(reqs); + st.CommunicateComplete(reqs); + DhopCommTime+=usecond(); + } else { + // Interior links in stencil + if ( me==1 ) DhopComputeTime-=usecond(); + if (dag == DaggerYes) Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,myoff,LLs,mywork,in,out,1,0); + else Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,myoff,LLs,mywork,in,out,1,0); + if ( me==1 ) DhopComputeTime+=usecond(); + } + } + + DhopFaceTime-=usecond(); + st.CommsMerge(); + DhopFaceTime+=usecond(); + +#pragma omp parallel + { + 
int nthreads = omp_get_num_threads(); + int me = omp_get_thread_num(); + int myoff, mywork; + + GridThread::GetWork(len,me,mywork,myoff,nthreads); + int sF = LLs * myoff; + + // Exterior links in stencil + if ( me==0 ) DhopComputeTime2-=usecond(); + if (dag == DaggerYes) Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,myoff,LLs,mywork,in,out,0,1); + else Kernels::DhopSite (st,lo,U,st.CommBuf(),sF,myoff,LLs,mywork,in,out,0,1); + if ( me==0 ) DhopComputeTime2+=usecond(); + }// end parallel region +#else + assert(0); +#endif +} +template +void WilsonFermion5D::DhopInternalSerialComms(StencilImpl & st, LebesgueOrder &lo, + DoubledGaugeField & U, + const FermionField &in, FermionField &out,int dag) { // assert((dag==DaggerNo) ||(dag==DaggerYes)); Compressor compressor(dag); @@ -403,45 +472,23 @@ void WilsonFermion5D::DhopInternal(StencilImpl & st, LebesgueOrder &lo, int LLs = in._grid->_rdimensions[0]; DhopCommTime-=usecond(); - st.HaloExchange(in,compressor); + st.HaloExchangeOpt(in,compressor); DhopCommTime+=usecond(); DhopComputeTime-=usecond(); // Dhop takes the 4d grid from U, and makes a 5d index for fermion - if (dag == DaggerYes) { - PARALLEL_FOR_LOOP - for (int ss = 0; ss < U._grid->oSites(); ss++) { - int sU = ss; - int sF = LLs * sU; - Kernels::DiracOptDhopSiteDag(st, lo, U, st.CommBuf(), sF, sU, LLs, 1, in, out); - } -#ifdef AVX512 - } else if (stat.is_init() ) { - int nthreads; - stat.start(); -#pragma omp parallel - { -#pragma omp master - nthreads = omp_get_num_threads(); - int mythread = omp_get_thread_num(); - stat.enter(mythread); -#pragma omp for nowait - for(int ss=0;ssoSites();ss++) { - int sU=ss; - int sF=LLs*sU; - Kernels::DiracOptDhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out); - } - stat.exit(mythread); - } - stat.accum(nthreads); -#endif - } else { - PARALLEL_FOR_LOOP - for (int ss = 0; ss < U._grid->oSites(); ss++) { + if (dag == DaggerYes) { + parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) { int sU = ss; int sF = LLs * sU; - 
Kernels::DiracOptDhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out); + Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out); + } + } else { + parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) { + int sU = ss; + int sF = LLs * sU; + Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out); } } DhopComputeTime+=usecond(); diff --git a/lib/qcd/action/fermion/WilsonFermion5D.h b/lib/qcd/action/fermion/WilsonFermion5D.h index fb4fa925..e87e927e 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.h +++ b/lib/qcd/action/fermion/WilsonFermion5D.h @@ -31,7 +31,7 @@ Author: paboyle #ifndef GRID_QCD_WILSON_FERMION_5D_H #define GRID_QCD_WILSON_FERMION_5D_H -#include +#include namespace Grid { namespace QCD { @@ -82,6 +82,9 @@ namespace QCD { double DhopCalls; double DhopCommTime; double DhopComputeTime; + double DhopComputeTime2; + double DhopFaceTime; + double DhopTotalTime; double DerivCalls; double DerivCommTime; @@ -145,6 +148,20 @@ namespace QCD { const FermionField &in, FermionField &out, int dag); + + void DhopInternalOverlappedComms(StencilImpl & st, + LebesgueOrder &lo, + DoubledGaugeField &U, + const FermionField &in, + FermionField &out, + int dag); + + void DhopInternalSerialComms(StencilImpl & st, + LebesgueOrder &lo, + DoubledGaugeField &U, + const FermionField &in, + FermionField &out, + int dag); // Constructors WilsonFermion5D(GaugeField &_Umu, diff --git a/lib/qcd/action/fermion/WilsonKernels.cc b/lib/qcd/action/fermion/WilsonKernels.cc index 68efd395..6e72e089 100644 --- a/lib/qcd/action/fermion/WilsonKernels.cc +++ b/lib/qcd/action/fermion/WilsonKernels.cc @@ -28,11 +28,57 @@ See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include +#include + namespace Grid { namespace QCD { -int WilsonKernelsStatic::Opt; + int WilsonKernelsStatic::Opt = WilsonKernelsStatic::OptGeneric; + int WilsonKernelsStatic::Comms = 
WilsonKernelsStatic::CommsAndCompute; + +#ifdef QPX +#include +#include +#include +#include +#endif + +void bgq_l1p_optimisation(int mode) +{ +#ifdef QPX +#undef L1P_CFG_PF_USR +#define L1P_CFG_PF_USR (0x3fde8000108ll) /* (64 bit reg, 23 bits wide, user/unpriv) */ + + uint64_t cfg_pf_usr; + if ( mode ) { + cfg_pf_usr = + L1P_CFG_PF_USR_ifetch_depth(0) + | L1P_CFG_PF_USR_ifetch_max_footprint(1) + | L1P_CFG_PF_USR_pf_stream_est_on_dcbt + | L1P_CFG_PF_USR_pf_stream_establish_enable + | L1P_CFG_PF_USR_pf_stream_optimistic + | L1P_CFG_PF_USR_pf_adaptive_throttle(0xF) ; + // if ( sizeof(Float) == sizeof(double) ) { + cfg_pf_usr |= L1P_CFG_PF_USR_dfetch_depth(2)| L1P_CFG_PF_USR_dfetch_max_footprint(3) ; + // } else { + // cfg_pf_usr |= L1P_CFG_PF_USR_dfetch_depth(1)| L1P_CFG_PF_USR_dfetch_max_footprint(2) ; + // } + } else { + cfg_pf_usr = L1P_CFG_PF_USR_dfetch_depth(1) + | L1P_CFG_PF_USR_dfetch_max_footprint(2) + | L1P_CFG_PF_USR_ifetch_depth(0) + | L1P_CFG_PF_USR_ifetch_max_footprint(1) + | L1P_CFG_PF_USR_pf_stream_est_on_dcbt + | L1P_CFG_PF_USR_pf_stream_establish_enable + | L1P_CFG_PF_USR_pf_stream_optimistic + | L1P_CFG_PF_USR_pf_stream_prefetch_enable; + } + *((uint64_t *)L1P_CFG_PF_USR) = cfg_pf_usr; + +#endif + +} + template WilsonKernels::WilsonKernels(const ImplParams &p) : Base(p){}; @@ -42,9 +88,10 @@ WilsonKernels::WilsonKernels(const ImplParams &p) : Base(p){}; //////////////////////////////////////////// template -void WilsonKernels::DiracOptGenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, +void WilsonKernels::GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor *buf, int sF, - int sU, const FermionField &in, FermionField &out) { + int sU, const FermionField &in, FermionField &out, + int interior,int exterior) { SiteHalfSpinor tmp; SiteHalfSpinor chi; SiteHalfSpinor *chi_p; @@ -218,9 +265,9 @@ void WilsonKernels::DiracOptGenericDhopSiteDag(StencilImpl &st, LebesgueOr // Need controls to do 
interior, exterior, or both template -void WilsonKernels::DiracOptGenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, +void WilsonKernels::GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor *buf, int sF, - int sU, const FermionField &in, FermionField &out) { + int sU, const FermionField &in, FermionField &out,int interior,int exterior) { SiteHalfSpinor tmp; SiteHalfSpinor chi; SiteHalfSpinor *chi_p; @@ -393,7 +440,7 @@ void WilsonKernels::DiracOptGenericDhopSite(StencilImpl &st, LebesgueOrder }; template -void WilsonKernels::DiracOptDhopDir( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int sF, +void WilsonKernels::DhopDir( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int sF, int sU, const FermionField &in, FermionField &out, int dir, int gamma) { SiteHalfSpinor tmp; diff --git a/lib/qcd/action/fermion/WilsonKernels.h b/lib/qcd/action/fermion/WilsonKernels.h index 47da2b14..20ee87f2 100644 --- a/lib/qcd/action/fermion/WilsonKernels.h +++ b/lib/qcd/action/fermion/WilsonKernels.h @@ -34,6 +34,8 @@ directory namespace Grid { namespace QCD { +void bgq_l1p_optimisation(int mode); + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Helper routines that implement Wilson stencil for a single site. // Common to both the WilsonFermion and WilsonFermion5D @@ -41,8 +43,10 @@ namespace QCD { class WilsonKernelsStatic { public: enum { OptGeneric, OptHandUnroll, OptInlineAsm }; + enum { CommsAndCompute, CommsThenCompute }; // S-direction is INNERMOST and takes no part in the parity. 
static int Opt; // these are a temporary hack + static int Comms; // these are a temporary hack }; template class WilsonKernels : public FermionOperator , public WilsonKernelsStatic { @@ -55,19 +59,23 @@ public: template typename std::enable_if::type - DiracOptDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) + DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1) { + bgq_l1p_optimisation(1); switch(Opt) { -#ifdef AVX512 +#if defined(AVX512) || defined (QPX) case OptInlineAsm: - WilsonKernels::DiracOptAsmDhopSite(st,lo,U,buf,sF,sU,Ls,Ns,in,out); - break; + if(interior&&exterior) WilsonKernels::AsmDhopSite(st,lo,U,buf,sF,sU,Ls,Ns,in,out); + else if (interior) WilsonKernels::AsmDhopSiteInt(st,lo,U,buf,sF,sU,Ls,Ns,in,out); + else if (exterior) WilsonKernels::AsmDhopSiteExt(st,lo,U,buf,sF,sU,Ls,Ns,in,out); + else assert(0); + break; #endif case OptHandUnroll: for (int site = 0; site < Ns; site++) { for (int s = 0; s < Ls; s++) { - WilsonKernels::DiracOptHandDhopSite(st,lo,U,buf,sF,sU,in,out); + if( exterior) WilsonKernels::HandDhopSite(st,lo,U,buf,sF,sU,in,out,interior,exterior); sF++; } sU++; @@ -76,7 +84,7 @@ public: case OptGeneric: for (int site = 0; site < Ns; site++) { for (int s = 0; s < Ls; s++) { - WilsonKernels::DiracOptGenericDhopSite(st,lo,U,buf,sF,sU,in,out); + if( exterior) WilsonKernels::GenericDhopSite(st,lo,U,buf,sF,sU,in,out,interior,exterior); sF++; } sU++; @@ -85,16 +93,17 @@ public: default: assert(0); } + bgq_l1p_optimisation(0); } template typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type - DiracOptDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, int Ls, int Ns, const 
FermionField &in, FermionField &out) { + DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1 ) { // no kernel choice for (int site = 0; site < Ns; site++) { for (int s = 0; s < Ls; s++) { - WilsonKernels::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU, in, out); + if( exterior) WilsonKernels::GenericDhopSite(st, lo, U, buf, sF, sU, in, out,interior,exterior); sF++; } sU++; @@ -103,19 +112,23 @@ public: template typename std::enable_if::type - DiracOptDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) { + DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1) { + bgq_l1p_optimisation(1); switch(Opt) { -#ifdef AVX512 +#if defined(AVX512) || defined (QPX) case OptInlineAsm: - WilsonKernels::DiracOptAsmDhopSiteDag(st,lo,U,buf,sF,sU,Ls,Ns,in,out); + if(interior&&exterior) WilsonKernels::AsmDhopSiteDag(st,lo,U,buf,sF,sU,Ls,Ns,in,out); + else if (interior) WilsonKernels::AsmDhopSiteDagInt(st,lo,U,buf,sF,sU,Ls,Ns,in,out); + else if (exterior) WilsonKernels::AsmDhopSiteDagExt(st,lo,U,buf,sF,sU,Ls,Ns,in,out); + else assert(0); break; #endif case OptHandUnroll: for (int site = 0; site < Ns; site++) { for (int s = 0; s < Ls; s++) { - WilsonKernels::DiracOptHandDhopSiteDag(st,lo,U,buf,sF,sU,in,out); + if( exterior) WilsonKernels::HandDhopSiteDag(st,lo,U,buf,sF,sU,in,out,interior,exterior); sF++; } sU++; @@ -124,7 +137,7 @@ public: case OptGeneric: for (int site = 0; site < Ns; site++) { for (int s = 0; s < Ls; s++) { - WilsonKernels::DiracOptGenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out); + if( exterior) WilsonKernels::GenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out,interior,exterior); 
sF++; } sU++; @@ -133,44 +146,58 @@ public: default: assert(0); } + bgq_l1p_optimisation(0); } template typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,void>::type - DiracOptDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,SiteHalfSpinor * buf, - int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) { + DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1) { for (int site = 0; site < Ns; site++) { for (int s = 0; s < Ls; s++) { - WilsonKernels::DiracOptGenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out); + if( exterior) WilsonKernels::GenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out,interior,exterior); sF++; } sU++; } } - void DiracOptDhopDir(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf, + void DhopDir(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf, int sF, int sU, const FermionField &in, FermionField &out, int dirdisp, int gamma); private: // Specialised variants - void DiracOptGenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out); + void GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionField &in, FermionField &out,int interior,int exterior); - void DiracOptGenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out); + void GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionField &in, FermionField &out,int interior,int exterior); - void DiracOptAsmDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, + void AsmDhopSite(StencilImpl 
&st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out); - void DiracOptAsmDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, + void AsmDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out); - void DiracOptHandDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out); + void AsmDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out); - void DiracOptHandDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out); + void AsmDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out); + + void AsmDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out); + + void AsmDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out); + + + void HandDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionField &in, FermionField &out,int interior,int exterior); + + void HandDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionField &in, FermionField &out,int interior,int exterior); public: diff --git a/lib/qcd/action/fermion/WilsonKernelsAsm.cc b/lib/qcd/action/fermion/WilsonKernelsAsm.cc 
index 6ab9961f..365be69a 100644 --- a/lib/qcd/action/fermion/WilsonKernelsAsm.cc +++ b/lib/qcd/action/fermion/WilsonKernelsAsm.cc @@ -30,165 +30,75 @@ Author: Guido Cossu *************************************************************************************/ /* END LEGAL */ -#include - +#include namespace Grid { namespace QCD { - + + /////////////////////////////////////////////////////////// // Default to no assembler implementation /////////////////////////////////////////////////////////// template void -WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, +WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) { assert(0); } template void -WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, +WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) { assert(0); } -#if defined(AVX512) -#include +template void +WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +{ + assert(0); +} - /////////////////////////////////////////////////////////// - // If we are AVX512 specialise the single precision routine - /////////////////////////////////////////////////////////// +template void +WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +{ + assert(0); +} -#include - -static Vector signsF; +template void +WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const 
FermionField &in, FermionField &out) +{ + assert(0); +} - template - int setupSigns(Vector& signs ){ - Vector bother(2); - signs = bother; - vrsign(signs[0]); - visign(signs[1]); - return 1; - } +template void +WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +{ + assert(0); +} - static int signInitF = setupSigns(signsF); - -#define label(A) ilabel(A) -#define ilabel(A) ".globl\n" #A ":\n" - -#define MAYBEPERM(A,perm) if (perm) { A ; } -#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf) -#define FX(A) WILSONASM_ ##A -#define COMPLEX_TYPE vComplexF -#define signs signsF - -#undef KERNEL_DAG -template<> void -WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -#define KERNEL_DAG -template<> void -WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -#undef VMOVIDUP -#undef VMOVRDUP -#undef MAYBEPERM -#undef MULT_2SPIN -#undef FX -#define FX(A) DWFASM_ ## A -#define MAYBEPERM(A,B) -//#define VMOVIDUP(A,B,C) VBCASTIDUPf(A,B,C) -//#define VMOVRDUP(A,B,C) VBCASTRDUPf(A,B,C) -#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf) - -#undef KERNEL_DAG -template<> void -WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -#define KERNEL_DAG -template<> void -WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -#undef COMPLEX_TYPE -#undef signs -#undef VMOVRDUP 
-#undef MAYBEPERM -#undef MULT_2SPIN -#undef FX - -/////////////////////////////////////////////////////////// -// If we are AVX512 specialise the double precision routine -/////////////////////////////////////////////////////////// - -#include - -static Vector signsD; -#define signs signsD -static int signInitD = setupSigns(signsD); - -#define MAYBEPERM(A,perm) if (perm) { A ; } -#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf) -#define FX(A) WILSONASM_ ##A -#define COMPLEX_TYPE vComplexD - -#undef KERNEL_DAG -template<> void -WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -#define KERNEL_DAG -template<> void -WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -#undef VMOVIDUP -#undef VMOVRDUP -#undef MAYBEPERM -#undef MULT_2SPIN -#undef FX -#define FX(A) DWFASM_ ## A -#define MAYBEPERM(A,B) -//#define VMOVIDUP(A,B,C) VBCASTIDUPd(A,B,C) -//#define VMOVRDUP(A,B,C) VBCASTRDUPd(A,B,C) -#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf) - -#undef KERNEL_DAG -template<> void -WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -#define KERNEL_DAG -template<> void -WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -#undef COMPLEX_TYPE -#undef signs -#undef VMOVRDUP -#undef MAYBEPERM -#undef MULT_2SPIN -#undef FX - -#endif //AVX512 +#include +#include #define INSTANTIATE_ASM(A)\ -template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & 
lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ +template void WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ \ -template void WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ +template void WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ +template void WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ + \ +template void WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ +template void WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ + \ +template void WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ INSTANTIATE_ASM(WilsonImplF); diff --git a/lib/qcd/action/fermion/WilsonKernelsAsmAvx512.h b/lib/qcd/action/fermion/WilsonKernelsAsmAvx512.h new file mode 100644 index 00000000..1839e9bc --- /dev/null +++ b/lib/qcd/action/fermion/WilsonKernelsAsmAvx512.h @@ -0,0 +1,427 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + + + Source file: ./lib/qcd/action/fermion/WilsonKernelsAsmAvx512.h + + Copyright (C) 2015 + +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it 
and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + + +#if defined(AVX512) + /////////////////////////////////////////////////////////// + // If we are AVX512 specialise the single precision routine + /////////////////////////////////////////////////////////// +#include +#include + +static Vector signsF; + + template + int setupSigns(Vector& signs ){ + Vector bother(2); + signs = bother; + vrsign(signs[0]); + visign(signs[1]); + return 1; + } + + static int signInitF = setupSigns(signsF); + +#define MAYBEPERM(A,perm) if (perm) { A ; } +#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf) +#define COMPLEX_SIGNS(isigns) vComplexF *isigns = &signsF[0]; + +///////////////////////////////////////////////////////////////// +// XYZT vectorised, undag Kernel, single +///////////////////////////////////////////////////////////////// +#undef KERNEL_DAG +#define INTERIOR_AND_EXTERIOR +#undef INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +template<> void +WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & 
lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#define INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + + +#undef INTERIOR_AND_EXTERIOR +#undef INTERIOR +#define EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +///////////////////////////////////////////////////////////////// +// XYZT vectorised, dag Kernel, single +///////////////////////////////////////////////////////////////// +#define KERNEL_DAG +#define INTERIOR_AND_EXTERIOR +#undef INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#define INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int 
ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#undef INTERIOR +#define EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +#undef MAYBEPERM +#undef MULT_2SPIN +#define MAYBEPERM(A,B) +#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf) + +///////////////////////////////////////////////////////////////// +// Ls vectorised, undag Kernel, single +///////////////////////////////////////////////////////////////// +#undef KERNEL_DAG +#define INTERIOR_AND_EXTERIOR +#undef INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include +template<> void +WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#define INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include +template<> void +WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int 
ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#undef INTERIOR +#define EXTERIOR +#undef MULT_2SPIN +#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LSNOPF(ptr,pf) +template<> void +WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +///////////////////////////////////////////////////////////////// +// Ls vectorised, dag Kernel, single +///////////////////////////////////////////////////////////////// +#define KERNEL_DAG +#define INTERIOR_AND_EXTERIOR +#undef INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include +template<> void +WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#define INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include +template<> void +WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#undef INTERIOR +#define EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor 
*buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include +template<> void +WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +#undef COMPLEX_SIGNS +#undef MAYBEPERM +#undef MULT_2SPIN + + + +/////////////////////////////////////////////////////////// +// If we are AVX512 specialise the double precision routine +/////////////////////////////////////////////////////////// + +#include + +static Vector signsD; +static int signInitD = setupSigns(signsD); + +#define MAYBEPERM(A,perm) if (perm) { A ; } +#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf) +#define COMPLEX_SIGNS(isigns) vComplexD *isigns = &signsD[0]; + + +#define INTERIOR_AND_EXTERIOR +#undef INTERIOR +#undef EXTERIOR + +///////////////////////////////////////////////////////////////// +// XYZT vectorised, undag Kernel, double +///////////////////////////////////////////////////////////////// +#undef KERNEL_DAG +#define INTERIOR_AND_EXTERIOR +#undef INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include +template<> void +WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#define INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include +template<> void +WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, 
FermionField &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#undef INTERIOR +#define EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include +template<> void +WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +///////////////////////////////////////////////////////////////// +// XYZT vectorised, dag Kernel, single +///////////////////////////////////////////////////////////////// +#define KERNEL_DAG +#define INTERIOR_AND_EXTERIOR +#undef INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include +template<> void +WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#define INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include +template<> void +WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#undef INTERIOR +#define EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include +template<> void 
+WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +#undef MAYBEPERM +#undef MULT_2SPIN +#define MAYBEPERM(A,B) +#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf) + +///////////////////////////////////////////////////////////////// +// Ls vectorised, undag Kernel, single +///////////////////////////////////////////////////////////////// +#undef KERNEL_DAG +#define INTERIOR_AND_EXTERIOR +#undef INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include +template<> void +WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#define INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include +template<> void +WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#undef INTERIOR +#define EXTERIOR +#undef MULT_2SPIN +#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LSNOPF(ptr,pf) +template<> void +WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include +template<> void +WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const 
FermionField &in, FermionField &out) +#include + +///////////////////////////////////////////////////////////////// +// Ls vectorised, dag Kernel, single +///////////////////////////////////////////////////////////////// +#define KERNEL_DAG +#define INTERIOR_AND_EXTERIOR +#undef INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include +template<> void +WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#define INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include +template<> void +WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#undef INTERIOR +#define EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include +template<> void +WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +#undef COMPLEX_SIGNS +#undef MAYBEPERM +#undef MULT_2SPIN + +#endif //AVX512 diff --git a/lib/qcd/action/fermion/WilsonKernelsAsmBody.h b/lib/qcd/action/fermion/WilsonKernelsAsmBody.h index 72e13754..34aba472 100644 --- a/lib/qcd/action/fermion/WilsonKernelsAsmBody.h +++ 
b/lib/qcd/action/fermion/WilsonKernelsAsmBody.h @@ -1,255 +1,267 @@ +#ifdef KERNEL_DAG +#define DIR0_PROJMEM(base) XP_PROJMEM(base); +#define DIR1_PROJMEM(base) YP_PROJMEM(base); +#define DIR2_PROJMEM(base) ZP_PROJMEM(base); +#define DIR3_PROJMEM(base) TP_PROJMEM(base); +#define DIR4_PROJMEM(base) XM_PROJMEM(base); +#define DIR5_PROJMEM(base) YM_PROJMEM(base); +#define DIR6_PROJMEM(base) ZM_PROJMEM(base); +#define DIR7_PROJMEM(base) TM_PROJMEM(base); +#define DIR0_RECON XP_RECON +#define DIR1_RECON YP_RECON_ACCUM +#define DIR2_RECON ZP_RECON_ACCUM +#define DIR3_RECON TP_RECON_ACCUM +#define DIR4_RECON XM_RECON_ACCUM +#define DIR5_RECON YM_RECON_ACCUM +#define DIR6_RECON ZM_RECON_ACCUM +#define DIR7_RECON TM_RECON_ACCUM +#else +#define DIR0_PROJMEM(base) XM_PROJMEM(base); +#define DIR1_PROJMEM(base) YM_PROJMEM(base); +#define DIR2_PROJMEM(base) ZM_PROJMEM(base); +#define DIR3_PROJMEM(base) TM_PROJMEM(base); +#define DIR4_PROJMEM(base) XP_PROJMEM(base); +#define DIR5_PROJMEM(base) YP_PROJMEM(base); +#define DIR6_PROJMEM(base) ZP_PROJMEM(base); +#define DIR7_PROJMEM(base) TP_PROJMEM(base); +#define DIR0_RECON XM_RECON +#define DIR1_RECON YM_RECON_ACCUM +#define DIR2_RECON ZM_RECON_ACCUM +#define DIR3_RECON TM_RECON_ACCUM +#define DIR4_RECON XP_RECON_ACCUM +#define DIR5_RECON YP_RECON_ACCUM +#define DIR6_RECON ZP_RECON_ACCUM +#define DIR7_RECON TP_RECON_ACCUM +#endif + +//////////////////////////////////////////////////////////////////////////////// +// Comms then compute kernel +//////////////////////////////////////////////////////////////////////////////// +#ifdef INTERIOR_AND_EXTERIOR + +#define ZERO_NMU(A) +#define INTERIOR_BLOCK_XP(a,b,PERMUTE_DIR,PROJMEM,RECON) INTERIOR_BLOCK(a,b,PERMUTE_DIR,PROJMEM,RECON) +#define EXTERIOR_BLOCK_XP(a,b,RECON) EXTERIOR_BLOCK(a,b,RECON) + +#define INTERIOR_BLOCK(a,b,PERMUTE_DIR,PROJMEM,RECON) \ + LOAD64(%r10,isigns); \ + PROJMEM(base); \ + MAYBEPERM(PERMUTE_DIR,perm); + +#define EXTERIOR_BLOCK(a,b,RECON) \ + LOAD_CHI(base); + 
+#define COMMON_BLOCK(a,b,RECON) \ + base = st.GetInfo(ptype,local,perm,b,ent,plocal); ent++; \ + PREFETCH_CHIMU(base); \ + MULT_2SPIN_DIR_PF(a,basep); \ + LOAD64(%r10,isigns); \ + RECON; + +#define RESULT(base,basep) SAVE_RESULT(base,basep); + +#endif + +//////////////////////////////////////////////////////////////////////////////// +// Pre comms kernel -- prefetch like normal because it is mostly right +//////////////////////////////////////////////////////////////////////////////// +#ifdef INTERIOR + +#define COMMON_BLOCK(a,b,RECON) +#define ZERO_NMU(A) + +// No accumulate for DIR0 +#define EXTERIOR_BLOCK_XP(a,b,RECON) \ + ZERO_PSI; \ + base = st.GetInfo(ptype,local,perm,b,ent,plocal); ent++; + +#define EXTERIOR_BLOCK(a,b,RECON) \ + base = st.GetInfo(ptype,local,perm,b,ent,plocal); ent++; + +#define INTERIOR_BLOCK_XP(a,b,PERMUTE_DIR,PROJMEM,RECON) INTERIOR_BLOCK(a,b,PERMUTE_DIR,PROJMEM,RECON) + +#define INTERIOR_BLOCK(a,b,PERMUTE_DIR,PROJMEM,RECON) \ + LOAD64(%r10,isigns); \ + PROJMEM(base); \ + MAYBEPERM(PERMUTE_DIR,perm); \ + base = st.GetInfo(ptype,local,perm,b,ent,plocal); ent++; \ + PREFETCH_CHIMU(base); \ + MULT_2SPIN_DIR_PF(a,basep); \ + LOAD64(%r10,isigns); \ + RECON; + +#define RESULT(base,basep) SAVE_RESULT(base,basep); + +#endif + +//////////////////////////////////////////////////////////////////////////////// +// Post comms kernel +//////////////////////////////////////////////////////////////////////////////// +#ifdef EXTERIOR + +#define ZERO_NMU(A) nmu=0; + +#define INTERIOR_BLOCK_XP(a,b,PERMUTE_DIR,PROJMEM,RECON) \ + ZERO_PSI; base = st.GetInfo(ptype,local,perm,b,ent,plocal); ent++; + +#define EXTERIOR_BLOCK_XP(a,b,RECON) EXTERIOR_BLOCK(a,b,RECON) + +#define INTERIOR_BLOCK(a,b,PERMUTE_DIR,PROJMEM,RECON) \ + base = st.GetInfo(ptype,local,perm,b,ent,plocal); ent++; + +#define EXTERIOR_BLOCK(a,b,RECON) \ + nmu++; \ + LOAD_CHI(base); \ + MULT_2SPIN_DIR_PF(a,base); \ + base = st.GetInfo(ptype,local,perm,b,ent,plocal); ent++; \ + LOAD64(%r10,isigns); 
\ + RECON; + +#define COMMON_BLOCK(a,b,RECON) + +#define RESULT(base,basep) if (nmu){ ADD_RESULT(base,base);} + +#endif + { + int nmu; int local,perm, ptype; uint64_t base; uint64_t basep; const uint64_t plocal =(uint64_t) & in._odata[0]; - // vComplexF isigns[2] = { signs[0], signs[1] }; - //COMPLEX_TYPE is vComplexF of vComplexD depending - //on the chosen precision - COMPLEX_TYPE *isigns = &signs[0]; - + COMPLEX_SIGNS(isigns); MASK_REGS; int nmax=U._grid->oSites(); for(int site=0;site=nmax) ssn=0; - int sUn=lo.Reorder(ssn); - for(int s=0;s=nmax) ssn=0; + int sUn=lo.Reorder(ssn); +#ifndef EXTERIOR + LOCK_GAUGE(0); +#endif + for(int s=0;s shuffle and xor the real part sign bit -#ifdef KERNEL_DAG - YP_PROJMEM(base); -#else - YM_PROJMEM(base); -#endif - MAYBEPERM(PERMUTE_DIR2,perm); - } else { - LOAD_CHI(base); - } - base = st.GetInfo(ptype,local,perm,Zp,ent,plocal); ent++; - PREFETCH_CHIMU(base); - { - MULT_2SPIN_DIR_PFYP(Yp,basep); - } - LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit -#ifdef KERNEL_DAG - YP_RECON_ACCUM; -#else - YM_RECON_ACCUM; -#endif - - //////////////////////////////// - // Zp - //////////////////////////////// - basep = st.GetPFInfo(nent,plocal); nent++; - if ( local ) { - LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit -#ifdef KERNEL_DAG - ZP_PROJMEM(base); -#else - ZM_PROJMEM(base); -#endif - MAYBEPERM(PERMUTE_DIR1,perm); - } else { - LOAD_CHI(base); - } - base = st.GetInfo(ptype,local,perm,Tp,ent,plocal); ent++; - PREFETCH_CHIMU(base); - { - MULT_2SPIN_DIR_PFZP(Zp,basep); - } - LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit -#ifdef KERNEL_DAG - ZP_RECON_ACCUM; -#else - ZM_RECON_ACCUM; -#endif - - //////////////////////////////// - // Tp - //////////////////////////////// - basep = st.GetPFInfo(nent,plocal); nent++; - if ( local ) { - LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit -#ifdef KERNEL_DAG - TP_PROJMEM(base); -#else - 
TM_PROJMEM(base); -#endif - MAYBEPERM(PERMUTE_DIR0,perm); - } else { - LOAD_CHI(base); - } - base = st.GetInfo(ptype,local,perm,Xm,ent,plocal); ent++; - PREFETCH_CHIMU(base); - { - MULT_2SPIN_DIR_PFTP(Tp,basep); - } - LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit -#ifdef KERNEL_DAG - TP_RECON_ACCUM; -#else - TM_RECON_ACCUM; -#endif - - //////////////////////////////// - // Xm - //////////////////////////////// -#ifndef STREAM_STORE - basep= (uint64_t) &out._odata[ss]; -#endif - // basep= st.GetPFInfo(nent,plocal); nent++; - if ( local ) { - LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit -#ifdef KERNEL_DAG - XM_PROJMEM(base); -#else - XP_PROJMEM(base); -#endif - MAYBEPERM(PERMUTE_DIR3,perm); - } else { - LOAD_CHI(base); - } - base = st.GetInfo(ptype,local,perm,Ym,ent,plocal); ent++; - PREFETCH_CHIMU(base); - { - MULT_2SPIN_DIR_PFXM(Xm,basep); - } - LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit -#ifdef KERNEL_DAG - XM_RECON_ACCUM; -#else - XP_RECON_ACCUM; -#endif - - //////////////////////////////// - // Ym - //////////////////////////////// - basep= st.GetPFInfo(nent,plocal); nent++; - if ( local ) { - LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit -#ifdef KERNEL_DAG - YM_PROJMEM(base); -#else - YP_PROJMEM(base); -#endif - MAYBEPERM(PERMUTE_DIR2,perm); - } else { - LOAD_CHI(base); - } - base = st.GetInfo(ptype,local,perm,Zm,ent,plocal); ent++; - PREFETCH_CHIMU(base); - { - MULT_2SPIN_DIR_PFYM(Ym,basep); - } - LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit -#ifdef KERNEL_DAG - YM_RECON_ACCUM; -#else - YP_RECON_ACCUM; -#endif - - //////////////////////////////// - // Zm - //////////////////////////////// - basep= st.GetPFInfo(nent,plocal); nent++; - if ( local ) { - LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit -#ifdef KERNEL_DAG - ZM_PROJMEM(base); -#else - ZP_PROJMEM(base); -#endif - 
MAYBEPERM(PERMUTE_DIR1,perm); - } else { - LOAD_CHI(base); - } - base = st.GetInfo(ptype,local,perm,Tm,ent,plocal); ent++; - PREFETCH_CHIMU(base); - { - MULT_2SPIN_DIR_PFZM(Zm,basep); - } - LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit -#ifdef KERNEL_DAG - ZM_RECON_ACCUM; -#else - ZP_RECON_ACCUM; -#endif - - //////////////////////////////// - // Tm - //////////////////////////////// - basep= st.GetPFInfo(nent,plocal); nent++; - if ( local ) { - LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit -#ifdef KERNEL_DAG - TM_PROJMEM(base); -#else - TP_PROJMEM(base); -#endif - MAYBEPERM(PERMUTE_DIR0,perm); - } else { - LOAD_CHI(base); - } - base= (uint64_t) &out._odata[ss]; -#ifndef STREAM_STORE - PREFETCH_CHIMU(base); -#endif - { - MULT_2SPIN_DIR_PFTM(Tm,basep); - } - LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit -#ifdef KERNEL_DAG - TM_RECON_ACCUM; -#else - TP_RECON_ACCUM; -#endif - - basep= st.GetPFInfo(nent,plocal); nent++; - SAVE_RESULT(base,basep); - - } - ssU++; + base = (uint64_t) &out._odata[ss]; + basep= st.GetPFInfo(nent,plocal); nent++; + RESULT(base,basep); + } + ssU++; + UNLOCK_GAUGE(0); } } + +#undef DIR0_PROJMEM +#undef DIR1_PROJMEM +#undef DIR2_PROJMEM +#undef DIR3_PROJMEM +#undef DIR4_PROJMEM +#undef DIR5_PROJMEM +#undef DIR6_PROJMEM +#undef DIR7_PROJMEM +#undef DIR0_RECON +#undef DIR1_RECON +#undef DIR2_RECON +#undef DIR3_RECON +#undef DIR4_RECON +#undef DIR5_RECON +#undef DIR6_RECON +#undef DIR7_RECON +#undef EXTERIOR_BLOCK +#undef INTERIOR_BLOCK +#undef EXTERIOR_BLOCK_XP +#undef INTERIOR_BLOCK_XP +#undef COMMON_BLOCK +#undef ZERO_NMU +#undef RESULT diff --git a/lib/qcd/action/fermion/WilsonKernelsAsmQPX.h b/lib/qcd/action/fermion/WilsonKernelsAsmQPX.h new file mode 100644 index 00000000..612234d7 --- /dev/null +++ b/lib/qcd/action/fermion/WilsonKernelsAsmQPX.h @@ -0,0 +1,150 @@ +/************************************************************************************* + + Grid 
physics library, www.github.com/paboyle/Grid + + + + Source file: ./lib/qcd/action/fermion/WilsonKernelsAsmQPX.h + + Copyright (C) 2015 + +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + + +#if defined(QPX) + + /////////////////////////////////////////////////////////// + // If we are QPX specialise the single precision routine + /////////////////////////////////////////////////////////// + +#include +#include + +#define MAYBEPERM(A,perm) if (perm) { A ; } +#define MULT_2SPIN(ptr,pf) MULT_2SPIN_QPX(ptr,pf) +#define COMPLEX_SIGNS(isigns) + +#define INTERIOR_AND_EXTERIOR +#undef INTERIOR +#undef EXTERIOR + +///////////////////////////////////////////////////////////////// +// XYZT vectorised, undag Kernel, single +///////////////////////////////////////////////////////////////// +#undef KERNEL_DAG +template<> void +WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +///////////////////////////////////////////////////////////////// +// XYZT vectorised, dag 
Kernel, single +///////////////////////////////////////////////////////////////// +#define KERNEL_DAG +template<> void +WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +#undef MAYBEPERM +#undef MULT_2SPIN +#define MAYBEPERM(A,B) +#define MULT_2SPIN(ptr,pf) MULT_2SPIN_QPX_LS(ptr,pf) + +///////////////////////////////////////////////////////////////// +// Ls vectorised, undag Kernel, single +///////////////////////////////////////////////////////////////// +#undef KERNEL_DAG +template<> void +WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +///////////////////////////////////////////////////////////////// +// Ls vectorised, dag Kernel, single +///////////////////////////////////////////////////////////////// +#define KERNEL_DAG +template<> void +WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include +#undef MAYBEPERM +#undef MULT_2SPIN + +/////////////////////////////////////////////////////////// +// DP routines +/////////////////////////////////////////////////////////// + +#include + +#define MAYBEPERM(A,perm) if (perm) { A ; } +#define MULT_2SPIN(ptr,pf) MULT_2SPIN_QPX(ptr,pf) + +///////////////////////////////////////////////////////////////// +// XYZT Vectorised, undag Kernel, double +///////////////////////////////////////////////////////////////// +#undef KERNEL_DAG +template<> void +WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include +///////////////////////////////////////////////////////////////// + + 
+///////////////////////////////////////////////////////////////// +// XYZT Vectorised, dag Kernel, double +///////////////////////////////////////////////////////////////// +#define KERNEL_DAG +template<> void +WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include +///////////////////////////////////////////////////////////////// + +#undef MAYBEPERM +#undef MULT_2SPIN +#define MAYBEPERM(A,B) +#define MULT_2SPIN(ptr,pf) MULT_2SPIN_QPX_LS(ptr,pf) +///////////////////////////////////////////////////////////////// +// Ls vectorised, undag Kernel, double +///////////////////////////////////////////////////////////////// +#undef KERNEL_DAG +template<> void +WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include +///////////////////////////////////////////////////////////////// + +///////////////////////////////////////////////////////////////// +// Ls vectorised, dag Kernel, double +///////////////////////////////////////////////////////////////// +#define KERNEL_DAG +template<> void +WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include +///////////////////////////////////////////////////////////////// + +#undef MAYBEPERM +#undef MULT_2SPIN + +#endif diff --git a/lib/qcd/action/fermion/WilsonKernelsHand.cc b/lib/qcd/action/fermion/WilsonKernelsHand.cc index ff1e0d29..0a60c107 100644 --- a/lib/qcd/action/fermion/WilsonKernelsHand.cc +++ b/lib/qcd/action/fermion/WilsonKernelsHand.cc @@ -26,7 +26,7 @@ Author: paboyle See the full license in the file "LICENSE" in the top level distribution directory 
*************************************************************************************/ /* END LEGAL */ -#include +#include #define REGISTER @@ -312,8 +312,8 @@ namespace QCD { template void -WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int sU,const FermionField &in, FermionField &out) +WilsonKernels::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int sU,const FermionField &in, FermionField &out,int interior,int exterior) { typedef typename Simd::scalar_type S; typedef typename Simd::vector_type V; @@ -554,8 +554,8 @@ WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,Doub } template -void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int sU,const FermionField &in, FermionField &out) +void WilsonKernels::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int sU,const FermionField &in, FermionField &out,int interior,int exterior) { // std::cout << "Hand op Dhop "<::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder // Specialise Gparity to simple implementation //////////////////////////////////////////////// template<> void -WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, +WilsonKernels::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int sF,int sU,const FermionField &in, FermionField &out) + int sF,int sU,const FermionField &in, FermionField &out,int internal,int external) { assert(0); } template<> void -WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, +WilsonKernels::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int sF,int sU,const FermionField &in, FermionField &out) + int sF,int sU,const FermionField &in, 
FermionField &out,int internal,int external) { assert(0); } template<> void -WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int sF,int sU,const FermionField &in, FermionField &out) +WilsonKernels::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int sF,int sU,const FermionField &in, FermionField &out,int internal,int external) { assert(0); } template<> void -WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int sF,int sU,const FermionField &in, FermionField &out) +WilsonKernels::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int sF,int sU,const FermionField &in, FermionField &out,int internal,int external) { assert(0); } @@ -835,10 +835,10 @@ WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,Lebes // Need Nc=3 though // #define INSTANTIATE_THEM(A) \ -template void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\ - int ss,int sU,const FermionField &in, FermionField &out); \ -template void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\ - int ss,int sU,const FermionField &in, FermionField &out); +template void WilsonKernels::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\ + int ss,int sU,const FermionField &in, FermionField &out,int interior,int exterior); \ +template void WilsonKernels::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\ + int ss,int sU,const FermionField &in, FermionField &out,int interior,int exterior); INSTANTIATE_THEM(WilsonImplF); INSTANTIATE_THEM(WilsonImplD); diff --git a/lib/qcd/action/fermion/WilsonTMFermion.cc b/lib/qcd/action/fermion/WilsonTMFermion.cc index 1d59474e..d4604b10 100644 --- 
a/lib/qcd/action/fermion/WilsonTMFermion.cc +++ b/lib/qcd/action/fermion/WilsonTMFermion.cc @@ -25,7 +25,8 @@ Author: paboyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include +#include +#include namespace Grid { namespace QCD { diff --git a/lib/qcd/action/fermion/WilsonTMFermion.h b/lib/qcd/action/fermion/WilsonTMFermion.h index 5901cb2f..f75c287b 100644 --- a/lib/qcd/action/fermion/WilsonTMFermion.h +++ b/lib/qcd/action/fermion/WilsonTMFermion.h @@ -28,7 +28,8 @@ Author: paboyle #ifndef GRID_QCD_WILSON_TM_FERMION_H #define GRID_QCD_WILSON_TM_FERMION_H -#include +#include +#include namespace Grid { diff --git a/lib/qcd/action/fermion/ZMobiusFermion.h b/lib/qcd/action/fermion/ZMobiusFermion.h index d0e00657..32ff7670 100644 --- a/lib/qcd/action/fermion/ZMobiusFermion.h +++ b/lib/qcd/action/fermion/ZMobiusFermion.h @@ -29,7 +29,7 @@ Author: Peter Boyle #ifndef GRID_QCD_ZMOBIUS_FERMION_H #define GRID_QCD_ZMOBIUS_FERMION_H -#include +#include namespace Grid { diff --git a/lib/qcd/action/gauge/Gauge.h b/lib/qcd/action/gauge/Gauge.h new file mode 100644 index 00000000..f3e0e53b --- /dev/null +++ b/lib/qcd/action/gauge/Gauge.h @@ -0,0 +1,70 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/gauge/Gauge_aggregate.h + +Copyright (C) 2015 + +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef GRID_QCD_GAUGE_H +#define GRID_QCD_GAUGE_H + +#include +#include +#include +#include + +namespace Grid { +namespace QCD { + +typedef WilsonGaugeAction WilsonGaugeActionR; +typedef WilsonGaugeAction WilsonGaugeActionF; +typedef WilsonGaugeAction WilsonGaugeActionD; +typedef PlaqPlusRectangleAction PlaqPlusRectangleActionR; +typedef PlaqPlusRectangleAction PlaqPlusRectangleActionF; +typedef PlaqPlusRectangleAction PlaqPlusRectangleActionD; +typedef IwasakiGaugeAction IwasakiGaugeActionR; +typedef IwasakiGaugeAction IwasakiGaugeActionF; +typedef IwasakiGaugeAction IwasakiGaugeActionD; +typedef SymanzikGaugeAction SymanzikGaugeActionR; +typedef SymanzikGaugeAction SymanzikGaugeActionF; +typedef SymanzikGaugeAction SymanzikGaugeActionD; + + +typedef WilsonGaugeAction ConjugateWilsonGaugeActionR; +typedef WilsonGaugeAction ConjugateWilsonGaugeActionF; +typedef WilsonGaugeAction ConjugateWilsonGaugeActionD; +typedef PlaqPlusRectangleAction ConjugatePlaqPlusRectangleActionR; +typedef PlaqPlusRectangleAction ConjugatePlaqPlusRectangleActionF; +typedef PlaqPlusRectangleAction ConjugatePlaqPlusRectangleActionD; +typedef IwasakiGaugeAction ConjugateIwasakiGaugeActionR; +typedef IwasakiGaugeAction ConjugateIwasakiGaugeActionF; +typedef IwasakiGaugeAction ConjugateIwasakiGaugeActionD; +typedef SymanzikGaugeAction ConjugateSymanzikGaugeActionR; +typedef SymanzikGaugeAction ConjugateSymanzikGaugeActionF; +typedef SymanzikGaugeAction ConjugateSymanzikGaugeActionD; + +}} + + +#endif diff --git a/lib/qcd/action/gauge/GaugeImpl.h 
b/lib/qcd/action/gauge/GaugeImpl.h index 400381bb..6041c006 100644 --- a/lib/qcd/action/gauge/GaugeImpl.h +++ b/lib/qcd/action/gauge/GaugeImpl.h @@ -66,8 +66,7 @@ public: // Move this elsewhere? FIXME static inline void AddGaugeLink(GaugeField &U, GaugeLinkField &W, int mu) { // U[mu] += W - PARALLEL_FOR_LOOP - for (auto ss = 0; ss < U._grid->oSites(); ss++) { + parallel_for (auto ss = 0; ss < U._grid->oSites(); ss++) { U._odata[ss]._internal[mu] = U._odata[ss]._internal[mu] + W._odata[ss]._internal; } diff --git a/lib/qcd/action/pseudofermion/PseudoFermion.h b/lib/qcd/action/pseudofermion/PseudoFermion.h new file mode 100644 index 00000000..bccca3d4 --- /dev/null +++ b/lib/qcd/action/pseudofermion/PseudoFermion.h @@ -0,0 +1,42 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/pseudofermion/PseudoFermion_aggregate.h + +Copyright (C) 2015 + +Author: Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef QCD_PSEUDOFERMION_AGGREGATE_H +#define QCD_PSEUDOFERMION_AGGREGATE_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#endif diff --git a/lib/qcd/action/pseudofermion/TwoFlavour.h b/lib/qcd/action/pseudofermion/TwoFlavour.h index 6b65a95d..ddc17d42 100644 --- a/lib/qcd/action/pseudofermion/TwoFlavour.h +++ b/lib/qcd/action/pseudofermion/TwoFlavour.h @@ -63,8 +63,7 @@ class TwoFlavourPseudoFermionAction : public Action { Phi(Op.FermionGrid()){}; ////////////////////////////////////////////////////////////////////////////////////// - // Push the gauge field in to the dops. Assume any BC's and smearing already - // applied + // Push the gauge field in to the dops. Assume any BC's and smearing already applied ////////////////////////////////////////////////////////////////////////////////////// virtual void refresh(const GaugeField &U, GridParallelRNG &pRNG) { // P(phi) = e^{- phi^dag (MdagM)^-1 phi} @@ -107,8 +106,7 @@ class TwoFlavourPseudoFermionAction : public Action { MdagMOp.Op(X, Y); RealD action = norm2(Y); - std::cout << GridLogMessage << "Pseudofermion action " << action - << std::endl; + std::cout << GridLogMessage << "Pseudofermion action " << action << std::endl; return action; }; @@ -119,6 +117,7 @@ class TwoFlavourPseudoFermionAction : public Action { // // = - Ydag dM X - Xdag dMdag Y // + // ////////////////////////////////////////////////////// virtual void deriv(const GaugeField &U, GaugeField &dSdU) { FermOp.ImportGauge(U); @@ -133,8 +132,7 @@ class TwoFlavourPseudoFermionAction : public Action { DerivativeSolver(MdagMOp, Phi, X); // X = (MdagM)^-1 phi MdagMOp.Op(X, Y); // Y = M X = (Mdag)^-1 phi - // Our conventions really make this UdSdU; We do not differentiate wrt Udag - // here. 
+ // Our conventions really make this UdSdU; We do not differentiate wrt Udag here. // So must take dSdU - adj(dSdU) and left multiply by mom to get dS/dt. FermOp.MDeriv(tmp, Y, X, DaggerNo); diff --git a/lib/qcd/hmc/HMC.h b/lib/qcd/hmc/HMC.h index 05838349..a5dc611e 100644 --- a/lib/qcd/hmc/HMC.h +++ b/lib/qcd/hmc/HMC.h @@ -42,6 +42,9 @@ directory #include +#include +#include + namespace Grid { namespace QCD { @@ -230,7 +233,12 @@ class HybridMonteCarlo { } }; + } // QCD } // Grid +#include +#include +#include + #endif diff --git a/lib/qcd/hmc/HMC_aggregate.h b/lib/qcd/hmc/HMC_aggregate.h new file mode 100644 index 00000000..7d3ec377 --- /dev/null +++ b/lib/qcd/hmc/HMC_aggregate.h @@ -0,0 +1,42 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/hmc/HMC.h + +Copyright (C) 2015 + +Author: Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +//-------------------------------------------------------------------- +//-------------------------------------------------------------------- +#ifndef HMC_AGGREGATE_INCLUDED +#define HMC_AGGREGATE_INCLUDED + +#include + +#include +// annoying location; should move this ? +#include +#include +#include + +#endif diff --git a/lib/qcd/hmc/HmcRunner.h b/lib/qcd/hmc/HmcRunner.h index 53b127cf..ed9777dc 100644 --- a/lib/qcd/hmc/HmcRunner.h +++ b/lib/qcd/hmc/HmcRunner.h @@ -114,8 +114,8 @@ class NerscHmcRunnerTemplate { */ ////////////// NoSmearing SmearingPolicy; - typedef MinimumNorm2, RepresentationsPolicy > - IntegratorType; // change here to change the algorithm + // change here to change the algorithm + typedef MinimumNorm2, RepresentationsPolicy > IntegratorType; IntegratorParameters MDpar(40, 1.0); IntegratorType MDynamics(UGrid, MDpar, TheAction, SmearingPolicy); diff --git a/lib/qcd/representations/Representations.h b/lib/qcd/representations/Representations.h new file mode 100644 index 00000000..22311be0 --- /dev/null +++ b/lib/qcd/representations/Representations.h @@ -0,0 +1,9 @@ +#ifndef REPRESENTATIONS_H +#define REPRESENTATIONS_H + +#include +#include +#include +#include + +#endif diff --git a/lib/qcd/representations/adjoint.h b/lib/qcd/representations/adjoint.h index facc72f1..078d12a1 100644 --- a/lib/qcd/representations/adjoint.h +++ b/lib/qcd/representations/adjoint.h @@ -112,4 +112,4 @@ typedef AdjointRep AdjointRepresentation; } } -#endif \ No newline at end of file +#endif diff --git a/lib/qcd/spin/Spin.h b/lib/qcd/spin/Spin.h new file mode 100644 index 00000000..107515ed --- /dev/null +++ b/lib/qcd/spin/Spin.h @@ -0,0 +1,5 @@ +#ifndef QCD_SPIN_H +#define QCD_SPIN_H +#include +#include +#endif diff --git a/lib/qcd/utils/LinalgUtils.h 
b/lib/qcd/utils/LinalgUtils.h index 754cdd2c..5eaf1c2a 100644 --- a/lib/qcd/utils/LinalgUtils.h +++ b/lib/qcd/utils/LinalgUtils.h @@ -48,8 +48,7 @@ void axpibg5x(Lattice &z,const Lattice &x,Coeff a,Coeff b) GridBase *grid=x._grid; Gamma G5(Gamma::Algebra::Gamma5); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ + parallel_for(int ss=0;ssoSites();ss++){ vobj tmp; tmp = a*x._odata[ss]; tmp = tmp + G5*(b*timesI(x._odata[ss])); @@ -65,8 +64,7 @@ void axpby_ssp(Lattice &z, Coeff a,const Lattice &x,Coeff b,const La conformable(x,z); GridBase *grid=x._grid; int Ls = grid->_rdimensions[0]; -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss+=Ls){ // adds Ls + parallel_for(int ss=0;ssoSites();ss+=Ls){ // adds Ls vobj tmp = a*x._odata[ss+s]+b*y._odata[ss+sp]; vstream(z._odata[ss+s],tmp); } @@ -80,9 +78,9 @@ void ag5xpby_ssp(Lattice &z,Coeff a,const Lattice &x,Coeff b,const L conformable(x,z); GridBase *grid=x._grid; int Ls = grid->_rdimensions[0]; + Gamma G5(Gamma::Algebra::Gamma5); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss+=Ls){ // adds Ls + parallel_for(int ss=0;ssoSites();ss+=Ls){ // adds Ls vobj tmp; tmp = G5*x._odata[ss+s]*a; tmp = tmp + b*y._odata[ss+sp]; @@ -99,8 +97,7 @@ void axpbg5y_ssp(Lattice &z,Coeff a,const Lattice &x,Coeff b,const L GridBase *grid=x._grid; int Ls = grid->_rdimensions[0]; Gamma G5(Gamma::Algebra::Gamma5); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss+=Ls){ // adds Ls + parallel_for(int ss=0;ssoSites();ss+=Ls){ // adds Ls vobj tmp; tmp = G5*y._odata[ss+sp]*b; tmp = tmp + a*x._odata[ss+s]; @@ -116,9 +113,9 @@ void ag5xpbg5y_ssp(Lattice &z,Coeff a,const Lattice &x,Coeff b,const conformable(x,z); GridBase *grid=x._grid; int Ls = grid->_rdimensions[0]; + Gamma G5(Gamma::Algebra::Gamma5); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss+=Ls){ // adds Ls + parallel_for(int ss=0;ssoSites();ss+=Ls){ // adds Ls vobj tmp1; vobj tmp2; tmp1 = a*x._odata[ss+s]+b*y._odata[ss+sp]; @@ -135,8 +132,7 @@ void axpby_ssp_pminus(Lattice &z,Coeff a,const Lattice 
&x,Coeff b,co conformable(x,z); GridBase *grid=x._grid; int Ls = grid->_rdimensions[0]; -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss+=Ls){ // adds Ls + parallel_for(int ss=0;ssoSites();ss+=Ls){ // adds Ls vobj tmp; spProj5m(tmp,y._odata[ss+sp]); tmp = a*x._odata[ss+s]+b*tmp; @@ -152,8 +148,7 @@ void axpby_ssp_pplus(Lattice &z,Coeff a,const Lattice &x,Coeff b,con conformable(x,z); GridBase *grid=x._grid; int Ls = grid->_rdimensions[0]; -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss+=Ls){ // adds Ls + parallel_for(int ss=0;ssoSites();ss+=Ls){ // adds Ls vobj tmp; spProj5p(tmp,y._odata[ss+sp]); tmp = a*x._odata[ss+s]+b*tmp; @@ -169,8 +164,7 @@ void G5R5(Lattice &z,const Lattice &x) conformable(x,z); int Ls = grid->_rdimensions[0]; Gamma G5(Gamma::Algebra::Gamma5); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss+=Ls){ // adds Ls + parallel_for(int ss=0;ssoSites();ss+=Ls) { vobj tmp; for(int s=0;soSites(); ss++) { + parallel_for (int ss = 0; ss < grid->oSites(); ss++) { subgroup._odata[ss]()()(0, 0) = source._odata[ss]()()(i0, i0); subgroup._odata[ss]()()(0, 1) = source._odata[ss]()()(i0, i1); subgroup._odata[ss]()()(1, 0) = source._odata[ss]()()(i1, i0); @@ -252,8 +251,7 @@ class SU { su2SubGroupIndex(i0, i1, su2_index); dest = 1.0; // start out with identity - PARALLEL_FOR_LOOP - for (int ss = 0; ss < grid->oSites(); ss++) { + parallel_for (int ss = 0; ss < grid->oSites(); ss++) { dest._odata[ss]()()(i0, i0) = subgroup._odata[ss]()()(0, 0); dest._odata[ss]()()(i0, i1) = subgroup._odata[ss]()()(0, 1); dest._odata[ss]()()(i1, i0) = subgroup._odata[ss]()()(1, 0); diff --git a/lib/qcd/utils/SUnAdjoint.h b/lib/qcd/utils/SUnAdjoint.h index 7c1145e3..9d9b77bd 100644 --- a/lib/qcd/utils/SUnAdjoint.h +++ b/lib/qcd/utils/SUnAdjoint.h @@ -179,4 +179,4 @@ typedef SU_Adjoint AdjointMatrices; } } -#endif \ No newline at end of file +#endif diff --git a/lib/qcd/utils/SpaceTimeGrid.cc b/lib/qcd/utils/SpaceTimeGrid.cc index 81af5f24..3ada4a3b 100644 --- 
a/lib/qcd/utils/SpaceTimeGrid.cc +++ b/lib/qcd/utils/SpaceTimeGrid.cc @@ -25,7 +25,8 @@ Author: Peter Boyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include +#include +#include namespace Grid { namespace QCD { diff --git a/lib/qcd/utils/Utils.h b/lib/qcd/utils/Utils.h new file mode 100644 index 00000000..1bde6f9b --- /dev/null +++ b/lib/qcd/utils/Utils.h @@ -0,0 +1,9 @@ +#ifndef QCD_UTILS_H +#define QCD_UTILS_H +#include +#include +#include +#include +#include +#include +#endif diff --git a/lib/serialisation/BinaryIO.cc b/lib/serialisation/BinaryIO.cc index dbee9d8b..7133adc0 100644 --- a/lib/serialisation/BinaryIO.cc +++ b/lib/serialisation/BinaryIO.cc @@ -26,7 +26,7 @@ Author: paboyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include +#include using namespace Grid; using namespace std; diff --git a/lib/serialisation/TextIO.cc b/lib/serialisation/TextIO.cc index ed6f4f5c..c0018bee 100644 --- a/lib/serialisation/TextIO.cc +++ b/lib/serialisation/TextIO.cc @@ -26,7 +26,7 @@ Author: paboyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include +#include using namespace Grid; using namespace std; diff --git a/lib/serialisation/XmlIO.cc b/lib/serialisation/XmlIO.cc index f8469ada..db1535d6 100644 --- a/lib/serialisation/XmlIO.cc +++ b/lib/serialisation/XmlIO.cc @@ -26,7 +26,7 @@ Author: paboyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include +#include using namespace Grid; using namespace std; 
diff --git a/lib/simd/BGQQPX.h b/lib/simd/BGQQPX.h new file mode 100644 index 00000000..34888ab7 --- /dev/null +++ b/lib/simd/BGQQPX.h @@ -0,0 +1,796 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/simd/BGQQPX.h + + Copyright (C) 2015 + +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef GRID_ASM_BGQ_QPX_H +#define GRID_ASM_BGQ_QPX_H + +#include + +/********************************************************* + * Architectural macros (function-like: no space before the parameter list) + *********************************************************/ +#define VLOADf(OFF,PTR,DEST) "qvlfsux " #DEST "," #OFF "," #PTR ") ;\n" +#define VLOADd(OFF,PTR,DEST) "qvlfdux " #DEST "," #OFF "," #PTR ") ;\n" +#define VSTOREf(OFF,PTR,SRC) "qvstfsux " #SRC "," #OFF "," #PTR ") ;\n" +#define VSTOREd(OFF,PTR,SRC) "qvstfdux " #SRC "," #OFF "," #PTR ") ;\n" +#define VSPLATf(A,B,DEST) "qvlfcdxa " #A "," #B "," #DEST ";\n" +#define VSPLATd(A,B,DEST) "qvlfcsxa " #A "," #B "," #DEST ";\n" + +#define LOAD64(A,ptr) +#define VZERO(DEST) "qvfclr " #DEST "; \n" +#define VONE(DEST) "qvfset " #DEST "; \n" +#define VNEG(SRC,DEST) "qvfneg " #DEST "," #SRC "; \n" +#define VMOV(A,DEST) "qvfmr " #DEST "," #A ";\n" + +#define VADD(A,B,DEST) "qvfadd " #DEST "," #A "," #B ";\n" +#define VSUB(A,B,DEST) "qvfsub " #DEST "," #A "," #B ";\n" +#define VMUL(A,B,DEST) "qvfmul " #DEST "," #A "," #B ";\n" +#define VMUL_RR_RI(A,B,DEST) "qvfxmul " #DEST "," #A "," #B ";\n" +#define VMADD(A,B,C,DEST) "qvfmadd " #DEST "," #A "," #B ","#C ";\n" +#define VMADD_RR_RI(A,B,C,DEST) "qvfxmadd " #DEST "," #A "," #B ","#C ";\n" +#define VMADD_MII_IR(A,B,C,DEST) "qvfxxnpmadd " #DEST "," #A "," #B ","#C ";\n" +#define VMADD_II_MIR(A,B,C,DEST) "qvfmadd " #DEST "," #A "," #B ","#C ";\n" + +#define CACHE_LOCK(PTR) asm (" dcbtls %%r0, %0 \n" : : "r" (PTR) ); +#define CACHE_UNLOCK(PTR) asm (" dcblc %%r0, %0 \n" : : "r" (PTR) ); +#define CACHE_FLUSH(PTR) asm (" dcbf %%r0, %0 \n" : : "r" (PTR) ); +#define CACHE_TOUCH(PTR) asm (" dcbt %%r0, %0 \n" : : "r" (PTR) ); + +// Gauge field locking 2 x 9 complex == 18*8 / 16 bytes per link +// This is 144/288 bytes == 4.5; 9 lines 
+#define MASK_REGS /*NOOP ON BGQ*/ +#define PF_GAUGE(A) /*NOOP ON BGQ*/ +#define PREFETCH1_CHIMU(base) /*NOOP ON BGQ*/ +#define PREFETCH_CHIMU(base) /*NOOP ON BGQ*/ + +/********************************************************* + * Register definitions + *********************************************************/ +#define psi_00 0 +#define psi_01 1 +#define psi_02 2 + +#define psi_10 3 +#define psi_11 4 +#define psi_12 5 + +#define psi_20 6 +#define psi_21 7 +#define psi_22 8 + +#define psi_30 9 +#define psi_31 10 +#define psi_32 11 + +#define Chi_00 12 +#define Chi_01 13 +#define Chi_02 14 + +#define Chi_10 15 +#define Chi_11 16 +#define Chi_12 17 + +#define UChi_00 18 +#define UChi_01 19 +#define UChi_02 20 + +#define UChi_10 21 +#define UChi_11 22 +#define UChi_12 23 + +#define U0 24 +#define U1 25 +#define U2 26 +#define one 27 + +#define REP %%r16 +#define IMM %%r17 + +/*Alias regs: the 12 Chimu names overlay the 6 Chi and 6 UChi registers one-to-one*/ +#define Chimu_00 Chi_00 +#define Chimu_01 Chi_01 +#define Chimu_02 Chi_02 +#define Chimu_10 Chi_10 +#define Chimu_11 Chi_11 +#define Chimu_12 Chi_12 +#define Chimu_20 UChi_00 +#define Chimu_21 UChi_01 +#define Chimu_22 UChi_02 +#define Chimu_30 UChi_10 +#define Chimu_31 UChi_11 +#define Chimu_32 UChi_12 + +/********************************************************* + * Macro sequences encoding QCD + *********************************************************/ +#define LOCK_GAUGE(dir) \ + { \ + uint8_t *byte_addr = (uint8_t *)&U._odata[sU](dir); \ + for(int i=0;i< 18*2*BYTES_PER_WORD*8;i+=32){ \ + CACHE_LOCK(&byte_addr[i]); \ + } \ + } + +#define UNLOCK_GAUGE(dir) \ + { \ + uint8_t *byte_addr = (uint8_t *)&U._odata[sU](dir); \ + for(int i=0;i< 18*2*BYTES_PER_WORD*8;i+=32){ \ + CACHE_UNLOCK(&byte_addr[i]); \ + } \ + } + +#define MAYBEPERM(A,B) + +#define PERMUTE_DIR3 +#define PERMUTE_DIR2 +#define PERMUTE_DIR1 +#define PERMUTE_DIR0 + +#define MULT_2SPIN_DIR_PFXP(A,p) MULT_2SPIN(&U._odata[sU](A),p) +#define MULT_2SPIN_DIR_PFYP(A,p) MULT_2SPIN(&U._odata[sU](A),p) +#define 
MULT_2SPIN_DIR_PFZP(A,p) MULT_2SPIN(&U._odata[sU](A),p) +#define MULT_2SPIN_DIR_PFTP(A,p) MULT_2SPIN(&U._odata[sU](A),p) +#define MULT_2SPIN_DIR_PFXM(A,p) MULT_2SPIN(&U._odata[sU](A),p) +#define MULT_2SPIN_DIR_PFYM(A,p) MULT_2SPIN(&U._odata[sU](A),p) +#define MULT_2SPIN_DIR_PFZM(A,p) MULT_2SPIN(&U._odata[sU](A),p) +#define MULT_2SPIN_DIR_PFTM(A,p) MULT_2SPIN(&U._odata[sU](A),p) + +#define MULT_SPIN(ptr,p) { \ + uint64_t ub = ((uint64_t)base); \ + asm ( \ + VLOAD(%0,%3,U0) \ + VLOAD(%1,%3,U1) \ + VLOAD(%2,%3,U2) \ + VMUL_RR_RI(U0,Chi_00,UChi_00) \ + VMUL_RR_RI(U1,Chi_00,UChi_01) \ + VMUL_RR_RI(U2,Chi_00,UChi_02) \ + VMUL_RR_RI(U0,Chi_10,UChi_10) \ + VMUL_RR_RI(U1,Chi_10,UChi_11) \ + VMUL_RR_RI(U2,Chi_10,UChi_12) \ + VMADD_MII_IR(U0,Chi_00,UChi_00,UChi_00) \ + VMADD_MII_IR(U1,Chi_00,UChi_01,UChi_01) \ + VMADD_MII_IR(U2,Chi_00,UChi_02,UChi_02) \ + VMADD_MII_IR(U0,Chi_10,UChi_10,UChi_10) \ + VMADD_MII_IR(U1,Chi_10,UChi_11,UChi_11) \ + VMADD_MII_IR(U2,Chi_10,UChi_12,UChi_12) \ + : : "r" (0), "r" (32*3), "r" (32*6), "r" (ub )); \ + asm ( \ + VLOAD(%0,%3,U0) \ + VLOAD(%1,%3,U1) \ + VLOAD(%2,%3,U2) \ + VMADD_RR_RI(U0,Chi_01,UChi_00,UChi_00) \ + VMADD_RR_RI(U1,Chi_01,UChi_01,UChi_01) \ + VMADD_RR_RI(U2,Chi_01,UChi_02,UChi_02) \ + VMADD_RR_RI(U0,Chi_11,UChi_10,UChi_10) \ + VMADD_RR_RI(U1,Chi_11,UChi_11,UChi_11) \ + VMADD_RR_RI(U2,Chi_11,UChi_12,UChi_12) \ + VMADD_MII_IR(U0,Chi_01,UChi_00,UChi_00) \ + VMADD_MII_IR(U1,Chi_01,UChi_01,UChi_01) \ + VMADD_MII_IR(U2,Chi_01,UChi_02,UChi_02) \ + VMADD_MII_IR(U0,Chi_11,UChi_10,UChi_10) \ + VMADD_MII_IR(U1,Chi_11,UChi_11,UChi_11) \ + VMADD_MII_IR(U2,Chi_11,UChi_12,UChi_12) \ + : : "r" (32), "r" (32*4), "r" (32*7), "r" (ub )); \ + asm ( \ + VLOAD(%0,%3,U0) \ + VLOAD(%1,%3,U1) \ + VLOAD(%2,%3,U2) \ + VMADD_RR_RI(U0,Chi_02,UChi_00,UChi_00) \ + VMADD_RR_RI(U1,Chi_02,UChi_01,UChi_01) \ + VMADD_RR_RI(U2,Chi_02,UChi_02,UChi_02) \ + VMADD_RR_RI(U0,Chi_12,UChi_10,UChi_10) \ + VMADD_RR_RI(U1,Chi_12,UChi_11,UChi_11) \ + 
VMADD_RR_RI(U2,Chi_12,UChi_12,UChi_12) \ + VMADD_MII_IR(U0,Chi_02,UChi_00,UChi_00) \ + VMADD_MII_IR(U1,Chi_02,UChi_01,UChi_01) \ + VMADD_MII_IR(U2,Chi_02,UChi_02,UChi_02) \ + VMADD_MII_IR(U0,Chi_12,UChi_10,UChi_10) \ + VMADD_MII_IR(U1,Chi_12,UChi_11,UChi_11) \ + VMADD_MII_IR(U2,Chi_12,UChi_12,UChi_12) \ + : : "r" (32*2), "r" (32*5), "r" (32*8), "r" (ub )); \ + } + +#define SAVE_RESULT(base,basep) {\ + uint64_t ub = ((uint64_t)base) - 32; \ + asm("mr %0,"REP";\n\t" \ + "li " IMM ",32;\n\t" \ + VSTORE(IMM,REP,psi_00) \ + VSTORE(IMM,REP,psi_01) \ + VSTORE(IMM,REP,psi_02) \ + VSTORE(IMM,REP,psi_10) \ + VSTORE(IMM,REP,psi_11) \ + VSTORE(IMM,REP,psi_12) \ + VSTORE(IMM,REP,psi_20) \ + VSTORE(IMM,REP,psi_21) \ + VSTORE(IMM,REP,psi_22) \ + VSTORE(IMM,REP,psi_30) \ + VSTORE(IMM,REP,psi_31) \ + VSTORE(IMM,REP,psi_32) \ + ); \ +} + +/* + *Annoying BG/Q loads with no immediat indexing and big performance hit + *when second miss to a L1 line occurs + */ +#define LOAD_CHI(base) { \ + uint64_t ub = ((uint64_t)base) - 64; \ + asm("mr %0,"REP";\n\t" \ + "li " IMM ",64;\n\t" \ + VLOAD(IMM,REP,Chi_00) \ + VLOAD(IMM,REP,Chi_02) \ + VLOAD(IMM,REP,Chi_11) : : "r" (ub) ); \ + ub = ((uint64_t)base) - 32; \ + asm("mr %0,"REP";\n\t" \ + "li IMM,64;\n\t" \ + VLOAD(IMM,REP,Chimu_01) \ + VLOAD(IMM,REP,Chimu_10) \ + VLOAD(IMM,REP,Chimu_12) : : "r" (ub) ); \ + } + +#define LOAD_CHIMU(base) { \ + uint64_t ub = ((uint64_t)base) - 64; \ + asm("mr %0,"REP";\n\t" \ + "li IMM,64;\n\t" \ + VLOAD(IMM,REP,Chimu_00) \ + VLOAD(IMM,REP,Chimu_02) \ + VLOAD(IMM,REP,Chimu_11) \ + VLOAD(IMM,REP,Chimu_20) \ + VLOAD(IMM,REP,Chimu_22) \ + VLOAD(IMM,REP,Chimu_31) : : "r" (ub) ); \ + ub = ((uint64_t)base) - 32; \ + asm("mr %0,"REP";\n\t" \ + "li IMM,64;\n\t" \ + VLOAD(IMM,REP,Chimu_01) \ + VLOAD(IMM,REP,Chimu_10) \ + VLOAD(IMM,REP,Chimu_12) \ + VLOAD(IMM,REP,Chimu_21) \ + VLOAD(IMM,REP,Chimu_30) \ + VLOAD(IMM,REP,Chimu_32) : : "r" (ub) ); \ + } + +// hspin(0)=fspin(0)+timesI(fspin(3)); +// 
hspin(1)=fspin(1)+timesI(fspin(2)); +#define XP_PROJMEM(base) { \ + LOAD_CHIMU(base); \ + asm ( \ + VONE(one) \ + VMADD_MII_IR(one,Chimu_30,Chimu_00,Chi_00) \ + VMADD_MII_IR(one,Chimu_31,Chimu_01,Chi_01) \ + VMADD_MII_IR(one,Chimu_32,Chimu_02,Chi_02) \ + VMADD_MII_IR(one,Chimu_20,Chimu_10,Chi_10) \ + VMADD_MII_IR(one,Chimu_21,Chimu_11,Chi_11) \ + VMADD_MII_IR(one,Chimu_22,Chimu_12,Chi_12) \ + ); \ + } + +#define XM_PROJMEM(base) { \ + LOAD_CHIMU(base); \ + asm ( \ + VONE(one) \ + VMADD_II_MIR(one,Chimu_30,Chimu_00,Chi_00) \ + VMADD_II_MIR(one,Chimu_31,Chimu_01,Chi_01) \ + VMADD_II_MIR(one,Chimu_32,Chimu_02,Chi_02) \ + VMADD_II_MIR(one,Chimu_20,Chimu_10,Chi_10) \ + VMADD_II_MIR(one,Chimu_21,Chimu_11,Chi_11) \ + VMADD_II_MIR(one,Chimu_22,Chimu_12,Chi_12) \ + ); \ + } + +// hspin(0)=fspin(0)-fspin(3); +// hspin(1)=fspin(1)+fspin(2); +#define YP_PROJMEM(base) { \ + LOAD_CHIMU(base); \ + asm ( \ + VSUB(Chimu_00,Chimu_00,Chi_30) \ + VSUB(Chimu_01,Chimu_01,Chi_31) \ + VSUB(Chimu_02,Chimu_02,Chi_32) \ + VADD(Chimu_10,Chimu_10,Chi_20) \ + VADD(Chimu_11,Chimu_11,Chi_21) \ + VADD(Chimu_12,Chimu_12,Chi_22) \ + ); \ + } + +#define YM_PROJMEM(base) { \ + LOAD_CHIMU(base); \ + asm ( \ + VADD(Chimu_00,Chimu_00,Chi_30) \ + VADD(Chimu_01,Chimu_01,Chi_31) \ + VADD(Chimu_02,Chimu_02,Chi_32) \ + VSUB(Chimu_10,Chimu_10,Chi_20) \ + VSUB(Chimu_11,Chimu_11,Chi_21) \ + VSUB(Chimu_12,Chimu_12,Chi_22) \ + ); \ + } + + /*Gz + * 0 0 i 0 [0]+-i[2] + * 0 0 0 -i [1]-+i[3] + * -i 0 0 0 + * 0 i 0 0 + */ +#define ZP_PROJMEM(base) { \ + LOAD_CHIMU(base); \ + asm ( \ + VONE(one) \ + VMADD_MII_IR(one,Chimu_20,Chimu_00,Chi_00) \ + VMADD_MII_IR(one,Chimu_21,Chimu_01,Chi_01) \ + VMADD_MII_IR(one,Chimu_22,Chimu_02,Chi_02) \ + VMADD_II_MIR(one,Chimu_30,Chimu_10,Chi_10) \ + VMADD_II_MIR(one,Chimu_31,Chimu_11,Chi_11) \ + VMADD_II_MIR(one,Chimu_32,Chimu_12,Chi_12) \ + ); \ + } + +#define ZM_PROJMEM(base) { \ + LOAD_CHIMU(base); \ + asm ( \ + VONE(one) \ + VMADD_II_MIR(one,Chimu_20,Chimu_00,Chi_00) \ + 
VMADD_II_MIR(one,Chimu_21,Chimu_01,Chi_01) \ + VMADD_II_MIR(one,Chimu_22,Chimu_02,Chi_02) \ + VMADD_MII_IR(one,Chimu_30,Chimu_10,Chi_10) \ + VMADD_MII_IR(one,Chimu_31,Chimu_11,Chi_11) \ + VMADD_MII_IR(one,Chimu_32,Chimu_12,Chi_12) \ + ); \ + } + /*Gt + * 0 0 1 0 [0]+-[2] + * 0 0 0 1 [1]+-[3] + * 1 0 0 0 + * 0 1 0 0 + */ +#define TP_PROJMEM(base) { \ + LOAD_CHIMU(base); \ + asm ( \ + VADD(Chimu_00,Chimu_00,Chi_20) \ + VADD(Chimu_01,Chimu_01,Chi_21) \ + VADD(Chimu_02,Chimu_02,Chi_22) \ + VADD(Chimu_10,Chimu_10,Chi_30) \ + VADD(Chimu_11,Chimu_11,Chi_31) \ + VADD(Chimu_12,Chimu_12,Chi_32) \ + ); \ + } + +#define TM_PROJMEM(base) { \ + LOAD_CHIMU(base); \ + asm ( \ + VSUB(Chimu_00,Chimu_00,Chi_20) \ + VSUB(Chimu_01,Chimu_01,Chi_21) \ + VSUB(Chimu_02,Chimu_02,Chi_22) \ + VSUB(Chimu_10,Chimu_10,Chi_30) \ + VSUB(Chimu_11,Chimu_11,Chi_31) \ + VSUB(Chimu_12,Chimu_12,Chi_32) \ + ); \ + } + +/* + fspin(0)=hspin(0); + fspin(1)=hspin(1); + fspin(2)=timesMinusI(hspin(1)); + fspin(3)=timesMinusI(hspin(0)); + + fspin(0)+=hspin(0); + fspin(1)+=hspin(1); + fspin(2)-=timesI(hspin(1)); + fspin(3)-=timesI(hspin(0)); + */ +#define XP_RECON { \ + asm(\ + VONE(one)\ + VMOV(psi_00,UChi_00) VMOV(psi_01,UChi_01) VMOV(psi_02,UChi_02)\ + VMOV(psi_10,UChi_10) VMOV(psi_11,UChi_11) VMOV(psi_12,UChi_12)\ + VZERO(psi_20) VZERO(psi_21) VZERO(psi_22) \ + VZERO(psi_30) VZERO(psi_31) VZERO(psi_32) \ + VMADD_II_MIR(one,UChi_10,psi_20,psi_20) \ + VMADD_II_MIR(one,UChi_11,psi_21,psi_21) \ + VMADD_II_MIR(one,UChi_12,psi_22,psi_22) \ + VMADD_II_MIR(one,UChi_00,psi_30,psi_30) \ + VMADD_II_MIR(one,UChi_01,psi_31,psi_31) \ + VMADD_II_MIR(one,UChi_02,psi_32,psi_32) \ + ); \ + } + +#define XM_RECON { \ + asm(\ + VONE(one)\ + VMOV(psi_00,UChi_00) VMOV(psi_01,UChi_01) VMOV(psi_02,UChi_02)\ + VMOV(psi_10,UChi_10) VMOV(psi_11,UChi_11) VMOV(psi_12,UChi_12)\ + VZERO(psi_20) VZERO(psi_21) VZERO(psi_22) \ + VZERO(psi_30) VZERO(psi_31) VZERO(psi_32) \ + VMADD_MII_IR(one,UChi_10,psi_20,psi_20) \ + 
VMADD_MII_IR(one,UChi_11,psi_21,psi_21) \ + VMADD_MII_IR(one,UChi_12,psi_22,psi_22) \ + VMADD_MII_IR(one,UChi_00,psi_30,psi_30) \ + VMADD_MII_IR(one,UChi_01,psi_31,psi_31) \ + VMADD_MII_IR(one,UChi_02,psi_32,psi_32) \ + ); \ + } + +#define XP_RECON_ACCUM { \ + asm(\ + VONE(one)\ + VADD(psi_00,UChi_00,psi_00) VADD(psi_01,UChi_01,psi_01) VADD(psi_02,UChi_02,psi_02) \ + VADD(psi_10,UChi_10,psi_10) VADD(psi_11,UChi_11,psi_11) VADD(psi_12,UChi_12,psi_12) \ + VMADD_II_MIR(one,UChi_10,psi_20,psi_20) \ + VMADD_II_MIR(one,UChi_11,psi_21,psi_21) \ + VMADD_II_MIR(one,UChi_12,psi_22,psi_22) \ + VMADD_II_MIR(one,UChi_00,psi_30,psi_30) \ + VMADD_II_MIR(one,UChi_01,psi_31,psi_31) \ + VMADD_II_MIR(one,UChi_02,psi_32,psi_32) \ + ); \ + } + +#define XM_RECON_ACCUM { \ + asm(\ + VONE(one)\ + VADD(psi_00,UChi_00,psi_00) VADD(psi_01,UChi_01,psi_01) VADD(psi_02,UChi_02,psi_02) \ + VADD(psi_10,UChi_10,psi_10) VADD(psi_11,UChi_11,psi_11) VADD(psi_12,UChi_12,psi_12) \ + VMADD_MII_IR(one,UChi_10,psi_20,psi_20) \ + VMADD_MII_IR(one,UChi_11,psi_21,psi_21) \ + VMADD_MII_IR(one,UChi_12,psi_22,psi_22) \ + VMADD_MII_IR(one,UChi_00,psi_30,psi_30) \ + VMADD_MII_IR(one,UChi_01,psi_31,psi_31) \ + VMADD_MII_IR(one,UChi_02,psi_32,psi_32) \ + ); \ + } + +// fspin(2)+=hspin(1); +// fspin(3)-=hspin(0); +#define YP_RECON_ACCUM {\ + asm(\ + VADD(psi_00,UChi_00,psi_00) VADD(psi_01,UChi_01,psi_01) VADD(psi_02,UChi_02,psi_02) \ + VADD(psi_10,UChi_10,psi_10) VADD(psi_11,UChi_11,psi_11) VADD(psi_12,UChi_12,psi_12) \ + VADD(psi_20,UChi_10,psi_20) VADD(psi_21,UChi_11,psi_21) VADD(psi_22,UChi_12,psi_22) \ + VSUB(psi_30,UChi_00,psi_30) VSUB(psi_31,UChi_01,psi_31) VSUB(psi_32,UChi_02,psi_32) \ + );\ + } +#define YM_RECON_ACCUM {\ + asm(\ + VADD(psi_00,UChi_00,psi_00) VADD(psi_01,UChi_01,psi_01) VADD(psi_02,UChi_02,psi_02) \ + VADD(psi_10,UChi_10,psi_10) VADD(psi_11,UChi_11,psi_11) VADD(psi_12,UChi_12,psi_12) \ + VSUB(psi_20,UChi_10,psi_20) VSUB(psi_21,UChi_11,psi_21) VSUB(psi_22,UChi_12,psi_22) \ + 
VADD(psi_30,UChi_00,psi_30) VADD(psi_31,UChi_01,psi_31) VADD(psi_32,UChi_02,psi_32) \ + );\ + } + +// fspin(2)-=timesI(hspin(0)); +// fspin(3)+=timesI(hspin(1)); +#define ZP_RECON_ACCUM {\ + asm(\ + VONE(one)\ + VADD(psi_00,UChi_00,psi_00) VADD(psi_01,UChi_01,psi_01) VADD(psi_02,UChi_02,psi_02) \ + VADD(psi_10,UChi_10,psi_10) VADD(psi_11,UChi_11,psi_11) VADD(psi_12,UChi_12,psi_12) \ + VMADD_II_MIR(one,UChi_00,psi_20,psi_20) \ + VMADD_II_MIR(one,UChi_01,psi_21,psi_21) \ + VMADD_II_MIR(one,UChi_02,psi_22,psi_22) \ + VMADD_MII_IR(one,UChi_10,psi_30,psi_30) \ + VMADD_MII_IR(one,UChi_11,psi_31,psi_31) \ + VMADD_MII_IR(one,UChi_12,psi_32,psi_32) \ + );\ + } + +#define ZM_RECON_ACCUM {\ + asm(\ + VONE(one)\ + VADD(psi_00,UChi_00,psi_00) VADD(psi_01,UChi_01,psi_01) VADD(psi_02,UChi_02,psi_02) \ + VADD(psi_10,UChi_10,psi_10) VADD(psi_11,UChi_11,psi_11) VADD(psi_12,UChi_12,psi_12) \ + VMADD_MII_IR(one,UChi_00,psi_20,psi_20) \ + VMADD_MII_IR(one,UChi_01,psi_21,psi_21) \ + VMADD_MII_IR(one,UChi_02,psi_22,psi_22) \ + VMADD_II_MIR(one,UChi_10,psi_30,psi_30) \ + VMADD_II_MIR(one,UChi_11,psi_31,psi_31) \ + VMADD_II_MIR(one,UChi_12,psi_32,psi_32) \ + );\ + } + +// fspin(2)+=hspin(0); +// fspin(3)+=hspin(1); +#define TP_RECON_ACCUM {\ + asm(\ + VADD(psi_00,UChi_00,psi_00) VADD(psi_01,UChi_01,psi_01) VADD(psi_02,UChi_02,psi_02) \ + VADD(psi_10,UChi_10,psi_10) VADD(psi_11,UChi_11,psi_11) VADD(psi_12,UChi_12,psi_12) \ + VADD(psi_20,UChi_00,psi_20) VADD(psi_21,UChi_01,psi_21) VADD(psi_22,UChi_02,psi_22) \ + VADD(psi_30,UChi_10,psi_30) VADD(psi_31,UChi_11,psi_31) VADD(psi_32,UChi_12,psi_32) \ + );\ + } + +#define TM_RECON_ACCUM {\ + asm(\ + VONE(one)\ + VADD(psi_00,UChi_00,psi_00) VADD(psi_01,UChi_01,psi_01) VADD(psi_02,UChi_02,psi_02) \ + VADD(psi_10,UChi_10,psi_10) VADD(psi_11,UChi_11,psi_11) VADD(psi_12,UChi_12,psi_12) \ + VSUB(psi_20,UChi_00,psi_20) VSUB(psi_21,UChi_01,psi_21) VSUB(psi_22,UChi_02,psi_22) \ + VSUB(psi_30,UChi_10,psi_30) VSUB(psi_31,UChi_11,psi_31) 
VSUB(psi_32,UChi_12,psi_32) \ + );\ + } + +uint64_t GetPFInfo(int nent,int plocal); +uint64_t GetInfo(int ptype,int local,int perm,int Xp,int ent,int plocal); + +#define COMPLEX_TYPE int; +int signs[4]; + +void testme(int osites,int ssU) +{ + int local,perm, ptype; + uint64_t base; + uint64_t basep; + const uint64_t plocal =(uint64_t) & in._odata[0]; + + // vComplexF isigns[2] = { signs[0], signs[1] }; + //COMPLEX_TYPE is vComplexF of vComplexD depending + //on the chosen precision + COMPLEX_TYPE *isigns = &signs[0]; + + MASK_REGS; + int nmax=osites; + for(int site=0;site=nmax) ssn=0; + int sUn=ssn; + for(int s=0;s shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + YP_PROJMEM(base); +#else + YM_PROJMEM(base); +#endif + MAYBEPERM(PERMUTE_DIR2,perm); + } else { + LOAD_CHI(base); + } + base = GetInfo(ptype,local,perm,Zp,ent,plocal); ent++; + PREFETCH_CHIMU(base); + { + MULT_2SPIN_DIR_PFYP(Yp,basep); + } + LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + YP_RECON_ACCUM; +#else + YM_RECON_ACCUM; +#endif + + //////////////////////////////// + // Zp + //////////////////////////////// + basep = GetPFInfo(nent,plocal); nent++; + if ( local ) { + LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + ZP_PROJMEM(base); +#else + ZM_PROJMEM(base); +#endif + MAYBEPERM(PERMUTE_DIR1,perm); + } else { + LOAD_CHI(base); + } + base = GetInfo(ptype,local,perm,Tp,ent,plocal); ent++; + PREFETCH_CHIMU(base); + { + MULT_2SPIN_DIR_PFZP(Zp,basep); + } + LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + ZP_RECON_ACCUM; +#else + ZM_RECON_ACCUM; +#endif + + //////////////////////////////// + // Tp + //////////////////////////////// + basep = GetPFInfo(nent,plocal); nent++; + if ( local ) { + LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + TP_PROJMEM(base); +#else + TM_PROJMEM(base); +#endif + 
MAYBEPERM(PERMUTE_DIR0,perm); + } else { + LOAD_CHI(base); + } + base = GetInfo(ptype,local,perm,Xm,ent,plocal); ent++; + PREFETCH_CHIMU(base); + { + MULT_2SPIN_DIR_PFTP(Tp,basep); + } + LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + TP_RECON_ACCUM; +#else + TM_RECON_ACCUM; +#endif + + //////////////////////////////// + // Xm + //////////////////////////////// +#ifndef STREAM_STORE + basep= (uint64_t) &out._odata[ss]; +#endif + // basep= GetPFInfo(nent,plocal); nent++; + if ( local ) { + LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + XM_PROJMEM(base); +#else + XP_PROJMEM(base); +#endif + MAYBEPERM(PERMUTE_DIR3,perm); + } else { + LOAD_CHI(base); + } + base = GetInfo(ptype,local,perm,Ym,ent,plocal); ent++; + PREFETCH_CHIMU(base); + { + MULT_2SPIN_DIR_PFXM(Xm,basep); + } + LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + XM_RECON_ACCUM; +#else + XP_RECON_ACCUM; +#endif + + //////////////////////////////// + // Ym + //////////////////////////////// + basep= GetPFInfo(nent,plocal); nent++; + if ( local ) { + LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + YM_PROJMEM(base); +#else + YP_PROJMEM(base); +#endif + MAYBEPERM(PERMUTE_DIR2,perm); + } else { + LOAD_CHI(base); + } + base = GetInfo(ptype,local,perm,Zm,ent,plocal); ent++; + PREFETCH_CHIMU(base); + { + MULT_2SPIN_DIR_PFYM(Ym,basep); + } + LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + YM_RECON_ACCUM; +#else + YP_RECON_ACCUM; +#endif + + //////////////////////////////// + // Zm + //////////////////////////////// + basep= GetPFInfo(nent,plocal); nent++; + if ( local ) { + LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + ZM_PROJMEM(base); +#else + ZP_PROJMEM(base); +#endif + MAYBEPERM(PERMUTE_DIR1,perm); + } else { + LOAD_CHI(base); + } + 
base = GetInfo(ptype,local,perm,Tm,ent,plocal); ent++; + PREFETCH_CHIMU(base); + { + MULT_2SPIN_DIR_PFZM(Zm,basep); + } + LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + ZM_RECON_ACCUM; +#else + ZP_RECON_ACCUM; +#endif + + //////////////////////////////// + // Tm + //////////////////////////////// + basep= GetPFInfo(nent,plocal); nent++; + if ( local ) { + LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + TM_PROJMEM(base); +#else + TP_PROJMEM(base); +#endif + MAYBEPERM(PERMUTE_DIR0,perm); + } else { + LOAD_CHI(base); + } + base= (uint64_t) &out._odata[ss]; +#ifndef STREAM_STORE + PREFETCH_CHIMU(base); +#endif + { + MULT_2SPIN_DIR_PFTM(Tm,basep); + } + LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + TM_RECON_ACCUM; +#else + TP_RECON_ACCUM; +#endif + + basep= GetPFInfo(nent,plocal); nent++; + SAVE_RESULT(base,basep); + + } + ssU++; + } +} + + +#endif diff --git a/lib/simd/Grid_avx.h b/lib/simd/Grid_avx.h index 724f52bb..2dbe26f4 100644 --- a/lib/simd/Grid_avx.h +++ b/lib/simd/Grid_avx.h @@ -469,9 +469,62 @@ namespace Optimization { static inline __m256d Permute3(__m256d in){ return in; }; - }; + struct Exchange{ + // 3210 ordering + static inline void Exchange0(__m256 &out1,__m256 &out2,__m256 in1,__m256 in2){ + //Invertible + //AB CD -> AC BD + //AC BD -> AB CD + out1= _mm256_permute2f128_ps(in1,in2,0x20); + out2= _mm256_permute2f128_ps(in1,in2,0x31); + }; + static inline void Exchange1(__m256 &out1,__m256 &out2,__m256 in1,__m256 in2){ + //Invertible + // ABCD EFGH ->ABEF CDGH + // ABEF CDGH ->ABCD EFGH + out1= _mm256_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(1,0,1,0)); + out2= _mm256_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(3,2,3,2)); + }; + static inline void Exchange2(__m256 &out1,__m256 &out2,__m256 in1,__m256 in2){ + // Invertible ? 
+ // ABCD EFGH -> ACEG BDFH + // ACEG BDFH -> AEBF CGDH + // out1= _mm256_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(2,0,2,0)); + // out2= _mm256_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(3,1,3,1)); + // Bollocks; need + // AECG BFDH -> ABCD EFGH + out1= _mm256_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(2,0,2,0)); /*ACEG*/ + out2= _mm256_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(3,1,3,1)); /*BDFH*/ + out1= _mm256_shuffle_ps(out1,out1,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*AECG*/ + out2= _mm256_shuffle_ps(out2,out2,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*BFDH*/ + }; + static inline void Exchange3(__m256 &out1,__m256 &out2,__m256 in1,__m256 in2){ + assert(0); + return; + }; + + static inline void Exchange0(__m256d &out1,__m256d &out2,__m256d in1,__m256d in2){ + out1= _mm256_permute2f128_pd(in1,in2,0x20); + out2= _mm256_permute2f128_pd(in1,in2,0x31); + return; + }; + static inline void Exchange1(__m256d &out1,__m256d &out2,__m256d in1,__m256d in2){ + out1= _mm256_shuffle_pd(in1,in2,0x0); + out2= _mm256_shuffle_pd(in1,in2,0xF); + }; + static inline void Exchange2(__m256d &out1,__m256d &out2,__m256d in1,__m256d in2){ + assert(0); + return; + }; + static inline void Exchange3(__m256d &out1,__m256d &out2,__m256d in1,__m256d in2){ + assert(0); + return; + }; + }; + + #if defined (AVX2) #define _mm256_alignr_epi32_grid(ret,a,b,n) ret=(__m256) _mm256_alignr_epi8((__m256i)a,(__m256i)b,(n*4)%16) #define _mm256_alignr_epi64_grid(ret,a,b,n) ret=(__m256d) _mm256_alignr_epi8((__m256i)a,(__m256i)b,(n*8)%16) diff --git a/lib/simd/Grid_avx512.h b/lib/simd/Grid_avx512.h index ebf99e16..f39c4033 100644 --- a/lib/simd/Grid_avx512.h +++ b/lib/simd/Grid_avx512.h @@ -343,6 +343,52 @@ namespace Optimization { }; + // On extracting face: Ah Al , Bh Bl -> Ah Bh, Al Bl + // On merging buffers: Ah,Bh , Al Bl -> Ah Al, Bh, Bl + // The operation is its own inverse + struct Exchange{ + // 3210 ordering + static inline void Exchange0(__m512 &out1,__m512 &out2,__m512 in1,__m512 in2){ + out1= 
_mm512_shuffle_f32x4(in1,in2,_MM_SELECT_FOUR_FOUR(1,0,1,0)); + out2= _mm512_shuffle_f32x4(in1,in2,_MM_SELECT_FOUR_FOUR(3,2,3,2)); + }; + static inline void Exchange1(__m512 &out1,__m512 &out2,__m512 in1,__m512 in2){ + out1= _mm512_shuffle_f32x4(in1,in2,_MM_SELECT_FOUR_FOUR(2,0,2,0)); + out2= _mm512_shuffle_f32x4(in1,in2,_MM_SELECT_FOUR_FOUR(3,1,3,1)); + out1= _mm512_shuffle_f32x4(out1,out1,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*AECG*/ + out2= _mm512_shuffle_f32x4(out2,out2,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*BFDH*/ + }; + static inline void Exchange2(__m512 &out1,__m512 &out2,__m512 in1,__m512 in2){ + out1= _mm512_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(1,0,1,0)); + out2= _mm512_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(3,2,3,2)); + }; + static inline void Exchange3(__m512 &out1,__m512 &out2,__m512 in1,__m512 in2){ + out1= _mm512_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(2,0,2,0)); + out2= _mm512_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(3,1,3,1)); + out1= _mm512_shuffle_ps(out1,out1,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*AECG*/ + out2= _mm512_shuffle_ps(out2,out2,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*BFDH*/ + }; + + static inline void Exchange0(__m512d &out1,__m512d &out2,__m512d in1,__m512d in2){ + out1= _mm512_shuffle_f64x2(in1,in2,_MM_SELECT_FOUR_FOUR(1,0,1,0)); + out2= _mm512_shuffle_f64x2(in1,in2,_MM_SELECT_FOUR_FOUR(3,2,3,2)); + }; + static inline void Exchange1(__m512d &out1,__m512d &out2,__m512d in1,__m512d in2){ + out1= _mm512_shuffle_f64x2(in1,in2,_MM_SELECT_FOUR_FOUR(2,0,2,0)); + out2= _mm512_shuffle_f64x2(in1,in2,_MM_SELECT_FOUR_FOUR(3,1,3,1)); + out1= _mm512_shuffle_f64x2(out1,out1,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*AECG*/ + out2= _mm512_shuffle_f64x2(out2,out2,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*BFDH*/ + }; + static inline void Exchange2(__m512d &out1,__m512d &out2,__m512d in1,__m512d in2){ + out1 = _mm512_shuffle_pd(in1,in2,0x00); + out2 = _mm512_shuffle_pd(in1,in2,0xFF); + }; + static inline void Exchange3(__m512d &out1,__m512d &out2,__m512d in1,__m512d in2){ + assert(0); 
+ return; + }; + }; + struct Rotate{ diff --git a/lib/simd/Grid_generic.h b/lib/simd/Grid_generic.h index 91e9cda2..7972da55 100644 --- a/lib/simd/Grid_generic.h +++ b/lib/simd/Grid_generic.h @@ -5,8 +5,10 @@ Source file: ./lib/simd/Grid_generic.h Copyright (C) 2015 + Copyright (C) 2017 Author: Antonin Portelli + Andrew Lawson This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -26,51 +28,10 @@ Author: Antonin Portelli *************************************************************************************/ /* END LEGAL */ -static_assert(GEN_SIMD_WIDTH % 16u == 0, "SIMD vector size is not an integer multiple of 16 bytes"); - -//#define VECTOR_LOOPS - -// playing with compiler pragmas -#ifdef VECTOR_LOOPS -#ifdef __clang__ -#define VECTOR_FOR(i, w, inc)\ -_Pragma("clang loop unroll(full) vectorize(enable) interleave(enable) vectorize_width(w)")\ -for (unsigned int i = 0; i < w; i += inc) -#elif defined __INTEL_COMPILER -#define VECTOR_FOR(i, w, inc)\ -_Pragma("simd vectorlength(w*8)")\ -for (unsigned int i = 0; i < w; i += inc) -#else -#define VECTOR_FOR(i, w, inc)\ -for (unsigned int i = 0; i < w; i += inc) -#endif -#else -#define VECTOR_FOR(i, w, inc)\ -for (unsigned int i = 0; i < w; i += inc) -#endif +#include "Grid_generic_types.h" namespace Grid { namespace Optimization { - - // type traits giving the number of elements for each vector type - template struct W; - template <> struct W { - constexpr static unsigned int c = GEN_SIMD_WIDTH/16u; - constexpr static unsigned int r = GEN_SIMD_WIDTH/8u; - }; - template <> struct W { - constexpr static unsigned int c = GEN_SIMD_WIDTH/8u; - constexpr static unsigned int r = GEN_SIMD_WIDTH/4u; - }; - - // SIMD vector types - template - struct vec { - alignas(GEN_SIMD_WIDTH) T v[W::r]; - }; - - typedef vec vecf; - typedef vec vecd; struct Vsplat{ // Complex @@ -99,11 +60,6 @@ namespace Optimization { return out; } - - // Integer - inline 
int operator()(Integer a){ - return a; - } }; struct Vstore{ @@ -112,11 +68,6 @@ namespace Optimization { inline void operator()(vec a, T *D){ *((vec *)D) = a; } - //Integer - inline void operator()(int a, Integer *I){ - *I = a; - } - }; struct Vstream{ @@ -151,11 +102,6 @@ namespace Optimization { return out; } - - // Integer - inline int operator()(Integer *a){ - return *a; - } }; ///////////////////////////////////////////////////// @@ -174,11 +120,6 @@ namespace Optimization { return out; } - - //I nteger - inline int operator()(int a, int b){ - return a + b; - } }; struct Sub{ @@ -194,11 +135,6 @@ namespace Optimization { return out; } - - //Integer - inline int operator()(int a, int b){ - return a-b; - } }; struct Mult{ @@ -214,11 +150,6 @@ namespace Optimization { return out; } - - // Integer - inline int operator()(int a, int b){ - return a*b; - } }; #define cmul(a, b, c, i)\ @@ -232,13 +163,26 @@ namespace Optimization { VECTOR_FOR(i, W::c, 1) { - out.v[2*i] = a[2*i]*b[2*i]; - out.v[2*i+1] = a[2*i]*b[2*i+1]; + out.v[2*i] = a.v[2*i]*b.v[2*i]; + out.v[2*i+1] = a.v[2*i]*b.v[2*i+1]; } return out; - }; + } }; + struct MaddRealPart{ + template + inline vec operator()(vec a, vec b, vec c){ + vec out; + + VECTOR_FOR(i, W::c, 1) + { + out.v[2*i] = a.v[2*i]*b.v[2*i] + c.v[2*i]; + out.v[2*i+1] = a.v[2*i]*b.v[2*i+1] + c.v[2*i+1]; + } + return out; + } + }; struct MultComplex{ // Complex @@ -369,6 +313,11 @@ namespace Optimization { } struct Rotate{ + + template static inline vec tRotate(vec in){ + return rotate(in, n); + } + template static inline vec rotate(vec in, int n){ vec out; @@ -442,8 +391,12 @@ namespace Optimization { //Integer Reduce template<> - inline Integer Reduce::operator()(int in){ - return in; + inline Integer Reduce::operator()(veci in){ + Integer a = 0; + + acc(in.v, a, 0, 1, W::r); + + return a; } } @@ -452,7 +405,7 @@ namespace Optimization { typedef Optimization::vecf SIMD_Ftype; // Single precision type typedef Optimization::vecd SIMD_Dtype; 
// Double precision type - typedef int SIMD_Itype; // Integer type + typedef Optimization::veci SIMD_Itype; // Integer type // prefetch utilities inline void v_prefetch0(int size, const char *ptr){}; @@ -472,6 +425,7 @@ namespace Optimization { typedef Optimization::Mult MultSIMD; typedef Optimization::MultComplex MultComplexSIMD; typedef Optimization::MultRealPart MultRealPartSIMD; + typedef Optimization::MaddRealPart MaddRealPartSIMD; typedef Optimization::Conj ConjSIMD; typedef Optimization::TimesMinusI TimesMinusISIMD; typedef Optimization::TimesI TimesISIMD; diff --git a/lib/simd/Grid_generic_types.h b/lib/simd/Grid_generic_types.h new file mode 100644 index 00000000..2142bc8e --- /dev/null +++ b/lib/simd/Grid_generic_types.h @@ -0,0 +1,80 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/simd/Grid_generic_types.h + + Copyright (C) 2017 + +Author: Antonin Portelli + Andrew Lawson + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ + +static_assert(GEN_SIMD_WIDTH % 16u == 0, "SIMD vector size is not an integer multiple of 16 bytes"); + +//#define VECTOR_LOOPS + +// playing with compiler pragmas +#ifdef VECTOR_LOOPS +#ifdef __clang__ +#define VECTOR_FOR(i, w, inc)\ +_Pragma("clang loop unroll(full) vectorize(enable) interleave(enable) vectorize_width(w)")\ +for (unsigned int i = 0; i < w; i += inc) +#elif defined __INTEL_COMPILER +#define VECTOR_FOR(i, w, inc)\ +_Pragma("simd vectorlength(w*8)")\ +for (unsigned int i = 0; i < w; i += inc) +#else +#define VECTOR_FOR(i, w, inc)\ +for (unsigned int i = 0; i < w; i += inc) +#endif +#else +#define VECTOR_FOR(i, w, inc)\ +for (unsigned int i = 0; i < w; i += inc) +#endif + +namespace Grid { +namespace Optimization { + + // type traits giving the number of elements for each vector type + template struct W; + template <> struct W { + constexpr static unsigned int c = GEN_SIMD_WIDTH/16u; + constexpr static unsigned int r = GEN_SIMD_WIDTH/8u; + }; + template <> struct W { + constexpr static unsigned int c = GEN_SIMD_WIDTH/8u; + constexpr static unsigned int r = GEN_SIMD_WIDTH/4u; + }; + template <> struct W { + constexpr static unsigned int r = GEN_SIMD_WIDTH/4u; + }; + + // SIMD vector types + template + struct vec { + alignas(GEN_SIMD_WIDTH) T v[W::r]; + }; + + typedef vec vecf; + typedef vec vecd; + typedef vec veci; + +}} diff --git a/lib/simd/Grid_qpx.h b/lib/simd/Grid_qpx.h index 99a9ea68..d77a560a 100644 --- a/lib/simd/Grid_qpx.h +++ b/lib/simd/Grid_qpx.h @@ -5,8 +5,10 @@ Source file: ./lib/simd/Grid_qpx.h Copyright (C) 2016 + Copyright (C) 2017 Author: Antonin Portelli + Andrew Lawson This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,6 +27,11 @@ See the full 
license in the file "LICENSE" in the top level distribution directory ******************************************************************************/ +#ifndef GEN_SIMD_WIDTH +#define GEN_SIMD_WIDTH 32u +#endif +#include "Grid_generic_types.h" // Definitions for simulated integer SIMD. + namespace Grid { namespace Optimization { typedef struct @@ -62,8 +69,15 @@ namespace Optimization { return (vector4double){a, a, a, a}; } //Integer - inline int operator()(Integer a){ - return a; + inline veci operator()(Integer a){ + veci out; + + VECTOR_FOR(i, W::r, 1) + { + out.v[i] = a; + } + + return out; } }; @@ -88,9 +102,10 @@ namespace Optimization { inline void operator()(vector4double a, double *d){ vec_st(a, 0, d); } + //Integer - inline void operator()(int a, Integer *i){ - i[0] = a; + inline void operator()(veci a, Integer *i){ + *((veci *)i) = a; } }; @@ -142,11 +157,13 @@ namespace Optimization { return vec_ld(0, a); } // Integer - inline int operator()(Integer *a){ - return a[0]; - } - - + inline veci operator()(Integer *a){ + veci out; + + out = *((veci *)a); + + return out; + } }; template @@ -163,6 +180,22 @@ namespace Optimization { ///////////////////////////////////////////////////// // Arithmetic operations ///////////////////////////////////////////////////// + + #define FLOAT_WRAP_3(fn, pref)\ + pref vector4float fn(vector4float a, vector4float b, vector4float c) \ + {\ + vector4double ad, bd, rd, cd; \ + vector4float r;\ + \ + ad = Vset()(a);\ + bd = Vset()(b);\ + cd = Vset()(c);\ + rd = fn(ad, bd, cd); \ + Vstore()(rd, r);\ + \ + return r;\ + } + #define FLOAT_WRAP_2(fn, pref)\ pref vector4float fn(vector4float a, vector4float b)\ {\ @@ -200,8 +233,15 @@ namespace Optimization { FLOAT_WRAP_2(operator(), inline) //Integer - inline int operator()(int a, int b){ - return a + b; + inline veci operator()(veci a, veci b){ + veci out; + + VECTOR_FOR(i, W::r, 1) + { + out.v[i] = a.v[i] + b.v[i]; + } + + return out; } }; @@ -215,8 +255,15 @@ namespace 
Optimization { FLOAT_WRAP_2(operator(), inline) //Integer - inline int operator()(int a, int b){ - return a - b; + inline veci operator()(veci a, veci b){ + veci out; + + VECTOR_FOR(i, W::r, 1) + { + out.v[i] = a.v[i] - b.v[i]; + } + + return out; } }; @@ -228,6 +275,13 @@ namespace Optimization { } FLOAT_WRAP_2(operator(), inline) }; + struct MaddRealPart{ + // Complex double + inline vector4double operator()(vector4double a, vector4double b,vector4double c){ + return vec_xmadd(a, b, c); + } + FLOAT_WRAP_3(operator(), inline) + }; struct MultComplex{ // Complex double inline vector4double operator()(vector4double a, vector4double b){ @@ -248,8 +302,15 @@ namespace Optimization { FLOAT_WRAP_2(operator(), inline) // Integer - inline int operator()(int a, int b){ - return a*b; + inline veci operator()(veci a, veci b){ + veci out; + + VECTOR_FOR(i, W::r, 1) + { + out.v[i] = a.v[i]*b.v[i]; + } + + return out; } }; @@ -263,8 +324,15 @@ namespace Optimization { FLOAT_WRAP_2(operator(), inline) // Integer - inline int operator()(int a, int b){ - return a/b; + inline veci operator()(veci a, veci b){ + veci out; + + VECTOR_FOR(i, W::r, 1) + { + out.v[i] = a.v[i]/b.v[i]; + } + + return out; } }; @@ -323,19 +391,36 @@ namespace Optimization { }; struct Rotate{ + + template static inline vector4double tRotate(vector4double v){ + if ( n==1 ) return vec_perm(v, v, vec_gpci(01230)); + if ( n==2 ) return vec_perm(v, v, vec_gpci(02301)); + if ( n==3 ) return vec_perm(v, v, vec_gpci(03012)); + return v; + }; + template static inline vector4float tRotate(vector4float a) + { + vector4double ad, rd; + vector4float r; + ad = Vset()(a); + rd = tRotate(ad); + Vstore()(rd, r); + return r; + }; + static inline vector4double rotate(vector4double v, int n){ switch(n){ case 0: return v; break; case 1: - return vec_perm(v, v, vec_gpci(01230)); + return tRotate<1>(v); break; case 2: - return vec_perm(v, v, vec_gpci(02301)); + return tRotate<2>(v); break; case 3: - return vec_perm(v, v, 
vec_gpci(03012)); + return tRotate<3>(v); break; default: assert(0); } @@ -344,11 +429,9 @@ namespace Optimization { static inline vector4float rotate(vector4float v, int n){ vector4double vd, rd; vector4float r; - vd = Vset()(v); rd = rotate(vd, n); Vstore()(rd, r); - return r; } }; @@ -418,7 +501,7 @@ namespace Optimization { // Here assign types typedef Optimization::vector4float SIMD_Ftype; // Single precision type typedef vector4double SIMD_Dtype; // Double precision type -typedef int SIMD_Itype; // Integer type +typedef Optimization::veci SIMD_Itype; // Integer type // prefetch utilities inline void v_prefetch0(int size, const char *ptr){}; @@ -439,6 +522,7 @@ typedef Optimization::Mult MultSIMD; typedef Optimization::Div DivSIMD; typedef Optimization::MultComplex MultComplexSIMD; typedef Optimization::MultRealPart MultRealPartSIMD; +typedef Optimization::MaddRealPart MaddRealPartSIMD; typedef Optimization::Conj ConjSIMD; typedef Optimization::TimesMinusI TimesMinusISIMD; typedef Optimization::TimesI TimesISIMD; diff --git a/lib/simd/Grid_sse4.h b/lib/simd/Grid_sse4.h index 943756b2..fcad4c28 100644 --- a/lib/simd/Grid_sse4.h +++ b/lib/simd/Grid_sse4.h @@ -326,7 +326,43 @@ namespace Optimization { static inline __m128d Permute3(__m128d in){ return in; }; + }; + struct Exchange{ + // 3210 ordering + static inline void Exchange0(__m128 &out1,__m128 &out2,__m128 in1,__m128 in2){ + out1= _mm_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(1,0,1,0)); + out2= _mm_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(3,2,3,2)); + }; + static inline void Exchange1(__m128 &out1,__m128 &out2,__m128 in1,__m128 in2){ + out1= _mm_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(2,0,2,0)); + out2= _mm_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(3,1,3,1)); + }; + static inline void Exchange2(__m128 &out1,__m128 &out2,__m128 in1,__m128 in2){ + assert(0); + return; + }; + static inline void Exchange3(__m128 &out1,__m128 &out2,__m128 in1,__m128 in2){ + assert(0); + return; + }; + + static inline void 
Exchange0(__m128d &out1,__m128d &out2,__m128d in1,__m128d in2){ + out1= _mm_shuffle_pd(in1,in2,0x0); + out2= _mm_shuffle_pd(in1,in2,0x3); + }; + static inline void Exchange1(__m128d &out1,__m128d &out2,__m128d in1,__m128d in2){ + assert(0); + return; + }; + static inline void Exchange2(__m128d &out1,__m128d &out2,__m128d in1,__m128d in2){ + assert(0); + return; + }; + static inline void Exchange3(__m128d &out1,__m128d &out2,__m128d in1,__m128d in2){ + assert(0); + return; + }; }; struct Rotate{ diff --git a/lib/simd/Grid_vector_types.h b/lib/simd/Grid_vector_types.h index 8a6ab2e7..57e7f11e 100644 --- a/lib/simd/Grid_vector_types.h +++ b/lib/simd/Grid_vector_types.h @@ -350,6 +350,27 @@ class Grid_simd { return ret; } + /////////////////////// + // Exchange + // Al Ah , Bl Bh -> Al Bl Ah,Bh + /////////////////////// + friend inline void exchange(Grid_simd &out1,Grid_simd &out2,Grid_simd in1,Grid_simd in2,int n) + { + if (n==3) { + Optimization::Exchange::Exchange3(out1.v,out2.v,in1.v,in2.v); + // std::cout << " Exchange3 "<< out1<<" "<< out2<<" <- " << in1 << " "< &ret,const Grid_simd &src,int lane){ ret.v = unary(real(typepun[lane]), VsplatSIMD()); } + + /////////////////////// // Splat /////////////////////// @@ -747,6 +758,15 @@ typedef Grid_simd, SIMD_Ftype> vComplexF; typedef Grid_simd, SIMD_Dtype> vComplexD; typedef Grid_simd vInteger; +// Check our vector types are of an appropriate size. 
+#if defined QPX +static_assert(2*sizeof(SIMD_Ftype) == sizeof(SIMD_Dtype), "SIMD vector lengths incorrect"); +static_assert(2*sizeof(SIMD_Ftype) == sizeof(SIMD_Itype), "SIMD vector lengths incorrect"); +#else +static_assert(sizeof(SIMD_Ftype) == sizeof(SIMD_Dtype), "SIMD vector lengths incorrect"); +static_assert(sizeof(SIMD_Ftype) == sizeof(SIMD_Itype), "SIMD vector lengths incorrect"); +#endif + ///////////////////////////////////////// // Some traits to recognise the types ///////////////////////////////////////// diff --git a/lib/simd/IBM_qpx.h b/lib/simd/IBM_qpx.h new file mode 100644 index 00000000..df91d8e6 --- /dev/null +++ b/lib/simd/IBM_qpx.h @@ -0,0 +1,598 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/simd/BGQQPX.h + + Copyright (C) 2015 + +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef GRID_ASM_BGQ_QPX_H +#define GRID_ASM_BGQ_QPX_H + +#include + +/********************************************************* + * Register definitions + *********************************************************/ +#define psi_00 0 +#define psi_01 1 +#define psi_02 2 + +#define psi_10 3 +#define psi_11 4 +#define psi_12 5 + +#define psi_20 6 +#define psi_21 7 +#define psi_22 8 + +#define psi_30 9 +#define psi_31 10 +#define psi_32 11 + +#define Chi_00 12 +#define Chi_01 13 +#define Chi_02 14 + +#define Chi_10 15 +#define Chi_11 16 +#define Chi_12 17 + +#define UChi_00 18 +#define UChi_01 19 +#define UChi_02 20 + +#define UChi_10 21 +#define UChi_11 22 +#define UChi_12 23 + +#define U0 24 +#define U1 25 +#define U2 26 +#define one 27 +#define perm_reg 28 + +#define REP %%r16 +#define IMM %%r17 +#define pREP %r16 +#define pIMM %r17 + +#define PPC_INST_DCBTLS 0x7c00014c +#define PPC_INST_DCBLC 0x7c00030c +#define __PPC_CT(t) (((t) & 0x0f) << 21) +#define ___PPC_RA(a) (((a) & 0x1f) << 16) +#define ___PPC_RB(b) (((b) & 0x1f) << 11) + +#define LOCK_SET ".long (" HASH(PPC_INST_DCBTLS) "|" HASH(___PPC_RB(16)) ")\n" +#define LOCK_CLEAR ".long (" HASH(PPC_INST_DCBLC) "|" HASH(___PPC_RB(16)) ")\n" + +/*Alias regs for incoming fourspinor on neighbour site*/ +#define Chi_20 UChi_00 +#define Chi_21 UChi_01 +#define Chi_22 UChi_02 +#define Chi_30 UChi_10 +#define Chi_31 UChi_11 +#define Chi_32 UChi_12 + +/********************************************************* + * Architectural macros + *********************************************************/ +#define HASHit(A) #A +#define HASH(A) HASHit(A) +#define LOAD64(A,ptr) + + +#define MASK_REGS /*NOOP ON BGQ*/ +#define PF_GAUGE(A) /*NOOP ON BGQ*/ +#define PREFETCH1_CHIMU(base) /*NOOP ON BGQ*/ +#define PREFETCH_CHIMU(base) /*NOOP ON BGQ*/ + 
+#define VLOADf(OFF,PTR,DEST) "qvlfsx " #DEST "," #PTR "," #OFF " ;\n" +#define VLOADuf(OFF,PTR,DEST) "qvlfsux " #DEST "," #PTR "," #OFF " ;\n" +#define VSTOREf(OFF,PTR,SRC) "qvstfsx " #SRC "," #PTR "," #OFF " ;\n" +#define VSTOREuf(OFF,PTR,SRC) "qvstfsux " #SRC "," #PTR "," #OFF " ;\n" +#define VSPLATf(A,B,DEST) "qvlfcsxa " #DEST "," #A "," #B ";\n" +#define VSIZEf (16) + +#define VPERMIi(p) "qvgpci " #p ", 1217;\n" +#define VPERMi(A,p) "qvfperm " #A "," #A "," #A "," #p ";\n" +#define VPERMI(p) VPERMIi(p) +#define VPERM(A,p) VPERMi(A,p) + +#define VLOADd(OFF,PTR,DEST) "qvlfdx " #DEST "," #PTR "," #OFF " ;\n" +#define VLOADud(OFF,PTR,DEST) "qvlfdux " #DEST "," #PTR "," #OFF " ;\n" +#define VSTOREd(OFF,PTR,SRC) "qvstfdx " #SRC "," #PTR "," #OFF " ;\n" +#define VSTOREud(OFF,PTR,SRC) "qvstfdux " #SRC "," #PTR "," #OFF " ;\n" +#define VSPLATd(A,B,DEST) "qvlfcdxa " #DEST "," #A "," #B ";\n" +#define VSIZEd (32) + +// QPX manual ordering QRT comes first (dest) +#define VZEROi(DEST) "qvfset " #DEST "; \n qvfsub " #DEST "," #DEST "," #DEST ";\n" +#define VONEi(DEST) "qvfset " #DEST "; \n" +#define VMOVi(DEST,A) "qvfmr " #DEST "," #A ";\n" +#define VADDi(DEST,A,B) "qvfadd " #DEST "," #A "," #B ";\n" +#define VSUBi(DEST,A,B) "qvfsub " #DEST "," #A "," #B ";\n" +#define VMULi(DEST,A,B) "qvfmul " #DEST "," #A "," #B ";\n" +#define VMUL_RR_RIi(DEST,A,B) "qvfxmul " #DEST "," #A "," #B ";\n" +#define VMADDi(DEST,A,B,C) "qvfmadd " #DEST "," #A "," #B ","#C ";\n" +#define VMADD_RR_RIi(DEST,A,B,C) "qvfxmadd " #DEST "," #A "," #B ","#C ";\n" +#define VMADD_MII_IRi(DEST,A,B,C) "qvfxxnpmadd " #DEST "," #B "," #A ","#C ";\n" +#define VMADD_II_MIRi(DEST,A,B,C) "qvfxxcpnmadd " #DEST "," #B "," #A ","#C ";\n" + +#define VZERO(C) VZEROi(C) +#define VONE(C) VONEi(C) +#define VMOV(C,A) VMOVi(C,A) +#define VADD(A,B,C) VADDi(A,B,C) +#define VSUB(A,B,C) VSUBi(A,B,C) +#define VMUL(A,B,C) VMULi(A,B,C) +#define VMUL_RR_RI(A,B,C) VMUL_RR_RIi(A,B,C) +#define VMADD(A,B,C,D) VMADDi(A,B,C,D) +#define 
VMADD_RR_RI(A,B,C,D) VMADD_RR_RIi(A,B,C,D) +#define VMADD_MII_IR(A,B,C,D) VMADD_MII_IRi(A,B,C,D) +#define VMADD_II_MIR(A,B,C,D) VMADD_II_MIRi(A,B,C,D) + +/********************************************************* + * Macro sequences encoding QCD + *********************************************************/ +#define LOCK_GAUGE(dir) \ + { \ + uint64_t byte_addr = (uint64_t)&U._odata[sU]; \ + int count = (sizeof(U._odata[0])+63)/64; \ + asm (" mtctr %0 \n" \ + " mr " HASH(REP) ", %1\n" \ + " li " HASH(IMM) ", 64\n" \ + "0:\n" \ + LOCK_SET \ + " add " HASH(REP) "," HASH(IMM) "," HASH(REP) "\n" \ + " bdnz 0b\n" \ + : : "b" (count), "b" (byte_addr) ); \ + } + +#define UNLOCK_GAUGE(dir) \ + { \ + uint64_t byte_addr = (uint64_t)&U._odata[sU]; \ + int count = (sizeof(U._odata[0])+63)/64; \ + asm (" mtctr %0 \n" \ + " mr " HASH(REP) ", %1\n" \ + " li " HASH(IMM) ", 64\n" \ + "0:\n" \ + LOCK_CLEAR \ + " add " HASH(REP) "," HASH(IMM) "," HASH(REP) "\n" \ + " bdnz 0b\n" \ + : : "b" (count), "b" (byte_addr) ); \ + } + +#define ZERO_PSI \ + VZERO(psi_00) \ + VZERO(psi_01) \ + VZERO(psi_02) \ + VZERO(psi_10) \ + VZERO(psi_11) \ + VZERO(psi_12) \ + VZERO(psi_20) \ + VZERO(psi_21) \ + VZERO(psi_22) \ + VZERO(psi_30) \ + VZERO(psi_31) \ + VZERO(psi_32) + +#define MULT_2SPIN_QPX_LSd(ptr,p) MULT_2SPIN_QPX_INTERNAL(ptr,p,VSPLAT,16) +#define MULT_2SPIN_QPX_LSf(ptr,p) MULT_2SPIN_QPX_INTERNAL(ptr,p,VSPLAT,8) +#define MULT_2SPIN_QPXd(ptr,p) MULT_2SPIN_QPX_INTERNAL(ptr,p,VLOAD,32) +#define MULT_2SPIN_QPXf(ptr,p) MULT_2SPIN_QPX_INTERNAL(ptr,p,VLOAD,16) + +#define MULT_2SPIN_QPX_INTERNAL(ptr,p,ULOAD,USKIP) { \ + uint64_t ub = ((uint64_t)ptr); \ + asm ( \ + ULOAD(%0,%3,U0) \ + ULOAD(%1,%3,U1) \ + ULOAD(%2,%3,U2) \ + VMUL_RR_RI(UChi_00,U0,Chi_00) \ + VMUL_RR_RI(UChi_01,U1,Chi_00) \ + VMUL_RR_RI(UChi_02,U2,Chi_00) \ + VMUL_RR_RI(UChi_10,U0,Chi_10) \ + VMUL_RR_RI(UChi_11,U1,Chi_10) \ + VMUL_RR_RI(UChi_12,U2,Chi_10) \ + VMADD_MII_IR(UChi_00,U0,Chi_00,UChi_00) \ + 
VMADD_MII_IR(UChi_01,U1,Chi_00,UChi_01) \ + VMADD_MII_IR(UChi_02,U2,Chi_00,UChi_02) \ + VMADD_MII_IR(UChi_10,U0,Chi_10,UChi_10) \ + VMADD_MII_IR(UChi_11,U1,Chi_10,UChi_11) \ + VMADD_MII_IR(UChi_12,U2,Chi_10,UChi_12) \ + : : "b" (0), "b" (USKIP*3), "b" (USKIP*6), "b" (ub )); \ + asm ( \ + ULOAD(%0,%3,U0) \ + ULOAD(%1,%3,U1) \ + ULOAD(%2,%3,U2) \ + VMADD_RR_RI(UChi_00,U0,Chi_01,UChi_00) \ + VMADD_RR_RI(UChi_01,U1,Chi_01,UChi_01) \ + VMADD_RR_RI(UChi_02,U2,Chi_01,UChi_02) \ + VMADD_RR_RI(UChi_10,U0,Chi_11,UChi_10) \ + VMADD_RR_RI(UChi_11,U1,Chi_11,UChi_11) \ + VMADD_RR_RI(UChi_12,U2,Chi_11,UChi_12) \ + VMADD_MII_IR(UChi_00,U0,Chi_01,UChi_00) \ + VMADD_MII_IR(UChi_01,U1,Chi_01,UChi_01) \ + VMADD_MII_IR(UChi_02,U2,Chi_01,UChi_02) \ + VMADD_MII_IR(UChi_10,U0,Chi_11,UChi_10) \ + VMADD_MII_IR(UChi_11,U1,Chi_11,UChi_11) \ + VMADD_MII_IR(UChi_12,U2,Chi_11,UChi_12) \ + : : "b" (USKIP*1), "b" (USKIP*4), "b" (USKIP*7), "b" (ub )); \ + asm ( \ + ULOAD(%0,%3,U0) \ + ULOAD(%1,%3,U1) \ + ULOAD(%2,%3,U2) \ + VMADD_RR_RI(UChi_00,U0,Chi_02,UChi_00) \ + VMADD_RR_RI(UChi_01,U1,Chi_02,UChi_01) \ + VMADD_RR_RI(UChi_02,U2,Chi_02,UChi_02) \ + VMADD_RR_RI(UChi_10,U0,Chi_12,UChi_10) \ + VMADD_RR_RI(UChi_11,U1,Chi_12,UChi_11) \ + VMADD_RR_RI(UChi_12,U2,Chi_12,UChi_12) \ + VMADD_MII_IR(UChi_00,U0,Chi_02,UChi_00) \ + VMADD_MII_IR(UChi_01,U1,Chi_02,UChi_01) \ + VMADD_MII_IR(UChi_02,U2,Chi_02,UChi_02) \ + VMADD_MII_IR(UChi_10,U0,Chi_12,UChi_10) \ + VMADD_MII_IR(UChi_11,U1,Chi_12,UChi_11) \ + VMADD_MII_IR(UChi_12,U2,Chi_12,UChi_12) \ + : : "b" (USKIP*2), "b" (USKIP*5), "b" (USKIP*8), "b" (ub )); \ + } + + +#define MULT_2SPIN_DIR_PF(A,p) MULT_2SPIN_PF(&U._odata[sU](A),p) +#define MULT_2SPIN_PF(ptr,pf) MULT_2SPIN(ptr,pf) + +#define SAVE_RESULT(base,basep) {\ + uint64_t ub = ((uint64_t)base) - (VSIZE); \ + asm("mr " HASH(REP) ", %0;\n" \ + "li " HASH(IMM) "," HASH(VSIZE)" ;\n" \ + VSTOREu(IMM,REP,psi_00) \ + VSTOREu(IMM,REP,psi_01) \ + VSTOREu(IMM,REP,psi_02) \ + VSTOREu(IMM,REP,psi_10) \ + 
VSTOREu(IMM,REP,psi_11) \ + VSTOREu(IMM,REP,psi_12) \ + VSTOREu(IMM,REP,psi_20) \ + VSTOREu(IMM,REP,psi_21) \ + VSTOREu(IMM,REP,psi_22) \ + VSTOREu(IMM,REP,psi_30) \ + VSTOREu(IMM,REP,psi_31) \ + VSTOREu(IMM,REP,psi_32) \ + : : "b" (ub) : HASH(pIMM), HASH(pREP) ); \ + } + + +/* + *Annoying BG/Q loads with no immediate indexing and big performance hit + *when second miss to an L1 line occurs + */ +#define LOAD_CHI(base) { \ + uint64_t ub = ((uint64_t)base) - (2*VSIZE); \ + asm("mr " HASH(REP) ",%0 ;\n" \ + "li " HASH(IMM) ",(2*" HASH(VSIZE) ");\n" \ + VLOADu(IMM,REP,Chi_00) \ + VLOADu(IMM,REP,Chi_02) \ + VLOADu(IMM,REP,Chi_11) : : "b" (ub) : HASH(pIMM), HASH(pREP) ); \ + ub = ((uint64_t)base) - VSIZE; \ + asm("mr " HASH(REP) ", %0;\n" \ + "li " HASH(IMM) ",(2*" HASH(VSIZE) ");\n" \ + VLOADu(IMM,REP,Chi_01) \ + VLOADu(IMM,REP,Chi_10) \ + VLOADu(IMM,REP,Chi_12) : : "b" (ub) : HASH(pIMM), HASH(pREP) ); \ + } + +#define LOAD_CHIMU(base) { \ + uint64_t ub = ((uint64_t)base) - (2*VSIZE); \ + asm("mr " HASH(REP) ",%0;\n" \ + "li " HASH(IMM) ",(2*" HASH(VSIZE) ");\n" \ + VLOADu(IMM,REP,Chi_00) \ + VLOADu(IMM,REP,Chi_02) \ + VLOADu(IMM,REP,Chi_11) \ + VLOADu(IMM,REP,Chi_20) \ + VLOADu(IMM,REP,Chi_22) \ + VLOADu(IMM,REP,Chi_31) : : "b" (ub) : HASH(pIMM), HASH(pREP) ); \ + ub = ((uint64_t)base) - VSIZE; \ + asm("mr " HASH(REP) ", %0;\n" \ + "li " HASH(IMM) ", (2*" HASH(VSIZE) ");\n" \ + VLOADu(IMM,REP,Chi_01) \ + VLOADu(IMM,REP,Chi_10) \ + VLOADu(IMM,REP,Chi_12) \ + VLOADu(IMM,REP,Chi_21) \ + VLOADu(IMM,REP,Chi_30) \ + VLOADu(IMM,REP,Chi_32) : : "b" (ub) : HASH(pIMM), HASH(pREP) ); \ + } + +// hspin(0)=fspin(0)+timesI(fspin(3)); +// hspin(1)=fspin(1)+timesI(fspin(2)); +#define XP_PROJMEM(base) { \ + LOAD_CHIMU(base); \ + asm ( \ + VONE(one) \ + VMADD_MII_IR(Chi_00,one,Chi_30,Chi_00) \ + VMADD_MII_IR(Chi_01,one,Chi_31,Chi_01) \ + VMADD_MII_IR(Chi_02,one,Chi_32,Chi_02) \ + VMADD_MII_IR(Chi_10,one,Chi_20,Chi_10) \ + VMADD_MII_IR(Chi_11,one,Chi_21,Chi_11) \ + 
VMADD_MII_IR(Chi_12,one,Chi_22,Chi_12) \ + ); \ + } + +#define XM_PROJMEM(base) { \ + LOAD_CHIMU(base); \ + asm ( \ + VONE(one) \ + VMADD_II_MIR(Chi_00,one,Chi_30,Chi_00) \ + VMADD_II_MIR(Chi_01,one,Chi_31,Chi_01) \ + VMADD_II_MIR(Chi_02,one,Chi_32,Chi_02) \ + VMADD_II_MIR(Chi_10,one,Chi_20,Chi_10) \ + VMADD_II_MIR(Chi_11,one,Chi_21,Chi_11) \ + VMADD_II_MIR(Chi_12,one,Chi_22,Chi_12) \ + ); \ + } + +// hspin(0)=fspin(0)-fspin(3); +// hspin(1)=fspin(1)+fspin(2); +#define YP_PROJMEM(base) { \ + LOAD_CHIMU(base); \ + asm ( \ + VSUB(Chi_00,Chi_00,Chi_30) \ + VSUB(Chi_01,Chi_01,Chi_31) \ + VSUB(Chi_02,Chi_02,Chi_32) \ + VADD(Chi_10,Chi_10,Chi_20) \ + VADD(Chi_11,Chi_11,Chi_21) \ + VADD(Chi_12,Chi_12,Chi_22) \ + ); \ + } + +#define YM_PROJMEM(base) { \ + LOAD_CHIMU(base); \ + asm ( \ + VADD(Chi_00,Chi_00,Chi_30) \ + VADD(Chi_01,Chi_01,Chi_31) \ + VADD(Chi_02,Chi_02,Chi_32) \ + VSUB(Chi_10,Chi_10,Chi_20) \ + VSUB(Chi_11,Chi_11,Chi_21) \ + VSUB(Chi_12,Chi_12,Chi_22) ); \ + } + + /*Gz + * 0 0 i 0 [0]+-i[2] + * 0 0 0 -i [1]-+i[3] + * -i 0 0 0 + * 0 i 0 0 + */ +#define ZP_PROJMEM(base) { \ + LOAD_CHIMU(base); \ + asm ( \ + VONE(one) \ + VMADD_MII_IR(Chi_00,one,Chi_20,Chi_00) \ + VMADD_MII_IR(Chi_01,one,Chi_21,Chi_01) \ + VMADD_MII_IR(Chi_02,one,Chi_22,Chi_02) \ + VMADD_II_MIR(Chi_10,one,Chi_30,Chi_10) \ + VMADD_II_MIR(Chi_11,one,Chi_31,Chi_11) \ + VMADD_II_MIR(Chi_12,one,Chi_32,Chi_12) \ + ); \ + } + +#define ZM_PROJMEM(base) { \ + LOAD_CHIMU(base); \ + asm ( \ + VONE(one) \ + VMADD_II_MIR(Chi_00,one,Chi_20,Chi_00) \ + VMADD_II_MIR(Chi_01,one,Chi_21,Chi_01) \ + VMADD_II_MIR(Chi_02,one,Chi_22,Chi_02) \ + VMADD_MII_IR(Chi_10,one,Chi_30,Chi_10) \ + VMADD_MII_IR(Chi_11,one,Chi_31,Chi_11) \ + VMADD_MII_IR(Chi_12,one,Chi_32,Chi_12) \ + ); \ + } + /*Gt + * 0 0 1 0 [0]+-[2] + * 0 0 0 1 [1]+-[3] + * 1 0 0 0 + * 0 1 0 0 + */ +#define TP_PROJMEM(base) { \ + LOAD_CHIMU(base); \ + asm ( \ + VADD(Chi_00,Chi_00,Chi_20) \ + VADD(Chi_01,Chi_01,Chi_21) \ + VADD(Chi_02,Chi_02,Chi_22) \ + 
VADD(Chi_10,Chi_10,Chi_30) \ + VADD(Chi_11,Chi_11,Chi_31) \ + VADD(Chi_12,Chi_12,Chi_32) \ + ); \ + } + +#define TM_PROJMEM(base) { \ + LOAD_CHIMU(base); \ + asm ( \ + VSUB(Chi_00,Chi_00,Chi_20) \ + VSUB(Chi_01,Chi_01,Chi_21) \ + VSUB(Chi_02,Chi_02,Chi_22) \ + VSUB(Chi_10,Chi_10,Chi_30) \ + VSUB(Chi_11,Chi_11,Chi_31) \ + VSUB(Chi_12,Chi_12,Chi_32) \ + ); \ + } + +/* + fspin(0)=hspin(0); + fspin(1)=hspin(1); + fspin(2)=timesMinusI(hspin(1)); + fspin(3)=timesMinusI(hspin(0)); + + fspin(0)+=hspin(0); + fspin(1)+=hspin(1); + fspin(2)-=timesI(hspin(1)); + fspin(3)-=timesI(hspin(0)); + */ +#define XP_RECON { \ + asm(\ + VONE(one)\ + VMOV(psi_00,UChi_00) VMOV(psi_01,UChi_01) VMOV(psi_02,UChi_02)\ + VMOV(psi_10,UChi_10) VMOV(psi_11,UChi_11) VMOV(psi_12,UChi_12)\ + VZERO(psi_20) VZERO(psi_21) VZERO(psi_22) \ + VZERO(psi_30) VZERO(psi_31) VZERO(psi_32) \ + VMADD_II_MIR(psi_20,one,UChi_10,psi_20) \ + VMADD_II_MIR(psi_21,one,UChi_11,psi_21) \ + VMADD_II_MIR(psi_22,one,UChi_12,psi_22) \ + VMADD_II_MIR(psi_30,one,UChi_00,psi_30) \ + VMADD_II_MIR(psi_31,one,UChi_01,psi_31) \ + VMADD_II_MIR(psi_32,one,UChi_02,psi_32) \ + ); \ + } + +#define XM_RECON { \ + asm(\ + VONE(one)\ + VMOV(psi_00,UChi_00) VMOV(psi_01,UChi_01) VMOV(psi_02,UChi_02)\ + VMOV(psi_10,UChi_10) VMOV(psi_11,UChi_11) VMOV(psi_12,UChi_12)\ + VZERO(psi_20) VZERO(psi_21) VZERO(psi_22) \ + VZERO(psi_30) VZERO(psi_31) VZERO(psi_32) \ + VMADD_MII_IR(psi_20,one,UChi_10,psi_20) \ + VMADD_MII_IR(psi_21,one,UChi_11,psi_21) \ + VMADD_MII_IR(psi_22,one,UChi_12,psi_22) \ + VMADD_MII_IR(psi_30,one,UChi_00,psi_30) \ + VMADD_MII_IR(psi_31,one,UChi_01,psi_31) \ + VMADD_MII_IR(psi_32,one,UChi_02,psi_32) \ + ); \ + } + +#define XP_RECON_ACCUM { \ + asm(\ + VONE(one)\ + VADD(psi_00,psi_00,UChi_00) VADD(psi_01,psi_01,UChi_01) VADD(psi_02,psi_02,UChi_02) \ + VADD(psi_10,psi_10,UChi_10) VADD(psi_11,psi_11,UChi_11) VADD(psi_12,psi_12,UChi_12) \ + VMADD_II_MIR(psi_20,one,UChi_10,psi_20) \ + VMADD_II_MIR(psi_21,one,UChi_11,psi_21) \ + 
VMADD_II_MIR(psi_22,one,UChi_12,psi_22) \ + VMADD_II_MIR(psi_30,one,UChi_00,psi_30) \ + VMADD_II_MIR(psi_31,one,UChi_01,psi_31) \ + VMADD_II_MIR(psi_32,one,UChi_02,psi_32) \ + ); \ + } + +#define XM_RECON_ACCUM { \ + asm(\ + VONE(one)\ + VADD(psi_00,psi_00,UChi_00) VADD(psi_01,psi_01,UChi_01) VADD(psi_02,psi_02,UChi_02) \ + VADD(psi_10,psi_10,UChi_10) VADD(psi_11,psi_11,UChi_11) VADD(psi_12,psi_12,UChi_12) \ + VMADD_MII_IR(psi_20,one,UChi_10,psi_20) \ + VMADD_MII_IR(psi_21,one,UChi_11,psi_21) \ + VMADD_MII_IR(psi_22,one,UChi_12,psi_22) \ + VMADD_MII_IR(psi_30,one,UChi_00,psi_30) \ + VMADD_MII_IR(psi_31,one,UChi_01,psi_31) \ + VMADD_MII_IR(psi_32,one,UChi_02,psi_32) \ + ); \ + } + +// fspin(2)+=hspin(1); +// fspin(3)-=hspin(0); +#define YP_RECON_ACCUM {\ + asm(\ + VADD(psi_00,psi_00,UChi_00) VADD(psi_01,psi_01,UChi_01) VADD(psi_02,psi_02,UChi_02) \ + VADD(psi_10,psi_10,UChi_10) VADD(psi_11,psi_11,UChi_11) VADD(psi_12,psi_12,UChi_12) \ + VADD(psi_20,psi_20,UChi_10) VADD(psi_21,psi_21,UChi_11) VADD(psi_22,psi_22,UChi_12) \ + VSUB(psi_30,psi_30,UChi_00) VSUB(psi_31,psi_31,UChi_01) VSUB(psi_32,psi_32,UChi_02) \ + );\ + } +#define YM_RECON_ACCUM {\ + asm(\ + VADD(psi_00,psi_00,UChi_00) VADD(psi_01,psi_01,UChi_01) VADD(psi_02,psi_02,UChi_02) \ + VADD(psi_10,psi_10,UChi_10) VADD(psi_11,psi_11,UChi_11) VADD(psi_12,psi_12,UChi_12) \ + VSUB(psi_20,psi_20,UChi_10) VSUB(psi_21,psi_21,UChi_11) VSUB(psi_22,psi_22,UChi_12) \ + VADD(psi_30,psi_30,UChi_00) VADD(psi_31,psi_31,UChi_01) VADD(psi_32,psi_32,UChi_02) \ + );\ + } + +// fspin(2)-=timesI(hspin(0)); +// fspin(3)+=timesI(hspin(1)); +#define ZP_RECON_ACCUM {\ + asm(\ + VONE(one)\ + VADD(psi_00,psi_00,UChi_00) VADD(psi_01,psi_01,UChi_01) VADD(psi_02,psi_02,UChi_02) \ + VADD(psi_10,psi_10,UChi_10) VADD(psi_11,psi_11,UChi_11) VADD(psi_12,psi_12,UChi_12) \ + VMADD_II_MIR(psi_20,one,UChi_00,psi_20) \ + VMADD_II_MIR(psi_21,one,UChi_01,psi_21) \ + VMADD_II_MIR(psi_22,one,UChi_02,psi_22) \ + VMADD_MII_IR(psi_30,one,UChi_10,psi_30) \ + 
VMADD_MII_IR(psi_31,one,UChi_11,psi_31) \ + VMADD_MII_IR(psi_32,one,UChi_12,psi_32) \ + );\ + } + +#define ZM_RECON_ACCUM {\ + asm(\ + VONE(one)\ + VADD(psi_00,psi_00,UChi_00) VADD(psi_01,psi_01,UChi_01) VADD(psi_02,psi_02,UChi_02) \ + VADD(psi_10,psi_10,UChi_10) VADD(psi_11,psi_11,UChi_11) VADD(psi_12,psi_12,UChi_12) \ + VMADD_MII_IR(psi_20,one,UChi_00,psi_20) \ + VMADD_MII_IR(psi_21,one,UChi_01,psi_21) \ + VMADD_MII_IR(psi_22,one,UChi_02,psi_22) \ + VMADD_II_MIR(psi_30,one,UChi_10,psi_30) \ + VMADD_II_MIR(psi_31,one,UChi_11,psi_31) \ + VMADD_II_MIR(psi_32,one,UChi_12,psi_32) \ + );\ + } + +// fspin(2)+=hspin(0); +// fspin(3)+=hspin(1); +#define TP_RECON_ACCUM {\ + asm(\ + VADD(psi_00,psi_00,UChi_00) VADD(psi_01,psi_01,UChi_01) VADD(psi_02,psi_02,UChi_02) \ + VADD(psi_10,psi_10,UChi_10) VADD(psi_11,psi_11,UChi_11) VADD(psi_12,psi_12,UChi_12) \ + VADD(psi_20,psi_20,UChi_00) VADD(psi_21,psi_21,UChi_01) VADD(psi_22,psi_22,UChi_02) \ + VADD(psi_30,psi_30,UChi_10) VADD(psi_31,psi_31,UChi_11) VADD(psi_32,psi_32,UChi_12) \ + );\ + } + +#define TM_RECON_ACCUM {\ + asm(\ + VADD(psi_00,psi_00,UChi_00) VADD(psi_01,psi_01,UChi_01) VADD(psi_02,psi_02,UChi_02) \ + VADD(psi_10,psi_10,UChi_10) VADD(psi_11,psi_11,UChi_11) VADD(psi_12,psi_12,UChi_12) \ + VSUB(psi_20,psi_20,UChi_00) VSUB(psi_21,psi_21,UChi_01) VSUB(psi_22,psi_22,UChi_02) \ + VSUB(psi_30,psi_30,UChi_10) VSUB(psi_31,psi_31,UChi_11) VSUB(psi_32,psi_32,UChi_12) \ + );\ + } + + +#define ADD_RESULTi(PTR,pf) \ + LOAD_CHIMU(PTR) \ + asm( \ + VADD(psi_00,chi_00,psi_00) VADD(psi_01,chi_01,psi_01) VADD(psi_02,chi_02,psi_02) \ + VADD(psi_10,chi_10,psi_10) VADD(psi_11,chi_11,psi_11) VADD(psi_12,chi_12,psi_12) \ + VADD(psi_20,chi_20,psi_20) VADD(psi_21,chi_21,psi_21) VADD(psi_22,chi_22,psi_22) \ + VADD(psi_30,chi_30,psi_30) VADD(psi_31,chi_31,psi_31) VADD(psi_32,chi_32,psi_32) ); \ + SAVE_RESULT(PTR,pf); + + +#define PERMUTE_DIR3 +#define PERMUTE_DIR2 +#define PERMUTE_DIR1 + +#define PERMUTE_DIR0 { \ + asm( \ + VPERMI(perm_reg) \ 
+ VPERM(Chi_00,perm_reg) VPERM(Chi_01,perm_reg) VPERM(Chi_02,perm_reg) \ + VPERM(Chi_10,perm_reg) VPERM(Chi_11,perm_reg) VPERM(Chi_12,perm_reg) ); \ + } + +#endif diff --git a/lib/stencil/Stencil_common.cc b/lib/simd/IBM_qpx_double.h similarity index 65% rename from lib/stencil/Stencil_common.cc rename to lib/simd/IBM_qpx_double.h index 66fccb4c..60709102 100644 --- a/lib/stencil/Stencil_common.cc +++ b/lib/simd/IBM_qpx_double.h @@ -2,12 +2,11 @@ Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/stencil/Stencil_common.cc + Source file: ./lib/simd/IBM_qpx_double.h Copyright (C) 2015 -Author: Peter Boyle -Author: Peter Boyle +Author: paboyle This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -26,9 +25,22 @@ Author: Peter Boyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include - -namespace Grid { -} +// No guard; ok multi-include +#undef VSIZE +#undef VLOAD +#undef VLOADu +#undef VSPLAT +#undef VSTORE +#undef VSTOREu +#undef MULT_2SPIN_QPX_LS +#undef MULT_2SPIN_QPX +#define VSIZE VSIZEd +#define VLOAD(A,B,C) VLOADd(A,B,C) +#define VLOADu(A,B,C) VLOADud(A,B,C) +#define VSPLAT(A,B,DEST) VSPLATd(A,B,DEST) +#define VSTORE(A,B,C) VSTOREd(A,B,C) +#define VSTOREu(A,B,C) VSTOREud(A,B,C) +#define MULT_2SPIN_QPX_LS(ptr,p) MULT_2SPIN_QPX_LSd(ptr,p) +#define MULT_2SPIN_QPX(ptr,p) MULT_2SPIN_QPXd(ptr,p) diff --git a/lib/simd/IBM_qpx_single.h b/lib/simd/IBM_qpx_single.h new file mode 100644 index 00000000..ab903ea7 --- /dev/null +++ b/lib/simd/IBM_qpx_single.h @@ -0,0 +1,46 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/simd/IBM_qpx_single.h + + Copyright (C) 2015 + +Author: paboyle + + This program is free software; 
you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +// No guard; ok multi-include +#undef VSIZE +#undef VLOAD +#undef VLOADu +#undef VSPLAT +#undef VSTORE +#undef VSTOREu +#undef MULT_2SPIN_QPX_LS +#undef MULT_2SPIN_QPX + +#define VSIZE VSIZEf +#define VLOAD(A,B,C) VLOADf(A,B,C) +#define VLOADu(A,B,C) VLOADuf(A,B,C) +#define VSPLAT(A,B,DEST) VSPLATf(A,B,DEST) +#define VSTORE(A,B,C) VSTOREf(A,B,C) +#define VSTOREu(A,B,C) VSTOREuf(A,B,C) +#define MULT_2SPIN_QPX_LS(ptr,p) MULT_2SPIN_QPX_LSf(ptr,p) +#define MULT_2SPIN_QPX(ptr,p) MULT_2SPIN_QPXf(ptr,p) + diff --git a/lib/simd/Intel512wilson.h b/lib/simd/Intel512wilson.h index 6d0d6f63..64142a2e 100644 --- a/lib/simd/Intel512wilson.h +++ b/lib/simd/Intel512wilson.h @@ -31,21 +31,21 @@ Author: paboyle ////////////////////////////////////////////////////////////////////////////////////////// // Register allocations for Wilson Kernel are precision indept ////////////////////////////////////////////////////////////////////////////////////////// -#define result_00 %zmm0 -#define result_01 %zmm1 -#define result_02 %zmm2 +#define psi_00 %zmm0 +#define psi_01 %zmm1 +#define psi_02 %zmm2 -#define result_10 %zmm3 -#define result_11 
%zmm4 -#define result_12 %zmm5 +#define psi_10 %zmm3 +#define psi_11 %zmm4 +#define psi_12 %zmm5 -#define result_20 %zmm6 -#define result_21 %zmm7 -#define result_22 %zmm8 +#define psi_20 %zmm6 +#define psi_21 %zmm7 +#define psi_22 %zmm8 -#define result_30 %zmm9 -#define result_31 %zmm10 -#define result_32 %zmm11 +#define psi_30 %zmm9 +#define psi_31 %zmm10 +#define psi_32 %zmm11 #define Chi_00 %zmm12 #define Chi_01 %zmm13 @@ -98,34 +98,50 @@ Author: paboyle // a little as some duplication developed during trying different // variants during optimisation. Could cut back to only those used. ////////////////////////////////////////////////////////////////// +#define LOCK_GAUGE(dir) +#define UNLOCK_GAUGE(dir) // const SiteSpinor * ptr = & in._odata[offset]; -#define LOAD_CHIMU(PTR) LOAD_CHIMUi(PTR) +#define LOAD_CHIMU(PTR) LOAD64(%r8,PTR) __asm__ ( LOAD_CHIMUi ); #define LOAD_CHI(PTR) LOAD64(%r8,PTR) __asm__ ( LOAD_CHIi ); #define SAVE_UCHI(PTR) SAVE_UCHIi(PTR) #define SAVE_CHI(PTR) SAVE_CHIi(PTR) #define SAVE_RESULT(PT,R) SAVE_RESULTi(PT,R) +#define ADD_RESULT(PT,R) ADD_RESULTi(PT,R) -#define LOAD_CHIMUi \ - LOAD_CHIMU01i \ - LOAD_CHIMU23i ); +#define ZERO_PSI \ + asm( VZERO(psi_00) \ + VZERO(psi_01) \ + VZERO(psi_02) \ + VZERO(psi_10) \ + VZERO(psi_11) \ + VZERO(psi_12) \ + VZERO(psi_20) \ + VZERO(psi_21) \ + VZERO(psi_22) \ + VZERO(psi_30) \ + VZERO(psi_31) \ + VZERO(psi_32)); +#define LOAD_CHIMUi \ + LOAD_CHIMU01i \ + LOAD_CHIMU23i -#define LOAD_CHIMU01i\ - VLOAD(0,%r8,Chimu_00) \ - VLOAD(1,%r8,Chimu_01) \ - VLOAD(2,%r8,Chimu_02) \ - VLOAD(3,%r8,Chimu_10) \ - VLOAD(4,%r8,Chimu_11) \ - VLOAD(5,%r8,Chimu_12) +#define LOAD_CHIMU01i \ + VLOAD(0,%r8,Chimu_00) \ + VLOAD(1,%r8,Chimu_01) \ + VLOAD(2,%r8,Chimu_02) \ + VLOAD(3,%r8,Chimu_10) \ + VLOAD(4,%r8,Chimu_11) \ + VLOAD(5,%r8,Chimu_12) -#define LOAD_CHIMU23i\ - VLOAD(6,%r8,Chimu_20) \ - VLOAD(7,%r8,Chimu_21) \ - VLOAD(8,%r8,Chimu_22) \ - VLOAD(9,%r8,Chimu_30) \ - VLOAD(10,%r8,Chimu_31) \ - VLOAD(11,%r8,Chimu_32) 
+#define LOAD_CHIMU23i \ + VLOAD(6,%r8,Chimu_20) \ + VLOAD(7,%r8,Chimu_21) \ + VLOAD(8,%r8,Chimu_22) \ + VLOAD(9,%r8,Chimu_30) \ + VLOAD(10,%r8,Chimu_31) \ + VLOAD(11,%r8,Chimu_32) #define SHUF_CHIMU23i\ VSHUFMEM(6,%r8,Chimu_20) \ @@ -135,9 +151,6 @@ Author: paboyle VSHUFMEM(10,%r8,Chimu_31) \ VSHUFMEM(11,%r8,Chimu_32) - -// const SiteHalfSpinor *ptr = &buf[offset]; - #define LOAD_CHIi \ VLOAD(0,%r8,Chi_00) \ VLOAD(1,%r8,Chi_01) \ @@ -145,7 +158,6 @@ Author: paboyle VLOAD(3,%r8,Chi_10) \ VLOAD(4,%r8,Chi_11) \ VLOAD(5,%r8,Chi_12) - #define SAVE_UCHIi(PTR) \ LOAD64(%r8,PTR) \ @@ -155,8 +167,7 @@ Author: paboyle VSTORE(2,%r8,UChi_02) \ VSTORE(3,%r8,UChi_10) \ VSTORE(4,%r8,UChi_11) \ - VSTORE(5,%r8,UChi_12) \ - ); + VSTORE(5,%r8,UChi_12) ); #define SAVE_CHIi(PTR) \ LOAD64(%r8,PTR) \ @@ -166,33 +177,14 @@ Author: paboyle VSTORE(2,%r8,Chi_02) \ VSTORE(3,%r8,Chi_10) \ VSTORE(4,%r8,Chi_11) \ - VSTORE(5,%r8,Chi_12) \ - ); + VSTORE(5,%r8,Chi_12) ); - -#define MULT_2SPIN_DIR_PFXP(A,p) MULT_2SPIN_PFXP(&U._odata[sU](A),p) -#define MULT_2SPIN_DIR_PFYP(A,p) MULT_2SPIN_PFYP(&U._odata[sU](A),p) -#define MULT_2SPIN_DIR_PFZP(A,p) MULT_2SPIN_PFZP(&U._odata[sU](A),p) -#define MULT_2SPIN_DIR_PFTP(A,p) MULT_2SPIN_PFTP(&U._odata[sU](A),p) - -#define MULT_2SPIN_DIR_PFXM(A,p) MULT_2SPIN_PFXM(&U._odata[sU](A),p) -#define MULT_2SPIN_DIR_PFYM(A,p) MULT_2SPIN_PFYM(&U._odata[sU](A),p) -#define MULT_2SPIN_DIR_PFZM(A,p) MULT_2SPIN_PFZM(&U._odata[sU](A),p) -#define MULT_2SPIN_DIR_PFTM(A,p) MULT_2SPIN_PFTM(&U._odata[sU](A),p) - -#define MULT_2SPIN_PFXM(ptr,pf) MULT_2SPIN(ptr,pf) -#define MULT_2SPIN_PFYM(ptr,pf) MULT_2SPIN(ptr,pf) -#define MULT_2SPIN_PFZM(ptr,pf) MULT_2SPIN(ptr,pf) -#define MULT_2SPIN_PFTM(ptr,pf) MULT_2SPIN(ptr,pf) -#define MULT_2SPIN_PFTP(ptr,pf) MULT_2SPIN(ptr,pf) -#define MULT_2SPIN_PFZP(ptr,pf) MULT_2SPIN(ptr,pf) -#define MULT_2SPIN_PFYP(ptr,pf) MULT_2SPIN(ptr,pf) -#define MULT_2SPIN_PFXP(ptr,pf) MULT_2SPIN(ptr,pf) +#define MULT_2SPIN_DIR_PF(A,p) 
MULT_2SPIN_PF(&U._odata[sU](A),p) +#define MULT_2SPIN_PF(ptr,pf) MULT_2SPIN(ptr,pf) ////////////////////////////////////////////////////////////////// // Dirac algebra ////////////////////////////////////////////////////////////////// - // hspin(0)=fspin(0)+timesI(fspin(3)); // hspin(1)=fspin(1)+timesI(fspin(2)); #define XP_PROJMEM(PTR) \ @@ -257,7 +249,6 @@ Author: paboyle // hspin(0)=fspin(0)-timesI(fspin(3)) // hspin(1)=fspin(1)-timesI(fspin(2)) - #define XM_PROJMEM(PTR) \ LOAD64(%r8,PTR)\ __asm__ ( \ @@ -322,226 +313,226 @@ Author: paboyle // fspin(3)=timesMinusI(hspin(0)) #define XP_RECON __asm__ ( \ VZERO(TMP) \ - VTIMESMINUSI0(UChi_00,result_30,TMP) \ - VTIMESMINUSI0(UChi_10,result_20,TMP) \ - VTIMESMINUSI0(UChi_01,result_31,TMP) \ - VTIMESMINUSI0(UChi_11,result_21,TMP) \ - VTIMESMINUSI0(UChi_02,result_32,TMP) \ - VTIMESMINUSI0(UChi_12,result_22,TMP) \ - VMOV(UChi_00,result_00) \ - VMOV(UChi_10,result_10) \ - VMOV(UChi_01,result_01) \ - VMOV(UChi_11,result_11) \ - VMOV(UChi_02,result_02) \ - VMOV(UChi_12,result_12) \ - VTIMESMINUSI1(UChi_10,result_20,TMP) \ - VTIMESMINUSI1(UChi_11,result_21,TMP) \ - VTIMESMINUSI1(UChi_12,result_22,TMP) \ - VTIMESMINUSI1(UChi_00,result_30,TMP) \ - VTIMESMINUSI1(UChi_01,result_31,TMP) \ - VTIMESMINUSI1(UChi_02,result_32,TMP) \ - VTIMESMINUSI2(UChi_10,result_20,TMP) \ - VTIMESMINUSI2(UChi_11,result_21,TMP) \ - VTIMESMINUSI2(UChi_12,result_22,TMP) \ - VTIMESMINUSI2(UChi_00,result_30,TMP) \ - VTIMESMINUSI2(UChi_01,result_31,TMP) \ - VTIMESMINUSI2(UChi_02,result_32,TMP) \ + VTIMESMINUSI0(UChi_00,psi_30,TMP) \ + VTIMESMINUSI0(UChi_10,psi_20,TMP) \ + VTIMESMINUSI0(UChi_01,psi_31,TMP) \ + VTIMESMINUSI0(UChi_11,psi_21,TMP) \ + VTIMESMINUSI0(UChi_02,psi_32,TMP) \ + VTIMESMINUSI0(UChi_12,psi_22,TMP) \ + VMOV(UChi_00,psi_00) \ + VMOV(UChi_10,psi_10) \ + VMOV(UChi_01,psi_01) \ + VMOV(UChi_11,psi_11) \ + VMOV(UChi_02,psi_02) \ + VMOV(UChi_12,psi_12) \ + VTIMESMINUSI1(UChi_10,psi_20,TMP) \ + VTIMESMINUSI1(UChi_11,psi_21,TMP) \ + 
VTIMESMINUSI1(UChi_12,psi_22,TMP) \ + VTIMESMINUSI1(UChi_00,psi_30,TMP) \ + VTIMESMINUSI1(UChi_01,psi_31,TMP) \ + VTIMESMINUSI1(UChi_02,psi_32,TMP) \ + VTIMESMINUSI2(UChi_10,psi_20,TMP) \ + VTIMESMINUSI2(UChi_11,psi_21,TMP) \ + VTIMESMINUSI2(UChi_12,psi_22,TMP) \ + VTIMESMINUSI2(UChi_00,psi_30,TMP) \ + VTIMESMINUSI2(UChi_01,psi_31,TMP) \ + VTIMESMINUSI2(UChi_02,psi_32,TMP) \ ); // NB could save 6 ops using addsub => 12 cycles #define XP_RECON_ACCUM __asm__ ( \ VZERO(TMP)\ - VACCTIMESMINUSI0(UChi_00,result_30,Z3)\ - VACCTIMESMINUSI0(UChi_10,result_20,Z0)\ - VACCTIMESMINUSI0(UChi_01,result_31,Z4)\ - VACCTIMESMINUSI0(UChi_11,result_21,Z1)\ - VACCTIMESMINUSI0(UChi_02,result_32,Z5)\ - VACCTIMESMINUSI0(UChi_12,result_22,Z2)\ - VADD(UChi_00,result_00,result_00)\ - VADD(UChi_10,result_10,result_10)\ - VADD(UChi_01,result_01,result_01)\ - VADD(UChi_11,result_11,result_11)\ - VADD(UChi_02,result_02,result_02)\ - VADD(UChi_12,result_12,result_12)\ - VACCTIMESMINUSI1(UChi_00,result_30,Z3)\ - VACCTIMESMINUSI1(UChi_10,result_20,Z0)\ - VACCTIMESMINUSI1(UChi_01,result_31,Z4)\ - VACCTIMESMINUSI1(UChi_11,result_21,Z1)\ - VACCTIMESMINUSI1(UChi_02,result_32,Z5)\ - VACCTIMESMINUSI1(UChi_12,result_22,Z2)\ - VACCTIMESMINUSI2(UChi_10,result_20,Z0)\ - VACCTIMESMINUSI2(UChi_11,result_21,Z1)\ - VACCTIMESMINUSI2(UChi_12,result_22,Z2)\ - VACCTIMESMINUSI2(UChi_00,result_30,Z3)\ - VACCTIMESMINUSI2(UChi_01,result_31,Z4)\ - VACCTIMESMINUSI2(UChi_02,result_32,Z5)\ + VACCTIMESMINUSI0(UChi_00,psi_30,Z3)\ + VACCTIMESMINUSI0(UChi_10,psi_20,Z0)\ + VACCTIMESMINUSI0(UChi_01,psi_31,Z4)\ + VACCTIMESMINUSI0(UChi_11,psi_21,Z1)\ + VACCTIMESMINUSI0(UChi_02,psi_32,Z5)\ + VACCTIMESMINUSI0(UChi_12,psi_22,Z2)\ + VADD(UChi_00,psi_00,psi_00)\ + VADD(UChi_10,psi_10,psi_10)\ + VADD(UChi_01,psi_01,psi_01)\ + VADD(UChi_11,psi_11,psi_11)\ + VADD(UChi_02,psi_02,psi_02)\ + VADD(UChi_12,psi_12,psi_12)\ + VACCTIMESMINUSI1(UChi_00,psi_30,Z3)\ + VACCTIMESMINUSI1(UChi_10,psi_20,Z0)\ + VACCTIMESMINUSI1(UChi_01,psi_31,Z4)\ + 
VACCTIMESMINUSI1(UChi_11,psi_21,Z1)\ + VACCTIMESMINUSI1(UChi_02,psi_32,Z5)\ + VACCTIMESMINUSI1(UChi_12,psi_22,Z2)\ + VACCTIMESMINUSI2(UChi_10,psi_20,Z0)\ + VACCTIMESMINUSI2(UChi_11,psi_21,Z1)\ + VACCTIMESMINUSI2(UChi_12,psi_22,Z2)\ + VACCTIMESMINUSI2(UChi_00,psi_30,Z3)\ + VACCTIMESMINUSI2(UChi_01,psi_31,Z4)\ + VACCTIMESMINUSI2(UChi_02,psi_32,Z5)\ ); #define XM_RECON __asm__ ( \ VZERO(TMP)\ - VTIMESI0(UChi_00,result_30,TMP)\ - VTIMESI0(UChi_10,result_20,TMP)\ - VTIMESI0(UChi_01,result_31,TMP)\ - VTIMESI0(UChi_11,result_21,TMP)\ - VTIMESI0(UChi_02,result_32,TMP)\ - VTIMESI0(UChi_12,result_22,TMP)\ - VMOV(UChi_00,result_00)\ - VMOV(UChi_10,result_10)\ - VMOV(UChi_01,result_01)\ - VMOV(UChi_11,result_11)\ - VMOV(UChi_02,result_02)\ - VMOV(UChi_12,result_12)\ - VTIMESI1(UChi_00,result_30,TMP)\ - VTIMESI1(UChi_10,result_20,TMP)\ - VTIMESI1(UChi_01,result_31,TMP)\ - VTIMESI1(UChi_11,result_21,TMP)\ - VTIMESI1(UChi_02,result_32,TMP)\ - VTIMESI1(UChi_12,result_22,TMP)\ - VTIMESI2(UChi_10,result_20,TMP)\ - VTIMESI2(UChi_11,result_21,TMP)\ - VTIMESI2(UChi_12,result_22,TMP)\ - VTIMESI2(UChi_00,result_30,TMP)\ - VTIMESI2(UChi_01,result_31,TMP)\ - VTIMESI2(UChi_02,result_32,TMP)\ + VTIMESI0(UChi_00,psi_30,TMP)\ + VTIMESI0(UChi_10,psi_20,TMP)\ + VTIMESI0(UChi_01,psi_31,TMP)\ + VTIMESI0(UChi_11,psi_21,TMP)\ + VTIMESI0(UChi_02,psi_32,TMP)\ + VTIMESI0(UChi_12,psi_22,TMP)\ + VMOV(UChi_00,psi_00)\ + VMOV(UChi_10,psi_10)\ + VMOV(UChi_01,psi_01)\ + VMOV(UChi_11,psi_11)\ + VMOV(UChi_02,psi_02)\ + VMOV(UChi_12,psi_12)\ + VTIMESI1(UChi_00,psi_30,TMP)\ + VTIMESI1(UChi_10,psi_20,TMP)\ + VTIMESI1(UChi_01,psi_31,TMP)\ + VTIMESI1(UChi_11,psi_21,TMP)\ + VTIMESI1(UChi_02,psi_32,TMP)\ + VTIMESI1(UChi_12,psi_22,TMP)\ + VTIMESI2(UChi_10,psi_20,TMP)\ + VTIMESI2(UChi_11,psi_21,TMP)\ + VTIMESI2(UChi_12,psi_22,TMP)\ + VTIMESI2(UChi_00,psi_30,TMP)\ + VTIMESI2(UChi_01,psi_31,TMP)\ + VTIMESI2(UChi_02,psi_32,TMP)\ ); #define XM_RECON_ACCUM __asm__ ( \ - VACCTIMESI0(UChi_10,result_20,Z0)\ - 
VACCTIMESI0(UChi_00,result_30,Z3)\ - VACCTIMESI0(UChi_11,result_21,Z1)\ - VACCTIMESI0(UChi_01,result_31,Z4)\ - VACCTIMESI0(UChi_12,result_22,Z2)\ - VACCTIMESI0(UChi_02,result_32,Z5)\ + VACCTIMESI0(UChi_10,psi_20,Z0)\ + VACCTIMESI0(UChi_00,psi_30,Z3)\ + VACCTIMESI0(UChi_11,psi_21,Z1)\ + VACCTIMESI0(UChi_01,psi_31,Z4)\ + VACCTIMESI0(UChi_12,psi_22,Z2)\ + VACCTIMESI0(UChi_02,psi_32,Z5)\ \ - VADD(UChi_10,result_10,result_10)\ - VADD(UChi_00,result_00,result_00)\ - VADD(UChi_11,result_11,result_11)\ - VADD(UChi_01,result_01,result_01)\ - VADD(UChi_12,result_12,result_12)\ - VADD(UChi_02,result_02,result_02)\ + VADD(UChi_10,psi_10,psi_10)\ + VADD(UChi_00,psi_00,psi_00)\ + VADD(UChi_11,psi_11,psi_11)\ + VADD(UChi_01,psi_01,psi_01)\ + VADD(UChi_12,psi_12,psi_12)\ + VADD(UChi_02,psi_02,psi_02)\ \ - VACCTIMESI1(UChi_10,result_20,Z0)\ - VACCTIMESI1(UChi_00,result_30,Z3)\ - VACCTIMESI1(UChi_11,result_21,Z1)\ - VACCTIMESI1(UChi_01,result_31,Z4)\ - VACCTIMESI1(UChi_12,result_22,Z2)\ - VACCTIMESI1(UChi_02,result_32,Z5)\ - VACCTIMESI2(UChi_10,result_20,Z0)\ - VACCTIMESI2(UChi_11,result_21,Z1)\ - VACCTIMESI2(UChi_12,result_22,Z2)\ - VACCTIMESI2(UChi_00,result_30,Z3)\ - VACCTIMESI2(UChi_01,result_31,Z4)\ - VACCTIMESI2(UChi_02,result_32,Z5)\ + VACCTIMESI1(UChi_10,psi_20,Z0)\ + VACCTIMESI1(UChi_00,psi_30,Z3)\ + VACCTIMESI1(UChi_11,psi_21,Z1)\ + VACCTIMESI1(UChi_01,psi_31,Z4)\ + VACCTIMESI1(UChi_12,psi_22,Z2)\ + VACCTIMESI1(UChi_02,psi_32,Z5)\ + VACCTIMESI2(UChi_10,psi_20,Z0)\ + VACCTIMESI2(UChi_11,psi_21,Z1)\ + VACCTIMESI2(UChi_12,psi_22,Z2)\ + VACCTIMESI2(UChi_00,psi_30,Z3)\ + VACCTIMESI2(UChi_01,psi_31,Z4)\ + VACCTIMESI2(UChi_02,psi_32,Z5)\ ); #define YP_RECON_ACCUM __asm__ ( \ - VADD(UChi_00,result_00,result_00)\ - VADD(UChi_10,result_10,result_10)\ - VADD(UChi_01,result_01,result_01)\ - VADD(UChi_11,result_11,result_11)\ - VADD(UChi_02,result_02,result_02)\ - VADD(UChi_12,result_12,result_12)\ - VADD(UChi_10,result_20,result_20)\ - VADD(UChi_11,result_21,result_21)\ - 
VADD(UChi_12,result_22,result_22)\ - VSUB(UChi_00,result_30,result_30)\ - VSUB(UChi_01,result_31,result_31)\ - VSUB(UChi_02,result_32,result_32) ); + VADD(UChi_00,psi_00,psi_00)\ + VADD(UChi_10,psi_10,psi_10)\ + VADD(UChi_01,psi_01,psi_01)\ + VADD(UChi_11,psi_11,psi_11)\ + VADD(UChi_02,psi_02,psi_02)\ + VADD(UChi_12,psi_12,psi_12)\ + VADD(UChi_10,psi_20,psi_20)\ + VADD(UChi_11,psi_21,psi_21)\ + VADD(UChi_12,psi_22,psi_22)\ + VSUB(UChi_00,psi_30,psi_30)\ + VSUB(UChi_01,psi_31,psi_31)\ + VSUB(UChi_02,psi_32,psi_32) ); #define YM_RECON_ACCUM __asm__ ( \ - VADD(UChi_00,result_00,result_00)\ - VADD(UChi_10,result_10,result_10)\ - VADD(UChi_01,result_01,result_01)\ - VADD(UChi_11,result_11,result_11)\ - VADD(UChi_02,result_02,result_02)\ - VADD(UChi_12,result_12,result_12)\ - VSUB(UChi_10,result_20,result_20)\ - VSUB(UChi_11,result_21,result_21)\ - VSUB(UChi_12,result_22,result_22)\ - VADD(UChi_00,result_30,result_30)\ - VADD(UChi_01,result_31,result_31)\ - VADD(UChi_02,result_32,result_32) ); + VADD(UChi_00,psi_00,psi_00)\ + VADD(UChi_10,psi_10,psi_10)\ + VADD(UChi_01,psi_01,psi_01)\ + VADD(UChi_11,psi_11,psi_11)\ + VADD(UChi_02,psi_02,psi_02)\ + VADD(UChi_12,psi_12,psi_12)\ + VSUB(UChi_10,psi_20,psi_20)\ + VSUB(UChi_11,psi_21,psi_21)\ + VSUB(UChi_12,psi_22,psi_22)\ + VADD(UChi_00,psi_30,psi_30)\ + VADD(UChi_01,psi_31,psi_31)\ + VADD(UChi_02,psi_32,psi_32) ); #define ZP_RECON_ACCUM __asm__ ( \ - VACCTIMESMINUSI0(UChi_00,result_20,Z0)\ - VACCTIMESI0(UChi_10,result_30,Z3)\ - VACCTIMESMINUSI0(UChi_01,result_21,Z1)\ - VACCTIMESI0(UChi_11,result_31,Z4)\ - VACCTIMESMINUSI0(UChi_02,result_22,Z2)\ - VACCTIMESI0(UChi_12,result_32,Z5)\ - VADD(UChi_00,result_00,result_00)\ - VADD(UChi_10,result_10,result_10)\ - VADD(UChi_01,result_01,result_01)\ - VADD(UChi_11,result_11,result_11)\ - VADD(UChi_02,result_02,result_02)\ - VADD(UChi_12,result_12,result_12)\ - VACCTIMESMINUSI1(UChi_00,result_20,Z0)\ - VACCTIMESI1(UChi_10,result_30,Z3)\ - VACCTIMESMINUSI1(UChi_01,result_21,Z1)\ - 
VACCTIMESI1(UChi_11,result_31,Z4)\ - VACCTIMESMINUSI1(UChi_02,result_22,Z2)\ - VACCTIMESI1(UChi_12,result_32,Z5)\ - VACCTIMESMINUSI2(UChi_00,result_20,Z0)\ - VACCTIMESMINUSI2(UChi_01,result_21,Z1)\ - VACCTIMESMINUSI2(UChi_02,result_22,Z2)\ - VACCTIMESI2(UChi_10,result_30,Z3)\ - VACCTIMESI2(UChi_11,result_31,Z4)\ - VACCTIMESI2(UChi_12,result_32,Z5)\ + VACCTIMESMINUSI0(UChi_00,psi_20,Z0)\ + VACCTIMESI0(UChi_10,psi_30,Z3)\ + VACCTIMESMINUSI0(UChi_01,psi_21,Z1)\ + VACCTIMESI0(UChi_11,psi_31,Z4)\ + VACCTIMESMINUSI0(UChi_02,psi_22,Z2)\ + VACCTIMESI0(UChi_12,psi_32,Z5)\ + VADD(UChi_00,psi_00,psi_00)\ + VADD(UChi_10,psi_10,psi_10)\ + VADD(UChi_01,psi_01,psi_01)\ + VADD(UChi_11,psi_11,psi_11)\ + VADD(UChi_02,psi_02,psi_02)\ + VADD(UChi_12,psi_12,psi_12)\ + VACCTIMESMINUSI1(UChi_00,psi_20,Z0)\ + VACCTIMESI1(UChi_10,psi_30,Z3)\ + VACCTIMESMINUSI1(UChi_01,psi_21,Z1)\ + VACCTIMESI1(UChi_11,psi_31,Z4)\ + VACCTIMESMINUSI1(UChi_02,psi_22,Z2)\ + VACCTIMESI1(UChi_12,psi_32,Z5)\ + VACCTIMESMINUSI2(UChi_00,psi_20,Z0)\ + VACCTIMESMINUSI2(UChi_01,psi_21,Z1)\ + VACCTIMESMINUSI2(UChi_02,psi_22,Z2)\ + VACCTIMESI2(UChi_10,psi_30,Z3)\ + VACCTIMESI2(UChi_11,psi_31,Z4)\ + VACCTIMESI2(UChi_12,psi_32,Z5)\ ); #define ZM_RECON_ACCUM __asm__ ( \ - VACCTIMESI0(UChi_00,result_20,Z0)\ - VACCTIMESMINUSI0(UChi_10,result_30,Z3)\ - VACCTIMESI0(UChi_01,result_21,Z1)\ - VACCTIMESMINUSI0(UChi_11,result_31,Z4)\ - VACCTIMESI0(UChi_02,result_22,Z2)\ - VACCTIMESMINUSI0(UChi_12,result_32,Z5)\ - VADD(UChi_00,result_00,result_00)\ - VADD(UChi_10,result_10,result_10)\ - VADD(UChi_01,result_01,result_01)\ - VADD(UChi_11,result_11,result_11)\ - VADD(UChi_02,result_02,result_02)\ - VADD(UChi_12,result_12,result_12)\ - VACCTIMESI1(UChi_00,result_20,Z0)\ - VACCTIMESMINUSI1(UChi_10,result_30,Z3)\ - VACCTIMESI1(UChi_01,result_21,Z1)\ - VACCTIMESMINUSI1(UChi_11,result_31,Z4)\ - VACCTIMESI1(UChi_02,result_22,Z2)\ - VACCTIMESMINUSI1(UChi_12,result_32,Z5)\ - VACCTIMESI2(UChi_00,result_20,Z0)\ - 
VACCTIMESI2(UChi_01,result_21,Z1)\ - VACCTIMESI2(UChi_02,result_22,Z2)\ - VACCTIMESMINUSI2(UChi_10,result_30,Z3)\ - VACCTIMESMINUSI2(UChi_11,result_31,Z4)\ - VACCTIMESMINUSI2(UChi_12,result_32,Z5)\ + VACCTIMESI0(UChi_00,psi_20,Z0)\ + VACCTIMESMINUSI0(UChi_10,psi_30,Z3)\ + VACCTIMESI0(UChi_01,psi_21,Z1)\ + VACCTIMESMINUSI0(UChi_11,psi_31,Z4)\ + VACCTIMESI0(UChi_02,psi_22,Z2)\ + VACCTIMESMINUSI0(UChi_12,psi_32,Z5)\ + VADD(UChi_00,psi_00,psi_00)\ + VADD(UChi_10,psi_10,psi_10)\ + VADD(UChi_01,psi_01,psi_01)\ + VADD(UChi_11,psi_11,psi_11)\ + VADD(UChi_02,psi_02,psi_02)\ + VADD(UChi_12,psi_12,psi_12)\ + VACCTIMESI1(UChi_00,psi_20,Z0)\ + VACCTIMESMINUSI1(UChi_10,psi_30,Z3)\ + VACCTIMESI1(UChi_01,psi_21,Z1)\ + VACCTIMESMINUSI1(UChi_11,psi_31,Z4)\ + VACCTIMESI1(UChi_02,psi_22,Z2)\ + VACCTIMESMINUSI1(UChi_12,psi_32,Z5)\ + VACCTIMESI2(UChi_00,psi_20,Z0)\ + VACCTIMESI2(UChi_01,psi_21,Z1)\ + VACCTIMESI2(UChi_02,psi_22,Z2)\ + VACCTIMESMINUSI2(UChi_10,psi_30,Z3)\ + VACCTIMESMINUSI2(UChi_11,psi_31,Z4)\ + VACCTIMESMINUSI2(UChi_12,psi_32,Z5)\ ); #define TP_RECON_ACCUM __asm__ ( \ - VADD(UChi_00,result_00,result_00)\ - VADD(UChi_10,result_10,result_10)\ - VADD(UChi_01,result_01,result_01)\ - VADD(UChi_11,result_11,result_11)\ - VADD(UChi_02,result_02,result_02)\ - VADD(UChi_12,result_12,result_12)\ - VADD(UChi_00,result_20,result_20)\ - VADD(UChi_10,result_30,result_30)\ - VADD(UChi_01,result_21,result_21)\ - VADD(UChi_11,result_31,result_31)\ - VADD(UChi_02,result_22,result_22)\ - VADD(UChi_12,result_32,result_32) ); + VADD(UChi_00,psi_00,psi_00)\ + VADD(UChi_10,psi_10,psi_10)\ + VADD(UChi_01,psi_01,psi_01)\ + VADD(UChi_11,psi_11,psi_11)\ + VADD(UChi_02,psi_02,psi_02)\ + VADD(UChi_12,psi_12,psi_12)\ + VADD(UChi_00,psi_20,psi_20)\ + VADD(UChi_10,psi_30,psi_30)\ + VADD(UChi_01,psi_21,psi_21)\ + VADD(UChi_11,psi_31,psi_31)\ + VADD(UChi_02,psi_22,psi_22)\ + VADD(UChi_12,psi_32,psi_32) ); #define TM_RECON_ACCUM __asm__ ( \ - VADD(UChi_00,result_00,result_00)\ - 
VADD(UChi_10,result_10,result_10)\ - VADD(UChi_01,result_01,result_01)\ - VADD(UChi_11,result_11,result_11)\ - VADD(UChi_02,result_02,result_02)\ - VADD(UChi_12,result_12,result_12)\ - VSUB(UChi_00,result_20,result_20)\ - VSUB(UChi_10,result_30,result_30)\ - VSUB(UChi_01,result_21,result_21)\ - VSUB(UChi_11,result_31,result_31)\ - VSUB(UChi_02,result_22,result_22)\ - VSUB(UChi_12,result_32,result_32) ); + VADD(UChi_00,psi_00,psi_00)\ + VADD(UChi_10,psi_10,psi_10)\ + VADD(UChi_01,psi_01,psi_01)\ + VADD(UChi_11,psi_11,psi_11)\ + VADD(UChi_02,psi_02,psi_02)\ + VADD(UChi_12,psi_12,psi_12)\ + VSUB(UChi_00,psi_20,psi_20)\ + VSUB(UChi_10,psi_30,psi_30)\ + VSUB(UChi_01,psi_21,psi_21)\ + VSUB(UChi_11,psi_31,psi_31)\ + VSUB(UChi_02,psi_22,psi_22)\ + VSUB(UChi_12,psi_32,psi_32) ); #define AVX512_PF_L1 #define AVX512_PF_L2_GAUGE @@ -580,22 +571,62 @@ Author: paboyle LOAD64(%r8,PTR) \ LOAD64(%r9,pf) \ __asm__ ( \ - VSTORE(0,%r8,result_00) VPREFETCH_M1(0,%r9) \ - VSTORE(1,%r8,result_01) VPREFETCH_M1(1,%r9) \ - VSTORE(2,%r8,result_02) VPREFETCH_M1(2,%r9) \ - VSTORE(3,%r8,result_10) VPREFETCH_M1(3,%r9) \ - VSTORE(4,%r8,result_11) VPREFETCH_M1(4,%r9) \ - VSTORE(5,%r8,result_12) VPREFETCH_M1(5,%r9) \ - VSTORE(6,%r8,result_20) VPREFETCH_M1(6,%r9) \ - VSTORE(7,%r8,result_21) VPREFETCH_M1(7,%r9) \ - VSTORE(8,%r8,result_22) VPREFETCH_M1(8,%r9) \ - VSTORE(9,%r8,result_30) VPREFETCH_M1(9,%r9) \ - VSTORE(10,%r8,result_31) VPREFETCH_M1(10,%r9) \ - VSTORE(11,%r8,result_32) VPREFETCH_M1(11,%r9) \ + VSTORE(0,%r8,psi_00) VPREFETCH_M1(0,%r9) \ + VSTORE(1,%r8,psi_01) VPREFETCH_M1(1,%r9) \ + VSTORE(2,%r8,psi_02) VPREFETCH_M1(2,%r9) \ + VSTORE(3,%r8,psi_10) VPREFETCH_M1(3,%r9) \ + VSTORE(4,%r8,psi_11) VPREFETCH_M1(4,%r9) \ + VSTORE(5,%r8,psi_12) VPREFETCH_M1(5,%r9) \ + VSTORE(6,%r8,psi_20) VPREFETCH_M1(6,%r9) \ + VSTORE(7,%r8,psi_21) VPREFETCH_M1(7,%r9) \ + VSTORE(8,%r8,psi_22) VPREFETCH_M1(8,%r9) \ + VSTORE(9,%r8,psi_30) VPREFETCH_M1(9,%r9) \ + VSTORE(10,%r8,psi_31) VPREFETCH_M1(10,%r9) \ + 
VSTORE(11,%r8,psi_32) VPREFETCH_M1(11,%r9) \ ); +#define ADD_RESULTi(PTR,pf) \ + LOAD_CHIMU(PTR); \ + asm(VADD(psi_00,Chimu_00,psi_00) VADD(psi_01,Chimu_01,psi_01) VADD(psi_02,Chimu_02,psi_02) \ + VADD(psi_10,Chimu_10,psi_10) VADD(psi_11,Chimu_11,psi_11) VADD(psi_12,Chimu_12,psi_12) \ + VADD(psi_20,Chimu_20,psi_20) VADD(psi_21,Chimu_21,psi_21) VADD(psi_22,Chimu_22,psi_22) \ + VADD(psi_30,Chimu_30,psi_30) VADD(psi_31,Chimu_31,psi_31) VADD(psi_32,Chimu_32,psi_32) ); \ + SAVE_RESULT(PTR,pf); + + + +#define ADD_RESULTia(PTR,pf) \ + LOAD64(%r8,PTR) \ + __asm__ ( \ + VADDMEM(0,%r8,psi_00,psi_00) \ + VADDMEM(1,%r8,psi_01,psi_01) \ + VADDMEM(2,%r8,psi_02,psi_02) \ + VADDMEM(3,%r8,psi_10,psi_10) \ + VADDMEM(4,%r8,psi_11,psi_11) \ + VADDMEM(5,%r8,psi_12,psi_12) \ + VADDMEM(6,%r8,psi_20,psi_20) \ + VADDMEM(7,%r8,psi_21,psi_21) \ + VADDMEM(8,%r8,psi_22,psi_22) \ + VADDMEM(9,%r8,psi_30,psi_30) \ + VADDMEM(10,%r8,psi_31,psi_31) \ + VADDMEM(11,%r8,psi_32,psi_32) \ + VSTORE(0,%r8,psi_00) \ + VSTORE(1,%r8,psi_01) \ + VSTORE(2,%r8,psi_02) \ + VSTORE(3,%r8,psi_10) \ + VSTORE(4,%r8,psi_11) \ + VSTORE(5,%r8,psi_12) \ + VSTORE(6,%r8,psi_20) \ + VSTORE(7,%r8,psi_21) \ + VSTORE(8,%r8,psi_22) \ + VSTORE(9,%r8,psi_30) \ + VSTORE(10,%r8,psi_31) \ + VSTORE(11,%r8,psi_32) \ + ); + + #ifdef AVX512_PF_L2_TABLE -#define PREFETCH_CHIMU(A) \ +#define PREFETCH_CHIMU(A) \ LOAD64(%r9,A) \ __asm__ ( \ VPREFETCH_P1(0,%r9) \ diff --git a/lib/Simd.h b/lib/simd/Simd.h similarity index 99% rename from lib/Simd.h rename to lib/simd/Simd.h index adc2849d..3f2b10dc 100644 --- a/lib/Simd.h +++ b/lib/simd/Simd.h @@ -172,8 +172,8 @@ namespace Grid { }; -#include "simd/Grid_vector_types.h" -#include "simd/Grid_vector_unops.h" +#include +#include namespace Grid { // Default precision diff --git a/lib/stencil/Lebesgue.cc b/lib/stencil/Lebesgue.cc index 97dd2cf4..4551878c 100644 --- a/lib/stencil/Lebesgue.cc +++ b/lib/stencil/Lebesgue.cc @@ -26,7 +26,7 @@ Author: paboyle See the full license in the file "LICENSE" in 
the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include +#include #include namespace Grid { diff --git a/lib/stencil/Stencil.cc b/lib/stencil/Stencil.cc new file mode 100644 index 00000000..e04a5360 --- /dev/null +++ b/lib/stencil/Stencil.cc @@ -0,0 +1,69 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/stencil/Stencil.cc + + Copyright (C) 2015 + + Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +namespace Grid { + +void Gather_plane_table_compute (GridBase *grid,int dimension,int plane,int cbmask, + int off,std::vector > & table) +{ + table.resize(0); + + if ( !grid->CheckerBoarded(dimension) ) { + cbmask = 0x3; + } + int rd = grid->_rdimensions[dimension]; + int so= plane*grid->_ostride[dimension]; // base offset for start of plane + int e1=grid->_slice_nblock[dimension]; + int e2=grid->_slice_block[dimension]; + int stride=grid->_slice_stride[dimension]; + if ( cbmask == 0x3 ) { + table.resize(e1*e2); + for(int n=0;n(bo+b,o+b); + } + } + } else { + int bo=0; + table.resize(e1*e2/2); + for(int n=0;nCheckerBoardFromOindexTable(o+b); + if ( ocb &cbmask ) { + table[bo]=std::pair(bo,o+b); bo++; + } + } + } + } +} + +} diff --git a/lib/Stencil.h b/lib/stencil/Stencil.h similarity index 70% rename from lib/Stencil.h rename to lib/stencil/Stencil.h index 89533b82..479cd979 100644 --- a/lib/Stencil.h +++ b/lib/stencil/Stencil.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -25,13 +25,11 @@ See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ - #ifndef GRID_STENCIL_H - #define GRID_STENCIL_H - - #include - - #include // subdir aggregate +#ifndef GRID_STENCIL_H +#define GRID_STENCIL_H +#include // subdir aggregate +#define NEW_XYZT_GATHER ////////////////////////////////////////////////////////////////////////////////////////// // Must not lose sight that goal is to be able to construct really efficient // gather to a point stencil 
code. CSHIFT is not the best way, so need @@ -70,51 +68,49 @@ namespace Grid { -inline void Gather_plane_simple_table_compute (GridBase *grid,int dimension,int plane,int cbmask, - int off,std::vector > & table) +/////////////////////////////////////////////////////////////////// +// Gather for when there *is* need to SIMD split with compression +/////////////////////////////////////////////////////////////////// +void Gather_plane_table_compute (GridBase *grid,int dimension,int plane,int cbmask, + int off,std::vector > & table); + +template +void Gather_plane_simple_table (std::vector >& table,const Lattice &rhs,cobj *buffer,compressor &compress, int off,int so) __attribute__((noinline)); + +template +void Gather_plane_simple_table (std::vector >& table,const Lattice &rhs,cobj *buffer,compressor &compress, int off,int so) { - table.resize(0); - int rd = grid->_rdimensions[dimension]; - - if ( !grid->CheckerBoarded(dimension) ) { - cbmask = 0x3; - } - int so= plane*grid->_ostride[dimension]; // base offset for start of plane - int e1=grid->_slice_nblock[dimension]; - int e2=grid->_slice_block[dimension]; - - int stride=grid->_slice_stride[dimension]; - if ( cbmask == 0x3 ) { - table.resize(e1*e2); - for(int n=0;n(bo+b,o+b); - } - } - } else { - int bo=0; - table.resize(e1*e2/2); - for(int n=0;nCheckerBoardFromOindexTable(o+b); - if ( ocb &cbmask ) { - table[bo]=std::pair(bo,o+b); bo++; - } - } - } + int num=table.size(); + parallel_for(int i=0;i void -Gather_plane_simple_table (std::vector >& table,const Lattice &rhs,cobj *buffer,compressor &compress, int off,int so) +/////////////////////////////////////////////////////////////////// +// Gather for when there *is* need to SIMD split with compression +/////////////////////////////////////////////////////////////////// +template +void Gather_plane_exchange_table(const Lattice &rhs, + std::vector pointers,int dimension,int plane,int cbmask,compressor &compress,int type) __attribute__((noinline)); + +template +void 
Gather_plane_exchange_table(std::vector >& table,const Lattice &rhs, + std::vector pointers,int dimension,int plane,int cbmask, + compressor &compress,int type) { -PARALLEL_FOR_LOOP - for(int i=0;i_ostride[dimension]; // base offset for start of plane + parallel_for(int j=0;j class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal fill in. public: @@ -159,7 +157,6 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal p.to_rank = to; p.from_rank= from; p.bytes = bytes; - comms_bytes+=2.0*bytes; Packets.push_back(p); } @@ -168,36 +165,45 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal reqs.resize(Packets.size()); commtime-=usecond(); for(int i=0;iStencilSendToRecvFromBegin(reqs[i], + comms_bytes+=_grid->StencilSendToRecvFromBegin(reqs[i], Packets[i].send_buf, Packets[i].to_rank, Packets[i].recv_buf, Packets[i].from_rank, Packets[i].bytes); - /* - }else{ - _grid->SendToRecvFromBegin(reqs[i], - Packets[i].send_buf, - Packets[i].to_rank, - Packets[i].recv_buf, - Packets[i].from_rank, - Packets[i].bytes); - } - */ } commtime+=usecond(); } void CommunicateComplete(std::vector > &reqs) { commtime-=usecond(); - for(int i=0;iStencilSendToRecvFromComplete(reqs[i]); - // else - // _grid->SendToRecvFromComplete(reqs[i]); + _grid->StencilSendToRecvFromComplete(reqs[i]); } + _grid->StencilBarrier();// Synch shared memory on a single nodes commtime+=usecond(); + /* + int dump=1; + if(dump){ + for(int i=0;i_ndimension;d++){ + ss<<"."<<_grid->_processor_coor[d]; + } + ss<<"_mu_"< rpointers; + std::vector vpointers; Integer buffer_size; Integer packet_id; + Integer exchange; + Integer type; }; std::vector Mergers; void AddMerge(cobj *merge_p,std::vector &rpointers,Integer buffer_size,Integer packet_id) { Merge m; + m.exchange = 0; m.mpointer = merge_p; m.rpointers= rpointers; m.buffer_size = buffer_size; @@ -221,17 +231,48 @@ class CartesianStencil { // Stencil runs along coordinate axes only; 
NO diagonal Mergers.push_back(m); } + void AddMergeNew(cobj *merge_p,std::vector &rpointers,Integer buffer_size,Integer packet_id,Integer type) { + Merge m; + m.exchange = 1; + m.type = type; + m.mpointer = merge_p; + m.vpointers= rpointers; + m.buffer_size = buffer_size; + m.packet_id = packet_id; + Mergers.push_back(m); + } + void CommsMerge(void ) { for(int i=0;i_ndimension;d++){ + // ss<<"."<<_grid->_processor_coor[d]; + // } + // ss<<"_m_"< new_simd_send_buf; + std::vector new_simd_recv_buf; std::vector u_simd_send_buf; std::vector u_simd_recv_buf; @@ -306,8 +349,8 @@ PARALLEL_FOR_LOOP ///////////////////////////////////////// // Timing info; ugly; possibly temporary ///////////////////////////////////////// - #define TIMING_HACK - #ifdef TIMING_HACK +#define TIMING_HACK +#ifdef TIMING_HACK double jointime; double gathertime; double commtime; @@ -341,6 +384,11 @@ PARALLEL_FOR_LOOP void Report(void) { #define PRINTIT(A) \ std::cout << GridLogMessage << " Stencil " << #A << " "<< A/calls<_Nprocessors; + RealD NN = _grid->NodeCount(); + + _grid->GlobalSum(commtime); commtime/=NP; if ( calls > 0. ) { std::cout << GridLogMessage << " Stencil calls "<1.0){ PRINTIT(comms_bytes); PRINTIT(commtime); - std::cout << GridLogMessage << " Stencil " << comms_bytes/commtime/1000. << " GB/s "<_simd_layout[dimension]; int comm_dim = _grid->_processors[dimension] >1 ; int splice_dim = _grid->_simd_layout[dimension]>1 && (comm_dim); @@ -404,9 +455,11 @@ PARALLEL_FOR_LOOP int sshift[2]; + ////////////////////////// // Underlying approach. For each local site build // up a table containing the npoint "neighbours" and whether they // live in lattice or a comms buffer. 
+ ////////////////////////// if ( !comm_dim ) { sshift[0] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Even); sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd); @@ -417,11 +470,11 @@ PARALLEL_FOR_LOOP Local(point,dimension,shift,0x1);// if checkerboard is unfavourable take two passes Local(point,dimension,shift,0x2);// both with block stride loop iteration } - } else { // All permute extract done in comms phase prior to Stencil application + } else { + // All permute extract done in comms phase prior to Stencil application // So tables are the same whether comm_dim or splice_dim sshift[0] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Even); sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd); - if ( sshift[0] == sshift[1] ) { Comms(point,dimension,shift,0x3); } else { @@ -440,13 +493,21 @@ PARALLEL_FOR_LOOP u_simd_send_buf.resize(Nsimd); u_simd_recv_buf.resize(Nsimd); - + new_simd_send_buf.resize(Nsimd); + new_simd_recv_buf.resize(Nsimd); u_send_buf_p=(cobj *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(cobj)); u_recv_buf_p=(cobj *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(cobj)); +#ifdef NEW_XYZT_GATHER + for(int l=0;l<2;l++){ + new_simd_recv_buf[l] = (cobj *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(cobj)); + new_simd_send_buf[l] = (cobj *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(cobj)); + } +#else for(int l=0;lShmBufferMalloc(_unified_buffer_size*sizeof(scalar_object)); u_simd_send_buf[l] = (scalar_object *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(scalar_object)); } +#endif PrecomputeByteOffsets(); } @@ -513,9 +574,11 @@ PARALLEL_FOR_LOOP assert(shift>=0); assert(shift_slice_nblock[dimension]*_grid->_slice_block[dimension]; // done in reduced dims, so SIMD factored - + // done in reduced dims, so SIMD factored + int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension]; + _comm_buf_size[point] = buffer_size; // 
Size of _one_ plane. Multiple planes may be gathered and + // send to one or more remote nodes. int cb= (cbmask==0x2)? Odd : Even; @@ -678,13 +741,10 @@ PARALLEL_FOR_LOOP calls++; Mergers.resize(0); Packets.resize(0); - _grid->StencilBarrier(); HaloGather(source,compress); this->CommunicateBegin(reqs); - _grid->StencilBarrier(); this->CommunicateComplete(reqs); - _grid->StencilBarrier(); - CommsMerge(); // spins + CommsMerge(); } template void HaloGatherDir(const Lattice &source,compressor &compress,int point,int & face_idx) @@ -715,7 +775,13 @@ PARALLEL_FOR_LOOP if ( sshift[0] == sshift[1] ) { if (splice_dim) { splicetime-=usecond(); + // GatherSimd(source,dimension,shift,0x3,compress,face_idx); + // std::cout << "GatherSimdNew"< void HaloGather(const Lattice &source,compressor &compress) { + _grid->StencilBarrier();// Synch shared memory on a single nodes + // conformable(source._grid,_grid); assert(source._grid==_grid); halogtime-=usecond(); @@ -801,13 +875,13 @@ PARALLEL_FOR_LOOP if ( !face_table_computed ) { t_table-=usecond(); face_table.resize(face_idx+1); - Gather_plane_simple_table_compute ((GridBase *)_grid,dimension,sx,cbmask,u_comm_offset, - face_table[face_idx]); + Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,u_comm_offset,face_table[face_idx]); + // std::cout << " face table size "< rpointers(Nsimd); std::vector spointers(Nsimd); - + + // std::cout << "GatherSimd " << dimension << " shift "<= rd ); - + if ( any_offnode ) { for(int i=0;i2 - // std::cout << "GatherSimd : lane 1st elem " << i << u_simd_send_buf[i ][u_comm_offset]<2 + // for(int w=0;w : lane " << i <<" elem "<>(permute_type+1)); int ic= (i&inner_bit)? 
1:0; - int my_coor = rd*ic + x; - int nbr_coor = my_coor+sshift; + int my_coor = rd*ic + x; + int nbr_coor = my_coor+sshift; int nbr_proc = ((nbr_coor)/ld) % pd;// relative shift in processors int nbr_lcoor= (nbr_coor%ld); int nbr_ic = (nbr_lcoor)/rd; // inner coord of peer @@ -919,10 +992,10 @@ PARALLEL_FOR_LOOP if (nbr_ic) nbr_lane|=inner_bit; assert (sx == nbr_ox); - + auto rp = &u_simd_recv_buf[i ][u_comm_offset]; auto sp = &u_simd_send_buf[nbr_lane][u_comm_offset]; - + if(nbr_proc){ int recv_from_rank; @@ -930,16 +1003,17 @@ PARALLEL_FOR_LOOP _grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank); + // shm == receive pointer if offnode + // shm == Translate[send pointer] if on node -- my view of his send pointer scalar_object *shm = (scalar_object *) _grid->ShmBufferTranslate(recv_from_rank,sp); - // if ((ShmDirectCopy==0)||(shm==NULL)) { if (shm==NULL) { shm = rp; - } - + } + // if Direct, StencilSendToRecvFrom will suppress copy to a peer on node // assuming above pointer flip AddPacket((void *)sp,(void *)rp,xmit_to_rank,recv_from_rank,bytes); - + rpointers[i] = shm; } else { @@ -955,6 +1029,133 @@ PARALLEL_FOR_LOOP } } } + + + template + void GatherSimdNew(const Lattice &rhs,int dimension,int shift,int cbmask,compressor &compress,int & face_idx) + { + const int Nsimd = _grid->Nsimd(); + + const int maxl =2;// max layout in a direction + int fd = _grid->_fdimensions[dimension]; + int rd = _grid->_rdimensions[dimension]; + int ld = _grid->_ldimensions[dimension]; + int pd = _grid->_processors[dimension]; + int simd_layout = _grid->_simd_layout[dimension]; + int comm_dim = _grid->_processors[dimension] >1 ; + assert(comm_dim==1); + // This will not work with a rotate dim + assert(simd_layout==maxl); + assert(shift>=0); + assert(shiftPermuteType(dimension); + // std::cout << "SimdNew permute type "<_slice_nblock[dimension]*_grid->_slice_block[dimension]; + int words = sizeof(cobj)/sizeof(vector_type); + + assert(cbmask==0x3); // Fixme think there 
is a latent bug if not true + + int reduced_buffer_size = buffer_size; + if (cbmask != 0x3) reduced_buffer_size=buffer_size>>1; + + int bytes = (reduced_buffer_size*sizeof(cobj))/simd_layout; + assert(bytes*simd_layout == reduced_buffer_size*sizeof(cobj)); + + std::vector rpointers(maxl); + std::vector spointers(maxl); + + /////////////////////////////////////////// + // Work out what to send where + /////////////////////////////////////////// + + int cb = (cbmask==0x2)? Odd : Even; + int sshift= _grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb); + + // loop over outer coord planes orthog to dim + for(int x=0;x= rd ); + + if ( any_offnode ) { + + + for(int i=0;i > table; + t_table-=usecond(); + if ( !face_table_computed ) { + face_table.resize(face_idx+1); + Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,u_comm_offset,face_table[face_idx]); + // std::cout << " face table size "<ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank); + + // shm == receive pointer if offnode + // shm == Translate[send pointer] if on node -- my view of his send pointer + cobj *shm = (cobj *) _grid->ShmBufferTranslate(recv_from_rank,sp); + if (shm==NULL) { + shm = rp; + } + + // if Direct, StencilSendToRecvFrom will suppress copy to a peer on node + // assuming above pointer flip + AddPacket((void *)sp,(void *)rp,xmit_to_rank,recv_from_rank,bytes); + + rpointers[i] = shm; + + } else { + + rpointers[i] = sp; + + } + } + + AddMergeNew(&u_recv_buf_p[u_comm_offset],rpointers,reduced_buffer_size,Packets.size()-1,permute_type); + + u_comm_offset +=buffer_size; + } + } + } }; } diff --git a/lib/tensors/Tensor_class.h b/lib/tensors/Tensor_class.h index 473dd6b1..e0b69eb0 100644 --- a/lib/tensors/Tensor_class.h +++ b/lib/tensors/Tensor_class.h @@ -105,6 +105,11 @@ class iScalar { friend strong_inline void rotate(iScalar &out,const iScalar &in,int rot){ rotate(out._internal,in._internal,rot); } + friend strong_inline void exchange(iScalar &out1,iScalar 
&out2, + const iScalar &in1,const iScalar &in2,int type){ + exchange(out1._internal,out2._internal, + in1._internal, in2._internal,type); + } // Unary negation friend strong_inline iScalar operator-(const iScalar &r) { @@ -248,6 +253,13 @@ class iVector { rotate(out._internal[i],in._internal[i],rot); } } + friend strong_inline void exchange(iVector &out1,iVector &out2, + const iVector &in1,const iVector &in2,int type){ + for(int i=0;i operator-(const iVector &r) { @@ -374,6 +386,14 @@ class iMatrix { rotate(out._internal[i][j],in._internal[i][j],rot); }} } + friend strong_inline void exchange(iMatrix &out1,iMatrix &out2, + const iMatrix &in1,const iMatrix &in2,int type){ + for(int i=0;i operator-(const iMatrix &r) { diff --git a/lib/Tensors.h b/lib/tensors/Tensors.h similarity index 100% rename from lib/Tensors.h rename to lib/tensors/Tensors.h diff --git a/lib/Threads.h b/lib/threads/Threads.h similarity index 95% rename from lib/Threads.h rename to lib/threads/Threads.h index 2f072633..d15f15ce 100644 --- a/lib/Threads.h +++ b/lib/threads/Threads.h @@ -37,13 +37,9 @@ Author: paboyle #ifdef GRID_OMP #include -#ifdef GRID_NUMA + #define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)") #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)") -#else -#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(runtime)") -#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(runtime)") -#endif #define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)") #define PARALLEL_REGION _Pragma("omp parallel") #define PARALLEL_CRITICAL _Pragma("omp critical") @@ -55,6 +51,9 @@ Author: paboyle #define PARALLEL_CRITICAL #endif +#define parallel_for PARALLEL_FOR_LOOP for +#define parallel_for_nest2 PARALLEL_NESTED_LOOP2 for + namespace Grid { // Introduce a class to gain deterministic bit reproducible reduction. 
diff --git a/lib/Init.cc b/lib/util/Init.cc similarity index 73% rename from lib/Init.cc rename to lib/util/Init.cc index 5abe7a4b..dd3e6d13 100644 --- a/lib/Init.cc +++ b/lib/util/Init.cc @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -41,12 +41,13 @@ Author: paboyle #include #include #include -#include #include #include #include #include +#include + #include #ifdef __APPLE__ @@ -219,8 +220,57 @@ void Grid_init(int *argc,char ***argv) CartesianCommunicator::MAX_MPI_SHM_BYTES = MB*1024*1024; } + if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){ + Grid_debug_handler_init(); + } + CartesianCommunicator::Init(argc,argv); + if( !GridCmdOptionExists(*argv,*argv+*argc,"--debug-stdout") ){ + Grid_quiesce_nodes(); + } else { + std::ostringstream fname; + fname<<"Grid.stdout."; + fname<si_signo); - printf(" mem address %llx\n",(unsigned long long)si->si_addr); - printf(" code %d\n",si->si_code); - + fprintf(stderr,"Caught signal %d\n",si->si_signo); + fprintf(stderr," mem address %llx\n",(unsigned long long)si->si_addr); + fprintf(stderr," code %d\n",si->si_code); // Linux/Posix #ifdef __linux__ // And x86 64bit #ifdef __x86_64__ ucontext_t * uc= (ucontext_t *)ptr; struct sigcontext *sc = (struct sigcontext *)&uc->uc_mcontext; - printf(" instruction %llx\n",(unsigned long long)sc->rip); + fprintf(stderr," instruction %llx\n",(unsigned long long)sc->rip); #define REG(A) printf(" %s %lx\n",#A,sc-> A); REG(rdi); REG(rsi); @@ -412,7 +434,11 @@ void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr) REG(r15); #endif #endif - BACKTRACE(); + fflush(stderr); + BACKTRACEFP(stderr); + fprintf(stderr,"Called backtrace\n"); + fflush(stdout); + fflush(stderr); exit(0); return; }; @@ -425,9 +451,11 @@ void Grid_debug_handler_init(void) sa.sa_flags = SA_SIGINFO; 
sigaction(SIGSEGV,&sa,NULL); sigaction(SIGTRAP,&sa,NULL); + sigaction(SIGBUS,&sa,NULL); feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO); sigaction(SIGFPE,&sa,NULL); + sigaction(SIGKILL,&sa,NULL); } } diff --git a/lib/Init.h b/lib/util/Init.h similarity index 100% rename from lib/Init.h rename to lib/util/Init.h diff --git a/lib/Lexicographic.h b/lib/util/Lexicographic.h similarity index 100% rename from lib/Lexicographic.h rename to lib/util/Lexicographic.h diff --git a/lib/util/Util.h b/lib/util/Util.h new file mode 100644 index 00000000..0a6802a0 --- /dev/null +++ b/lib/util/Util.h @@ -0,0 +1,5 @@ +#ifndef GRID_UTIL_H +#define GRID_UTIL_H +#include +#include +#endif diff --git a/m4/ax_prog_doxygen.m4 b/m4/ax_prog_doxygen.m4 index fd145991..a3d90ace 100644 --- a/m4/ax_prog_doxygen.m4 +++ b/m4/ax_prog_doxygen.m4 @@ -316,7 +316,7 @@ AC_DEFUN([DX_TEST_FEATURE], [test "$DX_FLAG_$1" = 1]) AC_DEFUN([DX_CHECK_DEPEND], [ test "$DX_FLAG_$1" = "$2" \ || AC_MSG_ERROR([doxygen-DX_CURRENT_FEATURE ifelse([$2], 1, - requires, contradicts) doxygen-DX_CURRENT_FEATURE]) + requires, contradicts) doxygen-$1]) ]) # DX_CLEAR_DEPEND(FEATURE, REQUIRED_FEATURE, REQUIRED_STATE) diff --git a/scripts/filelist b/scripts/filelist index de8351f8..38e8ea94 100755 --- a/scripts/filelist +++ b/scripts/filelist @@ -7,10 +7,13 @@ cd $home/lib HFILES=`find . -type f -name '*.h' -not -name '*Hdf5*' -not -path '*/gamma-gen/*' -not -path '*/Old/*' -not -path '*/Eigen/*'` HFILES="$HFILES" CCFILES=`find . -type f -name '*.cc' -not -path '*/gamma-gen/*' -not -name '*Communicator*.cc' -not -name '*Hdf5*'` -echo HFILES=$HFILES > Make.inc +HPPFILES=`find . -type f -name '*.hpp'` +echo HFILES=$HFILES $HPPFILES > Make.inc echo >> Make.inc echo CCFILES=$CCFILES >> Make.inc + + # tests Make.inc cd $home/tests dirs=`find . 
-type d -not -path '*/\.*'` diff --git a/scripts/loop.log b/scripts/loop.log new file mode 100644 index 00000000..21b00a0d --- /dev/null +++ b/scripts/loop.log @@ -0,0 +1,15432 @@ +Grid : Message : Stencil 12.6786 GB/s per node +Grid : Message : Stencil 17.2339 GB/s per node +Grid : Message : Stencil 17.5982 GB/s per node +Grid : Message : Stencil 12.79 GB/s per node +Grid : Message : Average mflops/s per call per node : 671730 +Grid : Message : Average mflops/s per call per node : 802660 +Grid : Message : Average mflops/s per call per node : 827085 +Grid : Message : Average mflops/s per call per node : 665055 +Grid : Message : Average mflops/s per call per node (full): 314188 +Grid : Message : Average mflops/s per call per node (full): 437622 +Grid : Message : Average mflops/s per call per node (full): 446214 +Grid : Message : Average mflops/s per call per node (full): 304778 +Grid : Message : Stencil 12.3348 GB/s per node +Grid : Message : Stencil 16.7072 GB/s per node +Grid : Message : Stencil 17.5215 GB/s per node +Grid : Message : Stencil 13.0792 GB/s per node +Grid : Message : Average mflops/s per call per node : 673845 +Grid : Message : Average mflops/s per call per node : 803432 +Grid : Message : Average mflops/s per call per node : 827130 +Grid : Message : Average mflops/s per call per node : 661863 +Grid : Message : Average mflops/s per call per node (full): 312916 +Grid : Message : Average mflops/s per call per node (full): 437409 +Grid : Message : Average mflops/s per call per node (full): 446117 +Grid : Message : Average mflops/s per call per node (full): 305031 +Grid : Message : Stencil 13.251 GB/s per node +Grid : Message : Stencil 17.0389 GB/s per node +Grid : Message : Stencil 17.9444 GB/s per node +Grid : Message : Stencil 12.8909 GB/s per node +Grid : Message : Average mflops/s per call per node : 670180 +Grid : Message : Average mflops/s per call per node : 803789 +Grid : Message : Average mflops/s per call per node : 820549 +Grid : Message : 
Average mflops/s per call per node : 665372 +Grid : Message : Average mflops/s per call per node (full): 316277 +Grid : Message : Average mflops/s per call per node (full): 435976 +Grid : Message : Average mflops/s per call per node (full): 446442 +Grid : Message : Average mflops/s per call per node (full): 305106 +Grid : Message : Stencil 13.6097 GB/s per node +Grid : Message : Stencil 17.7981 GB/s per node +Grid : Message : Stencil 17.5185 GB/s per node +Grid : Message : Stencil 14.4014 GB/s per node +Grid : Message : Average mflops/s per call per node : 666791 +Grid : Message : Average mflops/s per call per node : 799898 +Grid : Message : Average mflops/s per call per node : 826498 +Grid : Message : Average mflops/s per call per node : 663479 +Grid : Message : Average mflops/s per call per node (full): 316681 +Grid : Message : Average mflops/s per call per node (full): 437896 +Grid : Message : Average mflops/s per call per node (full): 445597 +Grid : Message : Average mflops/s per call per node (full): 307770 +Grid : Message : Stencil 14.2804 GB/s per node +Grid : Message : Stencil 16.6792 GB/s per node +Grid : Message : Stencil 18.2657 GB/s per node +Grid : Message : Stencil 12.4275 GB/s per node +Grid : Message : Average mflops/s per call per node : 661518 +Grid : Message : Average mflops/s per call per node : 804204 +Grid : Message : Average mflops/s per call per node : 821252 +Grid : Message : Average mflops/s per call per node : 669218 +Grid : Message : Average mflops/s per call per node (full): 315153 +Grid : Message : Average mflops/s per call per node (full): 436294 +Grid : Message : Average mflops/s per call per node (full): 448032 +Grid : Message : Average mflops/s per call per node (full): 305047 +Grid : Message : Stencil 13.1218 GB/s per node +Grid : Message : Stencil 16.6241 GB/s per node +Grid : Message : Stencil 17.8377 GB/s per node +Grid : Message : Stencil 13.1379 GB/s per node +Grid : Message : Average mflops/s per call per node : 667798 +Grid 
: Message : Average mflops/s per call per node : 804417 +Grid : Message : Average mflops/s per call per node : 816011 +Grid : Message : Average mflops/s per call per node : 658982 +Grid : Message : Average mflops/s per call per node (full): 314849 +Grid : Message : Average mflops/s per call per node (full): 436163 +Grid : Message : Average mflops/s per call per node (full): 438805 +Grid : Message : Average mflops/s per call per node (full): 304163 +Grid : Message : Stencil 13.5449 GB/s per node +Grid : Message : Stencil 17.0727 GB/s per node +Grid : Message : Stencil 18.1857 GB/s per node +Grid : Message : Stencil 13.3749 GB/s per node +Grid : Message : Average mflops/s per call per node : 667016 +Grid : Message : Average mflops/s per call per node : 805999 +Grid : Message : Average mflops/s per call per node : 825220 +Grid : Message : Average mflops/s per call per node : 664181 +Grid : Message : Average mflops/s per call per node (full): 315059 +Grid : Message : Average mflops/s per call per node (full): 438031 +Grid : Message : Average mflops/s per call per node (full): 447100 +Grid : Message : Average mflops/s per call per node (full): 306033 +Grid : Message : Stencil 12.7492 GB/s per node +Grid : Message : Stencil 16.8398 GB/s per node +Grid : Message : Stencil 18.5169 GB/s per node +Grid : Message : Stencil 14.1175 GB/s per node +Grid : Message : Average mflops/s per call per node : 667433 +Grid : Message : Average mflops/s per call per node : 805205 +Grid : Message : Average mflops/s per call per node : 818690 +Grid : Message : Average mflops/s per call per node : 662018 +Grid : Message : Average mflops/s per call per node (full): 312571 +Grid : Message : Average mflops/s per call per node (full): 436413 +Grid : Message : Average mflops/s per call per node (full): 447176 +Grid : Message : Average mflops/s per call per node (full): 307166 +Grid : Message : Stencil 14.073 GB/s per node +Grid : Message : Stencil 14.0583 GB/s per node +Grid : Message : Stencil 
17.3888 GB/s per node +Grid : Message : Stencil 12.4105 GB/s per node +Grid : Message : Average mflops/s per call per node : 662617 +Grid : Message : Average mflops/s per call per node : 809873 +Grid : Message : Average mflops/s per call per node : 823381 +Grid : Message : Average mflops/s per call per node : 664561 +Grid : Message : Average mflops/s per call per node (full): 315727 +Grid : Message : Average mflops/s per call per node (full): 400767 +Grid : Message : Average mflops/s per call per node (full): 444114 +Grid : Message : Average mflops/s per call per node (full): 304126 +Grid : Message : Stencil 14.5548 GB/s per node +Grid : Message : Stencil 16.9469 GB/s per node +Grid : Message : Stencil 17.4579 GB/s per node +Grid : Message : Stencil 14.3184 GB/s per node +Grid : Message : Average mflops/s per call per node : 662519 +Grid : Message : Average mflops/s per call per node : 799987 +Grid : Message : Average mflops/s per call per node : 822293 +Grid : Message : Average mflops/s per call per node : 662139 +Grid : Message : Average mflops/s per call per node (full): 316617 +Grid : Message : Average mflops/s per call per node (full): 433846 +Grid : Message : Average mflops/s per call per node (full): 444509 +Grid : Message : Average mflops/s per call per node (full): 305318 +Grid : Message : Stencil 13.7009 GB/s per node +Grid : Message : Stencil 16.6146 GB/s per node +Grid : Message : Stencil 17.494 GB/s per node +Grid : Message : Stencil 12.1012 GB/s per node +Grid : Message : Average mflops/s per call per node : 664936 +Grid : Message : Average mflops/s per call per node : 808221 +Grid : Message : Average mflops/s per call per node : 822622 +Grid : Message : Average mflops/s per call per node : 670400 +Grid : Message : Average mflops/s per call per node (full): 316165 +Grid : Message : Average mflops/s per call per node (full): 436412 +Grid : Message : Average mflops/s per call per node (full): 444939 +Grid : Message : Average mflops/s per call per node 
(full): 303228 +Grid : Message : Stencil 12.6849 GB/s per node +Grid : Message : Stencil 16.6473 GB/s per node +Grid : Message : Stencil 17.4004 GB/s per node +Grid : Message : Stencil 13.1606 GB/s per node +Grid : Message : Average mflops/s per call per node : 671546 +Grid : Message : Average mflops/s per call per node : 807297 +Grid : Message : Average mflops/s per call per node : 821422 +Grid : Message : Average mflops/s per call per node : 667455 +Grid : Message : Average mflops/s per call per node (full): 314965 +Grid : Message : Average mflops/s per call per node (full): 436228 +Grid : Message : Average mflops/s per call per node (full): 444879 +Grid : Message : Average mflops/s per call per node (full): 305982 +Grid : Message : Stencil 13.809 GB/s per node +Grid : Message : Stencil 16.3086 GB/s per node +Grid : Message : Stencil 17.3784 GB/s per node +Grid : Message : Stencil 12.5934 GB/s per node +Grid : Message : Average mflops/s per call per node : 667096 +Grid : Message : Average mflops/s per call per node : 803680 +Grid : Message : Average mflops/s per call per node : 823904 +Grid : Message : Average mflops/s per call per node : 667368 +Grid : Message : Average mflops/s per call per node (full): 317077 +Grid : Message : Average mflops/s per call per node (full): 432108 +Grid : Message : Average mflops/s per call per node (full): 445114 +Grid : Message : Average mflops/s per call per node (full): 304958 +Grid : Message : Stencil 14.3258 GB/s per node +Grid : Message : Stencil 16.9233 GB/s per node +Grid : Message : Stencil 17.6914 GB/s per node +Grid : Message : Stencil 12.1857 GB/s per node +Grid : Message : Average mflops/s per call per node : 665305 +Grid : Message : Average mflops/s per call per node : 804694 +Grid : Message : Average mflops/s per call per node : 824508 +Grid : Message : Average mflops/s per call per node : 667104 +Grid : Message : Average mflops/s per call per node (full): 316651 +Grid : Message : Average mflops/s per call per node 
(full): 428082 +Grid : Message : Average mflops/s per call per node (full): 446379 +Grid : Message : Average mflops/s per call per node (full): 303779 +Grid : Message : Stencil 13.2527 GB/s per node +Grid : Message : Stencil 16.6723 GB/s per node +Grid : Message : Stencil 17.581 GB/s per node +Grid : Message : Stencil 13.4152 GB/s per node +Grid : Message : Average mflops/s per call per node : 669149 +Grid : Message : Average mflops/s per call per node : 799407 +Grid : Message : Average mflops/s per call per node : 820946 +Grid : Message : Average mflops/s per call per node : 658257 +Grid : Message : Average mflops/s per call per node (full): 315691 +Grid : Message : Average mflops/s per call per node (full): 435436 +Grid : Message : Average mflops/s per call per node (full): 445192 +Grid : Message : Average mflops/s per call per node (full): 305074 +Grid : Message : Stencil 13.0576 GB/s per node +Grid : Message : Stencil 16.4259 GB/s per node +Grid : Message : Stencil 17.3695 GB/s per node +Grid : Message : Stencil 13.224 GB/s per node +Grid : Message : Average mflops/s per call per node : 671756 +Grid : Message : Average mflops/s per call per node : 805129 +Grid : Message : Average mflops/s per call per node : 826323 +Grid : Message : Average mflops/s per call per node : 663117 +Grid : Message : Average mflops/s per call per node (full): 316939 +Grid : Message : Average mflops/s per call per node (full): 434386 +Grid : Message : Average mflops/s per call per node (full): 445016 +Grid : Message : Average mflops/s per call per node (full): 306607 +Grid : Message : Stencil 12.735 GB/s per node +Grid : Message : Stencil 18.7133 GB/s per node +Grid : Message : Stencil 17.5949 GB/s per node +Grid : Message : Stencil 13.6578 GB/s per node +Grid : Message : Average mflops/s per call per node : 673814 +Grid : Message : Average mflops/s per call per node : 803124 +Grid : Message : Average mflops/s per call per node : 822958 +Grid : Message : Average mflops/s per call per 
node : 662935 +Grid : Message : Average mflops/s per call per node (full): 315459 +Grid : Message : Average mflops/s per call per node (full): 440286 +Grid : Message : Average mflops/s per call per node (full): 446144 +Grid : Message : Average mflops/s per call per node (full): 306944 +Grid : Message : Stencil 14.5299 GB/s per node +Grid : Message : Stencil 16.827 GB/s per node +Grid : Message : Stencil 18.227 GB/s per node +Grid : Message : Stencil 12.5667 GB/s per node +Grid : Message : Average mflops/s per call per node : 664966 +Grid : Message : Average mflops/s per call per node : 805649 +Grid : Message : Average mflops/s per call per node : 820323 +Grid : Message : Average mflops/s per call per node : 662468 +Grid : Message : Average mflops/s per call per node (full): 316234 +Grid : Message : Average mflops/s per call per node (full): 436979 +Grid : Message : Average mflops/s per call per node (full): 445870 +Grid : Message : Average mflops/s per call per node (full): 304734 +Grid : Message : Stencil 14.0947 GB/s per node +Grid : Message : Stencil 17.2047 GB/s per node +Grid : Message : Stencil 16.3778 GB/s per node +Grid : Message : Stencil 11.9717 GB/s per node +Grid : Message : Average mflops/s per call per node : 667253 +Grid : Message : Average mflops/s per call per node : 801164 +Grid : Message : Average mflops/s per call per node : 823547 +Grid : Message : Average mflops/s per call per node : 671607 +Grid : Message : Average mflops/s per call per node (full): 317280 +Grid : Message : Average mflops/s per call per node (full): 438057 +Grid : Message : Average mflops/s per call per node (full): 429727 +Grid : Message : Average mflops/s per call per node (full): 302300 +Grid : Message : Stencil 14.4044 GB/s per node +Grid : Message : Stencil 16.4973 GB/s per node +Grid : Message : Stencil 16.8518 GB/s per node +Grid : Message : Stencil 13.1009 GB/s per node +Grid : Message : Average mflops/s per call per node : 664077 +Grid : Message : Average mflops/s 
per call per node : 802258 +Grid : Message : Average mflops/s per call per node : 823349 +Grid : Message : Average mflops/s per call per node : 667635 +Grid : Message : Average mflops/s per call per node (full): 316900 +Grid : Message : Average mflops/s per call per node (full): 433661 +Grid : Message : Average mflops/s per call per node (full): 436108 +Grid : Message : Average mflops/s per call per node (full): 305754 +Grid : Message : Stencil 13.228 GB/s per node +Grid : Message : Stencil 16.8887 GB/s per node +Grid : Message : Stencil 17.6471 GB/s per node +Grid : Message : Stencil 12.7062 GB/s per node +Grid : Message : Average mflops/s per call per node : 669067 +Grid : Message : Average mflops/s per call per node : 804099 +Grid : Message : Average mflops/s per call per node : 821604 +Grid : Message : Average mflops/s per call per node : 658039 +Grid : Message : Average mflops/s per call per node (full): 316770 +Grid : Message : Average mflops/s per call per node (full): 437420 +Grid : Message : Average mflops/s per call per node (full): 445975 +Grid : Message : Average mflops/s per call per node (full): 298715 +Grid : Message : Stencil 13.5671 GB/s per node +Grid : Message : Stencil 16.5942 GB/s per node +Grid : Message : Stencil 18.1271 GB/s per node +Grid : Message : Stencil 11.9482 GB/s per node +Grid : Message : Average mflops/s per call per node : 667462 +Grid : Message : Average mflops/s per call per node : 807110 +Grid : Message : Average mflops/s per call per node : 823079 +Grid : Message : Average mflops/s per call per node : 667472 +Grid : Message : Average mflops/s per call per node (full): 314605 +Grid : Message : Average mflops/s per call per node (full): 436264 +Grid : Message : Average mflops/s per call per node (full): 446541 +Grid : Message : Average mflops/s per call per node (full): 301224 +Grid : Message : Stencil 12.3664 GB/s per node +Grid : Message : Stencil 13.5017 GB/s per node +Grid : Message : Stencil 17.7799 GB/s per node +Grid : 
Message : Stencil 12.3154 GB/s per node +Grid : Message : Average mflops/s per call per node : 672086 +Grid : Message : Average mflops/s per call per node : 808002 +Grid : Message : Average mflops/s per call per node : 822256 +Grid : Message : Average mflops/s per call per node : 666533 +Grid : Message : Average mflops/s per call per node (full): 313568 +Grid : Message : Average mflops/s per call per node (full): 391027 +Grid : Message : Average mflops/s per call per node (full): 441218 +Grid : Message : Average mflops/s per call per node (full): 303786 +Grid : Message : Stencil 13.5781 GB/s per node +Grid : Message : Stencil 16.4869 GB/s per node +Grid : Message : Stencil 17.8246 GB/s per node +Grid : Message : Stencil 13.8167 GB/s per node +Grid : Message : Average mflops/s per call per node : 667731 +Grid : Message : Average mflops/s per call per node : 803031 +Grid : Message : Average mflops/s per call per node : 826184 +Grid : Message : Average mflops/s per call per node : 662380 +Grid : Message : Average mflops/s per call per node (full): 316580 +Grid : Message : Average mflops/s per call per node (full): 434474 +Grid : Message : Average mflops/s per call per node (full): 447463 +Grid : Message : Average mflops/s per call per node (full): 305866 +Grid : Message : Stencil 14.1182 GB/s per node +Grid : Message : Stencil 16.975 GB/s per node +Grid : Message : Stencil 18.8744 GB/s per node +Grid : Message : Stencil 12.1288 GB/s per node +Grid : Message : Average mflops/s per call per node : 661970 +Grid : Message : Average mflops/s per call per node : 803461 +Grid : Message : Average mflops/s per call per node : 820977 +Grid : Message : Average mflops/s per call per node : 668043 +Grid : Message : Average mflops/s per call per node (full): 316254 +Grid : Message : Average mflops/s per call per node (full): 437180 +Grid : Message : Average mflops/s per call per node (full): 448645 +Grid : Message : Average mflops/s per call per node (full): 302267 +Grid : Message 
: Stencil 13.4813 GB/s per node +Grid : Message : Stencil 16.4433 GB/s per node +Grid : Message : Stencil 17.8945 GB/s per node +Grid : Message : Stencil 11.9791 GB/s per node +Grid : Message : Average mflops/s per call per node : 667678 +Grid : Message : Average mflops/s per call per node : 802481 +Grid : Message : Average mflops/s per call per node : 823512 +Grid : Message : Average mflops/s per call per node : 662641 +Grid : Message : Average mflops/s per call per node (full): 316494 +Grid : Message : Average mflops/s per call per node (full): 433789 +Grid : Message : Average mflops/s per call per node (full): 447496 +Grid : Message : Average mflops/s per call per node (full): 300465 +Grid : Message : Stencil 13.123 GB/s per node +Grid : Message : Stencil 11.7805 GB/s per node +Grid : Message : Stencil 17.9281 GB/s per node +Grid : Message : Stencil 12.2311 GB/s per node +Grid : Message : Average mflops/s per call per node : 665079 +Grid : Message : Average mflops/s per call per node : 806976 +Grid : Message : Average mflops/s per call per node : 824353 +Grid : Message : Average mflops/s per call per node : 669184 +Grid : Message : Average mflops/s per call per node (full): 315687 +Grid : Message : Average mflops/s per call per node (full): 356593 +Grid : Message : Average mflops/s per call per node (full): 441731 +Grid : Message : Average mflops/s per call per node (full): 304029 +Grid : Message : Stencil 14.1549 GB/s per node +Grid : Message : Stencil 18.8328 GB/s per node +Grid : Message : Stencil 16.9621 GB/s per node +Grid : Message : Stencil 12.2749 GB/s per node +Grid : Message : Average mflops/s per call per node : 664087 +Grid : Message : Average mflops/s per call per node : 798788 +Grid : Message : Average mflops/s per call per node : 817414 +Grid : Message : Average mflops/s per call per node : 667448 +Grid : Message : Average mflops/s per call per node (full): 316819 +Grid : Message : Average mflops/s per call per node (full): 440185 +Grid : Message 
: Average mflops/s per call per node (full): 439012 +Grid : Message : Average mflops/s per call per node (full): 304156 +Grid : Message : Stencil 13.5542 GB/s per node +Grid : Message : Stencil 18.0781 GB/s per node +Grid : Message : Stencil 18.4257 GB/s per node +Grid : Message : Stencil 12.1906 GB/s per node +Grid : Message : Average mflops/s per call per node : 665956 +Grid : Message : Average mflops/s per call per node : 801445 +Grid : Message : Average mflops/s per call per node : 816661 +Grid : Message : Average mflops/s per call per node : 663424 +Grid : Message : Average mflops/s per call per node (full): 316319 +Grid : Message : Average mflops/s per call per node (full): 438901 +Grid : Message : Average mflops/s per call per node (full): 445947 +Grid : Message : Average mflops/s per call per node (full): 301634 +Grid : Message : Stencil 12.6518 GB/s per node +Grid : Message : Stencil 17.1647 GB/s per node +Grid : Message : Stencil 17.1054 GB/s per node +Grid : Message : Stencil 14.0803 GB/s per node +Grid : Message : Average mflops/s per call per node : 669876 +Grid : Message : Average mflops/s per call per node : 800051 +Grid : Message : Average mflops/s per call per node : 817636 +Grid : Message : Average mflops/s per call per node : 661101 +Grid : Message : Average mflops/s per call per node (full): 314390 +Grid : Message : Average mflops/s per call per node (full): 436913 +Grid : Message : Average mflops/s per call per node (full): 439852 +Grid : Message : Average mflops/s per call per node (full): 305943 +Grid : Message : Stencil 12.5875 GB/s per node +Grid : Message : Stencil 16.6244 GB/s per node +Grid : Message : Stencil 17.697 GB/s per node +Grid : Message : Stencil 13.9751 GB/s per node +Grid : Message : Average mflops/s per call per node : 668687 +Grid : Message : Average mflops/s per call per node : 804376 +Grid : Message : Average mflops/s per call per node : 820248 +Grid : Message : Average mflops/s per call per node : 657872 +Grid : Message 
: Average mflops/s per call per node (full): 312425 +Grid : Message : Average mflops/s per call per node (full): 432575 +Grid : Message : Average mflops/s per call per node (full): 445039 +Grid : Message : Average mflops/s per call per node (full): 306199 +Grid : Message : Stencil 12.9612 GB/s per node +Grid : Message : Stencil 16.9251 GB/s per node +Grid : Message : Stencil 18.2427 GB/s per node +Grid : Message : Stencil 13.9809 GB/s per node +Grid : Message : Average mflops/s per call per node : 670116 +Grid : Message : Average mflops/s per call per node : 802462 +Grid : Message : Average mflops/s per call per node : 815357 +Grid : Message : Average mflops/s per call per node : 663631 +Grid : Message : Average mflops/s per call per node (full): 315610 +Grid : Message : Average mflops/s per call per node (full): 437106 +Grid : Message : Average mflops/s per call per node (full): 447389 +Grid : Message : Average mflops/s per call per node (full): 307173 +Grid : Message : Stencil 14.9913 GB/s per node +Grid : Message : Stencil 16.6469 GB/s per node +Grid : Message : Stencil 19.1252 GB/s per node +Grid : Message : Stencil 12.2903 GB/s per node +Grid : Message : Average mflops/s per call per node : 662529 +Grid : Message : Average mflops/s per call per node : 808307 +Grid : Message : Average mflops/s per call per node : 819406 +Grid : Message : Average mflops/s per call per node : 671267 +Grid : Message : Average mflops/s per call per node (full): 316715 +Grid : Message : Average mflops/s per call per node (full): 436353 +Grid : Message : Average mflops/s per call per node (full): 448492 +Grid : Message : Average mflops/s per call per node (full): 304655 +Grid : Message : Stencil 13.9691 GB/s per node +Grid : Message : Stencil 17.0655 GB/s per node +Grid : Message : Stencil 17.2496 GB/s per node +Grid : Message : Stencil 12.9983 GB/s per node +Grid : Message : Average mflops/s per call per node : 665233 +Grid : Message : Average mflops/s per call per node : 802284 
+Grid : Message : Average mflops/s per call per node : 826984 +Grid : Message : Average mflops/s per call per node : 668970 +Grid : Message : Average mflops/s per call per node (full): 315863 +Grid : Message : Average mflops/s per call per node (full): 437702 +Grid : Message : Average mflops/s per call per node (full): 442069 +Grid : Message : Average mflops/s per call per node (full): 306745 +Grid : Message : Stencil 15.661 GB/s per node +Grid : Message : Stencil 16.8648 GB/s per node +Grid : Message : Stencil 17.2765 GB/s per node +Grid : Message : Stencil 12.083 GB/s per node +Grid : Message : Average mflops/s per call per node : 659457 +Grid : Message : Average mflops/s per call per node : 806816 +Grid : Message : Average mflops/s per call per node : 824213 +Grid : Message : Average mflops/s per call per node : 667384 +Grid : Message : Average mflops/s per call per node (full): 316720 +Grid : Message : Average mflops/s per call per node (full): 437656 +Grid : Message : Average mflops/s per call per node (full): 444228 +Grid : Message : Average mflops/s per call per node (full): 302337 +Grid : Message : Stencil 13.4679 GB/s per node +Grid : Message : Stencil 16.4826 GB/s per node +Grid : Message : Stencil 17.4329 GB/s per node +Grid : Message : Stencil 12.1631 GB/s per node +Grid : Message : Average mflops/s per call per node : 663334 +Grid : Message : Average mflops/s per call per node : 801140 +Grid : Message : Average mflops/s per call per node : 827083 +Grid : Message : Average mflops/s per call per node : 668463 +Grid : Message : Average mflops/s per call per node (full): 313267 +Grid : Message : Average mflops/s per call per node (full): 432812 +Grid : Message : Average mflops/s per call per node (full): 443554 +Grid : Message : Average mflops/s per call per node (full): 303006 +Grid : Message : Stencil 14.1353 GB/s per node +Grid : Message : Stencil 16.8278 GB/s per node +Grid : Message : Stencil 17.6039 GB/s per node +Grid : Message : Stencil 12.1976 
GB/s per node +Grid : Message : Average mflops/s per call per node : 664989 +Grid : Message : Average mflops/s per call per node : 800166 +Grid : Message : Average mflops/s per call per node : 826481 +Grid : Message : Average mflops/s per call per node : 666778 +Grid : Message : Average mflops/s per call per node (full): 316360 +Grid : Message : Average mflops/s per call per node (full): 436968 +Grid : Message : Average mflops/s per call per node (full): 446051 +Grid : Message : Average mflops/s per call per node (full): 303706 +Grid : Message : Stencil 14.9322 GB/s per node +Grid : Message : Stencil 17.0553 GB/s per node +Grid : Message : Stencil 17.8388 GB/s per node +Grid : Message : Stencil 11.9921 GB/s per node +Grid : Message : Average mflops/s per call per node : 663931 +Grid : Message : Average mflops/s per call per node : 797226 +Grid : Message : Average mflops/s per call per node : 817099 +Grid : Message : Average mflops/s per call per node : 664999 +Grid : Message : Average mflops/s per call per node (full): 317371 +Grid : Message : Average mflops/s per call per node (full): 437367 +Grid : Message : Average mflops/s per call per node (full): 442754 +Grid : Message : Average mflops/s per call per node (full): 301680 +Grid : Message : Stencil 13.8221 GB/s per node +Grid : Message : Stencil 16.6444 GB/s per node +Grid : Message : Stencil 18.01 GB/s per node +Grid : Message : Stencil 12.6676 GB/s per node +Grid : Message : Average mflops/s per call per node : 664187 +Grid : Message : Average mflops/s per call per node : 805007 +Grid : Message : Average mflops/s per call per node : 816560 +Grid : Message : Average mflops/s per call per node : 662956 +Grid : Message : Average mflops/s per call per node (full): 313418 +Grid : Message : Average mflops/s per call per node (full): 436533 +Grid : Message : Average mflops/s per call per node (full): 441327 +Grid : Message : Average mflops/s per call per node (full): 304681 +Grid : Message : Stencil 12.9318 GB/s per 
node +Grid : Message : Stencil 16.4364 GB/s per node +Grid : Message : Stencil 17.768 GB/s per node +Grid : Message : Stencil 13.492 GB/s per node +Grid : Message : Average mflops/s per call per node : 669173 +Grid : Message : Average mflops/s per call per node : 806886 +Grid : Message : Average mflops/s per call per node : 829390 +Grid : Message : Average mflops/s per call per node : 663708 +Grid : Message : Average mflops/s per call per node (full): 314081 +Grid : Message : Average mflops/s per call per node (full): 434090 +Grid : Message : Average mflops/s per call per node (full): 443265 +Grid : Message : Average mflops/s per call per node (full): 303994 +Grid : Message : Stencil 12.0068 GB/s per node +Grid : Message : Stencil 18.4427 GB/s per node +Grid : Message : Stencil 17.3472 GB/s per node +Grid : Message : Stencil 12.1353 GB/s per node +Grid : Message : Average mflops/s per call per node : 669657 +Grid : Message : Average mflops/s per call per node : 806409 +Grid : Message : Average mflops/s per call per node : 821373 +Grid : Message : Average mflops/s per call per node : 664660 +Grid : Message : Average mflops/s per call per node (full): 308790 +Grid : Message : Average mflops/s per call per node (full): 443314 +Grid : Message : Average mflops/s per call per node (full): 443511 +Grid : Message : Average mflops/s per call per node (full): 302802 +Grid : Message : Stencil 11.8595 GB/s per node +Grid : Message : Stencil 13.2191 GB/s per node +Grid : Message : Stencil 17.0265 GB/s per node +Grid : Message : Stencil 11.9838 GB/s per node +Grid : Message : Average mflops/s per call per node : 670429 +Grid : Message : Average mflops/s per call per node : 805409 +Grid : Message : Average mflops/s per call per node : 821689 +Grid : Message : Average mflops/s per call per node : 663832 +Grid : Message : Average mflops/s per call per node (full): 306883 +Grid : Message : Average mflops/s per call per node (full): 385481 +Grid : Message : Average mflops/s per call 
per node (full): 440786 +Grid : Message : Average mflops/s per call per node (full): 301769 +Grid : Message : Stencil 12.2979 GB/s per node +Grid : Message : Stencil 17.2923 GB/s per node +Grid : Message : Stencil 17.2221 GB/s per node +Grid : Message : Stencil 13.9567 GB/s per node +Grid : Message : Average mflops/s per call per node : 671006 +Grid : Message : Average mflops/s per call per node : 803697 +Grid : Message : Average mflops/s per call per node : 826999 +Grid : Message : Average mflops/s per call per node : 663643 +Grid : Message : Average mflops/s per call per node (full): 311482 +Grid : Message : Average mflops/s per call per node (full): 437719 +Grid : Message : Average mflops/s per call per node (full): 443215 +Grid : Message : Average mflops/s per call per node (full): 307982 +Grid : Message : Stencil 12.3281 GB/s per node +Grid : Message : Stencil 17.6035 GB/s per node +Grid : Message : Stencil 17.9606 GB/s per node +Grid : Message : Stencil 13.341 GB/s per node +Grid : Message : Average mflops/s per call per node : 668979 +Grid : Message : Average mflops/s per call per node : 801647 +Grid : Message : Average mflops/s per call per node : 820156 +Grid : Message : Average mflops/s per call per node : 662391 +Grid : Message : Average mflops/s per call per node (full): 310984 +Grid : Message : Average mflops/s per call per node (full): 438264 +Grid : Message : Average mflops/s per call per node (full): 440264 +Grid : Message : Average mflops/s per call per node (full): 306365 +Grid : Message : Stencil 12.1279 GB/s per node +Grid : Message : Stencil 16.409 GB/s per node +Grid : Message : Stencil 17.7244 GB/s per node +Grid : Message : Stencil 12.4881 GB/s per node +Grid : Message : Average mflops/s per call per node : 666087 +Grid : Message : Average mflops/s per call per node : 800875 +Grid : Message : Average mflops/s per call per node : 815854 +Grid : Message : Average mflops/s per call per node : 662393 +Grid : Message : Average mflops/s per call 
per node (full): 310224 +Grid : Message : Average mflops/s per call per node (full): 433186 +Grid : Message : Average mflops/s per call per node (full): 444792 +Grid : Message : Average mflops/s per call per node (full): 303496 +Grid : Message : Stencil 13.0349 GB/s per node +Grid : Message : Stencil 14.3654 GB/s per node +Grid : Message : Stencil 17.6391 GB/s per node +Grid : Message : Stencil 13.7551 GB/s per node +Grid : Message : Average mflops/s per call per node : 668233 +Grid : Message : Average mflops/s per call per node : 804659 +Grid : Message : Average mflops/s per call per node : 814795 +Grid : Message : Average mflops/s per call per node : 664072 +Grid : Message : Average mflops/s per call per node (full): 314520 +Grid : Message : Average mflops/s per call per node (full): 406269 +Grid : Message : Average mflops/s per call per node (full): 444639 +Grid : Message : Average mflops/s per call per node (full): 306410 +Grid : Message : Stencil 14.403 GB/s per node +Grid : Message : Stencil 16.4274 GB/s per node +Grid : Message : Stencil 17.764 GB/s per node +Grid : Message : Stencil 13.506 GB/s per node +Grid : Message : Average mflops/s per call per node : 662911 +Grid : Message : Average mflops/s per call per node : 804622 +Grid : Message : Average mflops/s per call per node : 820148 +Grid : Message : Average mflops/s per call per node : 661708 +Grid : Message : Average mflops/s per call per node (full): 316671 +Grid : Message : Average mflops/s per call per node (full): 433131 +Grid : Message : Average mflops/s per call per node (full): 445038 +Grid : Message : Average mflops/s per call per node (full): 304646 +Grid : Message : Stencil 13.1869 GB/s per node +Grid : Message : Stencil 16.7596 GB/s per node +Grid : Message : Stencil 17.5764 GB/s per node +Grid : Message : Stencil 12.3926 GB/s per node +Grid : Message : Average mflops/s per call per node : 666711 +Grid : Message : Average mflops/s per call per node : 806839 +Grid : Message : Average mflops/s 
per call per node : 826096 +Grid : Message : Average mflops/s per call per node : 667397 +Grid : Message : Average mflops/s per call per node (full): 315643 +Grid : Message : Average mflops/s per call per node (full): 436755 +Grid : Message : Average mflops/s per call per node (full): 445928 +Grid : Message : Average mflops/s per call per node (full): 304457 +Grid : Message : Stencil 13.263 GB/s per node +Grid : Message : Stencil 17.5252 GB/s per node +Grid : Message : Stencil 17.8626 GB/s per node +Grid : Message : Stencil 13.3588 GB/s per node +Grid : Message : Average mflops/s per call per node : 668043 +Grid : Message : Average mflops/s per call per node : 799596 +Grid : Message : Average mflops/s per call per node : 822349 +Grid : Message : Average mflops/s per call per node : 663654 +Grid : Message : Average mflops/s per call per node (full): 316238 +Grid : Message : Average mflops/s per call per node (full): 438539 +Grid : Message : Average mflops/s per call per node (full): 445313 +Grid : Message : Average mflops/s per call per node (full): 305676 +Grid : Message : Stencil 13.1748 GB/s per node +Grid : Message : Stencil 16.4387 GB/s per node +Grid : Message : Stencil 16.8156 GB/s per node +Grid : Message : Stencil 11.9933 GB/s per node +Grid : Message : Average mflops/s per call per node : 665377 +Grid : Message : Average mflops/s per call per node : 801811 +Grid : Message : Average mflops/s per call per node : 824709 +Grid : Message : Average mflops/s per call per node : 666862 +Grid : Message : Average mflops/s per call per node (full): 314939 +Grid : Message : Average mflops/s per call per node (full): 433577 +Grid : Message : Average mflops/s per call per node (full): 438749 +Grid : Message : Average mflops/s per call per node (full): 302165 +Grid : Message : Stencil 13.6378 GB/s per node +Grid : Message : Stencil 16.6342 GB/s per node +Grid : Message : Stencil 17.8761 GB/s per node +Grid : Message : Stencil 13.0271 GB/s per node +Grid : Message : 
Average mflops/s per call per node : 665072 +Grid : Message : Average mflops/s per call per node : 803115 +Grid : Message : Average mflops/s per call per node : 811562 +Grid : Message : Average mflops/s per call per node : 665020 +Grid : Message : Average mflops/s per call per node (full): 315482 +Grid : Message : Average mflops/s per call per node (full): 435792 +Grid : Message : Average mflops/s per call per node (full): 443680 +Grid : Message : Average mflops/s per call per node (full): 306156 +Grid : Message : Stencil 14.0895 GB/s per node +Grid : Message : Stencil 16.4099 GB/s per node +Grid : Message : Stencil 16.8037 GB/s per node +Grid : Message : Stencil 12.5877 GB/s per node +Grid : Message : Average mflops/s per call per node : 665096 +Grid : Message : Average mflops/s per call per node : 804712 +Grid : Message : Average mflops/s per call per node : 825464 +Grid : Message : Average mflops/s per call per node : 665110 +Grid : Message : Average mflops/s per call per node (full): 316653 +Grid : Message : Average mflops/s per call per node (full): 433047 +Grid : Message : Average mflops/s per call per node (full): 438423 +Grid : Message : Average mflops/s per call per node (full): 303523 +Grid : Message : Stencil 14.4215 GB/s per node +Grid : Message : Stencil 18.1001 GB/s per node +Grid : Message : Stencil 18.8503 GB/s per node +Grid : Message : Stencil 12.8565 GB/s per node +Grid : Message : Average mflops/s per call per node : 663644 +Grid : Message : Average mflops/s per call per node : 803815 +Grid : Message : Average mflops/s per call per node : 818369 +Grid : Message : Average mflops/s per call per node : 660859 +Grid : Message : Average mflops/s per call per node (full): 316221 +Grid : Message : Average mflops/s per call per node (full): 440122 +Grid : Message : Average mflops/s per call per node (full): 447962 +Grid : Message : Average mflops/s per call per node (full): 303555 +Grid : Message : Stencil 13.2125 GB/s per node +Grid : Message : Stencil 
17.2994 GB/s per node +Grid : Message : Stencil 17.782 GB/s per node +Grid : Message : Stencil 12.6348 GB/s per node +Grid : Message : Average mflops/s per call per node : 666511 +Grid : Message : Average mflops/s per call per node : 803513 +Grid : Message : Average mflops/s per call per node : 820944 +Grid : Message : Average mflops/s per call per node : 666189 +Grid : Message : Average mflops/s per call per node (full): 314097 +Grid : Message : Average mflops/s per call per node (full): 437299 +Grid : Message : Average mflops/s per call per node (full): 443860 +Grid : Message : Average mflops/s per call per node (full): 304287 +Grid : Message : Stencil 12.5 GB/s per node +Grid : Message : Stencil 18.2757 GB/s per node +Grid : Message : Stencil 18.1259 GB/s per node +Grid : Message : Stencil 12.4919 GB/s per node +Grid : Message : Average mflops/s per call per node : 667520 +Grid : Message : Average mflops/s per call per node : 799292 +Grid : Message : Average mflops/s per call per node : 818353 +Grid : Message : Average mflops/s per call per node : 665097 +Grid : Message : Average mflops/s per call per node (full): 313158 +Grid : Message : Average mflops/s per call per node (full): 438549 +Grid : Message : Average mflops/s per call per node (full): 447002 +Grid : Message : Average mflops/s per call per node (full): 304029 +Grid : Message : Stencil 15.4003 GB/s per node +Grid : Message : Stencil 16.7543 GB/s per node +Grid : Message : Stencil 18.0461 GB/s per node +Grid : Message : Stencil 12.4582 GB/s per node +Grid : Message : Average mflops/s per call per node : 659704 +Grid : Message : Average mflops/s per call per node : 802566 +Grid : Message : Average mflops/s per call per node : 827087 +Grid : Message : Average mflops/s per call per node : 667201 +Grid : Message : Average mflops/s per call per node (full): 316473 +Grid : Message : Average mflops/s per call per node (full): 435987 +Grid : Message : Average mflops/s per call per node (full): 448107 +Grid : 
Message : Average mflops/s per call per node (full): 304209 +Grid : Message : Stencil 15.2555 GB/s per node +Grid : Message : Stencil 17.2014 GB/s per node +Grid : Message : Stencil 17.4156 GB/s per node +Grid : Message : Stencil 12.7329 GB/s per node +Grid : Message : Average mflops/s per call per node : 662821 +Grid : Message : Average mflops/s per call per node : 801309 +Grid : Message : Average mflops/s per call per node : 819615 +Grid : Message : Average mflops/s per call per node : 670265 +Grid : Message : Average mflops/s per call per node (full): 317148 +Grid : Message : Average mflops/s per call per node (full): 436550 +Grid : Message : Average mflops/s per call per node (full): 440511 +Grid : Message : Average mflops/s per call per node (full): 306119 +Grid : Message : Stencil 12.5579 GB/s per node +Grid : Message : Stencil 17.8074 GB/s per node +Grid : Message : Stencil 18.2134 GB/s per node +Grid : Message : Stencil 13.6269 GB/s per node +Grid : Message : Average mflops/s per call per node : 666563 +Grid : Message : Average mflops/s per call per node : 802986 +Grid : Message : Average mflops/s per call per node : 822776 +Grid : Message : Average mflops/s per call per node : 661145 +Grid : Message : Average mflops/s per call per node (full): 313962 +Grid : Message : Average mflops/s per call per node (full): 439128 +Grid : Message : Average mflops/s per call per node (full): 447571 +Grid : Message : Average mflops/s per call per node (full): 306357 +Grid : Message : Stencil 13.181 GB/s per node +Grid : Message : Stencil 17.6128 GB/s per node +Grid : Message : Stencil 17.5374 GB/s per node +Grid : Message : Stencil 13.5804 GB/s per node +Grid : Message : Average mflops/s per call per node : 664015 +Grid : Message : Average mflops/s per call per node : 803507 +Grid : Message : Average mflops/s per call per node : 818642 +Grid : Message : Average mflops/s per call per node : 663962 +Grid : Message : Average mflops/s per call per node (full): 314661 +Grid : 
Message : Average mflops/s per call per node (full): 439069 +Grid : Message : Average mflops/s per call per node (full): 444028 +Grid : Message : Average mflops/s per call per node (full): 307019 +Grid : Message : Stencil 13.6153 GB/s per node +Grid : Message : Stencil 17.0466 GB/s per node +Grid : Message : Stencil 17.4903 GB/s per node +Grid : Message : Stencil 12.5444 GB/s per node +Grid : Message : Average mflops/s per call per node : 666360 +Grid : Message : Average mflops/s per call per node : 800808 +Grid : Message : Average mflops/s per call per node : 824012 +Grid : Message : Average mflops/s per call per node : 663114 +Grid : Message : Average mflops/s per call per node (full): 315889 +Grid : Message : Average mflops/s per call per node (full): 437328 +Grid : Message : Average mflops/s per call per node (full): 444113 +Grid : Message : Average mflops/s per call per node (full): 303517 +Grid : Message : Stencil 14.3039 GB/s per node +Grid : Message : Stencil 17.4336 GB/s per node +Grid : Message : Stencil 17.3709 GB/s per node +Grid : Message : Stencil 13.4663 GB/s per node +Grid : Message : Average mflops/s per call per node : 661343 +Grid : Message : Average mflops/s per call per node : 801882 +Grid : Message : Average mflops/s per call per node : 825649 +Grid : Message : Average mflops/s per call per node : 661617 +Grid : Message : Average mflops/s per call per node (full): 316563 +Grid : Message : Average mflops/s per call per node (full): 439155 +Grid : Message : Average mflops/s per call per node (full): 443762 +Grid : Message : Average mflops/s per call per node (full): 306622 +Grid : Message : Stencil 13.4482 GB/s per node +Grid : Message : Stencil 16.7404 GB/s per node +Grid : Message : Stencil 18.4052 GB/s per node +Grid : Message : Stencil 12.698 GB/s per node +Grid : Message : Average mflops/s per call per node : 667886 +Grid : Message : Average mflops/s per call per node : 798467 +Grid : Message : Average mflops/s per call per node : 819316 
+Grid : Message : Average mflops/s per call per node : 663127 +Grid : Message : Average mflops/s per call per node (full): 316365 +Grid : Message : Average mflops/s per call per node (full): 435445 +Grid : Message : Average mflops/s per call per node (full): 447972 +Grid : Message : Average mflops/s per call per node (full): 304094 +Grid : Message : Stencil 13.7252 GB/s per node +Grid : Message : Stencil 16.6097 GB/s per node +Grid : Message : Stencil 17.8379 GB/s per node +Grid : Message : Stencil 13.1634 GB/s per node +Grid : Message : Average mflops/s per call per node : 665025 +Grid : Message : Average mflops/s per call per node : 804014 +Grid : Message : Average mflops/s per call per node : 813861 +Grid : Message : Average mflops/s per call per node : 660909 +Grid : Message : Average mflops/s per call per node (full): 316061 +Grid : Message : Average mflops/s per call per node (full): 436117 +Grid : Message : Average mflops/s per call per node (full): 444560 +Grid : Message : Average mflops/s per call per node (full): 304968 +Grid : Message : Stencil 13.8481 GB/s per node +Grid : Message : Stencil 14.2236 GB/s per node +Grid : Message : Stencil 19.1255 GB/s per node +Grid : Message : Stencil 12.4868 GB/s per node +Grid : Message : Average mflops/s per call per node : 665570 +Grid : Message : Average mflops/s per call per node : 802607 +Grid : Message : Average mflops/s per call per node : 823274 +Grid : Message : Average mflops/s per call per node : 666035 +Grid : Message : Average mflops/s per call per node (full): 316665 +Grid : Message : Average mflops/s per call per node (full): 404232 +Grid : Message : Average mflops/s per call per node (full): 450107 +Grid : Message : Average mflops/s per call per node (full): 303665 +Grid : Message : Stencil 12.5744 GB/s per node +Grid : Message : Stencil 17.3745 GB/s per node +Grid : Message : Stencil 18.5195 GB/s per node +Grid : Message : Stencil 12.7849 GB/s per node +Grid : Message : Average mflops/s per call per 
node : 670859 +Grid : Message : Average mflops/s per call per node : 806203 +Grid : Message : Average mflops/s per call per node : 820805 +Grid : Message : Average mflops/s per call per node : 663512 +Grid : Message : Average mflops/s per call per node (full): 313422 +Grid : Message : Average mflops/s per call per node (full): 439284 +Grid : Message : Average mflops/s per call per node (full): 447298 +Grid : Message : Average mflops/s per call per node (full): 304256 +Grid : Message : Stencil 13.6758 GB/s per node +Grid : Message : Stencil 17.4313 GB/s per node +Grid : Message : Stencil 17.4837 GB/s per node +Grid : Message : Stencil 12.4561 GB/s per node +Grid : Message : Average mflops/s per call per node : 664223 +Grid : Message : Average mflops/s per call per node : 803113 +Grid : Message : Average mflops/s per call per node : 823434 +Grid : Message : Average mflops/s per call per node : 665009 +Grid : Message : Average mflops/s per call per node (full): 316348 +Grid : Message : Average mflops/s per call per node (full): 438938 +Grid : Message : Average mflops/s per call per node (full): 444205 +Grid : Message : Average mflops/s per call per node (full): 302728 +Grid : Message : Stencil 14.1599 GB/s per node +Grid : Message : Stencil 18.1794 GB/s per node +Grid : Message : Stencil 17.5413 GB/s per node +Grid : Message : Stencil 13.6046 GB/s per node +Grid : Message : Average mflops/s per call per node : 660446 +Grid : Message : Average mflops/s per call per node : 803789 +Grid : Message : Average mflops/s per call per node : 820256 +Grid : Message : Average mflops/s per call per node : 661423 +Grid : Message : Average mflops/s per call per node (full): 316034 +Grid : Message : Average mflops/s per call per node (full): 436357 +Grid : Message : Average mflops/s per call per node (full): 439043 +Grid : Message : Average mflops/s per call per node (full): 305437 +Grid : Message : Stencil 15.3867 GB/s per node +Grid : Message : Stencil 16.7997 GB/s per node +Grid : 
Message : Stencil 17.7 GB/s per node +Grid : Message : Stencil 12.2701 GB/s per node +Grid : Message : Average mflops/s per call per node : 660885 +Grid : Message : Average mflops/s per call per node : 802864 +Grid : Message : Average mflops/s per call per node : 822424 +Grid : Message : Average mflops/s per call per node : 670941 +Grid : Message : Average mflops/s per call per node (full): 316719 +Grid : Message : Average mflops/s per call per node (full): 430612 +Grid : Message : Average mflops/s per call per node (full): 445515 +Grid : Message : Average mflops/s per call per node (full): 302982 +Grid : Message : Stencil 12.8568 GB/s per node +Grid : Message : Stencil 16.7052 GB/s per node +Grid : Message : Stencil 16.6873 GB/s per node +Grid : Message : Stencil 13.3301 GB/s per node +Grid : Message : Average mflops/s per call per node : 663559 +Grid : Message : Average mflops/s per call per node : 804200 +Grid : Message : Average mflops/s per call per node : 820259 +Grid : Message : Average mflops/s per call per node : 659533 +Grid : Message : Average mflops/s per call per node (full): 313045 +Grid : Message : Average mflops/s per call per node (full): 437324 +Grid : Message : Average mflops/s per call per node (full): 437247 +Grid : Message : Average mflops/s per call per node (full): 305163 +Grid : Message : Stencil 13.1161 GB/s per node +Grid : Message : Stencil 16.5145 GB/s per node +Grid : Message : Stencil 17.5881 GB/s per node +Grid : Message : Stencil 12.1828 GB/s per node +Grid : Message : Average mflops/s per call per node : 661494 +Grid : Message : Average mflops/s per call per node : 806467 +Grid : Message : Average mflops/s per call per node : 820002 +Grid : Message : Average mflops/s per call per node : 670374 +Grid : Message : Average mflops/s per call per node (full): 312961 +Grid : Message : Average mflops/s per call per node (full): 435157 +Grid : Message : Average mflops/s per call per node (full): 434648 +Grid : Message : Average mflops/s per 
call per node (full): 302641 +Grid : Message : Stencil 13.4692 GB/s per node +Grid : Message : Stencil 16.3541 GB/s per node +Grid : Message : Stencil 17.6659 GB/s per node +Grid : Message : Stencil 12.8814 GB/s per node +Grid : Message : Average mflops/s per call per node : 663625 +Grid : Message : Average mflops/s per call per node : 804987 +Grid : Message : Average mflops/s per call per node : 822099 +Grid : Message : Average mflops/s per call per node : 662334 +Grid : Message : Average mflops/s per call per node (full): 312334 +Grid : Message : Average mflops/s per call per node (full): 432101 +Grid : Message : Average mflops/s per call per node (full): 446640 +Grid : Message : Average mflops/s per call per node (full): 305115 +Grid : Message : Stencil 12.8942 GB/s per node +Grid : Message : Stencil 16.7898 GB/s per node +Grid : Message : Stencil 17.5971 GB/s per node +Grid : Message : Stencil 12.2456 GB/s per node +Grid : Message : Average mflops/s per call per node : 668954 +Grid : Message : Average mflops/s per call per node : 798117 +Grid : Message : Average mflops/s per call per node : 824941 +Grid : Message : Average mflops/s per call per node : 669293 +Grid : Message : Average mflops/s per call per node (full): 316080 +Grid : Message : Average mflops/s per call per node (full): 433248 +Grid : Message : Average mflops/s per call per node (full): 446401 +Grid : Message : Average mflops/s per call per node (full): 303309 +Grid : Message : Stencil 12.6616 GB/s per node +Grid : Message : Stencil 16.7577 GB/s per node +Grid : Message : Stencil 17.7365 GB/s per node +Grid : Message : Stencil 12.4545 GB/s per node +Grid : Message : Average mflops/s per call per node : 670534 +Grid : Message : Average mflops/s per call per node : 804258 +Grid : Message : Average mflops/s per call per node : 819481 +Grid : Message : Average mflops/s per call per node : 664434 +Grid : Message : Average mflops/s per call per node (full): 314697 +Grid : Message : Average mflops/s per 
call per node (full): 436280 +Grid : Message : Average mflops/s per call per node (full): 445594 +Grid : Message : Average mflops/s per call per node (full): 303729 +Grid : Message : Stencil 13.4683 GB/s per node +Grid : Message : Stencil 16.6307 GB/s per node +Grid : Message : Stencil 18.91 GB/s per node +Grid : Message : Stencil 14.0652 GB/s per node +Grid : Message : Average mflops/s per call per node : 668763 +Grid : Message : Average mflops/s per call per node : 805773 +Grid : Message : Average mflops/s per call per node : 817079 +Grid : Message : Average mflops/s per call per node : 661217 +Grid : Message : Average mflops/s per call per node (full): 316613 +Grid : Message : Average mflops/s per call per node (full): 437106 +Grid : Message : Average mflops/s per call per node (full): 447723 +Grid : Message : Average mflops/s per call per node (full): 305805 +Grid : Message : Stencil 12.8557 GB/s per node +Grid : Message : Stencil 15.3086 GB/s per node +Grid : Message : Stencil 17.0187 GB/s per node +Grid : Message : Stencil 12.391 GB/s per node +Grid : Message : Average mflops/s per call per node : 671402 +Grid : Message : Average mflops/s per call per node : 804054 +Grid : Message : Average mflops/s per call per node : 829054 +Grid : Message : Average mflops/s per call per node : 663265 +Grid : Message : Average mflops/s per call per node (full): 316413 +Grid : Message : Average mflops/s per call per node (full): 420778 +Grid : Message : Average mflops/s per call per node (full): 441034 +Grid : Message : Average mflops/s per call per node (full): 302925 +Grid : Message : Stencil 12.5937 GB/s per node +Grid : Message : Stencil 14.2749 GB/s per node +Grid : Message : Stencil 17.3705 GB/s per node +Grid : Message : Stencil 12.9952 GB/s per node +Grid : Message : Average mflops/s per call per node : 673301 +Grid : Message : Average mflops/s per call per node : 811708 +Grid : Message : Average mflops/s per call per node : 825225 +Grid : Message : Average mflops/s 
per call per node : 666381 +Grid : Message : Average mflops/s per call per node (full): 314965 +Grid : Message : Average mflops/s per call per node (full): 404682 +Grid : Message : Average mflops/s per call per node (full): 444928 +Grid : Message : Average mflops/s per call per node (full): 306184 +Grid : Message : Stencil 12.4751 GB/s per node +Grid : Message : Stencil 16.734 GB/s per node +Grid : Message : Stencil 17.7405 GB/s per node +Grid : Message : Stencil 13.0793 GB/s per node +Grid : Message : Average mflops/s per call per node : 670239 +Grid : Message : Average mflops/s per call per node : 803610 +Grid : Message : Average mflops/s per call per node : 822767 +Grid : Message : Average mflops/s per call per node : 661298 +Grid : Message : Average mflops/s per call per node (full): 314063 +Grid : Message : Average mflops/s per call per node (full): 436778 +Grid : Message : Average mflops/s per call per node (full): 446006 +Grid : Message : Average mflops/s per call per node (full): 305186 +Grid : Message : Stencil 13.1137 GB/s per node +Grid : Message : Stencil 16.3732 GB/s per node +Grid : Message : Stencil 18.3038 GB/s per node +Grid : Message : Stencil 12.7076 GB/s per node +Grid : Message : Average mflops/s per call per node : 666051 +Grid : Message : Average mflops/s per call per node : 803733 +Grid : Message : Average mflops/s per call per node : 825737 +Grid : Message : Average mflops/s per call per node : 667496 +Grid : Message : Average mflops/s per call per node (full): 314399 +Grid : Message : Average mflops/s per call per node (full): 432762 +Grid : Message : Average mflops/s per call per node (full): 449063 +Grid : Message : Average mflops/s per call per node (full): 305673 +Grid : Message : Stencil 12.5174 GB/s per node +Grid : Message : Stencil 16.1717 GB/s per node +Grid : Message : Stencil 17.2254 GB/s per node +Grid : Message : Stencil 12.9635 GB/s per node +Grid : Message : Average mflops/s per call per node : 666187 +Grid : Message : 
Average mflops/s per call per node : 803564 +Grid : Message : Average mflops/s per call per node : 822850 +Grid : Message : Average mflops/s per call per node : 664979 +Grid : Message : Average mflops/s per call per node (full): 313719 +Grid : Message : Average mflops/s per call per node (full): 430242 +Grid : Message : Average mflops/s per call per node (full): 442125 +Grid : Message : Average mflops/s per call per node (full): 304770 +Grid : Message : Stencil 12.8431 GB/s per node +Grid : Message : Stencil 17.4906 GB/s per node +Grid : Message : Stencil 17.2259 GB/s per node +Grid : Message : Stencil 13.003 GB/s per node +Grid : Message : Average mflops/s per call per node : 666116 +Grid : Message : Average mflops/s per call per node : 801907 +Grid : Message : Average mflops/s per call per node : 818616 +Grid : Message : Average mflops/s per call per node : 661256 +Grid : Message : Average mflops/s per call per node (full): 314504 +Grid : Message : Average mflops/s per call per node (full): 437613 +Grid : Message : Average mflops/s per call per node (full): 443053 +Grid : Message : Average mflops/s per call per node (full): 305154 +Grid : Message : Stencil 14.0723 GB/s per node +Grid : Message : Stencil 17.498 GB/s per node +Grid : Message : Stencil 17.6098 GB/s per node +Grid : Message : Stencil 12.2343 GB/s per node +Grid : Message : Average mflops/s per call per node : 664473 +Grid : Message : Average mflops/s per call per node : 802242 +Grid : Message : Average mflops/s per call per node : 820944 +Grid : Message : Average mflops/s per call per node : 669711 +Grid : Message : Average mflops/s per call per node (full): 316594 +Grid : Message : Average mflops/s per call per node (full): 439951 +Grid : Message : Average mflops/s per call per node (full): 443698 +Grid : Message : Average mflops/s per call per node (full): 304216 +Grid : Message : Stencil 13.0752 GB/s per node +Grid : Message : Stencil 18.2619 GB/s per node +Grid : Message : Stencil 18.5119 GB/s 
per node +Grid : Message : Stencil 12.4328 GB/s per node +Grid : Message : Average mflops/s per call per node : 664692 +Grid : Message : Average mflops/s per call per node : 802193 +Grid : Message : Average mflops/s per call per node : 824987 +Grid : Message : Average mflops/s per call per node : 664304 +Grid : Message : Average mflops/s per call per node (full): 314319 +Grid : Message : Average mflops/s per call per node (full): 441373 +Grid : Message : Average mflops/s per call per node (full): 448471 +Grid : Message : Average mflops/s per call per node (full): 303780 +Grid : Message : Stencil 12.4062 GB/s per node +Grid : Message : Stencil 16.5179 GB/s per node +Grid : Message : Stencil 17.8355 GB/s per node +Grid : Message : Stencil 14.5781 GB/s per node +Grid : Message : Average mflops/s per call per node : 668287 +Grid : Message : Average mflops/s per call per node : 804701 +Grid : Message : Average mflops/s per call per node : 823599 +Grid : Message : Average mflops/s per call per node : 658889 +Grid : Message : Average mflops/s per call per node (full): 312743 +Grid : Message : Average mflops/s per call per node (full): 434922 +Grid : Message : Average mflops/s per call per node (full): 438520 +Grid : Message : Average mflops/s per call per node (full): 306746 +Grid : Message : Stencil 13.3246 GB/s per node +Grid : Message : Stencil 17.0093 GB/s per node +Grid : Message : Stencil 18.994 GB/s per node +Grid : Message : Stencil 13.4837 GB/s per node +Grid : Message : Average mflops/s per call per node : 664741 +Grid : Message : Average mflops/s per call per node : 800517 +Grid : Message : Average mflops/s per call per node : 820825 +Grid : Message : Average mflops/s per call per node : 660615 +Grid : Message : Average mflops/s per call per node (full): 314883 +Grid : Message : Average mflops/s per call per node (full): 437339 +Grid : Message : Average mflops/s per call per node (full): 449436 +Grid : Message : Average mflops/s per call per node (full): 303865 
+Grid : Message : Stencil 12.2983 GB/s per node +Grid : Message : Stencil 16.8586 GB/s per node +Grid : Message : Stencil 18.3762 GB/s per node +Grid : Message : Stencil 12.0417 GB/s per node +Grid : Message : Average mflops/s per call per node : 665390 +Grid : Message : Average mflops/s per call per node : 807029 +Grid : Message : Average mflops/s per call per node : 823583 +Grid : Message : Average mflops/s per call per node : 671107 +Grid : Message : Average mflops/s per call per node (full): 311919 +Grid : Message : Average mflops/s per call per node (full): 438107 +Grid : Message : Average mflops/s per call per node (full): 448653 +Grid : Message : Average mflops/s per call per node (full): 302841 +Grid : Message : Stencil 13.0546 GB/s per node +Grid : Message : Stencil 16.4252 GB/s per node +Grid : Message : Stencil 17.4846 GB/s per node +Grid : Message : Stencil 12.7131 GB/s per node +Grid : Message : Average mflops/s per call per node : 667790 +Grid : Message : Average mflops/s per call per node : 804420 +Grid : Message : Average mflops/s per call per node : 822428 +Grid : Message : Average mflops/s per call per node : 662207 +Grid : Message : Average mflops/s per call per node (full): 314403 +Grid : Message : Average mflops/s per call per node (full): 433325 +Grid : Message : Average mflops/s per call per node (full): 444422 +Grid : Message : Average mflops/s per call per node (full): 304579 +Grid : Message : Stencil 13.2941 GB/s per node +Grid : Message : Stencil 9.77585 GB/s per node +Grid : Message : Stencil 17.002 GB/s per node +Grid : Message : Stencil 12.5608 GB/s per node +Grid : Message : Average mflops/s per call per node : 665332 +Grid : Message : Average mflops/s per call per node : 809864 +Grid : Message : Average mflops/s per call per node : 821372 +Grid : Message : Average mflops/s per call per node : 667238 +Grid : Message : Average mflops/s per call per node (full): 315191 +Grid : Message : Average mflops/s per call per node (full): 311246 
+Grid : Message : Average mflops/s per call per node (full): 440765 +Grid : Message : Average mflops/s per call per node (full): 305183 +Grid : Message : Stencil 13.4714 GB/s per node +Grid : Message : Stencil 16.6391 GB/s per node +Grid : Message : Stencil 17.3293 GB/s per node +Grid : Message : Stencil 12.9076 GB/s per node +Grid : Message : Average mflops/s per call per node : 662206 +Grid : Message : Average mflops/s per call per node : 803920 +Grid : Message : Average mflops/s per call per node : 818714 +Grid : Message : Average mflops/s per call per node : 662318 +Grid : Message : Average mflops/s per call per node (full): 314274 +Grid : Message : Average mflops/s per call per node (full): 436457 +Grid : Message : Average mflops/s per call per node (full): 443368 +Grid : Message : Average mflops/s per call per node (full): 304443 +Grid : Message : Stencil 13.1367 GB/s per node +Grid : Message : Stencil 17.4188 GB/s per node +Grid : Message : Stencil 18.3514 GB/s per node +Grid : Message : Stencil 13.8068 GB/s per node +Grid : Message : Average mflops/s per call per node : 668658 +Grid : Message : Average mflops/s per call per node : 805311 +Grid : Message : Average mflops/s per call per node : 819600 +Grid : Message : Average mflops/s per call per node : 662668 +Grid : Message : Average mflops/s per call per node (full): 315365 +Grid : Message : Average mflops/s per call per node (full): 439100 +Grid : Message : Average mflops/s per call per node (full): 445623 +Grid : Message : Average mflops/s per call per node (full): 306193 +Grid : Message : Stencil 13.9619 GB/s per node +Grid : Message : Stencil 14.1776 GB/s per node +Grid : Message : Stencil 17.4347 GB/s per node +Grid : Message : Stencil 12.8607 GB/s per node +Grid : Message : Average mflops/s per call per node : 662908 +Grid : Message : Average mflops/s per call per node : 810045 +Grid : Message : Average mflops/s per call per node : 829036 +Grid : Message : Average mflops/s per call per node : 664978 
+Grid : Message : Average mflops/s per call per node (full): 316261 +Grid : Message : Average mflops/s per call per node (full): 402716 +Grid : Message : Average mflops/s per call per node (full): 445448 +Grid : Message : Average mflops/s per call per node (full): 304756 +Grid : Message : Stencil 13.2573 GB/s per node +Grid : Message : Stencil 17.1684 GB/s per node +Grid : Message : Stencil 17.4563 GB/s per node +Grid : Message : Stencil 12.9146 GB/s per node +Grid : Message : Average mflops/s per call per node : 666694 +Grid : Message : Average mflops/s per call per node : 800308 +Grid : Message : Average mflops/s per call per node : 826364 +Grid : Message : Average mflops/s per call per node : 667065 +Grid : Message : Average mflops/s per call per node (full): 315370 +Grid : Message : Average mflops/s per call per node (full): 435363 +Grid : Message : Average mflops/s per call per node (full): 444945 +Grid : Message : Average mflops/s per call per node (full): 305344 +Grid : Message : Stencil 12.5017 GB/s per node +Grid : Message : Stencil 17.702 GB/s per node +Grid : Message : Stencil 17.1299 GB/s per node +Grid : Message : Stencil 12.8331 GB/s per node +Grid : Message : Average mflops/s per call per node : 669806 +Grid : Message : Average mflops/s per call per node : 800532 +Grid : Message : Average mflops/s per call per node : 827147 +Grid : Message : Average mflops/s per call per node : 659226 +Grid : Message : Average mflops/s per call per node (full): 313519 +Grid : Message : Average mflops/s per call per node (full): 439183 +Grid : Message : Average mflops/s per call per node (full): 440451 +Grid : Message : Average mflops/s per call per node (full): 302614 +Grid : Message : Stencil 13.0484 GB/s per node +Grid : Message : Stencil 16.8314 GB/s per node +Grid : Message : Stencil 18.3745 GB/s per node +Grid : Message : Stencil 12.5254 GB/s per node +Grid : Message : Average mflops/s per call per node : 666511 +Grid : Message : Average mflops/s per call per 
node : 803076 +Grid : Message : Average mflops/s per call per node : 819195 +Grid : Message : Average mflops/s per call per node : 664296 +Grid : Message : Average mflops/s per call per node (full): 314529 +Grid : Message : Average mflops/s per call per node (full): 435843 +Grid : Message : Average mflops/s per call per node (full): 446847 +Grid : Message : Average mflops/s per call per node (full): 303151 +Grid : Message : Stencil 12.3858 GB/s per node +Grid : Message : Stencil 17.2709 GB/s per node +Grid : Message : Stencil 17.303 GB/s per node +Grid : Message : Stencil 13.8556 GB/s per node +Grid : Message : Average mflops/s per call per node : 667433 +Grid : Message : Average mflops/s per call per node : 799882 +Grid : Message : Average mflops/s per call per node : 825126 +Grid : Message : Average mflops/s per call per node : 661650 +Grid : Message : Average mflops/s per call per node (full): 312345 +Grid : Message : Average mflops/s per call per node (full): 435733 +Grid : Message : Average mflops/s per call per node (full): 444059 +Grid : Message : Average mflops/s per call per node (full): 306157 +Grid : Message : Stencil 13.2423 GB/s per node +Grid : Message : Stencil 17.1759 GB/s per node +Grid : Message : Stencil 17.6773 GB/s per node +Grid : Message : Stencil 13.8838 GB/s per node +Grid : Message : Average mflops/s per call per node : 666065 +Grid : Message : Average mflops/s per call per node : 803042 +Grid : Message : Average mflops/s per call per node : 822598 +Grid : Message : Average mflops/s per call per node : 666285 +Grid : Message : Average mflops/s per call per node (full): 315382 +Grid : Message : Average mflops/s per call per node (full): 437049 +Grid : Message : Average mflops/s per call per node (full): 445820 +Grid : Message : Average mflops/s per call per node (full): 306615 +Grid : Message : Stencil 13.2119 GB/s per node +Grid : Message : Stencil 17.4023 GB/s per node +Grid : Message : Stencil 18.1174 GB/s per node +Grid : Message : 
Stencil 12.4868 GB/s per node +Grid : Message : Average mflops/s per call per node : 666930 +Grid : Message : Average mflops/s per call per node : 805994 +Grid : Message : Average mflops/s per call per node : 821803 +Grid : Message : Average mflops/s per call per node : 665081 +Grid : Message : Average mflops/s per call per node (full): 315690 +Grid : Message : Average mflops/s per call per node (full): 437432 +Grid : Message : Average mflops/s per call per node (full): 447243 +Grid : Message : Average mflops/s per call per node (full): 305000 +Grid : Message : Stencil 13.9222 GB/s per node +Grid : Message : Stencil 16.6438 GB/s per node +Grid : Message : Stencil 16.9078 GB/s per node +Grid : Message : Stencil 13.3717 GB/s per node +Grid : Message : Average mflops/s per call per node : 664940 +Grid : Message : Average mflops/s per call per node : 802706 +Grid : Message : Average mflops/s per call per node : 821356 +Grid : Message : Average mflops/s per call per node : 664479 +Grid : Message : Average mflops/s per call per node (full): 316591 +Grid : Message : Average mflops/s per call per node (full): 436145 +Grid : Message : Average mflops/s per call per node (full): 439766 +Grid : Message : Average mflops/s per call per node (full): 305996 +Grid : Message : Stencil 13.0773 GB/s per node +Grid : Message : Stencil 17.1815 GB/s per node +Grid : Message : Stencil 17.4962 GB/s per node +Grid : Message : Stencil 14.5529 GB/s per node +Grid : Message : Average mflops/s per call per node : 667989 +Grid : Message : Average mflops/s per call per node : 806203 +Grid : Message : Average mflops/s per call per node : 821061 +Grid : Message : Average mflops/s per call per node : 660524 +Grid : Message : Average mflops/s per call per node (full): 315502 +Grid : Message : Average mflops/s per call per node (full): 436257 +Grid : Message : Average mflops/s per call per node (full): 444052 +Grid : Message : Average mflops/s per call per node (full): 307003 +Grid : Message : Stencil 
14.2909 GB/s per node +Grid : Message : Stencil 16.5371 GB/s per node +Grid : Message : Stencil 17.1227 GB/s per node +Grid : Message : Stencil 13.4769 GB/s per node +Grid : Message : Average mflops/s per call per node : 665438 +Grid : Message : Average mflops/s per call per node : 804734 +Grid : Message : Average mflops/s per call per node : 825460 +Grid : Message : Average mflops/s per call per node : 667013 +Grid : Message : Average mflops/s per call per node (full): 315943 +Grid : Message : Average mflops/s per call per node (full): 435302 +Grid : Message : Average mflops/s per call per node (full): 436536 +Grid : Message : Average mflops/s per call per node (full): 305996 +Grid : Message : Stencil 13.9086 GB/s per node +Grid : Message : Stencil 17.3296 GB/s per node +Grid : Message : Stencil 17.3873 GB/s per node +Grid : Message : Stencil 13.8987 GB/s per node +Grid : Message : Average mflops/s per call per node : 663859 +Grid : Message : Average mflops/s per call per node : 806287 +Grid : Message : Average mflops/s per call per node : 826110 +Grid : Message : Average mflops/s per call per node : 663951 +Grid : Message : Average mflops/s per call per node (full): 314912 +Grid : Message : Average mflops/s per call per node (full): 440008 +Grid : Message : Average mflops/s per call per node (full): 442199 +Grid : Message : Average mflops/s per call per node (full): 307155 +Grid : Message : Stencil 13.5995 GB/s per node +Grid : Message : Stencil 17.1634 GB/s per node +Grid : Message : Stencil 17.526 GB/s per node +Grid : Message : Stencil 13.5101 GB/s per node +Grid : Message : Average mflops/s per call per node : 665903 +Grid : Message : Average mflops/s per call per node : 804316 +Grid : Message : Average mflops/s per call per node : 823921 +Grid : Message : Average mflops/s per call per node : 654776 +Grid : Message : Average mflops/s per call per node (full): 317058 +Grid : Message : Average mflops/s per call per node (full): 438237 +Grid : Message : Average 
mflops/s per call per node (full): 445829 +Grid : Message : Average mflops/s per call per node (full): 301512 +Grid : Message : Stencil 14.1811 GB/s per node +Grid : Message : Stencil 17.4119 GB/s per node +Grid : Message : Stencil 17.4588 GB/s per node +Grid : Message : Stencil 12.7528 GB/s per node +Grid : Message : Average mflops/s per call per node : 664117 +Grid : Message : Average mflops/s per call per node : 804561 +Grid : Message : Average mflops/s per call per node : 823380 +Grid : Message : Average mflops/s per call per node : 666616 +Grid : Message : Average mflops/s per call per node (full): 316154 +Grid : Message : Average mflops/s per call per node (full): 437275 +Grid : Message : Average mflops/s per call per node (full): 444732 +Grid : Message : Average mflops/s per call per node (full): 306355 +Grid : Message : Stencil 12.5427 GB/s per node +Grid : Message : Stencil 16.8808 GB/s per node +Grid : Message : Stencil 17.2794 GB/s per node +Grid : Message : Stencil 12.1337 GB/s per node +Grid : Message : Average mflops/s per call per node : 666693 +Grid : Message : Average mflops/s per call per node : 805130 +Grid : Message : Average mflops/s per call per node : 822421 +Grid : Message : Average mflops/s per call per node : 667030 +Grid : Message : Average mflops/s per call per node (full): 313444 +Grid : Message : Average mflops/s per call per node (full): 438107 +Grid : Message : Average mflops/s per call per node (full): 443039 +Grid : Message : Average mflops/s per call per node (full): 303071 +Grid : Message : Stencil 13.5952 GB/s per node +Grid : Message : Stencil 17.2235 GB/s per node +Grid : Message : Stencil 17.5301 GB/s per node +Grid : Message : Stencil 12.2712 GB/s per node +Grid : Message : Average mflops/s per call per node : 666773 +Grid : Message : Average mflops/s per call per node : 798745 +Grid : Message : Average mflops/s per call per node : 825565 +Grid : Message : Average mflops/s per call per node : 663631 +Grid : Message : Average 
mflops/s per call per node (full): 315302 +Grid : Message : Average mflops/s per call per node (full): 435436 +Grid : Message : Average mflops/s per call per node (full): 445069 +Grid : Message : Average mflops/s per call per node (full): 304156 +Grid : Message : Stencil 13.3931 GB/s per node +Grid : Message : Stencil 16.741 GB/s per node +Grid : Message : Stencil 18.0163 GB/s per node +Grid : Message : Stencil 13.0197 GB/s per node +Grid : Message : Average mflops/s per call per node : 666664 +Grid : Message : Average mflops/s per call per node : 806233 +Grid : Message : Average mflops/s per call per node : 822672 +Grid : Message : Average mflops/s per call per node : 664351 +Grid : Message : Average mflops/s per call per node (full): 316088 +Grid : Message : Average mflops/s per call per node (full): 436794 +Grid : Message : Average mflops/s per call per node (full): 448626 +Grid : Message : Average mflops/s per call per node (full): 305654 +Grid : Message : Stencil 13.343 GB/s per node +Grid : Message : Stencil 16.877 GB/s per node +Grid : Message : Stencil 18.055 GB/s per node +Grid : Message : Stencil 13.0708 GB/s per node +Grid : Message : Average mflops/s per call per node : 666103 +Grid : Message : Average mflops/s per call per node : 804067 +Grid : Message : Average mflops/s per call per node : 821323 +Grid : Message : Average mflops/s per call per node : 661550 +Grid : Message : Average mflops/s per call per node (full): 315256 +Grid : Message : Average mflops/s per call per node (full): 436469 +Grid : Message : Average mflops/s per call per node (full): 445568 +Grid : Message : Average mflops/s per call per node (full): 304861 +Grid : Message : Stencil 13.3767 GB/s per node +Grid : Message : Stencil 17.4307 GB/s per node +Grid : Message : Stencil 17.8732 GB/s per node +Grid : Message : Stencil 14.365 GB/s per node +Grid : Message : Average mflops/s per call per node : 665950 +Grid : Message : Average mflops/s per call per node : 804442 +Grid : Message : 
Average mflops/s per call per node : 820539 +Grid : Message : Average mflops/s per call per node : 665047 +Grid : Message : Average mflops/s per call per node (full): 316375 +Grid : Message : Average mflops/s per call per node (full): 439167 +Grid : Message : Average mflops/s per call per node (full): 447065 +Grid : Message : Average mflops/s per call per node (full): 307428 +Grid : Message : Stencil 12.821 GB/s per node +Grid : Message : Stencil 13.5966 GB/s per node +Grid : Message : Stencil 17.6488 GB/s per node +Grid : Message : Stencil 13.8616 GB/s per node +Grid : Message : Average mflops/s per call per node : 670359 +Grid : Message : Average mflops/s per call per node : 812443 +Grid : Message : Average mflops/s per call per node : 821539 +Grid : Message : Average mflops/s per call per node : 662747 +Grid : Message : Average mflops/s per call per node (full): 315688 +Grid : Message : Average mflops/s per call per node (full): 392718 +Grid : Message : Average mflops/s per call per node (full): 445675 +Grid : Message : Average mflops/s per call per node (full): 306518 +Grid : Message : Stencil 12.7894 GB/s per node +Grid : Message : Stencil 17.3747 GB/s per node +Grid : Message : Stencil 18.4541 GB/s per node +Grid : Message : Stencil 14.2313 GB/s per node +Grid : Message : Average mflops/s per call per node : 668881 +Grid : Message : Average mflops/s per call per node : 807244 +Grid : Message : Average mflops/s per call per node : 820681 +Grid : Message : Average mflops/s per call per node : 660460 +Grid : Message : Average mflops/s per call per node (full): 315852 +Grid : Message : Average mflops/s per call per node (full): 433116 +Grid : Message : Average mflops/s per call per node (full): 448640 +Grid : Message : Average mflops/s per call per node (full): 306582 +Grid : Message : Stencil 12.9534 GB/s per node +Grid : Message : Stencil 17.086 GB/s per node +Grid : Message : Stencil 17.926 GB/s per node +Grid : Message : Stencil 12.4938 GB/s per node +Grid : 
Message : Average mflops/s per call per node : 666505 +Grid : Message : Average mflops/s per call per node : 804431 +Grid : Message : Average mflops/s per call per node : 820428 +Grid : Message : Average mflops/s per call per node : 669124 +Grid : Message : Average mflops/s per call per node (full): 315643 +Grid : Message : Average mflops/s per call per node (full): 439109 +Grid : Message : Average mflops/s per call per node (full): 446081 +Grid : Message : Average mflops/s per call per node (full): 305512 +Grid : Message : Stencil 13.1323 GB/s per node +Grid : Message : Stencil 16.6478 GB/s per node +Grid : Message : Stencil 17.6423 GB/s per node +Grid : Message : Stencil 12.3352 GB/s per node +Grid : Message : Average mflops/s per call per node : 665775 +Grid : Message : Average mflops/s per call per node : 807142 +Grid : Message : Average mflops/s per call per node : 819368 +Grid : Message : Average mflops/s per call per node : 666051 +Grid : Message : Average mflops/s per call per node (full): 315122 +Grid : Message : Average mflops/s per call per node (full): 431694 +Grid : Message : Average mflops/s per call per node (full): 444808 +Grid : Message : Average mflops/s per call per node (full): 302136 +Grid : Message : Stencil 14.1863 GB/s per node +Grid : Message : Stencil 16.6056 GB/s per node +Grid : Message : Stencil 18.3201 GB/s per node +Grid : Message : Stencil 12.6007 GB/s per node +Grid : Message : Average mflops/s per call per node : 663137 +Grid : Message : Average mflops/s per call per node : 802466 +Grid : Message : Average mflops/s per call per node : 821600 +Grid : Message : Average mflops/s per call per node : 661086 +Grid : Message : Average mflops/s per call per node (full): 315558 +Grid : Message : Average mflops/s per call per node (full): 435761 +Grid : Message : Average mflops/s per call per node (full): 448283 +Grid : Message : Average mflops/s per call per node (full): 302765 +Grid : Message : Stencil 13.3338 GB/s per node +Grid : Message 
: Stencil 17.1289 GB/s per node +Grid : Message : Stencil 17.5447 GB/s per node +Grid : Message : Stencil 12.1588 GB/s per node +Grid : Message : Average mflops/s per call per node : 661532 +Grid : Message : Average mflops/s per call per node : 806950 +Grid : Message : Average mflops/s per call per node : 826036 +Grid : Message : Average mflops/s per call per node : 668188 +Grid : Message : Average mflops/s per call per node (full): 312926 +Grid : Message : Average mflops/s per call per node (full): 437556 +Grid : Message : Average mflops/s per call per node (full): 445663 +Grid : Message : Average mflops/s per call per node (full): 303389 +Grid : Message : Stencil 13.5784 GB/s per node +Grid : Message : Stencil 14.0755 GB/s per node +Grid : Message : Stencil 17.46 GB/s per node +Grid : Message : Stencil 13.1343 GB/s per node +Grid : Message : Average mflops/s per call per node : 662222 +Grid : Message : Average mflops/s per call per node : 799662 +Grid : Message : Average mflops/s per call per node : 820866 +Grid : Message : Average mflops/s per call per node : 667038 +Grid : Message : Average mflops/s per call per node (full): 314441 +Grid : Message : Average mflops/s per call per node (full): 400252 +Grid : Message : Average mflops/s per call per node (full): 442573 +Grid : Message : Average mflops/s per call per node (full): 306094 +Grid : Message : Stencil 13.7759 GB/s per node +Grid : Message : Stencil 17.173 GB/s per node +Grid : Message : Stencil 19.3367 GB/s per node +Grid : Message : Stencil 14.1301 GB/s per node +Grid : Message : Average mflops/s per call per node : 664650 +Grid : Message : Average mflops/s per call per node : 806907 +Grid : Message : Average mflops/s per call per node : 820560 +Grid : Message : Average mflops/s per call per node : 659010 +Grid : Message : Average mflops/s per call per node (full): 315181 +Grid : Message : Average mflops/s per call per node (full): 435972 +Grid : Message : Average mflops/s per call per node (full): 
449939 +Grid : Message : Average mflops/s per call per node (full): 303438 +Grid : Message : Stencil 13.6664 GB/s per node +Grid : Message : Stencil 17.1214 GB/s per node +Grid : Message : Stencil 18.6147 GB/s per node +Grid : Message : Stencil 12.3372 GB/s per node +Grid : Message : Average mflops/s per call per node : 663164 +Grid : Message : Average mflops/s per call per node : 804815 +Grid : Message : Average mflops/s per call per node : 813523 +Grid : Message : Average mflops/s per call per node : 664938 +Grid : Message : Average mflops/s per call per node (full): 315596 +Grid : Message : Average mflops/s per call per node (full): 437513 +Grid : Message : Average mflops/s per call per node (full): 447273 +Grid : Message : Average mflops/s per call per node (full): 304029 +Grid : Message : Stencil 14.2618 GB/s per node +Grid : Message : Stencil 10.0889 GB/s per node +Grid : Message : Stencil 17.4008 GB/s per node +Grid : Message : Stencil 12.3218 GB/s per node +Grid : Message : Average mflops/s per call per node : 661479 +Grid : Message : Average mflops/s per call per node : 807279 +Grid : Message : Average mflops/s per call per node : 829648 +Grid : Message : Average mflops/s per call per node : 667827 +Grid : Message : Average mflops/s per call per node (full): 316019 +Grid : Message : Average mflops/s per call per node (full): 318759 +Grid : Message : Average mflops/s per call per node (full): 445239 +Grid : Message : Average mflops/s per call per node (full): 304443 +Grid : Message : Stencil 13.7394 GB/s per node +Grid : Message : Stencil 17.5089 GB/s per node +Grid : Message : Stencil 17.5503 GB/s per node +Grid : Message : Stencil 13.057 GB/s per node +Grid : Message : Average mflops/s per call per node : 665386 +Grid : Message : Average mflops/s per call per node : 801394 +Grid : Message : Average mflops/s per call per node : 823385 +Grid : Message : Average mflops/s per call per node : 662710 +Grid : Message : Average mflops/s per call per node (full): 
316635 +Grid : Message : Average mflops/s per call per node (full): 439026 +Grid : Message : Average mflops/s per call per node (full): 438906 +Grid : Message : Average mflops/s per call per node (full): 304242 +Grid : Message : Stencil 13.4454 GB/s per node +Grid : Message : Stencil 16.8154 GB/s per node +Grid : Message : Stencil 19.4219 GB/s per node +Grid : Message : Stencil 13.3221 GB/s per node +Grid : Message : Average mflops/s per call per node : 664908 +Grid : Message : Average mflops/s per call per node : 807446 +Grid : Message : Average mflops/s per call per node : 819760 +Grid : Message : Average mflops/s per call per node : 658599 +Grid : Message : Average mflops/s per call per node (full): 315954 +Grid : Message : Average mflops/s per call per node (full): 437560 +Grid : Message : Average mflops/s per call per node (full): 449689 +Grid : Message : Average mflops/s per call per node (full): 305350 +Grid : Message : Stencil 14.9133 GB/s per node +Grid : Message : Stencil 16.8471 GB/s per node +Grid : Message : Stencil 17.4022 GB/s per node +Grid : Message : Stencil 13.4123 GB/s per node +Grid : Message : Average mflops/s per call per node : 664239 +Grid : Message : Average mflops/s per call per node : 806584 +Grid : Message : Average mflops/s per call per node : 823548 +Grid : Message : Average mflops/s per call per node : 661416 +Grid : Message : Average mflops/s per call per node (full): 316375 +Grid : Message : Average mflops/s per call per node (full): 438739 +Grid : Message : Average mflops/s per call per node (full): 443985 +Grid : Message : Average mflops/s per call per node (full): 305805 +Grid : Message : Stencil 13.5212 GB/s per node +Grid : Message : Stencil 17.0275 GB/s per node +Grid : Message : Stencil 18.5525 GB/s per node +Grid : Message : Stencil 14.6028 GB/s per node +Grid : Message : Average mflops/s per call per node : 665280 +Grid : Message : Average mflops/s per call per node : 804417 +Grid : Message : Average mflops/s per call per 
node : 819650 +Grid : Message : Average mflops/s per call per node : 662591 +Grid : Message : Average mflops/s per call per node (full): 315578 +Grid : Message : Average mflops/s per call per node (full): 435106 +Grid : Message : Average mflops/s per call per node (full): 445912 +Grid : Message : Average mflops/s per call per node (full): 307890 +Grid : Message : Stencil 12.5063 GB/s per node +Grid : Message : Stencil 17.0401 GB/s per node +Grid : Message : Stencil 17.909 GB/s per node +Grid : Message : Stencil 12.4003 GB/s per node +Grid : Message : Average mflops/s per call per node : 670017 +Grid : Message : Average mflops/s per call per node : 797635 +Grid : Message : Average mflops/s per call per node : 822123 +Grid : Message : Average mflops/s per call per node : 669380 +Grid : Message : Average mflops/s per call per node (full): 313835 +Grid : Message : Average mflops/s per call per node (full): 432405 +Grid : Message : Average mflops/s per call per node (full): 445624 +Grid : Message : Average mflops/s per call per node (full): 304862 +Grid : Message : Stencil 13.4388 GB/s per node +Grid : Message : Stencil 10.13 GB/s per node +Grid : Message : Stencil 18.4098 GB/s per node +Grid : Message : Stencil 12.6245 GB/s per node +Grid : Message : Average mflops/s per call per node : 664449 +Grid : Message : Average mflops/s per call per node : 813357 +Grid : Message : Average mflops/s per call per node : 820461 +Grid : Message : Average mflops/s per call per node : 666593 +Grid : Message : Average mflops/s per call per node (full): 315191 +Grid : Message : Average mflops/s per call per node (full): 319670 +Grid : Message : Average mflops/s per call per node (full): 448415 +Grid : Message : Average mflops/s per call per node (full): 304444 +Grid : Message : Stencil 12.8495 GB/s per node +Grid : Message : Stencil 16.3414 GB/s per node +Grid : Message : Stencil 17.4925 GB/s per node +Grid : Message : Stencil 12.4912 GB/s per node +Grid : Message : Average mflops/s per 
call per node : 667422 +Grid : Message : Average mflops/s per call per node : 806560 +Grid : Message : Average mflops/s per call per node : 825592 +Grid : Message : Average mflops/s per call per node : 667284 +Grid : Message : Average mflops/s per call per node (full): 314544 +Grid : Message : Average mflops/s per call per node (full): 433583 +Grid : Message : Average mflops/s per call per node (full): 444810 +Grid : Message : Average mflops/s per call per node (full): 304577 +Grid : Message : Stencil 12.3674 GB/s per node +Grid : Message : Stencil 14.5964 GB/s per node +Grid : Message : Stencil 17.6038 GB/s per node +Grid : Message : Stencil 11.969 GB/s per node +Grid : Message : Average mflops/s per call per node : 669368 +Grid : Message : Average mflops/s per call per node : 805939 +Grid : Message : Average mflops/s per call per node : 827919 +Grid : Message : Average mflops/s per call per node : 659033 +Grid : Message : Average mflops/s per call per node (full): 312956 +Grid : Message : Average mflops/s per call per node (full): 409279 +Grid : Message : Average mflops/s per call per node (full): 446149 +Grid : Message : Average mflops/s per call per node (full): 301303 +Grid : Message : Stencil 12.7816 GB/s per node +Grid : Message : Stencil 17.2484 GB/s per node +Grid : Message : Stencil 17.5063 GB/s per node +Grid : Message : Stencil 14.1445 GB/s per node +Grid : Message : Average mflops/s per call per node : 666224 +Grid : Message : Average mflops/s per call per node : 800794 +Grid : Message : Average mflops/s per call per node : 824797 +Grid : Message : Average mflops/s per call per node : 663407 +Grid : Message : Average mflops/s per call per node (full): 313092 +Grid : Message : Average mflops/s per call per node (full): 438551 +Grid : Message : Average mflops/s per call per node (full): 445019 +Grid : Message : Average mflops/s per call per node (full): 306984 +Grid : Message : Stencil 12.5945 GB/s per node +Grid : Message : Stencil 17.3642 GB/s per node 
+Grid : Message : Stencil 16.9934 GB/s per node +Grid : Message : Stencil 11.7458 GB/s per node +Grid : Message : Average mflops/s per call per node : 669025 +Grid : Message : Average mflops/s per call per node : 804992 +Grid : Message : Average mflops/s per call per node : 823272 +Grid : Message : Average mflops/s per call per node : 661218 +Grid : Message : Average mflops/s per call per node (full): 314220 +Grid : Message : Average mflops/s per call per node (full): 438383 +Grid : Message : Average mflops/s per call per node (full): 440220 +Grid : Message : Average mflops/s per call per node (full): 297043 +Grid : Message : Stencil 13.5802 GB/s per node +Grid : Message : Stencil 16.4426 GB/s per node +Grid : Message : Stencil 18.345 GB/s per node +Grid : Message : Stencil 12.5696 GB/s per node +Grid : Message : Average mflops/s per call per node : 665125 +Grid : Message : Average mflops/s per call per node : 803719 +Grid : Message : Average mflops/s per call per node : 817192 +Grid : Message : Average mflops/s per call per node : 663354 +Grid : Message : Average mflops/s per call per node (full): 315413 +Grid : Message : Average mflops/s per call per node (full): 433859 +Grid : Message : Average mflops/s per call per node (full): 446174 +Grid : Message : Average mflops/s per call per node (full): 303291 +Grid : Message : Stencil 12.7315 GB/s per node +Grid : Message : Stencil 15.383 GB/s per node +Grid : Message : Stencil 18.1797 GB/s per node +Grid : Message : Stencil 13.1161 GB/s per node +Grid : Message : Average mflops/s per call per node : 666533 +Grid : Message : Average mflops/s per call per node : 806042 +Grid : Message : Average mflops/s per call per node : 821914 +Grid : Message : Average mflops/s per call per node : 664713 +Grid : Message : Average mflops/s per call per node (full): 313225 +Grid : Message : Average mflops/s per call per node (full): 421118 +Grid : Message : Average mflops/s per call per node (full): 447010 +Grid : Message : Average 
mflops/s per call per node (full): 305894 +Grid : Message : Stencil 12.81 GB/s per node +Grid : Message : Stencil 16.7324 GB/s per node +Grid : Message : Stencil 17.5909 GB/s per node +Grid : Message : Stencil 12.8213 GB/s per node +Grid : Message : Average mflops/s per call per node : 667673 +Grid : Message : Average mflops/s per call per node : 806181 +Grid : Message : Average mflops/s per call per node : 821357 +Grid : Message : Average mflops/s per call per node : 667377 +Grid : Message : Average mflops/s per call per node (full): 315296 +Grid : Message : Average mflops/s per call per node (full): 436948 +Grid : Message : Average mflops/s per call per node (full): 445420 +Grid : Message : Average mflops/s per call per node (full): 305824 +Grid : Message : Stencil 14.4771 GB/s per node +Grid : Message : Stencil 18.0131 GB/s per node +Grid : Message : Stencil 17.9633 GB/s per node +Grid : Message : Stencil 12.8198 GB/s per node +Grid : Message : Average mflops/s per call per node : 662482 +Grid : Message : Average mflops/s per call per node : 800550 +Grid : Message : Average mflops/s per call per node : 823363 +Grid : Message : Average mflops/s per call per node : 655407 +Grid : Message : Average mflops/s per call per node (full): 314434 +Grid : Message : Average mflops/s per call per node (full): 438833 +Grid : Message : Average mflops/s per call per node (full): 438964 +Grid : Message : Average mflops/s per call per node (full): 303720 +Grid : Message : Stencil 12.8066 GB/s per node +Grid : Message : Stencil 12.4512 GB/s per node +Grid : Message : Stencil 16.9279 GB/s per node +Grid : Message : Stencil 13.2672 GB/s per node +Grid : Message : Average mflops/s per call per node : 669351 +Grid : Message : Average mflops/s per call per node : 808688 +Grid : Message : Average mflops/s per call per node : 824319 +Grid : Message : Average mflops/s per call per node : 662097 +Grid : Message : Average mflops/s per call per node (full): 314366 +Grid : Message : Average 
mflops/s per call per node (full): 370483 +Grid : Message : Average mflops/s per call per node (full): 439565 +Grid : Message : Average mflops/s per call per node (full): 305894 +Grid : Message : Stencil 12.7226 GB/s per node +Grid : Message : Stencil 17.5536 GB/s per node +Grid : Message : Stencil 18.3302 GB/s per node +Grid : Message : Stencil 12.0157 GB/s per node +Grid : Message : Average mflops/s per call per node : 673634 +Grid : Message : Average mflops/s per call per node : 799870 +Grid : Message : Average mflops/s per call per node : 825012 +Grid : Message : Average mflops/s per call per node : 665898 +Grid : Message : Average mflops/s per call per node (full): 315377 +Grid : Message : Average mflops/s per call per node (full): 439524 +Grid : Message : Average mflops/s per call per node (full): 448642 +Grid : Message : Average mflops/s per call per node (full): 301330 +Grid : Message : Stencil 15.665 GB/s per node +Grid : Message : Stencil 16.7653 GB/s per node +Grid : Message : Stencil 17.6713 GB/s per node +Grid : Message : Stencil 12.1773 GB/s per node +Grid : Message : Average mflops/s per call per node : 662461 +Grid : Message : Average mflops/s per call per node : 803518 +Grid : Message : Average mflops/s per call per node : 830364 +Grid : Message : Average mflops/s per call per node : 665534 +Grid : Message : Average mflops/s per call per node (full): 317567 +Grid : Message : Average mflops/s per call per node (full): 436654 +Grid : Message : Average mflops/s per call per node (full): 445703 +Grid : Message : Average mflops/s per call per node (full): 303153 +Grid : Message : Stencil 14.7891 GB/s per node +Grid : Message : Stencil 16.8005 GB/s per node +Grid : Message : Stencil 17.7006 GB/s per node +Grid : Message : Stencil 12.0703 GB/s per node +Grid : Message : Average mflops/s per call per node : 660523 +Grid : Message : Average mflops/s per call per node : 801552 +Grid : Message : Average mflops/s per call per node : 819377 +Grid : Message : 
Average mflops/s per call per node : 668934 +Grid : Message : Average mflops/s per call per node (full): 315578 +Grid : Message : Average mflops/s per call per node (full): 432654 +Grid : Message : Average mflops/s per call per node (full): 445646 +Grid : Message : Average mflops/s per call per node (full): 302725 +Grid : Message : Stencil 12.4778 GB/s per node +Grid : Message : Stencil 17.3503 GB/s per node +Grid : Message : Stencil 17.3418 GB/s per node +Grid : Message : Stencil 12.9612 GB/s per node +Grid : Message : Average mflops/s per call per node : 669193 +Grid : Message : Average mflops/s per call per node : 803618 +Grid : Message : Average mflops/s per call per node : 818478 +Grid : Message : Average mflops/s per call per node : 659136 +Grid : Message : Average mflops/s per call per node (full): 313641 +Grid : Message : Average mflops/s per call per node (full): 437523 +Grid : Message : Average mflops/s per call per node (full): 442583 +Grid : Message : Average mflops/s per call per node (full): 305080 +Grid : Message : Stencil 12.5965 GB/s per node +Grid : Message : Stencil 16.2825 GB/s per node +Grid : Message : Stencil 17.1698 GB/s per node +Grid : Message : Stencil 12.9088 GB/s per node +Grid : Message : Average mflops/s per call per node : 668568 +Grid : Message : Average mflops/s per call per node : 809293 +Grid : Message : Average mflops/s per call per node : 820792 +Grid : Message : Average mflops/s per call per node : 658146 +Grid : Message : Average mflops/s per call per node (full): 313932 +Grid : Message : Average mflops/s per call per node (full): 432644 +Grid : Message : Average mflops/s per call per node (full): 442291 +Grid : Message : Average mflops/s per call per node (full): 302658 +Grid : Message : Stencil 12.7875 GB/s per node +Grid : Message : Stencil 17.3888 GB/s per node +Grid : Message : Stencil 18.2178 GB/s per node +Grid : Message : Stencil 15.7381 GB/s per node +Grid : Message : Average mflops/s per call per node : 666964 +Grid 
: Message : Average mflops/s per call per node : 795974 +Grid : Message : Average mflops/s per call per node : 822389 +Grid : Message : Average mflops/s per call per node : 658799 +Grid : Message : Average mflops/s per call per node (full): 314510 +Grid : Message : Average mflops/s per call per node (full): 436541 +Grid : Message : Average mflops/s per call per node (full): 448418 +Grid : Message : Average mflops/s per call per node (full): 307398 +Grid : Message : Stencil 13.0569 GB/s per node +Grid : Message : Stencil 18.0567 GB/s per node +Grid : Message : Stencil 18.5102 GB/s per node +Grid : Message : Stencil 12.0611 GB/s per node +Grid : Message : Average mflops/s per call per node : 666304 +Grid : Message : Average mflops/s per call per node : 798361 +Grid : Message : Average mflops/s per call per node : 823575 +Grid : Message : Average mflops/s per call per node : 667601 +Grid : Message : Average mflops/s per call per node (full): 315659 +Grid : Message : Average mflops/s per call per node (full): 438070 +Grid : Message : Average mflops/s per call per node (full): 445969 +Grid : Message : Average mflops/s per call per node (full): 302695 +Grid : Message : Stencil 13.1052 GB/s per node +Grid : Message : Stencil 16.5789 GB/s per node +Grid : Message : Stencil 16.905 GB/s per node +Grid : Message : Stencil 12.5662 GB/s per node +Grid : Message : Average mflops/s per call per node : 666859 +Grid : Message : Average mflops/s per call per node : 803782 +Grid : Message : Average mflops/s per call per node : 821719 +Grid : Message : Average mflops/s per call per node : 667919 +Grid : Message : Average mflops/s per call per node (full): 315018 +Grid : Message : Average mflops/s per call per node (full): 435319 +Grid : Message : Average mflops/s per call per node (full): 439161 +Grid : Message : Average mflops/s per call per node (full): 305586 +Grid : Message : Stencil 12.6053 GB/s per node +Grid : Message : Stencil 16.9889 GB/s per node +Grid : Message : Stencil 
17.299 GB/s per node +Grid : Message : Stencil 13.3575 GB/s per node +Grid : Message : Average mflops/s per call per node : 668441 +Grid : Message : Average mflops/s per call per node : 800242 +Grid : Message : Average mflops/s per call per node : 813284 +Grid : Message : Average mflops/s per call per node : 663141 +Grid : Message : Average mflops/s per call per node (full): 314179 +Grid : Message : Average mflops/s per call per node (full): 436227 +Grid : Message : Average mflops/s per call per node (full): 442472 +Grid : Message : Average mflops/s per call per node (full): 305967 +Grid : Message : Stencil 12.9178 GB/s per node +Grid : Message : Stencil 16.6625 GB/s per node +Grid : Message : Stencil 17.7502 GB/s per node +Grid : Message : Stencil 13.6215 GB/s per node +Grid : Message : Average mflops/s per call per node : 669848 +Grid : Message : Average mflops/s per call per node : 805128 +Grid : Message : Average mflops/s per call per node : 819001 +Grid : Message : Average mflops/s per call per node : 664476 +Grid : Message : Average mflops/s per call per node (full): 316078 +Grid : Message : Average mflops/s per call per node (full): 435852 +Grid : Message : Average mflops/s per call per node (full): 445856 +Grid : Message : Average mflops/s per call per node (full): 305997 +Grid : Message : Stencil 12.7647 GB/s per node +Grid : Message : Stencil 16.037 GB/s per node +Grid : Message : Stencil 17.6307 GB/s per node +Grid : Message : Stencil 12.981 GB/s per node +Grid : Message : Average mflops/s per call per node : 668508 +Grid : Message : Average mflops/s per call per node : 807700 +Grid : Message : Average mflops/s per call per node : 823565 +Grid : Message : Average mflops/s per call per node : 659680 +Grid : Message : Average mflops/s per call per node (full): 314931 +Grid : Message : Average mflops/s per call per node (full): 428598 +Grid : Message : Average mflops/s per call per node (full): 445601 +Grid : Message : Average mflops/s per call per node 
(full): 305267 +Grid : Message : Stencil 12.7174 GB/s per node +Grid : Message : Stencil 15.3893 GB/s per node +Grid : Message : Stencil 17.7699 GB/s per node +Grid : Message : Stencil 14.1334 GB/s per node +Grid : Message : Average mflops/s per call per node : 668861 +Grid : Message : Average mflops/s per call per node : 808645 +Grid : Message : Average mflops/s per call per node : 818086 +Grid : Message : Average mflops/s per call per node : 665203 +Grid : Message : Average mflops/s per call per node (full): 314695 +Grid : Message : Average mflops/s per call per node (full): 421627 +Grid : Message : Average mflops/s per call per node (full): 438315 +Grid : Message : Average mflops/s per call per node (full): 307073 +Grid : Message : Stencil 13.5612 GB/s per node +Grid : Message : Stencil 14.3094 GB/s per node +Grid : Message : Stencil 17.7979 GB/s per node +Grid : Message : Stencil 13.7627 GB/s per node +Grid : Message : Average mflops/s per call per node : 664046 +Grid : Message : Average mflops/s per call per node : 804608 +Grid : Message : Average mflops/s per call per node : 822177 +Grid : Message : Average mflops/s per call per node : 661169 +Grid : Message : Average mflops/s per call per node (full): 314385 +Grid : Message : Average mflops/s per call per node (full): 401651 +Grid : Message : Average mflops/s per call per node (full): 443230 +Grid : Message : Average mflops/s per call per node (full): 304789 +Grid : Message : Stencil 13.6262 GB/s per node +Grid : Message : Stencil 16.5029 GB/s per node +Grid : Message : Stencil 18.9915 GB/s per node +Grid : Message : Stencil 11.9887 GB/s per node +Grid : Message : Average mflops/s per call per node : 669539 +Grid : Message : Average mflops/s per call per node : 802650 +Grid : Message : Average mflops/s per call per node : 819077 +Grid : Message : Average mflops/s per call per node : 666765 +Grid : Message : Average mflops/s per call per node (full): 317347 +Grid : Message : Average mflops/s per call per node 
(full): 434704 +Grid : Message : Average mflops/s per call per node (full): 449241 +Grid : Message : Average mflops/s per call per node (full): 302354 +Grid : Message : Stencil 13.9588 GB/s per node +Grid : Message : Stencil 12.2464 GB/s per node +Grid : Message : Stencil 18.1376 GB/s per node +Grid : Message : Stencil 12.7738 GB/s per node +Grid : Message : Average mflops/s per call per node : 666887 +Grid : Message : Average mflops/s per call per node : 815235 +Grid : Message : Average mflops/s per call per node : 820800 +Grid : Message : Average mflops/s per call per node : 672403 +Grid : Message : Average mflops/s per call per node (full): 316395 +Grid : Message : Average mflops/s per call per node (full): 366349 +Grid : Message : Average mflops/s per call per node (full): 447028 +Grid : Message : Average mflops/s per call per node (full): 305605 +Grid : Message : Stencil 13.3593 GB/s per node +Grid : Message : Stencil 16.3731 GB/s per node +Grid : Message : Stencil 16.966 GB/s per node +Grid : Message : Stencil 12.9026 GB/s per node +Grid : Message : Average mflops/s per call per node : 667752 +Grid : Message : Average mflops/s per call per node : 805011 +Grid : Message : Average mflops/s per call per node : 824621 +Grid : Message : Average mflops/s per call per node : 664981 +Grid : Message : Average mflops/s per call per node (full): 315591 +Grid : Message : Average mflops/s per call per node (full): 429557 +Grid : Message : Average mflops/s per call per node (full): 441140 +Grid : Message : Average mflops/s per call per node (full): 304634 +Grid : Message : Stencil 14.6674 GB/s per node +Grid : Message : Stencil 16.6037 GB/s per node +Grid : Message : Stencil 18.8243 GB/s per node +Grid : Message : Stencil 13.0788 GB/s per node +Grid : Message : Average mflops/s per call per node : 662330 +Grid : Message : Average mflops/s per call per node : 800743 +Grid : Message : Average mflops/s per call per node : 822556 +Grid : Message : Average mflops/s per call per 
node : 662263 +Grid : Message : Average mflops/s per call per node (full): 316637 +Grid : Message : Average mflops/s per call per node (full): 435866 +Grid : Message : Average mflops/s per call per node (full): 449299 +Grid : Message : Average mflops/s per call per node (full): 305056 +Grid : Message : Stencil 14.3719 GB/s per node +Grid : Message : Stencil 16.6864 GB/s per node +Grid : Message : Stencil 16.5486 GB/s per node +Grid : Message : Stencil 12.9351 GB/s per node +Grid : Message : Average mflops/s per call per node : 661735 +Grid : Message : Average mflops/s per call per node : 804615 +Grid : Message : Average mflops/s per call per node : 816724 +Grid : Message : Average mflops/s per call per node : 666066 +Grid : Message : Average mflops/s per call per node (full): 316431 +Grid : Message : Average mflops/s per call per node (full): 435436 +Grid : Message : Average mflops/s per call per node (full): 429896 +Grid : Message : Average mflops/s per call per node (full): 305633 +Grid : Message : Stencil 13.8614 GB/s per node +Grid : Message : Stencil 16.7359 GB/s per node +Grid : Message : Stencil 17.6222 GB/s per node +Grid : Message : Stencil 12.787 GB/s per node +Grid : Message : Average mflops/s per call per node : 663609 +Grid : Message : Average mflops/s per call per node : 802481 +Grid : Message : Average mflops/s per call per node : 822284 +Grid : Message : Average mflops/s per call per node : 663024 +Grid : Message : Average mflops/s per call per node (full): 316361 +Grid : Message : Average mflops/s per call per node (full): 436671 +Grid : Message : Average mflops/s per call per node (full): 445878 +Grid : Message : Average mflops/s per call per node (full): 304683 +Grid : Message : Stencil 12.3693 GB/s per node +Grid : Message : Stencil 17.9277 GB/s per node +Grid : Message : Stencil 17.857 GB/s per node +Grid : Message : Stencil 12.4024 GB/s per node +Grid : Message : Average mflops/s per call per node : 666807 +Grid : Message : Average mflops/s 
per call per node : 799770 +Grid : Message : Average mflops/s per call per node : 814899 +Grid : Message : Average mflops/s per call per node : 664095 +Grid : Message : Average mflops/s per call per node (full): 310994 +Grid : Message : Average mflops/s per call per node (full): 440054 +Grid : Message : Average mflops/s per call per node (full): 444183 +Grid : Message : Average mflops/s per call per node (full): 303609 +Grid : Message : Stencil 12.9655 GB/s per node +Grid : Message : Stencil 16.8382 GB/s per node +Grid : Message : Stencil 17.7169 GB/s per node +Grid : Message : Stencil 12.5815 GB/s per node +Grid : Message : Average mflops/s per call per node : 667126 +Grid : Message : Average mflops/s per call per node : 804406 +Grid : Message : Average mflops/s per call per node : 824484 +Grid : Message : Average mflops/s per call per node : 668240 +Grid : Message : Average mflops/s per call per node (full): 315412 +Grid : Message : Average mflops/s per call per node (full): 437094 +Grid : Message : Average mflops/s per call per node (full): 445690 +Grid : Message : Average mflops/s per call per node (full): 304519 +Grid : Message : Stencil 12.4799 GB/s per node +Grid : Message : Stencil 17.4822 GB/s per node +Grid : Message : Stencil 17.1568 GB/s per node +Grid : Message : Stencil 13.2932 GB/s per node +Grid : Message : Average mflops/s per call per node : 672305 +Grid : Message : Average mflops/s per call per node : 800954 +Grid : Message : Average mflops/s per call per node : 822328 +Grid : Message : Average mflops/s per call per node : 669005 +Grid : Message : Average mflops/s per call per node (full): 313890 +Grid : Message : Average mflops/s per call per node (full): 439128 +Grid : Message : Average mflops/s per call per node (full): 442636 +Grid : Message : Average mflops/s per call per node (full): 306782 +Grid : Message : Stencil 12.5206 GB/s per node +Grid : Message : Stencil 16.6807 GB/s per node +Grid : Message : Stencil 18.4892 GB/s per node +Grid : 
Message : Stencil 12.5474 GB/s per node +Grid : Message : Average mflops/s per call per node : 670051 +Grid : Message : Average mflops/s per call per node : 803701 +Grid : Message : Average mflops/s per call per node : 814013 +Grid : Message : Average mflops/s per call per node : 662089 +Grid : Message : Average mflops/s per call per node (full): 313988 +Grid : Message : Average mflops/s per call per node (full): 434021 +Grid : Message : Average mflops/s per call per node (full): 446562 +Grid : Message : Average mflops/s per call per node (full): 300483 +Grid : Message : Stencil 12.6668 GB/s per node +Grid : Message : Stencil 13.4979 GB/s per node +Grid : Message : Stencil 18.2685 GB/s per node +Grid : Message : Stencil 13.2324 GB/s per node +Grid : Message : Average mflops/s per call per node : 670057 +Grid : Message : Average mflops/s per call per node : 810582 +Grid : Message : Average mflops/s per call per node : 819467 +Grid : Message : Average mflops/s per call per node : 664962 +Grid : Message : Average mflops/s per call per node (full): 314212 +Grid : Message : Average mflops/s per call per node (full): 390848 +Grid : Message : Average mflops/s per call per node (full): 446832 +Grid : Message : Average mflops/s per call per node (full): 306389 +Grid : Message : Stencil 13.4024 GB/s per node +Grid : Message : Stencil 17.8795 GB/s per node +Grid : Message : Stencil 17.4326 GB/s per node +Grid : Message : Stencil 14.4151 GB/s per node +Grid : Message : Average mflops/s per call per node : 666151 +Grid : Message : Average mflops/s per call per node : 804497 +Grid : Message : Average mflops/s per call per node : 826075 +Grid : Message : Average mflops/s per call per node : 661044 +Grid : Message : Average mflops/s per call per node (full): 315120 +Grid : Message : Average mflops/s per call per node (full): 439813 +Grid : Message : Average mflops/s per call per node (full): 445667 +Grid : Message : Average mflops/s per call per node (full): 307218 +Grid : Message 
: Stencil 13.5361 GB/s per node +Grid : Message : Stencil 17.1346 GB/s per node +Grid : Message : Stencil 18.1014 GB/s per node +Grid : Message : Stencil 12.8291 GB/s per node +Grid : Message : Average mflops/s per call per node : 669046 +Grid : Message : Average mflops/s per call per node : 808504 +Grid : Message : Average mflops/s per call per node : 818995 +Grid : Message : Average mflops/s per call per node : 662119 +Grid : Message : Average mflops/s per call per node (full): 315850 +Grid : Message : Average mflops/s per call per node (full): 438213 +Grid : Message : Average mflops/s per call per node (full): 446359 +Grid : Message : Average mflops/s per call per node (full): 306122 +Grid : Message : Stencil 13.3801 GB/s per node +Grid : Message : Stencil 16.8594 GB/s per node +Grid : Message : Stencil 17.7869 GB/s per node +Grid : Message : Stencil 12.5915 GB/s per node +Grid : Message : Average mflops/s per call per node : 668371 +Grid : Message : Average mflops/s per call per node : 804653 +Grid : Message : Average mflops/s per call per node : 824526 +Grid : Message : Average mflops/s per call per node : 667430 +Grid : Message : Average mflops/s per call per node (full): 315682 +Grid : Message : Average mflops/s per call per node (full): 437676 +Grid : Message : Average mflops/s per call per node (full): 446980 +Grid : Message : Average mflops/s per call per node (full): 304992 +Grid : Message : Stencil 13.7549 GB/s per node +Grid : Message : Stencil 14.9992 GB/s per node +Grid : Message : Stencil 17.5302 GB/s per node +Grid : Message : Stencil 12.2735 GB/s per node +Grid : Message : Average mflops/s per call per node : 666096 +Grid : Message : Average mflops/s per call per node : 811499 +Grid : Message : Average mflops/s per call per node : 824694 +Grid : Message : Average mflops/s per call per node : 668519 +Grid : Message : Average mflops/s per call per node (full): 316858 +Grid : Message : Average mflops/s per call per node (full): 414059 +Grid : Message 
: Average mflops/s per call per node (full): 445936 +Grid : Message : Average mflops/s per call per node (full): 303533 +Grid : Message : Stencil 13.6632 GB/s per node +Grid : Message : Stencil 17.0763 GB/s per node +Grid : Message : Stencil 17.3609 GB/s per node +Grid : Message : Stencil 11.946 GB/s per node +Grid : Message : Average mflops/s per call per node : 664469 +Grid : Message : Average mflops/s per call per node : 804234 +Grid : Message : Average mflops/s per call per node : 823660 +Grid : Message : Average mflops/s per call per node : 665986 +Grid : Message : Average mflops/s per call per node (full): 313506 +Grid : Message : Average mflops/s per call per node (full): 436407 +Grid : Message : Average mflops/s per call per node (full): 445089 +Grid : Message : Average mflops/s per call per node (full): 301375 +Grid : Message : Stencil 13.127 GB/s per node +Grid : Message : Stencil 16.4242 GB/s per node +Grid : Message : Stencil 17.4365 GB/s per node +Grid : Message : Stencil 13.6282 GB/s per node +Grid : Message : Average mflops/s per call per node : 670281 +Grid : Message : Average mflops/s per call per node : 805435 +Grid : Message : Average mflops/s per call per node : 822530 +Grid : Message : Average mflops/s per call per node : 659966 +Grid : Message : Average mflops/s per call per node (full): 315917 +Grid : Message : Average mflops/s per call per node (full): 434543 +Grid : Message : Average mflops/s per call per node (full): 441755 +Grid : Message : Average mflops/s per call per node (full): 306428 +Grid : Message : Stencil 13.2015 GB/s per node +Grid : Message : Stencil 17.5169 GB/s per node +Grid : Message : Stencil 18 GB/s per node +Grid : Message : Stencil 12.8812 GB/s per node +Grid : Message : Average mflops/s per call per node : 666946 +Grid : Message : Average mflops/s per call per node : 804926 +Grid : Message : Average mflops/s per call per node : 821508 +Grid : Message : Average mflops/s per call per node : 665034 +Grid : Message : 
Average mflops/s per call per node (full): 313490 +Grid : Message : Average mflops/s per call per node (full): 438264 +Grid : Message : Average mflops/s per call per node (full): 446763 +Grid : Message : Average mflops/s per call per node (full): 302935 +Grid : Message : Stencil 13.3755 GB/s per node +Grid : Message : Stencil 17.2901 GB/s per node +Grid : Message : Stencil 18.1233 GB/s per node +Grid : Message : Stencil 13.465 GB/s per node +Grid : Message : Average mflops/s per call per node : 668938 +Grid : Message : Average mflops/s per call per node : 804972 +Grid : Message : Average mflops/s per call per node : 822896 +Grid : Message : Average mflops/s per call per node : 663745 +Grid : Message : Average mflops/s per call per node (full): 316147 +Grid : Message : Average mflops/s per call per node (full): 438113 +Grid : Message : Average mflops/s per call per node (full): 446562 +Grid : Message : Average mflops/s per call per node (full): 306549 +Grid : Message : Stencil 14.3151 GB/s per node +Grid : Message : Stencil 18.0712 GB/s per node +Grid : Message : Stencil 19.3206 GB/s per node +Grid : Message : Stencil 13.0481 GB/s per node +Grid : Message : Average mflops/s per call per node : 662888 +Grid : Message : Average mflops/s per call per node : 803274 +Grid : Message : Average mflops/s per call per node : 821731 +Grid : Message : Average mflops/s per call per node : 666168 +Grid : Message : Average mflops/s per call per node (full): 315353 +Grid : Message : Average mflops/s per call per node (full): 442293 +Grid : Message : Average mflops/s per call per node (full): 449390 +Grid : Message : Average mflops/s per call per node (full): 305695 +Grid : Message : Stencil 13.6391 GB/s per node +Grid : Message : Stencil 16.6396 GB/s per node +Grid : Message : Stencil 18.4333 GB/s per node +Grid : Message : Stencil 13.2398 GB/s per node +Grid : Message : Average mflops/s per call per node : 663417 +Grid : Message : Average mflops/s per call per node : 808102 +Grid 
: Message : Average mflops/s per call per node : 820793 +Grid : Message : Average mflops/s per call per node : 665197 +Grid : Message : Average mflops/s per call per node (full): 314956 +Grid : Message : Average mflops/s per call per node (full): 436651 +Grid : Message : Average mflops/s per call per node (full): 448931 +Grid : Message : Average mflops/s per call per node (full): 303869 +Grid : Message : Stencil 12.8134 GB/s per node +Grid : Message : Stencil 16.688 GB/s per node +Grid : Message : Stencil 17.268 GB/s per node +Grid : Message : Stencil 13.5131 GB/s per node +Grid : Message : Average mflops/s per call per node : 667084 +Grid : Message : Average mflops/s per call per node : 801652 +Grid : Message : Average mflops/s per call per node : 818306 +Grid : Message : Average mflops/s per call per node : 664899 +Grid : Message : Average mflops/s per call per node (full): 313861 +Grid : Message : Average mflops/s per call per node (full): 435206 +Grid : Message : Average mflops/s per call per node (full): 443181 +Grid : Message : Average mflops/s per call per node (full): 306460 +Grid : Message : Stencil 13.1928 GB/s per node +Grid : Message : Stencil 16.9779 GB/s per node +Grid : Message : Stencil 18.3528 GB/s per node +Grid : Message : Stencil 13.6895 GB/s per node +Grid : Message : Average mflops/s per call per node : 665268 +Grid : Message : Average mflops/s per call per node : 801351 +Grid : Message : Average mflops/s per call per node : 820825 +Grid : Message : Average mflops/s per call per node : 663334 +Grid : Message : Average mflops/s per call per node (full): 314589 +Grid : Message : Average mflops/s per call per node (full): 437686 +Grid : Message : Average mflops/s per call per node (full): 447820 +Grid : Message : Average mflops/s per call per node (full): 306741 +Grid : Message : Stencil 14.5065 GB/s per node +Grid : Message : Stencil 16.8067 GB/s per node +Grid : Message : Stencil 18.635 GB/s per node +Grid : Message : Stencil 12.8957 GB/s per 
node +Grid : Message : Average mflops/s per call per node : 662023 +Grid : Message : Average mflops/s per call per node : 802875 +Grid : Message : Average mflops/s per call per node : 817871 +Grid : Message : Average mflops/s per call per node : 664987 +Grid : Message : Average mflops/s per call per node (full): 316621 +Grid : Message : Average mflops/s per call per node (full): 435212 +Grid : Message : Average mflops/s per call per node (full): 446336 +Grid : Message : Average mflops/s per call per node (full): 304657 +Grid : Message : Stencil 12.4429 GB/s per node +Grid : Message : Stencil 16.5042 GB/s per node +Grid : Message : Stencil 17.7928 GB/s per node +Grid : Message : Stencil 12.6474 GB/s per node +Grid : Message : Average mflops/s per call per node : 669273 +Grid : Message : Average mflops/s per call per node : 801563 +Grid : Message : Average mflops/s per call per node : 821917 +Grid : Message : Average mflops/s per call per node : 667029 +Grid : Message : Average mflops/s per call per node (full): 312870 +Grid : Message : Average mflops/s per call per node (full): 433644 +Grid : Message : Average mflops/s per call per node (full): 447043 +Grid : Message : Average mflops/s per call per node (full): 305892 +Grid : Message : Stencil 11.3677 GB/s per node +Grid : Message : Stencil 16.7617 GB/s per node +Grid : Message : Stencil 19.4241 GB/s per node +Grid : Message : Stencil 12.6271 GB/s per node +Grid : Message : Average mflops/s per call per node : 662449 +Grid : Message : Average mflops/s per call per node : 800926 +Grid : Message : Average mflops/s per call per node : 820710 +Grid : Message : Average mflops/s per call per node : 663453 +Grid : Message : Average mflops/s per call per node (full): 303543 +Grid : Message : Average mflops/s per call per node (full): 435451 +Grid : Message : Average mflops/s per call per node (full): 449223 +Grid : Message : Average mflops/s per call per node (full): 303758 +Grid : Message : Stencil 12.2592 GB/s per node 
+Grid : Message : Stencil 17.931 GB/s per node +Grid : Message : Stencil 18.5877 GB/s per node +Grid : Message : Stencil 13.3924 GB/s per node +Grid : Message : Average mflops/s per call per node : 668599 +Grid : Message : Average mflops/s per call per node : 798325 +Grid : Message : Average mflops/s per call per node : 821788 +Grid : Message : Average mflops/s per call per node : 665107 +Grid : Message : Average mflops/s per call per node (full): 311819 +Grid : Message : Average mflops/s per call per node (full): 439804 +Grid : Message : Average mflops/s per call per node (full): 449096 +Grid : Message : Average mflops/s per call per node (full): 306771 +Grid : Message : Stencil 13.4397 GB/s per node +Grid : Message : Stencil 18.4553 GB/s per node +Grid : Message : Stencil 17.2144 GB/s per node +Grid : Message : Stencil 13.4523 GB/s per node +Grid : Message : Average mflops/s per call per node : 663828 +Grid : Message : Average mflops/s per call per node : 803533 +Grid : Message : Average mflops/s per call per node : 829133 +Grid : Message : Average mflops/s per call per node : 667575 +Grid : Message : Average mflops/s per call per node (full): 315880 +Grid : Message : Average mflops/s per call per node (full): 442364 +Grid : Message : Average mflops/s per call per node (full): 444101 +Grid : Message : Average mflops/s per call per node (full): 306812 +Grid : Message : Stencil 13.4004 GB/s per node +Grid : Message : Stencil 16.9872 GB/s per node +Grid : Message : Stencil 19.2425 GB/s per node +Grid : Message : Stencil 12.3307 GB/s per node +Grid : Message : Average mflops/s per call per node : 666260 +Grid : Message : Average mflops/s per call per node : 805756 +Grid : Message : Average mflops/s per call per node : 819795 +Grid : Message : Average mflops/s per call per node : 665190 +Grid : Message : Average mflops/s per call per node (full): 314488 +Grid : Message : Average mflops/s per call per node (full): 439238 +Grid : Message : Average mflops/s per call per 
node (full): 449947 +Grid : Message : Average mflops/s per call per node (full): 304314 +Grid : Message : Stencil 13.2794 GB/s per node +Grid : Message : Stencil 16.3363 GB/s per node +Grid : Message : Stencil 17.4506 GB/s per node +Grid : Message : Stencil 11.8973 GB/s per node +Grid : Message : Average mflops/s per call per node : 661869 +Grid : Message : Average mflops/s per call per node : 802011 +Grid : Message : Average mflops/s per call per node : 821374 +Grid : Message : Average mflops/s per call per node : 667753 +Grid : Message : Average mflops/s per call per node (full): 314151 +Grid : Message : Average mflops/s per call per node (full): 432113 +Grid : Message : Average mflops/s per call per node (full): 444162 +Grid : Message : Average mflops/s per call per node (full): 300812 +Grid : Message : Stencil 13.1259 GB/s per node +Grid : Message : Stencil 16.3224 GB/s per node +Grid : Message : Stencil 16.8754 GB/s per node +Grid : Message : Stencil 12.6539 GB/s per node +Grid : Message : Average mflops/s per call per node : 667421 +Grid : Message : Average mflops/s per call per node : 800472 +Grid : Message : Average mflops/s per call per node : 823593 +Grid : Message : Average mflops/s per call per node : 662618 +Grid : Message : Average mflops/s per call per node (full): 314562 +Grid : Message : Average mflops/s per call per node (full): 426645 +Grid : Message : Average mflops/s per call per node (full): 434872 +Grid : Message : Average mflops/s per call per node (full): 303988 +Grid : Message : Stencil 12.3184 GB/s per node +Grid : Message : Stencil 17.8475 GB/s per node +Grid : Message : Stencil 17.5751 GB/s per node +Grid : Message : Stencil 12.3613 GB/s per node +Grid : Message : Average mflops/s per call per node : 664840 +Grid : Message : Average mflops/s per call per node : 804370 +Grid : Message : Average mflops/s per call per node : 815424 +Grid : Message : Average mflops/s per call per node : 664303 +Grid : Message : Average mflops/s per call per 
node (full): 311918 +Grid : Message : Average mflops/s per call per node (full): 438836 +Grid : Message : Average mflops/s per call per node (full): 443442 +Grid : Message : Average mflops/s per call per node (full): 303934 +Grid : Message : Stencil 13.7473 GB/s per node +Grid : Message : Stencil 11.9221 GB/s per node +Grid : Message : Stencil 17.4339 GB/s per node +Grid : Message : Stencil 13.3121 GB/s per node +Grid : Message : Average mflops/s per call per node : 662807 +Grid : Message : Average mflops/s per call per node : 806827 +Grid : Message : Average mflops/s per call per node : 819802 +Grid : Message : Average mflops/s per call per node : 662039 +Grid : Message : Average mflops/s per call per node (full): 315933 +Grid : Message : Average mflops/s per call per node (full): 359834 +Grid : Message : Average mflops/s per call per node (full): 443439 +Grid : Message : Average mflops/s per call per node (full): 305833 +Grid : Message : Stencil 12.6701 GB/s per node +Grid : Message : Stencil 16.6586 GB/s per node +Grid : Message : Stencil 16.9694 GB/s per node +Grid : Message : Stencil 12.4529 GB/s per node +Grid : Message : Average mflops/s per call per node : 666476 +Grid : Message : Average mflops/s per call per node : 804596 +Grid : Message : Average mflops/s per call per node : 824614 +Grid : Message : Average mflops/s per call per node : 666724 +Grid : Message : Average mflops/s per call per node (full): 314720 +Grid : Message : Average mflops/s per call per node (full): 431010 +Grid : Message : Average mflops/s per call per node (full): 430781 +Grid : Message : Average mflops/s per call per node (full): 304274 +Grid : Message : Stencil 13.8334 GB/s per node +Grid : Message : Stencil 17.5333 GB/s per node +Grid : Message : Stencil 17.2497 GB/s per node +Grid : Message : Stencil 14.9046 GB/s per node +Grid : Message : Average mflops/s per call per node : 664783 +Grid : Message : Average mflops/s per call per node : 805252 +Grid : Message : Average mflops/s 
per call per node : 820404 +Grid : Message : Average mflops/s per call per node : 655310 +Grid : Message : Average mflops/s per call per node (full): 315375 +Grid : Message : Average mflops/s per call per node (full): 440397 +Grid : Message : Average mflops/s per call per node (full): 442864 +Grid : Message : Average mflops/s per call per node (full): 305982 +Grid : Message : Stencil 12.6778 GB/s per node +Grid : Message : Stencil 16.9202 GB/s per node +Grid : Message : Stencil 17.2772 GB/s per node +Grid : Message : Stencil 13.485 GB/s per node +Grid : Message : Average mflops/s per call per node : 666072 +Grid : Message : Average mflops/s per call per node : 806807 +Grid : Message : Average mflops/s per call per node : 825291 +Grid : Message : Average mflops/s per call per node : 664838 +Grid : Message : Average mflops/s per call per node (full): 314502 +Grid : Message : Average mflops/s per call per node (full): 437343 +Grid : Message : Average mflops/s per call per node (full): 443630 +Grid : Message : Average mflops/s per call per node (full): 306984 +Grid : Message : Stencil 13.5258 GB/s per node +Grid : Message : Stencil 17.4691 GB/s per node +Grid : Message : Stencil 17.4799 GB/s per node +Grid : Message : Stencil 12.612 GB/s per node +Grid : Message : Average mflops/s per call per node : 663070 +Grid : Message : Average mflops/s per call per node : 796856 +Grid : Message : Average mflops/s per call per node : 825192 +Grid : Message : Average mflops/s per call per node : 667299 +Grid : Message : Average mflops/s per call per node (full): 316114 +Grid : Message : Average mflops/s per call per node (full): 438528 +Grid : Message : Average mflops/s per call per node (full): 444614 +Grid : Message : Average mflops/s per call per node (full): 305545 +Grid : Message : Stencil 12.8092 GB/s per node +Grid : Message : Stencil 10.023 GB/s per node +Grid : Message : Stencil 17.95 GB/s per node +Grid : Message : Stencil 12.5901 GB/s per node +Grid : Message : Average 
mflops/s per call per node : 666850 +Grid : Message : Average mflops/s per call per node : 808491 +Grid : Message : Average mflops/s per call per node : 821557 +Grid : Message : Average mflops/s per call per node : 666166 +Grid : Message : Average mflops/s per call per node (full): 315060 +Grid : Message : Average mflops/s per call per node (full): 316803 +Grid : Message : Average mflops/s per call per node (full): 444537 +Grid : Message : Average mflops/s per call per node (full): 304435 +Grid : Message : Stencil 13.6691 GB/s per node +Grid : Message : Stencil 17.0066 GB/s per node +Grid : Message : Stencil 17.4481 GB/s per node +Grid : Message : Stencil 13.2227 GB/s per node +Grid : Message : Average mflops/s per call per node : 665356 +Grid : Message : Average mflops/s per call per node : 799926 +Grid : Message : Average mflops/s per call per node : 820032 +Grid : Message : Average mflops/s per call per node : 664401 +Grid : Message : Average mflops/s per call per node (full): 316276 +Grid : Message : Average mflops/s per call per node (full): 434571 +Grid : Message : Average mflops/s per call per node (full): 444560 +Grid : Message : Average mflops/s per call per node (full): 306388 +Grid : Message : Stencil 13.0078 GB/s per node +Grid : Message : Stencil 14.0913 GB/s per node +Grid : Message : Stencil 18.2835 GB/s per node +Grid : Message : Stencil 13.4152 GB/s per node +Grid : Message : Average mflops/s per call per node : 664674 +Grid : Message : Average mflops/s per call per node : 808509 +Grid : Message : Average mflops/s per call per node : 819343 +Grid : Message : Average mflops/s per call per node : 665803 +Grid : Message : Average mflops/s per call per node (full): 314690 +Grid : Message : Average mflops/s per call per node (full): 401452 +Grid : Message : Average mflops/s per call per node (full): 447251 +Grid : Message : Average mflops/s per call per node (full): 306926 +Grid : Message : Stencil 14.6221 GB/s per node +Grid : Message : Stencil 16.6677 
GB/s per node +Grid : Message : Stencil 17.582 GB/s per node +Grid : Message : Stencil 12.2818 GB/s per node +Grid : Message : Average mflops/s per call per node : 659245 +Grid : Message : Average mflops/s per call per node : 804855 +Grid : Message : Average mflops/s per call per node : 819504 +Grid : Message : Average mflops/s per call per node : 667461 +Grid : Message : Average mflops/s per call per node (full): 315871 +Grid : Message : Average mflops/s per call per node (full): 434870 +Grid : Message : Average mflops/s per call per node (full): 443764 +Grid : Message : Average mflops/s per call per node (full): 304046 +Grid : Message : Stencil 13.0725 GB/s per node +Grid : Message : Stencil 16.8008 GB/s per node +Grid : Message : Stencil 17.7133 GB/s per node +Grid : Message : Stencil 12.5526 GB/s per node +Grid : Message : Average mflops/s per call per node : 662510 +Grid : Message : Average mflops/s per call per node : 801213 +Grid : Message : Average mflops/s per call per node : 818551 +Grid : Message : Average mflops/s per call per node : 666014 +Grid : Message : Average mflops/s per call per node (full): 313216 +Grid : Message : Average mflops/s per call per node (full): 436557 +Grid : Message : Average mflops/s per call per node (full): 445124 +Grid : Message : Average mflops/s per call per node (full): 305612 +Grid : Message : Stencil 13.0189 GB/s per node +Grid : Message : Stencil 16.4422 GB/s per node +Grid : Message : Stencil 18.3631 GB/s per node +Grid : Message : Stencil 12.9549 GB/s per node +Grid : Message : Average mflops/s per call per node : 668479 +Grid : Message : Average mflops/s per call per node : 804731 +Grid : Message : Average mflops/s per call per node : 823586 +Grid : Message : Average mflops/s per call per node : 663128 +Grid : Message : Average mflops/s per call per node (full): 315350 +Grid : Message : Average mflops/s per call per node (full): 433633 +Grid : Message : Average mflops/s per call per node (full): 448125 +Grid : 
Message : Average mflops/s per call per node (full): 305041 +Grid : Message : Stencil 13.7261 GB/s per node +Grid : Message : Stencil 16.5987 GB/s per node +Grid : Message : Stencil 18.1536 GB/s per node +Grid : Message : Stencil 13.5897 GB/s per node +Grid : Message : Average mflops/s per call per node : 662310 +Grid : Message : Average mflops/s per call per node : 797417 +Grid : Message : Average mflops/s per call per node : 821599 +Grid : Message : Average mflops/s per call per node : 659934 +Grid : Message : Average mflops/s per call per node (full): 315396 +Grid : Message : Average mflops/s per call per node (full): 433694 +Grid : Message : Average mflops/s per call per node (full): 446757 +Grid : Message : Average mflops/s per call per node (full): 305987 +Grid : Message : Stencil 15.2071 GB/s per node +Grid : Message : Stencil 17.2064 GB/s per node +Grid : Message : Stencil 17.2153 GB/s per node +Grid : Message : Stencil 14.5624 GB/s per node +Grid : Message : Average mflops/s per call per node : 661738 +Grid : Message : Average mflops/s per call per node : 804050 +Grid : Message : Average mflops/s per call per node : 823835 +Grid : Message : Average mflops/s per call per node : 657306 +Grid : Message : Average mflops/s per call per node (full): 316739 +Grid : Message : Average mflops/s per call per node (full): 438758 +Grid : Message : Average mflops/s per call per node (full): 443199 +Grid : Message : Average mflops/s per call per node (full): 306799 +Grid : Message : Stencil 14.368 GB/s per node +Grid : Message : Stencil 12.6867 GB/s per node +Grid : Message : Stencil 17.667 GB/s per node +Grid : Message : Stencil 12.7552 GB/s per node +Grid : Message : Average mflops/s per call per node : 664257 +Grid : Message : Average mflops/s per call per node : 806847 +Grid : Message : Average mflops/s per call per node : 825078 +Grid : Message : Average mflops/s per call per node : 668175 +Grid : Message : Average mflops/s per call per node (full): 316318 +Grid : 
Message : Average mflops/s per call per node (full): 375326 +Grid : Message : Average mflops/s per call per node (full): 446654 +Grid : Message : Average mflops/s per call per node (full): 305203 +Grid : Message : Stencil 14.4694 GB/s per node +Grid : Message : Stencil 16.4098 GB/s per node +Grid : Message : Stencil 17.5862 GB/s per node +Grid : Message : Stencil 11.8687 GB/s per node +Grid : Message : Average mflops/s per call per node : 664554 +Grid : Message : Average mflops/s per call per node : 802905 +Grid : Message : Average mflops/s per call per node : 820834 +Grid : Message : Average mflops/s per call per node : 666175 +Grid : Message : Average mflops/s per call per node (full): 314881 +Grid : Message : Average mflops/s per call per node (full): 431615 +Grid : Message : Average mflops/s per call per node (full): 432655 +Grid : Message : Average mflops/s per call per node (full): 298226 +Grid : Message : Stencil 13.5055 GB/s per node +Grid : Message : Stencil 16.7847 GB/s per node +Grid : Message : Stencil 18.1815 GB/s per node +Grid : Message : Stencil 13.5363 GB/s per node +Grid : Message : Average mflops/s per call per node : 662343 +Grid : Message : Average mflops/s per call per node : 802398 +Grid : Message : Average mflops/s per call per node : 822336 +Grid : Message : Average mflops/s per call per node : 667545 +Grid : Message : Average mflops/s per call per node (full): 315990 +Grid : Message : Average mflops/s per call per node (full): 437402 +Grid : Message : Average mflops/s per call per node (full): 443588 +Grid : Message : Average mflops/s per call per node (full): 306124 +Grid : Message : Stencil 14.5623 GB/s per node +Grid : Message : Stencil 16.7777 GB/s per node +Grid : Message : Stencil 17.9493 GB/s per node +Grid : Message : Stencil 13.6665 GB/s per node +Grid : Message : Average mflops/s per call per node : 661588 +Grid : Message : Average mflops/s per call per node : 804256 +Grid : Message : Average mflops/s per call per node : 823914 
+Grid : Message : Average mflops/s per call per node : 665377 +Grid : Message : Average mflops/s per call per node (full): 315257 +Grid : Message : Average mflops/s per call per node (full): 435848 +Grid : Message : Average mflops/s per call per node (full): 445922 +Grid : Message : Average mflops/s per call per node (full): 306689 +Grid : Message : Stencil 12.7961 GB/s per node +Grid : Message : Stencil 16.6998 GB/s per node +Grid : Message : Stencil 17.8133 GB/s per node +Grid : Message : Stencil 12.2565 GB/s per node +Grid : Message : Average mflops/s per call per node : 663028 +Grid : Message : Average mflops/s per call per node : 804059 +Grid : Message : Average mflops/s per call per node : 823390 +Grid : Message : Average mflops/s per call per node : 669287 +Grid : Message : Average mflops/s per call per node (full): 312929 +Grid : Message : Average mflops/s per call per node (full): 431848 +Grid : Message : Average mflops/s per call per node (full): 446219 +Grid : Message : Average mflops/s per call per node (full): 304734 +Grid : Message : Stencil 12.7893 GB/s per node +Grid : Message : Stencil 16.221 GB/s per node +Grid : Message : Stencil 17.2351 GB/s per node +Grid : Message : Stencil 13.5079 GB/s per node +Grid : Message : Average mflops/s per call per node : 665857 +Grid : Message : Average mflops/s per call per node : 802147 +Grid : Message : Average mflops/s per call per node : 820083 +Grid : Message : Average mflops/s per call per node : 665001 +Grid : Message : Average mflops/s per call per node (full): 310159 +Grid : Message : Average mflops/s per call per node (full): 428779 +Grid : Message : Average mflops/s per call per node (full): 442738 +Grid : Message : Average mflops/s per call per node (full): 306802 +Grid : Message : Stencil 13.1754 GB/s per node +Grid : Message : Stencil 16.4533 GB/s per node +Grid : Message : Stencil 17.429 GB/s per node +Grid : Message : Stencil 13.1193 GB/s per node +Grid : Message : Average mflops/s per call per 
node : 668829 +Grid : Message : Average mflops/s per call per node : 800926 +Grid : Message : Average mflops/s per call per node : 820687 +Grid : Message : Average mflops/s per call per node : 661271 +Grid : Message : Average mflops/s per call per node (full): 316558 +Grid : Message : Average mflops/s per call per node (full): 433815 +Grid : Message : Average mflops/s per call per node (full): 444707 +Grid : Message : Average mflops/s per call per node (full): 305770 +Grid : Message : Stencil 12.9593 GB/s per node +Grid : Message : Stencil 17.1329 GB/s per node +Grid : Message : Stencil 17.0942 GB/s per node +Grid : Message : Stencil 12.9989 GB/s per node +Grid : Message : Average mflops/s per call per node : 667452 +Grid : Message : Average mflops/s per call per node : 806554 +Grid : Message : Average mflops/s per call per node : 822115 +Grid : Message : Average mflops/s per call per node : 661052 +Grid : Message : Average mflops/s per call per node (full): 315427 +Grid : Message : Average mflops/s per call per node (full): 440584 +Grid : Message : Average mflops/s per call per node (full): 441843 +Grid : Message : Average mflops/s per call per node (full): 304562 +Grid : Message : Stencil 13.518 GB/s per node +Grid : Message : Stencil 17.0027 GB/s per node +Grid : Message : Stencil 18.5458 GB/s per node +Grid : Message : Stencil 12.7962 GB/s per node +Grid : Message : Average mflops/s per call per node : 667844 +Grid : Message : Average mflops/s per call per node : 810592 +Grid : Message : Average mflops/s per call per node : 808363 +Grid : Message : Average mflops/s per call per node : 663379 +Grid : Message : Average mflops/s per call per node (full): 315060 +Grid : Message : Average mflops/s per call per node (full): 439231 +Grid : Message : Average mflops/s per call per node (full): 445731 +Grid : Message : Average mflops/s per call per node (full): 304239 +Grid : Message : Stencil 12.5581 GB/s per node +Grid : Message : Stencil 16.7383 GB/s per node +Grid : 
Message : Stencil 17.4816 GB/s per node +Grid : Message : Stencil 12.9929 GB/s per node +Grid : Message : Average mflops/s per call per node : 665838 +Grid : Message : Average mflops/s per call per node : 806940 +Grid : Message : Average mflops/s per call per node : 820026 +Grid : Message : Average mflops/s per call per node : 667036 +Grid : Message : Average mflops/s per call per node (full): 313606 +Grid : Message : Average mflops/s per call per node (full): 436480 +Grid : Message : Average mflops/s per call per node (full): 444863 +Grid : Message : Average mflops/s per call per node (full): 306720 +Grid : Message : Stencil 15.0397 GB/s per node +Grid : Message : Stencil 17.1911 GB/s per node +Grid : Message : Stencil 17.6592 GB/s per node +Grid : Message : Stencil 12.6569 GB/s per node +Grid : Message : Average mflops/s per call per node : 662483 +Grid : Message : Average mflops/s per call per node : 809326 +Grid : Message : Average mflops/s per call per node : 821916 +Grid : Message : Average mflops/s per call per node : 667193 +Grid : Message : Average mflops/s per call per node (full): 316709 +Grid : Message : Average mflops/s per call per node (full): 440519 +Grid : Message : Average mflops/s per call per node (full): 444642 +Grid : Message : Average mflops/s per call per node (full): 304478 +Grid : Message : Stencil 14.1893 GB/s per node +Grid : Message : Stencil 16.7491 GB/s per node +Grid : Message : Stencil 17.7025 GB/s per node +Grid : Message : Stencil 13.0426 GB/s per node +Grid : Message : Average mflops/s per call per node : 663481 +Grid : Message : Average mflops/s per call per node : 803216 +Grid : Message : Average mflops/s per call per node : 824558 +Grid : Message : Average mflops/s per call per node : 664887 +Grid : Message : Average mflops/s per call per node (full): 316694 +Grid : Message : Average mflops/s per call per node (full): 436122 +Grid : Message : Average mflops/s per call per node (full): 446367 +Grid : Message : Average mflops/s 
per call per node (full): 305147 +Grid : Message : Stencil 12.402 GB/s per node +Grid : Message : Stencil 16.8793 GB/s per node +Grid : Message : Stencil 17.5857 GB/s per node +Grid : Message : Stencil 13.9042 GB/s per node +Grid : Message : Average mflops/s per call per node : 668553 +Grid : Message : Average mflops/s per call per node : 799272 +Grid : Message : Average mflops/s per call per node : 824044 +Grid : Message : Average mflops/s per call per node : 654693 +Grid : Message : Average mflops/s per call per node (full): 311949 +Grid : Message : Average mflops/s per call per node (full): 436881 +Grid : Message : Average mflops/s per call per node (full): 446205 +Grid : Message : Average mflops/s per call per node (full): 305577 +Grid : Message : Stencil 13.1478 GB/s per node +Grid : Message : Stencil 16.31 GB/s per node +Grid : Message : Stencil 18.0335 GB/s per node +Grid : Message : Stencil 12.8276 GB/s per node +Grid : Message : Average mflops/s per call per node : 668461 +Grid : Message : Average mflops/s per call per node : 802629 +Grid : Message : Average mflops/s per call per node : 821788 +Grid : Message : Average mflops/s per call per node : 663470 +Grid : Message : Average mflops/s per call per node (full): 315510 +Grid : Message : Average mflops/s per call per node (full): 432585 +Grid : Message : Average mflops/s per call per node (full): 446386 +Grid : Message : Average mflops/s per call per node (full): 305276 +Grid : Message : Stencil 12.7427 GB/s per node +Grid : Message : Stencil 16.4928 GB/s per node +Grid : Message : Stencil 17.973 GB/s per node +Grid : Message : Stencil 13.3166 GB/s per node +Grid : Message : Average mflops/s per call per node : 673103 +Grid : Message : Average mflops/s per call per node : 804558 +Grid : Message : Average mflops/s per call per node : 823988 +Grid : Message : Average mflops/s per call per node : 663175 +Grid : Message : Average mflops/s per call per node (full): 315899 +Grid : Message : Average mflops/s per 
call per node (full): 433461 +Grid : Message : Average mflops/s per call per node (full): 442470 +Grid : Message : Average mflops/s per call per node (full): 306588 +Grid : Message : Stencil 13.2121 GB/s per node +Grid : Message : Stencil 17.1273 GB/s per node +Grid : Message : Stencil 17.6244 GB/s per node +Grid : Message : Stencil 13.2802 GB/s per node +Grid : Message : Average mflops/s per call per node : 668004 +Grid : Message : Average mflops/s per call per node : 799380 +Grid : Message : Average mflops/s per call per node : 824882 +Grid : Message : Average mflops/s per call per node : 662423 +Grid : Message : Average mflops/s per call per node (full): 315807 +Grid : Message : Average mflops/s per call per node (full): 437571 +Grid : Message : Average mflops/s per call per node (full): 445182 +Grid : Message : Average mflops/s per call per node (full): 305479 +Grid : Message : Stencil 13.1344 GB/s per node +Grid : Message : Stencil 12.7333 GB/s per node +Grid : Message : Stencil 18.0397 GB/s per node +Grid : Message : Stencil 12.4452 GB/s per node +Grid : Message : Average mflops/s per call per node : 667610 +Grid : Message : Average mflops/s per call per node : 809721 +Grid : Message : Average mflops/s per call per node : 820494 +Grid : Message : Average mflops/s per call per node : 667970 +Grid : Message : Average mflops/s per call per node (full): 315073 +Grid : Message : Average mflops/s per call per node (full): 376912 +Grid : Message : Average mflops/s per call per node (full): 447594 +Grid : Message : Average mflops/s per call per node (full): 302941 +Grid : Message : Stencil 12.9916 GB/s per node +Grid : Message : Stencil 16.4847 GB/s per node +Grid : Message : Stencil 18.4717 GB/s per node +Grid : Message : Stencil 13.8467 GB/s per node +Grid : Message : Average mflops/s per call per node : 666602 +Grid : Message : Average mflops/s per call per node : 806090 +Grid : Message : Average mflops/s per call per node : 817726 +Grid : Message : Average 
mflops/s per call per node : 661847 +Grid : Message : Average mflops/s per call per node (full): 315127 +Grid : Message : Average mflops/s per call per node (full): 432934 +Grid : Message : Average mflops/s per call per node (full): 448390 +Grid : Message : Average mflops/s per call per node (full): 306390 +Grid : Message : Stencil 12.3533 GB/s per node +Grid : Message : Stencil 17.2274 GB/s per node +Grid : Message : Stencil 17.7197 GB/s per node +Grid : Message : Stencil 13.6455 GB/s per node +Grid : Message : Average mflops/s per call per node : 665051 +Grid : Message : Average mflops/s per call per node : 806937 +Grid : Message : Average mflops/s per call per node : 822513 +Grid : Message : Average mflops/s per call per node : 660128 +Grid : Message : Average mflops/s per call per node (full): 307102 +Grid : Message : Average mflops/s per call per node (full): 438441 +Grid : Message : Average mflops/s per call per node (full): 446803 +Grid : Message : Average mflops/s per call per node (full): 305302 +Grid : Message : Stencil 12.3064 GB/s per node +Grid : Message : Stencil 16.2822 GB/s per node +Grid : Message : Stencil 17.2384 GB/s per node +Grid : Message : Stencil 14.1965 GB/s per node +Grid : Message : Average mflops/s per call per node : 665604 +Grid : Message : Average mflops/s per call per node : 801811 +Grid : Message : Average mflops/s per call per node : 826139 +Grid : Message : Average mflops/s per call per node : 658406 +Grid : Message : Average mflops/s per call per node (full): 311986 +Grid : Message : Average mflops/s per call per node (full): 431562 +Grid : Message : Average mflops/s per call per node (full): 443145 +Grid : Message : Average mflops/s per call per node (full): 304433 +Grid : Message : Stencil 12.532 GB/s per node +Grid : Message : Stencil 17.5079 GB/s per node +Grid : Message : Stencil 18.6678 GB/s per node +Grid : Message : Stencil 12.3361 GB/s per node +Grid : Message : Average mflops/s per call per node : 667572 +Grid : 
Message : Average mflops/s per call per node : 801710 +Grid : Message : Average mflops/s per call per node : 818537 +Grid : Message : Average mflops/s per call per node : 668800 +Grid : Message : Average mflops/s per call per node (full): 313979 +Grid : Message : Average mflops/s per call per node (full): 439231 +Grid : Message : Average mflops/s per call per node (full): 446644 +Grid : Message : Average mflops/s per call per node (full): 304258 +Grid : Message : Stencil 13.1495 GB/s per node +Grid : Message : Stencil 16.1774 GB/s per node +Grid : Message : Stencil 17.0706 GB/s per node +Grid : Message : Stencil 12.2607 GB/s per node +Grid : Message : Average mflops/s per call per node : 669214 +Grid : Message : Average mflops/s per call per node : 802115 +Grid : Message : Average mflops/s per call per node : 824918 +Grid : Message : Average mflops/s per call per node : 668359 +Grid : Message : Average mflops/s per call per node (full): 315314 +Grid : Message : Average mflops/s per call per node (full): 430106 +Grid : Message : Average mflops/s per call per node (full): 441147 +Grid : Message : Average mflops/s per call per node (full): 303811 +Grid : Message : Stencil 13.1248 GB/s per node +Grid : Message : Stencil 9.80877 GB/s per node +Grid : Message : Stencil 17.9584 GB/s per node +Grid : Message : Stencil 12.2776 GB/s per node +Grid : Message : Average mflops/s per call per node : 667706 +Grid : Message : Average mflops/s per call per node : 804502 +Grid : Message : Average mflops/s per call per node : 828082 +Grid : Message : Average mflops/s per call per node : 670349 +Grid : Message : Average mflops/s per call per node (full): 315359 +Grid : Message : Average mflops/s per call per node (full): 310713 +Grid : Message : Average mflops/s per call per node (full): 446836 +Grid : Message : Average mflops/s per call per node (full): 304280 +Grid : Message : Stencil 13.5257 GB/s per node +Grid : Message : Stencil 17.2223 GB/s per node +Grid : Message : Stencil 
17.3992 GB/s per node +Grid : Message : Stencil 14.4036 GB/s per node +Grid : Message : Average mflops/s per call per node : 668025 +Grid : Message : Average mflops/s per call per node : 803956 +Grid : Message : Average mflops/s per call per node : 821781 +Grid : Message : Average mflops/s per call per node : 656816 +Grid : Message : Average mflops/s per call per node (full): 315395 +Grid : Message : Average mflops/s per call per node (full): 438030 +Grid : Message : Average mflops/s per call per node (full): 443848 +Grid : Message : Average mflops/s per call per node (full): 306141 +Grid : Message : Stencil 13.7108 GB/s per node +Grid : Message : Stencil 17.0664 GB/s per node +Grid : Message : Stencil 17.4254 GB/s per node +Grid : Message : Stencil 12.3972 GB/s per node +Grid : Message : Average mflops/s per call per node : 666651 +Grid : Message : Average mflops/s per call per node : 804934 +Grid : Message : Average mflops/s per call per node : 829044 +Grid : Message : Average mflops/s per call per node : 667967 +Grid : Message : Average mflops/s per call per node (full): 316229 +Grid : Message : Average mflops/s per call per node (full): 437367 +Grid : Message : Average mflops/s per call per node (full): 445282 +Grid : Message : Average mflops/s per call per node (full): 304927 +Grid : Message : Stencil 13.6529 GB/s per node +Grid : Message : Stencil 17.3621 GB/s per node +Grid : Message : Stencil 17.7085 GB/s per node +Grid : Message : Stencil 12.1508 GB/s per node +Grid : Message : Average mflops/s per call per node : 665599 +Grid : Message : Average mflops/s per call per node : 802139 +Grid : Message : Average mflops/s per call per node : 819221 +Grid : Message : Average mflops/s per call per node : 666278 +Grid : Message : Average mflops/s per call per node (full): 315715 +Grid : Message : Average mflops/s per call per node (full): 437760 +Grid : Message : Average mflops/s per call per node (full): 444470 +Grid : Message : Average mflops/s per call per node 
(full): 303334 +Grid : Message : Stencil 13.7906 GB/s per node +Grid : Message : Stencil 6.55631 GB/s per node +Grid : Message : Stencil 18.0631 GB/s per node +Grid : Message : Stencil 12.4737 GB/s per node +Grid : Message : Average mflops/s per call per node : 663705 +Grid : Message : Average mflops/s per call per node : 807329 +Grid : Message : Average mflops/s per call per node : 822044 +Grid : Message : Average mflops/s per call per node : 667803 +Grid : Message : Average mflops/s per call per node (full): 316153 +Grid : Message : Average mflops/s per call per node (full): 226870 +Grid : Message : Average mflops/s per call per node (full): 445791 +Grid : Message : Average mflops/s per call per node (full): 304399 +Grid : Message : Stencil 13.9422 GB/s per node +Grid : Message : Stencil 18.0489 GB/s per node +Grid : Message : Stencil 18.202 GB/s per node +Grid : Message : Stencil 12.7549 GB/s per node +Grid : Message : Average mflops/s per call per node : 664665 +Grid : Message : Average mflops/s per call per node : 805682 +Grid : Message : Average mflops/s per call per node : 823134 +Grid : Message : Average mflops/s per call per node : 660925 +Grid : Message : Average mflops/s per call per node (full): 316494 +Grid : Message : Average mflops/s per call per node (full): 441297 +Grid : Message : Average mflops/s per call per node (full): 447213 +Grid : Message : Average mflops/s per call per node (full): 303280 +Grid : Message : Stencil 14.8899 GB/s per node +Grid : Message : Stencil 17.1059 GB/s per node +Grid : Message : Stencil 17.6417 GB/s per node +Grid : Message : Stencil 12.4758 GB/s per node +Grid : Message : Average mflops/s per call per node : 663410 +Grid : Message : Average mflops/s per call per node : 805254 +Grid : Message : Average mflops/s per call per node : 825254 +Grid : Message : Average mflops/s per call per node : 667450 +Grid : Message : Average mflops/s per call per node (full): 317238 +Grid : Message : Average mflops/s per call per node 
(full): 437415 +Grid : Message : Average mflops/s per call per node (full): 437589 +Grid : Message : Average mflops/s per call per node (full): 303595 +Grid : Message : Stencil 12.6778 GB/s per node +Grid : Message : Stencil 17.786 GB/s per node +Grid : Message : Stencil 17.91 GB/s per node +Grid : Message : Stencil 13.2347 GB/s per node +Grid : Message : Average mflops/s per call per node : 671875 +Grid : Message : Average mflops/s per call per node : 802320 +Grid : Message : Average mflops/s per call per node : 826003 +Grid : Message : Average mflops/s per call per node : 664528 +Grid : Message : Average mflops/s per call per node (full): 315102 +Grid : Message : Average mflops/s per call per node (full): 438671 +Grid : Message : Average mflops/s per call per node (full): 448007 +Grid : Message : Average mflops/s per call per node (full): 304178 +Grid : Message : Stencil 12.8066 GB/s per node +Grid : Message : Stencil 9.47955 GB/s per node +Grid : Message : Stencil 17.1296 GB/s per node +Grid : Message : Stencil 13.3126 GB/s per node +Grid : Message : Average mflops/s per call per node : 667534 +Grid : Message : Average mflops/s per call per node : 810356 +Grid : Message : Average mflops/s per call per node : 820584 +Grid : Message : Average mflops/s per call per node : 665002 +Grid : Message : Average mflops/s per call per node (full): 315031 +Grid : Message : Average mflops/s per call per node (full): 303250 +Grid : Message : Average mflops/s per call per node (full): 440421 +Grid : Message : Average mflops/s per call per node (full): 306009 +Grid : Message : Stencil 12.8412 GB/s per node +Grid : Message : Stencil 17.5009 GB/s per node +Grid : Message : Stencil 19.7497 GB/s per node +Grid : Message : Stencil 12.4201 GB/s per node +Grid : Message : Average mflops/s per call per node : 666739 +Grid : Message : Average mflops/s per call per node : 798613 +Grid : Message : Average mflops/s per call per node : 818810 +Grid : Message : Average mflops/s per call per 
node : 663304 +Grid : Message : Average mflops/s per call per node (full): 312930 +Grid : Message : Average mflops/s per call per node (full): 438826 +Grid : Message : Average mflops/s per call per node (full): 450335 +Grid : Message : Average mflops/s per call per node (full): 303353 +Grid : Message : Stencil 12.7016 GB/s per node +Grid : Message : Stencil 16.2536 GB/s per node +Grid : Message : Stencil 18.2265 GB/s per node +Grid : Message : Stencil 12.3248 GB/s per node +Grid : Message : Average mflops/s per call per node : 670398 +Grid : Message : Average mflops/s per call per node : 804082 +Grid : Message : Average mflops/s per call per node : 821654 +Grid : Message : Average mflops/s per call per node : 665580 +Grid : Message : Average mflops/s per call per node (full): 314434 +Grid : Message : Average mflops/s per call per node (full): 429458 +Grid : Message : Average mflops/s per call per node (full): 446120 +Grid : Message : Average mflops/s per call per node (full): 303792 +Grid : Message : Stencil 12.8835 GB/s per node +Grid : Message : Stencil 16.4583 GB/s per node +Grid : Message : Stencil 17.5232 GB/s per node +Grid : Message : Stencil 12.9388 GB/s per node +Grid : Message : Average mflops/s per call per node : 668522 +Grid : Message : Average mflops/s per call per node : 808236 +Grid : Message : Average mflops/s per call per node : 821330 +Grid : Message : Average mflops/s per call per node : 660228 +Grid : Message : Average mflops/s per call per node (full): 313520 +Grid : Message : Average mflops/s per call per node (full): 430168 +Grid : Message : Average mflops/s per call per node (full): 439663 +Grid : Message : Average mflops/s per call per node (full): 304068 +Grid : Message : Stencil 13.021 GB/s per node +Grid : Message : Stencil 16.8159 GB/s per node +Grid : Message : Stencil 17.3261 GB/s per node +Grid : Message : Stencil 12.7807 GB/s per node +Grid : Message : Average mflops/s per call per node : 667768 +Grid : Message : Average mflops/s 
per call per node : 807405 +Grid : Message : Average mflops/s per call per node : 825368 +Grid : Message : Average mflops/s per call per node : 667833 +Grid : Message : Average mflops/s per call per node (full): 313354 +Grid : Message : Average mflops/s per call per node (full): 433510 +Grid : Message : Average mflops/s per call per node (full): 443765 +Grid : Message : Average mflops/s per call per node (full): 306532 +Grid : Message : Stencil 13.04 GB/s per node +Grid : Message : Stencil 17.9266 GB/s per node +Grid : Message : Stencil 16.9641 GB/s per node +Grid : Message : Stencil 12.1914 GB/s per node +Grid : Message : Average mflops/s per call per node : 667822 +Grid : Message : Average mflops/s per call per node : 796507 +Grid : Message : Average mflops/s per call per node : 826307 +Grid : Message : Average mflops/s per call per node : 667710 +Grid : Message : Average mflops/s per call per node (full): 314874 +Grid : Message : Average mflops/s per call per node (full): 438934 +Grid : Message : Average mflops/s per call per node (full): 440721 +Grid : Message : Average mflops/s per call per node (full): 301811 +Grid : Message : Stencil 14.0713 GB/s per node +Grid : Message : Stencil 9.84629 GB/s per node +Grid : Message : Stencil 17.975 GB/s per node +Grid : Message : Stencil 12.6292 GB/s per node +Grid : Message : Average mflops/s per call per node : 662677 +Grid : Message : Average mflops/s per call per node : 807628 +Grid : Message : Average mflops/s per call per node : 819486 +Grid : Message : Average mflops/s per call per node : 665414 +Grid : Message : Average mflops/s per call per node (full): 314320 +Grid : Message : Average mflops/s per call per node (full): 311792 +Grid : Message : Average mflops/s per call per node (full): 446145 +Grid : Message : Average mflops/s per call per node (full): 305001 +Grid : Message : Stencil 15.2395 GB/s per node +Grid : Message : Stencil 16.6914 GB/s per node +Grid : Message : Stencil 16.7894 GB/s per node +Grid : 
Message : Stencil 13.1997 GB/s per node +Grid : Message : Average mflops/s per call per node : 661209 +Grid : Message : Average mflops/s per call per node : 809394 +Grid : Message : Average mflops/s per call per node : 825684 +Grid : Message : Average mflops/s per call per node : 662842 +Grid : Message : Average mflops/s per call per node (full): 316996 +Grid : Message : Average mflops/s per call per node (full): 437153 +Grid : Message : Average mflops/s per call per node (full): 438272 +Grid : Message : Average mflops/s per call per node (full): 304307 +Grid : Message : Stencil 13.3791 GB/s per node +Grid : Message : Stencil 16.8369 GB/s per node +Grid : Message : Stencil 17.224 GB/s per node +Grid : Message : Stencil 14.0024 GB/s per node +Grid : Message : Average mflops/s per call per node : 665689 +Grid : Message : Average mflops/s per call per node : 802725 +Grid : Message : Average mflops/s per call per node : 827751 +Grid : Message : Average mflops/s per call per node : 654313 +Grid : Message : Average mflops/s per call per node (full): 316236 +Grid : Message : Average mflops/s per call per node (full): 436545 +Grid : Message : Average mflops/s per call per node (full): 443947 +Grid : Message : Average mflops/s per call per node (full): 304434 +Grid : Message : Stencil 12.5735 GB/s per node +Grid : Message : Stencil 11.2593 GB/s per node +Grid : Message : Stencil 17.4278 GB/s per node +Grid : Message : Stencil 12.9417 GB/s per node +Grid : Message : Average mflops/s per call per node : 672094 +Grid : Message : Average mflops/s per call per node : 807479 +Grid : Message : Average mflops/s per call per node : 827536 +Grid : Message : Average mflops/s per call per node : 663187 +Grid : Message : Average mflops/s per call per node (full): 314938 +Grid : Message : Average mflops/s per call per node (full): 345570 +Grid : Message : Average mflops/s per call per node (full): 444522 +Grid : Message : Average mflops/s per call per node (full): 304011 +Grid : Message 
: Stencil 13.6533 GB/s per node +Grid : Message : Stencil 17.3081 GB/s per node +Grid : Message : Stencil 17.4874 GB/s per node +Grid : Message : Stencil 12.6095 GB/s per node +Grid : Message : Average mflops/s per call per node : 668633 +Grid : Message : Average mflops/s per call per node : 802535 +Grid : Message : Average mflops/s per call per node : 823084 +Grid : Message : Average mflops/s per call per node : 668702 +Grid : Message : Average mflops/s per call per node (full): 317040 +Grid : Message : Average mflops/s per call per node (full): 438029 +Grid : Message : Average mflops/s per call per node (full): 445463 +Grid : Message : Average mflops/s per call per node (full): 306130 +Grid : Message : Stencil 12.867 GB/s per node +Grid : Message : Stencil 17.2766 GB/s per node +Grid : Message : Stencil 16.1951 GB/s per node +Grid : Message : Stencil 11.682 GB/s per node +Grid : Message : Average mflops/s per call per node : 667313 +Grid : Message : Average mflops/s per call per node : 802375 +Grid : Message : Average mflops/s per call per node : 827107 +Grid : Message : Average mflops/s per call per node : 664678 +Grid : Message : Average mflops/s per call per node (full): 313388 +Grid : Message : Average mflops/s per call per node (full): 438214 +Grid : Message : Average mflops/s per call per node (full): 424986 +Grid : Message : Average mflops/s per call per node (full): 298506 +Grid : Message : Stencil 13.0763 GB/s per node +Grid : Message : Stencil 16.5815 GB/s per node +Grid : Message : Stencil 17.1128 GB/s per node +Grid : Message : Stencil 12.3072 GB/s per node +Grid : Message : Average mflops/s per call per node : 666096 +Grid : Message : Average mflops/s per call per node : 805076 +Grid : Message : Average mflops/s per call per node : 820869 +Grid : Message : Average mflops/s per call per node : 664399 +Grid : Message : Average mflops/s per call per node (full): 314237 +Grid : Message : Average mflops/s per call per node (full): 436072 +Grid : Message : 
Average mflops/s per call per node (full): 440936 +Grid : Message : Average mflops/s per call per node (full): 303585 +Grid : Message : Stencil 13.4101 GB/s per node +Grid : Message : Stencil 16.7424 GB/s per node +Grid : Message : Stencil 17.8502 GB/s per node +Grid : Message : Stencil 14.3448 GB/s per node +Grid : Message : Average mflops/s per call per node : 666799 +Grid : Message : Average mflops/s per call per node : 809391 +Grid : Message : Average mflops/s per call per node : 823629 +Grid : Message : Average mflops/s per call per node : 664725 +Grid : Message : Average mflops/s per call per node (full): 315379 +Grid : Message : Average mflops/s per call per node (full): 437879 +Grid : Message : Average mflops/s per call per node (full): 447103 +Grid : Message : Average mflops/s per call per node (full): 307257 +Grid : Message : Stencil 13.568 GB/s per node +Grid : Message : Stencil 17.6779 GB/s per node +Grid : Message : Stencil 16.8741 GB/s per node +Grid : Message : Stencil 12.4839 GB/s per node +Grid : Message : Average mflops/s per call per node : 668025 +Grid : Message : Average mflops/s per call per node : 804674 +Grid : Message : Average mflops/s per call per node : 821256 +Grid : Message : Average mflops/s per call per node : 664966 +Grid : Message : Average mflops/s per call per node (full): 316063 +Grid : Message : Average mflops/s per call per node (full): 440264 +Grid : Message : Average mflops/s per call per node (full): 439220 +Grid : Message : Average mflops/s per call per node (full): 303367 +Grid : Message : Stencil 13.8637 GB/s per node +Grid : Message : Stencil 16.318 GB/s per node +Grid : Message : Stencil 17.3829 GB/s per node +Grid : Message : Stencil 12.7476 GB/s per node +Grid : Message : Average mflops/s per call per node : 667436 +Grid : Message : Average mflops/s per call per node : 803547 +Grid : Message : Average mflops/s per call per node : 825521 +Grid : Message : Average mflops/s per call per node : 662615 +Grid : Message : 
Average mflops/s per call per node (full): 316257 +Grid : Message : Average mflops/s per call per node (full): 429325 +Grid : Message : Average mflops/s per call per node (full): 441772 +Grid : Message : Average mflops/s per call per node (full): 304181 +Grid : Message : Stencil 14.1388 GB/s per node +Grid : Message : Stencil 16.9508 GB/s per node +Grid : Message : Stencil 17.6921 GB/s per node +Grid : Message : Stencil 12.6858 GB/s per node +Grid : Message : Average mflops/s per call per node : 666942 +Grid : Message : Average mflops/s per call per node : 802603 +Grid : Message : Average mflops/s per call per node : 824945 +Grid : Message : Average mflops/s per call per node : 668276 +Grid : Message : Average mflops/s per call per node (full): 317268 +Grid : Message : Average mflops/s per call per node (full): 435966 +Grid : Message : Average mflops/s per call per node (full): 445270 +Grid : Message : Average mflops/s per call per node (full): 305709 +Grid : Message : Stencil 13.2752 GB/s per node +Grid : Message : Stencil 14.3609 GB/s per node +Grid : Message : Stencil 19.2179 GB/s per node +Grid : Message : Stencil 13.5641 GB/s per node +Grid : Message : Average mflops/s per call per node : 666144 +Grid : Message : Average mflops/s per call per node : 803392 +Grid : Message : Average mflops/s per call per node : 822152 +Grid : Message : Average mflops/s per call per node : 658488 +Grid : Message : Average mflops/s per call per node (full): 315994 +Grid : Message : Average mflops/s per call per node (full): 402828 +Grid : Message : Average mflops/s per call per node (full): 449873 +Grid : Message : Average mflops/s per call per node (full): 304869 +Grid : Message : Stencil 13.6586 GB/s per node +Grid : Message : Stencil 16.6687 GB/s per node +Grid : Message : Stencil 17.8118 GB/s per node +Grid : Message : Stencil 12.9281 GB/s per node +Grid : Message : Average mflops/s per call per node : 662645 +Grid : Message : Average mflops/s per call per node : 805011 +Grid 
: Message : Average mflops/s per call per node : 823772 +Grid : Message : Average mflops/s per call per node : 659767 +Grid : Message : Average mflops/s per call per node (full): 315917 +Grid : Message : Average mflops/s per call per node (full): 434045 +Grid : Message : Average mflops/s per call per node (full): 447067 +Grid : Message : Average mflops/s per call per node (full): 303836 +Grid : Message : Stencil 12.783 GB/s per node +Grid : Message : Stencil 16.5007 GB/s per node +Grid : Message : Stencil 18.1978 GB/s per node +Grid : Message : Stencil 12.5762 GB/s per node +Grid : Message : Average mflops/s per call per node : 668439 +Grid : Message : Average mflops/s per call per node : 808408 +Grid : Message : Average mflops/s per call per node : 818862 +Grid : Message : Average mflops/s per call per node : 666769 +Grid : Message : Average mflops/s per call per node (full): 313955 +Grid : Message : Average mflops/s per call per node (full): 435371 +Grid : Message : Average mflops/s per call per node (full): 444736 +Grid : Message : Average mflops/s per call per node (full): 302601 +Grid : Message : Stencil 12.3588 GB/s per node +Grid : Message : Stencil 16.6906 GB/s per node +Grid : Message : Stencil 17.5897 GB/s per node +Grid : Message : Stencil 12.731 GB/s per node +Grid : Message : Average mflops/s per call per node : 672717 +Grid : Message : Average mflops/s per call per node : 809225 +Grid : Message : Average mflops/s per call per node : 824966 +Grid : Message : Average mflops/s per call per node : 664892 +Grid : Message : Average mflops/s per call per node (full): 312967 +Grid : Message : Average mflops/s per call per node (full): 437334 +Grid : Message : Average mflops/s per call per node (full): 446624 +Grid : Message : Average mflops/s per call per node (full): 305067 +Grid : Message : Stencil 13.5086 GB/s per node +Grid : Message : Stencil 16.7682 GB/s per node +Grid : Message : Stencil 17.36 GB/s per node +Grid : Message : Stencil 12.4457 GB/s per 
node +Grid : Message : Average mflops/s per call per node : 665369 +Grid : Message : Average mflops/s per call per node : 803451 +Grid : Message : Average mflops/s per call per node : 819366 +Grid : Message : Average mflops/s per call per node : 664067 +Grid : Message : Average mflops/s per call per node (full): 314363 +Grid : Message : Average mflops/s per call per node (full): 436058 +Grid : Message : Average mflops/s per call per node (full): 443030 +Grid : Message : Average mflops/s per call per node (full): 303937 +Grid : Message : Stencil 13.3165 GB/s per node +Grid : Message : Stencil 12.5025 GB/s per node +Grid : Message : Stencil 17.7703 GB/s per node +Grid : Message : Stencil 14.2332 GB/s per node +Grid : Message : Average mflops/s per call per node : 665657 +Grid : Message : Average mflops/s per call per node : 812801 +Grid : Message : Average mflops/s per call per node : 823140 +Grid : Message : Average mflops/s per call per node : 663945 +Grid : Message : Average mflops/s per call per node (full): 315523 +Grid : Message : Average mflops/s per call per node (full): 372095 +Grid : Message : Average mflops/s per call per node (full): 446653 +Grid : Message : Average mflops/s per call per node (full): 307241 +Grid : Message : Stencil 14.9576 GB/s per node +Grid : Message : Stencil 8.89052 GB/s per node +Grid : Message : Stencil 17.2728 GB/s per node +Grid : Message : Stencil 12.6808 GB/s per node +Grid : Message : Average mflops/s per call per node : 662859 +Grid : Message : Average mflops/s per call per node : 812123 +Grid : Message : Average mflops/s per call per node : 830044 +Grid : Message : Average mflops/s per call per node : 665640 +Grid : Message : Average mflops/s per call per node (full): 317215 +Grid : Message : Average mflops/s per call per node (full): 289792 +Grid : Message : Average mflops/s per call per node (full): 444953 +Grid : Message : Average mflops/s per call per node (full): 305590 +Grid : Message : Stencil 13.2073 GB/s per node 
+Grid : Message : Stencil 16.5902 GB/s per node +Grid : Message : Stencil 17.9064 GB/s per node +Grid : Message : Stencil 12.3071 GB/s per node +Grid : Message : Average mflops/s per call per node : 665944 +Grid : Message : Average mflops/s per call per node : 805445 +Grid : Message : Average mflops/s per call per node : 821582 +Grid : Message : Average mflops/s per call per node : 659905 +Grid : Message : Average mflops/s per call per node (full): 315108 +Grid : Message : Average mflops/s per call per node (full): 434920 +Grid : Message : Average mflops/s per call per node (full): 440533 +Grid : Message : Average mflops/s per call per node (full): 300261 +Grid : Message : Stencil 13.6207 GB/s per node +Grid : Message : Stencil 16.963 GB/s per node +Grid : Message : Stencil 18.8015 GB/s per node +Grid : Message : Stencil 12.6053 GB/s per node +Grid : Message : Average mflops/s per call per node : 662003 +Grid : Message : Average mflops/s per call per node : 802034 +Grid : Message : Average mflops/s per call per node : 823994 +Grid : Message : Average mflops/s per call per node : 664597 +Grid : Message : Average mflops/s per call per node (full): 315494 +Grid : Message : Average mflops/s per call per node (full): 436003 +Grid : Message : Average mflops/s per call per node (full): 449112 +Grid : Message : Average mflops/s per call per node (full): 304923 +Grid : Message : Stencil 13.6174 GB/s per node +Grid : Message : Stencil 12.2346 GB/s per node +Grid : Message : Stencil 17.3975 GB/s per node +Grid : Message : Stencil 13.829 GB/s per node +Grid : Message : Average mflops/s per call per node : 661357 +Grid : Message : Average mflops/s per call per node : 801615 +Grid : Message : Average mflops/s per call per node : 822680 +Grid : Message : Average mflops/s per call per node : 662306 +Grid : Message : Average mflops/s per call per node (full): 314288 +Grid : Message : Average mflops/s per call per node (full): 366449 +Grid : Message : Average mflops/s per call per 
node (full): 444360 +Grid : Message : Average mflops/s per call per node (full): 307243 +Grid : Message : Stencil 13.2055 GB/s per node +Grid : Message : Stencil 15.5344 GB/s per node +Grid : Message : Stencil 18.6795 GB/s per node +Grid : Message : Stencil 13.3403 GB/s per node +Grid : Message : Average mflops/s per call per node : 667854 +Grid : Message : Average mflops/s per call per node : 805612 +Grid : Message : Average mflops/s per call per node : 825962 +Grid : Message : Average mflops/s per call per node : 661435 +Grid : Message : Average mflops/s per call per node (full): 315217 +Grid : Message : Average mflops/s per call per node (full): 423496 +Grid : Message : Average mflops/s per call per node (full): 449684 +Grid : Message : Average mflops/s per call per node (full): 305117 +Grid : Message : Stencil 14.0589 GB/s per node +Grid : Message : Stencil 17.53 GB/s per node +Grid : Message : Stencil 17.3587 GB/s per node +Grid : Message : Stencil 12.8013 GB/s per node +Grid : Message : Average mflops/s per call per node : 663290 +Grid : Message : Average mflops/s per call per node : 802056 +Grid : Message : Average mflops/s per call per node : 820628 +Grid : Message : Average mflops/s per call per node : 667458 +Grid : Message : Average mflops/s per call per node (full): 316086 +Grid : Message : Average mflops/s per call per node (full): 433471 +Grid : Message : Average mflops/s per call per node (full): 443865 +Grid : Message : Average mflops/s per call per node (full): 305638 +Grid : Message : Stencil 12.4114 GB/s per node +Grid : Message : Stencil 16.5811 GB/s per node +Grid : Message : Stencil 17.246 GB/s per node +Grid : Message : Stencil 12.9512 GB/s per node +Grid : Message : Average mflops/s per call per node : 670801 +Grid : Message : Average mflops/s per call per node : 803141 +Grid : Message : Average mflops/s per call per node : 818087 +Grid : Message : Average mflops/s per call per node : 662536 +Grid : Message : Average mflops/s per call per 
node (full): 313026 +Grid : Message : Average mflops/s per call per node (full): 435532 +Grid : Message : Average mflops/s per call per node (full): 440716 +Grid : Message : Average mflops/s per call per node (full): 304931 +Grid : Message : Stencil 12.3893 GB/s per node +Grid : Message : Stencil 16.275 GB/s per node +Grid : Message : Stencil 17.6015 GB/s per node +Grid : Message : Stencil 14.2541 GB/s per node +Grid : Message : Average mflops/s per call per node : 670981 +Grid : Message : Average mflops/s per call per node : 808883 +Grid : Message : Average mflops/s per call per node : 824212 +Grid : Message : Average mflops/s per call per node : 662167 +Grid : Message : Average mflops/s per call per node (full): 313634 +Grid : Message : Average mflops/s per call per node (full): 433070 +Grid : Message : Average mflops/s per call per node (full): 445960 +Grid : Message : Average mflops/s per call per node (full): 307540 +Grid : Message : Stencil 12.5686 GB/s per node +Grid : Message : Stencil 17.4453 GB/s per node +Grid : Message : Stencil 18.5536 GB/s per node +Grid : Message : Stencil 14.4062 GB/s per node +Grid : Message : Average mflops/s per call per node : 671233 +Grid : Message : Average mflops/s per call per node : 803489 +Grid : Message : Average mflops/s per call per node : 822710 +Grid : Message : Average mflops/s per call per node : 659322 +Grid : Message : Average mflops/s per call per node (full): 314646 +Grid : Message : Average mflops/s per call per node (full): 438822 +Grid : Message : Average mflops/s per call per node (full): 448936 +Grid : Message : Average mflops/s per call per node (full): 307099 +Grid : Message : Stencil 12.17 GB/s per node +Grid : Message : Stencil 17.2394 GB/s per node +Grid : Message : Stencil 17.1924 GB/s per node +Grid : Message : Stencil 14.3786 GB/s per node +Grid : Message : Average mflops/s per call per node : 670419 +Grid : Message : Average mflops/s per call per node : 803856 +Grid : Message : Average mflops/s per 
call per node : 819163 +Grid : Message : Average mflops/s per call per node : 660090 +Grid : Message : Average mflops/s per call per node (full): 309861 +Grid : Message : Average mflops/s per call per node (full): 439206 +Grid : Message : Average mflops/s per call per node (full): 443010 +Grid : Message : Average mflops/s per call per node (full): 306431 +Grid : Message : Stencil 13.4474 GB/s per node +Grid : Message : Stencil 16.5257 GB/s per node +Grid : Message : Stencil 16.5948 GB/s per node +Grid : Message : Stencil 12.4461 GB/s per node +Grid : Message : Average mflops/s per call per node : 664070 +Grid : Message : Average mflops/s per call per node : 805521 +Grid : Message : Average mflops/s per call per node : 825787 +Grid : Message : Average mflops/s per call per node : 666217 +Grid : Message : Average mflops/s per call per node (full): 314056 +Grid : Message : Average mflops/s per call per node (full): 435679 +Grid : Message : Average mflops/s per call per node (full): 431690 +Grid : Message : Average mflops/s per call per node (full): 305067 +Grid : Message : Stencil 13.2165 GB/s per node +Grid : Message : Stencil 11.7598 GB/s per node +Grid : Message : Stencil 17.8103 GB/s per node +Grid : Message : Stencil 11.9989 GB/s per node +Grid : Message : Average mflops/s per call per node : 667313 +Grid : Message : Average mflops/s per call per node : 811988 +Grid : Message : Average mflops/s per call per node : 822549 +Grid : Message : Average mflops/s per call per node : 667800 +Grid : Message : Average mflops/s per call per node (full): 314792 +Grid : Message : Average mflops/s per call per node (full): 356582 +Grid : Message : Average mflops/s per call per node (full): 445769 +Grid : Message : Average mflops/s per call per node (full): 302482 +Grid : Message : Stencil 13.2337 GB/s per node +Grid : Message : Stencil 16.9512 GB/s per node +Grid : Message : Stencil 17.8045 GB/s per node +Grid : Message : Stencil 13.8251 GB/s per node +Grid : Message : Average 
mflops/s per call per node : 664088 +Grid : Message : Average mflops/s per call per node : 803417 +Grid : Message : Average mflops/s per call per node : 818634 +Grid : Message : Average mflops/s per call per node : 663948 +Grid : Message : Average mflops/s per call per node (full): 314716 +Grid : Message : Average mflops/s per call per node (full): 437263 +Grid : Message : Average mflops/s per call per node (full): 445292 +Grid : Message : Average mflops/s per call per node (full): 306747 +Grid : Message : Stencil 12.4742 GB/s per node +Grid : Message : Stencil 17.4951 GB/s per node +Grid : Message : Stencil 17.6713 GB/s per node +Grid : Message : Stencil 13.3499 GB/s per node +Grid : Message : Average mflops/s per call per node : 665881 +Grid : Message : Average mflops/s per call per node : 801179 +Grid : Message : Average mflops/s per call per node : 824577 +Grid : Message : Average mflops/s per call per node : 667595 +Grid : Message : Average mflops/s per call per node (full): 311674 +Grid : Message : Average mflops/s per call per node (full): 438499 +Grid : Message : Average mflops/s per call per node (full): 445682 +Grid : Message : Average mflops/s per call per node (full): 306601 +Grid : Message : Stencil 12.5619 GB/s per node +Grid : Message : Stencil 11.8294 GB/s per node +Grid : Message : Stencil 16.3323 GB/s per node +Grid : Message : Stencil 11.958 GB/s per node +Grid : Message : Average mflops/s per call per node : 667462 +Grid : Message : Average mflops/s per call per node : 803788 +Grid : Message : Average mflops/s per call per node : 827608 +Grid : Message : Average mflops/s per call per node : 670717 +Grid : Message : Average mflops/s per call per node (full): 313219 +Grid : Message : Average mflops/s per call per node (full): 357704 +Grid : Message : Average mflops/s per call per node (full): 420800 +Grid : Message : Average mflops/s per call per node (full): 301473 +Grid : Message : Stencil 13.1724 GB/s per node +Grid : Message : Stencil 17.0143 
GB/s per node +Grid : Message : Stencil 17.4939 GB/s per node +Grid : Message : Stencil 13.4218 GB/s per node +Grid : Message : Average mflops/s per call per node : 669132 +Grid : Message : Average mflops/s per call per node : 805514 +Grid : Message : Average mflops/s per call per node : 823415 +Grid : Message : Average mflops/s per call per node : 666192 +Grid : Message : Average mflops/s per call per node (full): 315321 +Grid : Message : Average mflops/s per call per node (full): 437871 +Grid : Message : Average mflops/s per call per node (full): 445335 +Grid : Message : Average mflops/s per call per node (full): 306812 +Grid : Message : Stencil 13.7464 GB/s per node +Grid : Message : Stencil 16.6745 GB/s per node +Grid : Message : Stencil 17.1911 GB/s per node +Grid : Message : Stencil 15.0666 GB/s per node +Grid : Message : Average mflops/s per call per node : 664781 +Grid : Message : Average mflops/s per call per node : 803928 +Grid : Message : Average mflops/s per call per node : 822713 +Grid : Message : Average mflops/s per call per node : 658377 +Grid : Message : Average mflops/s per call per node (full): 316466 +Grid : Message : Average mflops/s per call per node (full): 436232 +Grid : Message : Average mflops/s per call per node (full): 442786 +Grid : Message : Average mflops/s per call per node (full): 306735 +Grid : Message : Stencil 14.3112 GB/s per node +Grid : Message : Stencil 16.9462 GB/s per node +Grid : Message : Stencil 17.9614 GB/s per node +Grid : Message : Stencil 13.8318 GB/s per node +Grid : Message : Average mflops/s per call per node : 665989 +Grid : Message : Average mflops/s per call per node : 803710 +Grid : Message : Average mflops/s per call per node : 819066 +Grid : Message : Average mflops/s per call per node : 664430 +Grid : Message : Average mflops/s per call per node (full): 316879 +Grid : Message : Average mflops/s per call per node (full): 438108 +Grid : Message : Average mflops/s per call per node (full): 446138 +Grid : 
Message : Average mflops/s per call per node (full): 307528 +Grid : Message : Stencil 13.2229 GB/s per node +Grid : Message : Stencil 17.2677 GB/s per node +Grid : Message : Stencil 17.7745 GB/s per node +Grid : Message : Stencil 12.5946 GB/s per node +Grid : Message : Average mflops/s per call per node : 668613 +Grid : Message : Average mflops/s per call per node : 805485 +Grid : Message : Average mflops/s per call per node : 825436 +Grid : Message : Average mflops/s per call per node : 665651 +Grid : Message : Average mflops/s per call per node (full): 316275 +Grid : Message : Average mflops/s per call per node (full): 432677 +Grid : Message : Average mflops/s per call per node (full): 446369 +Grid : Message : Average mflops/s per call per node (full): 303448 +Grid : Message : Stencil 13.5782 GB/s per node +Grid : Message : Stencil 17.1672 GB/s per node +Grid : Message : Stencil 17.4806 GB/s per node +Grid : Message : Stencil 13.6005 GB/s per node +Grid : Message : Average mflops/s per call per node : 665271 +Grid : Message : Average mflops/s per call per node : 803369 +Grid : Message : Average mflops/s per call per node : 826869 +Grid : Message : Average mflops/s per call per node : 661994 +Grid : Message : Average mflops/s per call per node (full): 315703 +Grid : Message : Average mflops/s per call per node (full): 438789 +Grid : Message : Average mflops/s per call per node (full): 445241 +Grid : Message : Average mflops/s per call per node (full): 306011 +Grid : Message : Stencil 13.0431 GB/s per node +Grid : Message : Stencil 16.4806 GB/s per node +Grid : Message : Stencil 18.8665 GB/s per node +Grid : Message : Stencil 13.3775 GB/s per node +Grid : Message : Average mflops/s per call per node : 667692 +Grid : Message : Average mflops/s per call per node : 805333 +Grid : Message : Average mflops/s per call per node : 829122 +Grid : Message : Average mflops/s per call per node : 663530 +Grid : Message : Average mflops/s per call per node (full): 315212 +Grid : 
Message : Average mflops/s per call per node (full): 434699 +Grid : Message : Average mflops/s per call per node (full): 450563 +Grid : Message : Average mflops/s per call per node (full): 306393 +Grid : Message : Stencil 12.7172 GB/s per node +Grid : Message : Stencil 16.4615 GB/s per node +Grid : Message : Stencil 19.0358 GB/s per node +Grid : Message : Stencil 13.4485 GB/s per node +Grid : Message : Average mflops/s per call per node : 669490 +Grid : Message : Average mflops/s per call per node : 800479 +Grid : Message : Average mflops/s per call per node : 822324 +Grid : Message : Average mflops/s per call per node : 660896 +Grid : Message : Average mflops/s per call per node (full): 314962 +Grid : Message : Average mflops/s per call per node (full): 431262 +Grid : Message : Average mflops/s per call per node (full): 450903 +Grid : Message : Average mflops/s per call per node (full): 306073 +Grid : Message : Stencil 13.6954 GB/s per node +Grid : Message : Stencil 16.7403 GB/s per node +Grid : Message : Stencil 17.4408 GB/s per node +Grid : Message : Stencil 12.6016 GB/s per node +Grid : Message : Average mflops/s per call per node : 663190 +Grid : Message : Average mflops/s per call per node : 800581 +Grid : Message : Average mflops/s per call per node : 826168 +Grid : Message : Average mflops/s per call per node : 665855 +Grid : Message : Average mflops/s per call per node (full): 313723 +Grid : Message : Average mflops/s per call per node (full): 437454 +Grid : Message : Average mflops/s per call per node (full): 445763 +Grid : Message : Average mflops/s per call per node (full): 304748 +Grid : Message : Stencil 14.0301 GB/s per node +Grid : Message : Stencil 16.8619 GB/s per node +Grid : Message : Stencil 17.9328 GB/s per node +Grid : Message : Stencil 14.2081 GB/s per node +Grid : Message : Average mflops/s per call per node : 665022 +Grid : Message : Average mflops/s per call per node : 808321 +Grid : Message : Average mflops/s per call per node : 826120 
+Grid : Message : Average mflops/s per call per node : 662543 +Grid : Message : Average mflops/s per call per node (full): 316402 +Grid : Message : Average mflops/s per call per node (full): 438387 +Grid : Message : Average mflops/s per call per node (full): 447424 +Grid : Message : Average mflops/s per call per node (full): 307661 +Grid : Message : Stencil 14.4306 GB/s per node +Grid : Message : Stencil 16.4094 GB/s per node +Grid : Message : Stencil 17.3191 GB/s per node +Grid : Message : Stencil 13.8437 GB/s per node +Grid : Message : Average mflops/s per call per node : 664494 +Grid : Message : Average mflops/s per call per node : 806160 +Grid : Message : Average mflops/s per call per node : 820601 +Grid : Message : Average mflops/s per call per node : 661708 +Grid : Message : Average mflops/s per call per node (full): 315384 +Grid : Message : Average mflops/s per call per node (full): 434299 +Grid : Message : Average mflops/s per call per node (full): 443111 +Grid : Message : Average mflops/s per call per node (full): 306571 +Grid : Message : Stencil 13.5568 GB/s per node +Grid : Message : Stencil 18.3458 GB/s per node +Grid : Message : Stencil 18.3744 GB/s per node +Grid : Message : Stencil 12.6788 GB/s per node +Grid : Message : Average mflops/s per call per node : 664391 +Grid : Message : Average mflops/s per call per node : 798768 +Grid : Message : Average mflops/s per call per node : 826634 +Grid : Message : Average mflops/s per call per node : 663137 +Grid : Message : Average mflops/s per call per node (full): 315463 +Grid : Message : Average mflops/s per call per node (full): 440577 +Grid : Message : Average mflops/s per call per node (full): 449206 +Grid : Message : Average mflops/s per call per node (full): 305236 +Grid : Message : Stencil 12.6774 GB/s per node +Grid : Message : Stencil 17.6112 GB/s per node +Grid : Message : Stencil 17.2765 GB/s per node +Grid : Message : Stencil 13.9594 GB/s per node +Grid : Message : Average mflops/s per call per 
node : 670856 +Grid : Message : Average mflops/s per call per node : 804658 +Grid : Message : Average mflops/s per call per node : 818594 +Grid : Message : Average mflops/s per call per node : 654510 +Grid : Message : Average mflops/s per call per node (full): 313844 +Grid : Message : Average mflops/s per call per node (full): 438540 +Grid : Message : Average mflops/s per call per node (full): 443813 +Grid : Message : Average mflops/s per call per node (full): 305913 +Grid : Message : Stencil 14.0668 GB/s per node +Grid : Message : Stencil 16.7665 GB/s per node +Grid : Message : Stencil 17.9828 GB/s per node +Grid : Message : Stencil 13.3468 GB/s per node +Grid : Message : Average mflops/s per call per node : 665086 +Grid : Message : Average mflops/s per call per node : 805773 +Grid : Message : Average mflops/s per call per node : 826598 +Grid : Message : Average mflops/s per call per node : 665131 +Grid : Message : Average mflops/s per call per node (full): 314886 +Grid : Message : Average mflops/s per call per node (full): 437424 +Grid : Message : Average mflops/s per call per node (full): 445201 +Grid : Message : Average mflops/s per call per node (full): 303112 +Grid : Message : Stencil 12.9395 GB/s per node +Grid : Message : Stencil 16.6062 GB/s per node +Grid : Message : Stencil 17.4802 GB/s per node +Grid : Message : Stencil 13.1207 GB/s per node +Grid : Message : Average mflops/s per call per node : 667340 +Grid : Message : Average mflops/s per call per node : 802719 +Grid : Message : Average mflops/s per call per node : 821553 +Grid : Message : Average mflops/s per call per node : 660221 +Grid : Message : Average mflops/s per call per node (full): 313758 +Grid : Message : Average mflops/s per call per node (full): 435392 +Grid : Message : Average mflops/s per call per node (full): 443062 +Grid : Message : Average mflops/s per call per node (full): 304683 +Grid : Message : Stencil 13.5273 GB/s per node +Grid : Message : Stencil 16.7218 GB/s per node +Grid : 
Message : Stencil 17.3427 GB/s per node +Grid : Message : Stencil 12.3055 GB/s per node +Grid : Message : Average mflops/s per call per node : 666951 +Grid : Message : Average mflops/s per call per node : 808340 +Grid : Message : Average mflops/s per call per node : 825856 +Grid : Message : Average mflops/s per call per node : 669653 +Grid : Message : Average mflops/s per call per node (full): 316393 +Grid : Message : Average mflops/s per call per node (full): 437208 +Grid : Message : Average mflops/s per call per node (full): 444151 +Grid : Message : Average mflops/s per call per node (full): 304506 +Grid : Message : Stencil 12.9419 GB/s per node +Grid : Message : Stencil 17.7185 GB/s per node +Grid : Message : Stencil 17.6023 GB/s per node +Grid : Message : Stencil 13.1646 GB/s per node +Grid : Message : Average mflops/s per call per node : 671486 +Grid : Message : Average mflops/s per call per node : 803835 +Grid : Message : Average mflops/s per call per node : 812919 +Grid : Message : Average mflops/s per call per node : 661156 +Grid : Message : Average mflops/s per call per node (full): 316181 +Grid : Message : Average mflops/s per call per node (full): 439019 +Grid : Message : Average mflops/s per call per node (full): 443163 +Grid : Message : Average mflops/s per call per node (full): 305543 +Grid : Message : Stencil 12.8064 GB/s per node +Grid : Message : Stencil 17.3831 GB/s per node +Grid : Message : Stencil 19.2456 GB/s per node +Grid : Message : Stencil 13.7529 GB/s per node +Grid : Message : Average mflops/s per call per node : 671019 +Grid : Message : Average mflops/s per call per node : 804072 +Grid : Message : Average mflops/s per call per node : 818083 +Grid : Message : Average mflops/s per call per node : 657458 +Grid : Message : Average mflops/s per call per node (full): 315531 +Grid : Message : Average mflops/s per call per node (full): 437530 +Grid : Message : Average mflops/s per call per node (full): 447228 +Grid : Message : Average mflops/s 
per call per node (full): 305933 +Grid : Message : Stencil 13.0671 GB/s per node +Grid : Message : Stencil 16.3692 GB/s per node +Grid : Message : Stencil 17.5399 GB/s per node +Grid : Message : Stencil 12.765 GB/s per node +Grid : Message : Average mflops/s per call per node : 668696 +Grid : Message : Average mflops/s per call per node : 806903 +Grid : Message : Average mflops/s per call per node : 815258 +Grid : Message : Average mflops/s per call per node : 662416 +Grid : Message : Average mflops/s per call per node (full): 314894 +Grid : Message : Average mflops/s per call per node (full): 433481 +Grid : Message : Average mflops/s per call per node (full): 443106 +Grid : Message : Average mflops/s per call per node (full): 304658 +Grid : Message : Stencil 13.2608 GB/s per node +Grid : Message : Stencil 16.5387 GB/s per node +Grid : Message : Stencil 17.9303 GB/s per node +Grid : Message : Stencil 12.9177 GB/s per node +Grid : Message : Average mflops/s per call per node : 665759 +Grid : Message : Average mflops/s per call per node : 804452 +Grid : Message : Average mflops/s per call per node : 822492 +Grid : Message : Average mflops/s per call per node : 664038 +Grid : Message : Average mflops/s per call per node (full): 315706 +Grid : Message : Average mflops/s per call per node (full): 435423 +Grid : Message : Average mflops/s per call per node (full): 446707 +Grid : Message : Average mflops/s per call per node (full): 303776 +Grid : Message : Stencil 12.7231 GB/s per node +Grid : Message : Stencil 17.212 GB/s per node +Grid : Message : Stencil 17.2632 GB/s per node +Grid : Message : Stencil 15.0456 GB/s per node +Grid : Message : Average mflops/s per call per node : 668041 +Grid : Message : Average mflops/s per call per node : 804260 +Grid : Message : Average mflops/s per call per node : 828111 +Grid : Message : Average mflops/s per call per node : 659790 +Grid : Message : Average mflops/s per call per node (full): 315169 +Grid : Message : Average mflops/s 
per call per node (full): 436572 +Grid : Message : Average mflops/s per call per node (full): 445038 +Grid : Message : Average mflops/s per call per node (full): 307447 +Grid : Message : Stencil 13.9317 GB/s per node +Grid : Message : Stencil 17.6126 GB/s per node +Grid : Message : Stencil 17.9129 GB/s per node +Grid : Message : Stencil 12.1942 GB/s per node +Grid : Message : Average mflops/s per call per node : 663571 +Grid : Message : Average mflops/s per call per node : 806005 +Grid : Message : Average mflops/s per call per node : 818928 +Grid : Message : Average mflops/s per call per node : 667109 +Grid : Message : Average mflops/s per call per node (full): 315229 +Grid : Message : Average mflops/s per call per node (full): 437729 +Grid : Message : Average mflops/s per call per node (full): 443696 +Grid : Message : Average mflops/s per call per node (full): 303522 +Grid : Message : Stencil 13.8441 GB/s per node +Grid : Message : Stencil 16.8207 GB/s per node +Grid : Message : Stencil 17.3388 GB/s per node +Grid : Message : Stencil 12.6899 GB/s per node +Grid : Message : Average mflops/s per call per node : 667025 +Grid : Message : Average mflops/s per call per node : 803488 +Grid : Message : Average mflops/s per call per node : 826681 +Grid : Message : Average mflops/s per call per node : 668904 +Grid : Message : Average mflops/s per call per node (full): 316439 +Grid : Message : Average mflops/s per call per node (full): 438327 +Grid : Message : Average mflops/s per call per node (full): 443946 +Grid : Message : Average mflops/s per call per node (full): 304893 +Grid : Message : Stencil 14.6365 GB/s per node +Grid : Message : Stencil 16.8923 GB/s per node +Grid : Message : Stencil 18.0901 GB/s per node +Grid : Message : Stencil 14.1822 GB/s per node +Grid : Message : Average mflops/s per call per node : 666147 +Grid : Message : Average mflops/s per call per node : 804740 +Grid : Message : Average mflops/s per call per node : 822198 +Grid : Message : Average 
mflops/s per call per node : 661873 +Grid : Message : Average mflops/s per call per node (full): 316893 +Grid : Message : Average mflops/s per call per node (full): 437004 +Grid : Message : Average mflops/s per call per node (full): 446809 +Grid : Message : Average mflops/s per call per node (full): 307507 +Grid : Message : Stencil 13.0344 GB/s per node +Grid : Message : Stencil 16.2898 GB/s per node +Grid : Message : Stencil 18.1261 GB/s per node +Grid : Message : Stencil 13.4165 GB/s per node +Grid : Message : Average mflops/s per call per node : 669087 +Grid : Message : Average mflops/s per call per node : 804366 +Grid : Message : Average mflops/s per call per node : 822323 +Grid : Message : Average mflops/s per call per node : 659677 +Grid : Message : Average mflops/s per call per node (full): 315904 +Grid : Message : Average mflops/s per call per node (full): 432455 +Grid : Message : Average mflops/s per call per node (full): 445337 +Grid : Message : Average mflops/s per call per node (full): 304386 +Grid : Message : Stencil 13.7753 GB/s per node +Grid : Message : Stencil 11.5873 GB/s per node +Grid : Message : Stencil 17.7056 GB/s per node +Grid : Message : Stencil 12.6323 GB/s per node +Grid : Message : Average mflops/s per call per node : 666533 +Grid : Message : Average mflops/s per call per node : 808259 +Grid : Message : Average mflops/s per call per node : 821688 +Grid : Message : Average mflops/s per call per node : 669442 +Grid : Message : Average mflops/s per call per node (full): 315905 +Grid : Message : Average mflops/s per call per node (full): 352761 +Grid : Message : Average mflops/s per call per node (full): 444870 +Grid : Message : Average mflops/s per call per node (full): 305567 +Grid : Message : Stencil 13.3282 GB/s per node +Grid : Message : Stencil 16.8213 GB/s per node +Grid : Message : Stencil 17.8142 GB/s per node +Grid : Message : Stencil 12.2119 GB/s per node +Grid : Message : Average mflops/s per call per node : 670208 +Grid : 
Message : Average mflops/s per call per node : 805861 +Grid : Message : Average mflops/s per call per node : 822623 +Grid : Message : Average mflops/s per call per node : 666740 +Grid : Message : Average mflops/s per call per node (full): 316655 +Grid : Message : Average mflops/s per call per node (full): 432955 +Grid : Message : Average mflops/s per call per node (full): 446403 +Grid : Message : Average mflops/s per call per node (full): 303583 +Grid : Message : Stencil 12.71 GB/s per node +Grid : Message : Stencil 17.2789 GB/s per node +Grid : Message : Stencil 17.3154 GB/s per node +Grid : Message : Stencil 12.9706 GB/s per node +Grid : Message : Average mflops/s per call per node : 668272 +Grid : Message : Average mflops/s per call per node : 803593 +Grid : Message : Average mflops/s per call per node : 821190 +Grid : Message : Average mflops/s per call per node : 665746 +Grid : Message : Average mflops/s per call per node (full): 315074 +Grid : Message : Average mflops/s per call per node (full): 436341 +Grid : Message : Average mflops/s per call per node (full): 438150 +Grid : Message : Average mflops/s per call per node (full): 304502 +Grid : Message : Stencil 12.5427 GB/s per node +Grid : Message : Stencil 17.1143 GB/s per node +Grid : Message : Stencil 17.4695 GB/s per node +Grid : Message : Stencil 14.2233 GB/s per node +Grid : Message : Average mflops/s per call per node : 668353 +Grid : Message : Average mflops/s per call per node : 807108 +Grid : Message : Average mflops/s per call per node : 826014 +Grid : Message : Average mflops/s per call per node : 661985 +Grid : Message : Average mflops/s per call per node (full): 313652 +Grid : Message : Average mflops/s per call per node (full): 435772 +Grid : Message : Average mflops/s per call per node (full): 444672 +Grid : Message : Average mflops/s per call per node (full): 306525 +Grid : Message : Stencil 12.3856 GB/s per node +Grid : Message : Stencil 16.6937 GB/s per node +Grid : Message : Stencil 
18.4506 GB/s per node +Grid : Message : Stencil 13.9017 GB/s per node +Grid : Message : Average mflops/s per call per node : 671646 +Grid : Message : Average mflops/s per call per node : 799233 +Grid : Message : Average mflops/s per call per node : 825952 +Grid : Message : Average mflops/s per call per node : 666904 +Grid : Message : Average mflops/s per call per node (full): 313151 +Grid : Message : Average mflops/s per call per node (full): 435437 +Grid : Message : Average mflops/s per call per node (full): 449338 +Grid : Message : Average mflops/s per call per node (full): 307579 +Grid : Message : Stencil 13.5081 GB/s per node +Grid : Message : Stencil 16.904 GB/s per node +Grid : Message : Stencil 17.8221 GB/s per node +Grid : Message : Stencil 11.9864 GB/s per node +Grid : Message : Average mflops/s per call per node : 667372 +Grid : Message : Average mflops/s per call per node : 806954 +Grid : Message : Average mflops/s per call per node : 823843 +Grid : Message : Average mflops/s per call per node : 668278 +Grid : Message : Average mflops/s per call per node (full): 315974 +Grid : Message : Average mflops/s per call per node (full): 436821 +Grid : Message : Average mflops/s per call per node (full): 437610 +Grid : Message : Average mflops/s per call per node (full): 301928 +Grid : Message : Stencil 13.2842 GB/s per node +Grid : Message : Stencil 16.9778 GB/s per node +Grid : Message : Stencil 17.1985 GB/s per node +Grid : Message : Stencil 12.6819 GB/s per node +Grid : Message : Average mflops/s per call per node : 663084 +Grid : Message : Average mflops/s per call per node : 806820 +Grid : Message : Average mflops/s per call per node : 819492 +Grid : Message : Average mflops/s per call per node : 663353 +Grid : Message : Average mflops/s per call per node (full): 315298 +Grid : Message : Average mflops/s per call per node (full): 438561 +Grid : Message : Average mflops/s per call per node (full): 441997 +Grid : Message : Average mflops/s per call per node 
(full): 304187 +Grid : Message : Stencil 12.9111 GB/s per node +Grid : Message : Stencil 17.6963 GB/s per node +Grid : Message : Stencil 17.6975 GB/s per node +Grid : Message : Stencil 13.016 GB/s per node +Grid : Message : Average mflops/s per call per node : 664176 +Grid : Message : Average mflops/s per call per node : 800210 +Grid : Message : Average mflops/s per call per node : 821890 +Grid : Message : Average mflops/s per call per node : 662729 +Grid : Message : Average mflops/s per call per node (full): 314576 +Grid : Message : Average mflops/s per call per node (full): 437851 +Grid : Message : Average mflops/s per call per node (full): 446601 +Grid : Message : Average mflops/s per call per node (full): 305381 +Grid : Message : Stencil 13.1248 GB/s per node +Grid : Message : Stencil 11.7779 GB/s per node +Grid : Message : Stencil 17.7619 GB/s per node +Grid : Message : Stencil 13.4022 GB/s per node +Grid : Message : Average mflops/s per call per node : 664419 +Grid : Message : Average mflops/s per call per node : 810492 +Grid : Message : Average mflops/s per call per node : 823930 +Grid : Message : Average mflops/s per call per node : 665522 +Grid : Message : Average mflops/s per call per node (full): 314881 +Grid : Message : Average mflops/s per call per node (full): 356528 +Grid : Message : Average mflops/s per call per node (full): 445937 +Grid : Message : Average mflops/s per call per node (full): 306116 +Grid : Message : Stencil 14.3878 GB/s per node +Grid : Message : Stencil 16.7257 GB/s per node +Grid : Message : Stencil 17.8578 GB/s per node +Grid : Message : Stencil 13.9036 GB/s per node +Grid : Message : Average mflops/s per call per node : 660208 +Grid : Message : Average mflops/s per call per node : 807016 +Grid : Message : Average mflops/s per call per node : 823662 +Grid : Message : Average mflops/s per call per node : 664817 +Grid : Message : Average mflops/s per call per node (full): 315583 +Grid : Message : Average mflops/s per call per node 
(full): 436472 +Grid : Message : Average mflops/s per call per node (full): 446283 +Grid : Message : Average mflops/s per call per node (full): 306769 +Grid : Message : Stencil 13.7372 GB/s per node +Grid : Message : Stencil 14.8812 GB/s per node +Grid : Message : Stencil 17.5616 GB/s per node +Grid : Message : Stencil 11.8767 GB/s per node +Grid : Message : Average mflops/s per call per node : 660390 +Grid : Message : Average mflops/s per call per node : 804542 +Grid : Message : Average mflops/s per call per node : 819512 +Grid : Message : Average mflops/s per call per node : 664682 +Grid : Message : Average mflops/s per call per node (full): 315855 +Grid : Message : Average mflops/s per call per node (full): 414288 +Grid : Message : Average mflops/s per call per node (full): 442000 +Grid : Message : Average mflops/s per call per node (full): 300282 +Grid : Message : Stencil 14.2464 GB/s per node +Grid : Message : Stencil 17.0363 GB/s per node +Grid : Message : Stencil 17.9915 GB/s per node +Grid : Message : Stencil 13.461 GB/s per node +Grid : Message : Average mflops/s per call per node : 661537 +Grid : Message : Average mflops/s per call per node : 805651 +Grid : Message : Average mflops/s per call per node : 824045 +Grid : Message : Average mflops/s per call per node : 667438 +Grid : Message : Average mflops/s per call per node (full): 316224 +Grid : Message : Average mflops/s per call per node (full): 435645 +Grid : Message : Average mflops/s per call per node (full): 446580 +Grid : Message : Average mflops/s per call per node (full): 307108 +Grid : Message : Stencil 14.2497 GB/s per node +Grid : Message : Stencil 16.2639 GB/s per node +Grid : Message : Stencil 17.1763 GB/s per node +Grid : Message : Stencil 12.0851 GB/s per node +Grid : Message : Average mflops/s per call per node : 659856 +Grid : Message : Average mflops/s per call per node : 808352 +Grid : Message : Average mflops/s per call per node : 823527 +Grid : Message : Average mflops/s per call per 
node : 662137 +Grid : Message : Average mflops/s per call per node (full): 315624 +Grid : Message : Average mflops/s per call per node (full): 431227 +Grid : Message : Average mflops/s per call per node (full): 441847 +Grid : Message : Average mflops/s per call per node (full): 302014 +Grid : Message : Stencil 12.6238 GB/s per node +Grid : Message : Stencil 16.3639 GB/s per node +Grid : Message : Stencil 17.937 GB/s per node +Grid : Message : Stencil 12.8845 GB/s per node +Grid : Message : Average mflops/s per call per node : 664350 +Grid : Message : Average mflops/s per call per node : 803314 +Grid : Message : Average mflops/s per call per node : 819809 +Grid : Message : Average mflops/s per call per node : 660241 +Grid : Message : Average mflops/s per call per node (full): 312411 +Grid : Message : Average mflops/s per call per node (full): 431811 +Grid : Message : Average mflops/s per call per node (full): 445748 +Grid : Message : Average mflops/s per call per node (full): 301276 +Grid : Message : Stencil 12.5847 GB/s per node +Grid : Message : Stencil 17.6308 GB/s per node +Grid : Message : Stencil 18.0077 GB/s per node +Grid : Message : Stencil 12.2625 GB/s per node +Grid : Message : Average mflops/s per call per node : 662560 +Grid : Message : Average mflops/s per call per node : 800141 +Grid : Message : Average mflops/s per call per node : 819948 +Grid : Message : Average mflops/s per call per node : 665869 +Grid : Message : Average mflops/s per call per node (full): 313352 +Grid : Message : Average mflops/s per call per node (full): 439427 +Grid : Message : Average mflops/s per call per node (full): 444876 +Grid : Message : Average mflops/s per call per node (full): 304118 +Grid : Message : Stencil 13.4228 GB/s per node +Grid : Message : Stencil 16.4704 GB/s per node +Grid : Message : Stencil 17.0233 GB/s per node +Grid : Message : Stencil 12.4666 GB/s per node +Grid : Message : Average mflops/s per call per node : 659557 +Grid : Message : Average mflops/s 
per call per node : 805825 +Grid : Message : Average mflops/s per call per node : 822933 +Grid : Message : Average mflops/s per call per node : 663659 +Grid : Message : Average mflops/s per call per node (full): 314515 +Grid : Message : Average mflops/s per call per node (full): 435573 +Grid : Message : Average mflops/s per call per node (full): 440776 +Grid : Message : Average mflops/s per call per node (full): 304630 +Grid : Message : Stencil 12.4644 GB/s per node +Grid : Message : Stencil 8.22093 GB/s per node +Grid : Message : Stencil 17.7152 GB/s per node +Grid : Message : Stencil 14.6792 GB/s per node +Grid : Message : Average mflops/s per call per node : 665642 +Grid : Message : Average mflops/s per call per node : 808130 +Grid : Message : Average mflops/s per call per node : 823904 +Grid : Message : Average mflops/s per call per node : 661846 +Grid : Message : Average mflops/s per call per node (full): 312503 +Grid : Message : Average mflops/s per call per node (full): 272466 +Grid : Message : Average mflops/s per call per node (full): 445081 +Grid : Message : Average mflops/s per call per node (full): 306767 +Grid : Message : Stencil 12.6647 GB/s per node +Grid : Message : Stencil 17.5637 GB/s per node +Grid : Message : Stencil 16.5685 GB/s per node +Grid : Message : Stencil 12.8074 GB/s per node +Grid : Message : Average mflops/s per call per node : 664429 +Grid : Message : Average mflops/s per call per node : 804657 +Grid : Message : Average mflops/s per call per node : 825316 +Grid : Message : Average mflops/s per call per node : 662416 +Grid : Message : Average mflops/s per call per node (full): 314387 +Grid : Message : Average mflops/s per call per node (full): 439744 +Grid : Message : Average mflops/s per call per node (full): 429863 +Grid : Message : Average mflops/s per call per node (full): 304961 +Grid : Message : Stencil 13.6648 GB/s per node +Grid : Message : Stencil 17.817 GB/s per node +Grid : Message : Stencil 16.2935 GB/s per node +Grid : 
Message : Stencil 12.3396 GB/s per node +Grid : Message : Average mflops/s per call per node : 665153 +Grid : Message : Average mflops/s per call per node : 799606 +Grid : Message : Average mflops/s per call per node : 823484 +Grid : Message : Average mflops/s per call per node : 664285 +Grid : Message : Average mflops/s per call per node (full): 315681 +Grid : Message : Average mflops/s per call per node (full): 438394 +Grid : Message : Average mflops/s per call per node (full): 421651 +Grid : Message : Average mflops/s per call per node (full): 302749 +Grid : Message : Stencil 12.9152 GB/s per node +Grid : Message : Stencil 17.2826 GB/s per node +Grid : Message : Stencil 17.2844 GB/s per node +Grid : Message : Stencil 14.1308 GB/s per node +Grid : Message : Average mflops/s per call per node : 671951 +Grid : Message : Average mflops/s per call per node : 805099 +Grid : Message : Average mflops/s per call per node : 820406 +Grid : Message : Average mflops/s per call per node : 666892 +Grid : Message : Average mflops/s per call per node (full): 315966 +Grid : Message : Average mflops/s per call per node (full): 440134 +Grid : Message : Average mflops/s per call per node (full): 442765 +Grid : Message : Average mflops/s per call per node (full): 305421 +Grid : Message : Stencil 14.0159 GB/s per node +Grid : Message : Stencil 16.8766 GB/s per node +Grid : Message : Stencil 17.9298 GB/s per node +Grid : Message : Stencil 12.887 GB/s per node +Grid : Message : Average mflops/s per call per node : 663984 +Grid : Message : Average mflops/s per call per node : 804339 +Grid : Message : Average mflops/s per call per node : 825334 +Grid : Message : Average mflops/s per call per node : 662330 +Grid : Message : Average mflops/s per call per node (full): 315302 +Grid : Message : Average mflops/s per call per node (full): 435971 +Grid : Message : Average mflops/s per call per node (full): 446246 +Grid : Message : Average mflops/s per call per node (full): 304300 +Grid : Message 
: Stencil 14.5695 GB/s per node +Grid : Message : Stencil 16.8476 GB/s per node +Grid : Message : Stencil 17.7522 GB/s per node +Grid : Message : Stencil 14.0278 GB/s per node +Grid : Message : Average mflops/s per call per node : 663605 +Grid : Message : Average mflops/s per call per node : 803894 +Grid : Message : Average mflops/s per call per node : 828041 +Grid : Message : Average mflops/s per call per node : 665135 +Grid : Message : Average mflops/s per call per node (full): 317216 +Grid : Message : Average mflops/s per call per node (full): 435113 +Grid : Message : Average mflops/s per call per node (full): 446534 +Grid : Message : Average mflops/s per call per node (full): 307800 +Grid : Message : Stencil 12.3099 GB/s per node +Grid : Message : Stencil 17.0746 GB/s per node +Grid : Message : Stencil 17.5469 GB/s per node +Grid : Message : Stencil 13.6681 GB/s per node +Grid : Message : Average mflops/s per call per node : 669123 +Grid : Message : Average mflops/s per call per node : 801928 +Grid : Message : Average mflops/s per call per node : 824345 +Grid : Message : Average mflops/s per call per node : 659179 +Grid : Message : Average mflops/s per call per node (full): 312030 +Grid : Message : Average mflops/s per call per node (full): 432529 +Grid : Message : Average mflops/s per call per node (full): 446152 +Grid : Message : Average mflops/s per call per node (full): 306748 +Grid : Message : Stencil 12.8134 GB/s per node +Grid : Message : Stencil 18.2851 GB/s per node +Grid : Message : Stencil 17.6986 GB/s per node +Grid : Message : Stencil 12.7626 GB/s per node +Grid : Message : Average mflops/s per call per node : 666400 +Grid : Message : Average mflops/s per call per node : 804575 +Grid : Message : Average mflops/s per call per node : 828300 +Grid : Message : Average mflops/s per call per node : 663369 +Grid : Message : Average mflops/s per call per node (full): 313964 +Grid : Message : Average mflops/s per call per node (full): 442054 +Grid : Message 
: Average mflops/s per call per node (full): 447876 +Grid : Message : Average mflops/s per call per node (full): 305509 +Grid : Message : Stencil 13.7164 GB/s per node +Grid : Message : Stencil 14.5666 GB/s per node +Grid : Message : Stencil 17.8571 GB/s per node +Grid : Message : Stencil 12.8831 GB/s per node +Grid : Message : Average mflops/s per call per node : 662667 +Grid : Message : Average mflops/s per call per node : 808341 +Grid : Message : Average mflops/s per call per node : 823866 +Grid : Message : Average mflops/s per call per node : 662070 +Grid : Message : Average mflops/s per call per node (full): 315443 +Grid : Message : Average mflops/s per call per node (full): 409315 +Grid : Message : Average mflops/s per call per node (full): 446895 +Grid : Message : Average mflops/s per call per node (full): 304843 +Grid : Message : Stencil 13.3634 GB/s per node +Grid : Message : Stencil 16.8785 GB/s per node +Grid : Message : Stencil 17.6547 GB/s per node +Grid : Message : Stencil 13.929 GB/s per node +Grid : Message : Average mflops/s per call per node : 665176 +Grid : Message : Average mflops/s per call per node : 804424 +Grid : Message : Average mflops/s per call per node : 821991 +Grid : Message : Average mflops/s per call per node : 659931 +Grid : Message : Average mflops/s per call per node (full): 314784 +Grid : Message : Average mflops/s per call per node (full): 437025 +Grid : Message : Average mflops/s per call per node (full): 446321 +Grid : Message : Average mflops/s per call per node (full): 306278 +Grid : Message : Stencil 12.355 GB/s per node +Grid : Message : Stencil 16.895 GB/s per node +Grid : Message : Stencil 18.166 GB/s per node +Grid : Message : Stencil 12.8473 GB/s per node +Grid : Message : Average mflops/s per call per node : 671496 +Grid : Message : Average mflops/s per call per node : 808884 +Grid : Message : Average mflops/s per call per node : 820349 +Grid : Message : Average mflops/s per call per node : 666351 +Grid : Message : 
Average mflops/s per call per node (full): 312992 +Grid : Message : Average mflops/s per call per node (full): 439455 +Grid : Message : Average mflops/s per call per node (full): 446030 +Grid : Message : Average mflops/s per call per node (full): 306316 +Grid : Message : Stencil 12.6097 GB/s per node +Grid : Message : Stencil 16.6952 GB/s per node +Grid : Message : Stencil 17.7702 GB/s per node +Grid : Message : Stencil 12.9123 GB/s per node +Grid : Message : Average mflops/s per call per node : 667832 +Grid : Message : Average mflops/s per call per node : 807358 +Grid : Message : Average mflops/s per call per node : 825113 +Grid : Message : Average mflops/s per call per node : 664207 +Grid : Message : Average mflops/s per call per node (full): 314251 +Grid : Message : Average mflops/s per call per node (full): 436472 +Grid : Message : Average mflops/s per call per node (full): 446405 +Grid : Message : Average mflops/s per call per node (full): 303965 +Grid : Message : Stencil 14.0821 GB/s per node +Grid : Message : Stencil 15.1892 GB/s per node +Grid : Message : Stencil 17.7668 GB/s per node +Grid : Message : Stencil 12.6889 GB/s per node +Grid : Message : Average mflops/s per call per node : 664142 +Grid : Message : Average mflops/s per call per node : 806184 +Grid : Message : Average mflops/s per call per node : 826703 +Grid : Message : Average mflops/s per call per node : 659089 +Grid : Message : Average mflops/s per call per node (full): 316433 +Grid : Message : Average mflops/s per call per node (full): 417763 +Grid : Message : Average mflops/s per call per node (full): 445587 +Grid : Message : Average mflops/s per call per node (full): 303256 +Grid : Message : Stencil 13.5766 GB/s per node +Grid : Message : Stencil 17.2951 GB/s per node +Grid : Message : Stencil 17.4029 GB/s per node +Grid : Message : Stencil 12.7326 GB/s per node +Grid : Message : Average mflops/s per call per node : 666642 +Grid : Message : Average mflops/s per call per node : 802032 +Grid 
: Message : Average mflops/s per call per node : 819241 +Grid : Message : Average mflops/s per call per node : 666999 +Grid : Message : Average mflops/s per call per node (full): 316199 +Grid : Message : Average mflops/s per call per node (full): 438555 +Grid : Message : Average mflops/s per call per node (full): 443653 +Grid : Message : Average mflops/s per call per node (full): 305053 +Grid : Message : Stencil 13.2271 GB/s per node +Grid : Message : Stencil 16.6175 GB/s per node +Grid : Message : Stencil 18.169 GB/s per node +Grid : Message : Stencil 12.1386 GB/s per node +Grid : Message : Average mflops/s per call per node : 668187 +Grid : Message : Average mflops/s per call per node : 804086 +Grid : Message : Average mflops/s per call per node : 826263 +Grid : Message : Average mflops/s per call per node : 664120 +Grid : Message : Average mflops/s per call per node (full): 315783 +Grid : Message : Average mflops/s per call per node (full): 435193 +Grid : Message : Average mflops/s per call per node (full): 446811 +Grid : Message : Average mflops/s per call per node (full): 302426 +Grid : Message : Stencil 12.5939 GB/s per node +Grid : Message : Stencil 16.4433 GB/s per node +Grid : Message : Stencil 17.8313 GB/s per node +Grid : Message : Stencil 11.7482 GB/s per node +Grid : Message : Average mflops/s per call per node : 668082 +Grid : Message : Average mflops/s per call per node : 807633 +Grid : Message : Average mflops/s per call per node : 822405 +Grid : Message : Average mflops/s per call per node : 668530 +Grid : Message : Average mflops/s per call per node (full): 314384 +Grid : Message : Average mflops/s per call per node (full): 433890 +Grid : Message : Average mflops/s per call per node (full): 436885 +Grid : Message : Average mflops/s per call per node (full): 294829 +Grid : Message : Stencil 12.9189 GB/s per node +Grid : Message : Stencil 16.7443 GB/s per node +Grid : Message : Stencil 18.5545 GB/s per node +Grid : Message : Stencil 13.4276 GB/s per 
node +Grid : Message : Average mflops/s per call per node : 666286 +Grid : Message : Average mflops/s per call per node : 804392 +Grid : Message : Average mflops/s per call per node : 818844 +Grid : Message : Average mflops/s per call per node : 664687 +Grid : Message : Average mflops/s per call per node (full): 315228 +Grid : Message : Average mflops/s per call per node (full): 436326 +Grid : Message : Average mflops/s per call per node (full): 447264 +Grid : Message : Average mflops/s per call per node (full): 307040 +Grid : Message : Stencil 13.3248 GB/s per node +Grid : Message : Stencil 12.1998 GB/s per node +Grid : Message : Stencil 17.5079 GB/s per node +Grid : Message : Stencil 13.3229 GB/s per node +Grid : Message : Average mflops/s per call per node : 667839 +Grid : Message : Average mflops/s per call per node : 813175 +Grid : Message : Average mflops/s per call per node : 814133 +Grid : Message : Average mflops/s per call per node : 665674 +Grid : Message : Average mflops/s per call per node (full): 315289 +Grid : Message : Average mflops/s per call per node (full): 365548 +Grid : Message : Average mflops/s per call per node (full): 443603 +Grid : Message : Average mflops/s per call per node (full): 305937 +Grid : Message : Stencil 13.1377 GB/s per node +Grid : Message : Stencil 16.4798 GB/s per node +Grid : Message : Stencil 17.8989 GB/s per node +Grid : Message : Stencil 13.3023 GB/s per node +Grid : Message : Average mflops/s per call per node : 669090 +Grid : Message : Average mflops/s per call per node : 804535 +Grid : Message : Average mflops/s per call per node : 825306 +Grid : Message : Average mflops/s per call per node : 666189 +Grid : Message : Average mflops/s per call per node (full): 315289 +Grid : Message : Average mflops/s per call per node (full): 434236 +Grid : Message : Average mflops/s per call per node (full): 445521 +Grid : Message : Average mflops/s per call per node (full): 303984 +Grid : Message : Stencil 14.0527 GB/s per node 
+Grid : Message : Stencil 16.9076 GB/s per node +Grid : Message : Stencil 17.3601 GB/s per node +Grid : Message : Stencil 12.4109 GB/s per node +Grid : Message : Average mflops/s per call per node : 663052 +Grid : Message : Average mflops/s per call per node : 803103 +Grid : Message : Average mflops/s per call per node : 828502 +Grid : Message : Average mflops/s per call per node : 663911 +Grid : Message : Average mflops/s per call per node (full): 315560 +Grid : Message : Average mflops/s per call per node (full): 436378 +Grid : Message : Average mflops/s per call per node (full): 444933 +Grid : Message : Average mflops/s per call per node (full): 303394 +Grid : Message : Stencil 13.2076 GB/s per node +Grid : Message : Stencil 17.8873 GB/s per node +Grid : Message : Stencil 17.3344 GB/s per node +Grid : Message : Stencil 13.4283 GB/s per node +Grid : Message : Average mflops/s per call per node : 666130 +Grid : Message : Average mflops/s per call per node : 802572 +Grid : Message : Average mflops/s per call per node : 824862 +Grid : Message : Average mflops/s per call per node : 665551 +Grid : Message : Average mflops/s per call per node (full): 315252 +Grid : Message : Average mflops/s per call per node (full): 440432 +Grid : Message : Average mflops/s per call per node (full): 443691 +Grid : Message : Average mflops/s per call per node (full): 306628 +Grid : Message : Stencil 12.6615 GB/s per node +Grid : Message : Stencil 17.1256 GB/s per node +Grid : Message : Stencil 17.3373 GB/s per node +Grid : Message : Stencil 12.4662 GB/s per node +Grid : Message : Average mflops/s per call per node : 671242 +Grid : Message : Average mflops/s per call per node : 804096 +Grid : Message : Average mflops/s per call per node : 825872 +Grid : Message : Average mflops/s per call per node : 663829 +Grid : Message : Average mflops/s per call per node (full): 314762 +Grid : Message : Average mflops/s per call per node (full): 438760 +Grid : Message : Average mflops/s per call per 
node (full): 442389 +Grid : Message : Average mflops/s per call per node (full): 304135 +Grid : Message : Stencil 13.9984 GB/s per node +Grid : Message : Stencil 16.7166 GB/s per node +Grid : Message : Stencil 17.9318 GB/s per node +Grid : Message : Stencil 12.5099 GB/s per node +Grid : Message : Average mflops/s per call per node : 665745 +Grid : Message : Average mflops/s per call per node : 804408 +Grid : Message : Average mflops/s per call per node : 825315 +Grid : Message : Average mflops/s per call per node : 662627 +Grid : Message : Average mflops/s per call per node (full): 316107 +Grid : Message : Average mflops/s per call per node (full): 436169 +Grid : Message : Average mflops/s per call per node (full): 447081 +Grid : Message : Average mflops/s per call per node (full): 304059 +Grid : Message : Stencil 14.1307 GB/s per node +Grid : Message : Stencil 16.955 GB/s per node +Grid : Message : Stencil 17.413 GB/s per node +Grid : Message : Stencil 13.2242 GB/s per node +Grid : Message : Average mflops/s per call per node : 665882 +Grid : Message : Average mflops/s per call per node : 805427 +Grid : Message : Average mflops/s per call per node : 819318 +Grid : Message : Average mflops/s per call per node : 664097 +Grid : Message : Average mflops/s per call per node (full): 315293 +Grid : Message : Average mflops/s per call per node (full): 433667 +Grid : Message : Average mflops/s per call per node (full): 444729 +Grid : Message : Average mflops/s per call per node (full): 305803 +Grid : Message : Stencil 14.9627 GB/s per node +Grid : Message : Stencil 16.6868 GB/s per node +Grid : Message : Stencil 18.1597 GB/s per node +Grid : Message : Stencil 13.8765 GB/s per node +Grid : Message : Average mflops/s per call per node : 663128 +Grid : Message : Average mflops/s per call per node : 805706 +Grid : Message : Average mflops/s per call per node : 816270 +Grid : Message : Average mflops/s per call per node : 662347 +Grid : Message : Average mflops/s per call per 
node (full): 316989 +Grid : Message : Average mflops/s per call per node (full): 437238 +Grid : Message : Average mflops/s per call per node (full): 446091 +Grid : Message : Average mflops/s per call per node (full): 306802 +Grid : Message : Stencil 13.1026 GB/s per node +Grid : Message : Stencil 16.688 GB/s per node +Grid : Message : Stencil 17.4324 GB/s per node +Grid : Message : Stencil 13.3778 GB/s per node +Grid : Message : Average mflops/s per call per node : 668583 +Grid : Message : Average mflops/s per call per node : 805193 +Grid : Message : Average mflops/s per call per node : 824697 +Grid : Message : Average mflops/s per call per node : 661618 +Grid : Message : Average mflops/s per call per node (full): 316151 +Grid : Message : Average mflops/s per call per node (full): 437204 +Grid : Message : Average mflops/s per call per node (full): 444908 +Grid : Message : Average mflops/s per call per node (full): 306332 +Grid : Message : Stencil 13.12 GB/s per node +Grid : Message : Stencil 17.2708 GB/s per node +Grid : Message : Stencil 17.37 GB/s per node +Grid : Message : Stencil 12.1212 GB/s per node +Grid : Message : Average mflops/s per call per node : 667632 +Grid : Message : Average mflops/s per call per node : 800607 +Grid : Message : Average mflops/s per call per node : 822822 +Grid : Message : Average mflops/s per call per node : 665908 +Grid : Message : Average mflops/s per call per node (full): 314548 +Grid : Message : Average mflops/s per call per node (full): 437853 +Grid : Message : Average mflops/s per call per node (full): 443769 +Grid : Message : Average mflops/s per call per node (full): 301555 +Grid : Message : Stencil 12.4694 GB/s per node +Grid : Message : Stencil 16.5969 GB/s per node +Grid : Message : Stencil 17.213 GB/s per node +Grid : Message : Stencil 13.3999 GB/s per node +Grid : Message : Average mflops/s per call per node : 669733 +Grid : Message : Average mflops/s per call per node : 806943 +Grid : Message : Average mflops/s per 
call per node : 822933 +Grid : Message : Average mflops/s per call per node : 662354 +Grid : Message : Average mflops/s per call per node (full): 313965 +Grid : Message : Average mflops/s per call per node (full): 435489 +Grid : Message : Average mflops/s per call per node (full): 440837 +Grid : Message : Average mflops/s per call per node (full): 306316 +Grid : Message : Stencil 12.5374 GB/s per node +Grid : Message : Stencil 17.5318 GB/s per node +Grid : Message : Stencil 16.8417 GB/s per node +Grid : Message : Stencil 12.4993 GB/s per node +Grid : Message : Average mflops/s per call per node : 667866 +Grid : Message : Average mflops/s per call per node : 796795 +Grid : Message : Average mflops/s per call per node : 821464 +Grid : Message : Average mflops/s per call per node : 661191 +Grid : Message : Average mflops/s per call per node (full): 313613 +Grid : Message : Average mflops/s per call per node (full): 435727 +Grid : Message : Average mflops/s per call per node (full): 427083 +Grid : Message : Average mflops/s per call per node (full): 303886 +Grid : Message : Stencil 12.6277 GB/s per node +Grid : Message : Stencil 17.4118 GB/s per node +Grid : Message : Stencil 18.5909 GB/s per node +Grid : Message : Stencil 12.4698 GB/s per node +Grid : Message : Average mflops/s per call per node : 666965 +Grid : Message : Average mflops/s per call per node : 798077 +Grid : Message : Average mflops/s per call per node : 817594 +Grid : Message : Average mflops/s per call per node : 669968 +Grid : Message : Average mflops/s per call per node (full): 314027 +Grid : Message : Average mflops/s per call per node (full): 437054 +Grid : Message : Average mflops/s per call per node (full): 447328 +Grid : Message : Average mflops/s per call per node (full): 304924 +Grid : Message : Stencil 12.7725 GB/s per node +Grid : Message : Stencil 16.6166 GB/s per node +Grid : Message : Stencil 17.397 GB/s per node +Grid : Message : Stencil 12.4983 GB/s per node +Grid : Message : Average 
mflops/s per call per node : 665732 +Grid : Message : Average mflops/s per call per node : 807913 +Grid : Message : Average mflops/s per call per node : 821210 +Grid : Message : Average mflops/s per call per node : 661150 +Grid : Message : Average mflops/s per call per node (full): 313604 +Grid : Message : Average mflops/s per call per node (full): 435521 +Grid : Message : Average mflops/s per call per node (full): 443043 +Grid : Message : Average mflops/s per call per node (full): 303752 +Grid : Message : Stencil 13.6786 GB/s per node +Grid : Message : Stencil 17.4595 GB/s per node +Grid : Message : Stencil 17.5525 GB/s per node +Grid : Message : Stencil 13.3556 GB/s per node +Grid : Message : Average mflops/s per call per node : 661439 +Grid : Message : Average mflops/s per call per node : 805238 +Grid : Message : Average mflops/s per call per node : 815276 +Grid : Message : Average mflops/s per call per node : 662636 +Grid : Message : Average mflops/s per call per node (full): 315806 +Grid : Message : Average mflops/s per call per node (full): 440289 +Grid : Message : Average mflops/s per call per node (full): 443045 +Grid : Message : Average mflops/s per call per node (full): 306035 +Grid : Message : Stencil 12.614 GB/s per node +Grid : Message : Stencil 17.0698 GB/s per node +Grid : Message : Stencil 17.8556 GB/s per node +Grid : Message : Stencil 12.4782 GB/s per node +Grid : Message : Average mflops/s per call per node : 665447 +Grid : Message : Average mflops/s per call per node : 803670 +Grid : Message : Average mflops/s per call per node : 817131 +Grid : Message : Average mflops/s per call per node : 668088 +Grid : Message : Average mflops/s per call per node (full): 310869 +Grid : Message : Average mflops/s per call per node (full): 439056 +Grid : Message : Average mflops/s per call per node (full): 446346 +Grid : Message : Average mflops/s per call per node (full): 304479 +Grid : Message : Stencil 13.6138 GB/s per node +Grid : Message : Stencil 18.4778 
GB/s per node +Grid : Message : Stencil 17.1873 GB/s per node +Grid : Message : Stencil 13.0416 GB/s per node +Grid : Message : Average mflops/s per call per node : 666452 +Grid : Message : Average mflops/s per call per node : 795137 +Grid : Message : Average mflops/s per call per node : 828115 +Grid : Message : Average mflops/s per call per node : 662954 +Grid : Message : Average mflops/s per call per node (full): 314428 +Grid : Message : Average mflops/s per call per node (full): 437022 +Grid : Message : Average mflops/s per call per node (full): 442798 +Grid : Message : Average mflops/s per call per node (full): 305421 +Grid : Message : Stencil 14.7139 GB/s per node +Grid : Message : Stencil 16.6484 GB/s per node +Grid : Message : Stencil 17.9508 GB/s per node +Grid : Message : Stencil 13.5629 GB/s per node +Grid : Message : Average mflops/s per call per node : 665832 +Grid : Message : Average mflops/s per call per node : 803880 +Grid : Message : Average mflops/s per call per node : 821362 +Grid : Message : Average mflops/s per call per node : 663756 +Grid : Message : Average mflops/s per call per node (full): 316832 +Grid : Message : Average mflops/s per call per node (full): 436166 +Grid : Message : Average mflops/s per call per node (full): 445924 +Grid : Message : Average mflops/s per call per node (full): 305439 +Grid : Message : Stencil 13.0084 GB/s per node +Grid : Message : Stencil 16.4882 GB/s per node +Grid : Message : Stencil 17.9387 GB/s per node +Grid : Message : Stencil 14.2227 GB/s per node +Grid : Message : Average mflops/s per call per node : 671512 +Grid : Message : Average mflops/s per call per node : 807289 +Grid : Message : Average mflops/s per call per node : 821044 +Grid : Message : Average mflops/s per call per node : 663509 +Grid : Message : Average mflops/s per call per node (full): 315038 +Grid : Message : Average mflops/s per call per node (full): 434524 +Grid : Message : Average mflops/s per call per node (full): 447069 +Grid : 
Message : Average mflops/s per call per node (full): 307107 +Grid : Message : Stencil 13.0866 GB/s per node +Grid : Message : Stencil 18.7245 GB/s per node +Grid : Message : Stencil 17.3379 GB/s per node +Grid : Message : Stencil 12.6833 GB/s per node +Grid : Message : Average mflops/s per call per node : 667359 +Grid : Message : Average mflops/s per call per node : 800618 +Grid : Message : Average mflops/s per call per node : 824683 +Grid : Message : Average mflops/s per call per node : 665308 +Grid : Message : Average mflops/s per call per node (full): 314225 +Grid : Message : Average mflops/s per call per node (full): 440026 +Grid : Message : Average mflops/s per call per node (full): 443438 +Grid : Message : Average mflops/s per call per node (full): 304101 +Grid : Message : Stencil 14.1214 GB/s per node +Grid : Message : Stencil 11.1327 GB/s per node +Grid : Message : Stencil 18.438 GB/s per node +Grid : Message : Stencil 12.6113 GB/s per node +Grid : Message : Average mflops/s per call per node : 664777 +Grid : Message : Average mflops/s per call per node : 804016 +Grid : Message : Average mflops/s per call per node : 821008 +Grid : Message : Average mflops/s per call per node : 668356 +Grid : Message : Average mflops/s per call per node (full): 317217 +Grid : Message : Average mflops/s per call per node (full): 342710 +Grid : Message : Average mflops/s per call per node (full): 447021 +Grid : Message : Average mflops/s per call per node (full): 297876 +Grid : Message : Stencil 13.408 GB/s per node +Grid : Message : Stencil 17.4022 GB/s per node +Grid : Message : Stencil 16.8897 GB/s per node +Grid : Message : Stencil 13.0928 GB/s per node +Grid : Message : Average mflops/s per call per node : 667130 +Grid : Message : Average mflops/s per call per node : 807309 +Grid : Message : Average mflops/s per call per node : 821878 +Grid : Message : Average mflops/s per call per node : 666926 +Grid : Message : Average mflops/s per call per node (full): 315943 +Grid : 
Message : Average mflops/s per call per node (full): 440890 +Grid : Message : Average mflops/s per call per node (full): 438122 +Grid : Message : Average mflops/s per call per node (full): 306182 +Grid : Message : Stencil 12.573 GB/s per node +Grid : Message : Stencil 16.7063 GB/s per node +Grid : Message : Stencil 17.1319 GB/s per node +Grid : Message : Stencil 13.0951 GB/s per node +Grid : Message : Average mflops/s per call per node : 668167 +Grid : Message : Average mflops/s per call per node : 802663 +Grid : Message : Average mflops/s per call per node : 825467 +Grid : Message : Average mflops/s per call per node : 663485 +Grid : Message : Average mflops/s per call per node (full): 314510 +Grid : Message : Average mflops/s per call per node (full): 436425 +Grid : Message : Average mflops/s per call per node (full): 441905 +Grid : Message : Average mflops/s per call per node (full): 304733 +Grid : Message : Stencil 12.5289 GB/s per node +Grid : Message : Stencil 17.3595 GB/s per node +Grid : Message : Stencil 18.2739 GB/s per node +Grid : Message : Stencil 12.4844 GB/s per node +Grid : Message : Average mflops/s per call per node : 669621 +Grid : Message : Average mflops/s per call per node : 799998 +Grid : Message : Average mflops/s per call per node : 816993 +Grid : Message : Average mflops/s per call per node : 658972 +Grid : Message : Average mflops/s per call per node (full): 314123 +Grid : Message : Average mflops/s per call per node (full): 439790 +Grid : Message : Average mflops/s per call per node (full): 445963 +Grid : Message : Average mflops/s per call per node (full): 303737 +Grid : Message : Stencil 13.4448 GB/s per node +Grid : Message : Stencil 16.9993 GB/s per node +Grid : Message : Stencil 17.8453 GB/s per node +Grid : Message : Stencil 15.0281 GB/s per node +Grid : Message : Average mflops/s per call per node : 663621 +Grid : Message : Average mflops/s per call per node : 805340 +Grid : Message : Average mflops/s per call per node : 821926 
+Grid : Message : Average mflops/s per call per node : 658941 +Grid : Message : Average mflops/s per call per node (full): 315829 +Grid : Message : Average mflops/s per call per node (full): 436984 +Grid : Message : Average mflops/s per call per node (full): 439082 +Grid : Message : Average mflops/s per call per node (full): 306427 +Grid : Message : Stencil 13.0982 GB/s per node +Grid : Message : Stencil 16.6596 GB/s per node +Grid : Message : Stencil 18.4421 GB/s per node +Grid : Message : Stencil 13.3526 GB/s per node +Grid : Message : Average mflops/s per call per node : 665223 +Grid : Message : Average mflops/s per call per node : 803700 +Grid : Message : Average mflops/s per call per node : 822412 +Grid : Message : Average mflops/s per call per node : 668318 +Grid : Message : Average mflops/s per call per node (full): 315138 +Grid : Message : Average mflops/s per call per node (full): 436240 +Grid : Message : Average mflops/s per call per node (full): 449103 +Grid : Message : Average mflops/s per call per node (full): 307087 +Grid : Message : Stencil 13.1714 GB/s per node +Grid : Message : Stencil 16.8825 GB/s per node +Grid : Message : Stencil 17.5464 GB/s per node +Grid : Message : Stencil 13.121 GB/s per node +Grid : Message : Average mflops/s per call per node : 667784 +Grid : Message : Average mflops/s per call per node : 807189 +Grid : Message : Average mflops/s per call per node : 820395 +Grid : Message : Average mflops/s per call per node : 667089 +Grid : Message : Average mflops/s per call per node (full): 314869 +Grid : Message : Average mflops/s per call per node (full): 438273 +Grid : Message : Average mflops/s per call per node (full): 445656 +Grid : Message : Average mflops/s per call per node (full): 307350 +Grid : Message : Stencil 13.72 GB/s per node +Grid : Message : Stencil 12.6906 GB/s per node +Grid : Message : Stencil 18.2964 GB/s per node +Grid : Message : Stencil 13.3584 GB/s per node +Grid : Message : Average mflops/s per call per node 
: 666867 +Grid : Message : Average mflops/s per call per node : 809633 +Grid : Message : Average mflops/s per call per node : 828112 +Grid : Message : Average mflops/s per call per node : 667237 +Grid : Message : Average mflops/s per call per node (full): 315368 +Grid : Message : Average mflops/s per call per node (full): 375897 +Grid : Message : Average mflops/s per call per node (full): 448476 +Grid : Message : Average mflops/s per call per node (full): 306870 +Grid : Message : Stencil 12.6439 GB/s per node +Grid : Message : Stencil 16.0623 GB/s per node +Grid : Message : Stencil 18.2302 GB/s per node +Grid : Message : Stencil 14.5926 GB/s per node +Grid : Message : Average mflops/s per call per node : 669424 +Grid : Message : Average mflops/s per call per node : 800242 +Grid : Message : Average mflops/s per call per node : 822926 +Grid : Message : Average mflops/s per call per node : 655791 +Grid : Message : Average mflops/s per call per node (full): 313407 +Grid : Message : Average mflops/s per call per node (full): 428941 +Grid : Message : Average mflops/s per call per node (full): 447806 +Grid : Message : Average mflops/s per call per node (full): 306464 +Grid : Message : Stencil 14.4725 GB/s per node +Grid : Message : Stencil 14.2302 GB/s per node +Grid : Message : Stencil 16.734 GB/s per node +Grid : Message : Stencil 11.6196 GB/s per node +Grid : Message : Average mflops/s per call per node : 664432 +Grid : Message : Average mflops/s per call per node : 803748 +Grid : Message : Average mflops/s per call per node : 823327 +Grid : Message : Average mflops/s per call per node : 669175 +Grid : Message : Average mflops/s per call per node (full): 316590 +Grid : Message : Average mflops/s per call per node (full): 402428 +Grid : Message : Average mflops/s per call per node (full): 437468 +Grid : Message : Average mflops/s per call per node (full): 298073 +Grid : Message : Stencil 13.7889 GB/s per node +Grid : Message : Stencil 16.9001 GB/s per node +Grid : 
Message : Stencil 17.9268 GB/s per node +Grid : Message : Stencil 12.4647 GB/s per node +Grid : Message : Average mflops/s per call per node : 665976 +Grid : Message : Average mflops/s per call per node : 803838 +Grid : Message : Average mflops/s per call per node : 819200 +Grid : Message : Average mflops/s per call per node : 666380 +Grid : Message : Average mflops/s per call per node (full): 316274 +Grid : Message : Average mflops/s per call per node (full): 437659 +Grid : Message : Average mflops/s per call per node (full): 446601 +Grid : Message : Average mflops/s per call per node (full): 305255 +Grid : Message : Stencil 12.9962 GB/s per node +Grid : Message : Stencil 17.5956 GB/s per node +Grid : Message : Stencil 17.5064 GB/s per node +Grid : Message : Stencil 12.5874 GB/s per node +Grid : Message : Average mflops/s per call per node : 668479 +Grid : Message : Average mflops/s per call per node : 799041 +Grid : Message : Average mflops/s per call per node : 818390 +Grid : Message : Average mflops/s per call per node : 664712 +Grid : Message : Average mflops/s per call per node (full): 315237 +Grid : Message : Average mflops/s per call per node (full): 440281 +Grid : Message : Average mflops/s per call per node (full): 443676 +Grid : Message : Average mflops/s per call per node (full): 304852 +Grid : Message : Stencil 12.6041 GB/s per node +Grid : Message : Stencil 17.3707 GB/s per node +Grid : Message : Stencil 17.659 GB/s per node +Grid : Message : Stencil 12.3764 GB/s per node +Grid : Message : Average mflops/s per call per node : 669297 +Grid : Message : Average mflops/s per call per node : 803887 +Grid : Message : Average mflops/s per call per node : 823498 +Grid : Message : Average mflops/s per call per node : 665741 +Grid : Message : Average mflops/s per call per node (full): 315614 +Grid : Message : Average mflops/s per call per node (full): 436539 +Grid : Message : Average mflops/s per call per node (full): 440952 +Grid : Message : Average mflops/s 
per call per node (full): 304036 +Grid : Message : Stencil 13.5348 GB/s per node +Grid : Message : Stencil 17.038 GB/s per node +Grid : Message : Stencil 18.1659 GB/s per node +Grid : Message : Stencil 13.1206 GB/s per node +Grid : Message : Average mflops/s per call per node : 664512 +Grid : Message : Average mflops/s per call per node : 805318 +Grid : Message : Average mflops/s per call per node : 825232 +Grid : Message : Average mflops/s per call per node : 666321 +Grid : Message : Average mflops/s per call per node (full): 314166 +Grid : Message : Average mflops/s per call per node (full): 437732 +Grid : Message : Average mflops/s per call per node (full): 448411 +Grid : Message : Average mflops/s per call per node (full): 306062 +Grid : Message : Stencil 13.7061 GB/s per node +Grid : Message : Stencil 16.3278 GB/s per node +Grid : Message : Stencil 18.5878 GB/s per node +Grid : Message : Stencil 12.4536 GB/s per node +Grid : Message : Average mflops/s per call per node : 661594 +Grid : Message : Average mflops/s per call per node : 808149 +Grid : Message : Average mflops/s per call per node : 821405 +Grid : Message : Average mflops/s per call per node : 665914 +Grid : Message : Average mflops/s per call per node (full): 315010 +Grid : Message : Average mflops/s per call per node (full): 432917 +Grid : Message : Average mflops/s per call per node (full): 448662 +Grid : Message : Average mflops/s per call per node (full): 302919 +Grid : Message : Stencil 12.7146 GB/s per node +Grid : Message : Stencil 16.0773 GB/s per node +Grid : Message : Stencil 17.1469 GB/s per node +Grid : Message : Stencil 12.8855 GB/s per node +Grid : Message : Average mflops/s per call per node : 666923 +Grid : Message : Average mflops/s per call per node : 805953 +Grid : Message : Average mflops/s per call per node : 828904 +Grid : Message : Average mflops/s per call per node : 665301 +Grid : Message : Average mflops/s per call per node (full): 314385 +Grid : Message : Average mflops/s 
per call per node (full): 428018 +Grid : Message : Average mflops/s per call per node (full): 440554 +Grid : Message : Average mflops/s per call per node (full): 305729 +Grid : Message : Stencil 12.9741 GB/s per node +Grid : Message : Stencil 18.8804 GB/s per node +Grid : Message : Stencil 17.7919 GB/s per node +Grid : Message : Stencil 13.4678 GB/s per node +Grid : Message : Average mflops/s per call per node : 666032 +Grid : Message : Average mflops/s per call per node : 803449 +Grid : Message : Average mflops/s per call per node : 822860 +Grid : Message : Average mflops/s per call per node : 662973 +Grid : Message : Average mflops/s per call per node (full): 313551 +Grid : Message : Average mflops/s per call per node (full): 441640 +Grid : Message : Average mflops/s per call per node (full): 446690 +Grid : Message : Average mflops/s per call per node (full): 306235 +Grid : Message : Stencil 12.521 GB/s per node +Grid : Message : Stencil 16.3717 GB/s per node +Grid : Message : Stencil 16.7679 GB/s per node +Grid : Message : Stencil 12.8942 GB/s per node +Grid : Message : Average mflops/s per call per node : 665824 +Grid : Message : Average mflops/s per call per node : 806997 +Grid : Message : Average mflops/s per call per node : 825090 +Grid : Message : Average mflops/s per call per node : 664015 +Grid : Message : Average mflops/s per call per node (full): 312669 +Grid : Message : Average mflops/s per call per node (full): 432103 +Grid : Message : Average mflops/s per call per node (full): 428071 +Grid : Message : Average mflops/s per call per node (full): 305300 +Grid : Message : Stencil 13.3708 GB/s per node +Grid : Message : Stencil 16.5066 GB/s per node +Grid : Message : Stencil 16.9276 GB/s per node +Grid : Message : Stencil 12.0497 GB/s per node +Grid : Message : Average mflops/s per call per node : 666178 +Grid : Message : Average mflops/s per call per node : 804912 +Grid : Message : Average mflops/s per call per node : 822589 +Grid : Message : Average 
mflops/s per call per node : 668246 +Grid : Message : Average mflops/s per call per node (full): 316301 +Grid : Message : Average mflops/s per call per node (full): 435095 +Grid : Message : Average mflops/s per call per node (full): 436080 +Grid : Message : Average mflops/s per call per node (full): 302118 +Grid : Message : Stencil 12.7027 GB/s per node +Grid : Message : Stencil 16.5099 GB/s per node +Grid : Message : Stencil 17.1585 GB/s per node +Grid : Message : Stencil 13.2181 GB/s per node +Grid : Message : Average mflops/s per call per node : 668274 +Grid : Message : Average mflops/s per call per node : 802058 +Grid : Message : Average mflops/s per call per node : 823686 +Grid : Message : Average mflops/s per call per node : 665332 +Grid : Message : Average mflops/s per call per node (full): 314731 +Grid : Message : Average mflops/s per call per node (full): 433988 +Grid : Message : Average mflops/s per call per node (full): 441863 +Grid : Message : Average mflops/s per call per node (full): 306171 +Grid : Message : Stencil 13.5096 GB/s per node +Grid : Message : Stencil 16.6291 GB/s per node +Grid : Message : Stencil 17.6593 GB/s per node +Grid : Message : Stencil 12.738 GB/s per node +Grid : Message : Average mflops/s per call per node : 667716 +Grid : Message : Average mflops/s per call per node : 804795 +Grid : Message : Average mflops/s per call per node : 823643 +Grid : Message : Average mflops/s per call per node : 661819 +Grid : Message : Average mflops/s per call per node (full): 315837 +Grid : Message : Average mflops/s per call per node (full): 436896 +Grid : Message : Average mflops/s per call per node (full): 443696 +Grid : Message : Average mflops/s per call per node (full): 303158 +Grid : Message : Stencil 13.764 GB/s per node +Grid : Message : Stencil 16.4805 GB/s per node +Grid : Message : Stencil 16.806 GB/s per node +Grid : Message : Stencil 12.2637 GB/s per node +Grid : Message : Average mflops/s per call per node : 666394 +Grid : Message 
: Average mflops/s per call per node : 808315 +Grid : Message : Average mflops/s per call per node : 830523 +Grid : Message : Average mflops/s per call per node : 667099 +Grid : Message : Average mflops/s per call per node (full): 316598 +Grid : Message : Average mflops/s per call per node (full): 431115 +Grid : Message : Average mflops/s per call per node (full): 438804 +Grid : Message : Average mflops/s per call per node (full): 303771 +Grid : Message : Stencil 14.0315 GB/s per node +Grid : Message : Stencil 16.6177 GB/s per node +Grid : Message : Stencil 17.5158 GB/s per node +Grid : Message : Stencil 12.8021 GB/s per node +Grid : Message : Average mflops/s per call per node : 664946 +Grid : Message : Average mflops/s per call per node : 801175 +Grid : Message : Average mflops/s per call per node : 824969 +Grid : Message : Average mflops/s per call per node : 664651 +Grid : Message : Average mflops/s per call per node (full): 316485 +Grid : Message : Average mflops/s per call per node (full): 434346 +Grid : Message : Average mflops/s per call per node (full): 445188 +Grid : Message : Average mflops/s per call per node (full): 304439 +Grid : Message : Stencil 13.3968 GB/s per node +Grid : Message : Stencil 16.7046 GB/s per node +Grid : Message : Stencil 19.0282 GB/s per node +Grid : Message : Stencil 13.6061 GB/s per node +Grid : Message : Average mflops/s per call per node : 665767 +Grid : Message : Average mflops/s per call per node : 806549 +Grid : Message : Average mflops/s per call per node : 824649 +Grid : Message : Average mflops/s per call per node : 659771 +Grid : Message : Average mflops/s per call per node (full): 316021 +Grid : Message : Average mflops/s per call per node (full): 437159 +Grid : Message : Average mflops/s per call per node (full): 448624 +Grid : Message : Average mflops/s per call per node (full): 305509 +Grid : Message : Stencil 13.3675 GB/s per node +Grid : Message : Stencil 16.382 GB/s per node +Grid : Message : Stencil 18.511 GB/s 
per node +Grid : Message : Stencil 13.2205 GB/s per node +Grid : Message : Average mflops/s per call per node : 665497 +Grid : Message : Average mflops/s per call per node : 803878 +Grid : Message : Average mflops/s per call per node : 827619 +Grid : Message : Average mflops/s per call per node : 665752 +Grid : Message : Average mflops/s per call per node (full): 316256 +Grid : Message : Average mflops/s per call per node (full): 433503 +Grid : Message : Average mflops/s per call per node (full): 448486 +Grid : Message : Average mflops/s per call per node (full): 306068 +Grid : Message : Stencil 13.0375 GB/s per node +Grid : Message : Stencil 16.6764 GB/s per node +Grid : Message : Stencil 17.3085 GB/s per node +Grid : Message : Stencil 12.0552 GB/s per node +Grid : Message : Average mflops/s per call per node : 668003 +Grid : Message : Average mflops/s per call per node : 806652 +Grid : Message : Average mflops/s per call per node : 825741 +Grid : Message : Average mflops/s per call per node : 665785 +Grid : Message : Average mflops/s per call per node (full): 313382 +Grid : Message : Average mflops/s per call per node (full): 436713 +Grid : Message : Average mflops/s per call per node (full): 445120 +Grid : Message : Average mflops/s per call per node (full): 302148 +Grid : Message : Stencil 14.8512 GB/s per node +Grid : Message : Stencil 16.5633 GB/s per node +Grid : Message : Stencil 17.388 GB/s per node +Grid : Message : Stencil 12.0063 GB/s per node +Grid : Message : Average mflops/s per call per node : 663881 +Grid : Message : Average mflops/s per call per node : 804976 +Grid : Message : Average mflops/s per call per node : 820549 +Grid : Message : Average mflops/s per call per node : 668148 +Grid : Message : Average mflops/s per call per node (full): 316076 +Grid : Message : Average mflops/s per call per node (full): 435694 +Grid : Message : Average mflops/s per call per node (full): 443735 +Grid : Message : Average mflops/s per call per node (full): 302773 
+Grid : Message : Stencil 13.3267 GB/s per node +Grid : Message : Stencil 17.0588 GB/s per node +Grid : Message : Stencil 16.5925 GB/s per node +Grid : Message : Stencil 13.05 GB/s per node +Grid : Message : Average mflops/s per call per node : 665763 +Grid : Message : Average mflops/s per call per node : 803088 +Grid : Message : Average mflops/s per call per node : 826037 +Grid : Message : Average mflops/s per call per node : 662986 +Grid : Message : Average mflops/s per call per node (full): 315659 +Grid : Message : Average mflops/s per call per node (full): 435380 +Grid : Message : Average mflops/s per call per node (full): 433958 +Grid : Message : Average mflops/s per call per node (full): 306295 +Grid : Message : Stencil 12.9447 GB/s per node +Grid : Message : Stencil 17.6472 GB/s per node +Grid : Message : Stencil 18.4279 GB/s per node +Grid : Message : Stencil 13.0117 GB/s per node +Grid : Message : Average mflops/s per call per node : 667222 +Grid : Message : Average mflops/s per call per node : 801881 +Grid : Message : Average mflops/s per call per node : 821229 +Grid : Message : Average mflops/s per call per node : 660378 +Grid : Message : Average mflops/s per call per node (full): 313981 +Grid : Message : Average mflops/s per call per node (full): 438879 +Grid : Message : Average mflops/s per call per node (full): 447844 +Grid : Message : Average mflops/s per call per node (full): 303894 +Grid : Message : Stencil 13.1517 GB/s per node +Grid : Message : Stencil 16.6235 GB/s per node +Grid : Message : Stencil 17.8076 GB/s per node +Grid : Message : Stencil 12.154 GB/s per node +Grid : Message : Average mflops/s per call per node : 669391 +Grid : Message : Average mflops/s per call per node : 800786 +Grid : Message : Average mflops/s per call per node : 818034 +Grid : Message : Average mflops/s per call per node : 664339 +Grid : Message : Average mflops/s per call per node (full): 315474 +Grid : Message : Average mflops/s per call per node (full): 434236 
+Grid : Message : Average mflops/s per call per node (full): 444109 +Grid : Message : Average mflops/s per call per node (full): 302887 +Grid : Message : Stencil 13.2738 GB/s per node +Grid : Message : Stencil 16.4548 GB/s per node +Grid : Message : Stencil 18.0116 GB/s per node +Grid : Message : Stencil 14.688 GB/s per node +Grid : Message : Average mflops/s per call per node : 667392 +Grid : Message : Average mflops/s per call per node : 802106 +Grid : Message : Average mflops/s per call per node : 819500 +Grid : Message : Average mflops/s per call per node : 658955 +Grid : Message : Average mflops/s per call per node (full): 316091 +Grid : Message : Average mflops/s per call per node (full): 433778 +Grid : Message : Average mflops/s per call per node (full): 446855 +Grid : Message : Average mflops/s per call per node (full): 305394 +Grid : Message : Stencil 12.7989 GB/s per node +Grid : Message : Stencil 17.0567 GB/s per node +Grid : Message : Stencil 17.9343 GB/s per node +Grid : Message : Stencil 12.7083 GB/s per node +Grid : Message : Average mflops/s per call per node : 671386 +Grid : Message : Average mflops/s per call per node : 802837 +Grid : Message : Average mflops/s per call per node : 820421 +Grid : Message : Average mflops/s per call per node : 663040 +Grid : Message : Average mflops/s per call per node (full): 315605 +Grid : Message : Average mflops/s per call per node (full): 437290 +Grid : Message : Average mflops/s per call per node (full): 447574 +Grid : Message : Average mflops/s per call per node (full): 304870 +Grid : Message : Stencil 12.2852 GB/s per node +Grid : Message : Stencil 17.3791 GB/s per node +Grid : Message : Stencil 17.1474 GB/s per node +Grid : Message : Stencil 13.436 GB/s per node +Grid : Message : Average mflops/s per call per node : 670569 +Grid : Message : Average mflops/s per call per node : 805222 +Grid : Message : Average mflops/s per call per node : 825606 +Grid : Message : Average mflops/s per call per node : 663347 
+Grid : Message : Average mflops/s per call per node (full): 312029 +Grid : Message : Average mflops/s per call per node (full): 440050 +Grid : Message : Average mflops/s per call per node (full): 441858 +Grid : Message : Average mflops/s per call per node (full): 305846 +Grid : Message : Stencil 12.2379 GB/s per node +Grid : Message : Stencil 14.0604 GB/s per node +Grid : Message : Stencil 18.6758 GB/s per node +Grid : Message : Stencil 13.2438 GB/s per node +Grid : Message : Average mflops/s per call per node : 671841 +Grid : Message : Average mflops/s per call per node : 807685 +Grid : Message : Average mflops/s per call per node : 820105 +Grid : Message : Average mflops/s per call per node : 656744 +Grid : Message : Average mflops/s per call per node (full): 311636 +Grid : Message : Average mflops/s per call per node (full): 401570 +Grid : Message : Average mflops/s per call per node (full): 447034 +Grid : Message : Average mflops/s per call per node (full): 304702 +Grid : Message : Stencil 13.5392 GB/s per node +Grid : Message : Stencil 17.0725 GB/s per node +Grid : Message : Stencil 17.1784 GB/s per node +Grid : Message : Stencil 12.2392 GB/s per node +Grid : Message : Average mflops/s per call per node : 664917 +Grid : Message : Average mflops/s per call per node : 801687 +Grid : Message : Average mflops/s per call per node : 815332 +Grid : Message : Average mflops/s per call per node : 656729 +Grid : Message : Average mflops/s per call per node (full): 315726 +Grid : Message : Average mflops/s per call per node (full): 434728 +Grid : Message : Average mflops/s per call per node (full): 431152 +Grid : Message : Average mflops/s per call per node (full): 302133 +Grid : Message : Stencil 13.5374 GB/s per node +Grid : Message : Stencil 16.5367 GB/s per node +Grid : Message : Stencil 17.4212 GB/s per node +Grid : Message : Stencil 12.8501 GB/s per node +Grid : Message : Average mflops/s per call per node : 668077 +Grid : Message : Average mflops/s per call per 
node : 803680 +Grid : Message : Average mflops/s per call per node : 826720 +Grid : Message : Average mflops/s per call per node : 665432 +Grid : Message : Average mflops/s per call per node (full): 316753 +Grid : Message : Average mflops/s per call per node (full): 434204 +Grid : Message : Average mflops/s per call per node (full): 444228 +Grid : Message : Average mflops/s per call per node (full): 305541 +Grid : Message : Stencil 12.8187 GB/s per node +Grid : Message : Stencil 16.7074 GB/s per node +Grid : Message : Stencil 18.4389 GB/s per node +Grid : Message : Stencil 12.8444 GB/s per node +Grid : Message : Average mflops/s per call per node : 670256 +Grid : Message : Average mflops/s per call per node : 804534 +Grid : Message : Average mflops/s per call per node : 820132 +Grid : Message : Average mflops/s per call per node : 666422 +Grid : Message : Average mflops/s per call per node (full): 315529 +Grid : Message : Average mflops/s per call per node (full): 435032 +Grid : Message : Average mflops/s per call per node (full): 447477 +Grid : Message : Average mflops/s per call per node (full): 304948 +Grid : Message : Stencil 13.379 GB/s per node +Grid : Message : Stencil 17.7912 GB/s per node +Grid : Message : Stencil 17.7083 GB/s per node +Grid : Message : Stencil 14.0831 GB/s per node +Grid : Message : Average mflops/s per call per node : 670713 +Grid : Message : Average mflops/s per call per node : 801535 +Grid : Message : Average mflops/s per call per node : 820180 +Grid : Message : Average mflops/s per call per node : 660857 +Grid : Message : Average mflops/s per call per node (full): 316493 +Grid : Message : Average mflops/s per call per node (full): 439429 +Grid : Message : Average mflops/s per call per node (full): 442506 +Grid : Message : Average mflops/s per call per node (full): 306858 +Grid : Message : Stencil 13.1634 GB/s per node +Grid : Message : Stencil 16.8688 GB/s per node +Grid : Message : Stencil 17.749 GB/s per node +Grid : Message : 
Stencil 13.1366 GB/s per node +Grid : Message : Average mflops/s per call per node : 669289 +Grid : Message : Average mflops/s per call per node : 802523 +Grid : Message : Average mflops/s per call per node : 820049 +Grid : Message : Average mflops/s per call per node : 663095 +Grid : Message : Average mflops/s per call per node (full): 314878 +Grid : Message : Average mflops/s per call per node (full): 436156 +Grid : Message : Average mflops/s per call per node (full): 444995 +Grid : Message : Average mflops/s per call per node (full): 304995 +Grid : Message : Stencil 12.3941 GB/s per node +Grid : Message : Stencil 16.754 GB/s per node +Grid : Message : Stencil 18.4191 GB/s per node +Grid : Message : Stencil 14.0827 GB/s per node +Grid : Message : Average mflops/s per call per node : 669824 +Grid : Message : Average mflops/s per call per node : 803334 +Grid : Message : Average mflops/s per call per node : 820453 +Grid : Message : Average mflops/s per call per node : 661164 +Grid : Message : Average mflops/s per call per node (full): 313443 +Grid : Message : Average mflops/s per call per node (full): 437198 +Grid : Message : Average mflops/s per call per node (full): 448787 +Grid : Message : Average mflops/s per call per node (full): 306816 +Grid : Message : Stencil 13.5058 GB/s per node +Grid : Message : Stencil 16.1241 GB/s per node +Grid : Message : Stencil 17.2608 GB/s per node +Grid : Message : Stencil 12.0562 GB/s per node +Grid : Message : Average mflops/s per call per node : 664161 +Grid : Message : Average mflops/s per call per node : 805610 +Grid : Message : Average mflops/s per call per node : 825873 +Grid : Message : Average mflops/s per call per node : 668450 +Grid : Message : Average mflops/s per call per node (full): 315219 +Grid : Message : Average mflops/s per call per node (full): 430622 +Grid : Message : Average mflops/s per call per node (full): 443767 +Grid : Message : Average mflops/s per call per node (full): 302160 +Grid : Message : Stencil 
12.4599 GB/s per node +Grid : Message : Stencil 17.2007 GB/s per node +Grid : Message : Stencil 18.89 GB/s per node +Grid : Message : Stencil 12.934 GB/s per node +Grid : Message : Average mflops/s per call per node : 668564 +Grid : Message : Average mflops/s per call per node : 807248 +Grid : Message : Average mflops/s per call per node : 818786 +Grid : Message : Average mflops/s per call per node : 657532 +Grid : Message : Average mflops/s per call per node (full): 313528 +Grid : Message : Average mflops/s per call per node (full): 440095 +Grid : Message : Average mflops/s per call per node (full): 448704 +Grid : Message : Average mflops/s per call per node (full): 304130 +Grid : Message : Stencil 12.78 GB/s per node +Grid : Message : Stencil 16.6666 GB/s per node +Grid : Message : Stencil 18.0844 GB/s per node +Grid : Message : Stencil 13.0648 GB/s per node +Grid : Message : Average mflops/s per call per node : 667875 +Grid : Message : Average mflops/s per call per node : 803602 +Grid : Message : Average mflops/s per call per node : 823689 +Grid : Message : Average mflops/s per call per node : 667685 +Grid : Message : Average mflops/s per call per node (full): 315202 +Grid : Message : Average mflops/s per call per node (full): 435594 +Grid : Message : Average mflops/s per call per node (full): 445154 +Grid : Message : Average mflops/s per call per node (full): 305952 +Grid : Message : Stencil 13.243 GB/s per node +Grid : Message : Stencil 18.4039 GB/s per node +Grid : Message : Stencil 18.6476 GB/s per node +Grid : Message : Stencil 12.592 GB/s per node +Grid : Message : Average mflops/s per call per node : 668126 +Grid : Message : Average mflops/s per call per node : 809386 +Grid : Message : Average mflops/s per call per node : 821717 +Grid : Message : Average mflops/s per call per node : 667399 +Grid : Message : Average mflops/s per call per node (full): 315372 +Grid : Message : Average mflops/s per call per node (full): 443816 +Grid : Message : Average 
mflops/s per call per node (full): 444890 +Grid : Message : Average mflops/s per call per node (full): 302195 +Grid : Message : Stencil 12.46 GB/s per node +Grid : Message : Stencil 18.2155 GB/s per node +Grid : Message : Stencil 19.4584 GB/s per node +Grid : Message : Stencil 12.0904 GB/s per node +Grid : Message : Average mflops/s per call per node : 669848 +Grid : Message : Average mflops/s per call per node : 803086 +Grid : Message : Average mflops/s per call per node : 822755 +Grid : Message : Average mflops/s per call per node : 664231 +Grid : Message : Average mflops/s per call per node (full): 313075 +Grid : Message : Average mflops/s per call per node (full): 440188 +Grid : Message : Average mflops/s per call per node (full): 451379 +Grid : Message : Average mflops/s per call per node (full): 301640 +Grid : Message : Stencil 12.8262 GB/s per node +Grid : Message : Stencil 17.1159 GB/s per node +Grid : Message : Stencil 19.3861 GB/s per node +Grid : Message : Stencil 12.3982 GB/s per node +Grid : Message : Average mflops/s per call per node : 668377 +Grid : Message : Average mflops/s per call per node : 798894 +Grid : Message : Average mflops/s per call per node : 824796 +Grid : Message : Average mflops/s per call per node : 664084 +Grid : Message : Average mflops/s per call per node (full): 315225 +Grid : Message : Average mflops/s per call per node (full): 432619 +Grid : Message : Average mflops/s per call per node (full): 450634 +Grid : Message : Average mflops/s per call per node (full): 304496 +Grid : Message : Stencil 13.0438 GB/s per node +Grid : Message : Stencil 16.5429 GB/s per node +Grid : Message : Stencil 18.9669 GB/s per node +Grid : Message : Stencil 13.4612 GB/s per node +Grid : Message : Average mflops/s per call per node : 666991 +Grid : Message : Average mflops/s per call per node : 806302 +Grid : Message : Average mflops/s per call per node : 821472 +Grid : Message : Average mflops/s per call per node : 659024 +Grid : Message : Average 
mflops/s per call per node (full): 315137 +Grid : Message : Average mflops/s per call per node (full): 434857 +Grid : Message : Average mflops/s per call per node (full): 449237 +Grid : Message : Average mflops/s per call per node (full): 303623 +Grid : Message : Stencil 12.9249 GB/s per node +Grid : Message : Stencil 16.6397 GB/s per node +Grid : Message : Stencil 18.3127 GB/s per node +Grid : Message : Stencil 12.2587 GB/s per node +Grid : Message : Average mflops/s per call per node : 667634 +Grid : Message : Average mflops/s per call per node : 805112 +Grid : Message : Average mflops/s per call per node : 816695 +Grid : Message : Average mflops/s per call per node : 671686 +Grid : Message : Average mflops/s per call per node (full): 315028 +Grid : Message : Average mflops/s per call per node (full): 426131 +Grid : Message : Average mflops/s per call per node (full): 445520 +Grid : Message : Average mflops/s per call per node (full): 305326 +Grid : Message : Stencil 12.682 GB/s per node +Grid : Message : Stencil 16.5346 GB/s per node +Grid : Message : Stencil 17.7569 GB/s per node +Grid : Message : Stencil 12.9644 GB/s per node +Grid : Message : Average mflops/s per call per node : 671225 +Grid : Message : Average mflops/s per call per node : 807645 +Grid : Message : Average mflops/s per call per node : 820011 +Grid : Message : Average mflops/s per call per node : 665084 +Grid : Message : Average mflops/s per call per node (full): 315592 +Grid : Message : Average mflops/s per call per node (full): 434958 +Grid : Message : Average mflops/s per call per node (full): 445033 +Grid : Message : Average mflops/s per call per node (full): 306041 +Grid : Message : Stencil 12.6496 GB/s per node +Grid : Message : Stencil 18.4247 GB/s per node +Grid : Message : Stencil 17.1874 GB/s per node +Grid : Message : Stencil 12.4295 GB/s per node +Grid : Message : Average mflops/s per call per node : 672262 +Grid : Message : Average mflops/s per call per node : 804437 +Grid : 
Message : Average mflops/s per call per node : 821138 +Grid : Message : Average mflops/s per call per node : 668501 +Grid : Message : Average mflops/s per call per node (full): 314993 +Grid : Message : Average mflops/s per call per node (full): 443670 +Grid : Message : Average mflops/s per call per node (full): 443111 +Grid : Message : Average mflops/s per call per node (full): 305376 +Grid : Message : Stencil 12.462 GB/s per node +Grid : Message : Stencil 10.9507 GB/s per node +Grid : Message : Stencil 17.384 GB/s per node +Grid : Message : Stencil 15.5983 GB/s per node +Grid : Message : Average mflops/s per call per node : 669546 +Grid : Message : Average mflops/s per call per node : 814575 +Grid : Message : Average mflops/s per call per node : 817613 +Grid : Message : Average mflops/s per call per node : 658649 +Grid : Message : Average mflops/s per call per node (full): 312235 +Grid : Message : Average mflops/s per call per node (full): 338314 +Grid : Message : Average mflops/s per call per node (full): 443080 +Grid : Message : Average mflops/s per call per node (full): 307255 +Grid : Message : Stencil 12.7405 GB/s per node +Grid : Message : Stencil 8.93222 GB/s per node +Grid : Message : Stencil 19.2037 GB/s per node +Grid : Message : Stencil 12.2945 GB/s per node +Grid : Message : Average mflops/s per call per node : 667278 +Grid : Message : Average mflops/s per call per node : 807521 +Grid : Message : Average mflops/s per call per node : 819983 +Grid : Message : Average mflops/s per call per node : 665896 +Grid : Message : Average mflops/s per call per node (full): 313870 +Grid : Message : Average mflops/s per call per node (full): 290570 +Grid : Message : Average mflops/s per call per node (full): 448551 +Grid : Message : Average mflops/s per call per node (full): 303654 +Grid : Message : Stencil 13.1072 GB/s per node +Grid : Message : Stencil 17.1792 GB/s per node +Grid : Message : Stencil 17.8879 GB/s per node +Grid : Message : Stencil 12.8571 GB/s per 
node +Grid : Message : Average mflops/s per call per node : 669444 +Grid : Message : Average mflops/s per call per node : 809635 +Grid : Message : Average mflops/s per call per node : 815615 +Grid : Message : Average mflops/s per call per node : 662025 +Grid : Message : Average mflops/s per call per node (full): 316555 +Grid : Message : Average mflops/s per call per node (full): 439307 +Grid : Message : Average mflops/s per call per node (full): 444712 +Grid : Message : Average mflops/s per call per node (full): 304171 +Grid : Message : Stencil 13.9813 GB/s per node +Grid : Message : Stencil 17.1032 GB/s per node +Grid : Message : Stencil 18.3618 GB/s per node +Grid : Message : Stencil 12.8147 GB/s per node +Grid : Message : Average mflops/s per call per node : 663352 +Grid : Message : Average mflops/s per call per node : 802854 +Grid : Message : Average mflops/s per call per node : 819638 +Grid : Message : Average mflops/s per call per node : 666209 +Grid : Message : Average mflops/s per call per node (full): 315948 +Grid : Message : Average mflops/s per call per node (full): 435988 +Grid : Message : Average mflops/s per call per node (full): 445941 +Grid : Message : Average mflops/s per call per node (full): 305988 +Grid : Message : Stencil 12.4969 GB/s per node +Grid : Message : Stencil 16.6988 GB/s per node +Grid : Message : Stencil 17.843 GB/s per node +Grid : Message : Stencil 12.6231 GB/s per node +Grid : Message : Average mflops/s per call per node : 670303 +Grid : Message : Average mflops/s per call per node : 803093 +Grid : Message : Average mflops/s per call per node : 820375 +Grid : Message : Average mflops/s per call per node : 666858 +Grid : Message : Average mflops/s per call per node (full): 314096 +Grid : Message : Average mflops/s per call per node (full): 436713 +Grid : Message : Average mflops/s per call per node (full): 445877 +Grid : Message : Average mflops/s per call per node (full): 304598 +Grid : Message : Stencil 13.397 GB/s per node 
+Grid : Message : Stencil 7.74249 GB/s per node +Grid : Message : Stencil 17.764 GB/s per node +Grid : Message : Stencil 13.4336 GB/s per node +Grid : Message : Average mflops/s per call per node : 667487 +Grid : Message : Average mflops/s per call per node : 810620 +Grid : Message : Average mflops/s per call per node : 826152 +Grid : Message : Average mflops/s per call per node : 658813 +Grid : Message : Average mflops/s per call per node (full): 316835 +Grid : Message : Average mflops/s per call per node (full): 259695 +Grid : Message : Average mflops/s per call per node (full): 444902 +Grid : Message : Average mflops/s per call per node (full): 305398 +Grid : Message : Stencil 12.8336 GB/s per node +Grid : Message : Stencil 16.7161 GB/s per node +Grid : Message : Stencil 18.6455 GB/s per node +Grid : Message : Stencil 12.5245 GB/s per node +Grid : Message : Average mflops/s per call per node : 670738 +Grid : Message : Average mflops/s per call per node : 804355 +Grid : Message : Average mflops/s per call per node : 819528 +Grid : Message : Average mflops/s per call per node : 666568 +Grid : Message : Average mflops/s per call per node (full): 315705 +Grid : Message : Average mflops/s per call per node (full): 429704 +Grid : Message : Average mflops/s per call per node (full): 447242 +Grid : Message : Average mflops/s per call per node (full): 304879 +Grid : Message : Stencil 12.9128 GB/s per node +Grid : Message : Stencil 16.2871 GB/s per node +Grid : Message : Stencil 18.1038 GB/s per node +Grid : Message : Stencil 12.6756 GB/s per node +Grid : Message : Average mflops/s per call per node : 670906 +Grid : Message : Average mflops/s per call per node : 804414 +Grid : Message : Average mflops/s per call per node : 823795 +Grid : Message : Average mflops/s per call per node : 667857 +Grid : Message : Average mflops/s per call per node (full): 315623 +Grid : Message : Average mflops/s per call per node (full): 432420 +Grid : Message : Average mflops/s per call per 
node (full): 445608 +Grid : Message : Average mflops/s per call per node (full): 305259 +Grid : Message : Stencil 13.1449 GB/s per node +Grid : Message : Stencil 17.4201 GB/s per node +Grid : Message : Stencil 18.0655 GB/s per node +Grid : Message : Stencil 13.6378 GB/s per node +Grid : Message : Average mflops/s per call per node : 667533 +Grid : Message : Average mflops/s per call per node : 806456 +Grid : Message : Average mflops/s per call per node : 823065 +Grid : Message : Average mflops/s per call per node : 662906 +Grid : Message : Average mflops/s per call per node (full): 315087 +Grid : Message : Average mflops/s per call per node (full): 439271 +Grid : Message : Average mflops/s per call per node (full): 446828 +Grid : Message : Average mflops/s per call per node (full): 304904 +Grid : Message : Stencil 12.5603 GB/s per node +Grid : Message : Stencil 16.4946 GB/s per node +Grid : Message : Stencil 17.2015 GB/s per node +Grid : Message : Stencil 12.6719 GB/s per node +Grid : Message : Average mflops/s per call per node : 667440 +Grid : Message : Average mflops/s per call per node : 804005 +Grid : Message : Average mflops/s per call per node : 819082 +Grid : Message : Average mflops/s per call per node : 659045 +Grid : Message : Average mflops/s per call per node (full): 313637 +Grid : Message : Average mflops/s per call per node (full): 434386 +Grid : Message : Average mflops/s per call per node (full): 441996 +Grid : Message : Average mflops/s per call per node (full): 302145 +Grid : Message : Stencil 12.5421 GB/s per node +Grid : Message : Stencil 16.6179 GB/s per node +Grid : Message : Stencil 17.576 GB/s per node +Grid : Message : Stencil 12.7209 GB/s per node +Grid : Message : Average mflops/s per call per node : 673301 +Grid : Message : Average mflops/s per call per node : 802954 +Grid : Message : Average mflops/s per call per node : 822063 +Grid : Message : Average mflops/s per call per node : 665198 +Grid : Message : Average mflops/s per call per 
node (full): 314161 +Grid : Message : Average mflops/s per call per node (full): 433576 +Grid : Message : Average mflops/s per call per node (full): 445628 +Grid : Message : Average mflops/s per call per node (full): 305214 +Grid : Message : Stencil 13.2862 GB/s per node +Grid : Message : Stencil 10.5698 GB/s per node +Grid : Message : Stencil 17.4027 GB/s per node +Grid : Message : Stencil 12.7517 GB/s per node +Grid : Message : Average mflops/s per call per node : 669822 +Grid : Message : Average mflops/s per call per node : 804922 +Grid : Message : Average mflops/s per call per node : 822865 +Grid : Message : Average mflops/s per call per node : 663862 +Grid : Message : Average mflops/s per call per node (full): 316809 +Grid : Message : Average mflops/s per call per node (full): 329286 +Grid : Message : Average mflops/s per call per node (full): 442725 +Grid : Message : Average mflops/s per call per node (full): 303149 +Grid : Message : Stencil 12.852 GB/s per node +Grid : Message : Stencil 16.5856 GB/s per node +Grid : Message : Stencil 16.8703 GB/s per node +Grid : Message : Stencil 12.3381 GB/s per node +Grid : Message : Average mflops/s per call per node : 672030 +Grid : Message : Average mflops/s per call per node : 806056 +Grid : Message : Average mflops/s per call per node : 824011 +Grid : Message : Average mflops/s per call per node : 663725 +Grid : Message : Average mflops/s per call per node (full): 313016 +Grid : Message : Average mflops/s per call per node (full): 433934 +Grid : Message : Average mflops/s per call per node (full): 437887 +Grid : Message : Average mflops/s per call per node (full): 304202 +Grid : Message : Stencil 13.1136 GB/s per node +Grid : Message : Stencil 17.4085 GB/s per node +Grid : Message : Stencil 17.6594 GB/s per node +Grid : Message : Stencil 12.5101 GB/s per node +Grid : Message : Average mflops/s per call per node : 667205 +Grid : Message : Average mflops/s per call per node : 801624 +Grid : Message : Average mflops/s 
per call per node : 821358 +Grid : Message : Average mflops/s per call per node : 666993 +Grid : Message : Average mflops/s per call per node (full): 315372 +Grid : Message : Average mflops/s per call per node (full): 438220 +Grid : Message : Average mflops/s per call per node (full): 446266 +Grid : Message : Average mflops/s per call per node (full): 305044 +Grid : Message : Stencil 12.615 GB/s per node +Grid : Message : Stencil 16.9959 GB/s per node +Grid : Message : Stencil 17.4396 GB/s per node +Grid : Message : Stencil 12.6798 GB/s per node +Grid : Message : Average mflops/s per call per node : 668788 +Grid : Message : Average mflops/s per call per node : 800461 +Grid : Message : Average mflops/s per call per node : 826309 +Grid : Message : Average mflops/s per call per node : 664766 +Grid : Message : Average mflops/s per call per node (full): 314281 +Grid : Message : Average mflops/s per call per node (full): 435637 +Grid : Message : Average mflops/s per call per node (full): 445094 +Grid : Message : Average mflops/s per call per node (full): 303595 +Grid : Message : Stencil 12.6878 GB/s per node +Grid : Message : Stencil 16.6081 GB/s per node +Grid : Message : Stencil 16.9322 GB/s per node +Grid : Message : Stencil 13.615 GB/s per node +Grid : Message : Average mflops/s per call per node : 667055 +Grid : Message : Average mflops/s per call per node : 802579 +Grid : Message : Average mflops/s per call per node : 829844 +Grid : Message : Average mflops/s per call per node : 662433 +Grid : Message : Average mflops/s per call per node (full): 314107 +Grid : Message : Average mflops/s per call per node (full): 434352 +Grid : Message : Average mflops/s per call per node (full): 429138 +Grid : Message : Average mflops/s per call per node (full): 305501 +Grid : Message : Stencil 13.0521 GB/s per node +Grid : Message : Stencil 14.0309 GB/s per node +Grid : Message : Stencil 16.8677 GB/s per node +Grid : Message : Stencil 12.5594 GB/s per node +Grid : Message : 
Average mflops/s per call per node : 668824 +Grid : Message : Average mflops/s per call per node : 810876 +Grid : Message : Average mflops/s per call per node : 821318 +Grid : Message : Average mflops/s per call per node : 664964 +Grid : Message : Average mflops/s per call per node (full): 316377 +Grid : Message : Average mflops/s per call per node (full): 399792 +Grid : Message : Average mflops/s per call per node (full): 438584 +Grid : Message : Average mflops/s per call per node (full): 302806 +Grid : Message : Stencil 15.8763 GB/s per node +Grid : Message : Stencil 17.0735 GB/s per node +Grid : Message : Stencil 18.3697 GB/s per node +Grid : Message : Stencil 12.4011 GB/s per node +Grid : Message : Average mflops/s per call per node : 658812 +Grid : Message : Average mflops/s per call per node : 807395 +Grid : Message : Average mflops/s per call per node : 825888 +Grid : Message : Average mflops/s per call per node : 663160 +Grid : Message : Average mflops/s per call per node (full): 316458 +Grid : Message : Average mflops/s per call per node (full): 439902 +Grid : Message : Average mflops/s per call per node (full): 450768 +Grid : Message : Average mflops/s per call per node (full): 304482 +Grid : Message : Stencil 13.3067 GB/s per node +Grid : Message : Stencil 10.7314 GB/s per node +Grid : Message : Stencil 17.4575 GB/s per node +Grid : Message : Stencil 12.6334 GB/s per node +Grid : Message : Average mflops/s per call per node : 663778 +Grid : Message : Average mflops/s per call per node : 804718 +Grid : Message : Average mflops/s per call per node : 827613 +Grid : Message : Average mflops/s per call per node : 668973 +Grid : Message : Average mflops/s per call per node (full): 315624 +Grid : Message : Average mflops/s per call per node (full): 332576 +Grid : Message : Average mflops/s per call per node (full): 446449 +Grid : Message : Average mflops/s per call per node (full): 306130 +Grid : Message : Stencil 12.8944 GB/s per node +Grid : Message : Stencil 
16.6851 GB/s per node +Grid : Message : Stencil 17.6033 GB/s per node +Grid : Message : Stencil 12.1455 GB/s per node +Grid : Message : Average mflops/s per call per node : 667064 +Grid : Message : Average mflops/s per call per node : 804489 +Grid : Message : Average mflops/s per call per node : 814207 +Grid : Message : Average mflops/s per call per node : 662144 +Grid : Message : Average mflops/s per call per node (full): 312704 +Grid : Message : Average mflops/s per call per node (full): 436769 +Grid : Message : Average mflops/s per call per node (full): 444143 +Grid : Message : Average mflops/s per call per node (full): 300723 +Grid : Message : Stencil 13.4574 GB/s per node +Grid : Message : Stencil 16.9887 GB/s per node +Grid : Message : Stencil 17.1818 GB/s per node +Grid : Message : Stencil 12.4573 GB/s per node +Grid : Message : Average mflops/s per call per node : 666017 +Grid : Message : Average mflops/s per call per node : 801217 +Grid : Message : Average mflops/s per call per node : 828869 +Grid : Message : Average mflops/s per call per node : 669250 +Grid : Message : Average mflops/s per call per node (full): 316097 +Grid : Message : Average mflops/s per call per node (full): 438066 +Grid : Message : Average mflops/s per call per node (full): 443785 +Grid : Message : Average mflops/s per call per node (full): 305111 +Grid : Message : Stencil 14.3726 GB/s per node +Grid : Message : Stencil 16.5001 GB/s per node +Grid : Message : Stencil 17.4919 GB/s per node +Grid : Message : Stencil 11.8296 GB/s per node +Grid : Message : Average mflops/s per call per node : 661241 +Grid : Message : Average mflops/s per call per node : 805278 +Grid : Message : Average mflops/s per call per node : 819257 +Grid : Message : Average mflops/s per call per node : 662070 +Grid : Message : Average mflops/s per call per node (full): 316571 +Grid : Message : Average mflops/s per call per node (full): 434901 +Grid : Message : Average mflops/s per call per node (full): 443915 +Grid 
: Message : Average mflops/s per call per node (full): 299770 +Grid : Message : Stencil 13.9432 GB/s per node +Grid : Message : Stencil 17.1709 GB/s per node +Grid : Message : Stencil 18.5397 GB/s per node +Grid : Message : Stencil 12.0828 GB/s per node +Grid : Message : Average mflops/s per call per node : 662025 +Grid : Message : Average mflops/s per call per node : 798735 +Grid : Message : Average mflops/s per call per node : 827157 +Grid : Message : Average mflops/s per call per node : 669224 +Grid : Message : Average mflops/s per call per node (full): 315170 +Grid : Message : Average mflops/s per call per node (full): 436317 +Grid : Message : Average mflops/s per call per node (full): 450815 +Grid : Message : Average mflops/s per call per node (full): 303414 +Grid : Message : Stencil 14.8256 GB/s per node +Grid : Message : Stencil 16.8343 GB/s per node +Grid : Message : Stencil 17.9506 GB/s per node +Grid : Message : Stencil 12.1935 GB/s per node +Grid : Message : Average mflops/s per call per node : 658702 +Grid : Message : Average mflops/s per call per node : 805008 +Grid : Message : Average mflops/s per call per node : 821807 +Grid : Message : Average mflops/s per call per node : 669907 +Grid : Message : Average mflops/s per call per node (full): 315640 +Grid : Message : Average mflops/s per call per node (full): 436405 +Grid : Message : Average mflops/s per call per node (full): 447011 +Grid : Message : Average mflops/s per call per node (full): 303721 +Grid : Message : Stencil 12.8896 GB/s per node +Grid : Message : Stencil 17.0187 GB/s per node +Grid : Message : Stencil 18.0889 GB/s per node +Grid : Message : Stencil 12.6104 GB/s per node +Grid : Message : Average mflops/s per call per node : 668724 +Grid : Message : Average mflops/s per call per node : 804606 +Grid : Message : Average mflops/s per call per node : 816875 +Grid : Message : Average mflops/s per call per node : 669186 +Grid : Message : Average mflops/s per call per node (full): 315034 +Grid 
: Message : Average mflops/s per call per node (full): 440288 +Grid : Message : Average mflops/s per call per node (full): 445646 +Grid : Message : Average mflops/s per call per node (full): 304050 +Grid : Message : Stencil 13.7508 GB/s per node +Grid : Message : Stencil 16.7089 GB/s per node +Grid : Message : Stencil 18.2857 GB/s per node +Grid : Message : Stencil 13.2618 GB/s per node +Grid : Message : Average mflops/s per call per node : 663381 +Grid : Message : Average mflops/s per call per node : 811779 +Grid : Message : Average mflops/s per call per node : 819120 +Grid : Message : Average mflops/s per call per node : 663294 +Grid : Message : Average mflops/s per call per node (full): 316059 +Grid : Message : Average mflops/s per call per node (full): 437388 +Grid : Message : Average mflops/s per call per node (full): 447847 +Grid : Message : Average mflops/s per call per node (full): 305901 +Grid : Message : Stencil 12.8254 GB/s per node +Grid : Message : Stencil 17.0104 GB/s per node +Grid : Message : Stencil 19.176 GB/s per node +Grid : Message : Stencil 12.5891 GB/s per node +Grid : Message : Average mflops/s per call per node : 663632 +Grid : Message : Average mflops/s per call per node : 805038 +Grid : Message : Average mflops/s per call per node : 820317 +Grid : Message : Average mflops/s per call per node : 661826 +Grid : Message : Average mflops/s per call per node (full): 313980 +Grid : Message : Average mflops/s per call per node (full): 438791 +Grid : Message : Average mflops/s per call per node (full): 448800 +Grid : Message : Average mflops/s per call per node (full): 304529 +Grid : Message : Stencil 15.0151 GB/s per node +Grid : Message : Stencil 16.4325 GB/s per node +Grid : Message : Stencil 17.3295 GB/s per node +Grid : Message : Stencil 11.941 GB/s per node +Grid : Message : Average mflops/s per call per node : 659911 +Grid : Message : Average mflops/s per call per node : 803247 +Grid : Message : Average mflops/s per call per node : 820411 
+Grid : Message : Average mflops/s per call per node : 671308 +Grid : Message : Average mflops/s per call per node (full): 315850 +Grid : Message : Average mflops/s per call per node (full): 433571 +Grid : Message : Average mflops/s per call per node (full): 441916 +Grid : Message : Average mflops/s per call per node (full): 301998 +Grid : Message : Stencil 14.2255 GB/s per node +Grid : Message : Stencil 17.661 GB/s per node +Grid : Message : Stencil 18.0753 GB/s per node +Grid : Message : Stencil 13.4795 GB/s per node +Grid : Message : Average mflops/s per call per node : 662586 +Grid : Message : Average mflops/s per call per node : 805452 +Grid : Message : Average mflops/s per call per node : 827893 +Grid : Message : Average mflops/s per call per node : 664931 +Grid : Message : Average mflops/s per call per node (full): 315941 +Grid : Message : Average mflops/s per call per node (full): 440056 +Grid : Message : Average mflops/s per call per node (full): 438518 +Grid : Message : Average mflops/s per call per node (full): 305840 +Grid : Message : Stencil 12.5694 GB/s per node +Grid : Message : Stencil 16.384 GB/s per node +Grid : Message : Stencil 17.0648 GB/s per node +Grid : Message : Stencil 12.5373 GB/s per node +Grid : Message : Average mflops/s per call per node : 667746 +Grid : Message : Average mflops/s per call per node : 805444 +Grid : Message : Average mflops/s per call per node : 820356 +Grid : Message : Average mflops/s per call per node : 664620 +Grid : Message : Average mflops/s per call per node (full): 313742 +Grid : Message : Average mflops/s per call per node (full): 433099 +Grid : Message : Average mflops/s per call per node (full): 440895 +Grid : Message : Average mflops/s per call per node (full): 304415 +Grid : Message : Stencil 9.82643 GB/s per node +Grid : Message : Stencil 17.1429 GB/s per node +Grid : Message : Stencil 18.2343 GB/s per node +Grid : Message : Stencil 12.7365 GB/s per node +Grid : Message : Average mflops/s per call per 
node : 667814 +Grid : Message : Average mflops/s per call per node : 806928 +Grid : Message : Average mflops/s per call per node : 821145 +Grid : Message : Average mflops/s per call per node : 666313 +Grid : Message : Average mflops/s per call per node (full): 278891 +Grid : Message : Average mflops/s per call per node (full): 438675 +Grid : Message : Average mflops/s per call per node (full): 446588 +Grid : Message : Average mflops/s per call per node (full): 305229 +Grid : Message : Stencil 12.7522 GB/s per node +Grid : Message : Stencil 17.7047 GB/s per node +Grid : Message : Stencil 18.7484 GB/s per node +Grid : Message : Stencil 13.3864 GB/s per node +Grid : Message : Average mflops/s per call per node : 667383 +Grid : Message : Average mflops/s per call per node : 804993 +Grid : Message : Average mflops/s per call per node : 822793 +Grid : Message : Average mflops/s per call per node : 667884 +Grid : Message : Average mflops/s per call per node (full): 314158 +Grid : Message : Average mflops/s per call per node (full): 439202 +Grid : Message : Average mflops/s per call per node (full): 449309 +Grid : Message : Average mflops/s per call per node (full): 306541 +Grid : Message : Stencil 12.586 GB/s per node +Grid : Message : Stencil 16.2104 GB/s per node +Grid : Message : Stencil 17.4591 GB/s per node +Grid : Message : Stencil 12.9073 GB/s per node +Grid : Message : Average mflops/s per call per node : 669139 +Grid : Message : Average mflops/s per call per node : 801610 +Grid : Message : Average mflops/s per call per node : 824310 +Grid : Message : Average mflops/s per call per node : 666021 +Grid : Message : Average mflops/s per call per node (full): 314005 +Grid : Message : Average mflops/s per call per node (full): 422426 +Grid : Message : Average mflops/s per call per node (full): 443867 +Grid : Message : Average mflops/s per call per node (full): 304793 +Grid : Message : Stencil 12.5338 GB/s per node +Grid : Message : Stencil 17.3649 GB/s per node +Grid : 
Message : Stencil 17.3414 GB/s per node +Grid : Message : Stencil 12.9893 GB/s per node +Grid : Message : Average mflops/s per call per node : 668380 +Grid : Message : Average mflops/s per call per node : 801173 +Grid : Message : Average mflops/s per call per node : 821810 +Grid : Message : Average mflops/s per call per node : 668572 +Grid : Message : Average mflops/s per call per node (full): 313675 +Grid : Message : Average mflops/s per call per node (full): 437900 +Grid : Message : Average mflops/s per call per node (full): 442784 +Grid : Message : Average mflops/s per call per node (full): 305719 +Grid : Message : Stencil 12.9458 GB/s per node +Grid : Message : Stencil 16.6798 GB/s per node +Grid : Message : Stencil 17.6924 GB/s per node +Grid : Message : Stencil 13.7427 GB/s per node +Grid : Message : Average mflops/s per call per node : 669642 +Grid : Message : Average mflops/s per call per node : 804386 +Grid : Message : Average mflops/s per call per node : 825651 +Grid : Message : Average mflops/s per call per node : 665360 +Grid : Message : Average mflops/s per call per node (full): 315893 +Grid : Message : Average mflops/s per call per node (full): 437608 +Grid : Message : Average mflops/s per call per node (full): 446511 +Grid : Message : Average mflops/s per call per node (full): 307120 +Grid : Message : Stencil 12.6993 GB/s per node +Grid : Message : Stencil 17.4135 GB/s per node +Grid : Message : Stencil 17.7356 GB/s per node +Grid : Message : Stencil 13.9442 GB/s per node +Grid : Message : Average mflops/s per call per node : 669407 +Grid : Message : Average mflops/s per call per node : 802858 +Grid : Message : Average mflops/s per call per node : 818423 +Grid : Message : Average mflops/s per call per node : 661573 +Grid : Message : Average mflops/s per call per node (full): 314725 +Grid : Message : Average mflops/s per call per node (full): 438259 +Grid : Message : Average mflops/s per call per node (full): 445175 +Grid : Message : Average mflops/s 
per call per node (full): 306416 +Grid : Message : Stencil 13.4012 GB/s per node +Grid : Message : Stencil 16.8096 GB/s per node +Grid : Message : Stencil 18.5014 GB/s per node +Grid : Message : Stencil 12.5647 GB/s per node +Grid : Message : Average mflops/s per call per node : 666205 +Grid : Message : Average mflops/s per call per node : 804380 +Grid : Message : Average mflops/s per call per node : 817652 +Grid : Message : Average mflops/s per call per node : 667548 +Grid : Message : Average mflops/s per call per node (full): 316299 +Grid : Message : Average mflops/s per call per node (full): 436949 +Grid : Message : Average mflops/s per call per node (full): 446093 +Grid : Message : Average mflops/s per call per node (full): 304572 +Grid : Message : Stencil 12.857 GB/s per node +Grid : Message : Stencil 18.2254 GB/s per node +Grid : Message : Stencil 16.6485 GB/s per node +Grid : Message : Stencil 12.3756 GB/s per node +Grid : Message : Average mflops/s per call per node : 668871 +Grid : Message : Average mflops/s per call per node : 802096 +Grid : Message : Average mflops/s per call per node : 824877 +Grid : Message : Average mflops/s per call per node : 660461 +Grid : Message : Average mflops/s per call per node (full): 315638 +Grid : Message : Average mflops/s per call per node (full): 440771 +Grid : Message : Average mflops/s per call per node (full): 424600 +Grid : Message : Average mflops/s per call per node (full): 299492 +Grid : Message : Stencil 14.3042 GB/s per node +Grid : Message : Stencil 18.4101 GB/s per node +Grid : Message : Stencil 17.7716 GB/s per node +Grid : Message : Stencil 13.5163 GB/s per node +Grid : Message : Average mflops/s per call per node : 665246 +Grid : Message : Average mflops/s per call per node : 802891 +Grid : Message : Average mflops/s per call per node : 821382 +Grid : Message : Average mflops/s per call per node : 661226 +Grid : Message : Average mflops/s per call per node (full): 316755 +Grid : Message : Average mflops/s 
per call per node (full): 442426 +Grid : Message : Average mflops/s per call per node (full): 446803 +Grid : Message : Average mflops/s per call per node (full): 305937 +Grid : Message : Stencil 13.6618 GB/s per node +Grid : Message : Stencil 16.3922 GB/s per node +Grid : Message : Stencil 17.3079 GB/s per node +Grid : Message : Stencil 13.1425 GB/s per node +Grid : Message : Average mflops/s per call per node : 668153 +Grid : Message : Average mflops/s per call per node : 806500 +Grid : Message : Average mflops/s per call per node : 823490 +Grid : Message : Average mflops/s per call per node : 663790 +Grid : Message : Average mflops/s per call per node (full): 316957 +Grid : Message : Average mflops/s per call per node (full): 434594 +Grid : Message : Average mflops/s per call per node (full): 442187 +Grid : Message : Average mflops/s per call per node (full): 304909 +Grid : Message : Stencil 13.9324 GB/s per node +Grid : Message : Stencil 17.2523 GB/s per node +Grid : Message : Stencil 17.4606 GB/s per node +Grid : Message : Stencil 12.2759 GB/s per node +Grid : Message : Average mflops/s per call per node : 664155 +Grid : Message : Average mflops/s per call per node : 802125 +Grid : Message : Average mflops/s per call per node : 819229 +Grid : Message : Average mflops/s per call per node : 664597 +Grid : Message : Average mflops/s per call per node (full): 316763 +Grid : Message : Average mflops/s per call per node (full): 438223 +Grid : Message : Average mflops/s per call per node (full): 444372 +Grid : Message : Average mflops/s per call per node (full): 303570 +Grid : Message : Stencil 13.0636 GB/s per node +Grid : Message : Stencil 15.9842 GB/s per node +Grid : Message : Stencil 17.529 GB/s per node +Grid : Message : Stencil 14.4215 GB/s per node +Grid : Message : Average mflops/s per call per node : 664767 +Grid : Message : Average mflops/s per call per node : 807391 +Grid : Message : Average mflops/s per call per node : 821570 +Grid : Message : Average 
mflops/s per call per node : 661350 +Grid : Message : Average mflops/s per call per node (full): 315462 +Grid : Message : Average mflops/s per call per node (full): 424480 +Grid : Message : Average mflops/s per call per node (full): 444477 +Grid : Message : Average mflops/s per call per node (full): 307316 +Grid : Message : Stencil 12.6748 GB/s per node +Grid : Message : Stencil 16.9539 GB/s per node +Grid : Message : Stencil 17.3641 GB/s per node +Grid : Message : Stencil 13.0659 GB/s per node +Grid : Message : Average mflops/s per call per node : 667960 +Grid : Message : Average mflops/s per call per node : 807442 +Grid : Message : Average mflops/s per call per node : 818275 +Grid : Message : Average mflops/s per call per node : 661018 +Grid : Message : Average mflops/s per call per node (full): 314619 +Grid : Message : Average mflops/s per call per node (full): 435237 +Grid : Message : Average mflops/s per call per node (full): 440508 +Grid : Message : Average mflops/s per call per node (full): 305566 +Grid : Message : Stencil 14.8347 GB/s per node +Grid : Message : Stencil 17.385 GB/s per node +Grid : Message : Stencil 17.593 GB/s per node +Grid : Message : Stencil 13.8755 GB/s per node +Grid : Message : Average mflops/s per call per node : 662295 +Grid : Message : Average mflops/s per call per node : 805309 +Grid : Message : Average mflops/s per call per node : 814833 +Grid : Message : Average mflops/s per call per node : 662705 +Grid : Message : Average mflops/s per call per node (full): 315703 +Grid : Message : Average mflops/s per call per node (full): 438796 +Grid : Message : Average mflops/s per call per node (full): 444603 +Grid : Message : Average mflops/s per call per node (full): 304304 +Grid : Message : Stencil 12.7963 GB/s per node +Grid : Message : Stencil 16.472 GB/s per node +Grid : Message : Stencil 17.7234 GB/s per node +Grid : Message : Stencil 14.8846 GB/s per node +Grid : Message : Average mflops/s per call per node : 666891 +Grid : Message 
: Average mflops/s per call per node : 805228 +Grid : Message : Average mflops/s per call per node : 826469 +Grid : Message : Average mflops/s per call per node : 666819 +Grid : Message : Average mflops/s per call per node (full): 314699 +Grid : Message : Average mflops/s per call per node (full): 434810 +Grid : Message : Average mflops/s per call per node (full): 447368 +Grid : Message : Average mflops/s per call per node (full): 308042 +Grid : Message : Stencil 14.6722 GB/s per node +Grid : Message : Stencil 17.809 GB/s per node +Grid : Message : Stencil 17.9151 GB/s per node +Grid : Message : Stencil 12.8243 GB/s per node +Grid : Message : Average mflops/s per call per node : 661154 +Grid : Message : Average mflops/s per call per node : 798660 +Grid : Message : Average mflops/s per call per node : 822729 +Grid : Message : Average mflops/s per call per node : 672214 +Grid : Message : Average mflops/s per call per node (full): 316280 +Grid : Message : Average mflops/s per call per node (full): 438026 +Grid : Message : Average mflops/s per call per node (full): 445698 +Grid : Message : Average mflops/s per call per node (full): 307144 +Grid : Message : Stencil 14.2181 GB/s per node +Grid : Message : Stencil 16.8218 GB/s per node +Grid : Message : Stencil 16.8766 GB/s per node +Grid : Message : Stencil 12.6323 GB/s per node +Grid : Message : Average mflops/s per call per node : 663563 +Grid : Message : Average mflops/s per call per node : 806464 +Grid : Message : Average mflops/s per call per node : 824904 +Grid : Message : Average mflops/s per call per node : 664000 +Grid : Message : Average mflops/s per call per node (full): 316627 +Grid : Message : Average mflops/s per call per node (full): 438328 +Grid : Message : Average mflops/s per call per node (full): 439408 +Grid : Message : Average mflops/s per call per node (full): 303348 +Grid : Message : Stencil 13.1026 GB/s per node +Grid : Message : Stencil 17.6235 GB/s per node +Grid : Message : Stencil 18.184 GB/s 
per node +Grid : Message : Stencil 12.6188 GB/s per node +Grid : Message : Average mflops/s per call per node : 668665 +Grid : Message : Average mflops/s per call per node : 799103 +Grid : Message : Average mflops/s per call per node : 823048 +Grid : Message : Average mflops/s per call per node : 665913 +Grid : Message : Average mflops/s per call per node (full): 315766 +Grid : Message : Average mflops/s per call per node (full): 439133 +Grid : Message : Average mflops/s per call per node (full): 447554 +Grid : Message : Average mflops/s per call per node (full): 304968 +Grid : Message : Stencil 13.8485 GB/s per node +Grid : Message : Stencil 18.4952 GB/s per node +Grid : Message : Stencil 17.5541 GB/s per node +Grid : Message : Stencil 12.1178 GB/s per node +Grid : Message : Average mflops/s per call per node : 663295 +Grid : Message : Average mflops/s per call per node : 805011 +Grid : Message : Average mflops/s per call per node : 825778 +Grid : Message : Average mflops/s per call per node : 671942 +Grid : Message : Average mflops/s per call per node (full): 315425 +Grid : Message : Average mflops/s per call per node (full): 440979 +Grid : Message : Average mflops/s per call per node (full): 446281 +Grid : Message : Average mflops/s per call per node (full): 303669 +Grid : Message : Stencil 15.4956 GB/s per node +Grid : Message : Stencil 16.4981 GB/s per node +Grid : Message : Stencil 18.3299 GB/s per node +Grid : Message : Stencil 12.8365 GB/s per node +Grid : Message : Average mflops/s per call per node : 663849 +Grid : Message : Average mflops/s per call per node : 806486 +Grid : Message : Average mflops/s per call per node : 820322 +Grid : Message : Average mflops/s per call per node : 666420 +Grid : Message : Average mflops/s per call per node (full): 317964 +Grid : Message : Average mflops/s per call per node (full): 434864 +Grid : Message : Average mflops/s per call per node (full): 448068 +Grid : Message : Average mflops/s per call per node (full): 
305884 +Grid : Message : Stencil 12.9709 GB/s per node +Grid : Message : Stencil 17.3497 GB/s per node +Grid : Message : Stencil 17.8545 GB/s per node +Grid : Message : Stencil 14.3185 GB/s per node +Grid : Message : Average mflops/s per call per node : 666199 +Grid : Message : Average mflops/s per call per node : 805234 +Grid : Message : Average mflops/s per call per node : 821724 +Grid : Message : Average mflops/s per call per node : 661092 +Grid : Message : Average mflops/s per call per node (full): 314666 +Grid : Message : Average mflops/s per call per node (full): 439669 +Grid : Message : Average mflops/s per call per node (full): 443151 +Grid : Message : Average mflops/s per call per node (full): 305498 +Grid : Message : Stencil 14.0359 GB/s per node +Grid : Message : Stencil 16.959 GB/s per node +Grid : Message : Stencil 17.5696 GB/s per node +Grid : Message : Stencil 14.5427 GB/s per node +Grid : Message : Average mflops/s per call per node : 663099 +Grid : Message : Average mflops/s per call per node : 801907 +Grid : Message : Average mflops/s per call per node : 825632 +Grid : Message : Average mflops/s per call per node : 656212 +Grid : Message : Average mflops/s per call per node (full): 316620 +Grid : Message : Average mflops/s per call per node (full): 437226 +Grid : Message : Average mflops/s per call per node (full): 446519 +Grid : Message : Average mflops/s per call per node (full): 307200 +Grid : Message : Stencil 13.1194 GB/s per node +Grid : Message : Stencil 16.5614 GB/s per node +Grid : Message : Stencil 17.534 GB/s per node +Grid : Message : Stencil 12.7429 GB/s per node +Grid : Message : Average mflops/s per call per node : 667941 +Grid : Message : Average mflops/s per call per node : 798824 +Grid : Message : Average mflops/s per call per node : 827515 +Grid : Message : Average mflops/s per call per node : 667869 +Grid : Message : Average mflops/s per call per node (full): 316068 +Grid : Message : Average mflops/s per call per node (full): 
433793 +Grid : Message : Average mflops/s per call per node (full): 446594 +Grid : Message : Average mflops/s per call per node (full): 305905 +Grid : Message : Stencil 14.0792 GB/s per node +Grid : Message : Stencil 16.1996 GB/s per node +Grid : Message : Stencil 17.2694 GB/s per node +Grid : Message : Stencil 12.9557 GB/s per node +Grid : Message : Average mflops/s per call per node : 663768 +Grid : Message : Average mflops/s per call per node : 807580 +Grid : Message : Average mflops/s per call per node : 820271 +Grid : Message : Average mflops/s per call per node : 662811 +Grid : Message : Average mflops/s per call per node (full): 316384 +Grid : Message : Average mflops/s per call per node (full): 431281 +Grid : Message : Average mflops/s per call per node (full): 442801 +Grid : Message : Average mflops/s per call per node (full): 305739 +Grid : Message : Stencil 12.9774 GB/s per node +Grid : Message : Stencil 17.2278 GB/s per node +Grid : Message : Stencil 17.2784 GB/s per node +Grid : Message : Stencil 13.1069 GB/s per node +Grid : Message : Average mflops/s per call per node : 664316 +Grid : Message : Average mflops/s per call per node : 805309 +Grid : Message : Average mflops/s per call per node : 822463 +Grid : Message : Average mflops/s per call per node : 666367 +Grid : Message : Average mflops/s per call per node (full): 314621 +Grid : Message : Average mflops/s per call per node (full): 431987 +Grid : Message : Average mflops/s per call per node (full): 442618 +Grid : Message : Average mflops/s per call per node (full): 305410 +Grid : Message : Stencil 13.6128 GB/s per node +Grid : Message : Stencil 16.5456 GB/s per node +Grid : Message : Stencil 17.9411 GB/s per node +Grid : Message : Stencil 11.9459 GB/s per node +Grid : Message : Average mflops/s per call per node : 660818 +Grid : Message : Average mflops/s per call per node : 799034 +Grid : Message : Average mflops/s per call per node : 821010 +Grid : Message : Average mflops/s per call per node : 
663987 +Grid : Message : Average mflops/s per call per node (full): 312564 +Grid : Message : Average mflops/s per call per node (full): 430588 +Grid : Message : Average mflops/s per call per node (full): 445427 +Grid : Message : Average mflops/s per call per node (full): 299941 +Grid : Message : Stencil 13.9729 GB/s per node +Grid : Message : Stencil 17.9032 GB/s per node +Grid : Message : Stencil 17.6193 GB/s per node +Grid : Message : Stencil 13.7253 GB/s per node +Grid : Message : Average mflops/s per call per node : 663104 +Grid : Message : Average mflops/s per call per node : 801495 +Grid : Message : Average mflops/s per call per node : 825126 +Grid : Message : Average mflops/s per call per node : 663916 +Grid : Message : Average mflops/s per call per node (full): 316378 +Grid : Message : Average mflops/s per call per node (full): 439695 +Grid : Message : Average mflops/s per call per node (full): 446413 +Grid : Message : Average mflops/s per call per node (full): 305940 +Grid : Message : Stencil 13.0165 GB/s per node +Grid : Message : Stencil 17.1341 GB/s per node +Grid : Message : Stencil 18.0119 GB/s per node +Grid : Message : Stencil 12.7273 GB/s per node +Grid : Message : Average mflops/s per call per node : 667567 +Grid : Message : Average mflops/s per call per node : 803152 +Grid : Message : Average mflops/s per call per node : 822604 +Grid : Message : Average mflops/s per call per node : 668782 +Grid : Message : Average mflops/s per call per node (full): 314125 +Grid : Message : Average mflops/s per call per node (full): 436297 +Grid : Message : Average mflops/s per call per node (full): 447752 +Grid : Message : Average mflops/s per call per node (full): 305420 +Grid : Message : Stencil 13.8381 GB/s per node +Grid : Message : Stencil 17.2971 GB/s per node +Grid : Message : Stencil 17.7722 GB/s per node +Grid : Message : Stencil 12.8672 GB/s per node +Grid : Message : Average mflops/s per call per node : 662547 +Grid : Message : Average mflops/s per 
call per node : 802852 +Grid : Message : Average mflops/s per call per node : 821718 +Grid : Message : Average mflops/s per call per node : 665407 +Grid : Message : Average mflops/s per call per node (full): 312763 +Grid : Message : Average mflops/s per call per node (full): 438642 +Grid : Message : Average mflops/s per call per node (full): 443881 +Grid : Message : Average mflops/s per call per node (full): 305626 +Grid : Message : Stencil 12.7343 GB/s per node +Grid : Message : Stencil 17.479 GB/s per node +Grid : Message : Stencil 17.3539 GB/s per node +Grid : Message : Stencil 12.7125 GB/s per node +Grid : Message : Average mflops/s per call per node : 669782 +Grid : Message : Average mflops/s per call per node : 801289 +Grid : Message : Average mflops/s per call per node : 823651 +Grid : Message : Average mflops/s per call per node : 669702 +Grid : Message : Average mflops/s per call per node (full): 314697 +Grid : Message : Average mflops/s per call per node (full): 439325 +Grid : Message : Average mflops/s per call per node (full): 442798 +Grid : Message : Average mflops/s per call per node (full): 305791 +Grid : Message : Stencil 12.907 GB/s per node +Grid : Message : Stencil 17.4736 GB/s per node +Grid : Message : Stencil 17.468 GB/s per node +Grid : Message : Stencil 12.2998 GB/s per node +Grid : Message : Average mflops/s per call per node : 667492 +Grid : Message : Average mflops/s per call per node : 806048 +Grid : Message : Average mflops/s per call per node : 820264 +Grid : Message : Average mflops/s per call per node : 667962 +Grid : Message : Average mflops/s per call per node (full): 314788 +Grid : Message : Average mflops/s per call per node (full): 440490 +Grid : Message : Average mflops/s per call per node (full): 442776 +Grid : Message : Average mflops/s per call per node (full): 303726 +Grid : Message : Stencil 12.0599 GB/s per node +Grid : Message : Stencil 16.8494 GB/s per node +Grid : Message : Stencil 17.1225 GB/s per node +Grid : Message 
: Stencil 14.4116 GB/s per node +Grid : Message : Average mflops/s per call per node : 667139 +Grid : Message : Average mflops/s per call per node : 801106 +Grid : Message : Average mflops/s per call per node : 820180 +Grid : Message : Average mflops/s per call per node : 661676 +Grid : Message : Average mflops/s per call per node (full): 309309 +Grid : Message : Average mflops/s per call per node (full): 437179 +Grid : Message : Average mflops/s per call per node (full): 441473 +Grid : Message : Average mflops/s per call per node (full): 307348 +Grid : Message : Stencil 13.1343 GB/s per node +Grid : Message : Stencil 16.8507 GB/s per node +Grid : Message : Stencil 18.1028 GB/s per node +Grid : Message : Stencil 13.8568 GB/s per node +Grid : Message : Average mflops/s per call per node : 664792 +Grid : Message : Average mflops/s per call per node : 802847 +Grid : Message : Average mflops/s per call per node : 821274 +Grid : Message : Average mflops/s per call per node : 661640 +Grid : Message : Average mflops/s per call per node (full): 315142 +Grid : Message : Average mflops/s per call per node (full): 434607 +Grid : Message : Average mflops/s per call per node (full): 445560 +Grid : Message : Average mflops/s per call per node (full): 305731 +Grid : Message : Stencil 14.1103 GB/s per node +Grid : Message : Stencil 18.1799 GB/s per node +Grid : Message : Stencil 16.1787 GB/s per node +Grid : Message : Stencil 13.0525 GB/s per node +Grid : Message : Average mflops/s per call per node : 664712 +Grid : Message : Average mflops/s per call per node : 801524 +Grid : Message : Average mflops/s per call per node : 826986 +Grid : Message : Average mflops/s per call per node : 663457 +Grid : Message : Average mflops/s per call per node (full): 316484 +Grid : Message : Average mflops/s per call per node (full): 440438 +Grid : Message : Average mflops/s per call per node (full): 419089 +Grid : Message : Average mflops/s per call per node (full): 304400 +Grid : Message : 
Stencil 14.4843 GB/s per node +Grid : Message : Stencil 16.718 GB/s per node +Grid : Message : Stencil 17.5117 GB/s per node +Grid : Message : Stencil 12.4229 GB/s per node +Grid : Message : Average mflops/s per call per node : 659603 +Grid : Message : Average mflops/s per call per node : 806019 +Grid : Message : Average mflops/s per call per node : 823493 +Grid : Message : Average mflops/s per call per node : 670796 +Grid : Message : Average mflops/s per call per node (full): 316105 +Grid : Message : Average mflops/s per call per node (full): 437210 +Grid : Message : Average mflops/s per call per node (full): 444135 +Grid : Message : Average mflops/s per call per node (full): 302971 +Grid : Message : Stencil 13.5064 GB/s per node +Grid : Message : Stencil 10.846 GB/s per node +Grid : Message : Stencil 18.2372 GB/s per node +Grid : Message : Stencil 12.0147 GB/s per node +Grid : Message : Average mflops/s per call per node : 664622 +Grid : Message : Average mflops/s per call per node : 811024 +Grid : Message : Average mflops/s per call per node : 826650 +Grid : Message : Average mflops/s per call per node : 664303 +Grid : Message : Average mflops/s per call per node (full): 314965 +Grid : Message : Average mflops/s per call per node (full): 336084 +Grid : Message : Average mflops/s per call per node (full): 449306 +Grid : Message : Average mflops/s per call per node (full): 301577 +Grid : Message : Stencil 12.3366 GB/s per node +Grid : Message : Stencil 17.1414 GB/s per node +Grid : Message : Stencil 18.1672 GB/s per node +Grid : Message : Stencil 12.544 GB/s per node +Grid : Message : Average mflops/s per call per node : 668773 +Grid : Message : Average mflops/s per call per node : 803488 +Grid : Message : Average mflops/s per call per node : 823559 +Grid : Message : Average mflops/s per call per node : 666131 +Grid : Message : Average mflops/s per call per node (full): 312170 +Grid : Message : Average mflops/s per call per node (full): 435861 +Grid : Message : 
Average mflops/s per call per node (full): 448144 +Grid : Message : Average mflops/s per call per node (full): 304729 +Grid : Message : Stencil 13.962 GB/s per node +Grid : Message : Stencil 16.6905 GB/s per node +Grid : Message : Stencil 17.603 GB/s per node +Grid : Message : Stencil 12.6235 GB/s per node +Grid : Message : Average mflops/s per call per node : 664443 +Grid : Message : Average mflops/s per call per node : 800430 +Grid : Message : Average mflops/s per call per node : 821436 +Grid : Message : Average mflops/s per call per node : 660055 +Grid : Message : Average mflops/s per call per node (full): 315811 +Grid : Message : Average mflops/s per call per node (full): 435238 +Grid : Message : Average mflops/s per call per node (full): 445246 +Grid : Message : Average mflops/s per call per node (full): 302370 +Grid : Message : Stencil 12.8179 GB/s per node +Grid : Message : Stencil 16.5557 GB/s per node +Grid : Message : Stencil 17.7032 GB/s per node +Grid : Message : Stencil 11.7236 GB/s per node +Grid : Message : Average mflops/s per call per node : 669912 +Grid : Message : Average mflops/s per call per node : 805298 +Grid : Message : Average mflops/s per call per node : 823263 +Grid : Message : Average mflops/s per call per node : 665005 +Grid : Message : Average mflops/s per call per node (full): 315095 +Grid : Message : Average mflops/s per call per node (full): 432900 +Grid : Message : Average mflops/s per call per node (full): 445229 +Grid : Message : Average mflops/s per call per node (full): 298923 +Grid : Message : Stencil 15.3521 GB/s per node +Grid : Message : Stencil 17.6488 GB/s per node +Grid : Message : Stencil 17.7158 GB/s per node +Grid : Message : Stencil 14.2351 GB/s per node +Grid : Message : Average mflops/s per call per node : 662351 +Grid : Message : Average mflops/s per call per node : 806679 +Grid : Message : Average mflops/s per call per node : 825100 +Grid : Message : Average mflops/s per call per node : 658530 +Grid : Message : 
Average mflops/s per call per node (full): 317173 +Grid : Message : Average mflops/s per call per node (full): 440280 +Grid : Message : Average mflops/s per call per node (full): 447566 +Grid : Message : Average mflops/s per call per node (full): 306379 +Grid : Message : Stencil 13.817 GB/s per node +Grid : Message : Stencil 17.4135 GB/s per node +Grid : Message : Stencil 17.6026 GB/s per node +Grid : Message : Stencil 14.0141 GB/s per node +Grid : Message : Average mflops/s per call per node : 662014 +Grid : Message : Average mflops/s per call per node : 801619 +Grid : Message : Average mflops/s per call per node : 827237 +Grid : Message : Average mflops/s per call per node : 656741 +Grid : Message : Average mflops/s per call per node (full): 315937 +Grid : Message : Average mflops/s per call per node (full): 438308 +Grid : Message : Average mflops/s per call per node (full): 445880 +Grid : Message : Average mflops/s per call per node (full): 306123 +Grid : Message : Stencil 12.7159 GB/s per node +Grid : Message : Stencil 17.4458 GB/s per node +Grid : Message : Stencil 17.2803 GB/s per node +Grid : Message : Stencil 12.1528 GB/s per node +Grid : Message : Average mflops/s per call per node : 668929 +Grid : Message : Average mflops/s per call per node : 805120 +Grid : Message : Average mflops/s per call per node : 827176 +Grid : Message : Average mflops/s per call per node : 667771 +Grid : Message : Average mflops/s per call per node (full): 315238 +Grid : Message : Average mflops/s per call per node (full): 438501 +Grid : Message : Average mflops/s per call per node (full): 443475 +Grid : Message : Average mflops/s per call per node (full): 302603 +Grid : Message : Stencil 13.6026 GB/s per node +Grid : Message : Stencil 17.4989 GB/s per node +Grid : Message : Stencil 17.1213 GB/s per node +Grid : Message : Stencil 11.9238 GB/s per node +Grid : Message : Average mflops/s per call per node : 662910 +Grid : Message : Average mflops/s per call per node : 798563 +Grid 
: Message : Average mflops/s per call per node : 825342 +Grid : Message : Average mflops/s per call per node : 665980 +Grid : Message : Average mflops/s per call per node (full): 315142 +Grid : Message : Average mflops/s per call per node (full): 437721 +Grid : Message : Average mflops/s per call per node (full): 439211 +Grid : Message : Average mflops/s per call per node (full): 300811 +Grid : Message : Stencil 13.5228 GB/s per node +Grid : Message : Stencil 16.4377 GB/s per node +Grid : Message : Stencil 17.4979 GB/s per node +Grid : Message : Stencil 12.0545 GB/s per node +Grid : Message : Average mflops/s per call per node : 664701 +Grid : Message : Average mflops/s per call per node : 806089 +Grid : Message : Average mflops/s per call per node : 822489 +Grid : Message : Average mflops/s per call per node : 664298 +Grid : Message : Average mflops/s per call per node (full): 312607 +Grid : Message : Average mflops/s per call per node (full): 433771 +Grid : Message : Average mflops/s per call per node (full): 444755 +Grid : Message : Average mflops/s per call per node (full): 299660 +Grid : Message : Stencil 12.5371 GB/s per node +Grid : Message : Stencil 17.1109 GB/s per node +Grid : Message : Stencil 17.6479 GB/s per node +Grid : Message : Stencil 12.1976 GB/s per node +Grid : Message : Average mflops/s per call per node : 669989 +Grid : Message : Average mflops/s per call per node : 807323 +Grid : Message : Average mflops/s per call per node : 820493 +Grid : Message : Average mflops/s per call per node : 658760 +Grid : Message : Average mflops/s per call per node (full): 311514 +Grid : Message : Average mflops/s per call per node (full): 440217 +Grid : Message : Average mflops/s per call per node (full): 444441 +Grid : Message : Average mflops/s per call per node (full): 302401 +Grid : Message : Stencil 12.8845 GB/s per node +Grid : Message : Stencil 17.2483 GB/s per node +Grid : Message : Stencil 18.8034 GB/s per node +Grid : Message : Stencil 12.3223 GB/s 
per node +Grid : Message : Average mflops/s per call per node : 669446 +Grid : Message : Average mflops/s per call per node : 800302 +Grid : Message : Average mflops/s per call per node : 819148 +Grid : Message : Average mflops/s per call per node : 661661 +Grid : Message : Average mflops/s per call per node (full): 314380 +Grid : Message : Average mflops/s per call per node (full): 437620 +Grid : Message : Average mflops/s per call per node (full): 447853 +Grid : Message : Average mflops/s per call per node (full): 303941 +Grid : Message : Stencil 12.9305 GB/s per node +Grid : Message : Stencil 17.4124 GB/s per node +Grid : Message : Stencil 17.5552 GB/s per node +Grid : Message : Stencil 14.0837 GB/s per node +Grid : Message : Average mflops/s per call per node : 666221 +Grid : Message : Average mflops/s per call per node : 799268 +Grid : Message : Average mflops/s per call per node : 822312 +Grid : Message : Average mflops/s per call per node : 658214 +Grid : Message : Average mflops/s per call per node (full): 315500 +Grid : Message : Average mflops/s per call per node (full): 437563 +Grid : Message : Average mflops/s per call per node (full): 434967 +Grid : Message : Average mflops/s per call per node (full): 305814 +Grid : Message : Stencil 13.6345 GB/s per node +Grid : Message : Stencil 18.8579 GB/s per node +Grid : Message : Stencil 17.4464 GB/s per node +Grid : Message : Stencil 12.8563 GB/s per node +Grid : Message : Average mflops/s per call per node : 665744 +Grid : Message : Average mflops/s per call per node : 800913 +Grid : Message : Average mflops/s per call per node : 819724 +Grid : Message : Average mflops/s per call per node : 664268 +Grid : Message : Average mflops/s per call per node (full): 315457 +Grid : Message : Average mflops/s per call per node (full): 442373 +Grid : Message : Average mflops/s per call per node (full): 443078 +Grid : Message : Average mflops/s per call per node (full): 304337 +Grid : Message : Stencil 13.2966 GB/s per 
node +Grid : Message : Stencil 16.5044 GB/s per node +Grid : Message : Stencil 17.98 GB/s per node +Grid : Message : Stencil 13.0868 GB/s per node +Grid : Message : Average mflops/s per call per node : 667082 +Grid : Message : Average mflops/s per call per node : 804780 +Grid : Message : Average mflops/s per call per node : 824113 +Grid : Message : Average mflops/s per call per node : 662976 +Grid : Message : Average mflops/s per call per node (full): 315618 +Grid : Message : Average mflops/s per call per node (full): 434837 +Grid : Message : Average mflops/s per call per node (full): 444083 +Grid : Message : Average mflops/s per call per node (full): 304187 +Grid : Message : Stencil 13.6028 GB/s per node +Grid : Message : Stencil 11.7606 GB/s per node +Grid : Message : Stencil 18.1364 GB/s per node +Grid : Message : Stencil 12.9121 GB/s per node +Grid : Message : Average mflops/s per call per node : 664563 +Grid : Message : Average mflops/s per call per node : 810802 +Grid : Message : Average mflops/s per call per node : 816970 +Grid : Message : Average mflops/s per call per node : 664115 +Grid : Message : Average mflops/s per call per node (full): 315142 +Grid : Message : Average mflops/s per call per node (full): 356461 +Grid : Message : Average mflops/s per call per node (full): 445728 +Grid : Message : Average mflops/s per call per node (full): 306152 +Grid : Message : Stencil 13.0699 GB/s per node +Grid : Message : Stencil 16.2134 GB/s per node +Grid : Message : Stencil 17.513 GB/s per node +Grid : Message : Stencil 13.8265 GB/s per node +Grid : Message : Average mflops/s per call per node : 667083 +Grid : Message : Average mflops/s per call per node : 805456 +Grid : Message : Average mflops/s per call per node : 830206 +Grid : Message : Average mflops/s per call per node : 661448 +Grid : Message : Average mflops/s per call per node (full): 313897 +Grid : Message : Average mflops/s per call per node (full): 425836 +Grid : Message : Average mflops/s per call 
per node (full): 444927 +Grid : Message : Average mflops/s per call per node (full): 306378 +Grid : Message : Stencil 13.472 GB/s per node +Grid : Message : Stencil 16.6024 GB/s per node +Grid : Message : Stencil 17.4968 GB/s per node +Grid : Message : Stencil 13.2805 GB/s per node +Grid : Message : Average mflops/s per call per node : 664531 +Grid : Message : Average mflops/s per call per node : 806542 +Grid : Message : Average mflops/s per call per node : 819825 +Grid : Message : Average mflops/s per call per node : 666230 +Grid : Message : Average mflops/s per call per node (full): 315455 +Grid : Message : Average mflops/s per call per node (full): 436165 +Grid : Message : Average mflops/s per call per node (full): 445133 +Grid : Message : Average mflops/s per call per node (full): 306197 +Grid : Message : Stencil 12.518 GB/s per node +Grid : Message : Stencil 17.3645 GB/s per node +Grid : Message : Stencil 17.1035 GB/s per node +Grid : Message : Stencil 12.106 GB/s per node +Grid : Message : Average mflops/s per call per node : 666808 +Grid : Message : Average mflops/s per call per node : 802389 +Grid : Message : Average mflops/s per call per node : 819214 +Grid : Message : Average mflops/s per call per node : 667164 +Grid : Message : Average mflops/s per call per node (full): 313082 +Grid : Message : Average mflops/s per call per node (full): 439283 +Grid : Message : Average mflops/s per call per node (full): 440588 +Grid : Message : Average mflops/s per call per node (full): 304126 +Grid : Message : Stencil 12.9822 GB/s per node +Grid : Message : Stencil 15.7679 GB/s per node +Grid : Message : Stencil 17.8964 GB/s per node +Grid : Message : Stencil 12.1026 GB/s per node +Grid : Message : Average mflops/s per call per node : 667679 +Grid : Message : Average mflops/s per call per node : 802767 +Grid : Message : Average mflops/s per call per node : 820210 +Grid : Message : Average mflops/s per call per node : 666777 +Grid : Message : Average mflops/s per call 
per node (full): 314809 +Grid : Message : Average mflops/s per call per node (full): 425015 +Grid : Message : Average mflops/s per call per node (full): 444200 +Grid : Message : Average mflops/s per call per node (full): 302785 +Grid : Message : Stencil 13.074 GB/s per node +Grid : Message : Stencil 16.3824 GB/s per node +Grid : Message : Stencil 17.4478 GB/s per node +Grid : Message : Stencil 12.6308 GB/s per node +Grid : Message : Average mflops/s per call per node : 664206 +Grid : Message : Average mflops/s per call per node : 804750 +Grid : Message : Average mflops/s per call per node : 825824 +Grid : Message : Average mflops/s per call per node : 668402 +Grid : Message : Average mflops/s per call per node (full): 313047 +Grid : Message : Average mflops/s per call per node (full): 433801 +Grid : Message : Average mflops/s per call per node (full): 446196 +Grid : Message : Average mflops/s per call per node (full): 305143 +Grid : Message : Stencil 14.5859 GB/s per node +Grid : Message : Stencil 8.49655 GB/s per node +Grid : Message : Stencil 17.462 GB/s per node +Grid : Message : Stencil 13.5331 GB/s per node +Grid : Message : Average mflops/s per call per node : 659024 +Grid : Message : Average mflops/s per call per node : 814575 +Grid : Message : Average mflops/s per call per node : 816364 +Grid : Message : Average mflops/s per call per node : 665033 +Grid : Message : Average mflops/s per call per node (full): 316545 +Grid : Message : Average mflops/s per call per node (full): 278554 +Grid : Message : Average mflops/s per call per node (full): 442461 +Grid : Message : Average mflops/s per call per node (full): 304570 +Grid : Message : Stencil 13.853 GB/s per node +Grid : Message : Stencil 17.2408 GB/s per node +Grid : Message : Stencil 17.7049 GB/s per node +Grid : Message : Stencil 13.8443 GB/s per node +Grid : Message : Average mflops/s per call per node : 661716 +Grid : Message : Average mflops/s per call per node : 803346 +Grid : Message : Average mflops/s 
per call per node : 816573 +Grid : Message : Average mflops/s per call per node : 662450 +Grid : Message : Average mflops/s per call per node (full): 316011 +Grid : Message : Average mflops/s per call per node (full): 436728 +Grid : Message : Average mflops/s per call per node (full): 441098 +Grid : Message : Average mflops/s per call per node (full): 304371 +Grid : Message : Stencil 13.3244 GB/s per node +Grid : Message : Stencil 16.8162 GB/s per node +Grid : Message : Stencil 17.7584 GB/s per node +Grid : Message : Stencil 11.9128 GB/s per node +Grid : Message : Average mflops/s per call per node : 667918 +Grid : Message : Average mflops/s per call per node : 804741 +Grid : Message : Average mflops/s per call per node : 820470 +Grid : Message : Average mflops/s per call per node : 666901 +Grid : Message : Average mflops/s per call per node (full): 314445 +Grid : Message : Average mflops/s per call per node (full): 436457 +Grid : Message : Average mflops/s per call per node (full): 443473 +Grid : Message : Average mflops/s per call per node (full): 300946 +Grid : Message : Stencil 13.4446 GB/s per node +Grid : Message : Stencil 16.8478 GB/s per node +Grid : Message : Stencil 17.7755 GB/s per node +Grid : Message : Stencil 13.8389 GB/s per node +Grid : Message : Average mflops/s per call per node : 667766 +Grid : Message : Average mflops/s per call per node : 802491 +Grid : Message : Average mflops/s per call per node : 821947 +Grid : Message : Average mflops/s per call per node : 665397 +Grid : Message : Average mflops/s per call per node (full): 316293 +Grid : Message : Average mflops/s per call per node (full): 437154 +Grid : Message : Average mflops/s per call per node (full): 446942 +Grid : Message : Average mflops/s per call per node (full): 307415 +Grid : Message : Stencil 14.0143 GB/s per node +Grid : Message : Stencil 16.6462 GB/s per node +Grid : Message : Stencil 18.0273 GB/s per node +Grid : Message : Stencil 12.2425 GB/s per node +Grid : Message : 
Average mflops/s per call per node : 666333 +Grid : Message : Average mflops/s per call per node : 805973 +Grid : Message : Average mflops/s per call per node : 818586 +Grid : Message : Average mflops/s per call per node : 667479 +Grid : Message : Average mflops/s per call per node (full): 316949 +Grid : Message : Average mflops/s per call per node (full): 434593 +Grid : Message : Average mflops/s per call per node (full): 439484 +Grid : Message : Average mflops/s per call per node (full): 303910 +Grid : Message : Stencil 13.7419 GB/s per node +Grid : Message : Stencil 16.4797 GB/s per node +Grid : Message : Stencil 16.9528 GB/s per node +Grid : Message : Stencil 12.4341 GB/s per node +Grid : Message : Average mflops/s per call per node : 669863 +Grid : Message : Average mflops/s per call per node : 804020 +Grid : Message : Average mflops/s per call per node : 818802 +Grid : Message : Average mflops/s per call per node : 663249 +Grid : Message : Average mflops/s per call per node (full): 316526 +Grid : Message : Average mflops/s per call per node (full): 431400 +Grid : Message : Average mflops/s per call per node (full): 439791 +Grid : Message : Average mflops/s per call per node (full): 303398 +Grid : Message : Stencil 12.9811 GB/s per node +Grid : Message : Stencil 16.9562 GB/s per node +Grid : Message : Stencil 18.895 GB/s per node +Grid : Message : Stencil 14.0543 GB/s per node +Grid : Message : Average mflops/s per call per node : 669785 +Grid : Message : Average mflops/s per call per node : 805240 +Grid : Message : Average mflops/s per call per node : 820750 +Grid : Message : Average mflops/s per call per node : 657780 +Grid : Message : Average mflops/s per call per node (full): 315425 +Grid : Message : Average mflops/s per call per node (full): 437260 +Grid : Message : Average mflops/s per call per node (full): 448428 +Grid : Message : Average mflops/s per call per node (full): 305976 +Grid : Message : Stencil 12.9075 GB/s per node +Grid : Message : Stencil 
16.5077 GB/s per node +Grid : Message : Stencil 18.1803 GB/s per node +Grid : Message : Stencil 13.2324 GB/s per node +Grid : Message : Average mflops/s per call per node : 669413 +Grid : Message : Average mflops/s per call per node : 806530 +Grid : Message : Average mflops/s per call per node : 820634 +Grid : Message : Average mflops/s per call per node : 663184 +Grid : Message : Average mflops/s per call per node (full): 314776 +Grid : Message : Average mflops/s per call per node (full): 434158 +Grid : Message : Average mflops/s per call per node (full): 448497 +Grid : Message : Average mflops/s per call per node (full): 304859 +Grid : Message : Stencil 13.287 GB/s per node +Grid : Message : Stencil 16.4753 GB/s per node +Grid : Message : Stencil 17.1715 GB/s per node +Grid : Message : Stencil 15.9104 GB/s per node +Grid : Message : Average mflops/s per call per node : 668487 +Grid : Message : Average mflops/s per call per node : 806878 +Grid : Message : Average mflops/s per call per node : 820880 +Grid : Message : Average mflops/s per call per node : 661266 +Grid : Message : Average mflops/s per call per node (full): 315909 +Grid : Message : Average mflops/s per call per node (full): 431333 +Grid : Message : Average mflops/s per call per node (full): 442874 +Grid : Message : Average mflops/s per call per node (full): 308064 +Grid : Message : Stencil 13.4721 GB/s per node +Grid : Message : Stencil 17.1158 GB/s per node +Grid : Message : Stencil 17.2517 GB/s per node +Grid : Message : Stencil 11.9345 GB/s per node +Grid : Message : Average mflops/s per call per node : 666143 +Grid : Message : Average mflops/s per call per node : 801154 +Grid : Message : Average mflops/s per call per node : 821732 +Grid : Message : Average mflops/s per call per node : 665116 +Grid : Message : Average mflops/s per call per node (full): 316290 +Grid : Message : Average mflops/s per call per node (full): 438552 +Grid : Message : Average mflops/s per call per node (full): 443037 +Grid 
: Message : Average mflops/s per call per node (full): 301309 +Grid : Message : Stencil 13.3112 GB/s per node +Grid : Message : Stencil 16.9799 GB/s per node +Grid : Message : Stencil 17.3504 GB/s per node +Grid : Message : Stencil 12.3472 GB/s per node +Grid : Message : Average mflops/s per call per node : 666107 +Grid : Message : Average mflops/s per call per node : 803853 +Grid : Message : Average mflops/s per call per node : 826777 +Grid : Message : Average mflops/s per call per node : 663021 +Grid : Message : Average mflops/s per call per node (full): 315759 +Grid : Message : Average mflops/s per call per node (full): 437814 +Grid : Message : Average mflops/s per call per node (full): 444277 +Grid : Message : Average mflops/s per call per node (full): 302103 +Grid : Message : Stencil 14.5476 GB/s per node +Grid : Message : Stencil 16.744 GB/s per node +Grid : Message : Stencil 18.094 GB/s per node +Grid : Message : Stencil 12.4832 GB/s per node +Grid : Message : Average mflops/s per call per node : 661807 +Grid : Message : Average mflops/s per call per node : 805374 +Grid : Message : Average mflops/s per call per node : 815879 +Grid : Message : Average mflops/s per call per node : 669430 +Grid : Message : Average mflops/s per call per node (full): 317179 +Grid : Message : Average mflops/s per call per node (full): 436523 +Grid : Message : Average mflops/s per call per node (full): 445499 +Grid : Message : Average mflops/s per call per node (full): 304739 +Grid : Message : Stencil 13.5476 GB/s per node +Grid : Message : Stencil 17.0627 GB/s per node +Grid : Message : Stencil 17.9587 GB/s per node +Grid : Message : Stencil 12.7293 GB/s per node +Grid : Message : Average mflops/s per call per node : 664909 +Grid : Message : Average mflops/s per call per node : 802387 +Grid : Message : Average mflops/s per call per node : 819178 +Grid : Message : Average mflops/s per call per node : 662260 +Grid : Message : Average mflops/s per call per node (full): 315249 +Grid : 
Message : Average mflops/s per call per node (full): 435294 +Grid : Message : Average mflops/s per call per node (full): 446118 +Grid : Message : Average mflops/s per call per node (full): 304420 +Grid : Message : Stencil 14.9442 GB/s per node +Grid : Message : Stencil 17.2568 GB/s per node +Grid : Message : Stencil 17.3896 GB/s per node +Grid : Message : Stencil 12.517 GB/s per node +Grid : Message : Average mflops/s per call per node : 663158 +Grid : Message : Average mflops/s per call per node : 803797 +Grid : Message : Average mflops/s per call per node : 820222 +Grid : Message : Average mflops/s per call per node : 664988 +Grid : Message : Average mflops/s per call per node (full): 317171 +Grid : Message : Average mflops/s per call per node (full): 437926 +Grid : Message : Average mflops/s per call per node (full): 443898 +Grid : Message : Average mflops/s per call per node (full): 303122 +Grid : Message : Stencil 13.7926 GB/s per node +Grid : Message : Stencil 10.182 GB/s per node +Grid : Message : Stencil 17.1182 GB/s per node +Grid : Message : Stencil 12.6113 GB/s per node +Grid : Message : Average mflops/s per call per node : 666118 +Grid : Message : Average mflops/s per call per node : 811413 +Grid : Message : Average mflops/s per call per node : 827386 +Grid : Message : Average mflops/s per call per node : 662404 +Grid : Message : Average mflops/s per call per node (full): 316161 +Grid : Message : Average mflops/s per call per node (full): 320980 +Grid : Message : Average mflops/s per call per node (full): 441691 +Grid : Message : Average mflops/s per call per node (full): 304311 +Grid : Message : Stencil 15.0103 GB/s per node +Grid : Message : Stencil 17.974 GB/s per node +Grid : Message : Stencil 16.8732 GB/s per node +Grid : Message : Stencil 11.9428 GB/s per node +Grid : Message : Average mflops/s per call per node : 659495 +Grid : Message : Average mflops/s per call per node : 798743 +Grid : Message : Average mflops/s per call per node : 828324 
+Grid : Message : Average mflops/s per call per node : 669846 +Grid : Message : Average mflops/s per call per node (full): 315714 +Grid : Message : Average mflops/s per call per node (full): 439913 +Grid : Message : Average mflops/s per call per node (full): 439295 +Grid : Message : Average mflops/s per call per node (full): 301726 +Grid : Message : Stencil 13.4555 GB/s per node +Grid : Message : Stencil 16.2566 GB/s per node +Grid : Message : Stencil 17.5285 GB/s per node +Grid : Message : Stencil 13.3142 GB/s per node +Grid : Message : Average mflops/s per call per node : 664309 +Grid : Message : Average mflops/s per call per node : 801698 +Grid : Message : Average mflops/s per call per node : 823972 +Grid : Message : Average mflops/s per call per node : 663338 +Grid : Message : Average mflops/s per call per node (full): 314162 +Grid : Message : Average mflops/s per call per node (full): 431179 +Grid : Message : Average mflops/s per call per node (full): 445222 +Grid : Message : Average mflops/s per call per node (full): 305774 +Grid : Message : Stencil 12.698 GB/s per node +Grid : Message : Stencil 16.5092 GB/s per node +Grid : Message : Stencil 17.4474 GB/s per node +Grid : Message : Stencil 12.3678 GB/s per node +Grid : Message : Average mflops/s per call per node : 665460 +Grid : Message : Average mflops/s per call per node : 803692 +Grid : Message : Average mflops/s per call per node : 814742 +Grid : Message : Average mflops/s per call per node : 665287 +Grid : Message : Average mflops/s per call per node (full): 314678 +Grid : Message : Average mflops/s per call per node (full): 433743 +Grid : Message : Average mflops/s per call per node (full): 432566 +Grid : Message : Average mflops/s per call per node (full): 304588 +Grid : Message : Stencil 13.7497 GB/s per node +Grid : Message : Stencil 16.4933 GB/s per node +Grid : Message : Stencil 17.969 GB/s per node +Grid : Message : Stencil 15.9667 GB/s per node +Grid : Message : Average mflops/s per call per 
node : 663973 +Grid : Message : Average mflops/s per call per node : 809146 +Grid : Message : Average mflops/s per call per node : 824922 +Grid : Message : Average mflops/s per call per node : 656900 +Grid : Message : Average mflops/s per call per node (full): 314409 +Grid : Message : Average mflops/s per call per node (full): 434490 +Grid : Message : Average mflops/s per call per node (full): 446536 +Grid : Message : Average mflops/s per call per node (full): 307389 +Grid : Message : Stencil 12.961 GB/s per node +Grid : Message : Stencil 12.5404 GB/s per node +Grid : Message : Stencil 16.8662 GB/s per node +Grid : Message : Stencil 12.6023 GB/s per node +Grid : Message : Average mflops/s per call per node : 666260 +Grid : Message : Average mflops/s per call per node : 809160 +Grid : Message : Average mflops/s per call per node : 821739 +Grid : Message : Average mflops/s per call per node : 666750 +Grid : Message : Average mflops/s per call per node (full): 313902 +Grid : Message : Average mflops/s per call per node (full): 371663 +Grid : Message : Average mflops/s per call per node (full): 437243 +Grid : Message : Average mflops/s per call per node (full): 300832 +Grid : Message : Stencil 13.1174 GB/s per node +Grid : Message : Stencil 16.4419 GB/s per node +Grid : Message : Stencil 17.6359 GB/s per node +Grid : Message : Stencil 12.9856 GB/s per node +Grid : Message : Average mflops/s per call per node : 666836 +Grid : Message : Average mflops/s per call per node : 803190 +Grid : Message : Average mflops/s per call per node : 831600 +Grid : Message : Average mflops/s per call per node : 663531 +Grid : Message : Average mflops/s per call per node (full): 315346 +Grid : Message : Average mflops/s per call per node (full): 434213 +Grid : Message : Average mflops/s per call per node (full): 446042 +Grid : Message : Average mflops/s per call per node (full): 305281 +Grid : Message : Stencil 13.8596 GB/s per node +Grid : Message : Stencil 16.4839 GB/s per node +Grid : 
Message : Stencil 16.9857 GB/s per node +Grid : Message : Stencil 11.9013 GB/s per node +Grid : Message : Average mflops/s per call per node : 665002 +Grid : Message : Average mflops/s per call per node : 803603 +Grid : Message : Average mflops/s per call per node : 826298 +Grid : Message : Average mflops/s per call per node : 666970 +Grid : Message : Average mflops/s per call per node (full): 312641 +Grid : Message : Average mflops/s per call per node (full): 435404 +Grid : Message : Average mflops/s per call per node (full): 439788 +Grid : Message : Average mflops/s per call per node (full): 300583 +Grid : Message : Stencil 13.126 GB/s per node +Grid : Message : Stencil 7.92625 GB/s per node +Grid : Message : Stencil 17.2697 GB/s per node +Grid : Message : Stencil 12.5352 GB/s per node +Grid : Message : Average mflops/s per call per node : 660996 +Grid : Message : Average mflops/s per call per node : 812284 +Grid : Message : Average mflops/s per call per node : 825278 +Grid : Message : Average mflops/s per call per node : 668621 +Grid : Message : Average mflops/s per call per node (full): 313449 +Grid : Message : Average mflops/s per call per node (full): 264451 +Grid : Message : Average mflops/s per call per node (full): 442895 +Grid : Message : Average mflops/s per call per node (full): 305272 +Grid : Message : Stencil 14.0345 GB/s per node +Grid : Message : Stencil 17.1177 GB/s per node +Grid : Message : Stencil 17.9579 GB/s per node +Grid : Message : Stencil 13.7344 GB/s per node +Grid : Message : Average mflops/s per call per node : 664502 +Grid : Message : Average mflops/s per call per node : 805953 +Grid : Message : Average mflops/s per call per node : 825593 +Grid : Message : Average mflops/s per call per node : 660114 +Grid : Message : Average mflops/s per call per node (full): 315873 +Grid : Message : Average mflops/s per call per node (full): 437394 +Grid : Message : Average mflops/s per call per node (full): 447291 +Grid : Message : Average mflops/s 
per call per node (full): 304741 +Grid : Message : Stencil 13.4645 GB/s per node +Grid : Message : Stencil 17.4987 GB/s per node +Grid : Message : Stencil 17.4917 GB/s per node +Grid : Message : Stencil 13.0298 GB/s per node +Grid : Message : Average mflops/s per call per node : 666735 +Grid : Message : Average mflops/s per call per node : 803387 +Grid : Message : Average mflops/s per call per node : 815749 +Grid : Message : Average mflops/s per call per node : 664987 +Grid : Message : Average mflops/s per call per node (full): 316438 +Grid : Message : Average mflops/s per call per node (full): 439568 +Grid : Message : Average mflops/s per call per node (full): 443471 +Grid : Message : Average mflops/s per call per node (full): 305477 +Grid : Message : Stencil 13.3655 GB/s per node +Grid : Message : Stencil 17.6846 GB/s per node +Grid : Message : Stencil 18.5753 GB/s per node +Grid : Message : Stencil 13.1193 GB/s per node +Grid : Message : Average mflops/s per call per node : 664948 +Grid : Message : Average mflops/s per call per node : 806213 +Grid : Message : Average mflops/s per call per node : 823464 +Grid : Message : Average mflops/s per call per node : 662498 +Grid : Message : Average mflops/s per call per node (full): 315014 +Grid : Message : Average mflops/s per call per node (full): 441551 +Grid : Message : Average mflops/s per call per node (full): 447925 +Grid : Message : Average mflops/s per call per node (full): 305503 +Grid : Message : Stencil 14.1704 GB/s per node +Grid : Message : Stencil 16.4379 GB/s per node +Grid : Message : Stencil 17.1236 GB/s per node +Grid : Message : Stencil 12.5188 GB/s per node +Grid : Message : Average mflops/s per call per node : 665015 +Grid : Message : Average mflops/s per call per node : 804900 +Grid : Message : Average mflops/s per call per node : 821353 +Grid : Message : Average mflops/s per call per node : 666641 +Grid : Message : Average mflops/s per call per node (full): 316231 +Grid : Message : Average mflops/s 
per call per node (full): 434665 +Grid : Message : Average mflops/s per call per node (full): 435790 +Grid : Message : Average mflops/s per call per node (full): 297563 +Grid : Message : Stencil 13.258 GB/s per node +Grid : Message : Stencil 16.6796 GB/s per node +Grid : Message : Stencil 17.6714 GB/s per node +Grid : Message : Stencil 13.6617 GB/s per node +Grid : Message : Average mflops/s per call per node : 668216 +Grid : Message : Average mflops/s per call per node : 804154 +Grid : Message : Average mflops/s per call per node : 819799 +Grid : Message : Average mflops/s per call per node : 662504 +Grid : Message : Average mflops/s per call per node (full): 315646 +Grid : Message : Average mflops/s per call per node (full): 436768 +Grid : Message : Average mflops/s per call per node (full): 445854 +Grid : Message : Average mflops/s per call per node (full): 306523 +Grid : Message : Stencil 12.5776 GB/s per node +Grid : Message : Stencil 19.1094 GB/s per node +Grid : Message : Stencil 17.8382 GB/s per node +Grid : Message : Stencil 12.3613 GB/s per node +Grid : Message : Average mflops/s per call per node : 668374 +Grid : Message : Average mflops/s per call per node : 803874 +Grid : Message : Average mflops/s per call per node : 822898 +Grid : Message : Average mflops/s per call per node : 663293 +Grid : Message : Average mflops/s per call per node (full): 314337 +Grid : Message : Average mflops/s per call per node (full): 442282 +Grid : Message : Average mflops/s per call per node (full): 445664 +Grid : Message : Average mflops/s per call per node (full): 302925 +Grid : Message : Stencil 13.4442 GB/s per node +Grid : Message : Stencil 16.8888 GB/s per node +Grid : Message : Stencil 17.1822 GB/s per node +Grid : Message : Stencil 13.2556 GB/s per node +Grid : Message : Average mflops/s per call per node : 664317 +Grid : Message : Average mflops/s per call per node : 801116 +Grid : Message : Average mflops/s per call per node : 818191 +Grid : Message : Average 
mflops/s per call per node : 661072 +Grid : Message : Average mflops/s per call per node (full): 314244 +Grid : Message : Average mflops/s per call per node (full): 437558 +Grid : Message : Average mflops/s per call per node (full): 443380 +Grid : Message : Average mflops/s per call per node (full): 305380 +Grid : Message : Stencil 13.4673 GB/s per node +Grid : Message : Stencil 17.4665 GB/s per node +Grid : Message : Stencil 18.2401 GB/s per node +Grid : Message : Stencil 13.2764 GB/s per node +Grid : Message : Average mflops/s per call per node : 664984 +Grid : Message : Average mflops/s per call per node : 805208 +Grid : Message : Average mflops/s per call per node : 827513 +Grid : Message : Average mflops/s per call per node : 664600 +Grid : Message : Average mflops/s per call per node (full): 314235 +Grid : Message : Average mflops/s per call per node (full): 438965 +Grid : Message : Average mflops/s per call per node (full): 448809 +Grid : Message : Average mflops/s per call per node (full): 306326 +Grid : Message : Stencil 14.6654 GB/s per node +Grid : Message : Stencil 7.58732 GB/s per node +Grid : Message : Stencil 18.1682 GB/s per node +Grid : Message : Stencil 11.7598 GB/s per node +Grid : Message : Average mflops/s per call per node : 661720 +Grid : Message : Average mflops/s per call per node : 806420 +Grid : Message : Average mflops/s per call per node : 818420 +Grid : Message : Average mflops/s per call per node : 668057 +Grid : Message : Average mflops/s per call per node (full): 316368 +Grid : Message : Average mflops/s per call per node (full): 255634 +Grid : Message : Average mflops/s per call per node (full): 446359 +Grid : Message : Average mflops/s per call per node (full): 298658 +Grid : Message : Stencil 14.6144 GB/s per node +Grid : Message : Stencil 16.5195 GB/s per node +Grid : Message : Stencil 17.374 GB/s per node +Grid : Message : Stencil 13.3331 GB/s per node +Grid : Message : Average mflops/s per call per node : 663454 +Grid : 
Message : Average mflops/s per call per node : 803748 +Grid : Message : Average mflops/s per call per node : 827999 +Grid : Message : Average mflops/s per call per node : 663467 +Grid : Message : Average mflops/s per call per node (full): 316129 +Grid : Message : Average mflops/s per call per node (full): 434864 +Grid : Message : Average mflops/s per call per node (full): 443165 +Grid : Message : Average mflops/s per call per node (full): 304171 +Grid : Message : Stencil 14.3637 GB/s per node +Grid : Message : Stencil 17.712 GB/s per node +Grid : Message : Stencil 17.178 GB/s per node +Grid : Message : Stencil 12.8432 GB/s per node +Grid : Message : Average mflops/s per call per node : 665494 +Grid : Message : Average mflops/s per call per node : 802527 +Grid : Message : Average mflops/s per call per node : 821649 +Grid : Message : Average mflops/s per call per node : 660891 +Grid : Message : Average mflops/s per call per node (full): 316872 +Grid : Message : Average mflops/s per call per node (full): 436560 +Grid : Message : Average mflops/s per call per node (full): 441310 +Grid : Message : Average mflops/s per call per node (full): 305030 +Grid : Message : Stencil 13.6686 GB/s per node +Grid : Message : Stencil 17.4995 GB/s per node +Grid : Message : Stencil 17.6195 GB/s per node +Grid : Message : Stencil 12.6683 GB/s per node +Grid : Message : Average mflops/s per call per node : 663927 +Grid : Message : Average mflops/s per call per node : 804827 +Grid : Message : Average mflops/s per call per node : 828559 +Grid : Message : Average mflops/s per call per node : 667145 +Grid : Message : Average mflops/s per call per node (full): 313344 +Grid : Message : Average mflops/s per call per node (full): 435859 +Grid : Message : Average mflops/s per call per node (full): 445948 +Grid : Message : Average mflops/s per call per node (full): 303649 +Grid : Message : Stencil 13.2974 GB/s per node +Grid : Message : Stencil 16.9957 GB/s per node +Grid : Message : Stencil 
17.9603 GB/s per node +Grid : Message : Stencil 13.7693 GB/s per node +Grid : Message : Average mflops/s per call per node : 660695 +Grid : Message : Average mflops/s per call per node : 799165 +Grid : Message : Average mflops/s per call per node : 820450 +Grid : Message : Average mflops/s per call per node : 661012 +Grid : Message : Average mflops/s per call per node (full): 312753 +Grid : Message : Average mflops/s per call per node (full): 436415 +Grid : Message : Average mflops/s per call per node (full): 447591 +Grid : Message : Average mflops/s per call per node (full): 306742 +Grid : Message : Stencil 14.316 GB/s per node +Grid : Message : Stencil 17.3739 GB/s per node +Grid : Message : Stencil 17.5313 GB/s per node +Grid : Message : Stencil 12.8304 GB/s per node +Grid : Message : Average mflops/s per call per node : 658790 +Grid : Message : Average mflops/s per call per node : 799323 +Grid : Message : Average mflops/s per call per node : 824848 +Grid : Message : Average mflops/s per call per node : 665160 +Grid : Message : Average mflops/s per call per node (full): 316336 +Grid : Message : Average mflops/s per call per node (full): 435574 +Grid : Message : Average mflops/s per call per node (full): 440652 +Grid : Message : Average mflops/s per call per node (full): 304537 +Grid : Message : Stencil 14.3048 GB/s per node +Grid : Message : Stencil 17.5397 GB/s per node +Grid : Message : Stencil 17.0633 GB/s per node +Grid : Message : Stencil 13.5409 GB/s per node +Grid : Message : Average mflops/s per call per node : 659717 +Grid : Message : Average mflops/s per call per node : 804485 +Grid : Message : Average mflops/s per call per node : 825617 +Grid : Message : Average mflops/s per call per node : 661768 +Grid : Message : Average mflops/s per call per node (full): 314706 +Grid : Message : Average mflops/s per call per node (full): 439069 +Grid : Message : Average mflops/s per call per node (full): 440527 +Grid : Message : Average mflops/s per call per node 
(full): 306173 +Grid : Message : Stencil 12.8025 GB/s per node +Grid : Message : Stencil 17.2434 GB/s per node +Grid : Message : Stencil 17.9128 GB/s per node +Grid : Message : Stencil 13.279 GB/s per node +Grid : Message : Average mflops/s per call per node : 668256 +Grid : Message : Average mflops/s per call per node : 803676 +Grid : Message : Average mflops/s per call per node : 820491 +Grid : Message : Average mflops/s per call per node : 662418 +Grid : Message : Average mflops/s per call per node (full): 315205 +Grid : Message : Average mflops/s per call per node (full): 437749 +Grid : Message : Average mflops/s per call per node (full): 446306 +Grid : Message : Average mflops/s per call per node (full): 305289 +Grid : Message : Stencil 12.5601 GB/s per node +Grid : Message : Stencil 16.8043 GB/s per node +Grid : Message : Stencil 16.5785 GB/s per node +Grid : Message : Stencil 12.8368 GB/s per node +Grid : Message : Average mflops/s per call per node : 668628 +Grid : Message : Average mflops/s per call per node : 803293 +Grid : Message : Average mflops/s per call per node : 821762 +Grid : Message : Average mflops/s per call per node : 660631 +Grid : Message : Average mflops/s per call per node (full): 313970 +Grid : Message : Average mflops/s per call per node (full): 437030 +Grid : Message : Average mflops/s per call per node (full): 426193 +Grid : Message : Average mflops/s per call per node (full): 303849 +Grid : Message : Stencil 12.2139 GB/s per node +Grid : Message : Stencil 16.8379 GB/s per node +Grid : Message : Stencil 17.767 GB/s per node +Grid : Message : Stencil 13.2638 GB/s per node +Grid : Message : Average mflops/s per call per node : 671858 +Grid : Message : Average mflops/s per call per node : 803407 +Grid : Message : Average mflops/s per call per node : 822154 +Grid : Message : Average mflops/s per call per node : 665694 +Grid : Message : Average mflops/s per call per node (full): 311775 +Grid : Message : Average mflops/s per call per node 
(full): 438106 +Grid : Message : Average mflops/s per call per node (full): 446340 +Grid : Message : Average mflops/s per call per node (full): 305394 +Grid : Message : Stencil 13.1395 GB/s per node +Grid : Message : Stencil 17.0386 GB/s per node +Grid : Message : Stencil 17.3741 GB/s per node +Grid : Message : Stencil 12.463 GB/s per node +Grid : Message : Average mflops/s per call per node : 667151 +Grid : Message : Average mflops/s per call per node : 802197 +Grid : Message : Average mflops/s per call per node : 822477 +Grid : Message : Average mflops/s per call per node : 664371 +Grid : Message : Average mflops/s per call per node (full): 314042 +Grid : Message : Average mflops/s per call per node (full): 437082 +Grid : Message : Average mflops/s per call per node (full): 443449 +Grid : Message : Average mflops/s per call per node (full): 303713 +Grid : Message : Stencil 12.8417 GB/s per node +Grid : Message : Stencil 16.6673 GB/s per node +Grid : Message : Stencil 19.5679 GB/s per node +Grid : Message : Stencil 12.7913 GB/s per node +Grid : Message : Average mflops/s per call per node : 669236 +Grid : Message : Average mflops/s per call per node : 803589 +Grid : Message : Average mflops/s per call per node : 821074 +Grid : Message : Average mflops/s per call per node : 663866 +Grid : Message : Average mflops/s per call per node (full): 315084 +Grid : Message : Average mflops/s per call per node (full): 436973 +Grid : Message : Average mflops/s per call per node (full): 449685 +Grid : Message : Average mflops/s per call per node (full): 306224 +Grid : Message : Stencil 12.5789 GB/s per node +Grid : Message : Stencil 15.6077 GB/s per node +Grid : Message : Stencil 17.4916 GB/s per node +Grid : Message : Stencil 13.0994 GB/s per node +Grid : Message : Average mflops/s per call per node : 666648 +Grid : Message : Average mflops/s per call per node : 802272 +Grid : Message : Average mflops/s per call per node : 822978 +Grid : Message : Average mflops/s per call per 
node : 661676 +Grid : Message : Average mflops/s per call per node (full): 314172 +Grid : Message : Average mflops/s per call per node (full): 413146 +Grid : Message : Average mflops/s per call per node (full): 444026 +Grid : Message : Average mflops/s per call per node (full): 305552 +Grid : Message : Stencil 12.7795 GB/s per node +Grid : Message : Stencil 16.6517 GB/s per node +Grid : Message : Stencil 17.9527 GB/s per node +Grid : Message : Stencil 12.8314 GB/s per node +Grid : Message : Average mflops/s per call per node : 671120 +Grid : Message : Average mflops/s per call per node : 805540 +Grid : Message : Average mflops/s per call per node : 826565 +Grid : Message : Average mflops/s per call per node : 664837 +Grid : Message : Average mflops/s per call per node (full): 315358 +Grid : Message : Average mflops/s per call per node (full): 435827 +Grid : Message : Average mflops/s per call per node (full): 447974 +Grid : Message : Average mflops/s per call per node (full): 304610 +Grid : Message : Stencil 13.9753 GB/s per node +Grid : Message : Stencil 12.3165 GB/s per node +Grid : Message : Stencil 18.4297 GB/s per node +Grid : Message : Stencil 12.4519 GB/s per node +Grid : Message : Average mflops/s per call per node : 662207 +Grid : Message : Average mflops/s per call per node : 810547 +Grid : Message : Average mflops/s per call per node : 820474 +Grid : Message : Average mflops/s per call per node : 666930 +Grid : Message : Average mflops/s per call per node (full): 315541 +Grid : Message : Average mflops/s per call per node (full): 367835 +Grid : Message : Average mflops/s per call per node (full): 447424 +Grid : Message : Average mflops/s per call per node (full): 304845 +Grid : Message : Stencil 12.2834 GB/s per node +Grid : Message : Stencil 17.2565 GB/s per node +Grid : Message : Stencil 18.1191 GB/s per node +Grid : Message : Stencil 11.8336 GB/s per node +Grid : Message : Average mflops/s per call per node : 670755 +Grid : Message : Average mflops/s 
per call per node : 802889 +Grid : Message : Average mflops/s per call per node : 820477 +Grid : Message : Average mflops/s per call per node : 667293 +Grid : Message : Average mflops/s per call per node (full): 312060 +Grid : Message : Average mflops/s per call per node (full): 436713 +Grid : Message : Average mflops/s per call per node (full): 445364 +Grid : Message : Average mflops/s per call per node (full): 300346 +Grid : Message : Stencil 12.2895 GB/s per node +Grid : Message : Stencil 17.4713 GB/s per node +Grid : Message : Stencil 17.4528 GB/s per node +Grid : Message : Stencil 12.3124 GB/s per node +Grid : Message : Average mflops/s per call per node : 668955 +Grid : Message : Average mflops/s per call per node : 808029 +Grid : Message : Average mflops/s per call per node : 820266 +Grid : Message : Average mflops/s per call per node : 666742 +Grid : Message : Average mflops/s per call per node (full): 310810 +Grid : Message : Average mflops/s per call per node (full): 439407 +Grid : Message : Average mflops/s per call per node (full): 444057 +Grid : Message : Average mflops/s per call per node (full): 303354 +Grid : Message : Stencil 13.0029 GB/s per node +Grid : Message : Stencil 16.9907 GB/s per node +Grid : Message : Stencil 18.4251 GB/s per node +Grid : Message : Stencil 14.6304 GB/s per node +Grid : Message : Average mflops/s per call per node : 667661 +Grid : Message : Average mflops/s per call per node : 804566 +Grid : Message : Average mflops/s per call per node : 822622 +Grid : Message : Average mflops/s per call per node : 659795 +Grid : Message : Average mflops/s per call per node (full): 315440 +Grid : Message : Average mflops/s per call per node (full): 437435 +Grid : Message : Average mflops/s per call per node (full): 448746 +Grid : Message : Average mflops/s per call per node (full): 305813 +Grid : Message : Stencil 12.4992 GB/s per node +Grid : Message : Stencil 16.8081 GB/s per node +Grid : Message : Stencil 16.9932 GB/s per node +Grid : 
Message : Stencil 11.8852 GB/s per node +Grid : Message : Average mflops/s per call per node : 668265 +Grid : Message : Average mflops/s per call per node : 805726 +Grid : Message : Average mflops/s per call per node : 825066 +Grid : Message : Average mflops/s per call per node : 667803 +Grid : Message : Average mflops/s per call per node (full): 311253 +Grid : Message : Average mflops/s per call per node (full): 437038 +Grid : Message : Average mflops/s per call per node (full): 440641 +Grid : Message : Average mflops/s per call per node (full): 300243 +Grid : Message : Stencil 12.858 GB/s per node +Grid : Message : Stencil 16.6191 GB/s per node +Grid : Message : Stencil 17.4867 GB/s per node +Grid : Message : Stencil 12.0766 GB/s per node +Grid : Message : Average mflops/s per call per node : 664701 +Grid : Message : Average mflops/s per call per node : 802362 +Grid : Message : Average mflops/s per call per node : 825296 +Grid : Message : Average mflops/s per call per node : 671195 +Grid : Message : Average mflops/s per call per node (full): 313291 +Grid : Message : Average mflops/s per call per node (full): 435415 +Grid : Message : Average mflops/s per call per node (full): 444173 +Grid : Message : Average mflops/s per call per node (full): 302506 +Grid : Message : Stencil 12.819 GB/s per node +Grid : Message : Stencil 16.7382 GB/s per node +Grid : Message : Stencil 17.8396 GB/s per node +Grid : Message : Stencil 12.7021 GB/s per node +Grid : Message : Average mflops/s per call per node : 661640 +Grid : Message : Average mflops/s per call per node : 800746 +Grid : Message : Average mflops/s per call per node : 825440 +Grid : Message : Average mflops/s per call per node : 664101 +Grid : Message : Average mflops/s per call per node (full): 312336 +Grid : Message : Average mflops/s per call per node (full): 435376 +Grid : Message : Average mflops/s per call per node (full): 447927 +Grid : Message : Average mflops/s per call per node (full): 303009 +Grid : Message : 
Stencil 13.2615 GB/s per node +Grid : Message : Stencil 16.7029 GB/s per node +Grid : Message : Stencil 17.164 GB/s per node +Grid : Message : Stencil 15.1159 GB/s per node +Grid : Message : Average mflops/s per call per node : 663828 +Grid : Message : Average mflops/s per call per node : 803900 +Grid : Message : Average mflops/s per call per node : 823424 +Grid : Message : Average mflops/s per call per node : 661500 +Grid : Message : Average mflops/s per call per node (full): 315033 +Grid : Message : Average mflops/s per call per node (full): 434653 +Grid : Message : Average mflops/s per call per node (full): 427996 +Grid : Message : Average mflops/s per call per node (full): 308046 +Grid : Message : Stencil 12.6615 GB/s per node +Grid : Message : Stencil 16.4168 GB/s per node +Grid : Message : Stencil 17.8024 GB/s per node +Grid : Message : Stencil 11.8658 GB/s per node +Grid : Message : Average mflops/s per call per node : 667893 +Grid : Message : Average mflops/s per call per node : 805218 +Grid : Message : Average mflops/s per call per node : 820360 +Grid : Message : Average mflops/s per call per node : 660027 +Grid : Message : Average mflops/s per call per node (full): 314693 +Grid : Message : Average mflops/s per call per node (full): 433820 +Grid : Message : Average mflops/s per call per node (full): 444477 +Grid : Message : Average mflops/s per call per node (full): 299996 +Grid : Message : Stencil 12.4537 GB/s per node +Grid : Message : Stencil 16.6316 GB/s per node +Grid : Message : Stencil 17.255 GB/s per node +Grid : Message : Stencil 13.0949 GB/s per node +Grid : Message : Average mflops/s per call per node : 667214 +Grid : Message : Average mflops/s per call per node : 807821 +Grid : Message : Average mflops/s per call per node : 821258 +Grid : Message : Average mflops/s per call per node : 665148 +Grid : Message : Average mflops/s per call per node (full): 313047 +Grid : Message : Average mflops/s per call per node (full): 435113 +Grid : Message : 
Average mflops/s per call per node (full): 441888 +Grid : Message : Average mflops/s per call per node (full): 305770 +Grid : Message : Stencil 13.026 GB/s per node +Grid : Message : Stencil 16.8508 GB/s per node +Grid : Message : Stencil 18.4139 GB/s per node +Grid : Message : Stencil 16.757 GB/s per node +Grid : Message : Average mflops/s per call per node : 664610 +Grid : Message : Average mflops/s per call per node : 796445 +Grid : Message : Average mflops/s per call per node : 819592 +Grid : Message : Average mflops/s per call per node : 657022 +Grid : Message : Average mflops/s per call per node (full): 313254 +Grid : Message : Average mflops/s per call per node (full): 435785 +Grid : Message : Average mflops/s per call per node (full): 447783 +Grid : Message : Average mflops/s per call per node (full): 307976 +Grid : Message : Stencil 13.6629 GB/s per node +Grid : Message : Stencil 14.7722 GB/s per node +Grid : Message : Stencil 17.0123 GB/s per node +Grid : Message : Stencil 14.6703 GB/s per node +Grid : Message : Average mflops/s per call per node : 666158 +Grid : Message : Average mflops/s per call per node : 808269 +Grid : Message : Average mflops/s per call per node : 823241 +Grid : Message : Average mflops/s per call per node : 659966 +Grid : Message : Average mflops/s per call per node (full): 315972 +Grid : Message : Average mflops/s per call per node (full): 408625 +Grid : Message : Average mflops/s per call per node (full): 441349 +Grid : Message : Average mflops/s per call per node (full): 307814 +Grid : Message : Stencil 13.8222 GB/s per node +Grid : Message : Stencil 16.5421 GB/s per node +Grid : Message : Stencil 17.9573 GB/s per node +Grid : Message : Stencil 13.5068 GB/s per node +Grid : Message : Average mflops/s per call per node : 662776 +Grid : Message : Average mflops/s per call per node : 802749 +Grid : Message : Average mflops/s per call per node : 819045 +Grid : Message : Average mflops/s per call per node : 667423 +Grid : Message : 
Average mflops/s per call per node (full): 315685 +Grid : Message : Average mflops/s per call per node (full): 434846 +Grid : Message : Average mflops/s per call per node (full): 445561 +Grid : Message : Average mflops/s per call per node (full): 306973 +Grid : Message : Stencil 12.6947 GB/s per node +Grid : Message : Stencil 16.6596 GB/s per node +Grid : Message : Stencil 17.3481 GB/s per node +Grid : Message : Stencil 12.2762 GB/s per node +Grid : Message : Average mflops/s per call per node : 664797 +Grid : Message : Average mflops/s per call per node : 803254 +Grid : Message : Average mflops/s per call per node : 817960 +Grid : Message : Average mflops/s per call per node : 663602 +Grid : Message : Average mflops/s per call per node (full): 313921 +Grid : Message : Average mflops/s per call per node (full): 434428 +Grid : Message : Average mflops/s per call per node (full): 443691 +Grid : Message : Average mflops/s per call per node (full): 303453 +Grid : Message : Stencil 13.0304 GB/s per node +Grid : Message : Stencil 16.5727 GB/s per node +Grid : Message : Stencil 17.5342 GB/s per node +Grid : Message : Stencil 13.6637 GB/s per node +Grid : Message : Average mflops/s per call per node : 661446 +Grid : Message : Average mflops/s per call per node : 800983 +Grid : Message : Average mflops/s per call per node : 825268 +Grid : Message : Average mflops/s per call per node : 662414 +Grid : Message : Average mflops/s per call per node (full): 312931 +Grid : Message : Average mflops/s per call per node (full): 434956 +Grid : Message : Average mflops/s per call per node (full): 446353 +Grid : Message : Average mflops/s per call per node (full): 306809 +Grid : Message : Stencil 13.4499 GB/s per node +Grid : Message : Stencil 17.8274 GB/s per node +Grid : Message : Stencil 17.3187 GB/s per node +Grid : Message : Stencil 12.2928 GB/s per node +Grid : Message : Average mflops/s per call per node : 663668 +Grid : Message : Average mflops/s per call per node : 802349 +Grid 
: Message : Average mflops/s per call per node : 824225 +Grid : Message : Average mflops/s per call per node : 664527 +Grid : Message : Average mflops/s per call per node (full): 313580 +Grid : Message : Average mflops/s per call per node (full): 437042 +Grid : Message : Average mflops/s per call per node (full): 444399 +Grid : Message : Average mflops/s per call per node (full): 303315 +Grid : Message : Stencil 13.4204 GB/s per node +Grid : Message : Stencil 14.6526 GB/s per node +Grid : Message : Stencil 17.0765 GB/s per node +Grid : Message : Stencil 13.3068 GB/s per node +Grid : Message : Average mflops/s per call per node : 665269 +Grid : Message : Average mflops/s per call per node : 801119 +Grid : Message : Average mflops/s per call per node : 821479 +Grid : Message : Average mflops/s per call per node : 660903 +Grid : Message : Average mflops/s per call per node (full): 315610 +Grid : Message : Average mflops/s per call per node (full): 410340 +Grid : Message : Average mflops/s per call per node (full): 440724 +Grid : Message : Average mflops/s per call per node (full): 304392 +Grid : Message : Stencil 12.9226 GB/s per node +Grid : Message : Stencil 16.9886 GB/s per node +Grid : Message : Stencil 18.0785 GB/s per node +Grid : Message : Stencil 12.6001 GB/s per node +Grid : Message : Average mflops/s per call per node : 667969 +Grid : Message : Average mflops/s per call per node : 805671 +Grid : Message : Average mflops/s per call per node : 825970 +Grid : Message : Average mflops/s per call per node : 668389 +Grid : Message : Average mflops/s per call per node (full): 315630 +Grid : Message : Average mflops/s per call per node (full): 437228 +Grid : Message : Average mflops/s per call per node (full): 447175 +Grid : Message : Average mflops/s per call per node (full): 305637 +Grid : Message : Stencil 13.0071 GB/s per node +Grid : Message : Stencil 16.4769 GB/s per node +Grid : Message : Stencil 17.5628 GB/s per node +Grid : Message : Stencil 12.6108 GB/s 
per node +Grid : Message : Average mflops/s per call per node : 664811 +Grid : Message : Average mflops/s per call per node : 807014 +Grid : Message : Average mflops/s per call per node : 825491 +Grid : Message : Average mflops/s per call per node : 664695 +Grid : Message : Average mflops/s per call per node (full): 314531 +Grid : Message : Average mflops/s per call per node (full): 434962 +Grid : Message : Average mflops/s per call per node (full): 445453 +Grid : Message : Average mflops/s per call per node (full): 305524 +Grid : Message : Stencil 13.0581 GB/s per node +Grid : Message : Stencil 16.6896 GB/s per node +Grid : Message : Stencil 17.6596 GB/s per node +Grid : Message : Stencil 12.3528 GB/s per node +Grid : Message : Average mflops/s per call per node : 666024 +Grid : Message : Average mflops/s per call per node : 803409 +Grid : Message : Average mflops/s per call per node : 820641 +Grid : Message : Average mflops/s per call per node : 668749 +Grid : Message : Average mflops/s per call per node (full): 314320 +Grid : Message : Average mflops/s per call per node (full): 435101 +Grid : Message : Average mflops/s per call per node (full): 444807 +Grid : Message : Average mflops/s per call per node (full): 305022 +Grid : Message : Stencil 13.858 GB/s per node +Grid : Message : Stencil 16.4654 GB/s per node +Grid : Message : Stencil 16.3535 GB/s per node +Grid : Message : Stencil 12.7462 GB/s per node +Grid : Message : Average mflops/s per call per node : 663278 +Grid : Message : Average mflops/s per call per node : 799008 +Grid : Message : Average mflops/s per call per node : 828356 +Grid : Message : Average mflops/s per call per node : 665600 +Grid : Message : Average mflops/s per call per node (full): 315862 +Grid : Message : Average mflops/s per call per node (full): 424663 +Grid : Message : Average mflops/s per call per node (full): 429391 +Grid : Message : Average mflops/s per call per node (full): 305352 +Grid : Message : Stencil 13.395 GB/s per node 
+Grid : Message : Stencil 16.9958 GB/s per node +Grid : Message : Stencil 17.0761 GB/s per node +Grid : Message : Stencil 11.9704 GB/s per node +Grid : Message : Average mflops/s per call per node : 663609 +Grid : Message : Average mflops/s per call per node : 798959 +Grid : Message : Average mflops/s per call per node : 825384 +Grid : Message : Average mflops/s per call per node : 668680 +Grid : Message : Average mflops/s per call per node (full): 315709 +Grid : Message : Average mflops/s per call per node (full): 434231 +Grid : Message : Average mflops/s per call per node (full): 442366 +Grid : Message : Average mflops/s per call per node (full): 301876 +Grid : Message : Stencil 14.9144 GB/s per node +Grid : Message : Stencil 17.035 GB/s per node +Grid : Message : Stencil 16.9675 GB/s per node +Grid : Message : Stencil 12.5384 GB/s per node +Grid : Message : Average mflops/s per call per node : 660331 +Grid : Message : Average mflops/s per call per node : 799565 +Grid : Message : Average mflops/s per call per node : 820775 +Grid : Message : Average mflops/s per call per node : 664950 +Grid : Message : Average mflops/s per call per node (full): 316087 +Grid : Message : Average mflops/s per call per node (full): 435532 +Grid : Message : Average mflops/s per call per node (full): 439632 +Grid : Message : Average mflops/s per call per node (full): 304705 +Grid : Message : Stencil 13.2079 GB/s per node +Grid : Message : Stencil 9.38049 GB/s per node +Grid : Message : Stencil 17.5985 GB/s per node +Grid : Message : Stencil 12.9394 GB/s per node +Grid : Message : Average mflops/s per call per node : 666286 +Grid : Message : Average mflops/s per call per node : 807472 +Grid : Message : Average mflops/s per call per node : 822532 +Grid : Message : Average mflops/s per call per node : 666927 +Grid : Message : Average mflops/s per call per node (full): 315426 +Grid : Message : Average mflops/s per call per node (full): 301528 +Grid : Message : Average mflops/s per call per 
node (full): 444270 +Grid : Message : Average mflops/s per call per node (full): 305483 +Grid : Message : Stencil 12.9366 GB/s per node +Grid : Message : Stencil 8.86962 GB/s per node +Grid : Message : Stencil 18.1196 GB/s per node +Grid : Message : Stencil 14.2661 GB/s per node +Grid : Message : Average mflops/s per call per node : 664986 +Grid : Message : Average mflops/s per call per node : 810537 +Grid : Message : Average mflops/s per call per node : 825000 +Grid : Message : Average mflops/s per call per node : 662504 +Grid : Message : Average mflops/s per call per node (full): 311648 +Grid : Message : Average mflops/s per call per node (full): 288921 +Grid : Message : Average mflops/s per call per node (full): 448842 +Grid : Message : Average mflops/s per call per node (full): 307355 +Grid : Message : Stencil 12.8223 GB/s per node +Grid : Message : Stencil 16.8345 GB/s per node +Grid : Message : Stencil 17.715 GB/s per node +Grid : Message : Stencil 13.3817 GB/s per node +Grid : Message : Average mflops/s per call per node : 666438 +Grid : Message : Average mflops/s per call per node : 806506 +Grid : Message : Average mflops/s per call per node : 823873 +Grid : Message : Average mflops/s per call per node : 664154 +Grid : Message : Average mflops/s per call per node (full): 314939 +Grid : Message : Average mflops/s per call per node (full): 438382 +Grid : Message : Average mflops/s per call per node (full): 445696 +Grid : Message : Average mflops/s per call per node (full): 304392 +Grid : Message : Stencil 13.5753 GB/s per node +Grid : Message : Stencil 16.8199 GB/s per node +Grid : Message : Stencil 17.6639 GB/s per node +Grid : Message : Stencil 12.508 GB/s per node +Grid : Message : Average mflops/s per call per node : 661288 +Grid : Message : Average mflops/s per call per node : 801734 +Grid : Message : Average mflops/s per call per node : 826895 +Grid : Message : Average mflops/s per call per node : 666866 +Grid : Message : Average mflops/s per call per 
node (full): 315113 +Grid : Message : Average mflops/s per call per node (full): 436627 +Grid : Message : Average mflops/s per call per node (full): 445885 +Grid : Message : Average mflops/s per call per node (full): 304389 +Grid : Message : Stencil 13.2609 GB/s per node +Grid : Message : Stencil 16.5876 GB/s per node +Grid : Message : Stencil 17.518 GB/s per node +Grid : Message : Stencil 12.622 GB/s per node +Grid : Message : Average mflops/s per call per node : 666661 +Grid : Message : Average mflops/s per call per node : 806564 +Grid : Message : Average mflops/s per call per node : 829801 +Grid : Message : Average mflops/s per call per node : 666031 +Grid : Message : Average mflops/s per call per node (full): 315103 +Grid : Message : Average mflops/s per call per node (full): 434902 +Grid : Message : Average mflops/s per call per node (full): 446852 +Grid : Message : Average mflops/s per call per node (full): 303927 +Grid : Message : Stencil 12.7534 GB/s per node +Grid : Message : Stencil 18.1301 GB/s per node +Grid : Message : Stencil 17.676 GB/s per node +Grid : Message : Stencil 12.6733 GB/s per node +Grid : Message : Average mflops/s per call per node : 666771 +Grid : Message : Average mflops/s per call per node : 795816 +Grid : Message : Average mflops/s per call per node : 819261 +Grid : Message : Average mflops/s per call per node : 661509 +Grid : Message : Average mflops/s per call per node (full): 313882 +Grid : Message : Average mflops/s per call per node (full): 436371 +Grid : Message : Average mflops/s per call per node (full): 445274 +Grid : Message : Average mflops/s per call per node (full): 304463 +Grid : Message : Stencil 13.021 GB/s per node +Grid : Message : Stencil 17.343 GB/s per node +Grid : Message : Stencil 17.7043 GB/s per node +Grid : Message : Stencil 12.9421 GB/s per node +Grid : Message : Average mflops/s per call per node : 667115 +Grid : Message : Average mflops/s per call per node : 801682 +Grid : Message : Average mflops/s per 
call per node : 821272 +Grid : Message : Average mflops/s per call per node : 667456 +Grid : Message : Average mflops/s per call per node (full): 314729 +Grid : Message : Average mflops/s per call per node (full): 437239 +Grid : Message : Average mflops/s per call per node (full): 440060 +Grid : Message : Average mflops/s per call per node (full): 305517 +Grid : Message : Stencil 14.6619 GB/s per node +Grid : Message : Stencil 14.2718 GB/s per node +Grid : Message : Stencil 17.7885 GB/s per node +Grid : Message : Stencil 12.7771 GB/s per node +Grid : Message : Average mflops/s per call per node : 663197 +Grid : Message : Average mflops/s per call per node : 804402 +Grid : Message : Average mflops/s per call per node : 818351 +Grid : Message : Average mflops/s per call per node : 659551 +Grid : Message : Average mflops/s per call per node (full): 316195 +Grid : Message : Average mflops/s per call per node (full): 404190 +Grid : Message : Average mflops/s per call per node (full): 445743 +Grid : Message : Average mflops/s per call per node (full): 303828 +Grid : Message : Stencil 14.1316 GB/s per node +Grid : Message : Stencil 16.6678 GB/s per node +Grid : Message : Stencil 18.9153 GB/s per node +Grid : Message : Stencil 12.2411 GB/s per node +Grid : Message : Average mflops/s per call per node : 664796 +Grid : Message : Average mflops/s per call per node : 801994 +Grid : Message : Average mflops/s per call per node : 817793 +Grid : Message : Average mflops/s per call per node : 671268 +Grid : Message : Average mflops/s per call per node (full): 316474 +Grid : Message : Average mflops/s per call per node (full): 434130 +Grid : Message : Average mflops/s per call per node (full): 446851 +Grid : Message : Average mflops/s per call per node (full): 304576 +Grid : Message : Stencil 15.3967 GB/s per node +Grid : Message : Stencil 17.3259 GB/s per node +Grid : Message : Stencil 17.8009 GB/s per node +Grid : Message : Stencil 12.0176 GB/s per node +Grid : Message : Average 
mflops/s per call per node : 662867 +Grid : Message : Average mflops/s per call per node : 802190 +Grid : Message : Average mflops/s per call per node : 818363 +Grid : Message : Average mflops/s per call per node : 672577 +Grid : Message : Average mflops/s per call per node (full): 317209 +Grid : Message : Average mflops/s per call per node (full): 437671 +Grid : Message : Average mflops/s per call per node (full): 443132 +Grid : Message : Average mflops/s per call per node (full): 302658 +Grid : Message : Stencil 13.7304 GB/s per node +Grid : Message : Stencil 16.5138 GB/s per node +Grid : Message : Stencil 17.2066 GB/s per node +Grid : Message : Stencil 12.6344 GB/s per node +Grid : Message : Average mflops/s per call per node : 667165 +Grid : Message : Average mflops/s per call per node : 801862 +Grid : Message : Average mflops/s per call per node : 821386 +Grid : Message : Average mflops/s per call per node : 668298 +Grid : Message : Average mflops/s per call per node (full): 313850 +Grid : Message : Average mflops/s per call per node (full): 434568 +Grid : Message : Average mflops/s per call per node (full): 441871 +Grid : Message : Average mflops/s per call per node (full): 305778 +Grid : Message : Stencil 14.1902 GB/s per node +Grid : Message : Stencil 12.5243 GB/s per node +Grid : Message : Stencil 17.6716 GB/s per node +Grid : Message : Stencil 12.2327 GB/s per node +Grid : Message : Average mflops/s per call per node : 667550 +Grid : Message : Average mflops/s per call per node : 808486 +Grid : Message : Average mflops/s per call per node : 821750 +Grid : Message : Average mflops/s per call per node : 664300 +Grid : Message : Average mflops/s per call per node (full): 317058 +Grid : Message : Average mflops/s per call per node (full): 371507 +Grid : Message : Average mflops/s per call per node (full): 445703 +Grid : Message : Average mflops/s per call per node (full): 302001 +Grid : Message : Stencil 14.7126 GB/s per node +Grid : Message : Stencil 17.3296 
GB/s per node +Grid : Message : Stencil 16.9344 GB/s per node +Grid : Message : Stencil 13.7377 GB/s per node +Grid : Message : Average mflops/s per call per node : 663247 +Grid : Message : Average mflops/s per call per node : 801530 +Grid : Message : Average mflops/s per call per node : 823572 +Grid : Message : Average mflops/s per call per node : 660202 +Grid : Message : Average mflops/s per call per node (full): 317715 +Grid : Message : Average mflops/s per call per node (full): 438797 +Grid : Message : Average mflops/s per call per node (full): 440266 +Grid : Message : Average mflops/s per call per node (full): 305603 +Grid : Message : Stencil 14.2874 GB/s per node +Grid : Message : Stencil 16.3823 GB/s per node +Grid : Message : Stencil 18.2821 GB/s per node +Grid : Message : Stencil 12.2695 GB/s per node +Grid : Message : Average mflops/s per call per node : 664440 +Grid : Message : Average mflops/s per call per node : 802782 +Grid : Message : Average mflops/s per call per node : 817892 +Grid : Message : Average mflops/s per call per node : 664293 +Grid : Message : Average mflops/s per call per node (full): 314310 +Grid : Message : Average mflops/s per call per node (full): 431204 +Grid : Message : Average mflops/s per call per node (full): 445353 +Grid : Message : Average mflops/s per call per node (full): 304266 +Grid : Message : Stencil 12.2287 GB/s per node +Grid : Message : Stencil 9.84358 GB/s per node +Grid : Message : Stencil 18.2287 GB/s per node +Grid : Message : Stencil 13.4763 GB/s per node +Grid : Message : Average mflops/s per call per node : 669877 +Grid : Message : Average mflops/s per call per node : 812425 +Grid : Message : Average mflops/s per call per node : 815365 +Grid : Message : Average mflops/s per call per node : 662986 +Grid : Message : Average mflops/s per call per node (full): 310980 +Grid : Message : Average mflops/s per call per node (full): 312378 +Grid : Message : Average mflops/s per call per node (full): 445350 +Grid : 
Message : Average mflops/s per call per node (full): 304896 +Grid : Message : Stencil 12.4854 GB/s per node +Grid : Message : Stencil 16.9744 GB/s per node +Grid : Message : Stencil 17.2508 GB/s per node +Grid : Message : Stencil 14.1054 GB/s per node +Grid : Message : Average mflops/s per call per node : 666189 +Grid : Message : Average mflops/s per call per node : 800186 +Grid : Message : Average mflops/s per call per node : 824927 +Grid : Message : Average mflops/s per call per node : 660589 +Grid : Message : Average mflops/s per call per node (full): 313277 +Grid : Message : Average mflops/s per call per node (full): 437682 +Grid : Message : Average mflops/s per call per node (full): 443906 +Grid : Message : Average mflops/s per call per node (full): 304831 +Grid : Message : Stencil 13.8368 GB/s per node +Grid : Message : Stencil 16.3028 GB/s per node +Grid : Message : Stencil 17.5656 GB/s per node +Grid : Message : Stencil 12.244 GB/s per node +Grid : Message : Average mflops/s per call per node : 664243 +Grid : Message : Average mflops/s per call per node : 800247 +Grid : Message : Average mflops/s per call per node : 825207 +Grid : Message : Average mflops/s per call per node : 669564 +Grid : Message : Average mflops/s per call per node (full): 316090 +Grid : Message : Average mflops/s per call per node (full): 431846 +Grid : Message : Average mflops/s per call per node (full): 445461 +Grid : Message : Average mflops/s per call per node (full): 303726 +Grid : Message : Stencil 13.2986 GB/s per node +Grid : Message : Stencil 16.8133 GB/s per node +Grid : Message : Stencil 17.0501 GB/s per node +Grid : Message : Stencil 12.7636 GB/s per node +Grid : Message : Average mflops/s per call per node : 663854 +Grid : Message : Average mflops/s per call per node : 802586 +Grid : Message : Average mflops/s per call per node : 815724 +Grid : Message : Average mflops/s per call per node : 670749 +Grid : Message : Average mflops/s per call per node (full): 315614 +Grid : 
Message : Average mflops/s per call per node (full): 437869 +Grid : Message : Average mflops/s per call per node (full): 440744 +Grid : Message : Average mflops/s per call per node (full): 307138 +Grid : Message : Stencil 12.9122 GB/s per node +Grid : Message : Stencil 15.6169 GB/s per node +Grid : Message : Stencil 17.9642 GB/s per node +Grid : Message : Stencil 13.2988 GB/s per node +Grid : Message : Average mflops/s per call per node : 666449 +Grid : Message : Average mflops/s per call per node : 810116 +Grid : Message : Average mflops/s per call per node : 822794 +Grid : Message : Average mflops/s per call per node : 666926 +Grid : Message : Average mflops/s per call per node (full): 313636 +Grid : Message : Average mflops/s per call per node (full): 425292 +Grid : Message : Average mflops/s per call per node (full): 447099 +Grid : Message : Average mflops/s per call per node (full): 306732 +Grid : Message : Stencil 13.0659 GB/s per node +Grid : Message : Stencil 17.3714 GB/s per node +Grid : Message : Stencil 18.1176 GB/s per node +Grid : Message : Stencil 12.6298 GB/s per node +Grid : Message : Average mflops/s per call per node : 665254 +Grid : Message : Average mflops/s per call per node : 800461 +Grid : Message : Average mflops/s per call per node : 825509 +Grid : Message : Average mflops/s per call per node : 664509 +Grid : Message : Average mflops/s per call per node (full): 314606 +Grid : Message : Average mflops/s per call per node (full): 437387 +Grid : Message : Average mflops/s per call per node (full): 442048 +Grid : Message : Average mflops/s per call per node (full): 304503 +Grid : Message : Stencil 13.1864 GB/s per node +Grid : Message : Stencil 16.961 GB/s per node +Grid : Message : Stencil 18.1721 GB/s per node +Grid : Message : Stencil 13.3021 GB/s per node +Grid : Message : Average mflops/s per call per node : 668091 +Grid : Message : Average mflops/s per call per node : 797834 +Grid : Message : Average mflops/s per call per node : 822533 
+Grid : Message : Average mflops/s per call per node : 665343 +Grid : Message : Average mflops/s per call per node (full): 315989 +Grid : Message : Average mflops/s per call per node (full): 437285 +Grid : Message : Average mflops/s per call per node (full): 448604 +Grid : Message : Average mflops/s per call per node (full): 305937 +Grid : Message : Stencil 12.7171 GB/s per node +Grid : Message : Stencil 17.0547 GB/s per node +Grid : Message : Stencil 17.2289 GB/s per node +Grid : Message : Stencil 13.0249 GB/s per node +Grid : Message : Average mflops/s per call per node : 669958 +Grid : Message : Average mflops/s per call per node : 802599 +Grid : Message : Average mflops/s per call per node : 823252 +Grid : Message : Average mflops/s per call per node : 663794 +Grid : Message : Average mflops/s per call per node (full): 315310 +Grid : Message : Average mflops/s per call per node (full): 434842 +Grid : Message : Average mflops/s per call per node (full): 443271 +Grid : Message : Average mflops/s per call per node (full): 305779 +Grid : Message : Stencil 12.7706 GB/s per node +Grid : Message : Stencil 17.9404 GB/s per node +Grid : Message : Stencil 17.5594 GB/s per node +Grid : Message : Stencil 11.8661 GB/s per node +Grid : Message : Average mflops/s per call per node : 670656 +Grid : Message : Average mflops/s per call per node : 798841 +Grid : Message : Average mflops/s per call per node : 816038 +Grid : Message : Average mflops/s per call per node : 667840 +Grid : Message : Average mflops/s per call per node (full): 315570 +Grid : Message : Average mflops/s per call per node (full): 438820 +Grid : Message : Average mflops/s per call per node (full): 444007 +Grid : Message : Average mflops/s per call per node (full): 301392 +Grid : Message : Stencil 12.6387 GB/s per node +Grid : Message : Stencil 17.87 GB/s per node +Grid : Message : Stencil 17.3612 GB/s per node +Grid : Message : Stencil 12.1397 GB/s per node +Grid : Message : Average mflops/s per call per 
node : 670097 +Grid : Message : Average mflops/s per call per node : 801686 +Grid : Message : Average mflops/s per call per node : 822358 +Grid : Message : Average mflops/s per call per node : 673319 +Grid : Message : Average mflops/s per call per node (full): 314702 +Grid : Message : Average mflops/s per call per node (full): 433081 +Grid : Message : Average mflops/s per call per node (full): 443404 +Grid : Message : Average mflops/s per call per node (full): 303616 +Grid : Message : Stencil 12.6213 GB/s per node +Grid : Message : Stencil 17.3179 GB/s per node +Grid : Message : Stencil 17.3871 GB/s per node +Grid : Message : Stencil 14.4538 GB/s per node +Grid : Message : Average mflops/s per call per node : 669104 +Grid : Message : Average mflops/s per call per node : 808904 +Grid : Message : Average mflops/s per call per node : 816456 +Grid : Message : Average mflops/s per call per node : 664116 +Grid : Message : Average mflops/s per call per node (full): 314805 +Grid : Message : Average mflops/s per call per node (full): 438129 +Grid : Message : Average mflops/s per call per node (full): 443576 +Grid : Message : Average mflops/s per call per node (full): 307045 +Grid : Message : Stencil 13.1342 GB/s per node +Grid : Message : Stencil 17.2667 GB/s per node +Grid : Message : Stencil 18.9676 GB/s per node +Grid : Message : Stencil 13.1063 GB/s per node +Grid : Message : Average mflops/s per call per node : 666448 +Grid : Message : Average mflops/s per call per node : 803193 +Grid : Message : Average mflops/s per call per node : 820740 +Grid : Message : Average mflops/s per call per node : 661598 +Grid : Message : Average mflops/s per call per node (full): 315157 +Grid : Message : Average mflops/s per call per node (full): 437486 +Grid : Message : Average mflops/s per call per node (full): 447975 +Grid : Message : Average mflops/s per call per node (full): 305551 +Grid : Message : Stencil 12.8885 GB/s per node +Grid : Message : Stencil 16.7165 GB/s per node +Grid : 
Message : Stencil 17.4135 GB/s per node +Grid : Message : Stencil 12.4162 GB/s per node +Grid : Message : Average mflops/s per call per node : 669610 +Grid : Message : Average mflops/s per call per node : 798878 +Grid : Message : Average mflops/s per call per node : 828761 +Grid : Message : Average mflops/s per call per node : 665508 +Grid : Message : Average mflops/s per call per node (full): 315714 +Grid : Message : Average mflops/s per call per node (full): 433076 +Grid : Message : Average mflops/s per call per node (full): 445197 +Grid : Message : Average mflops/s per call per node (full): 304990 +Grid : Message : Stencil 13.4829 GB/s per node +Grid : Message : Stencil 17.3411 GB/s per node +Grid : Message : Stencil 19.5368 GB/s per node +Grid : Message : Stencil 12.2651 GB/s per node +Grid : Message : Average mflops/s per call per node : 663229 +Grid : Message : Average mflops/s per call per node : 801443 +Grid : Message : Average mflops/s per call per node : 822028 +Grid : Message : Average mflops/s per call per node : 666187 +Grid : Message : Average mflops/s per call per node (full): 313415 +Grid : Message : Average mflops/s per call per node (full): 437696 +Grid : Message : Average mflops/s per call per node (full): 451046 +Grid : Message : Average mflops/s per call per node (full): 303781 +Grid : Message : Stencil 12.9546 GB/s per node +Grid : Message : Stencil 16.7871 GB/s per node +Grid : Message : Stencil 17.4806 GB/s per node +Grid : Message : Stencil 12.5693 GB/s per node +Grid : Message : Average mflops/s per call per node : 667984 +Grid : Message : Average mflops/s per call per node : 804416 +Grid : Message : Average mflops/s per call per node : 820681 +Grid : Message : Average mflops/s per call per node : 662491 +Grid : Message : Average mflops/s per call per node (full): 315209 +Grid : Message : Average mflops/s per call per node (full): 437613 +Grid : Message : Average mflops/s per call per node (full): 444061 +Grid : Message : Average mflops/s 
per call per node (full): 304878 +Grid : Message : Stencil 13.2857 GB/s per node +Grid : Message : Stencil 16.9461 GB/s per node +Grid : Message : Stencil 17.5121 GB/s per node +Grid : Message : Stencil 13.1988 GB/s per node +Grid : Message : Average mflops/s per call per node : 666158 +Grid : Message : Average mflops/s per call per node : 806146 +Grid : Message : Average mflops/s per call per node : 822367 +Grid : Message : Average mflops/s per call per node : 667978 +Grid : Message : Average mflops/s per call per node (full): 315279 +Grid : Message : Average mflops/s per call per node (full): 437540 +Grid : Message : Average mflops/s per call per node (full): 445660 +Grid : Message : Average mflops/s per call per node (full): 306304 +Grid : Message : Stencil 13.1804 GB/s per node +Grid : Message : Stencil 17.6715 GB/s per node +Grid : Message : Stencil 17.9868 GB/s per node +Grid : Message : Stencil 13.724 GB/s per node +Grid : Message : Average mflops/s per call per node : 667593 +Grid : Message : Average mflops/s per call per node : 801475 +Grid : Message : Average mflops/s per call per node : 823711 +Grid : Message : Average mflops/s per call per node : 668033 +Grid : Message : Average mflops/s per call per node (full): 315247 +Grid : Message : Average mflops/s per call per node (full): 435588 +Grid : Message : Average mflops/s per call per node (full): 446829 +Grid : Message : Average mflops/s per call per node (full): 307368 +Grid : Message : Stencil 12.851 GB/s per node +Grid : Message : Stencil 17.4314 GB/s per node +Grid : Message : Stencil 17.2006 GB/s per node +Grid : Message : Stencil 13.0596 GB/s per node +Grid : Message : Average mflops/s per call per node : 667473 +Grid : Message : Average mflops/s per call per node : 802011 +Grid : Message : Average mflops/s per call per node : 827037 +Grid : Message : Average mflops/s per call per node : 663835 +Grid : Message : Average mflops/s per call per node (full): 313637 +Grid : Message : Average mflops/s 
per call per node (full): 440010 +Grid : Message : Average mflops/s per call per node (full): 443524 +Grid : Message : Average mflops/s per call per node (full): 304671 +Grid : Message : Stencil 13.5352 GB/s per node +Grid : Message : Stencil 13.4914 GB/s per node +Grid : Message : Stencil 18.1952 GB/s per node +Grid : Message : Stencil 13.6803 GB/s per node +Grid : Message : Average mflops/s per call per node : 666383 +Grid : Message : Average mflops/s per call per node : 810543 +Grid : Message : Average mflops/s per call per node : 819120 +Grid : Message : Average mflops/s per call per node : 661499 +Grid : Message : Average mflops/s per call per node (full): 315285 +Grid : Message : Average mflops/s per call per node (full): 390981 +Grid : Message : Average mflops/s per call per node (full): 437005 +Grid : Message : Average mflops/s per call per node (full): 305682 +Grid : Message : Stencil 13.9432 GB/s per node +Grid : Message : Stencil 16.9757 GB/s per node +Grid : Message : Stencil 18.1128 GB/s per node +Grid : Message : Stencil 14.5547 GB/s per node +Grid : Message : Average mflops/s per call per node : 664137 +Grid : Message : Average mflops/s per call per node : 806558 +Grid : Message : Average mflops/s per call per node : 819594 +Grid : Message : Average mflops/s per call per node : 660482 +Grid : Message : Average mflops/s per call per node (full): 315526 +Grid : Message : Average mflops/s per call per node (full): 437547 +Grid : Message : Average mflops/s per call per node (full): 445861 +Grid : Message : Average mflops/s per call per node (full): 306961 +Grid : Message : Stencil 13.4324 GB/s per node +Grid : Message : Stencil 18.4747 GB/s per node +Grid : Message : Stencil 17.1373 GB/s per node +Grid : Message : Stencil 13.291 GB/s per node +Grid : Message : Average mflops/s per call per node : 666766 +Grid : Message : Average mflops/s per call per node : 798827 +Grid : Message : Average mflops/s per call per node : 824186 +Grid : Message : Average 
mflops/s per call per node : 665952 +Grid : Message : Average mflops/s per call per node (full): 315836 +Grid : Message : Average mflops/s per call per node (full): 440620 +Grid : Message : Average mflops/s per call per node (full): 442485 +Grid : Message : Average mflops/s per call per node (full): 306693 +Grid : Message : Stencil 14.2648 GB/s per node +Grid : Message : Stencil 17.4152 GB/s per node +Grid : Message : Stencil 17.9912 GB/s per node +Grid : Message : Stencil 12.5767 GB/s per node +Grid : Message : Average mflops/s per call per node : 665482 +Grid : Message : Average mflops/s per call per node : 806993 +Grid : Message : Average mflops/s per call per node : 816942 +Grid : Message : Average mflops/s per call per node : 661803 +Grid : Message : Average mflops/s per call per node (full): 317197 +Grid : Message : Average mflops/s per call per node (full): 438764 +Grid : Message : Average mflops/s per call per node (full): 445909 +Grid : Message : Average mflops/s per call per node (full): 304387 +Grid : Message : Stencil 14.4825 GB/s per node +Grid : Message : Stencil 16.3628 GB/s per node +Grid : Message : Stencil 17.3344 GB/s per node +Grid : Message : Stencil 12.5575 GB/s per node +Grid : Message : Average mflops/s per call per node : 661238 +Grid : Message : Average mflops/s per call per node : 803942 +Grid : Message : Average mflops/s per call per node : 818555 +Grid : Message : Average mflops/s per call per node : 667193 +Grid : Message : Average mflops/s per call per node (full): 316596 +Grid : Message : Average mflops/s per call per node (full): 426580 +Grid : Message : Average mflops/s per call per node (full): 442295 +Grid : Message : Average mflops/s per call per node (full): 305003 +Grid : Message : Stencil 13.2119 GB/s per node +Grid : Message : Stencil 14.9454 GB/s per node +Grid : Message : Stencil 18.2205 GB/s per node +Grid : Message : Stencil 13.2841 GB/s per node +Grid : Message : Average mflops/s per call per node : 666447 +Grid : 
Message : Average mflops/s per call per node : 805976 +Grid : Message : Average mflops/s per call per node : 825025 +Grid : Message : Average mflops/s per call per node : 662542 +Grid : Message : Average mflops/s per call per node (full): 315221 +Grid : Message : Average mflops/s per call per node (full): 414701 +Grid : Message : Average mflops/s per call per node (full): 449093 +Grid : Message : Average mflops/s per call per node (full): 305750 +Grid : Message : Stencil 13.3864 GB/s per node +Grid : Message : Stencil 16.3266 GB/s per node +Grid : Message : Stencil 18.0105 GB/s per node +Grid : Message : Stencil 13.2748 GB/s per node +Grid : Message : Average mflops/s per call per node : 666430 +Grid : Message : Average mflops/s per call per node : 801581 +Grid : Message : Average mflops/s per call per node : 822402 +Grid : Message : Average mflops/s per call per node : 664154 +Grid : Message : Average mflops/s per call per node (full): 316332 +Grid : Message : Average mflops/s per call per node (full): 432812 +Grid : Message : Average mflops/s per call per node (full): 447225 +Grid : Message : Average mflops/s per call per node (full): 306327 +Grid : Message : Stencil 14.0436 GB/s per node +Grid : Message : Stencil 17.5313 GB/s per node +Grid : Message : Stencil 17.9904 GB/s per node +Grid : Message : Stencil 14.0196 GB/s per node +Grid : Message : Average mflops/s per call per node : 661958 +Grid : Message : Average mflops/s per call per node : 802925 +Grid : Message : Average mflops/s per call per node : 822580 +Grid : Message : Average mflops/s per call per node : 655474 +Grid : Message : Average mflops/s per call per node (full): 315807 +Grid : Message : Average mflops/s per call per node (full): 440084 +Grid : Message : Average mflops/s per call per node (full): 446506 +Grid : Message : Average mflops/s per call per node (full): 305478 +Grid : Message : Stencil 13.8095 GB/s per node +Grid : Message : Stencil 17.219 GB/s per node +Grid : Message : Stencil 
17.8972 GB/s per node +Grid : Message : Stencil 12.5523 GB/s per node +Grid : Message : Average mflops/s per call per node : 662486 +Grid : Message : Average mflops/s per call per node : 805245 +Grid : Message : Average mflops/s per call per node : 822277 +Grid : Message : Average mflops/s per call per node : 667800 +Grid : Message : Average mflops/s per call per node (full): 315251 +Grid : Message : Average mflops/s per call per node (full): 437134 +Grid : Message : Average mflops/s per call per node (full): 445001 +Grid : Message : Average mflops/s per call per node (full): 303441 +Grid : Message : Stencil 13.4219 GB/s per node +Grid : Message : Stencil 16.4628 GB/s per node +Grid : Message : Stencil 17.7018 GB/s per node +Grid : Message : Stencil 12.3819 GB/s per node +Grid : Message : Average mflops/s per call per node : 665299 +Grid : Message : Average mflops/s per call per node : 805631 +Grid : Message : Average mflops/s per call per node : 819042 +Grid : Message : Average mflops/s per call per node : 670935 +Grid : Message : Average mflops/s per call per node (full): 315640 +Grid : Message : Average mflops/s per call per node (full): 434757 +Grid : Message : Average mflops/s per call per node (full): 444690 +Grid : Message : Average mflops/s per call per node (full): 305656 +Grid : Message : Stencil 12.4988 GB/s per node +Grid : Message : Stencil 16.9083 GB/s per node +Grid : Message : Stencil 17.7146 GB/s per node +Grid : Message : Stencil 14.3923 GB/s per node +Grid : Message : Average mflops/s per call per node : 665591 +Grid : Message : Average mflops/s per call per node : 803923 +Grid : Message : Average mflops/s per call per node : 828013 +Grid : Message : Average mflops/s per call per node : 663066 +Grid : Message : Average mflops/s per call per node (full): 313228 +Grid : Message : Average mflops/s per call per node (full): 435404 +Grid : Message : Average mflops/s per call per node (full): 442913 +Grid : Message : Average mflops/s per call per node 
(full): 306970 +Grid : Message : Stencil 13.3269 GB/s per node +Grid : Message : Stencil 16.7782 GB/s per node +Grid : Message : Stencil 18.0916 GB/s per node +Grid : Message : Stencil 13.0793 GB/s per node +Grid : Message : Average mflops/s per call per node : 666367 +Grid : Message : Average mflops/s per call per node : 806076 +Grid : Message : Average mflops/s per call per node : 819398 +Grid : Message : Average mflops/s per call per node : 663443 +Grid : Message : Average mflops/s per call per node (full): 315773 +Grid : Message : Average mflops/s per call per node (full): 436873 +Grid : Message : Average mflops/s per call per node (full): 445575 +Grid : Message : Average mflops/s per call per node (full): 305899 +Grid : Message : Stencil 12.4975 GB/s per node +Grid : Message : Stencil 16.6148 GB/s per node +Grid : Message : Stencil 16.7767 GB/s per node +Grid : Message : Stencil 14.3518 GB/s per node +Grid : Message : Average mflops/s per call per node : 666796 +Grid : Message : Average mflops/s per call per node : 806592 +Grid : Message : Average mflops/s per call per node : 823710 +Grid : Message : Average mflops/s per call per node : 664034 +Grid : Message : Average mflops/s per call per node (full): 312829 +Grid : Message : Average mflops/s per call per node (full): 435589 +Grid : Message : Average mflops/s per call per node (full): 431787 +Grid : Message : Average mflops/s per call per node (full): 307315 +Grid : Message : Stencil 12.7171 GB/s per node +Grid : Message : Stencil 17.2914 GB/s per node +Grid : Message : Stencil 17.1318 GB/s per node +Grid : Message : Stencil 14.4639 GB/s per node +Grid : Message : Average mflops/s per call per node : 664303 +Grid : Message : Average mflops/s per call per node : 805861 +Grid : Message : Average mflops/s per call per node : 827627 +Grid : Message : Average mflops/s per call per node : 657617 +Grid : Message : Average mflops/s per call per node (full): 311895 +Grid : Message : Average mflops/s per call per node 
(full): 440178 +Grid : Message : Average mflops/s per call per node (full): 438531 +Grid : Message : Average mflops/s per call per node (full): 305532 +Grid : Message : Stencil 13.8382 GB/s per node +Grid : Message : Stencil 16.6842 GB/s per node +Grid : Message : Stencil 17.5183 GB/s per node +Grid : Message : Stencil 12.7917 GB/s per node +Grid : Message : Average mflops/s per call per node : 663577 +Grid : Message : Average mflops/s per call per node : 805710 +Grid : Message : Average mflops/s per call per node : 820054 +Grid : Message : Average mflops/s per call per node : 661076 +Grid : Message : Average mflops/s per call per node (full): 315325 +Grid : Message : Average mflops/s per call per node (full): 437400 +Grid : Message : Average mflops/s per call per node (full): 443599 +Grid : Message : Average mflops/s per call per node (full): 304844 +Grid : Message : Stencil 13.2596 GB/s per node +Grid : Message : Stencil 16.6761 GB/s per node +Grid : Message : Stencil 17.0363 GB/s per node +Grid : Message : Stencil 14.1222 GB/s per node +Grid : Message : Average mflops/s per call per node : 665035 +Grid : Message : Average mflops/s per call per node : 806591 +Grid : Message : Average mflops/s per call per node : 819211 +Grid : Message : Average mflops/s per call per node : 660628 +Grid : Message : Average mflops/s per call per node (full): 314882 +Grid : Message : Average mflops/s per call per node (full): 436507 +Grid : Message : Average mflops/s per call per node (full): 440248 +Grid : Message : Average mflops/s per call per node (full): 306543 +Grid : Message : Stencil 13.9848 GB/s per node +Grid : Message : Stencil 18.3976 GB/s per node +Grid : Message : Stencil 19.3124 GB/s per node +Grid : Message : Stencil 12.9166 GB/s per node +Grid : Message : Average mflops/s per call per node : 664656 +Grid : Message : Average mflops/s per call per node : 799203 +Grid : Message : Average mflops/s per call per node : 819769 +Grid : Message : Average mflops/s per call 
per node : 669625 +Grid : Message : Average mflops/s per call per node (full): 316861 +Grid : Message : Average mflops/s per call per node (full): 439961 +Grid : Message : Average mflops/s per call per node (full): 448614 +Grid : Message : Average mflops/s per call per node (full): 307114 +Grid : Message : Stencil 12.4935 GB/s per node +Grid : Message : Stencil 11.2988 GB/s per node +Grid : Message : Stencil 17.3402 GB/s per node +Grid : Message : Stencil 12.5342 GB/s per node +Grid : Message : Average mflops/s per call per node : 668122 +Grid : Message : Average mflops/s per call per node : 813150 +Grid : Message : Average mflops/s per call per node : 823168 +Grid : Message : Average mflops/s per call per node : 665416 +Grid : Message : Average mflops/s per call per node (full): 313288 +Grid : Message : Average mflops/s per call per node (full): 346150 +Grid : Message : Average mflops/s per call per node (full): 437381 +Grid : Message : Average mflops/s per call per node (full): 304229 +Grid : Message : Stencil 14.216 GB/s per node +Grid : Message : Stencil 17.0074 GB/s per node +Grid : Message : Stencil 17.2103 GB/s per node +Grid : Message : Stencil 13.9827 GB/s per node +Grid : Message : Average mflops/s per call per node : 663028 +Grid : Message : Average mflops/s per call per node : 805888 +Grid : Message : Average mflops/s per call per node : 820719 +Grid : Message : Average mflops/s per call per node : 663627 +Grid : Message : Average mflops/s per call per node (full): 316786 +Grid : Message : Average mflops/s per call per node (full): 438756 +Grid : Message : Average mflops/s per call per node (full): 441710 +Grid : Message : Average mflops/s per call per node (full): 307103 +Grid : Message : Stencil 13.1908 GB/s per node +Grid : Message : Stencil 14.8085 GB/s per node +Grid : Message : Stencil 17.4607 GB/s per node +Grid : Message : Stencil 12.3831 GB/s per node +Grid : Message : Average mflops/s per call per node : 665860 +Grid : Message : Average 
mflops/s per call per node : 807115 +Grid : Message : Average mflops/s per call per node : 824929 +Grid : Message : Average mflops/s per call per node : 670725 +Grid : Message : Average mflops/s per call per node (full): 314340 +Grid : Message : Average mflops/s per call per node (full): 414385 +Grid : Message : Average mflops/s per call per node (full): 445376 +Grid : Message : Average mflops/s per call per node (full): 304375 +Grid : Message : Stencil 13.5194 GB/s per node +Grid : Message : Stencil 13.942 GB/s per node +Grid : Message : Stencil 17.6336 GB/s per node +Grid : Message : Stencil 12.367 GB/s per node +Grid : Message : Average mflops/s per call per node : 669064 +Grid : Message : Average mflops/s per call per node : 804949 +Grid : Message : Average mflops/s per call per node : 817478 +Grid : Message : Average mflops/s per call per node : 665460 +Grid : Message : Average mflops/s per call per node (full): 316926 +Grid : Message : Average mflops/s per call per node (full): 399455 +Grid : Message : Average mflops/s per call per node (full): 446074 +Grid : Message : Average mflops/s per call per node (full): 304541 +Grid : Message : Stencil 13.6126 GB/s per node +Grid : Message : Stencil 15.9337 GB/s per node +Grid : Message : Stencil 17.3187 GB/s per node +Grid : Message : Stencil 12.4468 GB/s per node +Grid : Message : Average mflops/s per call per node : 665578 +Grid : Message : Average mflops/s per call per node : 812716 +Grid : Message : Average mflops/s per call per node : 821287 +Grid : Message : Average mflops/s per call per node : 665420 +Grid : Message : Average mflops/s per call per node (full): 316025 +Grid : Message : Average mflops/s per call per node (full): 426919 +Grid : Message : Average mflops/s per call per node (full): 443410 +Grid : Message : Average mflops/s per call per node (full): 304065 +Grid : Message : Stencil 12.7614 GB/s per node +Grid : Message : Stencil 17.1654 GB/s per node +Grid : Message : Stencil 18.0923 GB/s per node 
+Grid : Message : Stencil 13.4006 GB/s per node +Grid : Message : Average mflops/s per call per node : 671025 +Grid : Message : Average mflops/s per call per node : 797503 +Grid : Message : Average mflops/s per call per node : 823647 +Grid : Message : Average mflops/s per call per node : 659844 +Grid : Message : Average mflops/s per call per node (full): 316108 +Grid : Message : Average mflops/s per call per node (full): 438833 +Grid : Message : Average mflops/s per call per node (full): 447618 +Grid : Message : Average mflops/s per call per node (full): 305267 +Grid : Message : Stencil 12.9773 GB/s per node +Grid : Message : Stencil 17.1714 GB/s per node +Grid : Message : Stencil 17.282 GB/s per node +Grid : Message : Stencil 11.9263 GB/s per node +Grid : Message : Average mflops/s per call per node : 671307 +Grid : Message : Average mflops/s per call per node : 804090 +Grid : Message : Average mflops/s per call per node : 827698 +Grid : Message : Average mflops/s per call per node : 672546 +Grid : Message : Average mflops/s per call per node (full): 316159 +Grid : Message : Average mflops/s per call per node (full): 438851 +Grid : Message : Average mflops/s per call per node (full): 445167 +Grid : Message : Average mflops/s per call per node (full): 300966 +Grid : Message : Stencil 13.6184 GB/s per node +Grid : Message : Stencil 17.0852 GB/s per node +Grid : Message : Stencil 17.8702 GB/s per node +Grid : Message : Stencil 12.7869 GB/s per node +Grid : Message : Average mflops/s per call per node : 665162 +Grid : Message : Average mflops/s per call per node : 803811 +Grid : Message : Average mflops/s per call per node : 828009 +Grid : Message : Average mflops/s per call per node : 662307 +Grid : Message : Average mflops/s per call per node (full): 316505 +Grid : Message : Average mflops/s per call per node (full): 438134 +Grid : Message : Average mflops/s per call per node (full): 447694 +Grid : Message : Average mflops/s per call per node (full): 304436 +Grid : 
Message : Stencil 12.6024 GB/s per node +Grid : Message : Stencil 17.1863 GB/s per node +Grid : Message : Stencil 18.0182 GB/s per node +Grid : Message : Stencil 13.271 GB/s per node +Grid : Message : Average mflops/s per call per node : 668267 +Grid : Message : Average mflops/s per call per node : 800026 +Grid : Message : Average mflops/s per call per node : 822868 +Grid : Message : Average mflops/s per call per node : 661952 +Grid : Message : Average mflops/s per call per node (full): 310228 +Grid : Message : Average mflops/s per call per node (full): 435863 +Grid : Message : Average mflops/s per call per node (full): 446221 +Grid : Message : Average mflops/s per call per node (full): 305856 +Grid : Message : Stencil 13.1809 GB/s per node +Grid : Message : Stencil 13.4887 GB/s per node +Grid : Message : Stencil 17.3289 GB/s per node +Grid : Message : Stencil 13.1904 GB/s per node +Grid : Message : Average mflops/s per call per node : 664234 +Grid : Message : Average mflops/s per call per node : 803314 +Grid : Message : Average mflops/s per call per node : 815947 +Grid : Message : Average mflops/s per call per node : 661962 +Grid : Message : Average mflops/s per call per node (full): 314075 +Grid : Message : Average mflops/s per call per node (full): 389819 +Grid : Message : Average mflops/s per call per node (full): 441479 +Grid : Message : Average mflops/s per call per node (full): 305914 +Grid : Message : Stencil 13.1528 GB/s per node +Grid : Message : Stencil 16.7188 GB/s per node +Grid : Message : Stencil 16.5866 GB/s per node +Grid : Message : Stencil 12.213 GB/s per node +Grid : Message : Average mflops/s per call per node : 663048 +Grid : Message : Average mflops/s per call per node : 804374 +Grid : Message : Average mflops/s per call per node : 819659 +Grid : Message : Average mflops/s per call per node : 667866 +Grid : Message : Average mflops/s per call per node (full): 312955 +Grid : Message : Average mflops/s per call per node (full): 436178 +Grid : 
Message : Average mflops/s per call per node (full): 435129 +Grid : Message : Average mflops/s per call per node (full): 299583 +Grid : Message : Stencil 12.839 GB/s per node +Grid : Message : Stencil 17.4587 GB/s per node +Grid : Message : Stencil 17.6344 GB/s per node +Grid : Message : Stencil 13.0173 GB/s per node +Grid : Message : Average mflops/s per call per node : 660284 +Grid : Message : Average mflops/s per call per node : 804717 +Grid : Message : Average mflops/s per call per node : 827121 +Grid : Message : Average mflops/s per call per node : 663707 +Grid : Message : Average mflops/s per call per node (full): 312460 +Grid : Message : Average mflops/s per call per node (full): 438847 +Grid : Message : Average mflops/s per call per node (full): 446329 +Grid : Message : Average mflops/s per call per node (full): 305415 +Grid : Message : Stencil 13.2555 GB/s per node +Grid : Message : Stencil 17.6703 GB/s per node +Grid : Message : Stencil 18.5159 GB/s per node +Grid : Message : Stencil 13.467 GB/s per node +Grid : Message : Average mflops/s per call per node : 663868 +Grid : Message : Average mflops/s per call per node : 800212 +Grid : Message : Average mflops/s per call per node : 824806 +Grid : Message : Average mflops/s per call per node : 664923 +Grid : Message : Average mflops/s per call per node (full): 315689 +Grid : Message : Average mflops/s per call per node (full): 439311 +Grid : Message : Average mflops/s per call per node (full): 449243 +Grid : Message : Average mflops/s per call per node (full): 305734 +Grid : Message : Stencil 12.3396 GB/s per node +Grid : Message : Stencil 16.991 GB/s per node +Grid : Message : Stencil 17.5986 GB/s per node +Grid : Message : Stencil 11.9851 GB/s per node +Grid : Message : Average mflops/s per call per node : 665254 +Grid : Message : Average mflops/s per call per node : 807953 +Grid : Message : Average mflops/s per call per node : 821190 +Grid : Message : Average mflops/s per call per node : 668761 +Grid : 
Message : Average mflops/s per call per node (full): 312356 +Grid : Message : Average mflops/s per call per node (full): 432148 +Grid : Message : Average mflops/s per call per node (full): 435364 +Grid : Message : Average mflops/s per call per node (full): 302434 +Grid : Message : Stencil 12.6533 GB/s per node +Grid : Message : Stencil 16.6566 GB/s per node +Grid : Message : Stencil 17.421 GB/s per node +Grid : Message : Stencil 13.6647 GB/s per node +Grid : Message : Average mflops/s per call per node : 664238 +Grid : Message : Average mflops/s per call per node : 807496 +Grid : Message : Average mflops/s per call per node : 826445 +Grid : Message : Average mflops/s per call per node : 657369 +Grid : Message : Average mflops/s per call per node (full): 313584 +Grid : Message : Average mflops/s per call per node (full): 435134 +Grid : Message : Average mflops/s per call per node (full): 441673 +Grid : Message : Average mflops/s per call per node (full): 305415 +Grid : Message : Stencil 12.4773 GB/s per node +Grid : Message : Stencil 18.1009 GB/s per node +Grid : Message : Stencil 17.9373 GB/s per node +Grid : Message : Stencil 12.6955 GB/s per node +Grid : Message : Average mflops/s per call per node : 664810 +Grid : Message : Average mflops/s per call per node : 798276 +Grid : Message : Average mflops/s per call per node : 820774 +Grid : Message : Average mflops/s per call per node : 667568 +Grid : Message : Average mflops/s per call per node (full): 313601 +Grid : Message : Average mflops/s per call per node (full): 439527 +Grid : Message : Average mflops/s per call per node (full): 446031 +Grid : Message : Average mflops/s per call per node (full): 304211 +Grid : Message : Stencil 12.8544 GB/s per node +Grid : Message : Stencil 18.2949 GB/s per node +Grid : Message : Stencil 16.4737 GB/s per node +Grid : Message : Stencil 12.2715 GB/s per node +Grid : Message : Average mflops/s per call per node : 670412 +Grid : Message : Average mflops/s per call per node : 
805830 +Grid : Message : Average mflops/s per call per node : 826551 +Grid : Message : Average mflops/s per call per node : 667492 +Grid : Message : Average mflops/s per call per node (full): 315155 +Grid : Message : Average mflops/s per call per node (full): 441838 +Grid : Message : Average mflops/s per call per node (full): 432139 +Grid : Message : Average mflops/s per call per node (full): 304623 +Grid : Message : Stencil 14.1742 GB/s per node +Grid : Message : Stencil 16.563 GB/s per node +Grid : Message : Stencil 17.5296 GB/s per node +Grid : Message : Stencil 13.3106 GB/s per node +Grid : Message : Average mflops/s per call per node : 666231 +Grid : Message : Average mflops/s per call per node : 809848 +Grid : Message : Average mflops/s per call per node : 815874 +Grid : Message : Average mflops/s per call per node : 662812 +Grid : Message : Average mflops/s per call per node (full): 315886 +Grid : Message : Average mflops/s per call per node (full): 436379 +Grid : Message : Average mflops/s per call per node (full): 444229 +Grid : Message : Average mflops/s per call per node (full): 306706 +Grid : Message : Stencil 13.297 GB/s per node +Grid : Message : Stencil 10.6758 GB/s per node +Grid : Message : Stencil 18.0975 GB/s per node +Grid : Message : Stencil 12.9191 GB/s per node +Grid : Message : Average mflops/s per call per node : 664986 +Grid : Message : Average mflops/s per call per node : 809930 +Grid : Message : Average mflops/s per call per node : 824570 +Grid : Message : Average mflops/s per call per node : 662503 +Grid : Message : Average mflops/s per call per node (full): 314510 +Grid : Message : Average mflops/s per call per node (full): 332417 +Grid : Message : Average mflops/s per call per node (full): 447999 +Grid : Message : Average mflops/s per call per node (full): 304748 +Grid : Message : Stencil 13.2278 GB/s per node +Grid : Message : Stencil 10.026 GB/s per node +Grid : Message : Stencil 18.1989 GB/s per node +Grid : Message : Stencil 
13.4784 GB/s per node +Grid : Message : Average mflops/s per call per node : 668048 +Grid : Message : Average mflops/s per call per node : 807632 +Grid : Message : Average mflops/s per call per node : 822427 +Grid : Message : Average mflops/s per call per node : 663594 +Grid : Message : Average mflops/s per call per node (full): 315399 +Grid : Message : Average mflops/s per call per node (full): 317245 +Grid : Message : Average mflops/s per call per node (full): 446650 +Grid : Message : Average mflops/s per call per node (full): 306189 +Grid : Message : Stencil 13.2856 GB/s per node +Grid : Message : Stencil 16.8058 GB/s per node +Grid : Message : Stencil 18.3342 GB/s per node +Grid : Message : Stencil 13.7579 GB/s per node +Grid : Message : Average mflops/s per call per node : 666376 +Grid : Message : Average mflops/s per call per node : 807190 +Grid : Message : Average mflops/s per call per node : 820858 +Grid : Message : Average mflops/s per call per node : 658153 +Grid : Message : Average mflops/s per call per node (full): 314857 +Grid : Message : Average mflops/s per call per node (full): 435853 +Grid : Message : Average mflops/s per call per node (full): 447087 +Grid : Message : Average mflops/s per call per node (full): 305513 +Grid : Message : Stencil 15.1605 GB/s per node +Grid : Message : Stencil 17.0136 GB/s per node +Grid : Message : Stencil 18.373 GB/s per node +Grid : Message : Stencil 12.3423 GB/s per node +Grid : Message : Average mflops/s per call per node : 662966 +Grid : Message : Average mflops/s per call per node : 808304 +Grid : Message : Average mflops/s per call per node : 818004 +Grid : Message : Average mflops/s per call per node : 666981 +Grid : Message : Average mflops/s per call per node (full): 316955 +Grid : Message : Average mflops/s per call per node (full): 439448 +Grid : Message : Average mflops/s per call per node (full): 447067 +Grid : Message : Average mflops/s per call per node (full): 302182 +Grid : Message : Stencil 12.5394 
GB/s per node +Grid : Message : Stencil 16.3613 GB/s per node +Grid : Message : Stencil 17.7478 GB/s per node +Grid : Message : Stencil 12.9737 GB/s per node +Grid : Message : Average mflops/s per call per node : 666957 +Grid : Message : Average mflops/s per call per node : 803510 +Grid : Message : Average mflops/s per call per node : 822896 +Grid : Message : Average mflops/s per call per node : 661411 +Grid : Message : Average mflops/s per call per node (full): 314184 +Grid : Message : Average mflops/s per call per node (full): 432010 +Grid : Message : Average mflops/s per call per node (full): 441724 +Grid : Message : Average mflops/s per call per node (full): 301640 +Grid : Message : Stencil 13.5336 GB/s per node +Grid : Message : Stencil 16.7272 GB/s per node +Grid : Message : Stencil 16.7701 GB/s per node +Grid : Message : Stencil 11.9536 GB/s per node +Grid : Message : Average mflops/s per call per node : 663344 +Grid : Message : Average mflops/s per call per node : 807277 +Grid : Message : Average mflops/s per call per node : 828164 +Grid : Message : Average mflops/s per call per node : 666565 +Grid : Message : Average mflops/s per call per node (full): 314791 +Grid : Message : Average mflops/s per call per node (full): 436726 +Grid : Message : Average mflops/s per call per node (full): 438177 +Grid : Message : Average mflops/s per call per node (full): 300950 +Grid : Message : Stencil 13.0412 GB/s per node +Grid : Message : Stencil 18.249 GB/s per node +Grid : Message : Stencil 17.3893 GB/s per node +Grid : Message : Stencil 12.5904 GB/s per node +Grid : Message : Average mflops/s per call per node : 667289 +Grid : Message : Average mflops/s per call per node : 800480 +Grid : Message : Average mflops/s per call per node : 826123 +Grid : Message : Average mflops/s per call per node : 668870 +Grid : Message : Average mflops/s per call per node (full): 314766 +Grid : Message : Average mflops/s per call per node (full): 440731 +Grid : Message : Average mflops/s 
per call per node (full): 444114 +Grid : Message : Average mflops/s per call per node (full): 305542 +Grid : Message : Stencil 14.1855 GB/s per node +Grid : Message : Stencil 17.3159 GB/s per node +Grid : Message : Stencil 17.3951 GB/s per node +Grid : Message : Stencil 14.7946 GB/s per node +Grid : Message : Average mflops/s per call per node : 664318 +Grid : Message : Average mflops/s per call per node : 802147 +Grid : Message : Average mflops/s per call per node : 823625 +Grid : Message : Average mflops/s per call per node : 660868 +Grid : Message : Average mflops/s per call per node (full): 315826 +Grid : Message : Average mflops/s per call per node (full): 438869 +Grid : Message : Average mflops/s per call per node (full): 445153 +Grid : Message : Average mflops/s per call per node (full): 307083 +Grid : Message : Stencil 14.3068 GB/s per node +Grid : Message : Stencil 16.111 GB/s per node +Grid : Message : Stencil 17.2469 GB/s per node +Grid : Message : Stencil 12.5397 GB/s per node +Grid : Message : Average mflops/s per call per node : 662917 +Grid : Message : Average mflops/s per call per node : 809051 +Grid : Message : Average mflops/s per call per node : 821687 +Grid : Message : Average mflops/s per call per node : 666637 +Grid : Message : Average mflops/s per call per node (full): 316955 +Grid : Message : Average mflops/s per call per node (full): 422775 +Grid : Message : Average mflops/s per call per node (full): 442596 +Grid : Message : Average mflops/s per call per node (full): 304989 +Grid : Message : Stencil 13.1324 GB/s per node +Grid : Message : Stencil 15.9543 GB/s per node +Grid : Message : Stencil 17.6188 GB/s per node +Grid : Message : Stencil 13.5612 GB/s per node +Grid : Message : Average mflops/s per call per node : 666396 +Grid : Message : Average mflops/s per call per node : 809715 +Grid : Message : Average mflops/s per call per node : 820205 +Grid : Message : Average mflops/s per call per node : 664769 +Grid : Message : Average mflops/s 
per call per node (full): 315161 +Grid : Message : Average mflops/s per call per node (full): 426166 +Grid : Message : Average mflops/s per call per node (full): 444006 +Grid : Message : Average mflops/s per call per node (full): 305982 +Grid : Message : Stencil 12.978 GB/s per node +Grid : Message : Stencil 17.042 GB/s per node +Grid : Message : Stencil 17.937 GB/s per node +Grid : Message : Stencil 11.8709 GB/s per node +Grid : Message : Average mflops/s per call per node : 666662 +Grid : Message : Average mflops/s per call per node : 799634 +Grid : Message : Average mflops/s per call per node : 819919 +Grid : Message : Average mflops/s per call per node : 663800 +Grid : Message : Average mflops/s per call per node (full): 315038 +Grid : Message : Average mflops/s per call per node (full): 436877 +Grid : Message : Average mflops/s per call per node (full): 445358 +Grid : Message : Average mflops/s per call per node (full): 300508 +Grid : Message : Stencil 12.4893 GB/s per node +Grid : Message : Stencil 17.7135 GB/s per node +Grid : Message : Stencil 17.2763 GB/s per node +Grid : Message : Stencil 12.7222 GB/s per node +Grid : Message : Average mflops/s per call per node : 664912 +Grid : Message : Average mflops/s per call per node : 806802 +Grid : Message : Average mflops/s per call per node : 826341 +Grid : Message : Average mflops/s per call per node : 659176 +Grid : Message : Average mflops/s per call per node (full): 312454 +Grid : Message : Average mflops/s per call per node (full): 441070 +Grid : Message : Average mflops/s per call per node (full): 442784 +Grid : Message : Average mflops/s per call per node (full): 304059 +Grid : Message : Stencil 12.7942 GB/s per node +Grid : Message : Stencil 17.2664 GB/s per node +Grid : Message : Stencil 17.7487 GB/s per node +Grid : Message : Stencil 12.6147 GB/s per node +Grid : Message : Average mflops/s per call per node : 663989 +Grid : Message : Average mflops/s per call per node : 802431 +Grid : Message : Average 
mflops/s per call per node : 825882 +Grid : Message : Average mflops/s per call per node : 669656 +Grid : Message : Average mflops/s per call per node (full): 303988 +Grid : Message : Average mflops/s per call per node (full): 437662 +Grid : Message : Average mflops/s per call per node (full): 444731 +Grid : Message : Average mflops/s per call per node (full): 305627 +Grid : Message : Stencil 13.6631 GB/s per node +Grid : Message : Stencil 16.8758 GB/s per node +Grid : Message : Stencil 17.9437 GB/s per node +Grid : Message : Stencil 12.5825 GB/s per node +Grid : Message : Average mflops/s per call per node : 664333 +Grid : Message : Average mflops/s per call per node : 806678 +Grid : Message : Average mflops/s per call per node : 818733 +Grid : Message : Average mflops/s per call per node : 661848 +Grid : Message : Average mflops/s per call per node (full): 315757 +Grid : Message : Average mflops/s per call per node (full): 438414 +Grid : Message : Average mflops/s per call per node (full): 445361 +Grid : Message : Average mflops/s per call per node (full): 302864 +Grid : Message : Stencil 12.1964 GB/s per node +Grid : Message : Stencil 14.0574 GB/s per node +Grid : Message : Stencil 17.7224 GB/s per node +Grid : Message : Stencil 12.4657 GB/s per node +Grid : Message : Average mflops/s per call per node : 667607 +Grid : Message : Average mflops/s per call per node : 807422 +Grid : Message : Average mflops/s per call per node : 825382 +Grid : Message : Average mflops/s per call per node : 670496 +Grid : Message : Average mflops/s per call per node (full): 310813 +Grid : Message : Average mflops/s per call per node (full): 400623 +Grid : Message : Average mflops/s per call per node (full): 446110 +Grid : Message : Average mflops/s per call per node (full): 305631 +Grid : Message : Stencil 12.9772 GB/s per node +Grid : Message : Stencil 16.5692 GB/s per node +Grid : Message : Stencil 17.082 GB/s per node +Grid : Message : Stencil 13.1432 GB/s per node +Grid : 
Message : Average mflops/s per call per node : 663156 +Grid : Message : Average mflops/s per call per node : 800475 +Grid : Message : Average mflops/s per call per node : 827650 +Grid : Message : Average mflops/s per call per node : 663837 +Grid : Message : Average mflops/s per call per node (full): 313312 +Grid : Message : Average mflops/s per call per node (full): 433722 +Grid : Message : Average mflops/s per call per node (full): 440128 +Grid : Message : Average mflops/s per call per node (full): 305750 +Grid : Message : Stencil 12.6959 GB/s per node +Grid : Message : Stencil 9.75687 GB/s per node +Grid : Message : Stencil 17.5084 GB/s per node +Grid : Message : Stencil 12.0511 GB/s per node +Grid : Message : Average mflops/s per call per node : 670135 +Grid : Message : Average mflops/s per call per node : 813094 +Grid : Message : Average mflops/s per call per node : 828302 +Grid : Message : Average mflops/s per call per node : 668614 +Grid : Message : Average mflops/s per call per node (full): 314985 +Grid : Message : Average mflops/s per call per node (full): 310972 +Grid : Message : Average mflops/s per call per node (full): 445489 +Grid : Message : Average mflops/s per call per node (full): 302304 +Grid : Message : Stencil 15.1776 GB/s per node +Grid : Message : Stencil 16.8651 GB/s per node +Grid : Message : Stencil 16.6968 GB/s per node +Grid : Message : Stencil 12.2462 GB/s per node +Grid : Message : Average mflops/s per call per node : 664631 +Grid : Message : Average mflops/s per call per node : 804873 +Grid : Message : Average mflops/s per call per node : 816983 +Grid : Message : Average mflops/s per call per node : 671057 +Grid : Message : Average mflops/s per call per node (full): 317617 +Grid : Message : Average mflops/s per call per node (full): 436935 +Grid : Message : Average mflops/s per call per node (full): 426973 +Grid : Message : Average mflops/s per call per node (full): 304192 +Grid : Message : Stencil 13.7144 GB/s per node +Grid : Message 
: Stencil 18.7334 GB/s per node +Grid : Message : Stencil 17.9377 GB/s per node +Grid : Message : Stencil 13.5519 GB/s per node +Grid : Message : Average mflops/s per call per node : 665825 +Grid : Message : Average mflops/s per call per node : 807739 +Grid : Message : Average mflops/s per call per node : 821023 +Grid : Message : Average mflops/s per call per node : 665112 +Grid : Message : Average mflops/s per call per node (full): 317255 +Grid : Message : Average mflops/s per call per node (full): 441000 +Grid : Message : Average mflops/s per call per node (full): 446026 +Grid : Message : Average mflops/s per call per node (full): 306568 +Grid : Message : Stencil 14.4005 GB/s per node +Grid : Message : Stencil 14.9304 GB/s per node +Grid : Message : Stencil 17.8885 GB/s per node +Grid : Message : Stencil 12.5922 GB/s per node +Grid : Message : Average mflops/s per call per node : 666094 +Grid : Message : Average mflops/s per call per node : 805924 +Grid : Message : Average mflops/s per call per node : 821798 +Grid : Message : Average mflops/s per call per node : 670074 +Grid : Message : Average mflops/s per call per node (full): 316933 +Grid : Message : Average mflops/s per call per node (full): 415751 +Grid : Message : Average mflops/s per call per node (full): 446741 +Grid : Message : Average mflops/s per call per node (full): 305588 +Grid : Message : Stencil 13.4556 GB/s per node +Grid : Message : Stencil 17.7989 GB/s per node +Grid : Message : Stencil 17.8123 GB/s per node +Grid : Message : Stencil 13.2153 GB/s per node +Grid : Message : Average mflops/s per call per node : 665540 +Grid : Message : Average mflops/s per call per node : 798428 +Grid : Message : Average mflops/s per call per node : 819450 +Grid : Message : Average mflops/s per call per node : 662678 +Grid : Message : Average mflops/s per call per node (full): 314840 +Grid : Message : Average mflops/s per call per node (full): 438204 +Grid : Message : Average mflops/s per call per node (full): 
444908 +Grid : Message : Average mflops/s per call per node (full): 305172 +Grid : Message : Stencil 14.147 GB/s per node +Grid : Message : Stencil 16.5951 GB/s per node +Grid : Message : Stencil 17.045 GB/s per node +Grid : Message : Stencil 12.3705 GB/s per node +Grid : Message : Average mflops/s per call per node : 667115 +Grid : Message : Average mflops/s per call per node : 802583 +Grid : Message : Average mflops/s per call per node : 820262 +Grid : Message : Average mflops/s per call per node : 661130 +Grid : Message : Average mflops/s per call per node (full): 316760 +Grid : Message : Average mflops/s per call per node (full): 425907 +Grid : Message : Average mflops/s per call per node (full): 440866 +Grid : Message : Average mflops/s per call per node (full): 302945 +Grid : Message : Stencil 12.4367 GB/s per node +Grid : Message : Stencil 16.6064 GB/s per node +Grid : Message : Stencil 17.5276 GB/s per node +Grid : Message : Stencil 12.5734 GB/s per node +Grid : Message : Average mflops/s per call per node : 666864 +Grid : Message : Average mflops/s per call per node : 798125 +Grid : Message : Average mflops/s per call per node : 820718 +Grid : Message : Average mflops/s per call per node : 665262 +Grid : Message : Average mflops/s per call per node (full): 310215 +Grid : Message : Average mflops/s per call per node (full): 429816 +Grid : Message : Average mflops/s per call per node (full): 442081 +Grid : Message : Average mflops/s per call per node (full): 304755 +Grid : Message : Stencil 13.0628 GB/s per node +Grid : Message : Stencil 16.6893 GB/s per node +Grid : Message : Stencil 17.1925 GB/s per node +Grid : Message : Stencil 12.3717 GB/s per node +Grid : Message : Average mflops/s per call per node : 665550 +Grid : Message : Average mflops/s per call per node : 803240 +Grid : Message : Average mflops/s per call per node : 817650 +Grid : Message : Average mflops/s per call per node : 661483 +Grid : Message : Average mflops/s per call per node (full): 
313860 +Grid : Message : Average mflops/s per call per node (full): 435647 +Grid : Message : Average mflops/s per call per node (full): 442649 +Grid : Message : Average mflops/s per call per node (full): 303165 +Grid : Message : Stencil 12.7405 GB/s per node +Grid : Message : Stencil 17.0502 GB/s per node +Grid : Message : Stencil 17.5352 GB/s per node +Grid : Message : Stencil 11.8669 GB/s per node +Grid : Message : Average mflops/s per call per node : 667635 +Grid : Message : Average mflops/s per call per node : 797258 +Grid : Message : Average mflops/s per call per node : 815596 +Grid : Message : Average mflops/s per call per node : 663218 +Grid : Message : Average mflops/s per call per node (full): 314792 +Grid : Message : Average mflops/s per call per node (full): 436536 +Grid : Message : Average mflops/s per call per node (full): 443681 +Grid : Message : Average mflops/s per call per node (full): 300390 +Grid : Message : Stencil 12.4671 GB/s per node +Grid : Message : Stencil 16.5639 GB/s per node +Grid : Message : Stencil 18.9733 GB/s per node +Grid : Message : Stencil 11.9817 GB/s per node +Grid : Message : Average mflops/s per call per node : 664843 +Grid : Message : Average mflops/s per call per node : 800012 +Grid : Message : Average mflops/s per call per node : 819876 +Grid : Message : Average mflops/s per call per node : 663148 +Grid : Message : Average mflops/s per call per node (full): 311164 +Grid : Message : Average mflops/s per call per node (full): 435468 +Grid : Message : Average mflops/s per call per node (full): 448897 +Grid : Message : Average mflops/s per call per node (full): 301392 +Grid : Message : Stencil 12.9655 GB/s per node +Grid : Message : Stencil 16.8094 GB/s per node +Grid : Message : Stencil 17.5092 GB/s per node +Grid : Message : Stencil 12.7611 GB/s per node +Grid : Message : Average mflops/s per call per node : 661145 +Grid : Message : Average mflops/s per call per node : 802228 +Grid : Message : Average mflops/s per call per 
node : 826971 +Grid : Message : Average mflops/s per call per node : 660795 +Grid : Message : Average mflops/s per call per node (full): 314337 +Grid : Message : Average mflops/s per call per node (full): 435938 +Grid : Message : Average mflops/s per call per node (full): 445923 +Grid : Message : Average mflops/s per call per node (full): 304664 +Grid : Message : Stencil 13.1316 GB/s per node +Grid : Message : Stencil 13.4993 GB/s per node +Grid : Message : Stencil 17.7713 GB/s per node +Grid : Message : Stencil 12.6489 GB/s per node +Grid : Message : Average mflops/s per call per node : 663786 +Grid : Message : Average mflops/s per call per node : 800213 +Grid : Message : Average mflops/s per call per node : 822439 +Grid : Message : Average mflops/s per call per node : 662126 +Grid : Message : Average mflops/s per call per node (full): 315340 +Grid : Message : Average mflops/s per call per node (full): 388301 +Grid : Message : Average mflops/s per call per node (full): 446386 +Grid : Message : Average mflops/s per call per node (full): 304489 +Grid : Message : Stencil 12.78 GB/s per node +Grid : Message : Stencil 16.1777 GB/s per node +Grid : Message : Stencil 17.0707 GB/s per node +Grid : Message : Stencil 13.3816 GB/s per node +Grid : Message : Average mflops/s per call per node : 666441 +Grid : Message : Average mflops/s per call per node : 803715 +Grid : Message : Average mflops/s per call per node : 819076 +Grid : Message : Average mflops/s per call per node : 661900 +Grid : Message : Average mflops/s per call per node (full): 314891 +Grid : Message : Average mflops/s per call per node (full): 430371 +Grid : Message : Average mflops/s per call per node (full): 441525 +Grid : Message : Average mflops/s per call per node (full): 304762 +Grid : Message : Stencil 14.2821 GB/s per node +Grid : Message : Stencil 17.3567 GB/s per node +Grid : Message : Stencil 17.0311 GB/s per node +Grid : Message : Stencil 12.3513 GB/s per node +Grid : Message : Average mflops/s 
per call per node : 660644 +Grid : Message : Average mflops/s per call per node : 807218 +Grid : Message : Average mflops/s per call per node : 829333 +Grid : Message : Average mflops/s per call per node : 671991 +Grid : Message : Average mflops/s per call per node (full): 315644 +Grid : Message : Average mflops/s per call per node (full): 438153 +Grid : Message : Average mflops/s per call per node (full): 441341 +Grid : Message : Average mflops/s per call per node (full): 304531 +Grid : Message : Stencil 14.2211 GB/s per node +Grid : Message : Stencil 16.8738 GB/s per node +Grid : Message : Stencil 17.8739 GB/s per node +Grid : Message : Stencil 12.481 GB/s per node +Grid : Message : Average mflops/s per call per node : 658591 +Grid : Message : Average mflops/s per call per node : 803936 +Grid : Message : Average mflops/s per call per node : 819286 +Grid : Message : Average mflops/s per call per node : 666410 +Grid : Message : Average mflops/s per call per node (full): 314857 +Grid : Message : Average mflops/s per call per node (full): 438458 +Grid : Message : Average mflops/s per call per node (full): 436476 +Grid : Message : Average mflops/s per call per node (full): 301718 +Grid : Message : Stencil 12.858 GB/s per node +Grid : Message : Stencil 17.1302 GB/s per node +Grid : Message : Stencil 17.3094 GB/s per node +Grid : Message : Stencil 12.7569 GB/s per node +Grid : Message : Average mflops/s per call per node : 664558 +Grid : Message : Average mflops/s per call per node : 801165 +Grid : Message : Average mflops/s per call per node : 821151 +Grid : Message : Average mflops/s per call per node : 662358 +Grid : Message : Average mflops/s per call per node (full): 314254 +Grid : Message : Average mflops/s per call per node (full): 435368 +Grid : Message : Average mflops/s per call per node (full): 444693 +Grid : Message : Average mflops/s per call per node (full): 304294 +Grid : Message : Stencil 15.4771 GB/s per node +Grid : Message : Stencil 14.5214 GB/s per 
node +Grid : Message : Stencil 18.2442 GB/s per node +Grid : Message : Stencil 13.1201 GB/s per node +Grid : Message : Average mflops/s per call per node : 659965 +Grid : Message : Average mflops/s per call per node : 807033 +Grid : Message : Average mflops/s per call per node : 820979 +Grid : Message : Average mflops/s per call per node : 662589 +Grid : Message : Average mflops/s per call per node (full): 316696 +Grid : Message : Average mflops/s per call per node (full): 409084 +Grid : Message : Average mflops/s per call per node (full): 448128 +Grid : Message : Average mflops/s per call per node (full): 304887 +Grid : Message : Stencil 12.5206 GB/s per node +Grid : Message : Stencil 16.7692 GB/s per node +Grid : Message : Stencil 17.6077 GB/s per node +Grid : Message : Stencil 12.2452 GB/s per node +Grid : Message : Average mflops/s per call per node : 664987 +Grid : Message : Average mflops/s per call per node : 804050 +Grid : Message : Average mflops/s per call per node : 822386 +Grid : Message : Average mflops/s per call per node : 665008 +Grid : Message : Average mflops/s per call per node (full): 312935 +Grid : Message : Average mflops/s per call per node (full): 435278 +Grid : Message : Average mflops/s per call per node (full): 445270 +Grid : Message : Average mflops/s per call per node (full): 302602 +Grid : Message : Stencil 12.8442 GB/s per node +Grid : Message : Stencil 17.2721 GB/s per node +Grid : Message : Stencil 18.2259 GB/s per node +Grid : Message : Stencil 12.1537 GB/s per node +Grid : Message : Average mflops/s per call per node : 665418 +Grid : Message : Average mflops/s per call per node : 800007 +Grid : Message : Average mflops/s per call per node : 815405 +Grid : Message : Average mflops/s per call per node : 670901 +Grid : Message : Average mflops/s per call per node (full): 312220 +Grid : Message : Average mflops/s per call per node (full): 432543 +Grid : Message : Average mflops/s per call per node (full): 445549 +Grid : Message : 
Average mflops/s per call per node (full): 303527 +Grid : Message : Stencil 12.5181 GB/s per node +Grid : Message : Stencil 17.2748 GB/s per node +Grid : Message : Stencil 18.1267 GB/s per node +Grid : Message : Stencil 13.6044 GB/s per node +Grid : Message : Average mflops/s per call per node : 667650 +Grid : Message : Average mflops/s per call per node : 800526 +Grid : Message : Average mflops/s per call per node : 818541 +Grid : Message : Average mflops/s per call per node : 660433 +Grid : Message : Average mflops/s per call per node (full): 313808 +Grid : Message : Average mflops/s per call per node (full): 437457 +Grid : Message : Average mflops/s per call per node (full): 445161 +Grid : Message : Average mflops/s per call per node (full): 305531 +Grid : Message : Stencil 13.9883 GB/s per node +Grid : Message : Stencil 17.3733 GB/s per node +Grid : Message : Stencil 18.4282 GB/s per node +Grid : Message : Stencil 12.2481 GB/s per node +Grid : Message : Average mflops/s per call per node : 665282 +Grid : Message : Average mflops/s per call per node : 803784 +Grid : Message : Average mflops/s per call per node : 821363 +Grid : Message : Average mflops/s per call per node : 673318 +Grid : Message : Average mflops/s per call per node (full): 316685 +Grid : Message : Average mflops/s per call per node (full): 438703 +Grid : Message : Average mflops/s per call per node (full): 446737 +Grid : Message : Average mflops/s per call per node (full): 304041 +Grid : Message : Stencil 13.2789 GB/s per node +Grid : Message : Stencil 16.4879 GB/s per node +Grid : Message : Stencil 17.6746 GB/s per node +Grid : Message : Stencil 13.917 GB/s per node +Grid : Message : Average mflops/s per call per node : 665807 +Grid : Message : Average mflops/s per call per node : 803510 +Grid : Message : Average mflops/s per call per node : 816688 +Grid : Message : Average mflops/s per call per node : 661748 +Grid : Message : Average mflops/s per call per node (full): 313889 +Grid : Message : 
Average mflops/s per call per node (full): 434971 +Grid : Message : Average mflops/s per call per node (full): 445035 +Grid : Message : Average mflops/s per call per node (full): 307077 +Grid : Message : Stencil 12.2734 GB/s per node +Grid : Message : Stencil 12.66 GB/s per node +Grid : Message : Stencil 17.0617 GB/s per node +Grid : Message : Stencil 13.5623 GB/s per node +Grid : Message : Average mflops/s per call per node : 667015 +Grid : Message : Average mflops/s per call per node : 807794 +Grid : Message : Average mflops/s per call per node : 822143 +Grid : Message : Average mflops/s per call per node : 661033 +Grid : Message : Average mflops/s per call per node (full): 311024 +Grid : Message : Average mflops/s per call per node (full): 374728 +Grid : Message : Average mflops/s per call per node (full): 441706 +Grid : Message : Average mflops/s per call per node (full): 305472 +Grid : Message : Stencil 12.1509 GB/s per node +Grid : Message : Stencil 17.0251 GB/s per node +Grid : Message : Stencil 16.1424 GB/s per node +Grid : Message : Stencil 12.3466 GB/s per node +Grid : Message : Average mflops/s per call per node : 668234 +Grid : Message : Average mflops/s per call per node : 806025 +Grid : Message : Average mflops/s per call per node : 821907 +Grid : Message : Average mflops/s per call per node : 665139 +Grid : Message : Average mflops/s per call per node (full): 310571 +Grid : Message : Average mflops/s per call per node (full): 438555 +Grid : Message : Average mflops/s per call per node (full): 417720 +Grid : Message : Average mflops/s per call per node (full): 301979 +Grid : Message : Stencil 12.3345 GB/s per node +Grid : Message : Stencil 16.5728 GB/s per node +Grid : Message : Stencil 17.1912 GB/s per node +Grid : Message : Stencil 12.7909 GB/s per node +Grid : Message : Average mflops/s per call per node : 667776 +Grid : Message : Average mflops/s per call per node : 809703 +Grid : Message : Average mflops/s per call per node : 827346 +Grid : 
Message : Average mflops/s per call per node : 665721 +Grid : Message : Average mflops/s per call per node (full): 310230 +Grid : Message : Average mflops/s per call per node (full): 435928 +Grid : Message : Average mflops/s per call per node (full): 443387 +Grid : Message : Average mflops/s per call per node (full): 305302 +Grid : Message : Stencil 14.1641 GB/s per node +Grid : Message : Stencil 16.2782 GB/s per node +Grid : Message : Stencil 17.5187 GB/s per node +Grid : Message : Stencil 12.2126 GB/s per node +Grid : Message : Average mflops/s per call per node : 665588 +Grid : Message : Average mflops/s per call per node : 803008 +Grid : Message : Average mflops/s per call per node : 823725 +Grid : Message : Average mflops/s per call per node : 670796 +Grid : Message : Average mflops/s per call per node (full): 316399 +Grid : Message : Average mflops/s per call per node (full): 432108 +Grid : Message : Average mflops/s per call per node (full): 445275 +Grid : Message : Average mflops/s per call per node (full): 304455 +Grid : Message : Stencil 12.3719 GB/s per node +Grid : Message : Stencil 9.44157 GB/s per node +Grid : Message : Stencil 18.2184 GB/s per node +Grid : Message : Stencil 13.0357 GB/s per node +Grid : Message : Average mflops/s per call per node : 673287 +Grid : Message : Average mflops/s per call per node : 810733 +Grid : Message : Average mflops/s per call per node : 826413 +Grid : Message : Average mflops/s per call per node : 664360 +Grid : Message : Average mflops/s per call per node (full): 313111 +Grid : Message : Average mflops/s per call per node (full): 303110 +Grid : Message : Average mflops/s per call per node (full): 447822 +Grid : Message : Average mflops/s per call per node (full): 305094 +Grid : Message : Stencil 14.4216 GB/s per node +Grid : Message : Stencil 16.8166 GB/s per node +Grid : Message : Stencil 17.0233 GB/s per node +Grid : Message : Stencil 12.4243 GB/s per node +Grid : Message : Average mflops/s per call per node : 
664592 +Grid : Message : Average mflops/s per call per node : 799741 +Grid : Message : Average mflops/s per call per node : 817291 +Grid : Message : Average mflops/s per call per node : 660091 +Grid : Message : Average mflops/s per call per node (full): 316638 +Grid : Message : Average mflops/s per call per node (full): 436341 +Grid : Message : Average mflops/s per call per node (full): 440702 +Grid : Message : Average mflops/s per call per node (full): 302313 +Grid : Message : Stencil 14.0866 GB/s per node +Grid : Message : Stencil 15.121 GB/s per node +Grid : Message : Stencil 17.1513 GB/s per node +Grid : Message : Stencil 14.4614 GB/s per node +Grid : Message : Average mflops/s per call per node : 662020 +Grid : Message : Average mflops/s per call per node : 805280 +Grid : Message : Average mflops/s per call per node : 825135 +Grid : Message : Average mflops/s per call per node : 663528 +Grid : Message : Average mflops/s per call per node (full): 315369 +Grid : Message : Average mflops/s per call per node (full): 416997 +Grid : Message : Average mflops/s per call per node (full): 442667 +Grid : Message : Average mflops/s per call per node (full): 307982 +Grid : Message : Stencil 13.0421 GB/s per node +Grid : Message : Stencil 13.475 GB/s per node +Grid : Message : Stencil 17.8514 GB/s per node +Grid : Message : Stencil 12.3485 GB/s per node +Grid : Message : Average mflops/s per call per node : 668948 +Grid : Message : Average mflops/s per call per node : 806437 +Grid : Message : Average mflops/s per call per node : 816250 +Grid : Message : Average mflops/s per call per node : 667938 +Grid : Message : Average mflops/s per call per node (full): 315978 +Grid : Message : Average mflops/s per call per node (full): 390547 +Grid : Message : Average mflops/s per call per node (full): 446984 +Grid : Message : Average mflops/s per call per node (full): 304395 +Grid : Message : Stencil 12.5389 GB/s per node +Grid : Message : Stencil 16.7972 GB/s per node +Grid : Message 
: Stencil 17.4921 GB/s per node +Grid : Message : Stencil 12.8593 GB/s per node +Grid : Message : Average mflops/s per call per node : 668172 +Grid : Message : Average mflops/s per call per node : 801761 +Grid : Message : Average mflops/s per call per node : 818191 +Grid : Message : Average mflops/s per call per node : 667537 +Grid : Message : Average mflops/s per call per node (full): 313733 +Grid : Message : Average mflops/s per call per node (full): 435641 +Grid : Message : Average mflops/s per call per node (full): 443637 +Grid : Message : Average mflops/s per call per node (full): 305751 +Grid : Message : Stencil 12.9159 GB/s per node +Grid : Message : Stencil 17.2392 GB/s per node +Grid : Message : Stencil 17.8093 GB/s per node +Grid : Message : Stencil 12.1072 GB/s per node +Grid : Message : Average mflops/s per call per node : 665873 +Grid : Message : Average mflops/s per call per node : 801432 +Grid : Message : Average mflops/s per call per node : 819327 +Grid : Message : Average mflops/s per call per node : 666187 +Grid : Message : Average mflops/s per call per node (full): 315585 +Grid : Message : Average mflops/s per call per node (full): 439621 +Grid : Message : Average mflops/s per call per node (full): 445822 +Grid : Message : Average mflops/s per call per node (full): 302671 +Grid : Message : Stencil 15.4449 GB/s per node +Grid : Message : Stencil 10.9898 GB/s per node +Grid : Message : Stencil 16.9168 GB/s per node +Grid : Message : Stencil 12.7072 GB/s per node +Grid : Message : Average mflops/s per call per node : 659037 +Grid : Message : Average mflops/s per call per node : 807417 +Grid : Message : Average mflops/s per call per node : 823099 +Grid : Message : Average mflops/s per call per node : 667598 +Grid : Message : Average mflops/s per call per node (full): 316591 +Grid : Message : Average mflops/s per call per node (full): 339479 +Grid : Message : Average mflops/s per call per node (full): 430694 +Grid : Message : Average mflops/s per call 
per node (full): 303012 +Grid : Message : Stencil 14.7637 GB/s per node +Grid : Message : Stencil 16.2874 GB/s per node +Grid : Message : Stencil 18.6323 GB/s per node +Grid : Message : Stencil 12.9178 GB/s per node +Grid : Message : Average mflops/s per call per node : 660695 +Grid : Message : Average mflops/s per call per node : 801611 +Grid : Message : Average mflops/s per call per node : 815351 +Grid : Message : Average mflops/s per call per node : 669428 +Grid : Message : Average mflops/s per call per node (full): 316066 +Grid : Message : Average mflops/s per call per node (full): 431920 +Grid : Message : Average mflops/s per call per node (full): 448644 +Grid : Message : Average mflops/s per call per node (full): 306518 +Grid : Message : Stencil 14.3132 GB/s per node +Grid : Message : Stencil 17.1266 GB/s per node +Grid : Message : Stencil 17.0616 GB/s per node +Grid : Message : Stencil 13.8447 GB/s per node +Grid : Message : Average mflops/s per call per node : 658407 +Grid : Message : Average mflops/s per call per node : 803101 +Grid : Message : Average mflops/s per call per node : 819603 +Grid : Message : Average mflops/s per call per node : 659596 +Grid : Message : Average mflops/s per call per node (full): 315588 +Grid : Message : Average mflops/s per call per node (full): 438032 +Grid : Message : Average mflops/s per call per node (full): 440639 +Grid : Message : Average mflops/s per call per node (full): 305823 +Grid : Message : Stencil 13.4098 GB/s per node +Grid : Message : Stencil 16.5838 GB/s per node +Grid : Message : Stencil 17.4944 GB/s per node +Grid : Message : Stencil 12.611 GB/s per node +Grid : Message : Average mflops/s per call per node : 661273 +Grid : Message : Average mflops/s per call per node : 800292 +Grid : Message : Average mflops/s per call per node : 821205 +Grid : Message : Average mflops/s per call per node : 666224 +Grid : Message : Average mflops/s per call per node (full): 314171 +Grid : Message : Average mflops/s per call 
per node (full): 435713 +Grid : Message : Average mflops/s per call per node (full): 445784 +Grid : Message : Average mflops/s per call per node (full): 305169 +Grid : Message : Stencil 13.1361 GB/s per node +Grid : Message : Stencil 17.236 GB/s per node +Grid : Message : Stencil 17.6766 GB/s per node +Grid : Message : Stencil 14.132 GB/s per node +Grid : Message : Average mflops/s per call per node : 663250 +Grid : Message : Average mflops/s per call per node : 807288 +Grid : Message : Average mflops/s per call per node : 824157 +Grid : Message : Average mflops/s per call per node : 660656 +Grid : Message : Average mflops/s per call per node (full): 314434 +Grid : Message : Average mflops/s per call per node (full): 437413 +Grid : Message : Average mflops/s per call per node (full): 446797 +Grid : Message : Average mflops/s per call per node (full): 307108 +Grid : Message : Stencil 12.9989 GB/s per node +Grid : Message : Stencil 17.8977 GB/s per node +Grid : Message : Stencil 17.4466 GB/s per node +Grid : Message : Stencil 12.2233 GB/s per node +Grid : Message : Average mflops/s per call per node : 662489 +Grid : Message : Average mflops/s per call per node : 801498 +Grid : Message : Average mflops/s per call per node : 827275 +Grid : Message : Average mflops/s per call per node : 667737 +Grid : Message : Average mflops/s per call per node (full): 311317 +Grid : Message : Average mflops/s per call per node (full): 435001 +Grid : Message : Average mflops/s per call per node (full): 441079 +Grid : Message : Average mflops/s per call per node (full): 303713 +Grid : Message : Stencil 13.7653 GB/s per node +Grid : Message : Stencil 17.0009 GB/s per node +Grid : Message : Stencil 17.617 GB/s per node +Grid : Message : Stencil 12.0234 GB/s per node +Grid : Message : Average mflops/s per call per node : 664655 +Grid : Message : Average mflops/s per call per node : 802383 +Grid : Message : Average mflops/s per call per node : 821660 +Grid : Message : Average mflops/s per 
call per node : 664596 +Grid : Message : Average mflops/s per call per node (full): 315360 +Grid : Message : Average mflops/s per call per node (full): 437320 +Grid : Message : Average mflops/s per call per node (full): 444749 +Grid : Message : Average mflops/s per call per node (full): 302217 +Grid : Message : Stencil 13.2126 GB/s per node +Grid : Message : Stencil 17.8005 GB/s per node +Grid : Message : Stencil 17.0058 GB/s per node +Grid : Message : Stencil 12.0881 GB/s per node +Grid : Message : Average mflops/s per call per node : 664981 +Grid : Message : Average mflops/s per call per node : 806023 +Grid : Message : Average mflops/s per call per node : 822818 +Grid : Message : Average mflops/s per call per node : 667398 +Grid : Message : Average mflops/s per call per node (full): 315465 +Grid : Message : Average mflops/s per call per node (full): 441619 +Grid : Message : Average mflops/s per call per node (full): 440684 +Grid : Message : Average mflops/s per call per node (full): 302959 +Grid : Message : Stencil 13.5388 GB/s per node +Grid : Message : Stencil 18.0366 GB/s per node +Grid : Message : Stencil 17.8471 GB/s per node +Grid : Message : Stencil 12.4445 GB/s per node +Grid : Message : Average mflops/s per call per node : 663666 +Grid : Message : Average mflops/s per call per node : 798323 +Grid : Message : Average mflops/s per call per node : 822953 +Grid : Message : Average mflops/s per call per node : 666327 +Grid : Message : Average mflops/s per call per node (full): 311731 +Grid : Message : Average mflops/s per call per node (full): 439801 +Grid : Message : Average mflops/s per call per node (full): 446810 +Grid : Message : Average mflops/s per call per node (full): 304155 +Grid : Message : Stencil 13.4909 GB/s per node +Grid : Message : Stencil 16.82 GB/s per node +Grid : Message : Stencil 17.578 GB/s per node +Grid : Message : Stencil 12.1492 GB/s per node +Grid : Message : Average mflops/s per call per node : 663940 +Grid : Message : Average 
mflops/s per call per node : 804399 +Grid : Message : Average mflops/s per call per node : 826470 +Grid : Message : Average mflops/s per call per node : 667211 +Grid : Message : Average mflops/s per call per node (full): 314458 +Grid : Message : Average mflops/s per call per node (full): 433138 +Grid : Message : Average mflops/s per call per node (full): 445496 +Grid : Message : Average mflops/s per call per node (full): 303364 +Grid : Message : Stencil 13.2232 GB/s per node +Grid : Message : Stencil 16.7092 GB/s per node +Grid : Message : Stencil 18.4317 GB/s per node +Grid : Message : Stencil 13.5936 GB/s per node +Grid : Message : Average mflops/s per call per node : 667945 +Grid : Message : Average mflops/s per call per node : 808507 +Grid : Message : Average mflops/s per call per node : 819600 +Grid : Message : Average mflops/s per call per node : 664098 +Grid : Message : Average mflops/s per call per node (full): 315045 +Grid : Message : Average mflops/s per call per node (full): 437537 +Grid : Message : Average mflops/s per call per node (full): 446572 +Grid : Message : Average mflops/s per call per node (full): 307152 +Grid : Message : Stencil 12.7131 GB/s per node +Grid : Message : Stencil 18.2008 GB/s per node +Grid : Message : Stencil 17.2221 GB/s per node +Grid : Message : Stencil 12.6037 GB/s per node +Grid : Message : Average mflops/s per call per node : 666033 +Grid : Message : Average mflops/s per call per node : 807661 +Grid : Message : Average mflops/s per call per node : 826467 +Grid : Message : Average mflops/s per call per node : 663763 +Grid : Message : Average mflops/s per call per node (full): 314663 +Grid : Message : Average mflops/s per call per node (full): 442038 +Grid : Message : Average mflops/s per call per node (full): 443390 +Grid : Message : Average mflops/s per call per node (full): 303637 +Grid : Message : Stencil 13.5181 GB/s per node +Grid : Message : Stencil 10.8977 GB/s per node +Grid : Message : Stencil 17.9444 GB/s per node 
+Grid : Message : Stencil 12.0613 GB/s per node +Grid : Message : Average mflops/s per call per node : 661244 +Grid : Message : Average mflops/s per call per node : 810256 +Grid : Message : Average mflops/s per call per node : 827175 +Grid : Message : Average mflops/s per call per node : 663484 +Grid : Message : Average mflops/s per call per node (full): 314360 +Grid : Message : Average mflops/s per call per node (full): 337415 +Grid : Message : Average mflops/s per call per node (full): 448417 +Grid : Message : Average mflops/s per call per node (full): 302725 +Grid : Message : Stencil 13.5593 GB/s per node +Grid : Message : Stencil 16.7705 GB/s per node +Grid : Message : Stencil 17.8027 GB/s per node +Grid : Message : Stencil 15.5264 GB/s per node +Grid : Message : Average mflops/s per call per node : 666063 +Grid : Message : Average mflops/s per call per node : 804006 +Grid : Message : Average mflops/s per call per node : 823762 +Grid : Message : Average mflops/s per call per node : 658362 +Grid : Message : Average mflops/s per call per node (full): 316752 +Grid : Message : Average mflops/s per call per node (full): 425272 +Grid : Message : Average mflops/s per call per node (full): 446540 +Grid : Message : Average mflops/s per call per node (full): 307569 +Grid : Message : Stencil 14.9456 GB/s per node +Grid : Message : Stencil 17.4885 GB/s per node +Grid : Message : Stencil 17.3422 GB/s per node +Grid : Message : Stencil 12.3453 GB/s per node +Grid : Message : Average mflops/s per call per node : 663467 +Grid : Message : Average mflops/s per call per node : 801409 +Grid : Message : Average mflops/s per call per node : 816290 +Grid : Message : Average mflops/s per call per node : 669270 +Grid : Message : Average mflops/s per call per node (full): 317182 +Grid : Message : Average mflops/s per call per node (full): 438211 +Grid : Message : Average mflops/s per call per node (full): 442788 +Grid : Message : Average mflops/s per call per node (full): 304823 +Grid : 
Message : Stencil 13.9943 GB/s per node +Grid : Message : Stencil 17.1094 GB/s per node +Grid : Message : Stencil 18.2979 GB/s per node +Grid : Message : Stencil 12.9114 GB/s per node +Grid : Message : Average mflops/s per call per node : 664422 +Grid : Message : Average mflops/s per call per node : 807973 +Grid : Message : Average mflops/s per call per node : 818457 +Grid : Message : Average mflops/s per call per node : 668307 +Grid : Message : Average mflops/s per call per node (full): 316225 +Grid : Message : Average mflops/s per call per node (full): 438810 +Grid : Message : Average mflops/s per call per node (full): 447538 +Grid : Message : Average mflops/s per call per node (full): 306457 +Grid : Message : Stencil 12.4988 GB/s per node +Grid : Message : Stencil 16.2525 GB/s per node +Grid : Message : Stencil 17.7376 GB/s per node +Grid : Message : Stencil 11.9912 GB/s per node +Grid : Message : Average mflops/s per call per node : 666295 +Grid : Message : Average mflops/s per call per node : 803164 +Grid : Message : Average mflops/s per call per node : 824180 +Grid : Message : Average mflops/s per call per node : 670430 +Grid : Message : Average mflops/s per call per node (full): 313251 +Grid : Message : Average mflops/s per call per node (full): 432096 +Grid : Message : Average mflops/s per call per node (full): 446445 +Grid : Message : Average mflops/s per call per node (full): 302679 +Grid : Message : Stencil 14.3437 GB/s per node +Grid : Message : Stencil 17.4697 GB/s per node +Grid : Message : Stencil 16.9784 GB/s per node +Grid : Message : Stencil 13.9946 GB/s per node +Grid : Message : Average mflops/s per call per node : 661222 +Grid : Message : Average mflops/s per call per node : 802776 +Grid : Message : Average mflops/s per call per node : 824277 +Grid : Message : Average mflops/s per call per node : 662738 +Grid : Message : Average mflops/s per call per node (full): 315779 +Grid : Message : Average mflops/s per call per node (full): 439965 +Grid : 
Message : Average mflops/s per call per node (full): 440859 +Grid : Message : Average mflops/s per call per node (full): 307038 +Grid : Message : Stencil 14.8629 GB/s per node +Grid : Message : Stencil 16.1011 GB/s per node +Grid : Message : Stencil 18.7162 GB/s per node +Grid : Message : Stencil 12.7471 GB/s per node +Grid : Message : Average mflops/s per call per node : 662883 +Grid : Message : Average mflops/s per call per node : 804761 +Grid : Message : Average mflops/s per call per node : 821957 +Grid : Message : Average mflops/s per call per node : 666030 +Grid : Message : Average mflops/s per call per node (full): 316443 +Grid : Message : Average mflops/s per call per node (full): 429850 +Grid : Message : Average mflops/s per call per node (full): 448767 +Grid : Message : Average mflops/s per call per node (full): 304830 +Grid : Message : Stencil 14.3938 GB/s per node +Grid : Message : Stencil 16.8236 GB/s per node +Grid : Message : Stencil 18.0328 GB/s per node +Grid : Message : Stencil 11.6708 GB/s per node +Grid : Message : Average mflops/s per call per node : 665694 +Grid : Message : Average mflops/s per call per node : 803772 +Grid : Message : Average mflops/s per call per node : 820641 +Grid : Message : Average mflops/s per call per node : 668786 +Grid : Message : Average mflops/s per call per node (full): 316913 +Grid : Message : Average mflops/s per call per node (full): 437280 +Grid : Message : Average mflops/s per call per node (full): 446599 +Grid : Message : Average mflops/s per call per node (full): 298644 +Grid : Message : Stencil 13.6226 GB/s per node +Grid : Message : Stencil 10.1949 GB/s per node +Grid : Message : Stencil 18.0608 GB/s per node +Grid : Message : Stencil 11.8098 GB/s per node +Grid : Message : Average mflops/s per call per node : 663352 +Grid : Message : Average mflops/s per call per node : 806740 +Grid : Message : Average mflops/s per call per node : 823623 +Grid : Message : Average mflops/s per call per node : 669411 +Grid : 
Message : Average mflops/s per call per node (full): 315926 +Grid : Message : Average mflops/s per call per node (full): 321727 +Grid : Message : Average mflops/s per call per node (full): 447028 +Grid : Message : Average mflops/s per call per node (full): 300846 +Grid : Message : Stencil 13.8048 GB/s per node +Grid : Message : Stencil 17.1501 GB/s per node +Grid : Message : Stencil 17.915 GB/s per node +Grid : Message : Stencil 12.8037 GB/s per node +Grid : Message : Average mflops/s per call per node : 665595 +Grid : Message : Average mflops/s per call per node : 800369 +Grid : Message : Average mflops/s per call per node : 821968 +Grid : Message : Average mflops/s per call per node : 663864 +Grid : Message : Average mflops/s per call per node (full): 316305 +Grid : Message : Average mflops/s per call per node (full): 435911 +Grid : Message : Average mflops/s per call per node (full): 445401 +Grid : Message : Average mflops/s per call per node (full): 304844 +Grid : Message : Stencil 13.3276 GB/s per node +Grid : Message : Stencil 16.5479 GB/s per node +Grid : Message : Stencil 15.9232 GB/s per node +Grid : Message : Stencil 12.6292 GB/s per node +Grid : Message : Average mflops/s per call per node : 664191 +Grid : Message : Average mflops/s per call per node : 805749 +Grid : Message : Average mflops/s per call per node : 824182 +Grid : Message : Average mflops/s per call per node : 662231 +Grid : Message : Average mflops/s per call per node (full): 316444 +Grid : Message : Average mflops/s per call per node (full): 433993 +Grid : Message : Average mflops/s per call per node (full): 418336 +Grid : Message : Average mflops/s per call per node (full): 304125 +Grid : Message : Stencil 12.9556 GB/s per node +Grid : Message : Stencil 16.5463 GB/s per node +Grid : Message : Stencil 17.379 GB/s per node +Grid : Message : Stencil 12.8479 GB/s per node +Grid : Message : Average mflops/s per call per node : 667084 +Grid : Message : Average mflops/s per call per node : 
801127 +Grid : Message : Average mflops/s per call per node : 824920 +Grid : Message : Average mflops/s per call per node : 668330 +Grid : Message : Average mflops/s per call per node (full): 315162 +Grid : Message : Average mflops/s per call per node (full): 435167 +Grid : Message : Average mflops/s per call per node (full): 444288 +Grid : Message : Average mflops/s per call per node (full): 306298 +Grid : Message : Stencil 13.9152 GB/s per node +Grid : Message : Stencil 17.8934 GB/s per node +Grid : Message : Stencil 17.9279 GB/s per node +Grid : Message : Stencil 12.7926 GB/s per node +Grid : Message : Average mflops/s per call per node : 662878 +Grid : Message : Average mflops/s per call per node : 798961 +Grid : Message : Average mflops/s per call per node : 824106 +Grid : Message : Average mflops/s per call per node : 663412 +Grid : Message : Average mflops/s per call per node (full): 315995 +Grid : Message : Average mflops/s per call per node (full): 437028 +Grid : Message : Average mflops/s per call per node (full): 446450 +Grid : Message : Average mflops/s per call per node (full): 305560 +Grid : Message : Stencil 14.802 GB/s per node +Grid : Message : Stencil 17.5445 GB/s per node +Grid : Message : Stencil 17.7725 GB/s per node +Grid : Message : Stencil 12.817 GB/s per node +Grid : Message : Average mflops/s per call per node : 660844 +Grid : Message : Average mflops/s per call per node : 797512 +Grid : Message : Average mflops/s per call per node : 816649 +Grid : Message : Average mflops/s per call per node : 661087 +Grid : Message : Average mflops/s per call per node (full): 316362 +Grid : Message : Average mflops/s per call per node (full): 436374 +Grid : Message : Average mflops/s per call per node (full): 446109 +Grid : Message : Average mflops/s per call per node (full): 304547 +Grid : Message : Stencil 13.819 GB/s per node +Grid : Message : Stencil 17.337 GB/s per node +Grid : Message : Stencil 17.8609 GB/s per node +Grid : Message : Stencil 
12.4582 GB/s per node +Grid : Message : Average mflops/s per call per node : 661106 +Grid : Message : Average mflops/s per call per node : 801815 +Grid : Message : Average mflops/s per call per node : 824608 +Grid : Message : Average mflops/s per call per node : 666840 +Grid : Message : Average mflops/s per call per node (full): 315404 +Grid : Message : Average mflops/s per call per node (full): 436362 +Grid : Message : Average mflops/s per call per node (full): 446372 +Grid : Message : Average mflops/s per call per node (full): 304462 +Grid : Message : Stencil 13.3501 GB/s per node +Grid : Message : Stencil 16.2943 GB/s per node +Grid : Message : Stencil 17.9905 GB/s per node +Grid : Message : Stencil 13.1651 GB/s per node +Grid : Message : Average mflops/s per call per node : 667754 +Grid : Message : Average mflops/s per call per node : 805087 +Grid : Message : Average mflops/s per call per node : 815001 +Grid : Message : Average mflops/s per call per node : 663820 +Grid : Message : Average mflops/s per call per node (full): 315198 +Grid : Message : Average mflops/s per call per node (full): 430787 +Grid : Message : Average mflops/s per call per node (full): 444664 +Grid : Message : Average mflops/s per call per node (full): 304994 +Grid : Message : Stencil 12.5859 GB/s per node +Grid : Message : Stencil 16.4739 GB/s per node +Grid : Message : Stencil 17.3418 GB/s per node +Grid : Message : Stencil 12.8162 GB/s per node +Grid : Message : Average mflops/s per call per node : 671092 +Grid : Message : Average mflops/s per call per node : 803434 +Grid : Message : Average mflops/s per call per node : 819660 +Grid : Message : Average mflops/s per call per node : 667983 +Grid : Message : Average mflops/s per call per node (full): 314765 +Grid : Message : Average mflops/s per call per node (full): 434197 +Grid : Message : Average mflops/s per call per node (full): 442487 +Grid : Message : Average mflops/s per call per node (full): 306098 +Grid : Message : Stencil 13.4637 
GB/s per node +Grid : Message : Stencil 7.73841 GB/s per node +Grid : Message : Stencil 17.9445 GB/s per node +Grid : Message : Stencil 13.2531 GB/s per node +Grid : Message : Average mflops/s per call per node : 666729 +Grid : Message : Average mflops/s per call per node : 809090 +Grid : Message : Average mflops/s per call per node : 823783 +Grid : Message : Average mflops/s per call per node : 661230 +Grid : Message : Average mflops/s per call per node (full): 315758 +Grid : Message : Average mflops/s per call per node (full): 259334 +Grid : Message : Average mflops/s per call per node (full): 447738 +Grid : Message : Average mflops/s per call per node (full): 304430 +Grid : Message : Stencil 12.637 GB/s per node +Grid : Message : Stencil 16.3219 GB/s per node +Grid : Message : Stencil 17.178 GB/s per node +Grid : Message : Stencil 12.8779 GB/s per node +Grid : Message : Average mflops/s per call per node : 665199 +Grid : Message : Average mflops/s per call per node : 808316 +Grid : Message : Average mflops/s per call per node : 825699 +Grid : Message : Average mflops/s per call per node : 667714 +Grid : Message : Average mflops/s per call per node (full): 313642 +Grid : Message : Average mflops/s per call per node (full): 432798 +Grid : Message : Average mflops/s per call per node (full): 442997 +Grid : Message : Average mflops/s per call per node (full): 305697 +Grid : Message : Stencil 13.8312 GB/s per node +Grid : Message : Stencil 17.2629 GB/s per node +Grid : Message : Stencil 17.0892 GB/s per node +Grid : Message : Stencil 12.7407 GB/s per node +Grid : Message : Average mflops/s per call per node : 660395 +Grid : Message : Average mflops/s per call per node : 798639 +Grid : Message : Average mflops/s per call per node : 825638 +Grid : Message : Average mflops/s per call per node : 661845 +Grid : Message : Average mflops/s per call per node (full): 315821 +Grid : Message : Average mflops/s per call per node (full): 436751 +Grid : Message : Average mflops/s 
per call per node (full): 436490 +Grid : Message : Average mflops/s per call per node (full): 303363 +Grid : Message : Stencil 14.028 GB/s per node +Grid : Message : Stencil 16.6012 GB/s per node +Grid : Message : Stencil 17.5076 GB/s per node +Grid : Message : Stencil 12.9486 GB/s per node +Grid : Message : Average mflops/s per call per node : 662658 +Grid : Message : Average mflops/s per call per node : 803401 +Grid : Message : Average mflops/s per call per node : 820992 +Grid : Message : Average mflops/s per call per node : 662085 +Grid : Message : Average mflops/s per call per node (full): 316026 +Grid : Message : Average mflops/s per call per node (full): 436247 +Grid : Message : Average mflops/s per call per node (full): 443812 +Grid : Message : Average mflops/s per call per node (full): 304274 +Grid : Message : Stencil 13.5494 GB/s per node +Grid : Message : Stencil 10.9216 GB/s per node +Grid : Message : Stencil 17.3544 GB/s per node +Grid : Message : Stencil 12.5985 GB/s per node +Grid : Message : Average mflops/s per call per node : 665674 +Grid : Message : Average mflops/s per call per node : 805542 +Grid : Message : Average mflops/s per call per node : 830375 +Grid : Message : Average mflops/s per call per node : 670696 +Grid : Message : Average mflops/s per call per node (full): 315974 +Grid : Message : Average mflops/s per call per node (full): 337951 +Grid : Message : Average mflops/s per call per node (full): 444669 +Grid : Message : Average mflops/s per call per node (full): 306025 +Grid : Message : Stencil 14.5241 GB/s per node +Grid : Message : Stencil 18.0398 GB/s per node +Grid : Message : Stencil 17.3894 GB/s per node +Grid : Message : Stencil 13.0366 GB/s per node +Grid : Message : Average mflops/s per call per node : 662707 +Grid : Message : Average mflops/s per call per node : 802766 +Grid : Message : Average mflops/s per call per node : 816841 +Grid : Message : Average mflops/s per call per node : 665898 +Grid : Message : Average mflops/s 
per call per node (full): 317317 +Grid : Message : Average mflops/s per call per node (full): 439686 +Grid : Message : Average mflops/s per call per node (full): 441888 +Grid : Message : Average mflops/s per call per node (full): 305500 +Grid : Message : Stencil 12.7563 GB/s per node +Grid : Message : Stencil 16.6327 GB/s per node +Grid : Message : Stencil 17.4013 GB/s per node +Grid : Message : Stencil 14.5206 GB/s per node +Grid : Message : Average mflops/s per call per node : 670543 +Grid : Message : Average mflops/s per call per node : 807913 +Grid : Message : Average mflops/s per call per node : 824241 +Grid : Message : Average mflops/s per call per node : 664469 +Grid : Message : Average mflops/s per call per node (full): 313226 +Grid : Message : Average mflops/s per call per node (full): 435507 +Grid : Message : Average mflops/s per call per node (full): 444819 +Grid : Message : Average mflops/s per call per node (full): 308239 +Grid : Message : Stencil 13.8565 GB/s per node +Grid : Message : Stencil 16.9393 GB/s per node +Grid : Message : Stencil 17.3037 GB/s per node +Grid : Message : Stencil 12.6889 GB/s per node +Grid : Message : Average mflops/s per call per node : 668007 +Grid : Message : Average mflops/s per call per node : 809567 +Grid : Message : Average mflops/s per call per node : 819754 +Grid : Message : Average mflops/s per call per node : 661690 +Grid : Message : Average mflops/s per call per node (full): 314114 +Grid : Message : Average mflops/s per call per node (full): 438418 +Grid : Message : Average mflops/s per call per node (full): 442341 +Grid : Message : Average mflops/s per call per node (full): 303115 +Grid : Message : Stencil 14.6176 GB/s per node +Grid : Message : Stencil 16.4156 GB/s per node +Grid : Message : Stencil 17.9656 GB/s per node +Grid : Message : Stencil 12.1492 GB/s per node +Grid : Message : Average mflops/s per call per node : 663949 +Grid : Message : Average mflops/s per call per node : 811066 +Grid : Message : 
Average mflops/s per call per node : 818656 +Grid : Message : Average mflops/s per call per node : 660849 +Grid : Message : Average mflops/s per call per node (full): 316079 +Grid : Message : Average mflops/s per call per node (full): 432736 +Grid : Message : Average mflops/s per call per node (full): 445519 +Grid : Message : Average mflops/s per call per node (full): 301745 +Grid : Message : Stencil 13.1905 GB/s per node +Grid : Message : Stencil 17.9314 GB/s per node +Grid : Message : Stencil 18.1212 GB/s per node +Grid : Message : Stencil 13.4665 GB/s per node +Grid : Message : Average mflops/s per call per node : 667469 +Grid : Message : Average mflops/s per call per node : 799710 +Grid : Message : Average mflops/s per call per node : 823445 +Grid : Message : Average mflops/s per call per node : 662119 +Grid : Message : Average mflops/s per call per node (full): 315512 +Grid : Message : Average mflops/s per call per node (full): 440524 +Grid : Message : Average mflops/s per call per node (full): 446878 +Grid : Message : Average mflops/s per call per node (full): 306150 +Grid : Message : Stencil 12.7929 GB/s per node +Grid : Message : Stencil 9.08547 GB/s per node +Grid : Message : Stencil 18.3256 GB/s per node +Grid : Message : Stencil 14.0092 GB/s per node +Grid : Message : Average mflops/s per call per node : 669543 +Grid : Message : Average mflops/s per call per node : 807014 +Grid : Message : Average mflops/s per call per node : 818948 +Grid : Message : Average mflops/s per call per node : 658341 +Grid : Message : Average mflops/s per call per node (full): 314512 +Grid : Message : Average mflops/s per call per node (full): 294179 +Grid : Message : Average mflops/s per call per node (full): 446968 +Grid : Message : Average mflops/s per call per node (full): 306159 +Grid : Message : Stencil 13.6148 GB/s per node +Grid : Message : Stencil 16.5713 GB/s per node +Grid : Message : Stencil 17.8983 GB/s per node +Grid : Message : Stencil 12.3309 GB/s per node +Grid 
: Message : Average mflops/s per call per node : 666637 +Grid : Message : Average mflops/s per call per node : 806221 +Grid : Message : Average mflops/s per call per node : 819564 +Grid : Message : Average mflops/s per call per node : 668249 +Grid : Message : Average mflops/s per call per node (full): 313595 +Grid : Message : Average mflops/s per call per node (full): 436378 +Grid : Message : Average mflops/s per call per node (full): 440557 +Grid : Message : Average mflops/s per call per node (full): 301593 +Grid : Message : Stencil 12.452 GB/s per node +Grid : Message : Stencil 17.205 GB/s per node +Grid : Message : Stencil 17.6185 GB/s per node +Grid : Message : Stencil 12.9366 GB/s per node +Grid : Message : Average mflops/s per call per node : 673064 +Grid : Message : Average mflops/s per call per node : 800476 +Grid : Message : Average mflops/s per call per node : 821564 +Grid : Message : Average mflops/s per call per node : 661854 +Grid : Message : Average mflops/s per call per node (full): 313548 +Grid : Message : Average mflops/s per call per node (full): 434248 +Grid : Message : Average mflops/s per call per node (full): 445327 +Grid : Message : Average mflops/s per call per node (full): 304364 +Grid : Message : Stencil 13.0521 GB/s per node +Grid : Message : Stencil 16.7585 GB/s per node +Grid : Message : Stencil 17.2553 GB/s per node +Grid : Message : Stencil 13.3428 GB/s per node +Grid : Message : Average mflops/s per call per node : 663655 +Grid : Message : Average mflops/s per call per node : 806512 +Grid : Message : Average mflops/s per call per node : 819044 +Grid : Message : Average mflops/s per call per node : 661744 +Grid : Message : Average mflops/s per call per node (full): 314477 +Grid : Message : Average mflops/s per call per node (full): 436994 +Grid : Message : Average mflops/s per call per node (full): 443491 +Grid : Message : Average mflops/s per call per node (full): 305860 +Grid : Message : Stencil 14.1847 GB/s per node +Grid : Message 
: Stencil 12.4488 GB/s per node +Grid : Message : Stencil 17.0334 GB/s per node +Grid : Message : Stencil 12.6692 GB/s per node +Grid : Message : Average mflops/s per call per node : 666023 +Grid : Message : Average mflops/s per call per node : 808708 +Grid : Message : Average mflops/s per call per node : 819004 +Grid : Message : Average mflops/s per call per node : 666128 +Grid : Message : Average mflops/s per call per node (full): 317309 +Grid : Message : Average mflops/s per call per node (full): 370478 +Grid : Message : Average mflops/s per call per node (full): 438950 +Grid : Message : Average mflops/s per call per node (full): 304976 +Grid : Message : Stencil 13.2247 GB/s per node +Grid : Message : Stencil 9.3349 GB/s per node +Grid : Message : Stencil 17.4661 GB/s per node +Grid : Message : Stencil 12.2981 GB/s per node +Grid : Message : Average mflops/s per call per node : 664947 +Grid : Message : Average mflops/s per call per node : 812408 +Grid : Message : Average mflops/s per call per node : 816737 +Grid : Message : Average mflops/s per call per node : 668435 +Grid : Message : Average mflops/s per call per node (full): 315500 +Grid : Message : Average mflops/s per call per node (full): 300518 +Grid : Message : Average mflops/s per call per node (full): 443404 +Grid : Message : Average mflops/s per call per node (full): 303863 +Grid : Message : Stencil 13.7203 GB/s per node +Grid : Message : Stencil 17.2069 GB/s per node +Grid : Message : Stencil 17.7459 GB/s per node +Grid : Message : Stencil 12.5509 GB/s per node +Grid : Message : Average mflops/s per call per node : 663900 +Grid : Message : Average mflops/s per call per node : 799419 +Grid : Message : Average mflops/s per call per node : 821869 +Grid : Message : Average mflops/s per call per node : 666586 +Grid : Message : Average mflops/s per call per node (full): 315915 +Grid : Message : Average mflops/s per call per node (full): 437670 +Grid : Message : Average mflops/s per call per node (full): 
445017 +Grid : Message : Average mflops/s per call per node (full): 302862 +Grid : Message : Stencil 13.4093 GB/s per node +Grid : Message : Stencil 16.9177 GB/s per node +Grid : Message : Stencil 17.0182 GB/s per node +Grid : Message : Stencil 12.9735 GB/s per node +Grid : Message : Average mflops/s per call per node : 667387 +Grid : Message : Average mflops/s per call per node : 805937 +Grid : Message : Average mflops/s per call per node : 820998 +Grid : Message : Average mflops/s per call per node : 663590 +Grid : Message : Average mflops/s per call per node (full): 316559 +Grid : Message : Average mflops/s per call per node (full): 437874 +Grid : Message : Average mflops/s per call per node (full): 441596 +Grid : Message : Average mflops/s per call per node (full): 305372 +Grid : Message : Stencil 12.5414 GB/s per node +Grid : Message : Stencil 13.4095 GB/s per node +Grid : Message : Stencil 17.9259 GB/s per node +Grid : Message : Stencil 14.453 GB/s per node +Grid : Message : Average mflops/s per call per node : 671866 +Grid : Message : Average mflops/s per call per node : 808558 +Grid : Message : Average mflops/s per call per node : 816009 +Grid : Message : Average mflops/s per call per node : 662280 +Grid : Message : Average mflops/s per call per node (full): 313778 +Grid : Message : Average mflops/s per call per node (full): 390067 +Grid : Message : Average mflops/s per call per node (full): 445621 +Grid : Message : Average mflops/s per call per node (full): 307685 +Grid : Message : Stencil 13.0586 GB/s per node +Grid : Message : Stencil 17.8407 GB/s per node +Grid : Message : Stencil 17.6383 GB/s per node +Grid : Message : Stencil 12.424 GB/s per node +Grid : Message : Average mflops/s per call per node : 665049 +Grid : Message : Average mflops/s per call per node : 802252 +Grid : Message : Average mflops/s per call per node : 818440 +Grid : Message : Average mflops/s per call per node : 664308 +Grid : Message : Average mflops/s per call per node (full): 
314421 +Grid : Message : Average mflops/s per call per node (full): 439421 +Grid : Message : Average mflops/s per call per node (full): 444839 +Grid : Message : Average mflops/s per call per node (full): 301055 +Grid : Message : Stencil 13.8105 GB/s per node +Grid : Message : Stencil 16.9644 GB/s per node +Grid : Message : Stencil 17.9834 GB/s per node +Grid : Message : Stencil 12.6671 GB/s per node +Grid : Message : Average mflops/s per call per node : 664235 +Grid : Message : Average mflops/s per call per node : 807501 +Grid : Message : Average mflops/s per call per node : 817287 +Grid : Message : Average mflops/s per call per node : 662850 +Grid : Message : Average mflops/s per call per node (full): 315857 +Grid : Message : Average mflops/s per call per node (full): 438824 +Grid : Message : Average mflops/s per call per node (full): 446259 +Grid : Message : Average mflops/s per call per node (full): 303882 +Grid : Message : Stencil 14.374 GB/s per node +Grid : Message : Stencil 16.8686 GB/s per node +Grid : Message : Stencil 18.0065 GB/s per node +Grid : Message : Stencil 12.4734 GB/s per node +Grid : Message : Average mflops/s per call per node : 664446 +Grid : Message : Average mflops/s per call per node : 804749 +Grid : Message : Average mflops/s per call per node : 815605 +Grid : Message : Average mflops/s per call per node : 669340 +Grid : Message : Average mflops/s per call per node (full): 316594 +Grid : Message : Average mflops/s per call per node (full): 436341 +Grid : Message : Average mflops/s per call per node (full): 446244 +Grid : Message : Average mflops/s per call per node (full): 304947 +Grid : Message : Stencil 14.0086 GB/s per node +Grid : Message : Stencil 14.369 GB/s per node +Grid : Message : Stencil 17.0426 GB/s per node +Grid : Message : Stencil 14.3542 GB/s per node +Grid : Message : Average mflops/s per call per node : 666933 +Grid : Message : Average mflops/s per call per node : 807542 +Grid : Message : Average mflops/s per call per 
node : 821032 +Grid : Message : Average mflops/s per call per node : 660200 +Grid : Message : Average mflops/s per call per node (full): 317049 +Grid : Message : Average mflops/s per call per node (full): 405986 +Grid : Message : Average mflops/s per call per node (full): 440205 +Grid : Message : Average mflops/s per call per node (full): 306721 +Grid : Message : Stencil 13.74 GB/s per node +Grid : Message : Stencil 16.4136 GB/s per node +Grid : Message : Stencil 17.747 GB/s per node +Grid : Message : Stencil 13.0566 GB/s per node +Grid : Message : Average mflops/s per call per node : 667117 +Grid : Message : Average mflops/s per call per node : 799704 +Grid : Message : Average mflops/s per call per node : 825934 +Grid : Message : Average mflops/s per call per node : 666386 +Grid : Message : Average mflops/s per call per node (full): 316527 +Grid : Message : Average mflops/s per call per node (full): 431300 +Grid : Message : Average mflops/s per call per node (full): 445056 +Grid : Message : Average mflops/s per call per node (full): 305967 +Grid : Message : Stencil 15.0414 GB/s per node +Grid : Message : Stencil 16.3805 GB/s per node +Grid : Message : Stencil 17.7157 GB/s per node +Grid : Message : Stencil 11.8524 GB/s per node +Grid : Message : Average mflops/s per call per node : 662854 +Grid : Message : Average mflops/s per call per node : 803063 +Grid : Message : Average mflops/s per call per node : 819147 +Grid : Message : Average mflops/s per call per node : 668262 +Grid : Message : Average mflops/s per call per node (full): 316698 +Grid : Message : Average mflops/s per call per node (full): 425852 +Grid : Message : Average mflops/s per call per node (full): 446307 +Grid : Message : Average mflops/s per call per node (full): 299278 +Grid : Message : Stencil 14.2677 GB/s per node +Grid : Message : Stencil 17.8884 GB/s per node +Grid : Message : Stencil 18.4766 GB/s per node +Grid : Message : Stencil 12.2529 GB/s per node +Grid : Message : Average mflops/s per 
call per node : 661618 +Grid : Message : Average mflops/s per call per node : 803210 +Grid : Message : Average mflops/s per call per node : 819503 +Grid : Message : Average mflops/s per call per node : 665029 +Grid : Message : Average mflops/s per call per node (full): 314492 +Grid : Message : Average mflops/s per call per node (full): 439913 +Grid : Message : Average mflops/s per call per node (full): 447901 +Grid : Message : Average mflops/s per call per node (full): 303560 +Grid : Message : Stencil 15.4062 GB/s per node +Grid : Message : Stencil 14.2115 GB/s per node +Grid : Message : Stencil 17.3118 GB/s per node +Grid : Message : Stencil 12.8169 GB/s per node +Grid : Message : Average mflops/s per call per node : 660690 +Grid : Message : Average mflops/s per call per node : 806604 +Grid : Message : Average mflops/s per call per node : 825682 +Grid : Message : Average mflops/s per call per node : 663405 +Grid : Message : Average mflops/s per call per node (full): 317080 +Grid : Message : Average mflops/s per call per node (full): 403412 +Grid : Message : Average mflops/s per call per node (full): 444843 +Grid : Message : Average mflops/s per call per node (full): 305059 +Grid : Message : Stencil 13.2666 GB/s per node +Grid : Message : Stencil 18.5442 GB/s per node +Grid : Message : Stencil 18.4985 GB/s per node +Grid : Message : Stencil 14.6172 GB/s per node +Grid : Message : Average mflops/s per call per node : 666265 +Grid : Message : Average mflops/s per call per node : 799181 +Grid : Message : Average mflops/s per call per node : 815491 +Grid : Message : Average mflops/s per call per node : 657783 +Grid : Message : Average mflops/s per call per node (full): 315400 +Grid : Message : Average mflops/s per call per node (full): 440243 +Grid : Message : Average mflops/s per call per node (full): 444845 +Grid : Message : Average mflops/s per call per node (full): 307117 +Grid : Message : Stencil 13.0131 GB/s per node +Grid : Message : Stencil 17.2194 GB/s per 
node +Grid : Message : Stencil 17.1459 GB/s per node +Grid : Message : Stencil 12.3687 GB/s per node +Grid : Message : Average mflops/s per call per node : 670289 +Grid : Message : Average mflops/s per call per node : 808556 +Grid : Message : Average mflops/s per call per node : 823513 +Grid : Message : Average mflops/s per call per node : 665368 +Grid : Message : Average mflops/s per call per node (full): 314349 +Grid : Message : Average mflops/s per call per node (full): 440568 +Grid : Message : Average mflops/s per call per node (full): 441278 +Grid : Message : Average mflops/s per call per node (full): 303793 +Grid : Message : Stencil 12.5975 GB/s per node +Grid : Message : Stencil 17.4383 GB/s per node +Grid : Message : Stencil 17.8292 GB/s per node +Grid : Message : Stencil 12.6179 GB/s per node +Grid : Message : Average mflops/s per call per node : 671699 +Grid : Message : Average mflops/s per call per node : 808745 +Grid : Message : Average mflops/s per call per node : 825012 +Grid : Message : Average mflops/s per call per node : 667837 +Grid : Message : Average mflops/s per call per node (full): 314785 +Grid : Message : Average mflops/s per call per node (full): 441635 +Grid : Message : Average mflops/s per call per node (full): 445869 +Grid : Message : Average mflops/s per call per node (full): 305078 +Grid : Message : Stencil 13.4828 GB/s per node +Grid : Message : Stencil 17.391 GB/s per node +Grid : Message : Stencil 17.5985 GB/s per node +Grid : Message : Stencil 13.0579 GB/s per node +Grid : Message : Average mflops/s per call per node : 670305 +Grid : Message : Average mflops/s per call per node : 800124 +Grid : Message : Average mflops/s per call per node : 821717 +Grid : Message : Average mflops/s per call per node : 665973 +Grid : Message : Average mflops/s per call per node (full): 316858 +Grid : Message : Average mflops/s per call per node (full): 436514 +Grid : Message : Average mflops/s per call per node (full): 445506 +Grid : Message : 
Average mflops/s per call per node (full): 305669 +Grid : Message : Stencil 13.8375 GB/s per node +Grid : Message : Stencil 16.8669 GB/s per node +Grid : Message : Stencil 17.1294 GB/s per node +Grid : Message : Stencil 13.6601 GB/s per node +Grid : Message : Average mflops/s per call per node : 670173 +Grid : Message : Average mflops/s per call per node : 802703 +Grid : Message : Average mflops/s per call per node : 826415 +Grid : Message : Average mflops/s per call per node : 665591 +Grid : Message : Average mflops/s per call per node (full): 316417 +Grid : Message : Average mflops/s per call per node (full): 436306 +Grid : Message : Average mflops/s per call per node (full): 432316 +Grid : Message : Average mflops/s per call per node (full): 306859 +Grid : Message : Stencil 13.1054 GB/s per node +Grid : Message : Stencil 16.5859 GB/s per node +Grid : Message : Stencil 18.0068 GB/s per node +Grid : Message : Stencil 13.3095 GB/s per node +Grid : Message : Average mflops/s per call per node : 669786 +Grid : Message : Average mflops/s per call per node : 804012 +Grid : Message : Average mflops/s per call per node : 815813 +Grid : Message : Average mflops/s per call per node : 664266 +Grid : Message : Average mflops/s per call per node (full): 315101 +Grid : Message : Average mflops/s per call per node (full): 434858 +Grid : Message : Average mflops/s per call per node (full): 444859 +Grid : Message : Average mflops/s per call per node (full): 306065 +Grid : Message : Stencil 12.876 GB/s per node +Grid : Message : Stencil 17.3527 GB/s per node +Grid : Message : Stencil 16.3377 GB/s per node +Grid : Message : Stencil 12.7555 GB/s per node +Grid : Message : Average mflops/s per call per node : 670670 +Grid : Message : Average mflops/s per call per node : 803765 +Grid : Message : Average mflops/s per call per node : 823899 +Grid : Message : Average mflops/s per call per node : 660135 +Grid : Message : Average mflops/s per call per node (full): 315159 +Grid : Message : 
Average mflops/s per call per node (full): 438142 +Grid : Message : Average mflops/s per call per node (full): 422732 +Grid : Message : Average mflops/s per call per node (full): 303690 +Grid : Message : Stencil 14.5704 GB/s per node +Grid : Message : Stencil 12.0734 GB/s per node +Grid : Message : Stencil 18.0296 GB/s per node +Grid : Message : Stencil 12.6776 GB/s per node +Grid : Message : Average mflops/s per call per node : 666129 +Grid : Message : Average mflops/s per call per node : 803061 +Grid : Message : Average mflops/s per call per node : 814283 +Grid : Message : Average mflops/s per call per node : 657234 +Grid : Message : Average mflops/s per call per node (full): 316864 +Grid : Message : Average mflops/s per call per node (full): 362979 +Grid : Message : Average mflops/s per call per node (full): 444805 +Grid : Message : Average mflops/s per call per node (full): 302650 +Grid : Message : Stencil 13.5439 GB/s per node +Grid : Message : Stencil 16.9154 GB/s per node +Grid : Message : Stencil 20.0573 GB/s per node +Grid : Message : Stencil 13.6491 GB/s per node +Grid : Message : Average mflops/s per call per node : 666737 +Grid : Message : Average mflops/s per call per node : 796338 +Grid : Message : Average mflops/s per call per node : 816756 +Grid : Message : Average mflops/s per call per node : 663693 +Grid : Message : Average mflops/s per call per node (full): 316049 +Grid : Message : Average mflops/s per call per node (full): 434567 +Grid : Message : Average mflops/s per call per node (full): 449696 +Grid : Message : Average mflops/s per call per node (full): 306704 +Grid : Message : Stencil 12.5183 GB/s per node +Grid : Message : Stencil 16.5827 GB/s per node +Grid : Message : Stencil 18.559 GB/s per node +Grid : Message : Stencil 14.6143 GB/s per node +Grid : Message : Average mflops/s per call per node : 669246 +Grid : Message : Average mflops/s per call per node : 804736 +Grid : Message : Average mflops/s per call per node : 813387 +Grid : 
Message : Average mflops/s per call per node : 663006 +Grid : Message : Average mflops/s per call per node (full): 313893 +Grid : Message : Average mflops/s per call per node (full): 435101 +Grid : Message : Average mflops/s per call per node (full): 445623 +Grid : Message : Average mflops/s per call per node (full): 307351 +Grid : Message : Stencil 12.4063 GB/s per node +Grid : Message : Stencil 16.7085 GB/s per node +Grid : Message : Stencil 17.7427 GB/s per node +Grid : Message : Stencil 14.278 GB/s per node +Grid : Message : Average mflops/s per call per node : 667017 +Grid : Message : Average mflops/s per call per node : 806776 +Grid : Message : Average mflops/s per call per node : 818848 +Grid : Message : Average mflops/s per call per node : 663672 +Grid : Message : Average mflops/s per call per node (full): 309893 +Grid : Message : Average mflops/s per call per node (full): 437230 +Grid : Message : Average mflops/s per call per node (full): 443248 +Grid : Message : Average mflops/s per call per node (full): 307578 +Grid : Message : Stencil 12.4697 GB/s per node +Grid : Message : Stencil 17.7348 GB/s per node +Grid : Message : Stencil 17.8787 GB/s per node +Grid : Message : Stencil 12.386 GB/s per node +Grid : Message : Average mflops/s per call per node : 668605 +Grid : Message : Average mflops/s per call per node : 803182 +Grid : Message : Average mflops/s per call per node : 821161 +Grid : Message : Average mflops/s per call per node : 666723 +Grid : Message : Average mflops/s per call per node (full): 312663 +Grid : Message : Average mflops/s per call per node (full): 438756 +Grid : Message : Average mflops/s per call per node (full): 445401 +Grid : Message : Average mflops/s per call per node (full): 303558 +Grid : Message : Stencil 13.4515 GB/s per node +Grid : Message : Stencil 11.3553 GB/s per node +Grid : Message : Stencil 18.5098 GB/s per node +Grid : Message : Stencil 13.0622 GB/s per node +Grid : Message : Average mflops/s per call per node : 
663087 +Grid : Message : Average mflops/s per call per node : 812456 +Grid : Message : Average mflops/s per call per node : 814407 +Grid : Message : Average mflops/s per call per node : 665146 +Grid : Message : Average mflops/s per call per node (full): 316157 +Grid : Message : Average mflops/s per call per node (full): 347578 +Grid : Message : Average mflops/s per call per node (full): 444978 +Grid : Message : Average mflops/s per call per node (full): 305370 +Grid : Message : Stencil 13.2173 GB/s per node +Grid : Message : Stencil 16.2429 GB/s per node +Grid : Message : Stencil 18.5115 GB/s per node +Grid : Message : Stencil 12.5288 GB/s per node +Grid : Message : Average mflops/s per call per node : 668092 +Grid : Message : Average mflops/s per call per node : 805973 +Grid : Message : Average mflops/s per call per node : 824247 +Grid : Message : Average mflops/s per call per node : 669384 +Grid : Message : Average mflops/s per call per node (full): 315328 +Grid : Message : Average mflops/s per call per node (full): 430969 +Grid : Message : Average mflops/s per call per node (full): 443273 +Grid : Message : Average mflops/s per call per node (full): 304709 +Grid : Message : Stencil 13.1158 GB/s per node +Grid : Message : Stencil 16.8212 GB/s per node +Grid : Message : Stencil 17.6751 GB/s per node +Grid : Message : Stencil 12.5535 GB/s per node +Grid : Message : Average mflops/s per call per node : 664131 +Grid : Message : Average mflops/s per call per node : 804083 +Grid : Message : Average mflops/s per call per node : 824802 +Grid : Message : Average mflops/s per call per node : 664059 +Grid : Message : Average mflops/s per call per node (full): 315125 +Grid : Message : Average mflops/s per call per node (full): 431866 +Grid : Message : Average mflops/s per call per node (full): 445963 +Grid : Message : Average mflops/s per call per node (full): 304673 +Grid : Message : Stencil 14.1054 GB/s per node +Grid : Message : Stencil 17.0876 GB/s per node +Grid : 
Message : Stencil 16.244 GB/s per node +Grid : Message : Stencil 12.5805 GB/s per node +Grid : Message : Average mflops/s per call per node : 663063 +Grid : Message : Average mflops/s per call per node : 800649 +Grid : Message : Average mflops/s per call per node : 832698 +Grid : Message : Average mflops/s per call per node : 668734 +Grid : Message : Average mflops/s per call per node (full): 316797 +Grid : Message : Average mflops/s per call per node (full): 438163 +Grid : Message : Average mflops/s per call per node (full): 421935 +Grid : Message : Average mflops/s per call per node (full): 303610 +Grid : Message : Stencil 12.8483 GB/s per node +Grid : Message : Stencil 17.087 GB/s per node +Grid : Message : Stencil 17.8021 GB/s per node +Grid : Message : Stencil 14.3434 GB/s per node +Grid : Message : Average mflops/s per call per node : 666039 +Grid : Message : Average mflops/s per call per node : 800967 +Grid : Message : Average mflops/s per call per node : 826722 +Grid : Message : Average mflops/s per call per node : 660834 +Grid : Message : Average mflops/s per call per node (full): 315401 +Grid : Message : Average mflops/s per call per node (full): 436993 +Grid : Message : Average mflops/s per call per node (full): 446784 +Grid : Message : Average mflops/s per call per node (full): 306784 +Grid : Message : Stencil 12.7988 GB/s per node +Grid : Message : Stencil 16.4221 GB/s per node +Grid : Message : Stencil 17.3057 GB/s per node +Grid : Message : Stencil 14.4481 GB/s per node +Grid : Message : Average mflops/s per call per node : 668754 +Grid : Message : Average mflops/s per call per node : 799936 +Grid : Message : Average mflops/s per call per node : 824533 +Grid : Message : Average mflops/s per call per node : 661878 +Grid : Message : Average mflops/s per call per node (full): 315340 +Grid : Message : Average mflops/s per call per node (full): 433837 +Grid : Message : Average mflops/s per call per node (full): 445578 +Grid : Message : Average mflops/s 
per call per node (full): 306850 +Grid : Message : Stencil 14.4045 GB/s per node +Grid : Message : Stencil 16.6417 GB/s per node +Grid : Message : Stencil 17.9008 GB/s per node +Grid : Message : Stencil 13.8014 GB/s per node +Grid : Message : Average mflops/s per call per node : 662777 +Grid : Message : Average mflops/s per call per node : 802338 +Grid : Message : Average mflops/s per call per node : 820027 +Grid : Message : Average mflops/s per call per node : 664195 +Grid : Message : Average mflops/s per call per node (full): 316459 +Grid : Message : Average mflops/s per call per node (full): 437409 +Grid : Message : Average mflops/s per call per node (full): 445966 +Grid : Message : Average mflops/s per call per node (full): 306817 +Grid : Message : Stencil 13.1521 GB/s per node +Grid : Message : Stencil 17.1275 GB/s per node +Grid : Message : Stencil 17.9507 GB/s per node +Grid : Message : Stencil 12.6391 GB/s per node +Grid : Message : Average mflops/s per call per node : 668265 +Grid : Message : Average mflops/s per call per node : 808937 +Grid : Message : Average mflops/s per call per node : 820979 +Grid : Message : Average mflops/s per call per node : 662241 +Grid : Message : Average mflops/s per call per node (full): 316102 +Grid : Message : Average mflops/s per call per node (full): 435257 +Grid : Message : Average mflops/s per call per node (full): 445397 +Grid : Message : Average mflops/s per call per node (full): 303644 +Grid : Message : Stencil 12.8526 GB/s per node +Grid : Message : Stencil 16.7457 GB/s per node +Grid : Message : Stencil 17.2491 GB/s per node +Grid : Message : Stencil 13.7785 GB/s per node +Grid : Message : Average mflops/s per call per node : 666915 +Grid : Message : Average mflops/s per call per node : 807411 +Grid : Message : Average mflops/s per call per node : 822023 +Grid : Message : Average mflops/s per call per node : 656054 +Grid : Message : Average mflops/s per call per node (full): 314974 +Grid : Message : Average mflops/s 
per call per node (full): 437865 +Grid : Message : Average mflops/s per call per node (full): 443123 +Grid : Message : Average mflops/s per call per node (full): 305622 +Grid : Message : Stencil 13.0399 GB/s per node +Grid : Message : Stencil 17.3983 GB/s per node +Grid : Message : Stencil 17.8769 GB/s per node +Grid : Message : Stencil 12.7115 GB/s per node +Grid : Message : Average mflops/s per call per node : 669243 +Grid : Message : Average mflops/s per call per node : 804714 +Grid : Message : Average mflops/s per call per node : 821109 +Grid : Message : Average mflops/s per call per node : 665715 +Grid : Message : Average mflops/s per call per node (full): 315930 +Grid : Message : Average mflops/s per call per node (full): 439006 +Grid : Message : Average mflops/s per call per node (full): 445645 +Grid : Message : Average mflops/s per call per node (full): 304465 +Grid : Message : Stencil 13.0748 GB/s per node +Grid : Message : Stencil 17.0022 GB/s per node +Grid : Message : Stencil 17.9339 GB/s per node +Grid : Message : Stencil 13.2873 GB/s per node +Grid : Message : Average mflops/s per call per node : 670384 +Grid : Message : Average mflops/s per call per node : 803062 +Grid : Message : Average mflops/s per call per node : 821719 +Grid : Message : Average mflops/s per call per node : 664236 +Grid : Message : Average mflops/s per call per node (full): 315475 +Grid : Message : Average mflops/s per call per node (full): 437462 +Grid : Message : Average mflops/s per call per node (full): 446076 +Grid : Message : Average mflops/s per call per node (full): 304854 +Grid : Message : Stencil 13.3451 GB/s per node +Grid : Message : Stencil 16.987 GB/s per node +Grid : Message : Stencil 17.4433 GB/s per node +Grid : Message : Stencil 13.2438 GB/s per node +Grid : Message : Average mflops/s per call per node : 668057 +Grid : Message : Average mflops/s per call per node : 807339 +Grid : Message : Average mflops/s per call per node : 823540 +Grid : Message : Average 
mflops/s per call per node : 662882 +Grid : Message : Average mflops/s per call per node (full): 313029 +Grid : Message : Average mflops/s per call per node (full): 437000 +Grid : Message : Average mflops/s per call per node (full): 444704 +Grid : Message : Average mflops/s per call per node (full): 305305 +Grid : Message : Stencil 13.985 GB/s per node +Grid : Message : Stencil 10.8433 GB/s per node +Grid : Message : Stencil 18.4694 GB/s per node +Grid : Message : Stencil 12.2394 GB/s per node +Grid : Message : Average mflops/s per call per node : 660780 +Grid : Message : Average mflops/s per call per node : 811878 +Grid : Message : Average mflops/s per call per node : 819535 +Grid : Message : Average mflops/s per call per node : 663962 +Grid : Message : Average mflops/s per call per node (full): 315398 +Grid : Message : Average mflops/s per call per node (full): 335978 +Grid : Message : Average mflops/s per call per node (full): 447079 +Grid : Message : Average mflops/s per call per node (full): 299551 +Grid : Message : Stencil 12.7344 GB/s per node +Grid : Message : Stencil 16.7795 GB/s per node +Grid : Message : Stencil 17.9974 GB/s per node +Grid : Message : Stencil 12.3923 GB/s per node +Grid : Message : Average mflops/s per call per node : 666265 +Grid : Message : Average mflops/s per call per node : 806199 +Grid : Message : Average mflops/s per call per node : 821853 +Grid : Message : Average mflops/s per call per node : 665136 +Grid : Message : Average mflops/s per call per node (full): 314563 +Grid : Message : Average mflops/s per call per node (full): 437706 +Grid : Message : Average mflops/s per call per node (full): 445625 +Grid : Message : Average mflops/s per call per node (full): 304797 +Grid : Message : Stencil 13.5227 GB/s per node +Grid : Message : Stencil 9.9904 GB/s per node +Grid : Message : Stencil 18.1087 GB/s per node +Grid : Message : Stencil 13.3208 GB/s per node +Grid : Message : Average mflops/s per call per node : 665052 +Grid : Message 
: Average mflops/s per call per node : 806549 +Grid : Message : Average mflops/s per call per node : 820800 +Grid : Message : Average mflops/s per call per node : 668015 +Grid : Message : Average mflops/s per call per node (full): 314606 +Grid : Message : Average mflops/s per call per node (full): 316705 +Grid : Message : Average mflops/s per call per node (full): 446817 +Grid : Message : Average mflops/s per call per node (full): 306683 +Grid : Message : Stencil 14.0079 GB/s per node +Grid : Message : Stencil 10.2811 GB/s per node +Grid : Message : Stencil 17.4514 GB/s per node +Grid : Message : Stencil 12.1589 GB/s per node +Grid : Message : Average mflops/s per call per node : 664551 +Grid : Message : Average mflops/s per call per node : 807886 +Grid : Message : Average mflops/s per call per node : 823665 +Grid : Message : Average mflops/s per call per node : 668830 +Grid : Message : Average mflops/s per call per node (full): 315811 +Grid : Message : Average mflops/s per call per node (full): 322821 +Grid : Message : Average mflops/s per call per node (full): 444862 +Grid : Message : Average mflops/s per call per node (full): 303960 +Grid : Message : Stencil 12.9543 GB/s per node +Grid : Message : Stencil 17.2676 GB/s per node +Grid : Message : Stencil 17.5014 GB/s per node +Grid : Message : Stencil 12.4183 GB/s per node +Grid : Message : Average mflops/s per call per node : 666039 +Grid : Message : Average mflops/s per call per node : 801726 +Grid : Message : Average mflops/s per call per node : 818973 +Grid : Message : Average mflops/s per call per node : 668015 +Grid : Message : Average mflops/s per call per node (full): 312869 +Grid : Message : Average mflops/s per call per node (full): 434229 +Grid : Message : Average mflops/s per call per node (full): 444635 +Grid : Message : Average mflops/s per call per node (full): 304935 +Grid : Message : Stencil 12.5383 GB/s per node +Grid : Message : Stencil 14.5256 GB/s per node +Grid : Message : Stencil 18.1822 
GB/s per node +Grid : Message : Stencil 12.5818 GB/s per node +Grid : Message : Average mflops/s per call per node : 669210 +Grid : Message : Average mflops/s per call per node : 807821 +Grid : Message : Average mflops/s per call per node : 822054 +Grid : Message : Average mflops/s per call per node : 664072 +Grid : Message : Average mflops/s per call per node (full): 313947 +Grid : Message : Average mflops/s per call per node (full): 408152 +Grid : Message : Average mflops/s per call per node (full): 446600 +Grid : Message : Average mflops/s per call per node (full): 304352 +Grid : Message : Stencil 13.144 GB/s per node +Grid : Message : Stencil 17.0589 GB/s per node +Grid : Message : Stencil 17.9078 GB/s per node +Grid : Message : Stencil 12.6028 GB/s per node +Grid : Message : Average mflops/s per call per node : 663790 +Grid : Message : Average mflops/s per call per node : 801031 +Grid : Message : Average mflops/s per call per node : 826611 +Grid : Message : Average mflops/s per call per node : 668085 +Grid : Message : Average mflops/s per call per node (full): 314625 +Grid : Message : Average mflops/s per call per node (full): 435639 +Grid : Message : Average mflops/s per call per node (full): 447655 +Grid : Message : Average mflops/s per call per node (full): 304941 +Grid : Message : Stencil 13.2036 GB/s per node +Grid : Message : Stencil 16.4167 GB/s per node +Grid : Message : Stencil 18.1195 GB/s per node +Grid : Message : Stencil 12.1919 GB/s per node +Grid : Message : Average mflops/s per call per node : 668091 +Grid : Message : Average mflops/s per call per node : 804708 +Grid : Message : Average mflops/s per call per node : 826018 +Grid : Message : Average mflops/s per call per node : 665139 +Grid : Message : Average mflops/s per call per node (full): 316459 +Grid : Message : Average mflops/s per call per node (full): 432613 +Grid : Message : Average mflops/s per call per node (full): 447761 +Grid : Message : Average mflops/s per call per node (full): 
303228 +Grid : Message : Stencil 12.9496 GB/s per node +Grid : Message : Stencil 8.04132 GB/s per node +Grid : Message : Stencil 18.2398 GB/s per node +Grid : Message : Stencil 12.7744 GB/s per node +Grid : Message : Average mflops/s per call per node : 666903 +Grid : Message : Average mflops/s per call per node : 812769 +Grid : Message : Average mflops/s per call per node : 812983 +Grid : Message : Average mflops/s per call per node : 666973 +Grid : Message : Average mflops/s per call per node (full): 312722 +Grid : Message : Average mflops/s per call per node (full): 267572 +Grid : Message : Average mflops/s per call per node (full): 445639 +Grid : Message : Average mflops/s per call per node (full): 305841 +Grid : Message : Stencil 12.3602 GB/s per node +Grid : Message : Stencil 16.9159 GB/s per node +Grid : Message : Stencil 18.6838 GB/s per node +Grid : Message : Stencil 12.9963 GB/s per node +Grid : Message : Average mflops/s per call per node : 669015 +Grid : Message : Average mflops/s per call per node : 807804 +Grid : Message : Average mflops/s per call per node : 824645 +Grid : Message : Average mflops/s per call per node : 665724 +Grid : Message : Average mflops/s per call per node (full): 312875 +Grid : Message : Average mflops/s per call per node (full): 432958 +Grid : Message : Average mflops/s per call per node (full): 450242 +Grid : Message : Average mflops/s per call per node (full): 305161 +Grid : Message : Stencil 13.4647 GB/s per node +Grid : Message : Stencil 16.3383 GB/s per node +Grid : Message : Stencil 17.0735 GB/s per node +Grid : Message : Stencil 12.7368 GB/s per node +Grid : Message : Average mflops/s per call per node : 668122 +Grid : Message : Average mflops/s per call per node : 803149 +Grid : Message : Average mflops/s per call per node : 825487 +Grid : Message : Average mflops/s per call per node : 667634 +Grid : Message : Average mflops/s per call per node (full): 315650 +Grid : Message : Average mflops/s per call per node (full): 
432558 +Grid : Message : Average mflops/s per call per node (full): 441983 +Grid : Message : Average mflops/s per call per node (full): 305838 +Grid : Message : Stencil 13.4652 GB/s per node +Grid : Message : Stencil 16.738 GB/s per node +Grid : Message : Stencil 17.9649 GB/s per node +Grid : Message : Stencil 12.6534 GB/s per node +Grid : Message : Average mflops/s per call per node : 664658 +Grid : Message : Average mflops/s per call per node : 800925 +Grid : Message : Average mflops/s per call per node : 821484 +Grid : Message : Average mflops/s per call per node : 665831 +Grid : Message : Average mflops/s per call per node (full): 315984 +Grid : Message : Average mflops/s per call per node (full): 435073 +Grid : Message : Average mflops/s per call per node (full): 445590 +Grid : Message : Average mflops/s per call per node (full): 305339 +Grid : Message : Stencil 13.2929 GB/s per node +Grid : Message : Stencil 17.0294 GB/s per node +Grid : Message : Stencil 17.992 GB/s per node +Grid : Message : Stencil 12.0384 GB/s per node +Grid : Message : Average mflops/s per call per node : 665322 +Grid : Message : Average mflops/s per call per node : 806397 +Grid : Message : Average mflops/s per call per node : 817346 +Grid : Message : Average mflops/s per call per node : 671984 +Grid : Message : Average mflops/s per call per node (full): 316333 +Grid : Message : Average mflops/s per call per node (full): 438125 +Grid : Message : Average mflops/s per call per node (full): 444428 +Grid : Message : Average mflops/s per call per node (full): 302906 +Grid : Message : Stencil 12.8307 GB/s per node +Grid : Message : Stencil 17.5075 GB/s per node +Grid : Message : Stencil 17.4408 GB/s per node +Grid : Message : Stencil 13.0046 GB/s per node +Grid : Message : Average mflops/s per call per node : 672324 +Grid : Message : Average mflops/s per call per node : 798334 +Grid : Message : Average mflops/s per call per node : 817300 +Grid : Message : Average mflops/s per call per node : 
664807 +Grid : Message : Average mflops/s per call per node (full): 316035 +Grid : Message : Average mflops/s per call per node (full): 435968 +Grid : Message : Average mflops/s per call per node (full): 439149 +Grid : Message : Average mflops/s per call per node (full): 306540 +Grid : Message : Stencil 12.664 GB/s per node +Grid : Message : Stencil 16.3831 GB/s per node +Grid : Message : Stencil 18.4677 GB/s per node +Grid : Message : Stencil 12.9777 GB/s per node +Grid : Message : Average mflops/s per call per node : 672242 +Grid : Message : Average mflops/s per call per node : 806043 +Grid : Message : Average mflops/s per call per node : 823586 +Grid : Message : Average mflops/s per call per node : 668304 +Grid : Message : Average mflops/s per call per node (full): 315131 +Grid : Message : Average mflops/s per call per node (full): 433011 +Grid : Message : Average mflops/s per call per node (full): 447910 +Grid : Message : Average mflops/s per call per node (full): 304572 +Grid : Message : Stencil 12.647 GB/s per node +Grid : Message : Stencil 17.7233 GB/s per node +Grid : Message : Stencil 19.045 GB/s per node +Grid : Message : Stencil 14.4043 GB/s per node +Grid : Message : Average mflops/s per call per node : 670068 +Grid : Message : Average mflops/s per call per node : 802508 +Grid : Message : Average mflops/s per call per node : 822255 +Grid : Message : Average mflops/s per call per node : 660874 +Grid : Message : Average mflops/s per call per node (full): 314382 +Grid : Message : Average mflops/s per call per node (full): 440019 +Grid : Message : Average mflops/s per call per node (full): 449501 +Grid : Message : Average mflops/s per call per node (full): 307181 +Grid : Message : Stencil 12.933 GB/s per node +Grid : Message : Stencil 16.7577 GB/s per node +Grid : Message : Stencil 17.9727 GB/s per node +Grid : Message : Stencil 13.1741 GB/s per node +Grid : Message : Average mflops/s per call per node : 667285 +Grid : Message : Average mflops/s per call 
per node : 807065 +Grid : Message : Average mflops/s per call per node : 816317 +Grid : Message : Average mflops/s per call per node : 660677 +Grid : Message : Average mflops/s per call per node (full): 313925 +Grid : Message : Average mflops/s per call per node (full): 437370 +Grid : Message : Average mflops/s per call per node (full): 445682 +Grid : Message : Average mflops/s per call per node (full): 304500 +Grid : Message : Stencil 12.8929 GB/s per node +Grid : Message : Stencil 16.5216 GB/s per node +Grid : Message : Stencil 17.6462 GB/s per node +Grid : Message : Stencil 11.6618 GB/s per node +Grid : Message : Average mflops/s per call per node : 670583 +Grid : Message : Average mflops/s per call per node : 809937 +Grid : Message : Average mflops/s per call per node : 818639 +Grid : Message : Average mflops/s per call per node : 669559 +Grid : Message : Average mflops/s per call per node (full): 314614 +Grid : Message : Average mflops/s per call per node (full): 434299 +Grid : Message : Average mflops/s per call per node (full): 444539 +Grid : Message : Average mflops/s per call per node (full): 298526 +Grid : Message : Stencil 13.0396 GB/s per node +Grid : Message : Stencil 16.8357 GB/s per node +Grid : Message : Stencil 17.251 GB/s per node +Grid : Message : Stencil 14.0986 GB/s per node +Grid : Message : Average mflops/s per call per node : 667395 +Grid : Message : Average mflops/s per call per node : 807567 +Grid : Message : Average mflops/s per call per node : 825726 +Grid : Message : Average mflops/s per call per node : 661632 +Grid : Message : Average mflops/s per call per node (full): 314509 +Grid : Message : Average mflops/s per call per node (full): 437961 +Grid : Message : Average mflops/s per call per node (full): 443090 +Grid : Message : Average mflops/s per call per node (full): 303319 +Grid : Message : Stencil 14.2244 GB/s per node +Grid : Message : Stencil 16.71 GB/s per node +Grid : Message : Stencil 17.8663 GB/s per node +Grid : Message : 
Stencil 13.1549 GB/s per node +Grid : Message : Average mflops/s per call per node : 663781 +Grid : Message : Average mflops/s per call per node : 802430 +Grid : Message : Average mflops/s per call per node : 822847 +Grid : Message : Average mflops/s per call per node : 662736 +Grid : Message : Average mflops/s per call per node (full): 315901 +Grid : Message : Average mflops/s per call per node (full): 434370 +Grid : Message : Average mflops/s per call per node (full): 447766 +Grid : Message : Average mflops/s per call per node (full): 305592 +Grid : Message : Stencil 13.8167 GB/s per node +Grid : Message : Stencil 16.4358 GB/s per node +Grid : Message : Stencil 17.2993 GB/s per node +Grid : Message : Stencil 12.7085 GB/s per node +Grid : Message : Average mflops/s per call per node : 661245 +Grid : Message : Average mflops/s per call per node : 803105 +Grid : Message : Average mflops/s per call per node : 823692 +Grid : Message : Average mflops/s per call per node : 664362 +Grid : Message : Average mflops/s per call per node (full): 314367 +Grid : Message : Average mflops/s per call per node (full): 433581 +Grid : Message : Average mflops/s per call per node (full): 442903 +Grid : Message : Average mflops/s per call per node (full): 305447 +Grid : Message : Stencil 13.1055 GB/s per node +Grid : Message : Stencil 9.4695 GB/s per node +Grid : Message : Stencil 17.4881 GB/s per node +Grid : Message : Stencil 13.3955 GB/s per node +Grid : Message : Average mflops/s per call per node : 664820 +Grid : Message : Average mflops/s per call per node : 809060 +Grid : Message : Average mflops/s per call per node : 822009 +Grid : Message : Average mflops/s per call per node : 663366 +Grid : Message : Average mflops/s per call per node (full): 313768 +Grid : Message : Average mflops/s per call per node (full): 303820 +Grid : Message : Average mflops/s per call per node (full): 443817 +Grid : Message : Average mflops/s per call per node (full): 304245 +Grid : Message : Stencil 
13.5332 GB/s per node +Grid : Message : Stencil 14.8741 GB/s per node +Grid : Message : Stencil 17.4688 GB/s per node +Grid : Message : Stencil 12.3018 GB/s per node +Grid : Message : Average mflops/s per call per node : 663171 +Grid : Message : Average mflops/s per call per node : 807899 +Grid : Message : Average mflops/s per call per node : 816213 +Grid : Message : Average mflops/s per call per node : 665794 +Grid : Message : Average mflops/s per call per node (full): 314666 +Grid : Message : Average mflops/s per call per node (full): 414230 +Grid : Message : Average mflops/s per call per node (full): 436437 +Grid : Message : Average mflops/s per call per node (full): 300328 +Grid : Message : Stencil 13.4418 GB/s per node +Grid : Message : Stencil 16.7514 GB/s per node +Grid : Message : Stencil 18.5606 GB/s per node +Grid : Message : Stencil 13.6725 GB/s per node +Grid : Message : Average mflops/s per call per node : 663475 +Grid : Message : Average mflops/s per call per node : 802921 +Grid : Message : Average mflops/s per call per node : 820846 +Grid : Message : Average mflops/s per call per node : 660621 +Grid : Message : Average mflops/s per call per node (full): 315321 +Grid : Message : Average mflops/s per call per node (full): 436917 +Grid : Message : Average mflops/s per call per node (full): 447510 +Grid : Message : Average mflops/s per call per node (full): 306476 +Grid : Message : Stencil 13.2975 GB/s per node +Grid : Message : Stencil 16.294 GB/s per node +Grid : Message : Stencil 16.9049 GB/s per node +Grid : Message : Stencil 12.8291 GB/s per node +Grid : Message : Average mflops/s per call per node : 666696 +Grid : Message : Average mflops/s per call per node : 804316 +Grid : Message : Average mflops/s per call per node : 820510 +Grid : Message : Average mflops/s per call per node : 664933 +Grid : Message : Average mflops/s per call per node (full): 315006 +Grid : Message : Average mflops/s per call per node (full): 429726 +Grid : Message : Average 
mflops/s per call per node (full): 439125 +Grid : Message : Average mflops/s per call per node (full): 305590 +Grid : Message : Stencil 14.0116 GB/s per node +Grid : Message : Stencil 17.0097 GB/s per node +Grid : Message : Stencil 17.7337 GB/s per node +Grid : Message : Stencil 12.7172 GB/s per node +Grid : Message : Average mflops/s per call per node : 664280 +Grid : Message : Average mflops/s per call per node : 803742 +Grid : Message : Average mflops/s per call per node : 827551 +Grid : Message : Average mflops/s per call per node : 665486 +Grid : Message : Average mflops/s per call per node (full): 316235 +Grid : Message : Average mflops/s per call per node (full): 438807 +Grid : Message : Average mflops/s per call per node (full): 447663 +Grid : Message : Average mflops/s per call per node (full): 304516 +Grid : Message : Stencil 13.3899 GB/s per node +Grid : Message : Stencil 18.1307 GB/s per node +Grid : Message : Stencil 17.2902 GB/s per node +Grid : Message : Stencil 13.0768 GB/s per node +Grid : Message : Average mflops/s per call per node : 670032 +Grid : Message : Average mflops/s per call per node : 801219 +Grid : Message : Average mflops/s per call per node : 829903 +Grid : Message : Average mflops/s per call per node : 655400 +Grid : Message : Average mflops/s per call per node (full): 316179 +Grid : Message : Average mflops/s per call per node (full): 438143 +Grid : Message : Average mflops/s per call per node (full): 444077 +Grid : Message : Average mflops/s per call per node (full): 304791 +Grid : Message : Stencil 13.0342 GB/s per node +Grid : Message : Stencil 17.5561 GB/s per node +Grid : Message : Stencil 18.0282 GB/s per node +Grid : Message : Stencil 13.7715 GB/s per node +Grid : Message : Average mflops/s per call per node : 666334 +Grid : Message : Average mflops/s per call per node : 799060 +Grid : Message : Average mflops/s per call per node : 821650 +Grid : Message : Average mflops/s per call per node : 662056 +Grid : Message : Average 
mflops/s per call per node (full): 315412 +Grid : Message : Average mflops/s per call per node (full): 437403 +Grid : Message : Average mflops/s per call per node (full): 446257 +Grid : Message : Average mflops/s per call per node (full): 307217 +Grid : Message : Stencil 13.5037 GB/s per node +Grid : Message : Stencil 18.5172 GB/s per node +Grid : Message : Stencil 17.1817 GB/s per node +Grid : Message : Stencil 12.5464 GB/s per node +Grid : Message : Average mflops/s per call per node : 665393 +Grid : Message : Average mflops/s per call per node : 799830 +Grid : Message : Average mflops/s per call per node : 824258 +Grid : Message : Average mflops/s per call per node : 671050 +Grid : Message : Average mflops/s per call per node (full): 315641 +Grid : Message : Average mflops/s per call per node (full): 441107 +Grid : Message : Average mflops/s per call per node (full): 441294 +Grid : Message : Average mflops/s per call per node (full): 305549 +Grid : Message : Stencil 13.3831 GB/s per node +Grid : Message : Stencil 14.2564 GB/s per node +Grid : Message : Stencil 17.669 GB/s per node +Grid : Message : Stencil 13.9748 GB/s per node +Grid : Message : Average mflops/s per call per node : 668898 +Grid : Message : Average mflops/s per call per node : 807392 +Grid : Message : Average mflops/s per call per node : 823886 +Grid : Message : Average mflops/s per call per node : 659980 +Grid : Message : Average mflops/s per call per node (full): 316380 +Grid : Message : Average mflops/s per call per node (full): 403824 +Grid : Message : Average mflops/s per call per node (full): 445627 +Grid : Message : Average mflops/s per call per node (full): 305922 +Grid : Message : Stencil 13.0011 GB/s per node +Grid : Message : Stencil 16.4418 GB/s per node +Grid : Message : Stencil 17.5423 GB/s per node +Grid : Message : Stencil 12.2651 GB/s per node +Grid : Message : Average mflops/s per call per node : 667833 +Grid : Message : Average mflops/s per call per node : 799772 +Grid : 
Message : Average mflops/s per call per node : 825062 +Grid : Message : Average mflops/s per call per node : 662430 +Grid : Message : Average mflops/s per call per node (full): 312990 +Grid : Message : Average mflops/s per call per node (full): 432380 +Grid : Message : Average mflops/s per call per node (full): 446061 +Grid : Message : Average mflops/s per call per node (full): 301703 +Grid : Message : Stencil 13.122 GB/s per node +Grid : Message : Stencil 14.4751 GB/s per node +Grid : Message : Stencil 17.8534 GB/s per node +Grid : Message : Stencil 14.0202 GB/s per node +Grid : Message : Average mflops/s per call per node : 668032 +Grid : Message : Average mflops/s per call per node : 801144 +Grid : Message : Average mflops/s per call per node : 825762 +Grid : Message : Average mflops/s per call per node : 665085 +Grid : Message : Average mflops/s per call per node (full): 315796 +Grid : Message : Average mflops/s per call per node (full): 405994 +Grid : Message : Average mflops/s per call per node (full): 446302 +Grid : Message : Average mflops/s per call per node (full): 306393 +Grid : Message : Stencil 12.8932 GB/s per node +Grid : Message : Stencil 16.6554 GB/s per node +Grid : Message : Stencil 17.8851 GB/s per node +Grid : Message : Stencil 12.5967 GB/s per node +Grid : Message : Average mflops/s per call per node : 665908 +Grid : Message : Average mflops/s per call per node : 808734 +Grid : Message : Average mflops/s per call per node : 822333 +Grid : Message : Average mflops/s per call per node : 662094 +Grid : Message : Average mflops/s per call per node (full): 314977 +Grid : Message : Average mflops/s per call per node (full): 434727 +Grid : Message : Average mflops/s per call per node (full): 446631 +Grid : Message : Average mflops/s per call per node (full): 304237 +Grid : Message : Stencil 12.4235 GB/s per node +Grid : Message : Stencil 16.2135 GB/s per node +Grid : Message : Stencil 17.6276 GB/s per node +Grid : Message : Stencil 14.727 GB/s per 
node +Grid : Message : Average mflops/s per call per node : 666897 +Grid : Message : Average mflops/s per call per node : 806787 +Grid : Message : Average mflops/s per call per node : 825036 +Grid : Message : Average mflops/s per call per node : 662896 +Grid : Message : Average mflops/s per call per node (full): 311217 +Grid : Message : Average mflops/s per call per node (full): 431113 +Grid : Message : Average mflops/s per call per node (full): 445192 +Grid : Message : Average mflops/s per call per node (full): 307938 +Grid : Message : Stencil 12.8052 GB/s per node +Grid : Message : Stencil 16.6575 GB/s per node +Grid : Message : Stencil 17.5731 GB/s per node +Grid : Message : Stencil 13.5217 GB/s per node +Grid : Message : Average mflops/s per call per node : 665083 +Grid : Message : Average mflops/s per call per node : 802066 +Grid : Message : Average mflops/s per call per node : 822189 +Grid : Message : Average mflops/s per call per node : 664236 +Grid : Message : Average mflops/s per call per node (full): 312239 +Grid : Message : Average mflops/s per call per node (full): 436940 +Grid : Message : Average mflops/s per call per node (full): 446146 +Grid : Message : Average mflops/s per call per node (full): 305529 +Grid : Message : Stencil 12.0483 GB/s per node +Grid : Message : Stencil 17.8025 GB/s per node +Grid : Message : Stencil 17.3049 GB/s per node +Grid : Message : Stencil 11.5328 GB/s per node +Grid : Message : Average mflops/s per call per node : 668534 +Grid : Message : Average mflops/s per call per node : 803686 +Grid : Message : Average mflops/s per call per node : 815868 +Grid : Message : Average mflops/s per call per node : 663886 +Grid : Message : Average mflops/s per call per node (full): 308896 +Grid : Message : Average mflops/s per call per node (full): 439722 +Grid : Message : Average mflops/s per call per node (full): 432427 +Grid : Message : Average mflops/s per call per node (full): 287890 +Grid : Message : Stencil 12.676 GB/s per node 
+Grid : Message : Stencil 17.23 GB/s per node +Grid : Message : Stencil 17.5567 GB/s per node +Grid : Message : Stencil 12.3602 GB/s per node +Grid : Message : Average mflops/s per call per node : 662447 +Grid : Message : Average mflops/s per call per node : 804064 +Grid : Message : Average mflops/s per call per node : 822752 +Grid : Message : Average mflops/s per call per node : 660888 +Grid : Message : Average mflops/s per call per node (full): 310141 +Grid : Message : Average mflops/s per call per node (full): 439871 +Grid : Message : Average mflops/s per call per node (full): 445413 +Grid : Message : Average mflops/s per call per node (full): 300370 +Grid : Message : Stencil 12.3903 GB/s per node +Grid : Message : Stencil 13.9581 GB/s per node +Grid : Message : Stencil 17.4805 GB/s per node +Grid : Message : Stencil 13.8736 GB/s per node +Grid : Message : Average mflops/s per call per node : 666960 +Grid : Message : Average mflops/s per call per node : 811450 +Grid : Message : Average mflops/s per call per node : 818355 +Grid : Message : Average mflops/s per call per node : 666749 +Grid : Message : Average mflops/s per call per node (full): 312948 +Grid : Message : Average mflops/s per call per node (full): 399590 +Grid : Message : Average mflops/s per call per node (full): 444163 +Grid : Message : Average mflops/s per call per node (full): 307748 +Grid : Message : Stencil 12.2576 GB/s per node +Grid : Message : Stencil 16.9103 GB/s per node +Grid : Message : Stencil 17.6251 GB/s per node +Grid : Message : Stencil 13.1704 GB/s per node +Grid : Message : Average mflops/s per call per node : 669561 +Grid : Message : Average mflops/s per call per node : 804700 +Grid : Message : Average mflops/s per call per node : 822080 +Grid : Message : Average mflops/s per call per node : 663517 +Grid : Message : Average mflops/s per call per node (full): 311869 +Grid : Message : Average mflops/s per call per node (full): 438410 +Grid : Message : Average mflops/s per call per 
node (full): 445679 +Grid : Message : Average mflops/s per call per node (full): 306253 +Grid : Message : Stencil 13.1253 GB/s per node +Grid : Message : Stencil 17.0406 GB/s per node +Grid : Message : Stencil 17.4521 GB/s per node +Grid : Message : Stencil 12.5503 GB/s per node +Grid : Message : Average mflops/s per call per node : 663884 +Grid : Message : Average mflops/s per call per node : 805130 +Grid : Message : Average mflops/s per call per node : 822047 +Grid : Message : Average mflops/s per call per node : 666019 +Grid : Message : Average mflops/s per call per node (full): 313695 +Grid : Message : Average mflops/s per call per node (full): 433054 +Grid : Message : Average mflops/s per call per node (full): 444033 +Grid : Message : Average mflops/s per call per node (full): 304935 +Grid : Message : Stencil 14.2193 GB/s per node +Grid : Message : Stencil 16.7231 GB/s per node +Grid : Message : Stencil 17.4057 GB/s per node +Grid : Message : Stencil 12.8035 GB/s per node +Grid : Message : Average mflops/s per call per node : 659679 +Grid : Message : Average mflops/s per call per node : 802868 +Grid : Message : Average mflops/s per call per node : 822679 +Grid : Message : Average mflops/s per call per node : 668637 +Grid : Message : Average mflops/s per call per node (full): 316201 +Grid : Message : Average mflops/s per call per node (full): 436200 +Grid : Message : Average mflops/s per call per node (full): 443477 +Grid : Message : Average mflops/s per call per node (full): 305573 +Grid : Message : Stencil 12.5265 GB/s per node +Grid : Message : Stencil 11.3738 GB/s per node +Grid : Message : Stencil 17.8532 GB/s per node +Grid : Message : Stencil 12.9027 GB/s per node +Grid : Message : Average mflops/s per call per node : 664605 +Grid : Message : Average mflops/s per call per node : 806182 +Grid : Message : Average mflops/s per call per node : 817960 +Grid : Message : Average mflops/s per call per node : 665454 +Grid : Message : Average mflops/s per call per 
node (full): 312821 +Grid : Message : Average mflops/s per call per node (full): 347998 +Grid : Message : Average mflops/s per call per node (full): 445111 +Grid : Message : Average mflops/s per call per node (full): 305415 +Grid : Message : Stencil 12.8852 GB/s per node +Grid : Message : Stencil 15.8886 GB/s per node +Grid : Message : Stencil 17.8262 GB/s per node +Grid : Message : Stencil 14.1356 GB/s per node +Grid : Message : Average mflops/s per call per node : 666314 +Grid : Message : Average mflops/s per call per node : 807479 +Grid : Message : Average mflops/s per call per node : 819779 +Grid : Message : Average mflops/s per call per node : 661020 +Grid : Message : Average mflops/s per call per node (full): 314064 +Grid : Message : Average mflops/s per call per node (full): 426703 +Grid : Message : Average mflops/s per call per node (full): 443793 +Grid : Message : Average mflops/s per call per node (full): 307501 +Grid : Message : Stencil 13.3414 GB/s per node +Grid : Message : Stencil 18.0181 GB/s per node +Grid : Message : Stencil 17.9612 GB/s per node +Grid : Message : Stencil 13.7132 GB/s per node +Grid : Message : Average mflops/s per call per node : 662865 +Grid : Message : Average mflops/s per call per node : 800521 +Grid : Message : Average mflops/s per call per node : 831440 +Grid : Message : Average mflops/s per call per node : 663001 +Grid : Message : Average mflops/s per call per node (full): 312104 +Grid : Message : Average mflops/s per call per node (full): 440473 +Grid : Message : Average mflops/s per call per node (full): 449193 +Grid : Message : Average mflops/s per call per node (full): 305778 +Grid : Message : Stencil 12.4744 GB/s per node +Grid : Message : Stencil 12.4069 GB/s per node +Grid : Message : Stencil 18.6512 GB/s per node +Grid : Message : Stencil 13.9883 GB/s per node +Grid : Message : Average mflops/s per call per node : 668317 +Grid : Message : Average mflops/s per call per node : 809376 +Grid : Message : Average mflops/s 
per call per node : 824709 +Grid : Message : Average mflops/s per call per node : 659530 +Grid : Message : Average mflops/s per call per node (full): 313873 +Grid : Message : Average mflops/s per call per node (full): 370099 +Grid : Message : Average mflops/s per call per node (full): 447544 +Grid : Message : Average mflops/s per call per node (full): 306571 +Grid : Message : Stencil 13.0316 GB/s per node +Grid : Message : Stencil 16.6968 GB/s per node +Grid : Message : Stencil 17.7661 GB/s per node +Grid : Message : Stencil 14.0046 GB/s per node +Grid : Message : Average mflops/s per call per node : 666400 +Grid : Message : Average mflops/s per call per node : 802491 +Grid : Message : Average mflops/s per call per node : 821831 +Grid : Message : Average mflops/s per call per node : 661370 +Grid : Message : Average mflops/s per call per node (full): 314349 +Grid : Message : Average mflops/s per call per node (full): 435465 +Grid : Message : Average mflops/s per call per node (full): 446423 +Grid : Message : Average mflops/s per call per node (full): 306860 +Grid : Message : Stencil 12.8639 GB/s per node +Grid : Message : Stencil 16.4533 GB/s per node +Grid : Message : Stencil 17.201 GB/s per node +Grid : Message : Stencil 12.2991 GB/s per node +Grid : Message : Average mflops/s per call per node : 665471 +Grid : Message : Average mflops/s per call per node : 801379 +Grid : Message : Average mflops/s per call per node : 817508 +Grid : Message : Average mflops/s per call per node : 663412 +Grid : Message : Average mflops/s per call per node (full): 314238 +Grid : Message : Average mflops/s per call per node (full): 433165 +Grid : Message : Average mflops/s per call per node (full): 441811 +Grid : Message : Average mflops/s per call per node (full): 303330 +Grid : Message : Stencil 12.5565 GB/s per node +Grid : Message : Stencil 15.1478 GB/s per node +Grid : Message : Stencil 18.294 GB/s per node +Grid : Message : Stencil 14.0358 GB/s per node +Grid : Message : 
Average mflops/s per call per node : 668709 +Grid : Message : Average mflops/s per call per node : 807656 +Grid : Message : Average mflops/s per call per node : 821970 +Grid : Message : Average mflops/s per call per node : 660987 +Grid : Message : Average mflops/s per call per node (full): 314130 +Grid : Message : Average mflops/s per call per node (full): 417627 +Grid : Message : Average mflops/s per call per node (full): 447522 +Grid : Message : Average mflops/s per call per node (full): 306286 +Grid : Message : Stencil 13.4528 GB/s per node +Grid : Message : Stencil 16.5353 GB/s per node +Grid : Message : Stencil 16.4358 GB/s per node +Grid : Message : Stencil 14.2175 GB/s per node +Grid : Message : Average mflops/s per call per node : 667247 +Grid : Message : Average mflops/s per call per node : 803885 +Grid : Message : Average mflops/s per call per node : 824360 +Grid : Message : Average mflops/s per call per node : 663661 +Grid : Message : Average mflops/s per call per node (full): 316220 +Grid : Message : Average mflops/s per call per node (full): 434687 +Grid : Message : Average mflops/s per call per node (full): 424565 +Grid : Message : Average mflops/s per call per node (full): 306818 +Grid : Message : Stencil 12.9009 GB/s per node +Grid : Message : Stencil 16.6621 GB/s per node +Grid : Message : Stencil 17.2524 GB/s per node +Grid : Message : Stencil 12.7505 GB/s per node +Grid : Message : Average mflops/s per call per node : 665737 +Grid : Message : Average mflops/s per call per node : 802449 +Grid : Message : Average mflops/s per call per node : 824897 +Grid : Message : Average mflops/s per call per node : 663450 +Grid : Message : Average mflops/s per call per node (full): 312619 +Grid : Message : Average mflops/s per call per node (full): 434975 +Grid : Message : Average mflops/s per call per node (full): 441741 +Grid : Message : Average mflops/s per call per node (full): 303787 +Grid : Message : Stencil 13.8603 GB/s per node +Grid : Message : Stencil 
17.3898 GB/s per node +Grid : Message : Stencil 18.0875 GB/s per node +Grid : Message : Stencil 12.4416 GB/s per node +Grid : Message : Average mflops/s per call per node : 661775 +Grid : Message : Average mflops/s per call per node : 806436 +Grid : Message : Average mflops/s per call per node : 824965 +Grid : Message : Average mflops/s per call per node : 666546 +Grid : Message : Average mflops/s per call per node (full): 315060 +Grid : Message : Average mflops/s per call per node (full): 436662 +Grid : Message : Average mflops/s per call per node (full): 443945 +Grid : Message : Average mflops/s per call per node (full): 304778 +Grid : Message : Stencil 13.7038 GB/s per node +Grid : Message : Stencil 17.6172 GB/s per node +Grid : Message : Stencil 18.4053 GB/s per node +Grid : Message : Stencil 12.6816 GB/s per node +Grid : Message : Average mflops/s per call per node : 663760 +Grid : Message : Average mflops/s per call per node : 801627 +Grid : Message : Average mflops/s per call per node : 825152 +Grid : Message : Average mflops/s per call per node : 667139 +Grid : Message : Average mflops/s per call per node (full): 315050 +Grid : Message : Average mflops/s per call per node (full): 438868 +Grid : Message : Average mflops/s per call per node (full): 448242 +Grid : Message : Average mflops/s per call per node (full): 305717 +Grid : Message : Stencil 12.9353 GB/s per node +Grid : Message : Stencil 16.0852 GB/s per node +Grid : Message : Stencil 17.3692 GB/s per node +Grid : Message : Stencil 12.9265 GB/s per node +Grid : Message : Average mflops/s per call per node : 669251 +Grid : Message : Average mflops/s per call per node : 805532 +Grid : Message : Average mflops/s per call per node : 826799 +Grid : Message : Average mflops/s per call per node : 663717 +Grid : Message : Average mflops/s per call per node (full): 313709 +Grid : Message : Average mflops/s per call per node (full): 424664 +Grid : Message : Average mflops/s per call per node (full): 445421 +Grid 
: Message : Average mflops/s per call per node (full): 305257 +Grid : Message : Stencil 13.379 GB/s per node +Grid : Message : Stencil 16.9515 GB/s per node +Grid : Message : Stencil 18.3665 GB/s per node +Grid : Message : Stencil 12.7137 GB/s per node +Grid : Message : Average mflops/s per call per node : 665154 +Grid : Message : Average mflops/s per call per node : 802761 +Grid : Message : Average mflops/s per call per node : 818096 +Grid : Message : Average mflops/s per call per node : 664144 +Grid : Message : Average mflops/s per call per node (full): 315941 +Grid : Message : Average mflops/s per call per node (full): 438033 +Grid : Message : Average mflops/s per call per node (full): 446798 +Grid : Message : Average mflops/s per call per node (full): 304773 +Grid : Message : Stencil 13.3168 GB/s per node +Grid : Message : Stencil 10.2857 GB/s per node +Grid : Message : Stencil 17.1783 GB/s per node +Grid : Message : Stencil 12.9949 GB/s per node +Grid : Message : Average mflops/s per call per node : 665785 +Grid : Message : Average mflops/s per call per node : 810825 +Grid : Message : Average mflops/s per call per node : 823353 +Grid : Message : Average mflops/s per call per node : 665362 +Grid : Message : Average mflops/s per call per node (full): 315764 +Grid : Message : Average mflops/s per call per node (full): 323579 +Grid : Message : Average mflops/s per call per node (full): 443309 +Grid : Message : Average mflops/s per call per node (full): 305253 +Grid : Message : Stencil 13.9634 GB/s per node +Grid : Message : Stencil 17.0929 GB/s per node +Grid : Message : Stencil 18.7741 GB/s per node +Grid : Message : Stencil 12.8815 GB/s per node +Grid : Message : Average mflops/s per call per node : 666241 +Grid : Message : Average mflops/s per call per node : 809735 +Grid : Message : Average mflops/s per call per node : 813012 +Grid : Message : Average mflops/s per call per node : 661935 +Grid : Message : Average mflops/s per call per node (full): 316880 +Grid 
: Message : Average mflops/s per call per node (full): 438139 +Grid : Message : Average mflops/s per call per node (full): 445881 +Grid : Message : Average mflops/s per call per node (full): 305467 +Grid : Message : Stencil 13.1792 GB/s per node +Grid : Message : Stencil 17.8518 GB/s per node +Grid : Message : Stencil 17.7641 GB/s per node +Grid : Message : Stencil 13.0227 GB/s per node +Grid : Message : Average mflops/s per call per node : 667995 +Grid : Message : Average mflops/s per call per node : 806047 +Grid : Message : Average mflops/s per call per node : 825949 +Grid : Message : Average mflops/s per call per node : 663069 +Grid : Message : Average mflops/s per call per node (full): 314623 +Grid : Message : Average mflops/s per call per node (full): 440785 +Grid : Message : Average mflops/s per call per node (full): 446951 +Grid : Message : Average mflops/s per call per node (full): 305984 +Grid : Message : Stencil 12.8287 GB/s per node +Grid : Message : Stencil 16.6164 GB/s per node +Grid : Message : Stencil 18.9068 GB/s per node +Grid : Message : Stencil 12.0297 GB/s per node +Grid : Message : Average mflops/s per call per node : 668841 +Grid : Message : Average mflops/s per call per node : 803523 +Grid : Message : Average mflops/s per call per node : 818370 +Grid : Message : Average mflops/s per call per node : 666093 +Grid : Message : Average mflops/s per call per node (full): 315297 +Grid : Message : Average mflops/s per call per node (full): 435944 +Grid : Message : Average mflops/s per call per node (full): 447388 +Grid : Message : Average mflops/s per call per node (full): 302011 +Grid : Message : Stencil 13.2067 GB/s per node +Grid : Message : Stencil 17.3298 GB/s per node +Grid : Message : Stencil 17.6517 GB/s per node +Grid : Message : Stencil 12.6095 GB/s per node +Grid : Message : Average mflops/s per call per node : 666902 +Grid : Message : Average mflops/s per call per node : 800780 +Grid : Message : Average mflops/s per call per node : 827430 
+Grid : Message : Average mflops/s per call per node : 670098 +Grid : Message : Average mflops/s per call per node (full): 315707 +Grid : Message : Average mflops/s per call per node (full): 438855 +Grid : Message : Average mflops/s per call per node (full): 446502 +Grid : Message : Average mflops/s per call per node (full): 305800 +Grid : Message : Stencil 12.8391 GB/s per node +Grid : Message : Stencil 18.1332 GB/s per node +Grid : Message : Stencil 18.4131 GB/s per node +Grid : Message : Stencil 13.6612 GB/s per node +Grid : Message : Average mflops/s per call per node : 667111 +Grid : Message : Average mflops/s per call per node : 800736 +Grid : Message : Average mflops/s per call per node : 825266 +Grid : Message : Average mflops/s per call per node : 659026 +Grid : Message : Average mflops/s per call per node (full): 315263 +Grid : Message : Average mflops/s per call per node (full): 440530 +Grid : Message : Average mflops/s per call per node (full): 448816 +Grid : Message : Average mflops/s per call per node (full): 306118 +Grid : Message : Stencil 13.2343 GB/s per node +Grid : Message : Stencil 16.9053 GB/s per node +Grid : Message : Stencil 18.1019 GB/s per node +Grid : Message : Stencil 12.6898 GB/s per node +Grid : Message : Average mflops/s per call per node : 666497 +Grid : Message : Average mflops/s per call per node : 808899 +Grid : Message : Average mflops/s per call per node : 821718 +Grid : Message : Average mflops/s per call per node : 663943 +Grid : Message : Average mflops/s per call per node (full): 316154 +Grid : Message : Average mflops/s per call per node (full): 437821 +Grid : Message : Average mflops/s per call per node (full): 447299 +Grid : Message : Average mflops/s per call per node (full): 302565 +Grid : Message : Stencil 13.6119 GB/s per node +Grid : Message : Stencil 16.4649 GB/s per node +Grid : Message : Stencil 16.6632 GB/s per node +Grid : Message : Stencil 13.1453 GB/s per node +Grid : Message : Average mflops/s per call per 
node : 665911 +Grid : Message : Average mflops/s per call per node : 802839 +Grid : Message : Average mflops/s per call per node : 822157 +Grid : Message : Average mflops/s per call per node : 659798 +Grid : Message : Average mflops/s per call per node (full): 317175 +Grid : Message : Average mflops/s per call per node (full): 434421 +Grid : Message : Average mflops/s per call per node (full): 426729 +Grid : Message : Average mflops/s per call per node (full): 301588 +Grid : Message : Stencil 13.6702 GB/s per node +Grid : Message : Stencil 16.2963 GB/s per node +Grid : Message : Stencil 18.2252 GB/s per node +Grid : Message : Stencil 12.2345 GB/s per node +Grid : Message : Average mflops/s per call per node : 666119 +Grid : Message : Average mflops/s per call per node : 803426 +Grid : Message : Average mflops/s per call per node : 823702 +Grid : Message : Average mflops/s per call per node : 668199 +Grid : Message : Average mflops/s per call per node (full): 317097 +Grid : Message : Average mflops/s per call per node (full): 431819 +Grid : Message : Average mflops/s per call per node (full): 447193 +Grid : Message : Average mflops/s per call per node (full): 303845 +Grid : Message : Stencil 13.3411 GB/s per node +Grid : Message : Stencil 17.1795 GB/s per node +Grid : Message : Stencil 18.0849 GB/s per node +Grid : Message : Stencil 12.496 GB/s per node +Grid : Message : Average mflops/s per call per node : 668590 +Grid : Message : Average mflops/s per call per node : 800821 +Grid : Message : Average mflops/s per call per node : 829262 +Grid : Message : Average mflops/s per call per node : 664689 +Grid : Message : Average mflops/s per call per node (full): 316188 +Grid : Message : Average mflops/s per call per node (full): 438769 +Grid : Message : Average mflops/s per call per node (full): 448396 +Grid : Message : Average mflops/s per call per node (full): 304490 +Grid : Message : Stencil 14.7099 GB/s per node +Grid : Message : Stencil 17.1644 GB/s per node +Grid : 
Message : Stencil 17.1071 GB/s per node +Grid : Message : Stencil 12.4515 GB/s per node +Grid : Message : Average mflops/s per call per node : 663658 +Grid : Message : Average mflops/s per call per node : 804636 +Grid : Message : Average mflops/s per call per node : 821849 +Grid : Message : Average mflops/s per call per node : 659873 +Grid : Message : Average mflops/s per call per node (full): 317243 +Grid : Message : Average mflops/s per call per node (full): 435777 +Grid : Message : Average mflops/s per call per node (full): 438993 +Grid : Message : Average mflops/s per call per node (full): 304428 +Grid : Message : Stencil 14.5401 GB/s per node +Grid : Message : Stencil 17.653 GB/s per node +Grid : Message : Stencil 16.8231 GB/s per node +Grid : Message : Stencil 12.4649 GB/s per node +Grid : Message : Average mflops/s per call per node : 663737 +Grid : Message : Average mflops/s per call per node : 799291 +Grid : Message : Average mflops/s per call per node : 820476 +Grid : Message : Average mflops/s per call per node : 659852 +Grid : Message : Average mflops/s per call per node (full): 316420 +Grid : Message : Average mflops/s per call per node (full): 437281 +Grid : Message : Average mflops/s per call per node (full): 437689 +Grid : Message : Average mflops/s per call per node (full): 302147 +Grid : Message : Stencil 12.8809 GB/s per node +Grid : Message : Stencil 17.1122 GB/s per node +Grid : Message : Stencil 17.9883 GB/s per node +Grid : Message : Stencil 12.297 GB/s per node +Grid : Message : Average mflops/s per call per node : 666660 +Grid : Message : Average mflops/s per call per node : 804044 +Grid : Message : Average mflops/s per call per node : 821177 +Grid : Message : Average mflops/s per call per node : 665370 +Grid : Message : Average mflops/s per call per node (full): 314404 +Grid : Message : Average mflops/s per call per node (full): 435744 +Grid : Message : Average mflops/s per call per node (full): 445622 +Grid : Message : Average mflops/s 
per call per node (full): 303978 +Grid : Message : Stencil 13.4031 GB/s per node +Grid : Message : Stencil 16.5617 GB/s per node +Grid : Message : Stencil 17.8289 GB/s per node +Grid : Message : Stencil 12.8881 GB/s per node +Grid : Message : Average mflops/s per call per node : 663272 +Grid : Message : Average mflops/s per call per node : 802300 +Grid : Message : Average mflops/s per call per node : 819337 +Grid : Message : Average mflops/s per call per node : 663035 +Grid : Message : Average mflops/s per call per node (full): 315588 +Grid : Message : Average mflops/s per call per node (full): 435178 +Grid : Message : Average mflops/s per call per node (full): 445795 +Grid : Message : Average mflops/s per call per node (full): 301042 +Grid : Message : Stencil 14.0041 GB/s per node +Grid : Message : Stencil 16.5044 GB/s per node +Grid : Message : Stencil 16.8942 GB/s per node +Grid : Message : Stencil 12.0931 GB/s per node +Grid : Message : Average mflops/s per call per node : 659529 +Grid : Message : Average mflops/s per call per node : 800857 +Grid : Message : Average mflops/s per call per node : 821965 +Grid : Message : Average mflops/s per call per node : 668337 +Grid : Message : Average mflops/s per call per node (full): 313966 +Grid : Message : Average mflops/s per call per node (full): 434530 +Grid : Message : Average mflops/s per call per node (full): 438095 +Grid : Message : Average mflops/s per call per node (full): 303502 +Grid : Message : Stencil 13.632 GB/s per node +Grid : Message : Stencil 16.2975 GB/s per node +Grid : Message : Stencil 17.9547 GB/s per node +Grid : Message : Stencil 13.4796 GB/s per node +Grid : Message : Average mflops/s per call per node : 663517 +Grid : Message : Average mflops/s per call per node : 803902 +Grid : Message : Average mflops/s per call per node : 816525 +Grid : Message : Average mflops/s per call per node : 664545 +Grid : Message : Average mflops/s per call per node (full): 314656 +Grid : Message : Average mflops/s 
per call per node (full): 432209 +Grid : Message : Average mflops/s per call per node (full): 445822 +Grid : Message : Average mflops/s per call per node (full): 305938 +Grid : Message : Stencil 13.1287 GB/s per node +Grid : Message : Stencil 16.983 GB/s per node +Grid : Message : Stencil 17.7084 GB/s per node +Grid : Message : Stencil 12.3646 GB/s per node +Grid : Message : Average mflops/s per call per node : 666280 +Grid : Message : Average mflops/s per call per node : 802273 +Grid : Message : Average mflops/s per call per node : 818273 +Grid : Message : Average mflops/s per call per node : 669441 +Grid : Message : Average mflops/s per call per node (full): 315151 +Grid : Message : Average mflops/s per call per node (full): 433794 +Grid : Message : Average mflops/s per call per node (full): 444069 +Grid : Message : Average mflops/s per call per node (full): 304140 +Grid : Message : Stencil 12.8293 GB/s per node +Grid : Message : Stencil 16.3603 GB/s per node +Grid : Message : Stencil 17.4713 GB/s per node +Grid : Message : Stencil 12.433 GB/s per node +Grid : Message : Average mflops/s per call per node : 666807 +Grid : Message : Average mflops/s per call per node : 803321 +Grid : Message : Average mflops/s per call per node : 827302 +Grid : Message : Average mflops/s per call per node : 668460 +Grid : Message : Average mflops/s per call per node (full): 314888 +Grid : Message : Average mflops/s per call per node (full): 433450 +Grid : Message : Average mflops/s per call per node (full): 446392 +Grid : Message : Average mflops/s per call per node (full): 305275 +Grid : Message : Stencil 13.0512 GB/s per node +Grid : Message : Stencil 17.9731 GB/s per node +Grid : Message : Stencil 17.1651 GB/s per node +Grid : Message : Stencil 11.6682 GB/s per node +Grid : Message : Average mflops/s per call per node : 664991 +Grid : Message : Average mflops/s per call per node : 805192 +Grid : Message : Average mflops/s per call per node : 817573 +Grid : Message : Average 
mflops/s per call per node : 662967 +Grid : Message : Average mflops/s per call per node (full): 315623 +Grid : Message : Average mflops/s per call per node (full): 441507 +Grid : Message : Average mflops/s per call per node (full): 441601 +Grid : Message : Average mflops/s per call per node (full): 298035 +Grid : Message : Stencil 13.253 GB/s per node +Grid : Message : Stencil 16.3774 GB/s per node +Grid : Message : Stencil 18.6875 GB/s per node +Grid : Message : Stencil 12.2165 GB/s per node +Grid : Message : Average mflops/s per call per node : 667799 +Grid : Message : Average mflops/s per call per node : 803575 +Grid : Message : Average mflops/s per call per node : 818975 +Grid : Message : Average mflops/s per call per node : 670247 +Grid : Message : Average mflops/s per call per node (full): 316219 +Grid : Message : Average mflops/s per call per node (full): 433350 +Grid : Message : Average mflops/s per call per node (full): 448136 +Grid : Message : Average mflops/s per call per node (full): 304675 +Grid : Message : Stencil 14.3041 GB/s per node +Grid : Message : Stencil 17.0718 GB/s per node +Grid : Message : Stencil 17.9263 GB/s per node +Grid : Message : Stencil 12.7822 GB/s per node +Grid : Message : Average mflops/s per call per node : 665593 +Grid : Message : Average mflops/s per call per node : 805329 +Grid : Message : Average mflops/s per call per node : 826751 +Grid : Message : Average mflops/s per call per node : 664358 +Grid : Message : Average mflops/s per call per node (full): 316931 +Grid : Message : Average mflops/s per call per node (full): 437511 +Grid : Message : Average mflops/s per call per node (full): 442404 +Grid : Message : Average mflops/s per call per node (full): 303260 +Grid : Message : Stencil 12.5125 GB/s per node +Grid : Message : Stencil 16.6135 GB/s per node +Grid : Message : Stencil 17.3698 GB/s per node +Grid : Message : Stencil 13.3542 GB/s per node +Grid : Message : Average mflops/s per call per node : 669120 +Grid : 
Message : Average mflops/s per call per node : 801584 +Grid : Message : Average mflops/s per call per node : 823063 +Grid : Message : Average mflops/s per call per node : 659636 +Grid : Message : Average mflops/s per call per node (full): 314085 +Grid : Message : Average mflops/s per call per node (full): 434002 +Grid : Message : Average mflops/s per call per node (full): 443555 +Grid : Message : Average mflops/s per call per node (full): 305938 +Grid : Message : Stencil 12.6212 GB/s per node +Grid : Message : Stencil 16.2682 GB/s per node +Grid : Message : Stencil 19.0768 GB/s per node +Grid : Message : Stencil 12.0519 GB/s per node +Grid : Message : Average mflops/s per call per node : 669969 +Grid : Message : Average mflops/s per call per node : 806422 +Grid : Message : Average mflops/s per call per node : 824098 +Grid : Message : Average mflops/s per call per node : 668735 +Grid : Message : Average mflops/s per call per node (full): 315226 +Grid : Message : Average mflops/s per call per node (full): 432388 +Grid : Message : Average mflops/s per call per node (full): 450312 +Grid : Message : Average mflops/s per call per node (full): 301944 +Grid : Message : Stencil 13.7274 GB/s per node +Grid : Message : Stencil 17.1047 GB/s per node +Grid : Message : Stencil 17.2058 GB/s per node +Grid : Message : Stencil 12.2715 GB/s per node +Grid : Message : Average mflops/s per call per node : 664176 +Grid : Message : Average mflops/s per call per node : 801792 +Grid : Message : Average mflops/s per call per node : 823566 +Grid : Message : Average mflops/s per call per node : 667132 +Grid : Message : Average mflops/s per call per node (full): 315721 +Grid : Message : Average mflops/s per call per node (full): 437657 +Grid : Message : Average mflops/s per call per node (full): 442652 +Grid : Message : Average mflops/s per call per node (full): 304294 +Grid : Message : Stencil 12.5236 GB/s per node +Grid : Message : Stencil 16.2429 GB/s per node +Grid : Message : Stencil 
17.4594 GB/s per node +Grid : Message : Stencil 12.6165 GB/s per node +Grid : Message : Average mflops/s per call per node : 669461 +Grid : Message : Average mflops/s per call per node : 808103 +Grid : Message : Average mflops/s per call per node : 820303 +Grid : Message : Average mflops/s per call per node : 665400 +Grid : Message : Average mflops/s per call per node (full): 311139 +Grid : Message : Average mflops/s per call per node (full): 428508 +Grid : Message : Average mflops/s per call per node (full): 444642 +Grid : Message : Average mflops/s per call per node (full): 303657 +Grid : Message : Stencil 13.1776 GB/s per node +Grid : Message : Stencil 14.7654 GB/s per node +Grid : Message : Stencil 17.2279 GB/s per node +Grid : Message : Stencil 13.697 GB/s per node +Grid : Message : Average mflops/s per call per node : 667935 +Grid : Message : Average mflops/s per call per node : 801285 +Grid : Message : Average mflops/s per call per node : 823621 +Grid : Message : Average mflops/s per call per node : 665857 +Grid : Message : Average mflops/s per call per node (full): 315134 +Grid : Message : Average mflops/s per call per node (full): 411898 +Grid : Message : Average mflops/s per call per node (full): 442485 +Grid : Message : Average mflops/s per call per node (full): 306877 +Grid : Message : Stencil 13.3373 GB/s per node +Grid : Message : Stencil 17.5889 GB/s per node +Grid : Message : Stencil 18.0192 GB/s per node +Grid : Message : Stencil 13.6967 GB/s per node +Grid : Message : Average mflops/s per call per node : 669391 +Grid : Message : Average mflops/s per call per node : 807592 +Grid : Message : Average mflops/s per call per node : 820767 +Grid : Message : Average mflops/s per call per node : 662345 +Grid : Message : Average mflops/s per call per node (full): 315870 +Grid : Message : Average mflops/s per call per node (full): 439749 +Grid : Message : Average mflops/s per call per node (full): 446580 +Grid : Message : Average mflops/s per call per node 
(full): 304171 +Grid : Message : Stencil 13.801 GB/s per node +Grid : Message : Stencil 16.7288 GB/s per node +Grid : Message : Stencil 17.6663 GB/s per node +Grid : Message : Stencil 12.7834 GB/s per node +Grid : Message : Average mflops/s per call per node : 664427 +Grid : Message : Average mflops/s per call per node : 804456 +Grid : Message : Average mflops/s per call per node : 822575 +Grid : Message : Average mflops/s per call per node : 667446 +Grid : Message : Average mflops/s per call per node (full): 315898 +Grid : Message : Average mflops/s per call per node (full): 437236 +Grid : Message : Average mflops/s per call per node (full): 445612 +Grid : Message : Average mflops/s per call per node (full): 304453 +Grid : Message : Stencil 14.3359 GB/s per node +Grid : Message : Stencil 16.4792 GB/s per node +Grid : Message : Stencil 16.8484 GB/s per node +Grid : Message : Stencil 11.8385 GB/s per node +Grid : Message : Average mflops/s per call per node : 665692 +Grid : Message : Average mflops/s per call per node : 802731 +Grid : Message : Average mflops/s per call per node : 820411 +Grid : Message : Average mflops/s per call per node : 668575 +Grid : Message : Average mflops/s per call per node (full): 315999 +Grid : Message : Average mflops/s per call per node (full): 434040 +Grid : Message : Average mflops/s per call per node (full): 435236 +Grid : Message : Average mflops/s per call per node (full): 300225 +Grid : Message : Stencil 13.8964 GB/s per node +Grid : Message : Stencil 16.9391 GB/s per node +Grid : Message : Stencil 17.6979 GB/s per node +Grid : Message : Stencil 12.8515 GB/s per node +Grid : Message : Average mflops/s per call per node : 662069 +Grid : Message : Average mflops/s per call per node : 803656 +Grid : Message : Average mflops/s per call per node : 815813 +Grid : Message : Average mflops/s per call per node : 665169 +Grid : Message : Average mflops/s per call per node (full): 314933 +Grid : Message : Average mflops/s per call per node 
(full): 437751 +Grid : Message : Average mflops/s per call per node (full): 445789 +Grid : Message : Average mflops/s per call per node (full): 304065 +Grid : Message : Stencil 13.561 GB/s per node +Grid : Message : Stencil 17.9284 GB/s per node +Grid : Message : Stencil 17.2071 GB/s per node +Grid : Message : Stencil 13.2587 GB/s per node +Grid : Message : Average mflops/s per call per node : 662725 +Grid : Message : Average mflops/s per call per node : 801438 +Grid : Message : Average mflops/s per call per node : 814631 +Grid : Message : Average mflops/s per call per node : 664032 +Grid : Message : Average mflops/s per call per node (full): 314547 +Grid : Message : Average mflops/s per call per node (full): 438516 +Grid : Message : Average mflops/s per call per node (full): 439489 +Grid : Message : Average mflops/s per call per node (full): 306046 +Grid : Message : Stencil 12.3496 GB/s per node +Grid : Message : Stencil 18.8328 GB/s per node +Grid : Message : Stencil 18.8839 GB/s per node +Grid : Message : Stencil 12.5511 GB/s per node +Grid : Message : Average mflops/s per call per node : 665142 +Grid : Message : Average mflops/s per call per node : 803796 +Grid : Message : Average mflops/s per call per node : 813808 +Grid : Message : Average mflops/s per call per node : 664365 +Grid : Message : Average mflops/s per call per node (full): 311260 +Grid : Message : Average mflops/s per call per node (full): 440747 +Grid : Message : Average mflops/s per call per node (full): 445725 +Grid : Message : Average mflops/s per call per node (full): 301717 +Grid : Message : Stencil 13.3218 GB/s per node +Grid : Message : Stencil 16.7001 GB/s per node +Grid : Message : Stencil 17.7836 GB/s per node +Grid : Message : Stencil 13.2898 GB/s per node +Grid : Message : Average mflops/s per call per node : 667594 +Grid : Message : Average mflops/s per call per node : 802337 +Grid : Message : Average mflops/s per call per node : 821749 +Grid : Message : Average mflops/s per call per 
node : 659994 +Grid : Message : Average mflops/s per call per node (full): 315952 +Grid : Message : Average mflops/s per call per node (full): 436347 +Grid : Message : Average mflops/s per call per node (full): 445341 +Grid : Message : Average mflops/s per call per node (full): 305925 +Grid : Message : Stencil 13.3547 GB/s per node +Grid : Message : Stencil 16.3847 GB/s per node +Grid : Message : Stencil 18.0773 GB/s per node +Grid : Message : Stencil 12.2147 GB/s per node +Grid : Message : Average mflops/s per call per node : 663244 +Grid : Message : Average mflops/s per call per node : 803493 +Grid : Message : Average mflops/s per call per node : 820519 +Grid : Message : Average mflops/s per call per node : 665691 +Grid : Message : Average mflops/s per call per node (full): 315608 +Grid : Message : Average mflops/s per call per node (full): 433807 +Grid : Message : Average mflops/s per call per node (full): 442851 +Grid : Message : Average mflops/s per call per node (full): 299870 +Grid : Message : Stencil 13.4964 GB/s per node +Grid : Message : Stencil 17.4179 GB/s per node +Grid : Message : Stencil 17.2663 GB/s per node +Grid : Message : Stencil 12.9006 GB/s per node +Grid : Message : Average mflops/s per call per node : 665195 +Grid : Message : Average mflops/s per call per node : 806421 +Grid : Message : Average mflops/s per call per node : 821652 +Grid : Message : Average mflops/s per call per node : 664788 +Grid : Message : Average mflops/s per call per node (full): 316373 +Grid : Message : Average mflops/s per call per node (full): 438593 +Grid : Message : Average mflops/s per call per node (full): 442859 +Grid : Message : Average mflops/s per call per node (full): 303968 +Grid : Message : Stencil 13.7037 GB/s per node +Grid : Message : Stencil 16.746 GB/s per node +Grid : Message : Stencil 18.4778 GB/s per node +Grid : Message : Stencil 12.3692 GB/s per node +Grid : Message : Average mflops/s per call per node : 669746 +Grid : Message : Average mflops/s 
per call per node : 801249 +Grid : Message : Average mflops/s per call per node : 819217 +Grid : Message : Average mflops/s per call per node : 671226 +Grid : Message : Average mflops/s per call per node (full): 316275 +Grid : Message : Average mflops/s per call per node (full): 434504 +Grid : Message : Average mflops/s per call per node (full): 447449 +Grid : Message : Average mflops/s per call per node (full): 303978 +Grid : Message : Stencil 13.6722 GB/s per node +Grid : Message : Stencil 17.1809 GB/s per node +Grid : Message : Stencil 17.5852 GB/s per node +Grid : Message : Stencil 12.8785 GB/s per node +Grid : Message : Average mflops/s per call per node : 667196 +Grid : Message : Average mflops/s per call per node : 808159 +Grid : Message : Average mflops/s per call per node : 822944 +Grid : Message : Average mflops/s per call per node : 660357 +Grid : Message : Average mflops/s per call per node (full): 315620 +Grid : Message : Average mflops/s per call per node (full): 437790 +Grid : Message : Average mflops/s per call per node (full): 445648 +Grid : Message : Average mflops/s per call per node (full): 304434 +Grid : Message : Stencil 12.5679 GB/s per node +Grid : Message : Stencil 17.4199 GB/s per node +Grid : Message : Stencil 17.2132 GB/s per node +Grid : Message : Stencil 14.2392 GB/s per node +Grid : Message : Average mflops/s per call per node : 672005 +Grid : Message : Average mflops/s per call per node : 802189 +Grid : Message : Average mflops/s per call per node : 828593 +Grid : Message : Average mflops/s per call per node : 662159 +Grid : Message : Average mflops/s per call per node (full): 314643 +Grid : Message : Average mflops/s per call per node (full): 437588 +Grid : Message : Average mflops/s per call per node (full): 443841 +Grid : Message : Average mflops/s per call per node (full): 307739 +Grid : Message : Stencil 13.6386 GB/s per node +Grid : Message : Stencil 17.1214 GB/s per node +Grid : Message : Stencil 18.3172 GB/s per node +Grid : 
Message : Stencil 13.1393 GB/s per node +Grid : Message : Average mflops/s per call per node : 666778 +Grid : Message : Average mflops/s per call per node : 803811 +Grid : Message : Average mflops/s per call per node : 817101 +Grid : Message : Average mflops/s per call per node : 668798 +Grid : Message : Average mflops/s per call per node (full): 316127 +Grid : Message : Average mflops/s per call per node (full): 439144 +Grid : Message : Average mflops/s per call per node (full): 445765 +Grid : Message : Average mflops/s per call per node (full): 306233 +Grid : Message : Stencil 12.5875 GB/s per node +Grid : Message : Stencil 16.5928 GB/s per node +Grid : Message : Stencil 17.1334 GB/s per node +Grid : Message : Stencil 12.7421 GB/s per node +Grid : Message : Average mflops/s per call per node : 668509 +Grid : Message : Average mflops/s per call per node : 802915 +Grid : Message : Average mflops/s per call per node : 826550 +Grid : Message : Average mflops/s per call per node : 663648 +Grid : Message : Average mflops/s per call per node (full): 313506 +Grid : Message : Average mflops/s per call per node (full): 435660 +Grid : Message : Average mflops/s per call per node (full): 442517 +Grid : Message : Average mflops/s per call per node (full): 304384 +Grid : Message : Stencil 12.7432 GB/s per node +Grid : Message : Stencil 17.4419 GB/s per node +Grid : Message : Stencil 18.3907 GB/s per node +Grid : Message : Stencil 12.7773 GB/s per node +Grid : Message : Average mflops/s per call per node : 665510 +Grid : Message : Average mflops/s per call per node : 803177 +Grid : Message : Average mflops/s per call per node : 825020 +Grid : Message : Average mflops/s per call per node : 665546 +Grid : Message : Average mflops/s per call per node (full): 313556 +Grid : Message : Average mflops/s per call per node (full): 438560 +Grid : Message : Average mflops/s per call per node (full): 447051 +Grid : Message : Average mflops/s per call per node (full): 305798 +Grid : Message 
: Stencil 13.6475 GB/s per node +Grid : Message : Stencil 14.9065 GB/s per node +Grid : Message : Stencil 17.237 GB/s per node +Grid : Message : Stencil 12.7836 GB/s per node +Grid : Message : Average mflops/s per call per node : 666778 +Grid : Message : Average mflops/s per call per node : 807523 +Grid : Message : Average mflops/s per call per node : 824240 +Grid : Message : Average mflops/s per call per node : 662212 +Grid : Message : Average mflops/s per call per node (full): 315898 +Grid : Message : Average mflops/s per call per node (full): 413795 +Grid : Message : Average mflops/s per call per node (full): 442913 +Grid : Message : Average mflops/s per call per node (full): 304489 +Grid : Message : Stencil 13.3365 GB/s per node +Grid : Message : Stencil 17.3594 GB/s per node +Grid : Message : Stencil 17.4579 GB/s per node +Grid : Message : Stencil 12.355 GB/s per node +Grid : Message : Average mflops/s per call per node : 667231 +Grid : Message : Average mflops/s per call per node : 802404 +Grid : Message : Average mflops/s per call per node : 828621 +Grid : Message : Average mflops/s per call per node : 665864 +Grid : Message : Average mflops/s per call per node (full): 314036 +Grid : Message : Average mflops/s per call per node (full): 435686 +Grid : Message : Average mflops/s per call per node (full): 445797 +Grid : Message : Average mflops/s per call per node (full): 303018 +Grid : Message : Stencil 13.3896 GB/s per node +Grid : Message : Stencil 16.8074 GB/s per node +Grid : Message : Stencil 17.3581 GB/s per node +Grid : Message : Stencil 13.3522 GB/s per node +Grid : Message : Average mflops/s per call per node : 668693 +Grid : Message : Average mflops/s per call per node : 804947 +Grid : Message : Average mflops/s per call per node : 823825 +Grid : Message : Average mflops/s per call per node : 663289 +Grid : Message : Average mflops/s per call per node (full): 315872 +Grid : Message : Average mflops/s per call per node (full): 437838 +Grid : Message : 
Average mflops/s per call per node (full): 444007 +Grid : Message : Average mflops/s per call per node (full): 306131 +Grid : Message : Stencil 12.2837 GB/s per node +Grid : Message : Stencil 16.4696 GB/s per node +Grid : Message : Stencil 18.1402 GB/s per node +Grid : Message : Stencil 13.8135 GB/s per node +Grid : Message : Average mflops/s per call per node : 667352 +Grid : Message : Average mflops/s per call per node : 803747 +Grid : Message : Average mflops/s per call per node : 824454 +Grid : Message : Average mflops/s per call per node : 661954 +Grid : Message : Average mflops/s per call per node (full): 310871 +Grid : Message : Average mflops/s per call per node (full): 434036 +Grid : Message : Average mflops/s per call per node (full): 449157 +Grid : Message : Average mflops/s per call per node (full): 304027 +Grid : Message : Stencil 10.6491 GB/s per node +Grid : Message : Stencil 16.7318 GB/s per node +Grid : Message : Stencil 17.8447 GB/s per node +Grid : Message : Stencil 12.1491 GB/s per node +Grid : Message : Average mflops/s per call per node : 671516 +Grid : Message : Average mflops/s per call per node : 804093 +Grid : Message : Average mflops/s per call per node : 820766 +Grid : Message : Average mflops/s per call per node : 669280 +Grid : Message : Average mflops/s per call per node (full): 293317 +Grid : Message : Average mflops/s per call per node (full): 436082 +Grid : Message : Average mflops/s per call per node (full): 446245 +Grid : Message : Average mflops/s per call per node (full): 303620 +Grid : Message : Stencil 13.9134 GB/s per node +Grid : Message : Stencil 13.2131 GB/s per node +Grid : Message : Stencil 18.4007 GB/s per node +Grid : Message : Stencil 11.608 GB/s per node +Grid : Message : Average mflops/s per call per node : 665008 +Grid : Message : Average mflops/s per call per node : 804174 +Grid : Message : Average mflops/s per call per node : 820197 +Grid : Message : Average mflops/s per call per node : 665283 +Grid : Message : 
Average mflops/s per call per node (full): 316562 +Grid : Message : Average mflops/s per call per node (full): 385144 +Grid : Message : Average mflops/s per call per node (full): 446507 +Grid : Message : Average mflops/s per call per node (full): 294405 +Grid : Message : Stencil 14.206 GB/s per node +Grid : Message : Stencil 17.3668 GB/s per node +Grid : Message : Stencil 17.3285 GB/s per node +Grid : Message : Stencil 12.5141 GB/s per node +Grid : Message : Average mflops/s per call per node : 666293 +Grid : Message : Average mflops/s per call per node : 799737 +Grid : Message : Average mflops/s per call per node : 825270 +Grid : Message : Average mflops/s per call per node : 665729 +Grid : Message : Average mflops/s per call per node (full): 316554 +Grid : Message : Average mflops/s per call per node (full): 437565 +Grid : Message : Average mflops/s per call per node (full): 442063 +Grid : Message : Average mflops/s per call per node (full): 304515 +Grid : Message : Stencil 13.162 GB/s per node +Grid : Message : Stencil 16.3511 GB/s per node +Grid : Message : Stencil 17.517 GB/s per node +Grid : Message : Stencil 12.0928 GB/s per node +Grid : Message : Average mflops/s per call per node : 666213 +Grid : Message : Average mflops/s per call per node : 804341 +Grid : Message : Average mflops/s per call per node : 818709 +Grid : Message : Average mflops/s per call per node : 662936 +Grid : Message : Average mflops/s per call per node (full): 315494 +Grid : Message : Average mflops/s per call per node (full): 433146 +Grid : Message : Average mflops/s per call per node (full): 444798 +Grid : Message : Average mflops/s per call per node (full): 302146 +Grid : Message : Stencil 13.6063 GB/s per node +Grid : Message : Stencil 16.6806 GB/s per node +Grid : Message : Stencil 17.1707 GB/s per node +Grid : Message : Stencil 12.1258 GB/s per node +Grid : Message : Average mflops/s per call per node : 662785 +Grid : Message : Average mflops/s per call per node : 810464 +Grid : 
Message : Average mflops/s per call per node : 820353 +Grid : Message : Average mflops/s per call per node : 670523 +Grid : Message : Average mflops/s per call per node (full): 315716 +Grid : Message : Average mflops/s per call per node (full): 436969 +Grid : Message : Average mflops/s per call per node (full): 440335 +Grid : Message : Average mflops/s per call per node (full): 302790 +Grid : Message : Stencil 14.1364 GB/s per node +Grid : Message : Stencil 16.9377 GB/s per node +Grid : Message : Stencil 18.1098 GB/s per node +Grid : Message : Stencil 12.7042 GB/s per node +Grid : Message : Average mflops/s per call per node : 664049 +Grid : Message : Average mflops/s per call per node : 803685 +Grid : Message : Average mflops/s per call per node : 825457 +Grid : Message : Average mflops/s per call per node : 667548 +Grid : Message : Average mflops/s per call per node (full): 315970 +Grid : Message : Average mflops/s per call per node (full): 434618 +Grid : Message : Average mflops/s per call per node (full): 448142 +Grid : Message : Average mflops/s per call per node (full): 305991 +Grid : Message : Stencil 12.8699 GB/s per node +Grid : Message : Stencil 16.6379 GB/s per node +Grid : Message : Stencil 18.522 GB/s per node +Grid : Message : Stencil 13.0003 GB/s per node +Grid : Message : Average mflops/s per call per node : 668760 +Grid : Message : Average mflops/s per call per node : 807728 +Grid : Message : Average mflops/s per call per node : 824873 +Grid : Message : Average mflops/s per call per node : 661669 +Grid : Message : Average mflops/s per call per node (full): 315270 +Grid : Message : Average mflops/s per call per node (full): 436781 +Grid : Message : Average mflops/s per call per node (full): 449931 +Grid : Message : Average mflops/s per call per node (full): 304919 +Grid : Message : Stencil 14.9178 GB/s per node +Grid : Message : Stencil 16.4403 GB/s per node +Grid : Message : Stencil 17.7768 GB/s per node +Grid : Message : Stencil 14.7444 GB/s per 
node +Grid : Message : Average mflops/s per call per node : 666216 +Grid : Message : Average mflops/s per call per node : 803335 +Grid : Message : Average mflops/s per call per node : 823697 +Grid : Message : Average mflops/s per call per node : 658365 +Grid : Message : Average mflops/s per call per node (full): 316364 +Grid : Message : Average mflops/s per call per node (full): 432841 +Grid : Message : Average mflops/s per call per node (full): 445773 +Grid : Message : Average mflops/s per call per node (full): 307202 +Grid : Message : Stencil 13.4562 GB/s per node +Grid : Message : Stencil 17.3964 GB/s per node +Grid : Message : Stencil 18.6967 GB/s per node +Grid : Message : Stencil 13.176 GB/s per node +Grid : Message : Average mflops/s per call per node : 668300 +Grid : Message : Average mflops/s per call per node : 796984 +Grid : Message : Average mflops/s per call per node : 824084 +Grid : Message : Average mflops/s per call per node : 662759 +Grid : Message : Average mflops/s per call per node (full): 316025 +Grid : Message : Average mflops/s per call per node (full): 436907 +Grid : Message : Average mflops/s per call per node (full): 447205 +Grid : Message : Average mflops/s per call per node (full): 305831 +Grid : Message : Stencil 13.7019 GB/s per node +Grid : Message : Stencil 17.0464 GB/s per node +Grid : Message : Stencil 17.5725 GB/s per node +Grid : Message : Stencil 13.1021 GB/s per node +Grid : Message : Average mflops/s per call per node : 668842 +Grid : Message : Average mflops/s per call per node : 806323 +Grid : Message : Average mflops/s per call per node : 817625 +Grid : Message : Average mflops/s per call per node : 662144 +Grid : Message : Average mflops/s per call per node (full): 316260 +Grid : Message : Average mflops/s per call per node (full): 439138 +Grid : Message : Average mflops/s per call per node (full): 444101 +Grid : Message : Average mflops/s per call per node (full): 305157 +Grid : Message : Stencil 13.3656 GB/s per node 
+Grid : Message : Stencil 18.3142 GB/s per node +Grid : Message : Stencil 17.2804 GB/s per node +Grid : Message : Stencil 12.0448 GB/s per node +Grid : Message : Average mflops/s per call per node : 669417 +Grid : Message : Average mflops/s per call per node : 804771 +Grid : Message : Average mflops/s per call per node : 821989 +Grid : Message : Average mflops/s per call per node : 670615 +Grid : Message : Average mflops/s per call per node (full): 316023 +Grid : Message : Average mflops/s per call per node (full): 441714 +Grid : Message : Average mflops/s per call per node (full): 442340 +Grid : Message : Average mflops/s per call per node (full): 302689 +Grid : Message : Stencil 13.4482 GB/s per node +Grid : Message : Stencil 17.1081 GB/s per node +Grid : Message : Stencil 17.2833 GB/s per node +Grid : Message : Stencil 12.2318 GB/s per node +Grid : Message : Average mflops/s per call per node : 666082 +Grid : Message : Average mflops/s per call per node : 803669 +Grid : Message : Average mflops/s per call per node : 822570 +Grid : Message : Average mflops/s per call per node : 671442 +Grid : Message : Average mflops/s per call per node (full): 315574 +Grid : Message : Average mflops/s per call per node (full): 438385 +Grid : Message : Average mflops/s per call per node (full): 442237 +Grid : Message : Average mflops/s per call per node (full): 304457 +Grid : Message : Stencil 12.9332 GB/s per node +Grid : Message : Stencil 19.7532 GB/s per node +Grid : Message : Stencil 18.0717 GB/s per node +Grid : Message : Stencil 12.6331 GB/s per node +Grid : Message : Average mflops/s per call per node : 666753 +Grid : Message : Average mflops/s per call per node : 804898 +Grid : Message : Average mflops/s per call per node : 818580 +Grid : Message : Average mflops/s per call per node : 671362 +Grid : Message : Average mflops/s per call per node (full): 314607 +Grid : Message : Average mflops/s per call per node (full): 444353 +Grid : Message : Average mflops/s per call per 
node (full): 445560 +Grid : Message : Average mflops/s per call per node (full): 305843 +Grid : Message : Stencil 12.5512 GB/s per node +Grid : Message : Stencil 17.5269 GB/s per node +Grid : Message : Stencil 17.2834 GB/s per node +Grid : Message : Stencil 13.4437 GB/s per node +Grid : Message : Average mflops/s per call per node : 672546 +Grid : Message : Average mflops/s per call per node : 803988 +Grid : Message : Average mflops/s per call per node : 823710 +Grid : Message : Average mflops/s per call per node : 661994 +Grid : Message : Average mflops/s per call per node (full): 314246 +Grid : Message : Average mflops/s per call per node (full): 441246 +Grid : Message : Average mflops/s per call per node (full): 442659 +Grid : Message : Average mflops/s per call per node (full): 306323 +Grid : Message : Stencil 13.7881 GB/s per node +Grid : Message : Stencil 17.1041 GB/s per node +Grid : Message : Stencil 17.8683 GB/s per node +Grid : Message : Stencil 14.7355 GB/s per node +Grid : Message : Average mflops/s per call per node : 662517 +Grid : Message : Average mflops/s per call per node : 802137 +Grid : Message : Average mflops/s per call per node : 820673 +Grid : Message : Average mflops/s per call per node : 660132 +Grid : Message : Average mflops/s per call per node (full): 315677 +Grid : Message : Average mflops/s per call per node (full): 435985 +Grid : Message : Average mflops/s per call per node (full): 446418 +Grid : Message : Average mflops/s per call per node (full): 306190 +Grid : Message : Stencil 14.4227 GB/s per node +Grid : Message : Stencil 16.9638 GB/s per node +Grid : Message : Stencil 17.2421 GB/s per node +Grid : Message : Stencil 13.146 GB/s per node +Grid : Message : Average mflops/s per call per node : 662917 +Grid : Message : Average mflops/s per call per node : 805807 +Grid : Message : Average mflops/s per call per node : 817935 +Grid : Message : Average mflops/s per call per node : 664956 +Grid : Message : Average mflops/s per call per 
node (full): 316235 +Grid : Message : Average mflops/s per call per node (full): 436484 +Grid : Message : Average mflops/s per call per node (full): 440255 +Grid : Message : Average mflops/s per call per node (full): 305928 +Grid : Message : Stencil 12.6977 GB/s per node +Grid : Message : Stencil 10.1535 GB/s per node +Grid : Message : Stencil 17.3576 GB/s per node +Grid : Message : Stencil 12.7675 GB/s per node +Grid : Message : Average mflops/s per call per node : 669493 +Grid : Message : Average mflops/s per call per node : 808986 +Grid : Message : Average mflops/s per call per node : 823811 +Grid : Message : Average mflops/s per call per node : 668299 +Grid : Message : Average mflops/s per call per node (full): 314590 +Grid : Message : Average mflops/s per call per node (full): 320442 +Grid : Message : Average mflops/s per call per node (full): 444171 +Grid : Message : Average mflops/s per call per node (full): 303688 +Grid : Message : Stencil 13.195 GB/s per node +Grid : Message : Stencil 16.5774 GB/s per node +Grid : Message : Stencil 17.064 GB/s per node +Grid : Message : Stencil 12.4249 GB/s per node +Grid : Message : Average mflops/s per call per node : 668161 +Grid : Message : Average mflops/s per call per node : 805483 +Grid : Message : Average mflops/s per call per node : 819279 +Grid : Message : Average mflops/s per call per node : 668078 +Grid : Message : Average mflops/s per call per node (full): 315648 +Grid : Message : Average mflops/s per call per node (full): 435404 +Grid : Message : Average mflops/s per call per node (full): 440242 +Grid : Message : Average mflops/s per call per node (full): 304294 +Grid : Message : Stencil 13.1611 GB/s per node +Grid : Message : Stencil 10.293 GB/s per node +Grid : Message : Stencil 18.4021 GB/s per node +Grid : Message : Stencil 12.2993 GB/s per node +Grid : Message : Average mflops/s per call per node : 664944 +Grid : Message : Average mflops/s per call per node : 810756 +Grid : Message : Average mflops/s per 
call per node : 823706 +Grid : Message : Average mflops/s per call per node : 665806 +Grid : Message : Average mflops/s per call per node (full): 315544 +Grid : Message : Average mflops/s per call per node (full): 322690 +Grid : Message : Average mflops/s per call per node (full): 446038 +Grid : Message : Average mflops/s per call per node (full): 303429 +Grid : Message : Stencil 14.224 GB/s per node +Grid : Message : Stencil 16.9568 GB/s per node +Grid : Message : Stencil 17.3814 GB/s per node +Grid : Message : Stencil 12.3383 GB/s per node +Grid : Message : Average mflops/s per call per node : 663420 +Grid : Message : Average mflops/s per call per node : 803452 +Grid : Message : Average mflops/s per call per node : 822770 +Grid : Message : Average mflops/s per call per node : 664749 +Grid : Message : Average mflops/s per call per node (full): 316778 +Grid : Message : Average mflops/s per call per node (full): 436976 +Grid : Message : Average mflops/s per call per node (full): 443927 +Grid : Message : Average mflops/s per call per node (full): 303698 +Grid : Message : Stencil 14.2383 GB/s per node +Grid : Message : Stencil 16.4187 GB/s per node +Grid : Message : Stencil 18.1444 GB/s per node +Grid : Message : Stencil 14.1145 GB/s per node +Grid : Message : Average mflops/s per call per node : 662589 +Grid : Message : Average mflops/s per call per node : 803662 +Grid : Message : Average mflops/s per call per node : 824074 +Grid : Message : Average mflops/s per call per node : 655333 +Grid : Message : Average mflops/s per call per node (full): 316927 +Grid : Message : Average mflops/s per call per node (full): 433461 +Grid : Message : Average mflops/s per call per node (full): 447367 +Grid : Message : Average mflops/s per call per node (full): 305873 +Grid : Message : Stencil 14.1274 GB/s per node +Grid : Message : Stencil 16.8365 GB/s per node +Grid : Message : Stencil 18.1599 GB/s per node +Grid : Message : Stencil 13.2011 GB/s per node +Grid : Message : Average 
mflops/s per call per node : 662459 +Grid : Message : Average mflops/s per call per node : 804676 +Grid : Message : Average mflops/s per call per node : 824916 +Grid : Message : Average mflops/s per call per node : 663064 +Grid : Message : Average mflops/s per call per node (full): 315764 +Grid : Message : Average mflops/s per call per node (full): 437445 +Grid : Message : Average mflops/s per call per node (full): 447714 +Grid : Message : Average mflops/s per call per node (full): 305780 +Grid : Message : Stencil 14.4141 GB/s per node +Grid : Message : Stencil 17.0002 GB/s per node +Grid : Message : Stencil 17.4644 GB/s per node +Grid : Message : Stencil 12.4324 GB/s per node +Grid : Message : Average mflops/s per call per node : 666248 +Grid : Message : Average mflops/s per call per node : 807964 +Grid : Message : Average mflops/s per call per node : 817203 +Grid : Message : Average mflops/s per call per node : 668184 +Grid : Message : Average mflops/s per call per node (full): 315953 +Grid : Message : Average mflops/s per call per node (full): 436210 +Grid : Message : Average mflops/s per call per node (full): 443869 +Grid : Message : Average mflops/s per call per node (full): 304638 +Grid : Message : Stencil 13.9462 GB/s per node +Grid : Message : Stencil 9.16624 GB/s per node +Grid : Message : Stencil 17.3436 GB/s per node +Grid : Message : Stencil 12.7577 GB/s per node +Grid : Message : Average mflops/s per call per node : 663975 +Grid : Message : Average mflops/s per call per node : 815086 +Grid : Message : Average mflops/s per call per node : 819869 +Grid : Message : Average mflops/s per call per node : 661803 +Grid : Message : Average mflops/s per call per node (full): 316413 +Grid : Message : Average mflops/s per call per node (full): 296466 +Grid : Message : Average mflops/s per call per node (full): 443320 +Grid : Message : Average mflops/s per call per node (full): 305294 +Grid : Message : Stencil 13.1198 GB/s per node +Grid : Message : Stencil 16.541 
GB/s per node +Grid : Message : Stencil 17.5719 GB/s per node +Grid : Message : Stencil 12.426 GB/s per node +Grid : Message : Average mflops/s per call per node : 666706 +Grid : Message : Average mflops/s per call per node : 806562 +Grid : Message : Average mflops/s per call per node : 827599 +Grid : Message : Average mflops/s per call per node : 671380 +Grid : Message : Average mflops/s per call per node (full): 314551 +Grid : Message : Average mflops/s per call per node (full): 435958 +Grid : Message : Average mflops/s per call per node (full): 447131 +Grid : Message : Average mflops/s per call per node (full): 305332 +Grid : Message : Stencil 14.0187 GB/s per node +Grid : Message : Stencil 16.7282 GB/s per node +Grid : Message : Stencil 17.5016 GB/s per node +Grid : Message : Stencil 12.3897 GB/s per node +Grid : Message : Average mflops/s per call per node : 665303 +Grid : Message : Average mflops/s per call per node : 803576 +Grid : Message : Average mflops/s per call per node : 820190 +Grid : Message : Average mflops/s per call per node : 662707 +Grid : Message : Average mflops/s per call per node (full): 316797 +Grid : Message : Average mflops/s per call per node (full): 436659 +Grid : Message : Average mflops/s per call per node (full): 444002 +Grid : Message : Average mflops/s per call per node (full): 303495 +Grid : Message : Stencil 12.9872 GB/s per node +Grid : Message : Stencil 10.7783 GB/s per node +Grid : Message : Stencil 17.6598 GB/s per node +Grid : Message : Stencil 12.1232 GB/s per node +Grid : Message : Average mflops/s per call per node : 666356 +Grid : Message : Average mflops/s per call per node : 808219 +Grid : Message : Average mflops/s per call per node : 821321 +Grid : Message : Average mflops/s per call per node : 667910 +Grid : Message : Average mflops/s per call per node (full): 315669 +Grid : Message : Average mflops/s per call per node (full): 332681 +Grid : Message : Average mflops/s per call per node (full): 442435 +Grid : 
Message : Average mflops/s per call per node (full): 302498 +Grid : Message : Stencil 12.3811 GB/s per node +Grid : Message : Stencil 17.0083 GB/s per node +Grid : Message : Stencil 17.8352 GB/s per node +Grid : Message : Stencil 13.3962 GB/s per node +Grid : Message : Average mflops/s per call per node : 669156 +Grid : Message : Average mflops/s per call per node : 805285 +Grid : Message : Average mflops/s per call per node : 818262 +Grid : Message : Average mflops/s per call per node : 657968 +Grid : Message : Average mflops/s per call per node (full): 312932 +Grid : Message : Average mflops/s per call per node (full): 438176 +Grid : Message : Average mflops/s per call per node (full): 437432 +Grid : Message : Average mflops/s per call per node (full): 304967 +Grid : Message : Stencil 12.6208 GB/s per node +Grid : Message : Stencil 16.5345 GB/s per node +Grid : Message : Stencil 17.303 GB/s per node +Grid : Message : Stencil 13.6195 GB/s per node +Grid : Message : Average mflops/s per call per node : 667075 +Grid : Message : Average mflops/s per call per node : 802960 +Grid : Message : Average mflops/s per call per node : 826809 +Grid : Message : Average mflops/s per call per node : 658842 +Grid : Message : Average mflops/s per call per node (full): 311157 +Grid : Message : Average mflops/s per call per node (full): 433094 +Grid : Message : Average mflops/s per call per node (full): 441505 +Grid : Message : Average mflops/s per call per node (full): 304660 +Grid : Message : Stencil 13.5426 GB/s per node +Grid : Message : Stencil 16.4742 GB/s per node +Grid : Message : Stencil 18.4648 GB/s per node +Grid : Message : Stencil 14.1341 GB/s per node +Grid : Message : Average mflops/s per call per node : 665545 +Grid : Message : Average mflops/s per call per node : 804158 +Grid : Message : Average mflops/s per call per node : 819437 +Grid : Message : Average mflops/s per call per node : 664279 +Grid : Message : Average mflops/s per call per node (full): 315607 +Grid : 
Message : Average mflops/s per call per node (full): 432159 +Grid : Message : Average mflops/s per call per node (full): 446260 +Grid : Message : Average mflops/s per call per node (full): 305848 +Grid : Message : Stencil 13.2587 GB/s per node +Grid : Message : Stencil 16.5012 GB/s per node +Grid : Message : Stencil 17.3578 GB/s per node +Grid : Message : Stencil 11.7879 GB/s per node +Grid : Message : Average mflops/s per call per node : 668955 +Grid : Message : Average mflops/s per call per node : 804300 +Grid : Message : Average mflops/s per call per node : 821209 +Grid : Message : Average mflops/s per call per node : 669854 +Grid : Message : Average mflops/s per call per node (full): 315721 +Grid : Message : Average mflops/s per call per node (full): 431274 +Grid : Message : Average mflops/s per call per node (full): 444321 +Grid : Message : Average mflops/s per call per node (full): 300279 +Grid : Message : Stencil 12.8008 GB/s per node +Grid : Message : Stencil 16.3146 GB/s per node +Grid : Message : Stencil 17.1493 GB/s per node +Grid : Message : Stencil 12.0482 GB/s per node +Grid : Message : Average mflops/s per call per node : 669211 +Grid : Message : Average mflops/s per call per node : 807401 +Grid : Message : Average mflops/s per call per node : 822916 +Grid : Message : Average mflops/s per call per node : 665423 +Grid : Message : Average mflops/s per call per node (full): 313918 +Grid : Message : Average mflops/s per call per node (full): 428195 +Grid : Message : Average mflops/s per call per node (full): 442564 +Grid : Message : Average mflops/s per call per node (full): 301389 +Grid : Message : Stencil 12.6949 GB/s per node +Grid : Message : Stencil 16.4159 GB/s per node +Grid : Message : Stencil 18.1188 GB/s per node +Grid : Message : Stencil 13.1025 GB/s per node +Grid : Message : Average mflops/s per call per node : 666446 +Grid : Message : Average mflops/s per call per node : 804369 +Grid : Message : Average mflops/s per call per node : 824741 
+Grid : Message : Average mflops/s per call per node : 663836 +Grid : Message : Average mflops/s per call per node (full): 312030 +Grid : Message : Average mflops/s per call per node (full): 433563 +Grid : Message : Average mflops/s per call per node (full): 448410 +Grid : Message : Average mflops/s per call per node (full): 305915 +Grid : Message : Stencil 15.1128 GB/s per node +Grid : Message : Stencil 17.6454 GB/s per node +Grid : Message : Stencil 17.5161 GB/s per node +Grid : Message : Stencil 12.8753 GB/s per node +Grid : Message : Average mflops/s per call per node : 663917 +Grid : Message : Average mflops/s per call per node : 801411 +Grid : Message : Average mflops/s per call per node : 828285 +Grid : Message : Average mflops/s per call per node : 661367 +Grid : Message : Average mflops/s per call per node (full): 316998 +Grid : Message : Average mflops/s per call per node (full): 437959 +Grid : Message : Average mflops/s per call per node (full): 444458 +Grid : Message : Average mflops/s per call per node (full): 304657 +Grid : Message : Stencil 13.4841 GB/s per node +Grid : Message : Stencil 16.5592 GB/s per node +Grid : Message : Stencil 17.373 GB/s per node +Grid : Message : Stencil 13.1126 GB/s per node +Grid : Message : Average mflops/s per call per node : 667237 +Grid : Message : Average mflops/s per call per node : 800749 +Grid : Message : Average mflops/s per call per node : 822447 +Grid : Message : Average mflops/s per call per node : 664609 +Grid : Message : Average mflops/s per call per node (full): 315971 +Grid : Message : Average mflops/s per call per node (full): 434444 +Grid : Message : Average mflops/s per call per node (full): 444853 +Grid : Message : Average mflops/s per call per node (full): 305671 +Grid : Message : Stencil 12.8365 GB/s per node +Grid : Message : Stencil 16.8574 GB/s per node +Grid : Message : Stencil 18.38 GB/s per node +Grid : Message : Stencil 13.3748 GB/s per node +Grid : Message : Average mflops/s per call per node 
: 667470 +Grid : Message : Average mflops/s per call per node : 808901 +Grid : Message : Average mflops/s per call per node : 818028 +Grid : Message : Average mflops/s per call per node : 668664 +Grid : Message : Average mflops/s per call per node (full): 312274 +Grid : Message : Average mflops/s per call per node (full): 437422 +Grid : Message : Average mflops/s per call per node (full): 445399 +Grid : Message : Average mflops/s per call per node (full): 307343 +Grid : Message : Stencil 13.3121 GB/s per node +Grid : Message : Stencil 17.5213 GB/s per node +Grid : Message : Stencil 18.3939 GB/s per node +Grid : Message : Stencil 13.0664 GB/s per node +Grid : Message : Average mflops/s per call per node : 667802 +Grid : Message : Average mflops/s per call per node : 801730 +Grid : Message : Average mflops/s per call per node : 827544 +Grid : Message : Average mflops/s per call per node : 665724 +Grid : Message : Average mflops/s per call per node (full): 315980 +Grid : Message : Average mflops/s per call per node (full): 438621 +Grid : Message : Average mflops/s per call per node (full): 448983 +Grid : Message : Average mflops/s per call per node (full): 306193 +Grid : Message : Stencil 13.4233 GB/s per node +Grid : Message : Stencil 17.1774 GB/s per node +Grid : Message : Stencil 17.7923 GB/s per node +Grid : Message : Stencil 11.9634 GB/s per node +Grid : Message : Average mflops/s per call per node : 667088 +Grid : Message : Average mflops/s per call per node : 802362 +Grid : Message : Average mflops/s per call per node : 820102 +Grid : Message : Average mflops/s per call per node : 667356 +Grid : Message : Average mflops/s per call per node (full): 315180 +Grid : Message : Average mflops/s per call per node (full): 436717 +Grid : Message : Average mflops/s per call per node (full): 444688 +Grid : Message : Average mflops/s per call per node (full): 301726 +Grid : Message : Stencil 12.8978 GB/s per node +Grid : Message : Stencil 16.6616 GB/s per node +Grid : 
Message : Stencil 17.9218 GB/s per node +Grid : Message : Stencil 13.2227 GB/s per node +Grid : Message : Average mflops/s per call per node : 667878 +Grid : Message : Average mflops/s per call per node : 802312 +Grid : Message : Average mflops/s per call per node : 815521 +Grid : Message : Average mflops/s per call per node : 660750 +Grid : Message : Average mflops/s per call per node (full): 314861 +Grid : Message : Average mflops/s per call per node (full): 434376 +Grid : Message : Average mflops/s per call per node (full): 445104 +Grid : Message : Average mflops/s per call per node (full): 305488 +Grid : Message : Stencil 12.7428 GB/s per node +Grid : Message : Stencil 16.777 GB/s per node +Grid : Message : Stencil 16.8466 GB/s per node +Grid : Message : Stencil 12.3905 GB/s per node +Grid : Message : Average mflops/s per call per node : 668370 +Grid : Message : Average mflops/s per call per node : 804917 +Grid : Message : Average mflops/s per call per node : 825243 +Grid : Message : Average mflops/s per call per node : 666776 +Grid : Message : Average mflops/s per call per node (full): 314673 +Grid : Message : Average mflops/s per call per node (full): 435603 +Grid : Message : Average mflops/s per call per node (full): 438303 +Grid : Message : Average mflops/s per call per node (full): 303690 +Grid : Message : Stencil 13.3228 GB/s per node +Grid : Message : Stencil 17.4223 GB/s per node +Grid : Message : Stencil 16.754 GB/s per node +Grid : Message : Stencil 12.6843 GB/s per node +Grid : Message : Average mflops/s per call per node : 667883 +Grid : Message : Average mflops/s per call per node : 805010 +Grid : Message : Average mflops/s per call per node : 820299 +Grid : Message : Average mflops/s per call per node : 663059 +Grid : Message : Average mflops/s per call per node (full): 315375 +Grid : Message : Average mflops/s per call per node (full): 440281 +Grid : Message : Average mflops/s per call per node (full): 430543 +Grid : Message : Average mflops/s 
per call per node (full): 301193 +Grid : Message : Stencil 13.3313 GB/s per node +Grid : Message : Stencil 9.28908 GB/s per node +Grid : Message : Stencil 18.1805 GB/s per node +Grid : Message : Stencil 13.6462 GB/s per node +Grid : Message : Average mflops/s per call per node : 667383 +Grid : Message : Average mflops/s per call per node : 815148 +Grid : Message : Average mflops/s per call per node : 817603 +Grid : Message : Average mflops/s per call per node : 660300 +Grid : Message : Average mflops/s per call per node (full): 315710 +Grid : Message : Average mflops/s per call per node (full): 299406 +Grid : Message : Average mflops/s per call per node (full): 446043 +Grid : Message : Average mflops/s per call per node (full): 306193 +Grid : Message : Stencil 12.3255 GB/s per node +Grid : Message : Stencil 10.828 GB/s per node +Grid : Message : Stencil 17.9218 GB/s per node +Grid : Message : Stencil 12.2895 GB/s per node +Grid : Message : Average mflops/s per call per node : 668942 +Grid : Message : Average mflops/s per call per node : 808118 +Grid : Message : Average mflops/s per call per node : 815742 +Grid : Message : Average mflops/s per call per node : 666199 +Grid : Message : Average mflops/s per call per node (full): 312475 +Grid : Message : Average mflops/s per call per node (full): 335871 +Grid : Message : Average mflops/s per call per node (full): 444799 +Grid : Message : Average mflops/s per call per node (full): 303522 +Grid : Message : Stencil 12.3314 GB/s per node +Grid : Message : Stencil 16.9833 GB/s per node +Grid : Message : Stencil 17.2071 GB/s per node +Grid : Message : Stencil 13.1734 GB/s per node +Grid : Message : Average mflops/s per call per node : 669271 +Grid : Message : Average mflops/s per call per node : 802289 +Grid : Message : Average mflops/s per call per node : 831405 +Grid : Message : Average mflops/s per call per node : 661655 +Grid : Message : Average mflops/s per call per node (full): 312489 +Grid : Message : Average mflops/s 
per call per node (full): 434880 +Grid : Message : Average mflops/s per call per node (full): 443932 +Grid : Message : Average mflops/s per call per node (full): 305986 +Grid : Message : Stencil 12.5033 GB/s per node +Grid : Message : Stencil 18.164 GB/s per node +Grid : Message : Stencil 17.9494 GB/s per node +Grid : Message : Stencil 11.9873 GB/s per node +Grid : Message : Average mflops/s per call per node : 668282 +Grid : Message : Average mflops/s per call per node : 798868 +Grid : Message : Average mflops/s per call per node : 821773 +Grid : Message : Average mflops/s per call per node : 668449 +Grid : Message : Average mflops/s per call per node (full): 312460 +Grid : Message : Average mflops/s per call per node (full): 440317 +Grid : Message : Average mflops/s per call per node (full): 445596 +Grid : Message : Average mflops/s per call per node (full): 302268 +Grid : Message : Stencil 13.6313 GB/s per node +Grid : Message : Stencil 9.02077 GB/s per node +Grid : Message : Stencil 17.3999 GB/s per node +Grid : Message : Stencil 13.8185 GB/s per node +Grid : Message : Average mflops/s per call per node : 667175 +Grid : Message : Average mflops/s per call per node : 811629 +Grid : Message : Average mflops/s per call per node : 821079 +Grid : Message : Average mflops/s per call per node : 660880 +Grid : Message : Average mflops/s per call per node (full): 315879 +Grid : Message : Average mflops/s per call per node (full): 293111 +Grid : Message : Average mflops/s per call per node (full): 444471 +Grid : Message : Average mflops/s per call per node (full): 305386 +Grid : Message : Stencil 13.1158 GB/s per node +Grid : Message : Stencil 17.6661 GB/s per node +Grid : Message : Stencil 17.4208 GB/s per node +Grid : Message : Stencil 12.3309 GB/s per node +Grid : Message : Average mflops/s per call per node : 667019 +Grid : Message : Average mflops/s per call per node : 804662 +Grid : Message : Average mflops/s per call per node : 812437 +Grid : Message : Average 
mflops/s per call per node : 666289 +Grid : Message : Average mflops/s per call per node (full): 314758 +Grid : Message : Average mflops/s per call per node (full): 441791 +Grid : Message : Average mflops/s per call per node (full): 437368 +Grid : Message : Average mflops/s per call per node (full): 303335 +Grid : Message : Stencil 13.6911 GB/s per node +Grid : Message : Stencil 14.1241 GB/s per node +Grid : Message : Stencil 16.9226 GB/s per node +Grid : Message : Stencil 12.4308 GB/s per node +Grid : Message : Average mflops/s per call per node : 665603 +Grid : Message : Average mflops/s per call per node : 807566 +Grid : Message : Average mflops/s per call per node : 824624 +Grid : Message : Average mflops/s per call per node : 668689 +Grid : Message : Average mflops/s per call per node (full): 315008 +Grid : Message : Average mflops/s per call per node (full): 401068 +Grid : Message : Average mflops/s per call per node (full): 439576 +Grid : Message : Average mflops/s per call per node (full): 305056 +Grid : Message : Stencil 13.4017 GB/s per node +Grid : Message : Stencil 17.9305 GB/s per node +Grid : Message : Stencil 18.0716 GB/s per node +Grid : Message : Stencil 13.3354 GB/s per node +Grid : Message : Average mflops/s per call per node : 663292 +Grid : Message : Average mflops/s per call per node : 801563 +Grid : Message : Average mflops/s per call per node : 818955 +Grid : Message : Average mflops/s per call per node : 669092 +Grid : Message : Average mflops/s per call per node (full): 315241 +Grid : Message : Average mflops/s per call per node (full): 438778 +Grid : Message : Average mflops/s per call per node (full): 445765 +Grid : Message : Average mflops/s per call per node (full): 306511 +Grid : Message : Stencil 13.4992 GB/s per node +Grid : Message : Stencil 16.8372 GB/s per node +Grid : Message : Stencil 18.0878 GB/s per node +Grid : Message : Stencil 13.7108 GB/s per node +Grid : Message : Average mflops/s per call per node : 664701 +Grid : 
Message : Average mflops/s per call per node : 805242 +Grid : Message : Average mflops/s per call per node : 816353 +Grid : Message : Average mflops/s per call per node : 660762 +Grid : Message : Average mflops/s per call per node (full): 316150 +Grid : Message : Average mflops/s per call per node (full): 436528 +Grid : Message : Average mflops/s per call per node (full): 444918 +Grid : Message : Average mflops/s per call per node (full): 304280 +Grid : Message : Stencil 13.2318 GB/s per node +Grid : Message : Stencil 9.11562 GB/s per node +Grid : Message : Stencil 17.3461 GB/s per node +Grid : Message : Stencil 12.4613 GB/s per node +Grid : Message : Average mflops/s per call per node : 664415 +Grid : Message : Average mflops/s per call per node : 812585 +Grid : Message : Average mflops/s per call per node : 823017 +Grid : Message : Average mflops/s per call per node : 664295 +Grid : Message : Average mflops/s per call per node (full): 314799 +Grid : Message : Average mflops/s per call per node (full): 294282 +Grid : Message : Average mflops/s per call per node (full): 443996 +Grid : Message : Average mflops/s per call per node (full): 302613 +Grid : Message : Stencil 12.8501 GB/s per node +Grid : Message : Stencil 18.1365 GB/s per node +Grid : Message : Stencil 17.5319 GB/s per node +Grid : Message : Stencil 13.6167 GB/s per node +Grid : Message : Average mflops/s per call per node : 667116 +Grid : Message : Average mflops/s per call per node : 805284 +Grid : Message : Average mflops/s per call per node : 824300 +Grid : Message : Average mflops/s per call per node : 663720 +Grid : Message : Average mflops/s per call per node (full): 314912 +Grid : Message : Average mflops/s per call per node (full): 441021 +Grid : Message : Average mflops/s per call per node (full): 446060 +Grid : Message : Average mflops/s per call per node (full): 306658 +Grid : Message : Stencil 12.9248 GB/s per node +Grid : Message : Stencil 17.9967 GB/s per node +Grid : Message : Stencil 
16.997 GB/s per node +Grid : Message : Stencil 13.6847 GB/s per node +Grid : Message : Average mflops/s per call per node : 664023 +Grid : Message : Average mflops/s per call per node : 804162 +Grid : Message : Average mflops/s per call per node : 824007 +Grid : Message : Average mflops/s per call per node : 664748 +Grid : Message : Average mflops/s per call per node (full): 313748 +Grid : Message : Average mflops/s per call per node (full): 440774 +Grid : Message : Average mflops/s per call per node (full): 440280 +Grid : Message : Average mflops/s per call per node (full): 306476 +Grid : Message : Stencil 13.4693 GB/s per node +Grid : Message : Stencil 10.9112 GB/s per node +Grid : Message : Stencil 18.7818 GB/s per node +Grid : Message : Stencil 13.5111 GB/s per node +Grid : Message : Average mflops/s per call per node : 665246 +Grid : Message : Average mflops/s per call per node : 814175 +Grid : Message : Average mflops/s per call per node : 821192 +Grid : Message : Average mflops/s per call per node : 665535 +Grid : Message : Average mflops/s per call per node (full): 316162 +Grid : Message : Average mflops/s per call per node (full): 337525 +Grid : Message : Average mflops/s per call per node (full): 447987 +Grid : Message : Average mflops/s per call per node (full): 306323 +Grid : Message : Stencil 13.0777 GB/s per node +Grid : Message : Stencil 16.9577 GB/s per node +Grid : Message : Stencil 18.3557 GB/s per node +Grid : Message : Stencil 14.4107 GB/s per node +Grid : Message : Average mflops/s per call per node : 665813 +Grid : Message : Average mflops/s per call per node : 805514 +Grid : Message : Average mflops/s per call per node : 819381 +Grid : Message : Average mflops/s per call per node : 658705 +Grid : Message : Average mflops/s per call per node (full): 314850 +Grid : Message : Average mflops/s per call per node (full): 439586 +Grid : Message : Average mflops/s per call per node (full): 447271 +Grid : Message : Average mflops/s per call per node 
(full): 306939 +Grid : Message : Stencil 13.3862 GB/s per node +Grid : Message : Stencil 17.1971 GB/s per node +Grid : Message : Stencil 17.6139 GB/s per node +Grid : Message : Stencil 12.9573 GB/s per node +Grid : Message : Average mflops/s per call per node : 666671 +Grid : Message : Average mflops/s per call per node : 803421 +Grid : Message : Average mflops/s per call per node : 825401 +Grid : Message : Average mflops/s per call per node : 659545 +Grid : Message : Average mflops/s per call per node (full): 315837 +Grid : Message : Average mflops/s per call per node (full): 438396 +Grid : Message : Average mflops/s per call per node (full): 445524 +Grid : Message : Average mflops/s per call per node (full): 304195 +Grid : Message : Stencil 13.5591 GB/s per node +Grid : Message : Stencil 17.7183 GB/s per node +Grid : Message : Stencil 17.2019 GB/s per node +Grid : Message : Stencil 12.3189 GB/s per node +Grid : Message : Average mflops/s per call per node : 665892 +Grid : Message : Average mflops/s per call per node : 806264 +Grid : Message : Average mflops/s per call per node : 824604 +Grid : Message : Average mflops/s per call per node : 667157 +Grid : Message : Average mflops/s per call per node (full): 315081 +Grid : Message : Average mflops/s per call per node (full): 440782 +Grid : Message : Average mflops/s per call per node (full): 440963 +Grid : Message : Average mflops/s per call per node (full): 304250 +Grid : Message : Stencil 13.248 GB/s per node +Grid : Message : Stencil 14.8018 GB/s per node +Grid : Message : Stencil 17.461 GB/s per node +Grid : Message : Stencil 13.2692 GB/s per node +Grid : Message : Average mflops/s per call per node : 665956 +Grid : Message : Average mflops/s per call per node : 807889 +Grid : Message : Average mflops/s per call per node : 825939 +Grid : Message : Average mflops/s per call per node : 665659 +Grid : Message : Average mflops/s per call per node (full): 313670 +Grid : Message : Average mflops/s per call per node 
(full): 412631 +Grid : Message : Average mflops/s per call per node (full): 445575 +Grid : Message : Average mflops/s per call per node (full): 306363 +Grid : Message : Stencil 13.7145 GB/s per node +Grid : Message : Stencil 17.3324 GB/s per node +Grid : Message : Stencil 18.1848 GB/s per node +Grid : Message : Stencil 12.9419 GB/s per node +Grid : Message : Average mflops/s per call per node : 665029 +Grid : Message : Average mflops/s per call per node : 808743 +Grid : Message : Average mflops/s per call per node : 821243 +Grid : Message : Average mflops/s per call per node : 662212 +Grid : Message : Average mflops/s per call per node (full): 314960 +Grid : Message : Average mflops/s per call per node (full): 439231 +Grid : Message : Average mflops/s per call per node (full): 447120 +Grid : Message : Average mflops/s per call per node (full): 305205 +Grid : Message : Stencil 13.5834 GB/s per node +Grid : Message : Stencil 13.9187 GB/s per node +Grid : Message : Stencil 17.0826 GB/s per node +Grid : Message : Stencil 13.5903 GB/s per node +Grid : Message : Average mflops/s per call per node : 668611 +Grid : Message : Average mflops/s per call per node : 810477 +Grid : Message : Average mflops/s per call per node : 820414 +Grid : Message : Average mflops/s per call per node : 662241 +Grid : Message : Average mflops/s per call per node (full): 316736 +Grid : Message : Average mflops/s per call per node (full): 397739 +Grid : Message : Average mflops/s per call per node (full): 434490 +Grid : Message : Average mflops/s per call per node (full): 306342 +Grid : Message : Stencil 12.8785 GB/s per node +Grid : Message : Stencil 18.0336 GB/s per node +Grid : Message : Stencil 17.5106 GB/s per node +Grid : Message : Stencil 12.2151 GB/s per node +Grid : Message : Average mflops/s per call per node : 668606 +Grid : Message : Average mflops/s per call per node : 805111 +Grid : Message : Average mflops/s per call per node : 822796 +Grid : Message : Average mflops/s per call 
per node : 664684 +Grid : Message : Average mflops/s per call per node (full): 313610 +Grid : Message : Average mflops/s per call per node (full): 440398 +Grid : Message : Average mflops/s per call per node (full): 444645 +Grid : Message : Average mflops/s per call per node (full): 303118 +Grid : Message : Stencil 13.3419 GB/s per node +Grid : Message : Stencil 17.7552 GB/s per node +Grid : Message : Stencil 18.0255 GB/s per node +Grid : Message : Stencil 12.1451 GB/s per node +Grid : Message : Average mflops/s per call per node : 666457 +Grid : Message : Average mflops/s per call per node : 803015 +Grid : Message : Average mflops/s per call per node : 819714 +Grid : Message : Average mflops/s per call per node : 666522 +Grid : Message : Average mflops/s per call per node (full): 314487 +Grid : Message : Average mflops/s per call per node (full): 440563 +Grid : Message : Average mflops/s per call per node (full): 446709 +Grid : Message : Average mflops/s per call per node (full): 302842 +Grid : Message : Stencil 13.0752 GB/s per node +Grid : Message : Stencil 16.7879 GB/s per node +Grid : Message : Stencil 17.1563 GB/s per node +Grid : Message : Stencil 12.2405 GB/s per node +Grid : Message : Average mflops/s per call per node : 661678 +Grid : Message : Average mflops/s per call per node : 806525 +Grid : Message : Average mflops/s per call per node : 822295 +Grid : Message : Average mflops/s per call per node : 662354 +Grid : Message : Average mflops/s per call per node (full): 314978 +Grid : Message : Average mflops/s per call per node (full): 437910 +Grid : Message : Average mflops/s per call per node (full): 432688 +Grid : Message : Average mflops/s per call per node (full): 300123 +Grid : Message : Stencil 12.8164 GB/s per node +Grid : Message : Stencil 10.8645 GB/s per node +Grid : Message : Stencil 17.5877 GB/s per node +Grid : Message : Stencil 12.2955 GB/s per node +Grid : Message : Average mflops/s per call per node : 663617 +Grid : Message : Average 
mflops/s per call per node : 815350 +Grid : Message : Average mflops/s per call per node : 822622 +Grid : Message : Average mflops/s per call per node : 670769 +Grid : Message : Average mflops/s per call per node (full): 311572 +Grid : Message : Average mflops/s per call per node (full): 336451 +Grid : Message : Average mflops/s per call per node (full): 445744 +Grid : Message : Average mflops/s per call per node (full): 305249 +Grid : Message : Stencil 13.4844 GB/s per node +Grid : Message : Stencil 9.41537 GB/s per node +Grid : Message : Stencil 17.3923 GB/s per node +Grid : Message : Stencil 12.6071 GB/s per node +Grid : Message : Average mflops/s per call per node : 664692 +Grid : Message : Average mflops/s per call per node : 804293 +Grid : Message : Average mflops/s per call per node : 820679 +Grid : Message : Average mflops/s per call per node : 666786 +Grid : Message : Average mflops/s per call per node (full): 315823 +Grid : Message : Average mflops/s per call per node (full): 302137 +Grid : Message : Average mflops/s per call per node (full): 444841 +Grid : Message : Average mflops/s per call per node (full): 304938 +Grid : Message : Stencil 13.9875 GB/s per node +Grid : Message : Stencil 17.01 GB/s per node +Grid : Message : Stencil 18.6205 GB/s per node +Grid : Message : Stencil 13.0562 GB/s per node +Grid : Message : Average mflops/s per call per node : 664500 +Grid : Message : Average mflops/s per call per node : 802832 +Grid : Message : Average mflops/s per call per node : 818612 +Grid : Message : Average mflops/s per call per node : 665705 +Grid : Message : Average mflops/s per call per node (full): 316672 +Grid : Message : Average mflops/s per call per node (full): 438180 +Grid : Message : Average mflops/s per call per node (full): 447127 +Grid : Message : Average mflops/s per call per node (full): 306157 +Grid : Message : Stencil 14.0333 GB/s per node +Grid : Message : Stencil 16.5672 GB/s per node +Grid : Message : Stencil 17.9764 GB/s per node 
+Grid : Message : Stencil 12.9231 GB/s per node +Grid : Message : Average mflops/s per call per node : 663626 +Grid : Message : Average mflops/s per call per node : 804403 +Grid : Message : Average mflops/s per call per node : 819891 +Grid : Message : Average mflops/s per call per node : 666901 +Grid : Message : Average mflops/s per call per node (full): 315999 +Grid : Message : Average mflops/s per call per node (full): 435532 +Grid : Message : Average mflops/s per call per node (full): 446810 +Grid : Message : Average mflops/s per call per node (full): 306073 +Grid : Message : Stencil 13.809 GB/s per node +Grid : Message : Stencil 18.2537 GB/s per node +Grid : Message : Stencil 17.7281 GB/s per node +Grid : Message : Stencil 14.1155 GB/s per node +Grid : Message : Average mflops/s per call per node : 665162 +Grid : Message : Average mflops/s per call per node : 799502 +Grid : Message : Average mflops/s per call per node : 826679 +Grid : Message : Average mflops/s per call per node : 660384 +Grid : Message : Average mflops/s per call per node (full): 316186 +Grid : Message : Average mflops/s per call per node (full): 439300 +Grid : Message : Average mflops/s per call per node (full): 447229 +Grid : Message : Average mflops/s per call per node (full): 307210 +Grid : Message : Stencil 13.7922 GB/s per node +Grid : Message : Stencil 17.225 GB/s per node +Grid : Message : Stencil 18.2881 GB/s per node +Grid : Message : Stencil 13.7297 GB/s per node +Grid : Message : Average mflops/s per call per node : 665724 +Grid : Message : Average mflops/s per call per node : 799898 +Grid : Message : Average mflops/s per call per node : 825497 +Grid : Message : Average mflops/s per call per node : 660536 +Grid : Message : Average mflops/s per call per node (full): 315508 +Grid : Message : Average mflops/s per call per node (full): 437931 +Grid : Message : Average mflops/s per call per node (full): 448941 +Grid : Message : Average mflops/s per call per node (full): 306707 +Grid : 
Message : Stencil 13.9262 GB/s per node +Grid : Message : Stencil 16.7104 GB/s per node +Grid : Message : Stencil 18.1467 GB/s per node +Grid : Message : Stencil 13.2761 GB/s per node +Grid : Message : Average mflops/s per call per node : 665930 +Grid : Message : Average mflops/s per call per node : 805745 +Grid : Message : Average mflops/s per call per node : 819049 +Grid : Message : Average mflops/s per call per node : 658184 +Grid : Message : Average mflops/s per call per node (full): 316284 +Grid : Message : Average mflops/s per call per node (full): 436808 +Grid : Message : Average mflops/s per call per node (full): 445900 +Grid : Message : Average mflops/s per call per node (full): 304988 +Grid : Message : Stencil 13.6979 GB/s per node +Grid : Message : Stencil 16.5942 GB/s per node +Grid : Message : Stencil 17.6454 GB/s per node +Grid : Message : Stencil 13.6655 GB/s per node +Grid : Message : Average mflops/s per call per node : 667274 +Grid : Message : Average mflops/s per call per node : 804315 +Grid : Message : Average mflops/s per call per node : 818993 +Grid : Message : Average mflops/s per call per node : 661050 +Grid : Message : Average mflops/s per call per node (full): 315545 +Grid : Message : Average mflops/s per call per node (full): 435802 +Grid : Message : Average mflops/s per call per node (full): 444969 +Grid : Message : Average mflops/s per call per node (full): 305752 +Grid : Message : Stencil 14.8011 GB/s per node +Grid : Message : Stencil 17.4577 GB/s per node +Grid : Message : Stencil 16.0438 GB/s per node +Grid : Message : Stencil 11.8119 GB/s per node +Grid : Message : Average mflops/s per call per node : 666647 +Grid : Message : Average mflops/s per call per node : 803512 +Grid : Message : Average mflops/s per call per node : 825357 +Grid : Message : Average mflops/s per call per node : 669817 +Grid : Message : Average mflops/s per call per node (full): 318122 +Grid : Message : Average mflops/s per call per node (full): 438539 +Grid : 
Message : Average mflops/s per call per node (full): 420134 +Grid : Message : Average mflops/s per call per node (full): 299258 +Grid : Message : Stencil 14.2815 GB/s per node +Grid : Message : Stencil 16.5677 GB/s per node +Grid : Message : Stencil 16.7552 GB/s per node +Grid : Message : Stencil 12.4322 GB/s per node +Grid : Message : Average mflops/s per call per node : 665525 +Grid : Message : Average mflops/s per call per node : 802985 +Grid : Message : Average mflops/s per call per node : 820117 +Grid : Message : Average mflops/s per call per node : 668353 +Grid : Message : Average mflops/s per call per node (full): 317822 +Grid : Message : Average mflops/s per call per node (full): 435373 +Grid : Message : Average mflops/s per call per node (full): 437842 +Grid : Message : Average mflops/s per call per node (full): 304972 +Grid : Message : Stencil 12.445 GB/s per node +Grid : Message : Stencil 13.4312 GB/s per node +Grid : Message : Stencil 17.1194 GB/s per node +Grid : Message : Stencil 13.338 GB/s per node +Grid : Message : Average mflops/s per call per node : 668202 +Grid : Message : Average mflops/s per call per node : 807277 +Grid : Message : Average mflops/s per call per node : 817844 +Grid : Message : Average mflops/s per call per node : 661735 +Grid : Message : Average mflops/s per call per node (full): 313315 +Grid : Message : Average mflops/s per call per node (full): 390731 +Grid : Message : Average mflops/s per call per node (full): 440923 +Grid : Message : Average mflops/s per call per node (full): 306331 +Grid : Message : Stencil 12.7171 GB/s per node +Grid : Message : Stencil 16.314 GB/s per node +Grid : Message : Stencil 18.5352 GB/s per node +Grid : Message : Stencil 13.2695 GB/s per node +Grid : Message : Average mflops/s per call per node : 666693 +Grid : Message : Average mflops/s per call per node : 800267 +Grid : Message : Average mflops/s per call per node : 826845 +Grid : Message : Average mflops/s per call per node : 660822 +Grid : 
Message : Average mflops/s per call per node (full): 314168 +Grid : Message : Average mflops/s per call per node (full): 431113 +Grid : Message : Average mflops/s per call per node (full): 448873 +Grid : Message : Average mflops/s per call per node (full): 305170 +Grid : Message : Stencil 12.9387 GB/s per node +Grid : Message : Stencil 16.4085 GB/s per node +Grid : Message : Stencil 16.8739 GB/s per node +Grid : Message : Stencil 13.6264 GB/s per node +Grid : Message : Average mflops/s per call per node : 670156 +Grid : Message : Average mflops/s per call per node : 805849 +Grid : Message : Average mflops/s per call per node : 818217 +Grid : Message : Average mflops/s per call per node : 660247 +Grid : Message : Average mflops/s per call per node (full): 315820 +Grid : Message : Average mflops/s per call per node (full): 429584 +Grid : Message : Average mflops/s per call per node (full): 440164 +Grid : Message : Average mflops/s per call per node (full): 305487 +Grid : Message : Stencil 12.917 GB/s per node +Grid : Message : Stencil 16.6841 GB/s per node +Grid : Message : Stencil 17.6266 GB/s per node +Grid : Message : Stencil 12.6883 GB/s per node +Grid : Message : Average mflops/s per call per node : 671234 +Grid : Message : Average mflops/s per call per node : 806459 +Grid : Message : Average mflops/s per call per node : 823954 +Grid : Message : Average mflops/s per call per node : 662970 +Grid : Message : Average mflops/s per call per node (full): 314276 +Grid : Message : Average mflops/s per call per node (full): 433660 +Grid : Message : Average mflops/s per call per node (full): 445980 +Grid : Message : Average mflops/s per call per node (full): 303927 +Grid : Message : Stencil 12.3094 GB/s per node +Grid : Message : Stencil 16.6002 GB/s per node +Grid : Message : Stencil 17.6975 GB/s per node +Grid : Message : Stencil 12.2183 GB/s per node +Grid : Message : Average mflops/s per call per node : 671314 +Grid : Message : Average mflops/s per call per node : 
805711 +Grid : Message : Average mflops/s per call per node : 819593 +Grid : Message : Average mflops/s per call per node : 664132 +Grid : Message : Average mflops/s per call per node (full): 312627 +Grid : Message : Average mflops/s per call per node (full): 435765 +Grid : Message : Average mflops/s per call per node (full): 445720 +Grid : Message : Average mflops/s per call per node (full): 303954 +Grid : Message : Stencil 12.8645 GB/s per node +Grid : Message : Stencil 15.0431 GB/s per node +Grid : Message : Stencil 18.2157 GB/s per node +Grid : Message : Stencil 12.3631 GB/s per node +Grid : Message : Average mflops/s per call per node : 666465 +Grid : Message : Average mflops/s per call per node : 800239 +Grid : Message : Average mflops/s per call per node : 819023 +Grid : Message : Average mflops/s per call per node : 665368 +Grid : Message : Average mflops/s per call per node (full): 314038 +Grid : Message : Average mflops/s per call per node (full): 414968 +Grid : Message : Average mflops/s per call per node (full): 447939 +Grid : Message : Average mflops/s per call per node (full): 301079 +Grid : Message : Stencil 14.8432 GB/s per node +Grid : Message : Stencil 9.11744 GB/s per node +Grid : Message : Stencil 17.9773 GB/s per node +Grid : Message : Stencil 13.6707 GB/s per node +Grid : Message : Average mflops/s per call per node : 662854 +Grid : Message : Average mflops/s per call per node : 809082 +Grid : Message : Average mflops/s per call per node : 820687 +Grid : Message : Average mflops/s per call per node : 662361 +Grid : Message : Average mflops/s per call per node (full): 315454 +Grid : Message : Average mflops/s per call per node (full): 295354 +Grid : Message : Average mflops/s per call per node (full): 446626 +Grid : Message : Average mflops/s per call per node (full): 305813 +Grid : Message : Stencil 12.7809 GB/s per node +Grid : Message : Stencil 16.2911 GB/s per node +Grid : Message : Stencil 17.3266 GB/s per node +Grid : Message : Stencil 
13.7354 GB/s per node +Grid : Message : Average mflops/s per call per node : 668695 +Grid : Message : Average mflops/s per call per node : 803473 +Grid : Message : Average mflops/s per call per node : 820087 +Grid : Message : Average mflops/s per call per node : 662076 +Grid : Message : Average mflops/s per call per node (full): 315302 +Grid : Message : Average mflops/s per call per node (full): 432491 +Grid : Message : Average mflops/s per call per node (full): 443546 +Grid : Message : Average mflops/s per call per node (full): 305753 +Grid : Message : Stencil 15.392 GB/s per node +Grid : Message : Stencil 16.6395 GB/s per node +Grid : Message : Stencil 18.4007 GB/s per node +Grid : Message : Stencil 11.7254 GB/s per node +Grid : Message : Average mflops/s per call per node : 661087 +Grid : Message : Average mflops/s per call per node : 800899 +Grid : Message : Average mflops/s per call per node : 823878 +Grid : Message : Average mflops/s per call per node : 665867 +Grid : Message : Average mflops/s per call per node (full): 316630 +Grid : Message : Average mflops/s per call per node (full): 435665 +Grid : Message : Average mflops/s per call per node (full): 447474 +Grid : Message : Average mflops/s per call per node (full): 299133 +Grid : Message : Stencil 12.7109 GB/s per node +Grid : Message : Stencil 16.6862 GB/s per node +Grid : Message : Stencil 17.2417 GB/s per node +Grid : Message : Stencil 12.1287 GB/s per node +Grid : Message : Average mflops/s per call per node : 668518 +Grid : Message : Average mflops/s per call per node : 805298 +Grid : Message : Average mflops/s per call per node : 817691 +Grid : Message : Average mflops/s per call per node : 669558 +Grid : Message : Average mflops/s per call per node (full): 315247 +Grid : Message : Average mflops/s per call per node (full): 436881 +Grid : Message : Average mflops/s per call per node (full): 442923 +Grid : Message : Average mflops/s per call per node (full): 303777 +Grid : Message : Stencil 14.6304 
GB/s per node +Grid : Message : Stencil 17.0628 GB/s per node +Grid : Message : Stencil 17.9849 GB/s per node +Grid : Message : Stencil 12.5003 GB/s per node +Grid : Message : Average mflops/s per call per node : 661927 +Grid : Message : Average mflops/s per call per node : 800619 +Grid : Message : Average mflops/s per call per node : 824018 +Grid : Message : Average mflops/s per call per node : 669003 +Grid : Message : Average mflops/s per call per node (full): 316799 +Grid : Message : Average mflops/s per call per node (full): 435870 +Grid : Message : Average mflops/s per call per node (full): 448908 +Grid : Message : Average mflops/s per call per node (full): 305761 +Grid : Message : Stencil 14.2657 GB/s per node +Grid : Message : Stencil 17.7772 GB/s per node +Grid : Message : Stencil 16.4253 GB/s per node +Grid : Message : Stencil 12.0975 GB/s per node +Grid : Message : Average mflops/s per call per node : 662242 +Grid : Message : Average mflops/s per call per node : 803620 +Grid : Message : Average mflops/s per call per node : 822478 +Grid : Message : Average mflops/s per call per node : 666255 +Grid : Message : Average mflops/s per call per node (full): 316413 +Grid : Message : Average mflops/s per call per node (full): 440755 +Grid : Message : Average mflops/s per call per node (full): 425590 +Grid : Message : Average mflops/s per call per node (full): 301935 +Grid : Message : Stencil 15.2164 GB/s per node +Grid : Message : Stencil 9.49935 GB/s per node +Grid : Message : Stencil 17.2102 GB/s per node +Grid : Message : Stencil 13.7943 GB/s per node +Grid : Message : Average mflops/s per call per node : 659534 +Grid : Message : Average mflops/s per call per node : 809661 +Grid : Message : Average mflops/s per call per node : 823187 +Grid : Message : Average mflops/s per call per node : 661130 +Grid : Message : Average mflops/s per call per node (full): 316258 +Grid : Message : Average mflops/s per call per node (full): 304426 +Grid : Message : Average 
mflops/s per call per node (full): 442628 +Grid : Message : Average mflops/s per call per node (full): 306161 +Grid : Message : Stencil 14.6314 GB/s per node +Grid : Message : Stencil 16.992 GB/s per node +Grid : Message : Stencil 17.8637 GB/s per node +Grid : Message : Stencil 12.3308 GB/s per node +Grid : Message : Average mflops/s per call per node : 658515 +Grid : Message : Average mflops/s per call per node : 801185 +Grid : Message : Average mflops/s per call per node : 823355 +Grid : Message : Average mflops/s per call per node : 670100 +Grid : Message : Average mflops/s per call per node (full): 315176 +Grid : Message : Average mflops/s per call per node (full): 437692 +Grid : Message : Average mflops/s per call per node (full): 444062 +Grid : Message : Average mflops/s per call per node (full): 304858 +Grid : Message : Stencil 12.3942 GB/s per node +Grid : Message : Stencil 17.204 GB/s per node +Grid : Message : Stencil 17.2984 GB/s per node +Grid : Message : Stencil 12.1147 GB/s per node +Grid : Message : Average mflops/s per call per node : 663799 +Grid : Message : Average mflops/s per call per node : 800854 +Grid : Message : Average mflops/s per call per node : 823783 +Grid : Message : Average mflops/s per call per node : 667952 +Grid : Message : Average mflops/s per call per node (full): 312919 +Grid : Message : Average mflops/s per call per node (full): 439336 +Grid : Message : Average mflops/s per call per node (full): 442674 +Grid : Message : Average mflops/s per call per node (full): 302111 +Grid : Message : Stencil 13.5046 GB/s per node +Grid : Message : Stencil 8.65121 GB/s per node +Grid : Message : Stencil 16.8146 GB/s per node +Grid : Message : Stencil 12.6679 GB/s per node +Grid : Message : Average mflops/s per call per node : 664548 +Grid : Message : Average mflops/s per call per node : 810316 +Grid : Message : Average mflops/s per call per node : 824111 +Grid : Message : Average mflops/s per call per node : 668561 +Grid : Message : Average 
mflops/s per call per node (full): 315068 +Grid : Message : Average mflops/s per call per node (full): 283443 +Grid : Message : Average mflops/s per call per node (full): 438671 +Grid : Message : Average mflops/s per call per node (full): 305982 +Grid : Message : Stencil 13.1484 GB/s per node +Grid : Message : Stencil 17.4256 GB/s per node +Grid : Message : Stencil 18.1358 GB/s per node +Grid : Message : Stencil 12.7422 GB/s per node +Grid : Message : Average mflops/s per call per node : 663668 +Grid : Message : Average mflops/s per call per node : 800415 +Grid : Message : Average mflops/s per call per node : 808348 +Grid : Message : Average mflops/s per call per node : 666082 +Grid : Message : Average mflops/s per call per node (full): 313980 +Grid : Message : Average mflops/s per call per node (full): 437498 +Grid : Message : Average mflops/s per call per node (full): 443415 +Grid : Message : Average mflops/s per call per node (full): 305528 +Grid : Message : Stencil 13.3168 GB/s per node +Grid : Message : Stencil 16.8823 GB/s per node +Grid : Message : Stencil 17.3926 GB/s per node +Grid : Message : Stencil 14.2876 GB/s per node +Grid : Message : Average mflops/s per call per node : 667078 +Grid : Message : Average mflops/s per call per node : 801243 +Grid : Message : Average mflops/s per call per node : 823023 +Grid : Message : Average mflops/s per call per node : 661535 +Grid : Message : Average mflops/s per call per node (full): 315190 +Grid : Message : Average mflops/s per call per node (full): 432799 +Grid : Message : Average mflops/s per call per node (full): 442973 +Grid : Message : Average mflops/s per call per node (full): 303404 +Grid : Message : Stencil 12.6679 GB/s per node +Grid : Message : Stencil 16.6478 GB/s per node +Grid : Message : Stencil 17.3866 GB/s per node +Grid : Message : Stencil 13.3003 GB/s per node +Grid : Message : Average mflops/s per call per node : 670601 +Grid : Message : Average mflops/s per call per node : 802308 +Grid : 
Message : Average mflops/s per call per node : 818386 +Grid : Message : Average mflops/s per call per node : 661566 +Grid : Message : Average mflops/s per call per node (full): 315534 +Grid : Message : Average mflops/s per call per node (full): 435309 +Grid : Message : Average mflops/s per call per node (full): 443217 +Grid : Message : Average mflops/s per call per node (full): 305060 +Grid : Message : Stencil 13.1465 GB/s per node +Grid : Message : Stencil 11.5329 GB/s per node +Grid : Message : Stencil 17.4103 GB/s per node +Grid : Message : Stencil 13.828 GB/s per node +Grid : Message : Average mflops/s per call per node : 664492 +Grid : Message : Average mflops/s per call per node : 807782 +Grid : Message : Average mflops/s per call per node : 829479 +Grid : Message : Average mflops/s per call per node : 657773 +Grid : Message : Average mflops/s per call per node (full): 315037 +Grid : Message : Average mflops/s per call per node (full): 351401 +Grid : Message : Average mflops/s per call per node (full): 445392 +Grid : Message : Average mflops/s per call per node (full): 305751 +Grid : Message : Stencil 13.773 GB/s per node +Grid : Message : Stencil 17.1506 GB/s per node +Grid : Message : Stencil 17.6133 GB/s per node +Grid : Message : Stencil 13.3226 GB/s per node +Grid : Message : Average mflops/s per call per node : 664042 +Grid : Message : Average mflops/s per call per node : 804459 +Grid : Message : Average mflops/s per call per node : 822795 +Grid : Message : Average mflops/s per call per node : 664842 +Grid : Message : Average mflops/s per call per node (full): 316073 +Grid : Message : Average mflops/s per call per node (full): 437764 +Grid : Message : Average mflops/s per call per node (full): 446153 +Grid : Message : Average mflops/s per call per node (full): 299502 +Grid : Message : Stencil 15.0845 GB/s per node +Grid : Message : Stencil 17.7414 GB/s per node +Grid : Message : Stencil 16.8645 GB/s per node +Grid : Message : Stencil 12.3923 GB/s per 
node +Grid : Message : Average mflops/s per call per node : 661352 +Grid : Message : Average mflops/s per call per node : 800592 +Grid : Message : Average mflops/s per call per node : 817293 +Grid : Message : Average mflops/s per call per node : 664372 +Grid : Message : Average mflops/s per call per node (full): 316852 +Grid : Message : Average mflops/s per call per node (full): 439206 +Grid : Message : Average mflops/s per call per node (full): 437628 +Grid : Message : Average mflops/s per call per node (full): 304183 +Grid : Message : Stencil 14.7196 GB/s per node +Grid : Message : Stencil 16.5359 GB/s per node +Grid : Message : Stencil 18.1543 GB/s per node +Grid : Message : Stencil 13.2842 GB/s per node +Grid : Message : Average mflops/s per call per node : 662539 +Grid : Message : Average mflops/s per call per node : 802369 +Grid : Message : Average mflops/s per call per node : 824979 +Grid : Message : Average mflops/s per call per node : 661577 +Grid : Message : Average mflops/s per call per node (full): 316824 +Grid : Message : Average mflops/s per call per node (full): 434027 +Grid : Message : Average mflops/s per call per node (full): 446898 +Grid : Message : Average mflops/s per call per node (full): 305991 +Grid : Message : Stencil 15.5375 GB/s per node +Grid : Message : Stencil 16.5096 GB/s per node +Grid : Message : Stencil 18.3063 GB/s per node +Grid : Message : Stencil 13.1609 GB/s per node +Grid : Message : Average mflops/s per call per node : 664450 +Grid : Message : Average mflops/s per call per node : 802683 +Grid : Message : Average mflops/s per call per node : 823743 +Grid : Message : Average mflops/s per call per node : 662443 +Grid : Message : Average mflops/s per call per node (full): 317778 +Grid : Message : Average mflops/s per call per node (full): 435735 +Grid : Message : Average mflops/s per call per node (full): 448569 +Grid : Message : Average mflops/s per call per node (full): 305253 +Grid : Message : Stencil 12.8296 GB/s per node 
+Grid : Message : Stencil 9.10781 GB/s per node +Grid : Message : Stencil 17.7547 GB/s per node +Grid : Message : Stencil 12.4499 GB/s per node +Grid : Message : Average mflops/s per call per node : 669156 +Grid : Message : Average mflops/s per call per node : 807513 +Grid : Message : Average mflops/s per call per node : 825799 +Grid : Message : Average mflops/s per call per node : 670206 +Grid : Message : Average mflops/s per call per node (full): 314775 +Grid : Message : Average mflops/s per call per node (full): 294729 +Grid : Message : Average mflops/s per call per node (full): 446972 +Grid : Message : Average mflops/s per call per node (full): 305790 +Grid : Message : Stencil 12.6955 GB/s per node +Grid : Message : Stencil 16.6487 GB/s per node +Grid : Message : Stencil 17.6868 GB/s per node +Grid : Message : Stencil 12.7983 GB/s per node +Grid : Message : Average mflops/s per call per node : 673509 +Grid : Message : Average mflops/s per call per node : 802936 +Grid : Message : Average mflops/s per call per node : 827030 +Grid : Message : Average mflops/s per call per node : 664901 +Grid : Message : Average mflops/s per call per node (full): 315828 +Grid : Message : Average mflops/s per call per node (full): 435932 +Grid : Message : Average mflops/s per call per node (full): 446750 +Grid : Message : Average mflops/s per call per node (full): 305108 +Grid : Message : Stencil 12.6757 GB/s per node +Grid : Message : Stencil 16.1476 GB/s per node +Grid : Message : Stencil 18.0383 GB/s per node +Grid : Message : Stencil 14.2022 GB/s per node +Grid : Message : Average mflops/s per call per node : 672735 +Grid : Message : Average mflops/s per call per node : 802948 +Grid : Message : Average mflops/s per call per node : 822948 +Grid : Message : Average mflops/s per call per node : 659746 +Grid : Message : Average mflops/s per call per node (full): 315118 +Grid : Message : Average mflops/s per call per node (full): 429520 +Grid : Message : Average mflops/s per call per 
node (full): 446367 +Grid : Message : Average mflops/s per call per node (full): 306267 +Grid : Message : Stencil 13.1888 GB/s per node +Grid : Message : Stencil 17.1689 GB/s per node +Grid : Message : Stencil 17.964 GB/s per node +Grid : Message : Stencil 12.4117 GB/s per node +Grid : Message : Average mflops/s per call per node : 666876 +Grid : Message : Average mflops/s per call per node : 806357 +Grid : Message : Average mflops/s per call per node : 820983 +Grid : Message : Average mflops/s per call per node : 665899 +Grid : Message : Average mflops/s per call per node (full): 316192 +Grid : Message : Average mflops/s per call per node (full): 440065 +Grid : Message : Average mflops/s per call per node (full): 445936 +Grid : Message : Average mflops/s per call per node (full): 304403 +Grid : Message : Stencil 13.9266 GB/s per node +Grid : Message : Stencil 9.79511 GB/s per node +Grid : Message : Stencil 17.4312 GB/s per node +Grid : Message : Stencil 12.2498 GB/s per node +Grid : Message : Average mflops/s per call per node : 665834 +Grid : Message : Average mflops/s per call per node : 810976 +Grid : Message : Average mflops/s per call per node : 821093 +Grid : Message : Average mflops/s per call per node : 667009 +Grid : Message : Average mflops/s per call per node (full): 312307 +Grid : Message : Average mflops/s per call per node (full): 311635 +Grid : Message : Average mflops/s per call per node (full): 445404 +Grid : Message : Average mflops/s per call per node (full): 303006 +Grid : Message : Stencil 12.81 GB/s per node +Grid : Message : Stencil 16.5723 GB/s per node +Grid : Message : Stencil 18.4032 GB/s per node +Grid : Message : Stencil 13.6021 GB/s per node +Grid : Message : Average mflops/s per call per node : 671280 +Grid : Message : Average mflops/s per call per node : 806415 +Grid : Message : Average mflops/s per call per node : 818215 +Grid : Message : Average mflops/s per call per node : 664436 +Grid : Message : Average mflops/s per call per 
node (full): 315501 +Grid : Message : Average mflops/s per call per node (full): 435810 +Grid : Message : Average mflops/s per call per node (full): 444505 +Grid : Message : Average mflops/s per call per node (full): 302928 +Grid : Message : Stencil 12.8577 GB/s per node +Grid : Message : Stencil 17.5319 GB/s per node +Grid : Message : Stencil 18.6292 GB/s per node +Grid : Message : Stencil 12.6488 GB/s per node +Grid : Message : Average mflops/s per call per node : 668092 +Grid : Message : Average mflops/s per call per node : 803661 +Grid : Message : Average mflops/s per call per node : 820460 +Grid : Message : Average mflops/s per call per node : 670432 +Grid : Message : Average mflops/s per call per node (full): 315044 +Grid : Message : Average mflops/s per call per node (full): 439046 +Grid : Message : Average mflops/s per call per node (full): 447930 +Grid : Message : Average mflops/s per call per node (full): 304999 +Grid : Message : Stencil 13.0255 GB/s per node +Grid : Message : Stencil 17.3176 GB/s per node +Grid : Message : Stencil 17.2674 GB/s per node +Grid : Message : Stencil 13.168 GB/s per node +Grid : Message : Average mflops/s per call per node : 668654 +Grid : Message : Average mflops/s per call per node : 802245 +Grid : Message : Average mflops/s per call per node : 822532 +Grid : Message : Average mflops/s per call per node : 660223 +Grid : Message : Average mflops/s per call per node (full): 315458 +Grid : Message : Average mflops/s per call per node (full): 439511 +Grid : Message : Average mflops/s per call per node (full): 443161 +Grid : Message : Average mflops/s per call per node (full): 304948 +Grid : Message : Stencil 13.5487 GB/s per node +Grid : Message : Stencil 17.5157 GB/s per node +Grid : Message : Stencil 17.6107 GB/s per node +Grid : Message : Stencil 13.0801 GB/s per node +Grid : Message : Average mflops/s per call per node : 662485 +Grid : Message : Average mflops/s per call per node : 801571 +Grid : Message : Average mflops/s 
per call per node : 825590 +Grid : Message : Average mflops/s per call per node : 667309 +Grid : Message : Average mflops/s per call per node (full): 315559 +Grid : Message : Average mflops/s per call per node (full): 439667 +Grid : Message : Average mflops/s per call per node (full): 446588 +Grid : Message : Average mflops/s per call per node (full): 304921 +Grid : Message : Stencil 13.1937 GB/s per node +Grid : Message : Stencil 17.687 GB/s per node +Grid : Message : Stencil 17.401 GB/s per node +Grid : Message : Stencil 13.7869 GB/s per node +Grid : Message : Average mflops/s per call per node : 662902 +Grid : Message : Average mflops/s per call per node : 805299 +Grid : Message : Average mflops/s per call per node : 822967 +Grid : Message : Average mflops/s per call per node : 662737 +Grid : Message : Average mflops/s per call per node (full): 313476 +Grid : Message : Average mflops/s per call per node (full): 439484 +Grid : Message : Average mflops/s per call per node (full): 445173 +Grid : Message : Average mflops/s per call per node (full): 306650 +Grid : Message : Stencil 13.5789 GB/s per node +Grid : Message : Stencil 16.1773 GB/s per node +Grid : Message : Stencil 17.6389 GB/s per node +Grid : Message : Stencil 13.2715 GB/s per node +Grid : Message : Average mflops/s per call per node : 664326 +Grid : Message : Average mflops/s per call per node : 804113 +Grid : Message : Average mflops/s per call per node : 822547 +Grid : Message : Average mflops/s per call per node : 663436 +Grid : Message : Average mflops/s per call per node (full): 314307 +Grid : Message : Average mflops/s per call per node (full): 431593 +Grid : Message : Average mflops/s per call per node (full): 445804 +Grid : Message : Average mflops/s per call per node (full): 306092 +Grid : Message : Stencil 13.654 GB/s per node +Grid : Message : Stencil 17.5621 GB/s per node +Grid : Message : Stencil 16.9713 GB/s per node +Grid : Message : Stencil 12.6528 GB/s per node +Grid : Message : Average 
mflops/s per call per node : 665081 +Grid : Message : Average mflops/s per call per node : 805621 +Grid : Message : Average mflops/s per call per node : 826388 +Grid : Message : Average mflops/s per call per node : 669706 +Grid : Message : Average mflops/s per call per node (full): 315746 +Grid : Message : Average mflops/s per call per node (full): 441183 +Grid : Message : Average mflops/s per call per node (full): 440868 +Grid : Message : Average mflops/s per call per node (full): 305130 +Grid : Message : Stencil 12.7877 GB/s per node +Grid : Message : Stencil 16.8454 GB/s per node +Grid : Message : Stencil 17.1362 GB/s per node +Grid : Message : Stencil 12.2521 GB/s per node +Grid : Message : Average mflops/s per call per node : 667733 +Grid : Message : Average mflops/s per call per node : 805130 +Grid : Message : Average mflops/s per call per node : 818806 +Grid : Message : Average mflops/s per call per node : 667100 +Grid : Message : Average mflops/s per call per node (full): 314589 +Grid : Message : Average mflops/s per call per node (full): 436885 +Grid : Message : Average mflops/s per call per node (full): 441502 +Grid : Message : Average mflops/s per call per node (full): 303569 +Grid : Message : Stencil 13.6816 GB/s per node +Grid : Message : Stencil 16.5988 GB/s per node +Grid : Message : Stencil 17.4377 GB/s per node +Grid : Message : Stencil 13.7525 GB/s per node +Grid : Message : Average mflops/s per call per node : 664947 +Grid : Message : Average mflops/s per call per node : 807086 +Grid : Message : Average mflops/s per call per node : 822228 +Grid : Message : Average mflops/s per call per node : 664514 +Grid : Message : Average mflops/s per call per node (full): 315168 +Grid : Message : Average mflops/s per call per node (full): 436002 +Grid : Message : Average mflops/s per call per node (full): 443007 +Grid : Message : Average mflops/s per call per node (full): 306737 +Grid : Message : Stencil 13.0057 GB/s per node +Grid : Message : Stencil 16.9441 
GB/s per node +Grid : Message : Stencil 17.679 GB/s per node +Grid : Message : Stencil 12.2985 GB/s per node +Grid : Message : Average mflops/s per call per node : 668090 +Grid : Message : Average mflops/s per call per node : 806060 +Grid : Message : Average mflops/s per call per node : 826981 +Grid : Message : Average mflops/s per call per node : 670588 +Grid : Message : Average mflops/s per call per node (full): 314896 +Grid : Message : Average mflops/s per call per node (full): 438351 +Grid : Message : Average mflops/s per call per node (full): 446426 +Grid : Message : Average mflops/s per call per node (full): 304257 +Grid : Message : Stencil 13.1404 GB/s per node +Grid : Message : Stencil 12.6783 GB/s per node +Grid : Message : Stencil 18.8603 GB/s per node +Grid : Message : Stencil 12.6124 GB/s per node +Grid : Message : Average mflops/s per call per node : 665879 +Grid : Message : Average mflops/s per call per node : 806963 +Grid : Message : Average mflops/s per call per node : 814888 +Grid : Message : Average mflops/s per call per node : 669744 +Grid : Message : Average mflops/s per call per node (full): 313332 +Grid : Message : Average mflops/s per call per node (full): 374495 +Grid : Message : Average mflops/s per call per node (full): 447120 +Grid : Message : Average mflops/s per call per node (full): 305294 +Grid : Message : Stencil 12.582 GB/s per node +Grid : Message : Stencil 15.9827 GB/s per node +Grid : Message : Stencil 18.3784 GB/s per node +Grid : Message : Stencil 12.6071 GB/s per node +Grid : Message : Average mflops/s per call per node : 666960 +Grid : Message : Average mflops/s per call per node : 801866 +Grid : Message : Average mflops/s per call per node : 815942 +Grid : Message : Average mflops/s per call per node : 667845 +Grid : Message : Average mflops/s per call per node (full): 314410 +Grid : Message : Average mflops/s per call per node (full): 428552 +Grid : Message : Average mflops/s per call per node (full): 447166 +Grid : Message 
: Average mflops/s per call per node (full): 304418 +Grid : Message : Stencil 13.5239 GB/s per node +Grid : Message : Stencil 17.4 GB/s per node +Grid : Message : Stencil 16.7494 GB/s per node +Grid : Message : Stencil 12.2387 GB/s per node +Grid : Message : Average mflops/s per call per node : 665104 +Grid : Message : Average mflops/s per call per node : 804421 +Grid : Message : Average mflops/s per call per node : 821566 +Grid : Message : Average mflops/s per call per node : 668199 +Grid : Message : Average mflops/s per call per node (full): 316575 +Grid : Message : Average mflops/s per call per node (full): 437744 +Grid : Message : Average mflops/s per call per node (full): 438409 +Grid : Message : Average mflops/s per call per node (full): 304113 +Grid : Message : Stencil 12.9829 GB/s per node +Grid : Message : Stencil 10.9168 GB/s per node +Grid : Message : Stencil 18.3933 GB/s per node +Grid : Message : Stencil 13.774 GB/s per node +Grid : Message : Average mflops/s per call per node : 666011 +Grid : Message : Average mflops/s per call per node : 807342 +Grid : Message : Average mflops/s per call per node : 821382 +Grid : Message : Average mflops/s per call per node : 660536 +Grid : Message : Average mflops/s per call per node (full): 314102 +Grid : Message : Average mflops/s per call per node (full): 337750 +Grid : Message : Average mflops/s per call per node (full): 447207 +Grid : Message : Average mflops/s per call per node (full): 303080 +Grid : Message : Stencil 13.3301 GB/s per node +Grid : Message : Stencil 17.2677 GB/s per node +Grid : Message : Stencil 17.264 GB/s per node +Grid : Message : Stencil 12.5656 GB/s per node +Grid : Message : Average mflops/s per call per node : 663154 +Grid : Message : Average mflops/s per call per node : 803852 +Grid : Message : Average mflops/s per call per node : 823789 +Grid : Message : Average mflops/s per call per node : 665791 +Grid : Message : Average mflops/s per call per node (full): 314605 +Grid : Message : 
Average mflops/s per call per node (full): 439696 +Grid : Message : Average mflops/s per call per node (full): 442832 +Grid : Message : Average mflops/s per call per node (full): 303476 +Grid : Message : Stencil 13.0953 GB/s per node +Grid : Message : Stencil 16.4118 GB/s per node +Grid : Message : Stencil 17.0283 GB/s per node +Grid : Message : Stencil 12.9251 GB/s per node +Grid : Message : Average mflops/s per call per node : 663595 +Grid : Message : Average mflops/s per call per node : 799392 +Grid : Message : Average mflops/s per call per node : 821889 +Grid : Message : Average mflops/s per call per node : 663291 +Grid : Message : Average mflops/s per call per node (full): 314648 +Grid : Message : Average mflops/s per call per node (full): 433149 +Grid : Message : Average mflops/s per call per node (full): 439817 +Grid : Message : Average mflops/s per call per node (full): 305093 +Grid : Message : Stencil 14.1367 GB/s per node +Grid : Message : Stencil 16.7558 GB/s per node +Grid : Message : Stencil 17.9835 GB/s per node +Grid : Message : Stencil 13.1096 GB/s per node +Grid : Message : Average mflops/s per call per node : 665854 +Grid : Message : Average mflops/s per call per node : 803889 +Grid : Message : Average mflops/s per call per node : 822656 +Grid : Message : Average mflops/s per call per node : 662789 +Grid : Message : Average mflops/s per call per node (full): 316383 +Grid : Message : Average mflops/s per call per node (full): 437067 +Grid : Message : Average mflops/s per call per node (full): 445712 +Grid : Message : Average mflops/s per call per node (full): 304553 +Grid : Message : Stencil 14.3347 GB/s per node +Grid : Message : Stencil 15.9031 GB/s per node +Grid : Message : Stencil 17.775 GB/s per node +Grid : Message : Stencil 12.7522 GB/s per node +Grid : Message : Average mflops/s per call per node : 666072 +Grid : Message : Average mflops/s per call per node : 804652 +Grid : Message : Average mflops/s per call per node : 819328 +Grid : 
Message : Average mflops/s per call per node : 670436 +Grid : Message : Average mflops/s per call per node (full): 316941 +Grid : Message : Average mflops/s per call per node (full): 423228 +Grid : Message : Average mflops/s per call per node (full): 444860 +Grid : Message : Average mflops/s per call per node (full): 305769 +Grid : Message : Stencil 14.0882 GB/s per node +Grid : Message : Stencil 16.6305 GB/s per node +Grid : Message : Stencil 17.847 GB/s per node +Grid : Message : Stencil 13.1699 GB/s per node +Grid : Message : Average mflops/s per call per node : 669135 +Grid : Message : Average mflops/s per call per node : 804670 +Grid : Message : Average mflops/s per call per node : 821916 +Grid : Message : Average mflops/s per call per node : 660372 +Grid : Message : Average mflops/s per call per node (full): 317075 +Grid : Message : Average mflops/s per call per node (full): 436490 +Grid : Message : Average mflops/s per call per node (full): 446030 +Grid : Message : Average mflops/s per call per node (full): 305999 +Grid : Message : Stencil 12.3987 GB/s per node +Grid : Message : Stencil 17.0523 GB/s per node +Grid : Message : Stencil 16.8339 GB/s per node +Grid : Message : Stencil 13.2098 GB/s per node +Grid : Message : Average mflops/s per call per node : 667431 +Grid : Message : Average mflops/s per call per node : 806676 +Grid : Message : Average mflops/s per call per node : 819751 +Grid : Message : Average mflops/s per call per node : 660084 +Grid : Message : Average mflops/s per call per node (full): 312865 +Grid : Message : Average mflops/s per call per node (full): 439743 +Grid : Message : Average mflops/s per call per node (full): 438997 +Grid : Message : Average mflops/s per call per node (full): 303877 +Grid : Message : Stencil 13.8262 GB/s per node +Grid : Message : Stencil 16.6954 GB/s per node +Grid : Message : Stencil 18.417 GB/s per node +Grid : Message : Stencil 13.1479 GB/s per node +Grid : Message : Average mflops/s per call per node : 
668787 +Grid : Message : Average mflops/s per call per node : 798666 +Grid : Message : Average mflops/s per call per node : 831464 +Grid : Message : Average mflops/s per call per node : 668642 +Grid : Message : Average mflops/s per call per node (full): 317648 +Grid : Message : Average mflops/s per call per node (full): 436717 +Grid : Message : Average mflops/s per call per node (full): 450757 +Grid : Message : Average mflops/s per call per node (full): 306965 +Grid : Message : Stencil 13.7838 GB/s per node +Grid : Message : Stencil 9.57078 GB/s per node +Grid : Message : Stencil 16.9114 GB/s per node +Grid : Message : Stencil 12.9244 GB/s per node +Grid : Message : Average mflops/s per call per node : 667864 +Grid : Message : Average mflops/s per call per node : 810482 +Grid : Message : Average mflops/s per call per node : 825563 +Grid : Message : Average mflops/s per call per node : 666999 +Grid : Message : Average mflops/s per call per node (full): 316673 +Grid : Message : Average mflops/s per call per node (full): 305942 +Grid : Message : Average mflops/s per call per node (full): 440459 +Grid : Message : Average mflops/s per call per node (full): 303764 +Grid : Message : Stencil 14.3677 GB/s per node +Grid : Message : Stencil 15.2322 GB/s per node +Grid : Message : Stencil 17.9007 GB/s per node +Grid : Message : Stencil 13.0004 GB/s per node +Grid : Message : Average mflops/s per call per node : 664683 +Grid : Message : Average mflops/s per call per node : 805400 +Grid : Message : Average mflops/s per call per node : 817421 +Grid : Message : Average mflops/s per call per node : 665777 +Grid : Message : Average mflops/s per call per node (full): 316767 +Grid : Message : Average mflops/s per call per node (full): 417760 +Grid : Message : Average mflops/s per call per node (full): 446308 +Grid : Message : Average mflops/s per call per node (full): 303530 +Grid : Message : Stencil 13.3195 GB/s per node +Grid : Message : Stencil 16.2863 GB/s per node +Grid : 
Message : Stencil 18.3161 GB/s per node +Grid : Message : Stencil 13.6078 GB/s per node +Grid : Message : Average mflops/s per call per node : 667666 +Grid : Message : Average mflops/s per call per node : 803458 +Grid : Message : Average mflops/s per call per node : 820298 +Grid : Message : Average mflops/s per call per node : 661331 +Grid : Message : Average mflops/s per call per node (full): 316283 +Grid : Message : Average mflops/s per call per node (full): 431512 +Grid : Message : Average mflops/s per call per node (full): 448171 +Grid : Message : Average mflops/s per call per node (full): 306466 +Grid : Message : Stencil 12.7435 GB/s per node +Grid : Message : Stencil 17.6106 GB/s per node +Grid : Message : Stencil 18.4497 GB/s per node +Grid : Message : Stencil 12.8518 GB/s per node +Grid : Message : Average mflops/s per call per node : 667716 +Grid : Message : Average mflops/s per call per node : 802090 +Grid : Message : Average mflops/s per call per node : 819461 +Grid : Message : Average mflops/s per call per node : 659626 +Grid : Message : Average mflops/s per call per node (full): 315239 +Grid : Message : Average mflops/s per call per node (full): 439084 +Grid : Message : Average mflops/s per call per node (full): 446296 +Grid : Message : Average mflops/s per call per node (full): 304276 +Grid : Message : Stencil 13.4268 GB/s per node +Grid : Message : Stencil 16.7193 GB/s per node +Grid : Message : Stencil 17.3289 GB/s per node +Grid : Message : Stencil 12.5761 GB/s per node +Grid : Message : Average mflops/s per call per node : 665303 +Grid : Message : Average mflops/s per call per node : 806654 +Grid : Message : Average mflops/s per call per node : 822986 +Grid : Message : Average mflops/s per call per node : 669838 +Grid : Message : Average mflops/s per call per node (full): 315531 +Grid : Message : Average mflops/s per call per node (full): 437072 +Grid : Message : Average mflops/s per call per node (full): 443806 +Grid : Message : Average mflops/s 
per call per node (full): 305796 +Grid : Message : Stencil 14.2954 GB/s per node +Grid : Message : Stencil 16.7181 GB/s per node +Grid : Message : Stencil 17.119 GB/s per node +Grid : Message : Stencil 12.2928 GB/s per node +Grid : Message : Average mflops/s per call per node : 660850 +Grid : Message : Average mflops/s per call per node : 805612 +Grid : Message : Average mflops/s per call per node : 822817 +Grid : Message : Average mflops/s per call per node : 662549 +Grid : Message : Average mflops/s per call per node (full): 315957 +Grid : Message : Average mflops/s per call per node (full): 436318 +Grid : Message : Average mflops/s per call per node (full): 439364 +Grid : Message : Average mflops/s per call per node (full): 297783 +Grid : Message : Stencil 12.7076 GB/s per node +Grid : Message : Stencil 8.72101 GB/s per node +Grid : Message : Stencil 18.2561 GB/s per node +Grid : Message : Stencil 13.1808 GB/s per node +Grid : Message : Average mflops/s per call per node : 668366 +Grid : Message : Average mflops/s per call per node : 806517 +Grid : Message : Average mflops/s per call per node : 821816 +Grid : Message : Average mflops/s per call per node : 662894 +Grid : Message : Average mflops/s per call per node (full): 314523 +Grid : Message : Average mflops/s per call per node (full): 285263 +Grid : Message : Average mflops/s per call per node (full): 448258 +Grid : Message : Average mflops/s per call per node (full): 306647 +Grid : Message : Stencil 12.9634 GB/s per node +Grid : Message : Stencil 17.0186 GB/s per node +Grid : Message : Stencil 17.4637 GB/s per node +Grid : Message : Stencil 12.5254 GB/s per node +Grid : Message : Average mflops/s per call per node : 666297 +Grid : Message : Average mflops/s per call per node : 804426 +Grid : Message : Average mflops/s per call per node : 829078 +Grid : Message : Average mflops/s per call per node : 666390 +Grid : Message : Average mflops/s per call per node (full): 313370 +Grid : Message : Average mflops/s 
per call per node (full): 437900 +Grid : Message : Average mflops/s per call per node (full): 444086 +Grid : Message : Average mflops/s per call per node (full): 304885 +Grid : Message : Stencil 12.691 GB/s per node +Grid : Message : Stencil 14.2967 GB/s per node +Grid : Message : Stencil 18.1624 GB/s per node +Grid : Message : Stencil 14.6255 GB/s per node +Grid : Message : Average mflops/s per call per node : 667652 +Grid : Message : Average mflops/s per call per node : 802604 +Grid : Message : Average mflops/s per call per node : 821366 +Grid : Message : Average mflops/s per call per node : 662911 +Grid : Message : Average mflops/s per call per node (full): 312773 +Grid : Message : Average mflops/s per call per node (full): 403134 +Grid : Message : Average mflops/s per call per node (full): 445599 +Grid : Message : Average mflops/s per call per node (full): 307073 +Grid : Message : Stencil 13.343 GB/s per node +Grid : Message : Stencil 13.1466 GB/s per node +Grid : Message : Stencil 17.5352 GB/s per node +Grid : Message : Stencil 13.6102 GB/s per node +Grid : Message : Average mflops/s per call per node : 665664 +Grid : Message : Average mflops/s per call per node : 806207 +Grid : Message : Average mflops/s per call per node : 818724 +Grid : Message : Average mflops/s per call per node : 662443 +Grid : Message : Average mflops/s per call per node (full): 314916 +Grid : Message : Average mflops/s per call per node (full): 377419 +Grid : Message : Average mflops/s per call per node (full): 445019 +Grid : Message : Average mflops/s per call per node (full): 305836 +Grid : Message : Stencil 13.2107 GB/s per node +Grid : Message : Stencil 17.44 GB/s per node +Grid : Message : Stencil 16.9745 GB/s per node +Grid : Message : Stencil 11.7252 GB/s per node +Grid : Message : Average mflops/s per call per node : 666774 +Grid : Message : Average mflops/s per call per node : 805953 +Grid : Message : Average mflops/s per call per node : 814111 +Grid : Message : Average 
mflops/s per call per node : 662423 +Grid : Message : Average mflops/s per call per node (full): 313687 +Grid : Message : Average mflops/s per call per node (full): 442093 +Grid : Message : Average mflops/s per call per node (full): 440174 +Grid : Message : Average mflops/s per call per node (full): 298957 +Grid : Message : Stencil 12.3945 GB/s per node +Grid : Message : Stencil 8.02693 GB/s per node +Grid : Message : Stencil 17.5639 GB/s per node +Grid : Message : Stencil 13.2315 GB/s per node +Grid : Message : Average mflops/s per call per node : 667677 +Grid : Message : Average mflops/s per call per node : 811328 +Grid : Message : Average mflops/s per call per node : 821375 +Grid : Message : Average mflops/s per call per node : 666391 +Grid : Message : Average mflops/s per call per node (full): 312447 +Grid : Message : Average mflops/s per call per node (full): 267131 +Grid : Message : Average mflops/s per call per node (full): 444425 +Grid : Message : Average mflops/s per call per node (full): 306431 +Grid : Message : Stencil 12.422 GB/s per node +Grid : Message : Stencil 16.6346 GB/s per node +Grid : Message : Stencil 18.3315 GB/s per node +Grid : Message : Stencil 12.0226 GB/s per node +Grid : Message : Average mflops/s per call per node : 668795 +Grid : Message : Average mflops/s per call per node : 805787 +Grid : Message : Average mflops/s per call per node : 822035 +Grid : Message : Average mflops/s per call per node : 670737 +Grid : Message : Average mflops/s per call per node (full): 312813 +Grid : Message : Average mflops/s per call per node (full): 435939 +Grid : Message : Average mflops/s per call per node (full): 446949 +Grid : Message : Average mflops/s per call per node (full): 302645 +Grid : Message : Stencil 13.6331 GB/s per node +Grid : Message : Stencil 16.8444 GB/s per node +Grid : Message : Stencil 17.3844 GB/s per node +Grid : Message : Stencil 12.7145 GB/s per node +Grid : Message : Average mflops/s per call per node : 662530 +Grid : 
Message : Average mflops/s per call per node : 802929 +Grid : Message : Average mflops/s per call per node : 823807 +Grid : Message : Average mflops/s per call per node : 660727 +Grid : Message : Average mflops/s per call per node (full): 310885 +Grid : Message : Average mflops/s per call per node (full): 432574 +Grid : Message : Average mflops/s per call per node (full): 444738 +Grid : Message : Average mflops/s per call per node (full): 303776 +Grid : Message : Stencil 13.319 GB/s per node +Grid : Message : Stencil 16.6364 GB/s per node +Grid : Message : Stencil 17.5618 GB/s per node +Grid : Message : Stencil 12.1875 GB/s per node +Grid : Message : Average mflops/s per call per node : 666624 +Grid : Message : Average mflops/s per call per node : 802022 +Grid : Message : Average mflops/s per call per node : 820670 +Grid : Message : Average mflops/s per call per node : 662307 +Grid : Message : Average mflops/s per call per node (full): 315583 +Grid : Message : Average mflops/s per call per node (full): 433871 +Grid : Message : Average mflops/s per call per node (full): 445169 +Grid : Message : Average mflops/s per call per node (full): 295793 +Grid : Message : Stencil 13.5188 GB/s per node +Grid : Message : Stencil 17.4021 GB/s per node +Grid : Message : Stencil 18.2654 GB/s per node +Grid : Message : Stencil 14.2935 GB/s per node +Grid : Message : Average mflops/s per call per node : 664121 +Grid : Message : Average mflops/s per call per node : 802832 +Grid : Message : Average mflops/s per call per node : 820333 +Grid : Message : Average mflops/s per call per node : 662363 +Grid : Message : Average mflops/s per call per node (full): 315530 +Grid : Message : Average mflops/s per call per node (full): 437807 +Grid : Message : Average mflops/s per call per node (full): 446585 +Grid : Message : Average mflops/s per call per node (full): 306735 +Grid : Message : Stencil 13.6374 GB/s per node +Grid : Message : Stencil 16.7984 GB/s per node +Grid : Message : Stencil 
17.4589 GB/s per node +Grid : Message : Stencil 13.3826 GB/s per node +Grid : Message : Average mflops/s per call per node : 659833 +Grid : Message : Average mflops/s per call per node : 807520 +Grid : Message : Average mflops/s per call per node : 823853 +Grid : Message : Average mflops/s per call per node : 665819 +Grid : Message : Average mflops/s per call per node (full): 314759 +Grid : Message : Average mflops/s per call per node (full): 436571 +Grid : Message : Average mflops/s per call per node (full): 445851 +Grid : Message : Average mflops/s per call per node (full): 306885 +Grid : Message : Stencil 13.1851 GB/s per node +Grid : Message : Stencil 9.70481 GB/s per node +Grid : Message : Stencil 17.4339 GB/s per node +Grid : Message : Stencil 12.6306 GB/s per node +Grid : Message : Average mflops/s per call per node : 661627 +Grid : Message : Average mflops/s per call per node : 805451 +Grid : Message : Average mflops/s per call per node : 817025 +Grid : Message : Average mflops/s per call per node : 664777 +Grid : Message : Average mflops/s per call per node (full): 312092 +Grid : Message : Average mflops/s per call per node (full): 309023 +Grid : Message : Average mflops/s per call per node (full): 443869 +Grid : Message : Average mflops/s per call per node (full): 304307 +Grid : Message : Stencil 13.8383 GB/s per node +Grid : Message : Stencil 17.767 GB/s per node +Grid : Message : Stencil 17.4802 GB/s per node +Grid : Message : Stencil 13.0814 GB/s per node +Grid : Message : Average mflops/s per call per node : 662545 +Grid : Message : Average mflops/s per call per node : 804357 +Grid : Message : Average mflops/s per call per node : 823795 +Grid : Message : Average mflops/s per call per node : 668634 +Grid : Message : Average mflops/s per call per node (full): 314985 +Grid : Message : Average mflops/s per call per node (full): 437156 +Grid : Message : Average mflops/s per call per node (full): 445449 +Grid : Message : Average mflops/s per call per node 
(full): 306406 +Grid : Message : Stencil 12.8009 GB/s per node +Grid : Message : Stencil 17.7658 GB/s per node +Grid : Message : Stencil 16.9127 GB/s per node +Grid : Message : Stencil 13.0729 GB/s per node +Grid : Message : Average mflops/s per call per node : 664961 +Grid : Message : Average mflops/s per call per node : 798250 +Grid : Message : Average mflops/s per call per node : 817036 +Grid : Message : Average mflops/s per call per node : 666474 +Grid : Message : Average mflops/s per call per node (full): 313921 +Grid : Message : Average mflops/s per call per node (full): 437420 +Grid : Message : Average mflops/s per call per node (full): 438986 +Grid : Message : Average mflops/s per call per node (full): 305255 +Grid : Message : Stencil 12.5554 GB/s per node +Grid : Message : Stencil 15.1018 GB/s per node +Grid : Message : Stencil 17.9067 GB/s per node +Grid : Message : Stencil 11.922 GB/s per node +Grid : Message : Average mflops/s per call per node : 663244 +Grid : Message : Average mflops/s per call per node : 808394 +Grid : Message : Average mflops/s per call per node : 816336 +Grid : Message : Average mflops/s per call per node : 669089 +Grid : Message : Average mflops/s per call per node (full): 312840 +Grid : Message : Average mflops/s per call per node (full): 417492 +Grid : Message : Average mflops/s per call per node (full): 446369 +Grid : Message : Average mflops/s per call per node (full): 300965 +Grid : Message : Stencil 14.4247 GB/s per node +Grid : Message : Stencil 16.806 GB/s per node +Grid : Message : Stencil 17.3214 GB/s per node +Grid : Message : Stencil 13.4102 GB/s per node +Grid : Message : Average mflops/s per call per node : 662793 +Grid : Message : Average mflops/s per call per node : 805114 +Grid : Message : Average mflops/s per call per node : 820028 +Grid : Message : Average mflops/s per call per node : 663970 +Grid : Message : Average mflops/s per call per node (full): 316120 +Grid : Message : Average mflops/s per call per node 
(full): 436892 +Grid : Message : Average mflops/s per call per node (full): 444044 +Grid : Message : Average mflops/s per call per node (full): 306598 +Grid : Message : Stencil 13.1868 GB/s per node +Grid : Message : Stencil 16.79 GB/s per node +Grid : Message : Stencil 17.9053 GB/s per node +Grid : Message : Stencil 12.5273 GB/s per node +Grid : Message : Average mflops/s per call per node : 665115 +Grid : Message : Average mflops/s per call per node : 800161 +Grid : Message : Average mflops/s per call per node : 821672 +Grid : Message : Average mflops/s per call per node : 668719 +Grid : Message : Average mflops/s per call per node (full): 315324 +Grid : Message : Average mflops/s per call per node (full): 429540 +Grid : Message : Average mflops/s per call per node (full): 446378 +Grid : Message : Average mflops/s per call per node (full): 306237 +Grid : Message : Stencil 13.2253 GB/s per node +Grid : Message : Stencil 16.8698 GB/s per node +Grid : Message : Stencil 18.2046 GB/s per node +Grid : Message : Stencil 12.7647 GB/s per node +Grid : Message : Average mflops/s per call per node : 667962 +Grid : Message : Average mflops/s per call per node : 802042 +Grid : Message : Average mflops/s per call per node : 820920 +Grid : Message : Average mflops/s per call per node : 668023 +Grid : Message : Average mflops/s per call per node (full): 314959 +Grid : Message : Average mflops/s per call per node (full): 438077 +Grid : Message : Average mflops/s per call per node (full): 445637 +Grid : Message : Average mflops/s per call per node (full): 305345 +Grid : Message : Stencil 13.2723 GB/s per node +Grid : Message : Stencil 17.0483 GB/s per node +Grid : Message : Stencil 17.3302 GB/s per node +Grid : Message : Stencil 12.4456 GB/s per node +Grid : Message : Average mflops/s per call per node : 664764 +Grid : Message : Average mflops/s per call per node : 803607 +Grid : Message : Average mflops/s per call per node : 820416 +Grid : Message : Average mflops/s per call per 
node : 665533 +Grid : Message : Average mflops/s per call per node (full): 315139 +Grid : Message : Average mflops/s per call per node (full): 439429 +Grid : Message : Average mflops/s per call per node (full): 444492 +Grid : Message : Average mflops/s per call per node (full): 304859 +Grid : Message : Stencil 14.0213 GB/s per node +Grid : Message : Stencil 13.8357 GB/s per node +Grid : Message : Stencil 17.182 GB/s per node +Grid : Message : Stencil 12.6582 GB/s per node +Grid : Message : Average mflops/s per call per node : 664981 +Grid : Message : Average mflops/s per call per node : 806282 +Grid : Message : Average mflops/s per call per node : 824095 +Grid : Message : Average mflops/s per call per node : 665473 +Grid : Message : Average mflops/s per call per node (full): 316013 +Grid : Message : Average mflops/s per call per node (full): 396530 +Grid : Message : Average mflops/s per call per node (full): 442563 +Grid : Message : Average mflops/s per call per node (full): 304489 +Grid : Message : Stencil 15.2683 GB/s per node +Grid : Message : Stencil 10.9317 GB/s per node +Grid : Message : Stencil 17.2713 GB/s per node +Grid : Message : Stencil 13.4705 GB/s per node +Grid : Message : Average mflops/s per call per node : 661032 +Grid : Message : Average mflops/s per call per node : 808017 +Grid : Message : Average mflops/s per call per node : 819256 +Grid : Message : Average mflops/s per call per node : 669147 +Grid : Message : Average mflops/s per call per node (full): 316887 +Grid : Message : Average mflops/s per call per node (full): 337035 +Grid : Message : Average mflops/s per call per node (full): 442321 +Grid : Message : Average mflops/s per call per node (full): 306892 +Grid : Message : Stencil 13.1559 GB/s per node +Grid : Message : Stencil 16.449 GB/s per node +Grid : Message : Stencil 17.9936 GB/s per node +Grid : Message : Stencil 13.389 GB/s per node +Grid : Message : Average mflops/s per call per node : 667960 +Grid : Message : Average mflops/s per 
call per node : 803262 +Grid : Message : Average mflops/s per call per node : 823091 +Grid : Message : Average mflops/s per call per node : 664836 +Grid : Message : Average mflops/s per call per node (full): 315214 +Grid : Message : Average mflops/s per call per node (full): 433790 +Grid : Message : Average mflops/s per call per node (full): 441761 +Grid : Message : Average mflops/s per call per node (full): 301416 +Grid : Message : Stencil 13.7506 GB/s per node +Grid : Message : Stencil 16.4977 GB/s per node +Grid : Message : Stencil 17.039 GB/s per node +Grid : Message : Stencil 15.0107 GB/s per node +Grid : Message : Average mflops/s per call per node : 663396 +Grid : Message : Average mflops/s per call per node : 810470 +Grid : Message : Average mflops/s per call per node : 824869 +Grid : Message : Average mflops/s per call per node : 659004 +Grid : Message : Average mflops/s per call per node (full): 316219 +Grid : Message : Average mflops/s per call per node (full): 435982 +Grid : Message : Average mflops/s per call per node (full): 441402 +Grid : Message : Average mflops/s per call per node (full): 306600 +Grid : Message : Stencil 13.6079 GB/s per node +Grid : Message : Stencil 16.9354 GB/s per node +Grid : Message : Stencil 16.8547 GB/s per node +Grid : Message : Stencil 13.1611 GB/s per node +Grid : Message : Average mflops/s per call per node : 665147 +Grid : Message : Average mflops/s per call per node : 803221 +Grid : Message : Average mflops/s per call per node : 817284 +Grid : Message : Average mflops/s per call per node : 664592 +Grid : Message : Average mflops/s per call per node (full): 316696 +Grid : Message : Average mflops/s per call per node (full): 438191 +Grid : Message : Average mflops/s per call per node (full): 439513 +Grid : Message : Average mflops/s per call per node (full): 306013 +Grid : Message : Stencil 12.8309 GB/s per node +Grid : Message : Stencil 17.2165 GB/s per node +Grid : Message : Stencil 17.9916 GB/s per node +Grid : 
Message : Stencil 13.2875 GB/s per node +Grid : Message : Average mflops/s per call per node : 665799 +Grid : Message : Average mflops/s per call per node : 803556 +Grid : Message : Average mflops/s per call per node : 824068 +Grid : Message : Average mflops/s per call per node : 666535 +Grid : Message : Average mflops/s per call per node (full): 313823 +Grid : Message : Average mflops/s per call per node (full): 438727 +Grid : Message : Average mflops/s per call per node (full): 446218 +Grid : Message : Average mflops/s per call per node (full): 306419 +Grid : Message : Stencil 13.2736 GB/s per node +Grid : Message : Stencil 17.2401 GB/s per node +Grid : Message : Stencil 17.6393 GB/s per node +Grid : Message : Stencil 12.4333 GB/s per node +Grid : Message : Average mflops/s per call per node : 664483 +Grid : Message : Average mflops/s per call per node : 802986 +Grid : Message : Average mflops/s per call per node : 825353 +Grid : Message : Average mflops/s per call per node : 671556 +Grid : Message : Average mflops/s per call per node (full): 315141 +Grid : Message : Average mflops/s per call per node (full): 431488 +Grid : Message : Average mflops/s per call per node (full): 447340 +Grid : Message : Average mflops/s per call per node (full): 305656 +Grid : Message : Stencil 12.2857 GB/s per node +Grid : Message : Stencil 16.7884 GB/s per node +Grid : Message : Stencil 17.3337 GB/s per node +Grid : Message : Stencil 12.6979 GB/s per node +Grid : Message : Average mflops/s per call per node : 669141 +Grid : Message : Average mflops/s per call per node : 807060 +Grid : Message : Average mflops/s per call per node : 826403 +Grid : Message : Average mflops/s per call per node : 669655 +Grid : Message : Average mflops/s per call per node (full): 312097 +Grid : Message : Average mflops/s per call per node (full): 437245 +Grid : Message : Average mflops/s per call per node (full): 444011 +Grid : Message : Average mflops/s per call per node (full): 306295 +Grid : Message 
: Stencil 13.0719 GB/s per node +Grid : Message : Stencil 17.8789 GB/s per node +Grid : Message : Stencil 16.9782 GB/s per node +Grid : Message : Stencil 12.5719 GB/s per node +Grid : Message : Average mflops/s per call per node : 665336 +Grid : Message : Average mflops/s per call per node : 806073 +Grid : Message : Average mflops/s per call per node : 820607 +Grid : Message : Average mflops/s per call per node : 660984 +Grid : Message : Average mflops/s per call per node (full): 315686 +Grid : Message : Average mflops/s per call per node (full): 440215 +Grid : Message : Average mflops/s per call per node (full): 439389 +Grid : Message : Average mflops/s per call per node (full): 304164 +Grid : Message : Stencil 13.3607 GB/s per node +Grid : Message : Stencil 16.2373 GB/s per node +Grid : Message : Stencil 17.749 GB/s per node +Grid : Message : Stencil 13.0617 GB/s per node +Grid : Message : Average mflops/s per call per node : 667798 +Grid : Message : Average mflops/s per call per node : 808757 +Grid : Message : Average mflops/s per call per node : 826930 +Grid : Message : Average mflops/s per call per node : 667184 +Grid : Message : Average mflops/s per call per node (full): 316106 +Grid : Message : Average mflops/s per call per node (full): 431650 +Grid : Message : Average mflops/s per call per node (full): 446141 +Grid : Message : Average mflops/s per call per node (full): 305764 +Grid : Message : Stencil 12.1402 GB/s per node +Grid : Message : Stencil 17.6135 GB/s per node +Grid : Message : Stencil 18.115 GB/s per node +Grid : Message : Stencil 12.9154 GB/s per node +Grid : Message : Average mflops/s per call per node : 669426 +Grid : Message : Average mflops/s per call per node : 802030 +Grid : Message : Average mflops/s per call per node : 821277 +Grid : Message : Average mflops/s per call per node : 664313 +Grid : Message : Average mflops/s per call per node (full): 310554 +Grid : Message : Average mflops/s per call per node (full): 438004 +Grid : Message : 
Average mflops/s per call per node (full): 448393 +Grid : Message : Average mflops/s per call per node (full): 303893 +Grid : Message : Stencil 12.8153 GB/s per node +Grid : Message : Stencil 17.2724 GB/s per node +Grid : Message : Stencil 16.7937 GB/s per node +Grid : Message : Stencil 12.4661 GB/s per node +Grid : Message : Average mflops/s per call per node : 666144 +Grid : Message : Average mflops/s per call per node : 800199 +Grid : Message : Average mflops/s per call per node : 825550 +Grid : Message : Average mflops/s per call per node : 667956 +Grid : Message : Average mflops/s per call per node (full): 314030 +Grid : Message : Average mflops/s per call per node (full): 436611 +Grid : Message : Average mflops/s per call per node (full): 438304 +Grid : Message : Average mflops/s per call per node (full): 303741 +Grid : Message : Stencil 13.1943 GB/s per node +Grid : Message : Stencil 14.5485 GB/s per node +Grid : Message : Stencil 17.4574 GB/s per node +Grid : Message : Stencil 13.4498 GB/s per node +Grid : Message : Average mflops/s per call per node : 667448 +Grid : Message : Average mflops/s per call per node : 806169 +Grid : Message : Average mflops/s per call per node : 824639 +Grid : Message : Average mflops/s per call per node : 666959 +Grid : Message : Average mflops/s per call per node (full): 315919 +Grid : Message : Average mflops/s per call per node (full): 408858 +Grid : Message : Average mflops/s per call per node (full): 445835 +Grid : Message : Average mflops/s per call per node (full): 306758 +Grid : Message : Stencil 12.5642 GB/s per node +Grid : Message : Stencil 16.9379 GB/s per node +Grid : Message : Stencil 18.1868 GB/s per node +Grid : Message : Stencil 12.0624 GB/s per node +Grid : Message : Average mflops/s per call per node : 668822 +Grid : Message : Average mflops/s per call per node : 803972 +Grid : Message : Average mflops/s per call per node : 820042 +Grid : Message : Average mflops/s per call per node : 672336 +Grid : Message : 
Average mflops/s per call per node (full): 314087 +Grid : Message : Average mflops/s per call per node (full): 436495 +Grid : Message : Average mflops/s per call per node (full): 446760 +Grid : Message : Average mflops/s per call per node (full): 302805 +Grid : Message : Stencil 12.4464 GB/s per node +Grid : Message : Stencil 10.9115 GB/s per node +Grid : Message : Stencil 17.3986 GB/s per node +Grid : Message : Stencil 12.7824 GB/s per node +Grid : Message : Average mflops/s per call per node : 667093 +Grid : Message : Average mflops/s per call per node : 815614 +Grid : Message : Average mflops/s per call per node : 830593 +Grid : Message : Average mflops/s per call per node : 656955 +Grid : Message : Average mflops/s per call per node (full): 314114 +Grid : Message : Average mflops/s per call per node (full): 337799 +Grid : Message : Average mflops/s per call per node (full): 445295 +Grid : Message : Average mflops/s per call per node (full): 299650 +Grid : Message : Stencil 12.5693 GB/s per node +Grid : Message : Stencil 16.035 GB/s per node +Grid : Message : Stencil 16.6488 GB/s per node +Grid : Message : Stencil 13.8067 GB/s per node +Grid : Message : Average mflops/s per call per node : 665625 +Grid : Message : Average mflops/s per call per node : 807346 +Grid : Message : Average mflops/s per call per node : 830478 +Grid : Message : Average mflops/s per call per node : 667173 +Grid : Message : Average mflops/s per call per node (full): 313665 +Grid : Message : Average mflops/s per call per node (full): 426106 +Grid : Message : Average mflops/s per call per node (full): 426730 +Grid : Message : Average mflops/s per call per node (full): 305773 +Grid : Message : Stencil 13.5136 GB/s per node +Grid : Message : Stencil 9.41642 GB/s per node +Grid : Message : Stencil 19.2267 GB/s per node +Grid : Message : Stencil 12.8042 GB/s per node +Grid : Message : Average mflops/s per call per node : 662943 +Grid : Message : Average mflops/s per call per node : 812832 +Grid 
: Message : Average mflops/s per call per node : 818514 +Grid : Message : Average mflops/s per call per node : 663012 +Grid : Message : Average mflops/s per call per node (full): 314230 +Grid : Message : Average mflops/s per call per node (full): 302620 +Grid : Message : Average mflops/s per call per node (full): 449719 +Grid : Message : Average mflops/s per call per node (full): 305924 +Grid : Message : Stencil 12.9002 GB/s per node +Grid : Message : Stencil 17.2748 GB/s per node +Grid : Message : Stencil 17.5933 GB/s per node +Grid : Message : Stencil 13.646 GB/s per node +Grid : Message : Average mflops/s per call per node : 671705 +Grid : Message : Average mflops/s per call per node : 803725 +Grid : Message : Average mflops/s per call per node : 821077 +Grid : Message : Average mflops/s per call per node : 661167 +Grid : Message : Average mflops/s per call per node (full): 315733 +Grid : Message : Average mflops/s per call per node (full): 438357 +Grid : Message : Average mflops/s per call per node (full): 445776 +Grid : Message : Average mflops/s per call per node (full): 305797 +Grid : Message : Stencil 13.0162 GB/s per node +Grid : Message : Stencil 16.996 GB/s per node +Grid : Message : Stencil 17.4666 GB/s per node +Grid : Message : Stencil 13.2392 GB/s per node +Grid : Message : Average mflops/s per call per node : 664654 +Grid : Message : Average mflops/s per call per node : 802776 +Grid : Message : Average mflops/s per call per node : 820496 +Grid : Message : Average mflops/s per call per node : 661304 +Grid : Message : Average mflops/s per call per node (full): 314583 +Grid : Message : Average mflops/s per call per node (full): 437784 +Grid : Message : Average mflops/s per call per node (full): 444315 +Grid : Message : Average mflops/s per call per node (full): 305299 +Grid : Message : Stencil 13.4746 GB/s per node +Grid : Message : Stencil 16.9801 GB/s per node +Grid : Message : Stencil 17.6159 GB/s per node +Grid : Message : Stencil 13.1317 GB/s per 
node +Grid : Message : Average mflops/s per call per node : 666274 +Grid : Message : Average mflops/s per call per node : 802737 +Grid : Message : Average mflops/s per call per node : 822278 +Grid : Message : Average mflops/s per call per node : 658715 +Grid : Message : Average mflops/s per call per node (full): 314579 +Grid : Message : Average mflops/s per call per node (full): 432928 +Grid : Message : Average mflops/s per call per node (full): 445152 +Grid : Message : Average mflops/s per call per node (full): 303967 +Grid : Message : Stencil 13.2933 GB/s per node +Grid : Message : Stencil 16.6674 GB/s per node +Grid : Message : Stencil 17.6578 GB/s per node +Grid : Message : Stencil 12.655 GB/s per node +Grid : Message : Average mflops/s per call per node : 663792 +Grid : Message : Average mflops/s per call per node : 805521 +Grid : Message : Average mflops/s per call per node : 817418 +Grid : Message : Average mflops/s per call per node : 667583 +Grid : Message : Average mflops/s per call per node (full): 315054 +Grid : Message : Average mflops/s per call per node (full): 432370 +Grid : Message : Average mflops/s per call per node (full): 445767 +Grid : Message : Average mflops/s per call per node (full): 304227 +Grid : Message : Stencil 14.0909 GB/s per node +Grid : Message : Stencil 18.2789 GB/s per node +Grid : Message : Stencil 17.553 GB/s per node +Grid : Message : Stencil 11.7614 GB/s per node +Grid : Message : Average mflops/s per call per node : 664315 +Grid : Message : Average mflops/s per call per node : 801435 +Grid : Message : Average mflops/s per call per node : 822475 +Grid : Message : Average mflops/s per call per node : 666305 +Grid : Message : Average mflops/s per call per node (full): 316926 +Grid : Message : Average mflops/s per call per node (full): 439618 +Grid : Message : Average mflops/s per call per node (full): 445020 +Grid : Message : Average mflops/s per call per node (full): 299389 +Grid : Message : Stencil 13.56 GB/s per node +Grid 
: Message : Stencil 16.6586 GB/s per node +Grid : Message : Stencil 17.5614 GB/s per node +Grid : Message : Stencil 13.1493 GB/s per node +Grid : Message : Average mflops/s per call per node : 664674 +Grid : Message : Average mflops/s per call per node : 802079 +Grid : Message : Average mflops/s per call per node : 825166 +Grid : Message : Average mflops/s per call per node : 665438 +Grid : Message : Average mflops/s per call per node (full): 314723 +Grid : Message : Average mflops/s per call per node (full): 435905 +Grid : Message : Average mflops/s per call per node (full): 445889 +Grid : Message : Average mflops/s per call per node (full): 306380 +Grid : Message : Stencil 12.9297 GB/s per node +Grid : Message : Stencil 16.6985 GB/s per node +Grid : Message : Stencil 18.1772 GB/s per node +Grid : Message : Stencil 13.673 GB/s per node +Grid : Message : Average mflops/s per call per node : 666231 +Grid : Message : Average mflops/s per call per node : 800458 +Grid : Message : Average mflops/s per call per node : 821243 +Grid : Message : Average mflops/s per call per node : 660904 +Grid : Message : Average mflops/s per call per node (full): 313374 +Grid : Message : Average mflops/s per call per node (full): 432784 +Grid : Message : Average mflops/s per call per node (full): 448213 +Grid : Message : Average mflops/s per call per node (full): 304931 +Grid : Message : Stencil 13.248 GB/s per node +Grid : Message : Stencil 16.5793 GB/s per node +Grid : Message : Stencil 17.6583 GB/s per node +Grid : Message : Stencil 13.0133 GB/s per node +Grid : Message : Average mflops/s per call per node : 667207 +Grid : Message : Average mflops/s per call per node : 801544 +Grid : Message : Average mflops/s per call per node : 820776 +Grid : Message : Average mflops/s per call per node : 664668 +Grid : Message : Average mflops/s per call per node (full): 315702 +Grid : Message : Average mflops/s per call per node (full): 434485 +Grid : Message : Average mflops/s per call per node 
(full): 445645 +Grid : Message : Average mflops/s per call per node (full): 305435 +Grid : Message : Stencil 13.0376 GB/s per node +Grid : Message : Stencil 16.7384 GB/s per node +Grid : Message : Stencil 17.5046 GB/s per node +Grid : Message : Stencil 12.7411 GB/s per node +Grid : Message : Average mflops/s per call per node : 665202 +Grid : Message : Average mflops/s per call per node : 801357 +Grid : Message : Average mflops/s per call per node : 822786 +Grid : Message : Average mflops/s per call per node : 661364 +Grid : Message : Average mflops/s per call per node (full): 314427 +Grid : Message : Average mflops/s per call per node (full): 437039 +Grid : Message : Average mflops/s per call per node (full): 444813 +Grid : Message : Average mflops/s per call per node (full): 304747 +Grid : Message : Stencil 13.1282 GB/s per node +Grid : Message : Stencil 17.8233 GB/s per node +Grid : Message : Stencil 17.638 GB/s per node +Grid : Message : Stencil 13.3392 GB/s per node +Grid : Message : Average mflops/s per call per node : 669809 +Grid : Message : Average mflops/s per call per node : 803166 +Grid : Message : Average mflops/s per call per node : 818551 +Grid : Message : Average mflops/s per call per node : 663436 +Grid : Message : Average mflops/s per call per node (full): 315817 +Grid : Message : Average mflops/s per call per node (full): 439138 +Grid : Message : Average mflops/s per call per node (full): 443409 +Grid : Message : Average mflops/s per call per node (full): 305966 +Grid : Message : Stencil 13.3358 GB/s per node +Grid : Message : Stencil 17.2628 GB/s per node +Grid : Message : Stencil 17.9616 GB/s per node +Grid : Message : Stencil 13.314 GB/s per node +Grid : Message : Average mflops/s per call per node : 668368 +Grid : Message : Average mflops/s per call per node : 801644 +Grid : Message : Average mflops/s per call per node : 823157 +Grid : Message : Average mflops/s per call per node : 661210 +Grid : Message : Average mflops/s per call per node 
(full): 315243 +Grid : Message : Average mflops/s per call per node (full): 438471 +Grid : Message : Average mflops/s per call per node (full): 446383 +Grid : Message : Average mflops/s per call per node (full): 305657 +Grid : Message : Stencil 14.2109 GB/s per node +Grid : Message : Stencil 18.6749 GB/s per node +Grid : Message : Stencil 17.7836 GB/s per node +Grid : Message : Stencil 12.6261 GB/s per node +Grid : Message : Average mflops/s per call per node : 666304 +Grid : Message : Average mflops/s per call per node : 798222 +Grid : Message : Average mflops/s per call per node : 817149 +Grid : Message : Average mflops/s per call per node : 665280 +Grid : Message : Average mflops/s per call per node (full): 316455 +Grid : Message : Average mflops/s per call per node (full): 440022 +Grid : Message : Average mflops/s per call per node (full): 439907 +Grid : Message : Average mflops/s per call per node (full): 301760 +Grid : Message : Stencil 15.4239 GB/s per node +Grid : Message : Stencil 9.59995 GB/s per node +Grid : Message : Stencil 17.6231 GB/s per node +Grid : Message : Stencil 12.2973 GB/s per node +Grid : Message : Average mflops/s per call per node : 662349 +Grid : Message : Average mflops/s per call per node : 810307 +Grid : Message : Average mflops/s per call per node : 818809 +Grid : Message : Average mflops/s per call per node : 665904 +Grid : Message : Average mflops/s per call per node (full): 317328 +Grid : Message : Average mflops/s per call per node (full): 306879 +Grid : Message : Average mflops/s per call per node (full): 442167 +Grid : Message : Average mflops/s per call per node (full): 303953 +Grid : Message : Stencil 14.5719 GB/s per node +Grid : Message : Stencil 17.0462 GB/s per node +Grid : Message : Stencil 17.0002 GB/s per node +Grid : Message : Stencil 13.1762 GB/s per node +Grid : Message : Average mflops/s per call per node : 667267 +Grid : Message : Average mflops/s per call per node : 803553 +Grid : Message : Average mflops/s per 
call per node : 824600 +Grid : Message : Average mflops/s per call per node : 666051 +Grid : Message : Average mflops/s per call per node (full): 317919 +Grid : Message : Average mflops/s per call per node (full): 438698 +Grid : Message : Average mflops/s per call per node (full): 441306 +Grid : Message : Average mflops/s per call per node (full): 306707 +Grid : Message : Stencil 13.3023 GB/s per node +Grid : Message : Stencil 16.7035 GB/s per node +Grid : Message : Stencil 17.7461 GB/s per node +Grid : Message : Stencil 13.8032 GB/s per node +Grid : Message : Average mflops/s per call per node : 669854 +Grid : Message : Average mflops/s per call per node : 802712 +Grid : Message : Average mflops/s per call per node : 820285 +Grid : Message : Average mflops/s per call per node : 661765 +Grid : Message : Average mflops/s per call per node (full): 316185 +Grid : Message : Average mflops/s per call per node (full): 436687 +Grid : Message : Average mflops/s per call per node (full): 445842 +Grid : Message : Average mflops/s per call per node (full): 306302 +Grid : Message : Stencil 13.788 GB/s per node +Grid : Message : Stencil 16.0696 GB/s per node +Grid : Message : Stencil 17.3617 GB/s per node +Grid : Message : Stencil 13.2007 GB/s per node +Grid : Message : Average mflops/s per call per node : 666466 +Grid : Message : Average mflops/s per call per node : 805791 +Grid : Message : Average mflops/s per call per node : 825870 +Grid : Message : Average mflops/s per call per node : 663506 +Grid : Message : Average mflops/s per call per node (full): 315594 +Grid : Message : Average mflops/s per call per node (full): 428294 +Grid : Message : Average mflops/s per call per node (full): 444063 +Grid : Message : Average mflops/s per call per node (full): 305500 +Grid : Message : Stencil 13.5511 GB/s per node +Grid : Message : Stencil 8.71329 GB/s per node +Grid : Message : Stencil 17.9582 GB/s per node +Grid : Message : Stencil 14.0022 GB/s per node +Grid : Message : Average 
mflops/s per call per node : 670027 +Grid : Message : Average mflops/s per call per node : 809139 +Grid : Message : Average mflops/s per call per node : 823711 +Grid : Message : Average mflops/s per call per node : 660064 +Grid : Message : Average mflops/s per call per node (full): 316759 +Grid : Message : Average mflops/s per call per node (full): 283689 +Grid : Message : Average mflops/s per call per node (full): 446189 +Grid : Message : Average mflops/s per call per node (full): 306759 +Grid : Message : Stencil 12.7662 GB/s per node +Grid : Message : Stencil 16.7982 GB/s per node +Grid : Message : Stencil 19.0757 GB/s per node +Grid : Message : Stencil 13.4446 GB/s per node +Grid : Message : Average mflops/s per call per node : 670826 +Grid : Message : Average mflops/s per call per node : 808418 +Grid : Message : Average mflops/s per call per node : 822184 +Grid : Message : Average mflops/s per call per node : 664689 +Grid : Message : Average mflops/s per call per node (full): 316037 +Grid : Message : Average mflops/s per call per node (full): 437563 +Grid : Message : Average mflops/s per call per node (full): 450183 +Grid : Message : Average mflops/s per call per node (full): 307322 +Grid : Message : Stencil 14.3459 GB/s per node +Grid : Message : Stencil 16.6088 GB/s per node +Grid : Message : Stencil 17.5173 GB/s per node +Grid : Message : Stencil 12.2539 GB/s per node +Grid : Message : Average mflops/s per call per node : 659901 +Grid : Message : Average mflops/s per call per node : 806660 +Grid : Message : Average mflops/s per call per node : 824506 +Grid : Message : Average mflops/s per call per node : 666331 +Grid : Message : Average mflops/s per call per node (full): 315648 +Grid : Message : Average mflops/s per call per node (full): 436396 +Grid : Message : Average mflops/s per call per node (full): 444001 +Grid : Message : Average mflops/s per call per node (full): 303238 +Grid : Message : Stencil 13.6295 GB/s per node +Grid : Message : Stencil 15.0441 
GB/s per node +Grid : Message : Stencil 16.959 GB/s per node +Grid : Message : Stencil 14.2892 GB/s per node +Grid : Message : Average mflops/s per call per node : 663747 +Grid : Message : Average mflops/s per call per node : 803580 +Grid : Message : Average mflops/s per call per node : 821219 +Grid : Message : Average mflops/s per call per node : 653734 +Grid : Message : Average mflops/s per call per node (full): 314459 +Grid : Message : Average mflops/s per call per node (full): 416182 +Grid : Message : Average mflops/s per call per node (full): 437365 +Grid : Message : Average mflops/s per call per node (full): 305736 +Grid : Message : Stencil 12.9195 GB/s per node +Grid : Message : Stencil 11.4968 GB/s per node +Grid : Message : Stencil 17.197 GB/s per node +Grid : Message : Stencil 13.6375 GB/s per node +Grid : Message : Average mflops/s per call per node : 665932 +Grid : Message : Average mflops/s per call per node : 808899 +Grid : Message : Average mflops/s per call per node : 827618 +Grid : Message : Average mflops/s per call per node : 663740 +Grid : Message : Average mflops/s per call per node (full): 314343 +Grid : Message : Average mflops/s per call per node (full): 350331 +Grid : Message : Average mflops/s per call per node (full): 437483 +Grid : Message : Average mflops/s per call per node (full): 303981 +Grid : Message : Stencil 13.2593 GB/s per node +Grid : Message : Stencil 16.3093 GB/s per node +Grid : Message : Stencil 17.8383 GB/s per node +Grid : Message : Stencil 12.615 GB/s per node +Grid : Message : Average mflops/s per call per node : 665928 +Grid : Message : Average mflops/s per call per node : 804355 +Grid : Message : Average mflops/s per call per node : 825301 +Grid : Message : Average mflops/s per call per node : 667432 +Grid : Message : Average mflops/s per call per node (full): 315382 +Grid : Message : Average mflops/s per call per node (full): 432655 +Grid : Message : Average mflops/s per call per node (full): 446212 +Grid : Message 
: Average mflops/s per call per node (full): 305544 +Grid : Message : Stencil 12.7735 GB/s per node +Grid : Message : Stencil 16.5774 GB/s per node +Grid : Message : Stencil 17.6756 GB/s per node +Grid : Message : Stencil 13.2914 GB/s per node +Grid : Message : Average mflops/s per call per node : 664773 +Grid : Message : Average mflops/s per call per node : 810050 +Grid : Message : Average mflops/s per call per node : 816671 +Grid : Message : Average mflops/s per call per node : 662241 +Grid : Message : Average mflops/s per call per node (full): 314265 +Grid : Message : Average mflops/s per call per node (full): 436636 +Grid : Message : Average mflops/s per call per node (full): 445247 +Grid : Message : Average mflops/s per call per node (full): 305656 +Grid : Message : Stencil 12.8344 GB/s per node +Grid : Message : Stencil 17.1858 GB/s per node +Grid : Message : Stencil 17.413 GB/s per node +Grid : Message : Stencil 12.3467 GB/s per node +Grid : Message : Average mflops/s per call per node : 666922 +Grid : Message : Average mflops/s per call per node : 802121 +Grid : Message : Average mflops/s per call per node : 826953 +Grid : Message : Average mflops/s per call per node : 665500 +Grid : Message : Average mflops/s per call per node (full): 313807 +Grid : Message : Average mflops/s per call per node (full): 436324 +Grid : Message : Average mflops/s per call per node (full): 445232 +Grid : Message : Average mflops/s per call per node (full): 302145 +Grid : Message : Stencil 15.1025 GB/s per node +Grid : Message : Stencil 9.66352 GB/s per node +Grid : Message : Stencil 17.8292 GB/s per node +Grid : Message : Stencil 14.5742 GB/s per node +Grid : Message : Average mflops/s per call per node : 662633 +Grid : Message : Average mflops/s per call per node : 812434 +Grid : Message : Average mflops/s per call per node : 826027 +Grid : Message : Average mflops/s per call per node : 655861 +Grid : Message : Average mflops/s per call per node (full): 317147 +Grid : Message 
: Average mflops/s per call per node (full): 308756 +Grid : Message : Average mflops/s per call per node (full): 447746 +Grid : Message : Average mflops/s per call per node (full): 306200 +Grid : Message : Stencil 13.9373 GB/s per node +Grid : Message : Stencil 15.1548 GB/s per node +Grid : Message : Stencil 17.3736 GB/s per node +Grid : Message : Stencil 13.8492 GB/s per node +Grid : Message : Average mflops/s per call per node : 664275 +Grid : Message : Average mflops/s per call per node : 805750 +Grid : Message : Average mflops/s per call per node : 823774 +Grid : Message : Average mflops/s per call per node : 661882 +Grid : Message : Average mflops/s per call per node (full): 315808 +Grid : Message : Average mflops/s per call per node (full): 418754 +Grid : Message : Average mflops/s per call per node (full): 443329 +Grid : Message : Average mflops/s per call per node (full): 305700 +Grid : Message : Stencil 13.141 GB/s per node +Grid : Message : Stencil 16.7009 GB/s per node +Grid : Message : Stencil 17.8167 GB/s per node +Grid : Message : Stencil 13.6201 GB/s per node +Grid : Message : Average mflops/s per call per node : 667151 +Grid : Message : Average mflops/s per call per node : 801398 +Grid : Message : Average mflops/s per call per node : 824131 +Grid : Message : Average mflops/s per call per node : 661978 +Grid : Message : Average mflops/s per call per node (full): 314659 +Grid : Message : Average mflops/s per call per node (full): 435937 +Grid : Message : Average mflops/s per call per node (full): 445330 +Grid : Message : Average mflops/s per call per node (full): 306688 +Grid : Message : Stencil 13.9323 GB/s per node +Grid : Message : Stencil 16.4841 GB/s per node +Grid : Message : Stencil 16.7951 GB/s per node +Grid : Message : Stencil 13.1655 GB/s per node +Grid : Message : Average mflops/s per call per node : 664108 +Grid : Message : Average mflops/s per call per node : 801113 +Grid : Message : Average mflops/s per call per node : 826510 +Grid : 
Message : Average mflops/s per call per node : 661766 +Grid : Message : Average mflops/s per call per node (full): 316038 +Grid : Message : Average mflops/s per call per node (full): 434101 +Grid : Message : Average mflops/s per call per node (full): 438453 +Grid : Message : Average mflops/s per call per node (full): 305318 +Grid : Message : Stencil 14.1698 GB/s per node +Grid : Message : Stencil 16.633 GB/s per node +Grid : Message : Stencil 17.4947 GB/s per node +Grid : Message : Stencil 13.9853 GB/s per node +Grid : Message : Average mflops/s per call per node : 662381 +Grid : Message : Average mflops/s per call per node : 804158 +Grid : Message : Average mflops/s per call per node : 825228 +Grid : Message : Average mflops/s per call per node : 659796 +Grid : Message : Average mflops/s per call per node (full): 315789 +Grid : Message : Average mflops/s per call per node (full): 431645 +Grid : Message : Average mflops/s per call per node (full): 444926 +Grid : Message : Average mflops/s per call per node (full): 306277 +Grid : Message : Stencil 14.2747 GB/s per node +Grid : Message : Stencil 16.6401 GB/s per node +Grid : Message : Stencil 18.2928 GB/s per node +Grid : Message : Stencil 13.065 GB/s per node +Grid : Message : Average mflops/s per call per node : 665096 +Grid : Message : Average mflops/s per call per node : 804366 +Grid : Message : Average mflops/s per call per node : 820500 +Grid : Message : Average mflops/s per call per node : 666332 +Grid : Message : Average mflops/s per call per node (full): 317067 +Grid : Message : Average mflops/s per call per node (full): 436069 +Grid : Message : Average mflops/s per call per node (full): 446419 +Grid : Message : Average mflops/s per call per node (full): 306089 +Grid : Message : Stencil 14.6541 GB/s per node +Grid : Message : Stencil 16.9482 GB/s per node +Grid : Message : Stencil 17.7746 GB/s per node +Grid : Message : Stencil 12.6571 GB/s per node +Grid : Message : Average mflops/s per call per node : 
664825 +Grid : Message : Average mflops/s per call per node : 805429 +Grid : Message : Average mflops/s per call per node : 816785 +Grid : Message : Average mflops/s per call per node : 664280 +Grid : Message : Average mflops/s per call per node (full): 316962 +Grid : Message : Average mflops/s per call per node (full): 437622 +Grid : Message : Average mflops/s per call per node (full): 445335 +Grid : Message : Average mflops/s per call per node (full): 303974 +Grid : Message : Stencil 14.2193 GB/s per node +Grid : Message : Stencil 16.7243 GB/s per node +Grid : Message : Stencil 17.3418 GB/s per node +Grid : Message : Stencil 12.8809 GB/s per node +Grid : Message : Average mflops/s per call per node : 666393 +Grid : Message : Average mflops/s per call per node : 810603 +Grid : Message : Average mflops/s per call per node : 818960 +Grid : Message : Average mflops/s per call per node : 665596 +Grid : Message : Average mflops/s per call per node (full): 316810 +Grid : Message : Average mflops/s per call per node (full): 437826 +Grid : Message : Average mflops/s per call per node (full): 443884 +Grid : Message : Average mflops/s per call per node (full): 305181 +Grid : Message : Stencil 13.7367 GB/s per node +Grid : Message : Stencil 13.7684 GB/s per node +Grid : Message : Stencil 17.1511 GB/s per node +Grid : Message : Stencil 12.483 GB/s per node +Grid : Message : Average mflops/s per call per node : 665979 +Grid : Message : Average mflops/s per call per node : 805947 +Grid : Message : Average mflops/s per call per node : 821545 +Grid : Message : Average mflops/s per call per node : 663268 +Grid : Message : Average mflops/s per call per node (full): 316166 +Grid : Message : Average mflops/s per call per node (full): 395981 +Grid : Message : Average mflops/s per call per node (full): 442434 +Grid : Message : Average mflops/s per call per node (full): 303984 +Grid : Message : Stencil 13.9408 GB/s per node +Grid : Message : Stencil 17.3043 GB/s per node +Grid : Message 
: Stencil 17.7234 GB/s per node +Grid : Message : Stencil 14.3152 GB/s per node +Grid : Message : Average mflops/s per call per node : 667601 +Grid : Message : Average mflops/s per call per node : 801981 +Grid : Message : Average mflops/s per call per node : 822114 +Grid : Message : Average mflops/s per call per node : 662198 +Grid : Message : Average mflops/s per call per node (full): 316300 +Grid : Message : Average mflops/s per call per node (full): 438069 +Grid : Message : Average mflops/s per call per node (full): 445210 +Grid : Message : Average mflops/s per call per node (full): 306345 +Grid : Message : Stencil 12.5711 GB/s per node +Grid : Message : Stencil 15.0578 GB/s per node +Grid : Message : Stencil 17.8285 GB/s per node +Grid : Message : Stencil 12.6284 GB/s per node +Grid : Message : Average mflops/s per call per node : 671125 +Grid : Message : Average mflops/s per call per node : 806146 +Grid : Message : Average mflops/s per call per node : 821990 +Grid : Message : Average mflops/s per call per node : 661048 +Grid : Message : Average mflops/s per call per node (full): 312972 +Grid : Message : Average mflops/s per call per node (full): 416531 +Grid : Message : Average mflops/s per call per node (full): 446886 +Grid : Message : Average mflops/s per call per node (full): 302882 +Grid : Message : Stencil 12.5281 GB/s per node +Grid : Message : Stencil 15.8613 GB/s per node +Grid : Message : Stencil 17.6163 GB/s per node +Grid : Message : Stencil 13.0894 GB/s per node +Grid : Message : Average mflops/s per call per node : 670975 +Grid : Message : Average mflops/s per call per node : 801534 +Grid : Message : Average mflops/s per call per node : 823667 +Grid : Message : Average mflops/s per call per node : 663701 +Grid : Message : Average mflops/s per call per node (full): 314256 +Grid : Message : Average mflops/s per call per node (full): 425895 +Grid : Message : Average mflops/s per call per node (full): 444898 +Grid : Message : Average mflops/s per call 
per node (full): 305136 +Grid : Message : Stencil 13.8193 GB/s per node +Grid : Message : Stencil 16.2551 GB/s per node +Grid : Message : Stencil 17.1763 GB/s per node +Grid : Message : Stencil 13.4658 GB/s per node +Grid : Message : Average mflops/s per call per node : 666654 +Grid : Message : Average mflops/s per call per node : 798824 +Grid : Message : Average mflops/s per call per node : 822433 +Grid : Message : Average mflops/s per call per node : 662772 +Grid : Message : Average mflops/s per call per node (full): 316261 +Grid : Message : Average mflops/s per call per node (full): 431431 +Grid : Message : Average mflops/s per call per node (full): 442904 +Grid : Message : Average mflops/s per call per node (full): 306560 +Grid : Message : Stencil 14.0584 GB/s per node +Grid : Message : Stencil 16.9526 GB/s per node +Grid : Message : Stencil 16.855 GB/s per node +Grid : Message : Stencil 12.106 GB/s per node +Grid : Message : Average mflops/s per call per node : 667419 +Grid : Message : Average mflops/s per call per node : 801288 +Grid : Message : Average mflops/s per call per node : 828394 +Grid : Message : Average mflops/s per call per node : 664532 +Grid : Message : Average mflops/s per call per node (full): 315332 +Grid : Message : Average mflops/s per call per node (full): 430937 +Grid : Message : Average mflops/s per call per node (full): 439427 +Grid : Message : Average mflops/s per call per node (full): 302734 +Grid : Message : Stencil 12.7819 GB/s per node +Grid : Message : Stencil 11.0133 GB/s per node +Grid : Message : Stencil 17.3597 GB/s per node +Grid : Message : Stencil 12.6736 GB/s per node +Grid : Message : Average mflops/s per call per node : 671730 +Grid : Message : Average mflops/s per call per node : 814118 +Grid : Message : Average mflops/s per call per node : 824538 +Grid : Message : Average mflops/s per call per node : 668701 +Grid : Message : Average mflops/s per call per node (full): 315767 +Grid : Message : Average mflops/s per call 
per node (full): 340190 +Grid : Message : Average mflops/s per call per node (full): 444378 +Grid : Message : Average mflops/s per call per node (full): 305611 +Grid : Message : Stencil 13.3355 GB/s per node +Grid : Message : Stencil 17.0643 GB/s per node +Grid : Message : Stencil 17.77 GB/s per node +Grid : Message : Stencil 12.2757 GB/s per node +Grid : Message : Average mflops/s per call per node : 668068 +Grid : Message : Average mflops/s per call per node : 804577 +Grid : Message : Average mflops/s per call per node : 824356 +Grid : Message : Average mflops/s per call per node : 668566 +Grid : Message : Average mflops/s per call per node (full): 315740 +Grid : Message : Average mflops/s per call per node (full): 439670 +Grid : Message : Average mflops/s per call per node (full): 446852 +Grid : Message : Average mflops/s per call per node (full): 304567 +Grid : Message : Stencil 13.6138 GB/s per node +Grid : Message : Stencil 14.3006 GB/s per node +Grid : Message : Stencil 18.8566 GB/s per node +Grid : Message : Stencil 12.6303 GB/s per node +Grid : Message : Average mflops/s per call per node : 665029 +Grid : Message : Average mflops/s per call per node : 806340 +Grid : Message : Average mflops/s per call per node : 824355 +Grid : Message : Average mflops/s per call per node : 668042 +Grid : Message : Average mflops/s per call per node (full): 316228 +Grid : Message : Average mflops/s per call per node (full): 404877 +Grid : Message : Average mflops/s per call per node (full): 450779 +Grid : Message : Average mflops/s per call per node (full): 305727 +Grid : Message : Stencil 12.594 GB/s per node +Grid : Message : Stencil 16.5838 GB/s per node +Grid : Message : Stencil 17.822 GB/s per node +Grid : Message : Stencil 12.2811 GB/s per node +Grid : Message : Average mflops/s per call per node : 665494 +Grid : Message : Average mflops/s per call per node : 803374 +Grid : Message : Average mflops/s per call per node : 825090 +Grid : Message : Average mflops/s per 
call per node : 667290 +Grid : Message : Average mflops/s per call per node (full): 311572 +Grid : Message : Average mflops/s per call per node (full): 435649 +Grid : Message : Average mflops/s per call per node (full): 444993 +Grid : Message : Average mflops/s per call per node (full): 304229 +Grid : Message : Stencil 12.9542 GB/s per node +Grid : Message : Stencil 14.1127 GB/s per node +Grid : Message : Stencil 18.8473 GB/s per node +Grid : Message : Stencil 14.0909 GB/s per node +Grid : Message : Average mflops/s per call per node : 663959 +Grid : Message : Average mflops/s per call per node : 807391 +Grid : Message : Average mflops/s per call per node : 822112 +Grid : Message : Average mflops/s per call per node : 662313 +Grid : Message : Average mflops/s per call per node (full): 313998 +Grid : Message : Average mflops/s per call per node (full): 400634 +Grid : Message : Average mflops/s per call per node (full): 450565 +Grid : Message : Average mflops/s per call per node (full): 305393 +Grid : Message : Stencil 13.4408 GB/s per node +Grid : Message : Stencil 17.5697 GB/s per node +Grid : Message : Stencil 17.3413 GB/s per node +Grid : Message : Stencil 13.1107 GB/s per node +Grid : Message : Average mflops/s per call per node : 665884 +Grid : Message : Average mflops/s per call per node : 805244 +Grid : Message : Average mflops/s per call per node : 822770 +Grid : Message : Average mflops/s per call per node : 663204 +Grid : Message : Average mflops/s per call per node (full): 316076 +Grid : Message : Average mflops/s per call per node (full): 440380 +Grid : Message : Average mflops/s per call per node (full): 444103 +Grid : Message : Average mflops/s per call per node (full): 304950 +Grid : Message : Stencil 13.5491 GB/s per node +Grid : Message : Stencil 16.4245 GB/s per node +Grid : Message : Stencil 17.5451 GB/s per node +Grid : Message : Stencil 12.533 GB/s per node +Grid : Message : Average mflops/s per call per node : 666679 +Grid : Message : Average 
mflops/s per call per node : 801902 +Grid : Message : Average mflops/s per call per node : 822733 +Grid : Message : Average mflops/s per call per node : 667394 +Grid : Message : Average mflops/s per call per node (full): 317129 +Grid : Message : Average mflops/s per call per node (full): 434095 +Grid : Message : Average mflops/s per call per node (full): 443780 +Grid : Message : Average mflops/s per call per node (full): 305218 +Grid : Message : Stencil 13.6534 GB/s per node +Grid : Message : Stencil 13.8195 GB/s per node +Grid : Message : Stencil 17.2644 GB/s per node +Grid : Message : Stencil 12.3142 GB/s per node +Grid : Message : Average mflops/s per call per node : 667011 +Grid : Message : Average mflops/s per call per node : 810067 +Grid : Message : Average mflops/s per call per node : 821524 +Grid : Message : Average mflops/s per call per node : 671276 +Grid : Message : Average mflops/s per call per node (full): 317031 +Grid : Message : Average mflops/s per call per node (full): 396478 +Grid : Message : Average mflops/s per call per node (full): 443169 +Grid : Message : Average mflops/s per call per node (full): 303807 +Grid : Message : Stencil 13.3901 GB/s per node +Grid : Message : Stencil 18.0472 GB/s per node +Grid : Message : Stencil 17.2049 GB/s per node +Grid : Message : Stencil 13.2758 GB/s per node +Grid : Message : Average mflops/s per call per node : 665144 +Grid : Message : Average mflops/s per call per node : 798251 +Grid : Message : Average mflops/s per call per node : 820132 +Grid : Message : Average mflops/s per call per node : 667648 +Grid : Message : Average mflops/s per call per node (full): 316090 +Grid : Message : Average mflops/s per call per node (full): 437012 +Grid : Message : Average mflops/s per call per node (full): 438289 +Grid : Message : Average mflops/s per call per node (full): 306613 +Grid : Message : Stencil 13.1945 GB/s per node +Grid : Message : Stencil 17.1718 GB/s per node +Grid : Message : Stencil 17.3392 GB/s per node 
+Grid : Message : Stencil 11.9572 GB/s per node +Grid : Message : Average mflops/s per call per node : 667084 +Grid : Message : Average mflops/s per call per node : 796989 +Grid : Message : Average mflops/s per call per node : 824578 +Grid : Message : Average mflops/s per call per node : 666011 +Grid : Message : Average mflops/s per call per node (full): 316372 +Grid : Message : Average mflops/s per call per node (full): 435100 +Grid : Message : Average mflops/s per call per node (full): 443137 +Grid : Message : Average mflops/s per call per node (full): 301185 +Grid : Message : Stencil 12.8059 GB/s per node +Grid : Message : Stencil 16.412 GB/s per node +Grid : Message : Stencil 17.7064 GB/s per node +Grid : Message : Stencil 12.3333 GB/s per node +Grid : Message : Average mflops/s per call per node : 669057 +Grid : Message : Average mflops/s per call per node : 803586 +Grid : Message : Average mflops/s per call per node : 823735 +Grid : Message : Average mflops/s per call per node : 668583 +Grid : Message : Average mflops/s per call per node (full): 315115 +Grid : Message : Average mflops/s per call per node (full): 434082 +Grid : Message : Average mflops/s per call per node (full): 445782 +Grid : Message : Average mflops/s per call per node (full): 303832 +Grid : Message : Stencil 12.9454 GB/s per node +Grid : Message : Stencil 9.35683 GB/s per node +Grid : Message : Stencil 17.5506 GB/s per node +Grid : Message : Stencil 12.7689 GB/s per node +Grid : Message : Average mflops/s per call per node : 663821 +Grid : Message : Average mflops/s per call per node : 811170 +Grid : Message : Average mflops/s per call per node : 818730 +Grid : Message : Average mflops/s per call per node : 662393 +Grid : Message : Average mflops/s per call per node (full): 313741 +Grid : Message : Average mflops/s per call per node (full): 301128 +Grid : Message : Average mflops/s per call per node (full): 444151 +Grid : Message : Average mflops/s per call per node (full): 305199 +Grid : 
Message : Stencil 13.8853 GB/s per node +Grid : Message : Stencil 17.2209 GB/s per node +Grid : Message : Stencil 17.2958 GB/s per node +Grid : Message : Stencil 13.6379 GB/s per node +Grid : Message : Average mflops/s per call per node : 667527 +Grid : Message : Average mflops/s per call per node : 799978 +Grid : Message : Average mflops/s per call per node : 827915 +Grid : Message : Average mflops/s per call per node : 666053 +Grid : Message : Average mflops/s per call per node (full): 313304 +Grid : Message : Average mflops/s per call per node (full): 437343 +Grid : Message : Average mflops/s per call per node (full): 443923 +Grid : Message : Average mflops/s per call per node (full): 307158 +Grid : Message : Stencil 13.6277 GB/s per node +Grid : Message : Stencil 16.4862 GB/s per node +Grid : Message : Stencil 18.3482 GB/s per node +Grid : Message : Stencil 12.4 GB/s per node +Grid : Message : Average mflops/s per call per node : 664650 +Grid : Message : Average mflops/s per call per node : 803574 +Grid : Message : Average mflops/s per call per node : 818372 +Grid : Message : Average mflops/s per call per node : 661592 +Grid : Message : Average mflops/s per call per node (full): 316026 +Grid : Message : Average mflops/s per call per node (full): 434754 +Grid : Message : Average mflops/s per call per node (full): 445993 +Grid : Message : Average mflops/s per call per node (full): 302797 +Grid : Message : Stencil 12.6584 GB/s per node +Grid : Message : Stencil 14.6628 GB/s per node +Grid : Message : Stencil 17.4541 GB/s per node +Grid : Message : Stencil 12.0402 GB/s per node +Grid : Message : Average mflops/s per call per node : 666614 +Grid : Message : Average mflops/s per call per node : 808074 +Grid : Message : Average mflops/s per call per node : 820717 +Grid : Message : Average mflops/s per call per node : 670324 +Grid : Message : Average mflops/s per call per node (full): 312049 +Grid : Message : Average mflops/s per call per node (full): 410942 +Grid : 
Message : Average mflops/s per call per node (full): 445355 +Grid : Message : Average mflops/s per call per node (full): 303320 +Grid : Message : Stencil 12.9024 GB/s per node +Grid : Message : Stencil 16.444 GB/s per node +Grid : Message : Stencil 17.3399 GB/s per node +Grid : Message : Stencil 14.0399 GB/s per node +Grid : Message : Average mflops/s per call per node : 665405 +Grid : Message : Average mflops/s per call per node : 808103 +Grid : Message : Average mflops/s per call per node : 821609 +Grid : Message : Average mflops/s per call per node : 665294 +Grid : Message : Average mflops/s per call per node (full): 313606 +Grid : Message : Average mflops/s per call per node (full): 434873 +Grid : Message : Average mflops/s per call per node (full): 444264 +Grid : Message : Average mflops/s per call per node (full): 306771 +Grid : Message : Stencil 13.9308 GB/s per node +Grid : Message : Stencil 17.4158 GB/s per node +Grid : Message : Stencil 18.4423 GB/s per node +Grid : Message : Stencil 12.855 GB/s per node +Grid : Message : Average mflops/s per call per node : 665272 +Grid : Message : Average mflops/s per call per node : 797460 +Grid : Message : Average mflops/s per call per node : 823126 +Grid : Message : Average mflops/s per call per node : 662335 +Grid : Message : Average mflops/s per call per node (full): 314975 +Grid : Message : Average mflops/s per call per node (full): 435467 +Grid : Message : Average mflops/s per call per node (full): 449253 +Grid : Message : Average mflops/s per call per node (full): 305424 +Grid : Message : Stencil 12.9925 GB/s per node +Grid : Message : Stencil 16.3539 GB/s per node +Grid : Message : Stencil 17.2369 GB/s per node +Grid : Message : Stencil 13.4937 GB/s per node +Grid : Message : Average mflops/s per call per node : 669940 +Grid : Message : Average mflops/s per call per node : 803238 +Grid : Message : Average mflops/s per call per node : 825965 +Grid : Message : Average mflops/s per call per node : 663766 +Grid : 
Message : Average mflops/s per call per node (full): 314761 +Grid : Message : Average mflops/s per call per node (full): 434155 +Grid : Message : Average mflops/s per call per node (full): 444234 +Grid : Message : Average mflops/s per call per node (full): 299831 +Grid : Message : Stencil 14.106 GB/s per node +Grid : Message : Stencil 16.6361 GB/s per node +Grid : Message : Stencil 17.0601 GB/s per node +Grid : Message : Stencil 12.1837 GB/s per node +Grid : Message : Average mflops/s per call per node : 660382 +Grid : Message : Average mflops/s per call per node : 804401 +Grid : Message : Average mflops/s per call per node : 823592 +Grid : Message : Average mflops/s per call per node : 670380 +Grid : Message : Average mflops/s per call per node (full): 315078 +Grid : Message : Average mflops/s per call per node (full): 432738 +Grid : Message : Average mflops/s per call per node (full): 441811 +Grid : Message : Average mflops/s per call per node (full): 303707 +Grid : Message : Stencil 12.9924 GB/s per node +Grid : Message : Stencil 17.4822 GB/s per node +Grid : Message : Stencil 16.9255 GB/s per node +Grid : Message : Stencil 12.131 GB/s per node +Grid : Message : Average mflops/s per call per node : 666019 +Grid : Message : Average mflops/s per call per node : 804316 +Grid : Message : Average mflops/s per call per node : 828336 +Grid : Message : Average mflops/s per call per node : 669150 +Grid : Message : Average mflops/s per call per node (full): 314690 +Grid : Message : Average mflops/s per call per node (full): 438882 +Grid : Message : Average mflops/s per call per node (full): 440236 +Grid : Message : Average mflops/s per call per node (full): 303263 +Grid : Message : Stencil 12.9286 GB/s per node +Grid : Message : Stencil 16.5674 GB/s per node +Grid : Message : Stencil 17.4028 GB/s per node +Grid : Message : Stencil 12.2927 GB/s per node +Grid : Message : Average mflops/s per call per node : 663144 +Grid : Message : Average mflops/s per call per node : 
802347 +Grid : Message : Average mflops/s per call per node : 822576 +Grid : Message : Average mflops/s per call per node : 664923 +Grid : Message : Average mflops/s per call per node (full): 313191 +Grid : Message : Average mflops/s per call per node (full): 434915 +Grid : Message : Average mflops/s per call per node (full): 443819 +Grid : Message : Average mflops/s per call per node (full): 303684 +Grid : Message : Stencil 12.9843 GB/s per node +Grid : Message : Stencil 16.5282 GB/s per node +Grid : Message : Stencil 16.3675 GB/s per node +Grid : Message : Stencil 12.5887 GB/s per node +Grid : Message : Average mflops/s per call per node : 663888 +Grid : Message : Average mflops/s per call per node : 806737 +Grid : Message : Average mflops/s per call per node : 820169 +Grid : Message : Average mflops/s per call per node : 668303 +Grid : Message : Average mflops/s per call per node (full): 314496 +Grid : Message : Average mflops/s per call per node (full): 434932 +Grid : Message : Average mflops/s per call per node (full): 424715 +Grid : Message : Average mflops/s per call per node (full): 305269 +Grid : Message : Stencil 14.4789 GB/s per node +Grid : Message : Stencil 9.60314 GB/s per node +Grid : Message : Stencil 17.4542 GB/s per node +Grid : Message : Stencil 12.7538 GB/s per node +Grid : Message : Average mflops/s per call per node : 661383 +Grid : Message : Average mflops/s per call per node : 814741 +Grid : Message : Average mflops/s per call per node : 824469 +Grid : Message : Average mflops/s per call per node : 665327 +Grid : Message : Average mflops/s per call per node (full): 316536 +Grid : Message : Average mflops/s per call per node (full): 306930 +Grid : Message : Average mflops/s per call per node (full): 444745 +Grid : Message : Average mflops/s per call per node (full): 304862 +Grid : Message : Stencil 14.1053 GB/s per node +Grid : Message : Stencil 17.8723 GB/s per node +Grid : Message : Stencil 17.2534 GB/s per node +Grid : Message : Stencil 
12.2174 GB/s per node +Grid : Message : Average mflops/s per call per node : 661647 +Grid : Message : Average mflops/s per call per node : 803667 +Grid : Message : Average mflops/s per call per node : 821296 +Grid : Message : Average mflops/s per call per node : 667759 +Grid : Message : Average mflops/s per call per node (full): 314634 +Grid : Message : Average mflops/s per call per node (full): 438959 +Grid : Message : Average mflops/s per call per node (full): 442505 +Grid : Message : Average mflops/s per call per node (full): 304194 +Grid : Message : Stencil 13.0328 GB/s per node +Grid : Message : Stencil 17.0942 GB/s per node +Grid : Message : Stencil 18.1395 GB/s per node +Grid : Message : Stencil 12.7188 GB/s per node +Grid : Message : Average mflops/s per call per node : 666217 +Grid : Message : Average mflops/s per call per node : 804184 +Grid : Message : Average mflops/s per call per node : 821901 +Grid : Message : Average mflops/s per call per node : 665614 +Grid : Message : Average mflops/s per call per node (full): 315688 +Grid : Message : Average mflops/s per call per node (full): 438573 +Grid : Message : Average mflops/s per call per node (full): 443813 +Grid : Message : Average mflops/s per call per node (full): 305679 +Grid : Message : Stencil 12.8203 GB/s per node +Grid : Message : Stencil 16.6636 GB/s per node +Grid : Message : Stencil 17.7953 GB/s per node +Grid : Message : Stencil 14.0385 GB/s per node +Grid : Message : Average mflops/s per call per node : 671915 +Grid : Message : Average mflops/s per call per node : 806958 +Grid : Message : Average mflops/s per call per node : 827955 +Grid : Message : Average mflops/s per call per node : 661803 +Grid : Message : Average mflops/s per call per node (full): 315523 +Grid : Message : Average mflops/s per call per node (full): 435924 +Grid : Message : Average mflops/s per call per node (full): 447476 +Grid : Message : Average mflops/s per call per node (full): 306720 +Grid : Message : Stencil 13.0128 
GB/s per node +Grid : Message : Stencil 16.5301 GB/s per node +Grid : Message : Stencil 17.1159 GB/s per node +Grid : Message : Stencil 12.2497 GB/s per node +Grid : Message : Average mflops/s per call per node : 668199 +Grid : Message : Average mflops/s per call per node : 804835 +Grid : Message : Average mflops/s per call per node : 824454 +Grid : Message : Average mflops/s per call per node : 672602 +Grid : Message : Average mflops/s per call per node (full): 315815 +Grid : Message : Average mflops/s per call per node (full): 435739 +Grid : Message : Average mflops/s per call per node (full): 441843 +Grid : Message : Average mflops/s per call per node (full): 303662 +Grid : Message : Stencil 12.9599 GB/s per node +Grid : Message : Stencil 16.5674 GB/s per node +Grid : Message : Stencil 18.6981 GB/s per node +Grid : Message : Stencil 12.4395 GB/s per node +Grid : Message : Average mflops/s per call per node : 668982 +Grid : Message : Average mflops/s per call per node : 805101 +Grid : Message : Average mflops/s per call per node : 819369 +Grid : Message : Average mflops/s per call per node : 662323 +Grid : Message : Average mflops/s per call per node (full): 316156 +Grid : Message : Average mflops/s per call per node (full): 434381 +Grid : Message : Average mflops/s per call per node (full): 447002 +Grid : Message : Average mflops/s per call per node (full): 301518 +Grid : Message : Stencil 13.6958 GB/s per node +Grid : Message : Stencil 17.0557 GB/s per node +Grid : Message : Stencil 17.5598 GB/s per node +Grid : Message : Stencil 12.6534 GB/s per node +Grid : Message : Average mflops/s per call per node : 667324 +Grid : Message : Average mflops/s per call per node : 800973 +Grid : Message : Average mflops/s per call per node : 824928 +Grid : Message : Average mflops/s per call per node : 662691 +Grid : Message : Average mflops/s per call per node (full): 316434 +Grid : Message : Average mflops/s per call per node (full): 435862 +Grid : Message : Average 
mflops/s per call per node (full): 445696 +Grid : Message : Average mflops/s per call per node (full): 303382 +Grid : Message : Stencil 13.1224 GB/s per node +Grid : Message : Stencil 17.4906 GB/s per node +Grid : Message : Stencil 17.5314 GB/s per node +Grid : Message : Stencil 12.9159 GB/s per node +Grid : Message : Average mflops/s per call per node : 669689 +Grid : Message : Average mflops/s per call per node : 801373 +Grid : Message : Average mflops/s per call per node : 823561 +Grid : Message : Average mflops/s per call per node : 663627 +Grid : Message : Average mflops/s per call per node (full): 314853 +Grid : Message : Average mflops/s per call per node (full): 438798 +Grid : Message : Average mflops/s per call per node (full): 446137 +Grid : Message : Average mflops/s per call per node (full): 305165 +Grid : Message : Stencil 13.3894 GB/s per node +Grid : Message : Stencil 17.0121 GB/s per node +Grid : Message : Stencil 18.0152 GB/s per node +Grid : Message : Stencil 13.0537 GB/s per node +Grid : Message : Average mflops/s per call per node : 668239 +Grid : Message : Average mflops/s per call per node : 803511 +Grid : Message : Average mflops/s per call per node : 820475 +Grid : Message : Average mflops/s per call per node : 668274 +Grid : Message : Average mflops/s per call per node (full): 315111 +Grid : Message : Average mflops/s per call per node (full): 434183 +Grid : Message : Average mflops/s per call per node (full): 441978 +Grid : Message : Average mflops/s per call per node (full): 305313 +Grid : Message : Stencil 13.515 GB/s per node +Grid : Message : Stencil 10.2688 GB/s per node +Grid : Message : Stencil 17.7445 GB/s per node +Grid : Message : Stencil 13.5992 GB/s per node +Grid : Message : Average mflops/s per call per node : 667074 +Grid : Message : Average mflops/s per call per node : 810297 +Grid : Message : Average mflops/s per call per node : 827854 +Grid : Message : Average mflops/s per call per node : 666083 +Grid : Message : Average 
mflops/s per call per node (full): 314808 +Grid : Message : Average mflops/s per call per node (full): 323184 +Grid : Message : Average mflops/s per call per node (full): 447110 +Grid : Message : Average mflops/s per call per node (full): 307365 +Grid : Message : Stencil 12.6776 GB/s per node +Grid : Message : Stencil 17.9313 GB/s per node +Grid : Message : Stencil 18.4433 GB/s per node +Grid : Message : Stencil 11.8563 GB/s per node +Grid : Message : Average mflops/s per call per node : 668535 +Grid : Message : Average mflops/s per call per node : 806107 +Grid : Message : Average mflops/s per call per node : 824265 +Grid : Message : Average mflops/s per call per node : 668328 +Grid : Message : Average mflops/s per call per node (full): 314950 +Grid : Message : Average mflops/s per call per node (full): 440526 +Grid : Message : Average mflops/s per call per node (full): 449722 +Grid : Message : Average mflops/s per call per node (full): 300696 +Grid : Message : Stencil 15.4819 GB/s per node +Grid : Message : Stencil 16.8911 GB/s per node +Grid : Message : Stencil 20.2369 GB/s per node +Grid : Message : Stencil 12.9396 GB/s per node +Grid : Message : Average mflops/s per call per node : 659746 +Grid : Message : Average mflops/s per call per node : 805149 +Grid : Message : Average mflops/s per call per node : 825933 +Grid : Message : Average mflops/s per call per node : 668331 +Grid : Message : Average mflops/s per call per node (full): 316586 +Grid : Message : Average mflops/s per call per node (full): 437437 +Grid : Message : Average mflops/s per call per node (full): 452041 +Grid : Message : Average mflops/s per call per node (full): 306154 +Grid : Message : Stencil 13.272 GB/s per node +Grid : Message : Stencil 17.3632 GB/s per node +Grid : Message : Stencil 19.4385 GB/s per node +Grid : Message : Stencil 12.092 GB/s per node +Grid : Message : Average mflops/s per call per node : 663519 +Grid : Message : Average mflops/s per call per node : 805282 +Grid : Message 
: Average mflops/s per call per node : 817363 +Grid : Message : Average mflops/s per call per node : 671292 +Grid : Message : Average mflops/s per call per node (full): 314996 +Grid : Message : Average mflops/s per call per node (full): 437586 +Grid : Message : Average mflops/s per call per node (full): 450000 +Grid : Message : Average mflops/s per call per node (full): 302839 +Grid : Message : Stencil 13.126 GB/s per node +Grid : Message : Stencil 17.6082 GB/s per node +Grid : Message : Stencil 17.4959 GB/s per node +Grid : Message : Stencil 12.5887 GB/s per node +Grid : Message : Average mflops/s per call per node : 666639 +Grid : Message : Average mflops/s per call per node : 804402 +Grid : Message : Average mflops/s per call per node : 824793 +Grid : Message : Average mflops/s per call per node : 663605 +Grid : Message : Average mflops/s per call per node (full): 314758 +Grid : Message : Average mflops/s per call per node (full): 439559 +Grid : Message : Average mflops/s per call per node (full): 445120 +Grid : Message : Average mflops/s per call per node (full): 300752 +Grid : Message : Stencil 14.3865 GB/s per node +Grid : Message : Stencil 13.7631 GB/s per node +Grid : Message : Stencil 17.914 GB/s per node +Grid : Message : Stencil 13.5841 GB/s per node +Grid : Message : Average mflops/s per call per node : 661551 +Grid : Message : Average mflops/s per call per node : 813095 +Grid : Message : Average mflops/s per call per node : 817403 +Grid : Message : Average mflops/s per call per node : 664747 +Grid : Message : Average mflops/s per call per node (full): 315777 +Grid : Message : Average mflops/s per call per node (full): 395402 +Grid : Message : Average mflops/s per call per node (full): 446025 +Grid : Message : Average mflops/s per call per node (full): 307213 +Grid : Message : Stencil 13.154 GB/s per node +Grid : Message : Stencil 16.589 GB/s per node +Grid : Message : Stencil 17.3772 GB/s per node +Grid : Message : Stencil 12.6145 GB/s per node +Grid : 
Message : Average mflops/s per call per node : 668811 +Grid : Message : Average mflops/s per call per node : 808048 +Grid : Message : Average mflops/s per call per node : 823148 +Grid : Message : Average mflops/s per call per node : 663637 +Grid : Message : Average mflops/s per call per node (full): 316526 +Grid : Message : Average mflops/s per call per node (full): 436480 +Grid : Message : Average mflops/s per call per node (full): 445049 +Grid : Message : Average mflops/s per call per node (full): 304377 +Grid : Message : Stencil 13.6708 GB/s per node +Grid : Message : Stencil 16.7315 GB/s per node +Grid : Message : Stencil 19.2646 GB/s per node +Grid : Message : Stencil 12.3974 GB/s per node +Grid : Message : Average mflops/s per call per node : 664753 +Grid : Message : Average mflops/s per call per node : 804731 +Grid : Message : Average mflops/s per call per node : 825383 +Grid : Message : Average mflops/s per call per node : 670540 +Grid : Message : Average mflops/s per call per node (full): 314606 +Grid : Message : Average mflops/s per call per node (full): 436933 +Grid : Message : Average mflops/s per call per node (full): 450415 +Grid : Message : Average mflops/s per call per node (full): 305052 +Grid : Message : Stencil 14.1461 GB/s per node +Grid : Message : Stencil 17.0066 GB/s per node +Grid : Message : Stencil 17.0841 GB/s per node +Grid : Message : Stencil 12.017 GB/s per node +Grid : Message : Average mflops/s per call per node : 664273 +Grid : Message : Average mflops/s per call per node : 803468 +Grid : Message : Average mflops/s per call per node : 825833 +Grid : Message : Average mflops/s per call per node : 666602 +Grid : Message : Average mflops/s per call per node (full): 316771 +Grid : Message : Average mflops/s per call per node (full): 436762 +Grid : Message : Average mflops/s per call per node (full): 442465 +Grid : Message : Average mflops/s per call per node (full): 302696 +Grid : Message : Stencil 13.5987 GB/s per node +Grid : Message 
: Stencil 16.8984 GB/s per node +Grid : Message : Stencil 17.6919 GB/s per node +Grid : Message : Stencil 12.3808 GB/s per node +Grid : Message : Average mflops/s per call per node : 667584 +Grid : Message : Average mflops/s per call per node : 801722 +Grid : Message : Average mflops/s per call per node : 823101 +Grid : Message : Average mflops/s per call per node : 669154 +Grid : Message : Average mflops/s per call per node (full): 316780 +Grid : Message : Average mflops/s per call per node (full): 438047 +Grid : Message : Average mflops/s per call per node (full): 445333 +Grid : Message : Average mflops/s per call per node (full): 304774 +Grid : Message : Stencil 13.6975 GB/s per node +Grid : Message : Stencil 18.6487 GB/s per node +Grid : Message : Stencil 17.5858 GB/s per node +Grid : Message : Stencil 14.6524 GB/s per node +Grid : Message : Average mflops/s per call per node : 667923 +Grid : Message : Average mflops/s per call per node : 807162 +Grid : Message : Average mflops/s per call per node : 825609 +Grid : Message : Average mflops/s per call per node : 659829 +Grid : Message : Average mflops/s per call per node (full): 316104 +Grid : Message : Average mflops/s per call per node (full): 441285 +Grid : Message : Average mflops/s per call per node (full): 446761 +Grid : Message : Average mflops/s per call per node (full): 306605 +Grid : Message : Stencil 16.0286 GB/s per node +Grid : Message : Stencil 12.0197 GB/s per node +Grid : Message : Stencil 17.7871 GB/s per node +Grid : Message : Stencil 13.0547 GB/s per node +Grid : Message : Average mflops/s per call per node : 664227 +Grid : Message : Average mflops/s per call per node : 809418 +Grid : Message : Average mflops/s per call per node : 822235 +Grid : Message : Average mflops/s per call per node : 668032 +Grid : Message : Average mflops/s per call per node (full): 318100 +Grid : Message : Average mflops/s per call per node (full): 361966 +Grid : Message : Average mflops/s per call per node (full): 
446497 +Grid : Message : Average mflops/s per call per node (full): 306591 +Grid : Message : Stencil 13.76 GB/s per node +Grid : Message : Stencil 17.5266 GB/s per node +Grid : Message : Stencil 15.9418 GB/s per node +Grid : Message : Stencil 12.6726 GB/s per node +Grid : Message : Average mflops/s per call per node : 668512 +Grid : Message : Average mflops/s per call per node : 804067 +Grid : Message : Average mflops/s per call per node : 823981 +Grid : Message : Average mflops/s per call per node : 662906 +Grid : Message : Average mflops/s per call per node (full): 315789 +Grid : Message : Average mflops/s per call per node (full): 438450 +Grid : Message : Average mflops/s per call per node (full): 418927 +Grid : Message : Average mflops/s per call per node (full): 302969 +Grid : Message : Stencil 13.7171 GB/s per node +Grid : Message : Stencil 16.5623 GB/s per node +Grid : Message : Stencil 17.1249 GB/s per node +Grid : Message : Stencil 13.0189 GB/s per node +Grid : Message : Average mflops/s per call per node : 668511 +Grid : Message : Average mflops/s per call per node : 806847 +Grid : Message : Average mflops/s per call per node : 822286 +Grid : Message : Average mflops/s per call per node : 665505 +Grid : Message : Average mflops/s per call per node (full): 316746 +Grid : Message : Average mflops/s per call per node (full): 435621 +Grid : Message : Average mflops/s per call per node (full): 442049 +Grid : Message : Average mflops/s per call per node (full): 305990 +Grid : Message : Stencil 13.4105 GB/s per node +Grid : Message : Stencil 11.3204 GB/s per node +Grid : Message : Stencil 17.3826 GB/s per node +Grid : Message : Stencil 12.9017 GB/s per node +Grid : Message : Average mflops/s per call per node : 668608 +Grid : Message : Average mflops/s per call per node : 809832 +Grid : Message : Average mflops/s per call per node : 824581 +Grid : Message : Average mflops/s per call per node : 668790 +Grid : Message : Average mflops/s per call per node (full): 
315761 +Grid : Message : Average mflops/s per call per node (full): 346191 +Grid : Message : Average mflops/s per call per node (full): 444567 +Grid : Message : Average mflops/s per call per node (full): 305959 +Grid : Message : Stencil 12.7964 GB/s per node +Grid : Message : Stencil 16.375 GB/s per node +Grid : Message : Stencil 18.0128 GB/s per node +Grid : Message : Stencil 12.5739 GB/s per node +Grid : Message : Average mflops/s per call per node : 666980 +Grid : Message : Average mflops/s per call per node : 805908 +Grid : Message : Average mflops/s per call per node : 823029 +Grid : Message : Average mflops/s per call per node : 669658 +Grid : Message : Average mflops/s per call per node (full): 314223 +Grid : Message : Average mflops/s per call per node (full): 432713 +Grid : Message : Average mflops/s per call per node (full): 446420 +Grid : Message : Average mflops/s per call per node (full): 305992 +Grid : Message : Stencil 12.5729 GB/s per node +Grid : Message : Stencil 15.7072 GB/s per node +Grid : Message : Stencil 17.8107 GB/s per node +Grid : Message : Stencil 12.6056 GB/s per node +Grid : Message : Average mflops/s per call per node : 667120 +Grid : Message : Average mflops/s per call per node : 808876 +Grid : Message : Average mflops/s per call per node : 821364 +Grid : Message : Average mflops/s per call per node : 667564 +Grid : Message : Average mflops/s per call per node (full): 313410 +Grid : Message : Average mflops/s per call per node (full): 416363 +Grid : Message : Average mflops/s per call per node (full): 446932 +Grid : Message : Average mflops/s per call per node (full): 304678 +Grid : Message : Stencil 12.945 GB/s per node +Grid : Message : Stencil 16.4332 GB/s per node +Grid : Message : Stencil 18.5724 GB/s per node +Grid : Message : Stencil 13.4346 GB/s per node +Grid : Message : Average mflops/s per call per node : 664294 +Grid : Message : Average mflops/s per call per node : 808725 +Grid : Message : Average mflops/s per call per 
node : 823405 +Grid : Message : Average mflops/s per call per node : 665357 +Grid : Message : Average mflops/s per call per node (full): 313774 +Grid : Message : Average mflops/s per call per node (full): 434302 +Grid : Message : Average mflops/s per call per node (full): 448348 +Grid : Message : Average mflops/s per call per node (full): 306846 +Grid : Message : Stencil 12.766 GB/s per node +Grid : Message : Stencil 10.3715 GB/s per node +Grid : Message : Stencil 17.2152 GB/s per node +Grid : Message : Stencil 13.2096 GB/s per node +Grid : Message : Average mflops/s per call per node : 668238 +Grid : Message : Average mflops/s per call per node : 809538 +Grid : Message : Average mflops/s per call per node : 821750 +Grid : Message : Average mflops/s per call per node : 665493 +Grid : Message : Average mflops/s per call per node (full): 314801 +Grid : Message : Average mflops/s per call per node (full): 325253 +Grid : Message : Average mflops/s per call per node (full): 443233 +Grid : Message : Average mflops/s per call per node (full): 305861 +Grid : Message : Stencil 14.2297 GB/s per node +Grid : Message : Stencil 8.79049 GB/s per node +Grid : Message : Stencil 17.2774 GB/s per node +Grid : Message : Stencil 12.9415 GB/s per node +Grid : Message : Average mflops/s per call per node : 664583 +Grid : Message : Average mflops/s per call per node : 805974 +Grid : Message : Average mflops/s per call per node : 819344 +Grid : Message : Average mflops/s per call per node : 664789 +Grid : Message : Average mflops/s per call per node (full): 316928 +Grid : Message : Average mflops/s per call per node (full): 286856 +Grid : Message : Average mflops/s per call per node (full): 443311 +Grid : Message : Average mflops/s per call per node (full): 305485 +Grid : Message : Stencil 12.3881 GB/s per node +Grid : Message : Stencil 10.9088 GB/s per node +Grid : Message : Stencil 17.466 GB/s per node +Grid : Message : Stencil 12.9535 GB/s per node +Grid : Message : Average mflops/s 
per call per node : 666538 +Grid : Message : Average mflops/s per call per node : 813628 +Grid : Message : Average mflops/s per call per node : 826126 +Grid : Message : Average mflops/s per call per node : 661329 +Grid : Message : Average mflops/s per call per node (full): 311884 +Grid : Message : Average mflops/s per call per node (full): 337706 +Grid : Message : Average mflops/s per call per node (full): 444964 +Grid : Message : Average mflops/s per call per node (full): 305091 +Grid : Message : Stencil 13.237 GB/s per node +Grid : Message : Stencil 12.3816 GB/s per node +Grid : Message : Stencil 17.6251 GB/s per node +Grid : Message : Stencil 14.167 GB/s per node +Grid : Message : Average mflops/s per call per node : 667943 +Grid : Message : Average mflops/s per call per node : 810737 +Grid : Message : Average mflops/s per call per node : 822289 +Grid : Message : Average mflops/s per call per node : 659425 +Grid : Message : Average mflops/s per call per node (full): 315870 +Grid : Message : Average mflops/s per call per node (full): 369389 +Grid : Message : Average mflops/s per call per node (full): 446280 +Grid : Message : Average mflops/s per call per node (full): 305260 +Grid : Message : Stencil 14.7861 GB/s per node +Grid : Message : Stencil 16.559 GB/s per node +Grid : Message : Stencil 17.8089 GB/s per node +Grid : Message : Stencil 13.5257 GB/s per node +Grid : Message : Average mflops/s per call per node : 663141 +Grid : Message : Average mflops/s per call per node : 805069 +Grid : Message : Average mflops/s per call per node : 815702 +Grid : Message : Average mflops/s per call per node : 664549 +Grid : Message : Average mflops/s per call per node (full): 316511 +Grid : Message : Average mflops/s per call per node (full): 435230 +Grid : Message : Average mflops/s per call per node (full): 444902 +Grid : Message : Average mflops/s per call per node (full): 306236 +Grid : Message : Stencil 13.1419 GB/s per node +Grid : Message : Stencil 18.032 GB/s per 
node +Grid : Message : Stencil 17.8433 GB/s per node +Grid : Message : Stencil 12.9064 GB/s per node +Grid : Message : Average mflops/s per call per node : 666758 +Grid : Message : Average mflops/s per call per node : 807885 +Grid : Message : Average mflops/s per call per node : 821856 +Grid : Message : Average mflops/s per call per node : 660427 +Grid : Message : Average mflops/s per call per node (full): 314625 +Grid : Message : Average mflops/s per call per node (full): 440284 +Grid : Message : Average mflops/s per call per node (full): 446483 +Grid : Message : Average mflops/s per call per node (full): 304466 +Grid : Message : Stencil 12.881 GB/s per node +Grid : Message : Stencil 17.0216 GB/s per node +Grid : Message : Stencil 18.7172 GB/s per node +Grid : Message : Stencil 13.37 GB/s per node +Grid : Message : Average mflops/s per call per node : 667531 +Grid : Message : Average mflops/s per call per node : 801594 +Grid : Message : Average mflops/s per call per node : 822271 +Grid : Message : Average mflops/s per call per node : 663941 +Grid : Message : Average mflops/s per call per node (full): 314968 +Grid : Message : Average mflops/s per call per node (full): 435815 +Grid : Message : Average mflops/s per call per node (full): 449114 +Grid : Message : Average mflops/s per call per node (full): 306456 +Grid : Message : Stencil 12.9034 GB/s per node +Grid : Message : Stencil 16.9758 GB/s per node +Grid : Message : Stencil 17.4356 GB/s per node +Grid : Message : Stencil 12.6256 GB/s per node +Grid : Message : Average mflops/s per call per node : 669880 +Grid : Message : Average mflops/s per call per node : 804816 +Grid : Message : Average mflops/s per call per node : 830930 +Grid : Message : Average mflops/s per call per node : 658692 +Grid : Message : Average mflops/s per call per node (full): 312269 +Grid : Message : Average mflops/s per call per node (full): 433171 +Grid : Message : Average mflops/s per call per node (full): 445211 +Grid : Message : Average 
mflops/s per call per node (full): 303503 +Grid : Message : Stencil 13.768 GB/s per node +Grid : Message : Stencil 16.5521 GB/s per node +Grid : Message : Stencil 17.7681 GB/s per node +Grid : Message : Stencil 12.1193 GB/s per node +Grid : Message : Average mflops/s per call per node : 667809 +Grid : Message : Average mflops/s per call per node : 805744 +Grid : Message : Average mflops/s per call per node : 824123 +Grid : Message : Average mflops/s per call per node : 668852 +Grid : Message : Average mflops/s per call per node (full): 316518 +Grid : Message : Average mflops/s per call per node (full): 431066 +Grid : Message : Average mflops/s per call per node (full): 445982 +Grid : Message : Average mflops/s per call per node (full): 303187 +Grid : Message : Stencil 13.1464 GB/s per node +Grid : Message : Stencil 16.9932 GB/s per node +Grid : Message : Stencil 18.8166 GB/s per node +Grid : Message : Stencil 13.4326 GB/s per node +Grid : Message : Average mflops/s per call per node : 669118 +Grid : Message : Average mflops/s per call per node : 806856 +Grid : Message : Average mflops/s per call per node : 823368 +Grid : Message : Average mflops/s per call per node : 666544 +Grid : Message : Average mflops/s per call per node (full): 315823 +Grid : Message : Average mflops/s per call per node (full): 437027 +Grid : Message : Average mflops/s per call per node (full): 443807 +Grid : Message : Average mflops/s per call per node (full): 307407 +Grid : Message : Stencil 13.491 GB/s per node +Grid : Message : Stencil 15.9341 GB/s per node +Grid : Message : Stencil 17.8917 GB/s per node +Grid : Message : Stencil 13.607 GB/s per node +Grid : Message : Average mflops/s per call per node : 668430 +Grid : Message : Average mflops/s per call per node : 804180 +Grid : Message : Average mflops/s per call per node : 822103 +Grid : Message : Average mflops/s per call per node : 661251 +Grid : Message : Average mflops/s per call per node (full): 316500 +Grid : Message : Average 
mflops/s per call per node (full): 427481 +Grid : Message : Average mflops/s per call per node (full): 443448 +Grid : Message : Average mflops/s per call per node (full): 305580 +Grid : Message : Stencil 13.5886 GB/s per node +Grid : Message : Stencil 17.1736 GB/s per node +Grid : Message : Stencil 17.3722 GB/s per node +Grid : Message : Stencil 12.6624 GB/s per node +Grid : Message : Average mflops/s per call per node : 667919 +Grid : Message : Average mflops/s per call per node : 801088 +Grid : Message : Average mflops/s per call per node : 820868 +Grid : Message : Average mflops/s per call per node : 656734 +Grid : Message : Average mflops/s per call per node (full): 313716 +Grid : Message : Average mflops/s per call per node (full): 435734 +Grid : Message : Average mflops/s per call per node (full): 443237 +Grid : Message : Average mflops/s per call per node (full): 303009 +Grid : Message : Stencil 12.5853 GB/s per node +Grid : Message : Stencil 17.522 GB/s per node +Grid : Message : Stencil 17.2681 GB/s per node +Grid : Message : Stencil 12.761 GB/s per node +Grid : Message : Average mflops/s per call per node : 670653 +Grid : Message : Average mflops/s per call per node : 807256 +Grid : Message : Average mflops/s per call per node : 821593 +Grid : Message : Average mflops/s per call per node : 660975 +Grid : Message : Average mflops/s per call per node (full): 314512 +Grid : Message : Average mflops/s per call per node (full): 438086 +Grid : Message : Average mflops/s per call per node (full): 439179 +Grid : Message : Average mflops/s per call per node (full): 303605 +Grid : Message : Stencil 13.697 GB/s per node +Grid : Message : Stencil 16.6424 GB/s per node +Grid : Message : Stencil 17.3883 GB/s per node +Grid : Message : Stencil 13.4778 GB/s per node +Grid : Message : Average mflops/s per call per node : 670865 +Grid : Message : Average mflops/s per call per node : 803527 +Grid : Message : Average mflops/s per call per node : 823427 +Grid : Message : 
Average mflops/s per call per node : 659487 +Grid : Message : Average mflops/s per call per node (full): 315548 +Grid : Message : Average mflops/s per call per node (full): 434599 +Grid : Message : Average mflops/s per call per node (full): 444751 +Grid : Message : Average mflops/s per call per node (full): 305546 +Grid : Message : Stencil 12.6026 GB/s per node +Grid : Message : Stencil 16.4971 GB/s per node +Grid : Message : Stencil 17.1935 GB/s per node +Grid : Message : Stencil 12.9868 GB/s per node +Grid : Message : Average mflops/s per call per node : 670945 +Grid : Message : Average mflops/s per call per node : 802586 +Grid : Message : Average mflops/s per call per node : 822190 +Grid : Message : Average mflops/s per call per node : 661417 +Grid : Message : Average mflops/s per call per node (full): 314391 +Grid : Message : Average mflops/s per call per node (full): 433381 +Grid : Message : Average mflops/s per call per node (full): 441169 +Grid : Message : Average mflops/s per call per node (full): 304820 +Grid : Message : Stencil 12.2678 GB/s per node +Grid : Message : Stencil 16.8147 GB/s per node +Grid : Message : Stencil 19.618 GB/s per node +Grid : Message : Stencil 13.8669 GB/s per node +Grid : Message : Average mflops/s per call per node : 669378 +Grid : Message : Average mflops/s per call per node : 800923 +Grid : Message : Average mflops/s per call per node : 818294 +Grid : Message : Average mflops/s per call per node : 661224 +Grid : Message : Average mflops/s per call per node (full): 311876 +Grid : Message : Average mflops/s per call per node (full): 434853 +Grid : Message : Average mflops/s per call per node (full): 449593 +Grid : Message : Average mflops/s per call per node (full): 306914 +Grid : Message : Stencil 12.4139 GB/s per node +Grid : Message : Stencil 17.2397 GB/s per node +Grid : Message : Stencil 17.7356 GB/s per node +Grid : Message : Stencil 13.2463 GB/s per node +Grid : Message : Average mflops/s per call per node : 668627 +Grid 
: Message : Average mflops/s per call per node : 800456 +Grid : Message : Average mflops/s per call per node : 818989 +Grid : Message : Average mflops/s per call per node : 665886 +Grid : Message : Average mflops/s per call per node (full): 313481 +Grid : Message : Average mflops/s per call per node (full): 436451 +Grid : Message : Average mflops/s per call per node (full): 435769 +Grid : Message : Average mflops/s per call per node (full): 306570 +Grid : Message : Stencil 13.5333 GB/s per node +Grid : Message : Stencil 8.13661 GB/s per node +Grid : Message : Stencil 16.9329 GB/s per node +Grid : Message : Stencil 12.8284 GB/s per node +Grid : Message : Average mflops/s per call per node : 664504 +Grid : Message : Average mflops/s per call per node : 804766 +Grid : Message : Average mflops/s per call per node : 821491 +Grid : Message : Average mflops/s per call per node : 664154 +Grid : Message : Average mflops/s per call per node (full): 316740 +Grid : Message : Average mflops/s per call per node (full): 270001 +Grid : Message : Average mflops/s per call per node (full): 439291 +Grid : Message : Average mflops/s per call per node (full): 305179 +Grid : Message : Stencil 14.5014 GB/s per node +Grid : Message : Stencil 16.4796 GB/s per node +Grid : Message : Stencil 17.684 GB/s per node +Grid : Message : Stencil 12.7725 GB/s per node +Grid : Message : Average mflops/s per call per node : 660744 +Grid : Message : Average mflops/s per call per node : 806758 +Grid : Message : Average mflops/s per call per node : 822278 +Grid : Message : Average mflops/s per call per node : 659832 +Grid : Message : Average mflops/s per call per node (full): 316390 +Grid : Message : Average mflops/s per call per node (full): 434171 +Grid : Message : Average mflops/s per call per node (full): 445325 +Grid : Message : Average mflops/s per call per node (full): 303568 +Grid : Message : Stencil 14.5585 GB/s per node +Grid : Message : Stencil 16.9893 GB/s per node +Grid : Message : Stencil 
17.6812 GB/s per node +Grid : Message : Stencil 14.2824 GB/s per node +Grid : Message : Average mflops/s per call per node : 665943 +Grid : Message : Average mflops/s per call per node : 802984 +Grid : Message : Average mflops/s per call per node : 819906 +Grid : Message : Average mflops/s per call per node : 662809 +Grid : Message : Average mflops/s per call per node (full): 317714 +Grid : Message : Average mflops/s per call per node (full): 438366 +Grid : Message : Average mflops/s per call per node (full): 446131 +Grid : Message : Average mflops/s per call per node (full): 307211 +Grid : Message : Stencil 15.4553 GB/s per node +Grid : Message : Stencil 16.9426 GB/s per node +Grid : Message : Stencil 17.2209 GB/s per node +Grid : Message : Stencil 12.0195 GB/s per node +Grid : Message : Average mflops/s per call per node : 664098 +Grid : Message : Average mflops/s per call per node : 802553 +Grid : Message : Average mflops/s per call per node : 824960 +Grid : Message : Average mflops/s per call per node : 664260 +Grid : Message : Average mflops/s per call per node (full): 316082 +Grid : Message : Average mflops/s per call per node (full): 432776 +Grid : Message : Average mflops/s per call per node (full): 443286 +Grid : Message : Average mflops/s per call per node (full): 302022 +Grid : Message : Stencil 15.7701 GB/s per node +Grid : Message : Stencil 16.2108 GB/s per node +Grid : Message : Stencil 17.2166 GB/s per node +Grid : Message : Stencil 11.9306 GB/s per node +Grid : Message : Average mflops/s per call per node : 663469 +Grid : Message : Average mflops/s per call per node : 801281 +Grid : Message : Average mflops/s per call per node : 823253 +Grid : Message : Average mflops/s per call per node : 667932 +Grid : Message : Average mflops/s per call per node (full): 318237 +Grid : Message : Average mflops/s per call per node (full): 430731 +Grid : Message : Average mflops/s per call per node (full): 442433 +Grid : Message : Average mflops/s per call per node 
(full): 300951 +Grid : Message : Stencil 14.2805 GB/s per node +Grid : Message : Stencil 17.6347 GB/s per node +Grid : Message : Stencil 18.0806 GB/s per node +Grid : Message : Stencil 14.1374 GB/s per node +Grid : Message : Average mflops/s per call per node : 661408 +Grid : Message : Average mflops/s per call per node : 798410 +Grid : Message : Average mflops/s per call per node : 821662 +Grid : Message : Average mflops/s per call per node : 661402 +Grid : Message : Average mflops/s per call per node (full): 315896 +Grid : Message : Average mflops/s per call per node (full): 436256 +Grid : Message : Average mflops/s per call per node (full): 446757 +Grid : Message : Average mflops/s per call per node (full): 306241 +Grid : Message : Stencil 12.6773 GB/s per node +Grid : Message : Stencil 16.234 GB/s per node +Grid : Message : Stencil 18.0985 GB/s per node +Grid : Message : Stencil 13.6585 GB/s per node +Grid : Message : Average mflops/s per call per node : 668940 +Grid : Message : Average mflops/s per call per node : 806973 +Grid : Message : Average mflops/s per call per node : 819816 +Grid : Message : Average mflops/s per call per node : 663629 +Grid : Message : Average mflops/s per call per node (full): 315716 +Grid : Message : Average mflops/s per call per node (full): 432828 +Grid : Message : Average mflops/s per call per node (full): 446896 +Grid : Message : Average mflops/s per call per node (full): 307358 +Grid : Message : Stencil 13.1497 GB/s per node +Grid : Message : Stencil 17.7945 GB/s per node +Grid : Message : Stencil 17.2219 GB/s per node +Grid : Message : Stencil 13.6643 GB/s per node +Grid : Message : Average mflops/s per call per node : 661906 +Grid : Message : Average mflops/s per call per node : 795510 +Grid : Message : Average mflops/s per call per node : 823272 +Grid : Message : Average mflops/s per call per node : 660621 +Grid : Message : Average mflops/s per call per node (full): 310264 +Grid : Message : Average mflops/s per call per node 
(full): 438583 +Grid : Message : Average mflops/s per call per node (full): 442278 +Grid : Message : Average mflops/s per call per node (full): 306590 +Grid : Message : Stencil 13.7303 GB/s per node +Grid : Message : Stencil 14.0153 GB/s per node +Grid : Message : Stencil 17.1919 GB/s per node +Grid : Message : Stencil 12.1508 GB/s per node +Grid : Message : Average mflops/s per call per node : 665851 +Grid : Message : Average mflops/s per call per node : 806390 +Grid : Message : Average mflops/s per call per node : 826798 +Grid : Message : Average mflops/s per call per node : 668700 +Grid : Message : Average mflops/s per call per node (full): 315276 +Grid : Message : Average mflops/s per call per node (full): 401084 +Grid : Message : Average mflops/s per call per node (full): 440578 +Grid : Message : Average mflops/s per call per node (full): 303352 +Grid : Message : Stencil 12.9474 GB/s per node +Grid : Message : Stencil 16.7575 GB/s per node +Grid : Message : Stencil 18.5942 GB/s per node +Grid : Message : Stencil 12.6604 GB/s per node +Grid : Message : Average mflops/s per call per node : 660774 +Grid : Message : Average mflops/s per call per node : 806489 +Grid : Message : Average mflops/s per call per node : 824226 +Grid : Message : Average mflops/s per call per node : 665717 +Grid : Message : Average mflops/s per call per node (full): 311902 +Grid : Message : Average mflops/s per call per node (full): 438116 +Grid : Message : Average mflops/s per call per node (full): 448158 +Grid : Message : Average mflops/s per call per node (full): 304377 +Grid : Message : Stencil 14.3589 GB/s per node +Grid : Message : Stencil 16.9713 GB/s per node +Grid : Message : Stencil 17.9925 GB/s per node +Grid : Message : Stencil 11.8981 GB/s per node +Grid : Message : Average mflops/s per call per node : 660765 +Grid : Message : Average mflops/s per call per node : 801897 +Grid : Message : Average mflops/s per call per node : 821621 +Grid : Message : Average mflops/s per call 
per node : 670225 +Grid : Message : Average mflops/s per call per node (full): 316036 +Grid : Message : Average mflops/s per call per node (full): 436107 +Grid : Message : Average mflops/s per call per node (full): 446054 +Grid : Message : Average mflops/s per call per node (full): 301453 +Grid : Message : Stencil 13.7643 GB/s per node +Grid : Message : Stencil 16.6706 GB/s per node +Grid : Message : Stencil 17.0874 GB/s per node +Grid : Message : Stencil 12.087 GB/s per node +Grid : Message : Average mflops/s per call per node : 662129 +Grid : Message : Average mflops/s per call per node : 806221 +Grid : Message : Average mflops/s per call per node : 821227 +Grid : Message : Average mflops/s per call per node : 666438 +Grid : Message : Average mflops/s per call per node (full): 315264 +Grid : Message : Average mflops/s per call per node (full): 437614 +Grid : Message : Average mflops/s per call per node (full): 441200 +Grid : Message : Average mflops/s per call per node (full): 302746 +Grid : Message : Stencil 15.2251 GB/s per node +Grid : Message : Stencil 17.2964 GB/s per node +Grid : Message : Stencil 18.0299 GB/s per node +Grid : Message : Stencil 12.6288 GB/s per node +Grid : Message : Average mflops/s per call per node : 662399 +Grid : Message : Average mflops/s per call per node : 803931 +Grid : Message : Average mflops/s per call per node : 820320 +Grid : Message : Average mflops/s per call per node : 663829 +Grid : Message : Average mflops/s per call per node (full): 316822 +Grid : Message : Average mflops/s per call per node (full): 437254 +Grid : Message : Average mflops/s per call per node (full): 438214 +Grid : Message : Average mflops/s per call per node (full): 304733 +Grid : Message : Stencil 13.3113 GB/s per node +Grid : Message : Stencil 16.7028 GB/s per node +Grid : Message : Stencil 17.3581 GB/s per node +Grid : Message : Stencil 12.4669 GB/s per node +Grid : Message : Average mflops/s per call per node : 664624 +Grid : Message : Average 
mflops/s per call per node : 805500 +Grid : Message : Average mflops/s per call per node : 830041 +Grid : Message : Average mflops/s per call per node : 662596 +Grid : Message : Average mflops/s per call per node (full): 315637 +Grid : Message : Average mflops/s per call per node (full): 435982 +Grid : Message : Average mflops/s per call per node (full): 445041 +Grid : Message : Average mflops/s per call per node (full): 303697 +Grid : Message : Stencil 14.1203 GB/s per node +Grid : Message : Stencil 17.0781 GB/s per node +Grid : Message : Stencil 16.4489 GB/s per node +Grid : Message : Stencil 12.5695 GB/s per node +Grid : Message : Average mflops/s per call per node : 661332 +Grid : Message : Average mflops/s per call per node : 800493 +Grid : Message : Average mflops/s per call per node : 823953 +Grid : Message : Average mflops/s per call per node : 664719 +Grid : Message : Average mflops/s per call per node (full): 315811 +Grid : Message : Average mflops/s per call per node (full): 435198 +Grid : Message : Average mflops/s per call per node (full): 428685 +Grid : Message : Average mflops/s per call per node (full): 304638 +Grid : Message : Stencil 13.7742 GB/s per node +Grid : Message : Stencil 17.3357 GB/s per node +Grid : Message : Stencil 18.761 GB/s per node +Grid : Message : Stencil 12.7716 GB/s per node +Grid : Message : Average mflops/s per call per node : 663846 +Grid : Message : Average mflops/s per call per node : 801600 +Grid : Message : Average mflops/s per call per node : 819316 +Grid : Message : Average mflops/s per call per node : 669795 +Grid : Message : Average mflops/s per call per node (full): 315898 +Grid : Message : Average mflops/s per call per node (full): 436843 +Grid : Message : Average mflops/s per call per node (full): 447877 +Grid : Message : Average mflops/s per call per node (full): 306082 +Grid : Message : Stencil 13.2072 GB/s per node +Grid : Message : Stencil 16.8379 GB/s per node +Grid : Message : Stencil 17.4784 GB/s per node 
+Grid : Message : Stencil 13.3794 GB/s per node +Grid : Message : Average mflops/s per call per node : 666336 +Grid : Message : Average mflops/s per call per node : 805100 +Grid : Message : Average mflops/s per call per node : 829079 +Grid : Message : Average mflops/s per call per node : 663901 +Grid : Message : Average mflops/s per call per node (full): 315246 +Grid : Message : Average mflops/s per call per node (full): 432623 +Grid : Message : Average mflops/s per call per node (full): 445247 +Grid : Message : Average mflops/s per call per node (full): 306374 +Grid : Message : Stencil 12.3984 GB/s per node +Grid : Message : Stencil 8.37936 GB/s per node +Grid : Message : Stencil 17.0412 GB/s per node +Grid : Message : Stencil 12.0361 GB/s per node +Grid : Message : Average mflops/s per call per node : 667711 +Grid : Message : Average mflops/s per call per node : 810908 +Grid : Message : Average mflops/s per call per node : 825617 +Grid : Message : Average mflops/s per call per node : 667513 +Grid : Message : Average mflops/s per call per node (full): 312778 +Grid : Message : Average mflops/s per call per node (full): 276363 +Grid : Message : Average mflops/s per call per node (full): 441323 +Grid : Message : Average mflops/s per call per node (full): 302656 +Grid : Message : Stencil 13.0633 GB/s per node +Grid : Message : Stencil 16.331 GB/s per node +Grid : Message : Stencil 16.8624 GB/s per node +Grid : Message : Stencil 12.9714 GB/s per node +Grid : Message : Average mflops/s per call per node : 663183 +Grid : Message : Average mflops/s per call per node : 807105 +Grid : Message : Average mflops/s per call per node : 824210 +Grid : Message : Average mflops/s per call per node : 663522 +Grid : Message : Average mflops/s per call per node (full): 314204 +Grid : Message : Average mflops/s per call per node (full): 432882 +Grid : Message : Average mflops/s per call per node (full): 439099 +Grid : Message : Average mflops/s per call per node (full): 304943 +Grid : 
Message : Stencil 12.3296 GB/s per node +Grid : Message : Stencil 16.9761 GB/s per node +Grid : Message : Stencil 17.8134 GB/s per node +Grid : Message : Stencil 13.9537 GB/s per node +Grid : Message : Average mflops/s per call per node : 664208 +Grid : Message : Average mflops/s per call per node : 804127 +Grid : Message : Average mflops/s per call per node : 820009 +Grid : Message : Average mflops/s per call per node : 660766 +Grid : Message : Average mflops/s per call per node (full): 311581 +Grid : Message : Average mflops/s per call per node (full): 438930 +Grid : Message : Average mflops/s per call per node (full): 445649 +Grid : Message : Average mflops/s per call per node (full): 306281 +Grid : Message : Stencil 12.625 GB/s per node +Grid : Message : Stencil 17.463 GB/s per node +Grid : Message : Stencil 17.838 GB/s per node +Grid : Message : Stencil 14.241 GB/s per node +Grid : Message : Average mflops/s per call per node : 666891 +Grid : Message : Average mflops/s per call per node : 798629 +Grid : Message : Average mflops/s per call per node : 824752 +Grid : Message : Average mflops/s per call per node : 660324 +Grid : Message : Average mflops/s per call per node (full): 313583 +Grid : Message : Average mflops/s per call per node (full): 436361 +Grid : Message : Average mflops/s per call per node (full): 446309 +Grid : Message : Average mflops/s per call per node (full): 306683 +Grid : Message : Stencil 12.6714 GB/s per node +Grid : Message : Stencil 17.6931 GB/s per node +Grid : Message : Stencil 17.0934 GB/s per node +Grid : Message : Stencil 12.7118 GB/s per node +Grid : Message : Average mflops/s per call per node : 666993 +Grid : Message : Average mflops/s per call per node : 802461 +Grid : Message : Average mflops/s per call per node : 826448 +Grid : Message : Average mflops/s per call per node : 664123 +Grid : Message : Average mflops/s per call per node (full): 313724 +Grid : Message : Average mflops/s per call per node (full): 438044 +Grid : 
Message : Average mflops/s per call per node (full): 441526 +Grid : Message : Average mflops/s per call per node (full): 303764 +Grid : Message : Stencil 12.3299 GB/s per node +Grid : Message : Stencil 16.8856 GB/s per node +Grid : Message : Stencil 18.2979 GB/s per node +Grid : Message : Stencil 12.9924 GB/s per node +Grid : Message : Average mflops/s per call per node : 666837 +Grid : Message : Average mflops/s per call per node : 804554 +Grid : Message : Average mflops/s per call per node : 822620 +Grid : Message : Average mflops/s per call per node : 667675 +Grid : Message : Average mflops/s per call per node (full): 311836 +Grid : Message : Average mflops/s per call per node (full): 437328 +Grid : Message : Average mflops/s per call per node (full): 448041 +Grid : Message : Average mflops/s per call per node (full): 306129 +Grid : Message : Stencil 12.4805 GB/s per node +Grid : Message : Stencil 16.7107 GB/s per node +Grid : Message : Stencil 18.7081 GB/s per node +Grid : Message : Stencil 11.8256 GB/s per node +Grid : Message : Average mflops/s per call per node : 666222 +Grid : Message : Average mflops/s per call per node : 804660 +Grid : Message : Average mflops/s per call per node : 821869 +Grid : Message : Average mflops/s per call per node : 668984 +Grid : Message : Average mflops/s per call per node (full): 312524 +Grid : Message : Average mflops/s per call per node (full): 436016 +Grid : Message : Average mflops/s per call per node (full): 445127 +Grid : Message : Average mflops/s per call per node (full): 299541 +Grid : Message : Stencil 13.1368 GB/s per node +Grid : Message : Stencil 16.7553 GB/s per node +Grid : Message : Stencil 17.9153 GB/s per node +Grid : Message : Stencil 13.4052 GB/s per node +Grid : Message : Average mflops/s per call per node : 663625 +Grid : Message : Average mflops/s per call per node : 811113 +Grid : Message : Average mflops/s per call per node : 825860 +Grid : Message : Average mflops/s per call per node : 663241 +Grid : 
Message : Average mflops/s per call per node (full): 314396 +Grid : Message : Average mflops/s per call per node (full): 436770 +Grid : Message : Average mflops/s per call per node (full): 446415 +Grid : Message : Average mflops/s per call per node (full): 303725 +Grid : Message : Stencil 13.5957 GB/s per node +Grid : Message : Stencil 17.1293 GB/s per node +Grid : Message : Stencil 17.3253 GB/s per node +Grid : Message : Stencil 12.8351 GB/s per node +Grid : Message : Average mflops/s per call per node : 668326 +Grid : Message : Average mflops/s per call per node : 804576 +Grid : Message : Average mflops/s per call per node : 817037 +Grid : Message : Average mflops/s per call per node : 665455 +Grid : Message : Average mflops/s per call per node (full): 314691 +Grid : Message : Average mflops/s per call per node (full): 435872 +Grid : Message : Average mflops/s per call per node (full): 443981 +Grid : Message : Average mflops/s per call per node (full): 306470 +Grid : Message : Stencil 14.4764 GB/s per node +Grid : Message : Stencil 17.9404 GB/s per node +Grid : Message : Stencil 18.3239 GB/s per node +Grid : Message : Stencil 14.7409 GB/s per node +Grid : Message : Average mflops/s per call per node : 666658 +Grid : Message : Average mflops/s per call per node : 801177 +Grid : Message : Average mflops/s per call per node : 824112 +Grid : Message : Average mflops/s per call per node : 660454 +Grid : Message : Average mflops/s per call per node (full): 316984 +Grid : Message : Average mflops/s per call per node (full): 440871 +Grid : Message : Average mflops/s per call per node (full): 448721 +Grid : Message : Average mflops/s per call per node (full): 307159 +Grid : Message : Stencil 12.4847 GB/s per node +Grid : Message : Stencil 17.617 GB/s per node +Grid : Message : Stencil 17.778 GB/s per node +Grid : Message : Stencil 13.3334 GB/s per node +Grid : Message : Average mflops/s per call per node : 673753 +Grid : Message : Average mflops/s per call per node : 
800196 +Grid : Message : Average mflops/s per call per node : 821568 +Grid : Message : Average mflops/s per call per node : 664242 +Grid : Message : Average mflops/s per call per node (full): 313021 +Grid : Message : Average mflops/s per call per node (full): 436903 +Grid : Message : Average mflops/s per call per node (full): 443523 +Grid : Message : Average mflops/s per call per node (full): 305924 +Grid : Message : Stencil 13.2565 GB/s per node +Grid : Message : Stencil 17.3985 GB/s per node +Grid : Message : Stencil 17.83 GB/s per node +Grid : Message : Stencil 12.8742 GB/s per node +Grid : Message : Average mflops/s per call per node : 670556 +Grid : Message : Average mflops/s per call per node : 806249 +Grid : Message : Average mflops/s per call per node : 820354 +Grid : Message : Average mflops/s per call per node : 663650 +Grid : Message : Average mflops/s per call per node (full): 317296 +Grid : Message : Average mflops/s per call per node (full): 440128 +Grid : Message : Average mflops/s per call per node (full): 445629 +Grid : Message : Average mflops/s per call per node (full): 305392 +Grid : Message : Stencil 13.6076 GB/s per node +Grid : Message : Stencil 16.6494 GB/s per node +Grid : Message : Stencil 17.5183 GB/s per node +Grid : Message : Stencil 14.4296 GB/s per node +Grid : Message : Average mflops/s per call per node : 666624 +Grid : Message : Average mflops/s per call per node : 804377 +Grid : Message : Average mflops/s per call per node : 817964 +Grid : Message : Average mflops/s per call per node : 665206 +Grid : Message : Average mflops/s per call per node (full): 316269 +Grid : Message : Average mflops/s per call per node (full): 434950 +Grid : Message : Average mflops/s per call per node (full): 443647 +Grid : Message : Average mflops/s per call per node (full): 307731 +Grid : Message : Stencil 14.0593 GB/s per node +Grid : Message : Stencil 17.2712 GB/s per node +Grid : Message : Stencil 18.0003 GB/s per node +Grid : Message : Stencil 
12.4421 GB/s per node +Grid : Message : Average mflops/s per call per node : 666307 +Grid : Message : Average mflops/s per call per node : 803766 +Grid : Message : Average mflops/s per call per node : 823949 +Grid : Message : Average mflops/s per call per node : 666058 +Grid : Message : Average mflops/s per call per node (full): 317264 +Grid : Message : Average mflops/s per call per node (full): 437667 +Grid : Message : Average mflops/s per call per node (full): 437707 +Grid : Message : Average mflops/s per call per node (full): 299945 +Grid : Message : Stencil 12.4696 GB/s per node +Grid : Message : Stencil 16.5084 GB/s per node +Grid : Message : Stencil 17.5922 GB/s per node +Grid : Message : Stencil 12.8264 GB/s per node +Grid : Message : Average mflops/s per call per node : 672217 +Grid : Message : Average mflops/s per call per node : 807160 +Grid : Message : Average mflops/s per call per node : 825964 +Grid : Message : Average mflops/s per call per node : 667014 +Grid : Message : Average mflops/s per call per node (full): 313820 +Grid : Message : Average mflops/s per call per node (full): 433654 +Grid : Message : Average mflops/s per call per node (full): 446353 +Grid : Message : Average mflops/s per call per node (full): 305633 +Grid : Message : Stencil 12.6734 GB/s per node +Grid : Message : Stencil 16.4931 GB/s per node +Grid : Message : Stencil 17.477 GB/s per node +Grid : Message : Stencil 14.7119 GB/s per node +Grid : Message : Average mflops/s per call per node : 671820 +Grid : Message : Average mflops/s per call per node : 804476 +Grid : Message : Average mflops/s per call per node : 817111 +Grid : Message : Average mflops/s per call per node : 658700 +Grid : Message : Average mflops/s per call per node (full): 314863 +Grid : Message : Average mflops/s per call per node (full): 435448 +Grid : Message : Average mflops/s per call per node (full): 443580 +Grid : Message : Average mflops/s per call per node (full): 306743 +Grid : Message : Stencil 12.5016 
GB/s per node +Grid : Message : Stencil 9.52792 GB/s per node +Grid : Message : Stencil 17.9562 GB/s per node +Grid : Message : Stencil 12.3768 GB/s per node +Grid : Message : Average mflops/s per call per node : 671064 +Grid : Message : Average mflops/s per call per node : 815749 +Grid : Message : Average mflops/s per call per node : 821584 +Grid : Message : Average mflops/s per call per node : 670523 +Grid : Message : Average mflops/s per call per node (full): 313644 +Grid : Message : Average mflops/s per call per node (full): 305467 +Grid : Message : Average mflops/s per call per node (full): 445200 +Grid : Message : Average mflops/s per call per node (full): 305141 +Grid : Message : Stencil 12.1447 GB/s per node +Grid : Message : Stencil 16.8099 GB/s per node +Grid : Message : Stencil 17.3329 GB/s per node +Grid : Message : Stencil 13.932 GB/s per node +Grid : Message : Average mflops/s per call per node : 669301 +Grid : Message : Average mflops/s per call per node : 806765 +Grid : Message : Average mflops/s per call per node : 821776 +Grid : Message : Average mflops/s per call per node : 657008 +Grid : Message : Average mflops/s per call per node (full): 309647 +Grid : Message : Average mflops/s per call per node (full): 438769 +Grid : Message : Average mflops/s per call per node (full): 444367 +Grid : Message : Average mflops/s per call per node (full): 304731 +Grid : Message : Stencil 12.6801 GB/s per node +Grid : Message : Stencil 16.7348 GB/s per node +Grid : Message : Stencil 17.7477 GB/s per node +Grid : Message : Stencil 12.265 GB/s per node +Grid : Message : Average mflops/s per call per node : 670334 +Grid : Message : Average mflops/s per call per node : 807859 +Grid : Message : Average mflops/s per call per node : 825198 +Grid : Message : Average mflops/s per call per node : 664489 +Grid : Message : Average mflops/s per call per node (full): 315221 +Grid : Message : Average mflops/s per call per node (full): 436968 +Grid : Message : Average mflops/s 
per call per node (full): 446639 +Grid : Message : Average mflops/s per call per node (full): 303895 +Grid : Message : Stencil 12.4594 GB/s per node +Grid : Message : Stencil 17.1054 GB/s per node +Grid : Message : Stencil 17.0036 GB/s per node +Grid : Message : Stencil 13.1651 GB/s per node +Grid : Message : Average mflops/s per call per node : 667348 +Grid : Message : Average mflops/s per call per node : 804015 +Grid : Message : Average mflops/s per call per node : 826743 +Grid : Message : Average mflops/s per call per node : 661906 +Grid : Message : Average mflops/s per call per node (full): 311877 +Grid : Message : Average mflops/s per call per node (full): 437038 +Grid : Message : Average mflops/s per call per node (full): 441547 +Grid : Message : Average mflops/s per call per node (full): 305557 +Grid : Message : Stencil 13.786 GB/s per node +Grid : Message : Stencil 16.8368 GB/s per node +Grid : Message : Stencil 17.6723 GB/s per node +Grid : Message : Stencil 12.2742 GB/s per node +Grid : Message : Average mflops/s per call per node : 668162 +Grid : Message : Average mflops/s per call per node : 800651 +Grid : Message : Average mflops/s per call per node : 819596 +Grid : Message : Average mflops/s per call per node : 664426 +Grid : Message : Average mflops/s per call per node (full): 316998 +Grid : Message : Average mflops/s per call per node (full): 436396 +Grid : Message : Average mflops/s per call per node (full): 443602 +Grid : Message : Average mflops/s per call per node (full): 303972 +Grid : Message : Stencil 13.7633 GB/s per node +Grid : Message : Stencil 16.2817 GB/s per node +Grid : Message : Stencil 17.8035 GB/s per node +Grid : Message : Stencil 12.6944 GB/s per node +Grid : Message : Average mflops/s per call per node : 664684 +Grid : Message : Average mflops/s per call per node : 808546 +Grid : Message : Average mflops/s per call per node : 822845 +Grid : Message : Average mflops/s per call per node : 664413 +Grid : Message : Average mflops/s 
per call per node (full): 316050 +Grid : Message : Average mflops/s per call per node (full): 432889 +Grid : Message : Average mflops/s per call per node (full): 446074 +Grid : Message : Average mflops/s per call per node (full): 304574 +Grid : Message : Stencil 13.4668 GB/s per node +Grid : Message : Stencil 16.8483 GB/s per node +Grid : Message : Stencil 18.8989 GB/s per node +Grid : Message : Stencil 11.9368 GB/s per node +Grid : Message : Average mflops/s per call per node : 667145 +Grid : Message : Average mflops/s per call per node : 802864 +Grid : Message : Average mflops/s per call per node : 814890 +Grid : Message : Average mflops/s per call per node : 669128 +Grid : Message : Average mflops/s per call per node (full): 315400 +Grid : Message : Average mflops/s per call per node (full): 436546 +Grid : Message : Average mflops/s per call per node (full): 447561 +Grid : Message : Average mflops/s per call per node (full): 301691 +Grid : Message : Stencil 13.2105 GB/s per node +Grid : Message : Stencil 16.9977 GB/s per node +Grid : Message : Stencil 17.0903 GB/s per node +Grid : Message : Stencil 12.057 GB/s per node +Grid : Message : Average mflops/s per call per node : 663785 +Grid : Message : Average mflops/s per call per node : 803215 +Grid : Message : Average mflops/s per call per node : 823658 +Grid : Message : Average mflops/s per call per node : 663578 +Grid : Message : Average mflops/s per call per node (full): 314991 +Grid : Message : Average mflops/s per call per node (full): 436633 +Grid : Message : Average mflops/s per call per node (full): 441939 +Grid : Message : Average mflops/s per call per node (full): 301989 +Grid : Message : Stencil 14.4148 GB/s per node +Grid : Message : Stencil 17.3747 GB/s per node +Grid : Message : Stencil 17.3421 GB/s per node +Grid : Message : Stencil 12.1089 GB/s per node +Grid : Message : Average mflops/s per call per node : 662050 +Grid : Message : Average mflops/s per call per node : 800697 +Grid : Message : 
Average mflops/s per call per node : 823547 +Grid : Message : Average mflops/s per call per node : 672339 +Grid : Message : Average mflops/s per call per node (full): 315394 +Grid : Message : Average mflops/s per call per node (full): 438004 +Grid : Message : Average mflops/s per call per node (full): 440362 +Grid : Message : Average mflops/s per call per node (full): 303484 +Grid : Message : Stencil 14.5963 GB/s per node +Grid : Message : Stencil 9.37368 GB/s per node +Grid : Message : Stencil 17.0918 GB/s per node +Grid : Message : Stencil 12.5873 GB/s per node +Grid : Message : Average mflops/s per call per node : 663264 +Grid : Message : Average mflops/s per call per node : 808126 +Grid : Message : Average mflops/s per call per node : 824264 +Grid : Message : Average mflops/s per call per node : 668386 +Grid : Message : Average mflops/s per call per node (full): 316801 +Grid : Message : Average mflops/s per call per node (full): 300452 +Grid : Message : Average mflops/s per call per node (full): 441820 +Grid : Message : Average mflops/s per call per node (full): 304562 +Grid : Message : Stencil 12.9733 GB/s per node +Grid : Message : Stencil 18.1228 GB/s per node +Grid : Message : Stencil 17.4571 GB/s per node +Grid : Message : Stencil 13.2088 GB/s per node +Grid : Message : Average mflops/s per call per node : 665026 +Grid : Message : Average mflops/s per call per node : 803643 +Grid : Message : Average mflops/s per call per node : 820059 +Grid : Message : Average mflops/s per call per node : 662757 +Grid : Message : Average mflops/s per call per node (full): 314393 +Grid : Message : Average mflops/s per call per node (full): 440506 +Grid : Message : Average mflops/s per call per node (full): 435037 +Grid : Message : Average mflops/s per call per node (full): 304707 +Grid : Message : Stencil 14.9553 GB/s per node +Grid : Message : Stencil 17.5984 GB/s per node +Grid : Message : Stencil 17.2416 GB/s per node +Grid : Message : Stencil 12.3825 GB/s per node +Grid 
: Message : Average mflops/s per call per node : 662569 +Grid : Message : Average mflops/s per call per node : 803959 +Grid : Message : Average mflops/s per call per node : 824464 +Grid : Message : Average mflops/s per call per node : 663959 +Grid : Message : Average mflops/s per call per node (full): 316442 +Grid : Message : Average mflops/s per call per node (full): 440130 +Grid : Message : Average mflops/s per call per node (full): 442776 +Grid : Message : Average mflops/s per call per node (full): 303533 +Grid : Message : Stencil 13.6284 GB/s per node +Grid : Message : Stencil 16.7526 GB/s per node +Grid : Message : Stencil 17.4366 GB/s per node +Grid : Message : Stencil 11.8692 GB/s per node +Grid : Message : Average mflops/s per call per node : 664654 +Grid : Message : Average mflops/s per call per node : 805596 +Grid : Message : Average mflops/s per call per node : 826425 +Grid : Message : Average mflops/s per call per node : 666243 +Grid : Message : Average mflops/s per call per node (full): 315764 +Grid : Message : Average mflops/s per call per node (full): 437992 +Grid : Message : Average mflops/s per call per node (full): 446462 +Grid : Message : Average mflops/s per call per node (full): 300689 +Grid : Message : Stencil 13.1773 GB/s per node +Grid : Message : Stencil 17.1496 GB/s per node +Grid : Message : Stencil 17.2845 GB/s per node +Grid : Message : Stencil 12.2689 GB/s per node +Grid : Message : Average mflops/s per call per node : 662457 +Grid : Message : Average mflops/s per call per node : 799747 +Grid : Message : Average mflops/s per call per node : 823813 +Grid : Message : Average mflops/s per call per node : 668403 +Grid : Message : Average mflops/s per call per node (full): 315019 +Grid : Message : Average mflops/s per call per node (full): 437159 +Grid : Message : Average mflops/s per call per node (full): 443460 +Grid : Message : Average mflops/s per call per node (full): 304224 +Grid : Message : Stencil 13.1181 GB/s per node +Grid : 
Message : Stencil 17.2829 GB/s per node +Grid : Message : Stencil 17.6311 GB/s per node +Grid : Message : Stencil 12.7872 GB/s per node +Grid : Message : Average mflops/s per call per node : 662005 +Grid : Message : Average mflops/s per call per node : 810197 +Grid : Message : Average mflops/s per call per node : 821979 +Grid : Message : Average mflops/s per call per node : 659287 +Grid : Message : Average mflops/s per call per node (full): 311259 +Grid : Message : Average mflops/s per call per node (full): 440542 +Grid : Message : Average mflops/s per call per node (full): 445679 +Grid : Message : Average mflops/s per call per node (full): 304177 +Grid : Message : Stencil 12.2806 GB/s per node +Grid : Message : Stencil 17.0642 GB/s per node +Grid : Message : Stencil 17.5733 GB/s per node +Grid : Message : Stencil 12.6914 GB/s per node +Grid : Message : Average mflops/s per call per node : 669888 +Grid : Message : Average mflops/s per call per node : 805610 +Grid : Message : Average mflops/s per call per node : 824584 +Grid : Message : Average mflops/s per call per node : 657127 +Grid : Message : Average mflops/s per call per node (full): 312348 +Grid : Message : Average mflops/s per call per node (full): 439208 +Grid : Message : Average mflops/s per call per node (full): 443666 +Grid : Message : Average mflops/s per call per node (full): 303205 +Grid : Message : Stencil 12.9661 GB/s per node +Grid : Message : Stencil 16.5873 GB/s per node +Grid : Message : Stencil 17.5306 GB/s per node +Grid : Message : Stencil 12.8656 GB/s per node +Grid : Message : Average mflops/s per call per node : 667796 +Grid : Message : Average mflops/s per call per node : 804159 +Grid : Message : Average mflops/s per call per node : 822987 +Grid : Message : Average mflops/s per call per node : 667076 +Grid : Message : Average mflops/s per call per node (full): 316412 +Grid : Message : Average mflops/s per call per node (full): 435030 +Grid : Message : Average mflops/s per call per node 
(full): 445900 +Grid : Message : Average mflops/s per call per node (full): 305052 +Grid : Message : Stencil 13.1916 GB/s per node +Grid : Message : Stencil 10.271 GB/s per node +Grid : Message : Stencil 18.4837 GB/s per node +Grid : Message : Stencil 12.6862 GB/s per node +Grid : Message : Average mflops/s per call per node : 667469 +Grid : Message : Average mflops/s per call per node : 809427 +Grid : Message : Average mflops/s per call per node : 815268 +Grid : Message : Average mflops/s per call per node : 665818 +Grid : Message : Average mflops/s per call per node (full): 315679 +Grid : Message : Average mflops/s per call per node (full): 323040 +Grid : Message : Average mflops/s per call per node (full): 446261 +Grid : Message : Average mflops/s per call per node (full): 304701 +Grid : Message : Stencil 13.1754 GB/s per node +Grid : Message : Stencil 15.6857 GB/s per node +Grid : Message : Stencil 17.1186 GB/s per node +Grid : Message : Stencil 12.8238 GB/s per node +Grid : Message : Average mflops/s per call per node : 668281 +Grid : Message : Average mflops/s per call per node : 804696 +Grid : Message : Average mflops/s per call per node : 824252 +Grid : Message : Average mflops/s per call per node : 661608 +Grid : Message : Average mflops/s per call per node (full): 316585 +Grid : Message : Average mflops/s per call per node (full): 424725 +Grid : Message : Average mflops/s per call per node (full): 442445 +Grid : Message : Average mflops/s per call per node (full): 304938 +Grid : Message : Stencil 14.1363 GB/s per node +Grid : Message : Stencil 17.2076 GB/s per node +Grid : Message : Stencil 17.2527 GB/s per node +Grid : Message : Stencil 12.6664 GB/s per node +Grid : Message : Average mflops/s per call per node : 664212 +Grid : Message : Average mflops/s per call per node : 807256 +Grid : Message : Average mflops/s per call per node : 828470 +Grid : Message : Average mflops/s per call per node : 658701 +Grid : Message : Average mflops/s per call per node 
(full): 316642 +Grid : Message : Average mflops/s per call per node (full): 440364 +Grid : Message : Average mflops/s per call per node (full): 444454 +Grid : Message : Average mflops/s per call per node (full): 300892 +Grid : Message : Stencil 12.4873 GB/s per node +Grid : Message : Stencil 18.1535 GB/s per node +Grid : Message : Stencil 17.2971 GB/s per node +Grid : Message : Stencil 12.6544 GB/s per node +Grid : Message : Average mflops/s per call per node : 669576 +Grid : Message : Average mflops/s per call per node : 804299 +Grid : Message : Average mflops/s per call per node : 824912 +Grid : Message : Average mflops/s per call per node : 666060 +Grid : Message : Average mflops/s per call per node (full): 314263 +Grid : Message : Average mflops/s per call per node (full): 442052 +Grid : Message : Average mflops/s per call per node (full): 443849 +Grid : Message : Average mflops/s per call per node (full): 304899 +Grid : Message : Stencil 12.5467 GB/s per node +Grid : Message : Stencil 16.4949 GB/s per node +Grid : Message : Stencil 17.1648 GB/s per node +Grid : Message : Stencil 12.1105 GB/s per node +Grid : Message : Average mflops/s per call per node : 668703 +Grid : Message : Average mflops/s per call per node : 801782 +Grid : Message : Average mflops/s per call per node : 818095 +Grid : Message : Average mflops/s per call per node : 667291 +Grid : Message : Average mflops/s per call per node (full): 314193 +Grid : Message : Average mflops/s per call per node (full): 433869 +Grid : Message : Average mflops/s per call per node (full): 441706 +Grid : Message : Average mflops/s per call per node (full): 303151 +Grid : Message : Stencil 13.4886 GB/s per node +Grid : Message : Stencil 16.6148 GB/s per node +Grid : Message : Stencil 17.7462 GB/s per node +Grid : Message : Stencil 11.9484 GB/s per node +Grid : Message : Average mflops/s per call per node : 666020 +Grid : Message : Average mflops/s per call per node : 806462 +Grid : Message : Average mflops/s per 
call per node : 821792 +Grid : Message : Average mflops/s per call per node : 667299 +Grid : Message : Average mflops/s per call per node (full): 316359 +Grid : Message : Average mflops/s per call per node (full): 433941 +Grid : Message : Average mflops/s per call per node (full): 446264 +Grid : Message : Average mflops/s per call per node (full): 301910 +Grid : Message : Stencil 13.7286 GB/s per node +Grid : Message : Stencil 16.5805 GB/s per node +Grid : Message : Stencil 17.4372 GB/s per node +Grid : Message : Stencil 13.3038 GB/s per node +Grid : Message : Average mflops/s per call per node : 664492 +Grid : Message : Average mflops/s per call per node : 805003 +Grid : Message : Average mflops/s per call per node : 819586 +Grid : Message : Average mflops/s per call per node : 660935 +Grid : Message : Average mflops/s per call per node (full): 314319 +Grid : Message : Average mflops/s per call per node (full): 434736 +Grid : Message : Average mflops/s per call per node (full): 435192 +Grid : Message : Average mflops/s per call per node (full): 306127 +Grid : Message : Stencil 13.5175 GB/s per node +Grid : Message : Stencil 16.9837 GB/s per node +Grid : Message : Stencil 17.7435 GB/s per node +Grid : Message : Stencil 13.2658 GB/s per node +Grid : Message : Average mflops/s per call per node : 667154 +Grid : Message : Average mflops/s per call per node : 803437 +Grid : Message : Average mflops/s per call per node : 822090 +Grid : Message : Average mflops/s per call per node : 660675 +Grid : Message : Average mflops/s per call per node (full): 315651 +Grid : Message : Average mflops/s per call per node (full): 437381 +Grid : Message : Average mflops/s per call per node (full): 444310 +Grid : Message : Average mflops/s per call per node (full): 305053 +Grid : Message : Stencil 12.718 GB/s per node +Grid : Message : Stencil 13.4649 GB/s per node +Grid : Message : Stencil 17.1457 GB/s per node +Grid : Message : Stencil 13.5868 GB/s per node +Grid : Message : Average 
mflops/s per call per node : 670258 +Grid : Message : Average mflops/s per call per node : 808276 +Grid : Message : Average mflops/s per call per node : 823242 +Grid : Message : Average mflops/s per call per node : 662121 +Grid : Message : Average mflops/s per call per node (full): 314845 +Grid : Message : Average mflops/s per call per node (full): 389476 +Grid : Message : Average mflops/s per call per node (full): 442441 +Grid : Message : Average mflops/s per call per node (full): 306231 +Grid : Message : Stencil 13.9675 GB/s per node +Grid : Message : Stencil 17.0644 GB/s per node +Grid : Message : Stencil 17.6995 GB/s per node +Grid : Message : Stencil 12.7459 GB/s per node +Grid : Message : Average mflops/s per call per node : 663407 +Grid : Message : Average mflops/s per call per node : 808739 +Grid : Message : Average mflops/s per call per node : 819816 +Grid : Message : Average mflops/s per call per node : 667386 +Grid : Message : Average mflops/s per call per node (full): 315984 +Grid : Message : Average mflops/s per call per node (full): 440156 +Grid : Message : Average mflops/s per call per node (full): 445980 +Grid : Message : Average mflops/s per call per node (full): 305803 +Grid : Message : Stencil 14.1632 GB/s per node +Grid : Message : Stencil 15.7343 GB/s per node +Grid : Message : Stencil 18.4273 GB/s per node +Grid : Message : Stencil 14.4432 GB/s per node +Grid : Message : Average mflops/s per call per node : 664937 +Grid : Message : Average mflops/s per call per node : 805253 +Grid : Message : Average mflops/s per call per node : 828233 +Grid : Message : Average mflops/s per call per node : 664513 +Grid : Message : Average mflops/s per call per node (full): 315813 +Grid : Message : Average mflops/s per call per node (full): 415168 +Grid : Message : Average mflops/s per call per node (full): 447412 +Grid : Message : Average mflops/s per call per node (full): 307131 +Grid : Message : Stencil 12.4829 GB/s per node +Grid : Message : Stencil 16.5053 
GB/s per node +Grid : Message : Stencil 17.2726 GB/s per node +Grid : Message : Stencil 14.047 GB/s per node +Grid : Message : Average mflops/s per call per node : 670858 +Grid : Message : Average mflops/s per call per node : 801237 +Grid : Message : Average mflops/s per call per node : 822537 +Grid : Message : Average mflops/s per call per node : 662443 +Grid : Message : Average mflops/s per call per node (full): 313466 +Grid : Message : Average mflops/s per call per node (full): 433622 +Grid : Message : Average mflops/s per call per node (full): 443957 +Grid : Message : Average mflops/s per call per node (full): 306748 +Grid : Message : Stencil 12.8276 GB/s per node +Grid : Message : Stencil 17.1233 GB/s per node +Grid : Message : Stencil 18.0525 GB/s per node +Grid : Message : Stencil 13.1973 GB/s per node +Grid : Message : Average mflops/s per call per node : 668220 +Grid : Message : Average mflops/s per call per node : 800530 +Grid : Message : Average mflops/s per call per node : 824932 +Grid : Message : Average mflops/s per call per node : 665198 +Grid : Message : Average mflops/s per call per node (full): 315263 +Grid : Message : Average mflops/s per call per node (full): 438804 +Grid : Message : Average mflops/s per call per node (full): 446733 +Grid : Message : Average mflops/s per call per node (full): 306020 +Grid : Message : Stencil 13.3079 GB/s per node +Grid : Message : Stencil 17.2573 GB/s per node +Grid : Message : Stencil 17.4127 GB/s per node +Grid : Message : Stencil 12.1482 GB/s per node +Grid : Message : Average mflops/s per call per node : 668893 +Grid : Message : Average mflops/s per call per node : 806882 +Grid : Message : Average mflops/s per call per node : 819174 +Grid : Message : Average mflops/s per call per node : 664891 +Grid : Message : Average mflops/s per call per node (full): 315958 +Grid : Message : Average mflops/s per call per node (full): 438004 +Grid : Message : Average mflops/s per call per node (full): 443438 +Grid : 
Message : Average mflops/s per call per node (full): 303070 +Grid : Message : Stencil 13.6325 GB/s per node +Grid : Message : Stencil 13.3344 GB/s per node +Grid : Message : Stencil 17.6158 GB/s per node +Grid : Message : Stencil 13.8209 GB/s per node +Grid : Message : Average mflops/s per call per node : 664060 +Grid : Message : Average mflops/s per call per node : 806781 +Grid : Message : Average mflops/s per call per node : 819397 +Grid : Message : Average mflops/s per call per node : 659580 +Grid : Message : Average mflops/s per call per node (full): 315675 +Grid : Message : Average mflops/s per call per node (full): 387874 +Grid : Message : Average mflops/s per call per node (full): 443746 +Grid : Message : Average mflops/s per call per node (full): 305557 +Grid : Message : Stencil 12.8853 GB/s per node +Grid : Message : Stencil 16.7132 GB/s per node +Grid : Message : Stencil 17.1811 GB/s per node +Grid : Message : Stencil 13.2971 GB/s per node +Grid : Message : Average mflops/s per call per node : 668706 +Grid : Message : Average mflops/s per call per node : 805007 +Grid : Message : Average mflops/s per call per node : 820607 +Grid : Message : Average mflops/s per call per node : 660532 +Grid : Message : Average mflops/s per call per node (full): 315550 +Grid : Message : Average mflops/s per call per node (full): 436813 +Grid : Message : Average mflops/s per call per node (full): 441014 +Grid : Message : Average mflops/s per call per node (full): 303187 +Grid : Message : Stencil 12.3614 GB/s per node +Grid : Message : Stencil 16.6071 GB/s per node +Grid : Message : Stencil 19.2775 GB/s per node +Grid : Message : Stencil 12.0097 GB/s per node +Grid : Message : Average mflops/s per call per node : 667522 +Grid : Message : Average mflops/s per call per node : 802893 +Grid : Message : Average mflops/s per call per node : 818988 +Grid : Message : Average mflops/s per call per node : 663512 +Grid : Message : Average mflops/s per call per node (full): 312664 +Grid : 
Message : Average mflops/s per call per node (full): 435656 +Grid : Message : Average mflops/s per call per node (full): 449837 +Grid : Message : Average mflops/s per call per node (full): 300482 +Grid : Message : Stencil 13.3121 GB/s per node +Grid : Message : Stencil 17.2751 GB/s per node +Grid : Message : Stencil 17.4883 GB/s per node +Grid : Message : Stencil 13.266 GB/s per node +Grid : Message : Average mflops/s per call per node : 665762 +Grid : Message : Average mflops/s per call per node : 801452 +Grid : Message : Average mflops/s per call per node : 827915 +Grid : Message : Average mflops/s per call per node : 663806 +Grid : Message : Average mflops/s per call per node (full): 314945 +Grid : Message : Average mflops/s per call per node (full): 437703 +Grid : Message : Average mflops/s per call per node (full): 443208 +Grid : Message : Average mflops/s per call per node (full): 305507 +Grid : Message : Stencil 13.3057 GB/s per node +Grid : Message : Stencil 16.4284 GB/s per node +Grid : Message : Stencil 17.7674 GB/s per node +Grid : Message : Stencil 12.8116 GB/s per node +Grid : Message : Average mflops/s per call per node : 665746 +Grid : Message : Average mflops/s per call per node : 806662 +Grid : Message : Average mflops/s per call per node : 820972 +Grid : Message : Average mflops/s per call per node : 663597 +Grid : Message : Average mflops/s per call per node (full): 315385 +Grid : Message : Average mflops/s per call per node (full): 434085 +Grid : Message : Average mflops/s per call per node (full): 445698 +Grid : Message : Average mflops/s per call per node (full): 304993 +Grid : Message : Stencil 13.4849 GB/s per node +Grid : Message : Stencil 9.38352 GB/s per node +Grid : Message : Stencil 17.645 GB/s per node +Grid : Message : Stencil 13.2214 GB/s per node +Grid : Message : Average mflops/s per call per node : 663124 +Grid : Message : Average mflops/s per call per node : 809265 +Grid : Message : Average mflops/s per call per node : 823398 
+Grid : Message : Average mflops/s per call per node : 664345 +Grid : Message : Average mflops/s per call per node (full): 313884 +Grid : Message : Average mflops/s per call per node (full): 301790 +Grid : Message : Average mflops/s per call per node (full): 443023 +Grid : Message : Average mflops/s per call per node (full): 304831 +Grid : Message : Stencil 13.7732 GB/s per node +Grid : Message : Stencil 15.1391 GB/s per node +Grid : Message : Stencil 18.2708 GB/s per node +Grid : Message : Stencil 14.2929 GB/s per node +Grid : Message : Average mflops/s per call per node : 664574 +Grid : Message : Average mflops/s per call per node : 800209 +Grid : Message : Average mflops/s per call per node : 821873 +Grid : Message : Average mflops/s per call per node : 659978 +Grid : Message : Average mflops/s per call per node (full): 315542 +Grid : Message : Average mflops/s per call per node (full): 417439 +Grid : Message : Average mflops/s per call per node (full): 444265 +Grid : Message : Average mflops/s per call per node (full): 306522 +Grid : Message : Stencil 12.7582 GB/s per node +Grid : Message : Stencil 16.5874 GB/s per node +Grid : Message : Stencil 17.4572 GB/s per node +Grid : Message : Stencil 12.9153 GB/s per node +Grid : Message : Average mflops/s per call per node : 665604 +Grid : Message : Average mflops/s per call per node : 803460 +Grid : Message : Average mflops/s per call per node : 822525 +Grid : Message : Average mflops/s per call per node : 664818 +Grid : Message : Average mflops/s per call per node (full): 313795 +Grid : Message : Average mflops/s per call per node (full): 435096 +Grid : Message : Average mflops/s per call per node (full): 444071 +Grid : Message : Average mflops/s per call per node (full): 305431 +Grid : Message : Stencil 13.8559 GB/s per node +Grid : Message : Stencil 17.0497 GB/s per node +Grid : Message : Stencil 16.8828 GB/s per node +Grid : Message : Stencil 12.9922 GB/s per node +Grid : Message : Average mflops/s per call per 
node : 659253 +Grid : Message : Average mflops/s per call per node : 805404 +Grid : Message : Average mflops/s per call per node : 819715 +Grid : Message : Average mflops/s per call per node : 664332 +Grid : Message : Average mflops/s per call per node (full): 313997 +Grid : Message : Average mflops/s per call per node (full): 435225 +Grid : Message : Average mflops/s per call per node (full): 439067 +Grid : Message : Average mflops/s per call per node (full): 305231 +Grid : Message : Stencil 13.0007 GB/s per node +Grid : Message : Stencil 8.69655 GB/s per node +Grid : Message : Stencil 17.5308 GB/s per node +Grid : Message : Stencil 12.2284 GB/s per node +Grid : Message : Average mflops/s per call per node : 665359 +Grid : Message : Average mflops/s per call per node : 813330 +Grid : Message : Average mflops/s per call per node : 824392 +Grid : Message : Average mflops/s per call per node : 666731 +Grid : Message : Average mflops/s per call per node (full): 314743 +Grid : Message : Average mflops/s per call per node (full): 284313 +Grid : Message : Average mflops/s per call per node (full): 444231 +Grid : Message : Average mflops/s per call per node (full): 303602 +Grid : Message : Stencil 14.0179 GB/s per node +Grid : Message : Stencil 16.4361 GB/s per node +Grid : Message : Stencil 17.3788 GB/s per node +Grid : Message : Stencil 13.9105 GB/s per node +Grid : Message : Average mflops/s per call per node : 661910 +Grid : Message : Average mflops/s per call per node : 801215 +Grid : Message : Average mflops/s per call per node : 825305 +Grid : Message : Average mflops/s per call per node : 662391 +Grid : Message : Average mflops/s per call per node (full): 315920 +Grid : Message : Average mflops/s per call per node (full): 432965 +Grid : Message : Average mflops/s per call per node (full): 440216 +Grid : Message : Average mflops/s per call per node (full): 306253 +Grid : Message : Stencil 14.7332 GB/s per node +Grid : Message : Stencil 17.0097 GB/s per node +Grid : 
Message : Stencil 18.3556 GB/s per node +Grid : Message : Stencil 12.3901 GB/s per node +Grid : Message : Average mflops/s per call per node : 660267 +Grid : Message : Average mflops/s per call per node : 804795 +Grid : Message : Average mflops/s per call per node : 822348 +Grid : Message : Average mflops/s per call per node : 665592 +Grid : Message : Average mflops/s per call per node (full): 315863 +Grid : Message : Average mflops/s per call per node (full): 439178 +Grid : Message : Average mflops/s per call per node (full): 448396 +Grid : Message : Average mflops/s per call per node (full): 303397 +Grid : Message : Stencil 13.3785 GB/s per node +Grid : Message : Stencil 17.2514 GB/s per node +Grid : Message : Stencil 17.3377 GB/s per node +Grid : Message : Stencil 14.8653 GB/s per node +Grid : Message : Average mflops/s per call per node : 668976 +Grid : Message : Average mflops/s per call per node : 804297 +Grid : Message : Average mflops/s per call per node : 823445 +Grid : Message : Average mflops/s per call per node : 659754 +Grid : Message : Average mflops/s per call per node (full): 316369 +Grid : Message : Average mflops/s per call per node (full): 439002 +Grid : Message : Average mflops/s per call per node (full): 443709 +Grid : Message : Average mflops/s per call per node (full): 306861 +Grid : Message : Stencil 14.4473 GB/s per node +Grid : Message : Stencil 18.4221 GB/s per node +Grid : Message : Stencil 18.7098 GB/s per node +Grid : Message : Stencil 12.6884 GB/s per node +Grid : Message : Average mflops/s per call per node : 665908 +Grid : Message : Average mflops/s per call per node : 797772 +Grid : Message : Average mflops/s per call per node : 821779 +Grid : Message : Average mflops/s per call per node : 665933 +Grid : Message : Average mflops/s per call per node (full): 316513 +Grid : Message : Average mflops/s per call per node (full): 439698 +Grid : Message : Average mflops/s per call per node (full): 446877 +Grid : Message : Average mflops/s 
per call per node (full): 304760 +Grid : Message : Stencil 12.9669 GB/s per node +Grid : Message : Stencil 16.3339 GB/s per node +Grid : Message : Stencil 17.6991 GB/s per node +Grid : Message : Stencil 13.9045 GB/s per node +Grid : Message : Average mflops/s per call per node : 671505 +Grid : Message : Average mflops/s per call per node : 807941 +Grid : Message : Average mflops/s per call per node : 819920 +Grid : Message : Average mflops/s per call per node : 661226 +Grid : Message : Average mflops/s per call per node (full): 315894 +Grid : Message : Average mflops/s per call per node (full): 433325 +Grid : Message : Average mflops/s per call per node (full): 445189 +Grid : Message : Average mflops/s per call per node (full): 305056 +Grid : Message : Stencil 13.389 GB/s per node +Grid : Message : Stencil 18.4311 GB/s per node +Grid : Message : Stencil 17.3176 GB/s per node +Grid : Message : Stencil 12.428 GB/s per node +Grid : Message : Average mflops/s per call per node : 669226 +Grid : Message : Average mflops/s per call per node : 802742 +Grid : Message : Average mflops/s per call per node : 821110 +Grid : Message : Average mflops/s per call per node : 667706 +Grid : Message : Average mflops/s per call per node (full): 316891 +Grid : Message : Average mflops/s per call per node (full): 441526 +Grid : Message : Average mflops/s per call per node (full): 442371 +Grid : Message : Average mflops/s per call per node (full): 303524 +Grid : Message : Stencil 13.2963 GB/s per node +Grid : Message : Stencil 16.6181 GB/s per node +Grid : Message : Stencil 17.976 GB/s per node +Grid : Message : Stencil 12.7132 GB/s per node +Grid : Message : Average mflops/s per call per node : 668142 +Grid : Message : Average mflops/s per call per node : 803065 +Grid : Message : Average mflops/s per call per node : 826397 +Grid : Message : Average mflops/s per call per node : 662717 +Grid : Message : Average mflops/s per call per node (full): 316414 +Grid : Message : Average mflops/s 
per call per node (full): 433704 +Grid : Message : Average mflops/s per call per node (full): 449195 +Grid : Message : Average mflops/s per call per node (full): 303450 +Grid : Message : Stencil 13.407 GB/s per node +Grid : Message : Stencil 16.42 GB/s per node +Grid : Message : Stencil 18.5532 GB/s per node +Grid : Message : Stencil 12.0239 GB/s per node +Grid : Message : Average mflops/s per call per node : 667511 +Grid : Message : Average mflops/s per call per node : 808754 +Grid : Message : Average mflops/s per call per node : 817579 +Grid : Message : Average mflops/s per call per node : 664542 +Grid : Message : Average mflops/s per call per node (full): 316072 +Grid : Message : Average mflops/s per call per node (full): 434448 +Grid : Message : Average mflops/s per call per node (full): 448228 +Grid : Message : Average mflops/s per call per node (full): 302366 +Grid : Message : Stencil 12.6577 GB/s per node +Grid : Message : Stencil 16.6525 GB/s per node +Grid : Message : Stencil 18.0754 GB/s per node +Grid : Message : Stencil 13.328 GB/s per node +Grid : Message : Average mflops/s per call per node : 670269 +Grid : Message : Average mflops/s per call per node : 806147 +Grid : Message : Average mflops/s per call per node : 822395 +Grid : Message : Average mflops/s per call per node : 663421 +Grid : Message : Average mflops/s per call per node (full): 315162 +Grid : Message : Average mflops/s per call per node (full): 434256 +Grid : Message : Average mflops/s per call per node (full): 443098 +Grid : Message : Average mflops/s per call per node (full): 306801 +Grid : Message : Stencil 12.5144 GB/s per node +Grid : Message : Stencil 16.1335 GB/s per node +Grid : Message : Stencil 17.0158 GB/s per node +Grid : Message : Stencil 11.8636 GB/s per node +Grid : Message : Average mflops/s per call per node : 671085 +Grid : Message : Average mflops/s per call per node : 800540 +Grid : Message : Average mflops/s per call per node : 821557 +Grid : Message : Average 
mflops/s per call per node : 668438 +Grid : Message : Average mflops/s per call per node (full): 314542 +Grid : Message : Average mflops/s per call per node (full): 429368 +Grid : Message : Average mflops/s per call per node (full): 441488 +Grid : Message : Average mflops/s per call per node (full): 300842 +Grid : Message : Stencil 12.9445 GB/s per node +Grid : Message : Stencil 16.5649 GB/s per node +Grid : Message : Stencil 17.6354 GB/s per node +Grid : Message : Stencil 12.6334 GB/s per node +Grid : Message : Average mflops/s per call per node : 670258 +Grid : Message : Average mflops/s per call per node : 807901 +Grid : Message : Average mflops/s per call per node : 820599 +Grid : Message : Average mflops/s per call per node : 664701 +Grid : Message : Average mflops/s per call per node (full): 316240 +Grid : Message : Average mflops/s per call per node (full): 436680 +Grid : Message : Average mflops/s per call per node (full): 444438 +Grid : Message : Average mflops/s per call per node (full): 304377 +Grid : Message : Stencil 12.9229 GB/s per node +Grid : Message : Stencil 16.476 GB/s per node +Grid : Message : Stencil 17.3273 GB/s per node +Grid : Message : Stencil 12.1586 GB/s per node +Grid : Message : Average mflops/s per call per node : 671176 +Grid : Message : Average mflops/s per call per node : 804588 +Grid : Message : Average mflops/s per call per node : 824591 +Grid : Message : Average mflops/s per call per node : 668922 +Grid : Message : Average mflops/s per call per node (full): 316452 +Grid : Message : Average mflops/s per call per node (full): 431674 +Grid : Message : Average mflops/s per call per node (full): 443769 +Grid : Message : Average mflops/s per call per node (full): 304071 +Grid : Message : Stencil 13.7492 GB/s per node +Grid : Message : Stencil 16.0205 GB/s per node +Grid : Message : Stencil 17.7682 GB/s per node +Grid : Message : Stencil 12.5497 GB/s per node +Grid : Message : Average mflops/s per call per node : 667267 +Grid : 
Message : Average mflops/s per call per node : 804527 +Grid : Message : Average mflops/s per call per node : 822570 +Grid : Message : Average mflops/s per call per node : 668442 +Grid : Message : Average mflops/s per call per node (full): 315015 +Grid : Message : Average mflops/s per call per node (full): 428099 +Grid : Message : Average mflops/s per call per node (full): 446695 +Grid : Message : Average mflops/s per call per node (full): 304877 +Grid : Message : Stencil 16.0749 GB/s per node +Grid : Message : Stencil 16.7101 GB/s per node +Grid : Message : Stencil 17.5895 GB/s per node +Grid : Message : Stencil 12.6814 GB/s per node +Grid : Message : Average mflops/s per call per node : 664654 +Grid : Message : Average mflops/s per call per node : 804560 +Grid : Message : Average mflops/s per call per node : 820827 +Grid : Message : Average mflops/s per call per node : 663960 +Grid : Message : Average mflops/s per call per node (full): 317931 +Grid : Message : Average mflops/s per call per node (full): 436764 +Grid : Message : Average mflops/s per call per node (full): 444919 +Grid : Message : Average mflops/s per call per node (full): 304502 +Grid : Message : Stencil 13.8452 GB/s per node +Grid : Message : Stencil 16.5895 GB/s per node +Grid : Message : Stencil 17.6105 GB/s per node +Grid : Message : Stencil 12.7434 GB/s per node +Grid : Message : Average mflops/s per call per node : 662680 +Grid : Message : Average mflops/s per call per node : 804209 +Grid : Message : Average mflops/s per call per node : 826764 +Grid : Message : Average mflops/s per call per node : 665513 +Grid : Message : Average mflops/s per call per node (full): 315795 +Grid : Message : Average mflops/s per call per node (full): 435470 +Grid : Message : Average mflops/s per call per node (full): 446375 +Grid : Message : Average mflops/s per call per node (full): 306667 +Grid : Message : Stencil 12.696 GB/s per node +Grid : Message : Stencil 17.8462 GB/s per node +Grid : Message : Stencil 
17.4581 GB/s per node +Grid : Message : Stencil 12.4621 GB/s per node +Grid : Message : Average mflops/s per call per node : 668704 +Grid : Message : Average mflops/s per call per node : 801632 +Grid : Message : Average mflops/s per call per node : 821837 +Grid : Message : Average mflops/s per call per node : 665805 +Grid : Message : Average mflops/s per call per node (full): 314958 +Grid : Message : Average mflops/s per call per node (full): 439770 +Grid : Message : Average mflops/s per call per node (full): 444800 +Grid : Message : Average mflops/s per call per node (full): 304450 +Grid : Message : Stencil 13.195 GB/s per node +Grid : Message : Stencil 16.3625 GB/s per node +Grid : Message : Stencil 17.9368 GB/s per node +Grid : Message : Stencil 12.7961 GB/s per node +Grid : Message : Average mflops/s per call per node : 664900 +Grid : Message : Average mflops/s per call per node : 802314 +Grid : Message : Average mflops/s per call per node : 822453 +Grid : Message : Average mflops/s per call per node : 662885 +Grid : Message : Average mflops/s per call per node (full): 312141 +Grid : Message : Average mflops/s per call per node (full): 433682 +Grid : Message : Average mflops/s per call per node (full): 445677 +Grid : Message : Average mflops/s per call per node (full): 304522 +Grid : Message : Stencil 13.1627 GB/s per node +Grid : Message : Stencil 16.5828 GB/s per node +Grid : Message : Stencil 18.8868 GB/s per node +Grid : Message : Stencil 12.1598 GB/s per node +Grid : Message : Average mflops/s per call per node : 667526 +Grid : Message : Average mflops/s per call per node : 804126 +Grid : Message : Average mflops/s per call per node : 826797 +Grid : Message : Average mflops/s per call per node : 666150 +Grid : Message : Average mflops/s per call per node (full): 315161 +Grid : Message : Average mflops/s per call per node (full): 436237 +Grid : Message : Average mflops/s per call per node (full): 450383 +Grid : Message : Average mflops/s per call per node 
(full): 303367 +Grid : Message : Stencil 14.3233 GB/s per node +Grid : Message : Stencil 17.0524 GB/s per node +Grid : Message : Stencil 17.7682 GB/s per node +Grid : Message : Stencil 13.6733 GB/s per node +Grid : Message : Average mflops/s per call per node : 662040 +Grid : Message : Average mflops/s per call per node : 802978 +Grid : Message : Average mflops/s per call per node : 827704 +Grid : Message : Average mflops/s per call per node : 663840 +Grid : Message : Average mflops/s per call per node (full): 316403 +Grid : Message : Average mflops/s per call per node (full): 438560 +Grid : Message : Average mflops/s per call per node (full): 447686 +Grid : Message : Average mflops/s per call per node (full): 306479 +Grid : Message : Stencil 14.0166 GB/s per node +Grid : Message : Stencil 17.7287 GB/s per node +Grid : Message : Stencil 17.3918 GB/s per node +Grid : Message : Stencil 13.5967 GB/s per node +Grid : Message : Average mflops/s per call per node : 662621 +Grid : Message : Average mflops/s per call per node : 803027 +Grid : Message : Average mflops/s per call per node : 828930 +Grid : Message : Average mflops/s per call per node : 666495 +Grid : Message : Average mflops/s per call per node (full): 315597 +Grid : Message : Average mflops/s per call per node (full): 439267 +Grid : Message : Average mflops/s per call per node (full): 444967 +Grid : Message : Average mflops/s per call per node (full): 306897 +Grid : Message : Stencil 12.7254 GB/s per node +Grid : Message : Stencil 16.7791 GB/s per node +Grid : Message : Stencil 17.8821 GB/s per node +Grid : Message : Stencil 12.1478 GB/s per node +Grid : Message : Average mflops/s per call per node : 667639 +Grid : Message : Average mflops/s per call per node : 808509 +Grid : Message : Average mflops/s per call per node : 822811 +Grid : Message : Average mflops/s per call per node : 669762 +Grid : Message : Average mflops/s per call per node (full): 314173 +Grid : Message : Average mflops/s per call per node 
(full): 437440 +Grid : Message : Average mflops/s per call per node (full): 445486 +Grid : Message : Average mflops/s per call per node (full): 303879 +Grid : Message : Stencil 12.7592 GB/s per node +Grid : Message : Stencil 17.3833 GB/s per node +Grid : Message : Stencil 18.0241 GB/s per node +Grid : Message : Stencil 11.543 GB/s per node +Grid : Message : Average mflops/s per call per node : 666434 +Grid : Message : Average mflops/s per call per node : 800539 +Grid : Message : Average mflops/s per call per node : 824422 +Grid : Message : Average mflops/s per call per node : 664539 +Grid : Message : Average mflops/s per call per node (full): 314702 +Grid : Message : Average mflops/s per call per node (full): 436754 +Grid : Message : Average mflops/s per call per node (full): 438523 +Grid : Message : Average mflops/s per call per node (full): 295096 +Grid : Message : Stencil 13.6429 GB/s per node +Grid : Message : Stencil 17.1495 GB/s per node +Grid : Message : Stencil 17.9739 GB/s per node +Grid : Message : Stencil 12.6514 GB/s per node +Grid : Message : Average mflops/s per call per node : 666162 +Grid : Message : Average mflops/s per call per node : 803120 +Grid : Message : Average mflops/s per call per node : 820330 +Grid : Message : Average mflops/s per call per node : 665553 +Grid : Message : Average mflops/s per call per node (full): 315273 +Grid : Message : Average mflops/s per call per node (full): 436051 +Grid : Message : Average mflops/s per call per node (full): 445863 +Grid : Message : Average mflops/s per call per node (full): 305580 +Grid : Message : Stencil 13.2379 GB/s per node +Grid : Message : Stencil 17.1006 GB/s per node +Grid : Message : Stencil 17.7767 GB/s per node +Grid : Message : Stencil 13.0215 GB/s per node +Grid : Message : Average mflops/s per call per node : 664785 +Grid : Message : Average mflops/s per call per node : 800435 +Grid : Message : Average mflops/s per call per node : 823378 +Grid : Message : Average mflops/s per call per 
node : 668449 +Grid : Message : Average mflops/s per call per node (full): 314326 +Grid : Message : Average mflops/s per call per node (full): 438018 +Grid : Message : Average mflops/s per call per node (full): 446735 +Grid : Message : Average mflops/s per call per node (full): 305534 +Grid : Message : Stencil 13.5944 GB/s per node +Grid : Message : Stencil 17.0677 GB/s per node +Grid : Message : Stencil 17.4291 GB/s per node +Grid : Message : Stencil 12.6877 GB/s per node +Grid : Message : Average mflops/s per call per node : 666724 +Grid : Message : Average mflops/s per call per node : 802857 +Grid : Message : Average mflops/s per call per node : 822555 +Grid : Message : Average mflops/s per call per node : 666741 +Grid : Message : Average mflops/s per call per node (full): 315873 +Grid : Message : Average mflops/s per call per node (full): 438425 +Grid : Message : Average mflops/s per call per node (full): 445125 +Grid : Message : Average mflops/s per call per node (full): 305291 +Grid : Message : Stencil 13.3329 GB/s per node +Grid : Message : Stencil 13.2059 GB/s per node +Grid : Message : Stencil 17.6162 GB/s per node +Grid : Message : Stencil 13.8388 GB/s per node +Grid : Message : Average mflops/s per call per node : 665180 +Grid : Message : Average mflops/s per call per node : 810879 +Grid : Message : Average mflops/s per call per node : 821220 +Grid : Message : Average mflops/s per call per node : 658602 +Grid : Message : Average mflops/s per call per node (full): 315310 +Grid : Message : Average mflops/s per call per node (full): 385291 +Grid : Message : Average mflops/s per call per node (full): 445718 +Grid : Message : Average mflops/s per call per node (full): 306487 +Grid : Message : Stencil 13.7408 GB/s per node +Grid : Message : Stencil 14.8181 GB/s per node +Grid : Message : Stencil 17.5145 GB/s per node +Grid : Message : Stencil 12.4369 GB/s per node +Grid : Message : Average mflops/s per call per node : 663746 +Grid : Message : Average mflops/s 
per call per node : 802457 +Grid : Message : Average mflops/s per call per node : 818522 +Grid : Message : Average mflops/s per call per node : 663289 +Grid : Message : Average mflops/s per call per node (full): 315225 +Grid : Message : Average mflops/s per call per node (full): 412019 +Grid : Message : Average mflops/s per call per node (full): 445551 +Grid : Message : Average mflops/s per call per node (full): 303850 +Grid : Message : Stencil 13.3968 GB/s per node +Grid : Message : Stencil 16.7506 GB/s per node +Grid : Message : Stencil 16.9505 GB/s per node +Grid : Message : Stencil 13.0396 GB/s per node +Grid : Message : Average mflops/s per call per node : 663215 +Grid : Message : Average mflops/s per call per node : 803452 +Grid : Message : Average mflops/s per call per node : 816860 +Grid : Message : Average mflops/s per call per node : 662333 +Grid : Message : Average mflops/s per call per node (full): 314571 +Grid : Message : Average mflops/s per call per node (full): 436462 +Grid : Message : Average mflops/s per call per node (full): 438271 +Grid : Message : Average mflops/s per call per node (full): 304220 +Grid : Message : Stencil 13.2516 GB/s per node +Grid : Message : Stencil 18.4683 GB/s per node +Grid : Message : Stencil 20.1626 GB/s per node +Grid : Message : Stencil 12.3178 GB/s per node +Grid : Message : Average mflops/s per call per node : 665033 +Grid : Message : Average mflops/s per call per node : 805690 +Grid : Message : Average mflops/s per call per node : 821338 +Grid : Message : Average mflops/s per call per node : 667382 +Grid : Message : Average mflops/s per call per node (full): 313801 +Grid : Message : Average mflops/s per call per node (full): 442080 +Grid : Message : Average mflops/s per call per node (full): 451370 +Grid : Message : Average mflops/s per call per node (full): 304378 +Grid : Message : Stencil 12.4761 GB/s per node +Grid : Message : Stencil 17.063 GB/s per node +Grid : Message : Stencil 17.2998 GB/s per node +Grid : 
Message : Stencil 13.0949 GB/s per node +Grid : Message : Average mflops/s per call per node : 669469 +Grid : Message : Average mflops/s per call per node : 807207 +Grid : Message : Average mflops/s per call per node : 818619 +Grid : Message : Average mflops/s per call per node : 662204 +Grid : Message : Average mflops/s per call per node (full): 313152 +Grid : Message : Average mflops/s per call per node (full): 438548 +Grid : Message : Average mflops/s per call per node (full): 438583 +Grid : Message : Average mflops/s per call per node (full): 304705 +Grid : Message : Stencil 13.9202 GB/s per node +Grid : Message : Stencil 16.7782 GB/s per node +Grid : Message : Stencil 18.2511 GB/s per node +Grid : Message : Stencil 12.3493 GB/s per node +Grid : Message : Average mflops/s per call per node : 665295 +Grid : Message : Average mflops/s per call per node : 800954 +Grid : Message : Average mflops/s per call per node : 820727 +Grid : Message : Average mflops/s per call per node : 667125 +Grid : Message : Average mflops/s per call per node (full): 315384 +Grid : Message : Average mflops/s per call per node (full): 433980 +Grid : Message : Average mflops/s per call per node (full): 447066 +Grid : Message : Average mflops/s per call per node (full): 304322 +Grid : Message : Stencil 12.2009 GB/s per node +Grid : Message : Stencil 17.8894 GB/s per node +Grid : Message : Stencil 17.733 GB/s per node +Grid : Message : Stencil 12.1817 GB/s per node +Grid : Message : Average mflops/s per call per node : 669787 +Grid : Message : Average mflops/s per call per node : 799609 +Grid : Message : Average mflops/s per call per node : 818548 +Grid : Message : Average mflops/s per call per node : 667138 +Grid : Message : Average mflops/s per call per node (full): 311653 +Grid : Message : Average mflops/s per call per node (full): 438106 +Grid : Message : Average mflops/s per call per node (full): 444209 +Grid : Message : Average mflops/s per call per node (full): 304070 +Grid : Message 
: Stencil 14.1701 GB/s per node +Grid : Message : Stencil 17.9506 GB/s per node +Grid : Message : Stencil 18.1714 GB/s per node +Grid : Message : Stencil 12.1593 GB/s per node +Grid : Message : Average mflops/s per call per node : 664334 +Grid : Message : Average mflops/s per call per node : 804515 +Grid : Message : Average mflops/s per call per node : 814830 +Grid : Message : Average mflops/s per call per node : 662621 +Grid : Message : Average mflops/s per call per node (full): 316632 +Grid : Message : Average mflops/s per call per node (full): 441857 +Grid : Message : Average mflops/s per call per node (full): 444490 +Grid : Message : Average mflops/s per call per node (full): 302082 +Grid : Message : Stencil 14.012 GB/s per node +Grid : Message : Stencil 16.5864 GB/s per node +Grid : Message : Stencil 17.2479 GB/s per node +Grid : Message : Stencil 12.3873 GB/s per node +Grid : Message : Average mflops/s per call per node : 666742 +Grid : Message : Average mflops/s per call per node : 799540 +Grid : Message : Average mflops/s per call per node : 815256 +Grid : Message : Average mflops/s per call per node : 666099 +Grid : Message : Average mflops/s per call per node (full): 317252 +Grid : Message : Average mflops/s per call per node (full): 435178 +Grid : Message : Average mflops/s per call per node (full): 442294 +Grid : Message : Average mflops/s per call per node (full): 304068 +Grid : Message : Stencil 13.1105 GB/s per node +Grid : Message : Stencil 17.0605 GB/s per node +Grid : Message : Stencil 17.4726 GB/s per node +Grid : Message : Stencil 13.9293 GB/s per node +Grid : Message : Average mflops/s per call per node : 665709 +Grid : Message : Average mflops/s per call per node : 802462 +Grid : Message : Average mflops/s per call per node : 821252 +Grid : Message : Average mflops/s per call per node : 660691 +Grid : Message : Average mflops/s per call per node (full): 314770 +Grid : Message : Average mflops/s per call per node (full): 438294 +Grid : Message 
: Average mflops/s per call per node (full): 445239 +Grid : Message : Average mflops/s per call per node (full): 304619 +Grid : Message : Stencil 12.7894 GB/s per node +Grid : Message : Stencil 13.614 GB/s per node +Grid : Message : Stencil 18.0553 GB/s per node +Grid : Message : Stencil 14.1673 GB/s per node +Grid : Message : Average mflops/s per call per node : 670212 +Grid : Message : Average mflops/s per call per node : 810108 +Grid : Message : Average mflops/s per call per node : 822900 +Grid : Message : Average mflops/s per call per node : 663050 +Grid : Message : Average mflops/s per call per node (full): 315724 +Grid : Message : Average mflops/s per call per node (full): 392709 +Grid : Message : Average mflops/s per call per node (full): 447040 +Grid : Message : Average mflops/s per call per node (full): 307350 +Grid : Message : Stencil 12.6603 GB/s per node +Grid : Message : Stencil 17.4165 GB/s per node +Grid : Message : Stencil 18.1449 GB/s per node +Grid : Message : Stencil 11.9735 GB/s per node +Grid : Message : Average mflops/s per call per node : 668180 +Grid : Message : Average mflops/s per call per node : 803492 +Grid : Message : Average mflops/s per call per node : 821395 +Grid : Message : Average mflops/s per call per node : 666526 +Grid : Message : Average mflops/s per call per node (full): 314481 +Grid : Message : Average mflops/s per call per node (full): 438394 +Grid : Message : Average mflops/s per call per node (full): 446746 +Grid : Message : Average mflops/s per call per node (full): 301260 +Grid : Message : Stencil 14.5401 GB/s per node +Grid : Message : Stencil 18.8236 GB/s per node +Grid : Message : Stencil 17.4676 GB/s per node +Grid : Message : Stencil 12.6769 GB/s per node +Grid : Message : Average mflops/s per call per node : 662498 +Grid : Message : Average mflops/s per call per node : 806168 +Grid : Message : Average mflops/s per call per node : 820787 +Grid : Message : Average mflops/s per call per node : 667998 +Grid : Message 
: Average mflops/s per call per node (full): 316850 +Grid : Message : Average mflops/s per call per node (full): 442363 +Grid : Message : Average mflops/s per call per node (full): 444850 +Grid : Message : Average mflops/s per call per node (full): 304213 +Grid : Message : Stencil 14.4069 GB/s per node +Grid : Message : Stencil 17.4845 GB/s per node +Grid : Message : Stencil 18.0741 GB/s per node +Grid : Message : Stencil 11.8651 GB/s per node +Grid : Message : Average mflops/s per call per node : 667022 +Grid : Message : Average mflops/s per call per node : 806637 +Grid : Message : Average mflops/s per call per node : 822004 +Grid : Message : Average mflops/s per call per node : 669972 +Grid : Message : Average mflops/s per call per node (full): 317121 +Grid : Message : Average mflops/s per call per node (full): 439636 +Grid : Message : Average mflops/s per call per node (full): 439085 +Grid : Message : Average mflops/s per call per node (full): 300811 +Grid : Message : Stencil 13.3774 GB/s per node +Grid : Message : Stencil 11.3078 GB/s per node +Grid : Message : Stencil 17.8454 GB/s per node +Grid : Message : Stencil 12.793 GB/s per node +Grid : Message : Average mflops/s per call per node : 668798 +Grid : Message : Average mflops/s per call per node : 810858 +Grid : Message : Average mflops/s per call per node : 818351 +Grid : Message : Average mflops/s per call per node : 666968 +Grid : Message : Average mflops/s per call per node (full): 316429 +Grid : Message : Average mflops/s per call per node (full): 346497 +Grid : Message : Average mflops/s per call per node (full): 445644 +Grid : Message : Average mflops/s per call per node (full): 305367 +Grid : Message : Stencil 13.1528 GB/s per node +Grid : Message : Stencil 17.1553 GB/s per node +Grid : Message : Stencil 18.4991 GB/s per node +Grid : Message : Stencil 12.9203 GB/s per node +Grid : Message : Average mflops/s per call per node : 666186 +Grid : Message : Average mflops/s per call per node : 807320 
+Grid : Message : Average mflops/s per call per node : 825001 +Grid : Message : Average mflops/s per call per node : 663941 +Grid : Message : Average mflops/s per call per node (full): 315095 +Grid : Message : Average mflops/s per call per node (full): 436414 +Grid : Message : Average mflops/s per call per node (full): 446638 +Grid : Message : Average mflops/s per call per node (full): 305184 +Grid : Message : Stencil 14.3512 GB/s per node +Grid : Message : Stencil 8.08582 GB/s per node +Grid : Message : Stencil 17.2098 GB/s per node +Grid : Message : Stencil 12.9761 GB/s per node +Grid : Message : Average mflops/s per call per node : 664921 +Grid : Message : Average mflops/s per call per node : 811951 +Grid : Message : Average mflops/s per call per node : 820638 +Grid : Message : Average mflops/s per call per node : 662247 +Grid : Message : Average mflops/s per call per node (full): 317454 +Grid : Message : Average mflops/s per call per node (full): 269204 +Grid : Message : Average mflops/s per call per node (full): 442623 +Grid : Message : Average mflops/s per call per node (full): 302247 +Grid : Message : Stencil 13.6523 GB/s per node +Grid : Message : Stencil 12.1863 GB/s per node +Grid : Message : Stencil 18.2226 GB/s per node +Grid : Message : Stencil 14.5407 GB/s per node +Grid : Message : Average mflops/s per call per node : 668120 +Grid : Message : Average mflops/s per call per node : 810323 +Grid : Message : Average mflops/s per call per node : 829405 +Grid : Message : Average mflops/s per call per node : 660045 +Grid : Message : Average mflops/s per call per node (full): 316597 +Grid : Message : Average mflops/s per call per node (full): 365500 +Grid : Message : Average mflops/s per call per node (full): 449915 +Grid : Message : Average mflops/s per call per node (full): 306907 +Grid : Message : Stencil 13.3278 GB/s per node +Grid : Message : Stencil 16.5323 GB/s per node +Grid : Message : Stencil 17.9966 GB/s per node +Grid : Message : Stencil 12.5225 
GB/s per node +Grid : Message : Average mflops/s per call per node : 668482 +Grid : Message : Average mflops/s per call per node : 806322 +Grid : Message : Average mflops/s per call per node : 817287 +Grid : Message : Average mflops/s per call per node : 667975 +Grid : Message : Average mflops/s per call per node (full): 315330 +Grid : Message : Average mflops/s per call per node (full): 435216 +Grid : Message : Average mflops/s per call per node (full): 444432 +Grid : Message : Average mflops/s per call per node (full): 304925 +Grid : Message : Stencil 12.8598 GB/s per node +Grid : Message : Stencil 17.2892 GB/s per node +Grid : Message : Stencil 17.7201 GB/s per node +Grid : Message : Stencil 12.3424 GB/s per node +Grid : Message : Average mflops/s per call per node : 670194 +Grid : Message : Average mflops/s per call per node : 805940 +Grid : Message : Average mflops/s per call per node : 816860 +Grid : Message : Average mflops/s per call per node : 670715 +Grid : Message : Average mflops/s per call per node (full): 316157 +Grid : Message : Average mflops/s per call per node (full): 440925 +Grid : Message : Average mflops/s per call per node (full): 445275 +Grid : Message : Average mflops/s per call per node (full): 304670 +Grid : Message : Stencil 12.7423 GB/s per node +Grid : Message : Stencil 17.3792 GB/s per node +Grid : Message : Stencil 17.3747 GB/s per node +Grid : Message : Stencil 12.4431 GB/s per node +Grid : Message : Average mflops/s per call per node : 670648 +Grid : Message : Average mflops/s per call per node : 801905 +Grid : Message : Average mflops/s per call per node : 824759 +Grid : Message : Average mflops/s per call per node : 664315 +Grid : Message : Average mflops/s per call per node (full): 315426 +Grid : Message : Average mflops/s per call per node (full): 435399 +Grid : Message : Average mflops/s per call per node (full): 444680 +Grid : Message : Average mflops/s per call per node (full): 303482 +Grid : Message : Stencil 12.3573 GB/s 
per node +Grid : Message : Stencil 17.5181 GB/s per node +Grid : Message : Stencil 17.2293 GB/s per node +Grid : Message : Stencil 12.7231 GB/s per node +Grid : Message : Average mflops/s per call per node : 670000 +Grid : Message : Average mflops/s per call per node : 800657 +Grid : Message : Average mflops/s per call per node : 822182 +Grid : Message : Average mflops/s per call per node : 661835 +Grid : Message : Average mflops/s per call per node (full): 313166 +Grid : Message : Average mflops/s per call per node (full): 439074 +Grid : Message : Average mflops/s per call per node (full): 442154 +Grid : Message : Average mflops/s per call per node (full): 301674 +Grid : Message : Stencil 13.6493 GB/s per node +Grid : Message : Stencil 10.9089 GB/s per node +Grid : Message : Stencil 16.1128 GB/s per node +Grid : Message : Stencil 12.0041 GB/s per node +Grid : Message : Average mflops/s per call per node : 668159 +Grid : Message : Average mflops/s per call per node : 814123 +Grid : Message : Average mflops/s per call per node : 826384 +Grid : Message : Average mflops/s per call per node : 668706 +Grid : Message : Average mflops/s per call per node (full): 315568 +Grid : Message : Average mflops/s per call per node (full): 337505 +Grid : Message : Average mflops/s per call per node (full): 417961 +Grid : Message : Average mflops/s per call per node (full): 302061 +Grid : Message : Stencil 13.8407 GB/s per node +Grid : Message : Stencil 12.1634 GB/s per node +Grid : Message : Stencil 17.1616 GB/s per node +Grid : Message : Stencil 13.0852 GB/s per node +Grid : Message : Average mflops/s per call per node : 666780 +Grid : Message : Average mflops/s per call per node : 804701 +Grid : Message : Average mflops/s per call per node : 820652 +Grid : Message : Average mflops/s per call per node : 663281 +Grid : Message : Average mflops/s per call per node (full): 317003 +Grid : Message : Average mflops/s per call per node (full): 364820 +Grid : Message : Average mflops/s per 
call per node (full): 443175 +Grid : Message : Average mflops/s per call per node (full): 305023 +Grid : Message : Stencil 14.5455 GB/s per node +Grid : Message : Stencil 16.8268 GB/s per node +Grid : Message : Stencil 17.682 GB/s per node +Grid : Message : Stencil 12.4407 GB/s per node +Grid : Message : Average mflops/s per call per node : 665431 +Grid : Message : Average mflops/s per call per node : 805665 +Grid : Message : Average mflops/s per call per node : 828909 +Grid : Message : Average mflops/s per call per node : 668099 +Grid : Message : Average mflops/s per call per node (full): 317273 +Grid : Message : Average mflops/s per call per node (full): 437080 +Grid : Message : Average mflops/s per call per node (full): 446707 +Grid : Message : Average mflops/s per call per node (full): 305324 +Grid : Message : Stencil 12.7316 GB/s per node +Grid : Message : Stencil 17.028 GB/s per node +Grid : Message : Stencil 17.9791 GB/s per node +Grid : Message : Stencil 13.2229 GB/s per node +Grid : Message : Average mflops/s per call per node : 668528 +Grid : Message : Average mflops/s per call per node : 803356 +Grid : Message : Average mflops/s per call per node : 820340 +Grid : Message : Average mflops/s per call per node : 659327 +Grid : Message : Average mflops/s per call per node (full): 314400 +Grid : Message : Average mflops/s per call per node (full): 439496 +Grid : Message : Average mflops/s per call per node (full): 445632 +Grid : Message : Average mflops/s per call per node (full): 305293 +Grid : Message : Stencil 12.5028 GB/s per node +Grid : Message : Stencil 15.9845 GB/s per node +Grid : Message : Stencil 18.9131 GB/s per node +Grid : Message : Stencil 12.9499 GB/s per node +Grid : Message : Average mflops/s per call per node : 668710 +Grid : Message : Average mflops/s per call per node : 807360 +Grid : Message : Average mflops/s per call per node : 821502 +Grid : Message : Average mflops/s per call per node : 668162 +Grid : Message : Average mflops/s per 
call per node (full): 313671 +Grid : Message : Average mflops/s per call per node (full): 426907 +Grid : Message : Average mflops/s per call per node (full): 449433 +Grid : Message : Average mflops/s per call per node (full): 306237 +Grid : Message : Stencil 12.4208 GB/s per node +Grid : Message : Stencil 17.8274 GB/s per node +Grid : Message : Stencil 18.3711 GB/s per node +Grid : Message : Stencil 12.2259 GB/s per node +Grid : Message : Average mflops/s per call per node : 666707 +Grid : Message : Average mflops/s per call per node : 800849 +Grid : Message : Average mflops/s per call per node : 818819 +Grid : Message : Average mflops/s per call per node : 668127 +Grid : Message : Average mflops/s per call per node (full): 312720 +Grid : Message : Average mflops/s per call per node (full): 439674 +Grid : Message : Average mflops/s per call per node (full): 446184 +Grid : Message : Average mflops/s per call per node (full): 303664 +Grid : Message : Stencil 12.5386 GB/s per node +Grid : Message : Stencil 16.7961 GB/s per node +Grid : Message : Stencil 17.123 GB/s per node +Grid : Message : Stencil 12.187 GB/s per node +Grid : Message : Average mflops/s per call per node : 669033 +Grid : Message : Average mflops/s per call per node : 806477 +Grid : Message : Average mflops/s per call per node : 824230 +Grid : Message : Average mflops/s per call per node : 665084 +Grid : Message : Average mflops/s per call per node (full): 313818 +Grid : Message : Average mflops/s per call per node (full): 437820 +Grid : Message : Average mflops/s per call per node (full): 442023 +Grid : Message : Average mflops/s per call per node (full): 303308 +Grid : Message : Stencil 13.9051 GB/s per node +Grid : Message : Stencil 14.1793 GB/s per node +Grid : Message : Stencil 17.3172 GB/s per node +Grid : Message : Stencil 13.5939 GB/s per node +Grid : Message : Average mflops/s per call per node : 662731 +Grid : Message : Average mflops/s per call per node : 806139 +Grid : Message : Average 
mflops/s per call per node : 826644 +Grid : Message : Average mflops/s per call per node : 662587 +Grid : Message : Average mflops/s per call per node (full): 316606 +Grid : Message : Average mflops/s per call per node (full): 403966 +Grid : Message : Average mflops/s per call per node (full): 444894 +Grid : Message : Average mflops/s per call per node (full): 306687 +Grid : Message : Stencil 12.713 GB/s per node +Grid : Message : Stencil 17.2275 GB/s per node +Grid : Message : Stencil 17.3628 GB/s per node +Grid : Message : Stencil 12.5522 GB/s per node +Grid : Message : Average mflops/s per call per node : 667285 +Grid : Message : Average mflops/s per call per node : 799222 +Grid : Message : Average mflops/s per call per node : 825492 +Grid : Message : Average mflops/s per call per node : 669083 +Grid : Message : Average mflops/s per call per node (full): 313531 +Grid : Message : Average mflops/s per call per node (full): 437657 +Grid : Message : Average mflops/s per call per node (full): 444462 +Grid : Message : Average mflops/s per call per node (full): 305101 +Grid : Message : Stencil 13.1261 GB/s per node +Grid : Message : Stencil 17.3873 GB/s per node +Grid : Message : Stencil 18.1324 GB/s per node +Grid : Message : Stencil 13.4224 GB/s per node +Grid : Message : Average mflops/s per call per node : 662768 +Grid : Message : Average mflops/s per call per node : 805359 +Grid : Message : Average mflops/s per call per node : 818840 +Grid : Message : Average mflops/s per call per node : 664498 +Grid : Message : Average mflops/s per call per node (full): 314186 +Grid : Message : Average mflops/s per call per node (full): 437617 +Grid : Message : Average mflops/s per call per node (full): 446608 +Grid : Message : Average mflops/s per call per node (full): 306661 +Grid : Message : Stencil 13.5709 GB/s per node +Grid : Message : Stencil 16.8654 GB/s per node +Grid : Message : Stencil 17.8845 GB/s per node +Grid : Message : Stencil 13.5503 GB/s per node +Grid : 
Message : Average mflops/s per call per node : 659042 +Grid : Message : Average mflops/s per call per node : 808434 +Grid : Message : Average mflops/s per call per node : 819807 +Grid : Message : Average mflops/s per call per node : 661740 +Grid : Message : Average mflops/s per call per node (full): 314042 +Grid : Message : Average mflops/s per call per node (full): 439197 +Grid : Message : Average mflops/s per call per node (full): 445970 +Grid : Message : Average mflops/s per call per node (full): 305878 +Grid : Message : Stencil 14.4503 GB/s per node +Grid : Message : Stencil 16.376 GB/s per node +Grid : Message : Stencil 17.6563 GB/s per node +Grid : Message : Stencil 12.3184 GB/s per node +Grid : Message : Average mflops/s per call per node : 661059 +Grid : Message : Average mflops/s per call per node : 810627 +Grid : Message : Average mflops/s per call per node : 824265 +Grid : Message : Average mflops/s per call per node : 666180 +Grid : Message : Average mflops/s per call per node (full): 316759 +Grid : Message : Average mflops/s per call per node (full): 433890 +Grid : Message : Average mflops/s per call per node (full): 446667 +Grid : Message : Average mflops/s per call per node (full): 303785 +Grid : Message : Stencil 12.8266 GB/s per node +Grid : Message : Stencil 16.7177 GB/s per node +Grid : Message : Stencil 17.8434 GB/s per node +Grid : Message : Stencil 12.9931 GB/s per node +Grid : Message : Average mflops/s per call per node : 666756 +Grid : Message : Average mflops/s per call per node : 801430 +Grid : Message : Average mflops/s per call per node : 823620 +Grid : Message : Average mflops/s per call per node : 664019 +Grid : Message : Average mflops/s per call per node (full): 314420 +Grid : Message : Average mflops/s per call per node (full): 436518 +Grid : Message : Average mflops/s per call per node (full): 445382 +Grid : Message : Average mflops/s per call per node (full): 305582 +Grid : Message : Stencil 14.6546 GB/s per node +Grid : Message 
: Stencil 17.191 GB/s per node +Grid : Message : Stencil 17.9299 GB/s per node +Grid : Message : Stencil 12.0428 GB/s per node +Grid : Message : Average mflops/s per call per node : 664627 +Grid : Message : Average mflops/s per call per node : 796301 +Grid : Message : Average mflops/s per call per node : 822949 +Grid : Message : Average mflops/s per call per node : 668229 +Grid : Message : Average mflops/s per call per node (full): 316538 +Grid : Message : Average mflops/s per call per node (full): 435682 +Grid : Message : Average mflops/s per call per node (full): 436898 +Grid : Message : Average mflops/s per call per node (full): 301776 +Grid : Message : Stencil 13.7608 GB/s per node +Grid : Message : Stencil 12.5123 GB/s per node +Grid : Message : Stencil 17.7201 GB/s per node +Grid : Message : Stencil 12.7085 GB/s per node +Grid : Message : Average mflops/s per call per node : 666215 +Grid : Message : Average mflops/s per call per node : 810734 +Grid : Message : Average mflops/s per call per node : 820886 +Grid : Message : Average mflops/s per call per node : 664598 +Grid : Message : Average mflops/s per call per node (full): 315574 +Grid : Message : Average mflops/s per call per node (full): 371951 +Grid : Message : Average mflops/s per call per node (full): 445993 +Grid : Message : Average mflops/s per call per node (full): 304257 +Grid : Message : Stencil 13.5357 GB/s per node +Grid : Message : Stencil 17.5152 GB/s per node +Grid : Message : Stencil 17.4573 GB/s per node +Grid : Message : Stencil 13.2572 GB/s per node +Grid : Message : Average mflops/s per call per node : 663986 +Grid : Message : Average mflops/s per call per node : 803379 +Grid : Message : Average mflops/s per call per node : 820745 +Grid : Message : Average mflops/s per call per node : 664000 +Grid : Message : Average mflops/s per call per node (full): 315566 +Grid : Message : Average mflops/s per call per node (full): 438003 +Grid : Message : Average mflops/s per call per node (full): 
440111 +Grid : Message : Average mflops/s per call per node (full): 306596 +Grid : Message : Stencil 14.4558 GB/s per node +Grid : Message : Stencil 10.5721 GB/s per node +Grid : Message : Stencil 17.7932 GB/s per node +Grid : Message : Stencil 14.146 GB/s per node +Grid : Message : Average mflops/s per call per node : 665281 +Grid : Message : Average mflops/s per call per node : 810654 +Grid : Message : Average mflops/s per call per node : 818240 +Grid : Message : Average mflops/s per call per node : 661650 +Grid : Message : Average mflops/s per call per node (full): 317179 +Grid : Message : Average mflops/s per call per node (full): 329822 +Grid : Message : Average mflops/s per call per node (full): 446330 +Grid : Message : Average mflops/s per call per node (full): 306634 +Grid : Message : Stencil 13.8995 GB/s per node +Grid : Message : Stencil 17.8563 GB/s per node +Grid : Message : Stencil 17.7542 GB/s per node +Grid : Message : Stencil 13.4172 GB/s per node +Grid : Message : Average mflops/s per call per node : 664829 +Grid : Message : Average mflops/s per call per node : 802048 +Grid : Message : Average mflops/s per call per node : 819825 +Grid : Message : Average mflops/s per call per node : 657813 +Grid : Message : Average mflops/s per call per node (full): 316146 +Grid : Message : Average mflops/s per call per node (full): 439566 +Grid : Message : Average mflops/s per call per node (full): 443082 +Grid : Message : Average mflops/s per call per node (full): 304548 +Grid : Message : Stencil 13.312 GB/s per node +Grid : Message : Stencil 16.6707 GB/s per node +Grid : Message : Stencil 18.6503 GB/s per node +Grid : Message : Stencil 13.0266 GB/s per node +Grid : Message : Average mflops/s per call per node : 667661 +Grid : Message : Average mflops/s per call per node : 800789 +Grid : Message : Average mflops/s per call per node : 823530 +Grid : Message : Average mflops/s per call per node : 662616 +Grid : Message : Average mflops/s per call per node (full): 
315744 +Grid : Message : Average mflops/s per call per node (full): 435038 +Grid : Message : Average mflops/s per call per node (full): 447909 +Grid : Message : Average mflops/s per call per node (full): 303186 +Grid : Message : Stencil 12.6566 GB/s per node +Grid : Message : Stencil 17.8213 GB/s per node +Grid : Message : Stencil 17.9463 GB/s per node +Grid : Message : Stencil 13.4799 GB/s per node +Grid : Message : Average mflops/s per call per node : 666857 +Grid : Message : Average mflops/s per call per node : 803446 +Grid : Message : Average mflops/s per call per node : 814357 +Grid : Message : Average mflops/s per call per node : 658835 +Grid : Message : Average mflops/s per call per node (full): 314154 +Grid : Message : Average mflops/s per call per node (full): 440308 +Grid : Message : Average mflops/s per call per node (full): 444496 +Grid : Message : Average mflops/s per call per node (full): 305207 +Grid : Message : Stencil 12.8388 GB/s per node +Grid : Message : Stencil 17.0231 GB/s per node +Grid : Message : Stencil 18.4924 GB/s per node +Grid : Message : Stencil 12.3361 GB/s per node +Grid : Message : Average mflops/s per call per node : 669083 +Grid : Message : Average mflops/s per call per node : 804015 +Grid : Message : Average mflops/s per call per node : 821390 +Grid : Message : Average mflops/s per call per node : 667814 +Grid : Message : Average mflops/s per call per node (full): 315517 +Grid : Message : Average mflops/s per call per node (full): 434609 +Grid : Message : Average mflops/s per call per node (full): 447579 +Grid : Message : Average mflops/s per call per node (full): 303505 +Grid : Message : Stencil 13.7879 GB/s per node +Grid : Message : Stencil 16.5433 GB/s per node +Grid : Message : Stencil 17.908 GB/s per node +Grid : Message : Stencil 13.8103 GB/s per node +Grid : Message : Average mflops/s per call per node : 662509 +Grid : Message : Average mflops/s per call per node : 806916 +Grid : Message : Average mflops/s per call per 
node : 818770 +Grid : Message : Average mflops/s per call per node : 662643 +Grid : Message : Average mflops/s per call per node (full): 315036 +Grid : Message : Average mflops/s per call per node (full): 435654 +Grid : Message : Average mflops/s per call per node (full): 445632 +Grid : Message : Average mflops/s per call per node (full): 306895 +Grid : Message : Stencil 13.4876 GB/s per node +Grid : Message : Stencil 16.8819 GB/s per node +Grid : Message : Stencil 17.6484 GB/s per node +Grid : Message : Stencil 12.5259 GB/s per node +Grid : Message : Average mflops/s per call per node : 663824 +Grid : Message : Average mflops/s per call per node : 808188 +Grid : Message : Average mflops/s per call per node : 823638 +Grid : Message : Average mflops/s per call per node : 662030 +Grid : Message : Average mflops/s per call per node (full): 314759 +Grid : Message : Average mflops/s per call per node (full): 438549 +Grid : Message : Average mflops/s per call per node (full): 445451 +Grid : Message : Average mflops/s per call per node (full): 303669 +Grid : Message : Stencil 12.3675 GB/s per node +Grid : Message : Stencil 16.4276 GB/s per node +Grid : Message : Stencil 17.5417 GB/s per node +Grid : Message : Stencil 12.1676 GB/s per node +Grid : Message : Average mflops/s per call per node : 669071 +Grid : Message : Average mflops/s per call per node : 806133 +Grid : Message : Average mflops/s per call per node : 820619 +Grid : Message : Average mflops/s per call per node : 667351 +Grid : Message : Average mflops/s per call per node (full): 312679 +Grid : Message : Average mflops/s per call per node (full): 434983 +Grid : Message : Average mflops/s per call per node (full): 444984 +Grid : Message : Average mflops/s per call per node (full): 303637 +Grid : Message : Stencil 13.0709 GB/s per node +Grid : Message : Stencil 17.5349 GB/s per node +Grid : Message : Stencil 17.7934 GB/s per node +Grid : Message : Stencil 12.1751 GB/s per node +Grid : Message : Average mflops/s 
per call per node : 668344 +Grid : Message : Average mflops/s per call per node : 807623 +Grid : Message : Average mflops/s per call per node : 824665 +Grid : Message : Average mflops/s per call per node : 666928 +Grid : Message : Average mflops/s per call per node (full): 315271 +Grid : Message : Average mflops/s per call per node (full): 438487 +Grid : Message : Average mflops/s per call per node (full): 445571 +Grid : Message : Average mflops/s per call per node (full): 303691 +Grid : Message : Stencil 12.9941 GB/s per node +Grid : Message : Stencil 14.9089 GB/s per node +Grid : Message : Stencil 17.9194 GB/s per node +Grid : Message : Stencil 12.0044 GB/s per node +Grid : Message : Average mflops/s per call per node : 664444 +Grid : Message : Average mflops/s per call per node : 808597 +Grid : Message : Average mflops/s per call per node : 823230 +Grid : Message : Average mflops/s per call per node : 665374 +Grid : Message : Average mflops/s per call per node (full): 314230 +Grid : Message : Average mflops/s per call per node (full): 414717 +Grid : Message : Average mflops/s per call per node (full): 447203 +Grid : Message : Average mflops/s per call per node (full): 302432 +Grid : Message : Stencil 12.563 GB/s per node +Grid : Message : Stencil 17.4419 GB/s per node +Grid : Message : Stencil 16.3727 GB/s per node +Grid : Message : Stencil 12.8744 GB/s per node +Grid : Message : Average mflops/s per call per node : 667188 +Grid : Message : Average mflops/s per call per node : 803660 +Grid : Message : Average mflops/s per call per node : 828617 +Grid : Message : Average mflops/s per call per node : 665791 +Grid : Message : Average mflops/s per call per node (full): 313854 +Grid : Message : Average mflops/s per call per node (full): 439680 +Grid : Message : Average mflops/s per call per node (full): 432089 +Grid : Message : Average mflops/s per call per node (full): 305079 +Grid : Message : Stencil 13.1201 GB/s per node +Grid : Message : Stencil 16.4825 GB/s per 
node +Grid : Message : Stencil 17.2141 GB/s per node +Grid : Message : Stencil 12.8028 GB/s per node +Grid : Message : Average mflops/s per call per node : 662617 +Grid : Message : Average mflops/s per call per node : 805707 +Grid : Message : Average mflops/s per call per node : 819975 +Grid : Message : Average mflops/s per call per node : 663049 +Grid : Message : Average mflops/s per call per node (full): 314151 +Grid : Message : Average mflops/s per call per node (full): 432487 +Grid : Message : Average mflops/s per call per node (full): 441567 +Grid : Message : Average mflops/s per call per node (full): 300225 +Grid : Message : Stencil 13.494 GB/s per node +Grid : Message : Stencil 17.4044 GB/s per node +Grid : Message : Stencil 17.391 GB/s per node +Grid : Message : Stencil 13.3663 GB/s per node +Grid : Message : Average mflops/s per call per node : 659710 +Grid : Message : Average mflops/s per call per node : 804423 +Grid : Message : Average mflops/s per call per node : 819191 +Grid : Message : Average mflops/s per call per node : 659182 +Grid : Message : Average mflops/s per call per node (full): 313886 +Grid : Message : Average mflops/s per call per node (full): 439049 +Grid : Message : Average mflops/s per call per node (full): 443820 +Grid : Message : Average mflops/s per call per node (full): 304886 +Grid : Message : Stencil 13.3257 GB/s per node +Grid : Message : Stencil 16.5307 GB/s per node +Grid : Message : Stencil 17.5747 GB/s per node +Grid : Message : Stencil 14.698 GB/s per node +Grid : Message : Average mflops/s per call per node : 660623 +Grid : Message : Average mflops/s per call per node : 807067 +Grid : Message : Average mflops/s per call per node : 818778 +Grid : Message : Average mflops/s per call per node : 662805 +Grid : Message : Average mflops/s per call per node (full): 314423 +Grid : Message : Average mflops/s per call per node (full): 432619 +Grid : Message : Average mflops/s per call per node (full): 444622 +Grid : Message : Average 
mflops/s per call per node (full): 306808 +Grid : Message : Stencil 13.282 GB/s per node +Grid : Message : Stencil 16.7499 GB/s per node +Grid : Message : Stencil 18.5887 GB/s per node +Grid : Message : Stencil 13.7764 GB/s per node +Grid : Message : Average mflops/s per call per node : 663404 +Grid : Message : Average mflops/s per call per node : 804737 +Grid : Message : Average mflops/s per call per node : 820664 +Grid : Message : Average mflops/s per call per node : 666191 +Grid : Message : Average mflops/s per call per node (full): 314406 +Grid : Message : Average mflops/s per call per node (full): 435830 +Grid : Message : Average mflops/s per call per node (full): 448424 +Grid : Message : Average mflops/s per call per node (full): 306801 +Grid : Message : Stencil 13.2265 GB/s per node +Grid : Message : Stencil 17.4913 GB/s per node +Grid : Message : Stencil 17.7804 GB/s per node +Grid : Message : Stencil 12.8921 GB/s per node +Grid : Message : Average mflops/s per call per node : 661827 +Grid : Message : Average mflops/s per call per node : 804946 +Grid : Message : Average mflops/s per call per node : 823488 +Grid : Message : Average mflops/s per call per node : 667613 +Grid : Message : Average mflops/s per call per node (full): 314051 +Grid : Message : Average mflops/s per call per node (full): 438066 +Grid : Message : Average mflops/s per call per node (full): 446464 +Grid : Message : Average mflops/s per call per node (full): 305594 +Grid : Message : Stencil 13.2206 GB/s per node +Grid : Message : Stencil 17.0128 GB/s per node +Grid : Message : Stencil 18.401 GB/s per node +Grid : Message : Stencil 13.7508 GB/s per node +Grid : Message : Average mflops/s per call per node : 663232 +Grid : Message : Average mflops/s per call per node : 798296 +Grid : Message : Average mflops/s per call per node : 822587 +Grid : Message : Average mflops/s per call per node : 660829 +Grid : Message : Average mflops/s per call per node (full): 314733 +Grid : Message : Average 
mflops/s per call per node (full): 437104 +Grid : Message : Average mflops/s per call per node (full): 447399 +Grid : Message : Average mflops/s per call per node (full): 306421 +Grid : Message : Stencil 12.5284 GB/s per node +Grid : Message : Stencil 16.9445 GB/s per node +Grid : Message : Stencil 17.8376 GB/s per node +Grid : Message : Stencil 12.8427 GB/s per node +Grid : Message : Average mflops/s per call per node : 668494 +Grid : Message : Average mflops/s per call per node : 807032 +Grid : Message : Average mflops/s per call per node : 823168 +Grid : Message : Average mflops/s per call per node : 663439 +Grid : Message : Average mflops/s per call per node (full): 313697 +Grid : Message : Average mflops/s per call per node (full): 437154 +Grid : Message : Average mflops/s per call per node (full): 446625 +Grid : Message : Average mflops/s per call per node (full): 303679 +Grid : Message : Stencil 13.193 GB/s per node +Grid : Message : Stencil 16.6136 GB/s per node +Grid : Message : Stencil 17.4497 GB/s per node +Grid : Message : Stencil 12.2637 GB/s per node +Grid : Message : Average mflops/s per call per node : 666149 +Grid : Message : Average mflops/s per call per node : 805048 +Grid : Message : Average mflops/s per call per node : 825446 +Grid : Message : Average mflops/s per call per node : 663333 +Grid : Message : Average mflops/s per call per node (full): 315673 +Grid : Message : Average mflops/s per call per node (full): 436055 +Grid : Message : Average mflops/s per call per node (full): 444547 +Grid : Message : Average mflops/s per call per node (full): 303758 +Grid : Message : Stencil 13.4605 GB/s per node +Grid : Message : Stencil 16.4581 GB/s per node +Grid : Message : Stencil 18.7226 GB/s per node +Grid : Message : Stencil 12.3411 GB/s per node +Grid : Message : Average mflops/s per call per node : 667912 +Grid : Message : Average mflops/s per call per node : 807641 +Grid : Message : Average mflops/s per call per node : 827192 +Grid : Message : 
Average mflops/s per call per node : 663584 +Grid : Message : Average mflops/s per call per node (full): 313717 +Grid : Message : Average mflops/s per call per node (full): 434972 +Grid : Message : Average mflops/s per call per node (full): 450158 +Grid : Message : Average mflops/s per call per node (full): 303868 +Grid : Message : Stencil 12.5401 GB/s per node +Grid : Message : Stencil 11.6789 GB/s per node +Grid : Message : Stencil 17.4623 GB/s per node +Grid : Message : Stencil 14.3254 GB/s per node +Grid : Message : Average mflops/s per call per node : 671632 +Grid : Message : Average mflops/s per call per node : 813861 +Grid : Message : Average mflops/s per call per node : 829044 +Grid : Message : Average mflops/s per call per node : 657813 +Grid : Message : Average mflops/s per call per node (full): 314374 +Grid : Message : Average mflops/s per call per node (full): 354116 +Grid : Message : Average mflops/s per call per node (full): 446129 +Grid : Message : Average mflops/s per call per node (full): 306680 +Grid : Message : Stencil 12.7752 GB/s per node +Grid : Message : Stencil 16.5636 GB/s per node +Grid : Message : Stencil 17.2492 GB/s per node +Grid : Message : Stencil 12.8813 GB/s per node +Grid : Message : Average mflops/s per call per node : 667719 +Grid : Message : Average mflops/s per call per node : 803560 +Grid : Message : Average mflops/s per call per node : 827933 +Grid : Message : Average mflops/s per call per node : 661443 +Grid : Message : Average mflops/s per call per node (full): 314865 +Grid : Message : Average mflops/s per call per node (full): 434684 +Grid : Message : Average mflops/s per call per node (full): 443851 +Grid : Message : Average mflops/s per call per node (full): 304589 +Grid : Message : Stencil 14.3801 GB/s per node +Grid : Message : Stencil 9.78086 GB/s per node +Grid : Message : Stencil 17.8346 GB/s per node +Grid : Message : Stencil 12.1078 GB/s per node +Grid : Message : Average mflops/s per call per node : 662537 +Grid 
: Message : Average mflops/s per call per node : 813351 +Grid : Message : Average mflops/s per call per node : 821884 +Grid : Message : Average mflops/s per call per node : 664214 +Grid : Message : Average mflops/s per call per node (full): 315769 +Grid : Message : Average mflops/s per call per node (full): 311062 +Grid : Message : Average mflops/s per call per node (full): 443457 +Grid : Message : Average mflops/s per call per node (full): 302417 +Grid : Message : Stencil 13.2075 GB/s per node +Grid : Message : Stencil 12.8025 GB/s per node +Grid : Message : Stencil 17.5761 GB/s per node +Grid : Message : Stencil 15.2306 GB/s per node +Grid : Message : Average mflops/s per call per node : 662740 +Grid : Message : Average mflops/s per call per node : 807134 +Grid : Message : Average mflops/s per call per node : 820897 +Grid : Message : Average mflops/s per call per node : 658357 +Grid : Message : Average mflops/s per call per node (full): 315204 +Grid : Message : Average mflops/s per call per node (full): 377502 +Grid : Message : Average mflops/s per call per node (full): 445586 +Grid : Message : Average mflops/s per call per node (full): 307638 +Grid : Message : Stencil 12.9764 GB/s per node +Grid : Message : Stencil 17.1323 GB/s per node +Grid : Message : Stencil 18.2949 GB/s per node +Grid : Message : Stencil 12.4703 GB/s per node +Grid : Message : Average mflops/s per call per node : 665907 +Grid : Message : Average mflops/s per call per node : 800038 +Grid : Message : Average mflops/s per call per node : 822861 +Grid : Message : Average mflops/s per call per node : 662657 +Grid : Message : Average mflops/s per call per node (full): 313891 +Grid : Message : Average mflops/s per call per node (full): 436920 +Grid : Message : Average mflops/s per call per node (full): 444330 +Grid : Message : Average mflops/s per call per node (full): 298764 +Grid : Message : Stencil 15.4686 GB/s per node +Grid : Message : Stencil 16.8296 GB/s per node +Grid : Message : Stencil 
17.0712 GB/s per node +Grid : Message : Stencil 12.3795 GB/s per node +Grid : Message : Average mflops/s per call per node : 662064 +Grid : Message : Average mflops/s per call per node : 804013 +Grid : Message : Average mflops/s per call per node : 824463 +Grid : Message : Average mflops/s per call per node : 670516 +Grid : Message : Average mflops/s per call per node (full): 317322 +Grid : Message : Average mflops/s per call per node (full): 437408 +Grid : Message : Average mflops/s per call per node (full): 441839 +Grid : Message : Average mflops/s per call per node (full): 305327 +Grid : Message : Stencil 13.1037 GB/s per node +Grid : Message : Stencil 16.9373 GB/s per node +Grid : Message : Stencil 17.7799 GB/s per node +Grid : Message : Stencil 13.197 GB/s per node +Grid : Message : Average mflops/s per call per node : 665706 +Grid : Message : Average mflops/s per call per node : 806766 +Grid : Message : Average mflops/s per call per node : 819907 +Grid : Message : Average mflops/s per call per node : 662447 +Grid : Message : Average mflops/s per call per node (full): 315174 +Grid : Message : Average mflops/s per call per node (full): 438405 +Grid : Message : Average mflops/s per call per node (full): 445012 +Grid : Message : Average mflops/s per call per node (full): 304715 +Grid : Message : Stencil 13.7707 GB/s per node +Grid : Message : Stencil 16.8391 GB/s per node +Grid : Message : Stencil 17.6309 GB/s per node +Grid : Message : Stencil 12.15 GB/s per node +Grid : Message : Average mflops/s per call per node : 661970 +Grid : Message : Average mflops/s per call per node : 807154 +Grid : Message : Average mflops/s per call per node : 819629 +Grid : Message : Average mflops/s per call per node : 664143 +Grid : Message : Average mflops/s per call per node (full): 314464 +Grid : Message : Average mflops/s per call per node (full): 435569 +Grid : Message : Average mflops/s per call per node (full): 441555 +Grid : Message : Average mflops/s per call per node 
(full): 302821 +Grid : Message : Stencil 14.2817 GB/s per node +Grid : Message : Stencil 17.0494 GB/s per node +Grid : Message : Stencil 16.4168 GB/s per node +Grid : Message : Stencil 12.3744 GB/s per node +Grid : Message : Average mflops/s per call per node : 661496 +Grid : Message : Average mflops/s per call per node : 803949 +Grid : Message : Average mflops/s per call per node : 825886 +Grid : Message : Average mflops/s per call per node : 663562 +Grid : Message : Average mflops/s per call per node (full): 316101 +Grid : Message : Average mflops/s per call per node (full): 436399 +Grid : Message : Average mflops/s per call per node (full): 428930 +Grid : Message : Average mflops/s per call per node (full): 301876 +Grid : Message : Stencil 13.2501 GB/s per node +Grid : Message : Stencil 17.3485 GB/s per node +Grid : Message : Stencil 17.4801 GB/s per node +Grid : Message : Stencil 12.7829 GB/s per node +Grid : Message : Average mflops/s per call per node : 663072 +Grid : Message : Average mflops/s per call per node : 802940 +Grid : Message : Average mflops/s per call per node : 822540 +Grid : Message : Average mflops/s per call per node : 662228 +Grid : Message : Average mflops/s per call per node (full): 314512 +Grid : Message : Average mflops/s per call per node (full): 438926 +Grid : Message : Average mflops/s per call per node (full): 442773 +Grid : Message : Average mflops/s per call per node (full): 305542 +Grid : Message : Stencil 14.7135 GB/s per node +Grid : Message : Stencil 16.5017 GB/s per node +Grid : Message : Stencil 17.6741 GB/s per node +Grid : Message : Stencil 12.5544 GB/s per node +Grid : Message : Average mflops/s per call per node : 663528 +Grid : Message : Average mflops/s per call per node : 803113 +Grid : Message : Average mflops/s per call per node : 824588 +Grid : Message : Average mflops/s per call per node : 664828 +Grid : Message : Average mflops/s per call per node (full): 316604 +Grid : Message : Average mflops/s per call per node 
(full): 435590 +Grid : Message : Average mflops/s per call per node (full): 446359 +Grid : Message : Average mflops/s per call per node (full): 305030 +Grid : Message : Stencil 13.0648 GB/s per node +Grid : Message : Stencil 17.6308 GB/s per node +Grid : Message : Stencil 18.0907 GB/s per node +Grid : Message : Stencil 12.67 GB/s per node +Grid : Message : Average mflops/s per call per node : 664297 +Grid : Message : Average mflops/s per call per node : 803366 +Grid : Message : Average mflops/s per call per node : 818922 +Grid : Message : Average mflops/s per call per node : 662458 +Grid : Message : Average mflops/s per call per node (full): 314283 +Grid : Message : Average mflops/s per call per node (full): 437036 +Grid : Message : Average mflops/s per call per node (full): 445853 +Grid : Message : Average mflops/s per call per node (full): 303738 +Grid : Message : Stencil 12.5967 GB/s per node +Grid : Message : Stencil 16.0996 GB/s per node +Grid : Message : Stencil 17.1523 GB/s per node +Grid : Message : Stencil 11.9602 GB/s per node +Grid : Message : Average mflops/s per call per node : 671367 +Grid : Message : Average mflops/s per call per node : 802117 +Grid : Message : Average mflops/s per call per node : 822388 +Grid : Message : Average mflops/s per call per node : 670004 +Grid : Message : Average mflops/s per call per node (full): 314223 +Grid : Message : Average mflops/s per call per node (full): 429436 +Grid : Message : Average mflops/s per call per node (full): 442282 +Grid : Message : Average mflops/s per call per node (full): 301663 +Grid : Message : Stencil 13.3652 GB/s per node +Grid : Message : Stencil 14.7891 GB/s per node +Grid : Message : Stencil 17.3862 GB/s per node +Grid : Message : Stencil 13.2581 GB/s per node +Grid : Message : Average mflops/s per call per node : 666124 +Grid : Message : Average mflops/s per call per node : 805548 +Grid : Message : Average mflops/s per call per node : 822880 +Grid : Message : Average mflops/s per call per 
node : 661956 +Grid : Message : Average mflops/s per call per node (full): 315515 +Grid : Message : Average mflops/s per call per node (full): 411718 +Grid : Message : Average mflops/s per call per node (full): 444732 +Grid : Message : Average mflops/s per call per node (full): 303817 +Grid : Message : Stencil 13.6342 GB/s per node +Grid : Message : Stencil 16.4294 GB/s per node +Grid : Message : Stencil 17.1953 GB/s per node +Grid : Message : Stencil 13.369 GB/s per node +Grid : Message : Average mflops/s per call per node : 665092 +Grid : Message : Average mflops/s per call per node : 804618 +Grid : Message : Average mflops/s per call per node : 824222 +Grid : Message : Average mflops/s per call per node : 657667 +Grid : Message : Average mflops/s per call per node (full): 315976 +Grid : Message : Average mflops/s per call per node (full): 433965 +Grid : Message : Average mflops/s per call per node (full): 441241 +Grid : Message : Average mflops/s per call per node (full): 304885 +Grid : Message : Stencil 14.6131 GB/s per node +Grid : Message : Stencil 16.4093 GB/s per node +Grid : Message : Stencil 18.1192 GB/s per node +Grid : Message : Stencil 13.5067 GB/s per node +Grid : Message : Average mflops/s per call per node : 665242 +Grid : Message : Average mflops/s per call per node : 807371 +Grid : Message : Average mflops/s per call per node : 822732 +Grid : Message : Average mflops/s per call per node : 661930 +Grid : Message : Average mflops/s per call per node (full): 317716 +Grid : Message : Average mflops/s per call per node (full): 431823 +Grid : Message : Average mflops/s per call per node (full): 447833 +Grid : Message : Average mflops/s per call per node (full): 306907 +Grid : Message : Stencil 14.0475 GB/s per node +Grid : Message : Stencil 14.4059 GB/s per node +Grid : Message : Stencil 18.8707 GB/s per node +Grid : Message : Stencil 12.4078 GB/s per node +Grid : Message : Average mflops/s per call per node : 667247 +Grid : Message : Average mflops/s 
per call per node : 807574 +Grid : Message : Average mflops/s per call per node : 819580 +Grid : Message : Average mflops/s per call per node : 670418 +Grid : Message : Average mflops/s per call per node (full): 316787 +Grid : Message : Average mflops/s per call per node (full): 407665 +Grid : Message : Average mflops/s per call per node (full): 447793 +Grid : Message : Average mflops/s per call per node (full): 304722 +Grid : Message : Stencil 12.8318 GB/s per node +Grid : Message : Stencil 16.8843 GB/s per node +Grid : Message : Stencil 18.1065 GB/s per node +Grid : Message : Stencil 11.7948 GB/s per node +Grid : Message : Average mflops/s per call per node : 667312 +Grid : Message : Average mflops/s per call per node : 799072 +Grid : Message : Average mflops/s per call per node : 826647 +Grid : Message : Average mflops/s per call per node : 667891 +Grid : Message : Average mflops/s per call per node (full): 314263 +Grid : Message : Average mflops/s per call per node (full): 436737 +Grid : Message : Average mflops/s per call per node (full): 438770 +Grid : Message : Average mflops/s per call per node (full): 297490 +Grid : Message : Stencil 13.3866 GB/s per node +Grid : Message : Stencil 12.1769 GB/s per node +Grid : Message : Stencil 17.4186 GB/s per node +Grid : Message : Stencil 11.9079 GB/s per node +Grid : Message : Average mflops/s per call per node : 664332 +Grid : Message : Average mflops/s per call per node : 810631 +Grid : Message : Average mflops/s per call per node : 821828 +Grid : Message : Average mflops/s per call per node : 664980 +Grid : Message : Average mflops/s per call per node (full): 315441 +Grid : Message : Average mflops/s per call per node (full): 364776 +Grid : Message : Average mflops/s per call per node (full): 444564 +Grid : Message : Average mflops/s per call per node (full): 301166 +Grid : Message : Stencil 13.3728 GB/s per node +Grid : Message : Stencil 17.5614 GB/s per node +Grid : Message : Stencil 18.0237 GB/s per node +Grid : 
Message : Stencil 13.8207 GB/s per node +Grid : Message : Average mflops/s per call per node : 667161 +Grid : Message : Average mflops/s per call per node : 800879 +Grid : Message : Average mflops/s per call per node : 815462 +Grid : Message : Average mflops/s per call per node : 661988 +Grid : Message : Average mflops/s per call per node (full): 315692 +Grid : Message : Average mflops/s per call per node (full): 437055 +Grid : Message : Average mflops/s per call per node (full): 444586 +Grid : Message : Average mflops/s per call per node (full): 305587 +Grid : Message : Stencil 12.6136 GB/s per node +Grid : Message : Stencil 17.8142 GB/s per node +Grid : Message : Stencil 18.7351 GB/s per node +Grid : Message : Stencil 12.0376 GB/s per node +Grid : Message : Average mflops/s per call per node : 667354 +Grid : Message : Average mflops/s per call per node : 803405 +Grid : Message : Average mflops/s per call per node : 823025 +Grid : Message : Average mflops/s per call per node : 665668 +Grid : Message : Average mflops/s per call per node (full): 313391 +Grid : Message : Average mflops/s per call per node (full): 440352 +Grid : Message : Average mflops/s per call per node (full): 448308 +Grid : Message : Average mflops/s per call per node (full): 302534 +Grid : Message : Stencil 13.6781 GB/s per node +Grid : Message : Stencil 16.4091 GB/s per node +Grid : Message : Stencil 17.8156 GB/s per node +Grid : Message : Stencil 12.0971 GB/s per node +Grid : Message : Average mflops/s per call per node : 661133 +Grid : Message : Average mflops/s per call per node : 801506 +Grid : Message : Average mflops/s per call per node : 820190 +Grid : Message : Average mflops/s per call per node : 669406 +Grid : Message : Average mflops/s per call per node (full): 314497 +Grid : Message : Average mflops/s per call per node (full): 433770 +Grid : Message : Average mflops/s per call per node (full): 445647 +Grid : Message : Average mflops/s per call per node (full): 303000 +Grid : Message 
: Stencil 14.0661 GB/s per node +Grid : Message : Stencil 16.698 GB/s per node +Grid : Message : Stencil 16.8629 GB/s per node +Grid : Message : Stencil 13.4859 GB/s per node +Grid : Message : Average mflops/s per call per node : 660056 +Grid : Message : Average mflops/s per call per node : 800700 +Grid : Message : Average mflops/s per call per node : 826733 +Grid : Message : Average mflops/s per call per node : 660054 +Grid : Message : Average mflops/s per call per node (full): 314176 +Grid : Message : Average mflops/s per call per node (full): 431452 +Grid : Message : Average mflops/s per call per node (full): 438813 +Grid : Message : Average mflops/s per call per node (full): 306706 +Grid : Message : Stencil 15.2755 GB/s per node +Grid : Message : Stencil 16.3466 GB/s per node +Grid : Message : Stencil 16.9219 GB/s per node +Grid : Message : Stencil 12.5556 GB/s per node +Grid : Message : Average mflops/s per call per node : 662693 +Grid : Message : Average mflops/s per call per node : 802976 +Grid : Message : Average mflops/s per call per node : 819842 +Grid : Message : Average mflops/s per call per node : 661755 +Grid : Message : Average mflops/s per call per node (full): 317332 +Grid : Message : Average mflops/s per call per node (full): 433726 +Grid : Message : Average mflops/s per call per node (full): 439478 +Grid : Message : Average mflops/s per call per node (full): 303366 +Grid : Message : Stencil 12.7011 GB/s per node +Grid : Message : Stencil 9.36114 GB/s per node +Grid : Message : Stencil 18.2933 GB/s per node +Grid : Message : Stencil 13.0964 GB/s per node +Grid : Message : Average mflops/s per call per node : 665822 +Grid : Message : Average mflops/s per call per node : 812056 +Grid : Message : Average mflops/s per call per node : 823525 +Grid : Message : Average mflops/s per call per node : 660552 +Grid : Message : Average mflops/s per call per node (full): 314345 +Grid : Message : Average mflops/s per call per node (full): 301000 +Grid : Message 
: Average mflops/s per call per node (full): 447518 +Grid : Message : Average mflops/s per call per node (full): 303723 +Grid : Message : Stencil 13.1927 GB/s per node +Grid : Message : Stencil 16.8595 GB/s per node +Grid : Message : Stencil 17.886 GB/s per node +Grid : Message : Stencil 12.3675 GB/s per node +Grid : Message : Average mflops/s per call per node : 667425 +Grid : Message : Average mflops/s per call per node : 807531 +Grid : Message : Average mflops/s per call per node : 820923 +Grid : Message : Average mflops/s per call per node : 672801 +Grid : Message : Average mflops/s per call per node (full): 315755 +Grid : Message : Average mflops/s per call per node (full): 438818 +Grid : Message : Average mflops/s per call per node (full): 444555 +Grid : Message : Average mflops/s per call per node (full): 304944 +Grid : Message : Stencil 12.9532 GB/s per node +Grid : Message : Stencil 10.464 GB/s per node +Grid : Message : Stencil 16.678 GB/s per node +Grid : Message : Stencil 13.437 GB/s per node +Grid : Message : Average mflops/s per call per node : 668065 +Grid : Message : Average mflops/s per call per node : 811810 +Grid : Message : Average mflops/s per call per node : 821025 +Grid : Message : Average mflops/s per call per node : 665160 +Grid : Message : Average mflops/s per call per node (full): 313479 +Grid : Message : Average mflops/s per call per node (full): 327432 +Grid : Message : Average mflops/s per call per node (full): 428518 +Grid : Message : Average mflops/s per call per node (full): 307295 +Grid : Message : Stencil 13.5958 GB/s per node +Grid : Message : Stencil 10.7364 GB/s per node +Grid : Message : Stencil 17.6069 GB/s per node +Grid : Message : Stencil 13.6256 GB/s per node +Grid : Message : Average mflops/s per call per node : 664957 +Grid : Message : Average mflops/s per call per node : 812429 +Grid : Message : Average mflops/s per call per node : 819163 +Grid : Message : Average mflops/s per call per node : 662325 +Grid : Message : 
Average mflops/s per call per node (full): 315613 +Grid : Message : Average mflops/s per call per node (full): 333839 +Grid : Message : Average mflops/s per call per node (full): 444411 +Grid : Message : Average mflops/s per call per node (full): 304561 diff --git a/scripts/scatter b/scripts/scatter new file mode 100644 index 00000000..09cb2743 --- /dev/null +++ b/scripts/scatter @@ -0,0 +1,444 @@ +11.7987 665962 +15.6078 804738 +16.8195 817117 +11.7002 667457 +12.531 663347 +15.6475 802544 +16.129 819802 +11.7684 665000 +11.6168 667651 +15.1805 800892 +17.6561 822562 +11.4019 664136 +11.9189 663385 +16.0362 798230 +16.0763 818004 +11.5681 662081 +19.7502 662637 +15.5355 806539 +15.8243 821544 +11.16 668250 +13.0345 663874 +15.5772 801823 +16.2306 818114 +11.3338 663786 +11.5693 668819 +15.8559 798630 +16.077 819714 +11.6406 659723 +11.6879 664649 +15.4282 805797 +16.1491 819745 +11.2908 671340 +11.3591 669315 +15.9951 804795 +17.0154 818908 +11.179 668178 +11.6526 664287 +15.688 802454 +16.4822 815271 +11.7621 661223 +11.6672 664459 +15.4835 798722 +16.7267 820133 +11.624 668664 +11.6175 661994 +15.3587 802845 +16.044 821846 +11.3323 665252 +11.7211 662942 +15.4859 802990 +16.3677 824283 +11.4744 662839 +11.8487 664919 +15.6107 797731 +15.8069 820478 +11.2979 658007 +11.7564 663817 +15.541 797783 +17.0808 819743 +12.3383 661733 +36.0835 663562 +15.7731 798800 +15.8485 822127 +11.1223 666914 +12.4756 660221 +16.3073 799764 +16.9154 822327 +12.3991 657394 +11.7097 665528 +16.6175 800274 +15.9862 818149 +11.2491 664158 +12.0396 664001 +16.1483 799878 +16.1754 821353 +32.8124 663041 +11.9903 663036 +16.0966 803422 +16.3731 822092 +11.9109 660189 +11.9452 664600 +9.66713 808509 +17.1584 817437 +11.5856 664571 +11.8809 666913 +15.8264 797351 +16.5976 816830 +11.4465 666261 +12.2163 665461 +16.2477 800076 +16.083 821536 +11.5841 664139 +11.7192 666777 +15.5412 802674 +15.9575 816875 +11.0946 670884 +11.7391 668389 +15.3244 801328 +16.9828 816831 +11.5186 667912 +11.8553 
668159 +15.9975 799798 +16.5647 818386 +11.3678 667215 +11.8375 664926 +15.6273 800393 +17.2328 816855 +11.4451 670339 +11.5145 670155 +15.442 801500 +16.8771 823647 +11.5946 664410 +11.7574 667053 +15.7901 801231 +16.1462 821653 +11.8786 656763 +11.5349 663420 +15.8349 802817 +17.0659 812767 +11.5311 662683 +11.6272 667676 +15.9587 803022 +15.809 811142 +11.3299 667866 +11.5119 666844 +15.7014 801714 +16.1993 819118 +11.2353 669254 +11.8388 664464 +15.8903 797444 +16.8866 816635 +11.7656 658494 +11.6656 666166 +15.256 805288 +16.284 817737 +11.2608 659269 +11.3609 669138 +15.0724 801659 +15.7254 819883 +11.0887 667137 +11.5796 669283 +15.7326 802443 +16.2654 817360 +11.9706 661339 +12.5527 663043 +15.2276 794816 +16.8155 823258 +11.4149 664258 +12.0766 663720 +15.8403 799492 +17.5981 818338 +11.5562 665220 +22.1544 661378 +12.0874 804829 +15.4826 816649 +10.9756 664841 +35.9173 665455 +15.288 801766 +16.1119 822671 +11.4758 663216 +12.3041 666616 +15.2521 800945 +16.1784 819014 +11.3541 663399 +11.863 664907 +15.2269 795055 +16.8058 821320 +11.5113 663369 +13.7629 666975 +15.2298 801234 +15.7366 822774 +11.2174 670346 +12.0583 663598 +15.8147 801926 +15.7665 821852 +11.2676 665752 +11.7262 670282 +15.6965 806129 +15.8385 820080 +11.663 660842 +11.4393 667578 +15.6585 798745 +16.5194 818426 +11.4044 663854 +11.9518 662532 +15.5785 798254 +16.1043 821756 +11.3142 667176 +12.1981 666031 +15.3541 799680 +16.9157 817076 +17.4885 667121 +11.83 660487 +12.1585 801534 +16.3931 820708 +11.1599 665780 +11.9377 663366 +16.555 802357 +16.3592 819296 +14.948 662876 +11.4625 668747 +16.4882 802712 +16.0941 820000 +31.9435 665040 +11.6125 669814 +15.3871 800464 +16.6008 820317 +11.4717 659916 +12.9048 664130 +16.7163 800849 +17.8285 813151 +11.5916 658383 +12.0202 663072 +15.5845 806755 +16.307 817906 +11.3942 669369 +11.8465 668763 +15.2451 802236 +16.5219 819934 +11.6291 662012 +11.6539 660430 +15.4529 803424 +15.8335 821817 +12.0544 659237 +11.6396 665152 +15.3411 802963 
+16.6358 821266 +11.4613 665441 +11.902 663221 +15.747 803433 +15.8396 817497 +10.7179 664747 +11.6572 668304 +15.6532 804598 +17.1196 821115 +20.0255 661072 +11.4765 665925 +15.219 805882 +16.1594 815428 +11.3692 662945 +11.8594 663928 +16.1808 799668 +16.6602 821609 +11.5332 660173 +11.9081 662718 +15.9127 801288 +16.325 815644 +11.1836 665425 +11.4505 666855 +15.321 801245 +15.8323 814424 +11.2948 663168 +11.6377 661825 +15.8977 804792 +16.8616 820948 +11.919 659638 +11.6931 662183 +15.4429 803952 +16.2615 817444 +11.2996 664382 +12.692 663975 +15.4407 801448 +15.7445 818021 +11.1849 667460 +11.7541 664391 +16.0659 801714 +16.5602 819929 +11.5849 667401 +13.179 663303 +15.7583 802808 +16.2668 817484 +11.9216 663598 +11.5344 668783 +15.8009 799397 +16.5506 818528 +12.7165 658400 +11.4921 667373 +15.7904 796592 +17.3128 822103 +11.4334 660329 +11.5123 663333 +15.793 796578 +16.0427 818224 +11.4749 659239 +11.681 666986 +15.2297 801274 +15.4179 814059 +11.2333 673591 +11.6406 664406 +15.7716 801475 +16.0821 818385 +11.4388 661914 +11.7308 663324 +15.5508 803090 +15.8351 815836 +10.9782 663492 +11.6368 664583 +15.009 799445 +15.9809 818041 +11.5179 663563 +11.6583 665515 +15.5631 800525 +15.9809 824339 +11.5055 667720 +11.8791 664897 +15.834 805944 +16.4676 818236 +11.4656 658528 +11.6217 668202 +15.6001 801829 +16.1034 820178 +11.4374 665518 +11.8762 670965 +15.7434 796395 +16.3487 816459 +12.0269 662854 +11.6678 668820 +15.7425 803427 +16.7735 819775 +11.4964 666028 +12.5349 665748 +16.0529 803255 +16.7488 816778 +11.4022 663786 +11.8303 667042 +15.4075 797604 +19.2712 818582 +11.4384 663740 +12.1416 661861 +15.8256 801284 +15.963 825871 +11.2444 663654 +11.7056 668604 +15.4614 802378 +16.4954 816582 +11.2539 668951 +11.7545 665935 +15.38 800086 +16.1278 821042 +11.5973 660659 +11.6693 666349 +15.5384 798376 +16.4959 817743 +11.4558 666194 +11.5678 663109 +15.4735 804128 +16.1625 820650 +11.8488 659917 +12.4055 665791 +15.4519 800796 +16.4947 816381 +11.4139 
661665 +11.6561 672431 +15.4001 805832 +16.2371 816502 +11.2281 659837 +11.7132 666310 +15.5137 799183 +16.0413 822949 +11.2602 670131 +11.9453 666615 +15.7025 801522 +17.0962 811704 +11.2628 662112 +11.7406 667851 +15.6135 801576 +16.5934 822863 +11.5247 663162 +11.7487 667251 +16.0055 805087 +16.4157 823424 +11.5227 659356 +11.7859 663296 +16.378 807641 +15.4783 819400 +10.9612 663134 +11.5343 668661 +15.3816 799786 +17.3117 822440 +11.8563 663912 +11.643 668021 +15.4162 801892 +16.4866 818794 +12.668 663039 +11.2972 670117 +16.0342 797551 +16.5605 816882 +11.446 662352 +11.7654 665324 +15.5969 797676 +16.6003 821891 +11.6407 663619 +11.7488 666635 +15.179 801874 +16.4199 817806 +17.372 657731 +11.84 666318 +15.5772 799644 +16.5417 817404 +11.5194 660614 +11.6988 669740 +15.4915 796250 +15.5265 819705 +11.1317 660988 +11.7593 667587 +15.5395 801921 +16.646 816722 +11.3554 661571 +11.6774 664913 +17.2897 801999 +16.3433 816831 +11.782 653740 +11.4549 664438 +15.7027 803217 +16.7364 812992 +11.8037 660087 +11.7091 659665 +15.6669 801636 +18.1318 820841 +11.5725 669543 +12.0148 663589 +15.462 800805 +16.417 818946 +12.7225 660913 +11.7885 667164 +15.527 803806 +16.681 817339 +12.0822 660985 +11.8257 662455 +15.9106 800697 +16.6026 820530 +11.5612 661736 +12.0533 664089 +10.5293 801898 +16.7891 823599 +10.7908 665845 +11.7947 665325 +31.6308 798813 +16.8129 818671 +11.7356 662020 +11.9572 665635 +15.2262 802465 +15.776 821759 +11.3732 669772 diff --git a/scripts/zmobius.sh b/scripts/zmobius.sh new file mode 100644 index 00000000..04b223d2 --- /dev/null +++ b/scripts/zmobius.sh @@ -0,0 +1,35 @@ +#!/bin/bash +fn=$1 + +grep "double zmobius_" $fn | +awk 'BEGIN{ m["zmobius_b_coeff"]=0; m["zmobius_c_coeff"]=1; }{ val[m[substr($2,0,15)]][substr($2,17)+0]=$4; }END{ + + ls=length(val[0])/2; + + print "ls = " ls + + bmc=-111; + + for (s=0;s(%.15g,%.15g) );\n",omegar[s],omegai[s]); + } + +}' diff --git a/tests/IO/Test_nersc_read.cc b/tests/IO/Test_nersc_read.cc new file mode 
100644 index 00000000..9e9280a1 --- /dev/null +++ b/tests/IO/Test_nersc_read.cc @@ -0,0 +1,112 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_nersc_io.cc + + Copyright (C) 2015 + +Author: Azusa Yamaguchi +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + + std::vector simd_layout = GridDefaultSimd(4,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + std::vector latt_size = GridDefaultLatt(); + int orthodir=3; + int orthosz =latt_size[orthodir]; + + GridCartesian Fine(latt_size,simd_layout,mpi_layout); + + LatticeGaugeField Umu(&Fine); + std::vector U(4,&Fine); + + NerscField header; + std::string file("./ckpoint_lat"); + NerscIO::readConfiguration(Umu,header,file); + + for(int mu=0;mu(Umu,mu); + } + + // Painful ; fix syntactical niceness + LatticeComplex LinkTrace(&Fine); + LinkTrace=zero; + for(int mu=0;mu Plaq_T(orthosz); + sliceSum(Plaq,Plaq_T,Nd-1); + int Nt = Plaq_T.size(); + + TComplex Plaq_T_sum; + Plaq_T_sum=zero; + for(int t=0;t 0){ - std::cerr<<"FAIL shift "<< shift<<" in dir "<< dir<<" ["<1.0e-7){ + if ( abs(reference[i]-result[i])>1.0e-6){ std::cout< +void IntTester(const functor &func) +{ + typedef Integer scal; + typedef vInteger vec; + GridSerialRNG sRNG; + sRNG.SeedRandomDevice(); + + int Nsimd = vec::Nsimd(); + + std::vector input1(Nsimd); + std::vector input2(Nsimd); + std::vector result(Nsimd); + std::vector reference(Nsimd); + + std::vector > buf(3); + vec & v_input1 = buf[0]; + vec & v_input2 = buf[1]; + vec & v_result = buf[2]; + + + for(int i=0;i(v_input1,input1); + merge(v_input2,input2); + merge(v_result,result); + + func(v_result,v_input1,v_input2); + + for(int i=0;i(v_result,result); + + std::cout << GridLogMessage << " " << func.name() << std::endl; + + std::cout << GridLogDebug << v_input1 << std::endl; + std::cout << GridLogDebug << v_input2 << std::endl; + std::cout << GridLogDebug << v_result << std::endl; + + int ok=0; + 
for(int i=0;i void ReductionTester(const functor &func) @@ -245,6 +302,28 @@ public: } std::string name(void) const { return std::string("Permute"); } }; + +class funcExchange { +public: + int n; + funcExchange(int _n) { n=_n;}; + template void operator()(vec &r1,vec &r2,vec &i1,vec &i2) const { exchange(r1,r2,i1,i2,n);} + template void apply(std::vector &r1,std::vector &r2,std::vector &in1,std::vector &in2) const { + int sz=in1.size(); + + + int msk = sz>>(n+1); + + int j1=0; + int j2=0; + for(int i=0;i +void ExchangeTester(const functor &func) +{ + GridSerialRNG sRNG; + sRNG.SeedRandomDevice(); + + int Nsimd = vec::Nsimd(); + + std::vector input1(Nsimd); + std::vector input2(Nsimd); + std::vector result1(Nsimd); + std::vector result2(Nsimd); + std::vector reference1(Nsimd); + std::vector reference2(Nsimd); + std::vector test1(Nsimd); + std::vector test2(Nsimd); + + std::vector > buf(6); + vec & v_input1 = buf[0]; + vec & v_input2 = buf[1]; + vec & v_result1 = buf[2]; + vec & v_result2 = buf[3]; + vec & v_test1 = buf[4]; + vec & v_test2 = buf[5]; + + for(int i=0;i(v_input1,input1); + merge(v_input2,input2); + merge(v_result1,result1); + merge(v_result2,result1); + + func(v_result1,v_result2,v_input1,v_input2); + func.apply(reference1,reference2,input1,input2); + + func(v_test1,v_test2,v_result1,v_result2); + + extract(v_result1,result1); + extract(v_result2,result2); + extract(v_test1,test1); + extract(v_test2,test2); + + std::cout<(funcPermute(i)); } + std::cout<(funcExchange(i)); + } + std::cout<(funcPermute(i)); } + std::cout<(funcExchange(i)); + } + std::cout<(funcPermute(i)); } + + std::cout<(funcExchange(i)); + } + + std::cout<(funcExchange(i)); + } + + std::cout<(funcRotate(r)); } + + std::cout< 1.0e-4) { + for(int i=0;i 1.0e-4) exit(-1); } } @@ -182,8 +190,6 @@ int main (int argc, char ** argv) SimpleCompressor compress; - EStencil.HaloExchange(EFoo,compress); - OStencil.HaloExchange(OFoo,compress); Bar = Cshift(Foo,dir,disp); @@ -196,6 +202,7 @@ int main 
(int argc, char ** argv) } // Implement a stencil code that should agree with that darn cshift! + EStencil.HaloExchange(EFoo,compress); for(int i=0;ioSites();i++){ int permute_type; StencilEntry *SE; @@ -209,6 +216,7 @@ int main (int argc, char ** argv) else OCheck._odata[i] = EStencil.CommBuf()[SE->_offset]; } + OStencil.HaloExchange(OFoo,compress); for(int i=0;ioSites();i++){ int permute_type; StencilEntry *SE; @@ -254,6 +262,7 @@ int main (int argc, char ** argv) }}}} + if (nrm > 1.0e-4) exit(-1); } } diff --git a/tests/core/Test_cf_coarsen_support.cc b/tests/core/Test_cf_coarsen_support.cc index fab8c97c..35a8a764 100644 --- a/tests/core/Test_cf_coarsen_support.cc +++ b/tests/core/Test_cf_coarsen_support.cc @@ -64,7 +64,7 @@ int main (int argc, char ** argv) LatticeFermion ref(FGrid); ref=zero; LatticeFermion tmp(FGrid); LatticeFermion err(FGrid); - LatticeGaugeField Umu(UGrid); random(RNG4,Umu); + LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu); std::vector U(4,UGrid); for(int mu=0;mu U(4,UGrid); RealD mass=0.1; diff --git a/tests/core/Test_cshift_red_black.cc b/tests/core/Test_cshift_red_black.cc index 43f12e77..ae55cece 100644 --- a/tests/core/Test_cshift_red_black.cc +++ b/tests/core/Test_cshift_red_black.cc @@ -32,6 +32,8 @@ Author: paboyle using namespace Grid; using namespace Grid::QCD; +#define POWER10 + int main (int argc, char ** argv) { Grid_init(&argc,&argv); @@ -52,6 +54,7 @@ int main (int argc, char ** argv) LatticeComplex U(&Fine); LatticeComplex ShiftU(&Fine); LatticeComplex rbShiftU(&Fine); + LatticeComplex err(&Fine); LatticeComplex Ue(&RBFine); LatticeComplex Uo(&RBFine); LatticeComplex ShiftUe(&RBFine); @@ -68,7 +71,11 @@ int main (int argc, char ** argv) Integer i=0; LatticeCoordinate(coor,d); lex = lex + coor*stride+i; +#ifndef POWER10 stride=stride*latt_size[d]; +#else + stride=stride*10; +#endif } U=lex; } @@ -87,28 +94,31 @@ int main (int argc, char ** argv) // if ( dir!=1 ) continue; for(int shift=0;shift coor(4); - 
std::cout< scoor(coor); scoor[dir] = (scoor[dir]+shift)%latt_size[dir]; +#ifndef POWER10 + std::vector powers=latt_size; Integer slex = scoor[0] + latt_size[0]*scoor[1] + latt_size[0]*latt_size[1]*scoor[2] + latt_size[0]*latt_size[1]*latt_size[2]*scoor[3]; - +#else + std::vector powers({1,10,100,1000}); + Integer slex = scoor[0] + + 10 *scoor[1] + + 100 *scoor[2] + + 1000 *scoor[3]; +#endif Complex scm(slex); double nrm = abs(scm-cm()()()); std::vector peer(4); Complex ctmp = cm; Integer index=real(ctmp); - Lexicographic::CoorFromIndex(peer,index,latt_size); + Lexicographic::CoorFromIndex(peer,index,powers); if (nrm > 0){ std::cout<<"FAIL shift "<< shift<<" in dir "<< dir @@ -145,9 +163,10 @@ int main (int argc, char ** argv) exit(-1); } }}}} + std::cout << " OK !"< scoor(coor); scoor[dir] = (scoor[dir]+shift)%latt_size[dir]; +#ifndef POWER10 + std::vector powers=latt_size; Integer slex = scoor[0] + latt_size[0]*scoor[1] + latt_size[0]*latt_size[1]*scoor[2] + latt_size[0]*latt_size[1]*latt_size[2]*scoor[3]; - +#else + std::vector powers({1,10,100,1000}); + Integer slex = scoor[0] + + 10 *scoor[1] + + 100 *scoor[2] + + 1000 *scoor[3]; +#endif Complex scm(slex); std::vector peer(4); Complex ctmp=cmeo; Integer index=real(ctmp); - Lexicographic::CoorFromIndex(peer,index,latt_size); + Lexicographic::CoorFromIndex(peer,index,powers); double nrm = abs(cmeo()()()-scm); if (nrm != 0) { + + std::cout << " coor "<<" ["< using namespace Grid; using namespace Grid::QCD; +#define POWER10 + int main (int argc, char ** argv) { Grid_init(&argc,&argv); @@ -49,6 +51,7 @@ int main (int argc, char ** argv) GridParallelRNG FineRNG(&Fine); FineRNG.SeedRandomDevice(); + LatticeComplex err(&Fine); LatticeComplex U(&Fine); LatticeComplex ShiftU(&Fine); LatticeComplex rbShiftU(&Fine); @@ -66,9 +69,15 @@ int main (int argc, char ** argv) for(int d=0;d coor(4); - std::cout< scoor(coor); scoor[dir] = (scoor[dir]+shift)%latt_size[dir]; - + +#ifdef POWER10 + std::vector powers({1,10,100,1000}); + 
Integer slex = scoor[3] + + 10 *scoor[2] + + 100 *scoor[1] + + 1000 *scoor[0]; +#else + std::vector powers=latt_size; Integer slex = scoor[0] + latt_size[0]*scoor[1] + latt_size[0]*latt_size[1]*scoor[2] + latt_size[0]*latt_size[1]*latt_size[2]*scoor[3]; +#endif Complex scm(slex); @@ -132,7 +152,7 @@ int main (int argc, char ** argv) std::vector peer(4); Complex ctmp = cm; Integer index=real(ctmp); - Lexicographic::CoorFromIndex(peer,index,latt_size); + Lexicographic::CoorFromIndex(peer,index,powers); if (nrm > 0){ std::cout<<"FAIL shift "<< shift<<" in dir "<< dir @@ -140,14 +160,16 @@ int main (int argc, char ** argv) << cm()()()<<" expect "< scoor(coor); scoor[dir] = (scoor[dir]+shift)%latt_size[dir]; - + +#ifdef POWER10 + std::vector powers({1,10,100,1000}); + Integer slex = scoor[3] + + 10 *scoor[2] + + 100 *scoor[1] + + 1000 *scoor[0]; +#else + std::vector powers = latt_size; Integer slex = scoor[0] + latt_size[0]*scoor[1] + latt_size[0]*latt_size[1]*scoor[2] + latt_size[0]*latt_size[1]*latt_size[2]*scoor[3]; - +#endif Complex scm(slex); std::vector peer(4); Complex ctmp=cmeo; Integer index=real(ctmp); - Lexicographic::CoorFromIndex(peer,index,latt_size); + Lexicographic::CoorFromIndex(peer,index,powers); double nrm = abs(cmeo()()()-scm); if (nrm != 0) { @@ -189,10 +219,9 @@ int main (int argc, char ** argv) << cmeo()()()<<" expect "< U(4,UGrid); // Only one non-zero (y) diff --git a/tests/core/Test_dwf_rb5d.cc b/tests/core/Test_dwf_rb5d.cc index a3099c7c..68c1755e 100644 --- a/tests/core/Test_dwf_rb5d.cc +++ b/tests/core/Test_dwf_rb5d.cc @@ -81,7 +81,7 @@ int main (int argc, char ** argv) LatticeFermion tmp(FGrid); LatticeFermion err(FGrid); - LatticeGaugeField Umu(UGrid); random(RNG4,Umu); + LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu); std::vector U(4,UGrid); // Only one non-zero (y) diff --git a/tests/core/Test_gpwilson_even_odd.cc b/tests/core/Test_gpwilson_even_odd.cc index b69bf266..b8b320d8 100644 --- 
a/tests/core/Test_gpwilson_even_odd.cc +++ b/tests/core/Test_gpwilson_even_odd.cc @@ -61,7 +61,7 @@ int main (int argc, char ** argv) FermionField ref(&Grid); ref=zero; FermionField tmp(&Grid); tmp=zero; FermionField err(&Grid); tmp=zero; - LatticeGaugeField Umu(&Grid); random(pRNG,Umu); + LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); std::vector U(4,&Grid); double volume=1; diff --git a/tests/core/Test_staggered.cc b/tests/core/Test_staggered.cc new file mode 100644 index 00000000..89055fc7 --- /dev/null +++ b/tests/core/Test_staggered.cc @@ -0,0 +1,291 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./benchmarks/Benchmark_wilson.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + std::vector latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout); + + int threads = GridThread::GetThreads(); + std::cout< seeds({1,2,3,4}); + GridParallelRNG pRNG(&Grid); + pRNG.SeedFixedIntegers(seeds); + // pRNG.SeedRandomDevice(); + + typedef typename ImprovedStaggeredFermionR::FermionField FermionField; + typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField; + typename ImprovedStaggeredFermionR::ImplParams params; + + FermionField src (&Grid); random(pRNG,src); + FermionField result(&Grid); result=zero; + FermionField ref(&Grid); ref=zero; + FermionField tmp(&Grid); tmp=zero; + FermionField err(&Grid); tmp=zero; + FermionField phi (&Grid); random(pRNG,phi); + FermionField chi (&Grid); random(pRNG,chi); + LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); + std::vector U(4,&Grid); + + + double volume=1; + for(int mu=0;mu(Umu,mu); + /* Debug force unit + U[mu] = 1.0; + PokeIndex(Umu,U[mu],mu); + */ + } + + ref = zero; + + RealD mass=0.1; + RealD c1=9.0/8.0; + RealD c2=-1.0/24.0; + RealD u0=1.0; + + { // Simple improved staggered implementation + ref = zero; + RealD c1tad = 0.5*c1/u0; + RealD c2tad = 0.5*c2/u0/u0/u0; + + Lattice > coor(&Grid); + + Lattice > x(&Grid); LatticeCoordinate(x,0); + Lattice > y(&Grid); LatticeCoordinate(y,1); + Lattice > z(&Grid); LatticeCoordinate(z,2); + Lattice > t(&Grid); LatticeCoordinate(t,3); + + Lattice > lin_z(&Grid); lin_z=x+y; + Lattice > 
lin_t(&Grid); lin_t=x+y+z; + + for(int mu=0;mu * = < chi | Deo^dag| phi> "< HermOpEO(Ds); + HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); + HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); + + HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2); + HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2); + + pDce = innerProduct(phi_e,dchi_e); + pDco = innerProduct(phi_o,dchi_o); + cDpe = innerProduct(chi_e,dphi_e); + cDpo = innerProduct(chi_o,dphi_o); + + std::cout< +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + std::vector latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + + std::cout << GridLogMessage << "Making s innermost grids"< seeds({1,2,3,4}); + GridParallelRNG pRNG4(UGrid); + GridParallelRNG pRNG5(FGrid); + pRNG4.SeedFixedIntegers(seeds); + pRNG5.SeedFixedIntegers(seeds); + + typedef typename ImprovedStaggeredFermion5DR::FermionField FermionField; + typedef typename ImprovedStaggeredFermion5DR::ComplexField ComplexField; + typename ImprovedStaggeredFermion5DR::ImplParams params; + + FermionField src (FGrid); + + random(pRNG5,src); + + FermionField result(FGrid); result=zero; + FermionField ref(FGrid); ref=zero; + FermionField tmp(FGrid); tmp=zero; + FermionField err(FGrid); tmp=zero; + FermionField phi (FGrid); random(pRNG5,phi); + FermionField chi (FGrid); random(pRNG5,chi); + + LatticeGaugeField Umu(UGrid); SU3::ColdConfiguration(pRNG4,Umu); + LatticeGaugeField Umua(UGrid); Umua=Umu; + + double volume=Ls; + for(int mu=0;muoSites();ss++){ + for(int s=0;s U(4,FGrid); + + for(int mu=0;mu(Umu5d,mu); + } + + RealD mass=0.1; + RealD c1=9.0/8.0; + RealD c2=-1.0/24.0; + RealD u0=1.0; + + { // Simple improved staggered implementation + ref = zero; + RealD c1tad = 0.5*c1/u0; + RealD c2tad = 0.5*c2/u0/u0/u0; + + Lattice > coor(FGrid); + + Lattice > x(FGrid); LatticeCoordinate(x,1); // s innermost + Lattice > y(FGrid); LatticeCoordinate(y,2); + Lattice > z(FGrid); LatticeCoordinate(z,3); + Lattice > t(FGrid); LatticeCoordinate(t,4); + + Lattice > lin_z(FGrid); lin_z=x+y; + Lattice > lin_t(FGrid); lin_t=x+y+z; + + for(int mu=0;mu * = < chi 
| Deo^dag| phi> "< HermOpEO(Ds); + HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); + HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); + + HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2); + HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2); + + pDce = innerProduct(phi_e,dchi_e); + pDco = innerProduct(phi_o,dchi_o); + cDpe = innerProduct(chi_e,dphi_e); + cDpo = innerProduct(chi_o,dphi_o); + + std::cout< +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + std::vector latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + + const int Ls=16; + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + std::cout << GridLogMessage << "Making s innermost grids"< seeds({1,2,3,4}); + + GridParallelRNG pRNG4(UGrid); + GridParallelRNG pRNG5(FGrid); + pRNG4.SeedFixedIntegers(seeds); + pRNG5.SeedFixedIntegers(seeds); + + typedef typename ImprovedStaggeredFermion5DR::FermionField FermionField; + typedef typename ImprovedStaggeredFermion5DR::ComplexField ComplexField; + typename ImprovedStaggeredFermion5DR::ImplParams params; + + FermionField src (FGrid); + random(pRNG5,src); + /* + std::vector site({0,1,2,0,0}); + ColourVector cv = zero; + cv()()(0)=1.0; + src = zero; + pokeSite(cv,src,site); + */ + FermionField result(FGrid); result=zero; + FermionField tmp(FGrid); tmp=zero; + FermionField err(FGrid); tmp=zero; + FermionField phi (FGrid); random(pRNG5,phi); + FermionField chi (FGrid); random(pRNG5,chi); + + LatticeGaugeField Umu(UGrid); + SU3::HotConfiguration(pRNG4,Umu); + + /* + for(int mu=1;mu<4;mu++){ + auto tmp = PeekIndex(Umu,mu); + tmp = zero; + PokeIndex(Umu,tmp,mu); + } + */ + double volume=Ls; + for(int mu=0;mu U(4,&Grid); double volume=1; diff --git 
a/tests/core/Test_wilson_tm_even_odd.cc b/tests/core/Test_wilson_tm_even_odd.cc index 2e66438e..36de83ea 100644 --- a/tests/core/Test_wilson_tm_even_odd.cc +++ b/tests/core/Test_wilson_tm_even_odd.cc @@ -70,7 +70,7 @@ int main (int argc, char ** argv) LatticeFermion ref(&Grid); ref=zero; LatticeFermion tmp(&Grid); tmp=zero; LatticeFermion err(&Grid); tmp=zero; - LatticeGaugeField Umu(&Grid); random(pRNG,Umu); + LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); std::vector U(4,&Grid); double volume=1; diff --git a/tests/core/Test_zmobius_even_odd.cc b/tests/core/Test_zmobius_even_odd.cc new file mode 100644 index 00000000..d547f2f7 --- /dev/null +++ b/tests/core/Test_zmobius_even_odd.cc @@ -0,0 +1,273 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_dwf_even_odd.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +struct scal { + d internal; +}; + + Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT + }; + + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + int threads = GridThread::GetThreads(); + std::cout< seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + + LatticeFermion src (FGrid); random(RNG5,src); + LatticeFermion phi (FGrid); random(RNG5,phi); + LatticeFermion chi (FGrid); random(RNG5,chi); + LatticeFermion result(FGrid); result=zero; + LatticeFermion ref(FGrid); ref=zero; + LatticeFermion tmp(FGrid); tmp=zero; + LatticeFermion err(FGrid); tmp=zero; + LatticeGaugeField Umu(UGrid); random(RNG4,Umu); + std::vector U(4,UGrid); + + // Only one non-zero (y) + Umu=zero; + for(int nn=0;nn0 ) + U[nn]=zero; + PokeIndex(Umu,U[nn],nn); + } + + RealD mass=0.1; + RealD M5 =1.8; + std::vector < std::complex > omegas; + for(int i=0;i temp (0.25+0.01*i, imag*0.1); + omegas.push_back(temp); + } + ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, omegas,1.,0.); +// DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + + LatticeFermion src_e (FrbGrid); + LatticeFermion src_o (FrbGrid); + LatticeFermion r_e (FrbGrid); + LatticeFermion r_o (FrbGrid); + LatticeFermion r_eo (FGrid); + LatticeFermion r_eeoo(FGrid); + + std::cout< * = < chi | Deo^dag| phi> "< * = < chi | Deo^dag| phi> "< HermOpEO(Ddwf); + HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); + HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); + + HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2); + 
HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2); + + pDce = innerProduct(phi_e,dchi_e); + pDco = innerProduct(phi_o,dchi_o); + cDpe = innerProduct(chi_e,dphi_e); + cDpo = innerProduct(chi_o,dphi_o); + + std::cout< gamma(Ls,ComplexD(1.0,0.0)); + std::cout<(Dmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + std::cout<(ZDmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + std::cout<(Dzolo,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); diff --git a/tests/debug/Test_cayley_coarsen_support.cc b/tests/debug/Test_cayley_coarsen_support.cc index 2d503df8..c6532a0d 100644 --- a/tests/debug/Test_cayley_coarsen_support.cc +++ b/tests/debug/Test_cayley_coarsen_support.cc @@ -77,7 +77,7 @@ int main (int argc, char ** argv) LatticeFermion ref(FGrid); ref=zero; LatticeFermion tmp(FGrid); LatticeFermion err(FGrid); - LatticeGaugeField Umu(UGrid); random(RNG4,Umu); + LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu); #if 0 std::vector U(4,UGrid); diff --git a/tests/debug/Test_cayley_even_odd.cc b/tests/debug/Test_cayley_even_odd.cc index 7d8d2a12..e226ac4a 100644 --- a/tests/debug/Test_cayley_even_odd.cc +++ b/tests/debug/Test_cayley_even_odd.cc @@ -70,7 +70,7 @@ int main (int argc, char ** argv) GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); - LatticeGaugeField Umu(UGrid); random(RNG4,Umu); + LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu); std::vector U(4,UGrid); RealD mass=0.1; @@ -81,10 +81,16 @@ int main (int argc, char ** argv) RealD b=1.5;// Scale factor b+c=2, b-c=1 RealD c=0.5; + std::vector gamma(Ls,ComplexD(1.0,0.1)); + std::cout<(Dmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + std::cout<(ZDmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + std::cout<(Dzolo,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); diff --git a/tests/debug/Test_zmm.cc b/tests/debug/Test_zmm.cc index 40263cb9..92bae882 100644 --- a/tests/debug/Test_zmm.cc +++ 
b/tests/debug/Test_zmm.cc @@ -26,7 +26,6 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ #include -#include #ifdef TEST_ZMM @@ -187,7 +186,7 @@ int main(int argc,char **argv) GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); random(RNG5,src); #if 1 - random(RNG4,Umu); + SU3::HotConfiguration(RNG4,Umu); #else int mmu=2; std::vector U(4,UGrid); diff --git a/tests/forces/Test_contfrac_force.cc b/tests/forces/Test_contfrac_force.cc index 0779b710..4cfe5ca3 100644 --- a/tests/forces/Test_contfrac_force.cc +++ b/tests/forces/Test_contfrac_force.cc @@ -31,8 +31,6 @@ using namespace std; using namespace Grid; using namespace Grid::QCD; -#define parallel_for PARALLEL_FOR_LOOP for - int main (int argc, char ** argv) { Grid_init(&argc,&argv); diff --git a/tests/forces/Test_dwf_force.cc b/tests/forces/Test_dwf_force.cc index c817eed1..f7a3ba67 100644 --- a/tests/forces/Test_dwf_force.cc +++ b/tests/forces/Test_dwf_force.cc @@ -31,8 +31,6 @@ using namespace std; using namespace Grid; using namespace Grid::QCD; -#define parallel_for PARALLEL_FOR_LOOP for - int main (int argc, char ** argv) { Grid_init(&argc,&argv); diff --git a/tests/forces/Test_dwf_gpforce.cc b/tests/forces/Test_dwf_gpforce.cc index fb041f7a..5094b8a7 100644 --- a/tests/forces/Test_dwf_gpforce.cc +++ b/tests/forces/Test_dwf_gpforce.cc @@ -31,7 +31,7 @@ using namespace std; using namespace Grid; using namespace Grid::QCD; -#define parallel_for PARALLEL_FOR_LOOP for + int main (int argc, char ** argv) { diff --git a/tests/forces/Test_gp_rect_force.cc b/tests/forces/Test_gp_rect_force.cc index a48dddc8..551c3a20 100644 --- a/tests/forces/Test_gp_rect_force.cc +++ b/tests/forces/Test_gp_rect_force.cc @@ -31,7 +31,7 @@ using namespace std; using namespace Grid; using namespace Grid::QCD; -#define parallel_for PARALLEL_FOR_LOOP for + int main (int argc, char ** argv) { 
diff --git a/tests/forces/Test_gpdwf_force.cc b/tests/forces/Test_gpdwf_force.cc index 6c6f19f0..ee0df5dd 100644 --- a/tests/forces/Test_gpdwf_force.cc +++ b/tests/forces/Test_gpdwf_force.cc @@ -31,7 +31,7 @@ using namespace std; using namespace Grid; using namespace Grid::QCD; -#define parallel_for PARALLEL_FOR_LOOP for + int main (int argc, char ** argv) { diff --git a/tests/forces/Test_gpwilson_force.cc b/tests/forces/Test_gpwilson_force.cc index 25d57684..bae46800 100644 --- a/tests/forces/Test_gpwilson_force.cc +++ b/tests/forces/Test_gpwilson_force.cc @@ -31,7 +31,7 @@ using namespace std; using namespace Grid; using namespace Grid::QCD; -#define parallel_for PARALLEL_FOR_LOOP for + int main (int argc, char ** argv) { diff --git a/tests/forces/Test_partfrac_force.cc b/tests/forces/Test_partfrac_force.cc index 8fc0fb9b..0562fe3d 100644 --- a/tests/forces/Test_partfrac_force.cc +++ b/tests/forces/Test_partfrac_force.cc @@ -31,7 +31,7 @@ using namespace std; using namespace Grid; using namespace Grid::QCD; -#define parallel_for PARALLEL_FOR_LOOP for + int main (int argc, char ** argv) { diff --git a/tests/forces/Test_rect_force.cc b/tests/forces/Test_rect_force.cc index 67edba1b..97281854 100644 --- a/tests/forces/Test_rect_force.cc +++ b/tests/forces/Test_rect_force.cc @@ -31,7 +31,7 @@ using namespace std; using namespace Grid; using namespace Grid::QCD; -#define parallel_for PARALLEL_FOR_LOOP for + int main (int argc, char ** argv) { diff --git a/tests/forces/Test_wilson_force.cc b/tests/forces/Test_wilson_force.cc index 1af156cc..60d31b51 100644 --- a/tests/forces/Test_wilson_force.cc +++ b/tests/forces/Test_wilson_force.cc @@ -31,7 +31,7 @@ using namespace std; using namespace Grid; using namespace Grid::QCD; -#define parallel_for PARALLEL_FOR_LOOP for + int main (int argc, char ** argv) { diff --git a/tests/forces/Test_wilson_force_phiMdagMphi.cc b/tests/forces/Test_wilson_force_phiMdagMphi.cc index 1022cf52..7717e9bc 100644 --- 
a/tests/forces/Test_wilson_force_phiMdagMphi.cc +++ b/tests/forces/Test_wilson_force_phiMdagMphi.cc @@ -31,7 +31,7 @@ using namespace std; using namespace Grid; using namespace Grid::QCD; -#define parallel_for PARALLEL_FOR_LOOP for + int main (int argc, char ** argv) { diff --git a/tests/forces/Test_wilson_force_phiMphi.cc b/tests/forces/Test_wilson_force_phiMphi.cc index d55d1bea..c9e56c32 100644 --- a/tests/forces/Test_wilson_force_phiMphi.cc +++ b/tests/forces/Test_wilson_force_phiMphi.cc @@ -31,7 +31,7 @@ using namespace std; using namespace Grid; using namespace Grid::QCD; -#define parallel_for PARALLEL_FOR_LOOP for + int main (int argc, char ** argv) { diff --git a/tests/qdpxx/Test_qdpxx_loops_staples.cc b/tests/qdpxx/Test_qdpxx_loops_staples.cc index cf2e0796..3bed9601 100644 --- a/tests/qdpxx/Test_qdpxx_loops_staples.cc +++ b/tests/qdpxx/Test_qdpxx_loops_staples.cc @@ -282,8 +282,8 @@ double calc_grid_p(Grid::QCD::LatticeGaugeField & Umu) Grid::QCD::LatticeColourMatrix tmp(UGrid); tmp = Grid::zero; - Grid::QCD::PokeIndex(Umu,tmp,2); - Grid::QCD::PokeIndex(Umu,tmp,3); + Grid::QCD::PokeIndex(Umu,tmp,2); + Grid::QCD::PokeIndex(Umu,tmp,3); Grid::QCD::WilsonGaugeActionR Wilson(beta); // Just take beta = 1.0 @@ -311,7 +311,7 @@ double calc_grid_r_dir(Grid::QCD::LatticeGaugeField & Umu) std::vector U(4,UGrid); for(int mu=0;mu(Umu,mu); + U[mu] = Grid::PeekIndex(Umu,mu); } Grid::QCD::LatticeComplex rect(UGrid); @@ -322,7 +322,7 @@ double calc_grid_r_dir(Grid::QCD::LatticeGaugeField & Umu) for(int nu=0;nu::traceDirRectangle(rect,U,mu,nu); + Grid::QCD::ColourWilsonLoops::traceDirRectangle(rect,U,mu,nu); trect = Grid::sum(rect); crect = Grid::TensorRemove(trect); std::cout<< "mu/nu = "< + +double mq=0.1; + +typedef Grid::QCD::StaggeredImplR::FermionField FermionField; +typedef Grid::QCD::LatticeGaugeField GaugeField; + +void make_gauge (GaugeField & lat, FermionField &src); +void calc_grid (GaugeField & lat, GaugeField & uthin,GaugeField & ufat, FermionField &src, 
FermionField &res,int dag); +void calc_chroma (GaugeField & lat,GaugeField & uthin,GaugeField & ufat, FermionField &src, FermionField &res,int dag); + +#include +#include +#include + +namespace Chroma { + + +class ChromaWrapper { +public: + + typedef multi1d U; + typedef LatticeStaggeredFermion T4; + + static void ImportGauge(GaugeField & gr, + QDP::multi1d & ch) + { + Grid::QCD::LorentzColourMatrix LCM; + Grid::Complex cc; + QDP::ColorMatrix cm; + QDP::Complex c; + + std::vector x(4); + QDP::multi1d cx(4); + std::vector gd= gr._grid->GlobalDimensions(); + + for (x[0]=0;x[0] & ch) + { + Grid::QCD::LorentzColourMatrix LCM; + Grid::Complex cc; + QDP::ColorMatrix cm; + QDP::Complex c; + + std::vector x(4); + QDP::multi1d cx(4); + std::vector gd= gr._grid->GlobalDimensions(); + + for (x[0]=0;x[0] x(5); + QDP::multi1d cx(4); + std::vector gd= gr._grid->GlobalDimensions(); + + for (x[0]=0;x[0] x(5); + QDP::multi1d cx(4); + std::vector gd= gr._grid->GlobalDimensions(); + + for (x[0]=0;x[0] > GetLinOp (U &u,U &u_fat,U &u_triple) + { + QDP::Real _mq(mq); + QDP::multi1d bcs(QDP::Nd); + + bcs[0] = bcs[1] = bcs[2] = bcs[3] = 1; + + Chroma::AsqtadFermActParams p; + p.Mass = _mq; + p.u0 = Real(1.0); + + + Chroma::Handle > fbc(new Chroma::SimpleFermBC< T4, U, U >(bcs)); + Chroma::Handle > cfs( new Chroma::CreateSimpleFermState(fbc)); + Chroma::AsqtadFermAct S_f(cfs,p); + Chroma::Handle< Chroma::FermState > ffs( S_f.createState(u) ); + u_fat =ffs.cast()->getFatLinks(); + u_triple=ffs.cast()->getTripleLinks(); + return S_f.linOp(ffs); + } + +}; +} + +int main (int argc,char **argv ) +{ + + /******************************************************** + * Setup QDP + *********************************************************/ + Chroma::initialize(&argc,&argv); + Chroma::WilsonTypeFermActs4DEnv::registerAll(); + + /******************************************************** + * Setup Grid + *********************************************************/ + Grid::Grid_init(&argc,&argv); + 
Grid::GridCartesian * UGrid = Grid::QCD::SpaceTimeGrid::makeFourDimGrid(Grid::GridDefaultLatt(), + Grid::GridDefaultSimd(Grid::QCD::Nd,Grid::vComplex::Nsimd()), + Grid::GridDefaultMpi()); + + std::vector gd = UGrid->GlobalDimensions(); + QDP::multi1d nrow(QDP::Nd); + for(int mu=0;mu<4;mu++) nrow[mu] = gd[mu]; + + QDP::Layout::setLattSize(nrow); + QDP::Layout::create(); + + GaugeField uthin (UGrid); + GaugeField ufat (UGrid); + GaugeField utriple(UGrid); + FermionField src(UGrid); + FermionField res_chroma(UGrid); + FermionField res_grid (UGrid); + + + { + + std::cout << "*****************************"< U; + + U u(4); + U ut(4); + U uf(4); + + // Chroma::HotSt(u); + Chroma::ChromaWrapper::ImportGauge(lat,u) ; + + QDP::LatticeStaggeredFermion check; + QDP::LatticeStaggeredFermion result; + QDP::LatticeStaggeredFermion tmp; + QDP::LatticeStaggeredFermion psi; + + Chroma::ChromaWrapper::ImportFermion(src,psi); + + auto linop =Chroma::ChromaWrapper::GetLinOp(u,uf,ut); + + Chroma::ChromaWrapper::ExportGauge(uthin,ut) ; + Chroma::ChromaWrapper::ExportGauge(ufat ,uf) ; + + enum Chroma::PlusMinus isign; + if ( dag ) { + isign=Chroma::MINUS; + } else { + isign=Chroma::PLUS; + } + + std::cout << "Calling Chroma Linop "<< std::endl; + linop->evenEvenLinOp(tmp,psi,isign); check[rb[0]] = tmp; + linop->oddOddLinOp (tmp,psi,isign); check[rb[1]] = tmp; + linop->evenOddLinOp(tmp,psi,isign) ; check[rb[0]]+= tmp; + linop->oddEvenLinOp(tmp,psi,isign) ; check[rb[1]]+= tmp; + + Chroma::ChromaWrapper::ExportFermion(res,check) ; +} + + +void make_gauge(GaugeField & Umu,FermionField &src) +{ + using namespace Grid; + using namespace Grid::QCD; + + std::vector seeds4({1,2,3,4}); + + Grid::GridCartesian * UGrid = (Grid::GridCartesian *) Umu._grid; + Grid::GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + Grid::QCD::SU3::HotConfiguration(RNG4,Umu); + Grid::gaussian(RNG4,src); +} + +void calc_grid(GaugeField & Uthin, GaugeField & Utriple, GaugeField & Ufat, FermionField &src, 
FermionField &res,int dag) +{ + using namespace Grid; + using namespace Grid::QCD; + + Grid::GridCartesian * UGrid = (Grid::GridCartesian *) Uthin._grid; + Grid::GridRedBlackCartesian * UrbGrid = Grid::QCD::SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + + Grid::QCD::ImprovedStaggeredFermionR Dstag(Uthin,Utriple,Ufat,*UGrid,*UrbGrid,mq*2.0); + + std::cout << Grid::GridLogMessage <<" Calling Grid staggered multiply "< U(4,UGrid); for(int mu=0;mu U(4,UGrid); RealD mass=0.1; diff --git a/tests/solver/Test_dwf_cg_schur.cc b/tests/solver/Test_dwf_cg_schur.cc index 62a95235..0f75eb3c 100644 --- a/tests/solver/Test_dwf_cg_schur.cc +++ b/tests/solver/Test_dwf_cg_schur.cc @@ -61,7 +61,7 @@ int main (int argc, char ** argv) LatticeFermion src(FGrid); random(RNG5,src); LatticeFermion result(FGrid); result=zero; - LatticeGaugeField Umu(UGrid); random(RNG4,Umu); + LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu); std::vector U(4,UGrid); for(int mu=0;mu U(4,UGrid); for(int mu=0;mu U(4,UGrid); diff --git a/tests/solver/Test_wilson_cg_prec.cc b/tests/solver/Test_wilson_cg_prec.cc index e336161f..011bc70b 100644 --- a/tests/solver/Test_wilson_cg_prec.cc +++ b/tests/solver/Test_wilson_cg_prec.cc @@ -60,7 +60,7 @@ int main (int argc, char ** argv) LatticeFermion src(&Grid); random(pRNG,src); RealD nrm = norm2(src); LatticeFermion result(&Grid); result=zero; - LatticeGaugeField Umu(&Grid); random(pRNG,Umu); + LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); std::vector U(4,&Grid); diff --git a/tests/solver/Test_wilson_cg_schur.cc b/tests/solver/Test_wilson_cg_schur.cc index 8b62b040..7bbf74d3 100644 --- a/tests/solver/Test_wilson_cg_schur.cc +++ b/tests/solver/Test_wilson_cg_schur.cc @@ -57,7 +57,7 @@ int main (int argc, char ** argv) std::vector seeds({1,2,3,4}); GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); - LatticeGaugeField Umu(&Grid); random(pRNG,Umu); + LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); LatticeFermion 
src(&Grid); random(pRNG,src); LatticeFermion result(&Grid); result=zero; diff --git a/tests/solver/Test_wilson_cg_unprec.cc b/tests/solver/Test_wilson_cg_unprec.cc index bb429468..19c5f854 100644 --- a/tests/solver/Test_wilson_cg_unprec.cc +++ b/tests/solver/Test_wilson_cg_unprec.cc @@ -60,7 +60,7 @@ int main (int argc, char ** argv) LatticeFermion src(&Grid); random(pRNG,src); RealD nrm = norm2(src); LatticeFermion result(&Grid); result=zero; - LatticeGaugeField Umu(&Grid); random(pRNG,Umu); + LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); double volume=1; for(int mu=0;mu U(4,&Grid); diff --git a/tests/solver/Test_zmobius_cg_prec.cc b/tests/solver/Test_zmobius_cg_prec.cc new file mode 100644 index 00000000..4ae98d71 --- /dev/null +++ b/tests/solver/Test_zmobius_cg_prec.cc @@ -0,0 +1,113 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/Test_dwf_cg_prec.cc + +Copyright (C) 2015 + +Author: Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +struct scal { + d internal; +}; + +Gamma::Algebra Gmu[] = {Gamma::Algebra::GammaX, Gamma::Algebra::GammaY, Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT}; + +int main(int argc, char** argv) { + Grid_init(&argc, &argv); + + const int Ls = 16; + + GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid( + GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), + GridDefaultMpi()); + GridRedBlackCartesian* UrbGrid = + SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian* FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid); + GridRedBlackCartesian* FrbGrid = + SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid); + + std::vector seeds4({1, 2, 3, 4}); + std::vector seeds5({5, 6, 7, 8}); + GridParallelRNG RNG5(FGrid); + RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); + RNG4.SeedFixedIntegers(seeds4); + + LatticeFermion src(FGrid); + random(RNG5, src); + LatticeFermion result(FGrid); + result = zero; + LatticeGaugeField Umu(UGrid); + + SU3::HotConfiguration(RNG4, Umu); + + std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt() + << " Ls: " << Ls << std::endl; + + std::vector U(4, UGrid); + for (int mu = 0; mu < Nd; mu++) { + U[mu] = PeekIndex(Umu, mu); + } + + RealD mass = 0.01; + RealD M5 = 1.8; + std::vector < std::complex > omegas; + for(int i=0;i temp (0.25+0.01*i, imag*0.01); + omegas.push_back(temp); + } + ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, omegas,1.,0.); + + LatticeFermion src_o(FrbGrid); + LatticeFermion result_o(FrbGrid); + pickCheckerboard(Odd, src_o, src); + result_o = zero; + + GridStopWatch CGTimer; + + SchurDiagMooeeOperator HermOpEO(Ddwf); + ConjugateGradient CG(1.0e-8, 10000, 0);// switch off 
the assert + + CGTimer.Start(); + CG(HermOpEO, src_o, result_o); + CGTimer.Stop(); + + std::cout << GridLogMessage << "Total CG time : " << CGTimer.Elapsed() + << std::endl; + + std::cout << GridLogMessage << "######## Dhop calls summary" << std::endl; + Ddwf.Report(); + + Grid_finalize(); +}