diff --git a/benchmarks/Benchmark_schur.cc b/benchmarks/Benchmark_schur.cc new file mode 100644 index 00000000..afee31b0 --- /dev/null +++ b/benchmarks/Benchmark_schur.cc @@ -0,0 +1,176 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./benchmarks/Benchmark_dwf.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; + + Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT + }; + +void benchDw(std::vector & L, int Ls); + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + + const int Ls=12; + std::vector< std::vector > latts; +#if 0 + latts.push_back(std::vector ({24,24,24,24}) ); + latts.push_back(std::vector ({48,24,24,24}) ); + latts.push_back(std::vector ({96,24,24,24}) ); + latts.push_back(std::vector ({96,48,24,24}) ); + // latts.push_back(std::vector ({96,48,48,24}) ); + // latts.push_back(std::vector ({96,48,48,48}) ); +#else + // latts.push_back(std::vector ({96,48,48,48}) ); + latts.push_back(std::vector ({96,96,96,192}) ); +#endif + + std::cout << GridLogMessage<< "*****************************************************************" < latt4 = latts[l]; + std::cout << GridLogMessage <<"\t"; + for(int d=0;d & latt4, int Ls) +{ + ///////////////////////////////////////////////////////////////////////////////////// + // for Nc=3 + ///////////////////////////////////////////////////////////////////////////////////// + // Dw : Ls*24*(7+48)= Ls*1320 + // + // M5D: Ls*(4*2*Nc mul + 4*2*Nc madd ) = 3*4*2*Nc*Ls = Ls*72 + // Meo: Ls*24*(7+48) + Ls*72 = Ls*1392 + // + // Mee: 3*Ns*2*Nc*Ls // Chroma 6*N5*Nc*Ns + // + // LeemInv : 2*2*Nc*madd*Ls + // LeeInv : 2*2*Nc*madd*Ls + // DeeInv : 4*2*Nc*mul *Ls + // UeeInv : 2*2*Nc*madd*Ls + // UeemInv : 2*2*Nc*madd*Ls = Nc*Ls*(8+8+8+8+8) = 40*Nc*Ls// Chroma (10*N5 - 8)*Nc*Ns ~ (40 N5 - 32)Nc flops + // QUDA counts as dense LsxLs real matrix x Ls x NcNsNreim => Nc*4*2 x Ls^2 FMA = 16Nc Ls^2 flops + // Mpc => 1452*cbvol*2*Ls flops // + // => (1344+Ls*48)*Ls*cbvol*2 flops QUDA = 1920 @Ls=12 and 2112 @Ls=16 + ///////////////////////////////////////////////////////////////////////////////////// + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + // long unsigned int single_site_flops = 8*Nc*(7+16*Nc)*Ls; + long unsigned int single_site_mpc_flops = 8*Nc*(7+16*Nc)*2*Ls + 40*Nc*2*Ls + 4*Nc*2*Ls; + long unsigned int single_site_quda_flops = 8*Nc*(7+16*Nc)*2*Ls + 16*Nc*Ls*Ls + 4*Nc*2*Ls; + std::vector seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + + + ColourMatrixF cm = ComplexF(1.0,0.0); + + int ncall=300; + RealD mass=0.1; + RealD M5 =1.8; + RealD NP = UGrid->_Nprocessors; + double volume=1; for(int mu=0;mu Mpc(Dw); + Chebyshev Cheby(0.0,60.0,order); + + { + Mpc.Mpc(src_o,r_o); + Mpc.Mpc(src_o,r_o); + Mpc.Mpc(src_o,r_o); + + double t0=usecond(); + for(int i=0;i