1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-17 07:17:06 +01:00

CG test written and passes, i.e. it converges with a small true residual

in RedBlack MpcDagMpc, Unprec MdagM and the Schur red-black solver, for
each of:

DomainWallFermion
MobiusFermion
MobiusZolotarevFermion
ScaledShamirFermion
ScaledShamirZolotarevFermion
This commit is contained in:
Peter Boyle
2015-06-03 10:54:03 +01:00
parent 260011670e
commit 84b5c7217d
30 changed files with 241 additions and 74 deletions

145
benchmarks/Benchmark_su3.cc Normal file
View File

@ -0,0 +1,145 @@
#include <Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
// Prints the banner for one benchmark section: a ruled title followed by the
// column headings of the results table.
static void BenchSu3Banner(const char *title)
{
  std::cout << "===================================================================================================="<<std::endl;
  std::cout << title<<std::endl;
  std::cout << "===================================================================================================="<<std::endl;
  std::cout << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
  std::cout << "----------------------------------------------------------"<<std::endl;
}

// Returns the number of lattice sites as a double.
// The product is accumulated in floating point on purpose: latt_size holds the
// *global* extents (local extent times MPI layout), so on larger machine
// partitions an int product -- and above all the later multiplication by the
// per-site flop count -- can exceed INT_MAX, which is undefined behaviour.
static double BenchSu3Volume(const std::vector<int> &latt_size)
{
  double vol = 1.0;
  for(int extent : latt_size){
    vol *= extent;
  }
  return vol;
}

// Prints one row of the results table.
//   lat         : lattice extent per MPI rank in each direction
//   shown_bytes : value printed in the "bytes" column
//   bytes       : data volume moved per kernel call, used for the bandwidth figure
//   flops       : floating point operations per kernel call
//   time        : time per kernel call, in the unit computed by the caller
static void BenchSu3Report(int lat,double shown_bytes,double bytes,double flops,double time)
{
  std::cout<<std::setprecision(3) << lat<<"\t\t"<<shown_bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
}

// Benchmarks four ways of multiplying colour-matrix lattice fields
// (x=x*y, z=x*y, mult(z,x,y) and mac(z,x,y)) for lattice extents 2,4,...,24
// per MPI rank, printing one table per kernel with a bandwidth and a flop-rate
// column.
//
// The lattice fields are deliberately left unfilled (the RNG lines are
// commented out), so only the throughput numbers are meaningful, not the
// field contents.
int main (int argc, char ** argv)
{
  Grid_init(&argc,&argv);

  // Number of timed repetitions of each kernel per lattice size.
  int Nloop=1000;

  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
  std::vector<int> mpi_layout  = GridDefaultMpi();

  int threads = GridThread::GetThreads();
  std::cout << "Grid is setup to use "<<threads<<" threads"<<std::endl;

  // ------------------------------------------------------------------
  // x = x*y : in-place product. Only two fields are resident (footprint),
  // while three field-sized transfers happen per call (bytes).
  // ------------------------------------------------------------------
  BenchSu3Banner("= Benchmarking SU3xSU3 x= x*y");
  for(int lat=2;lat<=24;lat+=2){

    std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
    double vol = BenchSu3Volume(latt_size);

    GridCartesian Grid(latt_size,simd_layout,mpi_layout);
    // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();

    LatticeColourMatrix z(&Grid);// random(pRNG,z);
    LatticeColourMatrix x(&Grid);// random(pRNG,x);
    LatticeColourMatrix y(&Grid);// random(pRNG,y);

    double start=usecond();
    for(int i=0;i<Nloop;i++){
      x=x*y;
    }
    double stop=usecond();

    // NOTE(review): usecond() presumably returns microseconds, which makes
    // this nanoseconds per call so that bytes/time reads as GB/s -- confirm.
    double time = (stop-start)/Nloop*1000.0;

    double bytes=3.0*vol*Nc*Nc*sizeof(Complex);
    double footprint=2.0*vol*Nc*Nc*sizeof(Complex);
    double flops=Nc*Nc*(6.0+8.0+8.0)*vol;

    // This section prints the footprint in the "bytes" column; the
    // bandwidth figure still uses the full traffic.
    BenchSu3Report(lat,footprint,bytes,flops,time);
  }

  // ------------------------------------------------------------------
  // z = x*y : product via the expression interface into a third field.
  // ------------------------------------------------------------------
  BenchSu3Banner("= Benchmarking SU3xSU3 z= x*y");
  for(int lat=2;lat<=24;lat+=2){

    std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
    double vol = BenchSu3Volume(latt_size);

    GridCartesian Grid(latt_size,simd_layout,mpi_layout);
    // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();

    LatticeColourMatrix z(&Grid); //random(pRNG,z);
    LatticeColourMatrix x(&Grid); //random(pRNG,x);
    LatticeColourMatrix y(&Grid); //random(pRNG,y);

    double start=usecond();
    for(int i=0;i<Nloop;i++){
      z=x*y;
    }
    double stop=usecond();

    double time = (stop-start)/Nloop*1000.0;

    // Floating point literals keep the whole product in double; the previous
    // all-integer form overflowed int for large global volumes.
    double bytes=3.0*vol*Nc*Nc*sizeof(Complex);
    double flops=Nc*Nc*(6.0+8.0+8.0)*vol;

    BenchSu3Report(lat,bytes,bytes,flops,time);
  }

  // ------------------------------------------------------------------
  // mult(z,x,y) : product via the explicit three-argument call.
  // ------------------------------------------------------------------
  BenchSu3Banner("= Benchmarking SU3xSU3 mult(z,x,y)");
  for(int lat=2;lat<=24;lat+=2){

    std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
    double vol = BenchSu3Volume(latt_size);

    GridCartesian Grid(latt_size,simd_layout,mpi_layout);
    // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();

    LatticeColourMatrix z(&Grid); //random(pRNG,z);
    LatticeColourMatrix x(&Grid); //random(pRNG,x);
    LatticeColourMatrix y(&Grid); //random(pRNG,y);

    double start=usecond();
    for(int i=0;i<Nloop;i++){
      mult(z,x,y);
    }
    double stop=usecond();

    double time = (stop-start)/Nloop*1000.0;

    double bytes=3.0*vol*Nc*Nc*sizeof(Complex);
    double flops=Nc*Nc*(6.0+8.0+8.0)*vol;

    BenchSu3Report(lat,bytes,bytes,flops,time);
  }

  // ------------------------------------------------------------------
  // mac(z,x,y) : three-argument call, counted at 8+8+8 flops per output
  // element instead of the 6+8+8 used for the plain products above.
  // ------------------------------------------------------------------
  BenchSu3Banner("= Benchmarking SU3xSU3 mac(z,x,y)");
  for(int lat=2;lat<=24;lat+=2){

    std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
    double vol = BenchSu3Volume(latt_size);

    GridCartesian Grid(latt_size,simd_layout,mpi_layout);
    // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();

    LatticeColourMatrix z(&Grid); //random(pRNG,z);
    LatticeColourMatrix x(&Grid); //random(pRNG,x);
    LatticeColourMatrix y(&Grid); //random(pRNG,y);

    double start=usecond();
    for(int i=0;i<Nloop;i++){
      mac(z,x,y);
    }
    double stop=usecond();

    double time = (stop-start)/Nloop*1000.0;

    double bytes=3.0*vol*Nc*Nc*sizeof(Complex);
    double flops=Nc*Nc*(8.0+8.0+8.0)*vol;

    BenchSu3Report(lat,bytes,bytes,flops,time);
  }

  Grid_finalize();
}