1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Weak scale the benchmarks automatically.

This commit is contained in:
Peter Boyle 2015-05-28 13:47:01 +01:00
parent a5c3424cfb
commit 445e38acf6
4 changed files with 60 additions and 46 deletions

View File

@ -27,8 +27,8 @@ int main (int argc, char ** argv)
for(int lat=4;lat<=32;lat+=4){
std::vector<int> latt_size ({lat,lat,lat,lat});
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
//GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
@ -47,8 +47,8 @@ int main (int argc, char ** argv)
double stop=usecond();
double time = (stop-start)/Nloop*1000;
double flops=lat*lat*lat*lat*Nvec*2;// mul,add
double bytes=3*lat*lat*lat*lat*Nvec*sizeof(Real);
double flops=vol*Nvec*2;// mul,add
double bytes=3*vol*Nvec*sizeof(Real);
std::cout<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<std::endl;
}
@ -61,8 +61,8 @@ int main (int argc, char ** argv)
for(int lat=4;lat<=32;lat+=4){
std::vector<int> latt_size ({lat,lat,lat,lat});
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
//GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
@ -79,8 +79,8 @@ int main (int argc, char ** argv)
double stop=usecond();
double time = (stop-start)/Nloop*1000;
double flops=lat*lat*lat*lat*Nvec*2;// mul,add
double bytes=3*lat*lat*lat*lat*Nvec*sizeof(Real);
double flops=vol*Nvec*2;// mul,add
double bytes=3*vol*Nvec*sizeof(Real);
std::cout<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<std::endl;
}
@ -92,7 +92,8 @@ int main (int argc, char ** argv)
for(int lat=4;lat<=32;lat+=4){
std::vector<int> latt_size ({lat,lat,lat,lat});
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
@ -111,8 +112,8 @@ int main (int argc, char ** argv)
double stop=usecond();
double time = (stop-start)/Nloop*1000;
double bytes=2*lat*lat*lat*lat*Nvec*sizeof(Real);
double flops=lat*lat*lat*lat*Nvec*1;// mul
double bytes=2*vol*Nvec*sizeof(Real);
double flops=vol*Nvec*1;// mul
std::cout <<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<std::endl;
}
@ -125,8 +126,8 @@ int main (int argc, char ** argv)
for(int lat=4;lat<=32;lat+=4){
std::vector<int> latt_size ({lat,lat,lat,lat});
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
//GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
@ -144,8 +145,8 @@ int main (int argc, char ** argv)
double stop=usecond();
double time = (stop-start)/Nloop*1000;
double bytes=lat*lat*lat*lat*Nvec*sizeof(Real);
double flops=lat*lat*lat*lat*Nvec*2;// mul,add
double bytes=vol*Nvec*sizeof(Real);
double flops=vol*Nvec*2;// mul,add
std::cout<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<std::endl;
}

View File

@ -24,8 +24,8 @@ int main (int argc, char ** argv)
for(int lat=2;lat<=24;lat+=2){
std::vector<int> latt_size ({lat,lat,lat,lat});
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
@ -40,9 +40,9 @@ int main (int argc, char ** argv)
double stop=usecond();
double time = (stop-start)/Nloop*1000.0;
double bytes=3.0*lat*lat*lat*lat*Nc*Nc*sizeof(Complex);
double footprint=2.0*lat*lat*lat*lat*Nc*Nc*sizeof(Complex);
double flops=Nc*Nc*(6.0+8.0+8.0)*lat*lat*lat*lat;
double bytes=3.0*vol*Nc*Nc*sizeof(Complex);
double footprint=2.0*vol*Nc*Nc*sizeof(Complex);
double flops=Nc*Nc*(6.0+8.0+8.0)*vol;
std::cout<<std::setprecision(3) << lat<<"\t\t"<<footprint<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
}
@ -56,7 +56,8 @@ int main (int argc, char ** argv)
for(int lat=2;lat<=24;lat+=2){
std::vector<int> latt_size ({lat,lat,lat,lat});
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
@ -72,8 +73,8 @@ int main (int argc, char ** argv)
double stop=usecond();
double time = (stop-start)/Nloop*1000.0;
double bytes=3*lat*lat*lat*lat*Nc*Nc*sizeof(Complex);
double flops=Nc*Nc*(6+8+8)*lat*lat*lat*lat;
double bytes=3*vol*Nc*Nc*sizeof(Complex);
double flops=Nc*Nc*(6+8+8)*vol;
std::cout<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
}
@ -86,7 +87,8 @@ int main (int argc, char ** argv)
for(int lat=2;lat<=24;lat+=2){
std::vector<int> latt_size ({lat,lat,lat,lat});
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
@ -102,8 +104,8 @@ int main (int argc, char ** argv)
double stop=usecond();
double time = (stop-start)/Nloop*1000.0;
double bytes=3*lat*lat*lat*lat*Nc*Nc*sizeof(Complex);
double flops=Nc*Nc*(6+8+8)*lat*lat*lat*lat;
double bytes=3*vol*Nc*Nc*sizeof(Complex);
double flops=Nc*Nc*(6+8+8)*vol;
std::cout<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
}
@ -116,7 +118,8 @@ int main (int argc, char ** argv)
for(int lat=2;lat<=24;lat+=2){
std::vector<int> latt_size ({lat,lat,lat,lat});
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
@ -132,8 +135,8 @@ int main (int argc, char ** argv)
double stop=usecond();
double time = (stop-start)/Nloop*1000.0;
double bytes=3*lat*lat*lat*lat*Nc*Nc*sizeof(Complex);
double flops=Nc*Nc*(8+8+8)*lat*lat*lat*lat;
double bytes=3*vol*Nc*Nc*sizeof(Complex);
double flops=Nc*Nc*(8+8+8)*vol;
std::cout<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
}

View File

@ -54,7 +54,6 @@ namespace Grid {
const std::vector<int> &GridDefaultLatt(void) {return Grid_default_latt;};
const std::vector<int> &GridDefaultMpi(void) {return Grid_default_mpi;};
////////////////////////////////////////////////////////////
// Command line parsing assist for stock controls
////////////////////////////////////////////////////////////

View File

@ -27,9 +27,11 @@ Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension];
PARALLEL_NESTED_LOOP2
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*rhs._grid->_slice_stride[dimension];
int bo = n*rhs._grid->_slice_block[dimension];
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
@ -54,10 +56,12 @@ Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_
}
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension];
PARALLEL_NESTED_LOOP2
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o=n*rhs._grid->_slice_stride[dimension];
int offset = b+n*rhs._grid->_slice_block[dimension];
@ -103,9 +107,11 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,std::vector<v
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension];
PARALLEL_NESTED_LOOP2
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*rhs._grid->_slice_stride[dimension];
int bo =n*rhs._grid->_slice_block[dimension];
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
@ -129,10 +135,11 @@ PARALLEL_NESTED_LOOP2
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension];
PARALLEL_NESTED_LOOP2
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*rhs._grid->_slice_stride[dimension];
int offset = b+n*rhs._grid->_slice_block[dimension];
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
@ -156,10 +163,12 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,Lattice<vobj> &rhs, int
int ro = rplane*rhs._grid->_ostride[dimension]; // base offset for start of plane
int lo = lplane*lhs._grid->_ostride[dimension]; // base offset for start of plane
int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc
int e2=rhs._grid->_slice_block[dimension];
PARALLEL_NESTED_LOOP2
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*rhs._grid->_slice_stride[dimension]+b;
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
@ -185,10 +194,12 @@ template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,Lattice<vobj> &r
int ro = rplane*rhs._grid->_ostride[dimension]; // base offset for start of plane
int lo = lplane*lhs._grid->_ostride[dimension]; // base offset for start of plane
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block [dimension];
PARALLEL_NESTED_LOOP2
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
for(int b=0;b<rhs._grid->_slice_block [dimension];b++){
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*rhs._grid->_slice_stride[dimension];
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o+b);