mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Weak scale the benchmarks automatically.
This commit is contained in:
parent
a5c3424cfb
commit
445e38acf6
@ -27,8 +27,8 @@ int main (int argc, char ** argv)
|
||||
|
||||
for(int lat=4;lat<=32;lat+=4){
|
||||
|
||||
std::vector<int> latt_size ({lat,lat,lat,lat});
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
//GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
@ -47,8 +47,8 @@ int main (int argc, char ** argv)
|
||||
double stop=usecond();
|
||||
double time = (stop-start)/Nloop*1000;
|
||||
|
||||
double flops=lat*lat*lat*lat*Nvec*2;// mul,add
|
||||
double bytes=3*lat*lat*lat*lat*Nvec*sizeof(Real);
|
||||
double flops=vol*Nvec*2;// mul,add
|
||||
double bytes=3*vol*Nvec*sizeof(Real);
|
||||
std::cout<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<std::endl;
|
||||
|
||||
}
|
||||
@ -61,8 +61,8 @@ int main (int argc, char ** argv)
|
||||
|
||||
for(int lat=4;lat<=32;lat+=4){
|
||||
|
||||
std::vector<int> latt_size ({lat,lat,lat,lat});
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
//GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
@ -79,8 +79,8 @@ int main (int argc, char ** argv)
|
||||
double stop=usecond();
|
||||
double time = (stop-start)/Nloop*1000;
|
||||
|
||||
double flops=lat*lat*lat*lat*Nvec*2;// mul,add
|
||||
double bytes=3*lat*lat*lat*lat*Nvec*sizeof(Real);
|
||||
double flops=vol*Nvec*2;// mul,add
|
||||
double bytes=3*vol*Nvec*sizeof(Real);
|
||||
std::cout<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<std::endl;
|
||||
|
||||
}
|
||||
@ -92,7 +92,8 @@ int main (int argc, char ** argv)
|
||||
|
||||
for(int lat=4;lat<=32;lat+=4){
|
||||
|
||||
std::vector<int> latt_size ({lat,lat,lat,lat});
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
@ -111,8 +112,8 @@ int main (int argc, char ** argv)
|
||||
double stop=usecond();
|
||||
double time = (stop-start)/Nloop*1000;
|
||||
|
||||
double bytes=2*lat*lat*lat*lat*Nvec*sizeof(Real);
|
||||
double flops=lat*lat*lat*lat*Nvec*1;// mul
|
||||
double bytes=2*vol*Nvec*sizeof(Real);
|
||||
double flops=vol*Nvec*1;// mul
|
||||
std::cout <<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<std::endl;
|
||||
|
||||
}
|
||||
@ -125,8 +126,8 @@ int main (int argc, char ** argv)
|
||||
|
||||
for(int lat=4;lat<=32;lat+=4){
|
||||
|
||||
std::vector<int> latt_size ({lat,lat,lat,lat});
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
//GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
@ -144,8 +145,8 @@ int main (int argc, char ** argv)
|
||||
double stop=usecond();
|
||||
double time = (stop-start)/Nloop*1000;
|
||||
|
||||
double bytes=lat*lat*lat*lat*Nvec*sizeof(Real);
|
||||
double flops=lat*lat*lat*lat*Nvec*2;// mul,add
|
||||
double bytes=vol*Nvec*sizeof(Real);
|
||||
double flops=vol*Nvec*2;// mul,add
|
||||
std::cout<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<std::endl;
|
||||
|
||||
}
|
||||
|
@ -24,8 +24,8 @@ int main (int argc, char ** argv)
|
||||
|
||||
for(int lat=2;lat<=24;lat+=2){
|
||||
|
||||
std::vector<int> latt_size ({lat,lat,lat,lat});
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
|
||||
@ -40,9 +40,9 @@ int main (int argc, char ** argv)
|
||||
double stop=usecond();
|
||||
double time = (stop-start)/Nloop*1000.0;
|
||||
|
||||
double bytes=3.0*lat*lat*lat*lat*Nc*Nc*sizeof(Complex);
|
||||
double footprint=2.0*lat*lat*lat*lat*Nc*Nc*sizeof(Complex);
|
||||
double flops=Nc*Nc*(6.0+8.0+8.0)*lat*lat*lat*lat;
|
||||
double bytes=3.0*vol*Nc*Nc*sizeof(Complex);
|
||||
double footprint=2.0*vol*Nc*Nc*sizeof(Complex);
|
||||
double flops=Nc*Nc*(6.0+8.0+8.0)*vol;
|
||||
std::cout<<std::setprecision(3) << lat<<"\t\t"<<footprint<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
||||
|
||||
}
|
||||
@ -56,7 +56,8 @@ int main (int argc, char ** argv)
|
||||
|
||||
for(int lat=2;lat<=24;lat+=2){
|
||||
|
||||
std::vector<int> latt_size ({lat,lat,lat,lat});
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
@ -72,8 +73,8 @@ int main (int argc, char ** argv)
|
||||
double stop=usecond();
|
||||
double time = (stop-start)/Nloop*1000.0;
|
||||
|
||||
double bytes=3*lat*lat*lat*lat*Nc*Nc*sizeof(Complex);
|
||||
double flops=Nc*Nc*(6+8+8)*lat*lat*lat*lat;
|
||||
double bytes=3*vol*Nc*Nc*sizeof(Complex);
|
||||
double flops=Nc*Nc*(6+8+8)*vol;
|
||||
std::cout<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
||||
|
||||
}
|
||||
@ -86,7 +87,8 @@ int main (int argc, char ** argv)
|
||||
|
||||
for(int lat=2;lat<=24;lat+=2){
|
||||
|
||||
std::vector<int> latt_size ({lat,lat,lat,lat});
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
@ -102,8 +104,8 @@ int main (int argc, char ** argv)
|
||||
double stop=usecond();
|
||||
double time = (stop-start)/Nloop*1000.0;
|
||||
|
||||
double bytes=3*lat*lat*lat*lat*Nc*Nc*sizeof(Complex);
|
||||
double flops=Nc*Nc*(6+8+8)*lat*lat*lat*lat;
|
||||
double bytes=3*vol*Nc*Nc*sizeof(Complex);
|
||||
double flops=Nc*Nc*(6+8+8)*vol;
|
||||
std::cout<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
||||
|
||||
}
|
||||
@ -116,7 +118,8 @@ int main (int argc, char ** argv)
|
||||
|
||||
for(int lat=2;lat<=24;lat+=2){
|
||||
|
||||
std::vector<int> latt_size ({lat,lat,lat,lat});
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
@ -132,8 +135,8 @@ int main (int argc, char ** argv)
|
||||
double stop=usecond();
|
||||
double time = (stop-start)/Nloop*1000.0;
|
||||
|
||||
double bytes=3*lat*lat*lat*lat*Nc*Nc*sizeof(Complex);
|
||||
double flops=Nc*Nc*(8+8+8)*lat*lat*lat*lat;
|
||||
double bytes=3*vol*Nc*Nc*sizeof(Complex);
|
||||
double flops=Nc*Nc*(8+8+8)*vol;
|
||||
std::cout<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
||||
|
||||
}
|
||||
|
@ -54,7 +54,6 @@ namespace Grid {
|
||||
const std::vector<int> &GridDefaultLatt(void) {return Grid_default_latt;};
|
||||
const std::vector<int> &GridDefaultMpi(void) {return Grid_default_mpi;};
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Command line parsing assist for stock controls
|
||||
////////////////////////////////////////////////////////////
|
||||
|
@ -27,9 +27,11 @@ Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<
|
||||
|
||||
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
|
||||
int e1=rhs._grid->_slice_nblock[dimension];
|
||||
int e2=rhs._grid->_slice_block[dimension];
|
||||
PARALLEL_NESTED_LOOP2
|
||||
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
|
||||
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o = n*rhs._grid->_slice_stride[dimension];
|
||||
int bo = n*rhs._grid->_slice_block[dimension];
|
||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
|
||||
@ -54,10 +56,12 @@ Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_
|
||||
}
|
||||
|
||||
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
|
||||
|
||||
int e1=rhs._grid->_slice_nblock[dimension];
|
||||
int e2=rhs._grid->_slice_block[dimension];
|
||||
PARALLEL_NESTED_LOOP2
|
||||
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
|
||||
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
|
||||
int o=n*rhs._grid->_slice_stride[dimension];
|
||||
int offset = b+n*rhs._grid->_slice_block[dimension];
|
||||
@ -103,9 +107,11 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,std::vector<v
|
||||
|
||||
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
|
||||
int e1=rhs._grid->_slice_nblock[dimension];
|
||||
int e2=rhs._grid->_slice_block[dimension];
|
||||
PARALLEL_NESTED_LOOP2
|
||||
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
|
||||
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*rhs._grid->_slice_stride[dimension];
|
||||
int bo =n*rhs._grid->_slice_block[dimension];
|
||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
|
||||
@ -129,10 +135,11 @@ PARALLEL_NESTED_LOOP2
|
||||
|
||||
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
|
||||
int e1=rhs._grid->_slice_nblock[dimension];
|
||||
int e2=rhs._grid->_slice_block[dimension];
|
||||
PARALLEL_NESTED_LOOP2
|
||||
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
|
||||
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
|
||||
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o = n*rhs._grid->_slice_stride[dimension];
|
||||
int offset = b+n*rhs._grid->_slice_block[dimension];
|
||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
|
||||
@ -156,10 +163,12 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,Lattice<vobj> &rhs, int
|
||||
|
||||
int ro = rplane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
int lo = lplane*lhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
|
||||
|
||||
int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc
|
||||
int e2=rhs._grid->_slice_block[dimension];
|
||||
PARALLEL_NESTED_LOOP2
|
||||
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
|
||||
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
|
||||
int o =n*rhs._grid->_slice_stride[dimension]+b;
|
||||
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
|
||||
@ -185,10 +194,12 @@ template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,Lattice<vobj> &r
|
||||
|
||||
int ro = rplane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
int lo = lplane*lhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
|
||||
|
||||
int e1=rhs._grid->_slice_nblock[dimension];
|
||||
int e2=rhs._grid->_slice_block [dimension];
|
||||
PARALLEL_NESTED_LOOP2
|
||||
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
|
||||
for(int b=0;b<rhs._grid->_slice_block [dimension];b++){
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
|
||||
int o =n*rhs._grid->_slice_stride[dimension];
|
||||
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o+b);
|
||||
|
Loading…
Reference in New Issue
Block a user