From 36ea5f6b7772a2295db0e4578f069706c393ea47 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sat, 24 Feb 2018 22:20:14 +0000 Subject: [PATCH] gpu friendly coordinates ; no std::vector on GPU --- benchmarks/Benchmark_ITT.cc | 56 ++++++++++++------------ benchmarks/Benchmark_comms.cc | 20 ++++----- benchmarks/Benchmark_dwf.cc | 4 +- benchmarks/Benchmark_gparity.cc | 2 +- benchmarks/Benchmark_memory_asynch.cc | 6 +-- benchmarks/Benchmark_memory_bandwidth.cc | 12 ++--- benchmarks/Benchmark_mooee.cc | 2 +- benchmarks/Benchmark_staggered.cc | 6 +-- benchmarks/Benchmark_su3.cc | 12 ++--- benchmarks/Benchmark_wilson.cc | 6 +-- benchmarks/Benchmark_wilson_sweep.cc | 9 ++-- 11 files changed, 67 insertions(+), 68 deletions(-) diff --git a/benchmarks/Benchmark_ITT.cc b/benchmarks/Benchmark_ITT.cc index b689866d..266251c5 100644 --- a/benchmarks/Benchmark_ITT.cc +++ b/benchmarks/Benchmark_ITT.cc @@ -109,8 +109,8 @@ public: int nmu=0; int maxlat=32; - std::vector simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd()); - std::vector mpi_layout = GridDefaultMpi(); + Coordinate simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd()); + Coordinate mpi_layout = GridDefaultMpi(); for(int mu=0;mu1) nmu++; @@ -125,7 +125,7 @@ public: for(int lat=4;lat<=maxlat;lat+=4){ for(int Ls=8;Ls<=8;Ls*=2){ - std::vector latt_size ({lat*mpi_layout[0], + Coordinate latt_size ({lat*mpi_layout[0], lat*mpi_layout[1], lat*mpi_layout[2], lat*mpi_layout[3]}); @@ -217,8 +217,8 @@ public: typedef Lattice< iVector< vReal,Nvec> > LatticeVec; typedef iVector Vec; - std::vector simd_layout = GridDefaultSimd(Nd,vReal::Nsimd()); - std::vector mpi_layout = GridDefaultMpi(); + Coordinate simd_layout = GridDefaultSimd(Nd,vReal::Nsimd()); + Coordinate mpi_layout = GridDefaultMpi(); std::cout<({45,12,81,9})); for(int lat=8;lat<=lmax;lat+=4){ - std::vector latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); + Coordinate latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); @@ -284,25 +284,25 @@ public: // Set/Get the layout & grid size /////////////////////////////////////////////////////// int threads = GridThread::GetThreads(); - std::vector mpi = GridDefaultMpi(); assert(mpi.size()==4); - std::vector local({L,L,L,L}); + Coordinate mpi = GridDefaultMpi(); assert(mpi.size()==4); + Coordinate local({L,L,L,L}); - GridCartesian * TmpGrid = SpaceTimeGrid::makeFourDimGrid(std::vector({64,64,64,64}), + GridCartesian * TmpGrid = SpaceTimeGrid::makeFourDimGrid(Coordinate({64,64,64,64}), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); uint64_t NP = TmpGrid->RankCount(); uint64_t NN = TmpGrid->NodeCount(); NN_global=NN; uint64_t SHM=NP/NN; - std::vector internal; - if ( SHM == 1 ) internal = std::vector({1,1,1,1}); - else if ( SHM == 2 ) internal = std::vector({2,1,1,1}); - else if ( SHM == 4 ) internal = std::vector({2,2,1,1}); - else if ( SHM == 8 ) internal = std::vector({2,2,2,1}); + Coordinate internal; + if ( SHM == 1 ) internal = Coordinate({1,1,1,1}); + else if ( SHM == 2 ) internal = Coordinate({2,1,1,1}); + else if ( SHM == 4 ) internal = Coordinate({2,2,1,1}); + else if ( SHM == 8 ) internal = Coordinate({2,2,2,1}); else assert(0); - std::vector nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]}); - std::vector latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]}); + Coordinate nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]}); + Coordinate latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]}); ///////// Welcome message //////////// std::cout< mpi = GridDefaultMpi(); assert(mpi.size()==4); - std::vector local({L,L,L,L}); + Coordinate mpi = GridDefaultMpi(); assert(mpi.size()==4); + Coordinate local({L,L,L,L}); - GridCartesian * TmpGrid = SpaceTimeGrid::makeFourDimGrid(std::vector({64,64,64,64}), + GridCartesian * TmpGrid = SpaceTimeGrid::makeFourDimGrid(Coordinate({64,64,64,64}), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); uint64_t NP = TmpGrid->RankCount(); uint64_t NN = TmpGrid->NodeCount(); NN_global=NN; uint64_t SHM=NP/NN; - std::vector internal; - if ( SHM == 1 ) internal = std::vector({1,1,1,1}); - else if ( SHM == 2 ) internal = std::vector({2,1,1,1}); - else if ( SHM == 4 ) internal = std::vector({2,2,1,1}); - else if ( SHM == 8 ) internal = std::vector({2,2,2,1}); + Coordinate internal; + if ( SHM == 1 ) internal = Coordinate({1,1,1,1}); + else if ( SHM == 2 ) internal = Coordinate({2,1,1,1}); + else if ( SHM == 4 ) internal = Coordinate({2,2,1,1}); + else if ( SHM == 8 ) internal = Coordinate({2,2,2,1}); else assert(0); - std::vector nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]}); - std::vector latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]}); + Coordinate nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]}); + Coordinate latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]}); ///////// Welcome message //////////// std::cout< L_list({8,12,16,24}); + Coordinate L_list({8,12,16,24}); #else int sel=1; - std::vector L_list({8,12}); + Coordinate L_list({8,12}); #endif int selm1=sel-1; std::vector robust_list; diff --git a/benchmarks/Benchmark_comms.cc b/benchmarks/Benchmark_comms.cc index 8ece8580..0f95b115 100644 --- a/benchmarks/Benchmark_comms.cc +++ b/benchmarks/Benchmark_comms.cc @@ -61,8 +61,8 @@ int main (int argc, char ** argv) { Grid_init(&argc,&argv); - std::vector simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd()); - std::vector mpi_layout = GridDefaultMpi(); + Coordinate simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd()); + Coordinate mpi_layout = GridDefaultMpi(); int threads = GridThread::GetThreads(); std::cout< latt_size ({lat*mpi_layout[0], + Coordinate latt_size ({lat*mpi_layout[0], lat*mpi_layout[1], lat*mpi_layout[2], lat*mpi_layout[3]}); @@ -169,7 +169,7 @@ int main (int argc, char ** argv) for(int lat=4;lat<=maxlat;lat+=4){ for(int Ls=8;Ls<=8;Ls*=2){ - std::vector latt_size ({lat,lat,lat,lat}); + Coordinate latt_size ({lat,lat,lat,lat}); GridCartesian Grid(latt_size,simd_layout,mpi_layout); RealD Nrank = Grid._Nprocessors; @@ -260,7 +260,7 @@ int main (int argc, char ** argv) for(int lat=4;lat<=maxlat;lat+=4){ for(int Ls=8;Ls<=8;Ls*=2){ - std::vector latt_size ({lat*mpi_layout[0], + Coordinate latt_size ({lat*mpi_layout[0], lat*mpi_layout[1], lat*mpi_layout[2], lat*mpi_layout[3]}); @@ -357,7 +357,7 @@ int main (int argc, char ** argv) for(int lat=4;lat<=maxlat;lat+=4){ for(int Ls=8;Ls<=8;Ls*=2){ - std::vector latt_size ({lat*mpi_layout[0], + Coordinate latt_size ({lat*mpi_layout[0], lat*mpi_layout[1], lat*mpi_layout[2], lat*mpi_layout[3]}); @@ -455,10 +455,10 @@ int main (int argc, char ** argv) for(int lat=4;lat<=maxlat;lat+=4){ for(int Ls=8;Ls<=8;Ls*=2){ - std::vector latt_size ({lat*mpi_layout[0], - lat*mpi_layout[1], - lat*mpi_layout[2], - lat*mpi_layout[3]}); + Coordinate latt_size ({lat*mpi_layout[0], + lat*mpi_layout[1], + lat*mpi_layout[2], + lat*mpi_layout[3]}); GridCartesian Grid(latt_size,simd_layout,mpi_layout); RealD Nrank = Grid._Nprocessors; diff --git a/benchmarks/Benchmark_dwf.cc b/benchmarks/Benchmark_dwf.cc index 6dc3799b..8972a496 100644 --- a/benchmarks/Benchmark_dwf.cc +++ b/benchmarks/Benchmark_dwf.cc @@ -50,7 +50,7 @@ int main (int argc, char ** argv) int threads = GridThread::GetThreads(); std::cout< latt4 = GridDefaultLatt(); + Coordinate latt4 = GridDefaultLatt(); int Ls=16; for(int i=0;i origin({0,0,0,latt4[2]-1,0}); + Coordinate origin({0,0,0,latt4[2]-1,0}); SpinColourVectorF tmp; tmp=Zero(); tmp()(0)(0)=Complex(-2.0,0.0); diff --git a/benchmarks/Benchmark_gparity.cc b/benchmarks/Benchmark_gparity.cc index 640c6081..cb89ec25 100644 --- a/benchmarks/Benchmark_gparity.cc +++ b/benchmarks/Benchmark_gparity.cc @@ -36,7 +36,7 @@ int main (int argc, char ** argv) std::cout< latt_size ({2*mpi_layout[0],2*mpi_layout[1],4*mpi_layout[2],lat*mpi_layout[3]}); + Coordinate latt_size ({2*mpi_layout[0],2*mpi_layout[1],4*mpi_layout[2],lat*mpi_layout[3]}); GridCartesian Grid(latt_size,simd_layout,mpi_layout); diff --git a/benchmarks/Benchmark_memory_bandwidth.cc b/benchmarks/Benchmark_memory_bandwidth.cc index 12b4b4e3..7312ed67 100644 --- a/benchmarks/Benchmark_memory_bandwidth.cc +++ b/benchmarks/Benchmark_memory_bandwidth.cc @@ -43,8 +43,8 @@ int main (int argc, char ** argv) Vec rn = Zero(); - std::vector simd_layout = GridDefaultSimd(Nd,vReal::Nsimd()); - std::vector mpi_layout = GridDefaultMpi(); + Coordinate simd_layout = GridDefaultSimd(Nd,vReal::Nsimd()); + Coordinate mpi_layout = GridDefaultMpi(); int threads = GridThread::GetThreads(); std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); + Coordinate latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); @@ -96,7 +96,7 @@ int main (int argc, char ** argv) for(int lat=8;lat<=lmax;lat+=8){ - std::vector latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); + Coordinate latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); @@ -132,7 +132,7 @@ int main (int argc, char ** argv) for(int lat=8;lat<=lmax;lat+=8){ - std::vector latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); + Coordinate latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; uint64_t Nloop=NLOOP; @@ -168,7 +168,7 @@ int main (int argc, char ** argv) for(int lat=8;lat<=lmax;lat+=8){ - std::vector latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); + Coordinate latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; uint64_t Nloop=NLOOP; GridCartesian Grid(latt_size,simd_layout,mpi_layout); diff --git a/benchmarks/Benchmark_mooee.cc b/benchmarks/Benchmark_mooee.cc index 788dc7d5..dfaaae30 100644 --- a/benchmarks/Benchmark_mooee.cc +++ b/benchmarks/Benchmark_mooee.cc @@ -40,7 +40,7 @@ int main (int argc, char ** argv) int threads = GridThread::GetThreads(); std::cout< latt4 = GridDefaultLatt(); + Coordinate latt4 = GridDefaultLatt(); const int Ls=16; GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); diff --git a/benchmarks/Benchmark_staggered.cc b/benchmarks/Benchmark_staggered.cc index ba99ce38..93086927 100644 --- a/benchmarks/Benchmark_staggered.cc +++ b/benchmarks/Benchmark_staggered.cc @@ -36,9 +36,9 @@ int main (int argc, char ** argv) { Grid_init(&argc,&argv); - std::vector latt_size = GridDefaultLatt(); - std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); - std::vector mpi_layout = GridDefaultMpi(); + Coordinate latt_size = GridDefaultLatt(); + Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + Coordinate mpi_layout = GridDefaultMpi(); GridCartesian Grid(latt_size,simd_layout,mpi_layout); GridRedBlackCartesian RBGrid(&Grid); diff --git a/benchmarks/Benchmark_su3.cc b/benchmarks/Benchmark_su3.cc index 49e34136..4d7c51c3 100644 --- a/benchmarks/Benchmark_su3.cc +++ b/benchmarks/Benchmark_su3.cc @@ -39,8 +39,8 @@ int main (int argc, char ** argv) int64_t Nloop=20; - std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); - std::vector mpi_layout = GridDefaultMpi(); + Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + Coordinate mpi_layout = GridDefaultMpi(); int64_t threads = GridThread::GetThreads(); std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); + Coordinate latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); @@ -85,7 +85,7 @@ int main (int argc, char ** argv) for(int lat=2;lat<=LMAX;lat+=2){ - std::vector latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); + Coordinate latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); @@ -116,7 +116,7 @@ int main (int argc, char ** argv) for(int lat=2;lat<=LMAX;lat+=2){ - std::vector latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); + Coordinate latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); @@ -147,7 +147,7 @@ int main (int argc, char ** argv) for(int lat=2;lat<=LMAX;lat+=2){ - std::vector latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); + Coordinate latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); diff --git a/benchmarks/Benchmark_wilson.cc b/benchmarks/Benchmark_wilson.cc index fe8728fa..ad550bb8 100644 --- a/benchmarks/Benchmark_wilson.cc +++ b/benchmarks/Benchmark_wilson.cc @@ -54,9 +54,9 @@ int main (int argc, char ** argv) overlapComms = true; } - std::vector latt_size = GridDefaultLatt(); - std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); - std::vector mpi_layout = GridDefaultMpi(); + Coordinate latt_size = GridDefaultLatt(); + Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + Coordinate mpi_layout = GridDefaultMpi(); GridCartesian Grid(latt_size,simd_layout,mpi_layout); GridRedBlackCartesian RBGrid(&Grid); diff --git a/benchmarks/Benchmark_wilson_sweep.cc b/benchmarks/Benchmark_wilson_sweep.cc index ea773f7b..ce2780f9 100644 --- a/benchmarks/Benchmark_wilson_sweep.cc +++ b/benchmarks/Benchmark_wilson_sweep.cc @@ -53,8 +53,8 @@ int main (int argc, char ** argv) typename WilsonFermionR::ImplParams params; params.overlapCommsCompute = overlapComms; - std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); - std::vector mpi_layout = GridDefaultMpi(); + Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + Coordinate mpi_layout = GridDefaultMpi(); std::vector seeds({1,2,3,4}); RealD mass = 0.1; @@ -83,14 +83,13 @@ int main (int argc, char ** argv) if ( getenv("DMIN") ) dmin=atoi(getenv("DMIN")); for (int L=8; L<=Lmax; L*=2) { - std::vector latt_size = std::vector(4,L); + Coordinate latt_size = Coordinate(4,L); for(int d=4; d>dmin; d--) { if ( d<=3 ) { latt_size[d] *= 2; } std::cout << GridLogMessage; - std::copy( latt_size.begin(), --latt_size.end(), std::ostream_iterator( std::cout, std::string("x").c_str() ) ); - std::cout << latt_size.back() << "\t\t"; + std::cout << latt_size; GridCartesian Grid(latt_size,simd_layout,mpi_layout); GridRedBlackCartesian RBGrid(&Grid);