diff --git a/benchmarks/Benchmark_comms.cc b/benchmarks/Benchmark_comms.cc index 00526893..6696c8eb 100644 --- a/benchmarks/Benchmark_comms.cc +++ b/benchmarks/Benchmark_comms.cc @@ -166,18 +166,18 @@ int main (int argc, char ** argv) } + std::cout< requests; - - for(int mu=0;mu<4;mu++){ - - - if (mpi_layout[mu]>1 ) { - - ncomm++; - int comm_proc=1; - int xmit_to_rank; - int recv_from_rank; - Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); - dbytes+= - Grid.StencilSendToRecvFromBegin(requests, - (void *)&xbuf[mu][0], - xmit_to_rank,1, - (void *)&rbuf[mu][0], - recv_from_rank,1, - bytes,bytes,mu); - - comm_proc = mpi_layout[mu]-1; - - Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); - dbytes+= - Grid.StencilSendToRecvFromBegin(requests, - (void *)&xbuf[mu+4][0], - xmit_to_rank,1, - (void *)&rbuf[mu+4][0], - recv_from_rank,1, - bytes,bytes,mu+4); - - } - } - Grid.StencilSendToRecvFromComplete(requests,0); - Grid.Barrier(); - double stop=usecond(); - t_time[i] = stop-start; // microseconds - - } - - timestat.statistics(t_time); - - dbytes=dbytes*ppn; - double xbytes = dbytes*0.5; - // double rbytes = dbytes*0.5; - double bidibytes = dbytes; - - std::cout< xbuf(8); - std::vector rbuf(8); - Grid.ShmBufferFreeAll(); - uint64_t bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); - for(int d=0;d<8;d++){ - xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes); - rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes); - } - int ncomm; double dbytes; for(int i=0;i requests; dbytes=0; ncomm=0; - for(int mu=0;mu<4;mu++){ - + + for(int dir=0;dir<8;dir++) { + + double tbytes; + int mu =dir % 4; + if (mpi_layout[mu]>1 ) { ncomm++; - int comm_proc=1; int xmit_to_rank; int recv_from_rank; - - Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); - dbytes+= - Grid.StencilSendToRecvFromBegin(requests, - (void *)&xbuf[mu][0], - xmit_to_rank,1, - (void *)&rbuf[mu][0], - recv_from_rank,1, - bytes,bytes,mu); - Grid.StencilSendToRecvFromComplete(requests,mu); - requests.resize(0); + if ( dir == mu ) { + int comm_proc=1; + Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); + } else { + int comm_proc = mpi_layout[mu]-1; + Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); + } + int tid = omp_get_thread_num(); + tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,1, + (void *)&rbuf[dir][0], recv_from_rank,1, bytes,tid); - comm_proc = mpi_layout[mu]-1; - - Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); - dbytes+= - Grid.StencilSendToRecvFromBegin(requests, - (void *)&xbuf[mu+4][0], - xmit_to_rank,1, - (void *)&rbuf[mu+4][0], - recv_from_rank,1, - bytes,bytes,mu+4); - Grid.StencilSendToRecvFromComplete(requests,mu+4); - requests.resize(0); - + dbytes+=tbytes; } - } + } Grid.Barrier(); double stop=usecond(); t_time[i] = stop-start; // microseconds - } timestat.statistics(t_time); diff --git a/benchmarks/Benchmark_dwf.cc b/benchmarks/Benchmark_dwf.cc index 55135322..29772141 100644 --- a/benchmarks/Benchmark_dwf.cc +++ b/benchmarks/Benchmark_dwf.cc @@ -32,18 +32,18 @@ using namespace std; using namespace Grid; -template -struct scal { - d internal; +//////////////////////// +/// Move to domains //// +//////////////////////// + +Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT }; - Gamma::Algebra Gmu [] = { - Gamma::Algebra::GammaX, - Gamma::Algebra::GammaY, - Gamma::Algebra::GammaZ, - Gamma::Algebra::GammaT - }; - +void Benchmark(int Ls, Coordinate Dirichlet,bool Sloppy); int main (int argc, char ** argv) { @@ -52,39 +52,108 @@ int main (int argc, char ** argv) int threads = GridThread::GetThreads(); - Coordinate latt4 = GridDefaultLatt(); - int Ls=8; - for(int i=0;i> Ls; } + } + ////////////////// + // With comms + ////////////////// + Coordinate Dirichlet(Nd+1,0); + + std::cout << "\n\n\n\n\n\n" <1 ? 1 : 0; + Dirichlet[0] = 0; + Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0] * shm[0]; + Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1] * shm[1]; + Dirichlet[3] = CommDim[2]*latt4[2]/mpi[2] * shm[2]; + Dirichlet[4] = CommDim[3]*latt4[3]/mpi[3] * shm[3]; + + Benchmark(Ls,Dirichlet,false); + + std::cout << "\n\n\n\n\n\n" <1 ? 1 : 0; + + Benchmark(Ls,Dirichlet,true); + */ + + Grid_finalize(); + exit(0); +} +void Benchmark(int Ls, Coordinate Dirichlet,bool sloppy) +{ + Coordinate latt4 = GridDefaultLatt(); GridLogLayout(); long unsigned int single_site_flops = 8*Nc*(7+16*Nc); - - GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); + std::vector seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); +#undef SINGLE +#ifdef SINGLE + typedef vComplexF Simd; + typedef LatticeFermionF FermionField; + typedef LatticeGaugeFieldF GaugeField; + typedef LatticeColourMatrixF ColourMatrixField; + typedef DomainWallFermionF FermionAction; +#else + typedef vComplexD Simd; + typedef LatticeFermionD FermionField; + typedef LatticeGaugeFieldD GaugeField; + typedef LatticeColourMatrixD ColourMatrixField; + typedef DomainWallFermionD FermionAction; +#endif + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,Simd::Nsimd()),GridDefaultMpi()); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); - std::cout << GridLogMessage << "Making s innermost grids"< seeds4({1,2,3,4}); - std::vector seeds5({5,6,7,8}); - std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl; GridParallelRNG RNG4(UGrid); RNG4.SeedUniqueString(std::string("The 4D RNG")); + std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl; GridParallelRNG RNG5(FGrid); RNG5.SeedUniqueString(std::string("The 5D RNG")); - std::cout << GridLogMessage << "Initialised RNGs" << std::endl; - LatticeFermion src (FGrid); random(RNG5,src); + + FermionField src (FGrid); random(RNG5,src); #if 0 src = Zero(); { @@ -100,46 +169,39 @@ int main (int argc, char ** argv) src = src*N2; #endif - - LatticeFermion result(FGrid); result=Zero(); - LatticeFermion ref(FGrid); ref=Zero(); - LatticeFermion tmp(FGrid); - LatticeFermion err(FGrid); + FermionField result(FGrid); result=Zero(); + FermionField ref(FGrid); ref=Zero(); + FermionField tmp(FGrid); + FermionField err(FGrid); std::cout << GridLogMessage << "Drawing gauge field" << std::endl; - LatticeGaugeField Umu(UGrid); + GaugeField Umu(UGrid); + GaugeField UmuCopy(UGrid); SU::HotConfiguration(RNG4,Umu); + // SU::ColdConfiguration(Umu); + UmuCopy=Umu; std::cout << GridLogMessage << "Random gauge initialised " << std::endl; -#if 0 - Umu=1.0; - for(int mu=0;mu(Umu,mu); - // if (mu !=2 ) ttmp = 0; - // ttmp = ttmp* pow(10.0,mu); - PokeIndex(Umu,ttmp,mu); - } - std::cout << GridLogMessage << "Forced to diagonal " << std::endl; -#endif + //////////////////////////////////// + // Apply BCs + //////////////////////////////////// + Coordinate Block(4); + for(int d=0;d<4;d++) Block[d]= Dirichlet[d+1]; + + std::cout << GridLogMessage << "Applying BCs for Dirichlet Block5 " << Dirichlet << std::endl; + std::cout << GridLogMessage << "Applying BCs for Dirichlet Block4 " << Block << std::endl; + + DirichletFilter Filter(Block); + Filter.applyFilter(Umu); + //////////////////////////////////// // Naive wilson implementation //////////////////////////////////// - // replicate across fifth dimension - LatticeGaugeField Umu5d(FGrid); - std::vector U(4,FGrid); - { - autoView( Umu5d_v, Umu5d, CpuWrite); - autoView( Umu_v , Umu , CpuRead); - for(int ss=0;ssoSites();ss++){ - for(int s=0;s U(4,UGrid); for(int mu=0;mu(Umu5d,mu); + U[mu] = PeekIndex(Umu,mu); } + std::cout << GridLogMessage << "Setting up Cshift based reference " << std::endl; if (1) @@ -147,10 +209,28 @@ int main (int argc, char ** argv) ref = Zero(); for(int mu=0;muoSites();ss++){ + for(int s=0;soSites();ss++){ + for(int s=0;sBarrier(); Dw.Dhop(src,result,0); @@ -198,8 +282,8 @@ int main (int argc, char ** argv) double volume=Ls; for(int mu=0;mu1.0e-4) ) { - /* - std::cout << "RESULT\n " << result<1.0e-4) ) { std::cout<Barrier(); + std::cout<Barrier(); exit(-1); } - assert (norm2(err)< 1.0e-4 ); + assert (n2e< 1.0e-4 ); } if (1) @@ -238,16 +321,30 @@ int main (int argc, char ** argv) for(int mu=0;muoSites();ss++){ + for(int s=0;soSites();ss++){ + for(int s=0;s1.0e-4)){ -/* - std::cout<< "DAG RESULT\n " <Barrier(); @@ -338,14 +427,9 @@ int main (int argc, char ** argv) setCheckerboard(r_eo,r_e); err = r_eo-result; - std::cout<1.0e-4)){ - /* - std::cout<< "Deo RESULT\n " <