Mirror of https://github.com/paboyle/Grid.git (synced 2025-10-31 03:54:33 +00:00)

Compare commits: feature/mu ... dirac-ITT- (15 commits)
Commits in this compare (author and date were not captured by the mirror view):

f9df685cde, 17c5b0f152, c709883f3f, aed5de4d50, ba27cc6571, d75369cb56, bf973d0d56, 837bf8a5be, c05b2199f6, b331be9101, 49c20a9fa8, 7359df3501, 5b9267e88d, 15fd4003ef, 73aeca7dea
First changed file (a benchmark source; the mirror view dropped the file headers):

```diff
@@ -232,9 +232,13 @@ public:
     std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
     std::cout<<GridLogMessage << "= Benchmarking a*x + y bandwidth"<<std::endl;
     std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
-    std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
+    std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<< "\t\tGB/s / node"<<std::endl;
     std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
 
+    uint64_t NP;
+    uint64_t NN;
+
+
   uint64_t lmax=48;
 #define NLOOP (100*lmax*lmax*lmax*lmax/lat/lat/lat/lat)
 
@@ -245,6 +249,9 @@ public:
       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
       GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
 
+      NP= Grid.RankCount();
+      NN =Grid.NodeCount();
+
       Vec rn ; random(sRNG,rn);
 
       LatticeVec z(&Grid); z=rn;
@@ -266,7 +273,8 @@ public:
       double flops=vol*Nvec*2;// mul,add
       double bytes=3.0*vol*Nvec*sizeof(Real);
       std::cout<<GridLogMessage<<std::setprecision(3)
-	       << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl;
+	       << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.
+	       << "\t\t"<< bytes/time/NN <<std::endl;
 
     }
   };
```
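The new per-node column is pure bookkeeping: the loop already reports bytes/time, and the patch divides the same figure by Grid.NodeCount(). A standalone sketch of the arithmetic, with illustrative values where the benchmark reads them from the grid object:

```cpp
// a*x + y moves three streams (read x, read y, write z) and does two
// flops (mul, add) per element, matching the patched counters:
//   flops = vol*Nvec*2;  bytes = 3.0*vol*Nvec*sizeof(Real);
#include <cstdio>

int main() {
  const double L       = 48;    // lattice extent per dimension (lmax in the patch)
  const double Nvec    = 8;     // illustrative vector width
  const double seconds = 0.10;  // illustrative measured loop time
  const int    nodes   = 4;     // stands in for Grid.NodeCount()

  double vol   = L * L * L * L;
  double flops = vol * Nvec * 2;                   // mul,add
  double bytes = 3.0 * vol * Nvec * sizeof(double);// three streams per site

  std::printf("GB/s          = %.3f\n", bytes / seconds / 1.0e9);
  std::printf("Gflop/s       = %.3f\n", flops / seconds / 1.0e9);
  std::printf("GB/s per node = %.3f\n", bytes / seconds / 1.0e9 / nodes);
}
```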
```diff
@@ -387,6 +395,8 @@ public:
 	std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 
 	int nwarm = 100;
+	uint64_t ncall = 1000;
+
 	double t0=usecond();
 	sFGrid->Barrier();
 	for(int i=0;i<nwarm;i++){
@@ -394,15 +404,8 @@ public:
 	}
 	sFGrid->Barrier();
 	double t1=usecond();
-	//	uint64_t ncall = (uint64_t) 2.5*1000.0*1000.0*nwarm/(t1-t0);
-	//	if (ncall < 500) ncall = 500;
-	uint64_t ncall = 500;
-
-	sFGrid->Broadcast(0,&ncall,sizeof(ncall));
-
-	//	std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;
 	sDw.ZeroCounters();
 
 	time_statistics timestat;
 	std::vector<double> t_time(ncall);
 	for(uint64_t i=0;i<ncall;i++){
```
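Hoisting a fixed ncall = 1000 above the warm-up loop replaces the earlier runtime calibration, so every rank trivially agrees on the iteration count and the removed Broadcast is no longer needed. A minimal single-process sketch of the same warm-up/measure pattern; kernel() and usecond() here are stand-ins for the Dirac operator call and Grid's timer:

```cpp
#include <algorithm>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <vector>

static double usecond() {  // stand-in for Grid's usecond()
  using namespace std::chrono;
  return duration<double, std::micro>(steady_clock::now().time_since_epoch()).count();
}

static volatile double sink;
static void kernel() {     // stand-in for the benchmarked Dhop call
  double s = 0;
  for (int i = 0; i < 100000; i++) s += i * 1e-9;
  sink = s;
}

int main() {
  int nwarm = 100;          // warm caches and page in memory first
  uint64_t ncall = 1000;    // fixed count, known before any timing starts

  for (int i = 0; i < nwarm; i++) kernel();

  std::vector<double> t_time(ncall);
  for (uint64_t i = 0; i < ncall; i++) {
    double t0 = usecond();
    kernel();
    t_time[i] = usecond() - t0;
  }
  std::sort(t_time.begin(), t_time.end());
  std::printf("median %g us, best %g us, worst %g us\n",
              t_time[ncall / 2], t_time.front(), t_time.back());
}
```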
```diff
@@ -436,12 +439,14 @@ public:
 	sDw.Report();
 
       }
+      double robust = mflops_worst/mflops_best;;
       std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " sDeo Best  mflop/s        =   "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl;
       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " sDeo Worst mflop/s        =   "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl;
-      std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Performance Robustness   =   "<< mflops_worst/mflops_best <<std::endl;
+      std::cout<<GridLogMessage <<std::setprecision(3)<< L<<"^4 x "<<Ls<< " Performance Robustness   =   "<< robust <<std::endl;
       std::cout<<GridLogMessage <<fmt << std::endl;
-      std::cout<<GridLogMessage ;
+      std::cout<<GridLogMessage;
+
       for(int i=0;i<mflops_all.size();i++){
 	std::cout<<mflops_all[i]/NN<<" ; " ;
@@ -453,7 +458,7 @@ public:
     return mflops_best;
   }
 
-  static double DWF(int Ls,int L)
+  static double DWF(int Ls,int L, double & robust)
   {
     RealD mass=0.1;
     RealD M5  =1.8;
@@ -655,10 +660,11 @@ public:
 	assert((norm2(err)<1.0e-4));
 
       }
+      robust = mflops_worst/mflops_best;
       std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Deo Best  mflop/s        =   "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl;
       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Deo Worst mflop/s        =   "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl;
-      std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Performance Robustness   =   "<< mflops_worst/mflops_best <<std::endl;
+      std::cout<<GridLogMessage << std::fixed<<std::setprecision(3)<< L<<"^4 x "<<Ls<< " Performance Robustness   =   "<< robust  <<std::endl;
       std::cout<<GridLogMessage <<fmt << std::endl;
       std::cout<<GridLogMessage ;
 
```
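Both report blocks now name the worst/best ratio robust, print it to three significant figures, and DWF additionally returns it through the new double & robust out-parameter. A self-contained sketch of the metric itself (the sample values are illustrative):

```cpp
#include <algorithm>
#include <iostream>
#include <vector>

// Robustness as the patch defines it: worst/best over the collected
// mflop/s results; a value near 1.0 means all runs performed alike.
static double robustness(const std::vector<double>& mflops_all) {
  double best  = *std::max_element(mflops_all.begin(), mflops_all.end());
  double worst = *std::min_element(mflops_all.begin(), mflops_all.end());
  return worst / best;
}

int main() {
  std::vector<double> mflops_all = {9800, 10000, 9600, 9900};  // illustrative
  std::cout.precision(3);
  std::cout << "Performance Robustness = " << robustness(mflops_all) << std::endl;
}
```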
```diff
@@ -692,26 +698,19 @@ int main (int argc, char ** argv)
   int do_wilson=1;
   int do_dwf   =1;
 
-  if ( do_memory ) {
-    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
-    std::cout<<GridLogMessage << " Memory benchmark " <<std::endl;
-    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
-    Benchmark::Memory();
-  }
-
-  if ( do_comms ) {
-    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
-    std::cout<<GridLogMessage << " Communications benchmark " <<std::endl;
-    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
-    Benchmark::Comms();
-  }
-
   if ( do_su3 ) {
     // empty for now
   }
+#if 1
   int sel=2;
   std::vector<int> L_list({8,12,16,24});
+#else
+  int sel=1;
+  std::vector<int> L_list({8,12});
+#endif
+  int selm1=sel-1;
+  std::vector<double> robust_list;
+
   std::vector<double> wilson;
   std::vector<double> dwf4;
   std::vector<double> dwf5;
@@ -722,7 +721,8 @@ int main (int argc, char ** argv)
     std::cout<<GridLogMessage << " Wilson dslash 4D vectorised" <<std::endl;
     std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
     for(int l=0;l<L_list.size();l++){
-      wilson.push_back(Benchmark::DWF(1,L_list[l]));
+      double robust;
+      wilson.push_back(Benchmark::DWF(1,L_list[l],robust));
     }
   }
 
@@ -732,7 +732,10 @@ int main (int argc, char ** argv)
     std::cout<<GridLogMessage << " Domain wall dslash 4D vectorised" <<std::endl;
     std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
     for(int l=0;l<L_list.size();l++){
-      dwf4.push_back(Benchmark::DWF(Ls,L_list[l]));
+      double robust;
+      double result = Benchmark::DWF(Ls,L_list[l],robust) ;
+      dwf4.push_back(result);
+      robust_list.push_back(robust);
     }
   }
 
```
```diff
@@ -744,6 +747,10 @@ int main (int argc, char ** argv)
       dwf5.push_back(Benchmark::DWF5(Ls,L_list[l]));
     }
 
+  }
+
+  if ( do_dwf ) {
+
   std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
   std::cout<<GridLogMessage << " Summary table Ls="<<Ls <<std::endl;
   std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
@@ -752,11 +759,27 @@ int main (int argc, char ** argv)
     std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]<<" \t "<<dwf4[l]<<" \t "<<dwf5[l] <<std::endl;
   }
   std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
+  }
+
+  int NN=NN_global;
+  if ( do_memory ) {
+    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
+    std::cout<<GridLogMessage << " Memory benchmark " <<std::endl;
+    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
+    Benchmark::Memory();
+  }
+
+  if ( do_comms && (NN>1) ) {
+    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
+    std::cout<<GridLogMessage << " Communications benchmark " <<std::endl;
+    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
+    Benchmark::Comms();
+  }
+
+  if ( do_dwf ) {
   std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
   std::cout<<GridLogMessage << " Per Node Summary table Ls="<<Ls <<std::endl;
   std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
-  int NN=NN_global;
   std::cout<<GridLogMessage << " L \t\t Wilson\t\t DWF4  \t\t DWF5 " <<std::endl;
   for(int l=0;l<L_list.size();l++){
     std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]/NN<<" \t "<<dwf4[l]/NN<<" \t "<<dwf5[l] /NN<<std::endl;
@@ -764,10 +787,12 @@ int main (int argc, char ** argv)
   std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 
   std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
-  std::cout<<GridLogMessage << " Comparison point result: "  << dwf4[sel]/NN <<std::endl;
+  std::cout<<GridLogMessage << " Comparison point     result: "  << 0.5*(dwf4[sel]+dwf4[selm1])/NN << " Mflop/s per node"<<std::endl;
+  std::cout<<GridLogMessage << " Comparison point is 0.5*("<<dwf4[sel]/NN<<"+"<<dwf4[selm1]/NN << ") "<<std::endl;
+  std::cout<<std::setprecision(3);
+  std::cout<<GridLogMessage << " Comparison point robustness: "  << robust_list[sel] <<std::endl;
   std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 
 
   }
 
 
```
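The comparison point changes from a single volume to the average of two adjacent volumes, L_list[sel] and L_list[selm1], normalised per node, with its robustness taken from the new robust_list. The arithmetic, with illustrative numbers:

```cpp
#include <cstdio>
#include <vector>

int main() {
  // Illustrative per-volume DWF results (Mflop/s) for L_list = {8,12,16,24}.
  std::vector<double> dwf4        = {210000, 390000, 450000, 470000};
  std::vector<double> robust_list = {0.98, 0.95, 0.93, 0.91};
  int sel = 2, selm1 = sel - 1;   // L=16 and L=12 in the patched list
  int NN  = 4;                    // stands in for NN_global, the node count

  double point = 0.5 * (dwf4[sel] + dwf4[selm1]) / NN;
  std::printf("Comparison point     result: %g Mflop/s per node\n", point);
  std::printf("Comparison point robustness: %.3f\n", robust_list[sel]);
}
```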
configure.ac (35 changed lines):
```diff
@@ -331,8 +331,41 @@ case ${ac_PRECISION} in
      double)
        AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] )
      ;;
+     *)
+     AC_MSG_ERROR([${ac_PRECISION} unsupported --enable-precision option]);
+     ;;
 esac
 
+######################  Shared memory allocation technique under MPI3
+AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmget|shmopen|hugetlbfs],
+              [Select SHM allocation technique])],[ac_SHM=${enable_shm}],[ac_SHM=shmopen])
+
+case ${ac_SHM} in
+
+     shmget)
+     AC_DEFINE([GRID_MPI3_SHMGET],[1],[GRID_MPI3_SHMGET] )
+     ;;
+
+     shmopen)
+     AC_DEFINE([GRID_MPI3_SHMOPEN],[1],[GRID_MPI3_SHMOPEN] )
+     ;;
+
+     hugetlbfs)
+     AC_DEFINE([GRID_MPI3_SHMMMAP],[1],[GRID_MPI3_SHMMMAP] )
+     ;;
+
+     *)
+     AC_MSG_ERROR([${ac_SHM} unsupported --enable-shm option]);
+     ;;
+esac
+
+######################  Shared base path for SHMMMAP
+AC_ARG_ENABLE([shmpath],[AC_HELP_STRING([--enable-shmpath=path],
+              [Select SHM mmap base path for hugetlbfs])],
+	      [ac_SHMPATH=${enable_shmpath}],
+	      [ac_SHMPATH=/var/lib/hugetlbfs/pagesize-2MB/])
+AC_DEFINE_UNQUOTED([GRID_SHM_PATH],["$ac_SHMPATH"],[Path to a hugetlbfs filesystem for MMAPing])
+
 ############### communication type selection
 AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|mpi3|mpi3-auto|shmem],
               [Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none])
@@ -482,6 +515,8 @@ compiler version            : ${ax_cv_gxx_version}
 SIMD                        : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG}
 Threading                   : ${ac_openmp}
 Communications type         : ${comms_type}
+Shared memory allocator     : ${ac_SHM}
+Shared memory mmap path     : ${ac_SHMPATH}
 Default precision           : ${ac_PRECISION}
 Software FP16 conversion    : ${ac_SFW_FP16}
 RNG choice                  : ${ac_RNG}
```
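The new --enable-shm choice becomes exactly one preprocessor symbol (GRID_MPI3_SHMGET, GRID_MPI3_SHMOPEN, or GRID_MPI3_SHMMMAP), and --enable-shmpath lands in GRID_SHM_PATH. A hypothetical standalone consumer of those macros, just to show the compile-time dispatch; the fallback defaults below mirror configure's own (ac_SHM=shmopen, /var/lib/hugetlbfs/pagesize-2MB/):

```cpp
// Sketch only: compile with e.g.
//   g++ demo.cc                      (falls back to the shmopen branch)
//   g++ -DGRID_MPI3_SHMMMAP demo.cc  (hugetlbfs branch)
#include <cstdio>

#if !defined(GRID_MPI3_SHMGET) && !defined(GRID_MPI3_SHMOPEN) && !defined(GRID_MPI3_SHMMMAP)
#define GRID_MPI3_SHMOPEN 1   // mirrors configure's ac_SHM=shmopen default
#endif
#ifndef GRID_SHM_PATH
#define GRID_SHM_PATH "/var/lib/hugetlbfs/pagesize-2MB/"  // configure default
#endif

static const char *shm_technique(void) {
#if defined(GRID_MPI3_SHMGET)
  return "SysV shmget/shmat";
#elif defined(GRID_MPI3_SHMMMAP)
  return "file on hugetlbfs mmapped under " GRID_SHM_PATH;
#elif defined(GRID_MPI3_SHMOPEN)
  return "POSIX shm_open + mmap";
#endif
}

int main(void) { std::printf("SHM technique: %s\n", shm_technique()); }
```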
Next file (an aligned-allocator header; filename not captured):

```diff
@@ -204,10 +204,12 @@ public:
 #endif
     size_type bytes = __n*sizeof(_Tp);
     uint8_t *cp = (uint8_t *)ptr;
+    if ( ptr ) {
     // One touch per 4k page, static OMP loop to catch same loop order
 #pragma omp parallel for schedule(static)
-    for(size_type n=0;n<bytes;n+=4096){
-      cp[n]=0;
+      for(size_type n=0;n<bytes;n+=4096){
+	cp[n]=0;
+      }
     }
     return ptr;
   }
```
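The added guard matters because the loop dereferences the freshly allocated buffer, which may be null on failure. The one-write-per-4k-page loop under schedule(static) places each page in the NUMA domain of the thread that will later iterate over it in the same static order. A self-contained version of the pattern (compile with -fopenmp; malloc stands in for the allocator's underlying call):

```cpp
#include <cstdint>
#include <cstdio>
#include <cstdlib>

int main() {
  size_t bytes = size_t(64) * 1024 * 1024;
  uint8_t *cp = (uint8_t *)std::malloc(bytes);

  if (cp) {  // the patch adds exactly this null guard
    // One touch per 4k page; schedule(static) so the thread that faults
    // a page in here is the same one that works on it in later loops.
#pragma omp parallel for schedule(static)
    for (size_t n = 0; n < bytes; n += 4096) {
      cp[n] = 0;
    }
    std::printf("touched %zu pages\n", bytes / 4096);
  }
  std::free(cp);
  return 0;
}
```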
Next file (CartesianCommunicator base; filename not captured):

```diff
@@ -37,7 +37,7 @@ namespace Grid {
 // Info that is setup once and indept of cartesian layout
 ///////////////////////////////////////////////////////////////
 void *              CartesianCommunicator::ShmCommBuf;
-uint64_t            CartesianCommunicator::MAX_MPI_SHM_BYTES   = 128*1024*1024;
+uint64_t            CartesianCommunicator::MAX_MPI_SHM_BYTES   = 1024LL*1024LL*1024LL;
 CartesianCommunicator::CommunicatorPolicy_t
 CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent;
 int CartesianCommunicator::nCommThreads = -1;
@@ -157,6 +157,9 @@ void CartesianCommunicator::ShmInitGeneric(void){
     perror("mmap failed ");
     exit(EXIT_FAILURE);
   }
+#ifdef MADV_HUGEPAGE
+  if (!Hugepages ) madvise(ShmCommBuf,MAX_MPI_SHM_BYTES,MADV_HUGEPAGE);
+#endif
 #else
   ShmBufStorageVector.resize(MAX_MPI_SHM_BYTES);
   ShmCommBuf=(void *)&ShmBufStorageVector[0];
```
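With the default segment grown from 128 MB to 1 GB, the patch also asks for transparent huge pages on the generic path whenever explicit huge pages were not requested. madvise(MADV_HUGEPAGE) is only a hint to the kernel, not a guarantee. A minimal Linux sketch:

```cpp
#include <sys/mman.h>
#include <cstdio>
#include <cstdlib>

int main() {
  size_t bytes = size_t(1024) * 1024 * 1024;  // the new MAX_MPI_SHM_BYTES default
  void *p = mmap(NULL, bytes, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (p == MAP_FAILED) { perror("mmap failed "); exit(EXIT_FAILURE); }

#ifdef MADV_HUGEPAGE
  // Hint only: the kernel may back the region with 2MB pages where it
  // can (requires transparent-hugepage support to be enabled).
  madvise(p, bytes, MADV_HUGEPAGE);
#endif
  std::printf("mapped %zu bytes\n", bytes);
  munmap(p, bytes);
  return 0;
}
```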
Next file (the MPI3 communicator; filename not captured):

```diff
@@ -198,7 +198,46 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
   ShmCommBuf = 0;
   ShmCommBufs.resize(ShmSize);
 
-#if 1
+  ////////////////////////////////////////////////////////////////////////////////////////////
+  // Hugetlbf and others map filesystems as mappable huge pages
+  ////////////////////////////////////////////////////////////////////////////////////////////
+#ifdef GRID_MPI3_SHMMMAP
+  char shm_name [NAME_MAX];
+  for(int r=0;r<ShmSize;r++){
+
+    size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES;
+    sprintf(shm_name,GRID_SHM_PATH "/Grid_mpi3_shm_%d_%d",GroupRank,r);
+    //sprintf(shm_name,"/var/lib/hugetlbfs/group/wheel/pagesize-2MB/" "Grid_mpi3_shm_%d_%d",GroupRank,r);
+    //    printf("Opening file %s \n",shm_name);
+    int fd=open(shm_name,O_RDWR|O_CREAT,0666);
+    if ( fd == -1) {
+      printf("open %s failed\n",shm_name);
+      perror("open hugetlbfs");
+      exit(0);
+    }
+    int mmap_flag = MAP_SHARED ;
+#ifdef MAP_POPULATE
+    mmap_flag|=MAP_POPULATE;
+#endif
+#ifdef MAP_HUGETLB
+    if ( Hugepages ) mmap_flag |= MAP_HUGETLB;
+#endif
+    void *ptr = (void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, mmap_flag,fd, 0);
+    if ( ptr == (void *)MAP_FAILED ) {
+      printf("mmap %s failed\n",shm_name);
+      perror("failed mmap");      assert(0);
+    }
+    assert(((uint64_t)ptr&0x3F)==0);
+    ShmCommBufs[r] =ptr;
+
+  }
+#endif
+  ////////////////////////////////////////////////////////////////////////////////////////////
+  // POSIX SHMOPEN ; as far as I know Linux does not allow EXPLICIT HugePages with this case
+  // tmpfs (Larry Meadows says) does not support explicit huge page, and this is used for
+  // the posix shm virtual file system
+  ////////////////////////////////////////////////////////////////////////////////////////////
+#ifdef GRID_MPI3_SHMOPEN
   char shm_name [NAME_MAX];
   if ( ShmRank == 0 ) {
     for(int r=0;r<ShmSize;r++){
```
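Before relying on the new GRID_MPI3_SHMMMAP path it is worth probing that the hugetlbfs mount actually accepts mappings; note that the hunk above creates its backing files but never unlinks them, so they persist under GRID_SHM_PATH between runs. A small standalone probe, assuming the configure default path and a 2 MB huge-page size:

```cpp
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <cstdio>
#include <cstdlib>

int main() {
  // Assumed mount point: the configure default for --enable-shmpath.
  const char *path = "/var/lib/hugetlbfs/pagesize-2MB/Grid_shm_probe";
  size_t bytes = 2 * 1024 * 1024;  // one 2MB huge page

  int fd = open(path, O_RDWR | O_CREAT, 0666);
  if (fd == -1) { perror("open hugetlbfs"); exit(EXIT_FAILURE); }

  // On hugetlbfs the mapping length must be a multiple of the page size.
  void *p = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  if (p == MAP_FAILED) { perror("mmap"); unlink(path); exit(EXIT_FAILURE); }

  ((char *)p)[0] = 1;  // fault the huge page in
  std::printf("hugetlbfs mapping at %s OK\n", path);

  munmap(p, bytes);
  close(fd);
  unlink(path);        // this is only a probe; remove the backing file
  return 0;
}
```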
```diff
@@ -213,12 +252,15 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
       ftruncate(fd, size);
 
       int mmap_flag = MAP_SHARED;
+#ifdef MAP_POPULATE
+      mmap_flag |= MAP_POPULATE;
+#endif
 #ifdef MAP_HUGETLB
       if (Hugepages) mmap_flag |= MAP_HUGETLB;
 #endif
       void * ptr =  mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);
 
-      if ( ptr == MAP_FAILED ) {       perror("failed mmap");      assert(0);    }
+      if ( ptr == (void * )MAP_FAILED ) {       perror("failed mmap");      assert(0);    }
       assert(((uint64_t)ptr&0x3F)==0);
 
 // Experiments; Experiments; Try to force numa domain on the shm segment if we have numaif.h
@@ -240,7 +282,7 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
 	  if (ierr && (page==0)) perror("numa relocate command failed");
 	}
 #endif
-      ShmCommBufs[r] =ptr;
+	ShmCommBufs[r] =ptr;
 
     }
   }
```
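MAP_POPULATE prefaults the whole segment at mmap time, so first-touch page faults do not land inside timed communication loops. A standalone sketch of this POSIX path; shm_open is an assumption here, since the capture shows only the ftruncate and mmap lines of the creating branch:

```cpp
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <cstdio>
#include <cstdlib>

int main() {
  const char *name = "/Grid_mpi3_shm_demo";  // illustrative segment name
  size_t size = 64 * 1024 * 1024;

  int fd = shm_open(name, O_RDWR | O_CREAT, 0666);
  if (fd == -1) { perror("shm_open"); exit(EXIT_FAILURE); }
  if (ftruncate(fd, size) == -1) { perror("ftruncate"); exit(EXIT_FAILURE); }

  int mmap_flag = MAP_SHARED;
#ifdef MAP_POPULATE
  mmap_flag |= MAP_POPULATE;  // prefault: no page faults during timed loops
#endif
  void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);
  if (ptr == (void *)MAP_FAILED) { perror("failed mmap"); exit(EXIT_FAILURE); }

  std::printf("mapped %zu shared bytes\n", size);
  munmap(ptr, size);
  close(fd);
  shm_unlink(name);
  return 0;
}
```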
```diff
@@ -262,25 +304,32 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
       ShmCommBufs[r] =ptr;
     }
   }
-#else
+#endif
+  ////////////////////////////////////////////////////////////////////////////////////////////
+  // SHMGET SHMAT and SHM_HUGETLB flag
+  ////////////////////////////////////////////////////////////////////////////////////////////
+#ifdef GRID_MPI3_SHMGET
   std::vector<int> shmids(ShmSize);
 
   if ( ShmRank == 0 ) {
     for(int r=0;r<ShmSize;r++){
       size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES;
-      key_t key   = 0x4545 + r;
+      key_t key   = IPC_PRIVATE;
       int flags = IPC_CREAT | SHM_R | SHM_W;
 #ifdef SHM_HUGETLB
-      flags|=SHM_HUGETLB;
+      if (Hugepages) flags|=SHM_HUGETLB;
 #endif
-      if ((shmids[r]= shmget(key,size, flags)) < 0) {
+      if ((shmids[r]= shmget(key,size, flags)) ==-1) {
 	int errsv = errno;
 	printf("Errno %d\n",errsv);
+	printf("key   %d\n",key);
+	printf("size  %lld\n",size);
+	printf("flags %d\n",flags);
 	perror("shmget");
 	exit(1);
+      } else {
+	printf("shmid: 0x%x\n", shmids[r]);
       }
-      printf("shmid: 0x%x\n", shmids[r]);
     }
   }
   MPI_Barrier(ShmComm);
```
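Switching the key from 0x4545+r to IPC_PRIVATE means every shmget call creates a fresh segment instead of colliding with stale ones left behind by a crashed job, and SHM_HUGETLB is now applied only when huge pages were actually requested. A standalone sketch, including the IPC_RMID cleanup that keeps SysV segments from outliving the process (with IPC_PRIVATE the id must be communicated to peer ranks separately; in the MPI code above that is the creator rank's job):

```cpp
#include <sys/ipc.h>
#include <sys/shm.h>
#include <cstdio>
#include <cstdlib>

int main() {
  size_t size = 64 * 1024 * 1024;
  int flags = IPC_CREAT | SHM_R | SHM_W;

  // IPC_PRIVATE always creates a new segment; no stale-key collisions.
  int shmid = shmget(IPC_PRIVATE, size, flags);
  if (shmid == -1) { perror("shmget"); exit(1); }
  std::printf("shmid: 0x%x\n", shmid);

  void *ptr = shmat(shmid, NULL, 0);
  if (ptr == (void *)-1) { perror("shmat"); exit(1); }

  // Mark for destruction once the last attachment goes away, so a crash
  // does not leave the segment allocated in the kernel.
  shmctl(shmid, IPC_RMID, NULL);
  shmdt(ptr);
  return 0;
}
```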
Next file (Grid_init command-line handling; filename not captured):

```diff
@@ -219,7 +219,8 @@ void Grid_init(int *argc,char ***argv)
     int MB;
     arg= GridCmdOptionPayload(*argv,*argv+*argc,"--shm");
     GridCmdOptionInt(arg,MB);
-    CartesianCommunicator::MAX_MPI_SHM_BYTES = MB*1024*1024;
+    uint64_t MB64 = MB;
+    CartesianCommunicator::MAX_MPI_SHM_BYTES = MB64*1024LL*1024LL;
   }
 
   if( GridCmdOptionExists(*argv,*argv+*argc,"--shm-hugepages") ){
```
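GridCmdOptionInt parses --shm into a plain int, and MB*1024*1024 is then evaluated in 32-bit arithmetic, which overflows for values of 2048 MB and above. Widening to uint64_t before multiplying, as the patch does, keeps the product exact. A demonstration:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  int MB = 4096;  // e.g. "--shm 4096" on the command line

  // Old form: int*int*int is 32-bit arithmetic; 4096*1024*1024 = 2^32
  // overflows (undefined behaviour; typically wraps to 0) before the
  // result is ever widened for the assignment.
  uint64_t bad = MB * 1024 * 1024;

  // New form: widen first, then multiply in 64 bits.
  uint64_t MB64 = MB;
  uint64_t good = MB64 * 1024LL * 1024LL;

  std::printf("old: %llu bytes\nnew: %llu bytes\n",
              (unsigned long long)bad, (unsigned long long)good);
}
```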