mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-26 01:29:34 +00:00 
			
		
		
		
	Merge branch 'paboyle:develop' into sycl_slicesum_update
This commit is contained in:
		| @@ -281,12 +281,14 @@ inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> & | |||||||
|   return nrm; |   return nrm; | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| template<class vobj> | template<class vobj> | ||||||
| inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right) { | inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right) { | ||||||
|   GridBase *grid = left.Grid(); |   GridBase *grid = left.Grid(); | ||||||
|   ComplexD nrm = rankInnerProduct(left,right); |   ComplexD nrm = rankInnerProduct(left,right); | ||||||
|   //  std::cerr<<"flight log " << std::hexfloat << nrm <<" "<<crc(left)<<std::endl; |   //  GridNormLog(real(nrm)); // Could log before and after global sum to distinguish local and MPI | ||||||
|   grid->GlobalSum(nrm); |   grid->GlobalSum(nrm); | ||||||
|  |   //  GridNormLog(real(nrm));  | ||||||
|   return nrm; |   return nrm; | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -411,7 +411,7 @@ public: | |||||||
|       std::cout << GridLogMessage << "Seed SHA256: " << GridChecksum::sha256_string(seeds) << std::endl; |       std::cout << GridLogMessage << "Seed SHA256: " << GridChecksum::sha256_string(seeds) << std::endl; | ||||||
|       SeedFixedIntegers(seeds); |       SeedFixedIntegers(seeds); | ||||||
|     } |     } | ||||||
|   void SeedFixedIntegers(const std::vector<int> &seeds){ |   void SeedFixedIntegers(const std::vector<int> &seeds, int britney=0){ | ||||||
|  |  | ||||||
|     // Everyone generates the same seed_seq based on input seeds |     // Everyone generates the same seed_seq based on input seeds | ||||||
|     CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size()); |     CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size()); | ||||||
| @@ -428,7 +428,6 @@ public: | |||||||
|     // MT implementation does not implement fast discard even though |     // MT implementation does not implement fast discard even though | ||||||
|     // in principle this is possible |     // in principle this is possible | ||||||
|     //////////////////////////////////////////////// |     //////////////////////////////////////////////// | ||||||
| #if 1 |  | ||||||
|     thread_for( lidx, _grid->lSites(), { |     thread_for( lidx, _grid->lSites(), { | ||||||
|  |  | ||||||
| 	int gidx; | 	int gidx; | ||||||
| @@ -449,29 +448,12 @@ public: | |||||||
| 	 | 	 | ||||||
| 	int l_idx=generator_idx(o_idx,i_idx); | 	int l_idx=generator_idx(o_idx,i_idx); | ||||||
| 	_generators[l_idx] = master_engine; | 	_generators[l_idx] = master_engine; | ||||||
| 	Skip(_generators[l_idx],gidx); // Skip to next RNG sequence | 	if ( britney ) {  | ||||||
|     }); | 	  Skip(_generators[l_idx],l_idx); // Skip to next RNG sequence | ||||||
| #else | 	} else { 	 | ||||||
|     // Everybody loops over global volume. |  | ||||||
|     thread_for( gidx, _grid->_gsites, { |  | ||||||
|  |  | ||||||
| 	// Where is it? |  | ||||||
| 	int rank; |  | ||||||
| 	int o_idx; |  | ||||||
| 	int i_idx; |  | ||||||
|  |  | ||||||
| 	Coordinate gcoor; |  | ||||||
| 	_grid->GlobalIndexToGlobalCoor(gidx,gcoor); |  | ||||||
| 	_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); |  | ||||||
| 	 |  | ||||||
| 	// If this is one of mine we take it |  | ||||||
| 	if( rank == _grid->ThisRank() ){ |  | ||||||
| 	  int l_idx=generator_idx(o_idx,i_idx); |  | ||||||
| 	  _generators[l_idx] = master_engine; |  | ||||||
| 	  Skip(_generators[l_idx],gidx); // Skip to next RNG sequence | 	  Skip(_generators[l_idx],gidx); // Skip to next RNG sequence | ||||||
| 	} | 	} | ||||||
|     }); |     }); | ||||||
| #endif |  | ||||||
| #else  | #else  | ||||||
|     //////////////////////////////////////////////////////////////// |     //////////////////////////////////////////////////////////////// | ||||||
|     // Machine and thread decomposition dependent seeding is efficient |     // Machine and thread decomposition dependent seeding is efficient | ||||||
|   | |||||||
| @@ -77,6 +77,10 @@ feenableexcept (unsigned int excepts) | |||||||
| } | } | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|  | #ifndef HOST_NAME_MAX | ||||||
|  | #define HOST_NAME_MAX _POSIX_HOST_NAME_MAX | ||||||
|  | #endif | ||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
| ////////////////////////////////////////////////////// | ////////////////////////////////////////////////////// | ||||||
| @@ -86,11 +90,83 @@ NAMESPACE_BEGIN(Grid); | |||||||
| static Coordinate Grid_default_latt; | static Coordinate Grid_default_latt; | ||||||
| static Coordinate Grid_default_mpi; | static Coordinate Grid_default_mpi; | ||||||
|  |  | ||||||
|  |  | ||||||
|  | /////////////////////////////////////////////////////// | ||||||
|  | // Grid Norm logging for repro testing | ||||||
|  | /////////////////////////////////////////////////////// | ||||||
|  | int GridNormLoggingMode; | ||||||
|  | int32_t GridNormLoggingCounter; | ||||||
|  | std::vector<double> GridNormLogVector; | ||||||
|  |  | ||||||
|  | void SetGridNormLoggingMode(GridNormLoggingMode_t mode) | ||||||
|  | { | ||||||
|  |   switch ( mode ) { | ||||||
|  |   case GridNormLoggingModePrint: | ||||||
|  |     SetGridNormLoggingModePrint(); | ||||||
|  |     break; | ||||||
|  |   case GridNormLoggingModeRecord: | ||||||
|  |     SetGridNormLoggingModeRecord(); | ||||||
|  |     break; | ||||||
|  |   case GridNormLoggingModeVerify: | ||||||
|  |     SetGridNormLoggingModeVerify(); | ||||||
|  |     break; | ||||||
|  |   case GridNormLoggingModeNone: | ||||||
|  |     GridNormLoggingMode = mode; | ||||||
|  |     GridNormLoggingCounter=0; | ||||||
|  |     GridNormLogVector.resize(0); | ||||||
|  |     break; | ||||||
|  |   default: | ||||||
|  |     assert(0); | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void SetGridNormLoggingModePrint(void) | ||||||
|  | { | ||||||
|  |   GridNormLoggingCounter = 0; | ||||||
|  |   GridNormLogVector.resize(0); | ||||||
|  |   GridNormLoggingMode = GridNormLoggingModePrint; | ||||||
|  | } | ||||||
|  | void SetGridNormLoggingModeRecord(void) | ||||||
|  | { | ||||||
|  |   GridNormLoggingCounter = 0; | ||||||
|  |   GridNormLogVector.resize(0); | ||||||
|  |   GridNormLoggingMode = GridNormLoggingModeRecord; | ||||||
|  | } | ||||||
|  | void SetGridNormLoggingModeVerify(void) | ||||||
|  | { | ||||||
|  |   GridNormLoggingCounter = 0; | ||||||
|  |   GridNormLoggingMode = GridNormLoggingModeVerify; | ||||||
|  | } | ||||||
|  | void GridNormLog(double value) | ||||||
|  | { | ||||||
|  |   if(GridNormLoggingMode == GridNormLoggingModePrint) { | ||||||
|  |     std::cerr<<"GridNormLog : "<< GridNormLoggingCounter <<" " << std::hexfloat << value <<std::endl; | ||||||
|  |     GridNormLoggingCounter++; | ||||||
|  |   } | ||||||
|  |   if(GridNormLoggingMode == GridNormLoggingModeRecord) { | ||||||
|  |     GridNormLogVector.push_back(value); | ||||||
|  |     GridNormLoggingCounter++; | ||||||
|  |   } | ||||||
|  |   if(GridNormLoggingMode == GridNormLoggingModeVerify) { | ||||||
|  |     assert(GridNormLoggingCounter < GridNormLogVector.size()); | ||||||
|  |     if ( value != GridNormLogVector[GridNormLoggingCounter] ) { | ||||||
|  |       fprintf(stderr,"%s Oops, I did it again! Reproduce failure for norm %d/%zu %.16e %.16e\n",GridHostname(),GridNormLoggingCounter,GridNormLogVector.size(), | ||||||
|  | 	      value, GridNormLogVector[GridNormLoggingCounter]); fflush(stderr); | ||||||
|  |     } | ||||||
|  |     GridNormLoggingCounter++; | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
| int GridThread::_threads =1; | int GridThread::_threads =1; | ||||||
| int GridThread::_hyperthreads=1; | int GridThread::_hyperthreads=1; | ||||||
| int GridThread::_cores=1; | int GridThread::_cores=1; | ||||||
|  |  | ||||||
|  | char hostname[HOST_NAME_MAX+1]; | ||||||
|  |  | ||||||
|  | char *GridHostname(void) | ||||||
|  | { | ||||||
|  |   return hostname; | ||||||
|  | } | ||||||
| const Coordinate &GridDefaultLatt(void)     {return Grid_default_latt;}; | const Coordinate &GridDefaultLatt(void)     {return Grid_default_latt;}; | ||||||
| const Coordinate &GridDefaultMpi(void)      {return Grid_default_mpi;}; | const Coordinate &GridDefaultMpi(void)      {return Grid_default_mpi;}; | ||||||
| const Coordinate GridDefaultSimd(int dims,int nsimd) | const Coordinate GridDefaultSimd(int dims,int nsimd) | ||||||
| @@ -393,7 +469,6 @@ void Grid_init(int *argc,char ***argv) | |||||||
|   std::cout << GridLogMessage << "MPI is initialised and logging filters activated "<<std::endl; |   std::cout << GridLogMessage << "MPI is initialised and logging filters activated "<<std::endl; | ||||||
|   std::cout << GridLogMessage << "================================================ "<<std::endl; |   std::cout << GridLogMessage << "================================================ "<<std::endl; | ||||||
|  |  | ||||||
|   char hostname[HOST_NAME_MAX+1]; |  | ||||||
|   gethostname(hostname, HOST_NAME_MAX+1); |   gethostname(hostname, HOST_NAME_MAX+1); | ||||||
|   std::cout << GridLogMessage << "This rank is running on host "<< hostname<<std::endl; |   std::cout << GridLogMessage << "This rank is running on host "<< hostname<<std::endl; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -34,6 +34,8 @@ NAMESPACE_BEGIN(Grid); | |||||||
| void Grid_init(int *argc,char ***argv); | void Grid_init(int *argc,char ***argv); | ||||||
| void Grid_finalize(void); | void Grid_finalize(void); | ||||||
|  |  | ||||||
|  | char * GridHostname(void); | ||||||
|  |  | ||||||
| // internal, controlled with --handle | // internal, controlled with --handle | ||||||
| void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr); | void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr); | ||||||
| void Grid_debug_handler_init(void); | void Grid_debug_handler_init(void); | ||||||
| @@ -68,5 +70,20 @@ void GridParseLayout(char **argv,int argc, | |||||||
| void printHash(void); | void printHash(void); | ||||||
|  |  | ||||||
|  |  | ||||||
// Modes understood by SetGridNormLoggingMode / GridNormLog.
enum GridNormLoggingMode_t {
  GridNormLoggingModeNone   = 0, // GridNormLog does nothing
  GridNormLoggingModePrint  = 1, // GridNormLog prints each value to std::cerr
  GridNormLoggingModeRecord = 2, // GridNormLog appends each value to GridNormLogVector
  GridNormLoggingModeVerify = 3  // GridNormLog compares each value with the recorded one
};
|  | extern int GridNormLoggingMode; | ||||||
|  | extern int32_t GridNormLoggingCounter; | ||||||
|  | extern std::vector<double> GridNormLogVector; | ||||||
|  | void SetGridNormLoggingModePrint(void); | ||||||
|  | void SetGridNormLoggingModeRecord(void); | ||||||
|  | void SetGridNormLoggingModeVerify(void); | ||||||
|  | void SetGridNormLoggingMode(GridNormLoggingMode_t mode); | ||||||
|  | void GridNormLog(double value); | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); | NAMESPACE_END(Grid); | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										41
									
								
								systems/Aurora/tests/repro128.pbs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										41
									
								
								systems/Aurora/tests/repro128.pbs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,41 @@ | |||||||
#!/bin/bash

## qsub -q EarlyAppAccess -A Aurora_Deployment -I -l select=1 -l walltime=60:00

#PBS -q EarlyAppAccess
#PBS -l select=128
#PBS -l walltime=02:00:00
#PBS -A LatticeQCD_aesp_CNDA

# 128-node reproducibility run of Test_dwf_mixedcg_prec.

#export OMP_PROC_BIND=spread
#unset OMP_PLACES

# Abort if the submission directory is unreachable: everything below uses
# relative paths and would otherwise run from the wrong place.
cd "$PBS_O_WORKDIR" || exit 1

source ../sourceme.sh

cat "$PBS_NODEFILE"

export OMP_NUM_THREADS=3
export MPIR_CVAR_CH4_OFI_ENABLE_GPU_PIPELINE=1

#unset MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE
#unset MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE
#unset MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST

export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE=0
export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE=0
export MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST=1
export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_BUFFER_SZ=1048576
export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_THRESHOLD=131072
export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_NUM_BUFFERS_PER_CHUNK=16
export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_MAX_NUM_BUFFERS=16
export MPICH_OFI_NIC_POLICY=GPU

# 12 ppn, 128 nodes, 1536 ranks
# CMD is expanded unquoted on purpose so the shell splits it into arguments.
CMD="mpiexec -np 1536 -ppn 12  -envall \
	     ./gpu_tile_compact.sh \
	     ./Test_dwf_mixedcg_prec --mpi 4.4.4.24 --grid 128.128.128.384 \
		--shm-mpi 1 --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 7000 --comms-overlap "
$CMD
| @@ -4,7 +4,7 @@ | |||||||
|  |  | ||||||
| #PBS -q EarlyAppAccess | #PBS -q EarlyAppAccess | ||||||
| #PBS -l select=16 | #PBS -l select=16 | ||||||
| #PBS -l walltime=01:00:00 | #PBS -l walltime=02:00:00 | ||||||
| #PBS -A LatticeQCD_aesp_CNDA | #PBS -A LatticeQCD_aesp_CNDA | ||||||
|  |  | ||||||
| #export OMP_PROC_BIND=spread | #export OMP_PROC_BIND=spread | ||||||
| @@ -36,5 +36,6 @@ export MPICH_OFI_NIC_POLICY=GPU | |||||||
| CMD="mpiexec -np 192 -ppn 12  -envall \ | CMD="mpiexec -np 192 -ppn 12  -envall \ | ||||||
| 	     ./gpu_tile_compact.sh \ | 	     ./gpu_tile_compact.sh \ | ||||||
| 	     ./Test_dwf_mixedcg_prec --mpi 2.4.4.6 --grid 64.128.128.192 \ | 	     ./Test_dwf_mixedcg_prec --mpi 2.4.4.6 --grid 64.128.128.192 \ | ||||||
| 		--shm-mpi 1 --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 3000" | 		--shm-mpi 1 --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 6000 " | ||||||
|  | #--comms-overlap | ||||||
| $CMD  | $CMD  | ||||||
|   | |||||||
| @@ -36,5 +36,5 @@ export MPICH_OFI_NIC_POLICY=GPU | |||||||
| CMD="mpiexec -np 192 -ppn 12  -envall \ | CMD="mpiexec -np 192 -ppn 12  -envall \ | ||||||
| 	     ./gpu_tile_compact.sh \ | 	     ./gpu_tile_compact.sh \ | ||||||
| 	     ./Test_staggered_cg_prec --mpi 2.4.4.6 --grid 128.128.128.192 \ | 	     ./Test_staggered_cg_prec --mpi 2.4.4.6 --grid 128.128.128.192 \ | ||||||
| 	     --shm-mpi 1 --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 3000" | 	     --shm-mpi 1 --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 3000 --comms-overlap" | ||||||
| $CMD  | $CMD  | ||||||
|   | |||||||
| @@ -30,6 +30,10 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
| using namespace std; | using namespace std; | ||||||
| using namespace Grid; | using namespace Grid; | ||||||
|  |  | ||||||
|  | #ifndef HOST_NAME_MAX | ||||||
|  | #define HOST_NAME_MAX _POSIX_HOST_NAME_MAX | ||||||
|  | #endif | ||||||
|  |  | ||||||
| int main (int argc, char ** argv) | int main (int argc, char ** argv) | ||||||
| { | { | ||||||
|   char hostname[HOST_NAME_MAX+1]; |   char hostname[HOST_NAME_MAX+1]; | ||||||
| @@ -104,6 +108,11 @@ int main (int argc, char ** argv) | |||||||
|   csumref=0; |   csumref=0; | ||||||
|   int iter=0; |   int iter=0; | ||||||
|   do { |   do { | ||||||
|  |     if ( iter == 0 ) { | ||||||
|  |       SetGridNormLoggingMode(GridNormLoggingModeRecord); | ||||||
|  |     } else { | ||||||
|  |       SetGridNormLoggingMode(GridNormLoggingModeVerify); | ||||||
|  |     } | ||||||
|     std::cerr << "******************* SINGLE PRECISION SOLVE "<<iter<<std::endl; |     std::cerr << "******************* SINGLE PRECISION SOLVE "<<iter<<std::endl; | ||||||
|     result_o = Zero(); |     result_o = Zero(); | ||||||
|     t1=usecond(); |     t1=usecond(); | ||||||
| @@ -135,6 +144,11 @@ int main (int argc, char ** argv) | |||||||
|   csumref=0; |   csumref=0; | ||||||
|   int i=0; |   int i=0; | ||||||
|   do {  |   do {  | ||||||
|  |     if ( iter == 0 ) { | ||||||
|  |       SetGridNormLoggingMode(GridNormLoggingModeRecord); | ||||||
|  |     } else { | ||||||
|  |       SetGridNormLoggingMode(GridNormLoggingModeVerify); | ||||||
|  |     } | ||||||
|     std::cerr << "******************* DOUBLE PRECISION SOLVE "<<i<<std::endl; |     std::cerr << "******************* DOUBLE PRECISION SOLVE "<<i<<std::endl; | ||||||
|     result_o_2 = Zero(); |     result_o_2 = Zero(); | ||||||
|     t1=usecond(); |     t1=usecond(); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user