Mirror of https://github.com/paboyle/Grid.git (synced 2025-10-31 12:04:33 +00:00)

Compare commits: 3fa6827e43 ... aff3d50bae (7 commits)

| SHA1 |
|---|
| aff3d50bae |
| 79ad567dd5 |
| fab1efb48c |
| 660eb76d93 |
| 62e7bf024a |
| 95f3d69cf9 |
| 32e6d58356 |
| @@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #define _GRID_FFT_H_ | ||||
|  | ||||
| #ifdef HAVE_FFTW | ||||
| #ifdef USE_MKL | ||||
| #if defined(USE_MKL) || defined(GRID_SYCL) | ||||
| #include <fftw/fftw3.h> | ||||
| #else | ||||
| #include <fftw3.h> | ||||
|   | ||||
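The hunk above widens the MKL branch so that SYCL builds also take the FFTW-compatible header shipped under `fftw/`. After the change the include selection reads as follows (a sketch of the resulting preprocessor logic only; `HAVE_FFTW`, `USE_MKL` and `GRID_SYCL` are configure-time defines):

```cpp
// Include selection after this change: MKL and SYCL builds reuse the
// fftw/fftw3.h interface header, everything else takes stock FFTW3.
#ifdef HAVE_FFTW
#if defined(USE_MKL) || defined(GRID_SYCL)
#include <fftw/fftw3.h>   // FFTW3 interface header as packaged with MKL (assumed layout)
#else
#include <fftw3.h>        // stock FFTW3
#endif
#endif
```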
| @@ -285,10 +285,13 @@ inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> & | ||||
| template<class vobj> | ||||
| inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right) { | ||||
|   GridBase *grid = left.Grid(); | ||||
|   uint32_t csum=0; | ||||
|   //  Uint32Checksum(left,csum); | ||||
|   ComplexD nrm = rankInnerProduct(left,right); | ||||
|   //  GridNormLog(real(nrm)); // Could log before and after global sum to distinguish local and MPI | ||||
|   RealD local = real(nrm); | ||||
|   GridNormLog(real(nrm),csum); // Could log before and after global sum to distinguish local and MPI | ||||
|   grid->GlobalSum(nrm); | ||||
|   //  GridNormLog(real(nrm));  | ||||
|   GridMPINormLog(local,real(nrm));  | ||||
|   return nrm; | ||||
| } | ||||
|  | ||||
|   | ||||
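The `innerProduct` hunk wires the reproducibility log into the global sum: the per-rank value is passed to `GridNormLog` (with a checksum slot that is currently always 0, since `Uint32Checksum` stays commented out), and the local/global pair after `GlobalSum` goes to `GridMPINormLog`, so a mismatch can be attributed either to local arithmetic or to the MPI reduction. A minimal usage sketch built only from the calls visible in this compare (the driver code and the fields `x`, `y` are assumed, not part of the diff):

```cpp
// Record one pass, then repeat the identical work and verify bit-for-bit.
SetGridNormLoggingMode(GridNormLoggingModeRecord);
ComplexD ref = innerProduct(x, y);   // appends local norm and MPI result to the logs

SetGridNormLoggingMode(GridNormLoggingModeVerify);
ComplexD chk = innerProduct(x, y);   // prints "Oops ..." and asserts on any mismatch
```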
| @@ -86,8 +86,13 @@ public: | ||||
|     assert(ForceE.Checkerboard()==Even); | ||||
|     assert(ForceO.Checkerboard()==Odd); | ||||
|  | ||||
| #if defined(GRID_CUDA) || defined(GRID_HIP)  || defined(GRID_SYCL) | ||||
|     acceleratorSetCheckerboard(Force,ForceE); | ||||
|     acceleratorSetCheckerboard(Force,ForceO); | ||||
| #else | ||||
|     setCheckerboard(Force,ForceE);  | ||||
|     setCheckerboard(Force,ForceO); | ||||
| #endif | ||||
|     Force=-Force; | ||||
|  | ||||
|     delete forcecb; | ||||
| @@ -130,8 +135,13 @@ public: | ||||
|     assert(ForceE.Checkerboard()==Even); | ||||
|     assert(ForceO.Checkerboard()==Odd); | ||||
|  | ||||
| #if defined(GRID_CUDA) || defined(GRID_HIP)  || defined(GRID_SYCL) | ||||
|     acceleratorSetCheckerboard(Force,ForceE); | ||||
|     acceleratorSetCheckerboard(Force,ForceO); | ||||
| #else | ||||
|     setCheckerboard(Force,ForceE);  | ||||
|     setCheckerboard(Force,ForceO); | ||||
| #endif | ||||
|     Force=-Force; | ||||
|  | ||||
|     delete forcecb; | ||||
|   | ||||
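Both force-reassembly hunks now select the device-side `acceleratorSetCheckerboard` on CUDA, HIP and SYCL builds and keep the host `setCheckerboard` otherwise. If the same choice is needed in more places, one way to keep the `#if` in a single spot is a thin wrapper like the sketch below (`setCheckerboardPortable` is a hypothetical name, not a Grid API):

```cpp
// Hypothetical helper wrapping the selection used in the hunks above.
template<class Field>
inline void setCheckerboardPortable(Field &full, const Field &half)
{
#if defined(GRID_CUDA) || defined(GRID_HIP) || defined(GRID_SYCL)
  acceleratorSetCheckerboard(full, half);   // device-resident copy
#else
  setCheckerboard(full, half);              // host path
#endif
}
```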
| @@ -96,7 +96,10 @@ static Coordinate Grid_default_mpi; | ||||
| /////////////////////////////////////////////////////// | ||||
| int GridNormLoggingMode; | ||||
| int32_t GridNormLoggingCounter; | ||||
| int32_t GridMPINormLoggingCounter; | ||||
| std::vector<double> GridNormLogVector; | ||||
| std::vector<double> GridMPINormLogVector; | ||||
| std::vector<uint32_t> GridCsumLogVector; | ||||
|  | ||||
| void SetGridNormLoggingMode(GridNormLoggingMode_t mode) | ||||
| { | ||||
| @@ -113,7 +116,10 @@ void SetGridNormLoggingMode(GridNormLoggingMode_t mode) | ||||
|   case GridNormLoggingModeNone: | ||||
|     GridNormLoggingMode = mode; | ||||
|     GridNormLoggingCounter=0; | ||||
|     GridMPINormLoggingCounter=0; | ||||
|     GridNormLogVector.resize(0); | ||||
|     GridCsumLogVector.resize(0); | ||||
|     GridMPINormLogVector.resize(0); | ||||
|     break; | ||||
|   default: | ||||
|     assert(0); | ||||
| @@ -122,40 +128,90 @@ void SetGridNormLoggingMode(GridNormLoggingMode_t mode) | ||||
|  | ||||
| void SetGridNormLoggingModePrint(void) | ||||
| { | ||||
|   std::cout << " GridNormLogging Reproducibility logging set to print output " <<std::endl; | ||||
|   GridNormLoggingCounter = 0; | ||||
|   GridMPINormLoggingCounter=0; | ||||
|   GridNormLogVector.resize(0); | ||||
|   GridCsumLogVector.resize(0); | ||||
|   GridMPINormLogVector.resize(0); | ||||
|   GridNormLoggingMode = GridNormLoggingModePrint; | ||||
| } | ||||
| void SetGridNormLoggingModeRecord(void) | ||||
| { | ||||
|   std::cout << " GridNormLogging Reproducibility logging set to RECORD " <<std::endl; | ||||
|   GridNormLoggingCounter = 0; | ||||
|   GridMPINormLoggingCounter=0; | ||||
|   GridNormLogVector.resize(0); | ||||
|   GridCsumLogVector.resize(0); | ||||
|   GridMPINormLogVector.resize(0); | ||||
|   GridNormLoggingMode = GridNormLoggingModeRecord; | ||||
| } | ||||
| void SetGridNormLoggingModeVerify(void) | ||||
| { | ||||
|   std::cout << " GridNormLogging Reproducibility logging set to VERIFY " << GridNormLogVector.size()<< " log entries "<<std::endl; | ||||
|   GridNormLoggingCounter = 0; | ||||
|   GridMPINormLoggingCounter=0; | ||||
|   GridNormLoggingMode = GridNormLoggingModeVerify; | ||||
| } | ||||
| void GridNormLog(double value) | ||||
| void GridNormLog(double value,uint32_t csum) | ||||
| { | ||||
|   if(GridNormLoggingMode == GridNormLoggingModePrint) { | ||||
|     std::cerr<<"GridNormLog : "<< GridNormLoggingCounter <<" " << std::hexfloat << value <<std::endl; | ||||
|     std::cerr<<"GridNormLog : "<< GridNormLoggingCounter <<" " << std::hexfloat << value << " csum " <<std::hex<<csum<<std::dec <<std::endl; | ||||
|     GridNormLoggingCounter++; | ||||
|   } | ||||
|   if(GridNormLoggingMode == GridNormLoggingModeRecord) { | ||||
|     GridNormLogVector.push_back(value); | ||||
|     GridCsumLogVector.push_back(csum); | ||||
|     GridNormLoggingCounter++; | ||||
|   } | ||||
|   if(GridNormLoggingMode == GridNormLoggingModeVerify) { | ||||
|     assert(GridNormLoggingCounter < GridNormLogVector.size()); | ||||
|     if ( value != GridNormLogVector[GridNormLoggingCounter] ) { | ||||
|       fprintf(stderr,"%s Oops, I did it again! Reproduce failure for norm %d/%zu %.16e %.16e\n",GridHostname(),GridNormLoggingCounter,GridNormLogVector.size(), | ||||
| 	      value, GridNormLogVector[GridNormLoggingCounter]); fflush(stderr); | ||||
|     if ( (value != GridNormLogVector[GridNormLoggingCounter]) | ||||
| 	 || (csum!=GridCsumLogVector[GridNormLoggingCounter]) ) { | ||||
|       std::cerr << " Oops got norm "<< std::hexfloat<<value<<" expect "<<GridNormLogVector[GridNormLoggingCounter] <<std::endl; | ||||
|       std::cerr << " Oops got csum "<< std::hex<<csum<<" expect "<<GridCsumLogVector[GridNormLoggingCounter] <<std::endl; | ||||
|       fprintf(stderr,"%s:%d Oops, I did it again! Reproduce failure for norm %d/%zu %.16e %.16e %x %x\n", | ||||
| 	      GridHostname(), | ||||
| 	      GlobalSharedMemory::WorldShmRank, | ||||
| 	      GridNormLoggingCounter,GridNormLogVector.size(), | ||||
| 	      value, GridNormLogVector[GridNormLoggingCounter], | ||||
| 	      csum, GridCsumLogVector[GridNormLoggingCounter]); fflush(stderr); | ||||
|       assert(0); // Force takedown of job | ||||
|     } | ||||
|     if ( GridNormLogVector.size()==GridNormLoggingCounter ) { | ||||
|       std::cout << " GridNormLogging : Verified entire sequence of "<<GridNormLoggingCounter<<" norms "<<std::endl; | ||||
|     } | ||||
|     GridNormLoggingCounter++; | ||||
|   } | ||||
| } | ||||
| void GridMPINormLog(double local,double result) | ||||
| { | ||||
|   if(GridNormLoggingMode == GridNormLoggingModePrint) { | ||||
|     std::cerr<<"GridMPINormLog : "<< GridMPINormLoggingCounter <<" " << std::hexfloat << local << " -> " <<result <<std::endl; | ||||
|     GridMPINormLoggingCounter++; | ||||
|   } | ||||
|   if(GridNormLoggingMode == GridNormLoggingModeRecord) { | ||||
|     std::cerr<<"GridMPINormLog RECORDING : "<< GridMPINormLoggingCounter <<" " << std::hexfloat << local << "-> "<< result <<std::endl; | ||||
|     GridMPINormLogVector.push_back(result); | ||||
|     GridMPINormLoggingCounter++; | ||||
|   } | ||||
|   if(GridNormLoggingMode == GridNormLoggingModeVerify) { | ||||
|     std::cerr<<"GridMPINormLog : "<< GridMPINormLoggingCounter <<" " << std::hexfloat << local << "-> "<< result <<std::endl; | ||||
|     assert(GridMPINormLoggingCounter < GridMPINormLogVector.size()); | ||||
|     if ( result != GridMPINormLogVector[GridMPINormLoggingCounter] ) { | ||||
|       fprintf(stderr,"%s:%d MPI_Allreduce did it again! Reproduce failure for norm %d/%zu glb %.16e lcl %.16e hist %.16e\n", | ||||
| 	      GridHostname(), | ||||
| 	      GlobalSharedMemory::WorldShmRank, | ||||
| 	      GridMPINormLoggingCounter,GridMPINormLogVector.size(), | ||||
| 	      result, local, GridMPINormLogVector[GridMPINormLoggingCounter]); fflush(stderr); | ||||
|       assert(0); // Force takedown of job | ||||
|     } | ||||
|     if ( GridMPINormLogVector.size()==GridMPINormLoggingCounter ) { | ||||
|       std::cout << " GridMPINormLogging : Verified entire sequence of "<<GridMPINormLoggingCounter<<" norms "<<std::endl; | ||||
|     } | ||||
|     GridMPINormLoggingCounter++; | ||||
|   } | ||||
| } | ||||
|  | ||||
| int GridThread::_threads =1; | ||||
| int GridThread::_hyperthreads=1; | ||||
|   | ||||
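The block above adds the MPI counter, the MPI result history and a checksum history, and extends the Print/Record/Verify modes to reset and fill all of them. Stripped of the Grid globals and of the MPI/checksum variants, the record-then-verify state machine reduces to roughly this sketch (illustration only, not the Grid implementation):

```cpp
#include <cassert>
#include <cstdio>
#include <vector>

enum Mode { None, Print, Record, Verify };

struct NormLog {
  Mode mode = None;
  size_t counter = 0;
  std::vector<double> history;

  void log(double value) {
    if (mode == Print) {
      std::fprintf(stderr, "NormLog %zu %a\n", counter++, value);
    } else if (mode == Record) {
      history.push_back(value);
      counter++;
    } else if (mode == Verify) {
      assert(counter < history.size());
      if (value != history[counter]) {      // bitwise comparison, on purpose
        std::fprintf(stderr, "reproduce failure %zu/%zu: %.16e vs %.16e\n",
                     counter, history.size(), value, history[counter]);
        assert(0);                          // take the job down, as above
      }
      counter++;
    }
  }
};
```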
| @@ -76,14 +76,15 @@ enum GridNormLoggingMode_t { | ||||
|   GridNormLoggingModeRecord, | ||||
|   GridNormLoggingModeVerify | ||||
| }; | ||||
| extern int GridNormLoggingMode; | ||||
| extern int32_t GridNormLoggingCounter; | ||||
| extern std::vector<double> GridNormLogVector; | ||||
| //extern int GridNormLoggingMode; | ||||
| //extern int32_t GridNormLoggingCounter; | ||||
| //extern std::vector<double> GridNormLogVector; | ||||
| void SetGridNormLoggingModePrint(void); | ||||
| void SetGridNormLoggingModeRecord(void); | ||||
| void SetGridNormLoggingModeVerify(void); | ||||
| void SetGridNormLoggingMode(GridNormLoggingMode_t mode); | ||||
| void GridNormLog(double value); | ||||
| void GridNormLog(double value,uint32_t csum); | ||||
| void GridMPINormLog(double lcl, double glbl); | ||||
|  | ||||
| NAMESPACE_END(Grid); | ||||
|  | ||||
|   | ||||
| @@ -2,26 +2,39 @@ | ||||
|  | ||||
| ## qsub -q EarlyAppAccess -A Aurora_Deployment -I -l select=1 -l walltime=60:00 | ||||
|  | ||||
| #PBS -q EarlyAppAccess | ||||
| #PBS -l select=16 | ||||
| #PBS -l walltime=02:00:00 | ||||
| #PBS -l select=16:system=sunspot,place=scatter | ||||
| #PBS -A LatticeQCD_aesp_CNDA | ||||
| #PBS -l walltime=01:00:00 | ||||
| #PBS -N dwf | ||||
| #PBS -k doe | ||||
|  | ||||
| #export OMP_PROC_BIND=spread | ||||
| #unset OMP_PLACES | ||||
|  | ||||
| cd $PBS_O_WORKDIR | ||||
|  | ||||
| source ../sourceme.sh | ||||
| #source ../sourceme.sh | ||||
|  | ||||
| cat $PBS_NODEFILE | ||||
|  | ||||
| #export MPICH_COLL_SYNC=1 | ||||
| #export MPICH_ENV_DISPLAY=1 | ||||
| export MPICH_ | ||||
| export OMP_NUM_THREADS=3 | ||||
| export MPIR_CVAR_CH4_OFI_ENABLE_GPU_PIPELINE=1 | ||||
| module load oneapi/eng-compiler/2023.05.15.003 | ||||
| module load mpich/51.2/icc-all-deterministic-pmix-gpu | ||||
| #export LD_LIBRARY_PATH=/soft/restricted/CNDA/updates/2023.05.15.001/oneapi/compiler/eng-20230512/compiler/linux/lib/:$LD_LIBRARY_PATH | ||||
|  | ||||
| #unset MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE | ||||
| #unset MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE | ||||
| #unset MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST | ||||
| export MPIR_CVAR_ALLREDUCE_DEVICE_COLLECTIVE=0 | ||||
| export MPIR_CVAR_REDUCE_DEVICE_COLLECTIVE=0 | ||||
| export MPIR_CVAR_ALLREDUCE_INTRA_ALGORITHM=recursive_doubling | ||||
| unset MPIR_CVAR_CH4_COLL_SELECTION_TUNING_JSON_FILE | ||||
| unset MPIR_CVAR_COLL_SELECTION_TUNING_JSON_FILE | ||||
| unset MPIR_CVAR_CH4_POSIX_COLL_SELECTION_TUNING_JSON_FILE | ||||
|  | ||||
| export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE=0 | ||||
| export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE=0 | ||||
| @@ -32,10 +45,17 @@ export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_NUM_BUFFERS_PER_CHUNK=16 | ||||
| export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_MAX_NUM_BUFFERS=16 | ||||
| export MPICH_OFI_NIC_POLICY=GPU | ||||
|  | ||||
| # 12 ppn, 16 nodes, 192 ranks | ||||
| DIR=repro.$PBS_JOBID | ||||
| mkdir $DIR | ||||
| cd $DIR | ||||
|  | ||||
| CMD="mpiexec -np 192 -ppn 12  -envall \ | ||||
| 	     ./gpu_tile_compact.sh \ | ||||
| 	     ./Test_dwf_mixedcg_prec --mpi 2.4.4.6 --grid 64.128.128.192 \ | ||||
| 		--shm-mpi 1 --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 6000 " | ||||
| 	     ../gpu_tile_compact.sh \ | ||||
| 	     ../Test_dwf_mixedcg_prec --mpi 2.4.4.6 --grid 64.128.128.192 \ | ||||
| 		--shm-mpi 1 --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 3000 --debug-stdout --log Message,Iterative" | ||||
| #--comms-overlap | ||||
| $CMD  | ||||
|  | ||||
| grep Oops Grid.stderr.* > failures.$PBS_JOBID | ||||
| rm core.* | ||||
|  | ||||
|   | ||||
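The environment block in the job script above is what makes the global sums comparable between runs: device-side allreduce/reduce are disabled, the allreduce algorithm is pinned to `recursive_doubling`, and the collective-selection tuning files are unset, so every run reduces in the same order. Order matters because floating-point addition is not associative; a tiny standalone illustration (not Grid code):

```cpp
#include <cstdio>

// Two summation orders of the same three numbers give different doubles.
int main() {
  double a = 1e16, b = -1e16, c = 1.0;
  std::printf("%.17g vs %.17g\n", (a + b) + c, a + (b + c));  // prints 1 vs 0
  return 0;
}
```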
							
								
								
									
systems/Aurora/tests/repro1gpu.pbs (new file, 81 lines)
							| @@ -0,0 +1,81 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| #PBS -l select=16:system=sunspot,place=scatter | ||||
| #PBS -A LatticeQCD_aesp_CNDA | ||||
| #PBS -l walltime=02:00:00 | ||||
| #PBS -N repro1gpu | ||||
| #PBS -k doe | ||||
|  | ||||
| #export OMP_PROC_BIND=spread | ||||
| #unset OMP_PLACES | ||||
|  | ||||
| module load oneapi/eng-compiler/2023.05.15.003 | ||||
| module load mpich/51.2/icc-all-deterministic-pmix-gpu | ||||
|  | ||||
| # 56 cores / 6 threads ~9 | ||||
| export OMP_NUM_THREADS=6 | ||||
| export MPIR_CVAR_CH4_OFI_ENABLE_GPU_PIPELINE=1 | ||||
| export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE=0 | ||||
| export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE=0 | ||||
| export MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST=1 | ||||
| export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_BUFFER_SZ=1048576 | ||||
| export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_THRESHOLD=131072 | ||||
| export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_NUM_BUFFERS_PER_CHUNK=16 | ||||
| export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_MAX_NUM_BUFFERS=16 | ||||
| export MPICH_OFI_NIC_POLICY=GPU | ||||
|  | ||||
| export MPIR_CVAR_ALLREDUCE_DEVICE_COLLECTIVE=0 | ||||
| export MPIR_CVAR_REDUCE_DEVICE_COLLECTIVE=0 | ||||
| export MPIR_CVAR_ALLREDUCE_INTRA_ALGORITHM=recursive_doubling | ||||
| unset MPIR_CVAR_CH4_COLL_SELECTION_TUNING_JSON_FILE | ||||
| unset MPIR_CVAR_COLL_SELECTION_TUNING_JSON_FILE | ||||
| unset MPIR_CVAR_CH4_POSIX_COLL_SELECTION_TUNING_JSON_FILE | ||||
|  | ||||
| cd $PBS_O_WORKDIR | ||||
|  | ||||
| NN=`cat $PBS_NODEFILE | wc -l` | ||||
| echo $PBS_NODEFILE | ||||
| cat $PBS_NODEFILE | ||||
|  | ||||
| echo $NN nodes in node file | ||||
| for n in `eval echo {1..$NN}` | ||||
| do | ||||
|  | ||||
| THIS_NODE=`head -n$n $PBS_NODEFILE | tail -n1 ` | ||||
| echo Node $n is $THIS_NODE | ||||
|  | ||||
|  | ||||
| for g in {0..11} | ||||
| do | ||||
| export NUMA_MAP=(0 0 0 1 1 1 0 0 0 1 1 1 ) | ||||
| export TILE_MAP=(0 0 0 0 0 0 1 1 1 1 1 1 ) | ||||
| export  GPU_MAP=(0 1 2 3 4 5 0 1 2 3 4 5 ) | ||||
|  | ||||
| export numa=${NUMA_MAP[$g]} | ||||
| export gpu_id=${GPU_MAP[$g]} | ||||
| export tile_id=${TILE_MAP[$g]} | ||||
| export gpu=$gpu_id.$tile_id | ||||
|  | ||||
| cd $PBS_O_WORKDIR | ||||
|  | ||||
| DIR=repro.1gpu.$PBS_JOBID/node-$n-$THIS_NODE-GPU-$gpu | ||||
| mkdir -p $DIR | ||||
| cd $DIR | ||||
|  | ||||
| echo $THIS_NODE > nodefile | ||||
| echo $gpu > gpu | ||||
|  | ||||
| export ZE_AFFINITY_MASK=$gpu | ||||
| export ONEAPI_DEVICE_FILTER=gpu,level_zero | ||||
|  | ||||
| CMD="mpiexec -np 1 -ppn 1  -envall --hostfile nodefile \ | ||||
| 	     numactl -N $numa -m $numa ../../Test_dwf_mixedcg_prec --mpi 1.1.1.1 --grid 16.16.32.32 \ | ||||
| 		--shm-mpi 0 --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 6000 --debug-stdout --log Message" | ||||
| echo $CMD | ||||
| $CMD & | ||||
|  | ||||
| done | ||||
| done | ||||
|  | ||||
| wait | ||||
|  | ||||
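repro1gpu.pbs runs one independent single-rank job per GPU tile, twelve per node, pinning each slot to a NUMA domain and a `gpu.tile` pair through `numactl` and `ZE_AFFINITY_MASK`. The three map arrays encode a simple pattern; the throwaway sketch below just makes that layout explicit (illustration only):

```cpp
#include <cstdio>

// Reproduces NUMA_MAP / GPU_MAP / TILE_MAP from the script:
// slots 0-5 use tile 0, slots 6-11 tile 1, NUMA domain alternating in blocks of 3.
int main() {
  for (int g = 0; g < 12; ++g) {
    int gpu  = g % 6;
    int tile = g / 6;
    int numa = (g / 3) % 2;
    std::printf("slot %2d -> ZE_AFFINITY_MASK=%d.%d  numactl -N %d\n",
                g, gpu, tile, numa);
  }
  return 0;
}
```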
							
								
								
									
systems/Aurora/tests/reproN.pbs (new file, 78 lines)
							| @@ -0,0 +1,78 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| #PBS -l select=16:system=sunspot,place=scatter | ||||
| #PBS -A LatticeQCD_aesp_CNDA | ||||
| #PBS -l walltime=02:00:00 | ||||
| #PBS -N reproN | ||||
| #PBS -k doe | ||||
|  | ||||
| #export OMP_PROC_BIND=spread | ||||
| #unset OMP_PLACES | ||||
|  | ||||
| module load oneapi/eng-compiler/2023.05.15.003 | ||||
| module load mpich/51.2/icc-all-deterministic-pmix-gpu | ||||
|  | ||||
| # 56 cores / 6 threads ~9 | ||||
| export OMP_NUM_THREADS=6 | ||||
| export MPIR_CVAR_CH4_OFI_ENABLE_GPU_PIPELINE=1 | ||||
| export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE=0 | ||||
| export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE=0 | ||||
| export MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST=1 | ||||
| export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_BUFFER_SZ=1048576 | ||||
| export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_THRESHOLD=131072 | ||||
| export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_NUM_BUFFERS_PER_CHUNK=16 | ||||
| export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_MAX_NUM_BUFFERS=16 | ||||
| export MPICH_OFI_NIC_POLICY=GPU | ||||
|  | ||||
| export MPIR_CVAR_ALLREDUCE_DEVICE_COLLECTIVE=0 | ||||
| export MPIR_CVAR_REDUCE_DEVICE_COLLECTIVE=0 | ||||
| export MPIR_CVAR_ALLREDUCE_INTRA_ALGORITHM=recursive_doubling | ||||
| unset MPIR_CVAR_CH4_COLL_SELECTION_TUNING_JSON_FILE | ||||
| unset MPIR_CVAR_COLL_SELECTION_TUNING_JSON_FILE | ||||
| unset MPIR_CVAR_CH4_POSIX_COLL_SELECTION_TUNING_JSON_FILE | ||||
|  | ||||
| cd $PBS_O_WORKDIR | ||||
|  | ||||
| NN=`cat $PBS_NODEFILE | wc -l` | ||||
| echo $PBS_NODEFILE | ||||
| cat $PBS_NODEFILE | ||||
|  | ||||
| echo $NN nodes in node file | ||||
| for n in `eval echo {1..$NN}` | ||||
| do | ||||
|  | ||||
| cd $PBS_O_WORKDIR | ||||
|  | ||||
| THIS_NODE=`head -n$n $PBS_NODEFILE | tail -n1 ` | ||||
| echo Node $n is $THIS_NODE | ||||
|  | ||||
| DIR=repro.$PBS_JOBID/node-$n-$THIS_NODE | ||||
|  | ||||
| mkdir -p $DIR | ||||
| cd $DIR | ||||
|  | ||||
| echo $THIS_NODE > nodefile | ||||
|  | ||||
| CMD="mpiexec -np 12 -ppn 12  -envall --hostfile nodefile \ | ||||
| 	     ../../gpu_tile_compact.sh \ | ||||
| 	     ../../Test_dwf_mixedcg_prec --mpi 1.2.2.3 --grid 32.64.64.96 \ | ||||
| 		--shm-mpi 0 --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 6000 --debug-stdout --log Message --comms-overlap" | ||||
|  | ||||
| $CMD & | ||||
|  | ||||
| done | ||||
|  | ||||
| wait | ||||
|  | ||||
| for n in ` eval echo {1..$NN} ` | ||||
| do | ||||
|  | ||||
| THIS_NODE=`head -n$n $PBS_NODEFILE | tail -n1 ` | ||||
| DIR=repro.$PBS_JOBID/node-$n-$THIS_NODE | ||||
|  | ||||
| cd $PBS_O_WORKDIR/$DIR | ||||
|  | ||||
| grep Oops Grid.stderr.* > failures.$PBS_JOBID | ||||
| rm core.* | ||||
|  | ||||
| done | ||||
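reproN.pbs launches an independent 12-rank job per node (one rank per tile via `gpu_tile_compact.sh`) and afterwards sweeps the per-node directories for the "Oops" markers emitted by the verify path. With `--mpi 1.2.2.3` the `32.64.64.96` lattice splits into a 32^4 local volume per rank; a throwaway check of that split (illustration only):

```cpp
#include <cstdio>

// Local volume per rank for "--mpi 1.2.2.3 --grid 32.64.64.96".
int main() {
  int grid[4] = {32, 64, 64, 96};
  int mpi[4]  = { 1,  2,  2,  3};
  std::printf("local volume:");
  for (int d = 0; d < 4; ++d) std::printf(" %d", grid[d] / mpi[d]);
  std::printf("\n");   // 32 32 32 32
  return 0;
}
```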
| @@ -34,6 +34,45 @@ using namespace Grid; | ||||
| #define HOST_NAME_MAX _POSIX_HOST_NAME_MAX | ||||
| #endif | ||||
|  | ||||
| NAMESPACE_BEGIN(Grid); | ||||
| template<class Matrix,class Field> | ||||
|   class SchurDiagMooeeOperatorParanoid :  public SchurOperatorBase<Field> { | ||||
|  public: | ||||
|     Matrix &_Mat; | ||||
|     SchurDiagMooeeOperatorParanoid (Matrix &Mat): _Mat(Mat){}; | ||||
|     virtual  void Mpc      (const Field &in, Field &out) { | ||||
|       Field tmp(in.Grid()); | ||||
|       tmp.Checkerboard() = !in.Checkerboard(); | ||||
|       //      std::cout <<" Mpc starting"<<std::endl; | ||||
|  | ||||
|       RealD nn = norm2(in); // std::cout <<" Mpc Prior to dslash norm is "<<nn<<std::endl; | ||||
|       _Mat.Meooe(in,tmp); | ||||
|       nn = norm2(tmp); //std::cout <<" Mpc Prior to Mooeinv "<<nn<<std::endl; | ||||
|       _Mat.MooeeInv(tmp,out); | ||||
|       nn = norm2(out); //std::cout <<" Mpc Prior to dslash norm is "<<nn<<std::endl; | ||||
|       _Mat.Meooe(out,tmp); | ||||
|       nn = norm2(tmp); //std::cout <<" Mpc Prior to Mooee "<<nn<<std::endl; | ||||
|       _Mat.Mooee(in,out); | ||||
|       nn = norm2(out); //std::cout <<" Mpc Prior to axpy "<<nn<<std::endl; | ||||
|       axpy(out,-1.0,tmp,out); | ||||
|     } | ||||
|     virtual void MpcDag   (const Field &in, Field &out){ | ||||
|       Field tmp(in.Grid()); | ||||
|       //      std::cout <<" MpcDag starting"<<std::endl; | ||||
|       RealD nn = norm2(in);// std::cout <<" MpcDag Prior to dslash norm is "<<nn<<std::endl; | ||||
|       _Mat.MeooeDag(in,tmp); | ||||
|       _Mat.MooeeInvDag(tmp,out); | ||||
|       nn = norm2(out);// std::cout <<" MpcDag Prior to dslash norm is "<<nn<<std::endl; | ||||
|       _Mat.MeooeDag(out,tmp); | ||||
|       nn = norm2(tmp);// std::cout <<" MpcDag Prior to Mooee "<<nn<<std::endl; | ||||
|       _Mat.MooeeDag(in,out); | ||||
|       nn = norm2(out);// std::cout <<" MpcDag Prior to axpy "<<nn<<std::endl; | ||||
|       axpy(out,-1.0,tmp,out); | ||||
|     } | ||||
| }; | ||||
|  | ||||
| NAMESPACE_END(Grid); | ||||
|  | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   char hostname[HOST_NAME_MAX+1]; | ||||
| @@ -82,8 +121,8 @@ int main (int argc, char ** argv) | ||||
|   result_o_2.Checkerboard() = Odd; | ||||
|   result_o_2 = Zero(); | ||||
|  | ||||
|   SchurDiagMooeeOperator<DomainWallFermionD,LatticeFermionD> HermOpEO(Ddwf); | ||||
|   SchurDiagMooeeOperator<DomainWallFermionF,LatticeFermionF> HermOpEO_f(Ddwf_f); | ||||
|   SchurDiagMooeeOperatorParanoid<DomainWallFermionD,LatticeFermionD> HermOpEO(Ddwf); | ||||
|   SchurDiagMooeeOperatorParanoid<DomainWallFermionF,LatticeFermionF> HermOpEO_f(Ddwf_f); | ||||
|  | ||||
|   int nsecs=600; | ||||
|   if( GridCmdOptionExists(argv,argv+argc,"--seconds") ){ | ||||
| @@ -144,7 +183,7 @@ int main (int argc, char ** argv) | ||||
|   csumref=0; | ||||
|   int i=0; | ||||
|   do {  | ||||
|     if ( iter == 0 ) { | ||||
|     if ( i == 0 ) { | ||||
|       SetGridNormLoggingMode(GridNormLoggingModeRecord); | ||||
|     } else { | ||||
|       SetGridNormLoggingMode(GridNormLoggingModeVerify); | ||||
|   | ||||
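The final hunks replace the Schur operator with a `Paranoid` variant that evaluates `norm2` after every stage of `Mpc`/`MpcDag`, giving the norm log a checkpoint at each Dslash/Mooee step, and fix the loop variable (`iter` → `i`) so that the first pass records and all later passes verify. The surrounding timed loop is approximately the sketch below (`mCG`, `src_o`, `result_o` are assumed names from context; `nsecs` and the logging calls appear in the diff):

```cpp
// Record-then-verify driver implied by the hunk above (sketch only).
int i = 0;
double start = usecond();              // Grid's microsecond timer
do {
  if (i == 0) SetGridNormLoggingMode(GridNormLoggingModeRecord);
  else        SetGridNormLoggingMode(GridNormLoggingModeVerify);

  mCG(src_o, result_o);                // mixed-precision CG solve; every norm is logged
  i++;
} while (usecond() < start + nsecs * 1.0e6);
```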