mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
Running on Frontier, fix RNG big volume y2k, affecting 5D RNG
This commit is contained in:
parent
fcf5023845
commit
6a87487544
@ -70,8 +70,8 @@ public:
|
|||||||
Coordinate _istride; // Inner stride i.e. within simd lane
|
Coordinate _istride; // Inner stride i.e. within simd lane
|
||||||
int _osites; // _isites*_osites = product(dimensions).
|
int _osites; // _isites*_osites = product(dimensions).
|
||||||
int _isites;
|
int _isites;
|
||||||
int _fsites; // _isites*_osites = product(dimensions).
|
int64_t _fsites; // _isites*_osites = product(dimensions).
|
||||||
int _gsites;
|
int64_t _gsites;
|
||||||
Coordinate _slice_block;// subslice information
|
Coordinate _slice_block;// subslice information
|
||||||
Coordinate _slice_stride;
|
Coordinate _slice_stride;
|
||||||
Coordinate _slice_nblock;
|
Coordinate _slice_nblock;
|
||||||
@ -183,7 +183,7 @@ public:
|
|||||||
inline int Nsimd(void) const { return _isites; };// Synonymous with iSites
|
inline int Nsimd(void) const { return _isites; };// Synonymous with iSites
|
||||||
inline int oSites(void) const { return _osites; };
|
inline int oSites(void) const { return _osites; };
|
||||||
inline int lSites(void) const { return _isites*_osites; };
|
inline int lSites(void) const { return _isites*_osites; };
|
||||||
inline int gSites(void) const { return _isites*_osites*_Nprocessors; };
|
inline int64_t gSites(void) const { return (int64_t)_isites*(int64_t)_osites*(int64_t)_Nprocessors; };
|
||||||
inline int Nd (void) const { return _ndimension;};
|
inline int Nd (void) const { return _ndimension;};
|
||||||
|
|
||||||
inline const Coordinate LocalStarts(void) { return _lstart; };
|
inline const Coordinate LocalStarts(void) { return _lstart; };
|
||||||
@ -214,7 +214,7 @@ public:
|
|||||||
////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////
|
||||||
// Global addressing
|
// Global addressing
|
||||||
////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////
|
||||||
void GlobalIndexToGlobalCoor(int gidx,Coordinate &gcoor){
|
void GlobalIndexToGlobalCoor(int64_t gidx,Coordinate &gcoor){
|
||||||
assert(gidx< gSites());
|
assert(gidx< gSites());
|
||||||
Lexicographic::CoorFromIndex(gcoor,gidx,_gdimensions);
|
Lexicographic::CoorFromIndex(gcoor,gidx,_gdimensions);
|
||||||
}
|
}
|
||||||
@ -222,7 +222,7 @@ public:
|
|||||||
assert(lidx<lSites());
|
assert(lidx<lSites());
|
||||||
Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions);
|
Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions);
|
||||||
}
|
}
|
||||||
void GlobalCoorToGlobalIndex(const Coordinate & gcoor,int & gidx){
|
void GlobalCoorToGlobalIndex(const Coordinate & gcoor,int64_t & gidx){
|
||||||
gidx=0;
|
gidx=0;
|
||||||
int mult=1;
|
int mult=1;
|
||||||
for(int mu=0;mu<_ndimension;mu++) {
|
for(int mu=0;mu<_ndimension;mu++) {
|
||||||
|
@ -360,7 +360,7 @@ public:
|
|||||||
|
|
||||||
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
|
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
|
||||||
typedef typename vobj::scalar_object sobj;
|
typedef typename vobj::scalar_object sobj;
|
||||||
for(int g=0;g<o.Grid()->_gsites;g++){
|
for(int64_t g=0;g<o.Grid()->_gsites;g++){
|
||||||
|
|
||||||
Coordinate gcoor;
|
Coordinate gcoor;
|
||||||
o.Grid()->GlobalIndexToGlobalCoor(g,gcoor);
|
o.Grid()->GlobalIndexToGlobalCoor(g,gcoor);
|
||||||
|
@ -432,7 +432,7 @@ public:
|
|||||||
#if 1
|
#if 1
|
||||||
thread_for( lidx, _grid->lSites(), {
|
thread_for( lidx, _grid->lSites(), {
|
||||||
|
|
||||||
int gidx;
|
int64_t gidx;
|
||||||
int o_idx;
|
int o_idx;
|
||||||
int i_idx;
|
int i_idx;
|
||||||
int rank;
|
int rank;
|
||||||
|
@ -1054,7 +1054,7 @@ void Replicate(const Lattice<vobj> &coarse,Lattice<vobj> & fine)
|
|||||||
|
|
||||||
Coordinate fcoor(nd);
|
Coordinate fcoor(nd);
|
||||||
Coordinate ccoor(nd);
|
Coordinate ccoor(nd);
|
||||||
for(int g=0;g<fg->gSites();g++){
|
for(int64_t g=0;g<fg->gSites();g++){
|
||||||
|
|
||||||
fg->GlobalIndexToGlobalCoor(g,fcoor);
|
fg->GlobalIndexToGlobalCoor(g,fcoor);
|
||||||
for(int d=0;d<nd;d++){
|
for(int d=0;d<nd;d++){
|
||||||
|
@ -8,7 +8,7 @@ namespace Grid{
|
|||||||
public:
|
public:
|
||||||
|
|
||||||
template<class coor_t>
|
template<class coor_t>
|
||||||
static accelerator_inline void CoorFromIndex (coor_t& coor,int index,const coor_t &dims){
|
static accelerator_inline void CoorFromIndex (coor_t& coor,int64_t index,const coor_t &dims){
|
||||||
int nd= dims.size();
|
int nd= dims.size();
|
||||||
coor.resize(nd);
|
coor.resize(nd);
|
||||||
for(int d=0;d<nd;d++){
|
for(int d=0;d<nd;d++){
|
||||||
@ -18,28 +18,45 @@ namespace Grid{
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class coor_t>
|
template<class coor_t>
|
||||||
static accelerator_inline void IndexFromCoor (const coor_t& coor,int &index,const coor_t &dims){
|
static accelerator_inline void IndexFromCoor (const coor_t& coor,int64_t &index,const coor_t &dims){
|
||||||
int nd=dims.size();
|
int nd=dims.size();
|
||||||
int stride=1;
|
int64_t stride=1; // 64-bit: the running product of dims reaches the full volume and must not wrap a 32-bit int
|
||||||
index=0;
|
index=0;
|
||||||
for(int d=0;d<nd;d++){
|
for(int d=0;d<nd;d++){
|
||||||
index = index+stride*coor[d];
|
index = index+(int64_t)stride*coor[d];
|
||||||
stride=stride*dims[d];
|
stride=stride*dims[d];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// 32-bit overload of IndexFromCoor.
// Computes the index in 64 bits through the int64_t overload, asserts that the
// result is below 2^31, and then narrows it into the caller's int.
template<class coor_t>
|
||||||
|
static accelerator_inline void IndexFromCoor (const coor_t& coor,int &index,const coor_t &dims){
|
||||||
|
int64_t index64;
|
||||||
|
IndexFromCoor(coor,index64,dims);
|
||||||
|
// 2*1024*1024*1024LL is 2^31; the bound assumes int is 32 bits wide - TODO confirm for all targets
assert(index64<2*1024*1024*1024LL);
|
||||||
|
index = (int) index64;
|
||||||
|
}
|
||||||
|
|
||||||
template<class coor_t>
|
template<class coor_t>
|
||||||
static inline void IndexFromCoorReversed (const coor_t& coor,int &index,const coor_t &dims){
|
static inline void IndexFromCoorReversed (const coor_t& coor,int64_t &index,const coor_t &dims){
|
||||||
int nd=dims.size();
|
int nd=dims.size();
|
||||||
int stride=1;
|
int64_t stride=1; // 64-bit: the running product of dims reaches the full volume and must not wrap a 32-bit int
|
||||||
index=0;
|
index=0;
|
||||||
for(int d=nd-1;d>=0;d--){
|
for(int d=nd-1;d>=0;d--){
|
||||||
index = index+stride*coor[d];
|
index = index+(int64_t)stride*coor[d];
|
||||||
stride=stride*dims[d];
|
stride=stride*dims[d];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
template<class coor_t>
|
template<class coor_t>
|
||||||
static inline void CoorFromIndexReversed (coor_t& coor,int index,const coor_t &dims){
|
// 32-bit overload of IndexFromCoorReversed.
// Computes the index in 64 bits through the int64_t overload; if the result is
// 2^31 or larger it prints the coordinate, index and dims, then asserts.
// Otherwise the value is narrowed into the caller's int.
static inline void IndexFromCoorReversed (const coor_t& coor,int &index,const coor_t &dims){
|
||||||
|
int64_t index64;
|
||||||
|
IndexFromCoorReversed(coor,index64,dims);
|
||||||
|
// Diagnostic output ahead of the assert below, which checks the same 2^31 bound
if ( index64>=2*1024*1024*1024LL ){
|
||||||
|
std::cout << " IndexFromCoorReversed " << coor<<" index " << index64<< " dims "<<dims<<std::endl;
|
||||||
|
}
|
||||||
|
assert(index64<2*1024*1024*1024LL);
|
||||||
|
index = (int) index64;
|
||||||
|
}
|
||||||
|
template<class coor_t>
|
||||||
|
static inline void CoorFromIndexReversed (coor_t& coor,int64_t index,const coor_t &dims){
|
||||||
int nd= dims.size();
|
int nd= dims.size();
|
||||||
coor.resize(nd);
|
coor.resize(nd);
|
||||||
for(int d=nd-1;d>=0;d--){
|
for(int d=nd-1;d>=0;d--){
|
||||||
|
43
systems/Frontier/benchmarks/bench2.slurm
Executable file
43
systems/Frontier/benchmarks/bench2.slurm
Executable file
@ -0,0 +1,43 @@
|
|||||||
|
#!/bin/bash -l
|
||||||
|
#SBATCH --job-name=bench
|
||||||
|
##SBATCH --partition=small-g
|
||||||
|
#SBATCH --nodes=2
|
||||||
|
#SBATCH --ntasks-per-node=8
|
||||||
|
#SBATCH --cpus-per-task=7
|
||||||
|
#SBATCH --gpus-per-node=8
|
||||||
|
#SBATCH --time=00:10:00
|
||||||
|
#SBATCH --account=phy157_dwf
|
||||||
|
#SBATCH --gpu-bind=none
|
||||||
|
#SBATCH --exclusive
|
||||||
|
#SBATCH --mem=0
|
||||||
|
|
||||||
|
cat << EOF > select_gpu
|
||||||
|
#!/bin/bash
|
||||||
|
export GPU_MAP=(0 1 2 3 7 6 5 4)
|
||||||
|
export NUMA_MAP=(3 3 1 1 2 2 0 0)
|
||||||
|
export GPU=\${GPU_MAP[\$SLURM_LOCALID]}
|
||||||
|
export NUMA=\${NUMA_MAP[\$SLURM_LOCALID]}
|
||||||
|
export HIP_VISIBLE_DEVICES=\$GPU
|
||||||
|
unset ROCR_VISIBLE_DEVICES
|
||||||
|
echo RANK \$SLURM_LOCALID using GPU \$GPU
|
||||||
|
# Forward the wrapped command with each argument individually quoted
exec numactl -m \$NUMA -N \$NUMA "\$@"
|
||||||
|
EOF
|
||||||
|
|
||||||
|
chmod +x ./select_gpu
|
||||||
|
|
||||||
|
root=$HOME/Frontier/Grid/systems/Frontier/
|
||||||
|
source ${root}/sourceme.sh
|
||||||
|
|
||||||
|
export OMP_NUM_THREADS=7
|
||||||
|
export MPICH_GPU_SUPPORT_ENABLED=1
|
||||||
|
export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
|
||||||
|
|
||||||
|
for vol in 32.32.32.64
|
||||||
|
do
|
||||||
|
srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 0 --grid $vol > log.shm0.ov.$vol
|
||||||
|
srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.ov.$vol
|
||||||
|
|
||||||
|
srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 0 --grid $vol > log.shm0.seq.$vol
|
||||||
|
srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.seq.$vol
|
||||||
|
done
|
||||||
|
|
23
systems/Frontier/config-command
Normal file
23
systems/Frontier/config-command
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
CLIME=`spack find --paths c-lime@2-3-9 | grep c-lime| cut -c 15-`
|
||||||
|
../../configure --enable-comms=mpi-auto \
|
||||||
|
--with-lime=$CLIME \
|
||||||
|
--enable-unified=no \
|
||||||
|
--enable-shm=nvlink \
|
||||||
|
--enable-tracing=timer \
|
||||||
|
--enable-accelerator=hip \
|
||||||
|
--enable-gen-simd-width=64 \
|
||||||
|
--disable-gparity \
|
||||||
|
--disable-fermion-reps \
|
||||||
|
--enable-simd=GPU \
|
||||||
|
--enable-accelerator-cshift \
|
||||||
|
--with-gmp=$OLCF_GMP_ROOT \
|
||||||
|
--with-fftw=$FFTW_DIR/.. \
|
||||||
|
--with-mpfr=/opt/cray/pe/gcc/mpfr/3.1.4/ \
|
||||||
|
--disable-fermion-reps \
|
||||||
|
CXX=hipcc MPICXX=mpicxx \
|
||||||
|
CXXFLAGS="-fPIC -I${ROCM_PATH}/include/ -std=c++14 -I${MPICH_DIR}/include -L/lib64 " \
|
||||||
|
LDFLAGS="-L/lib64 -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -lamdhip64 "
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
13
systems/Frontier/mpiwrapper.sh
Executable file
13
systems/Frontier/mpiwrapper.sh
Executable file
@ -0,0 +1,13 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
lrank=$SLURM_LOCALID
|
||||||
|
lgpu=(0 1 2 3 7 6 5 4)
|
||||||
|
|
||||||
|
export ROCR_VISIBLE_DEVICES=${lgpu[$lrank]}
|
||||||
|
|
||||||
|
echo "`hostname` - $lrank device=$ROCR_VISIBLE_DEVICES "
|
||||||
|
|
||||||
|
# Run the wrapped command; "$@" keeps every argument as a separate, unsplit word
"$@"
|
||||||
|
|
||||||
|
|
||||||
|
|
13
systems/Frontier/sourceme.sh
Normal file
13
systems/Frontier/sourceme.sh
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
. /autofs/nccs-svm1_home1/paboyle/Crusher/Grid/spack/share/spack/setup-env.sh
|
||||||
|
spack load c-lime
|
||||||
|
#export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/sw/crusher/spack-envs/base/opt/cray-sles15-zen3/gcc-11.2.0/gperftools-2.9.1-72ubwtuc5wcz2meqltbfdb76epufgzo2/lib
|
||||||
|
module load emacs
|
||||||
|
module load PrgEnv-gnu
|
||||||
|
module load rocm
|
||||||
|
module load cray-mpich/8.1.23
|
||||||
|
module load gmp
|
||||||
|
module load cray-fftw
|
||||||
|
module load craype-accel-amd-gfx90a
|
||||||
|
export LD_LIBRARY_PATH=/opt/gcc/mpfr/3.1.4/lib:$LD_LIBRARY_PATH
|
||||||
|
#Hack for lib
|
||||||
|
#export LD_LIBRARY_PATH=`pwd`:$LD_LIBRARY_PATH
|
9
systems/Frontier/wrap.sh
Executable file
9
systems/Frontier/wrap.sh
Executable file
@ -0,0 +1,9 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
export HIP_VISIBLE_DEVICES=$ROCR_VISIBLE_DEVICES
|
||||||
|
unset ROCR_VISIBLE_DEVICES
|
||||||
|
|
||||||
|
#rank=$SLURM_PROCID
|
||||||
|
#rocprof -d rocprof.$rank -o rocprof.$rank/results.rank$SLURM_PROCID.csv --sys-trace $@
|
||||||
|
|
||||||
|
# Run the wrapped command; quoting "$@" prevents re-splitting of arguments
"$@"
|
Loading…
Reference in New Issue
Block a user