mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
Compare commits
6 Commits
832fc08809
...
c4b9f71357
Author | SHA1 | Date | |
---|---|---|---|
|
c4b9f71357 | ||
|
394e506aea | ||
|
e19b26341b | ||
|
cfe1b13225 | ||
|
890c5ea1cd | ||
|
a87378d3b6 |
@ -236,11 +236,18 @@ public:
|
|||||||
template<class sobj> inline Lattice<vobj> & operator = (const sobj & r){
|
template<class sobj> inline Lattice<vobj> & operator = (const sobj & r){
|
||||||
vobj vtmp;
|
vobj vtmp;
|
||||||
vtmp = r;
|
vtmp = r;
|
||||||
|
#if 1
|
||||||
|
auto me = View(CpuWrite);
|
||||||
|
thread_for(ss,me.size(),{
|
||||||
|
me[ss]= r;
|
||||||
|
});
|
||||||
|
#else
|
||||||
auto me = View(AcceleratorWrite);
|
auto me = View(AcceleratorWrite);
|
||||||
accelerator_for(ss,me.size(),vobj::Nsimd(),{
|
accelerator_for(ss,me.size(),vobj::Nsimd(),{
|
||||||
auto stmp=coalescedRead(vtmp);
|
auto stmp=coalescedRead(vtmp);
|
||||||
coalescedWrite(me[ss],stmp);
|
coalescedWrite(me[ss],stmp);
|
||||||
});
|
});
|
||||||
|
#endif
|
||||||
me.ViewClose();
|
me.ViewClose();
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
@ -539,12 +539,6 @@ inline void acceleratorCopySynchronise(void) { auto discard=hipStreamSynchronize
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes)
|
|
||||||
{
|
|
||||||
acceleratorCopyDeviceToDeviceAsynch(from,to,bytes);
|
|
||||||
acceleratorCopySynchronise();
|
|
||||||
}
|
|
||||||
|
|
||||||
//////////////////////////////////////////////
|
//////////////////////////////////////////////
|
||||||
// CPU Target - No accelerator just thread instead
|
// CPU Target - No accelerator just thread instead
|
||||||
//////////////////////////////////////////////
|
//////////////////////////////////////////////
|
||||||
@ -553,7 +547,6 @@ inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes)
|
|||||||
|
|
||||||
#undef GRID_SIMT
|
#undef GRID_SIMT
|
||||||
|
|
||||||
|
|
||||||
inline void acceleratorMem(void)
|
inline void acceleratorMem(void)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
@ -656,6 +649,12 @@ accelerator_inline void acceleratorFence(void)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes)
|
||||||
|
{
|
||||||
|
acceleratorCopyDeviceToDeviceAsynch(from,to,bytes);
|
||||||
|
acceleratorCopySynchronise();
|
||||||
|
}
|
||||||
|
|
||||||
template<class T> void acceleratorPut(T& dev,T&host)
|
template<class T> void acceleratorPut(T& dev,T&host)
|
||||||
{
|
{
|
||||||
acceleratorCopyToDevice(&host,&dev,sizeof(T));
|
acceleratorCopyToDevice(&host,&dev,sizeof(T));
|
||||||
|
@ -2,11 +2,11 @@
|
|||||||
--enable-comms=mpi \
|
--enable-comms=mpi \
|
||||||
--enable-simd=GPU \
|
--enable-simd=GPU \
|
||||||
--enable-shm=nvlink \
|
--enable-shm=nvlink \
|
||||||
--enable-gen-simd-width=64 \
|
|
||||||
--enable-accelerator=cuda \
|
--enable-accelerator=cuda \
|
||||||
|
--enable-gen-simd-width=64 \
|
||||||
|
--disable-gparity \
|
||||||
--with-lime=/mnt/lustre/tursafs1/home/tc002/tc002/dc-boyl1/spack/spack/opt/spack/linux-rhel8-zen/gcc-8.4.1/c-lime-2-3-9-e6wxqrid6rqmd45z7n32dxkvkykpvyez \
|
--with-lime=/mnt/lustre/tursafs1/home/tc002/tc002/dc-boyl1/spack/spack/opt/spack/linux-rhel8-zen/gcc-8.4.1/c-lime-2-3-9-e6wxqrid6rqmd45z7n32dxkvkykpvyez \
|
||||||
--enable-accelerator-cshift \
|
|
||||||
--disable-unified \
|
--disable-unified \
|
||||||
CXX=nvcc \
|
CXX=nvcc \
|
||||||
LDFLAGS="-cudart shared " \
|
LDFLAGS="-cudart shared -lcublas " \
|
||||||
CXXFLAGS="-ccbin mpicxx -gencode arch=compute_80,code=sm_80 -std=c++14 -cudart shared"
|
CXXFLAGS="-ccbin mpicxx -gencode arch=compute_80,code=sm_80 -std=c++17 -cudart shared --diag-suppress 177,550,611"
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
module load cuda/11.4.1 openmpi/4.1.1-cuda11.4.1 ucx/1.12.0-cuda11.4.1
|
module load cuda/12.3
|
||||||
#module load cuda/11.4.1 openmpi/4.1.1 ucx/1.10.1
|
module load ucx/1.15.0-cuda12.3
|
||||||
export PREFIX=/home/tc002/tc002/shared/env/prefix/
|
module load openmpi/4.1.5-cuda12.3
|
||||||
export LD_LIBRARY_PATH=$PREFIX/lib/:$LD_LIBRARY_PATH
|
source /home/dp207/dp207/shared/env/production/env-base.sh
|
||||||
|
source /home/dp207/dp207/shared/env/production/env-gpu.sh
|
||||||
unset SBATCH_EXPORT
|
unset SBATCH_EXPORT
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user