mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
Compare commits
6 Commits
832fc08809
...
c4b9f71357
Author | SHA1 | Date | |
---|---|---|---|
|
c4b9f71357 | ||
|
394e506aea | ||
|
e19b26341b | ||
|
cfe1b13225 | ||
|
890c5ea1cd | ||
|
a87378d3b6 |
@ -236,11 +236,18 @@ public:
|
||||
template<class sobj> inline Lattice<vobj> & operator = (const sobj & r){
|
||||
vobj vtmp;
|
||||
vtmp = r;
|
||||
#if 1
|
||||
auto me = View(CpuWrite);
|
||||
thread_for(ss,me.size(),{
|
||||
me[ss]= r;
|
||||
});
|
||||
#else
|
||||
auto me = View(AcceleratorWrite);
|
||||
accelerator_for(ss,me.size(),vobj::Nsimd(),{
|
||||
auto stmp=coalescedRead(vtmp);
|
||||
coalescedWrite(me[ss],stmp);
|
||||
});
|
||||
#endif
|
||||
me.ViewClose();
|
||||
return *this;
|
||||
}
|
||||
|
@ -539,12 +539,6 @@ inline void acceleratorCopySynchronise(void) { auto discard=hipStreamSynchronize
|
||||
|
||||
#endif
|
||||
|
||||
inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes)
|
||||
{
|
||||
acceleratorCopyDeviceToDeviceAsynch(from,to,bytes);
|
||||
acceleratorCopySynchronise();
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// CPU Target - No accelerator just thread instead
|
||||
//////////////////////////////////////////////
|
||||
@ -553,7 +547,6 @@ inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes)
|
||||
|
||||
#undef GRID_SIMT
|
||||
|
||||
|
||||
inline void acceleratorMem(void)
|
||||
{
|
||||
/*
|
||||
@ -656,6 +649,12 @@ accelerator_inline void acceleratorFence(void)
|
||||
return;
|
||||
}
|
||||
|
||||
inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes)
|
||||
{
|
||||
acceleratorCopyDeviceToDeviceAsynch(from,to,bytes);
|
||||
acceleratorCopySynchronise();
|
||||
}
|
||||
|
||||
template<class T> void acceleratorPut(T& dev,T&host)
|
||||
{
|
||||
acceleratorCopyToDevice(&host,&dev,sizeof(T));
|
||||
|
@ -2,11 +2,11 @@
|
||||
--enable-comms=mpi \
|
||||
--enable-simd=GPU \
|
||||
--enable-shm=nvlink \
|
||||
--enable-gen-simd-width=64 \
|
||||
--enable-accelerator=cuda \
|
||||
--enable-gen-simd-width=64 \
|
||||
--disable-gparity \
|
||||
--with-lime=/mnt/lustre/tursafs1/home/tc002/tc002/dc-boyl1/spack/spack/opt/spack/linux-rhel8-zen/gcc-8.4.1/c-lime-2-3-9-e6wxqrid6rqmd45z7n32dxkvkykpvyez \
|
||||
--enable-accelerator-cshift \
|
||||
--disable-unified \
|
||||
CXX=nvcc \
|
||||
LDFLAGS="-cudart shared " \
|
||||
CXXFLAGS="-ccbin mpicxx -gencode arch=compute_80,code=sm_80 -std=c++14 -cudart shared"
|
||||
LDFLAGS="-cudart shared -lcublas " \
|
||||
CXXFLAGS="-ccbin mpicxx -gencode arch=compute_80,code=sm_80 -std=c++17 -cudart shared --diag-suppress 177,550,611"
|
||||
|
@ -1,6 +1,7 @@
|
||||
module load cuda/11.4.1 openmpi/4.1.1-cuda11.4.1 ucx/1.12.0-cuda11.4.1
|
||||
#module load cuda/11.4.1 openmpi/4.1.1 ucx/1.10.1
|
||||
export PREFIX=/home/tc002/tc002/shared/env/prefix/
|
||||
export LD_LIBRARY_PATH=$PREFIX/lib/:$LD_LIBRARY_PATH
|
||||
module load cuda/12.3
|
||||
module load ucx/1.15.0-cuda12.3
|
||||
module load openmpi/4.1.5-cuda12.3
|
||||
source /home/dp207/dp207/shared/env/production/env-base.sh
|
||||
source /home/dp207/dp207/shared/env/production/env-gpu.sh
|
||||
unset SBATCH_EXPORT
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user