mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 21:44:33 +00:00 
			
		
		
		
	Compare commits
	
		
			6 Commits
		
	
	
		
			832fc08809
			...
			c4b9f71357
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					c4b9f71357 | ||
| 
						 | 
					394e506aea | ||
| 
						 | 
					e19b26341b | ||
| 
						 | 
					cfe1b13225 | ||
| 
						 | 
					890c5ea1cd | ||
| 
						 | 
					a87378d3b6 | 
@@ -236,11 +236,18 @@ public:
 | 
			
		||||
  template<class sobj> inline Lattice<vobj> & operator = (const sobj & r){
 | 
			
		||||
    vobj vtmp;
 | 
			
		||||
    vtmp = r;
 | 
			
		||||
#if 1
 | 
			
		||||
    auto me  = View(CpuWrite);
 | 
			
		||||
    thread_for(ss,me.size(),{
 | 
			
		||||
       me[ss]= r;
 | 
			
		||||
      });
 | 
			
		||||
#else    
 | 
			
		||||
    auto me  = View(AcceleratorWrite);
 | 
			
		||||
    accelerator_for(ss,me.size(),vobj::Nsimd(),{
 | 
			
		||||
	auto stmp=coalescedRead(vtmp);
 | 
			
		||||
	coalescedWrite(me[ss],stmp);
 | 
			
		||||
    });
 | 
			
		||||
#endif    
 | 
			
		||||
    me.ViewClose();
 | 
			
		||||
    return *this;
 | 
			
		||||
  }
 | 
			
		||||
 
 | 
			
		||||
@@ -539,12 +539,6 @@ inline void acceleratorCopySynchronise(void) { auto discard=hipStreamSynchronize
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes)
 | 
			
		||||
{
 | 
			
		||||
  acceleratorCopyDeviceToDeviceAsynch(from,to,bytes);
 | 
			
		||||
  acceleratorCopySynchronise();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//////////////////////////////////////////////
 | 
			
		||||
// CPU Target - No accelerator just thread instead
 | 
			
		||||
//////////////////////////////////////////////
 | 
			
		||||
@@ -553,7 +547,6 @@ inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes)
 | 
			
		||||
 | 
			
		||||
#undef GRID_SIMT
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
inline void acceleratorMem(void)
 | 
			
		||||
{
 | 
			
		||||
  /*
 | 
			
		||||
@@ -656,6 +649,12 @@ accelerator_inline void acceleratorFence(void)
 | 
			
		||||
  return;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes)
 | 
			
		||||
{
 | 
			
		||||
  acceleratorCopyDeviceToDeviceAsynch(from,to,bytes);
 | 
			
		||||
  acceleratorCopySynchronise();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class T> void acceleratorPut(T& dev,T&host)
 | 
			
		||||
{
 | 
			
		||||
  acceleratorCopyToDevice(&host,&dev,sizeof(T));
 | 
			
		||||
 
 | 
			
		||||
@@ -2,11 +2,11 @@
 | 
			
		||||
    --enable-comms=mpi \
 | 
			
		||||
    --enable-simd=GPU \
 | 
			
		||||
    --enable-shm=nvlink \
 | 
			
		||||
    --enable-gen-simd-width=64 \
 | 
			
		||||
    --enable-accelerator=cuda \
 | 
			
		||||
    --enable-gen-simd-width=64 \
 | 
			
		||||
    --disable-gparity \
 | 
			
		||||
    --with-lime=/mnt/lustre/tursafs1/home/tc002/tc002/dc-boyl1/spack/spack/opt/spack/linux-rhel8-zen/gcc-8.4.1/c-lime-2-3-9-e6wxqrid6rqmd45z7n32dxkvkykpvyez \
 | 
			
		||||
    --enable-accelerator-cshift \
 | 
			
		||||
    --disable-unified \
 | 
			
		||||
    CXX=nvcc \
 | 
			
		||||
    LDFLAGS="-cudart shared " \
 | 
			
		||||
    CXXFLAGS="-ccbin mpicxx -gencode arch=compute_80,code=sm_80 -std=c++14 -cudart shared"
 | 
			
		||||
    LDFLAGS="-cudart shared -lcublas " \
 | 
			
		||||
    CXXFLAGS="-ccbin mpicxx -gencode arch=compute_80,code=sm_80 -std=c++17 -cudart shared --diag-suppress 177,550,611"
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,7 @@
 | 
			
		||||
module load cuda/11.4.1  openmpi/4.1.1-cuda11.4.1  ucx/1.12.0-cuda11.4.1  
 | 
			
		||||
#module load cuda/11.4.1 openmpi/4.1.1 ucx/1.10.1
 | 
			
		||||
export PREFIX=/home/tc002/tc002/shared/env/prefix/
 | 
			
		||||
export LD_LIBRARY_PATH=$PREFIX/lib/:$LD_LIBRARY_PATH
 | 
			
		||||
module load cuda/12.3 
 | 
			
		||||
module load ucx/1.15.0-cuda12.3  
 | 
			
		||||
module load openmpi/4.1.5-cuda12.3
 | 
			
		||||
source /home/dp207/dp207/shared/env/production/env-base.sh 
 | 
			
		||||
source /home/dp207/dp207/shared/env/production/env-gpu.sh 
 | 
			
		||||
unset SBATCH_EXPORT
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user