mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	Compare commits
	
		
			6 Commits
		
	
	
		
			832fc08809
			...
			c4b9f71357
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					c4b9f71357 | ||
| 
						 | 
					394e506aea | ||
| 
						 | 
					e19b26341b | ||
| 
						 | 
					cfe1b13225 | ||
| 
						 | 
					890c5ea1cd | ||
| 
						 | 
					a87378d3b6 | 
@@ -236,11 +236,18 @@ public:
 | 
				
			|||||||
  template<class sobj> inline Lattice<vobj> & operator = (const sobj & r){
 | 
					  template<class sobj> inline Lattice<vobj> & operator = (const sobj & r){
 | 
				
			||||||
    vobj vtmp;
 | 
					    vobj vtmp;
 | 
				
			||||||
    vtmp = r;
 | 
					    vtmp = r;
 | 
				
			||||||
 | 
					#if 1
 | 
				
			||||||
 | 
					    auto me  = View(CpuWrite);
 | 
				
			||||||
 | 
					    thread_for(ss,me.size(),{
 | 
				
			||||||
 | 
					       me[ss]= r;
 | 
				
			||||||
 | 
					      });
 | 
				
			||||||
 | 
					#else    
 | 
				
			||||||
    auto me  = View(AcceleratorWrite);
 | 
					    auto me  = View(AcceleratorWrite);
 | 
				
			||||||
    accelerator_for(ss,me.size(),vobj::Nsimd(),{
 | 
					    accelerator_for(ss,me.size(),vobj::Nsimd(),{
 | 
				
			||||||
	auto stmp=coalescedRead(vtmp);
 | 
						auto stmp=coalescedRead(vtmp);
 | 
				
			||||||
	coalescedWrite(me[ss],stmp);
 | 
						coalescedWrite(me[ss],stmp);
 | 
				
			||||||
    });
 | 
					    });
 | 
				
			||||||
 | 
					#endif    
 | 
				
			||||||
    me.ViewClose();
 | 
					    me.ViewClose();
 | 
				
			||||||
    return *this;
 | 
					    return *this;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -539,12 +539,6 @@ inline void acceleratorCopySynchronise(void) { auto discard=hipStreamSynchronize
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  acceleratorCopyDeviceToDeviceAsynch(from,to,bytes);
 | 
					 | 
				
			||||||
  acceleratorCopySynchronise();
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
//////////////////////////////////////////////
 | 
					//////////////////////////////////////////////
 | 
				
			||||||
// CPU Target - No accelerator just thread instead
 | 
					// CPU Target - No accelerator just thread instead
 | 
				
			||||||
//////////////////////////////////////////////
 | 
					//////////////////////////////////////////////
 | 
				
			||||||
@@ -553,7 +547,6 @@ inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#undef GRID_SIMT
 | 
					#undef GRID_SIMT
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
inline void acceleratorMem(void)
 | 
					inline void acceleratorMem(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  /*
 | 
					  /*
 | 
				
			||||||
@@ -656,6 +649,12 @@ accelerator_inline void acceleratorFence(void)
 | 
				
			|||||||
  return;
 | 
					  return;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  acceleratorCopyDeviceToDeviceAsynch(from,to,bytes);
 | 
				
			||||||
 | 
					  acceleratorCopySynchronise();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class T> void acceleratorPut(T& dev,T&host)
 | 
					template<class T> void acceleratorPut(T& dev,T&host)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  acceleratorCopyToDevice(&host,&dev,sizeof(T));
 | 
					  acceleratorCopyToDevice(&host,&dev,sizeof(T));
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -2,11 +2,11 @@
 | 
				
			|||||||
    --enable-comms=mpi \
 | 
					    --enable-comms=mpi \
 | 
				
			||||||
    --enable-simd=GPU \
 | 
					    --enable-simd=GPU \
 | 
				
			||||||
    --enable-shm=nvlink \
 | 
					    --enable-shm=nvlink \
 | 
				
			||||||
    --enable-gen-simd-width=64 \
 | 
					 | 
				
			||||||
    --enable-accelerator=cuda \
 | 
					    --enable-accelerator=cuda \
 | 
				
			||||||
 | 
					    --enable-gen-simd-width=64 \
 | 
				
			||||||
 | 
					    --disable-gparity \
 | 
				
			||||||
    --with-lime=/mnt/lustre/tursafs1/home/tc002/tc002/dc-boyl1/spack/spack/opt/spack/linux-rhel8-zen/gcc-8.4.1/c-lime-2-3-9-e6wxqrid6rqmd45z7n32dxkvkykpvyez \
 | 
					    --with-lime=/mnt/lustre/tursafs1/home/tc002/tc002/dc-boyl1/spack/spack/opt/spack/linux-rhel8-zen/gcc-8.4.1/c-lime-2-3-9-e6wxqrid6rqmd45z7n32dxkvkykpvyez \
 | 
				
			||||||
    --enable-accelerator-cshift \
 | 
					 | 
				
			||||||
    --disable-unified \
 | 
					    --disable-unified \
 | 
				
			||||||
    CXX=nvcc \
 | 
					    CXX=nvcc \
 | 
				
			||||||
    LDFLAGS="-cudart shared " \
 | 
					    LDFLAGS="-cudart shared -lcublas " \
 | 
				
			||||||
    CXXFLAGS="-ccbin mpicxx -gencode arch=compute_80,code=sm_80 -std=c++14 -cudart shared"
 | 
					    CXXFLAGS="-ccbin mpicxx -gencode arch=compute_80,code=sm_80 -std=c++17 -cudart shared --diag-suppress 177,550,611"
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,6 +1,7 @@
 | 
				
			|||||||
module load cuda/11.4.1  openmpi/4.1.1-cuda11.4.1  ucx/1.12.0-cuda11.4.1  
 | 
					module load cuda/12.3 
 | 
				
			||||||
#module load cuda/11.4.1 openmpi/4.1.1 ucx/1.10.1
 | 
					module load ucx/1.15.0-cuda12.3  
 | 
				
			||||||
export PREFIX=/home/tc002/tc002/shared/env/prefix/
 | 
					module load openmpi/4.1.5-cuda12.3
 | 
				
			||||||
export LD_LIBRARY_PATH=$PREFIX/lib/:$LD_LIBRARY_PATH
 | 
					source /home/dp207/dp207/shared/env/production/env-base.sh 
 | 
				
			||||||
 | 
					source /home/dp207/dp207/shared/env/production/env-gpu.sh 
 | 
				
			||||||
unset SBATCH_EXPORT
 | 
					unset SBATCH_EXPORT
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user