CPU compile ordering is important

Compile options for tursa update
Tursa configure update
2025-11-04 05:54:32 +00:00 · 2024-05-21 02:22:32 +01:00 · 2024-05-21 02:10:04 +01:00 · 2024-05-21 01:14:27 +01:00 · 2024-05-21 01:14:08 +01:00 · 2024-05-20 20:08:31 +01:00
4 changed files with 22 additions and 15 deletions
--- a/Grid/lattice/Lattice_base.h
+++ b/Grid/lattice/Lattice_base.h
@@ -236,11 +236,18 @@ public:
  // Broadcast assignment: writes the single object r into every element
  // of this lattice's view (indices 0 .. me.size()-1) and returns *this.
  template<class sobj> inline Lattice<vobj> & operator = (const sobj & r){
    // r converted once to the lattice element type.
    // NOTE(review): vtmp is only read in the disabled #else branch below;
    // with "#if 1" it appears unused — confirm before removing.
    vobj vtmp;
    vtmp = r;
 #if 1
    // Active path: take a view requested with CpuWrite and fill it with
    // thread_for, assigning r to each element directly.
    auto me  = View(CpuWrite);
    thread_for(ss,me.size(),{
       me[ss]= r;
      });
 #else    
    // Disabled path: take a view requested with AcceleratorWrite and fill it
    // with accelerator_for, going through coalescedRead/coalescedWrite of vtmp.
    auto me  = View(AcceleratorWrite);
    accelerator_for(ss,me.size(),vobj::Nsimd(),{
 	auto stmp=coalescedRead(vtmp);
 	coalescedWrite(me[ss],stmp);
    });
 #endif    
    // The view opened by whichever branch was compiled is closed here,
    // after the fill loop.
    me.ViewClose();
    return *this;
  }
--- a/Grid/threads/Accelerator.h
+++ b/Grid/threads/Accelerator.h
@@ -539,12 +539,6 @@ inline void acceleratorCopySynchronise(void) { auto discard=hipStreamSynchronize
 #endif
 // Copy wrapper: forwards (from,to,bytes) to
 // acceleratorCopyDeviceToDeviceAsynch and then calls
 // acceleratorCopySynchronise() before returning.
 // NOTE(review): presumably this makes the copy complete on return —
 // confirm against the definitions of the two helpers for each backend.
 inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes)
 {
  acceleratorCopyDeviceToDeviceAsynch(from,to,bytes);
  acceleratorCopySynchronise();
 }
 //////////////////////////////////////////////
 // CPU Target - No accelerator just thread instead
 //////////////////////////////////////////////
@@ -553,7 +547,6 @@ inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes)
 #undef GRID_SIMT
 inline void acceleratorMem(void)
 {
  /*
@@ -656,6 +649,12 @@ accelerator_inline void acceleratorFence(void)
  return;
 }
 // Copy wrapper: forwards (from,to,bytes) to
 // acceleratorCopyDeviceToDeviceAsynch and then calls
 // acceleratorCopySynchronise() before returning.
 // NOTE(review): presumably this makes the copy complete on return —
 // confirm against the definitions of the two helpers for each backend.
 inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes)
 {
  acceleratorCopyDeviceToDeviceAsynch(from,to,bytes);
  acceleratorCopySynchronise();
 }
 template<class T> void acceleratorPut(T& dev,T&host)
 {
  acceleratorCopyToDevice(&host,&dev,sizeof(T));
--- a/systems/Tursa/config-command
+++ b/systems/Tursa/config-command
@@ -2,11 +2,11 @@
    --enable-comms=mpi \
    --enable-simd=GPU \
    --enable-shm=nvlink \
    --enable-gen-simd-width=64 \
    --enable-accelerator=cuda \
    --enable-gen-simd-width=64 \
    --disable-gparity \
    --with-lime=/mnt/lustre/tursafs1/home/tc002/tc002/dc-boyl1/spack/spack/opt/spack/linux-rhel8-zen/gcc-8.4.1/c-lime-2-3-9-e6wxqrid6rqmd45z7n32dxkvkykpvyez \
    --enable-accelerator-cshift \
    --disable-unified \
    CXX=nvcc \
-    LDFLAGS="-cudart shared " \
+    LDFLAGS="-cudart shared -lcublas " \
-    CXXFLAGS="-ccbin mpicxx -gencode arch=compute_80,code=sm_80 -std=c++14 -cudart shared"
+    CXXFLAGS="-ccbin mpicxx -gencode arch=compute_80,code=sm_80 -std=c++17 -cudart shared --diag-suppress 177,550,611"
--- a/systems/Tursa/sourceme.sh
+++ b/systems/Tursa/sourceme.sh
@@ -1,6 +1,7 @@
-module load cuda/11.4.1  openmpi/4.1.1-cuda11.4.1  ucx/1.12.0-cuda11.4.1  
+module load cuda/12.3 
-#module load cuda/11.4.1 openmpi/4.1.1 ucx/1.10.1
+module load ucx/1.15.0-cuda12.3  
-export PREFIX=/home/tc002/tc002/shared/env/prefix/
+module load openmpi/4.1.5-cuda12.3
-export LD_LIBRARY_PATH=$PREFIX/lib/:$LD_LIBRARY_PATH
+source /home/dp207/dp207/shared/env/production/env-base.sh 
 source /home/dp207/dp207/shared/env/production/env-gpu.sh 
 unset SBATCH_EXPORT
Author	SHA1	Message	Date
Peter Boyle	c4b9f71357	CPU compile ordering is important	2024-05-21 02:22:32 +01:00
Peter Boyle	394e506aea	Compile options for tursa update	2024-05-21 02:10:04 +01:00
Peter Boyle	e19b26341b	Tursa configure update	2024-05-21 01:14:27 +01:00
Peter Boyle	cfe1b13225	Back out zero change	2024-05-21 01:14:08 +01:00
Peter Boyle	890c5ea1cd	Warning disable	2024-05-20 20:08:31 +01:00
Peter Boyle	a87378d3b6	Update	2024-05-20 20:08:31 +01:00