CPU compile ordering is important

Compile options for tursa update
Tursa configure update
2025-11-01 20:44:33 +00:00 · 2024-05-21 02:22:32 +01:00 · 2024-05-21 02:10:04 +01:00 · 2024-05-21 01:14:27 +01:00 · 2024-05-21 01:14:08 +01:00 · 2024-05-20 20:08:31 +01:00
4 changed files with 22 additions and 15 deletions
--- a/Grid/lattice/Lattice_base.h
+++ b/Grid/lattice/Lattice_base.h
@@ -236,11 +236,18 @@ public:
  template<class sobj> inline Lattice<vobj> & operator = (const sobj & r){
    vobj vtmp;
    vtmp = r;
+#if 1
+    auto me  = View(CpuWrite);
+    thread_for(ss,me.size(),{
+       me[ss]= r;
+      });
+#else    
    auto me  = View(AcceleratorWrite);
    accelerator_for(ss,me.size(),vobj::Nsimd(),{
 	auto stmp=coalescedRead(vtmp);
 	coalescedWrite(me[ss],stmp);
    });
+#endif    
    me.ViewClose();
    return *this;
  }
--- a/Grid/threads/Accelerator.h
+++ b/Grid/threads/Accelerator.h
@@ -539,12 +539,6 @@ inline void acceleratorCopySynchronise(void) { auto discard=hipStreamSynchronize

 #endif

-inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes)
-{
-  acceleratorCopyDeviceToDeviceAsynch(from,to,bytes);
-  acceleratorCopySynchronise();
-}
-
 //////////////////////////////////////////////
 // CPU Target - No accelerator just thread instead
 //////////////////////////////////////////////
@@ -553,7 +547,6 @@ inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes)

 #undef GRID_SIMT

-
 inline void acceleratorMem(void)
 {
  /*
@@ -656,6 +649,12 @@ accelerator_inline void acceleratorFence(void)
  return;
 }

+inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes)
+{
+  acceleratorCopyDeviceToDeviceAsynch(from,to,bytes);
+  acceleratorCopySynchronise();
+}
+
 template<class T> void acceleratorPut(T& dev,T&host)
 {
  acceleratorCopyToDevice(&host,&dev,sizeof(T));
--- a/systems/Tursa/config-command
+++ b/systems/Tursa/config-command
@@ -2,11 +2,11 @@
    --enable-comms=mpi \
    --enable-simd=GPU \
    --enable-shm=nvlink \
-    --enable-gen-simd-width=64 \
    --enable-accelerator=cuda \
+    --enable-gen-simd-width=64 \
+    --disable-gparity \
    --with-lime=/mnt/lustre/tursafs1/home/tc002/tc002/dc-boyl1/spack/spack/opt/spack/linux-rhel8-zen/gcc-8.4.1/c-lime-2-3-9-e6wxqrid6rqmd45z7n32dxkvkykpvyez \
-    --enable-accelerator-cshift \
    --disable-unified \
    CXX=nvcc \
-    LDFLAGS="-cudart shared " \
-    CXXFLAGS="-ccbin mpicxx -gencode arch=compute_80,code=sm_80 -std=c++14 -cudart shared"
+    LDFLAGS="-cudart shared -lcublas " \
+    CXXFLAGS="-ccbin mpicxx -gencode arch=compute_80,code=sm_80 -std=c++17 -cudart shared --diag-suppress 177,550,611"
--- a/systems/Tursa/sourceme.sh
+++ b/systems/Tursa/sourceme.sh
@@ -1,6 +1,7 @@
-module load cuda/11.4.1  openmpi/4.1.1-cuda11.4.1  ucx/1.12.0-cuda11.4.1  
-#module load cuda/11.4.1 openmpi/4.1.1 ucx/1.10.1
-export PREFIX=/home/tc002/tc002/shared/env/prefix/
-export LD_LIBRARY_PATH=$PREFIX/lib/:$LD_LIBRARY_PATH
+module load cuda/12.3 
+module load ucx/1.15.0-cuda12.3  
+module load openmpi/4.1.5-cuda12.3
+source /home/dp207/dp207/shared/env/production/env-base.sh 
+source /home/dp207/dp207/shared/env/production/env-gpu.sh 
 unset SBATCH_EXPORT
Author	SHA1	Message	Date
Peter Boyle	c4b9f71357	CPU compile ordering is important	2024-05-21 02:22:32 +01:00
Peter Boyle	394e506aea	Compile options for tursa update	2024-05-21 02:10:04 +01:00
Peter Boyle	e19b26341b	Tursa configure update	2024-05-21 01:14:27 +01:00
Peter Boyle	cfe1b13225	Back out zero change	2024-05-21 01:14:08 +01:00
Peter Boyle	890c5ea1cd	Warning disable	2024-05-20 20:08:31 +01:00
Peter Boyle	a87378d3b6	Update	2024-05-20 20:08:31 +01:00