mirror of https://github.com/paboyle/Grid.git synced 2025-06-22 17:52:02 +01:00

Compare commits


5 Commits

2 changed files with 6 additions and 9 deletions

View File

@@ -191,7 +191,7 @@ public:
       std::cout << GridLogMessage << "\tAxpyNorm   " << AxpyNormTimer.Elapsed() <<std::endl;
       std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
-      std::cout << GridLogMessage << "\tMobius flop rate " << DwfFlops/ usecs<< " Gflops " <<std::endl;
+      std::cout << GridLogDebug   << "\tMobius flop rate " << DwfFlops/ usecs<< " Gflops " <<std::endl;
       if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
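
This hunk only demotes the per-solve Mobius flop-rate report from the message channel to the debug channel, so default runs no longer print it. As a rough illustration of why changing the channel silences the line, here is a minimal, hypothetical sketch of a channel-gated logging stream; this is not Grid's actual Logger class, and the active flag and channel names are assumptions for illustration only:

#include <iostream>
#include <string>

// Hypothetical stand-in for a leveled log channel: output is simply
// dropped unless the channel is enabled.
struct LogChannel {
  std::string name;
  bool active;
  template <class T> LogChannel &operator<<(const T &x) {
    if (active) std::cout << x;
    return *this;
  }
  // Accept manipulators such as std::endl.
  LogChannel &operator<<(std::ostream &(*manip)(std::ostream &)) {
    if (active) manip(std::cout);
    return *this;
  }
};

LogChannel GridLogMessage{"Message", true};   // on by default
LogChannel GridLogDebug  {"Debug",   false};  // off unless explicitly enabled

int main() {
  double DwfFlops = 1.0e6, usecs = 2.0e3;
  GridLogMessage << "\tMobius flop rate " << DwfFlops / usecs << " Gflops " << std::endl; // prints
  GridLogDebug   << "\tMobius flop rate " << DwfFlops / usecs << " Gflops " << std::endl; // suppressed
  return 0;
}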

View File

@@ -211,25 +211,22 @@ inline typename vobj::scalar_objectD sumD_gpu_small(const vobj *lat, Integer osi
   assert(ok);
   Integer smemSize = numThreads * sizeof(sobj);
-  // UVM seems to be buggy under later CUDA drivers
-  // This fails on A100 and driver 5.30.02 / CUDA 12.1
-  // Fails with multiple NVCC versions back to 11.4,
-  // which worked with earlier drivers.
-  // Not sure which driver had first fail and this bears checking
-  // Is awkward as must install multiple driver versions
+  // Move out of UVM
+  // Turns out I had messed up the synchronise after move to compute stream
+  // as running this on the default stream fools the synchronise
 #undef UVM_BLOCK_BUFFER
 #ifndef UVM_BLOCK_BUFFER
   commVector<sobj> buffer(numBlocks);
   sobj *buffer_v = &buffer[0];
   sobj result;
-  reduceKernel<<< numBlocks, numThreads, smemSize >>>(lat, buffer_v, size);
+  reduceKernel<<< numBlocks, numThreads, smemSize, computeStream >>>(lat, buffer_v, size);
   accelerator_barrier();
   acceleratorCopyFromDevice(buffer_v,&result,sizeof(result));
 #else
   Vector<sobj> buffer(numBlocks);
   sobj *buffer_v = &buffer[0];
   sobj result;
-  reduceKernel<<< numBlocks, numThreads, smemSize >>>(lat, buffer_v, size);
+  reduceKernel<<< numBlocks, numThreads, smemSize, computeStream >>>(lat, buffer_v, size);
   accelerator_barrier();
   result = *buffer_v;
 #endif
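
The new comments state the actual diagnosis: reduceKernel had been launched on the default stream, while the subsequent accelerator_barrier() only synchronised the compute stream, so the barrier could report completion before the reduction had written its partial sums. Below is a minimal standalone CUDA sketch of that hazard, assuming accelerator_barrier() boils down to cudaStreamSynchronize(computeStream); the fill kernel, the buffers, and the non-blocking stream flag are illustrative assumptions, not Grid code:

#include <cstdio>
#include <cuda_runtime.h>

__global__ void fill(int *out) { out[0] = 42; }

int main() {
  // Non-blocking stream: no implicit ordering against the legacy default
  // stream, so the mismatch below is a genuine race.
  cudaStream_t computeStream;
  cudaStreamCreateWithFlags(&computeStream, cudaStreamNonBlocking);

  int *d_out, h_out = -1;
  cudaMalloc(&d_out, sizeof(int));

  // Buggy pattern: the kernel goes to the default stream (stream 0)...
  cudaMemset(d_out, 0, sizeof(int));
  cudaDeviceSynchronize();
  fill<<<1, 1>>>(d_out);
  // ...but the "barrier" waits only on computeStream, so nothing orders the
  // copy-back after the kernel: the synchronise is fooled.
  cudaStreamSynchronize(computeStream);
  cudaMemcpyAsync(&h_out, d_out, sizeof(int), cudaMemcpyDeviceToHost, computeStream);
  cudaStreamSynchronize(computeStream);
  printf("buggy pattern read %d (may be a stale 0)\n", h_out);

  // Fixed pattern, as in the commit: launch on computeStream so the same
  // stream synchronise really covers the kernel before the copy-back.
  cudaMemset(d_out, 0, sizeof(int));
  cudaDeviceSynchronize();
  fill<<<1, 1, 0, computeStream>>>(d_out);
  cudaStreamSynchronize(computeStream);
  cudaMemcpyAsync(&h_out, d_out, sizeof(int), cudaMemcpyDeviceToHost, computeStream);
  cudaStreamSynchronize(computeStream);
  printf("fixed pattern read %d\n", h_out);

  cudaFree(d_out);
  cudaStreamDestroy(computeStream);
  return 0;
}

The same reasoning covers both branches of the #ifdef: whether the partial sums land in a commVector (explicit device memory) or a Vector (UVM), the reduction kernel and the barrier must share a stream for the copy-back to be safe.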