OneMKL batched blas starting

2025-07-19 06:07:06 +01:00 · 2024-03-05 23:58:20 +00:00
parent 30228214f7
commit 21bc8c24df
1 changed files with 33 additions and 7 deletions
--- a/Grid/algorithms/blas/BatchedBlas.h
+++ b/Grid/algorithms/blas/BatchedBlas.h
@@ -34,9 +34,14 @@ Author: Peter Boyle <pboyle@bnl.gov>
 #include <hipblas/hipblas.h>
 #endif
 #ifdef GRID_SYCL
-#error // need oneMKL version
+#include <oneapi/mkl.hpp>
+#endif
+#if 0
+#define GRID_ONE_MKL
+#endif
+#ifdef GRID_ONE_MKL
+#include <oneapi/mkl.hpp>
 #endif
-
 ///////////////////////////////////////////////////////////////////////	  
 // Need to rearrange lattice data to be in the right format for a
 // batched multiply. Might as well make these static, dense packed
@@ -49,9 +54,12 @@ NAMESPACE_BEGIN(Grid);
  typedef cudablasHandle_t gridblasHandle_t;
 #endif
 #ifdef GRID_SYCL
-  typedef int32_t gridblasHandle_t;
+  typedef cl::sycl::queue *gridblasHandle_t;
 #endif
-#if !defined(GRID_SYCL) && !defined(GRID_CUDA) && !defined(GRID_HIP)
+#ifdef GRID_ONE_MKL
+  typedef cl::sycl::queue *gridblasHandle_t;
+#endif
+#if !defined(GRID_SYCL) && !defined(GRID_CUDA) && !defined(GRID_HIP) && !defined(GRID_ONE_MKL)
  typedef int32_t gridblasHandle_t;
 #endif

@@ -76,6 +84,12 @@ public:
      hipblasCreate(&gridblasHandle);
 #endif
 #ifdef GRID_SYCL
+      gridblasHandle = theGridAccelerator;
+#endif
+#ifdef GRID_ONE_MKL
+      cl::sycl::cpu_selector selector;
+      cl::sycl::device selectedDevice { selector };
+      gridblasHandle =new sycl::queue (selectedDevice);
 #endif
      gridblasInit=1;
    }
@@ -110,6 +124,9 @@ public:
 #endif
 #ifdef GRID_SYCL
    accelerator_barrier();
+#endif
+#ifdef GRID_ONE_MKL
+    gridblasHandle->wait();
 #endif
  }
  
@@ -644,10 +661,19 @@ public:
 			      (cuDoubleComplex *) Cmn, ldc, sdc,
 			      batchCount);
 #endif
-#ifdef GRID_SYCL
-     #warning "oneMKL implementation not made "
+#if defined(GRID_SYCL) || defined(GRID_ONE_MKL)
+    oneapi::mkl::blas::column_major::gemm_batch(*gridblasHandle,
+						oneapi::mkl::transpose::N,
+						oneapi::mkl::transpose::N,
+						m,n,k,
+						alpha,
+						(const ComplexD *)Amk,lda,sda,
+						(const ComplexD *)Bkn,ldb,sdb,
+						beta,
+						(ComplexD *)Cmn,ldc,sdc,
+						batchCount);
 #endif
-#if !defined(GRID_SYCL) && !defined(GRID_CUDA) && !defined(GRID_HIP)
+#if !defined(GRID_SYCL) && !defined(GRID_CUDA) && !defined(GRID_HIP) && !defined(GRID_ONE_MKL)
     // Need a default/reference implementation
     for (int p = 0; p < batchCount; ++p) {
       for (int mm = 0; mm < m; ++mm) {