Merge branch 'develop' of https://github.com/paboyle/Grid into develop

2026-07-21 11:03:27 +01:00 · 2024-06-10 15:09:25 -04:00
parent dc80b08969 a49a161f8d
commit 07a07b6fa3
1 changed file with 38 additions and 24 deletions
@@ -9,14 +9,19 @@ inline typename vobj::scalar_objectD sumD_gpu_tensor(const vobj *lat, Integer os
 {
  typedef typename vobj::scalar_object sobj;
  typedef typename vobj::scalar_objectD sobjD;
-  sobj *mysum =(sobj *) malloc_shared(sizeof(sobj),*theGridAccelerator);
+  //  sobj *mysum =(sobj *) malloc_shared(sizeof(sobj),*theGridAccelerator);
  //  sobj *mysum =(sobj *) malloc(sizeof(sobj));
  sobj identity; zeroit(identity);
  sobj ret ; 
  Integer nsimd= vobj::Nsimd();
  {
    sycl::buffer<sobj, 1> abuff(&ret, {1});
    theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
-     auto Reduction = cl::sycl::reduction(mysum,identity,std::plus<>());
+
      auto Reduction = cl::sycl::reduction(abuff,cgh,identity,std::plus<>());
      cgh.parallel_for(cl::sycl::range<1>{osites},
 		       Reduction,
 		       [=] (cl::sycl::id<1> item, auto &sum) {
@@ -24,10 +29,14 @@ inline typename vobj::scalar_objectD sumD_gpu_tensor(const vobj *lat, Integer os
 			 sum +=Reduce(lat[osite]);
 		       });
    });
  }
  theGridAccelerator->wait();
-  ret = mysum[0];
+  //  acceleratorCopyFromDevice(mysum,&ret,sizeof(sobj));
-  free(mysum,*theGridAccelerator);
+  //  ret = mysum[0];
  sobjD dret; convertType(dret,ret);
  //  free(mysum,*theGridAccelerator);
  //  free(mysum);
  return dret;
 }
@@ -73,19 +82,24 @@ inline typename vobj::scalar_object sum_gpu_large(const vobj *lat, Integer osite
 template<class Word> Word svm_xor(Word *vec,uint64_t L)
 {
  Word xorResult; xorResult = 0;
-  Word *d_sum =(Word *)cl::sycl::malloc_shared(sizeof(Word),*theGridAccelerator);
+  //  Word *d_sum =(Word *)cl::sycl::malloc_shared(sizeof(Word),*theGridAccelerator);
  Word identity;  identity=0;
  Word ret;
  {
    sycl::buffer<Word, 1> abuff(&ret, {1});
    theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
-     auto Reduction = cl::sycl::reduction(d_sum,identity,std::bit_xor<>());
+      //     auto Reduction = cl::sycl::reduction(d_sum,identity,std::bit_xor<>());
      auto Reduction = cl::sycl::reduction(abuff,cgh,identity,std::bit_xor<>());
      cgh.parallel_for(cl::sycl::range<1>{L},
 		       Reduction,
 		       [=] (cl::sycl::id<1> index, auto &sum) {
 			 sum ^=vec[index];
 		       });
    });
  }
  theGridAccelerator->wait();
-  Word ret = d_sum[0];
+  //  ret = d_sum[0];
-  free(d_sum,*theGridAccelerator);
+  //  free(d_sum,*theGridAccelerator);
  return ret;
 }