Mirror of https://github.com/paboyle/Grid.git (synced 2025-10-31 03:54:33 +00:00)
			
		
		
		
	SYCL reduction
This commit is contained in:
		| @@ -28,6 +28,9 @@ Author: Christoph Lehner <christoph@lhnr.de> | |||||||
| #if defined(GRID_CUDA)||defined(GRID_HIP) | #if defined(GRID_CUDA)||defined(GRID_HIP) | ||||||
| #include <Grid/lattice/Lattice_reduction_gpu.h> | #include <Grid/lattice/Lattice_reduction_gpu.h> | ||||||
| #endif | #endif | ||||||
|  | #if defined(GRID_SYCL) | ||||||
|  | #include <Grid/lattice/Lattice_reduction_sycl.h> | ||||||
|  | #endif | ||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
| @@ -127,7 +130,7 @@ inline Double max(const Double *arg, Integer osites) | |||||||
| template<class vobj> | template<class vobj> | ||||||
| inline typename vobj::scalar_object sum(const vobj *arg, Integer osites) | inline typename vobj::scalar_object sum(const vobj *arg, Integer osites) | ||||||
| { | { | ||||||
| #if defined(GRID_CUDA)||defined(GRID_HIP) | #if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL) | ||||||
|   return sum_gpu(arg,osites); |   return sum_gpu(arg,osites); | ||||||
| #else | #else | ||||||
|   return sum_cpu(arg,osites); |   return sum_cpu(arg,osites); | ||||||
| @@ -136,7 +139,7 @@ inline typename vobj::scalar_object sum(const vobj *arg, Integer osites) | |||||||
| template<class vobj> | template<class vobj> | ||||||
| inline typename vobj::scalar_objectD sumD(const vobj *arg, Integer osites) | inline typename vobj::scalar_objectD sumD(const vobj *arg, Integer osites) | ||||||
| { | { | ||||||
| #if defined(GRID_CUDA)||defined(GRID_HIP) | #if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL) | ||||||
|   return sumD_gpu(arg,osites); |   return sumD_gpu(arg,osites); | ||||||
| #else | #else | ||||||
|   return sumD_cpu(arg,osites); |   return sumD_cpu(arg,osites); | ||||||
| @@ -145,7 +148,7 @@ inline typename vobj::scalar_objectD sumD(const vobj *arg, Integer osites) | |||||||
| template<class vobj> | template<class vobj> | ||||||
| inline typename vobj::scalar_objectD sumD_large(const vobj *arg, Integer osites) | inline typename vobj::scalar_objectD sumD_large(const vobj *arg, Integer osites) | ||||||
| { | { | ||||||
| #if defined(GRID_CUDA)||defined(GRID_HIP) | #if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL) | ||||||
|   return sumD_gpu_large(arg,osites); |   return sumD_gpu_large(arg,osites); | ||||||
| #else | #else | ||||||
|   return sumD_cpu(arg,osites); |   return sumD_cpu(arg,osites); | ||||||
| @@ -155,13 +158,13 @@ inline typename vobj::scalar_objectD sumD_large(const vobj *arg, Integer osites) | |||||||
| template<class vobj> | template<class vobj> | ||||||
| inline typename vobj::scalar_object sum(const Lattice<vobj> &arg) | inline typename vobj::scalar_object sum(const Lattice<vobj> &arg) | ||||||
| { | { | ||||||
| #if defined(GRID_CUDA)||defined(GRID_HIP) |  | ||||||
|   autoView( arg_v, arg, AcceleratorRead); |  | ||||||
|   Integer osites = arg.Grid()->oSites(); |   Integer osites = arg.Grid()->oSites(); | ||||||
|   auto ssum= sum_gpu(&arg_v[0],osites); | #if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL) | ||||||
|  |   typename vobj::scalar_object ssum; | ||||||
|  |   autoView( arg_v, arg, AcceleratorRead); | ||||||
|  |   ssum= sum_gpu(&arg_v[0],osites); | ||||||
| #else | #else | ||||||
|   autoView(arg_v, arg, CpuRead); |   autoView(arg_v, arg, CpuRead); | ||||||
|   Integer osites = arg.Grid()->oSites(); |  | ||||||
|   auto ssum= sum_cpu(&arg_v[0],osites); |   auto ssum= sum_cpu(&arg_v[0],osites); | ||||||
| #endif   | #endif   | ||||||
|   arg.Grid()->GlobalSum(ssum); |   arg.Grid()->GlobalSum(ssum); | ||||||
| @@ -171,7 +174,7 @@ inline typename vobj::scalar_object sum(const Lattice<vobj> &arg) | |||||||
| template<class vobj> | template<class vobj> | ||||||
| inline typename vobj::scalar_object sum_large(const Lattice<vobj> &arg) | inline typename vobj::scalar_object sum_large(const Lattice<vobj> &arg) | ||||||
| { | { | ||||||
| #if defined(GRID_CUDA)||defined(GRID_HIP) | #if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL) | ||||||
|   autoView( arg_v, arg, AcceleratorRead); |   autoView( arg_v, arg, AcceleratorRead); | ||||||
|   Integer osites = arg.Grid()->oSites(); |   Integer osites = arg.Grid()->oSites(); | ||||||
|   auto ssum= sum_gpu_large(&arg_v[0],osites); |   auto ssum= sum_gpu_large(&arg_v[0],osites); | ||||||
| @@ -235,11 +238,10 @@ inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> & | |||||||
|   typedef decltype(innerProductD(vobj(),vobj())) inner_t; |   typedef decltype(innerProductD(vobj(),vobj())) inner_t; | ||||||
|   Vector<inner_t> inner_tmp(sites); |   Vector<inner_t> inner_tmp(sites); | ||||||
|   auto inner_tmp_v = &inner_tmp[0]; |   auto inner_tmp_v = &inner_tmp[0]; | ||||||
|      |  | ||||||
|   { |   { | ||||||
|     autoView( left_v , left, AcceleratorRead); |     autoView( left_v , left, AcceleratorRead); | ||||||
|     autoView( right_v,right, AcceleratorRead); |     autoView( right_v,right, AcceleratorRead); | ||||||
|  |     // This code could read coalesce | ||||||
|     // GPU - SIMT lane compliance... |     // GPU - SIMT lane compliance... | ||||||
|     accelerator_for( ss, sites, 1,{ |     accelerator_for( ss, sites, 1,{ | ||||||
| 	auto x_l = left_v[ss]; | 	auto x_l = left_v[ss]; | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user