From 82e959f66cac74c3863eaa0b42f3c8b42ae874d8 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 8 Nov 2022 12:45:25 -0800 Subject: [PATCH] SYCL reduction --- Grid/lattice/Lattice_reduction.h | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index 0ddac437..fb6a258c 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -28,6 +28,9 @@ Author: Christoph Lehner #if defined(GRID_CUDA)||defined(GRID_HIP) #include #endif +#if defined(GRID_SYCL) +#include +#endif NAMESPACE_BEGIN(Grid); @@ -127,7 +130,7 @@ inline Double max(const Double *arg, Integer osites) template inline typename vobj::scalar_object sum(const vobj *arg, Integer osites) { -#if defined(GRID_CUDA)||defined(GRID_HIP) +#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL) return sum_gpu(arg,osites); #else return sum_cpu(arg,osites); @@ -136,7 +139,7 @@ inline typename vobj::scalar_object sum(const vobj *arg, Integer osites) template inline typename vobj::scalar_objectD sumD(const vobj *arg, Integer osites) { -#if defined(GRID_CUDA)||defined(GRID_HIP) +#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL) return sumD_gpu(arg,osites); #else return sumD_cpu(arg,osites); @@ -145,7 +148,7 @@ inline typename vobj::scalar_objectD sumD(const vobj *arg, Integer osites) template inline typename vobj::scalar_objectD sumD_large(const vobj *arg, Integer osites) { -#if defined(GRID_CUDA)||defined(GRID_HIP) +#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL) return sumD_gpu_large(arg,osites); #else return sumD_cpu(arg,osites); @@ -155,13 +158,13 @@ inline typename vobj::scalar_objectD sumD_large(const vobj *arg, Integer osites) template inline typename vobj::scalar_object sum(const Lattice &arg) { -#if defined(GRID_CUDA)||defined(GRID_HIP) - autoView( arg_v, arg, AcceleratorRead); Integer osites = arg.Grid()->oSites(); - auto ssum= sum_gpu(&arg_v[0],osites); +#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL) + typename vobj::scalar_object ssum; + autoView( arg_v, arg, AcceleratorRead); + ssum= sum_gpu(&arg_v[0],osites); #else autoView(arg_v, arg, CpuRead); - Integer osites = arg.Grid()->oSites(); auto ssum= sum_cpu(&arg_v[0],osites); #endif arg.Grid()->GlobalSum(ssum); @@ -171,7 +174,7 @@ inline typename vobj::scalar_object sum(const Lattice &arg) template inline typename vobj::scalar_object sum_large(const Lattice &arg) { -#if defined(GRID_CUDA)||defined(GRID_HIP) +#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL) autoView( arg_v, arg, AcceleratorRead); Integer osites = arg.Grid()->oSites(); auto ssum= sum_gpu_large(&arg_v[0],osites); @@ -235,11 +238,10 @@ inline ComplexD rankInnerProduct(const Lattice &left,const Lattice & typedef decltype(innerProductD(vobj(),vobj())) inner_t; Vector inner_tmp(sites); auto inner_tmp_v = &inner_tmp[0]; - { autoView( left_v , left, AcceleratorRead); autoView( right_v,right, AcceleratorRead); - + // This code could read coalesce // GPU - SIMT lane compliance... accelerator_for( ss, sites, 1,{ auto x_l = left_v[ss];