diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index 0ddac437..fb6a258c 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -28,6 +28,9 @@ Author: Christoph Lehner #if defined(GRID_CUDA)||defined(GRID_HIP) #include #endif +#if defined(GRID_SYCL) +#include +#endif NAMESPACE_BEGIN(Grid); @@ -127,7 +130,7 @@ inline Double max(const Double *arg, Integer osites) template inline typename vobj::scalar_object sum(const vobj *arg, Integer osites) { -#if defined(GRID_CUDA)||defined(GRID_HIP) +#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL) return sum_gpu(arg,osites); #else return sum_cpu(arg,osites); @@ -136,7 +139,7 @@ inline typename vobj::scalar_object sum(const vobj *arg, Integer osites) template inline typename vobj::scalar_objectD sumD(const vobj *arg, Integer osites) { -#if defined(GRID_CUDA)||defined(GRID_HIP) +#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL) return sumD_gpu(arg,osites); #else return sumD_cpu(arg,osites); @@ -145,7 +148,7 @@ inline typename vobj::scalar_objectD sumD(const vobj *arg, Integer osites) template inline typename vobj::scalar_objectD sumD_large(const vobj *arg, Integer osites) { -#if defined(GRID_CUDA)||defined(GRID_HIP) +#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL) return sumD_gpu_large(arg,osites); #else return sumD_cpu(arg,osites); @@ -155,13 +158,13 @@ inline typename vobj::scalar_objectD sumD_large(const vobj *arg, Integer osites) template inline typename vobj::scalar_object sum(const Lattice &arg) { -#if defined(GRID_CUDA)||defined(GRID_HIP) - autoView( arg_v, arg, AcceleratorRead); Integer osites = arg.Grid()->oSites(); - auto ssum= sum_gpu(&arg_v[0],osites); +#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL) + typename vobj::scalar_object ssum; + autoView( arg_v, arg, AcceleratorRead); + ssum= sum_gpu(&arg_v[0],osites); #else autoView(arg_v, arg, CpuRead); - Integer osites = arg.Grid()->oSites(); auto ssum= sum_cpu(&arg_v[0],osites); #endif arg.Grid()->GlobalSum(ssum); @@ -171,7 +174,7 @@ inline typename vobj::scalar_object sum(const Lattice &arg) template inline typename vobj::scalar_object sum_large(const Lattice &arg) { -#if defined(GRID_CUDA)||defined(GRID_HIP) +#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL) autoView( arg_v, arg, AcceleratorRead); Integer osites = arg.Grid()->oSites(); auto ssum= sum_gpu_large(&arg_v[0],osites); @@ -235,11 +238,10 @@ inline ComplexD rankInnerProduct(const Lattice &left,const Lattice & typedef decltype(innerProductD(vobj(),vobj())) inner_t; Vector inner_tmp(sites); auto inner_tmp_v = &inner_tmp[0]; - { autoView( left_v , left, AcceleratorRead); autoView( right_v,right, AcceleratorRead); - + // This code could read coalesce // GPU - SIMT lane compliance... accelerator_for( ss, sites, 1,{ auto x_l = left_v[ss];