mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
SYCL reduction
This commit is contained in:
parent
477ebf24f4
commit
82e959f66c
@ -28,6 +28,9 @@ Author: Christoph Lehner <christoph@lhnr.de>
|
|||||||
#if defined(GRID_CUDA)||defined(GRID_HIP)
|
#if defined(GRID_CUDA)||defined(GRID_HIP)
|
||||||
#include <Grid/lattice/Lattice_reduction_gpu.h>
|
#include <Grid/lattice/Lattice_reduction_gpu.h>
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(GRID_SYCL)
|
||||||
|
#include <Grid/lattice/Lattice_reduction_sycl.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
@ -127,7 +130,7 @@ inline Double max(const Double *arg, Integer osites)
|
|||||||
template<class vobj>
|
template<class vobj>
|
||||||
inline typename vobj::scalar_object sum(const vobj *arg, Integer osites)
|
inline typename vobj::scalar_object sum(const vobj *arg, Integer osites)
|
||||||
{
|
{
|
||||||
#if defined(GRID_CUDA)||defined(GRID_HIP)
|
#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL)
|
||||||
return sum_gpu(arg,osites);
|
return sum_gpu(arg,osites);
|
||||||
#else
|
#else
|
||||||
return sum_cpu(arg,osites);
|
return sum_cpu(arg,osites);
|
||||||
@ -136,7 +139,7 @@ inline typename vobj::scalar_object sum(const vobj *arg, Integer osites)
|
|||||||
template<class vobj>
|
template<class vobj>
|
||||||
inline typename vobj::scalar_objectD sumD(const vobj *arg, Integer osites)
|
inline typename vobj::scalar_objectD sumD(const vobj *arg, Integer osites)
|
||||||
{
|
{
|
||||||
#if defined(GRID_CUDA)||defined(GRID_HIP)
|
#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL)
|
||||||
return sumD_gpu(arg,osites);
|
return sumD_gpu(arg,osites);
|
||||||
#else
|
#else
|
||||||
return sumD_cpu(arg,osites);
|
return sumD_cpu(arg,osites);
|
||||||
@ -145,7 +148,7 @@ inline typename vobj::scalar_objectD sumD(const vobj *arg, Integer osites)
|
|||||||
template<class vobj>
|
template<class vobj>
|
||||||
inline typename vobj::scalar_objectD sumD_large(const vobj *arg, Integer osites)
|
inline typename vobj::scalar_objectD sumD_large(const vobj *arg, Integer osites)
|
||||||
{
|
{
|
||||||
#if defined(GRID_CUDA)||defined(GRID_HIP)
|
#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL)
|
||||||
return sumD_gpu_large(arg,osites);
|
return sumD_gpu_large(arg,osites);
|
||||||
#else
|
#else
|
||||||
return sumD_cpu(arg,osites);
|
return sumD_cpu(arg,osites);
|
||||||
@ -155,13 +158,13 @@ inline typename vobj::scalar_objectD sumD_large(const vobj *arg, Integer osites)
|
|||||||
template<class vobj>
|
template<class vobj>
|
||||||
inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
|
inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
|
||||||
{
|
{
|
||||||
#if defined(GRID_CUDA)||defined(GRID_HIP)
|
|
||||||
autoView( arg_v, arg, AcceleratorRead);
|
|
||||||
Integer osites = arg.Grid()->oSites();
|
Integer osites = arg.Grid()->oSites();
|
||||||
auto ssum= sum_gpu(&arg_v[0],osites);
|
#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL)
|
||||||
|
typename vobj::scalar_object ssum;
|
||||||
|
autoView( arg_v, arg, AcceleratorRead);
|
||||||
|
ssum= sum_gpu(&arg_v[0],osites);
|
||||||
#else
|
#else
|
||||||
autoView(arg_v, arg, CpuRead);
|
autoView(arg_v, arg, CpuRead);
|
||||||
Integer osites = arg.Grid()->oSites();
|
|
||||||
auto ssum= sum_cpu(&arg_v[0],osites);
|
auto ssum= sum_cpu(&arg_v[0],osites);
|
||||||
#endif
|
#endif
|
||||||
arg.Grid()->GlobalSum(ssum);
|
arg.Grid()->GlobalSum(ssum);
|
||||||
@ -171,7 +174,7 @@ inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
|
|||||||
template<class vobj>
|
template<class vobj>
|
||||||
inline typename vobj::scalar_object sum_large(const Lattice<vobj> &arg)
|
inline typename vobj::scalar_object sum_large(const Lattice<vobj> &arg)
|
||||||
{
|
{
|
||||||
#if defined(GRID_CUDA)||defined(GRID_HIP)
|
#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL)
|
||||||
autoView( arg_v, arg, AcceleratorRead);
|
autoView( arg_v, arg, AcceleratorRead);
|
||||||
Integer osites = arg.Grid()->oSites();
|
Integer osites = arg.Grid()->oSites();
|
||||||
auto ssum= sum_gpu_large(&arg_v[0],osites);
|
auto ssum= sum_gpu_large(&arg_v[0],osites);
|
||||||
@ -235,11 +238,10 @@ inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> &
|
|||||||
typedef decltype(innerProductD(vobj(),vobj())) inner_t;
|
typedef decltype(innerProductD(vobj(),vobj())) inner_t;
|
||||||
Vector<inner_t> inner_tmp(sites);
|
Vector<inner_t> inner_tmp(sites);
|
||||||
auto inner_tmp_v = &inner_tmp[0];
|
auto inner_tmp_v = &inner_tmp[0];
|
||||||
|
|
||||||
{
|
{
|
||||||
autoView( left_v , left, AcceleratorRead);
|
autoView( left_v , left, AcceleratorRead);
|
||||||
autoView( right_v,right, AcceleratorRead);
|
autoView( right_v,right, AcceleratorRead);
|
||||||
|
// This code could read coalesce
|
||||||
// GPU - SIMT lane compliance...
|
// GPU - SIMT lane compliance...
|
||||||
accelerator_for( ss, sites, 1,{
|
accelerator_for( ss, sites, 1,{
|
||||||
auto x_l = left_v[ss];
|
auto x_l = left_v[ss];
|
||||||
|
Loading…
x
Reference in New Issue
Block a user