mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
Think the reduction is now sorted and cleaned up
This commit is contained in:
parent
53e3ab4131
commit
ce97638bac
@ -22,15 +22,15 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <Grid/Grid_Eigen_Dense.h>
|
#include <Grid/Grid_Eigen_Dense.h>
|
||||||
|
|
||||||
|
|
||||||
#ifdef GRID_NVCC
|
#ifdef GRID_NVCC
|
||||||
#include <Grid/lattice/Lattice_reduction_gpu.h>
|
#include <Grid/lattice/Lattice_reduction_gpu.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
#ifndef GRID_NVCC
|
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
|
inline typename vobj::scalar_object sum_cpu(const Lattice<vobj> &arg)
|
||||||
{
|
{
|
||||||
GridBase *grid=arg.Grid();
|
GridBase *grid=arg.Grid();
|
||||||
int Nsimd = grid->Nsimd();
|
int Nsimd = grid->Nsimd();
|
||||||
@ -69,8 +69,16 @@ inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
|
|||||||
|
|
||||||
return ssum;
|
return ssum;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
|
template<class vobj>
|
||||||
|
inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
|
||||||
|
{
|
||||||
|
#ifdef GRID_NVCC
|
||||||
|
return sum_gpu(arg);
|
||||||
|
#else
|
||||||
|
return sum_cpu(arg);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Deterministic Reduction operations
|
// Deterministic Reduction operations
|
||||||
@ -109,7 +117,7 @@ inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &righ
|
|||||||
|
|
||||||
nrm = TensorRemove(sum(inner_tmp));
|
nrm = TensorRemove(sum(inner_tmp));
|
||||||
|
|
||||||
right.Grid()->GlobalSum(nrm);
|
// right.Grid()->GlobalSum(nrm);
|
||||||
return nrm;
|
return nrm;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -157,7 +165,7 @@ axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Latt
|
|||||||
|
|
||||||
nrm = real(TensorRemove(sum(inner_tmp)));
|
nrm = real(TensorRemove(sum(inner_tmp)));
|
||||||
|
|
||||||
z.Grid()->GlobalSum(nrm);
|
// z.Grid()->GlobalSum(nrm);
|
||||||
return nrm;
|
return nrm;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -180,7 +180,7 @@ __global__ void reduceKernel(const LatticeView<vobj> lat, typename vobj::scalar_
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class vobj>
|
template <class vobj>
|
||||||
inline typename vobj::scalar_object sum(const Lattice<vobj> &lat)
|
inline typename vobj::scalar_object sum_gpu(const Lattice<vobj> &lat)
|
||||||
{
|
{
|
||||||
|
|
||||||
LatticeView<vobj> lat_v = lat.View();
|
LatticeView<vobj> lat_v = lat.View();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user