mirror of https://github.com/paboyle/Grid.git (synced 2024-11-10 07:55:35 +00:00)

Reduction GPU no compile fix

commit 936c5ecf69
parent 22cfbdbbb3
@@ -62,7 +62,6 @@ inline typename vobj::scalar_object sum_cpu(const vobj *arg, Integer osites)
   for(int i=0;i<nthread;i++){
     ssum = ssum+sumarray[i];
   }
-
   return ssum;
 }
 template<class vobj>
@@ -156,7 +155,7 @@ inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> &
   const uint64_t sites = grid->oSites();
 
   // Might make all code paths go this way.
-  typedef decltype(Reduce(innerProductD(vobj(),vobj()))) inner_t;
+  typedef decltype(innerProductD(vobj(),vobj())) inner_t;
   Vector<inner_t> inner_tmp(sites);
   auto inner_tmp_v = &inner_tmp[0];
 
@@ -168,12 +167,13 @@ inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> &
     accelerator_for( ss, sites, 1,{
       auto x_l = left_v[ss];
       auto y_l = right_v[ss];
-      inner_tmp_v[ss]=Reduce(innerProductD(x_l,y_l));
+      inner_tmp_v[ss]=innerProductD(x_l,y_l);
     });
   }
 
   // This is in single precision and fails some tests
-  nrm = TensorRemove(sum(inner_tmp_v,sites));
+  auto anrm = sum(inner_tmp_v,sites);
+  nrm = anrm;
   return nrm;
 }
 
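With this change the accelerator loops store the full vectorised innerProductD result for each site and defer the cross-lane Reduce to the single final sum; the commit title suggests the per-site Reduce was what failed to compile for GPU targets. Below is a minimal stand-alone sketch of that buffer-then-reduce pattern, using toy types (LaneVec, Nlanes) rather than Grid's vComplexD and accelerator_for:

#include <array>
#include <complex>
#include <cstddef>
#include <iostream>
#include <vector>

constexpr std::size_t Nlanes = 4;                          // stand-in for a SIMD width
using LaneVec = std::array<std::complex<double>, Nlanes>;  // toy "vComplexD"

// Toy innerProductD: conj(x)*y lane by lane, no cross-lane reduction.
LaneVec innerProductD(const LaneVec &x, const LaneVec &y) {
  LaneVec r;
  for (std::size_t l = 0; l < Nlanes; l++) r[l] = std::conj(x[l]) * y[l];
  return r;
}

// Toy Reduce: collapse the lanes of one vectorised value to a scalar.
std::complex<double> Reduce(const LaneVec &v) {
  std::complex<double> s{};
  for (const auto &c : v) s += c;
  return s;
}

int main() {
  const std::size_t sites = 1024;
  std::vector<LaneVec> left(sites), right(sites), inner_tmp(sites);
  for (std::size_t ss = 0; ss < sites; ss++)
    for (std::size_t l = 0; l < Nlanes; l++) {
      left[ss][l]  = {1.0, 0.5};
      right[ss][l] = {2.0, -1.0};
    }

  // "Kernel" loop: one vectorised partial result per site, no per-site Reduce.
  for (std::size_t ss = 0; ss < sites; ss++)
    inner_tmp[ss] = innerProductD(left[ss], right[ss]);

  // Single reduction at the end: sum the buffer, then collapse the lanes once.
  LaneVec acc{};
  for (std::size_t ss = 0; ss < sites; ss++)
    for (std::size_t l = 0; l < Nlanes; l++) acc[l] += inner_tmp[ss][l];
  std::complex<double> nrm = Reduce(acc);

  std::cout << nrm << "\n";
  return 0;
}

The same reshaping is applied to axpby_norm_fast and innerProductNorm in the hunks below: only the kernel body changes, the final host-side sum stays where it was.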
@@ -219,13 +219,13 @@ axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Latt
   autoView( y_v, y, AcceleratorRead);
   autoView( z_v, z, AcceleratorWrite);
 
-  typedef decltype(Reduce(innerProductD(x_v[0],y_v[0]))) inner_t;
+  typedef decltype(innerProductD(x_v[0],y_v[0])) inner_t;
   Vector<inner_t> inner_tmp(sites);
   auto inner_tmp_v = &inner_tmp[0];
 
   accelerator_for( ss, sites, 1,{
     auto tmp = a*x_v[ss]+b*y_v[ss];
-    inner_tmp_v[ss]=Reduce(innerProductD(tmp,tmp));
+    inner_tmp_v[ss]=innerProductD(tmp,tmp);
     z_v[ss]=tmp;
   });
   nrm = real(TensorRemove(sum(inner_tmp_v,sites)));
@@ -248,8 +248,8 @@ innerProductNorm(ComplexD& ip, RealD &nrm, const Lattice<vobj> &left,const Latti
   const uint64_t sites = grid->oSites();
 
   // GPU
-  typedef decltype(Reduce(innerProductD(vobj(),vobj()))) inner_t;
-  typedef decltype(Reduce(innerProductD(vobj(),vobj()))) norm_t;
+  typedef decltype(innerProductD(vobj(),vobj())) inner_t;
+  typedef decltype(innerProductD(vobj(),vobj())) norm_t;
   Vector<inner_t> inner_tmp(sites);
   Vector<norm_t> norm_tmp(sites);
   auto inner_tmp_v = &inner_tmp[0];
@@ -259,8 +259,8 @@ innerProductNorm(ComplexD& ip, RealD &nrm, const Lattice<vobj> &left,const Latti
     autoView(right_v,right,AcceleratorRead);
     accelerator_for( ss, sites, 1,{
       auto left_tmp = left_v[ss];
-      inner_tmp_v[ss]=Reduce(innerProductD(left_tmp,right_v[ss]));
-      norm_tmp_v [ss]=Reduce(innerProductD(left_tmp,left_tmp));
+      inner_tmp_v[ss]=innerProductD(left_tmp,right_v[ss]);
+      norm_tmp_v [ss]=innerProductD(left_tmp,left_tmp);
     });
   }
 
@@ -59,6 +59,20 @@ class GridTensorBase {};
   using DoublePrecision2= typename Traits::DoublePrecision2; \
   static constexpr int TensorLevel = Traits::TensorLevel
 
+///////////////////////////////////////////////////////////
+// Allows to turn scalar<scalar<scalar<double>>>> back to double.
+///////////////////////////////////////////////////////////
+template <class T>
+accelerator_inline typename std::enable_if<!isGridTensor<T>::value, T>::type
+TensorRemove(T arg) {
+  return arg;
+}
+template <class vtype>
+accelerator_inline auto TensorRemove(iScalar<vtype> arg)
+  -> decltype(TensorRemove(arg._internal)) {
+  return TensorRemove(arg._internal);
+}
+
 template <class vtype>
 class iScalar {
 public:
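The TensorRemove helpers move ahead of the iScalar definition, presumably so the conversion operators inside iScalar can still find them when vtype is a plain arithmetic type (the nvcc-style instantiation quoted as a comment in the following hunk). The idiom itself is a SFINAE pass-through base case plus an overload that peels one scalar layer per call. A self-contained sketch with illustrative names (Scalar, isWrapped, not Grid's iScalar/isGridTensor):

#include <iostream>
#include <type_traits>

template <class T> struct Scalar { T _internal; };   // toy stand-in for iScalar

template <class T> struct isWrapped : std::false_type {};
template <class T> struct isWrapped<Scalar<T>> : std::true_type {};

// Base case: a type that is not a wrapper passes through unchanged.
template <class T>
typename std::enable_if<!isWrapped<T>::value, T>::type
TensorRemove(T arg) { return arg; }

// Recursive case: strip one Scalar<> layer and recurse on what is inside.
template <class T>
auto TensorRemove(Scalar<T> arg) -> decltype(TensorRemove(arg._internal)) {
  return TensorRemove(arg._internal);
}

int main() {
  Scalar<Scalar<Scalar<double>>> s{{{3.25}}};
  double d = TensorRemove(s);   // scalar<scalar<scalar<double>>> back to double
  std::cout << d << "\n";
  return 0;
}

Because the wrapper and the helper live in the same namespace, the recursive overload is still found by argument-dependent lookup at instantiation time; declaring the helpers before the class keeps the non-dependent uses compiling as well.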
@@ -135,9 +149,10 @@ public:
   operator ComplexD() const {
     return (TensorRemove(_internal));
   }
+  // instantiation of "Grid::iScalar<vtype>::operator Grid::RealD() const [with vtype=Grid::Real, U=Grid::Real, V=Grid::RealD, <unnamed>=0, <unnamed>=0U]"
   template <class U = vtype, class V = scalar_type, IfReal<V> = 0,IfNotSimd<U> = 0> accelerator_inline
   operator RealD() const {
-    return TensorRemove(_internal);
+    return (RealD) TensorRemove(_internal);
   }
   template <class U = vtype, class V = scalar_type, IfInteger<V> = 0, IfNotSimd<U> = 0> accelerator_inline
   operator Integer() const {
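The added (RealD) makes the float-to-double promotion explicit when TensorRemove(_internal) yields a single-precision value but the conversion target is RealD. A trivial sketch of the same shape, with a hypothetical Wrapped type rather than Grid code:

#include <iostream>

// Toy wrapper holding single precision; the conversion operator widens to
// double, with the cast written out as in the patched operator RealD().
struct Wrapped {
  float _internal;
  operator double() const { return (double) _internal; }
};

int main() {
  Wrapped w{1.5f};
  double d = w;   // implicit use of operator double()
  std::cout << d << "\n";
  return 0;
}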
@@ -169,20 +184,6 @@ public:
   strong_inline scalar_type * end() { return begin() + Traits::count; }
 };
 
-///////////////////////////////////////////////////////////
-// Allows to turn scalar<scalar<scalar<double>>>> back to double.
-///////////////////////////////////////////////////////////
-template <class T>
-accelerator_inline typename std::enable_if<!isGridTensor<T>::value, T>::type
-TensorRemove(T arg) {
-  return arg;
-}
-template <class vtype>
-accelerator_inline auto TensorRemove(iScalar<vtype> arg)
-  -> decltype(TensorRemove(arg._internal)) {
-  return TensorRemove(arg._internal);
-}
-
 template <class vtype, int N>
 class iVector {
 public: