mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
GPU friendly
This commit is contained in:
parent
9dad7a0094
commit
9117f61109
@ -128,7 +128,6 @@ axpy_norm_fast(Lattice<vobj> &z,sobj a,const Lattice<vobj> &x,const Lattice<vobj
|
|||||||
template<class sobj,class vobj> strong_inline RealD
|
template<class sobj,class vobj> strong_inline RealD
|
||||||
axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y)
|
axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y)
|
||||||
{
|
{
|
||||||
const int pad = 8;
|
|
||||||
z.Checkerboard() = x.Checkerboard();
|
z.Checkerboard() = x.Checkerboard();
|
||||||
conformable(z,x);
|
conformable(z,x);
|
||||||
conformable(x,y);
|
conformable(x,y);
|
||||||
@ -148,14 +147,15 @@ axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Latt
|
|||||||
|
|
||||||
typedef decltype(innerProduct(x_v[0],y_v[0])) inner_t;
|
typedef decltype(innerProduct(x_v[0],y_v[0])) inner_t;
|
||||||
Lattice<inner_t> inner_tmp(grid);
|
Lattice<inner_t> inner_tmp(grid);
|
||||||
|
auto inner_tmp_v = inner_tmp.View();
|
||||||
|
|
||||||
accelerator_for( ss, sites, nsimd,{
|
accelerator_for( ss, sites, nsimd,{
|
||||||
auto tmp = a*x_v(ss)+b*y_v(ss);
|
auto tmp = a*x_v(ss)+b*y_v(ss);
|
||||||
coalescedWrite(inner_tmp[ss],innerProduct(tmp,tmp));
|
coalescedWrite(inner_tmp_v[ss],innerProduct(tmp,tmp));
|
||||||
coalescedWrite(z_v[ss],tmp);
|
coalescedWrite(z_v[ss],tmp);
|
||||||
})
|
})
|
||||||
|
|
||||||
nrm = TensorRemove(sum(inner_tmp));
|
nrm = real(TensorRemove(sum(inner_tmp)));
|
||||||
|
|
||||||
z.Grid()->GlobalSum(nrm);
|
z.Grid()->GlobalSum(nrm);
|
||||||
return nrm;
|
return nrm;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user