From 9117f61109a24253901fb9c728ae7885be7ce790 Mon Sep 17 00:00:00 2001
From: Peter Boyle
Date: Wed, 31 Jul 2019 01:22:54 +0100
Subject: [PATCH] GPU friendly
---
 Grid/lattice/Lattice_reduction.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h
index 7ec25410..96654cef 100644
--- a/Grid/lattice/Lattice_reduction.h
+++ b/Grid/lattice/Lattice_reduction.h
@@ -128,7 +128,6 @@ axpy_norm_fast(Lattice &z,sobj a,const Lattice &x,const Lattice
 strong_inline RealD
 axpby_norm_fast(Lattice &z,sobj a,sobj b,const Lattice &x,const Lattice &y)
 {
-  const int pad = 8;
   z.Checkerboard() = x.Checkerboard();
   conformable(z,x);
   conformable(x,y);
@@ -148,14 +147,15 @@ axpby_norm_fast(Lattice &z,sobj a,sobj b,const Lattice &x,const Latt
   typedef decltype(innerProduct(x_v[0],y_v[0])) inner_t;
   Lattice inner_tmp(grid);
+  auto inner_tmp_v = inner_tmp.View();
   accelerator_for( ss, sites, nsimd,{
       auto tmp = a*x_v(ss)+b*y_v(ss);
-      coalescedWrite(inner_tmp[ss],innerProduct(tmp,tmp));
+      coalescedWrite(inner_tmp_v[ss],innerProduct(tmp,tmp));
       coalescedWrite(z_v[ss],tmp);
   })
-  nrm = TensorRemove(sum(inner_tmp));
+  nrm = real(TensorRemove(sum(inner_tmp)));
   z.Grid()->GlobalSum(nrm);
   return nrm;