From dd13937bb60f453cc3e2d15d1e53a63a27ae0ae0 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 22 Dec 2023 18:03:38 -0500 Subject: [PATCH] Better opt face gather scatter --- Grid/lattice/PaddedCell.h | 45 +++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/Grid/lattice/PaddedCell.h b/Grid/lattice/PaddedCell.h index 97c95010..3fb87a25 100644 --- a/Grid/lattice/PaddedCell.h +++ b/Grid/lattice/PaddedCell.h @@ -62,6 +62,8 @@ template inline void ScatterSlice(const cshiftVector &buf, { const int Nsimd=vobj::Nsimd(); typedef typename vobj::scalar_object sobj; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; GridBase *grid = lat.Grid(); Coordinate simd = grid->_simd_layout; @@ -124,8 +126,19 @@ template inline void ScatterSlice(const cshiftVector &buf, /////////////////////////////////////////// // Transfer into lattice - will coalesce /////////////////////////////////////////// +#if 0 sobj obj = extractLane(blane,buf_p[ss+offset]); insertLane(lane,lat_v[osite],obj); +#else + const int words=sizeof(vobj)/sizeof(vector_type); + vector_type * from = (vector_type *)&buf_p[ss+offset]; + vector_type * to = (vector_type *)&lat_v[osite]; + scalar_type stmp; + for(int w=0;w inline void GatherSlice(cshiftVector &buf, { const int Nsimd=vobj::Nsimd(); typedef typename vobj::scalar_object sobj; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; autoView(lat_v, lat, AcceleratorRead); @@ -200,9 +215,20 @@ template inline void GatherSlice(cshiftVector &buf, /////////////////////////////////////////// // Take out of lattice /////////////////////////////////////////// - +#if 0 sobj obj = extractLane(lane,lat_v[osite]); insertLane(blane,buf_p[ss+offset],obj); +#else + const int words=sizeof(vobj)/sizeof(vector_type); + vector_type * to = (vector_type *)&buf_p[ss+offset]; + vector_type * from = (vector_type *)&lat_v[osite]; + scalar_type stmp; + for(int w=0;w