From e896d8123598f5258b65919ab6fd507fc8595974 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sat, 15 Jun 2019 07:52:44 +0100 Subject: [PATCH] Accelerator loop redefine. Coalesce most accesses, but ET engine still to go clean. --- Grid/lattice/Lattice_base.h | 76 +++++++++++++------------------------ 1 file changed, 27 insertions(+), 49 deletions(-) diff --git a/Grid/lattice/Lattice_base.h b/Grid/lattice/Lattice_base.h index b3b85791..d48e5090 100644 --- a/Grid/lattice/Lattice_base.h +++ b/Grid/lattice/Lattice_base.h @@ -75,25 +75,29 @@ public: if (grid) conformable(grid, _grid); else grid = _grid; }; -#ifndef LATTICE_VIEW_STRICT - accelerator_inline vobj & operator[](size_t i) { return this->_odata[i]; }; - accelerator_inline const vobj & operator[](size_t i) const { return this->_odata[i]; }; -#endif }; ///////////////////////////////////////////////////////////////////////////////////////// // A View class which provides accessor to the data. -// This will be safe to call from accelerator_loops and is trivially copy constructible +// This will be safe to call from accelerator_for and is trivially copy constructible // The copy constructor for this will need to be used by device lambda functions ///////////////////////////////////////////////////////////////////////////////////////// template class LatticeView : public LatticeAccelerator { public: -#ifdef LATTICE_VIEW_STRICT - accelerator_inline vobj & operator[](size_t i) { return this->_odata[i]; }; - accelerator_inline const vobj & operator[](size_t i) const { return this->_odata[i]; }; + + + // Rvalue +#ifdef __CUDA_ARCH__ + accelerator_inline const typename vobj::scalar_object operator()(size_t i) const { return coalescedRead(this->_odata[i]); } +#else + accelerator_inline const vobj & operator()(size_t i) const { return this->_odata[i]; } #endif + + accelerator_inline const vobj & operator[](size_t i) const { return this->_odata[i]; }; + accelerator_inline vobj & operator[](size_t i) { return this->_odata[i]; }; + accelerator_inline uint64_t begin(void) const { return 0;}; accelerator_inline uint64_t end(void) const { return this->_odata_size; }; accelerator_inline uint64_t size(void) const { return this->_odata_size; }; @@ -193,14 +197,6 @@ private: else this->_odata = nullptr; } -#if 0 - void copy_vec(vobj *ptr,uint64_t count) - { - dealloc(); - this->_odata = ptr; - assert(this->_odata_size == count); - } -#endif public: ///////////////////////////////////////////////////////////////////////////////// // Return a view object that may be dereferenced in site loops. @@ -234,16 +230,10 @@ public: this->checkerboard=cb; auto me = View(); -#ifdef STREAMING_STORES - accelerator_loop(ss,me,{ - vobj tmp = eval(ss,expr); - vstream(me[ss] ,tmp); + accelerator_for(ss,me.size(),1,{ + auto tmp = eval(ss,expr); + vstream(me[ss],tmp); }); -#else - accelerator_loop(ss,me,{ - me[ss]=eval(ss,expr); - }); -#endif return *this; } template inline Lattice & operator=(const LatticeBinaryExpression &expr) @@ -259,16 +249,10 @@ public: this->checkerboard=cb; auto me = View(); -#ifdef STREAMING_STORES - accelerator_loop(ss,me,{ - vobj tmp = eval(ss,expr); - vstream(me[ss] ,tmp); + accelerator_for(ss,me.size(),1,{ + auto tmp = eval(ss,expr); + vstream(me[ss],tmp); }); -#else - accelerator_loop(ss,me,{ - me[ss]=eval(ss,expr); - }); -#endif return *this; } template inline Lattice & operator=(const LatticeTrinaryExpression &expr) @@ -283,16 +267,10 @@ public: assert( (cb==Odd) || (cb==Even)); this->checkerboard=cb; auto me = View(); -#ifdef STREAMING_STORES - accelerator_loop(ss,me,{ - vobj tmp = eval(ss,expr); - vstream(me[ss] ,tmp); + accelerator_for(ss,me.size(),1,{ + auto tmp = eval(ss,expr); + vstream(me[ss],tmp); }); -#else - accelerator_loop(ss,me,{ - me[ss] = eval(ss,expr); - }); -#endif return *this; } //GridFromExpression is tricky to do @@ -344,8 +322,8 @@ public: template inline Lattice & operator = (const sobj & r){ auto me = View(); - accelerator_loop(ss,me,{ - me[ss]=r; + thread_for(ss,me.size(),{ + me[ss] = r; }); return *this; } @@ -401,8 +379,8 @@ public: this->checkerboard = r.Checkerboard(); auto me = View(); auto him= r.View(); - accelerator_loop(ss,me,{ - me[ss]=him[ss]; + accelerator_for(ss,me.size(),vobj::Nsimd(),{ + coalescedWrite(me[ss],him(ss)); }); return *this; } @@ -415,8 +393,8 @@ public: conformable(*this,r); auto me = View(); auto him= r.View(); - accelerator_loop(ss,me,{ - me[ss]=him[ss]; + accelerator_for(ss,me.size(),vobj::Nsimd(),{ + coalescedWrite(me[ss],him(ss)); }); return *this; }