mirror of
https://github.com/paboyle/Grid.git
synced 2025-12-19 12:14:29 +00:00
merged sycl to feature-gpt
This commit is contained in:
@@ -29,6 +29,7 @@ See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#pragma once
|
||||
|
||||
#define STREAMING_STORES
|
||||
@@ -37,161 +38,6 @@ NAMESPACE_BEGIN(Grid);
|
||||
|
||||
extern int GridCshiftPermuteMap[4][16];
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Base class which can be used by traits to pick up behaviour
|
||||
///////////////////////////////////////////////////////////////////
|
||||
class LatticeBase {};
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Conformable checks; same instance of Grid required
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
void accelerator_inline conformable(GridBase *lhs,GridBase *rhs)
|
||||
{
|
||||
assert(lhs == rhs);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Advise the LatticeAccelerator class
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
enum LatticeAcceleratorAdvise {
|
||||
AdviseInfrequentUse = 0x1, // Advise that the data is used infrequently. This can
|
||||
// significantly influence performance of bulk storage.
|
||||
AdviseReadMostly = 0x2, // Data will mostly be read. On some architectures
|
||||
// enables read-only copies of memory to be kept on
|
||||
// host and device.
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// View Access Mode
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
enum ViewMode {
|
||||
ViewRead = 0x1,
|
||||
ViewWrite = 0x2,
|
||||
ViewReadWrite = 0x3
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Minimal base class containing only data valid to access from accelerator
|
||||
// _odata will be a managed pointer in CUDA
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Force access to lattice through a view object.
|
||||
// prevents writing of code that will not offload to GPU, but perhaps annoyingly
|
||||
// strict since host could could in principle direct access through the lattice object
|
||||
// Need to decide programming model.
|
||||
#define LATTICE_VIEW_STRICT
|
||||
template<class vobj> class LatticeAccelerator : public LatticeBase
|
||||
{
|
||||
protected:
|
||||
GridBase *_grid;
|
||||
int checkerboard;
|
||||
vobj *_odata; // A managed pointer
|
||||
uint64_t _odata_size;
|
||||
public:
|
||||
accelerator_inline LatticeAccelerator() : checkerboard(0), _odata(nullptr), _odata_size(0), _grid(nullptr) { };
|
||||
accelerator_inline uint64_t oSites(void) const { return _odata_size; };
|
||||
accelerator_inline int Checkerboard(void) const { return checkerboard; };
|
||||
accelerator_inline int &Checkerboard(void) { return this->checkerboard; }; // can assign checkerboard on a container, not a view
|
||||
accelerator_inline void Conformable(GridBase * &grid) const
|
||||
{
|
||||
if (grid) conformable(grid, _grid);
|
||||
else grid = _grid;
|
||||
};
|
||||
|
||||
accelerator_inline void AcceleratorPrefetch(int accessMode = ViewReadWrite) { // will use accessMode in future
|
||||
gridAcceleratorPrefetch(_odata,_odata_size*sizeof(vobj));
|
||||
};
|
||||
|
||||
accelerator_inline void HostPrefetch(int accessMode = ViewReadWrite) { // will use accessMode in future
|
||||
#ifdef GRID_NVCC
|
||||
#ifndef __CUDA_ARCH__ // only on host
|
||||
//cudaMemPrefetchAsync(_odata,_odata_size*sizeof(vobj),cudaCpuDeviceId);
|
||||
#endif
|
||||
#endif
|
||||
};
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// A View class which provides accessor to the data.
|
||||
// This will be safe to call from accelerator_for and is trivially copy constructible
|
||||
// The copy constructor for this will need to be used by device lambda functions
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
class LatticeView : public LatticeAccelerator<vobj>
|
||||
{
|
||||
public:
|
||||
|
||||
|
||||
// Rvalue
|
||||
#ifdef __CUDA_ARCH__
|
||||
accelerator_inline const typename vobj::scalar_object operator()(size_t i) const { return coalescedRead(this->_odata[i]); }
|
||||
#else
|
||||
accelerator_inline const vobj & operator()(size_t i) const { return this->_odata[i]; }
|
||||
#endif
|
||||
|
||||
accelerator_inline const vobj & operator[](size_t i) const { return this->_odata[i]; };
|
||||
accelerator_inline vobj & operator[](size_t i) { return this->_odata[i]; };
|
||||
|
||||
accelerator_inline uint64_t begin(void) const { return 0;};
|
||||
accelerator_inline uint64_t end(void) const { return this->_odata_size; };
|
||||
accelerator_inline uint64_t size(void) const { return this->_odata_size; };
|
||||
|
||||
LatticeView(const LatticeAccelerator<vobj> &refer_to_me) : LatticeAccelerator<vobj> (refer_to_me)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Lattice expression types used by ET to assemble the AST
|
||||
//
|
||||
// Need to be able to detect code paths according to the whether a lattice object or not
|
||||
// so introduce some trait type things
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class LatticeExpressionBase {};
|
||||
|
||||
template <typename T> using is_lattice = std::is_base_of<LatticeBase, T>;
|
||||
template <typename T> using is_lattice_expr = std::is_base_of<LatticeExpressionBase,T >;
|
||||
|
||||
template<class T, bool isLattice> struct ViewMapBase { typedef T Type; };
|
||||
template<class T> struct ViewMapBase<T,true> { typedef LatticeView<typename T::vector_object> Type; };
|
||||
template<class T> using ViewMap = ViewMapBase<T,std::is_base_of<LatticeBase, T>::value >;
|
||||
|
||||
template <typename Op, typename _T1>
|
||||
class LatticeUnaryExpression : public LatticeExpressionBase
|
||||
{
|
||||
public:
|
||||
typedef typename ViewMap<_T1>::Type T1;
|
||||
Op op;
|
||||
T1 arg1;
|
||||
LatticeUnaryExpression(Op _op,const _T1 &_arg1) : op(_op), arg1(_arg1) {};
|
||||
};
|
||||
|
||||
template <typename Op, typename _T1, typename _T2>
|
||||
class LatticeBinaryExpression : public LatticeExpressionBase
|
||||
{
|
||||
public:
|
||||
typedef typename ViewMap<_T1>::Type T1;
|
||||
typedef typename ViewMap<_T2>::Type T2;
|
||||
Op op;
|
||||
T1 arg1;
|
||||
T2 arg2;
|
||||
LatticeBinaryExpression(Op _op,const _T1 &_arg1,const _T2 &_arg2) : op(_op), arg1(_arg1), arg2(_arg2) {};
|
||||
};
|
||||
|
||||
template <typename Op, typename _T1, typename _T2, typename _T3>
|
||||
class LatticeTrinaryExpression : public LatticeExpressionBase
|
||||
{
|
||||
public:
|
||||
typedef typename ViewMap<_T1>::Type T1;
|
||||
typedef typename ViewMap<_T2>::Type T2;
|
||||
typedef typename ViewMap<_T3>::Type T3;
|
||||
Op op;
|
||||
T1 arg1;
|
||||
T2 arg2;
|
||||
T3 arg3;
|
||||
LatticeTrinaryExpression(Op _op,const _T1 &_arg1,const _T2 &_arg2,const _T3 &_arg3) : op(_op), arg1(_arg1), arg2(_arg2), arg3(_arg3) {};
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// The real lattice class, with normal copy and assignment semantics.
|
||||
// This contains extra (host resident) grid pointer data that may be accessed by host code
|
||||
@@ -248,31 +94,25 @@ public:
|
||||
#endif
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
// Can use to make accelerator dirty without copy from host ; useful for temporaries "dont care" prev contents
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
void SetViewMode(ViewMode mode) {
|
||||
LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this),mode);
|
||||
accessor.ViewClose();
|
||||
}
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
// Return a view object that may be dereferenced in site loops.
|
||||
// The view is trivially copy constructible and may be copied to an accelerator device
|
||||
// in device lambdas
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
LatticeView<vobj> View (void) const // deprecated, should pick AcceleratorView for accelerator_for
|
||||
{ // and HostView for thread_for
|
||||
LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this));
|
||||
|
||||
LatticeView<vobj> View (ViewMode mode) const
|
||||
{
|
||||
LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this),mode);
|
||||
return accessor;
|
||||
}
|
||||
|
||||
LatticeView<vobj> AcceleratorView(int mode = ViewReadWrite) const
|
||||
{
|
||||
LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this));
|
||||
//accessor.AcceleratorPrefetch(mode);
|
||||
return accessor;
|
||||
}
|
||||
|
||||
LatticeView<vobj> HostView(int mode = ViewReadWrite) const
|
||||
{
|
||||
LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this));
|
||||
//accessor.HostPrefetch(mode);
|
||||
return accessor;
|
||||
}
|
||||
|
||||
~Lattice() {
|
||||
if ( this->_odata_size ) {
|
||||
dealloc();
|
||||
@@ -292,12 +132,16 @@ public:
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
this->checkerboard=cb;
|
||||
|
||||
auto me = AcceleratorView(ViewWrite);
|
||||
|
||||
auto exprCopy = expr;
|
||||
ExpressionViewOpen(exprCopy);
|
||||
auto me = View(AcceleratorWriteDiscard);
|
||||
accelerator_for(ss,me.size(),1,{
|
||||
auto tmp = eval(ss,expr);
|
||||
auto tmp = eval(ss,exprCopy);
|
||||
vstream(me[ss],tmp);
|
||||
});
|
||||
me.ViewClose();
|
||||
ExpressionViewClose(exprCopy);
|
||||
return *this;
|
||||
}
|
||||
template <typename Op, typename T1,typename T2> inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr)
|
||||
@@ -312,11 +156,15 @@ public:
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
this->checkerboard=cb;
|
||||
|
||||
auto me = AcceleratorView(ViewWrite);
|
||||
auto exprCopy = expr;
|
||||
ExpressionViewOpen(exprCopy);
|
||||
auto me = View(AcceleratorWriteDiscard);
|
||||
accelerator_for(ss,me.size(),1,{
|
||||
auto tmp = eval(ss,expr);
|
||||
auto tmp = eval(ss,exprCopy);
|
||||
vstream(me[ss],tmp);
|
||||
});
|
||||
me.ViewClose();
|
||||
ExpressionViewClose(exprCopy);
|
||||
return *this;
|
||||
}
|
||||
template <typename Op, typename T1,typename T2,typename T3> inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr)
|
||||
@@ -330,11 +178,15 @@ public:
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
this->checkerboard=cb;
|
||||
auto me = AcceleratorView(ViewWrite);
|
||||
auto exprCopy = expr;
|
||||
ExpressionViewOpen(exprCopy);
|
||||
auto me = View(AcceleratorWriteDiscard);
|
||||
accelerator_for(ss,me.size(),1,{
|
||||
auto tmp = eval(ss,expr);
|
||||
auto tmp = eval(ss,exprCopy);
|
||||
vstream(me[ss],tmp);
|
||||
});
|
||||
me.ViewClose();
|
||||
ExpressionViewClose(exprCopy);
|
||||
return *this;
|
||||
}
|
||||
//GridFromExpression is tricky to do
|
||||
@@ -385,10 +237,11 @@ public:
|
||||
}
|
||||
|
||||
template<class sobj> inline Lattice<vobj> & operator = (const sobj & r){
|
||||
auto me = View();
|
||||
auto me = View(CpuWrite);
|
||||
thread_for(ss,me.size(),{
|
||||
me[ss] = r;
|
||||
me[ss]= r;
|
||||
});
|
||||
me.ViewClose();
|
||||
return *this;
|
||||
}
|
||||
|
||||
@@ -398,11 +251,12 @@ public:
|
||||
///////////////////////////////////////////
|
||||
// user defined constructor
|
||||
///////////////////////////////////////////
|
||||
Lattice(GridBase *grid) {
|
||||
Lattice(GridBase *grid,ViewMode mode=AcceleratorWriteDiscard) {
|
||||
this->_grid = grid;
|
||||
resize(this->_grid->oSites());
|
||||
assert((((uint64_t)&this->_odata[0])&0xF) ==0);
|
||||
this->checkerboard=0;
|
||||
SetViewMode(mode);
|
||||
}
|
||||
|
||||
// virtual ~Lattice(void) = default;
|
||||
@@ -440,11 +294,12 @@ public:
|
||||
typename std::enable_if<!std::is_same<robj,vobj>::value,int>::type i=0;
|
||||
conformable(*this,r);
|
||||
this->checkerboard = r.Checkerboard();
|
||||
auto me = AcceleratorView(ViewWrite);
|
||||
auto him= r.AcceleratorView(ViewRead);
|
||||
auto me = View(AcceleratorWriteDiscard);
|
||||
auto him= r.View(AcceleratorRead);
|
||||
accelerator_for(ss,me.size(),vobj::Nsimd(),{
|
||||
coalescedWrite(me[ss],him(ss));
|
||||
});
|
||||
me.ViewClose(); him.ViewClose();
|
||||
return *this;
|
||||
}
|
||||
|
||||
@@ -454,11 +309,12 @@ public:
|
||||
inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
|
||||
this->checkerboard = r.Checkerboard();
|
||||
conformable(*this,r);
|
||||
auto me = AcceleratorView(ViewWrite);
|
||||
auto him= r.AcceleratorView(ViewRead);
|
||||
auto me = View(AcceleratorWriteDiscard);
|
||||
auto him= r.View(AcceleratorRead);
|
||||
accelerator_for(ss,me.size(),vobj::Nsimd(),{
|
||||
coalescedWrite(me[ss],him(ss));
|
||||
});
|
||||
me.ViewClose(); him.ViewClose();
|
||||
return *this;
|
||||
}
|
||||
///////////////////////////////////////////
|
||||
|
||||
Reference in New Issue
Block a user