1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-17 15:27:06 +01:00

removed print flags

This commit is contained in:
Mohammad Atif
2023-12-04 15:12:03 -05:00
parent 157368ed04
commit 867abeaf8e
9 changed files with 11566 additions and 95 deletions

View File

@ -222,6 +222,9 @@ void MemoryManager::InitMessage(void) {
#ifdef GRID_SYCL #ifdef GRID_SYCL
std::cout << GridLogMessage<< "MemoryManager::Init() Using SYCL malloc_shared"<<std::endl; std::cout << GridLogMessage<< "MemoryManager::Init() Using SYCL malloc_shared"<<std::endl;
#endif #endif
#ifdef GRID_OMPTARGET
std::cout << GridLogMessage<< "MemoryManager::Init() Using OMPTARGET omp_alloc_device"<<std::endl;
#endif
#else #else
std::cout << GridLogMessage<< "MemoryManager::Init() Non unified: Caching accelerator data in dedicated memory"<<std::endl; std::cout << GridLogMessage<< "MemoryManager::Init() Non unified: Caching accelerator data in dedicated memory"<<std::endl;
#ifdef GRID_CUDA #ifdef GRID_CUDA
@ -233,6 +236,9 @@ void MemoryManager::InitMessage(void) {
#ifdef GRID_SYCL #ifdef GRID_SYCL
std::cout << GridLogMessage<< "MemoryManager::Init() Using SYCL malloc_device"<<std::endl; std::cout << GridLogMessage<< "MemoryManager::Init() Using SYCL malloc_device"<<std::endl;
#endif #endif
#ifdef GRID_OMPTARGET
std::cout << GridLogMessage<< "MemoryManager::Init() Using OMPTARGET managed memory"<<std::endl;
#endif
#endif #endif
} }

View File

@ -220,28 +220,22 @@ void MemoryManager::ViewClose(void* Ptr,ViewMode mode)
} }
void *MemoryManager::ViewOpen(void* _CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint) void *MemoryManager::ViewOpen(void* _CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint)
{ {
std::cout << __FILE__ << " " << __LINE__ << std::endl;
uint64_t CpuPtr = (uint64_t)_CpuPtr; uint64_t CpuPtr = (uint64_t)_CpuPtr;
if( (mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard) ){ if( (mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard) ){
std::cout << __FILE__ << " " << __LINE__ << std::endl;
dprintf("AcceleratorViewOpen %lx\n",(uint64_t)CpuPtr); dprintf("AcceleratorViewOpen %lx\n",(uint64_t)CpuPtr);
return (void *) AcceleratorViewOpen(CpuPtr,bytes,mode,hint); return (void *) AcceleratorViewOpen(CpuPtr,bytes,mode,hint);
} else if( (mode==CpuRead)||(mode==CpuWrite)){ } else if( (mode==CpuRead)||(mode==CpuWrite)){
std::cout << __FILE__ << " " << __LINE__ << std::endl;
return (void *)CpuViewOpen(CpuPtr,bytes,mode,hint); return (void *)CpuViewOpen(CpuPtr,bytes,mode,hint);
} else { } else {
std::cout << __FILE__ << " " << __LINE__ << std::endl;
assert(0); assert(0);
return NULL; return NULL;
} }
} }
void MemoryManager::EvictVictims(uint64_t bytes) void MemoryManager::EvictVictims(uint64_t bytes)
{ {
std::cout << __FILE__ << " " << __LINE__ << " " << bytes << " " << DeviceLRUBytes << " " << DeviceMaxBytes << std::endl;
assert(bytes<DeviceMaxBytes); assert(bytes<DeviceMaxBytes);
while(bytes+DeviceLRUBytes > DeviceMaxBytes){ while(bytes+DeviceLRUBytes > DeviceMaxBytes){
if ( DeviceLRUBytes > 0){ if ( DeviceLRUBytes > 0){
std::cout << __FILE__ << " " << __LINE__ << std::endl;
assert(LRU.size()>0); assert(LRU.size()>0);
uint64_t victim = LRU.back(); // From the LRU uint64_t victim = LRU.back(); // From the LRU
auto AccCacheIterator = EntryLookup(victim); auto AccCacheIterator = EntryLookup(victim);
@ -251,7 +245,6 @@ void MemoryManager::EvictVictims(uint64_t bytes)
return; return;
} }
} }
std::cout << __FILE__ << " " << __LINE__ << std::endl;
} }
uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint) uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint)
{ {
@ -261,18 +254,13 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
if ( EntryPresent(CpuPtr)==0 ){ if ( EntryPresent(CpuPtr)==0 ){
EntryCreate(CpuPtr,bytes,mode,hint); EntryCreate(CpuPtr,bytes,mode,hint);
} }
std::cout << __FILE__ << " " << __LINE__ << std::endl;
auto AccCacheIterator = EntryLookup(CpuPtr); auto AccCacheIterator = EntryLookup(CpuPtr);
std::cout << __FILE__ << " " << __LINE__ << std::endl;
auto & AccCache = AccCacheIterator->second; auto & AccCache = AccCacheIterator->second;
std::cout << __FILE__ << " " << __LINE__ << std::endl;
if (!AccCache.AccPtr) { if (!AccCache.AccPtr) {
std::cout << __FILE__ << " " << __LINE__ << std::endl;
EvictVictims(bytes); EvictVictims(bytes);
} }
assert((mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard)); assert((mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard));
std::cout << __FILE__ << " " << __LINE__ << std::endl;
assert(AccCache.cpuLock==0); // Programming error assert(AccCache.cpuLock==0); // Programming error
@ -286,7 +274,6 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
assert(AccCache.CpuPtr == CpuPtr); assert(AccCache.CpuPtr == CpuPtr);
assert(AccCache.bytes ==bytes); assert(AccCache.bytes ==bytes);
} }
std::cout << __FILE__ << " " << __LINE__ << std::endl;
/* /*
* State transitions and actions * State transitions and actions
* *
@ -302,7 +289,6 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
* AccWrite AccDirty AccDirty - - * AccWrite AccDirty AccDirty - -
*/ */
if(AccCache.state==Empty) { if(AccCache.state==Empty) {
std::cout << __FILE__ << " " << __LINE__ << std::endl;
assert(AccCache.LRU_valid==0); assert(AccCache.LRU_valid==0);
AccCache.CpuPtr = CpuPtr; AccCache.CpuPtr = CpuPtr;
AccCache.AccPtr = (uint64_t)NULL; AccCache.AccPtr = (uint64_t)NULL;
@ -321,7 +307,6 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
AccCache.accLock= 1; AccCache.accLock= 1;
dprintf("Copied Empty entry into device accLock= %d\n",AccCache.accLock); dprintf("Copied Empty entry into device accLock= %d\n",AccCache.accLock);
} else if(AccCache.state==CpuDirty ){ } else if(AccCache.state==CpuDirty ){
std::cout << __FILE__ << " " << __LINE__ << std::endl;
if(mode==AcceleratorWriteDiscard) { if(mode==AcceleratorWriteDiscard) {
CpuDiscard(AccCache); CpuDiscard(AccCache);
AccCache.state = AccDirty; // CpuDirty + AcceleratorWrite=> AccDirty AccCache.state = AccDirty; // CpuDirty + AcceleratorWrite=> AccDirty
@ -335,7 +320,6 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
AccCache.accLock++; AccCache.accLock++;
dprintf("CpuDirty entry into device ++accLock= %d\n",AccCache.accLock); dprintf("CpuDirty entry into device ++accLock= %d\n",AccCache.accLock);
} else if(AccCache.state==Consistent) { } else if(AccCache.state==Consistent) {
std::cout << __FILE__ << " " << __LINE__ << std::endl;
if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard)) if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard))
AccCache.state = AccDirty; // Consistent + AcceleratorWrite=> AccDirty AccCache.state = AccDirty; // Consistent + AcceleratorWrite=> AccDirty
else else
@ -343,7 +327,6 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
AccCache.accLock++; AccCache.accLock++;
dprintf("Consistent entry into device ++accLock= %d\n",AccCache.accLock); dprintf("Consistent entry into device ++accLock= %d\n",AccCache.accLock);
} else if(AccCache.state==AccDirty) { } else if(AccCache.state==AccDirty) {
std::cout << __FILE__ << " " << __LINE__ << std::endl;
if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard)) if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard))
AccCache.state = AccDirty; // AccDirty + AcceleratorWrite=> AccDirty AccCache.state = AccDirty; // AccDirty + AcceleratorWrite=> AccDirty
else else
@ -351,14 +334,12 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
AccCache.accLock++; AccCache.accLock++;
dprintf("AccDirty entry ++accLock= %d\n",AccCache.accLock); dprintf("AccDirty entry ++accLock= %d\n",AccCache.accLock);
} else { } else {
std::cout << __FILE__ << " " << __LINE__ << std::endl;
assert(0); assert(0);
} }
assert(AccCache.accLock>0); assert(AccCache.accLock>0);
// If view is opened on device must remove from LRU // If view is opened on device must remove from LRU
if(AccCache.LRU_valid==1){ if(AccCache.LRU_valid==1){
std::cout << __FILE__ << " " << __LINE__ << std::endl;
// must possibly remove from LRU as now locked on GPU // must possibly remove from LRU as now locked on GPU
dprintf("AccCache entry removed from LRU \n"); dprintf("AccCache entry removed from LRU \n");
LRUremove(AccCache); LRUremove(AccCache);
@ -367,7 +348,6 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
int transient =hint; int transient =hint;
AccCache.transient= transient? EvictNext : 0; AccCache.transient= transient? EvictNext : 0;
std::cout << __FILE__ << " " << __LINE__ << std::endl;
return AccCache.AccPtr; return AccCache.AccPtr;
} }
//////////////////////////////////// ////////////////////////////////////

View File

@ -78,7 +78,6 @@ private:
else else
this->_odata = nullptr; this->_odata = nullptr;
} }
std::cout << __FILE__ << " " << __LINE__ << std::endl;
} }
public: public:
@ -86,17 +85,13 @@ public:
// Can use to make accelerator dirty without copy from host ; useful for temporaries "dont care" prev contents // Can use to make accelerator dirty without copy from host ; useful for temporaries "dont care" prev contents
///////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////
void SetViewMode(ViewMode mode) { void SetViewMode(ViewMode mode) {
std::cout << __FILE__ << " " << __LINE__ << std::endl;
LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this),mode); LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this),mode);
std::cout << __FILE__ << " " << __LINE__ << std::endl;
accessor.ViewClose(); accessor.ViewClose();
std::cout << __FILE__ << " " << __LINE__ << std::endl;
} }
// Helper function to print the state of this object in the AccCache // Helper function to print the state of this object in the AccCache
void PrintCacheState(void) void PrintCacheState(void)
{ {
std::cout << __FILE__ << " " << __LINE__ << std::endl;
MemoryManager::PrintState(this->_odata); MemoryManager::PrintState(this->_odata);
} }
@ -108,7 +103,6 @@ public:
LatticeView<vobj> View (ViewMode mode) const LatticeView<vobj> View (ViewMode mode) const
{ {
std::cout << __FILE__ << " " << __LINE__ << std::endl;
LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this),mode); LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this),mode);
return accessor; return accessor;
} }
@ -123,7 +117,6 @@ public:
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
template <typename Op, typename T1> inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr) template <typename Op, typename T1> inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr)
{ {
std::cout << __FILE__ << " " << __LINE__ << std::endl;
GRID_TRACE("ExpressionTemplateEval"); GRID_TRACE("ExpressionTemplateEval");
GridBase *egrid(nullptr); GridBase *egrid(nullptr);
GridFromExpression(egrid,expr); GridFromExpression(egrid,expr);
@ -148,7 +141,6 @@ public:
} }
template <typename Op, typename T1,typename T2> inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr) template <typename Op, typename T1,typename T2> inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr)
{ {
std::cout << __FILE__ << " " << __LINE__ << std::endl;
GRID_TRACE("ExpressionTemplateEval"); GRID_TRACE("ExpressionTemplateEval");
GridBase *egrid(nullptr); GridBase *egrid(nullptr);
GridFromExpression(egrid,expr); GridFromExpression(egrid,expr);
@ -173,7 +165,6 @@ public:
} }
template <typename Op, typename T1,typename T2,typename T3> inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr) template <typename Op, typename T1,typename T2,typename T3> inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr)
{ {
std::cout << __FILE__ << " " << __LINE__ << std::endl;
GRID_TRACE("ExpressionTemplateEval"); GRID_TRACE("ExpressionTemplateEval");
GridBase *egrid(nullptr); GridBase *egrid(nullptr);
GridFromExpression(egrid,expr); GridFromExpression(egrid,expr);
@ -198,7 +189,6 @@ public:
//GridFromExpression is tricky to do //GridFromExpression is tricky to do
template<class Op,class T1> template<class Op,class T1>
Lattice(const LatticeUnaryExpression<Op,T1> & expr) { Lattice(const LatticeUnaryExpression<Op,T1> & expr) {
std::cout << __FILE__ << " " << __LINE__ << std::endl;
this->_grid = nullptr; this->_grid = nullptr;
GridFromExpression(this->_grid,expr); GridFromExpression(this->_grid,expr);
assert(this->_grid!=nullptr); assert(this->_grid!=nullptr);
@ -214,7 +204,6 @@ public:
} }
template<class Op,class T1, class T2> template<class Op,class T1, class T2>
Lattice(const LatticeBinaryExpression<Op,T1,T2> & expr) { Lattice(const LatticeBinaryExpression<Op,T1,T2> & expr) {
std::cout << __FILE__ << " " << __LINE__ << std::endl;
this->_grid = nullptr; this->_grid = nullptr;
GridFromExpression(this->_grid,expr); GridFromExpression(this->_grid,expr);
assert(this->_grid!=nullptr); assert(this->_grid!=nullptr);
@ -230,7 +219,6 @@ public:
} }
template<class Op,class T1, class T2, class T3> template<class Op,class T1, class T2, class T3>
Lattice(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr) { Lattice(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr) {
std::cout << __FILE__ << " " << __LINE__ << std::endl;
this->_grid = nullptr; this->_grid = nullptr;
GridFromExpression(this->_grid,expr); GridFromExpression(this->_grid,expr);
assert(this->_grid!=nullptr); assert(this->_grid!=nullptr);
@ -246,7 +234,6 @@ public:
} }
template<class sobj> inline Lattice<vobj> & operator = (const sobj & r){ template<class sobj> inline Lattice<vobj> & operator = (const sobj & r){
std::cout << __FILE__ << " " << __LINE__ << std::endl;
auto me = View(CpuWrite); auto me = View(CpuWrite);
thread_for(ss,me.size(),{ thread_for(ss,me.size(),{
me[ss]= r; me[ss]= r;
@ -262,19 +249,16 @@ public:
// user defined constructor // user defined constructor
/////////////////////////////////////////// ///////////////////////////////////////////
Lattice(GridBase *grid,ViewMode mode=AcceleratorWriteDiscard) { Lattice(GridBase *grid,ViewMode mode=AcceleratorWriteDiscard) {
std::cout << __FILE__ << " " << __LINE__ << std::endl;
this->_grid = grid; this->_grid = grid;
resize(this->_grid->oSites()); resize(this->_grid->oSites());
assert((((uint64_t)&this->_odata[0])&0xF) ==0); assert((((uint64_t)&this->_odata[0])&0xF) ==0);
this->checkerboard=0; this->checkerboard=0;
SetViewMode(mode); SetViewMode(mode);
std::cout << __FILE__ << " " << __LINE__ << std::endl;
} }
// virtual ~Lattice(void) = default; // virtual ~Lattice(void) = default;
void reset(GridBase* grid) { void reset(GridBase* grid) {
std::cout << __FILE__ << " " << __LINE__ << std::endl;
if (this->_grid != grid) { if (this->_grid != grid) {
this->_grid = grid; this->_grid = grid;
this->resize(grid->oSites()); this->resize(grid->oSites());
@ -285,7 +269,6 @@ public:
// copy constructor // copy constructor
/////////////////////////////////////////// ///////////////////////////////////////////
Lattice(const Lattice& r){ Lattice(const Lattice& r){
std::cout << __FILE__ << " " << __LINE__ << std::endl;
this->_grid = r.Grid(); this->_grid = r.Grid();
resize(this->_grid->oSites()); resize(this->_grid->oSites());
*this = r; *this = r;
@ -294,7 +277,6 @@ public:
// move constructor // move constructor
/////////////////////////////////////////// ///////////////////////////////////////////
Lattice(Lattice && r){ Lattice(Lattice && r){
std::cout << __FILE__ << " " << __LINE__ << std::endl;
this->_grid = r.Grid(); this->_grid = r.Grid();
this->_odata = r._odata; this->_odata = r._odata;
this->_odata_size = r._odata_size; this->_odata_size = r._odata_size;
@ -306,7 +288,6 @@ public:
// assignment template // assignment template
/////////////////////////////////////////// ///////////////////////////////////////////
template<class robj> inline Lattice<vobj> & operator = (const Lattice<robj> & r){ template<class robj> inline Lattice<vobj> & operator = (const Lattice<robj> & r){
std::cout << __FILE__ << " " << __LINE__ << std::endl;
typename std::enable_if<!std::is_same<robj,vobj>::value,int>::type i=0; typename std::enable_if<!std::is_same<robj,vobj>::value,int>::type i=0;
conformable(*this,r); conformable(*this,r);
this->checkerboard = r.Checkerboard(); this->checkerboard = r.Checkerboard();
@ -323,7 +304,6 @@ public:
// Copy assignment // Copy assignment
/////////////////////////////////////////// ///////////////////////////////////////////
inline Lattice<vobj> & operator = (const Lattice<vobj> & r){ inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
std::cout << __FILE__ << " " << __LINE__ << std::endl;
this->checkerboard = r.Checkerboard(); this->checkerboard = r.Checkerboard();
conformable(*this,r); conformable(*this,r);
auto him= r.View(AcceleratorRead); auto him= r.View(AcceleratorRead);
@ -338,7 +318,6 @@ public:
// Move assignment possible if same type // Move assignment possible if same type
/////////////////////////////////////////// ///////////////////////////////////////////
inline Lattice<vobj> & operator = (Lattice<vobj> && r){ inline Lattice<vobj> & operator = (Lattice<vobj> && r){
std::cout << __FILE__ << " " << __LINE__ << std::endl;
resize(0); // deletes if appropriate resize(0); // deletes if appropriate
this->_grid = r.Grid(); this->_grid = r.Grid();
@ -356,24 +335,20 @@ public:
// *=,+=,-= operators inherit behvour from correspond */+/- operation // *=,+=,-= operators inherit behvour from correspond */+/- operation
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
template<class T> inline Lattice<vobj> &operator *=(const T &r) { template<class T> inline Lattice<vobj> &operator *=(const T &r) {
std::cout << __FILE__ << " " << __LINE__ << std::endl;
*this = (*this)*r; *this = (*this)*r;
return *this; return *this;
} }
template<class T> inline Lattice<vobj> &operator -=(const T &r) { template<class T> inline Lattice<vobj> &operator -=(const T &r) {
std::cout << __FILE__ << " " << __LINE__ << std::endl;
*this = (*this)-r; *this = (*this)-r;
return *this; return *this;
} }
template<class T> inline Lattice<vobj> &operator +=(const T &r) { template<class T> inline Lattice<vobj> &operator +=(const T &r) {
std::cout << __FILE__ << " " << __LINE__ << std::endl;
*this = (*this)+r; *this = (*this)+r;
return *this; return *this;
} }
friend inline void swap(Lattice &l, Lattice &r) { friend inline void swap(Lattice &l, Lattice &r) {
std::cout << __FILE__ << " " << __LINE__ << std::endl;
conformable(l,r); conformable(l,r);
LatticeAccelerator<vobj> tmp; LatticeAccelerator<vobj> tmp;
LatticeAccelerator<vobj> *lp = (LatticeAccelerator<vobj> *)&l; LatticeAccelerator<vobj> *lp = (LatticeAccelerator<vobj> *)&l;
@ -384,7 +359,6 @@ public:
}; // class Lattice }; // class Lattice
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){ template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
std::cout << __FILE__ << " " << __LINE__ << std::endl;
typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_object sobj;
for(int g=0;g<o.Grid()->_gsites;g++){ for(int g=0;g<o.Grid()->_gsites;g++){

View File

@ -32,6 +32,9 @@ int getNumBlocksAndThreads(const Iterator n, const size_t sizeofsobj, Iterator &
#ifdef GRID_HIP #ifdef GRID_HIP
hipGetDevice(&device); hipGetDevice(&device);
#endif #endif
#ifdef GRID_OMPTARGET
device = omp_get_device_num();
#endif
Iterator warpSize = gpu_props[device].warpSize; Iterator warpSize = gpu_props[device].warpSize;
Iterator sharedMemPerBlock = gpu_props[device].sharedMemPerBlock; Iterator sharedMemPerBlock = gpu_props[device].sharedMemPerBlock;

View File

@ -32,15 +32,14 @@ protected:
uint64_t _odata_size; uint64_t _odata_size;
ViewAdvise advise; ViewAdvise advise;
public: public:
accelerator_inline LatticeAccelerator() : checkerboard(0), _odata(nullptr), _odata_size(0), _grid(nullptr), advise(AdviseDefault) { std::cout << __FILE__ << " " << __LINE__ << std::endl; }; accelerator_inline LatticeAccelerator() : checkerboard(0), _odata(nullptr), _odata_size(0), _grid(nullptr), advise(AdviseDefault) { };
accelerator_inline uint64_t oSites(void) const { std::cout << __FILE__ << " " << __LINE__ << std::endl; return _odata_size; }; accelerator_inline uint64_t oSites(void) const { return _odata_size; };
accelerator_inline int Checkerboard(void) const { std::cout << __FILE__ << " " << __LINE__ << std::endl; return checkerboard; }; accelerator_inline int Checkerboard(void) const { return checkerboard; };
accelerator_inline int &Checkerboard(void) { std::cout << __FILE__ << " " << __LINE__ << std::endl; return this->checkerboard; }; // can assign checkerboard on a container, not a view accelerator_inline int &Checkerboard(void) { return this->checkerboard; }; // can assign checkerboard on a container, not a view
accelerator_inline ViewAdvise Advise(void) const { std::cout << __FILE__ << " " << __LINE__ << std::endl; return advise; }; accelerator_inline ViewAdvise Advise(void) const { return advise; };
accelerator_inline ViewAdvise &Advise(void) { std::cout << __FILE__ << " " << __LINE__ << std::endl; return this->advise; }; // can assign advise on a container, not a view accelerator_inline ViewAdvise &Advise(void) { return this->advise; }; // can assign advise on a container, not a view
accelerator_inline void Conformable(GridBase * &grid) const accelerator_inline void Conformable(GridBase * &grid) const
{ {
std::cout << __FILE__ << " " << __LINE__ << std::endl;
if (grid) conformable(grid, _grid); if (grid) conformable(grid, _grid);
else grid = _grid; else grid = _grid;
}; };
@ -80,11 +79,10 @@ public:
accelerator_inline uint64_t end(void) const { return this->_odata_size; }; accelerator_inline uint64_t end(void) const { return this->_odata_size; };
accelerator_inline uint64_t size(void) const { return this->_odata_size; }; accelerator_inline uint64_t size(void) const { return this->_odata_size; };
LatticeView(const LatticeAccelerator<vobj> &refer_to_me) : LatticeAccelerator<vobj> (refer_to_me){ std::cout << __FILE__ << " " << __LINE__ << std::endl; } LatticeView(const LatticeAccelerator<vobj> &refer_to_me) : LatticeAccelerator<vobj> (refer_to_me){ }
LatticeView(const LatticeView<vobj> &refer_to_me) = default; // Trivially copyable LatticeView(const LatticeView<vobj> &refer_to_me) = default; // Trivially copyable
LatticeView(const LatticeAccelerator<vobj> &refer_to_me,ViewMode mode) : LatticeAccelerator<vobj> (refer_to_me) LatticeView(const LatticeAccelerator<vobj> &refer_to_me,ViewMode mode) : LatticeAccelerator<vobj> (refer_to_me)
{ {
std::cout << __FILE__ << " " << __LINE__ << std::endl;
this->ViewOpen(mode); this->ViewOpen(mode);
} }
@ -92,16 +90,13 @@ public:
void ViewOpen(ViewMode mode) void ViewOpen(ViewMode mode)
{ // Translate the pointer, could save a copy. Could use a "Handle" and not save _odata originally in base { // Translate the pointer, could save a copy. Could use a "Handle" and not save _odata originally in base
// std::cout << "View Open"<<std::hex<<this->_odata<<std::dec <<std::endl; // std::cout << "View Open"<<std::hex<<this->_odata<<std::dec <<std::endl;
std::cout << __FILE__ << " " << __LINE__ << std::endl;
this->cpu_ptr = (void *)this->_odata; this->cpu_ptr = (void *)this->_odata;
this->mode = mode; this->mode = mode;
std::cout << __FILE__ << " " << __LINE__ << " " << this->cpu_ptr << " " << this->_odata_size*sizeof(vobj) << std::endl;
this->_odata =(vobj *) this->_odata =(vobj *)
MemoryManager::ViewOpen(this->cpu_ptr, MemoryManager::ViewOpen(this->cpu_ptr,
this->_odata_size*sizeof(vobj), this->_odata_size*sizeof(vobj),
mode, mode,
this->advise); this->advise);
std::cout << __FILE__ << " " << __LINE__ << std::endl;
} }
void ViewClose(void) void ViewClose(void)
{ // Inform the manager { // Inform the manager

View File

@ -14,7 +14,15 @@ void acceleratorThreads(uint32_t t) {accelerator_threads = t;};
#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK" #define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK"
#define ENV_RANK_MVAPICH "MV2_COMM_WORLD_RANK" #define ENV_RANK_MVAPICH "MV2_COMM_WORLD_RANK"
#ifdef GRID_CUDA #ifdef __CUDA_ARCH__
#warning "ifdef cuda arch"
#endif
// fold omptarget into device specific acceleratorInit()
#include <cuda_runtime_api.h>
//#if defined(GRID_CUDA) || (defined(GRID_OMPTARGET) && defined(__CUDA_ARCH__))
#if defined(GRID_OMPTARGET)
#warning "using cuda from opmtarget"
cudaDeviceProp *gpu_props; cudaDeviceProp *gpu_props;
cudaStream_t copyStream; cudaStream_t copyStream;
cudaStream_t computeStream; cudaStream_t computeStream;
@ -113,7 +121,7 @@ void acceleratorInit(void)
} }
#endif #endif
#ifdef GRID_HIP #if defined(GRID_HIP) || (defined(GRID_OMPTARGET) && defined(__HIP_DEVICE_COMPILE__))
hipDeviceProp_t *gpu_props; hipDeviceProp_t *gpu_props;
hipStream_t copyStream; hipStream_t copyStream;
hipStream_t computeStream; hipStream_t computeStream;
@ -198,7 +206,7 @@ void acceleratorInit(void)
#endif #endif
#ifdef GRID_SYCL #if defined(GRID_SYCL) || (defined(GRID_OMPTARGET) && defined(__SYCL_DEVICE_ONLY__))
cl::sycl::queue *theGridAccelerator; cl::sycl::queue *theGridAccelerator;
cl::sycl::queue *theCopyAccelerator; cl::sycl::queue *theCopyAccelerator;
@ -270,7 +278,7 @@ void acceleratorInit(void)
} }
#endif #endif
#if (!defined(GRID_CUDA)) && (!defined(GRID_SYCL))&& (!defined(GRID_HIP)) #if (!defined(GRID_CUDA)) && (!defined(GRID_SYCL))&& (!defined(GRID_HIP)) && (!defined(GRID_OMPTARGET))
void acceleratorInit(void){} void acceleratorInit(void){}
#endif #endif

View File

@ -477,16 +477,20 @@ inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream);
#endif #endif
////////////////////////////////////////////// //////////////////////////////////////////////
// CPU Target - No accelerator just thread instead // OpenMP Target acceleration
////////////////////////////////////////////// //////////////////////////////////////////////
#ifdef GRID_OMPTARGET
#if ( (!defined(GRID_SYCL)) && (!defined(GRID_CUDA)) && (!defined(GRID_HIP)) ) //TODO GRID_SIMT for OMPTARGET
#define GRID_ACCELERATED
#undef GRID_SIMT
//OpenMP Target Offloading
#ifdef OMPTARGET
#include<omp.h> #include<omp.h>
#ifdef __CUDA_ARCH__
#include <cuda_runtime_api.h>
#elif defined __HIP_DEVICE_COMPILE__
#include <hip/hip_runtime.h>
#elif defined __SYCL_DEVICE_ONLY__
#include <CL/sycl.hpp>
#include <CL/sycl/usm.hpp>
#endif
extern "C" void *llvm_omp_target_alloc_host (size_t Size, int DeviceNum); extern "C" void *llvm_omp_target_alloc_host (size_t Size, int DeviceNum);
extern "C" void *llvm_omp_target_alloc_device(size_t Size, int DeviceNum); extern "C" void *llvm_omp_target_alloc_device(size_t Size, int DeviceNum);
extern "C" void *llvm_omp_target_alloc_shared(size_t Size, int DeviceNum); extern "C" void *llvm_omp_target_alloc_shared(size_t Size, int DeviceNum);
@ -543,8 +547,33 @@ inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes)
} }
std::cout << "D->H copy from device end "<<std::endl; std::cout << "D->H copy from device end "<<std::endl;
}; };
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { printf("TODO acceleratorCopyDeviceToDeviceAsynch");memcpy(to,from,bytes);} inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes)
inline void acceleratorCopySynchronise(void) {printf("TODO acceleratorCopySynchronize");}; {
printf("TODO acceleratorCopyDeviceToDeviceAsynch");//memcpy(to,from,bytes);
#ifdef __CUDA_ARCH__
extern cudaStream_t copyStream;
cudaMemcpyAsync(to,from,bytes, cudaMemcpyDeviceToDevice,copyStream);
#elif defined __HIP_DEVICE_COMPILE__
extern hipStream_t copyStream;
hipMemcpyDtoDAsync(to,from,bytes, copyStream);
#elif defined __SYCL_DEVICE_ONLY__
theCopyAccelerator->memcpy(to,from,bytes);
#endif
};
inline void acceleratorCopySynchronise(void)
{
printf("TODO acceleratorCopySynchronize");
//#pragma omp barrier
#ifdef __CUDA_ARCH__
extern cudaStream_t copyStream;
cudaStreamSynchronize(copyStream);
#elif defined __HIP_DEVICE_COMPILE__
extern hipStream_t copyStream;
hipStreamSynchronize(copyStream);
#elif defined __SYCL_DEVICE_ONLY__
theCopyAccelerator->wait();
#endif
};
inline int acceleratorIsCommunicable(void *ptr){ return 1; } inline int acceleratorIsCommunicable(void *ptr){ return 1; }
inline void acceleratorMemSet(void *base,int value,size_t bytes) inline void acceleratorMemSet(void *base,int value,size_t bytes)
@ -558,11 +587,10 @@ inline void acceleratorMemSet(void *base,int value,size_t bytes)
printf(" omp_target_memcpy device to host failed in MemSet for %ld in device %d \n",bytes,devc); printf(" omp_target_memcpy device to host failed in MemSet for %ld in device %d \n",bytes,devc);
} }
}; };
#ifdef OMPTARGET_MANAGED
#include <cuda_runtime_api.h>
inline void *acceleratorAllocShared(size_t bytes) inline void *acceleratorAllocShared(size_t bytes)
{ {
std::cout << " l-l-l-l-l-l-l-l-l-l-l-l-l Allocating shared from OMPTARGET MANAGED l-l-l-l-l-l-l-l-l-l-l-l "<<std::endl; #ifdef __CUDA_ARCH__
std::cout << " l-l-l-l-l-l-l-l-l-l-l-l-l Allocating shared from OMPTARGET MANAGED from CUDA l-l-l-l-l-l-l-l-l-l-l-l "<<std::endl;
void *ptr=NULL; void *ptr=NULL;
auto err = cudaMallocManaged((void **)&ptr,bytes); auto err = cudaMallocManaged((void **)&ptr,bytes);
if( err != cudaSuccess ) { if( err != cudaSuccess ) {
@ -570,25 +598,22 @@ inline void *acceleratorAllocShared(size_t bytes)
printf(" cudaMallocManaged failed for %d %s \n",bytes,cudaGetErrorString(err)); printf(" cudaMallocManaged failed for %d %s \n",bytes,cudaGetErrorString(err));
} }
return ptr; return ptr;
}; #elif defined __HIP_DEVICE_COMPILE__
inline void acceleratorFreeShared(void *ptr){cudaFree(ptr);}; std::cout << " l-l-l-l-l-l-l-l-l-l-l-l-l Allocating shared from OMPTARGET MANAGED from HIP l-l-l-l-l-l-l-l-l-l-l-l "<<std::endl;
//inline void *acceleratorAllocDevice(size_t bytes){return memalign(GRID_ALLOC_ALIGN,bytes);};
inline void *acceleratorAllocDevice(size_t bytes)
{
std::cout << " l-l-l-l-l-l-l-l-l-l-l-l-l Allocating device from OMPTARGET MANAGED l-l-l-l-l-l-l-l-l-l-l-l "<<std::endl;
void *ptr=NULL; void *ptr=NULL;
auto err = cudaMallocManaged((void **)&ptr,bytes); auto err = hipMallocManaged((void **)&ptr,bytes);
if( err != cudaSuccess ) { if( err != hipSuccess ) {
ptr = (void *) NULL; ptr = (void *) NULL;
printf(" cudaMallocManaged failed for %d %s \n",bytes,cudaGetErrorString(err)); printf(" hipMallocManaged failed for %d %s \n",bytes,cudaGetErrorString(err));
} }
return ptr; return ptr;
}; #elif defined __SYCL_DEVICE_ONLY__
inline void acceleratorFreeDevice(void *ptr){free(ptr);}; std::cout << " l-l-l-l-l-l-l-l-l-l-l-l-l Allocating shared from OMPTARGET MANAGED from SYCL l-l-l-l-l-l-l-l-l-l-l-l "<<std::endl;
queue q;
//void *ptr = malloc_shared<void *>(bytes, q);
return ptr;
#else #else
inline void *acceleratorAllocShared(size_t bytes) std::cout << " l-l-l-l-l-l-l-l-l-l-l-l-l Allocating shared mem from OMPTARGET from LLVM l-l-l-l-l-l-l-l-l-l-l-l "<<std::endl;
{
std::cout << " l-l-l-l-l-l-l-l-l-l-l-l-l Allocating shared mem from OMPTARGET l-l-l-l-l-l-l-l-l-l-l-l "<<std::endl;
int devc = omp_get_default_device(); int devc = omp_get_default_device();
void *ptr=NULL; void *ptr=NULL;
ptr = (void *) llvm_omp_target_alloc_shared(bytes, devc); ptr = (void *) llvm_omp_target_alloc_shared(bytes, devc);
@ -596,6 +621,7 @@ inline void *acceleratorAllocShared(size_t bytes)
printf(" llvm_omp_target_alloc_shared failed for %ld in device %d \n",bytes,devc); printf(" llvm_omp_target_alloc_shared failed for %ld in device %d \n",bytes,devc);
} }
return ptr; return ptr;
#endif
}; };
inline void *acceleratorAllocDevice(size_t bytes) inline void *acceleratorAllocDevice(size_t bytes)
{ {
@ -610,7 +636,6 @@ inline void *acceleratorAllocDevice(size_t bytes)
}; };
inline void acceleratorFreeShared(void *ptr){omp_target_free(ptr, omp_get_default_device());}; inline void acceleratorFreeShared(void *ptr){omp_target_free(ptr, omp_get_default_device());};
inline void acceleratorFreeDevice(void *ptr){omp_target_free(ptr, omp_get_default_device());}; inline void acceleratorFreeDevice(void *ptr){omp_target_free(ptr, omp_get_default_device());};
#endif
//OpenMP CPU threads //OpenMP CPU threads
#else #else
@ -644,6 +669,12 @@ inline void acceleratorFreeDevice(void *ptr){free(ptr);};
#endif #endif
#endif #endif
//////////////////////////////////////////////
// CPU Target - No accelerator just thread instead
//////////////////////////////////////////////
#if ( (!defined(GRID_SYCL)) && (!defined(GRID_CUDA)) && (!defined(GRID_HIP)) ) && (!defined(GRID_OMPTARGET))
#undef GRID_SIMT
#endif // CPU target #endif // CPU target
#ifdef HAVE_MM_MALLOC_H #ifdef HAVE_MM_MALLOC_H

11471
configure vendored Executable file

File diff suppressed because it is too large Load Diff

View File

@ -229,13 +229,13 @@ case ${ac_ACC_CSHIFT} in
esac esac
############### SYCL/CUDA/HIP/none ############### SYCL/CUDA/HIP/OpenMP/none
AC_ARG_ENABLE([accelerator], AC_ARG_ENABLE([accelerator],
[AS_HELP_STRING([--enable-accelerator=cuda|sycl|hip|none],[enable none,cuda,sycl,hip acceleration])], [AS_HELP_STRING([--enable-accelerator=cuda|sycl|hip|openmp|none],[enable none,openmp,cuda,sycl,hip acceleration])],
[ac_ACCELERATOR=${enable_accelerator}], [ac_ACCELERATOR=none]) [ac_ACCELERATOR=${enable_accelerator}], [ac_ACCELERATOR=none])
case ${ac_ACCELERATOR} in case ${ac_ACCELERATOR} in
cuda) cuda)
echo CUDA acceleration echo CUDA acceleration ${ac_ACCELERATOR} ${enable_accelerator}
LIBS="${LIBS} -lcuda" LIBS="${LIBS} -lcuda"
AC_DEFINE([GRID_CUDA],[1],[Use CUDA offload]);; AC_DEFINE([GRID_CUDA],[1],[Use CUDA offload]);;
sycl) sycl)
@ -244,12 +244,15 @@ case ${ac_ACCELERATOR} in
hip) hip)
echo HIP acceleration echo HIP acceleration
AC_DEFINE([GRID_HIP],[1],[Use HIP offload]);; AC_DEFINE([GRID_HIP],[1],[Use HIP offload]);;
openmp)
echo OMPTARGET acceleration
AC_DEFINE([GRID_OMPTARGET],[1],[Use OMPTARGET offload]);;
none) none)
echo NO acceleration ;; echo NO acceleration ;;
no) no)
echo NO acceleration ;; echo NO acceleration ;;
*) *)
AC_MSG_ERROR(["Acceleration not suppoorted ${ac_ACCELERATOR}"]);; AC_MSG_ERROR(["1Acceleration not suppoorted ${ac_ACCELERATOR}"]);;
esac esac
############### UNIFIED MEMORY ############### UNIFIED MEMORY