mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-17 15:27:06 +01:00
removed print flags
This commit is contained in:
@ -222,6 +222,9 @@ void MemoryManager::InitMessage(void) {
|
|||||||
#ifdef GRID_SYCL
|
#ifdef GRID_SYCL
|
||||||
std::cout << GridLogMessage<< "MemoryManager::Init() Using SYCL malloc_shared"<<std::endl;
|
std::cout << GridLogMessage<< "MemoryManager::Init() Using SYCL malloc_shared"<<std::endl;
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef GRID_OMPTARGET
|
||||||
|
std::cout << GridLogMessage<< "MemoryManager::Init() Using OMPTARGET omp_alloc_device"<<std::endl;
|
||||||
|
#endif
|
||||||
#else
|
#else
|
||||||
std::cout << GridLogMessage<< "MemoryManager::Init() Non unified: Caching accelerator data in dedicated memory"<<std::endl;
|
std::cout << GridLogMessage<< "MemoryManager::Init() Non unified: Caching accelerator data in dedicated memory"<<std::endl;
|
||||||
#ifdef GRID_CUDA
|
#ifdef GRID_CUDA
|
||||||
@ -233,6 +236,9 @@ void MemoryManager::InitMessage(void) {
|
|||||||
#ifdef GRID_SYCL
|
#ifdef GRID_SYCL
|
||||||
std::cout << GridLogMessage<< "MemoryManager::Init() Using SYCL malloc_device"<<std::endl;
|
std::cout << GridLogMessage<< "MemoryManager::Init() Using SYCL malloc_device"<<std::endl;
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef GRID_OMPTARGET
|
||||||
|
std::cout << GridLogMessage<< "MemoryManager::Init() Using OMPTARGET managed memory"<<std::endl;
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -220,28 +220,22 @@ void MemoryManager::ViewClose(void* Ptr,ViewMode mode)
|
|||||||
}
|
}
|
||||||
void *MemoryManager::ViewOpen(void* _CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint)
|
void *MemoryManager::ViewOpen(void* _CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint)
|
||||||
{
|
{
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
uint64_t CpuPtr = (uint64_t)_CpuPtr;
|
uint64_t CpuPtr = (uint64_t)_CpuPtr;
|
||||||
if( (mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard) ){
|
if( (mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard) ){
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
dprintf("AcceleratorViewOpen %lx\n",(uint64_t)CpuPtr);
|
dprintf("AcceleratorViewOpen %lx\n",(uint64_t)CpuPtr);
|
||||||
return (void *) AcceleratorViewOpen(CpuPtr,bytes,mode,hint);
|
return (void *) AcceleratorViewOpen(CpuPtr,bytes,mode,hint);
|
||||||
} else if( (mode==CpuRead)||(mode==CpuWrite)){
|
} else if( (mode==CpuRead)||(mode==CpuWrite)){
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
return (void *)CpuViewOpen(CpuPtr,bytes,mode,hint);
|
return (void *)CpuViewOpen(CpuPtr,bytes,mode,hint);
|
||||||
} else {
|
} else {
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
assert(0);
|
assert(0);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void MemoryManager::EvictVictims(uint64_t bytes)
|
void MemoryManager::EvictVictims(uint64_t bytes)
|
||||||
{
|
{
|
||||||
std::cout << __FILE__ << " " << __LINE__ << " " << bytes << " " << DeviceLRUBytes << " " << DeviceMaxBytes << std::endl;
|
|
||||||
assert(bytes<DeviceMaxBytes);
|
assert(bytes<DeviceMaxBytes);
|
||||||
while(bytes+DeviceLRUBytes > DeviceMaxBytes){
|
while(bytes+DeviceLRUBytes > DeviceMaxBytes){
|
||||||
if ( DeviceLRUBytes > 0){
|
if ( DeviceLRUBytes > 0){
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
assert(LRU.size()>0);
|
assert(LRU.size()>0);
|
||||||
uint64_t victim = LRU.back(); // From the LRU
|
uint64_t victim = LRU.back(); // From the LRU
|
||||||
auto AccCacheIterator = EntryLookup(victim);
|
auto AccCacheIterator = EntryLookup(victim);
|
||||||
@ -251,7 +245,6 @@ void MemoryManager::EvictVictims(uint64_t bytes)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
}
|
}
|
||||||
uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint)
|
uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint)
|
||||||
{
|
{
|
||||||
@ -261,18 +254,13 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
|
|||||||
if ( EntryPresent(CpuPtr)==0 ){
|
if ( EntryPresent(CpuPtr)==0 ){
|
||||||
EntryCreate(CpuPtr,bytes,mode,hint);
|
EntryCreate(CpuPtr,bytes,mode,hint);
|
||||||
}
|
}
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
|
|
||||||
auto AccCacheIterator = EntryLookup(CpuPtr);
|
auto AccCacheIterator = EntryLookup(CpuPtr);
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
auto & AccCache = AccCacheIterator->second;
|
auto & AccCache = AccCacheIterator->second;
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
if (!AccCache.AccPtr) {
|
if (!AccCache.AccPtr) {
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
EvictVictims(bytes);
|
EvictVictims(bytes);
|
||||||
}
|
}
|
||||||
assert((mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard));
|
assert((mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard));
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
|
|
||||||
assert(AccCache.cpuLock==0); // Programming error
|
assert(AccCache.cpuLock==0); // Programming error
|
||||||
|
|
||||||
@ -286,7 +274,6 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
|
|||||||
assert(AccCache.CpuPtr == CpuPtr);
|
assert(AccCache.CpuPtr == CpuPtr);
|
||||||
assert(AccCache.bytes ==bytes);
|
assert(AccCache.bytes ==bytes);
|
||||||
}
|
}
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
/*
|
/*
|
||||||
* State transitions and actions
|
* State transitions and actions
|
||||||
*
|
*
|
||||||
@ -302,7 +289,6 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
|
|||||||
* AccWrite AccDirty AccDirty - -
|
* AccWrite AccDirty AccDirty - -
|
||||||
*/
|
*/
|
||||||
if(AccCache.state==Empty) {
|
if(AccCache.state==Empty) {
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
assert(AccCache.LRU_valid==0);
|
assert(AccCache.LRU_valid==0);
|
||||||
AccCache.CpuPtr = CpuPtr;
|
AccCache.CpuPtr = CpuPtr;
|
||||||
AccCache.AccPtr = (uint64_t)NULL;
|
AccCache.AccPtr = (uint64_t)NULL;
|
||||||
@ -321,7 +307,6 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
|
|||||||
AccCache.accLock= 1;
|
AccCache.accLock= 1;
|
||||||
dprintf("Copied Empty entry into device accLock= %d\n",AccCache.accLock);
|
dprintf("Copied Empty entry into device accLock= %d\n",AccCache.accLock);
|
||||||
} else if(AccCache.state==CpuDirty ){
|
} else if(AccCache.state==CpuDirty ){
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
if(mode==AcceleratorWriteDiscard) {
|
if(mode==AcceleratorWriteDiscard) {
|
||||||
CpuDiscard(AccCache);
|
CpuDiscard(AccCache);
|
||||||
AccCache.state = AccDirty; // CpuDirty + AcceleratorWrite=> AccDirty
|
AccCache.state = AccDirty; // CpuDirty + AcceleratorWrite=> AccDirty
|
||||||
@ -335,7 +320,6 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
|
|||||||
AccCache.accLock++;
|
AccCache.accLock++;
|
||||||
dprintf("CpuDirty entry into device ++accLock= %d\n",AccCache.accLock);
|
dprintf("CpuDirty entry into device ++accLock= %d\n",AccCache.accLock);
|
||||||
} else if(AccCache.state==Consistent) {
|
} else if(AccCache.state==Consistent) {
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard))
|
if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard))
|
||||||
AccCache.state = AccDirty; // Consistent + AcceleratorWrite=> AccDirty
|
AccCache.state = AccDirty; // Consistent + AcceleratorWrite=> AccDirty
|
||||||
else
|
else
|
||||||
@ -343,7 +327,6 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
|
|||||||
AccCache.accLock++;
|
AccCache.accLock++;
|
||||||
dprintf("Consistent entry into device ++accLock= %d\n",AccCache.accLock);
|
dprintf("Consistent entry into device ++accLock= %d\n",AccCache.accLock);
|
||||||
} else if(AccCache.state==AccDirty) {
|
} else if(AccCache.state==AccDirty) {
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard))
|
if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard))
|
||||||
AccCache.state = AccDirty; // AccDirty + AcceleratorWrite=> AccDirty
|
AccCache.state = AccDirty; // AccDirty + AcceleratorWrite=> AccDirty
|
||||||
else
|
else
|
||||||
@ -351,14 +334,12 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
|
|||||||
AccCache.accLock++;
|
AccCache.accLock++;
|
||||||
dprintf("AccDirty entry ++accLock= %d\n",AccCache.accLock);
|
dprintf("AccDirty entry ++accLock= %d\n",AccCache.accLock);
|
||||||
} else {
|
} else {
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(AccCache.accLock>0);
|
assert(AccCache.accLock>0);
|
||||||
// If view is opened on device must remove from LRU
|
// If view is opened on device must remove from LRU
|
||||||
if(AccCache.LRU_valid==1){
|
if(AccCache.LRU_valid==1){
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
// must possibly remove from LRU as now locked on GPU
|
// must possibly remove from LRU as now locked on GPU
|
||||||
dprintf("AccCache entry removed from LRU \n");
|
dprintf("AccCache entry removed from LRU \n");
|
||||||
LRUremove(AccCache);
|
LRUremove(AccCache);
|
||||||
@ -367,7 +348,6 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
|
|||||||
int transient =hint;
|
int transient =hint;
|
||||||
AccCache.transient= transient? EvictNext : 0;
|
AccCache.transient= transient? EvictNext : 0;
|
||||||
|
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
return AccCache.AccPtr;
|
return AccCache.AccPtr;
|
||||||
}
|
}
|
||||||
////////////////////////////////////
|
////////////////////////////////////
|
||||||
|
@ -78,7 +78,6 @@ private:
|
|||||||
else
|
else
|
||||||
this->_odata = nullptr;
|
this->_odata = nullptr;
|
||||||
}
|
}
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
}
|
}
|
||||||
public:
|
public:
|
||||||
|
|
||||||
@ -86,17 +85,13 @@ public:
|
|||||||
// Can use to make accelerator dirty without copy from host ; useful for temporaries "dont care" prev contents
|
// Can use to make accelerator dirty without copy from host ; useful for temporaries "dont care" prev contents
|
||||||
/////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////
|
||||||
void SetViewMode(ViewMode mode) {
|
void SetViewMode(ViewMode mode) {
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this),mode);
|
LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this),mode);
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
accessor.ViewClose();
|
accessor.ViewClose();
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Helper function to print the state of this object in the AccCache
|
// Helper function to print the state of this object in the AccCache
|
||||||
void PrintCacheState(void)
|
void PrintCacheState(void)
|
||||||
{
|
{
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
MemoryManager::PrintState(this->_odata);
|
MemoryManager::PrintState(this->_odata);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -108,7 +103,6 @@ public:
|
|||||||
|
|
||||||
LatticeView<vobj> View (ViewMode mode) const
|
LatticeView<vobj> View (ViewMode mode) const
|
||||||
{
|
{
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this),mode);
|
LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this),mode);
|
||||||
return accessor;
|
return accessor;
|
||||||
}
|
}
|
||||||
@ -123,7 +117,6 @@ public:
|
|||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
template <typename Op, typename T1> inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr)
|
template <typename Op, typename T1> inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr)
|
||||||
{
|
{
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
GRID_TRACE("ExpressionTemplateEval");
|
GRID_TRACE("ExpressionTemplateEval");
|
||||||
GridBase *egrid(nullptr);
|
GridBase *egrid(nullptr);
|
||||||
GridFromExpression(egrid,expr);
|
GridFromExpression(egrid,expr);
|
||||||
@ -148,7 +141,6 @@ public:
|
|||||||
}
|
}
|
||||||
template <typename Op, typename T1,typename T2> inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr)
|
template <typename Op, typename T1,typename T2> inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr)
|
||||||
{
|
{
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
GRID_TRACE("ExpressionTemplateEval");
|
GRID_TRACE("ExpressionTemplateEval");
|
||||||
GridBase *egrid(nullptr);
|
GridBase *egrid(nullptr);
|
||||||
GridFromExpression(egrid,expr);
|
GridFromExpression(egrid,expr);
|
||||||
@ -173,7 +165,6 @@ public:
|
|||||||
}
|
}
|
||||||
template <typename Op, typename T1,typename T2,typename T3> inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr)
|
template <typename Op, typename T1,typename T2,typename T3> inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr)
|
||||||
{
|
{
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
GRID_TRACE("ExpressionTemplateEval");
|
GRID_TRACE("ExpressionTemplateEval");
|
||||||
GridBase *egrid(nullptr);
|
GridBase *egrid(nullptr);
|
||||||
GridFromExpression(egrid,expr);
|
GridFromExpression(egrid,expr);
|
||||||
@ -198,7 +189,6 @@ public:
|
|||||||
//GridFromExpression is tricky to do
|
//GridFromExpression is tricky to do
|
||||||
template<class Op,class T1>
|
template<class Op,class T1>
|
||||||
Lattice(const LatticeUnaryExpression<Op,T1> & expr) {
|
Lattice(const LatticeUnaryExpression<Op,T1> & expr) {
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
this->_grid = nullptr;
|
this->_grid = nullptr;
|
||||||
GridFromExpression(this->_grid,expr);
|
GridFromExpression(this->_grid,expr);
|
||||||
assert(this->_grid!=nullptr);
|
assert(this->_grid!=nullptr);
|
||||||
@ -214,7 +204,6 @@ public:
|
|||||||
}
|
}
|
||||||
template<class Op,class T1, class T2>
|
template<class Op,class T1, class T2>
|
||||||
Lattice(const LatticeBinaryExpression<Op,T1,T2> & expr) {
|
Lattice(const LatticeBinaryExpression<Op,T1,T2> & expr) {
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
this->_grid = nullptr;
|
this->_grid = nullptr;
|
||||||
GridFromExpression(this->_grid,expr);
|
GridFromExpression(this->_grid,expr);
|
||||||
assert(this->_grid!=nullptr);
|
assert(this->_grid!=nullptr);
|
||||||
@ -230,7 +219,6 @@ public:
|
|||||||
}
|
}
|
||||||
template<class Op,class T1, class T2, class T3>
|
template<class Op,class T1, class T2, class T3>
|
||||||
Lattice(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr) {
|
Lattice(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr) {
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
this->_grid = nullptr;
|
this->_grid = nullptr;
|
||||||
GridFromExpression(this->_grid,expr);
|
GridFromExpression(this->_grid,expr);
|
||||||
assert(this->_grid!=nullptr);
|
assert(this->_grid!=nullptr);
|
||||||
@ -246,7 +234,6 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class sobj> inline Lattice<vobj> & operator = (const sobj & r){
|
template<class sobj> inline Lattice<vobj> & operator = (const sobj & r){
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
auto me = View(CpuWrite);
|
auto me = View(CpuWrite);
|
||||||
thread_for(ss,me.size(),{
|
thread_for(ss,me.size(),{
|
||||||
me[ss]= r;
|
me[ss]= r;
|
||||||
@ -262,19 +249,16 @@ public:
|
|||||||
// user defined constructor
|
// user defined constructor
|
||||||
///////////////////////////////////////////
|
///////////////////////////////////////////
|
||||||
Lattice(GridBase *grid,ViewMode mode=AcceleratorWriteDiscard) {
|
Lattice(GridBase *grid,ViewMode mode=AcceleratorWriteDiscard) {
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
this->_grid = grid;
|
this->_grid = grid;
|
||||||
resize(this->_grid->oSites());
|
resize(this->_grid->oSites());
|
||||||
assert((((uint64_t)&this->_odata[0])&0xF) ==0);
|
assert((((uint64_t)&this->_odata[0])&0xF) ==0);
|
||||||
this->checkerboard=0;
|
this->checkerboard=0;
|
||||||
SetViewMode(mode);
|
SetViewMode(mode);
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// virtual ~Lattice(void) = default;
|
// virtual ~Lattice(void) = default;
|
||||||
|
|
||||||
void reset(GridBase* grid) {
|
void reset(GridBase* grid) {
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
if (this->_grid != grid) {
|
if (this->_grid != grid) {
|
||||||
this->_grid = grid;
|
this->_grid = grid;
|
||||||
this->resize(grid->oSites());
|
this->resize(grid->oSites());
|
||||||
@ -285,7 +269,6 @@ public:
|
|||||||
// copy constructor
|
// copy constructor
|
||||||
///////////////////////////////////////////
|
///////////////////////////////////////////
|
||||||
Lattice(const Lattice& r){
|
Lattice(const Lattice& r){
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
this->_grid = r.Grid();
|
this->_grid = r.Grid();
|
||||||
resize(this->_grid->oSites());
|
resize(this->_grid->oSites());
|
||||||
*this = r;
|
*this = r;
|
||||||
@ -294,7 +277,6 @@ public:
|
|||||||
// move constructor
|
// move constructor
|
||||||
///////////////////////////////////////////
|
///////////////////////////////////////////
|
||||||
Lattice(Lattice && r){
|
Lattice(Lattice && r){
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
this->_grid = r.Grid();
|
this->_grid = r.Grid();
|
||||||
this->_odata = r._odata;
|
this->_odata = r._odata;
|
||||||
this->_odata_size = r._odata_size;
|
this->_odata_size = r._odata_size;
|
||||||
@ -306,7 +288,6 @@ public:
|
|||||||
// assignment template
|
// assignment template
|
||||||
///////////////////////////////////////////
|
///////////////////////////////////////////
|
||||||
template<class robj> inline Lattice<vobj> & operator = (const Lattice<robj> & r){
|
template<class robj> inline Lattice<vobj> & operator = (const Lattice<robj> & r){
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
typename std::enable_if<!std::is_same<robj,vobj>::value,int>::type i=0;
|
typename std::enable_if<!std::is_same<robj,vobj>::value,int>::type i=0;
|
||||||
conformable(*this,r);
|
conformable(*this,r);
|
||||||
this->checkerboard = r.Checkerboard();
|
this->checkerboard = r.Checkerboard();
|
||||||
@ -323,7 +304,6 @@ public:
|
|||||||
// Copy assignment
|
// Copy assignment
|
||||||
///////////////////////////////////////////
|
///////////////////////////////////////////
|
||||||
inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
|
inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
this->checkerboard = r.Checkerboard();
|
this->checkerboard = r.Checkerboard();
|
||||||
conformable(*this,r);
|
conformable(*this,r);
|
||||||
auto him= r.View(AcceleratorRead);
|
auto him= r.View(AcceleratorRead);
|
||||||
@ -338,7 +318,6 @@ public:
|
|||||||
// Move assignment possible if same type
|
// Move assignment possible if same type
|
||||||
///////////////////////////////////////////
|
///////////////////////////////////////////
|
||||||
inline Lattice<vobj> & operator = (Lattice<vobj> && r){
|
inline Lattice<vobj> & operator = (Lattice<vobj> && r){
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
|
|
||||||
resize(0); // deletes if appropriate
|
resize(0); // deletes if appropriate
|
||||||
this->_grid = r.Grid();
|
this->_grid = r.Grid();
|
||||||
@ -356,24 +335,20 @@ public:
|
|||||||
// *=,+=,-= operators inherit behvour from correspond */+/- operation
|
// *=,+=,-= operators inherit behvour from correspond */+/- operation
|
||||||
/////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////
|
||||||
template<class T> inline Lattice<vobj> &operator *=(const T &r) {
|
template<class T> inline Lattice<vobj> &operator *=(const T &r) {
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
*this = (*this)*r;
|
*this = (*this)*r;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class T> inline Lattice<vobj> &operator -=(const T &r) {
|
template<class T> inline Lattice<vobj> &operator -=(const T &r) {
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
*this = (*this)-r;
|
*this = (*this)-r;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
template<class T> inline Lattice<vobj> &operator +=(const T &r) {
|
template<class T> inline Lattice<vobj> &operator +=(const T &r) {
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
*this = (*this)+r;
|
*this = (*this)+r;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
friend inline void swap(Lattice &l, Lattice &r) {
|
friend inline void swap(Lattice &l, Lattice &r) {
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
conformable(l,r);
|
conformable(l,r);
|
||||||
LatticeAccelerator<vobj> tmp;
|
LatticeAccelerator<vobj> tmp;
|
||||||
LatticeAccelerator<vobj> *lp = (LatticeAccelerator<vobj> *)&l;
|
LatticeAccelerator<vobj> *lp = (LatticeAccelerator<vobj> *)&l;
|
||||||
@ -384,7 +359,6 @@ public:
|
|||||||
}; // class Lattice
|
}; // class Lattice
|
||||||
|
|
||||||
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
|
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
typedef typename vobj::scalar_object sobj;
|
typedef typename vobj::scalar_object sobj;
|
||||||
for(int g=0;g<o.Grid()->_gsites;g++){
|
for(int g=0;g<o.Grid()->_gsites;g++){
|
||||||
|
|
||||||
|
@ -32,7 +32,10 @@ int getNumBlocksAndThreads(const Iterator n, const size_t sizeofsobj, Iterator &
|
|||||||
#ifdef GRID_HIP
|
#ifdef GRID_HIP
|
||||||
hipGetDevice(&device);
|
hipGetDevice(&device);
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef GRID_OMPTARGET
|
||||||
|
device = omp_get_device_num();
|
||||||
|
#endif
|
||||||
|
|
||||||
Iterator warpSize = gpu_props[device].warpSize;
|
Iterator warpSize = gpu_props[device].warpSize;
|
||||||
Iterator sharedMemPerBlock = gpu_props[device].sharedMemPerBlock;
|
Iterator sharedMemPerBlock = gpu_props[device].sharedMemPerBlock;
|
||||||
Iterator maxThreadsPerBlock = gpu_props[device].maxThreadsPerBlock;
|
Iterator maxThreadsPerBlock = gpu_props[device].maxThreadsPerBlock;
|
||||||
|
@ -32,15 +32,14 @@ protected:
|
|||||||
uint64_t _odata_size;
|
uint64_t _odata_size;
|
||||||
ViewAdvise advise;
|
ViewAdvise advise;
|
||||||
public:
|
public:
|
||||||
accelerator_inline LatticeAccelerator() : checkerboard(0), _odata(nullptr), _odata_size(0), _grid(nullptr), advise(AdviseDefault) { std::cout << __FILE__ << " " << __LINE__ << std::endl; };
|
accelerator_inline LatticeAccelerator() : checkerboard(0), _odata(nullptr), _odata_size(0), _grid(nullptr), advise(AdviseDefault) { };
|
||||||
accelerator_inline uint64_t oSites(void) const { std::cout << __FILE__ << " " << __LINE__ << std::endl; return _odata_size; };
|
accelerator_inline uint64_t oSites(void) const { return _odata_size; };
|
||||||
accelerator_inline int Checkerboard(void) const { std::cout << __FILE__ << " " << __LINE__ << std::endl; return checkerboard; };
|
accelerator_inline int Checkerboard(void) const { return checkerboard; };
|
||||||
accelerator_inline int &Checkerboard(void) { std::cout << __FILE__ << " " << __LINE__ << std::endl; return this->checkerboard; }; // can assign checkerboard on a container, not a view
|
accelerator_inline int &Checkerboard(void) { return this->checkerboard; }; // can assign checkerboard on a container, not a view
|
||||||
accelerator_inline ViewAdvise Advise(void) const { std::cout << __FILE__ << " " << __LINE__ << std::endl; return advise; };
|
accelerator_inline ViewAdvise Advise(void) const { return advise; };
|
||||||
accelerator_inline ViewAdvise &Advise(void) { std::cout << __FILE__ << " " << __LINE__ << std::endl; return this->advise; }; // can assign advise on a container, not a view
|
accelerator_inline ViewAdvise &Advise(void) { return this->advise; }; // can assign advise on a container, not a view
|
||||||
accelerator_inline void Conformable(GridBase * &grid) const
|
accelerator_inline void Conformable(GridBase * &grid) const
|
||||||
{
|
{
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
if (grid) conformable(grid, _grid);
|
if (grid) conformable(grid, _grid);
|
||||||
else grid = _grid;
|
else grid = _grid;
|
||||||
};
|
};
|
||||||
@ -80,11 +79,10 @@ public:
|
|||||||
accelerator_inline uint64_t end(void) const { return this->_odata_size; };
|
accelerator_inline uint64_t end(void) const { return this->_odata_size; };
|
||||||
accelerator_inline uint64_t size(void) const { return this->_odata_size; };
|
accelerator_inline uint64_t size(void) const { return this->_odata_size; };
|
||||||
|
|
||||||
LatticeView(const LatticeAccelerator<vobj> &refer_to_me) : LatticeAccelerator<vobj> (refer_to_me){ std::cout << __FILE__ << " " << __LINE__ << std::endl; }
|
LatticeView(const LatticeAccelerator<vobj> &refer_to_me) : LatticeAccelerator<vobj> (refer_to_me){ }
|
||||||
LatticeView(const LatticeView<vobj> &refer_to_me) = default; // Trivially copyable
|
LatticeView(const LatticeView<vobj> &refer_to_me) = default; // Trivially copyable
|
||||||
LatticeView(const LatticeAccelerator<vobj> &refer_to_me,ViewMode mode) : LatticeAccelerator<vobj> (refer_to_me)
|
LatticeView(const LatticeAccelerator<vobj> &refer_to_me,ViewMode mode) : LatticeAccelerator<vobj> (refer_to_me)
|
||||||
{
|
{
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
this->ViewOpen(mode);
|
this->ViewOpen(mode);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -92,16 +90,13 @@ public:
|
|||||||
void ViewOpen(ViewMode mode)
|
void ViewOpen(ViewMode mode)
|
||||||
{ // Translate the pointer, could save a copy. Could use a "Handle" and not save _odata originally in base
|
{ // Translate the pointer, could save a copy. Could use a "Handle" and not save _odata originally in base
|
||||||
// std::cout << "View Open"<<std::hex<<this->_odata<<std::dec <<std::endl;
|
// std::cout << "View Open"<<std::hex<<this->_odata<<std::dec <<std::endl;
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
this->cpu_ptr = (void *)this->_odata;
|
this->cpu_ptr = (void *)this->_odata;
|
||||||
this->mode = mode;
|
this->mode = mode;
|
||||||
std::cout << __FILE__ << " " << __LINE__ << " " << this->cpu_ptr << " " << this->_odata_size*sizeof(vobj) << std::endl;
|
|
||||||
this->_odata =(vobj *)
|
this->_odata =(vobj *)
|
||||||
MemoryManager::ViewOpen(this->cpu_ptr,
|
MemoryManager::ViewOpen(this->cpu_ptr,
|
||||||
this->_odata_size*sizeof(vobj),
|
this->_odata_size*sizeof(vobj),
|
||||||
mode,
|
mode,
|
||||||
this->advise);
|
this->advise);
|
||||||
std::cout << __FILE__ << " " << __LINE__ << std::endl;
|
|
||||||
}
|
}
|
||||||
void ViewClose(void)
|
void ViewClose(void)
|
||||||
{ // Inform the manager
|
{ // Inform the manager
|
||||||
|
@ -14,7 +14,15 @@ void acceleratorThreads(uint32_t t) {accelerator_threads = t;};
|
|||||||
#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK"
|
#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK"
|
||||||
#define ENV_RANK_MVAPICH "MV2_COMM_WORLD_RANK"
|
#define ENV_RANK_MVAPICH "MV2_COMM_WORLD_RANK"
|
||||||
|
|
||||||
#ifdef GRID_CUDA
|
#ifdef __CUDA_ARCH__
|
||||||
|
#warning "ifdef cuda arch"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// fold omptarget into device specific acceleratorInit()
|
||||||
|
#include <cuda_runtime_api.h>
|
||||||
|
//#if defined(GRID_CUDA) || (defined(GRID_OMPTARGET) && defined(__CUDA_ARCH__))
|
||||||
|
#if defined(GRID_OMPTARGET)
|
||||||
|
#warning "using cuda from opmtarget"
|
||||||
cudaDeviceProp *gpu_props;
|
cudaDeviceProp *gpu_props;
|
||||||
cudaStream_t copyStream;
|
cudaStream_t copyStream;
|
||||||
cudaStream_t computeStream;
|
cudaStream_t computeStream;
|
||||||
@ -113,7 +121,7 @@ void acceleratorInit(void)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef GRID_HIP
|
#if defined(GRID_HIP) || (defined(GRID_OMPTARGET) && defined(__HIP_DEVICE_COMPILE__))
|
||||||
hipDeviceProp_t *gpu_props;
|
hipDeviceProp_t *gpu_props;
|
||||||
hipStream_t copyStream;
|
hipStream_t copyStream;
|
||||||
hipStream_t computeStream;
|
hipStream_t computeStream;
|
||||||
@ -198,7 +206,7 @@ void acceleratorInit(void)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#ifdef GRID_SYCL
|
#if defined(GRID_SYCL) || (defined(GRID_OMPTARGET) && defined(__SYCL_DEVICE_ONLY__))
|
||||||
|
|
||||||
cl::sycl::queue *theGridAccelerator;
|
cl::sycl::queue *theGridAccelerator;
|
||||||
cl::sycl::queue *theCopyAccelerator;
|
cl::sycl::queue *theCopyAccelerator;
|
||||||
@ -270,7 +278,7 @@ void acceleratorInit(void)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (!defined(GRID_CUDA)) && (!defined(GRID_SYCL))&& (!defined(GRID_HIP))
|
#if (!defined(GRID_CUDA)) && (!defined(GRID_SYCL))&& (!defined(GRID_HIP)) && (!defined(GRID_OMPTARGET))
|
||||||
void acceleratorInit(void){}
|
void acceleratorInit(void){}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -477,16 +477,20 @@ inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream);
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
//////////////////////////////////////////////
|
//////////////////////////////////////////////
|
||||||
// CPU Target - No accelerator just thread instead
|
// OpenMP Target acceleration
|
||||||
//////////////////////////////////////////////
|
//////////////////////////////////////////////
|
||||||
|
#ifdef GRID_OMPTARGET
|
||||||
#if ( (!defined(GRID_SYCL)) && (!defined(GRID_CUDA)) && (!defined(GRID_HIP)) )
|
//TODO GRID_SIMT for OMPTARGET
|
||||||
|
#define GRID_ACCELERATED
|
||||||
#undef GRID_SIMT
|
|
||||||
|
|
||||||
//OpenMP Target Offloading
|
|
||||||
#ifdef OMPTARGET
|
|
||||||
#include<omp.h>
|
#include<omp.h>
|
||||||
|
#ifdef __CUDA_ARCH__
|
||||||
|
#include <cuda_runtime_api.h>
|
||||||
|
#elif defined __HIP_DEVICE_COMPILE__
|
||||||
|
#include <hip/hip_runtime.h>
|
||||||
|
#elif defined __SYCL_DEVICE_ONLY__
|
||||||
|
#include <CL/sycl.hpp>
|
||||||
|
#include <CL/sycl/usm.hpp>
|
||||||
|
#endif
|
||||||
extern "C" void *llvm_omp_target_alloc_host (size_t Size, int DeviceNum);
|
extern "C" void *llvm_omp_target_alloc_host (size_t Size, int DeviceNum);
|
||||||
extern "C" void *llvm_omp_target_alloc_device(size_t Size, int DeviceNum);
|
extern "C" void *llvm_omp_target_alloc_device(size_t Size, int DeviceNum);
|
||||||
extern "C" void *llvm_omp_target_alloc_shared(size_t Size, int DeviceNum);
|
extern "C" void *llvm_omp_target_alloc_shared(size_t Size, int DeviceNum);
|
||||||
@ -543,8 +547,33 @@ inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes)
|
|||||||
}
|
}
|
||||||
std::cout << "D->H copy from device end "<<std::endl;
|
std::cout << "D->H copy from device end "<<std::endl;
|
||||||
};
|
};
|
||||||
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { printf("TODO acceleratorCopyDeviceToDeviceAsynch");memcpy(to,from,bytes);}
|
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes)
|
||||||
inline void acceleratorCopySynchronise(void) {printf("TODO acceleratorCopySynchronize");};
|
{
|
||||||
|
printf("TODO acceleratorCopyDeviceToDeviceAsynch");//memcpy(to,from,bytes);
|
||||||
|
#ifdef __CUDA_ARCH__
|
||||||
|
extern cudaStream_t copyStream;
|
||||||
|
cudaMemcpyAsync(to,from,bytes, cudaMemcpyDeviceToDevice,copyStream);
|
||||||
|
#elif defined __HIP_DEVICE_COMPILE__
|
||||||
|
extern hipStream_t copyStream;
|
||||||
|
hipMemcpyDtoDAsync(to,from,bytes, copyStream);
|
||||||
|
#elif defined __SYCL_DEVICE_ONLY__
|
||||||
|
theCopyAccelerator->memcpy(to,from,bytes);
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
inline void acceleratorCopySynchronise(void)
|
||||||
|
{
|
||||||
|
printf("TODO acceleratorCopySynchronize");
|
||||||
|
//#pragma omp barrier
|
||||||
|
#ifdef __CUDA_ARCH__
|
||||||
|
extern cudaStream_t copyStream;
|
||||||
|
cudaStreamSynchronize(copyStream);
|
||||||
|
#elif defined __HIP_DEVICE_COMPILE__
|
||||||
|
extern hipStream_t copyStream;
|
||||||
|
hipStreamSynchronize(copyStream);
|
||||||
|
#elif defined __SYCL_DEVICE_ONLY__
|
||||||
|
theCopyAccelerator->wait();
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
inline int acceleratorIsCommunicable(void *ptr){ return 1; }
|
inline int acceleratorIsCommunicable(void *ptr){ return 1; }
|
||||||
inline void acceleratorMemSet(void *base,int value,size_t bytes)
|
inline void acceleratorMemSet(void *base,int value,size_t bytes)
|
||||||
@ -558,11 +587,10 @@ inline void acceleratorMemSet(void *base,int value,size_t bytes)
|
|||||||
printf(" omp_target_memcpy device to host failed in MemSet for %ld in device %d \n",bytes,devc);
|
printf(" omp_target_memcpy device to host failed in MemSet for %ld in device %d \n",bytes,devc);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
#ifdef OMPTARGET_MANAGED
|
|
||||||
#include <cuda_runtime_api.h>
|
|
||||||
inline void *acceleratorAllocShared(size_t bytes)
|
inline void *acceleratorAllocShared(size_t bytes)
|
||||||
{
|
{
|
||||||
std::cout << " l-l-l-l-l-l-l-l-l-l-l-l-l Allocating shared from OMPTARGET MANAGED l-l-l-l-l-l-l-l-l-l-l-l "<<std::endl;
|
#ifdef __CUDA_ARCH__
|
||||||
|
std::cout << " l-l-l-l-l-l-l-l-l-l-l-l-l Allocating shared from OMPTARGET MANAGED from CUDA l-l-l-l-l-l-l-l-l-l-l-l "<<std::endl;
|
||||||
void *ptr=NULL;
|
void *ptr=NULL;
|
||||||
auto err = cudaMallocManaged((void **)&ptr,bytes);
|
auto err = cudaMallocManaged((void **)&ptr,bytes);
|
||||||
if( err != cudaSuccess ) {
|
if( err != cudaSuccess ) {
|
||||||
@ -570,25 +598,22 @@ inline void *acceleratorAllocShared(size_t bytes)
|
|||||||
printf(" cudaMallocManaged failed for %d %s \n",bytes,cudaGetErrorString(err));
|
printf(" cudaMallocManaged failed for %d %s \n",bytes,cudaGetErrorString(err));
|
||||||
}
|
}
|
||||||
return ptr;
|
return ptr;
|
||||||
};
|
#elif defined __HIP_DEVICE_COMPILE__
|
||||||
inline void acceleratorFreeShared(void *ptr){cudaFree(ptr);};
|
std::cout << " l-l-l-l-l-l-l-l-l-l-l-l-l Allocating shared from OMPTARGET MANAGED from HIP l-l-l-l-l-l-l-l-l-l-l-l "<<std::endl;
|
||||||
//inline void *acceleratorAllocDevice(size_t bytes){return memalign(GRID_ALLOC_ALIGN,bytes);};
|
|
||||||
inline void *acceleratorAllocDevice(size_t bytes)
|
|
||||||
{
|
|
||||||
std::cout << " l-l-l-l-l-l-l-l-l-l-l-l-l Allocating device from OMPTARGET MANAGED l-l-l-l-l-l-l-l-l-l-l-l "<<std::endl;
|
|
||||||
void *ptr=NULL;
|
void *ptr=NULL;
|
||||||
auto err = cudaMallocManaged((void **)&ptr,bytes);
|
auto err = hipMallocManaged((void **)&ptr,bytes);
|
||||||
if( err != cudaSuccess ) {
|
if( err != hipSuccess ) {
|
||||||
ptr = (void *) NULL;
|
ptr = (void *) NULL;
|
||||||
printf(" cudaMallocManaged failed for %d %s \n",bytes,cudaGetErrorString(err));
|
printf(" hipMallocManaged failed for %d %s \n",bytes,cudaGetErrorString(err));
|
||||||
}
|
}
|
||||||
return ptr;
|
return ptr;
|
||||||
};
|
#elif defined __SYCL_DEVICE_ONLY__
|
||||||
inline void acceleratorFreeDevice(void *ptr){free(ptr);};
|
std::cout << " l-l-l-l-l-l-l-l-l-l-l-l-l Allocating shared from OMPTARGET MANAGED from SYCL l-l-l-l-l-l-l-l-l-l-l-l "<<std::endl;
|
||||||
|
queue q;
|
||||||
|
//void *ptr = malloc_shared<void *>(bytes, q);
|
||||||
|
return ptr;
|
||||||
#else
|
#else
|
||||||
inline void *acceleratorAllocShared(size_t bytes)
|
std::cout << " l-l-l-l-l-l-l-l-l-l-l-l-l Allocating shared mem from OMPTARGET from LLVM l-l-l-l-l-l-l-l-l-l-l-l "<<std::endl;
|
||||||
{
|
|
||||||
std::cout << " l-l-l-l-l-l-l-l-l-l-l-l-l Allocating shared mem from OMPTARGET l-l-l-l-l-l-l-l-l-l-l-l "<<std::endl;
|
|
||||||
int devc = omp_get_default_device();
|
int devc = omp_get_default_device();
|
||||||
void *ptr=NULL;
|
void *ptr=NULL;
|
||||||
ptr = (void *) llvm_omp_target_alloc_shared(bytes, devc);
|
ptr = (void *) llvm_omp_target_alloc_shared(bytes, devc);
|
||||||
@ -596,6 +621,7 @@ inline void *acceleratorAllocShared(size_t bytes)
|
|||||||
printf(" llvm_omp_target_alloc_shared failed for %ld in device %d \n",bytes,devc);
|
printf(" llvm_omp_target_alloc_shared failed for %ld in device %d \n",bytes,devc);
|
||||||
}
|
}
|
||||||
return ptr;
|
return ptr;
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
inline void *acceleratorAllocDevice(size_t bytes)
|
inline void *acceleratorAllocDevice(size_t bytes)
|
||||||
{
|
{
|
||||||
@ -610,7 +636,6 @@ inline void *acceleratorAllocDevice(size_t bytes)
|
|||||||
};
|
};
|
||||||
inline void acceleratorFreeShared(void *ptr){omp_target_free(ptr, omp_get_default_device());};
|
inline void acceleratorFreeShared(void *ptr){omp_target_free(ptr, omp_get_default_device());};
|
||||||
inline void acceleratorFreeDevice(void *ptr){omp_target_free(ptr, omp_get_default_device());};
|
inline void acceleratorFreeDevice(void *ptr){omp_target_free(ptr, omp_get_default_device());};
|
||||||
#endif
|
|
||||||
|
|
||||||
//OpenMP CPU threads
|
//OpenMP CPU threads
|
||||||
#else
|
#else
|
||||||
@ -644,6 +669,12 @@ inline void acceleratorFreeDevice(void *ptr){free(ptr);};
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
//////////////////////////////////////////////
|
||||||
|
// CPU Target - No accelerator just thread instead
|
||||||
|
//////////////////////////////////////////////
|
||||||
|
|
||||||
|
#if ( (!defined(GRID_SYCL)) && (!defined(GRID_CUDA)) && (!defined(GRID_HIP)) ) && (!defined(GRID_OMPTARGET))
|
||||||
|
#undef GRID_SIMT
|
||||||
#endif // CPU target
|
#endif // CPU target
|
||||||
|
|
||||||
#ifdef HAVE_MM_MALLOC_H
|
#ifdef HAVE_MM_MALLOC_H
|
||||||
|
11
configure.ac
11
configure.ac
@ -229,13 +229,13 @@ case ${ac_ACC_CSHIFT} in
|
|||||||
esac
|
esac
|
||||||
|
|
||||||
|
|
||||||
############### SYCL/CUDA/HIP/none
|
############### SYCL/CUDA/HIP/OpenMP/none
|
||||||
AC_ARG_ENABLE([accelerator],
|
AC_ARG_ENABLE([accelerator],
|
||||||
[AS_HELP_STRING([--enable-accelerator=cuda|sycl|hip|none],[enable none,cuda,sycl,hip acceleration])],
|
[AS_HELP_STRING([--enable-accelerator=cuda|sycl|hip|openmp|none],[enable none,openmp,cuda,sycl,hip acceleration])],
|
||||||
[ac_ACCELERATOR=${enable_accelerator}], [ac_ACCELERATOR=none])
|
[ac_ACCELERATOR=${enable_accelerator}], [ac_ACCELERATOR=none])
|
||||||
case ${ac_ACCELERATOR} in
|
case ${ac_ACCELERATOR} in
|
||||||
cuda)
|
cuda)
|
||||||
echo CUDA acceleration
|
echo CUDA acceleration ${ac_ACCELERATOR} ${enable_accelerator}
|
||||||
LIBS="${LIBS} -lcuda"
|
LIBS="${LIBS} -lcuda"
|
||||||
AC_DEFINE([GRID_CUDA],[1],[Use CUDA offload]);;
|
AC_DEFINE([GRID_CUDA],[1],[Use CUDA offload]);;
|
||||||
sycl)
|
sycl)
|
||||||
@ -244,12 +244,15 @@ case ${ac_ACCELERATOR} in
|
|||||||
hip)
|
hip)
|
||||||
echo HIP acceleration
|
echo HIP acceleration
|
||||||
AC_DEFINE([GRID_HIP],[1],[Use HIP offload]);;
|
AC_DEFINE([GRID_HIP],[1],[Use HIP offload]);;
|
||||||
|
openmp)
|
||||||
|
echo OMPTARGET acceleration
|
||||||
|
AC_DEFINE([GRID_OMPTARGET],[1],[Use OMPTARGET offload]);;
|
||||||
none)
|
none)
|
||||||
echo NO acceleration ;;
|
echo NO acceleration ;;
|
||||||
no)
|
no)
|
||||||
echo NO acceleration ;;
|
echo NO acceleration ;;
|
||||||
*)
|
*)
|
||||||
AC_MSG_ERROR(["Acceleration not suppoorted ${ac_ACCELERATOR}"]);;
|
AC_MSG_ERROR(["1Acceleration not suppoorted ${ac_ACCELERATOR}"]);;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
############### UNIFIED MEMORY
|
############### UNIFIED MEMORY
|
||||||
|
Reference in New Issue
Block a user