mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-04 19:25:56 +01:00
commit
17ec9c5545
@ -173,7 +173,8 @@ template<class T> using cshiftAllocator = devAllocator<T>;
|
|||||||
template<class T> using cshiftAllocator = std::allocator<T>;
|
template<class T> using cshiftAllocator = std::allocator<T>;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
template<class T> using Vector = std::vector<T,uvmAllocator<T> >;
|
template<class T> using Vector = std::vector<T,uvmAllocator<T> >;
|
||||||
|
// std::vector whose storage comes from alignedAllocator, in contrast to Vector, which uses uvmAllocator.
template<class T> using stencilVector = std::vector<T,alignedAllocator<T> >;
|
||||||
template<class T> using commVector = std::vector<T,devAllocator<T> >;
|
template<class T> using commVector = std::vector<T,devAllocator<T> >;
|
||||||
template<class T> using cshiftVector = std::vector<T,cshiftAllocator<T> >;
|
template<class T> using cshiftVector = std::vector<T,cshiftAllocator<T> >;
|
||||||
|
|
||||||
|
@ -1,11 +1,12 @@
|
|||||||
#include <Grid/GridCore.h>
|
#include <Grid/GridCore.h>
|
||||||
|
|
||||||
#ifndef GRID_UVM
|
#ifndef GRID_UVM
|
||||||
|
|
||||||
#warning "Using explicit device memory copies"
|
#warning "Using explicit device memory copies"
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
//define dprintf(...) printf ( __VA_ARGS__ ); fflush(stdout);
|
||||||
#define dprintf(...)
|
#define dprintf(...)
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
// For caching copies of data on device
|
// For caching copies of data on device
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
@ -103,7 +104,7 @@ void MemoryManager::AccDiscard(AcceleratorViewEntry &AccCache)
|
|||||||
///////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////
|
||||||
assert(AccCache.state!=Empty);
|
assert(AccCache.state!=Empty);
|
||||||
|
|
||||||
// dprintf("MemoryManager: Discard(%llx) %llx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
|
dprintf("MemoryManager: Discard(%llx) %llx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
|
||||||
assert(AccCache.accLock==0);
|
assert(AccCache.accLock==0);
|
||||||
assert(AccCache.cpuLock==0);
|
assert(AccCache.cpuLock==0);
|
||||||
assert(AccCache.CpuPtr!=(uint64_t)NULL);
|
assert(AccCache.CpuPtr!=(uint64_t)NULL);
|
||||||
@ -111,7 +112,7 @@ void MemoryManager::AccDiscard(AcceleratorViewEntry &AccCache)
|
|||||||
AcceleratorFree((void *)AccCache.AccPtr,AccCache.bytes);
|
AcceleratorFree((void *)AccCache.AccPtr,AccCache.bytes);
|
||||||
DeviceBytes -=AccCache.bytes;
|
DeviceBytes -=AccCache.bytes;
|
||||||
LRUremove(AccCache);
|
LRUremove(AccCache);
|
||||||
// dprintf("MemoryManager: Free(%llx) LRU %lld Total %lld\n",(uint64_t)AccCache.AccPtr,DeviceLRUBytes,DeviceBytes);
|
dprintf("MemoryManager: Free(%llx) LRU %lld Total %lld\n",(uint64_t)AccCache.AccPtr,DeviceLRUBytes,DeviceBytes);
|
||||||
}
|
}
|
||||||
uint64_t CpuPtr = AccCache.CpuPtr;
|
uint64_t CpuPtr = AccCache.CpuPtr;
|
||||||
EntryErase(CpuPtr);
|
EntryErase(CpuPtr);
|
||||||
@ -125,7 +126,7 @@ void MemoryManager::Evict(AcceleratorViewEntry &AccCache)
|
|||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
assert(AccCache.state!=Empty);
|
assert(AccCache.state!=Empty);
|
||||||
|
|
||||||
// dprintf("MemoryManager: Evict(%llx) %llx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
|
dprintf("MemoryManager: Evict(%llx) %llx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
|
||||||
assert(AccCache.accLock==0);
|
assert(AccCache.accLock==0);
|
||||||
assert(AccCache.cpuLock==0);
|
assert(AccCache.cpuLock==0);
|
||||||
if(AccCache.state==AccDirty) {
|
if(AccCache.state==AccDirty) {
|
||||||
@ -136,7 +137,7 @@ void MemoryManager::Evict(AcceleratorViewEntry &AccCache)
|
|||||||
AcceleratorFree((void *)AccCache.AccPtr,AccCache.bytes);
|
AcceleratorFree((void *)AccCache.AccPtr,AccCache.bytes);
|
||||||
DeviceBytes -=AccCache.bytes;
|
DeviceBytes -=AccCache.bytes;
|
||||||
LRUremove(AccCache);
|
LRUremove(AccCache);
|
||||||
// dprintf("MemoryManager: Free(%llx) footprint now %lld \n",(uint64_t)AccCache.AccPtr,DeviceBytes);
|
dprintf("MemoryManager: Free(%llx) footprint now %lld \n",(uint64_t)AccCache.AccPtr,DeviceBytes);
|
||||||
}
|
}
|
||||||
uint64_t CpuPtr = AccCache.CpuPtr;
|
uint64_t CpuPtr = AccCache.CpuPtr;
|
||||||
EntryErase(CpuPtr);
|
EntryErase(CpuPtr);
|
||||||
@ -149,7 +150,7 @@ void MemoryManager::Flush(AcceleratorViewEntry &AccCache)
|
|||||||
assert(AccCache.AccPtr!=(uint64_t)NULL);
|
assert(AccCache.AccPtr!=(uint64_t)NULL);
|
||||||
assert(AccCache.CpuPtr!=(uint64_t)NULL);
|
assert(AccCache.CpuPtr!=(uint64_t)NULL);
|
||||||
acceleratorCopyFromDevice((void *)AccCache.AccPtr,(void *)AccCache.CpuPtr,AccCache.bytes);
|
acceleratorCopyFromDevice((void *)AccCache.AccPtr,(void *)AccCache.CpuPtr,AccCache.bytes);
|
||||||
// dprintf("MemoryManager: Flush %llx -> %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
|
// Debug trace only. No explicit fflush(stdout) here: the debug definition of dprintf already flushes,
// and with dprintf compiled out a bare fflush would still execute on every device-to-host flush.
dprintf("MemoryManager: Flush %llx -> %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr);
|
||||||
DeviceToHostBytes+=AccCache.bytes;
|
DeviceToHostBytes+=AccCache.bytes;
|
||||||
DeviceToHostXfer++;
|
DeviceToHostXfer++;
|
||||||
AccCache.state=Consistent;
|
AccCache.state=Consistent;
|
||||||
@ -164,7 +165,7 @@ void MemoryManager::Clone(AcceleratorViewEntry &AccCache)
|
|||||||
AccCache.AccPtr=(uint64_t)AcceleratorAllocate(AccCache.bytes);
|
AccCache.AccPtr=(uint64_t)AcceleratorAllocate(AccCache.bytes);
|
||||||
DeviceBytes+=AccCache.bytes;
|
DeviceBytes+=AccCache.bytes;
|
||||||
}
|
}
|
||||||
// dprintf("MemoryManager: Clone %llx <- %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
|
// Debug trace only. No explicit fflush(stdout) here: the debug definition of dprintf already flushes,
// and with dprintf compiled out a bare fflush would still execute on every host-to-device copy.
dprintf("MemoryManager: Clone %llx <- %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr);
|
||||||
acceleratorCopyToDevice((void *)AccCache.CpuPtr,(void *)AccCache.AccPtr,AccCache.bytes);
|
acceleratorCopyToDevice((void *)AccCache.CpuPtr,(void *)AccCache.AccPtr,AccCache.bytes);
|
||||||
HostToDeviceBytes+=AccCache.bytes;
|
HostToDeviceBytes+=AccCache.bytes;
|
||||||
HostToDeviceXfer++;
|
HostToDeviceXfer++;
|
||||||
@ -232,14 +233,19 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
|
|||||||
|
|
||||||
auto AccCacheIterator = EntryLookup(CpuPtr);
|
auto AccCacheIterator = EntryLookup(CpuPtr);
|
||||||
auto & AccCache = AccCacheIterator->second;
|
auto & AccCache = AccCacheIterator->second;
|
||||||
if (!AccCache.AccPtr)
|
if (!AccCache.AccPtr) {
|
||||||
EvictVictims(bytes);
|
EvictVictims(bytes);
|
||||||
|
}
|
||||||
assert((mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard));
|
assert((mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard));
|
||||||
|
|
||||||
assert(AccCache.cpuLock==0); // Programming error
|
assert(AccCache.cpuLock==0); // Programming error
|
||||||
|
|
||||||
if(AccCache.state!=Empty) {
|
if(AccCache.state!=Empty) {
|
||||||
|
dprintf("ViewOpen found entry %llx %llx : %lld %lld\n",
|
||||||
|
(uint64_t)AccCache.CpuPtr,
|
||||||
|
(uint64_t)CpuPtr,
|
||||||
|
(uint64_t)AccCache.bytes,
|
||||||
|
(uint64_t)bytes);
|
||||||
assert(AccCache.CpuPtr == CpuPtr);
|
assert(AccCache.CpuPtr == CpuPtr);
|
||||||
assert(AccCache.bytes ==bytes);
|
assert(AccCache.bytes ==bytes);
|
||||||
}
|
}
|
||||||
@ -286,21 +292,21 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
|
|||||||
AccCache.state = Consistent; // CpuDirty + AccRead => Consistent
|
AccCache.state = Consistent; // CpuDirty + AccRead => Consistent
|
||||||
}
|
}
|
||||||
AccCache.accLock++;
|
AccCache.accLock++;
|
||||||
// printf("Copied CpuDirty entry into device accLock %d\n",AccCache.accLock);
|
dprintf("Copied CpuDirty entry into device accLock %d\n",AccCache.accLock);
|
||||||
} else if(AccCache.state==Consistent) {
|
} else if(AccCache.state==Consistent) {
|
||||||
if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard))
|
if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard))
|
||||||
AccCache.state = AccDirty; // Consistent + AcceleratorWrite=> AccDirty
|
AccCache.state = AccDirty; // Consistent + AcceleratorWrite=> AccDirty
|
||||||
else
|
else
|
||||||
AccCache.state = Consistent; // Consistent + AccRead => Consistent
|
AccCache.state = Consistent; // Consistent + AccRead => Consistent
|
||||||
AccCache.accLock++;
|
AccCache.accLock++;
|
||||||
// printf("Consistent entry into device accLock %d\n",AccCache.accLock);
|
dprintf("Consistent entry into device accLock %d\n",AccCache.accLock);
|
||||||
} else if(AccCache.state==AccDirty) {
|
} else if(AccCache.state==AccDirty) {
|
||||||
if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard))
|
if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard))
|
||||||
AccCache.state = AccDirty; // AccDirty + AcceleratorWrite=> AccDirty
|
AccCache.state = AccDirty; // AccDirty + AcceleratorWrite=> AccDirty
|
||||||
else
|
else
|
||||||
AccCache.state = AccDirty; // AccDirty + AccRead => AccDirty
|
AccCache.state = AccDirty; // AccDirty + AccRead => AccDirty
|
||||||
AccCache.accLock++;
|
AccCache.accLock++;
|
||||||
// printf("AccDirty entry into device accLock %d\n",AccCache.accLock);
|
dprintf("AccDirty entry into device accLock %d\n",AccCache.accLock);
|
||||||
} else {
|
} else {
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
@ -367,9 +373,11 @@ uint64_t MemoryManager::CpuViewOpen(uint64_t CpuPtr,size_t bytes,ViewMode mode,V
|
|||||||
|
|
||||||
auto AccCacheIterator = EntryLookup(CpuPtr);
|
auto AccCacheIterator = EntryLookup(CpuPtr);
|
||||||
auto & AccCache = AccCacheIterator->second;
|
auto & AccCache = AccCacheIterator->second;
|
||||||
if (!AccCache.AccPtr)
|
|
||||||
EvictVictims(bytes);
|
if (!AccCache.AccPtr) {
|
||||||
|
EvictVictims(bytes);
|
||||||
|
}
|
||||||
|
|
||||||
assert((mode==CpuRead)||(mode==CpuWrite));
|
assert((mode==CpuRead)||(mode==CpuWrite));
|
||||||
assert(AccCache.accLock==0); // Programming error
|
assert(AccCache.accLock==0); // Programming error
|
||||||
|
|
||||||
|
@ -269,7 +269,7 @@ public:
|
|||||||
std::vector<Vector<std::pair<int,int> > > face_table ;
|
std::vector<Vector<std::pair<int,int> > > face_table ;
|
||||||
Vector<int> surface_list;
|
Vector<int> surface_list;
|
||||||
|
|
||||||
Vector<StencilEntry> _entries; // Resident in managed memory
|
stencilVector<StencilEntry> _entries; // Allocated via alignedAllocator (stencilVector), not uvmAllocator as Vector is. NOTE(review): confirm where this memory is resident
|
||||||
std::vector<Packet> Packets;
|
std::vector<Packet> Packets;
|
||||||
std::vector<Merge> Mergers;
|
std::vector<Merge> Mergers;
|
||||||
std::vector<Merge> MergersSHM;
|
std::vector<Merge> MergersSHM;
|
||||||
|
@ -445,7 +445,7 @@ public:
|
|||||||
// 1344= 3*(2*8+6)*2*8 + 8*3*2*2 + 3*4*2*8
|
// 1344= 3*(2*8+6)*2*8 + 8*3*2*2 + 3*4*2*8
|
||||||
// 1344 = Nc* (6+(Nc-1)*8)*2*Nd + Nd*Nc*2*2 + Nd*Nc*Ns*2
|
// 1344 = Nc* (6+(Nc-1)*8)*2*Nd + Nd*Nc*2*2 + Nd*Nc*Ns*2
|
||||||
// double flops=(1344.0*volume)/2;
|
// double flops=(1344.0*volume)/2;
|
||||||
#if 0
|
#if 1
|
||||||
double fps = Nc* (6+(Nc-1)*8)*Ns*Nd + Nd*Nc*Ns + Nd*Nc*Ns*2;
|
double fps = Nc* (6+(Nc-1)*8)*Ns*Nd + Nd*Nc*Ns + Nd*Nc*Ns*2;
|
||||||
#else
|
#else
|
||||||
double fps = Nc* (6+(Nc-1)*8)*Ns*Nd + 2*Nd*Nc*Ns + 2*Nd*Nc*Ns*2;
|
double fps = Nc* (6+(Nc-1)*8)*Ns*Nd + 2*Nd*Nc*Ns + 2*Nd*Nc*Ns*2;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user