mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Memory Vector UVM and Lattice alignedAllocator separate
This commit is contained in:
parent
6c5fa8dcd8
commit
c48da35921
@ -0,0 +1,241 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/iterative/PrecGeneralisedConjugateResidual.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#ifndef GRID_PREC_GCR_NON_HERM_H
|
||||||
|
#define GRID_PREC_GCR_NON_HERM_H
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//VPGCR Abe and Zhang, 2005.
|
||||||
|
//INTERNATIONAL JOURNAL OF NUMERICAL ANALYSIS AND MODELING
|
||||||
|
//Computing and Information Volume 2, Number 2, Pages 147-161
|
||||||
|
//NB. Likely not original reference since they are focussing on a preconditioner variant.
|
||||||
|
// but VPGCR was nicely written up in their paper
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
|
#define GCRLogLevel std::cout << GridLogMessage <<std::string(level,'\t')<< " Level "<<level<<" "
|
||||||
|
|
||||||
|
template<class Field>
|
||||||
|
class PrecGeneralisedConjugateResidualNonHermitian : public LinearFunction<Field> {
|
||||||
|
public:
|
||||||
|
|
||||||
|
RealD Tolerance;
|
||||||
|
Integer MaxIterations;
|
||||||
|
int verbose;
|
||||||
|
int mmax;
|
||||||
|
int nstep;
|
||||||
|
int steps;
|
||||||
|
int level;
|
||||||
|
GridStopWatch PrecTimer;
|
||||||
|
GridStopWatch MatTimer;
|
||||||
|
GridStopWatch LinalgTimer;
|
||||||
|
|
||||||
|
LinearFunction<Field> &Preconditioner;
|
||||||
|
LinearOperatorBase<Field> &Linop;
|
||||||
|
|
||||||
|
void Level(int lv) { level=lv; };
|
||||||
|
|
||||||
|
PrecGeneralisedConjugateResidualNonHermitian(RealD tol,Integer maxit,LinearOperatorBase<Field> &_Linop,LinearFunction<Field> &Prec,int _mmax,int _nstep) :
|
||||||
|
Tolerance(tol),
|
||||||
|
MaxIterations(maxit),
|
||||||
|
Linop(_Linop),
|
||||||
|
Preconditioner(Prec),
|
||||||
|
mmax(_mmax),
|
||||||
|
nstep(_nstep)
|
||||||
|
{
|
||||||
|
level=1;
|
||||||
|
verbose=1;
|
||||||
|
};
|
||||||
|
|
||||||
|
void operator() (const Field &src, Field &psi){
|
||||||
|
|
||||||
|
psi=Zero();
|
||||||
|
RealD cp, ssq,rsq;
|
||||||
|
ssq=norm2(src);
|
||||||
|
rsq=Tolerance*Tolerance*ssq;
|
||||||
|
|
||||||
|
Field r(src.Grid());
|
||||||
|
|
||||||
|
PrecTimer.Reset();
|
||||||
|
MatTimer.Reset();
|
||||||
|
LinalgTimer.Reset();
|
||||||
|
|
||||||
|
GridStopWatch SolverTimer;
|
||||||
|
SolverTimer.Start();
|
||||||
|
|
||||||
|
steps=0;
|
||||||
|
for(int k=0;k<MaxIterations;k++){
|
||||||
|
|
||||||
|
cp=GCRnStep(src,psi,rsq);
|
||||||
|
|
||||||
|
GCRLogLevel <<"PGCR("<<mmax<<","<<nstep<<") "<< steps <<" steps cp = "<<cp<<" target "<<rsq <<std::endl;
|
||||||
|
|
||||||
|
if(cp<rsq) {
|
||||||
|
|
||||||
|
SolverTimer.Stop();
|
||||||
|
|
||||||
|
Linop.Op(psi,r);
|
||||||
|
axpy(r,-1.0,src,r);
|
||||||
|
RealD tr = norm2(r);
|
||||||
|
GCRLogLevel<<"PGCR: Converged on iteration " <<steps
|
||||||
|
<< " computed residual "<<sqrt(cp/ssq)
|
||||||
|
<< " true residual " <<sqrt(tr/ssq)
|
||||||
|
<< " target " <<Tolerance <<std::endl;
|
||||||
|
|
||||||
|
GCRLogLevel<<"PGCR Time elapsed: Total "<< SolverTimer.Elapsed() <<std::endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
GCRLogLevel<<"Variable Preconditioned GCR did not converge"<<std::endl;
|
||||||
|
// assert(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
RealD GCRnStep(const Field &src, Field &psi,RealD rsq){
|
||||||
|
|
||||||
|
RealD cp;
|
||||||
|
ComplexD a, b, zAz;
|
||||||
|
RealD zAAz;
|
||||||
|
ComplexD rq;
|
||||||
|
|
||||||
|
GridBase *grid = src.Grid();
|
||||||
|
|
||||||
|
Field r(grid);
|
||||||
|
Field z(grid);
|
||||||
|
Field tmp(grid);
|
||||||
|
Field ttmp(grid);
|
||||||
|
Field Az(grid);
|
||||||
|
|
||||||
|
////////////////////////////////
|
||||||
|
// history for flexible orthog
|
||||||
|
////////////////////////////////
|
||||||
|
std::vector<Field> q(mmax,grid);
|
||||||
|
std::vector<Field> p(mmax,grid);
|
||||||
|
std::vector<RealD> qq(mmax);
|
||||||
|
|
||||||
|
GCRLogLevel<< "PGCR nStep("<<nstep<<")"<<std::endl;
|
||||||
|
|
||||||
|
//////////////////////////////////
|
||||||
|
// initial guess x0 is taken as nonzero.
|
||||||
|
// r0=src-A x0 = src
|
||||||
|
//////////////////////////////////
|
||||||
|
MatTimer.Start();
|
||||||
|
Linop.Op(psi,Az);
|
||||||
|
zAz = innerProduct(Az,psi);
|
||||||
|
zAAz= norm2(Az);
|
||||||
|
MatTimer.Stop();
|
||||||
|
|
||||||
|
|
||||||
|
LinalgTimer.Start();
|
||||||
|
r=src-Az;
|
||||||
|
LinalgTimer.Stop();
|
||||||
|
GCRLogLevel<< "PGCR true residual r = src - A psi "<<norm2(r) <<std::endl;
|
||||||
|
|
||||||
|
/////////////////////
|
||||||
|
// p = Prec(r)
|
||||||
|
/////////////////////
|
||||||
|
|
||||||
|
PrecTimer.Start();
|
||||||
|
Preconditioner(r,z);
|
||||||
|
PrecTimer.Stop();
|
||||||
|
|
||||||
|
MatTimer.Start();
|
||||||
|
Linop.Op(z,Az);
|
||||||
|
MatTimer.Stop();
|
||||||
|
|
||||||
|
LinalgTimer.Start();
|
||||||
|
|
||||||
|
zAz = innerProduct(Az,psi);
|
||||||
|
zAAz= norm2(Az);
|
||||||
|
|
||||||
|
//p[0],q[0],qq[0]
|
||||||
|
p[0]= z;
|
||||||
|
q[0]= Az;
|
||||||
|
qq[0]= zAAz;
|
||||||
|
|
||||||
|
cp =norm2(r);
|
||||||
|
LinalgTimer.Stop();
|
||||||
|
|
||||||
|
for(int k=0;k<nstep;k++){
|
||||||
|
|
||||||
|
steps++;
|
||||||
|
|
||||||
|
int kp = k+1;
|
||||||
|
int peri_k = k %mmax;
|
||||||
|
int peri_kp= kp%mmax;
|
||||||
|
|
||||||
|
LinalgTimer.Start();
|
||||||
|
rq= innerProduct(q[peri_k],r); // what if rAr not real?
|
||||||
|
a = rq/qq[peri_k];
|
||||||
|
|
||||||
|
axpy(psi,a,p[peri_k],psi);
|
||||||
|
|
||||||
|
cp = axpy_norm(r,-a,q[peri_k],r);
|
||||||
|
LinalgTimer.Stop();
|
||||||
|
|
||||||
|
GCRLogLevel<< "PGCR step["<<steps<<"] resid " << cp << " target " <<rsq<<std::endl;
|
||||||
|
|
||||||
|
if((k==nstep-1)||(cp<rsq)){
|
||||||
|
return cp;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
PrecTimer.Start();
|
||||||
|
Preconditioner(r,z);// solve Az = r
|
||||||
|
PrecTimer.Stop();
|
||||||
|
|
||||||
|
MatTimer.Start();
|
||||||
|
Linop.Op(z,Az);
|
||||||
|
MatTimer.Stop();
|
||||||
|
zAz = innerProduct(Az,psi);
|
||||||
|
zAAz= norm2(Az);
|
||||||
|
|
||||||
|
LinalgTimer.Start();
|
||||||
|
|
||||||
|
q[peri_kp]=Az;
|
||||||
|
p[peri_kp]=z;
|
||||||
|
|
||||||
|
int northog = ((kp)>(mmax-1))?(mmax-1):(kp); // if more than mmax done, we orthog all mmax history.
|
||||||
|
for(int back=0;back<northog;back++){
|
||||||
|
|
||||||
|
int peri_back=(k-back)%mmax; assert((k-back)>=0);
|
||||||
|
|
||||||
|
b=-real(innerProduct(q[peri_back],Az))/qq[peri_back];
|
||||||
|
p[peri_kp]=p[peri_kp]+b*p[peri_back];
|
||||||
|
q[peri_kp]=q[peri_kp]+b*q[peri_back];
|
||||||
|
|
||||||
|
}
|
||||||
|
qq[peri_kp]=norm2(q[peri_kp]); // could use axpy_norm
|
||||||
|
LinalgTimer.Stop();
|
||||||
|
}
|
||||||
|
assert(0); // never reached
|
||||||
|
return cp;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
NAMESPACE_END(Grid);
|
||||||
|
#endif
|
@ -52,41 +52,79 @@ public:
|
|||||||
pointer allocate(size_type __n, const void* _p= 0)
|
pointer allocate(size_type __n, const void* _p= 0)
|
||||||
{
|
{
|
||||||
size_type bytes = __n*sizeof(_Tp);
|
size_type bytes = __n*sizeof(_Tp);
|
||||||
|
|
||||||
profilerAllocate(bytes);
|
profilerAllocate(bytes);
|
||||||
|
|
||||||
_Tp *ptr = (_Tp*) MemoryManager::CpuAllocate(bytes);
|
_Tp *ptr = (_Tp*) MemoryManager::CpuAllocate(bytes);
|
||||||
|
|
||||||
assert( ( (_Tp*)ptr != (_Tp *)NULL ) );
|
assert( ( (_Tp*)ptr != (_Tp *)NULL ) );
|
||||||
|
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void deallocate(pointer __p, size_type __n)
|
void deallocate(pointer __p, size_type __n)
|
||||||
{
|
{
|
||||||
size_type bytes = __n * sizeof(_Tp);
|
size_type bytes = __n * sizeof(_Tp);
|
||||||
|
|
||||||
profilerFree(bytes);
|
profilerFree(bytes);
|
||||||
|
|
||||||
MemoryManager::CpuFree((void *)__p,bytes);
|
MemoryManager::CpuFree((void *)__p,bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FIXME: hack for the copy constructor, eventually it must be avoided
|
||||||
|
//void construct(pointer __p, const _Tp& __val) { new((void *)__p) _Tp(__val); };
|
||||||
|
void construct(pointer __p, const _Tp& __val) { assert(0);};
|
||||||
|
void construct(pointer __p) { };
|
||||||
|
void destroy(pointer __p) { };
|
||||||
|
};
|
||||||
|
template<typename _Tp> inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
|
||||||
|
template<typename _Tp> inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
|
||||||
|
|
||||||
|
template<typename _Tp>
|
||||||
|
class uvmAllocator {
|
||||||
|
public:
|
||||||
|
typedef std::size_t size_type;
|
||||||
|
typedef std::ptrdiff_t difference_type;
|
||||||
|
typedef _Tp* pointer;
|
||||||
|
typedef const _Tp* const_pointer;
|
||||||
|
typedef _Tp& reference;
|
||||||
|
typedef const _Tp& const_reference;
|
||||||
|
typedef _Tp value_type;
|
||||||
|
|
||||||
|
template<typename _Tp1> struct rebind { typedef uvmAllocator<_Tp1> other; };
|
||||||
|
uvmAllocator() throw() { }
|
||||||
|
uvmAllocator(const uvmAllocator&) throw() { }
|
||||||
|
template<typename _Tp1> uvmAllocator(const uvmAllocator<_Tp1>&) throw() { }
|
||||||
|
~uvmAllocator() throw() { }
|
||||||
|
pointer address(reference __x) const { return &__x; }
|
||||||
|
size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
|
||||||
|
|
||||||
|
pointer allocate(size_type __n, const void* _p= 0)
|
||||||
|
{
|
||||||
|
size_type bytes = __n*sizeof(_Tp);
|
||||||
|
profilerAllocate(bytes);
|
||||||
|
_Tp *ptr = (_Tp*) MemoryManager::SharedAllocate(bytes);
|
||||||
|
assert( ( (_Tp*)ptr != (_Tp *)NULL ) );
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
void deallocate(pointer __p, size_type __n)
|
||||||
|
{
|
||||||
|
size_type bytes = __n * sizeof(_Tp);
|
||||||
|
profilerFree(bytes);
|
||||||
|
MemoryManager::SharedFree((void *)__p,bytes);
|
||||||
|
}
|
||||||
|
|
||||||
// FIXME: hack for the copy constructor, eventually it must be avoided
|
// FIXME: hack for the copy constructor, eventually it must be avoided
|
||||||
void construct(pointer __p, const _Tp& __val) { new((void *)__p) _Tp(__val); };
|
void construct(pointer __p, const _Tp& __val) { new((void *)__p) _Tp(__val); };
|
||||||
//void construct(pointer __p, const _Tp& __val) { };
|
//void construct(pointer __p, const _Tp& __val) { };
|
||||||
void construct(pointer __p) { };
|
void construct(pointer __p) { };
|
||||||
void destroy(pointer __p) { };
|
void destroy(pointer __p) { };
|
||||||
};
|
};
|
||||||
template<typename _Tp> inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
|
template<typename _Tp> inline bool operator==(const uvmAllocator<_Tp>&, const uvmAllocator<_Tp>&){ return true; }
|
||||||
template<typename _Tp> inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
|
template<typename _Tp> inline bool operator!=(const uvmAllocator<_Tp>&, const uvmAllocator<_Tp>&){ return false; }
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Template typedefs
|
// Template typedefs
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
template<class T> using commAllocator = alignedAllocator<T>;
|
template<class T> using commAllocator = uvmAllocator<T>;
|
||||||
template<class T> using Vector = std::vector<T,alignedAllocator<T> >;
|
template<class T> using Vector = std::vector<T,uvmAllocator<T> >;
|
||||||
template<class T> using commVector = std::vector<T,alignedAllocator<T> >;
|
template<class T> using commVector = std::vector<T,uvmAllocator<T> >;
|
||||||
template<class T> using Matrix = std::vector<std::vector<T,alignedAllocator<T> > >;
|
//template<class T> using Matrix = std::vector<std::vector<T,alignedAllocator<T> > >;
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
|
||||||
|
@ -7,6 +7,17 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
#define CpuSmall (1)
|
#define CpuSmall (1)
|
||||||
#define Acc (2)
|
#define Acc (2)
|
||||||
#define AccSmall (3)
|
#define AccSmall (3)
|
||||||
|
#define Shared (4)
|
||||||
|
#define SharedSmall (5)
|
||||||
|
// Running byte totals, adjusted by the MemoryManager allocate/free routines in this file
// and printed by MemoryManager::PrintBytes().
uint64_t total_shared = 0;
uint64_t total_device = 0;
uint64_t total_host   = 0;
|
||||||
|
void MemoryManager::PrintBytes(void)
|
||||||
|
{
|
||||||
|
std::cout << " MemoryManager : "<<total_shared<<" shared bytes "<<std::endl;
|
||||||
|
std::cout << " MemoryManager : "<<total_device<<" accelerator bytes "<<std::endl;
|
||||||
|
std::cout << " MemoryManager : "<<total_host <<" cpu bytes "<<std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
// Data tables for recently freed pointer caches
|
// Data tables for recently freed pointer caches
|
||||||
@ -21,39 +32,63 @@ int MemoryManager::Ncache[MemoryManager::NallocType];
|
|||||||
void *MemoryManager::AcceleratorAllocate(size_t bytes)
|
void *MemoryManager::AcceleratorAllocate(size_t bytes)
|
||||||
{
|
{
|
||||||
void *ptr = (void *) Lookup(bytes,Acc);
|
void *ptr = (void *) Lookup(bytes,Acc);
|
||||||
|
|
||||||
if ( ptr == (void *) NULL ) {
|
if ( ptr == (void *) NULL ) {
|
||||||
ptr = (void *) acceleratorAllocDevice(bytes);
|
ptr = (void *) acceleratorAllocDevice(bytes);
|
||||||
// std::cout <<"AcceleratorAllocate: allocated Accelerator pointer "<<std::hex<<ptr<<std::endl;
|
total_device+=bytes;
|
||||||
|
// std::cout <<"AcceleratorAllocate: allocated Accelerator pointer "<<std::hex<<ptr<<std::dec<<std::endl;
|
||||||
|
// PrintBytes();
|
||||||
}
|
}
|
||||||
|
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
void MemoryManager::AcceleratorFree (void *ptr,size_t bytes)
|
void MemoryManager::AcceleratorFree (void *ptr,size_t bytes)
|
||||||
{
|
{
|
||||||
void *__freeme = Insert(ptr,bytes,Acc);
|
void *__freeme = Insert(ptr,bytes,Acc);
|
||||||
|
if ( __freeme ) {
|
||||||
if ( __freeme ) acceleratorFreeDevice(__freeme);
|
acceleratorFreeDevice(__freeme);
|
||||||
|
total_device-=bytes;
|
||||||
|
// PrintBytes();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
void *MemoryManager::SharedAllocate(size_t bytes)
|
||||||
|
{
|
||||||
|
void *ptr = (void *) Lookup(bytes,Shared);
|
||||||
|
if ( ptr == (void *) NULL ) {
|
||||||
|
ptr = (void *) acceleratorAllocShared(bytes);
|
||||||
|
total_shared+=bytes;
|
||||||
|
// std::cout <<"AcceleratorAllocate: allocated Shared pointer "<<std::hex<<ptr<<std::dec<<std::endl;
|
||||||
|
// PrintBytes();
|
||||||
|
}
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
void MemoryManager::SharedFree (void *ptr,size_t bytes)
|
||||||
|
{
|
||||||
|
void *__freeme = Insert(ptr,bytes,Shared);
|
||||||
|
if ( __freeme ) {
|
||||||
|
acceleratorFreeShared(__freeme);
|
||||||
|
total_shared-=bytes;
|
||||||
|
// PrintBytes();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
void *MemoryManager::CpuAllocate(size_t bytes)
|
void *MemoryManager::CpuAllocate(size_t bytes)
|
||||||
{
|
{
|
||||||
void *ptr = (void *) Lookup(bytes,Cpu);
|
void *ptr = (void *) Lookup(bytes,Cpu);
|
||||||
|
|
||||||
if ( ptr == (void *) NULL ) {
|
if ( ptr == (void *) NULL ) {
|
||||||
ptr = (void *) acceleratorAllocShared(bytes);
|
ptr = (void *) acceleratorAllocCpu(bytes);
|
||||||
// std::cout <<"CpuAllocate: allocated Cpu pointer "<<std::hex<<ptr<<std::endl;
|
total_host+=bytes;
|
||||||
|
// std::cout <<"CpuAllocate: allocated Cpu pointer "<<std::hex<<ptr<<std::dec<<std::endl;
|
||||||
|
// PrintBytes();
|
||||||
}
|
}
|
||||||
|
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
void MemoryManager::CpuFree (void *_ptr,size_t bytes)
|
void MemoryManager::CpuFree (void *_ptr,size_t bytes)
|
||||||
{
|
{
|
||||||
NotifyDeletion(_ptr);
|
NotifyDeletion(_ptr);
|
||||||
|
|
||||||
// If present remove entry and free accelerator too.
|
|
||||||
// Can we ever hit a free event with a view still in scope?
|
|
||||||
void *__freeme = Insert(_ptr,bytes,Cpu);
|
void *__freeme = Insert(_ptr,bytes,Cpu);
|
||||||
if ( __freeme ) acceleratorFreeShared(__freeme);
|
if ( __freeme ) {
|
||||||
|
acceleratorFreeCpu(__freeme);
|
||||||
|
total_host-=bytes;
|
||||||
|
// PrintBytes();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
//////////////////////////////////////////
|
//////////////////////////////////////////
|
||||||
// call only once
|
// call only once
|
||||||
@ -62,8 +97,10 @@ void MemoryManager::Init(void)
|
|||||||
{
|
{
|
||||||
Ncache[Cpu] = 8;
|
Ncache[Cpu] = 8;
|
||||||
Ncache[Acc] = 8;
|
Ncache[Acc] = 8;
|
||||||
|
Ncache[Shared] = 8;
|
||||||
Ncache[CpuSmall] = 32;
|
Ncache[CpuSmall] = 32;
|
||||||
Ncache[AccSmall] = 32;
|
Ncache[AccSmall] = 32;
|
||||||
|
Ncache[SharedSmall] = 32;
|
||||||
|
|
||||||
char * str;
|
char * str;
|
||||||
int Nc;
|
int Nc;
|
||||||
@ -75,6 +112,7 @@ void MemoryManager::Init(void)
|
|||||||
if ( (Nc>=0) && (Nc < NallocCacheMax)) {
|
if ( (Nc>=0) && (Nc < NallocCacheMax)) {
|
||||||
Ncache[Cpu]=Nc;
|
Ncache[Cpu]=Nc;
|
||||||
Ncache[Acc]=Nc;
|
Ncache[Acc]=Nc;
|
||||||
|
Ncache[Shared]=Nc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -84,6 +122,7 @@ void MemoryManager::Init(void)
|
|||||||
if ( (Nc>=0) && (Nc < NallocCacheMax)) {
|
if ( (Nc>=0) && (Nc < NallocCacheMax)) {
|
||||||
Ncache[CpuSmall]=Nc;
|
Ncache[CpuSmall]=Nc;
|
||||||
Ncache[AccSmall]=Nc;
|
Ncache[AccSmall]=Nc;
|
||||||
|
Ncache[SharedSmall]=Nc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << GridLogMessage<< "MemoryManager::Init() setting up"<<std::endl;
|
std::cout << GridLogMessage<< "MemoryManager::Init() setting up"<<std::endl;
|
||||||
|
@ -44,14 +44,14 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
enum ViewAdvise {
|
enum ViewAdvise {
|
||||||
AdviseDefault = 0x0, // Regular data
|
AdviseDefault = 0x0, // Regular data
|
||||||
AdviseInfrequentUse = 0x1, // Advise that the data is used infrequently. This can
|
AdviseInfrequentUse = 0x1 // Advise that the data is used infrequently. This can
|
||||||
// significantly influence performance of bulk storage.
|
// significantly influence performance of bulk storage.
|
||||||
|
|
||||||
AdviseTransient = 0x2, // Data will mostly be read. On some architectures
|
// AdviseTransient = 0x2, // Data will mostly be read. On some architectures
|
||||||
// enables read-only copies of memory to be kept on
|
// enables read-only copies of memory to be kept on
|
||||||
// host and device.
|
// host and device.
|
||||||
|
|
||||||
AdviseAcceleratorWriteDiscard = 0x4 // Field will be written in entirety on device
|
// AdviseAcceleratorWriteDiscard = 0x4 // Field will be written in entirety on device
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -80,7 +80,7 @@ private:
|
|||||||
} AllocationCacheEntry;
|
} AllocationCacheEntry;
|
||||||
|
|
||||||
static const int NallocCacheMax=128;
|
static const int NallocCacheMax=128;
|
||||||
static const int NallocType=4;
|
static const int NallocType=6;
|
||||||
static AllocationCacheEntry Entries[NallocType][NallocCacheMax];
|
static AllocationCacheEntry Entries[NallocType][NallocCacheMax];
|
||||||
static int Victim[NallocType];
|
static int Victim[NallocType];
|
||||||
static int Ncache[NallocType];
|
static int Ncache[NallocType];
|
||||||
@ -95,9 +95,11 @@ private:
|
|||||||
|
|
||||||
static void *AcceleratorAllocate(size_t bytes);
|
static void *AcceleratorAllocate(size_t bytes);
|
||||||
static void AcceleratorFree (void *ptr,size_t bytes);
|
static void AcceleratorFree (void *ptr,size_t bytes);
|
||||||
|
static void PrintBytes(void);
|
||||||
public:
|
public:
|
||||||
static void Init(void);
|
static void Init(void);
|
||||||
|
static void *SharedAllocate(size_t bytes);
|
||||||
|
static void SharedFree (void *ptr,size_t bytes);
|
||||||
static void *CpuAllocate(size_t bytes);
|
static void *CpuAllocate(size_t bytes);
|
||||||
static void CpuFree (void *ptr,size_t bytes);
|
static void CpuFree (void *ptr,size_t bytes);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user