diff --git a/Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h b/Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h
new file mode 100644
index 00000000..22b7725e
--- /dev/null
+++ b/Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h
@@ -0,0 +1,241 @@
+/*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid
+
+    Source file: ./lib/algorithms/iterative/PrecGeneralisedConjugateResidual.h
+
+    Copyright (C) 2015
+
+Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+/* END LEGAL */
+#ifndef GRID_PREC_GCR_NON_HERM_H
+#define GRID_PREC_GCR_NON_HERM_H
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+//VPGCR Abe and Zhang, 2005.
+//INTERNATIONAL JOURNAL OF NUMERICAL ANALYSIS AND MODELING
+//Computing and Information Volume 2, Number 2, Pages 147-161
+//NB. Likely not original reference since they are focussing on a preconditioner variant.
+//    but VPGCR was nicely written up in their paper
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+NAMESPACE_BEGIN(Grid);
+
+#define GCRLogLevel std::cout << GridLogMessage <<std::string(level,'\t')<< " Level "<<level<<" "
+
+template<class Field>
+class PrecGeneralisedConjugateResidualNonHermitian : public LinearFunction<Field> {
+public:
+
+  RealD   Tolerance;
+  Integer MaxIterations;
+  int verbose;
+  int mmax;
+  int nstep;
+  int steps;
+  int level;
+  GridStopWatch PrecTimer;
+  GridStopWatch MatTimer;
+  GridStopWatch LinalgTimer;
+
+  LinearFunction<Field>     &Preconditioner;
+  LinearOperatorBase<Field> &Linop;
+
+  void Level(int lv) { level=lv; };
+
+  PrecGeneralisedConjugateResidualNonHermitian(RealD tol,Integer maxit,LinearOperatorBase<Field> &_Linop,LinearFunction<Field> &Prec,int _mmax,int _nstep) :
+    Tolerance(tol),
+    MaxIterations(maxit),
+    Linop(_Linop),
+    Preconditioner(Prec),
+    mmax(_mmax),
+    nstep(_nstep)
+  {
+    level=1;
+    verbose=1;
+  };
+
+  void operator() (const Field &src, Field &psi){
+
+    psi=Zero();
+    RealD cp, ssq,rsq;
+    ssq=norm2(src);
+    rsq=Tolerance*Tolerance*ssq;
+
+    Field r(src.Grid());
+
+    PrecTimer.Reset();
+    MatTimer.Reset();
+    LinalgTimer.Reset();
+
+    GridStopWatch SolverTimer;
+    SolverTimer.Start();
+
+    steps=0;
+    for(int k=0;k<MaxIterations;k++){
+
+      cp=GCRnStep(src,psi,rsq);
+
+      GCRLogLevel <<"PGCR("<<mmax<<","<<nstep<<") "<<steps<<" steps cp = "<<cp<<" target "<<rsq <<std::endl;
+
+      if(cp<rsq) {
+
+        SolverTimer.Stop();
+
+        Linop.Op(psi,r);
+        axpy(r,-1.0,src,r);
+        RealD tr = norm2(r);
+        GCRLogLevel<<"PGCR: Converged on iteration " <<steps
+                   << " computed residual "<<sqrt(cp/ssq)
+                   << " true residual "    <<sqrt(tr/ssq)
+                   << " target "           <<Tolerance <<std::endl;
+
+        GCRLogLevel<<"PGCR Time elapsed: Total "<< SolverTimer.Elapsed() <<std::endl;
+        return;
+      }
+
+    }
+    GCRLogLevel<<"Variable Preconditioned GCR did not converge"<<std::endl;
+  }
+
+  RealD GCRnStep(const Field &src, Field &psi,RealD rsq){
+
+    RealD cp;
+    ComplexD a, b, zAz;
+    RealD zAAz;
+    ComplexD rq;
+
+    GridBase *grid = src.Grid();
+
+    Field r(grid);
+    Field z(grid);
+    Field tmp(grid);
+    Field ttmp(grid);
+    Field Az(grid);
+
+    ////////////////////////////////
+    // history for flexible orthog
+    ////////////////////////////////
+    std::vector<Field> q(mmax,grid);
+    std::vector<Field> p(mmax,grid);
+    std::vector<RealD> qq(mmax);
+
+    GCRLogLevel<< "PGCR nStep("<<nstep<<")"<<std::endl;
+
+    //////////////////////////////////
+    // initial guess x0 is taken as nonzero.
+    // r0=src-A x0 = src
+    //////////////////////////////////
+    MatTimer.Start();
+    Linop.Op(psi,Az);
+    zAz = innerProduct(Az,psi);
+    zAAz= norm2(Az);
+    MatTimer.Stop();
+
+    LinalgTimer.Start();
+    r=src-Az;
+    LinalgTimer.Stop();
+
+    /////////////////////
+    // p = Prec(r)
+    /////////////////////
+    PrecTimer.Start();
+    Preconditioner(r,z);
+    PrecTimer.Stop();
+
+    MatTimer.Start();
+    Linop.Op(z,Az);
+    MatTimer.Stop();
+
+    LinalgTimer.Start();
+    zAz = innerProduct(Az,psi);
+    zAAz= norm2(Az);
+
+    //p[0],q[0],qq[0]
+    p[0]= z;
+    q[0]= Az;
+    qq[0]= zAAz;
+
+    cp =norm2(r);
+    LinalgTimer.Stop();
+
+    for(int k=0;k<nstep;k++){
+
+      steps++;
+
+      int kp     = k+1;
+      int peri_k = k %mmax;
+      int peri_kp= kp%mmax;
+
+      LinalgTimer.Start();
+      rq= innerProduct(q[peri_k],r); // what if rAr not real?
+      a = rq/qq[peri_k];
+
+      axpy(psi,a,p[peri_k],psi);
+
+      cp = axpy_norm(r,-a,q[peri_k],r);
+      LinalgTimer.Stop();
+
+      GCRLogLevel<< "PGCR step["<<steps<<"]  resid " << cp << " target " <<rsq<<std::endl;
+
+      if((k==nstep-1)||(cp<rsq)){
+        return cp;
+      }
+
+      PrecTimer.Start();
+      Preconditioner(r,z);// solve Az = r
+      PrecTimer.Stop();
+
+      MatTimer.Start();
+      Linop.Op(z,Az);
+      MatTimer.Stop();
+
+      zAz = innerProduct(Az,psi);
+      zAAz= norm2(Az);
+
+      LinalgTimer.Start();
+
+      q[peri_kp]=Az;
+      p[peri_kp]=z;
+
+      int northog = ((kp)>(mmax-1))?(mmax-1):(kp); // if more than mmax done, we orthog all mmax history.
+      for(int back=0;back<northog;back++){
+
+        int peri_back=(k-back)%mmax;   assert((k-back)>=0);
+
+        b=-real(innerProduct(q[peri_back],Az))/qq[peri_back];
+        p[peri_kp]=p[peri_kp]+b*p[peri_back];
+        q[peri_kp]=q[peri_kp]+b*q[peri_back];
+
+      }
+      qq[peri_kp]=norm2(q[peri_kp]); // could use axpy_norm
+      LinalgTimer.Stop();
+    }
+    assert(0); // never reached
+    return cp;
+  }
+};
+NAMESPACE_END(Grid);
+#endif
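Note on the new solver: each outer iteration calls GCRnStep, which takes nstep preconditioned steps while orthogonalising the new direction against up to mmax stored history vectors, so (mmax, nstep) fix the restart cycle. A minimal usage sketch follows. It is not part of this patch; DomainWallFermionR, NonHermitianLinearOperator and TrivialPrecon are assumed from the surrounding Grid API, and any LinearOperatorBase<Field> / LinearFunction<Field> pair would serve.

    // Usage sketch only -- not included in this diff.
    void SolveExample(DomainWallFermionR &Ddwf, LatticeFermion &src, LatticeFermion &psi)
    {
      NonHermitianLinearOperator<DomainWallFermionR,LatticeFermion> Linop(Ddwf);
      TrivialPrecon<LatticeFermion> Prec;  // identity stand-in for a real (e.g. multigrid) preconditioner

      PrecGeneralisedConjugateResidualNonHermitian<LatticeFermion>
        PGCR(1.0e-8,   // Tolerance: stop when |r|^2 < tol^2 |src|^2
             10000,    // MaxIterations: outer restart limit
             Linop,    // the (possibly non-Hermitian) matrix M
             Prec,     // variable preconditioner, may differ step to step
             16,       // mmax  : orthogonalisation history kept
             16);      // nstep : steps per restart cycle

      PGCR(src,psi);   // solve M psi = src; psi is zeroed internally
    }

Because the preconditioner is allowed to vary between steps ("flexible" GCR), the explicit history orthogonalisation replaces the short recurrences a fixed-preconditioner CG would use.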
diff --git a/Grid/allocator/AlignedAllocator.h b/Grid/allocator/AlignedAllocator.h
index 6c6dd7d8..ebb3162b 100644
--- a/Grid/allocator/AlignedAllocator.h
+++ b/Grid/allocator/AlignedAllocator.h
@@ -52,41 +52,79 @@ public:
   pointer allocate(size_type __n, const void* _p= 0)
   {
     size_type bytes = __n*sizeof(_Tp);
-    profilerAllocate(bytes);
-    _Tp *ptr = (_Tp*) MemoryManager::CpuAllocate(bytes);
-    assert( ( (_Tp*)ptr != (_Tp *)NULL ) );
-    return ptr;
   }
 
   void deallocate(pointer __p, size_type __n)
   {
     size_type bytes = __n * sizeof(_Tp);
-    profilerFree(bytes);
-    MemoryManager::CpuFree((void *)__p,bytes);
   }
+
+  // FIXME: hack for the copy constructor, eventually it must be avoided
+  //void construct(pointer __p, const _Tp& __val) { new((void *)__p) _Tp(__val); };
+  void construct(pointer __p, const _Tp& __val) { assert(0);};
+  void construct(pointer __p) { };
+  void destroy(pointer __p) { };
+};
+template<typename _Tp> inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
+template<typename _Tp> inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
+
+template<typename _Tp>
+class uvmAllocator {
+public:
+  typedef std::size_t     size_type;
+  typedef std::ptrdiff_t  difference_type;
+  typedef _Tp*       pointer;
+  typedef const _Tp* const_pointer;
+  typedef _Tp&       reference;
+  typedef const _Tp& const_reference;
+  typedef _Tp        value_type;
+
+  template<typename _Tp1> struct rebind { typedef uvmAllocator<_Tp1> other; };
+  uvmAllocator() throw() { }
+  uvmAllocator(const uvmAllocator&) throw() { }
+  template<typename _Tp1> uvmAllocator(const uvmAllocator<_Tp1>&) throw() { }
+  ~uvmAllocator() throw() { }
+  pointer address(reference __x) const { return &__x; }
+  size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
+
+  pointer allocate(size_type __n, const void* _p= 0)
+  {
+    size_type bytes = __n*sizeof(_Tp);
+    profilerAllocate(bytes);
+    _Tp *ptr = (_Tp*) MemoryManager::SharedAllocate(bytes);
+    assert( ( (_Tp*)ptr != (_Tp *)NULL ) );
+    return ptr;
+  }
+
+  void deallocate(pointer __p, size_type __n)
+  {
+    size_type bytes = __n * sizeof(_Tp);
+    profilerFree(bytes);
+    MemoryManager::SharedFree((void *)__p,bytes);
+  }
+
   // FIXME: hack for the copy constructor, eventually it must be avoided
   void construct(pointer __p, const _Tp& __val) { new((void *)__p) _Tp(__val); };
   //void construct(pointer __p, const _Tp& __val) { };
   void construct(pointer __p) { };
   void destroy(pointer __p) { };
 };
-template<typename _Tp> inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
-template<typename _Tp> inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
+template<typename _Tp> inline bool operator==(const uvmAllocator<_Tp>&, const uvmAllocator<_Tp>&){ return true; }
+template<typename _Tp> inline bool operator!=(const uvmAllocator<_Tp>&, const uvmAllocator<_Tp>&){ return false; }
 
 ////////////////////////////////////////////////////////////////////////////////
 // Template typedefs
 ////////////////////////////////////////////////////////////////////////////////
-template<class T> using commAllocator = alignedAllocator<T>;
-template<class T> using Vector     = std::vector<T,alignedAllocator<T> >;
-template<class T> using commVector = std::vector<T,alignedAllocator<T> >;
-template<class T> using Matrix     = std::vector<std::vector<T,alignedAllocator<T> > >;
+template<class T> using commAllocator = uvmAllocator<T>;
+template<class T> using Vector     = std::vector<T,uvmAllocator<T> >;
+template<class T> using commVector = std::vector<T,uvmAllocator<T> >;
+//template<class T> using Matrix     = std::vector<std::vector<T,uvmAllocator<T> > >;
 
 NAMESPACE_END(Grid);
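Note on the allocator change: with the typedef switch, every Vector and commVector now draws its storage from the shared (unified-memory) pool, so the same pointer is valid on host and device. A sketch of the intended semantics, assuming a CUDA-style managed-memory backend behind MemoryManager::SharedAllocate and Grid's accelerator_for macro (illustration only, not code from this patch):

    // Sketch, not from the patch: why Vector<T> moves to uvmAllocator.
    void ScaleOnDevice(void)
    {
      Vector<double> buf(1024);            // storage via MemoryManager::SharedAllocate
      for(int i=0;i<1024;i++) buf[i]=i;    // initialised on the host

      double *p = &buf[0];
      accelerator_for(i, 1024, 1, {        // same pointer dereferenced in device code;
        p[i] = 2.0*p[i];                   // the unified-memory driver migrates pages
      });
    }

Note also that alignedAllocator::construct now asserts while uvmAllocator keeps the placement-new hack: std::vector element construction is a single-threaded host loop, which the FIXME comments flag as something to be eliminated rather than silently paid for.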
diff --git a/Grid/allocator/MemoryManager.cc b/Grid/allocator/MemoryManager.cc
index 6d638b60..fa60e820 100644
--- a/Grid/allocator/MemoryManager.cc
+++ b/Grid/allocator/MemoryManager.cc
@@ -7,6 +7,17 @@ NAMESPACE_BEGIN(Grid);
 #define CpuSmall (1)
 #define Acc     (2)
 #define AccSmall (3)
+#define Shared (4)
+#define SharedSmall (5)
+uint64_t total_shared;
+uint64_t total_device;
+uint64_t total_host;;
+void MemoryManager::PrintBytes(void)
+{
+  std::cout << " MemoryManager : "<<total_shared<<" shared      bytes "<<std::endl;
+  std::cout << " MemoryManager : "<<total_device<<" accelerator bytes "<<std::endl;
+  std::cout << " MemoryManager : "<<total_host  <<" cpu         bytes "<<std::endl;
+}
@@ -73,5 +110,6 @@ void MemoryManager::Init(void)
     if ( (Nc>=0) && (Nc < NallocCacheMax)) {
       Ncache[Cpu]=Nc;
       Ncache[Acc]=Nc;
+      Ncache[Shared]=Nc;
     }
   }
@@ -84,6 +122,7 @@ void MemoryManager::Init(void)
     if ( (Nc>=0) && (Nc < NallocCacheMax)) {
       Ncache[CpuSmall]=Nc;
       Ncache[AccSmall]=Nc;
+      Ncache[SharedSmall]=Nc;
     }
   }
   std::cout << GridLogMessage<< "MemoryManager::Init() setting up"<<std::endl;
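Note on the MemoryManager change: the two Init() hunks mean the existing cache-depth knobs, GRID_ALLOC_NCACHE_LARGE and GRID_ALLOC_NCACHE_SMALL, now also govern the new shared pool alongside the Cpu and Acc pools. A sketch of the pool selection this implies; the six-way enum mirrors the defines above, but the 4096-byte small/large cutoff is an assumed placeholder, not a value visible in this hunk:

    // Sketch, not from the patch: how an allocation might pick among the
    // six pools after this change.
    #include <cstddef>

    enum PoolType { PoolCpu=0, PoolCpuSmall=1, PoolAcc=2, PoolAccSmall=3,
                    PoolShared=4, PoolSharedSmall=5 };

    inline PoolType SharedPoolFor(std::size_t bytes)
    {
      const std::size_t cutoff = 4096;                          // hypothetical split
      return (bytes <= cutoff) ? PoolSharedSmall : PoolShared;  // small blocks use the deeper *Small cache
    }

Setting GRID_ALLOC_NCACHE_LARGE=0 drives the Ncache[Shared]=Nc branch to zero, i.e. it disables caching of freed large shared blocks, a handy way to take the pool out of the picture when debugging unified-memory usage reported by PrintBytes().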