mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
Compare commits
16 Commits
d299c86633
...
41d8adca95
Author | SHA1 | Date | |
---|---|---|---|
|
41d8adca95 | ||
|
059e8e5bb0 | ||
|
b3ee8ded96 | ||
|
cf3584ad15 | ||
|
a66973163f | ||
|
4502a8c8a1 | ||
|
9c902e4c2d | ||
|
f3eb36adcf | ||
|
7c246606c1 | ||
|
172c75029e | ||
|
89fdd7f8dd | ||
|
c328be24b7 | ||
|
a73dc6dbf4 | ||
|
eee2a2657f | ||
|
63c223ea5d | ||
|
2877fb4a2c |
@ -30,9 +30,14 @@ directory
|
|||||||
|
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
#include <exception>
|
||||||
|
|
||||||
#define NAMESPACE_BEGIN(A) namespace A {
|
#define NAMESPACE_BEGIN(A) namespace A {
|
||||||
#define NAMESPACE_END(A) }
|
#define NAMESPACE_END(A) }
|
||||||
#define GRID_NAMESPACE_BEGIN NAMESPACE_BEGIN(Grid)
|
#define GRID_NAMESPACE_BEGIN NAMESPACE_BEGIN(Grid)
|
||||||
#define GRID_NAMESPACE_END NAMESPACE_END(Grid)
|
#define GRID_NAMESPACE_END NAMESPACE_END(Grid)
|
||||||
#define NAMESPACE_CHECK(x) struct namespaceTEST##x {}; static_assert(std::is_same<namespaceTEST##x, ::namespaceTEST##x>::value,"Not in :: at" );
|
#define NAMESPACE_CHECK(x) struct namespaceTEST##x {}; static_assert(std::is_same<namespaceTEST##x, ::namespaceTEST##x>::value,"Not in :: at" );
|
||||||
|
|
||||||
|
#define EXCEPTION_CHECK_BEGIN(A) try {
|
||||||
|
#define EXCEPTION_CHECK_END(A) } catch ( std::exception e ) { BACKTRACEFP(stderr); std::cerr << __PRETTY_FUNCTION__ << " : " <<__LINE__<< " Caught exception "<<e.what()<<std::endl; throw; }
|
||||||
|
|
||||||
|
@ -89,9 +89,10 @@ public:
|
|||||||
gridblasHandle = theGridAccelerator;
|
gridblasHandle = theGridAccelerator;
|
||||||
#endif
|
#endif
|
||||||
#ifdef GRID_ONE_MKL
|
#ifdef GRID_ONE_MKL
|
||||||
cl::sycl::cpu_selector selector;
|
cl::sycl::gpu_selector selector;
|
||||||
cl::sycl::device selectedDevice { selector };
|
cl::sycl::device selectedDevice { selector };
|
||||||
gridblasHandle =new sycl::queue (selectedDevice);
|
cl::sycl::property_list q_prop{cl::sycl::property::queue::in_order()};
|
||||||
|
gridblasHandle =new sycl::queue (selectedDevice,q_prop);
|
||||||
#endif
|
#endif
|
||||||
gridblasInit=1;
|
gridblasInit=1;
|
||||||
}
|
}
|
||||||
@ -266,8 +267,46 @@ public:
|
|||||||
assert(err==CUBLAS_STATUS_SUCCESS);
|
assert(err==CUBLAS_STATUS_SUCCESS);
|
||||||
#endif
|
#endif
|
||||||
#ifdef GRID_SYCL
|
#ifdef GRID_SYCL
|
||||||
//MKL’s cblas_<T>gemm_batch & OneAPI
|
std::cerr << " Calling SYCL batched ZGEMM "<<std::endl;
|
||||||
#warning "oneMKL implementation not built "
|
int64_t m64=m;
|
||||||
|
int64_t n64=n;
|
||||||
|
int64_t k64=k;
|
||||||
|
int64_t lda64=lda;
|
||||||
|
int64_t ldb64=ldb;
|
||||||
|
int64_t ldc64=ldc;
|
||||||
|
int64_t batchCount64=batchCount;
|
||||||
|
oneapi::mkl::transpose notransp =oneapi::mkl::transpose::N;
|
||||||
|
oneapi::mkl::blas::column_major::gemm_batch(*gridblasHandle,
|
||||||
|
¬ransp,
|
||||||
|
¬ransp,
|
||||||
|
&m64,&n64,&k64,
|
||||||
|
(ComplexD *) &alpha_p[0],
|
||||||
|
(const ComplexD **)&Amk[0], (const int64_t *)&lda64,
|
||||||
|
(const ComplexD **)&Bkn[0], (const int64_t *)&ldb64,
|
||||||
|
(ComplexD *) &beta_p[0],
|
||||||
|
(ComplexD **)&Cmn[0], (const int64_t *)&ldc64,
|
||||||
|
(int64_t)1,&batchCount64,std::vector<sycl::event>());
|
||||||
|
synchronise();
|
||||||
|
std::cerr << " Called SYCL batched ZGEMM "<<std::endl;
|
||||||
|
std::vector<ComplexD> A(m*k); // pointer list to matrices
|
||||||
|
std::vector<ComplexD> B(k*n);
|
||||||
|
std::vector<ComplexD> C(m*n);
|
||||||
|
int sda = lda*k;
|
||||||
|
int sdb = ldb*k;
|
||||||
|
int sdc = ldc*n;
|
||||||
|
for (int p = 0; p < 1; ++p) {
|
||||||
|
acceleratorCopyFromDevice((void *)&Amk[p][0],(void *)&A[0],m*k*sizeof(ComplexD));
|
||||||
|
acceleratorCopyFromDevice((void *)&Bkn[p][0],(void *)&B[0],k*n*sizeof(ComplexD));
|
||||||
|
acceleratorCopyFromDevice((void *)&Cmn[p][0],(void *)&C[0],m*n*sizeof(ComplexD));
|
||||||
|
for (int mm = 0; mm < m; ++mm) {
|
||||||
|
for (int nn = 0; nn < n; ++nn) {
|
||||||
|
ComplexD c_mn(0.0);
|
||||||
|
for (int kk = 0; kk < k; ++kk)
|
||||||
|
c_mn += A[mm + kk*lda ] * B[kk + nn*ldb];
|
||||||
|
std::cout << " beta "<<beta<<" C_"<<mm<<","<<nn<<" "<<c_mn<<" "<<C[mm + nn*ldc]<<std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if !defined(GRID_SYCL) && !defined(GRID_CUDA) && !defined(GRID_HIP)
|
#if !defined(GRID_SYCL) && !defined(GRID_CUDA) && !defined(GRID_HIP)
|
||||||
// Need a default/reference implementation
|
// Need a default/reference implementation
|
||||||
@ -285,7 +324,6 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
// synchronise();
|
|
||||||
RealD t1=usecond();
|
RealD t1=usecond();
|
||||||
RealD flops = 8.0*m*n*k*batchCount;
|
RealD flops = 8.0*m*n*k*batchCount;
|
||||||
RealD bytes = 1.0*sizeof(ComplexD)*(m*k+k*n+m*n)*batchCount;
|
RealD bytes = 1.0*sizeof(ComplexD)*(m*k+k*n+m*n)*batchCount;
|
||||||
@ -366,8 +404,25 @@ public:
|
|||||||
assert(err==CUBLAS_STATUS_SUCCESS);
|
assert(err==CUBLAS_STATUS_SUCCESS);
|
||||||
#endif
|
#endif
|
||||||
#ifdef GRID_SYCL
|
#ifdef GRID_SYCL
|
||||||
//MKL’s cblas_<T>gemm_batch & OneAPI
|
int64_t m64=m;
|
||||||
#warning "oneMKL implementation not built "
|
int64_t n64=n;
|
||||||
|
int64_t k64=k;
|
||||||
|
int64_t lda64=lda;
|
||||||
|
int64_t ldb64=ldb;
|
||||||
|
int64_t ldc64=ldc;
|
||||||
|
int64_t batchCount64=batchCount;
|
||||||
|
oneapi::mkl::transpose notransp =oneapi::mkl::transpose::N;
|
||||||
|
oneapi::mkl::blas::column_major::gemm_batch(*gridblasHandle,
|
||||||
|
¬ransp,
|
||||||
|
¬ransp,
|
||||||
|
&m64,&n64,&k64,
|
||||||
|
(ComplexF *) &alpha_p[0],
|
||||||
|
(const ComplexF **)&Amk[0], (const int64_t *)&lda64,
|
||||||
|
(const ComplexF **)&Bkn[0], (const int64_t *)&ldb64,
|
||||||
|
(ComplexF *) &beta_p[0],
|
||||||
|
(ComplexF **)&Cmn[0], (const int64_t *)&ldc64,
|
||||||
|
(int64_t)1,&batchCount64,std::vector<sycl::event>());
|
||||||
|
synchronise();
|
||||||
#endif
|
#endif
|
||||||
#if !defined(GRID_SYCL) && !defined(GRID_CUDA) && !defined(GRID_HIP)
|
#if !defined(GRID_SYCL) && !defined(GRID_CUDA) && !defined(GRID_HIP)
|
||||||
int sda = lda*k;
|
int sda = lda*k;
|
||||||
@ -467,8 +522,25 @@ public:
|
|||||||
assert(err==CUBLAS_STATUS_SUCCESS);
|
assert(err==CUBLAS_STATUS_SUCCESS);
|
||||||
#endif
|
#endif
|
||||||
#ifdef GRID_SYCL
|
#ifdef GRID_SYCL
|
||||||
//MKL’s cblas_<T>gemm_batch & OneAPI
|
int64_t m64=m;
|
||||||
#warning "oneMKL implementation not built "
|
int64_t n64=n;
|
||||||
|
int64_t k64=k;
|
||||||
|
int64_t lda64=lda;
|
||||||
|
int64_t ldb64=ldb;
|
||||||
|
int64_t ldc64=ldc;
|
||||||
|
int64_t batchCount64=batchCount;
|
||||||
|
oneapi::mkl::transpose notransp =oneapi::mkl::transpose::N;
|
||||||
|
oneapi::mkl::blas::column_major::gemm_batch(*gridblasHandle,
|
||||||
|
¬ransp,
|
||||||
|
¬ransp,
|
||||||
|
&m64,&n64,&k64,
|
||||||
|
(float *) &alpha_p[0],
|
||||||
|
(const float **)&Amk[0], (const int64_t *)&lda64,
|
||||||
|
(const float **)&Bkn[0], (const int64_t *)&ldb64,
|
||||||
|
(float *) &beta_p[0],
|
||||||
|
(float **)&Cmn[0], (const int64_t *)&ldc64,
|
||||||
|
(int64_t)1,&batchCount64,std::vector<sycl::event>());
|
||||||
|
synchronise();
|
||||||
#endif
|
#endif
|
||||||
#if !defined(GRID_SYCL) && !defined(GRID_CUDA) && !defined(GRID_HIP)
|
#if !defined(GRID_SYCL) && !defined(GRID_CUDA) && !defined(GRID_HIP)
|
||||||
int sda = lda*k;
|
int sda = lda*k;
|
||||||
@ -568,24 +640,25 @@ public:
|
|||||||
assert(err==CUBLAS_STATUS_SUCCESS);
|
assert(err==CUBLAS_STATUS_SUCCESS);
|
||||||
#endif
|
#endif
|
||||||
#ifdef GRID_SYCL
|
#ifdef GRID_SYCL
|
||||||
/*
|
|
||||||
int64_t m64=m;
|
int64_t m64=m;
|
||||||
int64_t n64=n;
|
int64_t n64=n;
|
||||||
int64_t k64=k;
|
int64_t k64=k;
|
||||||
|
int64_t lda64=lda;
|
||||||
|
int64_t ldb64=ldb;
|
||||||
|
int64_t ldc64=ldc;
|
||||||
int64_t batchCount64=batchCount;
|
int64_t batchCount64=batchCount;
|
||||||
oneapi::mkl::blas::column_major::gemm_batch(*theGridAccelerator,
|
oneapi::mkl::transpose notransp =oneapi::mkl::transpose::N;
|
||||||
onemkl::transpose::N,
|
oneapi::mkl::blas::column_major::gemm_batch(*gridblasHandle,
|
||||||
onemkl::transpose::N,
|
¬ransp,
|
||||||
&m64,&n64,&k64,
|
¬ransp,
|
||||||
(double *) &alpha_p[0],
|
&m64,&n64,&k64,
|
||||||
(double **)&Amk[0], lda,
|
(double *) &alpha_p[0],
|
||||||
(double **)&Bkn[0], ldb,
|
(const double **)&Amk[0], (const int64_t *)&lda64,
|
||||||
(double *) &beta_p[0],
|
(const double **)&Bkn[0], (const int64_t *)&ldb64,
|
||||||
(double **)&Cmn[0], ldc,
|
(double *) &beta_p[0],
|
||||||
1,&batchCount64);
|
(double **)&Cmn[0], (const int64_t *)&ldc64,
|
||||||
*/
|
(int64_t)1,&batchCount64,std::vector<sycl::event>());
|
||||||
//MKL’s cblas_<T>gemm_batch & OneAPI
|
synchronise();
|
||||||
#warning "oneMKL implementation not built "
|
|
||||||
#endif
|
#endif
|
||||||
#if !defined(GRID_SYCL) && !defined(GRID_CUDA) && !defined(GRID_HIP)
|
#if !defined(GRID_SYCL) && !defined(GRID_CUDA) && !defined(GRID_HIP)
|
||||||
int sda = lda*k;
|
int sda = lda*k;
|
||||||
@ -673,6 +746,7 @@ public:
|
|||||||
beta,
|
beta,
|
||||||
(ComplexD *)Cmn,ldc,sdc,
|
(ComplexD *)Cmn,ldc,sdc,
|
||||||
batchCount);
|
batchCount);
|
||||||
|
synchronise();
|
||||||
#endif
|
#endif
|
||||||
#if !defined(GRID_SYCL) && !defined(GRID_CUDA) && !defined(GRID_HIP) && !defined(GRID_ONE_MKL)
|
#if !defined(GRID_SYCL) && !defined(GRID_CUDA) && !defined(GRID_HIP) && !defined(GRID_ONE_MKL)
|
||||||
// Need a default/reference implementation
|
// Need a default/reference implementation
|
||||||
|
@ -499,6 +499,87 @@ namespace Grid {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Site diagonal is identity, left preconditioned by Mee^inv
|
||||||
|
// ( 1 - Mee^inv Meo Moo^inv Moe ) phi = Mee_inv ( Mee - Meo Moo^inv Moe Mee^inv ) phi = Mee_inv eta
|
||||||
|
//
|
||||||
|
// Solve:
|
||||||
|
// ( 1 - Mee^inv Meo Moo^inv Moe )^dag ( 1 - Mee^inv Meo Moo^inv Moe ) phi = ( 1 - Mee^inv Meo Moo^inv Moe )^dag Mee_inv eta
|
||||||
|
//
|
||||||
|
// Old notation e<->o
|
||||||
|
//
|
||||||
|
// Left precon by Moo^-1
|
||||||
|
// b) (Doo^{dag} M_oo^-dag) (Moo^-1 Doo) psi_o = [ (D_oo)^dag M_oo^-dag ] Moo^-1 L^{-1} eta_o
|
||||||
|
// eta_o' = (D_oo)^dag M_oo^-dag Moo^-1 (eta_o - Moe Mee^{-1} eta_e)
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
template<class Field> class SchurRedBlackDiagOneSolve : public SchurRedBlackBase<Field> {
|
||||||
|
public:
|
||||||
|
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////
|
||||||
|
// Wrap the usual normal equations Schur trick
|
||||||
|
/////////////////////////////////////////////////////
|
||||||
|
SchurRedBlackDiagOneSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false,
|
||||||
|
const bool _solnAsInitGuess = false)
|
||||||
|
: SchurRedBlackBase<Field>(HermitianRBSolver,initSubGuess,_solnAsInitGuess) {};
|
||||||
|
|
||||||
|
virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
|
||||||
|
{
|
||||||
|
GridBase *grid = _Matrix.RedBlackGrid();
|
||||||
|
GridBase *fgrid= _Matrix.Grid();
|
||||||
|
|
||||||
|
SchurDiagOneOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||||
|
|
||||||
|
Field tmp(grid);
|
||||||
|
Field Mtmp(grid);
|
||||||
|
|
||||||
|
pickCheckerboard(Even,src_e,src);
|
||||||
|
pickCheckerboard(Odd ,src_o,src);
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////
|
||||||
|
// src_o = Mpcdag *MooeeInv * (source_o - Moe MeeInv source_e)
|
||||||
|
/////////////////////////////////////////////////////
|
||||||
|
_Matrix.MooeeInv(src_e,tmp); assert( tmp.Checkerboard() ==Even);
|
||||||
|
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.Checkerboard() ==Odd);
|
||||||
|
Mtmp=src_o-Mtmp;
|
||||||
|
_Matrix.MooeeInv(Mtmp,tmp); assert( tmp.Checkerboard() ==Odd);
|
||||||
|
|
||||||
|
// get the right MpcDag
|
||||||
|
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.Checkerboard() ==Odd);
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)
|
||||||
|
{
|
||||||
|
GridBase *grid = _Matrix.RedBlackGrid();
|
||||||
|
GridBase *fgrid= _Matrix.Grid();
|
||||||
|
|
||||||
|
Field tmp(grid);
|
||||||
|
Field sol_e(grid);
|
||||||
|
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////
|
||||||
|
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||||
|
///////////////////////////////////////////////////
|
||||||
|
_Matrix.Meooe(sol_o,tmp); assert( tmp.Checkerboard() ==Even);
|
||||||
|
tmp = src_e-tmp; assert( src_e.Checkerboard() ==Even);
|
||||||
|
_Matrix.MooeeInv(tmp,sol_e); assert( sol_e.Checkerboard() ==Even);
|
||||||
|
|
||||||
|
setCheckerboard(sol,sol_e); assert( sol_e.Checkerboard() ==Even);
|
||||||
|
setCheckerboard(sol,sol_o); assert( sol_o.Checkerboard() ==Odd );
|
||||||
|
};
|
||||||
|
|
||||||
|
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o)
|
||||||
|
{
|
||||||
|
SchurDiagOneOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||||
|
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
|
||||||
|
};
|
||||||
|
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
|
||||||
|
{
|
||||||
|
SchurDiagOneOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||||
|
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Site diagonal is identity, right preconditioned by Mee^inv
|
// Site diagonal is identity, right preconditioned by Mee^inv
|
||||||
// ( 1 - Meo Moo^inv Moe Mee^inv ) phi =( 1 - Meo Moo^inv Moe Mee^inv ) Mee psi = = eta = eta
|
// ( 1 - Meo Moo^inv Moe Mee^inv ) phi =( 1 - Meo Moo^inv Moe Mee^inv ) Mee psi = = eta = eta
|
||||||
|
@ -54,6 +54,9 @@ public:
|
|||||||
size_type bytes = __n*sizeof(_Tp);
|
size_type bytes = __n*sizeof(_Tp);
|
||||||
profilerAllocate(bytes);
|
profilerAllocate(bytes);
|
||||||
_Tp *ptr = (_Tp*) MemoryManager::CpuAllocate(bytes);
|
_Tp *ptr = (_Tp*) MemoryManager::CpuAllocate(bytes);
|
||||||
|
if ( (_Tp*)ptr == (_Tp *) NULL ) {
|
||||||
|
printf("Grid CPU Allocator got NULL for %lu bytes\n",(unsigned long) bytes );
|
||||||
|
}
|
||||||
assert( ( (_Tp*)ptr != (_Tp *)NULL ) );
|
assert( ( (_Tp*)ptr != (_Tp *)NULL ) );
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
@ -100,6 +103,9 @@ public:
|
|||||||
size_type bytes = __n*sizeof(_Tp);
|
size_type bytes = __n*sizeof(_Tp);
|
||||||
profilerAllocate(bytes);
|
profilerAllocate(bytes);
|
||||||
_Tp *ptr = (_Tp*) MemoryManager::SharedAllocate(bytes);
|
_Tp *ptr = (_Tp*) MemoryManager::SharedAllocate(bytes);
|
||||||
|
if ( (_Tp*)ptr == (_Tp *) NULL ) {
|
||||||
|
printf("Grid Shared Allocator got NULL for %lu bytes\n",(unsigned long) bytes );
|
||||||
|
}
|
||||||
assert( ( (_Tp*)ptr != (_Tp *)NULL ) );
|
assert( ( (_Tp*)ptr != (_Tp *)NULL ) );
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
@ -145,6 +151,9 @@ public:
|
|||||||
size_type bytes = __n*sizeof(_Tp);
|
size_type bytes = __n*sizeof(_Tp);
|
||||||
profilerAllocate(bytes);
|
profilerAllocate(bytes);
|
||||||
_Tp *ptr = (_Tp*) MemoryManager::AcceleratorAllocate(bytes);
|
_Tp *ptr = (_Tp*) MemoryManager::AcceleratorAllocate(bytes);
|
||||||
|
if ( (_Tp*)ptr == (_Tp *) NULL ) {
|
||||||
|
printf("Grid Device Allocator got NULL for %lu bytes\n",(unsigned long) bytes );
|
||||||
|
}
|
||||||
assert( ( (_Tp*)ptr != (_Tp *)NULL ) );
|
assert( ( (_Tp*)ptr != (_Tp *)NULL ) );
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
|
@ -16,6 +16,44 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
uint64_t total_shared;
|
uint64_t total_shared;
|
||||||
uint64_t total_device;
|
uint64_t total_device;
|
||||||
uint64_t total_host;;
|
uint64_t total_host;;
|
||||||
|
|
||||||
|
#if defined(__has_feature)
|
||||||
|
#if __has_feature(leak_sanitizer)
|
||||||
|
#define ASAN_LEAK_CHECK
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef ASAN_LEAK_CHECK
|
||||||
|
#include <sanitizer/asan_interface.h>
|
||||||
|
#include <sanitizer/common_interface_defs.h>
|
||||||
|
#include <sanitizer/lsan_interface.h>
|
||||||
|
#define LEAK_CHECK(A) { __lsan_do_recoverable_leak_check(); }
|
||||||
|
#else
|
||||||
|
#define LEAK_CHECK(A) { }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void MemoryManager::DisplayMallinfo(void)
|
||||||
|
{
|
||||||
|
#ifdef __linux__
|
||||||
|
struct mallinfo mi;
|
||||||
|
|
||||||
|
mi = mallinfo();
|
||||||
|
|
||||||
|
std::cout << "MemoryManager: Total non-mmapped bytes (arena): "<< (size_t)mi.arena<<std::endl;
|
||||||
|
std::cout << "MemoryManager: # of free chunks (ordblks): "<< (size_t)mi.ordblks<<std::endl;
|
||||||
|
std::cout << "MemoryManager: # of free fastbin blocks (smblks): "<< (size_t)mi.smblks<<std::endl;
|
||||||
|
std::cout << "MemoryManager: # of mapped regions (hblks): "<< (size_t)mi.hblks<<std::endl;
|
||||||
|
std::cout << "MemoryManager: Bytes in mapped regions (hblkhd): "<< (size_t)mi.hblkhd<<std::endl;
|
||||||
|
std::cout << "MemoryManager: Max. total allocated space (usmblks): "<< (size_t)mi.usmblks<<std::endl;
|
||||||
|
std::cout << "MemoryManager: Free bytes held in fastbins (fsmblks): "<< (size_t)mi.fsmblks<<std::endl;
|
||||||
|
std::cout << "MemoryManager: Total allocated space (uordblks): "<< (size_t)mi.uordblks<<std::endl;
|
||||||
|
std::cout << "MemoryManager: Total free space (fordblks): "<< (size_t)mi.fordblks<<std::endl;
|
||||||
|
std::cout << "MemoryManager: Topmost releasable block (keepcost): "<< (size_t)mi.keepcost<<std::endl;
|
||||||
|
#endif
|
||||||
|
LEAK_CHECK();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
void MemoryManager::PrintBytes(void)
|
void MemoryManager::PrintBytes(void)
|
||||||
{
|
{
|
||||||
std::cout << " MemoryManager : ------------------------------------ "<<std::endl;
|
std::cout << " MemoryManager : ------------------------------------ "<<std::endl;
|
||||||
@ -35,7 +73,7 @@ void MemoryManager::PrintBytes(void)
|
|||||||
#ifdef GRID_CUDA
|
#ifdef GRID_CUDA
|
||||||
cuda_mem();
|
cuda_mem();
|
||||||
#endif
|
#endif
|
||||||
|
DisplayMallinfo();
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t MemoryManager::DeviceCacheBytes() { return CacheBytes[Acc] + CacheBytes[AccHuge] + CacheBytes[AccSmall]; }
|
uint64_t MemoryManager::DeviceCacheBytes() { return CacheBytes[Acc] + CacheBytes[AccHuge] + CacheBytes[AccSmall]; }
|
||||||
|
@ -211,6 +211,7 @@ private:
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
static void DisplayMallinfo(void);
|
||||||
static void NotifyDeletion(void * CpuPtr);
|
static void NotifyDeletion(void * CpuPtr);
|
||||||
static void Print(void);
|
static void Print(void);
|
||||||
static void PrintAll(void);
|
static void PrintAll(void);
|
||||||
|
@ -373,7 +373,8 @@ axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Latt
|
|||||||
nrm = real(TensorRemove(sum(inner_tmp_v,sites)));
|
nrm = real(TensorRemove(sum(inner_tmp_v,sites)));
|
||||||
#else
|
#else
|
||||||
typedef decltype(innerProduct(x_v[0],y_v[0])) inner_t;
|
typedef decltype(innerProduct(x_v[0],y_v[0])) inner_t;
|
||||||
Vector<inner_t> inner_tmp(sites);
|
deviceVector<inner_t> inner_tmp;
|
||||||
|
inner_tmp.resize(sites);
|
||||||
auto inner_tmp_v = &inner_tmp[0];
|
auto inner_tmp_v = &inner_tmp[0];
|
||||||
|
|
||||||
accelerator_for( ss, sites, nsimd,{
|
accelerator_for( ss, sites, nsimd,{
|
||||||
|
@ -90,6 +90,7 @@ public:
|
|||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
Parameters.StartingType = arg;
|
Parameters.StartingType = arg;
|
||||||
|
std::cout <<GridLogMessage << " GenericHMCrunner --StartingType "<<arg<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (GridCmdOptionExists(argv, argv + argc, "--StartingTrajectory")) {
|
if (GridCmdOptionExists(argv, argv + argc, "--StartingTrajectory")) {
|
||||||
@ -97,6 +98,7 @@ public:
|
|||||||
std::vector<int> ivec(0);
|
std::vector<int> ivec(0);
|
||||||
GridCmdOptionIntVector(arg, ivec);
|
GridCmdOptionIntVector(arg, ivec);
|
||||||
Parameters.StartTrajectory = ivec[0];
|
Parameters.StartTrajectory = ivec[0];
|
||||||
|
std::cout <<GridLogMessage << " GenericHMCrunner --StartingTrajectory "<<ivec[0]<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (GridCmdOptionExists(argv, argv + argc, "--Trajectories")) {
|
if (GridCmdOptionExists(argv, argv + argc, "--Trajectories")) {
|
||||||
@ -104,6 +106,7 @@ public:
|
|||||||
std::vector<int> ivec(0);
|
std::vector<int> ivec(0);
|
||||||
GridCmdOptionIntVector(arg, ivec);
|
GridCmdOptionIntVector(arg, ivec);
|
||||||
Parameters.Trajectories = ivec[0];
|
Parameters.Trajectories = ivec[0];
|
||||||
|
std::cout << GridLogMessage<<" GenericHMCrunner Command Line --Trajectories "<<ivec[0]<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (GridCmdOptionExists(argv, argv + argc, "--Thermalizations")) {
|
if (GridCmdOptionExists(argv, argv + argc, "--Thermalizations")) {
|
||||||
@ -111,6 +114,7 @@ public:
|
|||||||
std::vector<int> ivec(0);
|
std::vector<int> ivec(0);
|
||||||
GridCmdOptionIntVector(arg, ivec);
|
GridCmdOptionIntVector(arg, ivec);
|
||||||
Parameters.NoMetropolisUntil = ivec[0];
|
Parameters.NoMetropolisUntil = ivec[0];
|
||||||
|
std::cout << GridLogMessage<<" GenericHMCrunner --Thermalizations "<<ivec[0]<<std::endl;
|
||||||
}
|
}
|
||||||
if (GridCmdOptionExists(argv, argv + argc, "--ParameterFile")) {
|
if (GridCmdOptionExists(argv, argv + argc, "--ParameterFile")) {
|
||||||
arg = GridCmdOptionPayload(argv, argv + argc, "--ParameterFile");
|
arg = GridCmdOptionPayload(argv, argv + argc, "--ParameterFile");
|
||||||
|
@ -137,9 +137,11 @@ public:
|
|||||||
|
|
||||||
double start_force = usecond();
|
double start_force = usecond();
|
||||||
|
|
||||||
|
MemoryManager::Print();
|
||||||
as[level].actions.at(a)->deriv_timer_start();
|
as[level].actions.at(a)->deriv_timer_start();
|
||||||
as[level].actions.at(a)->deriv(Smearer, force); // deriv should NOT include Ta
|
as[level].actions.at(a)->deriv(Smearer, force); // deriv should NOT include Ta
|
||||||
as[level].actions.at(a)->deriv_timer_stop();
|
as[level].actions.at(a)->deriv_timer_stop();
|
||||||
|
MemoryManager::Print();
|
||||||
|
|
||||||
auto name = as[level].actions.at(a)->action_name();
|
auto name = as[level].actions.at(a)->action_name();
|
||||||
|
|
||||||
@ -246,7 +248,11 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
virtual ~Integrator() {}
|
virtual ~Integrator()
|
||||||
|
{
|
||||||
|
// Pain in the ass to clean up the Level pointers
|
||||||
|
// Guido's design is at fault as per comment above in constructor
|
||||||
|
}
|
||||||
|
|
||||||
virtual std::string integrator_name() = 0;
|
virtual std::string integrator_name() = 0;
|
||||||
|
|
||||||
@ -460,6 +466,7 @@ public:
|
|||||||
for (int level = 0; level < as.size(); ++level) {
|
for (int level = 0; level < as.size(); ++level) {
|
||||||
for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
|
for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
|
||||||
|
|
||||||
|
MemoryManager::Print();
|
||||||
// get gauge field from the SmearingPolicy and
|
// get gauge field from the SmearingPolicy and
|
||||||
// based on the boolean is_smeared in actionID
|
// based on the boolean is_smeared in actionID
|
||||||
std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] action eval " << std::endl;
|
std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] action eval " << std::endl;
|
||||||
@ -468,6 +475,7 @@ public:
|
|||||||
as[level].actions.at(actionID)->S_timer_stop();
|
as[level].actions.at(actionID)->S_timer_stop();
|
||||||
std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl;
|
std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl;
|
||||||
H += Hterm;
|
H += Hterm;
|
||||||
|
MemoryManager::Print();
|
||||||
|
|
||||||
}
|
}
|
||||||
as[level].apply(S_hireps, Representations, level, H);
|
as[level].apply(S_hireps, Representations, level, H);
|
||||||
|
@ -58,7 +58,7 @@ int main(int argc, char **argv) {
|
|||||||
HMCparameters HMCparams;
|
HMCparameters HMCparams;
|
||||||
HMCparams.StartTrajectory = 0;
|
HMCparams.StartTrajectory = 0;
|
||||||
HMCparams.Trajectories = 200;
|
HMCparams.Trajectories = 200;
|
||||||
HMCparams.NoMetropolisUntil= 20;
|
HMCparams.NoMetropolisUntil= 0;
|
||||||
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
|
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
|
||||||
HMCparams.StartingType =std::string("ColdStart");
|
HMCparams.StartingType =std::string("ColdStart");
|
||||||
HMCparams.MD = MD;
|
HMCparams.MD = MD;
|
||||||
@ -70,7 +70,7 @@ int main(int argc, char **argv) {
|
|||||||
CheckpointerParameters CPparams;
|
CheckpointerParameters CPparams;
|
||||||
CPparams.config_prefix = "ckpoint_EODWF_lat";
|
CPparams.config_prefix = "ckpoint_EODWF_lat";
|
||||||
CPparams.rng_prefix = "ckpoint_EODWF_rng";
|
CPparams.rng_prefix = "ckpoint_EODWF_rng";
|
||||||
CPparams.saveInterval = 10;
|
CPparams.saveInterval = 1;
|
||||||
CPparams.format = "IEEE64BIG";
|
CPparams.format = "IEEE64BIG";
|
||||||
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
|
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
|
||||||
|
|
||||||
@ -186,6 +186,8 @@ int main(int argc, char **argv) {
|
|||||||
|
|
||||||
/////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////
|
||||||
// HMC parameters are serialisable
|
// HMC parameters are serialisable
|
||||||
|
TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file
|
||||||
|
TheHMC.initializeGaugeFieldAndRNGs(U);
|
||||||
|
|
||||||
std::cout << GridLogMessage << " Running the HMC "<< std::endl;
|
std::cout << GridLogMessage << " Running the HMC "<< std::endl;
|
||||||
TheHMC.Run(); // no smearing
|
TheHMC.Run(); // no smearing
|
||||||
|
@ -1,16 +1,18 @@
|
|||||||
|
|
||||||
|
export LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=spir64_gen -Xs -device -Xs pvc -fsycl-device-lib=all -lze_loader -L${MKLROOT}/lib -qmkl=parallel -fsycl -lsycl "
|
||||||
|
export CXXFLAGS="-O3 -fiopenmp -fsycl-unnamed-lambda -fsycl -I$INSTALL/include -Wno-tautological-compare -I$HOME/ -qmkl=parallel -fsycl -fno-exceptions -fsycl-targets=spir64_gen -Xs -device -Xs pvc "
|
||||||
../../configure \
|
../../configure \
|
||||||
--enable-simd=GPU \
|
--enable-simd=GPU \
|
||||||
--enable-gen-simd-width=64 \
|
--enable-gen-simd-width=64 \
|
||||||
--enable-comms=mpi-auto \
|
--enable-comms=mpi-auto \
|
||||||
|
--enable-debug \
|
||||||
--disable-gparity \
|
--disable-gparity \
|
||||||
--disable-fermion-reps \
|
--disable-fermion-reps \
|
||||||
|
--with-lime=$CLIME \
|
||||||
--enable-shm=nvlink \
|
--enable-shm=nvlink \
|
||||||
--enable-accelerator=sycl \
|
--enable-accelerator=sycl \
|
||||||
--enable-accelerator-aware-mpi=yes\
|
--enable-accelerator-aware-mpi=yes\
|
||||||
--enable-unified=no \
|
--enable-unified=no \
|
||||||
MPICXX=mpicxx \
|
MPICXX=mpicxx \
|
||||||
CXX=icpx \
|
CXX=icpx
|
||||||
LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -L${MKLROOT}/lib -qmkl=parallel -lsycl" \
|
|
||||||
CXXFLAGS="-fiopenmp -fsycl-unnamed-lambda -fsycl -I$INSTALL/include -Wno-tautological-compare -I$HOME/ -qmkl=parallel"
|
|
||||||
|
|
||||||
|
22
systems/Aurora/config-command-sanitize
Normal file
22
systems/Aurora/config-command-sanitize
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
# -fsycl-targets=spir64_gen -Xs\" -device pvc \"
|
||||||
|
# -fsycl-targets=intel_gpu_pvc_vg,intel_gpu_pvc
|
||||||
|
# -fsycl-targets=intel_gpu_pvc
|
||||||
|
|
||||||
|
unset DEVICE
|
||||||
|
export LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=spir64_gen -Xs -device -Xs pvc -fsycl-device-lib=all -lze_loader -L${MKLROOT}/lib -qmkl=parallel -fsycl -lsycl -Xarch_host -fsanitize=address"
|
||||||
|
export CXXFLAGS="-O3 -fiopenmp -fsycl-unnamed-lambda -fsycl -I$INSTALL/include -Wno-tautological-compare -I$HOME/ -qmkl=parallel -fsycl -fno-exceptions -Xarch_host -fsanitize=address -fsycl-targets=spir64_gen -Xs -device -Xs pvc "
|
||||||
|
../../configure \
|
||||||
|
--enable-simd=GPU \
|
||||||
|
--enable-gen-simd-width=64 \
|
||||||
|
--enable-comms=mpi-auto \
|
||||||
|
--enable-debug \
|
||||||
|
--disable-gparity \
|
||||||
|
--disable-fermion-reps \
|
||||||
|
--with-lime=$CLIME \
|
||||||
|
--enable-shm=nvlink \
|
||||||
|
--enable-accelerator=sycl \
|
||||||
|
--enable-accelerator-aware-mpi=yes\
|
||||||
|
--enable-unified=no \
|
||||||
|
MPICXX=mpicxx \
|
||||||
|
CXX=icpx
|
||||||
|
|
@ -1,28 +1,23 @@
|
|||||||
|
source ~/spack/share/spack/setup-env.sh
|
||||||
|
spack load c-lime
|
||||||
|
|
||||||
|
export CLIME=`spack find --paths c-lime | grep ^c-lime | awk '{print $2}' `
|
||||||
|
#export LD_LIBRARY_PATH=${TCMALLOC}/lib:$LD_LIBRARY_PATH
|
||||||
|
|
||||||
|
export INTELGT_AUTO_ATTACH_DISABLE=1
|
||||||
|
|
||||||
#export ONEAPI_DEVICE_SELECTOR=level_zero:0.0
|
#export ONEAPI_DEVICE_SELECTOR=level_zero:0.0
|
||||||
|
|
||||||
module load oneapi/release/2023.12.15.001
|
|
||||||
|
|
||||||
#module use /soft/modulefiles
|
|
||||||
#module load intel_compute_runtime/release/agama-devel-682.22
|
|
||||||
|
|
||||||
export FI_CXI_DEFAULT_CQ_SIZE=131072
|
|
||||||
export FI_CXI_CQ_FILL_PERCENT=20
|
|
||||||
|
|
||||||
export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-opt-large-register-file"
|
|
||||||
#export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-intel-enable-auto-large-GRF-mode"
|
|
||||||
|
|
||||||
#
|
|
||||||
# -ftarget-register-alloc-mode=pvc:default
|
# -ftarget-register-alloc-mode=pvc:default
|
||||||
# -ftarget-register-alloc-mode=pvc:small
|
# -ftarget-register-alloc-mode=pvc:small
|
||||||
# -ftarget-register-alloc-mode=pvc:large
|
# -ftarget-register-alloc-mode=pvc:large
|
||||||
# -ftarget-register-alloc-mode=pvc:auto
|
# -ftarget-register-alloc-mode=pvc:auto
|
||||||
#
|
#export MPIR_CVAR_CH4_OFI_ENABLE_HMEM=1
|
||||||
|
|
||||||
export HTTP_PROXY=http://proxy.alcf.anl.gov:3128
|
export HTTP_PROXY=http://proxy.alcf.anl.gov:3128
|
||||||
export HTTPS_PROXY=http://proxy.alcf.anl.gov:3128
|
export HTTPS_PROXY=http://proxy.alcf.anl.gov:3128
|
||||||
export http_proxy=http://proxy.alcf.anl.gov:3128
|
export http_proxy=http://proxy.alcf.anl.gov:3128
|
||||||
export https_proxy=http://proxy.alcf.anl.gov:3128
|
export https_proxy=http://proxy.alcf.anl.gov:3128
|
||||||
#export MPIR_CVAR_CH4_OFI_ENABLE_HMEM=1
|
|
||||||
git config --global http.proxy http://proxy.alcf.anl.gov:3128
|
git config --global http.proxy http://proxy.alcf.anl.gov:3128
|
||||||
|
|
||||||
export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-opt-large-register-file"
|
export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-opt-large-register-file"
|
||||||
|
@ -392,9 +392,27 @@ void TestCGschur(What & Ddwf,
|
|||||||
GridParallelRNG *RNG5)
|
GridParallelRNG *RNG5)
|
||||||
{
|
{
|
||||||
LatticeFermion src (FGrid); random(*RNG5,src);
|
LatticeFermion src (FGrid); random(*RNG5,src);
|
||||||
LatticeFermion result(FGrid); result=Zero();
|
LatticeFermion result1(FGrid); result1=Zero();
|
||||||
|
LatticeFermion result2(FGrid); result2=Zero();
|
||||||
|
LatticeFermion result3(FGrid); result3=Zero();
|
||||||
|
|
||||||
ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
|
ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
|
||||||
SchurRedBlackDiagMooeeSolve<LatticeFermion> SchurSolver(CG);
|
SchurRedBlackDiagMooeeSolve<LatticeFermion> SchurSolver(CG);
|
||||||
SchurSolver(Ddwf,src,result);
|
SchurSolver(Ddwf,src,result1);
|
||||||
|
|
||||||
|
SchurRedBlackDiagOneSolve<LatticeFermion> SchurSolverSymm1(CG);
|
||||||
|
SchurSolverSymm1(Ddwf,src,result2);
|
||||||
|
|
||||||
|
SchurRedBlackDiagTwoSolve<LatticeFermion> SchurSolverSymm2(CG);
|
||||||
|
SchurSolverSymm2(Ddwf,src,result3);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << " Standard " <<norm2(result1)<<std::endl;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << " Symm1 " <<norm2(result2)<<std::endl;
|
||||||
|
result2=result2-result1;
|
||||||
|
std::cout << GridLogMessage << " diff " <<norm2(result2) <<std::endl;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << " Symm2 " <<norm2(result3)<<std::endl;
|
||||||
|
result3=result3-result1;
|
||||||
|
std::cout << GridLogMessage << " diff " <<norm2(result3) <<std::endl;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user