mirror of
https://github.com/paboyle/Grid.git
synced 2026-04-04 11:06:09 +01:00
Merge branch 'develop' of https://github.com/paboyle/Grid into develop
This commit is contained in:
@@ -89,9 +89,10 @@ public:
|
||||
gridblasHandle = theGridAccelerator;
|
||||
#endif
|
||||
#ifdef GRID_ONE_MKL
|
||||
cl::sycl::cpu_selector selector;
|
||||
cl::sycl::gpu_selector selector;
|
||||
cl::sycl::device selectedDevice { selector };
|
||||
gridblasHandle =new sycl::queue (selectedDevice);
|
||||
cl::sycl::property_list q_prop{cl::sycl::property::queue::in_order()};
|
||||
gridblasHandle =new sycl::queue (selectedDevice,q_prop);
|
||||
#endif
|
||||
gridblasInit=1;
|
||||
}
|
||||
@@ -266,8 +267,46 @@ public:
|
||||
assert(err==CUBLAS_STATUS_SUCCESS);
|
||||
#endif
|
||||
#ifdef GRID_SYCL
|
||||
//MKL’s cblas_<T>gemm_batch & OneAPI
|
||||
#warning "oneMKL implementation not built "
|
||||
std::cerr << " Calling SYCL batched ZGEMM "<<std::endl;
|
||||
int64_t m64=m;
|
||||
int64_t n64=n;
|
||||
int64_t k64=k;
|
||||
int64_t lda64=lda;
|
||||
int64_t ldb64=ldb;
|
||||
int64_t ldc64=ldc;
|
||||
int64_t batchCount64=batchCount;
|
||||
oneapi::mkl::transpose notransp =oneapi::mkl::transpose::N;
|
||||
oneapi::mkl::blas::column_major::gemm_batch(*gridblasHandle,
|
||||
&notransp,
|
||||
&notransp,
|
||||
&m64,&n64,&k64,
|
||||
(ComplexD *) &alpha_p[0],
|
||||
(const ComplexD **)&Amk[0], (const int64_t *)&lda64,
|
||||
(const ComplexD **)&Bkn[0], (const int64_t *)&ldb64,
|
||||
(ComplexD *) &beta_p[0],
|
||||
(ComplexD **)&Cmn[0], (const int64_t *)&ldc64,
|
||||
(int64_t)1,&batchCount64,std::vector<sycl::event>());
|
||||
synchronise();
|
||||
std::cerr << " Called SYCL batched ZGEMM "<<std::endl;
|
||||
std::vector<ComplexD> A(m*k); // pointer list to matrices
|
||||
std::vector<ComplexD> B(k*n);
|
||||
std::vector<ComplexD> C(m*n);
|
||||
int sda = lda*k;
|
||||
int sdb = ldb*k;
|
||||
int sdc = ldc*n;
|
||||
for (int p = 0; p < 1; ++p) {
|
||||
acceleratorCopyFromDevice((void *)&Amk[p][0],(void *)&A[0],m*k*sizeof(ComplexD));
|
||||
acceleratorCopyFromDevice((void *)&Bkn[p][0],(void *)&B[0],k*n*sizeof(ComplexD));
|
||||
acceleratorCopyFromDevice((void *)&Cmn[p][0],(void *)&C[0],m*n*sizeof(ComplexD));
|
||||
for (int mm = 0; mm < m; ++mm) {
|
||||
for (int nn = 0; nn < n; ++nn) {
|
||||
ComplexD c_mn(0.0);
|
||||
for (int kk = 0; kk < k; ++kk)
|
||||
c_mn += A[mm + kk*lda ] * B[kk + nn*ldb];
|
||||
std::cout << " beta "<<beta<<" C_"<<mm<<","<<nn<<" "<<c_mn<<" "<<C[mm + nn*ldc]<<std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if !defined(GRID_SYCL) && !defined(GRID_CUDA) && !defined(GRID_HIP)
|
||||
// Need a default/reference implementation
|
||||
@@ -285,7 +324,6 @@ public:
|
||||
}
|
||||
}
|
||||
#endif
|
||||
// synchronise();
|
||||
RealD t1=usecond();
|
||||
RealD flops = 8.0*m*n*k*batchCount;
|
||||
RealD bytes = 1.0*sizeof(ComplexD)*(m*k+k*n+m*n)*batchCount;
|
||||
@@ -366,8 +404,25 @@ public:
|
||||
assert(err==CUBLAS_STATUS_SUCCESS);
|
||||
#endif
|
||||
#ifdef GRID_SYCL
|
||||
//MKL’s cblas_<T>gemm_batch & OneAPI
|
||||
#warning "oneMKL implementation not built "
|
||||
int64_t m64=m;
|
||||
int64_t n64=n;
|
||||
int64_t k64=k;
|
||||
int64_t lda64=lda;
|
||||
int64_t ldb64=ldb;
|
||||
int64_t ldc64=ldc;
|
||||
int64_t batchCount64=batchCount;
|
||||
oneapi::mkl::transpose notransp =oneapi::mkl::transpose::N;
|
||||
oneapi::mkl::blas::column_major::gemm_batch(*gridblasHandle,
|
||||
&notransp,
|
||||
&notransp,
|
||||
&m64,&n64,&k64,
|
||||
(ComplexF *) &alpha_p[0],
|
||||
(const ComplexF **)&Amk[0], (const int64_t *)&lda64,
|
||||
(const ComplexF **)&Bkn[0], (const int64_t *)&ldb64,
|
||||
(ComplexF *) &beta_p[0],
|
||||
(ComplexF **)&Cmn[0], (const int64_t *)&ldc64,
|
||||
(int64_t)1,&batchCount64,std::vector<sycl::event>());
|
||||
synchronise();
|
||||
#endif
|
||||
#if !defined(GRID_SYCL) && !defined(GRID_CUDA) && !defined(GRID_HIP)
|
||||
int sda = lda*k;
|
||||
@@ -467,8 +522,25 @@ public:
|
||||
assert(err==CUBLAS_STATUS_SUCCESS);
|
||||
#endif
|
||||
#ifdef GRID_SYCL
|
||||
//MKL’s cblas_<T>gemm_batch & OneAPI
|
||||
#warning "oneMKL implementation not built "
|
||||
int64_t m64=m;
|
||||
int64_t n64=n;
|
||||
int64_t k64=k;
|
||||
int64_t lda64=lda;
|
||||
int64_t ldb64=ldb;
|
||||
int64_t ldc64=ldc;
|
||||
int64_t batchCount64=batchCount;
|
||||
oneapi::mkl::transpose notransp =oneapi::mkl::transpose::N;
|
||||
oneapi::mkl::blas::column_major::gemm_batch(*gridblasHandle,
|
||||
&notransp,
|
||||
&notransp,
|
||||
&m64,&n64,&k64,
|
||||
(float *) &alpha_p[0],
|
||||
(const float **)&Amk[0], (const int64_t *)&lda64,
|
||||
(const float **)&Bkn[0], (const int64_t *)&ldb64,
|
||||
(float *) &beta_p[0],
|
||||
(float **)&Cmn[0], (const int64_t *)&ldc64,
|
||||
(int64_t)1,&batchCount64,std::vector<sycl::event>());
|
||||
synchronise();
|
||||
#endif
|
||||
#if !defined(GRID_SYCL) && !defined(GRID_CUDA) && !defined(GRID_HIP)
|
||||
int sda = lda*k;
|
||||
@@ -568,24 +640,25 @@ public:
|
||||
assert(err==CUBLAS_STATUS_SUCCESS);
|
||||
#endif
|
||||
#ifdef GRID_SYCL
|
||||
/*
|
||||
int64_t m64=m;
|
||||
int64_t n64=n;
|
||||
int64_t k64=k;
|
||||
int64_t lda64=lda;
|
||||
int64_t ldb64=ldb;
|
||||
int64_t ldc64=ldc;
|
||||
int64_t batchCount64=batchCount;
|
||||
oneapi::mkl::blas::column_major::gemm_batch(*theGridAccelerator,
|
||||
onemkl::transpose::N,
|
||||
onemkl::transpose::N,
|
||||
&m64,&n64,&k64,
|
||||
(double *) &alpha_p[0],
|
||||
(double **)&Amk[0], lda,
|
||||
(double **)&Bkn[0], ldb,
|
||||
(double *) &beta_p[0],
|
||||
(double **)&Cmn[0], ldc,
|
||||
1,&batchCount64);
|
||||
*/
|
||||
//MKL’s cblas_<T>gemm_batch & OneAPI
|
||||
#warning "oneMKL implementation not built "
|
||||
oneapi::mkl::transpose notransp =oneapi::mkl::transpose::N;
|
||||
oneapi::mkl::blas::column_major::gemm_batch(*gridblasHandle,
|
||||
&notransp,
|
||||
&notransp,
|
||||
&m64,&n64,&k64,
|
||||
(double *) &alpha_p[0],
|
||||
(const double **)&Amk[0], (const int64_t *)&lda64,
|
||||
(const double **)&Bkn[0], (const int64_t *)&ldb64,
|
||||
(double *) &beta_p[0],
|
||||
(double **)&Cmn[0], (const int64_t *)&ldc64,
|
||||
(int64_t)1,&batchCount64,std::vector<sycl::event>());
|
||||
synchronise();
|
||||
#endif
|
||||
#if !defined(GRID_SYCL) && !defined(GRID_CUDA) && !defined(GRID_HIP)
|
||||
int sda = lda*k;
|
||||
@@ -673,6 +746,7 @@ public:
|
||||
beta,
|
||||
(ComplexD *)Cmn,ldc,sdc,
|
||||
batchCount);
|
||||
synchronise();
|
||||
#endif
|
||||
#if !defined(GRID_SYCL) && !defined(GRID_CUDA) && !defined(GRID_HIP) && !defined(GRID_ONE_MKL)
|
||||
// Need a default/reference implementation
|
||||
|
||||
@@ -499,6 +499,87 @@ namespace Grid {
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Site diagonal is identity, left preconditioned by Mee^inv
|
||||
// ( 1 - Mee^inv Meo Moo^inv Moe ) phi = Mee_inv ( Mee - Meo Moo^inv Moe ) phi = Mee_inv eta
|
||||
//
|
||||
// Solve:
|
||||
// ( 1 - Mee^inv Meo Moo^inv Moe )^dag ( 1 - Mee^inv Meo Moo^inv Moe ) phi = ( 1 - Mee^inv Meo Moo^inv Moe )^dag Mee_inv eta
|
||||
//
|
||||
// Old notation e<->o
|
||||
//
|
||||
// Left precon by Moo^-1
|
||||
// b) (Doo^{dag} M_oo^-dag) (Moo^-1 Doo) psi_o = [ (D_oo)^dag M_oo^-dag ] Moo^-1 L^{-1} eta_o
|
||||
// eta_o' = (D_oo)^dag M_oo^-dag Moo^-1 (eta_o - Moe Mee^{-1} eta_e)
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Red-black Schur solver wrapper: prepares the odd-checkerboard source,
// hands the solve to the supplied Hermitian solver through
// SchurDiagOneOperator, and rebuilds the full solution from both checkerboards.
template<class Field> class SchurRedBlackDiagOneSolve : public SchurRedBlackBase<Field> {
public:
  typedef CheckerBoardedSparseMatrixBase<Field> Matrix;

  // Forwards the solver and both guess flags unchanged to SchurRedBlackBase.
  SchurRedBlackDiagOneSolve(OperatorFunction<Field> &HermitianRBSolver,
                            const bool initSubGuess = false,
                            const bool _solnAsInitGuess = false)
    : SchurRedBlackBase<Field>(HermitianRBSolver, initSubGuess, _solnAsInitGuess)
  {
  }

  // Splits src into src_e / src_o, then overwrites src_o with
  //   MpcDag * MooeeInv * ( src_o - Meooe * MooeeInv * src_e ).
  virtual void RedBlackSource(Matrix &_Matrix, const Field &src, Field &src_e, Field &src_o)
  {
    GridBase *rbGrid   = _Matrix.RedBlackGrid();
    GridBase *fullGrid = _Matrix.Grid(); // queried but not used further in this method
    (void)fullGrid;

    SchurDiagOneOperator<Matrix,Field> schurOp(_Matrix);

    Field scratch(rbGrid);
    Field reduced(rbGrid);

    pickCheckerboard(Even, src_e, src);
    pickCheckerboard(Odd,  src_o, src);

    // scratch = MooeeInv * src_e
    _Matrix.MooeeInv(src_e, scratch);
    assert(scratch.Checkerboard() == Even);

    // reduced = src_o - Meooe * scratch
    _Matrix.Meooe(scratch, reduced);
    assert(reduced.Checkerboard() == Odd);
    reduced = src_o - reduced;

    // scratch = MooeeInv * reduced
    _Matrix.MooeeInv(reduced, scratch);
    assert(scratch.Checkerboard() == Odd);

    // Apply the MpcDag belonging to SchurDiagOneOperator.
    schurOp.MpcDag(scratch, src_o);
    assert(src_o.Checkerboard() == Odd);
  }

  // Computes sol_e = MooeeInv * ( src_e - Meooe * sol_o ) and writes both
  // sol_e and sol_o into sol.
  virtual void RedBlackSolution(Matrix &_Matrix, const Field &sol_o, const Field &src_e, Field &sol)
  {
    GridBase *rbGrid   = _Matrix.RedBlackGrid();
    GridBase *fullGrid = _Matrix.Grid(); // queried but not used further in this method
    (void)fullGrid;

    Field scratch(rbGrid);
    Field sol_e(rbGrid);

    // scratch = Meooe * sol_o
    _Matrix.Meooe(sol_o, scratch);
    assert(scratch.Checkerboard() == Even);

    // scratch = src_e - scratch
    scratch = src_e - scratch;
    assert(src_e.Checkerboard() == Even);

    _Matrix.MooeeInv(scratch, sol_e);
    assert(sol_e.Checkerboard() == Even);

    setCheckerboard(sol, sol_e);
    assert(sol_e.Checkerboard() == Even);

    setCheckerboard(sol, sol_o);
    assert(sol_o.Checkerboard() == Odd);
  }

  // Single right-hand side: invoke the stored Hermitian solver on the
  // SchurDiagOneOperator built from _Matrix.
  virtual void RedBlackSolve(Matrix &_Matrix, const Field &src_o, Field &sol_o)
  {
    SchurDiagOneOperator<Matrix,Field> schurOp(_Matrix);
    this->_HermitianRBSolver(schurOp, src_o, sol_o);
  }

  // Vector-of-fields overload: same delegation as the single-field overload.
  virtual void RedBlackSolve(Matrix &_Matrix, const std::vector<Field> &src_o, std::vector<Field> &sol_o)
  {
    SchurDiagOneOperator<Matrix,Field> schurOp(_Matrix);
    this->_HermitianRBSolver(schurOp, src_o, sol_o);
  }
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Site diagonal is identity, right preconditioned by Mee^inv
|
||||
// ( 1 - Meo Moo^inv Moe Mee^inv ) phi = ( 1 - Meo Moo^inv Moe Mee^inv ) Mee psi = eta
|
||||
|
||||
Reference in New Issue
Block a user