/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid Source file: MultiRHSDeflation.h Copyright (C) 2023 Author: Peter Boyle This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ #pragma once NAMESPACE_BEGIN(Grid); /* MultiRHS block projection Import basis -> nblock x nbasis x (block x internal) Import vector of fine lattice objects -> nblock x nrhs x (block x internal) => coarse_(nrhs x nbasis )^block = via batched GEMM //template //inline void blockProject(Lattice > &coarseData, // const VLattice &fineData, // const VLattice &Basis) */ template class MultiRHSBlockProject { public: typedef typename Field::scalar_type scalar; typedef typename Field::scalar_object scalar_object; typedef Field Fermion; int nbasis; GridBase *coarse_grid; GridBase *fine_grid; uint64_t block_vol; uint64_t fine_vol; uint64_t coarse_vol; uint64_t words; // Row major layout "C" order: // BLAS_V[coarse_vol][nbasis][block_vol][words] // BLAS_F[coarse_vol][nrhs][block_vol][words] // BLAS_C[coarse_vol][nrhs][nbasis] /* * in Fortran column major notation (cuBlas order) * * Vxb = [v1(x)][..][vn(x)] ... x coarse vol * * Fxr = [r1(x)][..][rm(x)] ... x coarse vol * * Block project: * C_br = V^dag F x coarse vol * * Block promote: * F_xr = Vxb Cbr x coarse_vol */ deviceVector BLAS_V; // words * block_vol * nbasis x coarse_vol deviceVector BLAS_F; // nrhs x fine_vol * words -- the sources deviceVector BLAS_C; // nrhs x coarse_vol * nbasis -- the coarse coeffs RealD blasNorm2(deviceVector &blas) { scalar ss(0.0); std::vector tmp(blas.size()); acceleratorCopyFromDevice(&blas[0],&tmp[0],blas.size()*sizeof(scalar)); for(int64_t s=0;sGlobalSum(ss); return real(ss); } MultiRHSBlockProject(){}; ~MultiRHSBlockProject(){ Deallocate(); }; void Deallocate(void) { nbasis=0; coarse_grid=nullptr; fine_grid=nullptr; fine_vol=0; block_vol=0; coarse_vol=0; words=0; BLAS_V.resize(0); BLAS_F.resize(0); BLAS_C.resize(0); } void Allocate(int _nbasis,GridBase *_fgrid,GridBase *_cgrid) { nbasis=_nbasis; fine_grid=_fgrid; coarse_grid=_cgrid; fine_vol = fine_grid->lSites(); coarse_vol = coarse_grid->lSites(); block_vol = fine_vol/coarse_vol; words = sizeof(scalar_object)/sizeof(scalar); BLAS_V.resize (fine_vol * words * nbasis ); } void ImportFineGridVectors(std::vector &vecs, deviceVector &blas) { int nvec = vecs.size(); typedef typename Field::vector_object vobj; // std::cout << GridLogMessage <<" BlockProjector importing "<_ndimension; assert(block_vol == fine_grid->oSites() / coarse_grid->oSites()); Coordinate block_r (_ndimension); for(int d=0 ; d<_ndimension;d++){ block_r[d] = fine_grid->_rdimensions[d] / coarse_grid->_rdimensions[d]; } uint64_t sz = blas.size(); acceleratorMemSet(&blas[0],0,blas.size()*sizeof(scalar)); Coordinate fine_rdimensions = fine_grid->_rdimensions; Coordinate coarse_rdimensions = coarse_grid->_rdimensions; int64_t bv= block_vol; for(int v=0;voSites(); // loop over fine sites const int Nsimd = vobj::Nsimd(); // std::cout << "sz "<oSites() * block_vol * nvec * words<oSites() * block_vol * nvec * words); uint64_t lwords= words; // local variable for copy in to GPU accelerator_for(sf,osites,Nsimd,{ #ifdef GRID_SIMT { int lane=acceleratorSIMTlane(Nsimd); // buffer lane #else for(int lane=0;lane &vecs, deviceVector &blas) { typedef typename Field::vector_object vobj; int nvec = vecs.size(); assert(vecs[0].Grid()==fine_grid); subdivides(coarse_grid,fine_grid); // require they map int _ndimension = coarse_grid->_ndimension; assert(block_vol == fine_grid->oSites() / coarse_grid->oSites()); Coordinate block_r (_ndimension); for(int d=0 ; d<_ndimension;d++){ block_r[d] = fine_grid->_rdimensions[d] / coarse_grid->_rdimensions[d]; } Coordinate fine_rdimensions = fine_grid->_rdimensions; Coordinate coarse_rdimensions = coarse_grid->_rdimensions; // std::cout << " export fine Blas norm "<oSites(); uint64_t lwords = words; // std::cout << " Nsimd is "< void ImportCoarseGridVectors(std::vector > &vecs, deviceVector &blas) { int nvec = vecs.size(); typedef typename vobj::scalar_object coarse_scalar_object; // std::cout << " BlockProjector importing "<_ndimension; uint64_t sz = blas.size(); Coordinate coarse_rdimensions = coarse_grid->_rdimensions; for(int v=0;voSites(); // loop over fine sites const int Nsimd = vobj::Nsimd(); uint64_t cwords=sizeof(typename vobj::scalar_object)/sizeof(scalar); assert(cwords==nbasis); accelerator_for(sc,osites,Nsimd,{ #ifdef GRID_SIMT { int lane=acceleratorSIMTlane(Nsimd); // buffer lane #else for(int lane=0;lane void ExportCoarseGridVectors(std::vector > &vecs, deviceVector &blas) { int nvec = vecs.size(); typedef typename vobj::scalar_object coarse_scalar_object; // std::cout << GridLogMessage<<" BlockProjector exporting "<_ndimension; uint64_t sz = blas.size(); Coordinate coarse_rdimensions = coarse_grid->_rdimensions; // std::cout << " export coarsee Blas norm "<oSites(); // loop over fine sites const int Nsimd = vobj::Nsimd(); uint64_t cwords=sizeof(typename vobj::scalar_object)/sizeof(scalar); assert(cwords==nbasis); accelerator_for(sc,osites,Nsimd,{ // Wrap in a macro "FOR_ALL_LANES(lane,{ ... }); #ifdef GRID_SIMT { int lane=acceleratorSIMTlane(Nsimd); // buffer lane #else for(int lane=0;lane &vecs) { // std::cout << " BlockProjector Import basis size "< void blockProject(std::vector &fine,std::vector< Lattice > & coarse) { int nrhs=fine.size(); int _nbasis = sizeof(typename cobj::scalar_object)/sizeof(scalar); // std::cout << "blockProject nbasis " < Vd(coarse_vol); deviceVector Fd(coarse_vol); deviceVector Cd(coarse_vol); // std::cout << "BlockProject pointers"< void blockPromote(std::vector &fine,std::vector > & coarse) { int nrhs=fine.size(); int _nbasis = sizeof(typename cobj::scalar_object)/sizeof(scalar); assert(nbasis==_nbasis); BLAS_F.resize (fine_vol * words * nrhs ); BLAS_C.resize (coarse_vol * nbasis * nrhs ); ImportCoarseGridVectors(coarse, BLAS_C); GridBLAS BLAS; deviceVector Vd(coarse_vol); deviceVector Fd(coarse_vol); deviceVector Cd(coarse_vol); for(int c=0;c