mirror of
https://github.com/paboyle/Grid.git
synced 2025-08-16 11:11:53 +01:00
Debugged momentum projection for A2A Meson Field
This commit is contained in:
282
Grid/algorithms/blas/MomentumProject.h
Normal file
282
Grid/algorithms/blas/MomentumProject.h
Normal file
@@ -0,0 +1,282 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: MomentumProject.h
|
||||
|
||||
Copyright (C) 2025
|
||||
|
||||
Author: Peter Boyle <pboyle@bnl.gov>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
/*
|
||||
MultiMomProject
|
||||
|
||||
Import vectors -> nxyz x (ncomponent x nt)
|
||||
Import complex phases -> nmom x nxy
|
||||
|
||||
apply = via (possibly batched) GEMM
|
||||
*/
|
||||
template<class Field, class ComplexField>
|
||||
class MomentumProject
|
||||
{
|
||||
public:
|
||||
|
||||
typedef typename Field::scalar_type scalar;
|
||||
typedef typename Field::scalar_object scalar_object;
|
||||
|
||||
GridBase *grid;
|
||||
uint64_t nmom;
|
||||
uint64_t nxyz;
|
||||
uint64_t nt;
|
||||
uint64_t nbtw;
|
||||
uint64_t words;
|
||||
|
||||
deviceVector<scalar> BLAS_V; //
|
||||
deviceVector<scalar> BLAS_M; //
|
||||
deviceVector<scalar> BLAS_P; //
|
||||
|
||||
MomentumProject(){};
|
||||
~MomentumProject(){ Deallocate(); };
|
||||
|
||||
void Deallocate(void)
|
||||
{
|
||||
grid=nullptr;
|
||||
nmom=0;
|
||||
nxyz=0;
|
||||
nt=0;
|
||||
nbtw=0;
|
||||
words=0;
|
||||
BLAS_V.resize(0);
|
||||
BLAS_M.resize(0);
|
||||
BLAS_P.resize(0);
|
||||
}
|
||||
void Allocate(int _nmom,GridBase *_grid)
|
||||
{
|
||||
grid=_grid;
|
||||
Coordinate ldims = grid->LocalDimensions();
|
||||
|
||||
nmom=_nmom;
|
||||
nt = ldims[grid->Nd()-1];
|
||||
nxyz = grid->lSites()/nt;
|
||||
words = sizeof(scalar_object)/sizeof(scalar);
|
||||
nbtw = nt * words;
|
||||
|
||||
BLAS_V.resize (nxyz * nt * words );
|
||||
BLAS_M.resize (nmom * nxyz );
|
||||
BLAS_P.resize (nmom * nt * words );
|
||||
}
|
||||
void ImportMomenta(const std::vector <ComplexField> &momenta)
|
||||
{
|
||||
GRID_ASSERT(momenta.size()==nmom);
|
||||
// might as well just make the momenta here
|
||||
typedef typename Field::vector_object vobj;
|
||||
|
||||
int nd = grid->_ndimension;
|
||||
|
||||
uint64_t sz = BLAS_M.size();
|
||||
|
||||
GRID_ASSERT(momenta.size()==nmom)
|
||||
GRID_ASSERT(momenta[0].Grid()==grid);
|
||||
GRID_ASSERT(sz = nxyz * nmom);
|
||||
|
||||
Coordinate rdimensions = grid->_rdimensions;
|
||||
Coordinate ldims = grid->LocalDimensions();
|
||||
int64_t osites = grid->oSites();
|
||||
Coordinate simd = grid->_simd_layout;
|
||||
const int Nsimd = vobj::Nsimd();
|
||||
uint64_t lwords = words; // local variable for copy in to GPU
|
||||
int64_t Nxyz = nxyz;
|
||||
auto blasData_p = &BLAS_M[0];
|
||||
for(int m=0;m<momenta.size();m++){
|
||||
|
||||
autoView( Data , momenta[m], AcceleratorRead);
|
||||
auto Data_p = &Data[0];
|
||||
|
||||
accelerator_for(xyz,nxyz,1,{
|
||||
//////////////////////////////////////////
|
||||
// isite -- map lane within buffer to lane within lattice
|
||||
////////////////////////////////////////////
|
||||
Coordinate lcoor(nd,0);
|
||||
Lexicographic::CoorFromIndex(lcoor,xyz,ldims);
|
||||
|
||||
Coordinate icoor(nd);
|
||||
Coordinate ocoor(nd);
|
||||
for (int d = 0; d < nd; d++) {
|
||||
icoor[d] = lcoor[d]/rdimensions[d];
|
||||
ocoor[d] = lcoor[d]%rdimensions[d];
|
||||
}
|
||||
int64_t osite;
|
||||
int64_t isite;
|
||||
Lexicographic::IndexFromCoor(ocoor,osite,rdimensions);
|
||||
Lexicographic::IndexFromCoor(icoor,isite,simd);
|
||||
|
||||
// BLAS_M[nmom][slice_vol]
|
||||
// Fortran Column major BLAS layout is M_xyz,mom
|
||||
scalar data = extractLane(isite,Data[osite]);
|
||||
uint64_t idx = xyz+m*Nxyz;
|
||||
blasData_p[idx] = data;
|
||||
});
|
||||
}
|
||||
}
|
||||
void ImportVector(Field &vec)
|
||||
{
|
||||
typedef typename Field::vector_object vobj;
|
||||
|
||||
int nd = grid->_ndimension;
|
||||
|
||||
uint64_t sz = BLAS_V.size();
|
||||
|
||||
GRID_ASSERT(sz = nxyz * words * nt);
|
||||
|
||||
Coordinate rdimensions = grid->_rdimensions;
|
||||
Coordinate ldims= grid->LocalDimensions();
|
||||
int64_t osites = grid->oSites();
|
||||
Coordinate simd = grid->_simd_layout;
|
||||
const int Nsimd = vobj::Nsimd();
|
||||
uint64_t lwords= words; // local variable for copy in to GPU
|
||||
|
||||
auto blasData_p = &BLAS_V[0];
|
||||
autoView( Data , vec, AcceleratorRead);
|
||||
auto Data_p = &Data[0];
|
||||
|
||||
int64_t nwords = words;// for capture
|
||||
int64_t Nt = nt;// for capture
|
||||
|
||||
accelerator_for(sf,osites,Nsimd,{
|
||||
#ifdef GRID_SIMT
|
||||
{
|
||||
int lane=acceleratorSIMTlane(Nsimd); // buffer lane
|
||||
#else
|
||||
for(int lane=0;lane<Nsimd;lane++) {
|
||||
#endif
|
||||
//////////////////////////////////////////
|
||||
// isite -- map lane within buffer to lane within lattice
|
||||
////////////////////////////////////////////
|
||||
Coordinate lcoor(nd,0);
|
||||
Coordinate icoor(nd);
|
||||
Coordinate ocoor(nd);
|
||||
|
||||
Lexicographic::CoorFromIndex(icoor,lane,simd);
|
||||
Lexicographic::CoorFromIndex(ocoor,sf,rdimensions);
|
||||
|
||||
|
||||
int64_t l_xyz = 0;
|
||||
for (int d = 0; d < nd; d++) {
|
||||
lcoor[d] = rdimensions[d]*icoor[d] + ocoor[d];
|
||||
}
|
||||
uint64_t l_t = lcoor[nd-1];
|
||||
|
||||
Coordinate xyz_coor = lcoor;
|
||||
xyz_coor[nd-1] =0;
|
||||
Lexicographic::IndexFromCoor(xyz_coor,l_xyz,ldims);
|
||||
|
||||
|
||||
scalar_object data = extractLane(lane,Data[sf]);
|
||||
scalar *data_words = (scalar *) &data;
|
||||
for(int w = 0 ; w < nwords; w++) {
|
||||
// BLAS_V[slice_vol][nt][words]
|
||||
// Fortran Column major BLAS layout is V_(t,w)_xyz
|
||||
uint64_t idx = w+l_t*nwords + l_xyz * nwords * Nt;
|
||||
blasData_p[idx] = data_words[w];
|
||||
}
|
||||
#ifdef GRID_SIMT
|
||||
}
|
||||
#else
|
||||
}
|
||||
#endif
|
||||
});
|
||||
}
|
||||
void ExportMomentumProjection(std::vector<typename Field::scalar_object> &projection)
|
||||
{
|
||||
projection.resize(nmom*nt);
|
||||
acceleratorCopyFromDevice(&BLAS_P[0],(scalar *)&projection[0],BLAS_P.size()*sizeof(scalar));
|
||||
// Could decide on a layout late?
|
||||
}
|
||||
|
||||
// Row major layout "C" order:
|
||||
// BLAS_V[slice_vol][nt][words]
|
||||
// BLAS_M[nmom][slice_vol]
|
||||
// BLAS_P[nmom][nt][words]
|
||||
//
|
||||
// Fortran Column major BLAS layout is V_(w,t)_xyz
|
||||
// Fortran Column major BLAS layout is M_xyz,mom
|
||||
// Fortran Column major BLAS layout is P_(w,t),mom
|
||||
//
|
||||
// Projected
|
||||
//
|
||||
// P = (V * M)_(w,t),mom
|
||||
//
|
||||
void Project(Field &data,std::vector< typename Field::scalar_object > & projected_gdata)
|
||||
{
|
||||
this->ImportVector(data);
|
||||
|
||||
std::vector< typename Field::scalar_object > projected_planes;
|
||||
|
||||
deviceVector<scalar *> Vd(1);
|
||||
deviceVector<scalar *> Md(1);
|
||||
deviceVector<scalar *> Pd(1);
|
||||
|
||||
scalar * Vh = & BLAS_V[0];
|
||||
scalar * Mh = & BLAS_M[0];
|
||||
scalar * Ph = & BLAS_P[0];
|
||||
|
||||
acceleratorPut(Vd[0],Vh);
|
||||
acceleratorPut(Md[0],Mh);
|
||||
acceleratorPut(Pd[0],Ph);
|
||||
|
||||
GridBLAS BLAS;
|
||||
|
||||
/////////////////////////////////////////
|
||||
// P_im = VMmx . Vxi
|
||||
/////////////////////////////////////////
|
||||
BLAS.gemmBatched(GridBLAS_OP_N,GridBLAS_OP_N,
|
||||
words*nt,nmom,nxyz,
|
||||
scalar(1.0),
|
||||
Vd,
|
||||
Md,
|
||||
scalar(0.0), // wipe out result
|
||||
Pd);
|
||||
BLAS.synchronise();
|
||||
|
||||
ExportMomentumProjection(projected_planes); // resizes
|
||||
|
||||
/////////////////////////////////
|
||||
// Reduce across MPI ranks
|
||||
/////////////////////////////////
|
||||
int nd = grid->Nd();
|
||||
int gt = grid->GlobalDimensions()[nd-1];
|
||||
int lt = grid->LocalDimensions()[nd-1];
|
||||
projected_gdata.resize(gt*nmom);
|
||||
for(int t=0;t<gt*nmom;t++){ // global Nt array with zeroes for stuff not on this node
|
||||
projected_gdata[t]=Zero();
|
||||
}
|
||||
for(int t=0;t<lt;t++){
|
||||
for(int m=0;m<nmom;m++){
|
||||
int st = grid->LocalStarts()[nd-1];
|
||||
projected_gdata[t+st + gt*m] = projected_planes[t+lt*m];
|
||||
}}
|
||||
grid->GlobalSumVector((scalar *)&projected_gdata[0],gt*nmom*words);
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
Reference in New Issue
Block a user