mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-17 23:37:06 +01:00
Merge branch 'feature/gpt' of https://github.com/lehner/Grid into lehner-feature/gpt
This commit is contained in:
@ -31,6 +31,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
#ifndef GRID_ALGORITHM_COARSENED_MATRIX_H
|
||||
#define GRID_ALGORITHM_COARSENED_MATRIX_H
|
||||
|
||||
#include <Grid/qcd/QCD.h> // needed for Dagger(Yes|No), Inverse(Yes|No)
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
@ -59,12 +60,14 @@ inline void blockMaskedInnerProduct(Lattice<CComplex> &CoarseInner,
|
||||
class Geometry {
|
||||
public:
|
||||
int npoint;
|
||||
int base;
|
||||
std::vector<int> directions ;
|
||||
std::vector<int> displacements;
|
||||
std::vector<int> points_dagger;
|
||||
|
||||
Geometry(int _d) {
|
||||
|
||||
int base = (_d==5) ? 1:0;
|
||||
base = (_d==5) ? 1:0;
|
||||
|
||||
// make coarse grid stencil for 4d , not 5d
|
||||
if ( _d==5 ) _d=4;
|
||||
@ -72,16 +75,51 @@ public:
|
||||
npoint = 2*_d+1;
|
||||
directions.resize(npoint);
|
||||
displacements.resize(npoint);
|
||||
points_dagger.resize(npoint);
|
||||
for(int d=0;d<_d;d++){
|
||||
directions[d ] = d+base;
|
||||
directions[d+_d] = d+base;
|
||||
displacements[d ] = +1;
|
||||
displacements[d+_d]= -1;
|
||||
points_dagger[d ] = d+_d;
|
||||
points_dagger[d+_d] = d;
|
||||
}
|
||||
directions [2*_d]=0;
|
||||
displacements[2*_d]=0;
|
||||
points_dagger[2*_d]=2*_d;
|
||||
}
|
||||
|
||||
int point(int dir, int disp) {
|
||||
assert(disp == -1 || disp == 0 || disp == 1);
|
||||
assert(base+0 <= dir && dir < base+4);
|
||||
|
||||
// directions faster index = new indexing
|
||||
// 4d (base = 0):
|
||||
// point 0 1 2 3 4 5 6 7 8
|
||||
// dir 0 1 2 3 0 1 2 3 0
|
||||
// disp +1 +1 +1 +1 -1 -1 -1 -1 0
|
||||
// 5d (base = 1):
|
||||
// point 0 1 2 3 4 5 6 7 8
|
||||
// dir 1 2 3 4 1 2 3 4 0
|
||||
// disp +1 +1 +1 +1 -1 -1 -1 -1 0
|
||||
|
||||
// displacements faster index = old indexing
|
||||
// 4d (base = 0):
|
||||
// point 0 1 2 3 4 5 6 7 8
|
||||
// dir 0 0 1 1 2 2 3 3 0
|
||||
// disp +1 -1 +1 -1 +1 -1 +1 -1 0
|
||||
// 5d (base = 1):
|
||||
// point 0 1 2 3 4 5 6 7 8
|
||||
// dir 1 1 2 2 3 3 4 4 0
|
||||
// disp +1 -1 +1 -1 +1 -1 +1 -1 0
|
||||
|
||||
if(dir == 0 and disp == 0)
|
||||
return 8;
|
||||
else // New indexing
|
||||
return (1 - disp) / 2 * 4 + dir - base;
|
||||
// else // Old indexing
|
||||
// return (4 * (dir - base) + 1 - disp) / 2;
|
||||
}
|
||||
};
|
||||
|
||||
template<class Fobj,class CComplex,int nbasis>
|
||||
@ -258,7 +296,7 @@ public:
|
||||
// Fine Object == (per site) type of fine field
|
||||
// nbasis == number of deflation vectors
|
||||
template<class Fobj,class CComplex,int nbasis>
|
||||
class CoarsenedMatrix : public SparseMatrixBase<Lattice<iVector<CComplex,nbasis > > > {
|
||||
class CoarsenedMatrix : public CheckerBoardedSparseMatrixBase<Lattice<iVector<CComplex,nbasis > > > {
|
||||
public:
|
||||
|
||||
typedef iVector<CComplex,nbasis > siteVector;
|
||||
@ -268,33 +306,59 @@ public:
|
||||
typedef iMatrix<CComplex,nbasis > Cobj;
|
||||
typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field
|
||||
typedef Lattice<Fobj > FineField;
|
||||
typedef CoarseVector FermionField;
|
||||
|
||||
// enrich interface, use default implementation as in FermionOperator ///////
|
||||
void Dminus(CoarseVector const& in, CoarseVector& out) { out = in; }
|
||||
void DminusDag(CoarseVector const& in, CoarseVector& out) { out = in; }
|
||||
void ImportPhysicalFermionSource(CoarseVector const& input, CoarseVector& imported) { imported = input; }
|
||||
void ImportUnphysicalFermion(CoarseVector const& input, CoarseVector& imported) { imported = input; }
|
||||
void ExportPhysicalFermionSolution(CoarseVector const& solution, CoarseVector& exported) { exported = solution; };
|
||||
void ExportPhysicalFermionSource(CoarseVector const& solution, CoarseVector& exported) { exported = solution; };
|
||||
|
||||
////////////////////
|
||||
// Data members
|
||||
////////////////////
|
||||
Geometry geom;
|
||||
GridBase * _grid;
|
||||
GridBase* _cbgrid;
|
||||
int hermitian;
|
||||
|
||||
CartesianStencil<siteVector,siteVector,int> Stencil;
|
||||
CartesianStencil<siteVector,siteVector,int> StencilEven;
|
||||
CartesianStencil<siteVector,siteVector,int> StencilOdd;
|
||||
|
||||
std::vector<CoarseMatrix> A;
|
||||
|
||||
std::vector<CoarseMatrix> Aeven;
|
||||
std::vector<CoarseMatrix> Aodd;
|
||||
|
||||
CoarseMatrix AselfInv;
|
||||
CoarseMatrix AselfInvEven;
|
||||
CoarseMatrix AselfInvOdd;
|
||||
|
||||
Vector<RealD> dag_factor;
|
||||
|
||||
///////////////////////
|
||||
// Interface
|
||||
///////////////////////
|
||||
GridBase * Grid(void) { return _grid; }; // this is all the linalg routines need to know
|
||||
GridBase * RedBlackGrid() { return _cbgrid; };
|
||||
|
||||
int ConstEE() { return 0; }
|
||||
|
||||
void M (const CoarseVector &in, CoarseVector &out)
|
||||
{
|
||||
conformable(_grid,in.Grid());
|
||||
conformable(in.Grid(),out.Grid());
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
|
||||
SimpleCompressor<siteVector> compressor;
|
||||
|
||||
Stencil.HaloExchange(in,compressor);
|
||||
autoView( in_v , in, AcceleratorRead);
|
||||
autoView( out_v , out, AcceleratorWrite);
|
||||
autoView( Stencil_v , Stencil, AcceleratorRead);
|
||||
auto& geom_v = geom;
|
||||
typedef LatticeView<Cobj> Aview;
|
||||
|
||||
Vector<Aview> AcceleratorViewContainer;
|
||||
@ -316,14 +380,14 @@ public:
|
||||
int ptype;
|
||||
StencilEntry *SE;
|
||||
|
||||
for(int point=0;point<geom.npoint;point++){
|
||||
for(int point=0;point<geom_v.npoint;point++){
|
||||
|
||||
SE=Stencil.GetEntry(ptype,point,ss);
|
||||
SE=Stencil_v.GetEntry(ptype,point,ss);
|
||||
|
||||
if(SE->_is_local) {
|
||||
nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
|
||||
} else {
|
||||
nbr = coalescedRead(Stencil.CommBuf()[SE->_offset]);
|
||||
nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]);
|
||||
}
|
||||
acceleratorSynchronise();
|
||||
|
||||
@ -344,12 +408,72 @@ public:
|
||||
return M(in,out);
|
||||
} else {
|
||||
// corresponds to Galerkin coarsening
|
||||
CoarseVector tmp(Grid());
|
||||
G5C(tmp, in);
|
||||
M(tmp, out);
|
||||
G5C(out, out);
|
||||
return MdagNonHermitian(in, out);
|
||||
}
|
||||
};
|
||||
|
||||
void MdagNonHermitian(const CoarseVector &in, CoarseVector &out)
|
||||
{
|
||||
conformable(_grid,in.Grid());
|
||||
conformable(in.Grid(),out.Grid());
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
|
||||
SimpleCompressor<siteVector> compressor;
|
||||
|
||||
Stencil.HaloExchange(in,compressor);
|
||||
autoView( in_v , in, AcceleratorRead);
|
||||
autoView( out_v , out, AcceleratorWrite);
|
||||
autoView( Stencil_v , Stencil, AcceleratorRead);
|
||||
auto& geom_v = geom;
|
||||
typedef LatticeView<Cobj> Aview;
|
||||
|
||||
Vector<Aview> AcceleratorViewContainer;
|
||||
|
||||
for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer.push_back(A[p].View(AcceleratorRead));
|
||||
Aview *Aview_p = & AcceleratorViewContainer[0];
|
||||
|
||||
const int Nsimd = CComplex::Nsimd();
|
||||
typedef decltype(coalescedRead(in_v[0])) calcVector;
|
||||
typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
|
||||
|
||||
int osites=Grid()->oSites();
|
||||
|
||||
Vector<int> points(geom.npoint, 0);
|
||||
for(int p=0; p<geom.npoint; p++)
|
||||
points[p] = geom.points_dagger[p];
|
||||
|
||||
RealD* dag_factor_p = &dag_factor[0];
|
||||
|
||||
accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
|
||||
int ss = sss/nbasis;
|
||||
int b = sss%nbasis;
|
||||
calcComplex res = Zero();
|
||||
calcVector nbr;
|
||||
int ptype;
|
||||
StencilEntry *SE;
|
||||
|
||||
for(int p=0;p<geom_v.npoint;p++){
|
||||
int point = points[p];
|
||||
|
||||
SE=Stencil_v.GetEntry(ptype,point,ss);
|
||||
|
||||
if(SE->_is_local) {
|
||||
nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
|
||||
} else {
|
||||
nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]);
|
||||
}
|
||||
acceleratorSynchronise();
|
||||
|
||||
for(int bb=0;bb<nbasis;bb++) {
|
||||
res = res + dag_factor_p[b*nbasis+bb]*coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb);
|
||||
}
|
||||
}
|
||||
coalescedWrite(out_v[ss](b),res);
|
||||
});
|
||||
|
||||
for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer[p].ViewClose();
|
||||
}
|
||||
|
||||
void MdirComms(const CoarseVector &in)
|
||||
{
|
||||
SimpleCompressor<siteVector> compressor;
|
||||
@ -359,6 +483,7 @@ public:
|
||||
{
|
||||
conformable(_grid,in.Grid());
|
||||
conformable(_grid,out.Grid());
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
|
||||
typedef LatticeView<Cobj> Aview;
|
||||
Vector<Aview> AcceleratorViewContainer;
|
||||
@ -367,6 +492,7 @@ public:
|
||||
|
||||
autoView( out_v , out, AcceleratorWrite);
|
||||
autoView( in_v , in, AcceleratorRead);
|
||||
autoView( Stencil_v , Stencil, AcceleratorRead);
|
||||
|
||||
const int Nsimd = CComplex::Nsimd();
|
||||
typedef decltype(coalescedRead(in_v[0])) calcVector;
|
||||
@ -380,12 +506,12 @@ public:
|
||||
int ptype;
|
||||
StencilEntry *SE;
|
||||
|
||||
SE=Stencil.GetEntry(ptype,point,ss);
|
||||
SE=Stencil_v.GetEntry(ptype,point,ss);
|
||||
|
||||
if(SE->_is_local) {
|
||||
nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
|
||||
} else {
|
||||
nbr = coalescedRead(Stencil.CommBuf()[SE->_offset]);
|
||||
nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]);
|
||||
}
|
||||
acceleratorSynchronise();
|
||||
|
||||
@ -413,34 +539,7 @@ public:
|
||||
|
||||
this->MdirComms(in);
|
||||
|
||||
int ndim = in.Grid()->Nd();
|
||||
|
||||
//////////////
|
||||
// 4D action like wilson
|
||||
// 0+ => 0
|
||||
// 0- => 1
|
||||
// 1+ => 2
|
||||
// 1- => 3
|
||||
// etc..
|
||||
//////////////
|
||||
// 5D action like DWF
|
||||
// 1+ => 0
|
||||
// 1- => 1
|
||||
// 2+ => 2
|
||||
// 2- => 3
|
||||
// etc..
|
||||
auto point = [dir, disp, ndim](){
|
||||
if(dir == 0 and disp == 0)
|
||||
return 8;
|
||||
else if ( ndim==4 ) {
|
||||
return (4 * dir + 1 - disp) / 2;
|
||||
} else {
|
||||
return (4 * (dir-1) + 1 - disp) / 2;
|
||||
}
|
||||
}();
|
||||
|
||||
MdirCalc(in,out,point);
|
||||
|
||||
MdirCalc(in,out,geom.point(dir,disp));
|
||||
};
|
||||
|
||||
void Mdiag(const CoarseVector &in, CoarseVector &out)
|
||||
@ -449,17 +548,269 @@ public:
|
||||
MdirCalc(in, out, point); // No comms
|
||||
};
|
||||
|
||||
void Mooee(const CoarseVector &in, CoarseVector &out) {
|
||||
MooeeInternal(in, out, DaggerNo, InverseNo);
|
||||
}
|
||||
|
||||
void MooeeInv(const CoarseVector &in, CoarseVector &out) {
|
||||
MooeeInternal(in, out, DaggerNo, InverseYes);
|
||||
}
|
||||
|
||||
void MooeeDag(const CoarseVector &in, CoarseVector &out) {
|
||||
MooeeInternal(in, out, DaggerYes, InverseNo);
|
||||
}
|
||||
|
||||
void MooeeInvDag(const CoarseVector &in, CoarseVector &out) {
|
||||
MooeeInternal(in, out, DaggerYes, InverseYes);
|
||||
}
|
||||
|
||||
void Meooe(const CoarseVector &in, CoarseVector &out) {
|
||||
if(in.Checkerboard() == Odd) {
|
||||
DhopEO(in, out, DaggerNo);
|
||||
} else {
|
||||
DhopOE(in, out, DaggerNo);
|
||||
}
|
||||
}
|
||||
|
||||
void MeooeDag(const CoarseVector &in, CoarseVector &out) {
|
||||
if(in.Checkerboard() == Odd) {
|
||||
DhopEO(in, out, DaggerYes);
|
||||
} else {
|
||||
DhopOE(in, out, DaggerYes);
|
||||
}
|
||||
}
|
||||
|
||||
void Dhop(const CoarseVector &in, CoarseVector &out, int dag) {
|
||||
conformable(in.Grid(), _grid); // verifies full grid
|
||||
conformable(in.Grid(), out.Grid());
|
||||
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
|
||||
DhopInternal(Stencil, A, in, out, dag);
|
||||
}
|
||||
|
||||
void DhopOE(const CoarseVector &in, CoarseVector &out, int dag) {
|
||||
conformable(in.Grid(), _cbgrid); // verifies half grid
|
||||
conformable(in.Grid(), out.Grid()); // drops the cb check
|
||||
|
||||
assert(in.Checkerboard() == Even);
|
||||
out.Checkerboard() = Odd;
|
||||
|
||||
DhopInternal(StencilEven, Aodd, in, out, dag);
|
||||
}
|
||||
|
||||
void DhopEO(const CoarseVector &in, CoarseVector &out, int dag) {
|
||||
conformable(in.Grid(), _cbgrid); // verifies half grid
|
||||
conformable(in.Grid(), out.Grid()); // drops the cb check
|
||||
|
||||
assert(in.Checkerboard() == Odd);
|
||||
out.Checkerboard() = Even;
|
||||
|
||||
DhopInternal(StencilOdd, Aeven, in, out, dag);
|
||||
}
|
||||
|
||||
void MooeeInternal(const CoarseVector &in, CoarseVector &out, int dag, int inv) {
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
assert(in.Checkerboard() == Odd || in.Checkerboard() == Even);
|
||||
|
||||
CoarseMatrix *Aself = nullptr;
|
||||
if(in.Grid()->_isCheckerBoarded) {
|
||||
if(in.Checkerboard() == Odd) {
|
||||
Aself = (inv) ? &AselfInvOdd : &Aodd[geom.npoint-1];
|
||||
DselfInternal(StencilOdd, *Aself, in, out, dag);
|
||||
} else {
|
||||
Aself = (inv) ? &AselfInvEven : &Aeven[geom.npoint-1];
|
||||
DselfInternal(StencilEven, *Aself, in, out, dag);
|
||||
}
|
||||
} else {
|
||||
Aself = (inv) ? &AselfInv : &A[geom.npoint-1];
|
||||
DselfInternal(Stencil, *Aself, in, out, dag);
|
||||
}
|
||||
assert(Aself != nullptr);
|
||||
}
|
||||
|
||||
void DselfInternal(CartesianStencil<siteVector,siteVector,int> &st, CoarseMatrix &a,
|
||||
const CoarseVector &in, CoarseVector &out, int dag) {
|
||||
int point = geom.npoint-1;
|
||||
autoView( out_v, out, AcceleratorWrite);
|
||||
autoView( in_v, in, AcceleratorRead);
|
||||
autoView( st_v, st, AcceleratorRead);
|
||||
autoView( a_v, a, AcceleratorRead);
|
||||
|
||||
const int Nsimd = CComplex::Nsimd();
|
||||
typedef decltype(coalescedRead(in_v[0])) calcVector;
|
||||
typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
|
||||
|
||||
RealD* dag_factor_p = &dag_factor[0];
|
||||
|
||||
if(dag) {
|
||||
accelerator_for(sss, in.Grid()->oSites()*nbasis, Nsimd, {
|
||||
int ss = sss/nbasis;
|
||||
int b = sss%nbasis;
|
||||
calcComplex res = Zero();
|
||||
calcVector nbr;
|
||||
int ptype;
|
||||
StencilEntry *SE;
|
||||
|
||||
SE=st_v.GetEntry(ptype,point,ss);
|
||||
|
||||
if(SE->_is_local) {
|
||||
nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
|
||||
} else {
|
||||
nbr = coalescedRead(st_v.CommBuf()[SE->_offset]);
|
||||
}
|
||||
acceleratorSynchronise();
|
||||
|
||||
for(int bb=0;bb<nbasis;bb++) {
|
||||
res = res + dag_factor_p[b*nbasis+bb]*coalescedRead(a_v[ss](b,bb))*nbr(bb);
|
||||
}
|
||||
coalescedWrite(out_v[ss](b),res);
|
||||
});
|
||||
} else {
|
||||
accelerator_for(sss, in.Grid()->oSites()*nbasis, Nsimd, {
|
||||
int ss = sss/nbasis;
|
||||
int b = sss%nbasis;
|
||||
calcComplex res = Zero();
|
||||
calcVector nbr;
|
||||
int ptype;
|
||||
StencilEntry *SE;
|
||||
|
||||
SE=st_v.GetEntry(ptype,point,ss);
|
||||
|
||||
if(SE->_is_local) {
|
||||
nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
|
||||
} else {
|
||||
nbr = coalescedRead(st_v.CommBuf()[SE->_offset]);
|
||||
}
|
||||
acceleratorSynchronise();
|
||||
|
||||
for(int bb=0;bb<nbasis;bb++) {
|
||||
res = res + coalescedRead(a_v[ss](b,bb))*nbr(bb);
|
||||
}
|
||||
coalescedWrite(out_v[ss](b),res);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
void DhopInternal(CartesianStencil<siteVector,siteVector,int> &st, std::vector<CoarseMatrix> &a,
|
||||
const CoarseVector &in, CoarseVector &out, int dag) {
|
||||
SimpleCompressor<siteVector> compressor;
|
||||
|
||||
st.HaloExchange(in,compressor);
|
||||
autoView( in_v, in, AcceleratorRead);
|
||||
autoView( out_v, out, AcceleratorWrite);
|
||||
autoView( st_v , st, AcceleratorRead);
|
||||
typedef LatticeView<Cobj> Aview;
|
||||
|
||||
// determine in what order we need the points
|
||||
int npoint = geom.npoint-1;
|
||||
Vector<int> points(npoint, 0);
|
||||
for(int p=0; p<npoint; p++)
|
||||
points[p] = (dag && !hermitian) ? geom.points_dagger[p] : p;
|
||||
|
||||
Vector<Aview> AcceleratorViewContainer;
|
||||
for(int p=0;p<npoint;p++) AcceleratorViewContainer.push_back(a[p].View(AcceleratorRead));
|
||||
Aview *Aview_p = & AcceleratorViewContainer[0];
|
||||
|
||||
const int Nsimd = CComplex::Nsimd();
|
||||
typedef decltype(coalescedRead(in_v[0])) calcVector;
|
||||
typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
|
||||
|
||||
RealD* dag_factor_p = &dag_factor[0];
|
||||
|
||||
if(dag) {
|
||||
accelerator_for(sss, in.Grid()->oSites()*nbasis, Nsimd, {
|
||||
int ss = sss/nbasis;
|
||||
int b = sss%nbasis;
|
||||
calcComplex res = Zero();
|
||||
calcVector nbr;
|
||||
int ptype;
|
||||
StencilEntry *SE;
|
||||
|
||||
for(int p=0;p<npoint;p++){
|
||||
int point = points[p];
|
||||
SE=st_v.GetEntry(ptype,point,ss);
|
||||
|
||||
if(SE->_is_local) {
|
||||
nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
|
||||
} else {
|
||||
nbr = coalescedRead(st_v.CommBuf()[SE->_offset]);
|
||||
}
|
||||
acceleratorSynchronise();
|
||||
|
||||
for(int bb=0;bb<nbasis;bb++) {
|
||||
res = res + dag_factor_p[b*nbasis+bb]*coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb);
|
||||
}
|
||||
}
|
||||
coalescedWrite(out_v[ss](b),res);
|
||||
});
|
||||
} else {
|
||||
accelerator_for(sss, in.Grid()->oSites()*nbasis, Nsimd, {
|
||||
int ss = sss/nbasis;
|
||||
int b = sss%nbasis;
|
||||
calcComplex res = Zero();
|
||||
calcVector nbr;
|
||||
int ptype;
|
||||
StencilEntry *SE;
|
||||
|
||||
for(int p=0;p<npoint;p++){
|
||||
int point = points[p];
|
||||
SE=st_v.GetEntry(ptype,point,ss);
|
||||
|
||||
if(SE->_is_local) {
|
||||
nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
|
||||
} else {
|
||||
nbr = coalescedRead(st_v.CommBuf()[SE->_offset]);
|
||||
}
|
||||
acceleratorSynchronise();
|
||||
|
||||
for(int bb=0;bb<nbasis;bb++) {
|
||||
res = res + coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb);
|
||||
}
|
||||
}
|
||||
coalescedWrite(out_v[ss](b),res);
|
||||
});
|
||||
}
|
||||
|
||||
for(int p=0;p<npoint;p++) AcceleratorViewContainer[p].ViewClose();
|
||||
}
|
||||
|
||||
CoarsenedMatrix(GridCartesian &CoarseGrid, int hermitian_=0) :
|
||||
CoarsenedMatrix(GridCartesian &CoarseGrid, GridRedBlackCartesian &CoarseRBGrid, int hermitian_=0) :
|
||||
|
||||
_grid(&CoarseGrid),
|
||||
_cbgrid(&CoarseRBGrid),
|
||||
geom(CoarseGrid._ndimension),
|
||||
hermitian(hermitian_),
|
||||
Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements,0),
|
||||
A(geom.npoint,&CoarseGrid)
|
||||
StencilEven(&CoarseRBGrid,geom.npoint,Even,geom.directions,geom.displacements,0),
|
||||
StencilOdd(&CoarseRBGrid,geom.npoint,Odd,geom.directions,geom.displacements,0),
|
||||
A(geom.npoint,&CoarseGrid),
|
||||
Aeven(geom.npoint,&CoarseRBGrid),
|
||||
Aodd(geom.npoint,&CoarseRBGrid),
|
||||
AselfInv(&CoarseGrid),
|
||||
AselfInvEven(&CoarseRBGrid),
|
||||
AselfInvOdd(&CoarseRBGrid),
|
||||
dag_factor(nbasis*nbasis)
|
||||
{
|
||||
fillFactor();
|
||||
};
|
||||
|
||||
void fillFactor() {
|
||||
Eigen::MatrixXd dag_factor_eigen = Eigen::MatrixXd::Ones(nbasis, nbasis);
|
||||
if(!hermitian) {
|
||||
const int nb = nbasis/2;
|
||||
dag_factor_eigen.block(0,nb,nb,nb) *= -1.0;
|
||||
dag_factor_eigen.block(nb,0,nb,nb) *= -1.0;
|
||||
}
|
||||
|
||||
// GPU readable prefactor
|
||||
thread_for(i, nbasis*nbasis, {
|
||||
int j = i/nbasis;
|
||||
int k = i%nbasis;
|
||||
dag_factor[i] = dag_factor_eigen(j, k);
|
||||
});
|
||||
}
|
||||
|
||||
void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop,
|
||||
Aggregation<Fobj,CComplex,nbasis> & Subspace)
|
||||
{
|
||||
@ -608,6 +959,9 @@ public:
|
||||
std::cout << GridLogMessage << " ForceHermitian, new code "<<std::endl;
|
||||
ForceHermitian();
|
||||
}
|
||||
|
||||
InvertSelfStencilLink(); std::cout << GridLogMessage << "Coarse self link inverted" << std::endl;
|
||||
FillHalfCbs(); std::cout << GridLogMessage << "Coarse half checkerboards filled" << std::endl;
|
||||
}
|
||||
|
||||
void ForceHermitian(void) {
|
||||
@ -629,6 +983,50 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void InvertSelfStencilLink() {
|
||||
std::cout << GridLogDebug << "CoarsenedMatrix::InvertSelfStencilLink" << std::endl;
|
||||
int localVolume = Grid()->lSites();
|
||||
|
||||
typedef typename Cobj::scalar_object scalar_object;
|
||||
|
||||
autoView(Aself_v, A[geom.npoint-1], CpuRead);
|
||||
autoView(AselfInv_v, AselfInv, CpuWrite);
|
||||
thread_for(site, localVolume, { // NOTE: Not able to bring this to GPU because of Eigen + peek/poke
|
||||
Eigen::MatrixXcd selfLinkEigen = Eigen::MatrixXcd::Zero(nbasis, nbasis);
|
||||
Eigen::MatrixXcd selfLinkInvEigen = Eigen::MatrixXcd::Zero(nbasis, nbasis);
|
||||
|
||||
scalar_object selfLink = Zero();
|
||||
scalar_object selfLinkInv = Zero();
|
||||
|
||||
Coordinate lcoor;
|
||||
|
||||
Grid()->LocalIndexToLocalCoor(site, lcoor);
|
||||
peekLocalSite(selfLink, Aself_v, lcoor);
|
||||
|
||||
for (int i = 0; i < nbasis; ++i)
|
||||
for (int j = 0; j < nbasis; ++j)
|
||||
selfLinkEigen(i, j) = static_cast<ComplexD>(TensorRemove(selfLink(i, j)));
|
||||
|
||||
selfLinkInvEigen = selfLinkEigen.inverse();
|
||||
|
||||
for(int i = 0; i < nbasis; ++i)
|
||||
for(int j = 0; j < nbasis; ++j)
|
||||
selfLinkInv(i, j) = selfLinkInvEigen(i, j);
|
||||
|
||||
pokeLocalSite(selfLinkInv, AselfInv_v, lcoor);
|
||||
});
|
||||
}
|
||||
|
||||
void FillHalfCbs() {
|
||||
std::cout << GridLogDebug << "CoarsenedMatrix::FillHalfCbs" << std::endl;
|
||||
for(int p = 0; p < geom.npoint; ++p) {
|
||||
pickCheckerboard(Even, Aeven[p], A[p]);
|
||||
pickCheckerboard(Odd, Aodd[p], A[p]);
|
||||
}
|
||||
pickCheckerboard(Even, AselfInvEven, AselfInv);
|
||||
pickCheckerboard(Odd, AselfInvOdd, AselfInv);
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -233,9 +233,9 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
|
||||
|
||||
auto AccCacheIterator = EntryLookup(CpuPtr);
|
||||
auto & AccCache = AccCacheIterator->second;
|
||||
if (!AccCache.AccPtr)
|
||||
if (!AccCache.AccPtr) {
|
||||
EvictVictims(bytes);
|
||||
|
||||
}
|
||||
assert((mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard));
|
||||
|
||||
assert(AccCache.cpuLock==0); // Programming error
|
||||
@ -373,8 +373,10 @@ uint64_t MemoryManager::CpuViewOpen(uint64_t CpuPtr,size_t bytes,ViewMode mode,V
|
||||
|
||||
auto AccCacheIterator = EntryLookup(CpuPtr);
|
||||
auto & AccCache = AccCacheIterator->second;
|
||||
if (!AccCache.AccPtr)
|
||||
|
||||
if (!AccCache.AccPtr) {
|
||||
EvictVictims(bytes);
|
||||
}
|
||||
|
||||
assert((mode==CpuRead)||(mode==CpuWrite));
|
||||
assert(AccCache.accLock==0); // Programming error
|
||||
|
@ -127,6 +127,11 @@ accelerator_inline void convertType(T1 & out, const iScalar<T2> & in) {
|
||||
convertType(out,in._internal);
|
||||
}
|
||||
|
||||
template<typename T1, typename std::enable_if<!isGridScalar<T1>::value, T1>::type* = nullptr>
|
||||
accelerator_inline void convertType(T1 & out, const iScalar<T1> & in) {
|
||||
convertType(out,in._internal);
|
||||
}
|
||||
|
||||
template<typename T1,typename T2>
|
||||
accelerator_inline void convertType(iScalar<T1> & out, const T2 & in) {
|
||||
convertType(out._internal,in);
|
||||
|
@ -21,22 +21,26 @@ void acceleratorInit(void)
|
||||
#define ENV_RANK_SLURM "SLURM_PROCID"
|
||||
#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK"
|
||||
#define ENV_RANK_MVAPICH "MV2_COMM_WORLD_RANK"
|
||||
// We extract the local rank initialization using an environment variable
|
||||
if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL) {
|
||||
printf("OPENMPI detected\n");
|
||||
rank = atoi(localRankStr);
|
||||
} else if ((localRankStr = getenv(ENV_LOCAL_RANK_MVAPICH)) != NULL) {
|
||||
printf("MVAPICH detected\n");
|
||||
rank = atoi(localRankStr);
|
||||
} else if ((localRankStr = getenv(ENV_LOCAL_RANK_SLURM)) != NULL) {
|
||||
printf("SLURM detected\n");
|
||||
rank = atoi(localRankStr);
|
||||
} else {
|
||||
printf("MPI version is unknown - bad things may happen\n");
|
||||
}
|
||||
if ((localRankStr = getenv(ENV_RANK_OMPI )) != NULL) { world_rank = atoi(localRankStr);}
|
||||
if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);}
|
||||
if ((localRankStr = getenv(ENV_RANK_SLURM )) != NULL) { world_rank = atoi(localRankStr);}
|
||||
// We extract the local rank initialization using an environment variable
|
||||
if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL) {
|
||||
if (!world_rank)
|
||||
printf("OPENMPI detected\n");
|
||||
rank = atoi(localRankStr);
|
||||
} else if ((localRankStr = getenv(ENV_LOCAL_RANK_MVAPICH)) != NULL) {
|
||||
if (!world_rank)
|
||||
printf("MVAPICH detected\n");
|
||||
rank = atoi(localRankStr);
|
||||
} else if ((localRankStr = getenv(ENV_LOCAL_RANK_SLURM)) != NULL) {
|
||||
if (!world_rank)
|
||||
printf("SLURM detected\n");
|
||||
rank = atoi(localRankStr);
|
||||
} else {
|
||||
if (!world_rank)
|
||||
printf("MPI version is unknown - bad things may happen\n");
|
||||
}
|
||||
|
||||
size_t totalDeviceMem=0;
|
||||
for (int i = 0; i < nDevices; i++) {
|
||||
|
Reference in New Issue
Block a user