1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Merge branch 'develop' of https://github.com/paboyle/Grid into develop

This commit is contained in:
Peter Boyle 2020-11-24 21:46:33 -05:00
commit 888eacd3b8
7 changed files with 964 additions and 71 deletions

View File

@ -31,6 +31,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_ALGORITHM_COARSENED_MATRIX_H
#define GRID_ALGORITHM_COARSENED_MATRIX_H
#include <Grid/qcd/QCD.h> // needed for Dagger(Yes|No), Inverse(Yes|No)
NAMESPACE_BEGIN(Grid);
@ -59,12 +60,14 @@ inline void blockMaskedInnerProduct(Lattice<CComplex> &CoarseInner,
class Geometry {
public:
int npoint;
int base;
std::vector<int> directions ;
std::vector<int> displacements;
std::vector<int> points_dagger;
Geometry(int _d) {
int base = (_d==5) ? 1:0;
base = (_d==5) ? 1:0;
// make coarse grid stencil for 4d , not 5d
if ( _d==5 ) _d=4;
@ -72,16 +75,51 @@ public:
npoint = 2*_d+1;
directions.resize(npoint);
displacements.resize(npoint);
points_dagger.resize(npoint);
for(int d=0;d<_d;d++){
directions[d ] = d+base;
directions[d+_d] = d+base;
displacements[d ] = +1;
displacements[d+_d]= -1;
points_dagger[d ] = d+_d;
points_dagger[d+_d] = d;
}
directions [2*_d]=0;
displacements[2*_d]=0;
points_dagger[2*_d]=2*_d;
}
int point(int dir, int disp) {
assert(disp == -1 || disp == 0 || disp == 1);
assert(base+0 <= dir && dir < base+4);
// directions faster index = new indexing
// 4d (base = 0):
// point 0 1 2 3 4 5 6 7 8
// dir 0 1 2 3 0 1 2 3 0
// disp +1 +1 +1 +1 -1 -1 -1 -1 0
// 5d (base = 1):
// point 0 1 2 3 4 5 6 7 8
// dir 1 2 3 4 1 2 3 4 0
// disp +1 +1 +1 +1 -1 -1 -1 -1 0
// displacements faster index = old indexing
// 4d (base = 0):
// point 0 1 2 3 4 5 6 7 8
// dir 0 0 1 1 2 2 3 3 0
// disp +1 -1 +1 -1 +1 -1 +1 -1 0
// 5d (base = 1):
// point 0 1 2 3 4 5 6 7 8
// dir 1 1 2 2 3 3 4 4 0
// disp +1 -1 +1 -1 +1 -1 +1 -1 0
if(dir == 0 and disp == 0)
return 8;
else // New indexing
return (1 - disp) / 2 * 4 + dir - base;
// else // Old indexing
// return (4 * (dir - base) + 1 - disp) / 2;
}
};
template<class Fobj,class CComplex,int nbasis>
@ -258,7 +296,7 @@ public:
// Fine Object == (per site) type of fine field
// nbasis == number of deflation vectors
template<class Fobj,class CComplex,int nbasis>
class CoarsenedMatrix : public SparseMatrixBase<Lattice<iVector<CComplex,nbasis > > > {
class CoarsenedMatrix : public CheckerBoardedSparseMatrixBase<Lattice<iVector<CComplex,nbasis > > > {
public:
typedef iVector<CComplex,nbasis > siteVector;
@ -268,33 +306,59 @@ public:
typedef iMatrix<CComplex,nbasis > Cobj;
typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field
typedef Lattice<Fobj > FineField;
typedef CoarseVector FermionField;
// enrich interface, use default implementation as in FermionOperator ///////
void Dminus(CoarseVector const& in, CoarseVector& out) { out = in; }
void DminusDag(CoarseVector const& in, CoarseVector& out) { out = in; }
void ImportPhysicalFermionSource(CoarseVector const& input, CoarseVector& imported) { imported = input; }
void ImportUnphysicalFermion(CoarseVector const& input, CoarseVector& imported) { imported = input; }
void ExportPhysicalFermionSolution(CoarseVector const& solution, CoarseVector& exported) { exported = solution; };
void ExportPhysicalFermionSource(CoarseVector const& solution, CoarseVector& exported) { exported = solution; };
////////////////////
// Data members
////////////////////
Geometry geom;
GridBase * _grid;
GridBase* _cbgrid;
int hermitian;
CartesianStencil<siteVector,siteVector,int> Stencil;
CartesianStencil<siteVector,siteVector,int> StencilEven;
CartesianStencil<siteVector,siteVector,int> StencilOdd;
std::vector<CoarseMatrix> A;
std::vector<CoarseMatrix> Aeven;
std::vector<CoarseMatrix> Aodd;
CoarseMatrix AselfInv;
CoarseMatrix AselfInvEven;
CoarseMatrix AselfInvOdd;
Vector<RealD> dag_factor;
///////////////////////
// Interface
///////////////////////
GridBase * Grid(void) { return _grid; }; // this is all the linalg routines need to know
GridBase * RedBlackGrid() { return _cbgrid; };
int ConstEE() { return 0; }
void M (const CoarseVector &in, CoarseVector &out)
{
conformable(_grid,in.Grid());
conformable(in.Grid(),out.Grid());
out.Checkerboard() = in.Checkerboard();
SimpleCompressor<siteVector> compressor;
Stencil.HaloExchange(in,compressor);
autoView( in_v , in, AcceleratorRead);
autoView( out_v , out, AcceleratorWrite);
autoView( Stencil_v , Stencil, AcceleratorRead);
auto& geom_v = geom;
typedef LatticeView<Cobj> Aview;
Vector<Aview> AcceleratorViewContainer;
@ -316,14 +380,14 @@ public:
int ptype;
StencilEntry *SE;
for(int point=0;point<geom.npoint;point++){
for(int point=0;point<geom_v.npoint;point++){
SE=Stencil.GetEntry(ptype,point,ss);
SE=Stencil_v.GetEntry(ptype,point,ss);
if(SE->_is_local) {
nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
} else {
nbr = coalescedRead(Stencil.CommBuf()[SE->_offset]);
nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]);
}
acceleratorSynchronise();
@ -344,12 +408,72 @@ public:
return M(in,out);
} else {
// corresponds to Galerkin coarsening
CoarseVector tmp(Grid());
G5C(tmp, in);
M(tmp, out);
G5C(out, out);
return MdagNonHermitian(in, out);
}
};
void MdagNonHermitian(const CoarseVector &in, CoarseVector &out)
{
conformable(_grid,in.Grid());
conformable(in.Grid(),out.Grid());
out.Checkerboard() = in.Checkerboard();
SimpleCompressor<siteVector> compressor;
Stencil.HaloExchange(in,compressor);
autoView( in_v , in, AcceleratorRead);
autoView( out_v , out, AcceleratorWrite);
autoView( Stencil_v , Stencil, AcceleratorRead);
auto& geom_v = geom;
typedef LatticeView<Cobj> Aview;
Vector<Aview> AcceleratorViewContainer;
for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer.push_back(A[p].View(AcceleratorRead));
Aview *Aview_p = & AcceleratorViewContainer[0];
const int Nsimd = CComplex::Nsimd();
typedef decltype(coalescedRead(in_v[0])) calcVector;
typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
int osites=Grid()->oSites();
Vector<int> points(geom.npoint, 0);
for(int p=0; p<geom.npoint; p++)
points[p] = geom.points_dagger[p];
RealD* dag_factor_p = &dag_factor[0];
accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
int ss = sss/nbasis;
int b = sss%nbasis;
calcComplex res = Zero();
calcVector nbr;
int ptype;
StencilEntry *SE;
for(int p=0;p<geom_v.npoint;p++){
int point = points[p];
SE=Stencil_v.GetEntry(ptype,point,ss);
if(SE->_is_local) {
nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
} else {
nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]);
}
acceleratorSynchronise();
for(int bb=0;bb<nbasis;bb++) {
res = res + dag_factor_p[b*nbasis+bb]*coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb);
}
}
coalescedWrite(out_v[ss](b),res);
});
for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer[p].ViewClose();
}
void MdirComms(const CoarseVector &in)
{
SimpleCompressor<siteVector> compressor;
@ -359,6 +483,7 @@ public:
{
conformable(_grid,in.Grid());
conformable(_grid,out.Grid());
out.Checkerboard() = in.Checkerboard();
typedef LatticeView<Cobj> Aview;
Vector<Aview> AcceleratorViewContainer;
@ -367,6 +492,7 @@ public:
autoView( out_v , out, AcceleratorWrite);
autoView( in_v , in, AcceleratorRead);
autoView( Stencil_v , Stencil, AcceleratorRead);
const int Nsimd = CComplex::Nsimd();
typedef decltype(coalescedRead(in_v[0])) calcVector;
@ -380,12 +506,12 @@ public:
int ptype;
StencilEntry *SE;
SE=Stencil.GetEntry(ptype,point,ss);
SE=Stencil_v.GetEntry(ptype,point,ss);
if(SE->_is_local) {
nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
} else {
nbr = coalescedRead(Stencil.CommBuf()[SE->_offset]);
nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]);
}
acceleratorSynchronise();
@ -413,34 +539,7 @@ public:
this->MdirComms(in);
int ndim = in.Grid()->Nd();
//////////////
// 4D action like wilson
// 0+ => 0
// 0- => 1
// 1+ => 2
// 1- => 3
// etc..
//////////////
// 5D action like DWF
// 1+ => 0
// 1- => 1
// 2+ => 2
// 2- => 3
// etc..
auto point = [dir, disp, ndim](){
if(dir == 0 and disp == 0)
return 8;
else if ( ndim==4 ) {
return (4 * dir + 1 - disp) / 2;
} else {
return (4 * (dir-1) + 1 - disp) / 2;
}
}();
MdirCalc(in,out,point);
MdirCalc(in,out,geom.point(dir,disp));
};
void Mdiag(const CoarseVector &in, CoarseVector &out)
@ -449,17 +548,269 @@ public:
MdirCalc(in, out, point); // No comms
};
void Mooee(const CoarseVector &in, CoarseVector &out) {
MooeeInternal(in, out, DaggerNo, InverseNo);
}
void MooeeInv(const CoarseVector &in, CoarseVector &out) {
MooeeInternal(in, out, DaggerNo, InverseYes);
}
void MooeeDag(const CoarseVector &in, CoarseVector &out) {
MooeeInternal(in, out, DaggerYes, InverseNo);
}
void MooeeInvDag(const CoarseVector &in, CoarseVector &out) {
MooeeInternal(in, out, DaggerYes, InverseYes);
}
void Meooe(const CoarseVector &in, CoarseVector &out) {
if(in.Checkerboard() == Odd) {
DhopEO(in, out, DaggerNo);
} else {
DhopOE(in, out, DaggerNo);
}
}
void MeooeDag(const CoarseVector &in, CoarseVector &out) {
if(in.Checkerboard() == Odd) {
DhopEO(in, out, DaggerYes);
} else {
DhopOE(in, out, DaggerYes);
}
}
void Dhop(const CoarseVector &in, CoarseVector &out, int dag) {
conformable(in.Grid(), _grid); // verifies full grid
conformable(in.Grid(), out.Grid());
out.Checkerboard() = in.Checkerboard();
DhopInternal(Stencil, A, in, out, dag);
}
void DhopOE(const CoarseVector &in, CoarseVector &out, int dag) {
conformable(in.Grid(), _cbgrid); // verifies half grid
conformable(in.Grid(), out.Grid()); // drops the cb check
assert(in.Checkerboard() == Even);
out.Checkerboard() = Odd;
DhopInternal(StencilEven, Aodd, in, out, dag);
}
void DhopEO(const CoarseVector &in, CoarseVector &out, int dag) {
conformable(in.Grid(), _cbgrid); // verifies half grid
conformable(in.Grid(), out.Grid()); // drops the cb check
assert(in.Checkerboard() == Odd);
out.Checkerboard() = Even;
DhopInternal(StencilOdd, Aeven, in, out, dag);
}
void MooeeInternal(const CoarseVector &in, CoarseVector &out, int dag, int inv) {
out.Checkerboard() = in.Checkerboard();
assert(in.Checkerboard() == Odd || in.Checkerboard() == Even);
CoarseMatrix *Aself = nullptr;
if(in.Grid()->_isCheckerBoarded) {
if(in.Checkerboard() == Odd) {
Aself = (inv) ? &AselfInvOdd : &Aodd[geom.npoint-1];
DselfInternal(StencilOdd, *Aself, in, out, dag);
} else {
Aself = (inv) ? &AselfInvEven : &Aeven[geom.npoint-1];
DselfInternal(StencilEven, *Aself, in, out, dag);
}
} else {
Aself = (inv) ? &AselfInv : &A[geom.npoint-1];
DselfInternal(Stencil, *Aself, in, out, dag);
}
assert(Aself != nullptr);
}
void DselfInternal(CartesianStencil<siteVector,siteVector,int> &st, CoarseMatrix &a,
const CoarseVector &in, CoarseVector &out, int dag) {
int point = geom.npoint-1;
autoView( out_v, out, AcceleratorWrite);
autoView( in_v, in, AcceleratorRead);
autoView( st_v, st, AcceleratorRead);
autoView( a_v, a, AcceleratorRead);
const int Nsimd = CComplex::Nsimd();
typedef decltype(coalescedRead(in_v[0])) calcVector;
typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
RealD* dag_factor_p = &dag_factor[0];
if(dag) {
accelerator_for(sss, in.Grid()->oSites()*nbasis, Nsimd, {
int ss = sss/nbasis;
int b = sss%nbasis;
calcComplex res = Zero();
calcVector nbr;
int ptype;
StencilEntry *SE;
SE=st_v.GetEntry(ptype,point,ss);
if(SE->_is_local) {
nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
} else {
nbr = coalescedRead(st_v.CommBuf()[SE->_offset]);
}
acceleratorSynchronise();
for(int bb=0;bb<nbasis;bb++) {
res = res + dag_factor_p[b*nbasis+bb]*coalescedRead(a_v[ss](b,bb))*nbr(bb);
}
coalescedWrite(out_v[ss](b),res);
});
} else {
accelerator_for(sss, in.Grid()->oSites()*nbasis, Nsimd, {
int ss = sss/nbasis;
int b = sss%nbasis;
calcComplex res = Zero();
calcVector nbr;
int ptype;
StencilEntry *SE;
SE=st_v.GetEntry(ptype,point,ss);
if(SE->_is_local) {
nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
} else {
nbr = coalescedRead(st_v.CommBuf()[SE->_offset]);
}
acceleratorSynchronise();
for(int bb=0;bb<nbasis;bb++) {
res = res + coalescedRead(a_v[ss](b,bb))*nbr(bb);
}
coalescedWrite(out_v[ss](b),res);
});
}
}
void DhopInternal(CartesianStencil<siteVector,siteVector,int> &st, std::vector<CoarseMatrix> &a,
const CoarseVector &in, CoarseVector &out, int dag) {
SimpleCompressor<siteVector> compressor;
st.HaloExchange(in,compressor);
autoView( in_v, in, AcceleratorRead);
autoView( out_v, out, AcceleratorWrite);
autoView( st_v , st, AcceleratorRead);
typedef LatticeView<Cobj> Aview;
// determine in what order we need the points
int npoint = geom.npoint-1;
Vector<int> points(npoint, 0);
for(int p=0; p<npoint; p++)
points[p] = (dag && !hermitian) ? geom.points_dagger[p] : p;
Vector<Aview> AcceleratorViewContainer;
for(int p=0;p<npoint;p++) AcceleratorViewContainer.push_back(a[p].View(AcceleratorRead));
Aview *Aview_p = & AcceleratorViewContainer[0];
const int Nsimd = CComplex::Nsimd();
typedef decltype(coalescedRead(in_v[0])) calcVector;
typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
RealD* dag_factor_p = &dag_factor[0];
if(dag) {
accelerator_for(sss, in.Grid()->oSites()*nbasis, Nsimd, {
int ss = sss/nbasis;
int b = sss%nbasis;
calcComplex res = Zero();
calcVector nbr;
int ptype;
StencilEntry *SE;
for(int p=0;p<npoint;p++){
int point = points[p];
SE=st_v.GetEntry(ptype,point,ss);
if(SE->_is_local) {
nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
} else {
nbr = coalescedRead(st_v.CommBuf()[SE->_offset]);
}
acceleratorSynchronise();
for(int bb=0;bb<nbasis;bb++) {
res = res + dag_factor_p[b*nbasis+bb]*coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb);
}
}
coalescedWrite(out_v[ss](b),res);
});
} else {
accelerator_for(sss, in.Grid()->oSites()*nbasis, Nsimd, {
int ss = sss/nbasis;
int b = sss%nbasis;
calcComplex res = Zero();
calcVector nbr;
int ptype;
StencilEntry *SE;
for(int p=0;p<npoint;p++){
int point = points[p];
SE=st_v.GetEntry(ptype,point,ss);
if(SE->_is_local) {
nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
} else {
nbr = coalescedRead(st_v.CommBuf()[SE->_offset]);
}
acceleratorSynchronise();
for(int bb=0;bb<nbasis;bb++) {
res = res + coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb);
}
}
coalescedWrite(out_v[ss](b),res);
});
}
for(int p=0;p<npoint;p++) AcceleratorViewContainer[p].ViewClose();
}
CoarsenedMatrix(GridCartesian &CoarseGrid, int hermitian_=0) :
CoarsenedMatrix(GridCartesian &CoarseGrid, GridRedBlackCartesian &CoarseRBGrid, int hermitian_=0) :
_grid(&CoarseGrid),
_cbgrid(&CoarseRBGrid),
geom(CoarseGrid._ndimension),
hermitian(hermitian_),
Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements,0),
A(geom.npoint,&CoarseGrid)
StencilEven(&CoarseRBGrid,geom.npoint,Even,geom.directions,geom.displacements,0),
StencilOdd(&CoarseRBGrid,geom.npoint,Odd,geom.directions,geom.displacements,0),
A(geom.npoint,&CoarseGrid),
Aeven(geom.npoint,&CoarseRBGrid),
Aodd(geom.npoint,&CoarseRBGrid),
AselfInv(&CoarseGrid),
AselfInvEven(&CoarseRBGrid),
AselfInvOdd(&CoarseRBGrid),
dag_factor(nbasis*nbasis)
{
fillFactor();
};
void fillFactor() {
Eigen::MatrixXd dag_factor_eigen = Eigen::MatrixXd::Ones(nbasis, nbasis);
if(!hermitian) {
const int nb = nbasis/2;
dag_factor_eigen.block(0,nb,nb,nb) *= -1.0;
dag_factor_eigen.block(nb,0,nb,nb) *= -1.0;
}
// GPU readable prefactor
thread_for(i, nbasis*nbasis, {
int j = i/nbasis;
int k = i%nbasis;
dag_factor[i] = dag_factor_eigen(j, k);
});
}
void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop,
Aggregation<Fobj,CComplex,nbasis> & Subspace)
{
@ -608,6 +959,9 @@ public:
std::cout << GridLogMessage << " ForceHermitian, new code "<<std::endl;
ForceHermitian();
}
InvertSelfStencilLink(); std::cout << GridLogMessage << "Coarse self link inverted" << std::endl;
FillHalfCbs(); std::cout << GridLogMessage << "Coarse half checkerboards filled" << std::endl;
}
void ForceHermitian(void) {
@ -629,6 +983,50 @@ public:
}
}
}
void InvertSelfStencilLink() {
std::cout << GridLogDebug << "CoarsenedMatrix::InvertSelfStencilLink" << std::endl;
int localVolume = Grid()->lSites();
typedef typename Cobj::scalar_object scalar_object;
autoView(Aself_v, A[geom.npoint-1], CpuRead);
autoView(AselfInv_v, AselfInv, CpuWrite);
thread_for(site, localVolume, { // NOTE: Not able to bring this to GPU because of Eigen + peek/poke
Eigen::MatrixXcd selfLinkEigen = Eigen::MatrixXcd::Zero(nbasis, nbasis);
Eigen::MatrixXcd selfLinkInvEigen = Eigen::MatrixXcd::Zero(nbasis, nbasis);
scalar_object selfLink = Zero();
scalar_object selfLinkInv = Zero();
Coordinate lcoor;
Grid()->LocalIndexToLocalCoor(site, lcoor);
peekLocalSite(selfLink, Aself_v, lcoor);
for (int i = 0; i < nbasis; ++i)
for (int j = 0; j < nbasis; ++j)
selfLinkEigen(i, j) = static_cast<ComplexD>(TensorRemove(selfLink(i, j)));
selfLinkInvEigen = selfLinkEigen.inverse();
for(int i = 0; i < nbasis; ++i)
for(int j = 0; j < nbasis; ++j)
selfLinkInv(i, j) = selfLinkInvEigen(i, j);
pokeLocalSite(selfLinkInv, AselfInv_v, lcoor);
});
}
void FillHalfCbs() {
std::cout << GridLogDebug << "CoarsenedMatrix::FillHalfCbs" << std::endl;
for(int p = 0; p < geom.npoint; ++p) {
pickCheckerboard(Even, Aeven[p], A[p]);
pickCheckerboard(Odd, Aodd[p], A[p]);
}
pickCheckerboard(Even, AselfInvEven, AselfInv);
pickCheckerboard(Odd, AselfInvOdd, AselfInv);
}
};
NAMESPACE_END(Grid);

View File

@ -173,7 +173,8 @@ template<class T> using cshiftAllocator = devAllocator<T>;
template<class T> using cshiftAllocator = std::allocator<T>;
#endif
template<class T> using Vector = std::vector<T,uvmAllocator<T> >;
template<class T> using Vector = std::vector<T,uvmAllocator<T> >;
template<class T> using stencilVector = std::vector<T,alignedAllocator<T> >;
template<class T> using commVector = std::vector<T,devAllocator<T> >;
template<class T> using cshiftVector = std::vector<T,cshiftAllocator<T> >;

View File

@ -1,11 +1,12 @@
#include <Grid/GridCore.h>
#ifndef GRID_UVM
#warning "Using explicit device memory copies"
NAMESPACE_BEGIN(Grid);
//define dprintf(...) printf ( __VA_ARGS__ ); fflush(stdout);
#define dprintf(...)
////////////////////////////////////////////////////////////
// For caching copies of data on device
////////////////////////////////////////////////////////////
@ -103,7 +104,7 @@ void MemoryManager::AccDiscard(AcceleratorViewEntry &AccCache)
///////////////////////////////////////////////////////////
assert(AccCache.state!=Empty);
// dprintf("MemoryManager: Discard(%llx) %llx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
dprintf("MemoryManager: Discard(%llx) %llx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
assert(AccCache.accLock==0);
assert(AccCache.cpuLock==0);
assert(AccCache.CpuPtr!=(uint64_t)NULL);
@ -111,7 +112,7 @@ void MemoryManager::AccDiscard(AcceleratorViewEntry &AccCache)
AcceleratorFree((void *)AccCache.AccPtr,AccCache.bytes);
DeviceBytes -=AccCache.bytes;
LRUremove(AccCache);
// dprintf("MemoryManager: Free(%llx) LRU %lld Total %lld\n",(uint64_t)AccCache.AccPtr,DeviceLRUBytes,DeviceBytes);
dprintf("MemoryManager: Free(%llx) LRU %lld Total %lld\n",(uint64_t)AccCache.AccPtr,DeviceLRUBytes,DeviceBytes);
}
uint64_t CpuPtr = AccCache.CpuPtr;
EntryErase(CpuPtr);
@ -125,7 +126,7 @@ void MemoryManager::Evict(AcceleratorViewEntry &AccCache)
///////////////////////////////////////////////////////////////////////////
assert(AccCache.state!=Empty);
// dprintf("MemoryManager: Evict(%llx) %llx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
dprintf("MemoryManager: Evict(%llx) %llx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
assert(AccCache.accLock==0);
assert(AccCache.cpuLock==0);
if(AccCache.state==AccDirty) {
@ -136,7 +137,7 @@ void MemoryManager::Evict(AcceleratorViewEntry &AccCache)
AcceleratorFree((void *)AccCache.AccPtr,AccCache.bytes);
DeviceBytes -=AccCache.bytes;
LRUremove(AccCache);
// dprintf("MemoryManager: Free(%llx) footprint now %lld \n",(uint64_t)AccCache.AccPtr,DeviceBytes);
dprintf("MemoryManager: Free(%llx) footprint now %lld \n",(uint64_t)AccCache.AccPtr,DeviceBytes);
}
uint64_t CpuPtr = AccCache.CpuPtr;
EntryErase(CpuPtr);
@ -149,7 +150,7 @@ void MemoryManager::Flush(AcceleratorViewEntry &AccCache)
assert(AccCache.AccPtr!=(uint64_t)NULL);
assert(AccCache.CpuPtr!=(uint64_t)NULL);
acceleratorCopyFromDevice((void *)AccCache.AccPtr,(void *)AccCache.CpuPtr,AccCache.bytes);
// dprintf("MemoryManager: Flush %llx -> %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
dprintf("MemoryManager: Flush %llx -> %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
DeviceToHostBytes+=AccCache.bytes;
DeviceToHostXfer++;
AccCache.state=Consistent;
@ -164,7 +165,7 @@ void MemoryManager::Clone(AcceleratorViewEntry &AccCache)
AccCache.AccPtr=(uint64_t)AcceleratorAllocate(AccCache.bytes);
DeviceBytes+=AccCache.bytes;
}
// dprintf("MemoryManager: Clone %llx <- %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
dprintf("MemoryManager: Clone %llx <- %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
acceleratorCopyToDevice((void *)AccCache.CpuPtr,(void *)AccCache.AccPtr,AccCache.bytes);
HostToDeviceBytes+=AccCache.bytes;
HostToDeviceXfer++;
@ -227,18 +228,24 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
// Find if present, otherwise get or force an empty
////////////////////////////////////////////////////////////////////////////
if ( EntryPresent(CpuPtr)==0 ){
EvictVictims(bytes);
EntryCreate(CpuPtr,bytes,mode,hint);
}
auto AccCacheIterator = EntryLookup(CpuPtr);
auto & AccCache = AccCacheIterator->second;
if (!AccCache.AccPtr) {
EvictVictims(bytes);
}
assert((mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard));
assert(AccCache.cpuLock==0); // Programming error
if(AccCache.state!=Empty) {
dprintf("ViewOpen found entry %llx %llx : %lld %lld\n",
(uint64_t)AccCache.CpuPtr,
(uint64_t)CpuPtr,
(uint64_t)AccCache.bytes,
(uint64_t)bytes);
assert(AccCache.CpuPtr == CpuPtr);
assert(AccCache.bytes ==bytes);
}
@ -285,21 +292,21 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
AccCache.state = Consistent; // CpuDirty + AccRead => Consistent
}
AccCache.accLock++;
// printf("Copied CpuDirty entry into device accLock %d\n",AccCache.accLock);
dprintf("Copied CpuDirty entry into device accLock %d\n",AccCache.accLock);
} else if(AccCache.state==Consistent) {
if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard))
AccCache.state = AccDirty; // Consistent + AcceleratorWrite=> AccDirty
else
AccCache.state = Consistent; // Consistent + AccRead => Consistent
AccCache.accLock++;
// printf("Consistent entry into device accLock %d\n",AccCache.accLock);
dprintf("Consistent entry into device accLock %d\n",AccCache.accLock);
} else if(AccCache.state==AccDirty) {
if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard))
AccCache.state = AccDirty; // AccDirty + AcceleratorWrite=> AccDirty
else
AccCache.state = AccDirty; // AccDirty + AccRead => AccDirty
AccCache.accLock++;
// printf("AccDirty entry into device accLock %d\n",AccCache.accLock);
dprintf("AccDirty entry into device accLock %d\n",AccCache.accLock);
} else {
assert(0);
}
@ -361,13 +368,16 @@ uint64_t MemoryManager::CpuViewOpen(uint64_t CpuPtr,size_t bytes,ViewMode mode,V
// Find if present, otherwise get or force an empty
////////////////////////////////////////////////////////////////////////////
if ( EntryPresent(CpuPtr)==0 ){
EvictVictims(bytes);
EntryCreate(CpuPtr,bytes,mode,transient);
}
auto AccCacheIterator = EntryLookup(CpuPtr);
auto & AccCache = AccCacheIterator->second;
if (!AccCache.AccPtr) {
EvictVictims(bytes);
}
assert((mode==CpuRead)||(mode==CpuWrite));
assert(AccCache.accLock==0); // Programming error

View File

@ -127,6 +127,11 @@ accelerator_inline void convertType(T1 & out, const iScalar<T2> & in) {
convertType(out,in._internal);
}
template<typename T1, typename std::enable_if<!isGridScalar<T1>::value, T1>::type* = nullptr>
accelerator_inline void convertType(T1 & out, const iScalar<T1> & in) {
convertType(out,in._internal);
}
template<typename T1,typename T2>
accelerator_inline void convertType(iScalar<T1> & out, const T2 & in) {
convertType(out._internal,in);

View File

@ -269,7 +269,7 @@ public:
std::vector<Vector<std::pair<int,int> > > face_table ;
Vector<int> surface_list;
Vector<StencilEntry> _entries; // Resident in managed memory
stencilVector<StencilEntry> _entries; // Resident in managed memory
std::vector<Packet> Packets;
std::vector<Merge> Mergers;
std::vector<Merge> MergersSHM;

View File

@ -21,22 +21,26 @@ void acceleratorInit(void)
#define ENV_RANK_SLURM "SLURM_PROCID"
#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK"
#define ENV_RANK_MVAPICH "MV2_COMM_WORLD_RANK"
// We extract the local rank initialization using an environment variable
if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL) {
printf("OPENMPI detected\n");
rank = atoi(localRankStr);
} else if ((localRankStr = getenv(ENV_LOCAL_RANK_MVAPICH)) != NULL) {
printf("MVAPICH detected\n");
rank = atoi(localRankStr);
} else if ((localRankStr = getenv(ENV_LOCAL_RANK_SLURM)) != NULL) {
printf("SLURM detected\n");
rank = atoi(localRankStr);
} else {
printf("MPI version is unknown - bad things may happen\n");
}
if ((localRankStr = getenv(ENV_RANK_OMPI )) != NULL) { world_rank = atoi(localRankStr);}
if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);}
if ((localRankStr = getenv(ENV_RANK_SLURM )) != NULL) { world_rank = atoi(localRankStr);}
// We extract the local rank initialization using an environment variable
if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL) {
if (!world_rank)
printf("OPENMPI detected\n");
rank = atoi(localRankStr);
} else if ((localRankStr = getenv(ENV_LOCAL_RANK_MVAPICH)) != NULL) {
if (!world_rank)
printf("MVAPICH detected\n");
rank = atoi(localRankStr);
} else if ((localRankStr = getenv(ENV_LOCAL_RANK_SLURM)) != NULL) {
if (!world_rank)
printf("SLURM detected\n");
rank = atoi(localRankStr);
} else {
if (!world_rank)
printf("MPI version is unknown - bad things may happen\n");
}
size_t totalDeviceMem=0;
for (int i = 0; i < nDevices; i++) {

View File

@ -0,0 +1,475 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/solver/Test_coarse_even_odd.cc
Copyright (C) 2015-2020
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
#ifndef NBASIS
#define NBASIS 40
#endif
// NOTE: The tests in this file are written in analogy to
// - tests/core/Test_wilson_even_odd.cc
// - tests/core/Test_wilson_clover.cc
std::vector<int> readFromCommandlineIvec(int* argc,
char*** argv,
std::string&& option,
const std::vector<int>& defaultValue) {
std::string arg;
std::vector<int> ret(defaultValue);
if(GridCmdOptionExists(*argv, *argv + *argc, option)) {
arg = GridCmdOptionPayload(*argv, *argv + *argc, option);
GridCmdOptionIntVector(arg, ret);
}
return ret;
}
int main(int argc, char** argv) {
Grid_init(&argc, &argv);
/////////////////////////////////////////////////////////////////////////////
// Read from command line //
/////////////////////////////////////////////////////////////////////////////
const int nbasis = NBASIS; static_assert((nbasis & 0x1) == 0, "");
const int nb = nbasis/2;
Coordinate blockSize = readFromCommandlineIvec(&argc, &argv, "--blocksize", {2, 2, 2, 2});
std::cout << GridLogMessage << "Compiled with nbasis = " << nbasis << " -> nb = " << nb << std::endl;
/////////////////////////////////////////////////////////////////////////////
// General setup //
/////////////////////////////////////////////////////////////////////////////
Coordinate clatt = GridDefaultLatt();
for(int d=0; d<clatt.size(); d++) clatt[d] = clatt[d] / blockSize[d];
GridCartesian* Grid_f = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
GridCartesian* Grid_c = SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
GridRedBlackCartesian* RBGrid_f = SpaceTimeGrid::makeFourDimRedBlackGrid(Grid_f);
GridRedBlackCartesian* RBGrid_c = SpaceTimeGrid::makeFourDimRedBlackGrid(Grid_c);
std::cout << GridLogMessage << "Grid_f:" << std::endl; Grid_f->show_decomposition();
std::cout << GridLogMessage << "Grid_c:" << std::endl; Grid_c->show_decomposition();
std::cout << GridLogMessage << "RBGrid_f:" << std::endl; RBGrid_f->show_decomposition();
std::cout << GridLogMessage << "RBGrid_c:" << std::endl; RBGrid_c->show_decomposition();
GridParallelRNG pRNG_f(Grid_f);
GridParallelRNG pRNG_c(Grid_c);
std::vector<int> seeds({1, 2, 3, 4});
pRNG_f.SeedFixedIntegers(seeds);
pRNG_c.SeedFixedIntegers(seeds);
/////////////////////////////////////////////////////////////////////////////
// Setup of Dirac Matrix and Operator //
/////////////////////////////////////////////////////////////////////////////
LatticeGaugeField Umu(Grid_f); SU3::HotConfiguration(pRNG_f, Umu);
RealD checkTolerance = (getPrecision<LatticeFermion>::value == 1) ? 1e-7 : 1e-15;
RealD mass = -0.30;
RealD csw = 1.9192;
WilsonCloverFermionR Dwc(Umu, *Grid_f, *RBGrid_f, mass, csw, csw);
MdagMLinearOperator<WilsonCloverFermionR, LatticeFermion> MdagMOp_Dwc(Dwc);
/////////////////////////////////////////////////////////////////////////////
// Type definitions //
/////////////////////////////////////////////////////////////////////////////
typedef Aggregation<vSpinColourVector, vTComplex, nbasis> Aggregates;
typedef CoarsenedMatrix<vSpinColourVector, vTComplex, nbasis> CoarseDiracMatrix;
typedef CoarseDiracMatrix::CoarseVector CoarseVector;
/////////////////////////////////////////////////////////////////////////////
// Setup of Aggregation //
/////////////////////////////////////////////////////////////////////////////
Aggregates Aggs(Grid_c, Grid_f, 0);
{
LatticeFermion tmp(Aggs.subspace[0].Grid());
for(int n = 0; n < nb; n++) {
gaussian(pRNG_f, Aggs.subspace[n]);
G5C(tmp, Aggs.subspace[n]);
axpby(Aggs.subspace[n + nb], 0.5, -0.5, Aggs.subspace[n], tmp);
axpby(Aggs.subspace[n], 0.5, 0.5, Aggs.subspace[n], tmp);
}
}
/////////////////////////////////////////////////////////////////////////////
// Setup of CoarsenedMatrix and Operator //
/////////////////////////////////////////////////////////////////////////////
const int hermitian = 0;
CoarseDiracMatrix Dc(*Grid_c, *RBGrid_c, hermitian);
Dc.CoarsenOperator(Grid_f, MdagMOp_Dwc, Aggs);
MdagMLinearOperator<CoarseDiracMatrix, CoarseVector> MdagMOp_Dc(Dc);
/////////////////////////////////////////////////////////////////////////////
// Setup vectors used in all tests //
/////////////////////////////////////////////////////////////////////////////
CoarseVector src(Grid_c); random(pRNG_c, src);
CoarseVector diff(Grid_c); diff = Zero();
/////////////////////////////////////////////////////////////////////////////
// Start of tests //
/////////////////////////////////////////////////////////////////////////////
{
std::cout << GridLogMessage << "===========================================================================" << std::endl;
std::cout << GridLogMessage << "= Test Dhop + Mdiag = Munprec" << std::endl;
std::cout << GridLogMessage << "===========================================================================" << std::endl;
CoarseVector phi(Grid_c); phi = Zero();
CoarseVector chi(Grid_c); chi = Zero();
CoarseVector res(Grid_c); res = Zero();
CoarseVector ref(Grid_c); ref = Zero();
Dc.Mdiag(src, phi); std::cout << GridLogMessage << "Applied Mdiag" << std::endl;
Dc.Dhop(src, chi, DaggerNo); std::cout << GridLogMessage << "Applied Dhop" << std::endl;
Dc.M(src, ref); std::cout << GridLogMessage << "Applied M" << std::endl;
res = phi + chi;
diff = ref - res;
auto absDev = norm2(diff);
auto relDev = absDev / norm2(ref);
std::cout << GridLogMessage << "norm2(Munprec), norm2(Dhop + Mdiag), abs. deviation, rel. deviation: "
<< norm2(ref) << " " << norm2(res) << " " << absDev << " " << relDev << " -> check "
<< ((relDev < checkTolerance) ? "passed" : "failed") << std::endl;
assert(relDev <= checkTolerance);
}
{
std::cout << GridLogMessage << "===========================================================================" << std::endl;
std::cout << GridLogMessage << "= Test Meo + Moe = Dhop" << std::endl;
std::cout << GridLogMessage << "===========================================================================" << std::endl;
CoarseVector src_e(RBGrid_c); src_e = Zero();
CoarseVector src_o(RBGrid_c); src_o = Zero();
CoarseVector res_e(RBGrid_c); res_e = Zero();
CoarseVector res_o(RBGrid_c); res_o = Zero();
CoarseVector res(Grid_c); res = Zero();
CoarseVector ref(Grid_c); ref = Zero();
pickCheckerboard(Even, src_e, src);
pickCheckerboard(Odd, src_o, src);
Dc.Meooe(src_e, res_o); std::cout << GridLogMessage << "Applied Meo" << std::endl;
Dc.Meooe(src_o, res_e); std::cout << GridLogMessage << "Applied Moe" << std::endl;
Dc.Dhop(src, ref, DaggerNo); std::cout << GridLogMessage << "Applied Dhop" << std::endl;
setCheckerboard(res, res_o);
setCheckerboard(res, res_e);
diff = ref - res;
auto absDev = norm2(diff);
auto relDev = absDev / norm2(ref);
std::cout << GridLogMessage << "norm2(Dhop), norm2(Meo + Moe), abs. deviation, rel. deviation: "
<< norm2(ref) << " " << norm2(res) << " " << absDev << " " << relDev
<< " -> check " << ((relDev < checkTolerance) ? "passed" : "failed") << std::endl;
assert(relDev <= checkTolerance);
}
{
std::cout << GridLogMessage << "===========================================================================" << std::endl;
std::cout << GridLogMessage << "= Test |(Im(v^dag M^dag M v)| = 0" << std::endl;
std::cout << GridLogMessage << "===========================================================================" << std::endl;
CoarseVector tmp(Grid_c); tmp = Zero();
CoarseVector phi(Grid_c); phi = Zero();
Dc.M(src, tmp); std::cout << GridLogMessage << "Applied M" << std::endl;
Dc.Mdag(tmp, phi); std::cout << GridLogMessage << "Applied Mdag" << std::endl;
std::cout << GridLogMessage << "src = " << norm2(src) << " tmp = " << norm2(tmp) << " phi = " << norm2(phi) << std::endl;
ComplexD dot = innerProduct(src, phi);
auto relDev = abs(imag(dot)) / abs(real(dot));
std::cout << GridLogMessage << "Re(v^dag M^dag M v), Im(v^dag M^dag M v), rel.deviation: "
<< real(dot) << " " << imag(dot) << " " << relDev
<< " -> check " << ((relDev < checkTolerance) ? "passed" : "failed") << std::endl;
assert(relDev <= checkTolerance);
}
{
std::cout << GridLogMessage << "===========================================================================" << std::endl;
std::cout << GridLogMessage << "= Test |(Im(v^dag Mooee^dag Mooee v)| = 0 (full grid)" << std::endl;
std::cout << GridLogMessage << "===========================================================================" << std::endl;
CoarseVector tmp(Grid_c); tmp = Zero();
CoarseVector phi(Grid_c); phi = Zero();
Dc.Mooee(src, tmp); std::cout << GridLogMessage << "Applied Mooee" << std::endl;
Dc.MooeeDag(tmp, phi); std::cout << GridLogMessage << "Applied MooeeDag" << std::endl;
ComplexD dot = innerProduct(src, phi);
auto relDev = abs(imag(dot)) / abs(real(dot));
std::cout << GridLogMessage << "Re(v^dag Mooee^dag Mooee v), Im(v^dag Mooee^dag Mooee v), rel.deviation: "
<< real(dot) << " " << imag(dot) << " " << relDev
<< " -> check " << ((relDev < checkTolerance) ? "passed" : "failed") << std::endl;
assert(relDev <= checkTolerance);
}
{
std::cout << GridLogMessage << "===========================================================================" << std::endl;
std::cout << GridLogMessage << "= Test MooeeInv Mooee = 1 (full grid)" << std::endl;
std::cout << GridLogMessage << "===========================================================================" << std::endl;
CoarseVector tmp(Grid_c); tmp = Zero();
CoarseVector phi(Grid_c); phi = Zero();
Dc.Mooee(src, tmp); std::cout << GridLogMessage << "Applied Mooee" << std::endl;
Dc.MooeeInv(tmp, phi); std::cout << GridLogMessage << "Applied MooeeInv" << std::endl;
diff = src - phi;
auto absDev = norm2(diff);
auto relDev = absDev / norm2(src);
std::cout << GridLogMessage << "norm2(src), norm2(MooeeInv Mooee src), abs. deviation, rel. deviation: "
<< norm2(src) << " " << norm2(phi) << " " << absDev << " " << relDev
<< " -> check " << ((relDev < checkTolerance) ? "passed" : "failed") << std::endl;
assert(relDev <= checkTolerance);
}
{
std::cout << GridLogMessage << "===========================================================================" << std::endl;
std::cout << GridLogMessage << "= Test MeooeDagger is the dagger of Meooe by requiring" << std::endl;
std::cout << GridLogMessage << "= < phi | Meooe | chi > * = < chi | Meooe^dag| phi>" << std::endl;
std::cout << GridLogMessage << "===========================================================================" << std::endl;
// clang-format off
CoarseVector phi(Grid_c); random(pRNG_c, phi);
CoarseVector chi(Grid_c); random(pRNG_c, chi);
CoarseVector chi_e(RBGrid_c); chi_e = Zero();
CoarseVector chi_o(RBGrid_c); chi_o = Zero();
CoarseVector dchi_e(RBGrid_c); dchi_e = Zero();
CoarseVector dchi_o(RBGrid_c); dchi_o = Zero();
CoarseVector phi_e(RBGrid_c); phi_e = Zero();
CoarseVector phi_o(RBGrid_c); phi_o = Zero();
CoarseVector dphi_e(RBGrid_c); dphi_e = Zero();
CoarseVector dphi_o(RBGrid_c); dphi_o = Zero();
// clang-format on
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd, phi_o, phi);
Dc.Meooe(chi_e, dchi_o); std::cout << GridLogMessage << "Applied Meo" << std::endl;
Dc.Meooe(chi_o, dchi_e); std::cout << GridLogMessage << "Applied Moe" << std::endl;
Dc.MeooeDag(phi_e, dphi_o); std::cout << GridLogMessage << "Applied MeoDag" << std::endl;
Dc.MeooeDag(phi_o, dphi_e); std::cout << GridLogMessage << "Applied MoeDag" << std::endl;
ComplexD phiDchi_e = innerProduct(phi_e, dchi_e);
ComplexD phiDchi_o = innerProduct(phi_o, dchi_o);
ComplexD chiDphi_e = innerProduct(chi_e, dphi_e);
ComplexD chiDphi_o = innerProduct(chi_o, dphi_o);
std::cout << GridLogDebug << "norm dchi_e = " << norm2(dchi_e) << " norm dchi_o = " << norm2(dchi_o) << " norm dphi_e = " << norm2(dphi_e)
<< " norm dphi_o = " << norm2(dphi_e) << std::endl;
std::cout << GridLogMessage << "e " << phiDchi_e << " " << chiDphi_e << std::endl;
std::cout << GridLogMessage << "o " << phiDchi_o << " " << chiDphi_o << std::endl;
std::cout << GridLogMessage << "phiDchi_e - conj(chiDphi_o) " << phiDchi_e - conj(chiDphi_o) << std::endl;
std::cout << GridLogMessage << "phiDchi_o - conj(chiDphi_e) " << phiDchi_o - conj(chiDphi_e) << std::endl;
}
{
std::cout << GridLogMessage << "===========================================================================" << std::endl;
std::cout << GridLogMessage << "= Test MooeeInv Mooee = 1 (checkerboards separately)" << std::endl;
std::cout << GridLogMessage << "===========================================================================" << std::endl;
CoarseVector chi(Grid_c); random(pRNG_c, chi);
CoarseVector tmp(Grid_c); tmp = Zero();
CoarseVector phi(Grid_c); phi = Zero();
CoarseVector chi_e(RBGrid_c); chi_e = Zero();
CoarseVector chi_o(RBGrid_c); chi_o = Zero();
CoarseVector phi_e(RBGrid_c); phi_e = Zero();
CoarseVector phi_o(RBGrid_c); phi_o = Zero();
CoarseVector tmp_e(RBGrid_c); tmp_e = Zero();
CoarseVector tmp_o(RBGrid_c); tmp_o = Zero();
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
pickCheckerboard(Even, tmp_e, tmp);
pickCheckerboard(Odd, tmp_o, tmp);
Dc.Mooee(chi_e, tmp_e); std::cout << GridLogMessage << "Applied Mee" << std::endl;
Dc.MooeeInv(tmp_e, phi_e); std::cout << GridLogMessage << "Applied MeeInv" << std::endl;
Dc.Mooee(chi_o, tmp_o); std::cout << GridLogMessage << "Applied Moo" << std::endl;
Dc.MooeeInv(tmp_o, phi_o); std::cout << GridLogMessage << "Applied MooInv" << std::endl;
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
diff = chi - phi;
auto absDev = norm2(diff);
auto relDev = absDev / norm2(chi);
std::cout << GridLogMessage << "norm2(chi), norm2(MeeInv Mee chi), abs. deviation, rel. deviation: "
<< norm2(chi) << " " << norm2(phi) << " " << absDev << " " << relDev
<< " -> check " << ((relDev < checkTolerance) ? "passed" : "failed") << std::endl;
assert(relDev <= checkTolerance);
}
{
std::cout << GridLogMessage << "===========================================================================" << std::endl;
std::cout << GridLogMessage << "= Test MooeeDag MooeeInvDag = 1 (checkerboards separately)" << std::endl;
std::cout << GridLogMessage << "===========================================================================" << std::endl;
CoarseVector chi(Grid_c); random(pRNG_c, chi);
CoarseVector tmp(Grid_c); tmp = Zero();
CoarseVector phi(Grid_c); phi = Zero();
CoarseVector chi_e(RBGrid_c); chi_e = Zero();
CoarseVector chi_o(RBGrid_c); chi_o = Zero();
CoarseVector phi_e(RBGrid_c); phi_e = Zero();
CoarseVector phi_o(RBGrid_c); phi_o = Zero();
CoarseVector tmp_e(RBGrid_c); tmp_e = Zero();
CoarseVector tmp_o(RBGrid_c); tmp_o = Zero();
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
pickCheckerboard(Even, tmp_e, tmp);
pickCheckerboard(Odd, tmp_o, tmp);
Dc.MooeeDag(chi_e, tmp_e); std::cout << GridLogMessage << "Applied MeeDag" << std::endl;
Dc.MooeeInvDag(tmp_e, phi_e); std::cout << GridLogMessage << "Applied MeeInvDag" << std::endl;
Dc.MooeeDag(chi_o, tmp_o); std::cout << GridLogMessage << "Applied MooDag" << std::endl;
Dc.MooeeInvDag(tmp_o, phi_o); std::cout << GridLogMessage << "Applied MooInvDag" << std::endl;
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
diff = chi - phi;
auto absDev = norm2(diff);
auto relDev = absDev / norm2(chi);
std::cout << GridLogMessage << "norm2(chi), norm2(MeeDag MeeInvDag chi), abs. deviation, rel. deviation: "
<< norm2(chi) << " " << norm2(phi) << " " << absDev << " " << relDev
<< " -> check " << ((relDev < checkTolerance) ? "passed" : "failed") << std::endl;
assert(relDev <= checkTolerance);
}
{
std::cout << GridLogMessage << "===========================================================================" << std::endl;
std::cout << GridLogMessage << "= Test Meo + Moe + Moo + Mee = Munprec" << std::endl;
std::cout << GridLogMessage << "===========================================================================" << std::endl;
CoarseVector chi(Grid_c); chi = Zero();
CoarseVector phi(Grid_c); phi = Zero();
CoarseVector ref(Grid_c); ref = Zero();
CoarseVector src_e(RBGrid_c); src_e = Zero();
CoarseVector src_o(RBGrid_c); src_o = Zero();
CoarseVector phi_e(RBGrid_c); phi_e = Zero();
CoarseVector phi_o(RBGrid_c); phi_o = Zero();
CoarseVector chi_e(RBGrid_c); chi_e = Zero();
CoarseVector chi_o(RBGrid_c); chi_o = Zero();
pickCheckerboard(Even, src_e, src);
pickCheckerboard(Odd, src_o, src);
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd, phi_o, phi);
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
// M phi = (Mooee src_e + Meooe src_o , Mooee src_o + Meooe src_e)
Dc.M(src, ref); // Reference result from the unpreconditioned operator
// EO matrix
Dc.Mooee(src_e, chi_e); std::cout << GridLogMessage << "Applied Mee" << std::endl;
Dc.Mooee(src_o, chi_o); std::cout << GridLogMessage << "Applied Moo" << std::endl;
Dc.Meooe(src_o, phi_e); std::cout << GridLogMessage << "Applied Moe" << std::endl;
Dc.Meooe(src_e, phi_o); std::cout << GridLogMessage << "Applied Meo" << std::endl;
phi_o += chi_o;
phi_e += chi_e;
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
std::cout << GridLogDebug << "norm phi_e = " << norm2(phi_e) << " norm phi_o = " << norm2(phi_o) << " norm phi = " << norm2(phi) << std::endl;
diff = ref - phi;
auto absDev = norm2(diff);
auto relDev = absDev / norm2(ref);
std::cout << GridLogMessage << "norm2(Dunprec), norm2(Deoprec), abs. deviation, rel. deviation: "
<< norm2(ref) << " " << norm2(phi) << " " << absDev << " " << relDev
<< " -> check " << ((relDev < checkTolerance) ? "passed" : "failed") << std::endl;
assert(relDev <= checkTolerance);
}
{
std::cout << GridLogMessage << "===========================================================================" << std::endl;
std::cout << GridLogMessage << "= Test MpcDagMpc is hermitian" << std::endl;
std::cout << GridLogMessage << "===========================================================================" << std::endl;
CoarseVector phi(Grid_c); random(pRNG_c, phi);
CoarseVector chi(Grid_c); random(pRNG_c, chi);
CoarseVector chi_e(RBGrid_c); chi_e = Zero();
CoarseVector chi_o(RBGrid_c); chi_o = Zero();
CoarseVector dchi_e(RBGrid_c); dchi_e = Zero();
CoarseVector dchi_o(RBGrid_c); dchi_o = Zero();
CoarseVector phi_e(RBGrid_c); phi_e = Zero();
CoarseVector phi_o(RBGrid_c); phi_o = Zero();
CoarseVector dphi_e(RBGrid_c); dphi_e = Zero();
CoarseVector dphi_o(RBGrid_c); dphi_o = Zero();
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd, phi_o, phi);
SchurDiagMooeeOperator<CoarseDiracMatrix,CoarseVector> HermOpEO(Dc);
HermOpEO.MpcDagMpc(chi_e, dchi_e); std::cout << GridLogMessage << "Applied MpcDagMpc to chi_e" << std::endl;
HermOpEO.MpcDagMpc(chi_o, dchi_o); std::cout << GridLogMessage << "Applied MpcDagMpc to chi_o" << std::endl;
HermOpEO.MpcDagMpc(phi_e, dphi_e); std::cout << GridLogMessage << "Applied MpcDagMpc to phi_e" << std::endl;
HermOpEO.MpcDagMpc(phi_o, dphi_o); std::cout << GridLogMessage << "Applied MpcDagMpc to phi_o" << std::endl;
ComplexD phiDchi_e = innerProduct(phi_e, dchi_e);
ComplexD phiDchi_o = innerProduct(phi_o, dchi_o);
ComplexD chiDphi_e = innerProduct(chi_e, dphi_e);
ComplexD chiDphi_o = innerProduct(chi_o, dphi_o);
std::cout << GridLogMessage << "e " << phiDchi_e << " " << chiDphi_e << std::endl;
std::cout << GridLogMessage << "o " << phiDchi_o << " " << chiDphi_o << std::endl;
std::cout << GridLogMessage << "phiDchi_e - conj(chiDphi_e) " << phiDchi_e - conj(chiDphi_e) << std::endl;
std::cout << GridLogMessage << "phiDchi_o - conj(chiDphi_o) " << phiDchi_o - conj(chiDphi_o) << std::endl;
}
Grid_finalize();
}