mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-21 17:22:03 +01:00
Compare commits
10 Commits
737d3ffb98
...
3bc2da5321
Author | SHA1 | Date | |
---|---|---|---|
3bc2da5321 | |||
2d710d6bfd | |||
6532b7f32b | |||
7b41b92d99 | |||
dd557af84b | |||
59b9d0e030 | |||
b82eee4733 | |||
6a87487544 | |||
fcf5023845 | |||
c8adad6d8b |
@ -226,6 +226,7 @@ public:
|
|||||||
void ProjectNearestNeighbour(RealD shift, GeneralCoarseOp &CopyMe)
|
void ProjectNearestNeighbour(RealD shift, GeneralCoarseOp &CopyMe)
|
||||||
{
|
{
|
||||||
int nfound=0;
|
int nfound=0;
|
||||||
|
std::cout << " ProjectNearestNeighbour "<< CopyMe._A[0].Grid()<<std::endl;
|
||||||
for(int p=0;p<geom.npoint;p++){
|
for(int p=0;p<geom.npoint;p++){
|
||||||
for(int pp=0;pp<CopyMe.geom.npoint;pp++){
|
for(int pp=0;pp<CopyMe.geom.npoint;pp++){
|
||||||
// Search for the same relative shift
|
// Search for the same relative shift
|
||||||
@ -233,12 +234,12 @@ public:
|
|||||||
if ( CopyMe.geom.shifts[pp]==geom.shifts[p] ) {
|
if ( CopyMe.geom.shifts[pp]==geom.shifts[p] ) {
|
||||||
_A[p] = CopyMe.Cell.Extract(CopyMe._A[pp]);
|
_A[p] = CopyMe.Cell.Extract(CopyMe._A[pp]);
|
||||||
_Adag[p] = CopyMe.Cell.Extract(CopyMe._Adag[pp]);
|
_Adag[p] = CopyMe.Cell.Extract(CopyMe._Adag[pp]);
|
||||||
ExchangeCoarseLinks();
|
|
||||||
nfound++;
|
nfound++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
assert(nfound==geom.npoint);
|
assert(nfound==geom.npoint);
|
||||||
|
ExchangeCoarseLinks();
|
||||||
}
|
}
|
||||||
|
|
||||||
GeneralCoarsenedMatrix(NonLocalStencilGeometry &_geom,GridBase *FineGrid, GridCartesian * CoarseGrid)
|
GeneralCoarsenedMatrix(NonLocalStencilGeometry &_geom,GridBase *FineGrid, GridCartesian * CoarseGrid)
|
||||||
@ -271,7 +272,8 @@ public:
|
|||||||
}
|
}
|
||||||
void Mdag (const CoarseVector &in, CoarseVector &out)
|
void Mdag (const CoarseVector &in, CoarseVector &out)
|
||||||
{
|
{
|
||||||
Mult(_Adag,in,out);
|
if ( hermitian ) M(in,out);
|
||||||
|
else Mult(_Adag,in,out);
|
||||||
}
|
}
|
||||||
void Mult (std::vector<CoarseMatrix> &A,const CoarseVector &in, CoarseVector &out)
|
void Mult (std::vector<CoarseMatrix> &A,const CoarseVector &in, CoarseVector &out)
|
||||||
{
|
{
|
||||||
@ -298,10 +300,10 @@ public:
|
|||||||
const int Nsimd = CComplex::Nsimd();
|
const int Nsimd = CComplex::Nsimd();
|
||||||
|
|
||||||
int osites=pin.Grid()->oSites();
|
int osites=pin.Grid()->oSites();
|
||||||
int gsites=pin.Grid()->gSites();
|
// int gsites=pin.Grid()->gSites();
|
||||||
|
|
||||||
RealD flops = 1.0* npoint * nbasis * nbasis * 8 * gsites;
|
RealD flops = 1.0* npoint * nbasis * nbasis * 8 * osites;
|
||||||
RealD bytes = (1.0*osites*sizeof(siteMatrix)+2.0*osites*sizeof(siteVector))*npoint;
|
RealD bytes = (1.0*osites*sizeof(siteMatrix)*npoint+2.0*osites*sizeof(siteVector))*npoint;
|
||||||
|
|
||||||
// for(int point=0;point<npoint;point++){
|
// for(int point=0;point<npoint;point++){
|
||||||
// conformable(A[point],pin);
|
// conformable(A[point],pin);
|
||||||
@ -343,20 +345,20 @@ public:
|
|||||||
text+=usecond();
|
text+=usecond();
|
||||||
ttot+=usecond();
|
ttot+=usecond();
|
||||||
|
|
||||||
std::cout << GridLogMessage<<"Coarse Mult Aviews "<<tviews<<" us"<<std::endl;
|
std::cout << GridLogPerformance<<"Coarse Mult Aviews "<<tviews<<" us"<<std::endl;
|
||||||
std::cout << GridLogMessage<<"Coarse Mult exch "<<texch<<" us"<<std::endl;
|
std::cout << GridLogPerformance<<"Coarse Mult exch "<<texch<<" us"<<std::endl;
|
||||||
std::cout << GridLogMessage<<"Coarse Mult mult "<<tmult<<" us"<<std::endl;
|
std::cout << GridLogPerformance<<"Coarse Mult mult "<<tmult<<" us"<<std::endl;
|
||||||
std::cout << GridLogMessage<<"Coarse Mult ext "<<text<<" us"<<std::endl;
|
std::cout << GridLogPerformance<<"Coarse Mult ext "<<text<<" us"<<std::endl;
|
||||||
std::cout << GridLogMessage<<"Coarse Mult tot "<<ttot<<" us"<<std::endl;
|
std::cout << GridLogPerformance<<"Coarse Mult tot "<<ttot<<" us"<<std::endl;
|
||||||
std::cout << GridLogMessage<<"Coarse Kernel "<< flops/tmult<<" mflop/s"<<std::endl;
|
std::cout << GridLogPerformance<<"Coarse Kernel "<< flops/tmult<<" mflop/s"<<std::endl;
|
||||||
std::cout << GridLogMessage<<"Coarse Kernel "<< bytes/tmult<<" MB/s"<<std::endl;
|
std::cout << GridLogPerformance<<"Coarse Kernel "<< bytes/tmult<<" MB/s"<<std::endl;
|
||||||
std::cout << GridLogMessage<<"Coarse flops/s "<< flops/ttot<<" mflop/s"<<std::endl;
|
std::cout << GridLogPerformance<<"Coarse flops/s "<< flops/ttot<<" mflop/s"<<std::endl;
|
||||||
std::cout << GridLogMessage<<"Coarse bytes "<< bytes/1e6<<" MB"<<std::endl;
|
std::cout << GridLogPerformance<<"Coarse bytes "<< bytes/1e6<<" MB"<<std::endl;
|
||||||
};
|
};
|
||||||
|
|
||||||
void PopulateAdag(void)
|
void PopulateAdag(void)
|
||||||
{
|
{
|
||||||
for(int bidx=0;bidx<CoarseGrid()->gSites() ;bidx++){
|
for(int64_t bidx=0;bidx<CoarseGrid()->gSites() ;bidx++){
|
||||||
Coordinate bcoor;
|
Coordinate bcoor;
|
||||||
CoarseGrid()->GlobalIndexToGlobalCoor(bidx,bcoor);
|
CoarseGrid()->GlobalIndexToGlobalCoor(bidx,bcoor);
|
||||||
|
|
||||||
@ -412,10 +414,10 @@ public:
|
|||||||
*
|
*
|
||||||
* Where q_k = delta_k . (2*M_PI/global_nb[mu])
|
* Where q_k = delta_k . (2*M_PI/global_nb[mu])
|
||||||
*/
|
*/
|
||||||
void CoarsenOperatorColoured(LinearOperatorBase<Lattice<Fobj> > &linop,
|
void CoarsenOperator(LinearOperatorBase<Lattice<Fobj> > &linop,
|
||||||
Aggregation<Fobj,CComplex,nbasis> & Subspace)
|
Aggregation<Fobj,CComplex,nbasis> & Subspace)
|
||||||
{
|
{
|
||||||
std::cout << GridLogMessage<< "CoarsenMatrixColoured "<< std::endl;
|
std::cout << GridLogMessage<< "GeneralCoarsenMatrix "<< std::endl;
|
||||||
GridBase *grid = FineGrid();
|
GridBase *grid = FineGrid();
|
||||||
|
|
||||||
RealD tproj=0.0;
|
RealD tproj=0.0;
|
||||||
@ -540,9 +542,14 @@ public:
|
|||||||
auto sval = peekSite(_A[p],coor);
|
auto sval = peekSite(_A[p],coor);
|
||||||
}
|
}
|
||||||
|
|
||||||
PopulateAdag();
|
// Only needed if nonhermitian
|
||||||
|
if ( ! hermitian ) {
|
||||||
|
std::cout << GridLogMessage<<"PopulateAdag "<<std::endl;
|
||||||
|
PopulateAdag();
|
||||||
|
}
|
||||||
|
|
||||||
// Need to write something to populate Adag from A
|
// Need to write something to populate Adag from A
|
||||||
|
std::cout << GridLogMessage<<"ExchangeCoarseLinks "<<std::endl;
|
||||||
ExchangeCoarseLinks();
|
ExchangeCoarseLinks();
|
||||||
std::cout << GridLogMessage<<"CoarsenOperator eigen "<<teigen<<" us"<<std::endl;
|
std::cout << GridLogMessage<<"CoarsenOperator eigen "<<teigen<<" us"<<std::endl;
|
||||||
std::cout << GridLogMessage<<"CoarsenOperator phase "<<tphase<<" us"<<std::endl;
|
std::cout << GridLogMessage<<"CoarsenOperator phase "<<tphase<<" us"<<std::endl;
|
||||||
@ -552,6 +559,7 @@ public:
|
|||||||
}
|
}
|
||||||
void ExchangeCoarseLinks(void){
|
void ExchangeCoarseLinks(void){
|
||||||
for(int p=0;p<geom.npoint;p++){
|
for(int p=0;p<geom.npoint;p++){
|
||||||
|
std::cout << "Exchange "<<p<<std::endl;
|
||||||
_A[p] = Cell.Exchange(_A[p]);
|
_A[p] = Cell.Exchange(_A[p]);
|
||||||
_Adag[p]= Cell.Exchange(_Adag[p]);
|
_Adag[p]= Cell.Exchange(_Adag[p]);
|
||||||
}
|
}
|
||||||
|
@ -42,46 +42,30 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
*/
|
*/
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
template<class Field, class CoarseField, class Aggregation>
|
template<class Field>
|
||||||
class TwoLevelADEF2 : public LinearFunction<Field>
|
class TwoLevelCG : public LinearFunction<Field>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
RealD Tolerance;
|
RealD Tolerance;
|
||||||
Integer MaxIterations;
|
Integer MaxIterations;
|
||||||
const int mmax = 1;
|
|
||||||
GridBase *grid;
|
GridBase *grid;
|
||||||
GridBase *coarsegrid;
|
|
||||||
|
|
||||||
// Fine operator, Smoother, CoarseSolver
|
// Fine operator, Smoother, CoarseSolver
|
||||||
LinearOperatorBase<Field> &_FineLinop;
|
LinearOperatorBase<Field> &_FineLinop;
|
||||||
LinearFunction<Field> &_Smoother;
|
LinearFunction<Field> &_Smoother;
|
||||||
LinearFunction<CoarseField> &_CoarseSolver;
|
|
||||||
LinearFunction<CoarseField> &_CoarseSolverPrecise;
|
|
||||||
|
|
||||||
// Need something that knows how to get from Coarse to fine and back again
|
|
||||||
// void ProjectToSubspace(CoarseVector &CoarseVec,const FineField &FineVec){
|
|
||||||
// void PromoteFromSubspace(const CoarseVector &CoarseVec,FineField &FineVec){
|
|
||||||
Aggregation &_Aggregates;
|
|
||||||
|
|
||||||
// more most opertor functions
|
// more most opertor functions
|
||||||
TwoLevelADEF2(RealD tol,
|
TwoLevelCG(RealD tol,
|
||||||
Integer maxit,
|
Integer maxit,
|
||||||
LinearOperatorBase<Field> &FineLinop,
|
LinearOperatorBase<Field> &FineLinop,
|
||||||
LinearFunction<Field> &Smoother,
|
LinearFunction<Field> &Smoother,
|
||||||
LinearFunction<CoarseField> &CoarseSolver,
|
GridBase *fine) :
|
||||||
LinearFunction<CoarseField> &CoarseSolverPrecise,
|
|
||||||
Aggregation &Aggregates
|
|
||||||
) :
|
|
||||||
Tolerance(tol),
|
Tolerance(tol),
|
||||||
MaxIterations(maxit),
|
MaxIterations(maxit),
|
||||||
_FineLinop(FineLinop),
|
_FineLinop(FineLinop),
|
||||||
_Smoother(Smoother),
|
_Smoother(Smoother)
|
||||||
_CoarseSolver(CoarseSolver),
|
|
||||||
_CoarseSolverPrecise(CoarseSolverPrecise),
|
|
||||||
_Aggregates(Aggregates)
|
|
||||||
{
|
{
|
||||||
coarsegrid = Aggregates.CoarseGrid;
|
grid = fine;
|
||||||
grid = Aggregates.FineGrid;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
virtual void operator() (const Field &src, Field &psi)
|
virtual void operator() (const Field &src, Field &psi)
|
||||||
@ -195,47 +179,8 @@ class TwoLevelADEF2 : public LinearFunction<Field>
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
virtual void PcgM1(Field & in, Field & out)
|
virtual void PcgM1(Field & in, Field & out) =0;
|
||||||
{
|
virtual void Vstart(Field & x,const Field & src)=0;
|
||||||
// [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
|
|
||||||
|
|
||||||
Field tmp(grid);
|
|
||||||
Field Min(grid);
|
|
||||||
CoarseField PleftProj(coarsegrid);
|
|
||||||
CoarseField PleftMss_proj(coarsegrid);
|
|
||||||
|
|
||||||
GridStopWatch SmootherTimer;
|
|
||||||
GridStopWatch MatrixTimer;
|
|
||||||
SmootherTimer.Start();
|
|
||||||
_Smoother(in,Min);
|
|
||||||
SmootherTimer.Stop();
|
|
||||||
|
|
||||||
MatrixTimer.Start();
|
|
||||||
_FineLinop.HermOp(Min,out);
|
|
||||||
MatrixTimer.Stop();
|
|
||||||
axpy(tmp,-1.0,out,in); // tmp = in - A Min
|
|
||||||
|
|
||||||
GridStopWatch ProjTimer;
|
|
||||||
GridStopWatch CoarseTimer;
|
|
||||||
GridStopWatch PromTimer;
|
|
||||||
ProjTimer.Start();
|
|
||||||
_Aggregates.ProjectToSubspace(PleftProj,tmp);
|
|
||||||
ProjTimer.Stop();
|
|
||||||
CoarseTimer.Start();
|
|
||||||
_CoarseSolver(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s
|
|
||||||
CoarseTimer.Stop();
|
|
||||||
PromTimer.Start();
|
|
||||||
_Aggregates.PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]
|
|
||||||
PromTimer.Stop();
|
|
||||||
std::cout << GridLogMessage << "PcgM1 breakdown "<<std::endl;
|
|
||||||
std::cout << GridLogMessage << "\tSmoother " << SmootherTimer.Elapsed() <<std::endl;
|
|
||||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
|
||||||
std::cout << GridLogMessage << "\tProj " << ProjTimer.Elapsed() <<std::endl;
|
|
||||||
std::cout << GridLogMessage << "\tCoarse " << CoarseTimer.Elapsed() <<std::endl;
|
|
||||||
std::cout << GridLogMessage << "\tProm " << PromTimer.Elapsed() <<std::endl;
|
|
||||||
|
|
||||||
axpy(out,1.0,Min,tmp); // Min+tmp
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual void PcgM2(const Field & in, Field & out) {
|
virtual void PcgM2(const Field & in, Field & out) {
|
||||||
out=in;
|
out=in;
|
||||||
@ -249,6 +194,89 @@ class TwoLevelADEF2 : public LinearFunction<Field>
|
|||||||
return dd;
|
return dd;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////////////
|
||||||
|
// Only Def1 has non-trivial Vout.
|
||||||
|
/////////////////////////////////////////////////////////////////////
|
||||||
|
virtual void Vout (Field & in, Field & out,Field & src){
|
||||||
|
out = in;
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class Field, class CoarseField, class Aggregation>
|
||||||
|
class TwoLevelADEF2 : public TwoLevelCG<Field>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Need something that knows how to get from Coarse to fine and back again
|
||||||
|
// void ProjectToSubspace(CoarseVector &CoarseVec,const FineField &FineVec){
|
||||||
|
// void PromoteFromSubspace(const CoarseVector &CoarseVec,FineField &FineVec){
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////
|
||||||
|
GridBase *coarsegrid;
|
||||||
|
Aggregation &_Aggregates;
|
||||||
|
LinearFunction<CoarseField> &_CoarseSolver;
|
||||||
|
LinearFunction<CoarseField> &_CoarseSolverPrecise;
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// more most opertor functions
|
||||||
|
TwoLevelADEF2(RealD tol,
|
||||||
|
Integer maxit,
|
||||||
|
LinearOperatorBase<Field> &FineLinop,
|
||||||
|
LinearFunction<Field> &Smoother,
|
||||||
|
LinearFunction<CoarseField> &CoarseSolver,
|
||||||
|
LinearFunction<CoarseField> &CoarseSolverPrecise,
|
||||||
|
Aggregation &Aggregates
|
||||||
|
) :
|
||||||
|
TwoLevelCG<Field>(tol,maxit,FineLinop,Smoother,Aggregates.FineGrid),
|
||||||
|
_CoarseSolver(CoarseSolver),
|
||||||
|
_CoarseSolverPrecise(CoarseSolverPrecise),
|
||||||
|
_Aggregates(Aggregates)
|
||||||
|
{
|
||||||
|
coarsegrid = Aggregates.CoarseGrid;
|
||||||
|
};
|
||||||
|
|
||||||
|
virtual void PcgM1(Field & in, Field & out)
|
||||||
|
{
|
||||||
|
// [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
|
||||||
|
|
||||||
|
Field tmp(this->grid);
|
||||||
|
Field Min(this->grid);
|
||||||
|
CoarseField PleftProj(this->coarsegrid);
|
||||||
|
CoarseField PleftMss_proj(this->coarsegrid);
|
||||||
|
|
||||||
|
GridStopWatch SmootherTimer;
|
||||||
|
GridStopWatch MatrixTimer;
|
||||||
|
SmootherTimer.Start();
|
||||||
|
this->_Smoother(in,Min);
|
||||||
|
SmootherTimer.Stop();
|
||||||
|
|
||||||
|
MatrixTimer.Start();
|
||||||
|
this->_FineLinop.HermOp(Min,out);
|
||||||
|
MatrixTimer.Stop();
|
||||||
|
axpy(tmp,-1.0,out,in); // tmp = in - A Min
|
||||||
|
|
||||||
|
GridStopWatch ProjTimer;
|
||||||
|
GridStopWatch CoarseTimer;
|
||||||
|
GridStopWatch PromTimer;
|
||||||
|
ProjTimer.Start();
|
||||||
|
this->_Aggregates.ProjectToSubspace(PleftProj,tmp);
|
||||||
|
ProjTimer.Stop();
|
||||||
|
CoarseTimer.Start();
|
||||||
|
this->_CoarseSolver(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s
|
||||||
|
CoarseTimer.Stop();
|
||||||
|
PromTimer.Start();
|
||||||
|
this->_Aggregates.PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]
|
||||||
|
PromTimer.Stop();
|
||||||
|
std::cout << GridLogPerformance << "PcgM1 breakdown "<<std::endl;
|
||||||
|
std::cout << GridLogPerformance << "\tSmoother " << SmootherTimer.Elapsed() <<std::endl;
|
||||||
|
std::cout << GridLogPerformance << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||||
|
std::cout << GridLogPerformance << "\tProj " << ProjTimer.Elapsed() <<std::endl;
|
||||||
|
std::cout << GridLogPerformance << "\tCoarse " << CoarseTimer.Elapsed() <<std::endl;
|
||||||
|
std::cout << GridLogPerformance << "\tProm " << PromTimer.Elapsed() <<std::endl;
|
||||||
|
|
||||||
|
axpy(out,1.0,Min,tmp); // Min+tmp
|
||||||
|
}
|
||||||
|
|
||||||
virtual void Vstart(Field & x,const Field & src)
|
virtual void Vstart(Field & x,const Field & src)
|
||||||
{
|
{
|
||||||
///////////////////////////////////
|
///////////////////////////////////
|
||||||
@ -262,65 +290,69 @@ class TwoLevelADEF2 : public LinearFunction<Field>
|
|||||||
// = src_s - (A guess)_s - src_s + (A guess)_s
|
// = src_s - (A guess)_s - src_s + (A guess)_s
|
||||||
// = 0
|
// = 0
|
||||||
///////////////////////////////////
|
///////////////////////////////////
|
||||||
Field r(grid);
|
Field r(this->grid);
|
||||||
Field mmp(grid);
|
Field mmp(this->grid);
|
||||||
CoarseField PleftProj(coarsegrid);
|
CoarseField PleftProj(this->coarsegrid);
|
||||||
CoarseField PleftMss_proj(coarsegrid);
|
CoarseField PleftMss_proj(this->coarsegrid);
|
||||||
|
|
||||||
_Aggregates.ProjectToSubspace(PleftProj,src);
|
this->_Aggregates.ProjectToSubspace(PleftProj,src);
|
||||||
_CoarseSolverPrecise(PleftProj,PleftMss_proj); // Ass^{-1} r_s
|
this->_CoarseSolverPrecise(PleftProj,PleftMss_proj); // Ass^{-1} r_s
|
||||||
_Aggregates.PromoteFromSubspace(PleftMss_proj,x);
|
this->_Aggregates.PromoteFromSubspace(PleftMss_proj,x);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////
|
|
||||||
// Only Def1 has non-trivial Vout.
|
|
||||||
/////////////////////////////////////////////////////////////////////
|
|
||||||
virtual void Vout (Field & in, Field & out,Field & src){
|
|
||||||
out = in;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template<class Field, class CoarseField, class Aggregation>
|
template<class Field>
|
||||||
class TwoLevelADEF1ev : public TwoLevelADEF2<Field,CoarseField,Aggregation>
|
class TwoLevelADEF1defl : public TwoLevelCG<Field>
|
||||||
{
|
{
|
||||||
TwoLevelADEF1ev(RealD tol,
|
public:
|
||||||
|
const std::vector<Field> &evec;
|
||||||
|
const std::vector<RealD> &eval;
|
||||||
|
|
||||||
|
TwoLevelADEF1defl(RealD tol,
|
||||||
Integer maxit,
|
Integer maxit,
|
||||||
LinearOperatorBase<Field> &FineLinop,
|
LinearOperatorBase<Field> &FineLinop,
|
||||||
LinearFunction<Field> &Smoother,
|
LinearFunction<Field> &Smoother,
|
||||||
LinearFunction<CoarseField> &CoarseSolver,
|
std::vector<Field> &_evec,
|
||||||
LinearFunction<CoarseField> &CoarseSolverPrecise,
|
std::vector<RealD> &_eval) :
|
||||||
Aggregation &Aggregates ) :
|
TwoLevelCG<Field>(tol,maxit,FineLinop,Smoother,_evec[0].Grid()),
|
||||||
TwoLevelADEF2<Field,CoarseField,Aggregation>(tol,maxit,FineLinop,Smoother,CoarseSolver,CoarseSolverPrecise,Aggregates)
|
evec(_evec),
|
||||||
|
eval(_eval)
|
||||||
{};
|
{};
|
||||||
|
|
||||||
// Can just inherit existing Vstart
|
|
||||||
// Can just inherit existing Vout
|
// Can just inherit existing Vout
|
||||||
// Can just inherit existing M2
|
// Can just inherit existing M2
|
||||||
// Can just inherit existing M3
|
// Can just inherit existing M3
|
||||||
|
|
||||||
|
// Simple vstart - do nothing
|
||||||
|
virtual void Vstart(Field & x,const Field & src){};
|
||||||
|
|
||||||
// Override PcgM1
|
// Override PcgM1
|
||||||
virtual void PcgM1(Field & in, Field & out)
|
virtual void PcgM1(Field & in, Field & out)
|
||||||
{
|
{
|
||||||
CoarseField PleftProj(this->coarsegrid);
|
int N=evec.size();
|
||||||
CoarseField PleftMss_proj(this->coarsegrid);
|
|
||||||
Field tmp(this->grid);
|
|
||||||
Field Pin(this->grid);
|
Field Pin(this->grid);
|
||||||
|
Field Qin(this->grid);
|
||||||
|
|
||||||
//MP + Q = M(1-AQ) + Q = M
|
//MP + Q = M(1-AQ) + Q = M
|
||||||
// // If we are eigenvector deflating in coarse space
|
// // If we are eigenvector deflating in coarse space
|
||||||
// // Q = Sum_i |phi_i> 1/lambda_i <phi_i|
|
// // Q = Sum_i |phi_i> 1/lambda_i <phi_i|
|
||||||
// // A Q = Sum_i |phi_i> <phi_i|
|
// // A Q = Sum_i |phi_i> <phi_i|
|
||||||
// // M(1-AQ) = M(1-proj) + Q
|
// // M(1-AQ) = M(1-proj) + Q
|
||||||
this->_Aggregates.ProjectToSubspace(PleftProj,in);
|
Qin.Checkerboard()=in.Checkerboard();
|
||||||
this->_Aggregates.PromoteFromSubspace(PleftProj,tmp);// tmp = Qin
|
Qin = Zero();
|
||||||
|
Pin = in;
|
||||||
|
for (int i=0;i<N;i++) {
|
||||||
|
const Field& tmp = evec[i];
|
||||||
|
auto ip = TensorRemove(innerProduct(tmp,in));
|
||||||
|
axpy(Qin, ip / eval[i],tmp,Qin);
|
||||||
|
axpy(Pin, -ip ,tmp,Pin);
|
||||||
|
}
|
||||||
|
|
||||||
Pin = in - tmp;
|
|
||||||
this->_Smoother(Pin,out);
|
this->_Smoother(Pin,out);
|
||||||
|
|
||||||
this->_CoarseSolver(PleftProj,PleftMss_proj);
|
out = out + Qin;
|
||||||
this->_Aggregates.PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Qin
|
|
||||||
|
|
||||||
out = out + tmp;
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -70,8 +70,8 @@ public:
|
|||||||
Coordinate _istride; // Inner stride i.e. within simd lane
|
Coordinate _istride; // Inner stride i.e. within simd lane
|
||||||
int _osites; // _isites*_osites = product(dimensions).
|
int _osites; // _isites*_osites = product(dimensions).
|
||||||
int _isites;
|
int _isites;
|
||||||
int _fsites; // _isites*_osites = product(dimensions).
|
int64_t _fsites; // _isites*_osites = product(dimensions).
|
||||||
int _gsites;
|
int64_t _gsites;
|
||||||
Coordinate _slice_block;// subslice information
|
Coordinate _slice_block;// subslice information
|
||||||
Coordinate _slice_stride;
|
Coordinate _slice_stride;
|
||||||
Coordinate _slice_nblock;
|
Coordinate _slice_nblock;
|
||||||
@ -183,7 +183,7 @@ public:
|
|||||||
inline int Nsimd(void) const { return _isites; };// Synonymous with iSites
|
inline int Nsimd(void) const { return _isites; };// Synonymous with iSites
|
||||||
inline int oSites(void) const { return _osites; };
|
inline int oSites(void) const { return _osites; };
|
||||||
inline int lSites(void) const { return _isites*_osites; };
|
inline int lSites(void) const { return _isites*_osites; };
|
||||||
inline int gSites(void) const { return _isites*_osites*_Nprocessors; };
|
inline int64_t gSites(void) const { return (int64_t)_isites*(int64_t)_osites*(int64_t)_Nprocessors; };
|
||||||
inline int Nd (void) const { return _ndimension;};
|
inline int Nd (void) const { return _ndimension;};
|
||||||
|
|
||||||
inline const Coordinate LocalStarts(void) { return _lstart; };
|
inline const Coordinate LocalStarts(void) { return _lstart; };
|
||||||
@ -214,7 +214,7 @@ public:
|
|||||||
////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////
|
||||||
// Global addressing
|
// Global addressing
|
||||||
////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////
|
||||||
void GlobalIndexToGlobalCoor(int gidx,Coordinate &gcoor){
|
void GlobalIndexToGlobalCoor(int64_t gidx,Coordinate &gcoor){
|
||||||
assert(gidx< gSites());
|
assert(gidx< gSites());
|
||||||
Lexicographic::CoorFromIndex(gcoor,gidx,_gdimensions);
|
Lexicographic::CoorFromIndex(gcoor,gidx,_gdimensions);
|
||||||
}
|
}
|
||||||
@ -222,7 +222,7 @@ public:
|
|||||||
assert(lidx<lSites());
|
assert(lidx<lSites());
|
||||||
Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions);
|
Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions);
|
||||||
}
|
}
|
||||||
void GlobalCoorToGlobalIndex(const Coordinate & gcoor,int & gidx){
|
void GlobalCoorToGlobalIndex(const Coordinate & gcoor,int64_t & gidx){
|
||||||
gidx=0;
|
gidx=0;
|
||||||
int mult=1;
|
int mult=1;
|
||||||
for(int mu=0;mu<_ndimension;mu++) {
|
for(int mu=0;mu<_ndimension;mu++) {
|
||||||
|
@ -360,7 +360,7 @@ public:
|
|||||||
|
|
||||||
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
|
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
|
||||||
typedef typename vobj::scalar_object sobj;
|
typedef typename vobj::scalar_object sobj;
|
||||||
for(int g=0;g<o.Grid()->_gsites;g++){
|
for(int64_t g=0;g<o.Grid()->_gsites;g++){
|
||||||
|
|
||||||
Coordinate gcoor;
|
Coordinate gcoor;
|
||||||
o.Grid()->GlobalIndexToGlobalCoor(g,gcoor);
|
o.Grid()->GlobalIndexToGlobalCoor(g,gcoor);
|
||||||
|
@ -432,7 +432,7 @@ public:
|
|||||||
#if 1
|
#if 1
|
||||||
thread_for( lidx, _grid->lSites(), {
|
thread_for( lidx, _grid->lSites(), {
|
||||||
|
|
||||||
int gidx;
|
int64_t gidx;
|
||||||
int o_idx;
|
int o_idx;
|
||||||
int i_idx;
|
int i_idx;
|
||||||
int rank;
|
int rank;
|
||||||
|
@ -471,13 +471,13 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
|
|||||||
|
|
||||||
vobj zz = Zero();
|
vobj zz = Zero();
|
||||||
|
|
||||||
accelerator_for(sc,coarse->oSites(),1,{
|
accelerator_for(sc,coarse->oSites(),vobj::Nsimd(),{
|
||||||
|
|
||||||
// One thread per sub block
|
// One thread per sub block
|
||||||
Coordinate coor_c(_ndimension);
|
Coordinate coor_c(_ndimension);
|
||||||
Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions); // Block coordinate
|
Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions); // Block coordinate
|
||||||
|
|
||||||
vobj cd = zz;
|
auto cd = coalescedRead(zz);
|
||||||
|
|
||||||
for(int sb=0;sb<blockVol;sb++){
|
for(int sb=0;sb<blockVol;sb++){
|
||||||
|
|
||||||
@ -488,10 +488,10 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
|
|||||||
for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d];
|
for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d];
|
||||||
Lexicographic::IndexFromCoor(coor_f,sf,fine_rdimensions);
|
Lexicographic::IndexFromCoor(coor_f,sf,fine_rdimensions);
|
||||||
|
|
||||||
cd=cd+fineData_p[sf];
|
cd=cd+coalescedRead(fineData_p[sf]);
|
||||||
}
|
}
|
||||||
|
|
||||||
coarseData_p[sc] = cd;
|
coalescedWrite(coarseData_p[sc],cd);
|
||||||
|
|
||||||
});
|
});
|
||||||
return;
|
return;
|
||||||
@ -1054,7 +1054,7 @@ void Replicate(const Lattice<vobj> &coarse,Lattice<vobj> & fine)
|
|||||||
|
|
||||||
Coordinate fcoor(nd);
|
Coordinate fcoor(nd);
|
||||||
Coordinate ccoor(nd);
|
Coordinate ccoor(nd);
|
||||||
for(int g=0;g<fg->gSites();g++){
|
for(int64_t g=0;g<fg->gSites();g++){
|
||||||
|
|
||||||
fg->GlobalIndexToGlobalCoor(g,fcoor);
|
fg->GlobalIndexToGlobalCoor(g,fcoor);
|
||||||
for(int d=0;d<nd;d++){
|
for(int d=0;d<nd;d++){
|
||||||
|
@ -86,7 +86,9 @@ public:
|
|||||||
// expand up one dim at a time
|
// expand up one dim at a time
|
||||||
for(int d=0;d<dims;d++){
|
for(int d=0;d<dims;d++){
|
||||||
|
|
||||||
plocal[d] += 2*depth;
|
if ( processors[d] > 1 ) {
|
||||||
|
plocal[d] += 2*depth;
|
||||||
|
}
|
||||||
|
|
||||||
for(int d=0;d<dims;d++){
|
for(int d=0;d<dims;d++){
|
||||||
global[d] = plocal[d]*processors[d];
|
global[d] = plocal[d]*processors[d];
|
||||||
@ -98,11 +100,17 @@ public:
|
|||||||
template<class vobj>
|
template<class vobj>
|
||||||
inline Lattice<vobj> Extract(const Lattice<vobj> &in) const
|
inline Lattice<vobj> Extract(const Lattice<vobj> &in) const
|
||||||
{
|
{
|
||||||
|
Coordinate processors=unpadded_grid->_processors;
|
||||||
|
|
||||||
Lattice<vobj> out(unpadded_grid);
|
Lattice<vobj> out(unpadded_grid);
|
||||||
|
|
||||||
Coordinate local =unpadded_grid->LocalDimensions();
|
Coordinate local =unpadded_grid->LocalDimensions();
|
||||||
Coordinate fll(dims,depth); // depends on the MPI spread
|
// depends on the MPI spread
|
||||||
|
Coordinate fll(dims,depth);
|
||||||
Coordinate tll(dims,0); // depends on the MPI spread
|
Coordinate tll(dims,0); // depends on the MPI spread
|
||||||
|
for(int d=0;d<dims;d++){
|
||||||
|
if( processors[d]==1 ) fll[d]=0;
|
||||||
|
}
|
||||||
localCopyRegion(in,out,fll,tll,local);
|
localCopyRegion(in,out,fll,tll,local);
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
@ -121,6 +129,7 @@ public:
|
|||||||
template<class vobj>
|
template<class vobj>
|
||||||
inline Lattice<vobj> Expand(int dim, const Lattice<vobj> &in, const CshiftImplBase<vobj> &cshift = CshiftImplDefault<vobj>()) const
|
inline Lattice<vobj> Expand(int dim, const Lattice<vobj> &in, const CshiftImplBase<vobj> &cshift = CshiftImplDefault<vobj>()) const
|
||||||
{
|
{
|
||||||
|
Coordinate processors=unpadded_grid->_processors;
|
||||||
GridBase *old_grid = in.Grid();
|
GridBase *old_grid = in.Grid();
|
||||||
GridCartesian *new_grid = grids[dim];//These are new grids
|
GridCartesian *new_grid = grids[dim];//These are new grids
|
||||||
Lattice<vobj> padded(new_grid);
|
Lattice<vobj> padded(new_grid);
|
||||||
@ -134,35 +143,47 @@ public:
|
|||||||
|
|
||||||
double tins=0, tshift=0;
|
double tins=0, tshift=0;
|
||||||
|
|
||||||
// Middle bit
|
int islocal = 0 ;
|
||||||
double t = usecond();
|
if ( processors[dim] == 1 ) islocal = 1;
|
||||||
for(int x=0;x<local[dim];x++){
|
|
||||||
InsertSliceLocal(in,padded,x,depth+x,dim);
|
if ( islocal ) {
|
||||||
|
|
||||||
|
double t = usecond();
|
||||||
|
for(int x=0;x<local[dim];x++){
|
||||||
|
InsertSliceLocal(in,padded,x,x,dim);
|
||||||
|
}
|
||||||
|
tins += usecond() - t;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// Middle bit
|
||||||
|
double t = usecond();
|
||||||
|
for(int x=0;x<local[dim];x++){
|
||||||
|
InsertSliceLocal(in,padded,x,depth+x,dim);
|
||||||
|
}
|
||||||
|
tins += usecond() - t;
|
||||||
|
|
||||||
|
// High bit
|
||||||
|
t = usecond();
|
||||||
|
shifted = cshift.Cshift(in,dim,depth);
|
||||||
|
tshift += usecond() - t;
|
||||||
|
|
||||||
|
t=usecond();
|
||||||
|
for(int x=0;x<depth;x++){
|
||||||
|
InsertSliceLocal(shifted,padded,local[dim]-depth+x,depth+local[dim]+x,dim);
|
||||||
|
}
|
||||||
|
tins += usecond() - t;
|
||||||
|
|
||||||
|
// Low bit
|
||||||
|
t = usecond();
|
||||||
|
shifted = cshift.Cshift(in,dim,-depth);
|
||||||
|
tshift += usecond() - t;
|
||||||
|
|
||||||
|
t = usecond();
|
||||||
|
for(int x=0;x<depth;x++){
|
||||||
|
InsertSliceLocal(shifted,padded,x,x,dim);
|
||||||
|
}
|
||||||
|
tins += usecond() - t;
|
||||||
}
|
}
|
||||||
tins += usecond() - t;
|
|
||||||
|
|
||||||
// High bit
|
|
||||||
t = usecond();
|
|
||||||
shifted = cshift.Cshift(in,dim,depth);
|
|
||||||
tshift += usecond() - t;
|
|
||||||
|
|
||||||
t=usecond();
|
|
||||||
for(int x=0;x<depth;x++){
|
|
||||||
InsertSliceLocal(shifted,padded,local[dim]-depth+x,depth+local[dim]+x,dim);
|
|
||||||
}
|
|
||||||
tins += usecond() - t;
|
|
||||||
|
|
||||||
// Low bit
|
|
||||||
t = usecond();
|
|
||||||
shifted = cshift.Cshift(in,dim,-depth);
|
|
||||||
tshift += usecond() - t;
|
|
||||||
|
|
||||||
t = usecond();
|
|
||||||
for(int x=0;x<depth;x++){
|
|
||||||
InsertSliceLocal(shifted,padded,x,x,dim);
|
|
||||||
}
|
|
||||||
tins += usecond() - t;
|
|
||||||
|
|
||||||
std::cout << GridLogPerformance << "PaddedCell::Expand timings: cshift:" << tshift/1000 << "ms, insert-slice:" << tins/1000 << "ms" << std::endl;
|
std::cout << GridLogPerformance << "PaddedCell::Expand timings: cshift:" << tshift/1000 << "ms, insert-slice:" << tins/1000 << "ms" << std::endl;
|
||||||
|
|
||||||
return padded;
|
return padded;
|
||||||
|
@ -8,7 +8,7 @@ namespace Grid{
|
|||||||
public:
|
public:
|
||||||
|
|
||||||
template<class coor_t>
|
template<class coor_t>
|
||||||
static accelerator_inline void CoorFromIndex (coor_t& coor,int index,const coor_t &dims){
|
static accelerator_inline void CoorFromIndex (coor_t& coor,int64_t index,const coor_t &dims){
|
||||||
int nd= dims.size();
|
int nd= dims.size();
|
||||||
coor.resize(nd);
|
coor.resize(nd);
|
||||||
for(int d=0;d<nd;d++){
|
for(int d=0;d<nd;d++){
|
||||||
@ -18,28 +18,45 @@ namespace Grid{
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class coor_t>
|
template<class coor_t>
|
||||||
static accelerator_inline void IndexFromCoor (const coor_t& coor,int &index,const coor_t &dims){
|
static accelerator_inline void IndexFromCoor (const coor_t& coor,int64_t &index,const coor_t &dims){
|
||||||
int nd=dims.size();
|
int nd=dims.size();
|
||||||
int stride=1;
|
int stride=1;
|
||||||
index=0;
|
index=0;
|
||||||
for(int d=0;d<nd;d++){
|
for(int d=0;d<nd;d++){
|
||||||
index = index+stride*coor[d];
|
index = index+(int64_t)stride*coor[d];
|
||||||
stride=stride*dims[d];
|
stride=stride*dims[d];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
template<class coor_t>
|
||||||
|
static accelerator_inline void IndexFromCoor (const coor_t& coor,int &index,const coor_t &dims){
|
||||||
|
int64_t index64;
|
||||||
|
IndexFromCoor(coor,index64,dims);
|
||||||
|
assert(index64<2*1024*1024*1024LL);
|
||||||
|
index = (int) index64;
|
||||||
|
}
|
||||||
|
|
||||||
template<class coor_t>
|
template<class coor_t>
|
||||||
static inline void IndexFromCoorReversed (const coor_t& coor,int &index,const coor_t &dims){
|
static inline void IndexFromCoorReversed (const coor_t& coor,int64_t &index,const coor_t &dims){
|
||||||
int nd=dims.size();
|
int nd=dims.size();
|
||||||
int stride=1;
|
int stride=1;
|
||||||
index=0;
|
index=0;
|
||||||
for(int d=nd-1;d>=0;d--){
|
for(int d=nd-1;d>=0;d--){
|
||||||
index = index+stride*coor[d];
|
index = index+(int64_t)stride*coor[d];
|
||||||
stride=stride*dims[d];
|
stride=stride*dims[d];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
template<class coor_t>
|
template<class coor_t>
|
||||||
static inline void CoorFromIndexReversed (coor_t& coor,int index,const coor_t &dims){
|
static inline void IndexFromCoorReversed (const coor_t& coor,int &index,const coor_t &dims){
|
||||||
|
int64_t index64;
|
||||||
|
IndexFromCoorReversed(coor,index64,dims);
|
||||||
|
if ( index64>=2*1024*1024*1024LL ){
|
||||||
|
std::cout << " IndexFromCoorReversed " << coor<<" index " << index64<< " dims "<<dims<<std::endl;
|
||||||
|
}
|
||||||
|
assert(index64<2*1024*1024*1024LL);
|
||||||
|
index = (int) index64;
|
||||||
|
}
|
||||||
|
template<class coor_t>
|
||||||
|
static inline void CoorFromIndexReversed (coor_t& coor,int64_t index,const coor_t &dims){
|
||||||
int nd= dims.size();
|
int nd= dims.size();
|
||||||
coor.resize(nd);
|
coor.resize(nd);
|
||||||
for(int d=nd-1;d>=0;d--){
|
for(int d=nd-1;d>=0;d--){
|
||||||
|
43
systems/Frontier/benchmarks/bench2.slurm
Executable file
43
systems/Frontier/benchmarks/bench2.slurm
Executable file
@ -0,0 +1,43 @@
|
|||||||
|
#!/bin/bash -l
|
||||||
|
#SBATCH --job-name=bench
|
||||||
|
##SBATCH --partition=small-g
|
||||||
|
#SBATCH --nodes=2
|
||||||
|
#SBATCH --ntasks-per-node=8
|
||||||
|
#SBATCH --cpus-per-task=7
|
||||||
|
#SBATCH --gpus-per-node=8
|
||||||
|
#SBATCH --time=00:10:00
|
||||||
|
#SBATCH --account=phy157_dwf
|
||||||
|
#SBATCH --gpu-bind=none
|
||||||
|
#SBATCH --exclusive
|
||||||
|
#SBATCH --mem=0
|
||||||
|
|
||||||
|
cat << EOF > select_gpu
|
||||||
|
#!/bin/bash
|
||||||
|
export GPU_MAP=(0 1 2 3 7 6 5 4)
|
||||||
|
export NUMA_MAP=(3 3 1 1 2 2 0 0)
|
||||||
|
export GPU=\${GPU_MAP[\$SLURM_LOCALID]}
|
||||||
|
export NUMA=\${NUMA_MAP[\$SLURM_LOCALID]}
|
||||||
|
export HIP_VISIBLE_DEVICES=\$GPU
|
||||||
|
unset ROCR_VISIBLE_DEVICES
|
||||||
|
echo RANK \$SLURM_LOCALID using GPU \$GPU
|
||||||
|
exec numactl -m \$NUMA -N \$NUMA \$*
|
||||||
|
EOF
|
||||||
|
|
||||||
|
chmod +x ./select_gpu
|
||||||
|
|
||||||
|
root=$HOME/Frontier/Grid/systems/Frontier/
|
||||||
|
source ${root}/sourceme.sh
|
||||||
|
|
||||||
|
export OMP_NUM_THREADS=7
|
||||||
|
export MPICH_GPU_SUPPORT_ENABLED=1
|
||||||
|
export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
|
||||||
|
|
||||||
|
for vol in 32.32.32.64
|
||||||
|
do
|
||||||
|
srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 0 --grid $vol > log.shm0.ov.$vol
|
||||||
|
srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.ov.$vol
|
||||||
|
|
||||||
|
srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 0 --grid $vol > log.shm0.seq.$vol
|
||||||
|
srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.seq.$vol
|
||||||
|
done
|
||||||
|
|
23
systems/Frontier/config-command
Normal file
23
systems/Frontier/config-command
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
CLIME=`spack find --paths c-lime@2-3-9 | grep c-lime| cut -c 15-`
|
||||||
|
../../configure --enable-comms=mpi-auto \
|
||||||
|
--with-lime=$CLIME \
|
||||||
|
--enable-unified=no \
|
||||||
|
--enable-shm=nvlink \
|
||||||
|
--enable-tracing=timer \
|
||||||
|
--enable-accelerator=hip \
|
||||||
|
--enable-gen-simd-width=64 \
|
||||||
|
--disable-gparity \
|
||||||
|
--disable-fermion-reps \
|
||||||
|
--enable-simd=GPU \
|
||||||
|
--enable-accelerator-cshift \
|
||||||
|
--with-gmp=$OLCF_GMP_ROOT \
|
||||||
|
--with-fftw=$FFTW_DIR/.. \
|
||||||
|
--with-mpfr=/opt/cray/pe/gcc/mpfr/3.1.4/ \
|
||||||
|
--disable-fermion-reps \
|
||||||
|
CXX=hipcc MPICXX=mpicxx \
|
||||||
|
CXXFLAGS="-fPIC -I{$ROCM_PATH}/include/ -std=c++14 -I${MPICH_DIR}/include -L/lib64 " \
|
||||||
|
LDFLAGS="-L/lib64 -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -lamdhip64 "
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
13
systems/Frontier/mpiwrapper.sh
Executable file
13
systems/Frontier/mpiwrapper.sh
Executable file
@ -0,0 +1,13 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
lrank=$SLURM_LOCALID
|
||||||
|
lgpu=(0 1 2 3 7 6 5 4)
|
||||||
|
|
||||||
|
export ROCR_VISIBLE_DEVICES=${lgpu[$lrank]}
|
||||||
|
|
||||||
|
echo "`hostname` - $lrank device=$ROCR_VISIBLE_DEVICES "
|
||||||
|
|
||||||
|
$*
|
||||||
|
|
||||||
|
|
||||||
|
|
13
systems/Frontier/sourceme.sh
Normal file
13
systems/Frontier/sourceme.sh
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
. /autofs/nccs-svm1_home1/paboyle/Crusher/Grid/spack/share/spack/setup-env.sh
|
||||||
|
spack load c-lime
|
||||||
|
#export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/sw/crusher/spack-envs/base/opt/cray-sles15-zen3/gcc-11.2.0/gperftools-2.9.1-72ubwtuc5wcz2meqltbfdb76epufgzo2/lib
|
||||||
|
module load emacs
|
||||||
|
module load PrgEnv-gnu
|
||||||
|
module load rocm
|
||||||
|
module load cray-mpich/8.1.23
|
||||||
|
module load gmp
|
||||||
|
module load cray-fftw
|
||||||
|
module load craype-accel-amd-gfx90a
|
||||||
|
export LD_LIBRARY_PATH=/opt/gcc/mpfr/3.1.4/lib:$LD_LIBRARY_PATH
|
||||||
|
#Hack for lib
|
||||||
|
#export LD_LIBRARY_PATH=`pwd`:$LD_LIBRARY_PATH
|
9
systems/Frontier/wrap.sh
Executable file
9
systems/Frontier/wrap.sh
Executable file
@ -0,0 +1,9 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
export HIP_VISIBLE_DEVICES=$ROCR_VISIBLE_DEVICES
|
||||||
|
unset ROCR_VISIBLE_DEVICES
|
||||||
|
|
||||||
|
#rank=$SLURM_PROCID
|
||||||
|
#rocprof -d rocprof.$rank -o rocprof.$rank/results.rank$SLURM_PROCID.csv --sys-trace $@
|
||||||
|
|
||||||
|
$@
|
@ -144,12 +144,7 @@ int main (int argc, char ** argv)
|
|||||||
HermOpAdaptor<LatticeFermionD> HOA(HermDefOp);
|
HermOpAdaptor<LatticeFermionD> HOA(HermDefOp);
|
||||||
|
|
||||||
int pp=16;
|
int pp=16;
|
||||||
// LittleDiracOpCol.CoarsenOperator(HOA,Aggregates);
|
LittleDiracOp.CoarsenOperator(HOA,Aggregates);
|
||||||
// std::cout << "LittleDiracOp old " << LittleDiracOpCol._A[pp]<<std::endl;
|
|
||||||
LittleDiracOp.CoarsenOperatorColoured(HOA,Aggregates);
|
|
||||||
// LittleDiracOp.ExchangeCoarseLinks();
|
|
||||||
|
|
||||||
// std::cout << "LittleDiracOp new " << LittleDiracOp._A[pp]<<std::endl;
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////
|
///////////////////////////////////////////////////
|
||||||
// Test the operator
|
// Test the operator
|
||||||
|
@ -86,6 +86,12 @@ int main (int argc, char ** argv)
|
|||||||
Grid_init(&argc,&argv);
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
const int Ls=16;
|
const int Ls=16;
|
||||||
|
const int nbasis = 40;
|
||||||
|
const int cb = 0 ;
|
||||||
|
RealD mass=0.01;
|
||||||
|
RealD M5=1.8;
|
||||||
|
RealD b=1.5;
|
||||||
|
RealD c=0.5;
|
||||||
|
|
||||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
|
||||||
GridDefaultSimd(Nd,vComplex::Nsimd()),
|
GridDefaultSimd(Nd,vComplex::Nsimd()),
|
||||||
@ -121,10 +127,6 @@ int main (int argc, char ** argv)
|
|||||||
NerscIO::readConfiguration(Umu,header,file);
|
NerscIO::readConfiguration(Umu,header,file);
|
||||||
|
|
||||||
//////////////////////// Fermion action //////////////////////////////////
|
//////////////////////// Fermion action //////////////////////////////////
|
||||||
RealD mass=0.01;
|
|
||||||
RealD M5=1.8;
|
|
||||||
RealD b=1.5;
|
|
||||||
RealD c=0.5;
|
|
||||||
MobiusFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c);
|
MobiusFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c);
|
||||||
|
|
||||||
SchurDiagMooeeOperator<MobiusFermionD, LatticeFermion> HermOpEO(Ddwf);
|
SchurDiagMooeeOperator<MobiusFermionD, LatticeFermion> HermOpEO(Ddwf);
|
||||||
@ -143,8 +145,6 @@ int main (int argc, char ** argv)
|
|||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
///////////// Coarse basis and Little Dirac Operator ///////
|
///////////// Coarse basis and Little Dirac Operator ///////
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
const int nbasis = 40;
|
|
||||||
const int cb = 0 ;
|
|
||||||
typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperator;
|
typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperator;
|
||||||
typedef LittleDiracOperator::CoarseVector CoarseVector;
|
typedef LittleDiracOperator::CoarseVector CoarseVector;
|
||||||
|
|
||||||
@ -154,6 +154,18 @@ int main (int argc, char ** argv)
|
|||||||
// Warning: This routine calls PVdagM.Op, not PVdagM.HermOp
|
// Warning: This routine calls PVdagM.Op, not PVdagM.HermOp
|
||||||
typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
|
typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
|
||||||
Subspace Aggregates(Coarse5d,FrbGrid,cb);
|
Subspace Aggregates(Coarse5d,FrbGrid,cb);
|
||||||
|
#if 1
|
||||||
|
Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis,
|
||||||
|
95.0,0.1,
|
||||||
|
// 400,200,200 -- 48 iters
|
||||||
|
// 600,200,200 -- 38 iters, 162s
|
||||||
|
// 600,200,100 -- 38 iters, 169s
|
||||||
|
// 600,200,50 -- 88 iters. 370s
|
||||||
|
600,
|
||||||
|
200,
|
||||||
|
100,
|
||||||
|
0.0);
|
||||||
|
#else
|
||||||
Aggregates.CreateSubspaceChebyshev(RNG5,
|
Aggregates.CreateSubspaceChebyshev(RNG5,
|
||||||
HermOpEO,
|
HermOpEO,
|
||||||
nbasis,
|
nbasis,
|
||||||
@ -172,18 +184,21 @@ int main (int argc, char ** argv)
|
|||||||
// 0.1, // nbasis 16 -- 142; sloppy solve
|
// 0.1, // nbasis 16 -- 142; sloppy solve
|
||||||
0.1, // nbasis 24
|
0.1, // nbasis 24
|
||||||
300);
|
300);
|
||||||
|
#endif
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
// Need to check about red-black grid coarsening
|
// Need to check about red-black grid coarsening
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
LittleDiracOperator LittleDiracOp(geom,FrbGrid,Coarse5d);
|
LittleDiracOperator LittleDiracOp(geom,FrbGrid,Coarse5d);
|
||||||
LittleDiracOp.CoarsenOperatorColoured(FineHermOp,Aggregates);
|
LittleDiracOp.CoarsenOperator(FineHermOp,Aggregates);
|
||||||
|
|
||||||
// Try projecting to one hop only
|
// Try projecting to one hop only
|
||||||
|
std::cout << " projecting coarse matrix "<<std::endl;
|
||||||
LittleDiracOperator LittleDiracOpProj(geom_nn,FrbGrid,Coarse5d);
|
LittleDiracOperator LittleDiracOpProj(geom_nn,FrbGrid,Coarse5d);
|
||||||
LittleDiracOpProj.ProjectNearestNeighbour(0.5,LittleDiracOp);
|
LittleDiracOpProj.ProjectNearestNeighbour(0.5,LittleDiracOp);
|
||||||
|
|
||||||
typedef HermitianLinearOperator<LittleDiracOperator,CoarseVector> HermMatrix;
|
typedef HermitianLinearOperator<LittleDiracOperator,CoarseVector> HermMatrix;
|
||||||
HermMatrix CoarseOp (LittleDiracOp);
|
HermMatrix CoarseOp (LittleDiracOp);
|
||||||
|
HermMatrix CoarseOpProj (LittleDiracOpProj);
|
||||||
|
|
||||||
//////////////////////////////////////////
|
//////////////////////////////////////////
|
||||||
// Build a coarse lanczos
|
// Build a coarse lanczos
|
||||||
@ -200,12 +215,14 @@ int main (int argc, char ** argv)
|
|||||||
std::vector<RealD> eval(Nm);
|
std::vector<RealD> eval(Nm);
|
||||||
std::vector<CoarseVector> evec(Nm,Coarse5d);
|
std::vector<CoarseVector> evec(Nm,Coarse5d);
|
||||||
CoarseVector c_src(Coarse5d); c_src=1.0;
|
CoarseVector c_src(Coarse5d); c_src=1.0;
|
||||||
|
CoarseVector c_res(Coarse5d);
|
||||||
|
|
||||||
PowerMethod<CoarseVector> cPM; cPM(CoarseOp,c_src);
|
PowerMethod<CoarseVector> cPM; cPM(CoarseOp,c_src);
|
||||||
|
|
||||||
IRL.calc(eval,evec,c_src,Nconv);
|
IRL.calc(eval,evec,c_src,Nconv);
|
||||||
DeflatedGuesser<CoarseVector> DeflCoarseGuesser(evec,eval);
|
DeflatedGuesser<CoarseVector> DeflCoarseGuesser(evec,eval);
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////
|
//////////////////////////////////////////
|
||||||
// Build a coarse space solver
|
// Build a coarse space solver
|
||||||
//////////////////////////////////////////
|
//////////////////////////////////////////
|
||||||
@ -216,6 +233,52 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
// HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,CoarseZeroGuesser);
|
// HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,CoarseZeroGuesser);
|
||||||
HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,DeflCoarseGuesser);
|
HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,DeflCoarseGuesser);
|
||||||
|
c_res=Zero();
|
||||||
|
HPDSolve(c_src,c_res);
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
// Deflated (with real op EV's) solve for the projected coarse op
|
||||||
|
// Work towards ADEF1 in the coarse space
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
HPDSolver<CoarseVector> HPDSolveProj(CoarseOpProj,CG,DeflCoarseGuesser);
|
||||||
|
c_res=Zero();
|
||||||
|
HPDSolveProj(c_src,c_res);
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
// Coarse ADEF1 with deflation space
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
// ChebyshevSmoother<CoarseVector,HermMatrix > CoarseSmoother(2.0,40.,8,CoarseOpProj); // 311
|
||||||
|
ChebyshevSmoother<CoarseVector,HermMatrix > CoarseSmoother(2.0,36.,12,CoarseOpProj); // 311
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////
|
||||||
|
// CG, Cheby mode spacing 200,200
|
||||||
|
// Unprojected Coarse CG solve to 1e-8 : 190 iters, 4.9s
|
||||||
|
// Unprojected Coarse CG solve to 4e-2 : 33 iters, 0.8s
|
||||||
|
// Projected Coarse CG solve to 1e-8 : 100 iters, 0.36s
|
||||||
|
////////////////////////////////////////////////////////
|
||||||
|
// CoarseSmoother(1.0,48.,8,CoarseOpProj); 48 evecs
|
||||||
|
////////////////////////////////////////////////////////
|
||||||
|
// ADEF1 Coarse solve to 1e-8 : 44 iters, 2.34s 2.1x gain
|
||||||
|
// ADEF1 Coarse solve to 4e-2 : 7 iters, 0.4s
|
||||||
|
// HDCG 38 iters 162s
|
||||||
|
//
|
||||||
|
// CoarseSmoother(1.0,40.,8,CoarseOpProj); 48 evecs
|
||||||
|
// ADEF1 Coarse solve to 1e-8 : 37 iters, 2.0s 2.1x gain
|
||||||
|
// ADEF1 Coarse solve to 4e-2 : 6 iters, 0.36s
|
||||||
|
// HDCG 38 iters 169s
|
||||||
|
|
||||||
|
TwoLevelADEF1defl<CoarseVector>
|
||||||
|
cADEF1(1.0e-8, 100,
|
||||||
|
CoarseOp,
|
||||||
|
CoarseSmoother,
|
||||||
|
evec,eval);
|
||||||
|
|
||||||
|
c_res=Zero();
|
||||||
|
cADEF1(c_src,c_res);
|
||||||
|
|
||||||
|
cADEF1.Tolerance = 4.0e-2;
|
||||||
|
c_res=Zero();
|
||||||
|
cADEF1(c_src,c_res);
|
||||||
|
|
||||||
//////////////////////////////////////////
|
//////////////////////////////////////////
|
||||||
// Build a smoother
|
// Build a smoother
|
||||||
@ -242,10 +305,6 @@ int main (int argc, char ** argv)
|
|||||||
// std::vector<int> ords({7,8,10}); // Nbasis 40 == 40,38,36 iters (320,342,396 mults)
|
// std::vector<int> ords({7,8,10}); // Nbasis 40 == 40,38,36 iters (320,342,396 mults)
|
||||||
std::vector<int> ords({7}); // Nbasis 40 == 40 iters (320 mults)
|
std::vector<int> ords({7}); // Nbasis 40 == 40 iters (320 mults)
|
||||||
|
|
||||||
// Standard CG
|
|
||||||
// result=Zero();
|
|
||||||
// CGfine(HermOpEO, src, result);
|
|
||||||
|
|
||||||
for(int l=0;l<los.size();l++){
|
for(int l=0;l<los.size();l++){
|
||||||
|
|
||||||
RealD lo = los[l];
|
RealD lo = los[l];
|
||||||
@ -271,9 +330,25 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
result=Zero();
|
result=Zero();
|
||||||
HDCG(src,result);
|
HDCG(src,result);
|
||||||
|
|
||||||
|
TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace>
|
||||||
|
HDCGdefl(1.0e-8, 3000,
|
||||||
|
FineHermOp,
|
||||||
|
Smoother,
|
||||||
|
cADEF1,
|
||||||
|
HPDSolve,
|
||||||
|
Aggregates);
|
||||||
|
|
||||||
|
result=Zero();
|
||||||
|
HDCGdefl(src,result);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Standard CG
|
||||||
|
result=Zero();
|
||||||
|
CGfine(HermOpEO, src, result);
|
||||||
|
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user