Mirror of https://github.com/paboyle/Grid.git (synced 2024-11-09 23:45:36 +00:00)
Compare commits: 737d3ffb98 ... 3bc2da5321

10 Commits:
3bc2da5321
2d710d6bfd
6532b7f32b
7b41b92d99
dd557af84b
59b9d0e030
b82eee4733
6a87487544
fcf5023845
c8adad6d8b
@@ -226,6 +226,7 @@ public:
   void ProjectNearestNeighbour(RealD shift, GeneralCoarseOp &CopyMe)
   {
     int nfound=0;
+    std::cout << " ProjectNearestNeighbour "<< CopyMe._A[0].Grid()<<std::endl;
     for(int p=0;p<geom.npoint;p++){
       for(int pp=0;pp<CopyMe.geom.npoint;pp++){
         // Search for the same relative shift
@ -233,12 +234,12 @@ public:
|
||||
if ( CopyMe.geom.shifts[pp]==geom.shifts[p] ) {
|
||||
_A[p] = CopyMe.Cell.Extract(CopyMe._A[pp]);
|
||||
_Adag[p] = CopyMe.Cell.Extract(CopyMe._Adag[pp]);
|
||||
ExchangeCoarseLinks();
|
||||
nfound++;
|
||||
}
|
||||
}
|
||||
}
|
||||
assert(nfound==geom.npoint);
|
||||
ExchangeCoarseLinks();
|
||||
}
|
||||
|
||||
GeneralCoarsenedMatrix(NonLocalStencilGeometry &_geom,GridBase *FineGrid, GridCartesian * CoarseGrid)
|
||||
@@ -271,7 +272,8 @@ public:
   }
   void Mdag (const CoarseVector &in, CoarseVector &out)
   {
-    Mult(_Adag,in,out);
+    if ( hermitian ) M(in,out);
+    else Mult(_Adag,in,out);
   }
   void Mult (std::vector<CoarseMatrix> &A,const CoarseVector &in, CoarseVector &out)
   {
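The Mdag change above leans on the identity that for a hermitian operator the adjoint apply coincides with the forward apply, so the separately stored _Adag links are only exercised on the non-hermitian path. A standalone check of that identity (a minimal sketch with a hypothetical 2x2 hermitian matrix, not Grid code):

#include <complex>
#include <cassert>
int main() {
  using C = std::complex<double>;
  C A[2][2] = {{ {2,0}, {1,-1} },
               { {1,1}, {3,0} }};           // A^dag == A (hermitian)
  C x[2] = { {1,2}, {-1,0.5} }, Ax[2], Adagx[2];
  for (int i=0;i<2;i++) {
    Ax[i] = Adagx[i] = 0;
    for (int j=0;j<2;j++) {
      Ax[i]    += A[i][j]*x[j];             // forward apply: M
      Adagx[i] += std::conj(A[j][i])*x[j];  // adjoint apply: Mdag
    }
  }
  for (int i=0;i<2;i++) assert(std::abs(Ax[i]-Adagx[i]) < 1e-12); // identical
  return 0;
}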
@@ -298,10 +300,10 @@ public:
     const int Nsimd = CComplex::Nsimd();

     int osites=pin.Grid()->oSites();
-    int gsites=pin.Grid()->gSites();
+    //    int gsites=pin.Grid()->gSites();

-    RealD flops = 1.0* npoint * nbasis * nbasis * 8 * gsites;
-    RealD bytes = (1.0*osites*sizeof(siteMatrix)+2.0*osites*sizeof(siteVector))*npoint;
+    RealD flops = 1.0* npoint * nbasis * nbasis * 8 * osites;
+    RealD bytes = (1.0*osites*sizeof(siteMatrix)*npoint+2.0*osites*sizeof(siteVector))*npoint;

     //    for(int point=0;point<npoint;point++){
     //      conformable(A[point],pin);
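The corrected estimate counts work per local (SIMD-vectorized) site, osites, which is what the tmult timer actually brackets on each rank; the old gsites count overstated per-rank flops by the number of ranks. A quick numeric sanity check with hypothetical sizes (not Grid code):

#include <cstdio>
int main() {
  // Hypothetical: 8^4 local coarse volume, 9-point stencil, nbasis = 40.
  // A complex nbasis x nbasis MatVec costs 8*nbasis^2 flops per site per point.
  double osites = 8.0*8*8*8;   // 4096 local coarse sites
  double npoint = 9, nbasis = 40;
  double flops  = 1.0*npoint*nbasis*nbasis*8*osites;  // formula from the patch
  std::printf("%.3g flops per coarse Mult on this rank\n", flops); // ~4.7e8
  return 0;
}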
@@ -343,20 +345,20 @@ public:
     text+=usecond();
     ttot+=usecond();

-    std::cout << GridLogMessage<<"Coarse Mult Aviews "<<tviews<<" us"<<std::endl;
-    std::cout << GridLogMessage<<"Coarse Mult exch "<<texch<<" us"<<std::endl;
-    std::cout << GridLogMessage<<"Coarse Mult mult "<<tmult<<" us"<<std::endl;
-    std::cout << GridLogMessage<<"Coarse Mult ext "<<text<<" us"<<std::endl;
-    std::cout << GridLogMessage<<"Coarse Mult tot "<<ttot<<" us"<<std::endl;
-    std::cout << GridLogMessage<<"Coarse Kernel "<< flops/tmult<<" mflop/s"<<std::endl;
-    std::cout << GridLogMessage<<"Coarse Kernel "<< bytes/tmult<<" MB/s"<<std::endl;
-    std::cout << GridLogMessage<<"Coarse flops/s "<< flops/ttot<<" mflop/s"<<std::endl;
-    std::cout << GridLogMessage<<"Coarse bytes "<< bytes/1e6<<" MB"<<std::endl;
+    std::cout << GridLogPerformance<<"Coarse Mult Aviews "<<tviews<<" us"<<std::endl;
+    std::cout << GridLogPerformance<<"Coarse Mult exch "<<texch<<" us"<<std::endl;
+    std::cout << GridLogPerformance<<"Coarse Mult mult "<<tmult<<" us"<<std::endl;
+    std::cout << GridLogPerformance<<"Coarse Mult ext "<<text<<" us"<<std::endl;
+    std::cout << GridLogPerformance<<"Coarse Mult tot "<<ttot<<" us"<<std::endl;
+    std::cout << GridLogPerformance<<"Coarse Kernel "<< flops/tmult<<" mflop/s"<<std::endl;
+    std::cout << GridLogPerformance<<"Coarse Kernel "<< bytes/tmult<<" MB/s"<<std::endl;
+    std::cout << GridLogPerformance<<"Coarse flops/s "<< flops/ttot<<" mflop/s"<<std::endl;
+    std::cout << GridLogPerformance<<"Coarse bytes "<< bytes/1e6<<" MB"<<std::endl;
   };

   void PopulateAdag(void)
   {
-    for(int bidx=0;bidx<CoarseGrid()->gSites() ;bidx++){
+    for(int64_t bidx=0;bidx<CoarseGrid()->gSites() ;bidx++){
       Coordinate bcoor;
       CoarseGrid()->GlobalIndexToGlobalCoor(bidx,bcoor);

@@ -412,10 +414,10 @@ public:
    *
    * Where q_k = delta_k . (2*M_PI/global_nb[mu])
    */
-  void CoarsenOperatorColoured(LinearOperatorBase<Lattice<Fobj> > &linop,
-                               Aggregation<Fobj,CComplex,nbasis> & Subspace)
+  void CoarsenOperator(LinearOperatorBase<Lattice<Fobj> > &linop,
+                       Aggregation<Fobj,CComplex,nbasis> & Subspace)
   {
-    std::cout << GridLogMessage<< "CoarsenMatrixColoured "<< std::endl;
+    std::cout << GridLogMessage<< "GeneralCoarsenMatrix "<< std::endl;
     GridBase *grid = FineGrid();

     RealD tproj=0.0;
@@ -540,9 +542,14 @@ public:
       auto sval = peekSite(_A[p],coor);
     }

-    PopulateAdag();
+    // Only needed if nonhermitian
+    if ( ! hermitian ) {
+      std::cout << GridLogMessage<<"PopulateAdag "<<std::endl;
+      PopulateAdag();
+    }
+
     // Need to write something to populate Adag from A
     std::cout << GridLogMessage<<"ExchangeCoarseLinks "<<std::endl;
     ExchangeCoarseLinks();
     std::cout << GridLogMessage<<"CoarsenOperator eigen "<<teigen<<" us"<<std::endl;
     std::cout << GridLogMessage<<"CoarsenOperator phase "<<tphase<<" us"<<std::endl;
@@ -552,6 +559,7 @@ public:
   }
   void ExchangeCoarseLinks(void){
     for(int p=0;p<geom.npoint;p++){
+      std::cout << "Exchange "<<p<<std::endl;
       _A[p] = Cell.Exchange(_A[p]);
       _Adag[p]= Cell.Exchange(_Adag[p]);
     }
@@ -42,46 +42,30 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 */
 NAMESPACE_BEGIN(Grid);

-template<class Field, class CoarseField, class Aggregation>
-class TwoLevelADEF2 : public LinearFunction<Field>
+template<class Field>
+class TwoLevelCG : public LinearFunction<Field>
 {
  public:
   RealD   Tolerance;
   Integer MaxIterations;
   const int mmax = 1;
   GridBase *grid;
-  GridBase *coarsegrid;

   // Fine operator, Smoother, CoarseSolver
   LinearOperatorBase<Field>   &_FineLinop;
   LinearFunction<Field>       &_Smoother;
-  LinearFunction<CoarseField> &_CoarseSolver;
-  LinearFunction<CoarseField> &_CoarseSolverPrecise;
-
-  // Need something that knows how to get from Coarse to fine and back again
-  //  void ProjectToSubspace(CoarseVector &CoarseVec,const FineField &FineVec){
-  //  void PromoteFromSubspace(const CoarseVector &CoarseVec,FineField &FineVec){
-  Aggregation &_Aggregates;

   // more most opertor functions
-  TwoLevelADEF2(RealD tol,
-                Integer maxit,
-                LinearOperatorBase<Field>   &FineLinop,
-                LinearFunction<Field>       &Smoother,
-                LinearFunction<CoarseField> &CoarseSolver,
-                LinearFunction<CoarseField> &CoarseSolverPrecise,
-                Aggregation &Aggregates
-                ) :
+  TwoLevelCG(RealD tol,
+             Integer maxit,
+             LinearOperatorBase<Field> &FineLinop,
+             LinearFunction<Field>     &Smoother,
+             GridBase *fine) :
     Tolerance(tol),
     MaxIterations(maxit),
     _FineLinop(FineLinop),
-    _Smoother(Smoother),
-    _CoarseSolver(CoarseSolver),
-    _CoarseSolverPrecise(CoarseSolverPrecise),
-    _Aggregates(Aggregates)
+    _Smoother(Smoother)
   {
-    coarsegrid = Aggregates.CoarseGrid;
-    grid       = Aggregates.FineGrid;
+    grid       = fine;
   };

   virtual void operator() (const Field &src, Field &psi)
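The net effect of this file's changes is to split the old monolithic TwoLevelADEF2 into a generic two-level CG driver plus derived preconditioner policies. The shape of the refactored hierarchy, reduced to a sketch (simplified types and hypothetical names, not the full Grid interfaces):

template<class Field>
struct TwoLevelCG_sketch {                            // generic two-level CG driver
  virtual void PcgM1 (Field &in, Field &out)      = 0; // the preconditioner apply
  virtual void Vstart(Field &x, const Field &src) = 0; // the initial-guess policy
  virtual ~TwoLevelCG_sketch() = default;
};
// TwoLevelADEF2     : PcgM1 = smoother + coarse-grid correction (needs Aggregates)
// TwoLevelADEF1defl : PcgM1 = smoother + explicit eigenvector deflation (needs evec/eval)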
@@ -195,47 +179,8 @@ class TwoLevelADEF2 : public LinearFunction<Field>

  public:

-  virtual void PcgM1(Field & in, Field & out)
-  {
-    // [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
-
-    Field tmp(grid);
-    Field Min(grid);
-    CoarseField PleftProj(coarsegrid);
-    CoarseField PleftMss_proj(coarsegrid);
-
-    GridStopWatch SmootherTimer;
-    GridStopWatch MatrixTimer;
-    SmootherTimer.Start();
-    _Smoother(in,Min);
-    SmootherTimer.Stop();
-
-    MatrixTimer.Start();
-    _FineLinop.HermOp(Min,out);
-    MatrixTimer.Stop();
-    axpy(tmp,-1.0,out,in);    // tmp = in - A Min
-
-    GridStopWatch ProjTimer;
-    GridStopWatch CoarseTimer;
-    GridStopWatch PromTimer;
-    ProjTimer.Start();
-    _Aggregates.ProjectToSubspace(PleftProj,tmp);
-    ProjTimer.Stop();
-    CoarseTimer.Start();
-    _CoarseSolver(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s
-    CoarseTimer.Stop();
-    PromTimer.Start();
-    _Aggregates.PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]
-    PromTimer.Stop();
-    std::cout << GridLogMessage << "PcgM1 breakdown "<<std::endl;
-    std::cout << GridLogMessage << "\tSmoother " << SmootherTimer.Elapsed() <<std::endl;
-    std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
-    std::cout << GridLogMessage << "\tProj " << ProjTimer.Elapsed() <<std::endl;
-    std::cout << GridLogMessage << "\tCoarse " << CoarseTimer.Elapsed() <<std::endl;
-    std::cout << GridLogMessage << "\tProm " << PromTimer.Elapsed() <<std::endl;
-
-    axpy(out,1.0,Min,tmp); // Min+tmp
-  }
+  virtual void PcgM1(Field & in, Field & out) =0;
+  virtual void Vstart(Field & x,const Field & src)=0;

   virtual void PcgM2(const Field & in, Field & out) {
     out=in;
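For orientation: the PcgM1 made pure virtual above is, in the ADEF2 realisation re-added below, exactly the operator spelled out in its leading comment. Writing M for the smoother and Q = P A_ss^{-1} P^T for the coarse-grid correction,

    [ (1 - Q A) M + Q ] in  =  M in + Q ( in - A M in )

so the body smooths, forms the fine residual of the smoothed vector, solves that residual on the coarse grid, and adds the correction back (the two axpy calls).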
@@ -249,6 +194,89 @@ class TwoLevelADEF2 : public LinearFunction<Field>
     return dd;
   }

+  /////////////////////////////////////////////////////////////////////
+  // Only Def1 has non-trivial Vout.
+  /////////////////////////////////////////////////////////////////////
+  virtual void Vout  (Field & in, Field & out,Field & src){
+    out = in;
+  }
+
 };

+template<class Field, class CoarseField, class Aggregation>
+class TwoLevelADEF2 : public TwoLevelCG<Field>
+{
+ public:
+  ///////////////////////////////////////////////////////////////////////////////////
+  // Need something that knows how to get from Coarse to fine and back again
+  //  void ProjectToSubspace(CoarseVector &CoarseVec,const FineField &FineVec){
+  //  void PromoteFromSubspace(const CoarseVector &CoarseVec,FineField &FineVec){
+  ///////////////////////////////////////////////////////////////////////////////////
+  GridBase *coarsegrid;
+  Aggregation &_Aggregates;
+  LinearFunction<CoarseField> &_CoarseSolver;
+  LinearFunction<CoarseField> &_CoarseSolverPrecise;
+  ///////////////////////////////////////////////////////////////////////////////////
+
+  // more most opertor functions
+  TwoLevelADEF2(RealD tol,
+                Integer maxit,
+                LinearOperatorBase<Field>   &FineLinop,
+                LinearFunction<Field>       &Smoother,
+                LinearFunction<CoarseField> &CoarseSolver,
+                LinearFunction<CoarseField> &CoarseSolverPrecise,
+                Aggregation &Aggregates
+                ) :
+    TwoLevelCG<Field>(tol,maxit,FineLinop,Smoother,Aggregates.FineGrid),
+    _CoarseSolver(CoarseSolver),
+    _CoarseSolverPrecise(CoarseSolverPrecise),
+    _Aggregates(Aggregates)
+  {
+    coarsegrid = Aggregates.CoarseGrid;
+  };
+
+  virtual void PcgM1(Field & in, Field & out)
+  {
+    // [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
+
+    Field tmp(this->grid);
+    Field Min(this->grid);
+    CoarseField PleftProj(this->coarsegrid);
+    CoarseField PleftMss_proj(this->coarsegrid);
+
+    GridStopWatch SmootherTimer;
+    GridStopWatch MatrixTimer;
+    SmootherTimer.Start();
+    this->_Smoother(in,Min);
+    SmootherTimer.Stop();
+
+    MatrixTimer.Start();
+    this->_FineLinop.HermOp(Min,out);
+    MatrixTimer.Stop();
+    axpy(tmp,-1.0,out,in);    // tmp = in - A Min
+
+    GridStopWatch ProjTimer;
+    GridStopWatch CoarseTimer;
+    GridStopWatch PromTimer;
+    ProjTimer.Start();
+    this->_Aggregates.ProjectToSubspace(PleftProj,tmp);
+    ProjTimer.Stop();
+    CoarseTimer.Start();
+    this->_CoarseSolver(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s
+    CoarseTimer.Stop();
+    PromTimer.Start();
+    this->_Aggregates.PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]
+    PromTimer.Stop();
+    std::cout << GridLogPerformance << "PcgM1 breakdown "<<std::endl;
+    std::cout << GridLogPerformance << "\tSmoother " << SmootherTimer.Elapsed() <<std::endl;
+    std::cout << GridLogPerformance << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
+    std::cout << GridLogPerformance << "\tProj " << ProjTimer.Elapsed() <<std::endl;
+    std::cout << GridLogPerformance << "\tCoarse " << CoarseTimer.Elapsed() <<std::endl;
+    std::cout << GridLogPerformance << "\tProm " << PromTimer.Elapsed() <<std::endl;
+
+    axpy(out,1.0,Min,tmp); // Min+tmp
+  }
+
   virtual void Vstart(Field & x,const Field & src)
   {
     ///////////////////////////////////
@@ -262,65 +290,69 @@ class TwoLevelADEF2 : public LinearFunction<Field>
     //      = src_s - (A guess)_s - src_s + (A guess)_s
     //      = 0
     ///////////////////////////////////
-    Field r(grid);
-    Field mmp(grid);
-    CoarseField PleftProj(coarsegrid);
-    CoarseField PleftMss_proj(coarsegrid);
+    Field r(this->grid);
+    Field mmp(this->grid);
+    CoarseField PleftProj(this->coarsegrid);
+    CoarseField PleftMss_proj(this->coarsegrid);

-    _Aggregates.ProjectToSubspace(PleftProj,src);
-    _CoarseSolverPrecise(PleftProj,PleftMss_proj); // Ass^{-1} r_s
-    _Aggregates.PromoteFromSubspace(PleftMss_proj,x);
+    this->_Aggregates.ProjectToSubspace(PleftProj,src);
+    this->_CoarseSolverPrecise(PleftProj,PleftMss_proj); // Ass^{-1} r_s
+    this->_Aggregates.PromoteFromSubspace(PleftMss_proj,x);

   }

-  /////////////////////////////////////////////////////////////////////
-  // Only Def1 has non-trivial Vout.
-  /////////////////////////////////////////////////////////////////////
-  virtual void Vout  (Field & in, Field & out,Field & src){
-    out = in;
-  }
 };

-template<class Field, class CoarseField, class Aggregation>
-class TwoLevelADEF1ev : public TwoLevelADEF2<Field,CoarseField,Aggregation>
+template<class Field>
+class TwoLevelADEF1defl : public TwoLevelCG<Field>
 {
-  TwoLevelADEF1ev(RealD tol,
+ public:
+  const std::vector<Field> &evec;
+  const std::vector<RealD> &eval;
+
+  TwoLevelADEF1defl(RealD tol,
                     Integer maxit,
                     LinearOperatorBase<Field>   &FineLinop,
                     LinearFunction<Field>       &Smoother,
-                    LinearFunction<CoarseField> &CoarseSolver,
-                    LinearFunction<CoarseField> &CoarseSolverPrecise,
-                    Aggregation &Aggregates ) :
-    TwoLevelADEF2<Field,CoarseField,Aggregation>(tol,maxit,FineLinop,Smoother,CoarseSolver,CoarseSolverPrecise,Aggregates)
+                    std::vector<Field> &_evec,
+                    std::vector<RealD> &_eval) :
+    TwoLevelCG<Field>(tol,maxit,FineLinop,Smoother,_evec[0].Grid()),
+    evec(_evec),
+    eval(_eval)
   {};

+  // Can just inherit existing Vstart
+  // Can just inherit existing Vout
+  // Can just inherit existing M2
+  // Can just inherit existing M3
+
+  // Simple vstart - do nothing
+  virtual void Vstart(Field & x,const Field & src){};
+
+  // Override PcgM1
   virtual void PcgM1(Field & in, Field & out)
   {
-    CoarseField PleftProj(this->coarsegrid);
-    CoarseField PleftMss_proj(this->coarsegrid);
-    Field tmp(this->grid);
+    int N=evec.size();
+    Field Pin(this->grid);
+    Field Qin(this->grid);

     //MP + Q = M(1-AQ) + Q = M
     // // If we are eigenvector deflating in coarse space
     // // Q   = Sum_i |phi_i> 1/lambda_i <phi_i|
     // // A Q = Sum_i |phi_i> <phi_i|
     // // M(1-AQ) = M(1-proj) + Q
-    this->_Aggregates.ProjectToSubspace(PleftProj,in);
-    this->_Aggregates.PromoteFromSubspace(PleftProj,tmp);// tmp = Qin
+    Qin.Checkerboard()=in.Checkerboard();
+    Qin = Zero();
+    Pin = in;
+    for (int i=0;i<N;i++) {
+      const Field& tmp = evec[i];
+      auto ip = TensorRemove(innerProduct(tmp,in));
+      axpy(Qin, ip / eval[i],tmp,Qin);
+      axpy(Pin, -ip ,tmp,Pin);
+    }

-    Pin = in - tmp;
     this->_Smoother(Pin,out);

-    this->_CoarseSolver(PleftProj,PleftMss_proj);
-    this->_Aggregates.PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Qin
-
-    out = out + tmp;
+    out = out + Qin;
   }
 };
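The new TwoLevelADEF1defl trades the coarse-grid solve for explicit eigenvector deflation, as its in-code comments state: with Q = sum_i |phi_i> (1/lambda_i) <phi_i| built from coarse eigenpairs, A Q = sum_i |phi_i><phi_i| on that subspace, so

    M1 in = M (1 - A Q) in + Q in
          = M ( in - sum_i <phi_i|in> phi_i ) + sum_i ( <phi_i|in> / lambda_i ) phi_i

which is the Pin / Qin split accumulated by the axpy loop in PcgM1 above.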
@@ -70,8 +70,8 @@ public:
   Coordinate _istride; // Inner stride i.e. within simd lane
   int _osites;         // _isites*_osites = product(dimensions).
   int _isites;
-  int _fsites;         // _isites*_osites = product(dimensions).
-  int _gsites;
+  int64_t _fsites;     // _isites*_osites = product(dimensions).
+  int64_t _gsites;
   Coordinate _slice_block;// subslice information
   Coordinate _slice_stride;
   Coordinate _slice_nblock;
@@ -183,7 +183,7 @@ public:
   inline int Nsimd(void)  const { return _isites; };// Synonymous with iSites
   inline int oSites(void) const { return _osites; };
   inline int lSites(void) const { return _isites*_osites; };
-  inline int gSites(void) const { return _isites*_osites*_Nprocessors; };
+  inline int64_t gSites(void) const { return (int64_t)_isites*(int64_t)_osites*(int64_t)_Nprocessors; };
   inline int Nd (void)    const { return _ndimension;};

   inline const Coordinate LocalStarts(void) { return _lstart; };
@@ -214,7 +214,7 @@ public:
   ////////////////////////////////////////////////////////////////
   // Global addressing
   ////////////////////////////////////////////////////////////////
-  void GlobalIndexToGlobalCoor(int gidx,Coordinate &gcoor){
+  void GlobalIndexToGlobalCoor(int64_t gidx,Coordinate &gcoor){
     assert(gidx< gSites());
     Lexicographic::CoorFromIndex(gcoor,gidx,_gdimensions);
   }
@@ -222,7 +222,7 @@ public:
     assert(lidx<lSites());
     Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions);
   }
-  void GlobalCoorToGlobalIndex(const Coordinate & gcoor,int & gidx){
+  void GlobalCoorToGlobalIndex(const Coordinate & gcoor,int64_t & gidx){
     gidx=0;
     int mult=1;
     for(int mu=0;mu<_ndimension;mu++) {
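The int to int64_t promotions above matter as soon as the global volume crosses 2^31 sites: _isites*_osites*_Nprocessors overflows a 32-bit int long before any single rank's local volume does. A standalone illustration of where the old 32-bit gSites() would have wrapped (hypothetical lattice sizes, not Grid code):

#include <cstdint>
#include <cstdio>
int main() {
  // 5d domain-wall volumes: x*y*z*t*Ls global sites.
  int64_t small = 64LL*64*64*128*16;    // 2^29 : still fits in int32
  int64_t large = 128LL*128*128*256*16; // 2^33 : silently wraps in int32
  std::printf("small=%lld large=%lld int32-max=%d\n",
              (long long)small, (long long)large, 2147483647);
  return 0;
}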
@@ -360,7 +360,7 @@ public:

 template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
   typedef typename vobj::scalar_object sobj;
-  for(int g=0;g<o.Grid()->_gsites;g++){
+  for(int64_t g=0;g<o.Grid()->_gsites;g++){

     Coordinate gcoor;
     o.Grid()->GlobalIndexToGlobalCoor(g,gcoor);

@@ -432,7 +432,7 @@ public:
 #if 1
   thread_for( lidx, _grid->lSites(), {

-    int gidx;
+    int64_t gidx;
     int o_idx;
     int i_idx;
     int rank;
@@ -471,13 +471,13 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)

   vobj zz = Zero();

-  accelerator_for(sc,coarse->oSites(),1,{
+  accelerator_for(sc,coarse->oSites(),vobj::Nsimd(),{

       // One thread per sub block
       Coordinate coor_c(_ndimension);
       Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions); // Block coordinate

-      vobj cd = zz;
+      auto cd = coalescedRead(zz);

       for(int sb=0;sb<blockVol;sb++){

@@ -488,10 +488,10 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
         for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d];
         Lexicographic::IndexFromCoor(coor_f,sf,fine_rdimensions);

-        cd=cd+fineData_p[sf];
+        cd=cd+coalescedRead(fineData_p[sf]);
       }

-      coarseData_p[sc] = cd;
+      coalescedWrite(coarseData_p[sc],cd);

     });
   return;
@@ -1054,7 +1054,7 @@ void Replicate(const Lattice<vobj> &coarse,Lattice<vobj> & fine)

   Coordinate fcoor(nd);
   Coordinate ccoor(nd);
-  for(int g=0;g<fg->gSites();g++){
+  for(int64_t g=0;g<fg->gSites();g++){

     fg->GlobalIndexToGlobalCoor(g,fcoor);
     for(int d=0;d<nd;d++){
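The blockSum rewrite switches the kernel from one thread per coarse site to Nsimd threads per coarse site, with coalescedRead/coalescedWrite giving each thread its own SIMD lane so that neighbouring GPU threads touch neighbouring addresses. A self-contained model of the access-pattern change (not Grid code; Grid's accelerator_for and coalescedRead manage the lane index internally):

#include <vector>
#include <cstdio>
int main() {
  const int osites = 4, blockVol = 8, Nsimd = 8;
  // fine[site][lane] stored lane-fastest, mirroring Grid's vobj layout
  std::vector<double> fine(osites*blockVol*Nsimd, 1.0);
  std::vector<double> coarse(osites*Nsimd, 0.0);
  for (int sc = 0; sc < osites; sc++)          // device-parallel in Grid
    for (int lane = 0; lane < Nsimd; lane++) { // also device-parallel after the patch
      double cd = 0;
      for (int sb = 0; sb < blockVol; sb++)    // serial reduction over the block
        cd += fine[(sc*blockVol + sb)*Nsimd + lane]; // stride-1 across adjacent lanes
      coarse[sc*Nsimd + lane] = cd;            // coalescedWrite equivalent
    }
  std::printf("coarse[0]=%g (expect %d)\n", coarse[0], blockVol);
  return 0;
}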
@@ -86,8 +86,10 @@ public:
     // expand up one dim at a time
     for(int d=0;d<dims;d++){

-      plocal[d] += 2*depth;
+      if ( processors[d] > 1 ) {
+        plocal[d] += 2*depth;
+      }

       for(int d=0;d<dims;d++){
         global[d] = plocal[d]*processors[d];
       }
@@ -98,11 +100,17 @@ public:
   template<class vobj>
   inline Lattice<vobj> Extract(const Lattice<vobj> &in) const
   {
+    Coordinate processors=unpadded_grid->_processors;
+
     Lattice<vobj> out(unpadded_grid);

     Coordinate local     =unpadded_grid->LocalDimensions();
-    Coordinate fll(dims,depth); // depends on the MPI spread
+    // depends on the MPI spread
+    Coordinate fll(dims,depth);
     Coordinate tll(dims,0); // depends on the MPI spread
+    for(int d=0;d<dims;d++){
+      if( processors[d]==1 ) fll[d]=0;
+    }
     localCopyRegion(in,out,fll,tll,local);
     return out;
   }
@@ -121,6 +129,7 @@ public:
   template<class vobj>
   inline Lattice<vobj> Expand(int dim, const Lattice<vobj> &in, const CshiftImplBase<vobj> &cshift = CshiftImplDefault<vobj>()) const
   {
+    Coordinate processors=unpadded_grid->_processors;
     GridBase *old_grid = in.Grid();
     GridCartesian *new_grid = grids[dim];//These are new grids
     Lattice<vobj> padded(new_grid);
@@ -133,36 +142,48 @@ public:
     std::cout << " dim "<<dim<<" local "<<local << " padding to "<<plocal<<std::endl;

     double tins=0, tshift=0;
-
-    // Middle bit
-    double t = usecond();
-    for(int x=0;x<local[dim];x++){
-      InsertSliceLocal(in,padded,x,depth+x,dim);
-    }
-    tins += usecond() - t;
-
-    // High bit
-    t = usecond();
-    shifted = cshift.Cshift(in,dim,depth);
-    tshift += usecond() - t;
-
-    t=usecond();
-    for(int x=0;x<depth;x++){
-      InsertSliceLocal(shifted,padded,local[dim]-depth+x,depth+local[dim]+x,dim);
-    }
-    tins += usecond() - t;
-
-    // Low bit
-    t = usecond();
-    shifted = cshift.Cshift(in,dim,-depth);
-    tshift += usecond() - t;
-
-    t = usecond();
-    for(int x=0;x<depth;x++){
-      InsertSliceLocal(shifted,padded,x,x,dim);
-    }
-    tins += usecond() - t;
+    int islocal = 0 ;
+    if ( processors[dim] == 1 ) islocal = 1;
+
+    if ( islocal ) {
+
+      double t = usecond();
+      for(int x=0;x<local[dim];x++){
+        InsertSliceLocal(in,padded,x,x,dim);
+      }
+      tins += usecond() - t;
+
+    } else {
+      // Middle bit
+      double t = usecond();
+      for(int x=0;x<local[dim];x++){
+        InsertSliceLocal(in,padded,x,depth+x,dim);
+      }
+      tins += usecond() - t;
+
+      // High bit
+      t = usecond();
+      shifted = cshift.Cshift(in,dim,depth);
+      tshift += usecond() - t;
+
+      t=usecond();
+      for(int x=0;x<depth;x++){
+        InsertSliceLocal(shifted,padded,local[dim]-depth+x,depth+local[dim]+x,dim);
+      }
+      tins += usecond() - t;
+
+      // Low bit
+      t = usecond();
+      shifted = cshift.Cshift(in,dim,-depth);
+      tshift += usecond() - t;
+
+      t = usecond();
+      for(int x=0;x<depth;x++){
+        InsertSliceLocal(shifted,padded,x,x,dim);
+      }
+      tins += usecond() - t;
+    }
     std::cout << GridLogPerformance << "PaddedCell::Expand timings: cshift:" << tshift/1000 << "ms, insert-slice:" << tins/1000 << "ms" << std::endl;

     return padded;
@@ -8,7 +8,7 @@ namespace Grid{
   public:

     template<class coor_t>
-    static accelerator_inline void CoorFromIndex (coor_t& coor,int index,const coor_t &dims){
+    static accelerator_inline void CoorFromIndex (coor_t& coor,int64_t index,const coor_t &dims){
       int nd= dims.size();
       coor.resize(nd);
       for(int d=0;d<nd;d++){
@@ -18,28 +18,45 @@ namespace Grid{
     }

     template<class coor_t>
-    static accelerator_inline void IndexFromCoor (const coor_t& coor,int &index,const coor_t &dims){
+    static accelerator_inline void IndexFromCoor (const coor_t& coor,int64_t &index,const coor_t &dims){
       int nd=dims.size();
       int stride=1;
       index=0;
       for(int d=0;d<nd;d++){
-        index = index+stride*coor[d];
+        index = index+(int64_t)stride*coor[d];
         stride=stride*dims[d];
       }
     }
+    template<class coor_t>
+    static accelerator_inline void IndexFromCoor (const coor_t& coor,int &index,const coor_t &dims){
+      int64_t index64;
+      IndexFromCoor(coor,index64,dims);
+      assert(index64<2*1024*1024*1024LL);
+      index = (int) index64;
+    }

     template<class coor_t>
-    static inline void IndexFromCoorReversed (const coor_t& coor,int &index,const coor_t &dims){
+    static inline void IndexFromCoorReversed (const coor_t& coor,int64_t &index,const coor_t &dims){
       int nd=dims.size();
       int stride=1;
       index=0;
       for(int d=nd-1;d>=0;d--){
-        index = index+stride*coor[d];
+        index = index+(int64_t)stride*coor[d];
        stride=stride*dims[d];
       }
     }
     template<class coor_t>
-    static inline void CoorFromIndexReversed (coor_t& coor,int index,const coor_t &dims){
+    static inline void IndexFromCoorReversed (const coor_t& coor,int &index,const coor_t &dims){
+      int64_t index64;
+      IndexFromCoorReversed(coor,index64,dims);
+      if ( index64>=2*1024*1024*1024LL ){
+        std::cout << " IndexFromCoorReversed " << coor<<" index " << index64<< " dims "<<dims<<std::endl;
+      }
+      assert(index64<2*1024*1024*1024LL);
+      index = (int) index64;
+    }
+    template<class coor_t>
+    static inline void CoorFromIndexReversed (coor_t& coor,int64_t index,const coor_t &dims){
       int nd= dims.size();
       coor.resize(nd);
       for(int d=nd-1;d>=0;d--){
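These Lexicographic changes keep a 32-bit convenience overload that funnels through the 64-bit path and asserts on overflow. A round-trip check of the same index arithmetic (standalone sketch mirroring IndexFromCoor's stride loop, not Grid code):

#include <vector>
#include <cstdint>
#include <cassert>
int main() {
  std::vector<int> dims = {128,128,128,256,16};   // > 2^31 total sites
  std::vector<int> coor = {127,127,127,255,15};   // the last site
  int64_t index = 0, stride = 1;                  // both must be 64-bit
  for (size_t d = 0; d < dims.size(); d++) {
    index  += stride * coor[d];                   // lexicographic accumulate
    stride *= dims[d];
  }
  assert(index == (int64_t(1)<<33) - 1);          // 8589934591: needs int64
  return 0;
}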
systems/Frontier/benchmarks/bench2.slurm (new executable file, 43 lines)
@@ -0,0 +1,43 @@
+#!/bin/bash -l
+#SBATCH --job-name=bench
+##SBATCH --partition=small-g
+#SBATCH --nodes=2
+#SBATCH --ntasks-per-node=8
+#SBATCH --cpus-per-task=7
+#SBATCH --gpus-per-node=8
+#SBATCH --time=00:10:00
+#SBATCH --account=phy157_dwf
+#SBATCH --gpu-bind=none
+#SBATCH --exclusive
+#SBATCH --mem=0
+
+cat << EOF > select_gpu
+#!/bin/bash
+export GPU_MAP=(0 1 2 3 7 6 5 4)
+export NUMA_MAP=(3 3 1 1 2 2 0 0)
+export GPU=\${GPU_MAP[\$SLURM_LOCALID]}
+export NUMA=\${NUMA_MAP[\$SLURM_LOCALID]}
+export HIP_VISIBLE_DEVICES=\$GPU
+unset ROCR_VISIBLE_DEVICES
+echo RANK \$SLURM_LOCALID using GPU \$GPU
+exec numactl -m \$NUMA -N \$NUMA \$*
+EOF
+
+chmod +x ./select_gpu
+
+root=$HOME/Frontier/Grid/systems/Frontier/
+source ${root}/sourceme.sh
+
+export OMP_NUM_THREADS=7
+export MPICH_GPU_SUPPORT_ENABLED=1
+export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
+
+for vol in 32.32.32.64
+do
+srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 0 --grid $vol > log.shm0.ov.$vol
+srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.ov.$vol
+
+srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 0 --grid $vol > log.shm0.seq.$vol
+srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.seq.$vol
+done
systems/Frontier/config-command (new file, 23 lines)
@@ -0,0 +1,23 @@
+CLIME=`spack find --paths c-lime@2-3-9 | grep c-lime| cut -c 15-`
+../../configure --enable-comms=mpi-auto \
+--with-lime=$CLIME \
+--enable-unified=no \
+--enable-shm=nvlink \
+--enable-tracing=timer \
+--enable-accelerator=hip \
+--enable-gen-simd-width=64 \
+--disable-gparity \
+--disable-fermion-reps \
+--enable-simd=GPU \
+--enable-accelerator-cshift \
+--with-gmp=$OLCF_GMP_ROOT \
+--with-fftw=$FFTW_DIR/.. \
+--with-mpfr=/opt/cray/pe/gcc/mpfr/3.1.4/ \
+--disable-fermion-reps \
+CXX=hipcc MPICXX=mpicxx \
+CXXFLAGS="-fPIC -I{$ROCM_PATH}/include/ -std=c++14 -I${MPICH_DIR}/include -L/lib64 " \
+LDFLAGS="-L/lib64 -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -lamdhip64 "
systems/Frontier/mpiwrapper.sh (new executable file, 13 lines)
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+lrank=$SLURM_LOCALID
+lgpu=(0 1 2 3 7 6 5 4)
+
+export ROCR_VISIBLE_DEVICES=${lgpu[$lrank]}
+
+echo "`hostname` - $lrank device=$ROCR_VISIBLE_DEVICES "
+
+$*
systems/Frontier/sourceme.sh (new file, 13 lines)
@@ -0,0 +1,13 @@
+. /autofs/nccs-svm1_home1/paboyle/Crusher/Grid/spack/share/spack/setup-env.sh
+spack load c-lime
+#export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/sw/crusher/spack-envs/base/opt/cray-sles15-zen3/gcc-11.2.0/gperftools-2.9.1-72ubwtuc5wcz2meqltbfdb76epufgzo2/lib
+module load emacs
+module load PrgEnv-gnu
+module load rocm
+module load cray-mpich/8.1.23
+module load gmp
+module load cray-fftw
+module load craype-accel-amd-gfx90a
+export LD_LIBRARY_PATH=/opt/gcc/mpfr/3.1.4/lib:$LD_LIBRARY_PATH
+#Hack for lib
+#export LD_LIBRARY_PATH=`pwd`:$LD_LIBRARY_PATH
systems/Frontier/wrap.sh (new executable file, 9 lines)
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+export HIP_VISIBLE_DEVICES=$ROCR_VISIBLE_DEVICES
+unset ROCR_VISIBLE_DEVICES
+
+#rank=$SLURM_PROCID
+#rocprof -d rocprof.$rank -o rocprof.$rank/results.rank$SLURM_PROCID.csv --sys-trace $@
+
+$@
@@ -144,12 +144,7 @@ int main (int argc, char ** argv)
   HermOpAdaptor<LatticeFermionD> HOA(HermDefOp);

   int pp=16;
-  //  LittleDiracOpCol.CoarsenOperator(HOA,Aggregates);
-  //  std::cout << "LittleDiracOp old " << LittleDiracOpCol._A[pp]<<std::endl;
-  LittleDiracOp.CoarsenOperatorColoured(HOA,Aggregates);
-  //  LittleDiracOp.ExchangeCoarseLinks();
-
-  //  std::cout << "LittleDiracOp new " << LittleDiracOp._A[pp]<<std::endl;
+  LittleDiracOp.CoarsenOperator(HOA,Aggregates);

   ///////////////////////////////////////////////////
   // Test the operator
@@ -86,6 +86,12 @@ int main (int argc, char ** argv)
   Grid_init(&argc,&argv);

   const int Ls=16;
+  const int nbasis = 40;
+  const int cb = 0 ;
+  RealD mass=0.01;
+  RealD M5=1.8;
+  RealD b=1.5;
+  RealD c=0.5;

   GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
                                                          GridDefaultSimd(Nd,vComplex::Nsimd()),
@@ -121,10 +127,6 @@ int main (int argc, char ** argv)
   NerscIO::readConfiguration(Umu,header,file);

   //////////////////////// Fermion action //////////////////////////////////
-  RealD mass=0.01;
-  RealD M5=1.8;
-  RealD b=1.5;
-  RealD c=0.5;
   MobiusFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c);

   SchurDiagMooeeOperator<MobiusFermionD, LatticeFermion> HermOpEO(Ddwf);
@@ -143,8 +145,6 @@ int main (int argc, char ** argv)
   ////////////////////////////////////////////////////////////
   ///////////// Coarse basis and Little Dirac Operator ///////
   ////////////////////////////////////////////////////////////
-  const int nbasis = 40;
-  const int cb = 0 ;
   typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperator;
   typedef LittleDiracOperator::CoarseVector CoarseVector;

@@ -154,6 +154,18 @@ int main (int argc, char ** argv)
   // Warning: This routine calls PVdagM.Op, not PVdagM.HermOp
   typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
   Subspace Aggregates(Coarse5d,FrbGrid,cb);
+#if 1
+  Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis,
+                                     95.0,0.1,
+                                     //   400,200,200 -- 48 iters
+                                     //   600,200,200 -- 38 iters, 162s
+                                     //   600,200,100 -- 38 iters, 169s
+                                     //   600,200,50  -- 88 iters. 370s
+                                     600,
+                                     200,
+                                     100,
+                                     0.0);
+#else
   Aggregates.CreateSubspaceChebyshev(RNG5,
                                      HermOpEO,
                                      nbasis,
@@ -172,19 +184,22 @@ int main (int argc, char ** argv)
                                     //        0.1, // nbasis 16 -- 142; sloppy solve
                                     0.1, // nbasis 24
                                     300);
+#endif
   ////////////////////////////////////////////////////////////
   // Need to check about red-black grid coarsening
   ////////////////////////////////////////////////////////////
   LittleDiracOperator LittleDiracOp(geom,FrbGrid,Coarse5d);
-  LittleDiracOp.CoarsenOperatorColoured(FineHermOp,Aggregates);
+  LittleDiracOp.CoarsenOperator(FineHermOp,Aggregates);

   // Try projecting to one hop only
   std::cout << " projecting coarse matrix "<<std::endl;
   LittleDiracOperator LittleDiracOpProj(geom_nn,FrbGrid,Coarse5d);
   LittleDiracOpProj.ProjectNearestNeighbour(0.5,LittleDiracOp);

   typedef HermitianLinearOperator<LittleDiracOperator,CoarseVector> HermMatrix;
   HermMatrix CoarseOp (LittleDiracOp);
+
+  HermMatrix CoarseOpProj (LittleDiracOpProj);

   //////////////////////////////////////////
   // Build a coarse lanczos
   //////////////////////////////////////////
@@ -200,11 +215,13 @@ int main (int argc, char ** argv)
   std::vector<RealD>            eval(Nm);
   std::vector<CoarseVector>     evec(Nm,Coarse5d);
   CoarseVector c_src(Coarse5d); c_src=1.0;
+  CoarseVector c_res(Coarse5d);

   PowerMethod<CoarseVector>     cPM;   cPM(CoarseOp,c_src);

   IRL.calc(eval,evec,c_src,Nconv);
+  DeflatedGuesser<CoarseVector> DeflCoarseGuesser(evec,eval);

   //////////////////////////////////////////
   // Build a coarse space solver
@@ -216,7 +233,53 @@ int main (int argc, char ** argv)

   //  HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,CoarseZeroGuesser);
   HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,DeflCoarseGuesser);
+  c_res=Zero();
+  HPDSolve(c_src,c_res);
+
+  //////////////////////////////////////////////////////////////////////////
+  // Deflated (with real op EV's) solve for the projected coarse op
+  // Work towards ADEF1 in the coarse space
+  //////////////////////////////////////////////////////////////////////////
+  HPDSolver<CoarseVector> HPDSolveProj(CoarseOpProj,CG,DeflCoarseGuesser);
+  c_res=Zero();
+  HPDSolveProj(c_src,c_res);
+
+  //////////////////////////////////////////////////////////////////////
+  // Coarse ADEF1 with deflation space
+  //////////////////////////////////////////////////////////////////////
+  //  ChebyshevSmoother<CoarseVector,HermMatrix > CoarseSmoother(2.0,40.,8,CoarseOpProj);  // 311
+  ChebyshevSmoother<CoarseVector,HermMatrix > CoarseSmoother(2.0,36.,12,CoarseOpProj);  // 311
+
+  ////////////////////////////////////////////////////////
+  // CG, Cheby mode spacing 200,200
+  // Unprojected Coarse CG solve to 1e-8 : 190 iters, 4.9s
+  // Unprojected Coarse CG solve to 4e-2 : 33 iters, 0.8s
+  // Projected Coarse CG solve to 1e-8 : 100 iters, 0.36s
+  ////////////////////////////////////////////////////////
+  // CoarseSmoother(1.0,48.,8,CoarseOpProj); 48 evecs
+  ////////////////////////////////////////////////////////
+  // ADEF1 Coarse solve to 1e-8 : 44 iters, 2.34s  2.1x gain
+  // ADEF1 Coarse solve to 4e-2 : 7 iters, 0.4s
+  // HDCG 38 iters 162s
+  //
+  // CoarseSmoother(1.0,40.,8,CoarseOpProj); 48 evecs
+  // ADEF1 Coarse solve to 1e-8 : 37 iters, 2.0s  2.1x gain
+  // ADEF1 Coarse solve to 4e-2 : 6 iters, 0.36s
+  // HDCG 38 iters 169s
+
+  TwoLevelADEF1defl<CoarseVector>
+    cADEF1(1.0e-8, 100,
+           CoarseOp,
+           CoarseSmoother,
+           evec,eval);
+
+  c_res=Zero();
+  cADEF1(c_src,c_res);
+
+  cADEF1.Tolerance = 4.0e-2;
+  c_res=Zero();
+  cADEF1(c_src,c_res);

   //////////////////////////////////////////
   // Build a smoother
   //////////////////////////////////////////
@@ -242,10 +305,6 @@ int main (int argc, char ** argv)
   //  std::vector<int> ords({7,8,10}); // Nbasis 40 == 40,38,36 iters (320,342,396 mults)
   std::vector<int> ords({7}); // Nbasis 40 == 40 iters (320 mults)

-  // Standard CG
-  //  result=Zero();
-  //  CGfine(HermOpEO, src, result);
-
   for(int l=0;l<los.size();l++){

     RealD lo = los[l];
@@ -271,8 +330,24 @@ int main (int argc, char ** argv)

     result=Zero();
     HDCG(src,result);
+
+    TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace>
+      HDCGdefl(1.0e-8, 3000,
+               FineHermOp,
+               Smoother,
+               cADEF1,
+               HPDSolve,
+               Aggregates);
+
+    result=Zero();
+    HDCGdefl(src,result);
+
   }
   }

+  // Standard CG
+  result=Zero();
+  CGfine(HermOpEO, src, result);
+
   Grid_finalize();
   return 0;