mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-12 16:55:37 +00:00
Simplifying the MultiRHS solver to make it do SRHS *and* MRHS
This commit is contained in:
parent
ee3b3c4c56
commit
070b61f08f
@ -1,157 +0,0 @@
|
|||||||
/*************************************************************************************
|
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
|
||||||
|
|
||||||
Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
|
|
||||||
|
|
||||||
Copyright (C) 2015
|
|
||||||
|
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
|
||||||
*************************************************************************************/
|
|
||||||
/* END LEGAL */
|
|
||||||
#ifndef GRID_DEFLATION_H
|
|
||||||
#define GRID_DEFLATION_H
|
|
||||||
|
|
||||||
namespace Grid {
|
|
||||||
|
|
||||||
template<class Field>
|
|
||||||
class ZeroGuesser: public LinearFunction<Field> {
|
|
||||||
public:
|
|
||||||
using LinearFunction<Field>::operator();
|
|
||||||
virtual void operator()(const Field &src, Field &guess) { guess = Zero(); };
|
|
||||||
};
|
|
||||||
template<class Field>
|
|
||||||
class DoNothingGuesser: public LinearFunction<Field> {
|
|
||||||
public:
|
|
||||||
using LinearFunction<Field>::operator();
|
|
||||||
virtual void operator()(const Field &src, Field &guess) { };
|
|
||||||
};
|
|
||||||
template<class Field>
|
|
||||||
class SourceGuesser: public LinearFunction<Field> {
|
|
||||||
public:
|
|
||||||
using LinearFunction<Field>::operator();
|
|
||||||
virtual void operator()(const Field &src, Field &guess) { guess = src; };
|
|
||||||
};
|
|
||||||
|
|
||||||
////////////////////////////////
|
|
||||||
// Fine grid deflation
|
|
||||||
////////////////////////////////
|
|
||||||
template<class Field>
|
|
||||||
class DeflatedGuesser: public LinearFunction<Field> {
|
|
||||||
private:
|
|
||||||
const std::vector<Field> &evec;
|
|
||||||
const std::vector<RealD> &eval;
|
|
||||||
const unsigned int N;
|
|
||||||
|
|
||||||
public:
|
|
||||||
using LinearFunction<Field>::operator();
|
|
||||||
|
|
||||||
DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval)
|
|
||||||
: DeflatedGuesser(_evec, _eval, _evec.size())
|
|
||||||
{}
|
|
||||||
|
|
||||||
DeflatedGuesser(const std::vector<Field> & _evec, const std::vector<RealD> & _eval, const unsigned int _N)
|
|
||||||
: evec(_evec), eval(_eval), N(_N)
|
|
||||||
{
|
|
||||||
assert(evec.size()==eval.size());
|
|
||||||
assert(N <= evec.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual void operator()(const Field &src,Field &guess) {
|
|
||||||
guess = Zero();
|
|
||||||
for (int i=0;i<N;i++) {
|
|
||||||
const Field& tmp = evec[i];
|
|
||||||
axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess);
|
|
||||||
}
|
|
||||||
guess.Checkerboard() = src.Checkerboard();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class FineField, class CoarseField>
|
|
||||||
class LocalCoherenceDeflatedGuesser: public LinearFunction<FineField> {
|
|
||||||
private:
|
|
||||||
const std::vector<FineField> &subspace;
|
|
||||||
const std::vector<CoarseField> &evec_coarse;
|
|
||||||
const std::vector<RealD> &eval_coarse;
|
|
||||||
public:
|
|
||||||
|
|
||||||
using LinearFunction<FineField>::operator();
|
|
||||||
LocalCoherenceDeflatedGuesser(const std::vector<FineField> &_subspace,
|
|
||||||
const std::vector<CoarseField> &_evec_coarse,
|
|
||||||
const std::vector<RealD> &_eval_coarse)
|
|
||||||
: subspace(_subspace),
|
|
||||||
evec_coarse(_evec_coarse),
|
|
||||||
eval_coarse(_eval_coarse)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
void operator()(const FineField &src,FineField &guess) {
|
|
||||||
int N = (int)evec_coarse.size();
|
|
||||||
CoarseField src_coarse(evec_coarse[0].Grid());
|
|
||||||
CoarseField guess_coarse(evec_coarse[0].Grid()); guess_coarse = Zero();
|
|
||||||
blockProject(src_coarse,src,subspace);
|
|
||||||
for (int i=0;i<N;i++) {
|
|
||||||
const CoarseField & tmp = evec_coarse[i];
|
|
||||||
axpy(guess_coarse,TensorRemove(innerProduct(tmp,src_coarse)) / eval_coarse[i],tmp,guess_coarse);
|
|
||||||
}
|
|
||||||
blockPromote(guess_coarse,guess,subspace);
|
|
||||||
guess.Checkerboard() = src.Checkerboard();
|
|
||||||
};
|
|
||||||
|
|
||||||
void operator()(const std::vector<FineField> &src,std::vector<FineField> &guess) {
|
|
||||||
int Nevec = (int)evec_coarse.size();
|
|
||||||
int Nsrc = (int)src.size();
|
|
||||||
// make temp variables
|
|
||||||
std::vector<CoarseField> src_coarse(Nsrc,evec_coarse[0].Grid());
|
|
||||||
std::vector<CoarseField> guess_coarse(Nsrc,evec_coarse[0].Grid());
|
|
||||||
//Preporcessing
|
|
||||||
std::cout << GridLogMessage << "Start BlockProject for loop" << std::endl;
|
|
||||||
for (int j=0;j<Nsrc;j++)
|
|
||||||
{
|
|
||||||
guess_coarse[j] = Zero();
|
|
||||||
std::cout << GridLogMessage << "BlockProject iter: " << j << std::endl;
|
|
||||||
blockProject(src_coarse[j],src[j],subspace);
|
|
||||||
}
|
|
||||||
//deflation set up for eigen vector batchsize 1 and source batch size equal number of sources
|
|
||||||
std::cout << GridLogMessage << "Start ProjectAccum for loop" << std::endl;
|
|
||||||
for (int i=0;i<Nevec;i++)
|
|
||||||
{
|
|
||||||
std::cout << GridLogMessage << "ProjectAccum Nvec: " << i << std::endl;
|
|
||||||
const CoarseField & tmp = evec_coarse[i];
|
|
||||||
for (int j=0;j<Nsrc;j++)
|
|
||||||
{
|
|
||||||
axpy(guess_coarse[j],TensorRemove(innerProduct(tmp,src_coarse[j])) / eval_coarse[i],tmp,guess_coarse[j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
//postprocessing
|
|
||||||
std::cout << GridLogMessage << "Start BlockPromote for loop" << std::endl;
|
|
||||||
for (int j=0;j<Nsrc;j++)
|
|
||||||
{
|
|
||||||
std::cout << GridLogMessage << "BlockProject iter: " << j << std::endl;
|
|
||||||
blockPromote(guess_coarse[j],guess[j],subspace);
|
|
||||||
guess[j].Checkerboard() = src[j].Checkerboard();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
#endif
|
|
@ -73,7 +73,7 @@ public:
|
|||||||
GridBase * FineGrid(void) { return _FineGrid; }; // this is all the linalg routines need to know
|
GridBase * FineGrid(void) { return _FineGrid; }; // this is all the linalg routines need to know
|
||||||
GridCartesian * CoarseGrid(void) { return _CoarseGrid; }; // this is all the linalg routines need to know
|
GridCartesian * CoarseGrid(void) { return _CoarseGrid; }; // this is all the linalg routines need to know
|
||||||
|
|
||||||
void ShiftMatrix(RealD shift)
|
/* void ShiftMatrix(RealD shift)
|
||||||
{
|
{
|
||||||
int Nd=_FineGrid->Nd();
|
int Nd=_FineGrid->Nd();
|
||||||
Coordinate zero_shift(Nd,0);
|
Coordinate zero_shift(Nd,0);
|
||||||
@ -102,6 +102,7 @@ public:
|
|||||||
assert(nfound==geom.npoint);
|
assert(nfound==geom.npoint);
|
||||||
ExchangeCoarseLinks();
|
ExchangeCoarseLinks();
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
GeneralCoarsenedMatrix(NonLocalStencilGeometry &_geom,GridBase *FineGrid, GridCartesian * CoarseGrid)
|
GeneralCoarsenedMatrix(NonLocalStencilGeometry &_geom,GridBase *FineGrid, GridCartesian * CoarseGrid)
|
||||||
: geom(_geom),
|
: geom(_geom),
|
||||||
@ -459,6 +460,9 @@ public:
|
|||||||
CoarseScalar InnerProd(CoarseGrid());
|
CoarseScalar InnerProd(CoarseGrid());
|
||||||
blockOrthogonalise(InnerProd,Subspace.subspace);
|
blockOrthogonalise(InnerProd,Subspace.subspace);
|
||||||
|
|
||||||
|
for(int s=0;s<Subspace.subspace.size();s++){
|
||||||
|
std::cout << " subspace norm "<<norm2(Subspace.subspace[s])<<std::endl;
|
||||||
|
}
|
||||||
const int npoint = geom.npoint;
|
const int npoint = geom.npoint;
|
||||||
|
|
||||||
Coordinate clatt = CoarseGrid()->GlobalDimensions();
|
Coordinate clatt = CoarseGrid()->GlobalDimensions();
|
||||||
@ -494,6 +498,7 @@ public:
|
|||||||
}
|
}
|
||||||
phase=exp(phase*ci);
|
phase=exp(phase*ci);
|
||||||
Mkl(k,l) = phase;
|
Mkl(k,l) = phase;
|
||||||
|
std::cout<<" Mkl "<<k<<" "<<l<<" "<<phase<<std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
invMkl = Mkl.inverse();
|
invMkl = Mkl.inverse();
|
||||||
@ -548,6 +553,7 @@ public:
|
|||||||
tmat-=usecond();
|
tmat-=usecond();
|
||||||
linop.Op(phaV,MphaV);
|
linop.Op(phaV,MphaV);
|
||||||
tmat+=usecond();
|
tmat+=usecond();
|
||||||
|
std::cout << i << " " <<p << " MphaV "<<norm2(MphaV)<<" "<<norm2(phaV)<<std::endl;
|
||||||
|
|
||||||
tproj-=usecond();
|
tproj-=usecond();
|
||||||
blockProject(coarseInner,MphaV,Subspace.subspace);
|
blockProject(coarseInner,MphaV,Subspace.subspace);
|
||||||
@ -555,6 +561,7 @@ public:
|
|||||||
|
|
||||||
ComputeProj[p] = coarseInner;
|
ComputeProj[p] = coarseInner;
|
||||||
tproj+=usecond();
|
tproj+=usecond();
|
||||||
|
std::cout << i << " " <<p << " ComputeProj "<<norm2(ComputeProj[p])<<std::endl;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -563,6 +570,7 @@ public:
|
|||||||
FT[k] = Zero();
|
FT[k] = Zero();
|
||||||
for(int l=0;l<npoint;l++){
|
for(int l=0;l<npoint;l++){
|
||||||
FT[k]= FT[k]+ invMkl(l,k)*ComputeProj[l];
|
FT[k]= FT[k]+ invMkl(l,k)*ComputeProj[l];
|
||||||
|
std::cout << i << " " <<k <<" "<<l<< " FT "<<norm2(FT[k])<<" "<<invMkl(l,k)<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
int osites=CoarseGrid()->oSites();
|
int osites=CoarseGrid()->oSites();
|
||||||
@ -583,6 +591,10 @@ public:
|
|||||||
// PopulateAdag();
|
// PopulateAdag();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for(int p=0;p<geom.npoint;p++){
|
||||||
|
std::cout << " _A["<<p<<"] "<<norm2(_A[p])<<std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
// Need to write something to populate Adag from A
|
// Need to write something to populate Adag from A
|
||||||
ExchangeCoarseLinks();
|
ExchangeCoarseLinks();
|
||||||
std::cout << GridLogMessage<<"CoarsenOperator eigen "<<teigen<<" us"<<std::endl;
|
std::cout << GridLogMessage<<"CoarsenOperator eigen "<<teigen<<" us"<<std::endl;
|
||||||
|
@ -51,15 +51,15 @@ public:
|
|||||||
typedef iVector<CComplex,nbasis > Cvec;
|
typedef iVector<CComplex,nbasis > Cvec;
|
||||||
typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field
|
typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field
|
||||||
typedef Lattice<Fobj > FineField;
|
typedef Lattice<Fobj > FineField;
|
||||||
|
typedef Lattice<CComplex > FineComplexField;
|
||||||
typedef CoarseVector Field;
|
typedef CoarseVector Field;
|
||||||
|
|
||||||
////////////////////
|
////////////////////
|
||||||
// Data members
|
// Data members
|
||||||
////////////////////
|
////////////////////
|
||||||
GridCartesian * _CoarseGridMulti;
|
GridCartesian * _CoarseGridMulti;
|
||||||
GridCartesian * _CoarseGrid;
|
|
||||||
GeneralCoarseOp & _Op;
|
|
||||||
NonLocalStencilGeometry geom;
|
NonLocalStencilGeometry geom;
|
||||||
|
NonLocalStencilGeometry geom_srhs;
|
||||||
PaddedCell Cell;
|
PaddedCell Cell;
|
||||||
GeneralLocalStencil Stencil;
|
GeneralLocalStencil Stencil;
|
||||||
|
|
||||||
@ -77,20 +77,57 @@ public:
|
|||||||
GridBase * Grid(void) { return _CoarseGridMulti; }; // this is all the linalg routines need to know
|
GridBase * Grid(void) { return _CoarseGridMulti; }; // this is all the linalg routines need to know
|
||||||
GridCartesian * CoarseGrid(void) { return _CoarseGridMulti; }; // this is all the linalg routines need to know
|
GridCartesian * CoarseGrid(void) { return _CoarseGridMulti; }; // this is all the linalg routines need to know
|
||||||
|
|
||||||
MultiGeneralCoarsenedMatrix(GeneralCoarseOp & Op,GridCartesian *CoarseGridMulti) :
|
// Can be used to do I/O on the operator matrices externally
|
||||||
_Op(Op),
|
void SetMatrix (int p,CoarseMatrix & A)
|
||||||
_CoarseGrid(Op.CoarseGrid()),
|
{
|
||||||
|
assert(A.size()==geom_srhs.npoint);
|
||||||
|
GridtoBLAS(A[p],BLAS_A[p]);
|
||||||
|
}
|
||||||
|
void GetMatrix (int p,CoarseMatrix & A)
|
||||||
|
{
|
||||||
|
assert(A.size()==geom_srhs.npoint);
|
||||||
|
BLAStoGrid(A[p],BLAS_A[p]);
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
void CopyMatrix (GeneralCoarseOp &_Op)
|
||||||
|
{
|
||||||
|
for(int p=0;p<geom.npoint;p++){
|
||||||
|
auto Aup = _Op.Cell.Extract(_Op._A[p]);
|
||||||
|
//Unpadded
|
||||||
|
GridtoBLAS(Aup,BLAS_A[p]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
void CheckMatrix (GeneralCoarseOp &_Op)
|
||||||
|
{
|
||||||
|
std::cout <<"************* Checking the little direc operator mRHS"<<std::endl;
|
||||||
|
for(int p=0;p<geom.npoint;p++){
|
||||||
|
//Unpadded
|
||||||
|
auto Aup = _Op.Cell.Extract(_Op._A[p]);
|
||||||
|
auto Ack = Aup;
|
||||||
|
BLAStoGrid(Ack,BLAS_A[p]);
|
||||||
|
std::cout << p<<" Ack "<<norm2(Ack)<<std::endl;
|
||||||
|
std::cout << p<<" Aup "<<norm2(Aup)<<std::endl;
|
||||||
|
}
|
||||||
|
std::cout <<"************* "<<std::endl;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
MultiGeneralCoarsenedMatrix(NonLocalStencilGeometry &_geom,GridCartesian *CoarseGridMulti) :
|
||||||
_CoarseGridMulti(CoarseGridMulti),
|
_CoarseGridMulti(CoarseGridMulti),
|
||||||
geom(_CoarseGridMulti,Op.geom.hops,Op.geom.skip+1),
|
geom_srhs(_geom),
|
||||||
Cell(Op.geom.Depth(),_CoarseGridMulti),
|
geom(_CoarseGridMulti,_geom.hops,_geom.skip+1),
|
||||||
|
Cell(geom.Depth(),_CoarseGridMulti),
|
||||||
Stencil(Cell.grids.back(),geom.shifts) // padded cell stencil
|
Stencil(Cell.grids.back(),geom.shifts) // padded cell stencil
|
||||||
{
|
{
|
||||||
int32_t padded_sites = _Op._A[0].Grid()->lSites();
|
int32_t padded_sites = Cell.grids.back()->lSites();
|
||||||
int32_t unpadded_sites = _CoarseGrid->lSites();
|
int32_t unpadded_sites = CoarseGridMulti->lSites();
|
||||||
|
|
||||||
int32_t nrhs = CoarseGridMulti->FullDimensions()[0]; // # RHS
|
int32_t nrhs = CoarseGridMulti->FullDimensions()[0]; // # RHS
|
||||||
int32_t orhs = nrhs/CComplex::Nsimd();
|
int32_t orhs = nrhs/CComplex::Nsimd();
|
||||||
|
|
||||||
|
padded_sites = padded_sites/nrhs;
|
||||||
|
unpadded_sites = unpadded_sites/nrhs;
|
||||||
|
|
||||||
/////////////////////////////////////////////////
|
/////////////////////////////////////////////////
|
||||||
// Device data vector storage
|
// Device data vector storage
|
||||||
/////////////////////////////////////////////////
|
/////////////////////////////////////////////////
|
||||||
@ -98,9 +135,9 @@ public:
|
|||||||
for(int p=0;p<geom.npoint;p++){
|
for(int p=0;p<geom.npoint;p++){
|
||||||
BLAS_A[p].resize (unpadded_sites); // no ghost zone, npoint elements
|
BLAS_A[p].resize (unpadded_sites); // no ghost zone, npoint elements
|
||||||
}
|
}
|
||||||
|
|
||||||
BLAS_B.resize(nrhs *padded_sites); // includes ghost zone
|
BLAS_B.resize(nrhs *padded_sites); // includes ghost zone
|
||||||
BLAS_C.resize(nrhs *unpadded_sites); // no ghost zone
|
BLAS_C.resize(nrhs *unpadded_sites); // no ghost zone
|
||||||
|
|
||||||
BLAS_AP.resize(geom.npoint);
|
BLAS_AP.resize(geom.npoint);
|
||||||
BLAS_BP.resize(geom.npoint);
|
BLAS_BP.resize(geom.npoint);
|
||||||
for(int p=0;p<geom.npoint;p++){
|
for(int p=0;p<geom.npoint;p++){
|
||||||
@ -113,21 +150,20 @@ public:
|
|||||||
// Pointers to data
|
// Pointers to data
|
||||||
/////////////////////////////////////////////////
|
/////////////////////////////////////////////////
|
||||||
|
|
||||||
// Site identity mapping for A, C
|
// Site identity mapping for A
|
||||||
for(int p=0;p<geom.npoint;p++){
|
for(int p=0;p<geom.npoint;p++){
|
||||||
for(int ss=0;ss<unpadded_sites;ss++){
|
for(int ss=0;ss<unpadded_sites;ss++){
|
||||||
ComplexD *ptr = (ComplexD *)&BLAS_A[p][ss];
|
ComplexD *ptr = (ComplexD *)&BLAS_A[p][ss];
|
||||||
acceleratorPut(BLAS_AP[p][ss],ptr);
|
acceleratorPut(BLAS_AP[p][ss],ptr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Site identity mapping for C
|
||||||
for(int ss=0;ss<unpadded_sites;ss++){
|
for(int ss=0;ss<unpadded_sites;ss++){
|
||||||
ComplexD *ptr = (ComplexD *)&BLAS_C[ss*nrhs];
|
ComplexD *ptr = (ComplexD *)&BLAS_C[ss*nrhs];
|
||||||
acceleratorPut(BLAS_CP[ss],ptr);
|
acceleratorPut(BLAS_CP[ss],ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
/////////////////////////////////////////////////
|
|
||||||
// Neighbour table is more complicated
|
// Neighbour table is more complicated
|
||||||
/////////////////////////////////////////////////
|
|
||||||
int32_t j=0; // Interior point counter (unpadded)
|
int32_t j=0; // Interior point counter (unpadded)
|
||||||
for(int32_t s=0;s<padded_sites;s++){ // 4 volume, padded
|
for(int32_t s=0;s<padded_sites;s++){ // 4 volume, padded
|
||||||
int ghost_zone=0;
|
int ghost_zone=0;
|
||||||
@ -150,18 +186,9 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
assert(j==unpadded_sites);
|
assert(j==unpadded_sites);
|
||||||
CopyMatrix();
|
|
||||||
}
|
}
|
||||||
template<class vobj> void GridtoBLAS(const Lattice<vobj> &from,deviceVector<typename vobj::scalar_object> &to)
|
template<class vobj> void GridtoBLAS(const Lattice<vobj> &from,deviceVector<typename vobj::scalar_object> &to)
|
||||||
{
|
{
|
||||||
#if 0
|
|
||||||
std::vector<typename vobj::scalar_object> tmp;
|
|
||||||
unvectorizeToLexOrdArray(tmp,from);
|
|
||||||
assert(tmp.size()==from.Grid()->lSites());
|
|
||||||
assert(tmp.size()==to.size());
|
|
||||||
to.resize(tmp.size());
|
|
||||||
acceleratorCopyToDevice(&tmp[0],&to[0],sizeof(typename vobj::scalar_object)*tmp.size());
|
|
||||||
#else
|
|
||||||
typedef typename vobj::scalar_object sobj;
|
typedef typename vobj::scalar_object sobj;
|
||||||
typedef typename vobj::scalar_type scalar_type;
|
typedef typename vobj::scalar_type scalar_type;
|
||||||
typedef typename vobj::vector_type vector_type;
|
typedef typename vobj::vector_type vector_type;
|
||||||
@ -206,17 +233,9 @@ public:
|
|||||||
to[w] = stmp;
|
to[w] = stmp;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
template<class vobj> void BLAStoGrid(Lattice<vobj> &grid,deviceVector<typename vobj::scalar_object> &in)
|
template<class vobj> void BLAStoGrid(Lattice<vobj> &grid,deviceVector<typename vobj::scalar_object> &in)
|
||||||
{
|
{
|
||||||
#if 0
|
|
||||||
std::vector<typename vobj::scalar_object> tmp;
|
|
||||||
tmp.resize(in.size());
|
|
||||||
assert(in.size()==grid.Grid()->lSites());
|
|
||||||
acceleratorCopyFromDevice(&in[0],&tmp[0],sizeof(typename vobj::scalar_object)*in.size());
|
|
||||||
vectorizeFromLexOrdArray(tmp,grid);
|
|
||||||
#else
|
|
||||||
typedef typename vobj::scalar_object sobj;
|
typedef typename vobj::scalar_object sobj;
|
||||||
typedef typename vobj::scalar_type scalar_type;
|
typedef typename vobj::scalar_type scalar_type;
|
||||||
typedef typename vobj::vector_type vector_type;
|
typedef typename vobj::vector_type vector_type;
|
||||||
@ -261,15 +280,152 @@ public:
|
|||||||
putlane(to[w], stmp, to_lane);
|
putlane(to[w], stmp, to_lane);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
void CopyMatrix (void)
|
void CoarsenOperator(LinearOperatorBase<Lattice<Fobj> > &linop,
|
||||||
|
Aggregation<Fobj,CComplex,nbasis> & Subspace,
|
||||||
|
GridBase *CoarseGrid)
|
||||||
{
|
{
|
||||||
for(int p=0;p<geom.npoint;p++){
|
std::cout << GridLogMessage<< "GeneralCoarsenMatrixMrhs "<< std::endl;
|
||||||
//Unpadded
|
|
||||||
auto Aup = _Op.Cell.Extract(_Op._A[p]);
|
GridBase *grid = Subspace.FineGrid;
|
||||||
GridtoBLAS(Aup,BLAS_A[p]);
|
|
||||||
|
/////////////////////////////////////////////////////////////
|
||||||
|
// Orthogonalise the subblocks over the basis
|
||||||
|
/////////////////////////////////////////////////////////////
|
||||||
|
CoarseScalar InnerProd(CoarseGrid);
|
||||||
|
blockOrthogonalise(InnerProd,Subspace.subspace);
|
||||||
|
|
||||||
|
const int npoint = geom_srhs.npoint;
|
||||||
|
|
||||||
|
Coordinate clatt = CoarseGrid->GlobalDimensions();
|
||||||
|
int Nd = CoarseGrid->Nd();
|
||||||
|
/*
|
||||||
|
* Here, k,l index which possible momentum/shift within the N-points connected by MdagM.
|
||||||
|
* Matrix index i is mapped to this shift via
|
||||||
|
* geom.shifts[i]
|
||||||
|
*
|
||||||
|
* conj(pha[block]) proj[k (which mom)][j (basis vec cpt)][block]
|
||||||
|
* = \sum_{l in ball} e^{i q_k . delta_l} < phi_{block,j} | MdagM | phi_{(block+delta_l),i} >
|
||||||
|
* = \sum_{l in ball} e^{iqk.delta_l} A_ji^{b.b+l}
|
||||||
|
* = M_{kl} A_ji^{b.b+l}
|
||||||
|
*
|
||||||
|
* Must assemble and invert matrix M_k,l = e^[i q_k . delta_l]
|
||||||
|
*
|
||||||
|
* Where q_k = delta_k . (2*M_PI/global_nb[mu])
|
||||||
|
*
|
||||||
|
* Then A{ji}^{b,b+l} = M^{-1}_{lm} ComputeProj_{m,b,i,j}
|
||||||
|
*/
|
||||||
|
Eigen::MatrixXcd Mkl = Eigen::MatrixXcd::Zero(npoint,npoint);
|
||||||
|
Eigen::MatrixXcd invMkl = Eigen::MatrixXcd::Zero(npoint,npoint);
|
||||||
|
ComplexD ci(0.0,1.0);
|
||||||
|
for(int k=0;k<npoint;k++){ // Loop over momenta
|
||||||
|
|
||||||
|
for(int l=0;l<npoint;l++){ // Loop over nbr relative
|
||||||
|
ComplexD phase(0.0,0.0);
|
||||||
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
RealD TwoPiL = M_PI * 2.0/ clatt[mu];
|
||||||
|
phase=phase+TwoPiL*geom_srhs.shifts[k][mu]*geom_srhs.shifts[l][mu];
|
||||||
|
}
|
||||||
|
phase=exp(phase*ci);
|
||||||
|
Mkl(k,l) = phase;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
invMkl = Mkl.inverse();
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////
|
||||||
|
// Now compute the matrix elements of linop between the orthonormal
|
||||||
|
// set of vectors.
|
||||||
|
///////////////////////////////////////////////////////////////////////
|
||||||
|
FineField phaV(grid); // Phased block basis vector
|
||||||
|
FineField MphaV(grid);// Matrix applied
|
||||||
|
std::vector<FineComplexField> phaF(npoint,grid);
|
||||||
|
std::vector<CoarseComplexField> pha(npoint,CoarseGrid);
|
||||||
|
|
||||||
|
CoarseVector coarseInner(CoarseGrid);
|
||||||
|
|
||||||
|
typedef typename CComplex::scalar_type SComplex;
|
||||||
|
FineComplexField one(grid); one=SComplex(1.0);
|
||||||
|
FineComplexField zz(grid); zz = Zero();
|
||||||
|
for(int p=0;p<npoint;p++){ // Loop over momenta in npoint
|
||||||
|
/////////////////////////////////////////////////////
|
||||||
|
// Stick a phase on every block
|
||||||
|
/////////////////////////////////////////////////////
|
||||||
|
CoarseComplexField coor(CoarseGrid);
|
||||||
|
pha[p]=Zero();
|
||||||
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
LatticeCoordinate(coor,mu);
|
||||||
|
RealD TwoPiL = M_PI * 2.0/ clatt[mu];
|
||||||
|
pha[p] = pha[p] + (TwoPiL * geom_srhs.shifts[p][mu]) * coor;
|
||||||
|
}
|
||||||
|
pha[p] =exp(pha[p]*ci);
|
||||||
|
|
||||||
|
blockZAXPY(phaF[p],pha[p],one,zz);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Could save on storage here
|
||||||
|
std::vector<CoarseMatrix> _A;
|
||||||
|
_A.resize(geom_srhs.npoint,CoarseGrid);
|
||||||
|
|
||||||
|
std::vector<CoarseVector> ComputeProj(npoint,CoarseGrid);
|
||||||
|
CoarseVector FT(CoarseGrid);
|
||||||
|
for(int i=0;i<nbasis;i++){// Loop over basis vectors
|
||||||
|
std::cout << GridLogMessage<< "CoarsenMatrixColoured vec "<<i<<"/"<<nbasis<< std::endl;
|
||||||
|
for(int p=0;p<npoint;p++){ // Loop over momenta in npoint
|
||||||
|
|
||||||
|
phaV = phaF[p]*Subspace.subspace[i];
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////////////
|
||||||
|
// Multiply phased subspace vector by matrix and project to subspace
|
||||||
|
// Remove local bulk phase to leave relative phases
|
||||||
|
/////////////////////////////////////////////////////////////////////
|
||||||
|
linop.Op(phaV,MphaV);
|
||||||
|
|
||||||
|
// Fixme, could use batched block projector here
|
||||||
|
blockProject(coarseInner,MphaV,Subspace.subspace);
|
||||||
|
|
||||||
|
coarseInner = conjugate(pha[p]) * coarseInner;
|
||||||
|
|
||||||
|
ComputeProj[p] = coarseInner;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int k=0;k<npoint;k++){
|
||||||
|
FT = Zero();
|
||||||
|
for(int l=0;l<npoint;l++){
|
||||||
|
FT= FT+ invMkl(l,k)*ComputeProj[l];
|
||||||
|
}
|
||||||
|
|
||||||
|
int osites=CoarseGrid->oSites();
|
||||||
|
autoView( A_v , _A[k], AcceleratorWrite);
|
||||||
|
autoView( FT_v , FT, AcceleratorRead);
|
||||||
|
accelerator_for(sss, osites, 1, {
|
||||||
|
for(int j=0;j<nbasis;j++){
|
||||||
|
A_v[sss](i,j) = FT_v[sss](j);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only needed if nonhermitian
|
||||||
|
// if ( ! hermitian ) {
|
||||||
|
// std::cout << GridLogMessage<<"PopulateAdag "<<std::endl;
|
||||||
|
// PopulateAdag();
|
||||||
|
// }
|
||||||
|
// Need to write something to populate Adag from A
|
||||||
|
|
||||||
|
for(int p=0;p<geom_srhs.npoint;p++){
|
||||||
|
GridtoBLAS(_A[p],BLAS_A[p]);
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
Grid : Message : 11698.730546 s : CoarsenOperator eigen 1334 us
|
||||||
|
Grid : Message : 11698.730563 s : CoarsenOperator phase 34729 us
|
||||||
|
Grid : Message : 11698.730565 s : CoarsenOperator phaseBZ 2423814 us
|
||||||
|
Grid : Message : 11698.730566 s : CoarsenOperator mat 127890998 us
|
||||||
|
Grid : Message : 11698.730567 s : CoarsenOperator proj 515840840 us
|
||||||
|
Grid : Message : 11698.730568 s : CoarsenOperator inv 103948313 us
|
||||||
|
Takes 600s to compute matrix elements, DOMINATED by the block project.
|
||||||
|
Easy to speed up with the batched block project.
|
||||||
|
Store npoint vectors, get npoint x Nbasis block projection, and 81 fold faster.
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
void Mdag(const CoarseVector &in, CoarseVector &out)
|
void Mdag(const CoarseVector &in, CoarseVector &out)
|
||||||
{
|
{
|
||||||
@ -302,16 +458,17 @@ public:
|
|||||||
|
|
||||||
const int Nsimd = CComplex::Nsimd();
|
const int Nsimd = CComplex::Nsimd();
|
||||||
|
|
||||||
|
int64_t nrhs =pin.Grid()->GlobalDimensions()[0];
|
||||||
|
assert(nrhs>=1);
|
||||||
|
|
||||||
RealD flops,bytes;
|
RealD flops,bytes;
|
||||||
int64_t osites=in.Grid()->oSites(); // unpadded
|
int64_t osites=in.Grid()->oSites(); // unpadded
|
||||||
int64_t unpadded_vol = _CoarseGrid->lSites();
|
int64_t unpadded_vol = CoarseGrid()->lSites()/nrhs;
|
||||||
|
|
||||||
flops = 1.0* npoint * nbasis * nbasis * 8.0 * osites * CComplex::Nsimd();
|
flops = 1.0* npoint * nbasis * nbasis * 8.0 * osites * CComplex::Nsimd();
|
||||||
bytes = 1.0*osites*sizeof(siteMatrix)*npoint/pin.Grid()->GlobalDimensions()[0]
|
bytes = 1.0*osites*sizeof(siteMatrix)*npoint/pin.Grid()->GlobalDimensions()[0]
|
||||||
+ 2.0*osites*sizeof(siteVector)*npoint;
|
+ 2.0*osites*sizeof(siteVector)*npoint;
|
||||||
|
|
||||||
int64_t nrhs =pin.Grid()->GlobalDimensions()[0];
|
|
||||||
assert(nrhs>=1);
|
|
||||||
|
|
||||||
t_GtoB=-usecond();
|
t_GtoB=-usecond();
|
||||||
GridtoBLAS(pin,BLAS_B);
|
GridtoBLAS(pin,BLAS_B);
|
||||||
@ -339,7 +496,7 @@ public:
|
|||||||
BLAStoGrid(out,BLAS_C);
|
BLAStoGrid(out,BLAS_C);
|
||||||
t_BtoG+=usecond();
|
t_BtoG+=usecond();
|
||||||
t_tot+=usecond();
|
t_tot+=usecond();
|
||||||
|
/*
|
||||||
std::cout << GridLogMessage << "New Mrhs coarse DONE "<<std::endl;
|
std::cout << GridLogMessage << "New Mrhs coarse DONE "<<std::endl;
|
||||||
std::cout << GridLogMessage<<"Coarse Mult exch "<<t_exch<<" us"<<std::endl;
|
std::cout << GridLogMessage<<"Coarse Mult exch "<<t_exch<<" us"<<std::endl;
|
||||||
std::cout << GridLogMessage<<"Coarse Mult mult "<<t_mult<<" us"<<std::endl;
|
std::cout << GridLogMessage<<"Coarse Mult mult "<<t_mult<<" us"<<std::endl;
|
||||||
@ -351,12 +508,12 @@ public:
|
|||||||
std::cout << GridLogMessage<<"Coarse Kernel flop/s "<< flops/t_mult<<" mflop/s"<<std::endl;
|
std::cout << GridLogMessage<<"Coarse Kernel flop/s "<< flops/t_mult<<" mflop/s"<<std::endl;
|
||||||
std::cout << GridLogMessage<<"Coarse Kernel bytes/s "<< bytes/t_mult/1000<<" GB/s"<<std::endl;
|
std::cout << GridLogMessage<<"Coarse Kernel bytes/s "<< bytes/t_mult/1000<<" GB/s"<<std::endl;
|
||||||
std::cout << GridLogMessage<<"Coarse overall flops/s "<< flops/t_tot<<" mflop/s"<<std::endl;
|
std::cout << GridLogMessage<<"Coarse overall flops/s "<< flops/t_tot<<" mflop/s"<<std::endl;
|
||||||
|
*/
|
||||||
// std::cout << GridLogMessage<<"Coarse total bytes "<< bytes/1e6<<" MB"<<std::endl;
|
// std::cout << GridLogMessage<<"Coarse total bytes "<< bytes/1e6<<" MB"<<std::endl;
|
||||||
};
|
};
|
||||||
virtual void Mdiag (const Field &in, Field &out){ assert(0);};
|
virtual void Mdiag (const Field &in, Field &out){ assert(0);};
|
||||||
virtual void Mdir (const Field &in, Field &out,int dir, int disp){assert(0);};
|
virtual void Mdir (const Field &in, Field &out,int dir, int disp){assert(0);};
|
||||||
virtual void MdirAll (const Field &in, std::vector<Field> &out){assert(0);};
|
virtual void MdirAll (const Field &in, std::vector<Field> &out){assert(0);};
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
43
TODO
43
TODO
@ -1,6 +1,44 @@
|
|||||||
- - Slice sum optimisation & A2A - atomic addition
|
i) Clean up CoarsenedMatrix, GeneralCoarsenedMatrix, GeneralCoarsenedMatrixMultiRHS
|
||||||
|
|
||||||
|
-- Ideally want a SINGLE implementation that does MultiRHS **AND** works with one RHS.
|
||||||
|
|
||||||
|
-- -- Getting there. One RHS is hard due to vectorisation & hardwired coarse5d layout
|
||||||
|
-- Compromise: Wrap it in a copy in/out for a slice.
|
||||||
|
|
||||||
|
-- Bad for Lanczos: need to do a BLOCK Lanczos instead. Longer term.
|
||||||
|
|
||||||
|
-- **** Make the test do ONLY the single RHS. ****
|
||||||
|
-- I/O for the matrix elements required.
|
||||||
|
-- Make the Adef2 build an eigenvector deflater and a block projector
|
||||||
|
--
|
||||||
|
|
||||||
|
-- Work with Regensburg on tests.
|
||||||
|
-- Plan interface preserving the coarsened matrix interface (??)
|
||||||
|
|
||||||
|
-- Move functionality from GeneralCoarsenedMatrix INTO GeneralCoarsenedMatrixMultiRHS -- DONE
|
||||||
|
-- Don't immediately delete original
|
||||||
|
-- Instead make the new one self contained, then delete.
|
||||||
|
-- New DWF inverter test.
|
||||||
|
|
||||||
|
// void PopulateAdag(void)
|
||||||
|
void CoarsenOperator(LinearOperatorBase<Lattice<Fobj> > &linop, Aggregation<Fobj,CComplex,nbasis> & Subspace) -- DONE
|
||||||
|
ExchangeCoarseLinks();
|
||||||
|
|
||||||
|
iii) Aurora -- Christoph's problem -- DONE
|
||||||
|
Aurora -- Carleton's problem staggered.
|
||||||
|
|
||||||
|
iv) Dennis merge and test Aurora -- DONE (save test)
|
||||||
|
|
||||||
|
v) Merge Ed Bennet's request --DONE
|
||||||
|
|
||||||
|
vi) Repro CG -- get down to the level of single node testing via split grid test
|
||||||
|
|
||||||
|
|
||||||
|
=========================
|
||||||
|
|
||||||
|
===============
|
||||||
|
- - Slice sum optimisation & A2A - atomic addition -- Dennis
|
||||||
- - Also faster non-atomic reduction
|
- - Also faster non-atomic reduction
|
||||||
- - Remaining PRs
|
|
||||||
- - DDHMC
|
- - DDHMC
|
||||||
- - MixedPrec is the action eval, high precision
|
- - MixedPrec is the action eval, high precision
|
||||||
- - MixedPrecCleanup is the force eval, low precision
|
- - MixedPrecCleanup is the force eval, low precision
|
||||||
@ -17,7 +55,6 @@ DDHMC
|
|||||||
-- Multishift Mixed Precision - DONE
|
-- Multishift Mixed Precision - DONE
|
||||||
-- Pole dependent residual - DONE
|
-- Pole dependent residual - DONE
|
||||||
|
|
||||||
|
|
||||||
=======
|
=======
|
||||||
-- comms threads issue??
|
-- comms threads issue??
|
||||||
-- Part done: Staggered kernel performance on GPU
|
-- Part done: Staggered kernel performance on GPU
|
||||||
|
@ -208,9 +208,6 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
gridblasHandle_t GridBLAS::gridblasHandle;
|
|
||||||
int GridBLAS::gridblasInit;
|
|
||||||
|
|
||||||
int main (int argc, char ** argv)
|
int main (int argc, char ** argv)
|
||||||
{
|
{
|
||||||
Grid_init(&argc,&argv);
|
Grid_init(&argc,&argv);
|
||||||
@ -281,7 +278,6 @@ int main (int argc, char ** argv)
|
|||||||
typedef LittleDiracOperator::CoarseVector CoarseVector;
|
typedef LittleDiracOperator::CoarseVector CoarseVector;
|
||||||
|
|
||||||
NextToNextToNextToNearestStencilGeometry5D geom(Coarse5d);
|
NextToNextToNextToNearestStencilGeometry5D geom(Coarse5d);
|
||||||
NearestStencilGeometry5D geom_nn(Coarse5d);
|
|
||||||
|
|
||||||
// Warning: This routine calls PVdagM.Op, not PVdagM.HermOp
|
// Warning: This routine calls PVdagM.Op, not PVdagM.HermOp
|
||||||
typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
|
typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
|
||||||
@ -309,75 +305,12 @@ int main (int argc, char ** argv)
|
|||||||
LoadBasis(Aggregates,subspace_file);
|
LoadBasis(Aggregates,subspace_file);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
// NBASIS=40
|
|
||||||
// Best so far: ord 2000 [0.01,95], 500,500 -- 466 iters
|
|
||||||
// slurm-398626.out:Grid : Message : 141.295253 s : 500 filt [1] <n|MdagM|n> 0.000103622063
|
|
||||||
|
|
||||||
|
|
||||||
//Grid : Message : 33.870465 s : Chebyshev subspace pass-1 : ord 2000 [0.001,95]
|
|
||||||
//Grid : Message : 33.870485 s : Chebyshev subspace pass-2 : nbasis40 min 1000 step 1000 lo0
|
|
||||||
//slurm-1482200.out : filt ~ 0.004 -- not as low mode projecting -- took 626 iters
|
|
||||||
|
|
||||||
// To try: 2000 [0.1,95] ,2000,500,500 -- slurm-1482213.out 586 iterations
|
|
||||||
|
|
||||||
// To try: 2000 [0.01,95] ,2000,500,500 -- 469 (think I bumped 92 to 95) (??)
|
|
||||||
// To try: 2000 [0.025,95],2000,500,500
|
|
||||||
// To try: 2000 [0.005,95],2000,500,500
|
|
||||||
|
|
||||||
// NBASIS=44 -- HDCG paper was 64 vectors; AMD compiler craps out at 48
|
|
||||||
// To try: 2000 [0.01,95] ,2000,500,500 -- 419 lowest slurm-1482355.out
|
|
||||||
// To try: 2000 [0.025,95] ,2000,500,500 -- 487
|
|
||||||
// To try: 2000 [0.005,95] ,2000,500,500
|
|
||||||
/*
|
|
||||||
Smoother [3,92] order 16
|
|
||||||
slurm-1482355.out:Grid : Message : 35.239686 s : Chebyshev subspace pass-1 : ord 2000 [0.01,95]
|
|
||||||
slurm-1482355.out:Grid : Message : 35.239714 s : Chebyshev subspace pass-2 : nbasis44 min 500 step 500 lo0
|
|
||||||
slurm-1482355.out:Grid : Message : 5561.305552 s : HDCG: Pcg converged in 419 iterations and 2616.202598 s
|
|
||||||
|
|
||||||
slurm-1482367.out:Grid : Message : 43.157235 s : Chebyshev subspace pass-1 : ord 2000 [0.025,95]
|
|
||||||
slurm-1482367.out:Grid : Message : 43.157257 s : Chebyshev subspace pass-2 : nbasis44 min 500 step 500 lo0
|
|
||||||
slurm-1482367.out:Grid : Message : 6169.469330 s : HDCG: Pcg converged in 487 iterations and 3131.185821 s
|
|
||||||
*/
|
|
||||||
/*
|
|
||||||
Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis,
|
|
||||||
95.0,0.0075,
|
|
||||||
2500,
|
|
||||||
500,
|
|
||||||
500,
|
|
||||||
0.0);
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
Aggregates.CreateSubspaceChebyshevPowerLaw(RNG5,HermOpEO,nbasis,
|
|
||||||
95.0,
|
|
||||||
2000);
|
|
||||||
*/
|
|
||||||
|
|
||||||
Aggregates.CreateSubspaceMultishift(RNG5,HermOpEO,
|
Aggregates.CreateSubspaceMultishift(RNG5,HermOpEO,
|
||||||
0.0003,1.0e-5,2000); // Lo, tol, maxit
|
0.0003,1.0e-5,2000); // Lo, tol, maxit
|
||||||
/*
|
|
||||||
Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis,
|
|
||||||
95.0,0.05,
|
|
||||||
2000,
|
|
||||||
500,
|
|
||||||
500,
|
|
||||||
0.0);
|
|
||||||
*/
|
|
||||||
/*
|
|
||||||
Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis,
|
|
||||||
95.0,0.01,
|
|
||||||
2000,
|
|
||||||
500,
|
|
||||||
500,
|
|
||||||
0.0);
|
|
||||||
*/
|
|
||||||
// Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis,95.,0.01,1500); -- running slurm-1484934.out nbasis 56
|
|
||||||
|
|
||||||
// Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis,95.,0.01,1500); <== last run
|
// Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis,95.,0.01,1500); <== last run
|
||||||
SaveBasis(Aggregates,subspace_file);
|
SaveBasis(Aggregates,subspace_file);
|
||||||
}
|
}
|
||||||
MemoryManager::Print();
|
|
||||||
|
|
||||||
if(refine){
|
if(refine){
|
||||||
if ( load_refine ) {
|
if ( load_refine ) {
|
||||||
@ -388,15 +321,15 @@ slurm-1482367.out:Grid : Message : 6169.469330 s : HDCG: Pcg converged in 487 it
|
|||||||
SaveBasis(Aggregates,refine_file);
|
SaveBasis(Aggregates,refine_file);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
MemoryManager::Print();
|
|
||||||
Aggregates.Orthogonalise();
|
Aggregates.Orthogonalise();
|
||||||
if ( load_mat ) {
|
if ( load_mat ) {
|
||||||
LoadOperator(LittleDiracOp,ldop_file);
|
LoadOperator(LittleDiracOp,ldop_file);
|
||||||
} else {
|
} else {
|
||||||
LittleDiracOp.CoarsenOperator(FineHermOp,Aggregates);
|
LittleDiracOp.CoarsenOperator(FineHermOp,Aggregates);
|
||||||
SaveOperator(LittleDiracOp,ldop_file);
|
// SaveOperator(LittleDiracOp,ldop_file);
|
||||||
}
|
}
|
||||||
|
|
||||||
// I/O test:
|
// I/O test:
|
||||||
CoarseVector c_src(Coarse5d); random(CRNG,c_src);
|
CoarseVector c_src(Coarse5d); random(CRNG,c_src);
|
||||||
CoarseVector c_res(Coarse5d);
|
CoarseVector c_res(Coarse5d);
|
||||||
@ -428,31 +361,42 @@ slurm-1482367.out:Grid : Message : 6169.469330 s : HDCG: Pcg converged in 487 it
|
|||||||
std::cout<<GridLogMessage<<" ldop error: "<<norm2(c_proj)<<std::endl;
|
std::cout<<GridLogMessage<<" ldop error: "<<norm2(c_proj)<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////
|
||||||
// Try projecting to one hop only
|
// mrhs coarse operator
|
||||||
// LittleDiracOp.ShiftMatrix(1.0e-4);
|
// Create a higher dim coarse grid
|
||||||
// LittleDiracOperator LittleDiracOpProj(geom_nn,FrbGrid,Coarse5d);
|
//////////////////////////////////////////////////////////////////////////////////////
|
||||||
// LittleDiracOpProj.ProjectNearestNeighbour(0.01,LittleDiracOp); // smaller shift 0.02? n
|
|
||||||
|
|
||||||
typedef HermitianLinearOperator<LittleDiracOperator,CoarseVector> HermMatrix;
|
std::cout << "**************************************"<<std::endl;
|
||||||
HermMatrix CoarseOp (LittleDiracOp);
|
std::cout << "Building MultiRHS Coarse operator"<<std::endl;
|
||||||
// HermMatrix CoarseOpProj (LittleDiracOpProj);
|
std::cout << "**************************************"<<std::endl;
|
||||||
|
ConjugateGradient<CoarseVector> coarseCG(4.0e-2,20000,true);
|
||||||
|
|
||||||
|
const int nrhs=vComplex::Nsimd()*3;
|
||||||
|
|
||||||
|
Coordinate mpi=GridDefaultMpi();
|
||||||
|
Coordinate rhMpi ({1,1,mpi[0],mpi[1],mpi[2],mpi[3]});
|
||||||
|
Coordinate rhLatt({nrhs,1,clatt[0],clatt[1],clatt[2],clatt[3]});
|
||||||
|
Coordinate rhSimd({vComplex::Nsimd(),1, 1,1,1,1});
|
||||||
|
|
||||||
|
GridCartesian *CoarseMrhs = new GridCartesian(rhLatt,rhSimd,rhMpi);
|
||||||
|
// MultiGeneralCoarsenedMatrix mrhs(LittleDiracOp,CoarseMrhs);
|
||||||
|
typedef MultiGeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> MultiGeneralCoarsenedMatrix_t;
|
||||||
|
MultiGeneralCoarsenedMatrix_t mrhs(geom,CoarseMrhs);
|
||||||
|
// mrhs.CopyMatrix(LittleDiracOp);
|
||||||
|
// mrhs.SetMatrix(LittleDiracOp.);
|
||||||
|
mrhs.CoarsenOperator(FineHermOp,Aggregates,Coarse5d);
|
||||||
|
// mrhs.CheckMatrix(LittleDiracOp);
|
||||||
|
|
||||||
MemoryManager::Print();
|
|
||||||
//////////////////////////////////////////
|
//////////////////////////////////////////
|
||||||
// Build a coarse lanczos
|
// Build a coarse lanczos
|
||||||
//////////////////////////////////////////
|
//////////////////////////////////////////
|
||||||
// Chebyshev<CoarseVector> IRLCheby(0.012,40.0,201); //500 HDCG iters
|
std::cout << "**************************************"<<std::endl;
|
||||||
// int Nk=512; // Didn't save much
|
std::cout << "Building Coarse Lanczos "<<std::endl;
|
||||||
// int Nm=640;
|
std::cout << "**************************************"<<std::endl;
|
||||||
// int Nstop=400;
|
|
||||||
|
|
||||||
// Chebyshev<CoarseVector> IRLCheby(0.005,40.0,201); //319 HDCG iters @ 128//160 nk.
|
typedef HermitianLinearOperator<LittleDiracOperator,CoarseVector> HermMatrix;
|
||||||
// int Nk=128;
|
HermMatrix CoarseOp (LittleDiracOp);
|
||||||
// int Nm=160;
|
|
||||||
|
|
||||||
// Chebyshev<CoarseVector> IRLCheby(0.005,40.0,201); //319 HDCG iters @ 128//160 nk.
|
|
||||||
// Chebyshev<CoarseVector> IRLCheby(0.04,40.0,201);
|
|
||||||
int Nk=192;
|
int Nk=192;
|
||||||
int Nm=256;
|
int Nm=256;
|
||||||
int Nstop=Nk;
|
int Nstop=Nk;
|
||||||
@ -491,121 +435,13 @@ slurm-1482367.out:Grid : Message : 6169.469330 s : HDCG: Pcg converged in 487 it
|
|||||||
ConjugateGradient<LatticeFermionD> CGfine(1.0e-8,30000,false);
|
ConjugateGradient<LatticeFermionD> CGfine(1.0e-8,30000,false);
|
||||||
ZeroGuesser<CoarseVector> CoarseZeroGuesser;
|
ZeroGuesser<CoarseVector> CoarseZeroGuesser;
|
||||||
|
|
||||||
|
|
||||||
// HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,CoarseZeroGuesser);
|
|
||||||
HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,DeflCoarseGuesser);
|
HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,DeflCoarseGuesser);
|
||||||
c_res=Zero();
|
c_res=Zero();
|
||||||
// HPDSolve(c_src,c_res); c_ref = c_res;
|
|
||||||
// std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl;
|
|
||||||
// std::cout << GridLogMessage<<"ref norm "<<norm2(c_ref)<<std::endl;
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
|
||||||
// Deflated (with real op EV's) solve for the projected coarse op
|
|
||||||
// Work towards ADEF1 in the coarse space
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
|
||||||
// HPDSolver<CoarseVector> HPDSolveProj(CoarseOpProj,CG,DeflCoarseGuesser);
|
|
||||||
// c_res=Zero();
|
|
||||||
// HPDSolveProj(c_src,c_res);
|
|
||||||
// std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl;
|
|
||||||
// std::cout << GridLogMessage<<"res norm "<<norm2(c_res)<<std::endl;
|
|
||||||
// c_res = c_res - c_ref;
|
|
||||||
// std::cout << "Projected solver error "<<norm2(c_res)<<std::endl;
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
/////////// MRHS test .////////////
|
||||||
// Coarse ADEF1 with deflation space
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
|
||||||
// ChebyshevSmoother<CoarseVector > CoarseSmoother(1.0,37.,8,CoarseOpProj); // just go to sloppy 0.1 convergence
|
|
||||||
// CoarseSmoother(0.1,37.,8,CoarseOpProj); //
|
|
||||||
// CoarseSmoother(0.5,37.,6,CoarseOpProj); // 8 iter 0.36s
|
|
||||||
// CoarseSmoother(0.5,37.,12,CoarseOpProj); // 8 iter, 0.55s
|
|
||||||
// CoarseSmoother(0.5,37.,8,CoarseOpProj);// 7-9 iter
|
|
||||||
// CoarseSmoother(1.0,37.,8,CoarseOpProj); // 0.4 - 0.5s solve to 0.04, 7-9 iter
|
|
||||||
// ChebyshevSmoother<CoarseVector,HermMatrix > CoarseSmoother(0.5,36.,10,CoarseOpProj); // 311
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////
|
|
||||||
// CG, Cheby mode spacing 200,200
|
|
||||||
// Unprojected Coarse CG solve to 1e-8 : 190 iters, 4.9s
|
|
||||||
// Unprojected Coarse CG solve to 4e-2 : 33 iters, 0.8s
|
|
||||||
// Projected Coarse CG solve to 1e-8 : 100 iters, 0.36s
|
|
||||||
////////////////////////////////////////////////////////
|
|
||||||
// CoarseSmoother(1.0,48.,8,CoarseOpProj); 48 evecs
|
|
||||||
////////////////////////////////////////////////////////
|
|
||||||
// ADEF1 Coarse solve to 1e-8 : 44 iters, 2.34s 2.1x gain
|
|
||||||
// ADEF1 Coarse solve to 4e-2 : 7 iters, 0.4s
|
|
||||||
// HDCG 38 iters 162s
|
|
||||||
//
|
|
||||||
// CoarseSmoother(1.0,40.,8,CoarseOpProj); 48 evecs
|
|
||||||
// ADEF1 Coarse solve to 1e-8 : 37 iters, 2.0s 2.1x gain
|
|
||||||
// ADEF1 Coarse solve to 4e-2 : 6 iters, 0.36s
|
|
||||||
// HDCG 38 iters 169s
|
|
||||||
|
|
||||||
/*
|
|
||||||
TwoLevelADEF1defl<CoarseVector>
|
|
||||||
cADEF1(1.0e-8, 500,
|
|
||||||
CoarseOp,
|
|
||||||
CoarseSmoother,
|
|
||||||
evec,eval);
|
|
||||||
*/
|
|
||||||
// c_res=Zero();
|
|
||||||
// cADEF1(c_src,c_res);
|
|
||||||
// std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl;
|
|
||||||
// std::cout << GridLogMessage<<"cADEF1 res norm "<<norm2(c_res)<<std::endl;
|
|
||||||
// c_res = c_res - c_ref;
|
|
||||||
// std::cout << "cADEF1 solver error "<<norm2(c_res)<<std::endl;
|
|
||||||
|
|
||||||
// cADEF1.Tolerance = 4.0e-2;
|
|
||||||
// cADEF1.Tolerance = 1.0e-1;
|
|
||||||
// cADEF1.Tolerance = 5.0e-2;
|
|
||||||
// c_res=Zero();
|
|
||||||
// cADEF1(c_src,c_res);
|
|
||||||
// std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl;
|
|
||||||
// std::cout << GridLogMessage<<"cADEF1 res norm "<<norm2(c_res)<<std::endl;
|
|
||||||
// c_res = c_res - c_ref;
|
|
||||||
// std::cout << "cADEF1 solver error "<<norm2(c_res)<<std::endl;
|
|
||||||
|
|
||||||
//////////////////////////////////////////
|
|
||||||
// Build a smoother
|
|
||||||
//////////////////////////////////////////
|
|
||||||
// ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(10.0,100.0,10,FineHermOp); //499
|
|
||||||
// ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(3.0,100.0,10,FineHermOp); //383
|
|
||||||
// ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(1.0,100.0,10,FineHermOp); //328
|
|
||||||
// std::vector<RealD> los({0.5,1.0,3.0}); // 147/142/146 nbasis 1
|
|
||||||
// std::vector<RealD> los({1.0,2.0}); // Nbasis 24: 88,86 iterations
|
|
||||||
// std::vector<RealD> los({2.0,4.0}); // Nbasis 32 == 52, iters
|
|
||||||
// std::vector<RealD> los({2.0,4.0}); // Nbasis 40 == 36,36 iters
|
|
||||||
|
|
||||||
//
|
|
||||||
// Turns approx 2700 iterations into 340 fine multiplies with Nbasis 40
|
|
||||||
// Need to measure cost of coarse space.
|
|
||||||
//
|
|
||||||
// -- i) Reduce coarse residual -- 0.04
|
|
||||||
// -- ii) Lanczos on coarse space -- done
|
|
||||||
// -- iii) Possible 1 hop project and/or preconditioning it - easy - PrecCG it and
|
|
||||||
// use a limited stencil. Reread BFM code to check on evecs / deflation strategy with prec
|
|
||||||
//
|
|
||||||
//
|
|
||||||
//
|
|
||||||
//
|
|
||||||
|
|
||||||
MemoryManager::Print();
|
|
||||||
//////////////////////////////////////
|
|
||||||
// mrhs coarse solve
|
|
||||||
// Create a higher dim coarse grid
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
ConjugateGradient<CoarseVector> coarseCG(4.0e-2,20000,true);
|
|
||||||
|
|
||||||
const int nrhs=vComplex::Nsimd()*3;
|
|
||||||
|
|
||||||
Coordinate mpi=GridDefaultMpi();
|
|
||||||
Coordinate rhMpi ({1,1,mpi[0],mpi[1],mpi[2],mpi[3]});
|
|
||||||
Coordinate rhLatt({nrhs,1,clatt[0],clatt[1],clatt[2],clatt[3]});
|
|
||||||
Coordinate rhSimd({vComplex::Nsimd(),1, 1,1,1,1});
|
|
||||||
|
|
||||||
GridCartesian *CoarseMrhs = new GridCartesian(rhLatt,rhSimd,rhMpi);
|
|
||||||
MultiGeneralCoarsenedMatrix mrhs(LittleDiracOp,CoarseMrhs);
|
|
||||||
typedef decltype(mrhs) MultiGeneralCoarsenedMatrix_t;
|
|
||||||
typedef HermitianLinearOperator<MultiGeneralCoarsenedMatrix_t,CoarseVector> MrhsHermMatrix;
|
typedef HermitianLinearOperator<MultiGeneralCoarsenedMatrix_t,CoarseVector> MrhsHermMatrix;
|
||||||
MrhsHermMatrix MrhsCoarseOp (mrhs);
|
MrhsHermMatrix MrhsCoarseOp (mrhs);
|
||||||
MemoryManager::Print();
|
|
||||||
#if 1
|
#if 1
|
||||||
{
|
{
|
||||||
CoarseVector rh_res(CoarseMrhs);
|
CoarseVector rh_res(CoarseMrhs);
|
||||||
@ -644,6 +480,7 @@ slurm-1482367.out:Grid : Message : 6169.469330 s : HDCG: Pcg converged in 487 it
|
|||||||
InsertSlice(c_src,rh_src,r,0);
|
InsertSlice(c_src,rh_src,r,0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::cout << " Calling the multiRHS coarse CG"<<std::endl;
|
||||||
coarseCG(MrhsCoarseOp,rh_src,rh_res);
|
coarseCG(MrhsCoarseOp,rh_src,rh_res);
|
||||||
|
|
||||||
//redo with block CG ?
|
//redo with block CG ?
|
||||||
@ -666,47 +503,11 @@ slurm-1482367.out:Grid : Message : 6169.469330 s : HDCG: Pcg converged in 487 it
|
|||||||
//////////////////////////////////////
|
//////////////////////////////////////
|
||||||
// fine solve
|
// fine solve
|
||||||
//////////////////////////////////////
|
//////////////////////////////////////
|
||||||
|
|
||||||
|
|
||||||
// std::vector<RealD> los({2.0,2.5}); // Nbasis 40 == 36,36 iters
|
|
||||||
// std::vector<RealD> los({2.0});
|
|
||||||
// std::vector<RealD> los({2.5});
|
|
||||||
|
|
||||||
// std::vector<int> ords({7,8,10}); // Nbasis 40 == 40,38,36 iters (320,342,396 mults)
|
|
||||||
// std::vector<int> ords({7}); // Nbasis 40 == 40 iters (320 mults)
|
|
||||||
// std::vector<int> ords({9}); // Nbasis 40 == 40 iters (320 mults)
|
|
||||||
|
|
||||||
// 148 outer
|
|
||||||
// std::vector<RealD> los({1.0});
|
|
||||||
// std::vector<int> ords({24});
|
|
||||||
|
|
||||||
// 162 outer
|
|
||||||
// std::vector<RealD> los({2.5});
|
|
||||||
// std::vector<int> ords({9});
|
|
||||||
|
|
||||||
// ??? outer
|
|
||||||
std::vector<RealD> los({2.0});
|
std::vector<RealD> los({2.0});
|
||||||
std::vector<int> ords({7});
|
std::vector<int> ords({7});
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Smoother opt @56 nbasis, 0.04 convergence, 192 evs
|
|
||||||
ord lo
|
|
||||||
|
|
||||||
16 0.1 no converge -- likely sign indefinite
|
|
||||||
32 0.1 no converge -- likely sign indefinite(?)
|
|
||||||
|
|
||||||
16 0.5 422
|
|
||||||
32 0.5 302
|
|
||||||
|
|
||||||
8 1.0 575
|
|
||||||
12 1.0 449
|
|
||||||
16 1.0 375
|
|
||||||
32 1.0 302
|
|
||||||
|
|
||||||
12 3.0 476
|
|
||||||
16 3.0 319
|
|
||||||
32 3.0 306
|
|
||||||
|
|
||||||
Powerlaw setup 62 vecs
|
Powerlaw setup 62 vecs
|
||||||
slurm-1494943.out:Grid : Message : 4874.186617 s : HDCG: Pcg converged in 171 iterations and 1706.548006 s 1.0 32
|
slurm-1494943.out:Grid : Message : 4874.186617 s : HDCG: Pcg converged in 171 iterations and 1706.548006 s 1.0 32
|
||||||
slurm-1494943.out:Grid : Message : 6490.121648 s : HDCG: Pcg converged in 194 iterations and 1616.219654 s 1.0 16
|
slurm-1494943.out:Grid : Message : 6490.121648 s : HDCG: Pcg converged in 194 iterations and 1616.219654 s 1.0 16
|
||||||
@ -727,38 +528,7 @@ slurm-1494242.out:Grid : Message : 6588.727977 s : HDCG: Pcg converged in 205 it
|
|||||||
-- CG smoother O(16): 290
|
-- CG smoother O(16): 290
|
||||||
-- Cheby smoother O(16): 218 -- getting close to the deflation level I expect 169 from BFM paper @O(7) smoother and 64 nbasis
|
-- Cheby smoother O(16): 218 -- getting close to the deflation level I expect 169 from BFM paper @O(7) smoother and 64 nbasis
|
||||||
|
|
||||||
Grid : Message : 2790.797194 s : HDCG: Pcg converged in 190 iterations and 1049.563182 s 1.0 32
|
|
||||||
Grid : Message : 3766.374396 s : HDCG: Pcg converged in 218 iterations and 975.455668 s 1.0 16
|
|
||||||
Grid : Message : 4888.746190 s : HDCG: Pcg converged in 191 iterations and 1122.252055 s 0.5 32
|
|
||||||
Grid : Message : 5956.679661 s : HDCG: Pcg converged in 231 iterations and 1067.812850 s 0.5 16
|
|
||||||
|
|
||||||
Grid : Message : 2767.405829 s : HDCG: Pcg converged in 218 iterations and 967.214067 s -- 16
|
|
||||||
Grid : Message : 3816.165905 s : HDCG: Pcg converged in 251 iterations and 1048.636269 s -- 12
|
|
||||||
Grid : Message : 5121.206572 s : HDCG: Pcg converged in 318 iterations and 1304.916168 s -- 8
|
|
||||||
|
|
||||||
|
|
||||||
[paboyle@login2.crusher debug]$ grep -v Memory slurm-402426.out | grep converged | grep HDCG -- [1.0,16] cheby
|
|
||||||
Grid : Message : 5185.521063 s : HDCG: Pcg converged in 377 iterations and 1595.843529 s
|
|
||||||
|
|
||||||
[paboyle@login2.crusher debug]$ grep HDCG slurm-402184.out | grep onver
|
|
||||||
Grid : Message : 3760.438160 s : HDCG: Pcg converged in 422 iterations and 2129.243141 s
|
|
||||||
Grid : Message : 5660.588015 s : HDCG: Pcg converged in 308 iterations and 1900.026821 s
|
|
||||||
|
|
||||||
|
|
||||||
Grid : Message : 4238.206528 s : HDCG: Pcg converged in 575 iterations and 2657.430676 s
|
|
||||||
Grid : Message : 6345.880344 s : HDCG: Pcg converged in 449 iterations and 2108.505208 s
|
|
||||||
|
|
||||||
grep onverg slurm-401663.out | grep HDCG
|
|
||||||
Grid : Message : 3900.817781 s : HDCG: Pcg converged in 476 iterations and 1992.591311 s
|
|
||||||
Grid : Message : 5647.202699 s : HDCG: Pcg converged in 306 iterations and 1746.838660 s
|
|
||||||
|
|
||||||
|
|
||||||
[paboyle@login2.crusher debug]$ grep converged slurm-401775.out | grep HDCG
|
|
||||||
Grid : Message : 3583.177025 s : HDCG: Pcg converged in 375 iterations and 1800.896037 s
|
|
||||||
Grid : Message : 5348.342243 s : HDCG: Pcg converged in 302 iterations and 1765.045018 s
|
|
||||||
|
|
||||||
Conclusion: higher order smoother is doing better. Much better. Use a Krylov smoother instead Mirs as in BFM version.
|
Conclusion: higher order smoother is doing better. Much better. Use a Krylov smoother instead Mirs as in BFM version.
|
||||||
|
|
||||||
*/
|
*/
|
||||||
//
|
//
|
||||||
MemoryManager::Print();
|
MemoryManager::Print();
|
||||||
@ -774,14 +544,6 @@ Conclusion: higher order smoother is doing better. Much better. Use a Krylov smo
|
|||||||
// ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(lo,92,10,FineHermOp); // 36 best case
|
// ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(lo,92,10,FineHermOp); // 36 best case
|
||||||
ChebyshevSmoother<LatticeFermionD > ChebySmooth(lo,95,ords[o],FineHermOp); // 311
|
ChebyshevSmoother<LatticeFermionD > ChebySmooth(lo,95,ords[o],FineHermOp); // 311
|
||||||
|
|
||||||
/*
|
|
||||||
* CG smooth 11 iter:
|
|
||||||
slurm-403825.out:Grid : Message : 4369.824339 s : HDCG: fPcg converged in 215 iterations 3.0
|
|
||||||
slurm-403908.out:Grid : Message : 3955.897470 s : HDCG: fPcg converged in 236 iterations 1.0
|
|
||||||
slurm-404273.out:Grid : Message : 3843.792191 s : HDCG: fPcg converged in 210 iterations 2.0
|
|
||||||
* CG smooth 9 iter:
|
|
||||||
*/
|
|
||||||
//
|
|
||||||
RealD MirsShift = lo;
|
RealD MirsShift = lo;
|
||||||
ShiftedHermOpLinearOperator<LatticeFermionD> ShiftedFineHermOp(HermOpEO,MirsShift);
|
ShiftedHermOpLinearOperator<LatticeFermionD> ShiftedFineHermOp(HermOpEO,MirsShift);
|
||||||
CGSmoother<LatticeFermionD> CGsmooth(ords[o],ShiftedFineHermOp) ;
|
CGSmoother<LatticeFermionD> CGsmooth(ords[o],ShiftedFineHermOp) ;
|
||||||
@ -820,16 +582,14 @@ Conclusion: higher order smoother is doing better. Much better. Use a Krylov smo
|
|||||||
CoarseMrhs, // Grid needed to Mrhs grid
|
CoarseMrhs, // Grid needed to Mrhs grid
|
||||||
Aggregates);
|
Aggregates);
|
||||||
|
|
||||||
MemoryManager::Print();
|
|
||||||
std::cout << "Calling mRHS HDCG"<<std::endl;
|
std::cout << "Calling mRHS HDCG"<<std::endl;
|
||||||
FrbGrid->Barrier();
|
FrbGrid->Barrier();
|
||||||
|
|
||||||
MemoryManager::Print();
|
|
||||||
std::vector<LatticeFermionD> src_mrhs(nrhs,FrbGrid);
|
std::vector<LatticeFermionD> src_mrhs(nrhs,FrbGrid);
|
||||||
std::cout << " mRHS source"<<std::endl;
|
std::cout << " mRHS source"<<std::endl;
|
||||||
std::vector<LatticeFermionD> res_mrhs(nrhs,FrbGrid);
|
std::vector<LatticeFermionD> res_mrhs(nrhs,FrbGrid);
|
||||||
std::cout << " mRHS result"<<std::endl;
|
std::cout << " mRHS result"<<std::endl;
|
||||||
MemoryManager::Print();
|
|
||||||
random(RNG5,src_mrhs[0]);
|
random(RNG5,src_mrhs[0]);
|
||||||
for(int r=0;r<nrhs;r++){
|
for(int r=0;r<nrhs;r++){
|
||||||
if(r>0)src_mrhs[r]=src_mrhs[0];
|
if(r>0)src_mrhs[r]=src_mrhs[0];
|
||||||
|
Loading…
Reference in New Issue
Block a user