1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Merge branch 'feature/scalar_adjointFT' of https://github.com/paboyle/Grid into feature/scalar_adjointFT

This commit is contained in:
Guido Cossu 2017-12-19 15:43:55 +00:00
commit 84f9c37ed4
45 changed files with 2099 additions and 1505 deletions

View File

@ -103,29 +103,32 @@ namespace Grid {
GridBase *CoarseGrid;
GridBase *FineGrid;
std::vector<Lattice<Fobj> > subspace;
int checkerboard;
Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid) :
CoarseGrid(_CoarseGrid),
Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) :
CoarseGrid(_CoarseGrid),
FineGrid(_FineGrid),
subspace(nbasis,_FineGrid)
subspace(nbasis,_FineGrid),
checkerboard(_checkerboard)
{
};
// Block-orthogonalise the stored subspace vectors. blockOrthogonalise is applied
// twice over the same basis, logging a banner before each pass.
void Orthogonalise(void){
  CoarseScalar InnerProd(CoarseGrid);
  const char *passBanner[2] = { " Gramm-Schmidt pass 1", " Gramm-Schmidt pass 2" };
  for (const char *banner : passBanner) {
    std::cout << GridLogMessage << banner << std::endl;
    blockOrthogonalise(InnerProd,subspace);
  }
  // std::cout << GridLogMessage <<" Gramm-Schmidt checking orthogonality"<<std::endl;
  // CheckOrthogonal();
}
void CheckOrthogonal(void){
CoarseVector iProj(CoarseGrid);
CoarseVector eProj(CoarseGrid);
Lattice<CComplex> pokey(CoarseGrid);
for(int i=0;i<nbasis;i++){
blockProject(iProj,subspace[i],subspace);
eProj=zero;
for(int ss=0;ss<CoarseGrid->oSites();ss++){
parallel_for(int ss=0;ss<CoarseGrid->oSites();ss++){
eProj._odata[ss](i)=CComplex(1.0);
}
eProj=eProj - iProj;
@ -137,6 +140,7 @@ namespace Grid {
blockProject(CoarseVec,FineVec,subspace);
}
// Expand a coarse-grid vector into the fine-grid field FineVec by calling
// blockPromote with the stored subspace basis.
void PromoteFromSubspace(const CoarseVector &CoarseVec,FineField &FineVec){
// Stamp the output with the checkerboard of the first basis vector before promoting.
FineVec.checkerboard = subspace[0].checkerboard;
blockPromote(CoarseVec,FineVec,subspace);
}
void CreateSubspaceRandom(GridParallelRNG &RNG){
@ -147,6 +151,7 @@ namespace Grid {
Orthogonalise();
}
/*
virtual void CreateSubspaceLanczos(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis)
{
// Run a Lanczos with sloppy convergence
@ -195,7 +200,7 @@ namespace Grid {
std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl;
}
}
*/
virtual void CreateSubspace(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) {
RealD scale;

View File

@ -317,11 +317,23 @@ namespace Grid {
}
// Apply this operator to `in`, writing the result to `out`, and return the value
// produced by axpy_norm.
// The active code builds tmp = Mooee(Mooee(in)) and tmp2 = Meooe(Meooe(in)) (using `out`
// as scratch for the intermediate products) and then combines them in one call.
// NOTE(review): presumably axpy_norm(out,-1.0,tmp2,tmp) sets out = tmp - tmp2 and returns
// its squared norm -- confirm against the axpy_norm definition.
virtual RealD Mpc (const Field &in, Field &out) {
Field tmp(in._grid);
Field tmp2(in._grid);
_Mat.Mooee(in,out);
_Mat.Mooee(out,tmp);
_Mat.Meooe(in,out);
_Mat.Meooe(out,tmp2);
return axpy_norm(out,-1.0,tmp2,tmp);
// Everything below is compiled out (#if 0); it is an alternative formulation kept for reference.
#if 0
//... much prefer conventional Schur norm
_Mat.Meooe(in,tmp);
_Mat.MooeeInv(tmp,out);
_Mat.MeooeDag(out,tmp);
_Mat.Meooe(out,tmp);
_Mat.Mooee(in,out);
return axpy_norm(out,-1.0,tmp,out);
#endif
}
virtual RealD MpcDag (const Field &in, Field &out){
return Mpc(in,out);
@ -346,6 +358,14 @@ namespace Grid {
virtual void operator() (const Field &in, Field &out) = 0;
};
// Linear function that leaves its argument unchanged: operator() copies `in` to `out`.
template<class Field> class IdentityLinearFunction : public LinearFunction<Field> {
public:
// out = in
void operator() (const Field &in, Field &out){
out = in;
};
};
/////////////////////////////////////////////////////////////
// Base classes for Multishift solvers for operators
/////////////////////////////////////////////////////////////
@ -368,6 +388,64 @@ namespace Grid {
};
*/
////////////////////////////////////////////////////////////////////////////////////////////
// Hermitian operator Linear function and operator function
////////////////////////////////////////////////////////////////////////////////////////////
// Operator function that applies a linear operator's HermOp exactly once.
template<class Field>
class HermOpOperatorFunction : public OperatorFunction<Field> {
public:
  // The call operator was previously private (class default access), unlike the
  // sibling functors in this file; it is made public so the functor can also be
  // invoked directly and not only through an OperatorFunction<Field> reference.
  //
  // Linop : operator whose HermOp is applied
  // in    : input field
  // out   : receives Linop.HermOp(in)
  void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
    Linop.HermOp(in,out);
  }
};
// Adapts a LinearOperatorBase to the LinearFunction interface: calling the object
// applies the wrapped operator's HermOp.
template<typename Field>
class PlainHermOp : public LinearFunction<Field> {
public:
// Held by reference: the wrapped operator must outlive this object.
LinearOperatorBase<Field> &_Linop;
PlainHermOp(LinearOperatorBase<Field>& linop) : _Linop(linop)
{}
// out = _Linop.HermOp(in)
void operator()(const Field& in, Field& out) {
_Linop.HermOp(in,out);
}
};
// Binds an OperatorFunction and a linear operator together as a LinearFunction:
// calling the object evaluates _poly(_Linop, in, out).
template<typename Field>
class FunctionHermOp : public LinearFunction<Field> {
public:
// Both members are held by reference: the referenced objects must outlive this one.
OperatorFunction<Field> & _poly;
LinearOperatorBase<Field> &_Linop;
FunctionHermOp(OperatorFunction<Field> & poly,LinearOperatorBase<Field>& linop)
: _poly(poly), _Linop(linop) {};
// Forward to the stored operator function with the stored operator.
void operator()(const Field& in, Field& out) {
_poly(_Linop,in,out);
}
};
// General polynomial of an operator with user supplied coefficients:
//   out = sum_n Coeffs[n] * HermOp^n (in)
template<class Field>
class Polynomial : public OperatorFunction<Field> {
private:
  std::vector<RealD> Coeffs;   // Coeffs[n] multiplies the n-th power of HermOp
public:
  // The coefficients are copied, so a const reference is sufficient; this also lets
  // callers pass const vectors and temporaries (existing lvalue callers are unaffected).
  // Precondition: _Coeffs must hold at least one entry, since operator() reads Coeffs[0].
  Polynomial(const std::vector<RealD> &_Coeffs) : Coeffs(_Coeffs) { }

  // Implement the required interface
  // Linop : operator whose HermOp supplies the powers
  // in    : input field
  // out   : receives the polynomial applied to `in`
  void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {

    Field AtoN(in._grid);   // running power HermOp^n (in)
    Field Mtmp(in._grid);   // copy of the previous power, used as HermOp input

    AtoN = in;
    out = AtoN*Coeffs[0];

    // Index with the container's own unsigned size type to avoid a signed/unsigned comparison.
    for(std::vector<RealD>::size_type n=1;n<Coeffs.size();n++){
      Mtmp = AtoN;
      Linop.HermOp(Mtmp,AtoN);
      out=out+AtoN*Coeffs[n];
    }
  }
};
}

View File

@ -34,41 +34,12 @@ Author: Christoph Lehner <clehner@bnl.gov>
namespace Grid {
////////////////////////////////////////////////////////////////////////////////////////////
// Simple general polynomial with user supplied coefficients
////////////////////////////////////////////////////////////////////////////////////////////
// Operator function that applies a linear operator's HermOp exactly once.
// NOTE(review): no access specifier is given, so operator() is private by class default.
template<class Field>
class HermOpOperatorFunction : public OperatorFunction<Field> {
// out = Linop.HermOp(in)
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
Linop.HermOp(in,out);
};
};
// General polynomial of an operator with user supplied coefficients:
//   out = sum_n Coeffs[n] * HermOp^n (in)
template<class Field>
class Polynomial : public OperatorFunction<Field> {
private:
std::vector<RealD> Coeffs;
public:
// Copies the coefficient vector. operator() reads Coeffs[0], so the vector must be non-empty.
Polynomial(std::vector<RealD> &_Coeffs) : Coeffs(_Coeffs) { };
// Implement the required interface
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
// AtoN holds the running power HermOp^n (in); Mtmp holds the previous power as HermOp input.
Field AtoN(in._grid);
Field Mtmp(in._grid);
AtoN = in;
out = AtoN*Coeffs[0];
// std::cout <<"Poly in " <<norm2(in)<<" size "<< Coeffs.size()<<std::endl;
// std::cout <<"Coeffs[0]= "<<Coeffs[0]<< " 0 " <<norm2(out)<<std::endl;
for(int n=1;n<Coeffs.size();n++){
Mtmp = AtoN;
Linop.HermOp(Mtmp,AtoN);
out=out+AtoN*Coeffs[n];
// std::cout <<"Coeffs "<<n<<"= "<< Coeffs[n]<< " 0 " <<std::endl;
// std::cout << n<<" " <<norm2(out)<<std::endl;
}
};
};
// Serializable parameter bundle for a Chebyshev approximation.
// The members (alpha, beta, Npoly) are declared through GRID_SERIALIZABLE_CLASS_MEMBERS.
// NOTE(review): presumably alpha/beta are the interval bounds and Npoly the order -- confirm
// against Chebyshev::Init, which is not visible here.
struct ChebyParams : Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(ChebyParams,
RealD, alpha,
RealD, beta,
int, Npoly);
};
////////////////////////////////////////////////////////////////////////////////////////////
// Generic Chebyshev approximations
@ -83,8 +54,10 @@ namespace Grid {
public:
void csv(std::ostream &out){
RealD diff = hi-lo;
for (RealD x=lo-0.2*diff; x<hi+0.2*diff; x+=(hi-lo)/1000) {
RealD diff = hi-lo;
RealD delta = (hi-lo)*1.0e-9;
for (RealD x=lo; x<hi; x+=delta) {
delta*=1.1;
RealD f = approx(x);
out<< x<<" "<<f<<std::endl;
}
@ -100,6 +73,7 @@ namespace Grid {
};
// Default constructor: performs no initialisation.
Chebyshev(){};
// Construct from a parameter bundle: forwards alpha, beta and Npoly to Init.
Chebyshev(ChebyParams p){ Init(p.alpha,p.beta,p.Npoly);};
// Construct from bounds, order and a scalar function pointer: forwards all four to Init.
Chebyshev(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD) ) {Init(_lo,_hi,_order,func);};
// Construct from bounds and order only: forwards to the three-argument Init.
Chebyshev(RealD _lo,RealD _hi,int _order) {Init(_lo,_hi,_order);};

View File

@ -1,753 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Chulwoo Jung <chulwoo@bnl.gov>
Author: Christoph Lehner <clehner@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_BIRL_H
#define GRID_BIRL_H
#include <string.h> //memset
#include <zlib.h>
#include <sys/stat.h>
#include <Grid/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockedGrid.h>
#include <Grid/algorithms/iterative/BlockImplicitlyRestartedLanczos/FieldBasisVector.h>
#include <Grid/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockProjector.h>
namespace Grid {
/////////////////////////////////////////////////////////////
// Implicitly restarted lanczos
/////////////////////////////////////////////////////////////
template<class Field>
class BlockImplicitlyRestartedLanczos {
const RealD small = 1.0e-16;
public:
int lock;
int get;
int Niter;
int converged;
int Nminres; // Minimum number of restarts; only check for convergence after
int Nstop; // Number of evecs checked for convergence
int Nk; // Number of converged sought
int Np; // Np -- Number of spare vecs in kryloc space
int Nm; // Nm -- total number of vectors
int orth_period;
RealD OrthoTime;
RealD eresid, betastp;
SortEigen<Field> _sort;
LinearFunction<Field> &_HermOp;
LinearFunction<Field> &_HermOpTest;
/////////////////////////
// Constructor
/////////////////////////
// Full constructor. Stores the operators and run parameters and derives the number
// of spare vectors Np = Nm - Nk, which must be strictly positive.
BlockImplicitlyRestartedLanczos(
LinearFunction<Field> & HermOp, // operator applied in each Lanczos step
LinearFunction<Field> & HermOpTest, // operator used for the eigenvalue estimate and convergence test
int _Nstop, // number of converged vectors required to stop
int _Nk, // number of sought vectors
int _Nm, // total number of vectors (Nk sought + Np spare)
RealD _eresid, // residual tolerance used by the convergence test
RealD _betastp, // if beta(k) < betastp: converged
int _Niter, // Max restart iterations
int _Nminres, int _orth_period = 1) : // Nminres: restarts before convergence is tested; orth_period: re-orthogonalise every orth_period steps
_HermOp(HermOp),
_HermOpTest(HermOpTest),
Nstop(_Nstop),
Nk(_Nk),
Nm(_Nm),
eresid(_eresid),
betastp(_betastp),
Niter(_Niter),
Nminres(_Nminres),
orth_period(_orth_period)
{
// Np is the number of spare vectors; Nm must exceed Nk.
Np = Nm-Nk; assert(Np>0);
};
// Convenience constructor without an explicit Nstop: the stop count is taken to be Nk.
// All work is delegated to the full constructor, which also derives Np = Nm - Nk
// and asserts that it is positive.
BlockImplicitlyRestartedLanczos(
    LinearFunction<Field> & HermOp,      // operator applied in each Lanczos step
    LinearFunction<Field> & HermOpTest,  // operator used for the convergence test
    int _Nk,                             // number of sought vectors (also used as Nstop)
    int _Nm,                             // total number of vectors
    RealD _eresid,                       // residual tolerance
    RealD _betastp,                      // if beta(k) < betastp: converged
    int _Niter,                          // maximum restart iterations
    int _Nminres,                        // restarts before convergence is tested
    int _orth_period = 1) :              // re-orthogonalisation period
  BlockImplicitlyRestartedLanczos(HermOp, HermOpTest,
                                  _Nk, _Nk, _Nm,
                                  _eresid, _betastp,
                                  _Niter, _Nminres, _orth_period)
{
}
/* Saad PP. 195
1. Choose an initial vector v1 of 2-norm unity. Set β1 ≡ 0, v0 ≡ 0
2. For k = 1,2,...,m Do:
3. wk := A vk − βk v_{k−1}
4. αk := (wk,vk) //
5. wk := wk − αk vk // wk orthog vk
6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
7. vk+1 := wk/βk+1
8. EndDo
*/
void step(std::vector<RealD>& lmd,
std::vector<RealD>& lme,
BasisFieldVector<Field>& evec,
Field& w,int Nm,int k)
{
assert( k< Nm );
GridStopWatch gsw_op,gsw_o;
Field& evec_k = evec[k];
gsw_op.Start();
_HermOp(evec_k,w);
gsw_op.Stop();
if(k>0){
w -= lme[k-1] * evec[k-1];
}
ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk)
RealD alph = real(zalph);
w = w - alph * evec_k;// 5. wk:=wkαkvk
RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
// 7. vk+1 := wk/βk+1
std::cout<<GridLogMessage << "alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl;
const RealD tiny = 1.0e-20;
if ( beta < tiny ) {
std::cout<<GridLogMessage << " beta is tiny "<<beta<<std::endl;
}
lmd[k] = alph;
lme[k] = beta;
gsw_o.Start();
if (k>0 && k % orth_period == 0) {
orthogonalize(w,evec,k); // orthonormalise
}
gsw_o.Stop();
if(k < Nm-1) {
evec[k+1] = w;
}
std::cout << GridLogMessage << "Timing: operator=" << gsw_op.Elapsed() <<
" orth=" << gsw_o.Elapsed() << std::endl;
}
// Apply one shifted sweep of plane (Givens) rotations to the symmetric tridiagonal matrix
// held as diagonal `lmd` and off-diagonal `lme`, updating both in place, and apply the
// same rotations to rows of Qt.
//   Nk   : number of entries updated in each rotated pair of Qt rows
//   Nm   : row stride of Qt (entry (row,col) is Qt[col + Nm*row])
//   Dsh  : shift subtracted from the leading diagonal entry when forming the first rotation
//   kmin : 1-based start of the active window (the first rotation acts on rows kmin-1, kmin)
//   kmax : end of the active window (the last rotation acts on rows kmax-2, kmax-1)
void qr_decomp(std::vector<RealD>& lmd,
std::vector<RealD>& lme,
int Nk,
int Nm,
std::vector<RealD>& Qt,
RealD Dsh,
int kmin,
int kmax)
{
int k = kmin-1;
// x carries the element pushed outside the tridiagonal band by the previous rotation.
RealD x;
// First rotation: built from the shifted diagonal entry and its off-diagonal neighbour.
RealD Fden = 1.0/hypot(lmd[k]-Dsh,lme[k]);
RealD c = ( lmd[k] -Dsh) *Fden;
RealD s = -lme[k] *Fden;
RealD tmpa1 = lmd[k];
RealD tmpa2 = lmd[k+1];
RealD tmpb = lme[k];
lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb;
lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb;
lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb;
x =-s*lme[k+1];
lme[k+1] = c*lme[k+1];
// Rotate rows k and k+1 of Qt with the same (c,s).
for(int i=0; i<Nk; ++i){
RealD Qtmp1 = Qt[i+Nm*k ];
RealD Qtmp2 = Qt[i+Nm*(k+1)];
Qt[i+Nm*k ] = c*Qtmp1 - s*Qtmp2;
Qt[i+Nm*(k+1)] = s*Qtmp1 + c*Qtmp2;
}
// Givens transformations
// Each subsequent rotation is built from lme[k-1] and the out-of-band element x.
for(int k = kmin; k < kmax-1; ++k){
RealD Fden = 1.0/hypot(x,lme[k-1]);
RealD c = lme[k-1]*Fden;
RealD s = - x*Fden;
RealD tmpa1 = lmd[k];
RealD tmpa2 = lmd[k+1];
RealD tmpb = lme[k];
lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb;
lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb;
lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb;
lme[k-1] = c*lme[k-1] -s*x;
// No new out-of-band element is produced on the last rotation of the window.
if(k != kmax-2){
x = -s*lme[k+1];
lme[k+1] = c*lme[k+1];
}
for(int i=0; i<Nk; ++i){
RealD Qtmp1 = Qt[i+Nm*k ];
RealD Qtmp2 = Qt[i+Nm*(k+1)];
Qt[i+Nm*k ] = c*Qtmp1 -s*Qtmp2;
Qt[i+Nm*(k+1)] = s*Qtmp1 +c*Qtmp2;
}
}
}
#ifdef USE_LAPACK_IRL
#define LAPACK_INT int
//long long
// Diagonalise the N1 x N1 symmetric tridiagonal matrix (diagonal lmd, off-diagonal lme)
// with LAPACK dstegr, splitting the eigenpair index range across the processors of `grid`.
// On return lmd holds the eigenvalues and Qt the eigenvectors (row stride N2), both written
// in the reverse of the order returned by dstegr.
void diagonalize_lapack(std::vector<RealD>& lmd,
std::vector<RealD>& lme,
int N1,
int N2,
std::vector<RealD>& Qt,
GridBase *grid){
std::cout << GridLogMessage << "diagonalize_lapack start\n";
GridStopWatch gsw;
// NOTE(review): `size` is not used below.
const int size = Nm;
// tevals.resize(size);
// tevecs.resize(size);
LAPACK_INT NN = N1;
std::vector<double> evals_tmp(NN);
std::vector<double> evec_tmp(NN*NN);
memset(&evec_tmp[0],0,sizeof(double)*NN*NN);
// double AA[NN][NN];
// DD / EE: copies of the diagonal and off-diagonal handed to dstegr.
std::vector<double> DD(NN);
std::vector<double> EE(NN);
for (int i = 0; i< NN; i++)
for (int j = i - 1; j <= i + 1; j++)
if ( j < NN && j >= 0 ) {
if (i==j) DD[i] = lmd[i];
if (i==j) evals_tmp[i] = lmd[i];
if (j==(i-1)) EE[j] = lme[j];
}
LAPACK_INT evals_found;
LAPACK_INT lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ;
LAPACK_INT liwork = 3+NN*10 ;
std::vector<LAPACK_INT> iwork(liwork);
std::vector<double> work(lwork);
std::vector<LAPACK_INT> isuppz(2*NN);
char jobz = 'V'; // calculate evals & evecs
char range = 'I'; // calculate only the eigenpairs with indices il..iu (set below)
// char range = 'A'; // calculate all evals
// NOTE(review): uplo, compz and ifail are declared but not passed to dstegr below.
char uplo = 'U'; // refer to upper half of original matrix
char compz = 'I'; // Compute eigenvectors of tridiagonal matrix
std::vector<int> ifail(NN);
LAPACK_INT info;
// int total = QMP_get_number_of_nodes();
// int node = QMP_get_node_number();
// GridBase *grid = evec[0]._grid;
// Split the 1-based index range [1,NN] into `total` contiguous slices of length `interval`;
// this processor handles [il,iu], clipped to NN.
int total = grid->_Nprocessors;
int node = grid->_processor;
int interval = (NN/total)+1;
double vl = 0.0, vu = 0.0;
LAPACK_INT il = interval*node+1 , iu = interval*(node+1);
if (iu > NN) iu=NN;
double tol = 0.0;
if (1) {
// Start from zero so that the global sum below assembles the per-processor slices.
memset(&evals_tmp[0],0,sizeof(double)*NN);
if ( il <= NN){
std::cout << GridLogMessage << "dstegr started" << std::endl;
gsw.Start();
dstegr(&jobz, &range, &NN,
(double*)&DD[0], (double*)&EE[0],
&vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A'
&tol, // tolerance
&evals_found, &evals_tmp[0], (double*)&evec_tmp[0], &NN,
&isuppz[0],
&work[0], &lwork, &iwork[0], &liwork,
&info);
gsw.Stop();
std::cout << GridLogMessage << "dstegr completed in " << gsw.Elapsed() << std::endl;
// dstegr stores its results starting at index 0; move them up to their global
// positions [il-1, iu-1], walking from the top, and clear the vacated slots.
for (int i = iu-1; i>= il-1; i--){
evals_tmp[i] = evals_tmp[i - (il-1)];
if (il>1) evals_tmp[i-(il-1)]=0.;
for (int j = 0; j< NN; j++){
evec_tmp[i*NN + j] = evec_tmp[(i - (il-1)) * NN + j];
if (il>1) evec_tmp[(i-(il-1)) * NN + j]=0.;
}
}
}
{
// QMP_sum_double_array(evals_tmp,NN);
// QMP_sum_double_array((double *)evec_tmp,NN*NN);
// Combine the slices from all processors.
grid->GlobalSumVector(&evals_tmp[0],NN);
grid->GlobalSumVector(&evec_tmp[0],NN*NN);
}
}
// cheating a bit. It is better to sort instead of just reversing it, but the document of the routine says evals are sorted in increasing order. qr gives evals in decreasing order.
for(int i=0;i<NN;i++){
for(int j=0;j<NN;j++)
Qt[(NN-1-i)*N2+j]=evec_tmp[i*NN + j];
lmd [NN-1-i]=evals_tmp[i];
}
std::cout << GridLogMessage << "diagonalize_lapack complete\n";
}
#undef LAPACK_INT
#endif
void diagonalize(std::vector<RealD>& lmd,
std::vector<RealD>& lme,
int N2,
int N1,
std::vector<RealD>& Qt,
GridBase *grid)
{
#ifdef USE_LAPACK_IRL
const int check_lapack=0; // just use lapack if 0, check against lapack if 1
if(!check_lapack)
return diagonalize_lapack(lmd,lme,N2,N1,Qt,grid);
std::vector <RealD> lmd2(N1);
std::vector <RealD> lme2(N1);
std::vector<RealD> Qt2(N1*N1);
for(int k=0; k<N1; ++k){
lmd2[k] = lmd[k];
lme2[k] = lme[k];
}
for(int k=0; k<N1*N1; ++k)
Qt2[k] = Qt[k];
// diagonalize_lapack(lmd2,lme2,Nm2,Nm,Qt,grid);
#endif
int Niter = 10000*N1;
int kmin = 1;
int kmax = N2;
// (this should be more sophisticated)
for(int iter=0; ; ++iter){
if ( (iter+1)%(100*N1)==0)
std::cout<<GridLogMessage << "[QL method] Not converged - iteration "<<iter+1<<"\n";
// determination of 2x2 leading submatrix
RealD dsub = lmd[kmax-1]-lmd[kmax-2];
RealD dd = sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]);
RealD Dsh = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub)));
// (Dsh: shift)
// transformation
qr_decomp(lmd,lme,N2,N1,Qt,Dsh,kmin,kmax);
// Convergence criterion (redef of kmin and kamx)
for(int j=kmax-1; j>= kmin; --j){
RealD dds = fabs(lmd[j-1])+fabs(lmd[j]);
if(fabs(lme[j-1])+dds > dds){
kmax = j+1;
goto continued;
}
}
Niter = iter;
#ifdef USE_LAPACK_IRL
if(check_lapack){
const double SMALL=1e-8;
diagonalize_lapack(lmd2,lme2,N2,N1,Qt2,grid);
std::vector <RealD> lmd3(N2);
for(int k=0; k<N2; ++k) lmd3[k]=lmd[k];
_sort.push(lmd3,N2);
_sort.push(lmd2,N2);
for(int k=0; k<N2; ++k){
if (fabs(lmd2[k] - lmd3[k]) >SMALL) std::cout<<GridLogMessage <<"lmd(qr) lmd(lapack) "<< k << ": " << lmd2[k] <<" "<< lmd3[k] <<std::endl;
// if (fabs(lme2[k] - lme[k]) >SMALL) std::cout<<GridLogMessage <<"lme(qr)-lme(lapack) "<< k << ": " << lme2[k] - lme[k] <<std::endl;
}
for(int k=0; k<N1*N1; ++k){
// if (fabs(Qt2[k] - Qt[k]) >SMALL) std::cout<<GridLogMessage <<"Qt(qr)-Qt(lapack) "<< k << ": " << Qt2[k] - Qt[k] <<std::endl;
}
}
#endif
return;
continued:
for(int j=0; j<kmax-1; ++j){
RealD dds = fabs(lmd[j])+fabs(lmd[j+1]);
if(fabs(lme[j])+dds > dds){
kmin = j+1;
break;
}
}
}
std::cout<<GridLogMessage << "[QL method] Error - Too many iteration: "<<Niter<<"\n";
abort();
}
#if 1
// Scale v in place by 1/sqrt(norm2(v)) and return sqrt(norm2(v)), the value it was divided by.
template<typename T>
static RealD normalise(T& v)
{
  RealD length = norm2(v);
  length = sqrt(length);
  const RealD inverseLength = 1.0/length;
  v = v * inverseLength;
  return length;
}
// Orthogonalise w against the first k basis vectors of evec, renormalise it,
// and add the elapsed wall time (in seconds) to OrthoTime.
void orthogonalize(Field& w,
                   BasisFieldVector<Field>& evec,
                   int k)
{
  double elapsed = -usecond()/1e6;   // minus the start time

  evec.orthogonalize(w,k);
  normalise(w);

  elapsed += usecond()/1e6;          // plus the stop time
  OrthoTime += elapsed;
}
void setUnit_Qt(int Nm, std::vector<RealD> &Qt) {
for(int i=0; i<Qt.size(); ++i) Qt[i] = 0.0;
for(int k=0; k<Nm; ++k) Qt[k + k*Nm] = 1.0;
}
/* Rudy Arthur's thesis pp.137
------------------------
Require: M > K, P = M − K
Compute the factorization A V_M = V_M H_M + f_M e_M^†
repeat
  Q = I
  for i = 1,...,P do
    Q_i R_i = H_M − θ_i I ;  Q = Q Q_i
    H_M = Q_i^† H_M Q_i
  end for
  β_K = H_M(K+1,K) ;  σ_K = Q(M,K)
  r = v_{K+1} β_K + r σ_K
  V_K = V_M(1:M) Q(1:M,1:K)
  H_K = H_M(1:K,1:K)
  A V_K = V_K H_K + f_K e_K^†
  Extend to an M = K + P step factorization A V_M = V_M H_M + f_M e_M^†
until convergence
*/
void calc(std::vector<RealD>& eval,
BasisFieldVector<Field>& evec,
const Field& src,
int& Nconv,
bool reverse,
int SkipTest)
{
GridBase *grid = evec._v[0]._grid;//evec.get(0 + evec_offset)._grid;
assert(grid == src._grid);
std::cout<<GridLogMessage << " -- Nk = " << Nk << " Np = "<< Np << std::endl;
std::cout<<GridLogMessage << " -- Nm = " << Nm << std::endl;
std::cout<<GridLogMessage << " -- size of eval = " << eval.size() << std::endl;
std::cout<<GridLogMessage << " -- size of evec = " << evec.size() << std::endl;
assert(Nm <= evec.size() && Nm <= eval.size());
// quickly get an idea of the largest eigenvalue to more properly normalize the residuum
RealD evalMaxApprox = 0.0;
{
auto src_n = src;
auto tmp = src;
const int _MAX_ITER_IRL_MEVAPP_ = 50;
for (int i=0;i<_MAX_ITER_IRL_MEVAPP_;i++) {
_HermOpTest(src_n,tmp);
RealD vnum = real(innerProduct(src_n,tmp)); // HermOp.
RealD vden = norm2(src_n);
RealD na = vnum/vden;
if (fabs(evalMaxApprox/na - 1.0) < 0.05)
i=_MAX_ITER_IRL_MEVAPP_;
evalMaxApprox = na;
std::cout << GridLogMessage << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
src_n = tmp;
}
}
std::vector<RealD> lme(Nm);
std::vector<RealD> lme2(Nm);
std::vector<RealD> eval2(Nm);
std::vector<RealD> eval2_copy(Nm);
std::vector<RealD> Qt(Nm*Nm);
Field f(grid);
Field v(grid);
int k1 = 1;
int k2 = Nk;
Nconv = 0;
RealD beta_k;
// Set initial vector
evec[0] = src;
normalise(evec[0]);
std:: cout<<GridLogMessage <<"norm2(evec[0])= " << norm2(evec[0])<<std::endl;
// Initial Nk steps
OrthoTime=0.;
double t0=usecond()/1e6;
for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k);
double t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::Initial steps: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
std::cout<<GridLogMessage <<"IRL::Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
t1=usecond()/1e6;
// Restarting loop begins
for(int iter = 0; iter<Niter; ++iter){
std::cout<<GridLogMessage<<"\n Restart iteration = "<< iter << std::endl;
//
// Rudy does a sort first which looks very different. Getting fed up with sorting out the algo defs.
// We loop over
//
OrthoTime=0.;
for(int k=Nk; k<Nm; ++k) step(eval,lme,evec,f,Nm,k);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL:: "<<Np <<" steps: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
std::cout<<GridLogMessage <<"IRL::Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
f *= lme[Nm-1];
t1=usecond()/1e6;
// getting eigenvalues
for(int k=0; k<Nm; ++k){
eval2[k] = eval[k+k1-1];
lme2[k] = lme[k+k1-1];
}
setUnit_Qt(Nm,Qt);
diagonalize(eval2,lme2,Nm,Nm,Qt,grid);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL:: diagonalize: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
// sorting
eval2_copy = eval2;
_sort.push(eval2,Nm);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL:: eval sorting: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
// Implicitly shifted QR transformations
setUnit_Qt(Nm,Qt);
for(int ip=0; ip<k2; ++ip){
std::cout<<GridLogMessage << "eval "<< ip << " "<< eval2[ip] << std::endl;
}
for(int ip=k2; ip<Nm; ++ip){
std::cout<<GridLogMessage << "qr_decomp "<< ip << " "<< eval2[ip] << std::endl;
qr_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm);
}
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::qr_decomp: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
assert(k2<Nm);
assert(k2<Nm);
assert(k1>0);
evec.rotate(Qt,k1-1,k2+1,0,Nm,Nm);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::QR rotation: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
fflush(stdout);
// Compressed vector f and beta(k2)
f *= Qt[Nm-1+Nm*(k2-1)];
f += lme[k2-1] * evec[k2];
beta_k = norm2(f);
beta_k = sqrt(beta_k);
std::cout<<GridLogMessage<<" beta(k) = "<<beta_k<<std::endl;
RealD betar = 1.0/beta_k;
evec[k2] = betar * f;
lme[k2-1] = beta_k;
// Convergence test
for(int k=0; k<Nm; ++k){
eval2[k] = eval[k];
lme2[k] = lme[k];
std::cout<<GridLogMessage << "eval2[" << k << "] = " << eval2[k] << std::endl;
}
setUnit_Qt(Nm,Qt);
diagonalize(eval2,lme2,Nk,Nm,Qt,grid);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::diagonalize: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
Nconv = 0;
if (iter >= Nminres) {
std::cout << GridLogMessage << "Rotation to test convergence " << std::endl;
Field ev0_orig(grid);
ev0_orig = evec[0];
evec.rotate(Qt,0,Nk,0,Nk,Nm);
{
std::cout << GridLogMessage << "Test convergence" << std::endl;
Field B(grid);
for(int j = 0; j<Nk; j+=SkipTest){
B=evec[j];
//std::cout << "Checkerboard: " << evec[j].checkerboard << std::endl;
B.checkerboard = evec[0].checkerboard;
_HermOpTest(B,v);
RealD vnum = real(innerProduct(B,v)); // HermOp.
RealD vden = norm2(B);
RealD vv0 = norm2(v);
eval2[j] = vnum/vden;
v -= eval2[j]*B;
RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0);
std::cout.precision(13);
std::cout<<GridLogMessage << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<j<<"] "
<<"eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[j] << " (" << eval2_copy[j] << ")"
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv
<<" "<< vnum/(sqrt(vden)*sqrt(vv0))
<< " norm(B["<<j<<"])="<< vden <<std::endl;
// change the criteria as evals are supposed to be sorted, all evals smaller(larger) than Nstop should have converged
if((vv<eresid*eresid) && (j == Nconv) ){
Nconv+=SkipTest;
}
}
// test if we converged, if so, terminate
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::convergence testing: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
std::cout<<GridLogMessage<<" #modes converged: "<<Nconv<<std::endl;
if( Nconv>=Nstop || beta_k < betastp){
goto converged;
}
std::cout << GridLogMessage << "Rotate back" << std::endl;
//B[j] +=Qt[k+_Nm*j] * _v[k]._odata[ss];
{
Eigen::MatrixXd qm = Eigen::MatrixXd::Zero(Nk,Nk);
for (int k=0;k<Nk;k++)
for (int j=0;j<Nk;j++)
qm(j,k) = Qt[k+Nm*j];
GridStopWatch timeInv;
timeInv.Start();
Eigen::MatrixXd qmI = qm.inverse();
timeInv.Stop();
std::vector<RealD> QtI(Nm*Nm);
for (int k=0;k<Nk;k++)
for (int j=0;j<Nk;j++)
QtI[k+Nm*j] = qmI(j,k);
RealD res_check_rotate_inverse = (qm*qmI - Eigen::MatrixXd::Identity(Nk,Nk)).norm(); // sqrt( |X|^2 )
assert(res_check_rotate_inverse < 1e-7);
evec.rotate(QtI,0,Nk,0,Nk,Nm);
axpy(ev0_orig,-1.0,evec[0],ev0_orig);
std::cout << GridLogMessage << "Rotation done (in " << timeInv.Elapsed() << " = " << timeInv.useconds() << " us" <<
", error = " << res_check_rotate_inverse <<
"); | evec[0] - evec[0]_orig | = " << ::sqrt(norm2(ev0_orig)) << std::endl;
}
}
} else {
std::cout << GridLogMessage << "iter < Nminres: do not yet test for convergence\n";
} // end of iter loop
}
std::cout<<GridLogMessage<<"\n NOT converged.\n";
abort();
converged:
if (SkipTest == 1) {
eval = eval2;
} else {
// test quickly
for (int j=0;j<Nstop;j+=SkipTest) {
std::cout<<GridLogMessage << "Eigenvalue[" << j << "] = " << eval2[j] << " (" << eval2_copy[j] << ")" << std::endl;
}
eval2_copy.resize(eval2.size());
eval = eval2_copy;
}
evec.sortInPlace(eval,reverse);
{
// test
for (int j=0;j<Nstop;j++) {
std::cout<<GridLogMessage << " |e[" << j << "]|^2 = " << norm2(evec[j]) << std::endl;
}
}
//_sort.push(eval,evec,Nconv);
//evec.sort(eval,Nconv);
std::cout<<GridLogMessage << "\n Converged\n Summary :\n";
std::cout<<GridLogMessage << " -- Iterations = "<< Nconv << "\n";
std::cout<<GridLogMessage << " -- beta(k) = "<< beta_k << "\n";
std::cout<<GridLogMessage << " -- Nconv = "<< Nconv << "\n";
}
#endif
};
}
#endif

View File

@ -1,163 +0,0 @@
namespace Grid {
// Container for a set of _Nm lattice fields used as a vector basis, with helpers to
// orthogonalise against it, rotate it by a dense matrix, resize it, sort it in place
// and use it for deflation.
template<class Field>
class BasisFieldVector {
public:
// Logical number of vectors; kept in step with _v by the constructor and resize().
int _Nm;
typedef typename Field::scalar_type Coeff_t;
typedef typename Field::vector_type vCoeff_t;
typedef typename Field::vector_object vobj;
typedef typename vobj::scalar_object sobj;
std::vector<Field> _v; // _Nfull vectors
// Log the size of an allocation change of n vectors on grid `value`, then have the
// boss rank dump /proc/meminfo between two barriers.
void report(int n,GridBase* value) {
std::cout << GridLogMessage << "BasisFieldVector allocated:\n";
std::cout << GridLogMessage << " Delta N = " << n << "\n";
std::cout << GridLogMessage << " Size of full vectors (size) = " <<
((double)n*sizeof(vobj)*value->oSites() / 1024./1024./1024.) << " GB\n";
std::cout << GridLogMessage << " Size = " << _v.size() << " Capacity = " << _v.capacity() << std::endl;
value->Barrier();
if (value->IsBoss()) {
// NOTE(review): shells out to `cat`; the return value of system() is ignored.
system("cat /proc/meminfo");
}
value->Barrier();
}
// Allocate Nm fields on grid `value` and report the allocation.
BasisFieldVector(int Nm,GridBase* value) : _Nm(Nm), _v(Nm,value) {
report(Nm,value);
}
~BasisFieldVector() {
}
// Unchecked access to the i-th basis vector.
Field& operator[](int i) {
return _v[i];
}
// Subtract from w its projection onto each of the first k basis vectors, one at a time.
// w is not normalised here.
void orthogonalize(Field& w, int k) {
for(int j=0; j<k; ++j){
Coeff_t ip = (Coeff_t)innerProduct(_v[j],w);
w = w - ip*_v[j];
}
}
// In-place basis rotation: for every outer site, replace _v[j] (j in [j0,j1)) by
// sum over k in [k0,k1) of Qt[k+Nm*j] * _v[k].
// The new values for a site are accumulated in B before any _v[j] is overwritten.
void rotate(std::vector<RealD>& Qt,int j0, int j1, int k0,int k1,int Nm) {
GridBase* grid = _v[0]._grid;
#pragma omp parallel
{
// Declared inside the parallel region, so each thread has its own scratch buffer.
std::vector < vobj > B(Nm);
#pragma omp for
for(int ss=0;ss < grid->oSites();ss++){
for(int j=j0; j<j1; ++j) B[j]=0.;
for(int j=j0; j<j1; ++j){
for(int k=k0; k<k1; ++k){
B[j] +=Qt[k+Nm*j] * _v[k]._odata[ss];
}
}
for(int j=j0; j<j1; ++j){
_v[j]._odata[ss] = B[j];
}
}
}
}
// Returns the logical count _Nm (not _v.size()).
size_t size() const {
return _Nm;
}
// Change the number of stored vectors to n, reusing the grid of _v[0] for new entries.
// Reads _v[0] both before and after the resize, so the container must be non-empty
// on entry and n must be at least 1.
void resize(int n) {
if (n > _Nm)
_v.reserve(n);
_v.resize(n,_v[0]._grid);
if (n < _Nm)
_v.shrink_to_fit();
report(n - _Nm,_v[0]._grid);
_Nm = n;
}
// Return the index permutation that orders sort_vals by ascending absolute value.
std::vector<int> getIndex(std::vector<RealD>& sort_vals) {
std::vector<int> idx(sort_vals.size());
iota(idx.begin(), idx.end(), 0);
// sort indexes based on comparing values in v
sort(idx.begin(), idx.end(),
[&sort_vals](int i1, int i2) {return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]);});
return idx;
}
// Apply the permutation idx to both _v and sort_vals in place using pairwise swaps,
// so that position i ends up holding what was at position idx[i].
// idx is modified as the swaps proceed, and the number of swaps and elapsed time are logged.
void reorderInPlace(std::vector<RealD>& sort_vals, std::vector<int>& idx) {
GridStopWatch gsw;
gsw.Start();
int nswaps = 0;
for (size_t i=0;i<idx.size();i++) {
if (idx[i] != i) {
// find proper place (this could be done in logarithmic time, don't bother for now)
size_t j;
for (j=i;j<idx.size();j++)
if (idx[j]==i)
break;
assert(j!=idx.size());
// idx[j]==i here, so this swaps the entries at positions i and idx[i].
Field _t(_v[0]._grid);
_t = _v[idx[j]];
_v[idx[j]] = _v[idx[i]];
_v[idx[i]] = _t;
RealD _td = sort_vals[idx[j]];
sort_vals[idx[j]] = sort_vals[idx[i]];
sort_vals[idx[i]] = _td;
// Record that position i is now final and that the displaced entry lives at the old idx[i].
int _tt = idx[i];
idx[i] = idx[j];
idx[j] = _tt;
nswaps++;
}
}
// sort values
gsw.Stop();
std::cout << GridLogMessage << "Sorted eigenspace in place in " << gsw.Elapsed() << " using " << nswaps << " swaps" << std::endl;
}
// Sort the vectors and sort_vals together by ascending |sort_vals|, or descending if reverse is set.
void sortInPlace(std::vector<RealD>& sort_vals, bool reverse) {
std::vector<int> idx = getIndex(sort_vals);
if (reverse)
std::reverse(idx.begin(), idx.end());
reorderInPlace(sort_vals,idx);
}
// Accumulate into result, over all stored vectors, the vector _v[i] weighted by
// innerProduct(_v[i],src_orig) / eval[i]. eval must have at least _v.size() entries.
// NOTE(review): presumably axpy(result,a,x,y) sets result = a*x + y -- confirm.
void deflate(const std::vector<RealD>& eval,const Field& src_orig,Field& result) {
result = zero;
int N = (int)_v.size();
for (int i=0;i<N;i++) {
Field& tmp = _v[i];
axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result);
}
}
};
}

View File

@ -78,12 +78,12 @@ class ConjugateGradient : public OperatorFunction<Field> {
cp = a;
ssq = norm2(src);
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: guess " << guess << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: src " << ssq << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: mp " << d << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: mmp " << b << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: cp,r " << cp << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: p " << a << std::endl;
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: guess " << guess << std::endl;
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: src " << ssq << std::endl;
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: mp " << d << std::endl;
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: mmp " << b << std::endl;
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: cp,r " << cp << std::endl;
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: p " << a << std::endl;
RealD rsq = Tolerance * Tolerance * ssq;
@ -92,7 +92,7 @@ class ConjugateGradient : public OperatorFunction<Field> {
return;
}
std::cout << GridLogIterative << std::setprecision(4)
std::cout << GridLogIterative << std::setprecision(8)
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl;
GridStopWatch LinalgTimer;

View File

@ -7,8 +7,9 @@
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Chulwoo Jung
Author: Guido Cossu
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Chulwoo Jung <chulwoo@bnl.gov>
Author: Christoph Lehner <clehner@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -27,125 +28,288 @@ Author: Guido Cossu
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_IRL_H
#define GRID_IRL_H
#ifndef GRID_BIRL_H
#define GRID_BIRL_H
#include <string.h> //memset
//#include <zlib.h>
#include <sys/stat.h>
namespace Grid {
namespace Grid {
enum IRLdiagonalisation {
IRLdiagonaliseWithDSTEGR,
IRLdiagonaliseWithQR,
IRLdiagonaliseWithEigen
};
////////////////////////////////////////////////////////////////////////////////
// Helper class for sorting the evalues AND evectors by Field
// Use pointer swizzle on vectors
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////
// Move following 100 LOC to lattice/Lattice_basis.h
////////////////////////////////////////////////////////
template<class Field>
class SortEigen {
private:
static bool less_lmd(RealD left,RealD right){
return left > right;
}
static bool less_pair(std::pair<RealD,Field const*>& left,
std::pair<RealD,Field const*>& right){
return left.first > (right.first);
}
public:
void push(std::vector<RealD>& lmd,std::vector<Field>& evec,int N) {
////////////////////////////////////////////////////////////////////////
// PAB: FIXME: VERY VERY VERY wasteful: takes a copy of the entire vector set.
// : The vector reorder should be done by pointer swizzle somehow
////////////////////////////////////////////////////////////////////////
std::vector<Field> cpy(lmd.size(),evec[0]._grid);
for(int i=0;i<lmd.size();i++) cpy[i] = evec[i];
std::vector<std::pair<RealD, Field const*> > emod(lmd.size());
void basisOrthogonalize(std::vector<Field> &basis,Field &w,int k)
{
for(int j=0; j<k; ++j){
auto ip = innerProduct(basis[j],w);
w = w - ip*basis[j];
}
}
for(int i=0;i<lmd.size();++i) emod[i] = std::pair<RealD,Field const*>(lmd[i],&cpy[i]);
partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair);
typename std::vector<std::pair<RealD, Field const*> >::iterator it = emod.begin();
for(int i=0;i<N;++i){
lmd[i]=it->first;
evec[i]=*(it->second);
++it;
template<class Field>
void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm)
{
typedef typename Field::vector_object vobj;
GridBase* grid = basis[0]._grid;
parallel_region
{
std::vector < vobj > B(Nm); // Thread private
parallel_for_internal(int ss=0;ss < grid->oSites();ss++){
for(int j=j0; j<j1; ++j) B[j]=0.;
for(int j=j0; j<j1; ++j){
for(int k=k0; k<k1; ++k){
B[j] +=Qt(j,k) * basis[k]._odata[ss];
}
}
for(int j=j0; j<j1; ++j){
basis[j]._odata[ss] = B[j];
}
}
}
void push(std::vector<RealD>& lmd,int N) {
std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd);
}
// Extract a single rotated vector
template<class Field>
void basisRotateJ(Field &result,std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm)
{
typedef typename Field::vector_object vobj;
GridBase* grid = basis[0]._grid;
result.checkerboard = basis[0].checkerboard;
parallel_for(int ss=0;ss < grid->oSites();ss++){
vobj B = zero;
for(int k=k0; k<k1; ++k){
B +=Qt(j,k) * basis[k]._odata[ss];
}
result._odata[ss] = B;
}
bool saturated(RealD lmd, RealD thrs) {
return fabs(lmd) > fabs(thrs);
}
// Apply the permutation described by idx to both _v and sort_vals, in place.
//
// idx[i] names the element that should end up at position i. The permutation
// is applied one swap at a time; after each swap the index table is patched so
// that it keeps describing the remaining work. On return idx[i] == i for every
// position that was processed.
//
// Only the _odata members of the fields are exchanged, not the fields themselves.
template<class Field>
void basisReorderInPlace(std::vector<Field> &_v,std::vector<RealD>& sort_vals, std::vector<int>& idx)
{
  int vlen = idx.size();

  assert(vlen>=1);
  assert(vlen<=sort_vals.size());
  assert(vlen<=_v.size());

  for (size_t dst=0; dst<vlen; dst++) {

    // Slot already holds the right element: nothing to do.
    if (idx[dst] == dst) continue;

    //////////////////////////////////////
    // Locate the slot (at or after dst) whose desired source is dst:
    // once dst and idx[dst] are swapped, that slot has to look at
    // idx[dst] to find the element it wants.
    //////////////////////////////////////
    size_t waiting = dst;
    while (waiting<idx.size() && idx[waiting]!=dst) {
      waiting++;
    }

    assert(idx[dst] > dst); assert(waiting!=idx.size()); assert(idx[waiting]==dst);

    // Exchange payloads and the associated sort keys.
    std::swap(_v[dst]._odata,_v[idx[dst]]._odata); // should use vector move constructor, no data copy
    std::swap(sort_vals[dst],sort_vals[idx[dst]]);

    // Record the move in the index table.
    idx[waiting] = idx[dst];
    idx[dst]     = dst;
  }
}
}
// Return the permutation that orders sort_vals by increasing absolute value.
// Entry k of the result is the index into sort_vals of the k-th smallest |value|.
// sort_vals itself is not modified.
inline std::vector<int> basisSortGetIndex(std::vector<RealD>& sort_vals)
{
  // Start from the identity permutation 0,1,...,N-1
  std::vector<int> order(sort_vals.size());
  std::iota(order.begin(), order.end(), 0);

  // Compare two positions by the magnitude of the values they refer to
  auto smallerMagnitude = [&sort_vals](int lhs, int rhs) {
    return ::fabs(sort_vals[lhs]) < ::fabs(sort_vals[rhs]);
  };

  std::sort(order.begin(), order.end(), smallerMagnitude);
  return order;
}
// Sort the fields in _v together with their keys in sort_vals, in place.
// The ordering is by increasing |sort_vals|; when reverse is true the
// ordering is flipped (decreasing |sort_vals|).
template<class Field>
void basisSortInPlace(std::vector<Field> & _v,std::vector<RealD>& sort_vals, bool reverse)
{
  std::vector<int> order = basisSortGetIndex(sort_vals);

  if (reverse) {
    std::reverse(order.begin(), order.end());
  }

  basisReorderInPlace(_v,sort_vals,order);
}
// PAB: faster to compute the inner products first then fuse loops.
// If performance critical can improve.
// Accumulate into result the sum over i of
//     _v[i] * innerProduct(_v[i], src_orig) / eval[i]
//
//  _v       : basis vectors
//  eval     : one value per basis vector; must have the same size as _v.
//             Entries are used as divisors and are not checked for zero.
//  src_orig : the field being projected onto the basis
//  result   : overwritten; zeroed first, then accumulated into
//
// NOTE(review): assumes axpy(r,a,x,y) computes r = a*x + y -- confirm against its definition.
template<class Field>
void basisDeflate(const std::vector<Field> &_v,const std::vector<RealD>& eval,const Field& src_orig,Field& result) {
  result = zero;
  assert(_v.size()==eval.size());
  int N = (int)_v.size();
  for (int i=0;i<N;i++) {
    // _v is a const vector: its elements can only be bound to a const reference.
    const Field& tmp = _v[i];
    axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result);
  }
}
/////////////////////////////////////////////////////////////
// Implicitly restarted lanczos
/////////////////////////////////////////////////////////////
// Abstract convergence-test policy used by ImplicitlyRestartedLanczos.
//
// Both hooks receive:
//   j             : index of the candidate vector
//   resid         : residual tolerance
//   evec          : the candidate vector
//   eval          : in/out eigenvalue estimate for the candidate
//   evalMaxApprox : approximate largest eigenvalue, available for normalising the residual
// and return non-zero when the candidate is accepted as converged.
template<class Field> class ImplicitlyRestartedLanczosTester
{
 public:
  // Polymorphic base: make destruction through a base pointer/reference well defined.
  virtual ~ImplicitlyRestartedLanczosTester() = default;

  // Test applied to a subset of candidates during the restart loop.
  virtual int TestConvergence(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox)=0;
  // Test applied to every kept vector in the final pass after the restart loop.
  virtual int ReconstructEval(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox)=0;
};
// Selects how ImplicitlyRestartedLanczos diagonalises its small tridiagonal
// problem. Passed to the ImplicitlyRestartedLanczos constructors, where the
// default is IRLdiagonaliseWithEigen.
// NOTE(review): the enumerator names suggest LAPACK dstegr, the in-house QR
// iteration (diagonalize_QR) and the Eigen library (diagonalize_Eigen)
// respectively -- confirm against the diagonalize() dispatcher.
enum IRLdiagonalisation {
IRLdiagonaliseWithDSTEGR,
IRLdiagonaliseWithQR,
IRLdiagonaliseWithEigen
};
// Default convergence tester: judges a candidate eigenvector by applying the
// supplied operator directly and measuring the residual of the eigen-equation.
template<class Field> class ImplicitlyRestartedLanczosHermOpTester  : public ImplicitlyRestartedLanczosTester<Field>
{
 public:
  LinearFunction<Field>       &_HermOp;

  ImplicitlyRestartedLanczosHermOpTester(LinearFunction<Field> &HermOp) : _HermOp(HermOp)  {  };

  // The final-pass test is identical to the in-loop test for this tester.
  int ReconstructEval(int j,RealD resid,Field &B, RealD &eval,RealD evalMaxApprox) override
  {
    return TestConvergence(j,resid,B,eval,evalMaxApprox);
  }

  // Test candidate B:
  //   - eval is overwritten with the Rayleigh quotient (B, H B)/(B, B);
  //     the incoming value is only printed for comparison.
  //   - the squared residual |H B - eval B|^2 is divided by evalMaxApprox^2
  //     and compared against eresid^2.
  // Returns 1 when the normalised residual is below tolerance, 0 otherwise.
  // Side effect: sets the precision of std::cout to 13.
  int TestConvergence(int j,RealD eresid,Field &B, RealD &eval,RealD evalMaxApprox) override
  {
    Field v(B);
    RealD eval_poly = eval;   // value handed in by the caller, kept for the log line

    // Apply operator
    _HermOp(B,v);

    RealD vnum = real(innerProduct(B,v)); // HermOp.
    RealD vden = norm2(B);
    eval   = vnum/vden;

    // v becomes the residual vector H B - eval B
    v -= eval*B;

    RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0);

    std::cout.precision(13);
    std::cout<<GridLogIRL  << "[" << std::setw(3)<<j<<"] "
	     <<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
	     <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
	     <<std::endl;

    int conv=0;
    if( (vv<eresid*eresid) ) conv = 1;

    return conv;
  }
};
template<class Field>
class ImplicitlyRestartedLanczos {
private:
int MaxIter; // Max iterations
int Nstop; // Number of evecs checked for convergence
int Nk; // Number of converged sought
int Nm; // Nm -- total number of vectors
RealD eresid;
private:
const RealD small = 1.0e-8;
int MaxIter;
int MinRestart; // Minimum number of restarts; only check for convergence after
int Nstop; // Number of evecs checked for convergence
int Nk; // Number of converged sought
// int Np; // Np -- Number of spare vecs in krylov space // == Nm - Nk
int Nm; // Nm -- total number of vectors
IRLdiagonalisation diagonalisation;
////////////////////////////////////
int orth_period;
RealD OrthoTime;
RealD eresid, betastp;
////////////////////////////////
// Embedded objects
////////////////////////////////////
SortEigen<Field> _sort;
LinearOperatorBase<Field> &_Linop;
OperatorFunction<Field> &_poly;
////////////////////////////////
LinearFunction<Field> &_PolyOp;
LinearFunction<Field> &_HermOp;
ImplicitlyRestartedLanczosTester<Field> &_Tester;
// Default tester provided (we need a ref to something in default case)
ImplicitlyRestartedLanczosHermOpTester<Field> SimpleTester;
/////////////////////////
// Constructor
/////////////////////////
public:
ImplicitlyRestartedLanczos(LinearOperatorBase<Field> &Linop, // op
OperatorFunction<Field> & poly, // polynomial
int _Nstop, // really sought vecs
int _Nk, // sought vecs
int _Nm, // total vecs
RealD _eresid, // resid in lmd deficit
int _MaxIter, // Max iterations
IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen ) :
_Linop(Linop), _poly(poly),
Nstop(_Nstop), Nk(_Nk), Nm(_Nm),
eresid(_eresid), MaxIter(_MaxIter),
diagonalisation(_diagonalisation)
{ };
//////////////////////////////////////////////////////////////////
// PAB:
//////////////////////////////////////////////////////////////////
// Too many options & knobs.
// Eliminate:
// orth_period
// betastp
// MinRestart
//
// Do we really need orth_period
// What is the theoretical basis & guarantees of betastp ?
// Nstop=Nk viable?
// MinRestart avoidable with new convergence test?
// Could cut to PolyOp, HermOp, Tester, Nk, Nm, resid, maxiter (+diagonalisation)
// HermOp could be eliminated if we dropped the Power method for max eval.
// -- also: The eval, eval2, eval2_copy stuff is still unnecessarily unclear
//////////////////////////////////////////////////////////////////
// Construct with a caller-supplied convergence tester.
//
//   PolyOp  : operator applied in each Lanczos step
//   HermOp  : operator used for the largest-eigenvalue estimate; also wired
//             into the embedded SimpleTester (unused as _Tester here)
//   Tester  : convergence-test policy
//
// The initialiser list follows the order in which the members appear to be
// declared; C++ initialises members in declaration order regardless of how
// the list is written, so this is purely presentational.
ImplicitlyRestartedLanczos(LinearFunction<Field> & PolyOp,
			   LinearFunction<Field> & HermOp,
			   ImplicitlyRestartedLanczosTester<Field> & Tester,
			   int _Nstop,                 // number of vectors required to converge
			   int _Nk,                    // number of vectors sought
			   int _Nm,                    // total number of vectors
			   RealD _eresid,              // residual tolerance
			   int _MaxIter,               // maximum number of restart iterations
			   RealD _betastp=0.0,         // if beta(k) < betastp: converged
			   int _MinRestart=1,
			   int _orth_period = 1,
			   IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen)
  : MaxIter(_MaxIter),
    MinRestart(_MinRestart),
    Nstop(_Nstop),
    Nk(_Nk),
    Nm(_Nm),
    diagonalisation(_diagonalisation),
    orth_period(_orth_period),
    eresid(_eresid),
    betastp(_betastp),
    _PolyOp(PolyOp),
    _HermOp(HermOp),
    _Tester(Tester),
    SimpleTester(HermOp)
{
}
// Construct without an explicit tester: the embedded SimpleTester (built
// from HermOp) is used as the convergence-test policy.
//
// _Tester is bound to the SimpleTester member; taking a reference to a
// member before it has been constructed is fine as long as it is not used
// until construction has finished.
//
// The initialiser list follows the order in which the members appear to be
// declared; C++ initialises members in declaration order regardless of how
// the list is written, so this is purely presentational.
ImplicitlyRestartedLanczos(LinearFunction<Field> & PolyOp,
			   LinearFunction<Field> & HermOp,
			   int _Nstop,                 // number of vectors required to converge
			   int _Nk,                    // number of vectors sought
			   int _Nm,                    // total number of vectors
			   RealD _eresid,              // residual tolerance
			   int _MaxIter,               // maximum number of restart iterations
			   RealD _betastp=0.0,         // if beta(k) < betastp: converged
			   int _MinRestart=1,
			   int _orth_period = 1,
			   IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen)
  : MaxIter(_MaxIter),
    MinRestart(_MinRestart),
    Nstop(_Nstop),
    Nk(_Nk),
    Nm(_Nm),
    diagonalisation(_diagonalisation),
    orth_period(_orth_period),
    eresid(_eresid),
    betastp(_betastp),
    _PolyOp(PolyOp),
    _HermOp(HermOp),
    _Tester(SimpleTester),
    SimpleTester(HermOp)
{
}
////////////////////////////////
// Helpers
////////////////////////////////
static RealD normalise(Field& v)
// Scale v to unit 2-norm in place and return the norm it had beforehand.
// No guard is applied for a zero vector.
template<typename T> static RealD normalise(T& v)
{
  const RealD length = sqrt(norm2(v));   // norm2 yields the squared norm
  v = v * (1.0/length);
  return length;
}
void orthogonalize(Field& w, std::vector<Field>& evec, int k)
void orthogonalize(Field& w, std::vector<Field>& evec,int k)
{
typedef typename Field::scalar_type MyComplex;
MyComplex ip;
for(int j=0; j<k; ++j){
ip = innerProduct(evec[j],w);
w = w - ip * evec[j];
}
OrthoTime-=usecond()/1e6;
basisOrthogonalize(evec,w,k);
normalise(w);
OrthoTime+=usecond()/1e6;
}
/* Rudy Arthur's thesis pp.137
@ -165,184 +329,238 @@ repeat
AVK =VKHK +fKeK Extend to an M = K + P step factorization AVM = VMHM + fMeM
until convergence
*/
void calc(std::vector<RealD>& eval, std::vector<Field>& evec, const Field& src, int& Nconv)
void calc(std::vector<RealD>& eval, std::vector<Field>& evec, const Field& src, int& Nconv, bool reverse=false)
{
GridBase *grid = src._grid;
assert(grid == evec[0]._grid);
GridBase *grid = evec[0]._grid;
assert(grid == src._grid);
std::cout << GridLogMessage <<"**************************************************************************"<< std::endl;
std::cout << GridLogMessage <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 / "<< MaxIter<< std::endl;
std::cout << GridLogMessage <<"**************************************************************************"<< std::endl;
std::cout << GridLogMessage <<" -- seek Nk = " << Nk <<" vectors"<< std::endl;
std::cout << GridLogMessage <<" -- accept Nstop = " << Nstop <<" vectors"<< std::endl;
std::cout << GridLogMessage <<" -- total Nm = " << Nm <<" vectors"<< std::endl;
std::cout << GridLogMessage <<" -- size of eval = " << eval.size() << std::endl;
std::cout << GridLogMessage <<" -- size of evec = " << evec.size() << std::endl;
GridLogIRL.TimingMode(1);
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
std::cout << GridLogIRL <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 / "<< MaxIter<< std::endl;
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
std::cout << GridLogIRL <<" -- seek Nk = " << Nk <<" vectors"<< std::endl;
std::cout << GridLogIRL <<" -- accept Nstop = " << Nstop <<" vectors"<< std::endl;
std::cout << GridLogIRL <<" -- total Nm = " << Nm <<" vectors"<< std::endl;
std::cout << GridLogIRL <<" -- size of eval = " << eval.size() << std::endl;
std::cout << GridLogIRL <<" -- size of evec = " << evec.size() << std::endl;
if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) {
std::cout << GridLogMessage << "Diagonalisation is DSTEGR "<<std::endl;
std::cout << GridLogIRL << "Diagonalisation is DSTEGR "<<std::endl;
} else if ( diagonalisation == IRLdiagonaliseWithQR ) {
std::cout << GridLogMessage << "Diagonalisation is QR "<<std::endl;
std::cout << GridLogIRL << "Diagonalisation is QR "<<std::endl;
} else if ( diagonalisation == IRLdiagonaliseWithEigen ) {
std::cout << GridLogMessage << "Diagonalisation is Eigen "<<std::endl;
std::cout << GridLogIRL << "Diagonalisation is Eigen "<<std::endl;
}
std::cout << GridLogMessage <<"**************************************************************************"<< std::endl;
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
assert(Nm <= evec.size() && Nm <= eval.size());
assert(Nm == evec.size() && Nm == eval.size());
// quickly get an idea of the largest eigenvalue to more properly normalize the residuum
RealD evalMaxApprox = 0.0;
{
auto src_n = src;
auto tmp = src;
const int _MAX_ITER_IRL_MEVAPP_ = 50;
for (int i=0;i<_MAX_ITER_IRL_MEVAPP_;i++) {
normalise(src_n);
_HermOp(src_n,tmp);
RealD vnum = real(innerProduct(src_n,tmp)); // HermOp.
RealD vden = norm2(src_n);
RealD na = vnum/vden;
if (fabs(evalMaxApprox/na - 1.0) < 0.05)
i=_MAX_ITER_IRL_MEVAPP_;
evalMaxApprox = na;
std::cout << GridLogIRL << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
src_n = tmp;
}
}
std::vector<RealD> lme(Nm);
std::vector<RealD> lme2(Nm);
std::vector<RealD> eval2(Nm);
std::vector<RealD> eval2_copy(Nm);
Eigen::MatrixXd Qt = Eigen::MatrixXd::Zero(Nm,Nm);
Eigen::MatrixXd Qt = Eigen::MatrixXd::Zero(Nm,Nm);
std::vector<int> Iconv(Nm);
std::vector<Field> B(Nm,grid); // waste of space replicating
Field f(grid);
Field v(grid);
int k1 = 1;
int k2 = Nk;
Nconv = 0;
RealD beta_k;
Nconv = 0;
// Set initial vector
evec[0] = src;
std::cout << GridLogMessage <<"norm2(src)= " << norm2(src)<<std::endl;
normalise(evec[0]);
std::cout << GridLogMessage <<"norm2(evec[0])= " << norm2(evec[0]) <<std::endl;
// Initial Nk steps
OrthoTime=0.;
for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k);
std::cout<<GridLogIRL <<"Initial "<< Nk <<"steps done "<<std::endl;
std::cout<<GridLogIRL <<"Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
//////////////////////////////////
// Restarting loop begins
//////////////////////////////////
int iter;
for(iter = 0; iter<MaxIter; ++iter){
OrthoTime=0.;
std::cout<< GridLogMessage <<" **********************"<< std::endl;
std::cout<< GridLogMessage <<" Restart iteration = "<< iter << std::endl;
std::cout<< GridLogMessage <<" **********************"<< std::endl;
std::cout<<GridLogIRL <<" running "<<Nm-Nk <<" steps: "<<std::endl;
for(int k=Nk; k<Nm; ++k) step(eval,lme,evec,f,Nm,k);
f *= lme[Nm-1];
std::cout<<GridLogIRL <<" "<<Nm-Nk <<" steps done "<<std::endl;
std::cout<<GridLogIRL <<"Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
//////////////////////////////////
// getting eigenvalues
//////////////////////////////////
for(int k=0; k<Nm; ++k){
eval2[k] = eval[k+k1-1];
lme2[k] = lme[k+k1-1];
}
Qt = Eigen::MatrixXd::Identity(Nm,Nm);
diagonalize(eval2,lme2,Nm,Nm,Qt,grid);
std::cout<<GridLogIRL <<" diagonalized "<<std::endl;
//////////////////////////////////
// sorting
_sort.push(eval2,Nm);
//////////////////////////////////
eval2_copy = eval2;
std::partial_sort(eval2.begin(),eval2.begin()+Nm,eval2.end(),std::greater<RealD>());
std::cout<<GridLogIRL <<" evals sorted "<<std::endl;
const int chunk=8;
for(int io=0; io<k2;io+=chunk){
std::cout<<GridLogIRL << "eval "<< std::setw(3) << io ;
for(int ii=0;ii<chunk;ii++){
if ( (io+ii)<k2 )
std::cout<< " "<< std::setw(12)<< eval2[io+ii];
}
std::cout << std::endl;
}
//////////////////////////////////
// Implicitly shifted QR transformations
//////////////////////////////////
Qt = Eigen::MatrixXd::Identity(Nm,Nm);
for(int ip=k2; ip<Nm; ++ip){
// Eigen replacement for qr_decomp ???
qr_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm);
QR_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm);
}
for(int i=0; i<(Nk+1); ++i) B[i] = 0.0;
for(int j=k1-1; j<k2+1; ++j){
for(int k=0; k<Nm; ++k){
B[j].checkerboard = evec[k].checkerboard;
B[j] += Qt(j,k) * evec[k];
}
}
for(int j=k1-1; j<k2+1; ++j) evec[j] = B[j];
std::cout<<GridLogIRL <<"QR decomposed "<<std::endl;
assert(k2<Nm); assert(k2<Nm); assert(k1>0);
basisRotate(evec,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis
std::cout<<GridLogIRL <<"basisRotated by Qt"<<std::endl;
////////////////////////////////////////////////////
// Compressed vector f and beta(k2)
////////////////////////////////////////////////////
f *= Qt(k2-1,Nm-1);
f += lme[k2-1] * evec[k2];
beta_k = norm2(f);
beta_k = sqrt(beta_k);
std::cout<< GridLogMessage<<" beta(k) = "<<beta_k<<std::endl;
std::cout<<GridLogIRL<<" beta(k) = "<<beta_k<<std::endl;
RealD betar = 1.0/beta_k;
evec[k2] = betar * f;
lme[k2-1] = beta_k;
////////////////////////////////////////////////////
// Convergence test
////////////////////////////////////////////////////
for(int k=0; k<Nm; ++k){
eval2[k] = eval[k];
lme2[k] = lme[k];
}
Qt = Eigen::MatrixXd::Identity(Nm,Nm);
diagonalize(eval2,lme2,Nk,Nm,Qt,grid);
for(int k = 0; k<Nk; ++k) B[k]=0.0;
for(int j = 0; j<Nk; ++j){
for(int k = 0; k<Nk; ++k){
B[j].checkerboard = evec[k].checkerboard;
B[j] += Qt(j,k) * evec[k];
}
}
std::cout<<GridLogIRL <<" Diagonalized "<<std::endl;
Nconv = 0;
for(int i=0; i<Nk; ++i){
_Linop.HermOp(B[i],v);
RealD vnum = real(innerProduct(B[i],v)); // HermOp.
RealD vden = norm2(B[i]);
eval2[i] = vnum/vden;
v -= eval2[i]*B[i];
RealD vv = norm2(v);
std::cout.precision(13);
std::cout << GridLogMessage << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<i<<"] ";
std::cout << "eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[i];
std::cout << " |H B[i] - eval[i]B[i]|^2 "<< std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv<< std::endl;
// change the criteria as evals are supposed to be sorted, all evals smaller(larger) than Nstop should have converged
if((vv<eresid*eresid) && (i == Nconv) ){
Iconv[Nconv] = i;
++Nconv;
}
} // i-loop end
std::cout<< GridLogMessage <<" #modes converged: "<<Nconv<<std::endl;
if (iter >= MinRestart) {
if( Nconv>=Nstop ){
goto converged;
}
} // end of iter loop
std::cout << GridLogMessage <<"**************************************************************************"<< std::endl;
std::cout<< GridLogError <<" ImplicitlyRestartedLanczos::calc() NOT converged.";
std::cout << GridLogMessage <<"**************************************************************************"<< std::endl;
std::cout << GridLogIRL << "Test convergence: rotate subset of vectors to test convergence " << std::endl;
Field B(grid); B.checkerboard = evec[0].checkerboard;
// power of two search pattern; not every evalue in eval2 is assessed.
for(int jj = 1; jj<=Nstop; jj*=2){
int j = Nstop-jj;
RealD e = eval2_copy[j]; // Discard the evalue
basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
if( _Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
if ( j > Nconv ) {
Nconv=j+1;
jj=Nstop; // Terminate the scan
}
}
}
// Do evec[0] for good measure
{
int j=0;
RealD e = eval2_copy[0];
basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox);
}
// test if we converged, if so, terminate
std::cout<<GridLogIRL<<" #modes converged: >= "<<Nconv<<"/"<<Nstop<<std::endl;
// if( Nconv>=Nstop || beta_k < betastp){
if( Nconv>=Nstop){
goto converged;
}
} else {
std::cout << GridLogIRL << "iter < MinRestart: do not yet test for convergence\n";
} // end of iter loop
}
std::cout<<GridLogError<<"\n NOT converged.\n";
abort();
converged:
// Sorting
eval.resize(Nconv);
evec.resize(Nconv,grid);
for(int i=0; i<Nconv; ++i){
eval[i] = eval2[Iconv[i]];
evec[i] = B[Iconv[i]];
{
Field B(grid); B.checkerboard = evec[0].checkerboard;
basisRotate(evec,Qt,0,Nk,0,Nk,Nm);
std::cout << GridLogIRL << " Rotated basis"<<std::endl;
Nconv=0;
//////////////////////////////////////////////////////////////////////
// Full final convergence test; unconditionally applied
//////////////////////////////////////////////////////////////////////
for(int j = 0; j<=Nk; j++){
B=evec[j];
if( _Tester.ReconstructEval(j,eresid,B,eval2[j],evalMaxApprox) ) {
Nconv++;
}
}
if ( Nconv < Nstop )
std::cout << GridLogIRL << "Nconv ("<<Nconv<<") < Nstop ("<<Nstop<<")"<<std::endl;
eval=eval2;
//Keep only converged
eval.resize(Nconv);// Nstop?
evec.resize(Nconv,grid);// Nstop?
basisSortInPlace(evec,eval,reverse);
}
_sort.push(eval,evec,Nconv);
std::cout << GridLogMessage <<"**************************************************************************"<< std::endl;
std::cout << GridLogMessage << "ImplicitlyRestartedLanczos CONVERGED ; Summary :\n";
std::cout << GridLogMessage <<"**************************************************************************"<< std::endl;
std::cout << GridLogMessage << " -- Iterations = "<< iter << "\n";
std::cout << GridLogMessage << " -- beta(k) = "<< beta_k << "\n";
std::cout << GridLogMessage << " -- Nconv = "<< Nconv << "\n";
std::cout << GridLogMessage <<"**************************************************************************"<< std::endl;
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
std::cout << GridLogIRL << "ImplicitlyRestartedLanczos CONVERGED ; Summary :\n";
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
std::cout << GridLogIRL << " -- Iterations = "<< iter << "\n";
std::cout << GridLogIRL << " -- beta(k) = "<< beta_k << "\n";
std::cout << GridLogIRL << " -- Nconv = "<< Nconv << "\n";
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
}
private:
private:
/* Saad PP. 195
1. Choose an initial vector v1 of 2-norm unity. Set β1 ≡ 0, v0 ≡ 0
2. For k = 1,2,...,m Do:
@ -360,28 +578,38 @@ private:
{
const RealD tiny = 1.0e-20;
assert( k< Nm );
_poly(_Linop,evec[k],w); // 3. wk:=Avkβkv_{k1}
GridStopWatch gsw_op,gsw_o;
Field& evec_k = evec[k];
_PolyOp(evec_k,w); std::cout<<GridLogIRL << "PolyOp" <<std::endl;
if(k>0) w -= lme[k-1] * evec[k-1];
ComplexD zalph = innerProduct(evec[k],w); // 4. αk:=(wk,vk)
ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk)
RealD alph = real(zalph);
w = w - alph * evec[k];// 5. wk:=wkαkvk
w = w - alph * evec_k;// 5. wk:=wkαkvk
RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
// 7. vk+1 := wk/βk+1
lmd[k] = alph;
lme[k] = beta;
if ( k > 0 ) orthogonalize(w,evec,k); // orthonormalise
if ( k < Nm-1) evec[k+1] = w;
if ( beta < tiny ) std::cout << GridLogMessage << " beta is tiny "<<beta<<std::endl;
if (k>0 && k % orth_period == 0) {
orthogonalize(w,evec,k); // orthonormalise
std::cout<<GridLogIRL << "Orthogonalised " <<std::endl;
}
if(k < Nm-1) evec[k+1] = w;
std::cout<<GridLogIRL << "alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl;
if ( beta < tiny )
std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl;
}
void diagonalize_Eigen(std::vector<RealD>& lmd, std::vector<RealD>& lme,
int Nk, int Nm,
Eigen::MatrixXd & Qt, // Nm x Nm
@ -404,11 +632,11 @@ private:
}
}
}
///////////////////////////////////////////////////////////////////////////
// File could end here if settle on Eigen ???
///////////////////////////////////////////////////////////////////////////
void qr_decomp(std::vector<RealD>& lmd, // Nm
///////////////////////////////////////////////////////////////////////////
// File could end here if settle on Eigen ??? !!!
///////////////////////////////////////////////////////////////////////////
void QR_decomp(std::vector<RealD>& lmd, // Nm
std::vector<RealD>& lme, // Nm
int Nk, int Nm, // Nk, Nm
Eigen::MatrixXd& Qt, // Nm x Nm matrix
@ -575,51 +803,50 @@ void diagonalize_lapack(std::vector<RealD>& lmd,
#endif
}
void diagonalize_QR(std::vector<RealD>& lmd, std::vector<RealD>& lme,
int Nk, int Nm,
Eigen::MatrixXd & Qt,
GridBase *grid)
{
int Niter = 100*Nm;
int kmin = 1;
int kmax = Nk;
// (this should be more sophisticated)
for(int iter=0; iter<Niter; ++iter){
// determination of 2x2 leading submatrix
RealD dsub = lmd[kmax-1]-lmd[kmax-2];
RealD dd = sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]);
RealD Dsh = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub)));
// (Dsh: shift)
// transformation
qr_decomp(lmd,lme,Nk,Nm,Qt,Dsh,kmin,kmax); // Nk, Nm
// Convergence criterion (redef of kmin and kmax)
for(int j=kmax-1; j>= kmin; --j){
RealD dds = fabs(lmd[j-1])+fabs(lmd[j]);
if(fabs(lme[j-1])+dds > dds){
kmax = j+1;
goto continued;
}
}
Niter = iter;
return;
continued:
for(int j=0; j<kmax-1; ++j){
RealD dds = fabs(lmd[j])+fabs(lmd[j+1]);
if(fabs(lme[j])+dds > dds){
kmin = j+1;
break;
}
void diagonalize_QR(std::vector<RealD>& lmd, std::vector<RealD>& lme,
int Nk, int Nm,
Eigen::MatrixXd & Qt,
GridBase *grid)
{
int QRiter = 100*Nm;
int kmin = 1;
int kmax = Nk;
// (this should be more sophisticated)
for(int iter=0; iter<QRiter; ++iter){
// determination of 2x2 leading submatrix
RealD dsub = lmd[kmax-1]-lmd[kmax-2];
RealD dd = sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]);
RealD Dsh = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub)));
// (Dsh: shift)
// transformation
QR_decomp(lmd,lme,Nk,Nm,Qt,Dsh,kmin,kmax); // Nk, Nm
// Convergence criterion (redef of kmin and kmax)
for(int j=kmax-1; j>= kmin; --j){
RealD dds = fabs(lmd[j-1])+fabs(lmd[j]);
if(fabs(lme[j-1])+dds > dds){
kmax = j+1;
goto continued;
}
}
QRiter = iter;
return;
continued:
for(int j=0; j<kmax-1; ++j){
RealD dds = fabs(lmd[j])+fabs(lmd[j+1]);
if(fabs(lme[j])+dds > dds){
kmin = j+1;
break;
}
}
std::cout << GridLogError << "[QL method] Error - Too many iteration: "<<Niter<<"\n";
abort();
}
};
std::cout << GridLogError << "[QL method] Error - Too many iteration: "<<QRiter<<"\n";
abort();
}
};
}
#endif

View File

@ -0,0 +1,352 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/LocalCoherenceLanczos.h
Copyright (C) 2015
Author: Christoph Lehner <clehner@bnl.gov>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_LOCAL_COHERENCE_IRL_H
#define GRID_LOCAL_COHERENCE_IRL_H
namespace Grid {
// Serializable bundle of the parameters for one Lanczos run.
// The members are declared through GRID_SERIALIZABLE_CLASS_MEMBERS as
// (type, name) pairs; the comments below annotate each pair.
struct LanczosParams : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParams,
ChebyParams, Cheby,/*Chebyshev polynomial parameters*/
int, Nstop, /*Vecs in Lanczos must converge Nstop < Nk < Nm*/
int, Nk, /*Vecs in Lanczos seek converge*/
int, Nm, /*Total vecs in Lanczos include restart*/
RealD, resid, /*residual*/
int, MaxIt, /*presumably the maximum number of restart iterations -- TODO confirm*/
RealD, betastp, /* presumably the beta(k) stopping threshold ("if beta(k) < betastp: converged") -- TODO confirm */
int, MinRes); // Must restart; presumably the minimum number of restarts before convergence is tested -- TODO confirm
};
// Serializable bundle of the parameters for a local-coherence Lanczos run:
// stage switches, one LanczosParams set each for the fine and coarse stages,
// the smoother parameters, and further run inputs.
// The members are declared through GRID_SERIALIZABLE_CLASS_MEMBERS as
// (type, name) pairs.
// NOTE(review): per-member meanings are not established in this file section;
// the hedged notes below are inferred from names only -- confirm against the callers.
struct LocalCoherenceLanczosParams : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams,
bool, doFine, /* presumably: run the fine-grid stage */
bool, doFineRead, /* presumably: read fine-grid results instead of computing them */
bool, doCoarse, /* presumably: run the coarse-grid stage */
bool, doCoarseRead, /* presumably: read coarse-grid results instead of computing them */
LanczosParams, FineParams,
LanczosParams, CoarseParams,
ChebyParams, Smoother,
RealD , coarse_relax_tol, /* presumably the tolerance relaxation factor used by the smoothed tester -- TODO confirm */
std::vector<int>, blockSize,
std::string, config,
std::vector < std::complex<double> >, omega,
RealD, mass,
RealD, M5);
};
// Duplicate functionality; ProjectedFunctionHermOp could be used with the trivial function
// Coarse-space operator obtained by sandwiching the fine Hermitian operator
// between promotion to, and projection from, the fine grid:
//     out = Project( HermOp( Promote(in) ) )
template<class Fobj,class CComplex,int nbasis>
class ProjectedHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > {
public:
  using CoarseSiteVector = iVector<CComplex,nbasis >;
  using CoarseField      = Lattice<CoarseSiteVector>;
  using CoarseScalar     = Lattice<CComplex>;   // used for inner products on fine field
  using FineField        = Lattice<Fobj>;

  LinearOperatorBase<FineField>     &_Linop;      // fine-grid operator
  Aggregation<Fobj,CComplex,nbasis> &_Aggregate;  // supplies the subspace and the fine grid

  ProjectedHermOp(LinearOperatorBase<FineField>& linop, Aggregation<Fobj,CComplex,nbasis> &aggregate)
    : _Linop(linop), _Aggregate(aggregate)
  {}

  // Apply the projected operator to the coarse field `in`, writing `out`.
  // A progress line is logged after each of the three stages.
  void operator()(const CoarseField& in, CoarseField& out) {
    GridBase *fineGrid = _Aggregate.FineGrid;

    FineField fineIn (fineGrid);
    FineField fineOut(fineGrid);

    _Aggregate.PromoteFromSubspace(in,fineIn);
    std::cout<<GridLogIRL<<"ProjectedHermop : Promote to fine"<<std::endl;

    _Linop.HermOp(fineIn,fineOut);
    std::cout<<GridLogIRL<<"ProjectedHermop : HermOp (fine) "<<std::endl;

    _Aggregate.ProjectToSubspace(out,fineOut);
    std::cout<<GridLogIRL<<"ProjectedHermop : Project to coarse "<<std::endl;
  }
};
// Coarse-space operator obtained by sandwiching a function of the fine
// operator between promotion to, and projection from, the fine grid:
//     out = Project( poly(Linop)( Promote(in) ) )
template<class Fobj,class CComplex,int nbasis>
class ProjectedFunctionHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > {
public:
  using CoarseSiteVector = iVector<CComplex,nbasis >;
  using CoarseField      = Lattice<CoarseSiteVector>;
  using CoarseScalar     = Lattice<CComplex>;   // used for inner products on fine field
  using FineField        = Lattice<Fobj>;

  OperatorFunction<FineField>       &_poly;       // function applied to the fine operator
  LinearOperatorBase<FineField>     &_Linop;      // fine-grid operator
  Aggregation<Fobj,CComplex,nbasis> &_Aggregate;  // supplies subspace, fine grid and checkerboard

  ProjectedFunctionHermOp(OperatorFunction<FineField> & poly,LinearOperatorBase<FineField>& linop,
			  Aggregation<Fobj,CComplex,nbasis> &aggregate)
    : _poly(poly), _Linop(linop), _Aggregate(aggregate)
  {}

  // Apply the projected function to the coarse field `in`, writing `out`.
  // Both fine-grid temporaries are tagged with the aggregate's checkerboard.
  // A progress line is logged after each of the three stages.
  void operator()(const CoarseField& in, CoarseField& out) {
    GridBase *fineGrid = _Aggregate.FineGrid;

    FineField fineIn(fineGrid);
    fineIn.checkerboard = _Aggregate.checkerboard;

    FineField fineOut(fineGrid);
    fineOut.checkerboard = _Aggregate.checkerboard;

    _Aggregate.PromoteFromSubspace(in,fineIn);
    std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Promote to fine"<<std::endl;

    _poly(_Linop,fineIn,fineOut);
    std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Poly "<<std::endl;

    _Aggregate.ProjectToSubspace(out,fineOut);
    std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Project to coarse "<<std::endl;
  }
};
// Convergence tester for a Lanczos run on coarse fields.
//
//  - TestConvergence works entirely on the coarse grid, using the coarse
//    operator _Poly.
//  - ReconstructEval promotes the candidate to the fine grid, applies the
//    smoother, and measures the eigen-residual of the fine operator there.
template<class Fobj,class CComplex,int nbasis>
class ImplicitlyRestartedLanczosSmoothedTester  : public ImplicitlyRestartedLanczosTester<Lattice<iVector<CComplex,nbasis > > >
{
 public:
  typedef iVector<CComplex,nbasis >           CoarseSiteVector;
  typedef Lattice<CoarseSiteVector>           CoarseField;
  typedef Lattice<CComplex>   CoarseScalar; // used for inner products on fine field
  typedef Lattice<Fobj>          FineField;

  LinearFunction<CoarseField> & _Poly;            // coarse operator used by TestConvergence
  OperatorFunction<FineField>   & _smoother;      // applied on the fine grid in ReconstructEval
  LinearOperatorBase<FineField> &_Linop;          // fine-grid operator
  Aggregation<Fobj,CComplex,nbasis> &_Aggregate;  // supplies subspace, fine grid and checkerboard
  RealD                             _coarse_relax_tol; // factor applied to the tolerance for j > nbasis

  // Initialisers are listed in declaration order, which is the order in which
  // C++ actually runs them.
  ImplicitlyRestartedLanczosSmoothedTester(LinearFunction<CoarseField>   &Poly,
					   OperatorFunction<FineField>   &smoother,
					   LinearOperatorBase<FineField> &Linop,
					   Aggregation<Fobj,CComplex,nbasis> &Aggregate,
					   RealD coarse_relax_tol=5.0e3)
    : _Poly(Poly), _smoother(smoother), _Linop(Linop), _Aggregate(Aggregate), _coarse_relax_tol(coarse_relax_tol)  {    };

  // Coarse-grid test of candidate B:
  //   - eval is overwritten with the Rayleigh quotient (B, Poly B)/(B, B);
  //     the incoming value is only printed for comparison.
  //   - |Poly B - eval B|^2 / evalMaxApprox^2 is compared against eresid^2.
  // Returns 1 when below tolerance, 0 otherwise.
  // Side effect: sets the precision of std::cout to 13.
  int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) override
  {
    CoarseField v(B);
    RealD eval_poly = eval;   // value handed in by the caller, kept for the log line

    // Apply operator
    _Poly(B,v);

    RealD vnum = real(innerProduct(B,v)); // HermOp.
    RealD vden = norm2(B);
    eval   = vnum/vden;

    // v becomes the residual vector
    v -= eval*B;

    RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0);

    std::cout.precision(13);
    std::cout<<GridLogIRL  << "[" << std::setw(3)<<j<<"] "
	     <<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
	     <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
	     <<std::endl;

    int conv=0;
    if( (vv<eresid*eresid) ) conv = 1;
    return conv;
  }

  // Fine-grid test of candidate B:
  //   - B is promoted to the fine grid and passed through the smoother;
  //   - eval is overwritten with the Rayleigh quotient of the fine operator
  //     on the smoothed vector;
  //   - |H fB - eval fB|^2 / evalMaxApprox^2 is compared against eresid^2,
  //     where eresid is first multiplied by _coarse_relax_tol when j > nbasis.
  // Returns 1 when below tolerance, 0 otherwise.
  // Side effect: sets the precision of std::cout to 13.
  int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) override
  {
    GridBase *FineGrid = _Aggregate.FineGrid;

    int checkerboard   = _Aggregate.checkerboard;

    FineField fB(FineGrid);fB.checkerboard =checkerboard;
    FineField fv(FineGrid);fv.checkerboard =checkerboard;

    // fv is used as scratch for the promoted vector; fB receives the smoothed result.
    _Aggregate.PromoteFromSubspace(B,fv);
    _smoother(_Linop,fv,fB);

    RealD eval_poly = eval;   // value handed in by the caller, kept for the log line
    _Linop.HermOp(fB,fv);

    RealD vnum = real(innerProduct(fB,fv)); // HermOp.
    RealD vden = norm2(fB);
    eval   = vnum/vden;

    // fv becomes the residual vector
    fv -= eval*fB;
    RealD vv = norm2(fv) / ::pow(evalMaxApprox,2.0);

    std::cout.precision(13);
    std::cout<<GridLogIRL  << "[" << std::setw(3)<<j<<"] "
	     <<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
	     <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
	     <<std::endl;

    // NOTE(review): j is a 0-based index, so j == nbasis is not relaxed here.
    // If the intent is "every vector beyond the first nbasis", this wants >= -- confirm.
    if ( j > nbasis ) eresid = eresid*_coarse_relax_tol;
    if( (vv<eresid*eresid) ) return 1;
    return 0;
  }
};
////////////////////////////////////////////
// Make serializable Lanczos params
////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Two-level Lanczos driver.
//
//  calcFine   : Lanczos on the fine grid; keeps nbasis eigenvectors as the
//               aggregation subspace.
//  calcCoarse : Lanczos on the coarse grid using operators projected through
//               that subspace.
//  testFine / testCoarse : re-check the stored eigenpairs.
//
// _FineOp is held by reference; the caller owns it.
///////////////////////////////////////////////////////////////////////////////
template<class Fobj,class CComplex,int nbasis>
class LocalCoherenceLanczos
{
public:
  typedef iVector<CComplex,nbasis > CoarseSiteVector;
  typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
  typedef Lattice<CoarseSiteVector> CoarseField;
  typedef Lattice<Fobj> FineField;

protected:
  GridBase *_CoarseGrid;
  GridBase *_FineGrid;
  int _checkerboard;
  LinearOperatorBase<FineField> & _FineOp;

  // FIXME replace Aggregation with vector of fine; the code reuse is too small for
  // the hassle and complexity of cross coupling.
  Aggregation<Fobj,CComplex,nbasis> _Aggregate;

  std::vector<RealD> evals_fine;
  std::vector<RealD> evals_coarse;
  std::vector<CoarseField> evec_coarse;

public:
  // Initialisers follow the member declaration order (the order C++ uses),
  // which avoids -Wreorder diagnostics.
  LocalCoherenceLanczos(GridBase *FineGrid,
                        GridBase *CoarseGrid,
                        LinearOperatorBase<FineField> &FineOp,
                        int checkerboard) :
    _CoarseGrid(CoarseGrid),
    _FineGrid(FineGrid),
    _checkerboard(checkerboard),
    _FineOp(FineOp),
    _Aggregate(CoarseGrid,FineGrid,checkerboard)
  {
    evals_fine.resize(0);
    evals_coarse.resize(0);
  };

  // Block-orthogonalise the fine subspace held by the aggregation.
  void Orthogonalise(void ) { _Aggregate.Orthogonalise(); }

  // Scale v to unit norm in place; returns the norm v had before scaling.
  template<typename T> static RealD normalise(T& v)
  {
    RealD nn = norm2(v);
    nn = ::sqrt(nn);
    v = v * (1.0/nn);
    return nn;
  }

  // Fill the subspace without running Lanczos: start from a constant vector
  // and repeatedly apply the fine operator, normalising each result.
  void fakeFine(void)
  {
    int Nk = nbasis;
    _Aggregate.subspace.resize(Nk,_FineGrid);
    _Aggregate.subspace[0]=1.0;
    _Aggregate.subspace[0].checkerboard=_checkerboard;
    normalise(_Aggregate.subspace[0]);
    PlainHermOp<FineField> Op(_FineOp);
    for(int k=1;k<Nk;k++){
      _Aggregate.subspace[k].checkerboard=_checkerboard;
      Op(_Aggregate.subspace[k-1],_Aggregate.subspace[k]);
      normalise(_Aggregate.subspace[k]);
    }
  }

  // Re-test every stored fine eigenpair against tolerance resid; aborts via
  // assert if any of them fails.
  void testFine(RealD resid)
  {
    assert(evals_fine.size() == nbasis);
    assert(_Aggregate.subspace.size() == nbasis);
    PlainHermOp<FineField> Op(_FineOp);
    ImplicitlyRestartedLanczosHermOpTester<FineField> SimpleTester(Op);
    for(int k=0;k<nbasis;k++){
      // The call is kept outside assert(): it receives evals_fine[k] and must
      // still execute when asserts are compiled out with NDEBUG.
      int converged = SimpleTester.ReconstructEval(k,resid,_Aggregate.subspace[k],evals_fine[k],1.0);
      assert(converged==1);
      (void)converged; // silence unused-variable warning under NDEBUG
    }
  }

  // Re-test every stored coarse eigenpair through the smoothed fine-grid
  // tester. Vectors with index >= nbasis are tested against resid*relax.
  void testCoarse(RealD resid,ChebyParams cheby_smooth,RealD relax)
  {
    assert(evals_fine.size() == nbasis);
    assert(_Aggregate.subspace.size() == nbasis);
    //////////////////////////////////////////////////////////////////////////////////////////////////
    // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
    //////////////////////////////////////////////////////////////////////////////////////////////////
    Chebyshev<FineField> ChebySmooth(cheby_smooth);
    ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,_Aggregate);
    ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax);

    // NOTE(review): the tester's ReconstructEval also multiplies its tolerance
    // by `relax` when the index is > nbasis, so for k > nbasis the relaxation
    // is effectively applied twice. Confirm this is intended.
    const int ncoarse = static_cast<int>(evec_coarse.size());
    for(int k=0;k<ncoarse;k++){
      const RealD tol = ( k < nbasis ) ? resid : resid*relax;
      // Kept outside assert(): ReconstructEval updates evals_coarse[k] and
      // must still execute when asserts are compiled out with NDEBUG.
      int converged = ChebySmoothTester.ReconstructEval(k,tol,evec_coarse[k],evals_coarse[k],1.0);
      assert(converged==1);
      (void)converged; // silence unused-variable warning under NDEBUG
    }
  }

  // Run the fine-grid Lanczos and keep the first nbasis eigenpairs as the
  // aggregation subspace. Requires nbasis <= Nstop and nbasis <= Nm.
  void calcFine(ChebyParams cheby_parms,int Nstop,int Nk,int Nm,RealD resid,
                RealD MaxIt, RealD betastp, int MinRes)
  {
    // Check the size preconditions before the expensive solve.
    assert(nbasis<=Nm);
    assert(Nstop>=nbasis);

    Chebyshev<FineField> Cheby(cheby_parms);
    FunctionHermOp<FineField> ChebyOp(Cheby,_FineOp);
    PlainHermOp<FineField> Op(_FineOp);

    evals_fine.resize(Nm);
    _Aggregate.subspace.resize(Nm,_FineGrid);

    ImplicitlyRestartedLanczos<FineField> IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes);

    FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard;

    int Nconv;
    IRL.calc(evals_fine,_Aggregate.subspace,src,Nconv,false);

    // Shrink down to number saved
    assert(Nconv>=nbasis);
    evals_fine.resize(nbasis);
    _Aggregate.subspace.resize(nbasis,_FineGrid);
  }

  // Run the coarse-grid Lanczos with operators projected through the fine
  // subspace, keep the first Nstop eigenpairs and print their eigenvalues.
  void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax,
                  int Nstop, int Nk, int Nm,RealD resid,
                  RealD MaxIt, RealD betastp, int MinRes)
  {
    Chebyshev<FineField> Cheby(cheby_op);
    // NOTE(review): Op is constructed but not passed to the IRL below (ChebyOp
    // is used for both operator arguments); kept as in the original.
    ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,_Aggregate);
    ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,_Aggregate);
    //////////////////////////////////////////////////////////////////////////////////////////////////
    // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
    //////////////////////////////////////////////////////////////////////////////////////////////////
    Chebyshev<FineField> ChebySmooth(cheby_smooth);
    ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax);

    evals_coarse.resize(Nm);
    evec_coarse.resize(Nm,_CoarseGrid);

    CoarseField src(_CoarseGrid); src=1.0;

    ImplicitlyRestartedLanczos<CoarseField> IRL(ChebyOp,ChebyOp,ChebySmoothTester,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes);
    int Nconv=0;
    IRL.calc(evals_coarse,evec_coarse,src,Nconv,false);
    assert(Nconv>=Nstop);

    evals_coarse.resize(Nstop);
    evec_coarse.resize (Nstop,_CoarseGrid);
    for (int i=0;i<Nstop;i++){
      std::cout << i << " Coarse eval = " << evals_coarse[i] << std::endl;
    }
  }
};
}
#endif

View File

@ -55,7 +55,15 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
*Odd
* i) D_oo psi_o = L^{-1} eta_o
* eta_o' = (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
*
* Wilson:
* (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1} eta_o
* Stag:
* D_oo psi_o = L^{-1} eta = (eta_o - Moe Mee^{-1} eta_e)
*
* L^-1 eta = (      1        0 ) (eta_e)
*            ( -MoeMee^{-1}  1 ) (eta_o)
* so the odd component is (eta_o - Moe Mee^{-1} eta_e)
*
*Even
* ii) Mee psi_e + Meo psi_o = src_e
*
@ -82,7 +90,7 @@ namespace Grid {
// Take a matrix and form a Red Black solver calling a Herm solver
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Now make the norm reflect extra factor of Mee
template<class Field> class SchurRedBlackStaggeredSolve {
private:
OperatorFunction<Field> & _HermitianRBSolver;
@ -122,18 +130,19 @@ namespace Grid {
pickCheckerboard(Odd ,sol_o,out);
/////////////////////////////////////////////////////
// src_o = Mdag * (source_o - Moe MeeInv source_e)
// src_o = (source_o - Moe MeeInv source_e)
/////////////////////////////////////////////////////
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
_Matrix.Mooee(tmp,src_o); assert(src_o.checkerboard ==Odd);
//src_o = tmp; assert(src_o.checkerboard ==Odd);
_Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source from dumb choice of matrix norm.
//////////////////////////////////////////////////////////////
// Call the red-black solver
//////////////////////////////////////////////////////////////
std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver calling the Mpc solver" <<std::endl;
_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
///////////////////////////////////////////////////

View File

@ -44,13 +44,18 @@ namespace Grid{
class GridBase : public CartesianCommunicator , public GridThread {
public:
int dummy;
// Give Lattice access
template<class object> friend class Lattice;
GridBase(const std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {};
GridBase(const std::vector<int> & processor_grid,
const CartesianCommunicator &parent) : CartesianCommunicator(processor_grid,parent) {};
const CartesianCommunicator &parent,
int &split_rank)
: CartesianCommunicator(processor_grid,parent,split_rank) {};
// Sub-communicator constructor for callers that do not need the split rank:
// the rank written by the CartesianCommunicator constructor goes into the
// 'dummy' member and is otherwise ignored.
GridBase(const std::vector<int> & processor_grid, const CartesianCommunicator &parent)
  : CartesianCommunicator(processor_grid,parent,dummy)
{}
virtual ~GridBase() = default;

View File

@ -38,7 +38,7 @@ namespace Grid{
class GridCartesian: public GridBase {
public:
int dummy;
virtual int CheckerBoardFromOindexTable (int Oindex) {
return 0;
}
@ -67,7 +67,14 @@ public:
GridCartesian(const std::vector<int> &dimensions,
const std::vector<int> &simd_layout,
const std::vector<int> &processor_grid,
const GridCartesian &parent) : GridBase(processor_grid,parent)
const GridCartesian &parent) : GridBase(processor_grid,parent,dummy)
{
Init(dimensions,simd_layout,processor_grid);
}
// Build a Cartesian grid on a sub-communicator split from 'parent'.
// split_rank is forwarded to GridBase, which passes it on to the
// communicator constructor as an output argument.
GridCartesian(const std::vector<int> &dimensions,
              const std::vector<int> &simd_layout,
              const std::vector<int> &processor_grid,
              const GridCartesian &parent,
              int &split_rank)
  : GridBase(processor_grid,parent,split_rank)
{
  Init(dimensions,simd_layout,processor_grid);
}

View File

@ -207,6 +207,7 @@ public:
{
assert((_gdimensions[d] & 0x1) == 0);
_gdimensions[d] = _gdimensions[d] / 2; // Remove a checkerboard
_gsites /= 2;
}
_ldimensions[d] = _gdimensions[d] / _processors[d];
assert(_ldimensions[d] * _processors[d] == _gdimensions[d]);

View File

@ -97,9 +97,41 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N)
}
#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT)
#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) || defined (GRID_COMMS_MPI3)
void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,uint64_t bytes)
{
std::vector<int> row(_ndimension,1);
assert(dim>=0 && dim<_ndimension);
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)
// Split the communicator
row[dim] = _processors[dim];
int me;
CartesianCommunicator Comm(row,*this,me);
Comm.AllToAll(in,out,words,bytes);
}
// All-to-all exchange over this communicator.
//   words : number of objects sent to (and received from) each rank
//   bytes : size of one object in bytes
// MPI takes "int" counts, so a raw byte count overflows easily (e.g.
// 64*64*64*128*16 is already ~500 million elements, and with 24*4-byte
// objects the byte count is ~50x10^9, far above the ~2x10^9 int limit).
// The data is therefore sent as `words` items of a contiguous `bytes`-sized
// datatype, which keeps both counts small.
void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes)
{
  const int iwords = static_cast<int>(words);
  const int ibytes = static_cast<int>(bytes);
  // Safe to cast to int? The value must be non-negative and round-trip exactly.
  assert(iwords >= 0 && static_cast<uint64_t>(iwords) == words);
  assert(ibytes >= 0 && static_cast<uint64_t>(ibytes) == bytes);

  MPI_Datatype object;
  int ierr;
  ierr = MPI_Type_contiguous(ibytes,MPI_BYTE,&object);                      assert(ierr==0);
  ierr = MPI_Type_commit(&object);                                          assert(ierr==0);
  ierr = MPI_Alltoall(in,iwords,object,out,iwords,object,communicator);     assert(ierr==0);
  ierr = MPI_Type_free(&object);                                            assert(ierr==0);
  (void)ierr; // silence unused-variable warning when asserts are compiled out
}
#endif
#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT)
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)
{
_ndimension = processors.size();
assert(_ndimension = parent._ndimension);
@ -124,33 +156,51 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
for(int d=0;d<_ndimension;d++){
ccoor[d] = parent._processor_coor[d] % processors[d];
scoor[d] = parent._processor_coor[d] / processors[d];
ssize[d] = parent._processors[d]/ processors[d];
ssize[d] = parent._processors[d] / processors[d];
}
int crank,srank; // rank within subcomm ; rank of subcomm within blocks of subcomms
Lexicographic::IndexFromCoor(ccoor,crank,processors);
Lexicographic::IndexFromCoor(scoor,srank,ssize);
int crank; // rank within subcomm ; srank is rank of subcomm within blocks of subcomms
// Mpi uses the reverse Lexico convention to us
Lexicographic::IndexFromCoorReversed(ccoor,crank,processors);
Lexicographic::IndexFromCoorReversed(scoor,srank,ssize);
MPI_Comm comm_split;
if ( Nchild > 1 ) {
// std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl;
// std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"] ";
// for(int d=0;d<parent._processors.size();d++) std::cout << parent._processors[d] << " ";
// std::cout<<std::endl;
/*
std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl;
std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"] ";
for(int d=0;d<parent._processors.size();d++) std::cout << parent._processors[d] << " ";
std::cout<<std::endl;
// std::cout << GridLogMessage<<" child grid["<< _ndimension <<"] ";
// for(int d=0;d<processors.size();d++) std::cout << processors[d] << " ";
// std::cout<<std::endl;
std::cout << GridLogMessage<<" child grid["<< _ndimension <<"] ";
for(int d=0;d<processors.size();d++) std::cout << processors[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" old rank "<< parent._processor<<" coor ["<< _ndimension <<"] ";
for(int d=0;d<processors.size();d++) std::cout << parent._processor_coor[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" new rank "<< crank<<" coor ["<< _ndimension <<"] ";
for(int d=0;d<processors.size();d++) std::cout << ccoor[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" new coor ["<< _ndimension <<"] ";
for(int d=0;d<processors.size();d++) std::cout << parent._processor_coor[d] << " ";
std::cout<<std::endl;
*/
int ierr= MPI_Comm_split(parent.communicator,srank,crank,&comm_split);
assert(ierr==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Declare victory
//////////////////////////////////////////////////////////////////////////////////////////////////////
// std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into "
// << Nchild <<" communicators with " << childsize << " ranks"<<std::endl;
/*
std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into "
<< Nchild <<" communicators with " << childsize << " ranks"<<std::endl;
*/
} else {
comm_split=parent.communicator;
srank = 0;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
@ -158,14 +208,12 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
//////////////////////////////////////////////////////////////////////////////////////////////////////
InitFromMPICommunicator(processors,comm_split);
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Take an MPI_Comm and self assemble
//////////////////////////////////////////////////////////////////////////////////////////////////////
void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base)
{
// if ( communicator_base != communicator_world ) {
// std::cout << "Cartesian communicator created with a non-world communicator"<<std::endl;
// }
_ndimension = processors.size();
_processor_coor.resize(_ndimension);
@ -179,14 +227,24 @@ void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &proc
}
std::vector<int> periodic(_ndimension,1);
MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],1,&communicator);
MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],0,&communicator);
MPI_Comm_rank(communicator,&_processor);
MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
if ( communicator_base != communicator_world ) {
std::cout << "InitFromMPICommunicator Cartesian communicator created with a non-world communicator"<<std::endl;
std::cout << " new communicator rank "<<_processor<< " coor ["<<_ndimension<<"] ";
for(int d=0;d<_processors.size();d++){
std::cout << _processor_coor[d]<<" ";
}
std::cout << std::endl;
}
int Size;
MPI_Comm_size(communicator,&Size);
#ifdef GRID_COMMS_MPIT
#if defined(GRID_COMMS_MPIT) || defined (GRID_COMMS_MPI3)
communicator_halo.resize (2*_ndimension);
for(int i=0;i<_ndimension*2;i++){
MPI_Comm_dup(communicator,&communicator_halo[i]);
@ -195,7 +253,9 @@ void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &proc
assert(Size==_Nprocessors);
}
#endif
#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT)
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
InitFromMPICommunicator(processors,communicator_world);
@ -204,10 +264,10 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
#endif
#if !defined( GRID_COMMS_MPI3)
int CartesianCommunicator::NodeCount(void) { return ProcessorCount();};
int CartesianCommunicator::RankCount(void) { return ProcessorCount();};
#endif
#if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPIT)
double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
int xmit_to_rank,

View File

@ -153,12 +153,12 @@ class CartesianCommunicator {
// Constructors to sub-divide a parent communicator
// and default to comm world
////////////////////////////////////////////////
CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent);
CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank);
CartesianCommunicator(const std::vector<int> &pdimensions_in);
virtual ~CartesianCommunicator();
private:
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT)
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) || defined (GRID_COMMS_MPI3)
////////////////////////////////////////////////
// Private initialise from an MPI communicator
// Can use after an MPI_Comm_split, but hidden from user so private
@ -275,12 +275,16 @@ class CartesianCommunicator {
// std::cerr << " AllToAll in.size() "<<in.size()<<std::endl;
// std::cerr << " AllToAll out.size() "<<out.size()<<std::endl;
assert(in.size()==out.size());
size_t bytes=(in.size()*sizeof(T))/numnode;
assert((bytes*numnode) == in.size()*sizeof(T));
AllToAll(dim,(void *)&in[0],(void *)&out[0],bytes);
uint64_t bytes=sizeof(T);
uint64_t words=in.size()/numnode;
assert(numnode * words == in.size());
assert(words < (1ULL<<32));
AllToAll(dim,(void *)&in[0],(void *)&out[0],words,bytes);
}
void AllToAll(int dim ,void *in,void *out,int bytes);
void AllToAll(void *in,void *out,int bytes);
void AllToAll(int dim ,void *in,void *out,uint64_t words,uint64_t bytes);
void AllToAll(void *in,void *out,uint64_t words ,uint64_t bytes);
template<class obj> void Broadcast(int root,obj &data)
{

View File

@ -55,7 +55,9 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
CartesianCommunicator::~CartesianCommunicator()
{
if (communicator && !MPI::Is_finalized())
int MPI_is_finalised;
MPI_Finalized(&MPI_is_finalised);
if (communicator && !MPI_is_finalised)
MPI_Comm_free(&communicator);
}
@ -194,21 +196,6 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
root,
communicator);
assert(ierr==0);
}
void CartesianCommunicator::AllToAll(int dim,void *in,void *out,int bytes)
{
std::vector<int> row(_ndimension,1);
assert(dim>=0 && dim<_ndimension);
// Split the communicator
row[dim] = _processors[dim];
CartesianCommunicator Comm(row,*this);
Comm.AllToAll(in,out,bytes);
}
void CartesianCommunicator::AllToAll(void *in,void *out,int bytes)
{
MPI_Alltoall(in ,bytes,MPI_BYTE,out,bytes,MPI_BYTE,communicator);
}
///////////////////////////////////////////////////////
// Should only be used prior to Grid Init finished.

View File

@ -454,11 +454,15 @@ void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &c
//////////////////////////////////
// Try to subdivide communicator
//////////////////////////////////
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)
/*
* Use default in MPI compile
*/
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)
: CartesianCommunicator(processors)
{
std::cout << "Attempts to split MPI3 communicators will fail until implemented" <<std::endl;
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
int ierr;
@ -596,9 +600,17 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
}
}
};
CartesianCommunicator::~CartesianCommunicator() = default;
// Release the Cartesian communicator and every halo communicator, unless MPI
// has already been finalised or there is no communicator to free.
CartesianCommunicator::~CartesianCommunicator()
{
  int finalised;
  MPI_Finalized(&finalised);
  if ( !communicator || finalised ) return;

  MPI_Comm_free(&communicator);
  for(auto &halo : communicator_halo){
    MPI_Comm_free(&halo);
  }
}
void CartesianCommunicator::GlobalSum(uint32_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
assert(ierr==0);
@ -715,7 +727,8 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
int from,
int bytes,int dir)
{
assert(dir < communicator_halo.size());
int ncomm =communicator_halo.size();
int commdir=dir%ncomm;
MPI_Request xrq;
MPI_Request rrq;
@ -735,14 +748,14 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
gfrom = MPI_UNDEFINED;
#endif
if ( gfrom ==MPI_UNDEFINED) {
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator_halo[dir],&rrq);
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator_halo[commdir],&rrq);
assert(ierr==0);
list.push_back(rrq);
off_node_bytes+=bytes;
}
if ( gdest == MPI_UNDEFINED ) {
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator_halo[dir],&xrq);
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator_halo[commdir],&xrq);
assert(ierr==0);
list.push_back(xrq);
off_node_bytes+=bytes;

View File

@ -55,11 +55,16 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
CartesianCommunicator::~CartesianCommunicator()
{
if (communicator && !MPI::Is_finalized())
int MPI_is_finalised;
MPI_Finalized(&MPI_is_finalised);
if (communicator && !MPI_is_finalised){
MPI_Comm_free(&communicator);
for(int i=0;i< communicator_halo.size();i++){
MPI_Comm_free(&communicator_halo[i]);
}
}
}
void CartesianCommunicator::GlobalSum(uint32_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
assert(ierr==0);
@ -224,13 +229,14 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
{
int myrank = _processor;
int ierr;
assert(dir < communicator_halo.size());
int ncomm =communicator_halo.size();
int commdir=dir%ncomm;
// std::cout << " sending on communicator "<<dir<<" " <<communicator_halo[dir]<<std::endl;
// Give the CPU to MPI immediately; can use threads to overlap optionally
MPI_Request req[2];
MPI_Irecv(recv,bytes,MPI_CHAR,recv_from_rank,recv_from_rank, communicator_halo[dir],&req[1]);
MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank ,myrank , communicator_halo[dir],&req[0]);
MPI_Irecv(recv,bytes,MPI_CHAR,recv_from_rank,recv_from_rank, communicator_halo[commdir],&req[1]);
MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank ,myrank , communicator_halo[commdir],&req[0]);
list.push_back(req[0]);
list.push_back(req[1]);
@ -240,7 +246,7 @@ void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsReque
{
int nreq=waitall.size();
MPI_Waitall(nreq, &waitall[0], MPI_STATUSES_IGNORE);
};
}
double CartesianCommunicator::StencilSendToRecvFrom(void *xmit,
int xmit_to_rank,
void *recv,
@ -249,13 +255,14 @@ double CartesianCommunicator::StencilSendToRecvFrom(void *xmit,
{
int myrank = _processor;
int ierr;
assert(dir < communicator_halo.size());
// std::cout << " sending on communicator "<<dir<<" " <<communicator_halo[dir]<<std::endl;
// std::cout << " sending on communicator "<<dir<<" " <<communicator_halo.size()<< <std::endl;
int ncomm =communicator_halo.size();
int commdir=dir%ncomm;
// Give the CPU to MPI immediately; can use threads to overlap optionally
MPI_Request req[2];
MPI_Irecv(recv,bytes,MPI_CHAR,recv_from_rank,recv_from_rank, communicator_halo[dir],&req[1]);
MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank ,myrank , communicator_halo[dir],&req[0]);
MPI_Irecv(recv,bytes,MPI_CHAR,recv_from_rank,recv_from_rank, communicator_halo[commdir],&req[1]);
MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank ,myrank , communicator_halo[commdir],&req[0]);
MPI_Waitall(2, req, MPI_STATUSES_IGNORE);
return 2.0*bytes;
}

View File

@ -38,8 +38,8 @@ void CartesianCommunicator::Init(int *argc, char *** arv)
ShmInitGeneric();
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)
: CartesianCommunicator(processors) {}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)
: CartesianCommunicator(processors) { srank=0;}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
@ -100,9 +100,13 @@ void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &
{
assert(0);
}
void CartesianCommunicator::AllToAll(int dim,void *in,void *out,int bytes)
void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,uint64_t bytes)
{
bcopy(in,out,bytes);
bcopy(in,out,bytes*words);
}
// No communication is performed in this variant: the exchange is a plain
// local copy of words*bytes bytes from 'in' to 'out'.
void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes)
{
  const uint64_t total = words*bytes;
  bcopy(in,out,total);
}
int CartesianCommunicator::RankWorld(void){return 0;}

View File

@ -75,6 +75,8 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
ShmInitGeneric();
}
CartesianCommunicator::~CartesianCommunicator(){}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)
: CartesianCommunicator(processors)
{

View File

@ -109,8 +109,8 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
coarseData=zero;
// Loop with a cache friendly loop ordering
for(int sf=0;sf<fine->oSites();sf++){
// Loop over coarse parallel, and then loop over fine associated with coarse.
parallel_for(int sf=0;sf<fine->oSites();sf++){
int sc;
std::vector<int> coor_c(_ndimension);
@ -119,8 +119,9 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
PARALLEL_CRITICAL
for(int i=0;i<nbasis;i++) {
coarseData._odata[sc](i)=coarseData._odata[sc](i)
+ innerProduct(Basis[i]._odata[sf],fineData._odata[sf]);
@ -139,6 +140,7 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
GridBase * coarse= coarseA._grid;
fineZ.checkerboard=fineX.checkerboard;
assert(fineX.checkerboard==fineY.checkerboard);
subdivides(coarse,fine); // require they map
conformable(fineX,fineY);
conformable(fineX,fineZ);
@ -180,9 +182,10 @@ template<class vobj,class CComplex>
GridBase *coarse(CoarseInner._grid);
GridBase *fine (fineX._grid);
Lattice<dotp> fine_inner(fine);
Lattice<dotp> fine_inner(fine); fine_inner.checkerboard = fineX.checkerboard;
Lattice<dotp> coarse_inner(coarse);
// Precision promotion?
fine_inner = localInnerProduct(fineX,fineY);
blockSum(coarse_inner,fine_inner);
parallel_for(int ss=0;ss<coarse->oSites();ss++){
@ -193,7 +196,7 @@ template<class vobj,class CComplex>
inline void blockNormalise(Lattice<CComplex> &ip,Lattice<vobj> &fineX)
{
GridBase *coarse = ip._grid;
Lattice<vobj> zz(fineX._grid); zz=zero;
Lattice<vobj> zz(fineX._grid); zz=zero; zz.checkerboard=fineX.checkerboard;
blockInnerProduct(ip,fineX,fineX);
ip = pow(ip,-0.5);
blockZAXPY(fineX,ip,fineX,zz);
@ -216,19 +219,25 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
}
// Turn this around to loop threaded over sc and interior loop
// over sf would thread better
coarseData=zero;
for(int sf=0;sf<fine->oSites();sf++){
parallel_region {
int sc;
std::vector<int> coor_c(_ndimension);
std::vector<int> coor_f(_ndimension);
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
coarseData._odata[sc]=coarseData._odata[sc]+fineData._odata[sf];
parallel_for_internal(int sf=0;sf<fine->oSites();sf++){
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
PARALLEL_CRITICAL
coarseData._odata[sc]=coarseData._odata[sc]+fineData._odata[sf];
}
}
return;
}
@ -238,7 +247,7 @@ inline void blockPick(GridBase *coarse,const Lattice<vobj> &unpicked,Lattice<vob
{
GridBase * fine = unpicked._grid;
Lattice<vobj> zz(fine);
Lattice<vobj> zz(fine); zz.checkerboard = unpicked.checkerboard;
Lattice<iScalar<vInteger> > fcoor(fine);
zz = zero;
@ -303,20 +312,21 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
}
// Loop with a cache friendly loop ordering
for(int sf=0;sf<fine->oSites();sf++){
parallel_region {
int sc;
std::vector<int> coor_c(_ndimension);
std::vector<int> coor_f(_ndimension);
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
for(int i=0;i<nbasis;i++) {
if(i==0) fineData._odata[sf]=coarseData._odata[sc](i) * Basis[i]._odata[sf];
else fineData._odata[sf]=fineData._odata[sf]+coarseData._odata[sc](i)*Basis[i]._odata[sf];
parallel_for_internal(int sf=0;sf<fine->oSites();sf++){
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
for(int i=0;i<nbasis;i++) {
if(i==0) fineData._odata[sf]=coarseData._odata[sc](i) * Basis[i]._odata[sf];
else fineData._odata[sf]=fineData._odata[sf]+coarseData._odata[sc](i)*Basis[i]._odata[sf];
}
}
}
return;
@ -747,6 +757,7 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
// NB: Easiest to programme if keep in lex order.
//
/////////////////////////////////////////////////////////
template<class Vobj>
void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
{
@ -790,11 +801,12 @@ void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
ratio[d] = full_grid->_processors[d]/ split_grid->_processors[d];
}
int lsites = full_grid->lSites();
Integer sz = lsites * nvector;
uint64_t lsites = full_grid->lSites();
uint64_t sz = lsites * nvector;
std::vector<Sobj> tmpdata(sz);
std::vector<Sobj> alldata(sz);
std::vector<Sobj> scalardata(lsites);
for(int v=0;v<nvector;v++){
unvectorizeToLexOrdArray(scalardata,full[v]);
parallel_for(int site=0;site<lsites;site++){
@ -806,18 +818,23 @@ void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
std::vector<int> ldims = full_grid->_ldimensions;
std::vector<int> lcoor(ndim);
for(int d=0;d<ndim;d++){
for(int d=ndim-1;d>=0;d--){
if ( ratio[d] != 1 ) {
full_grid ->AllToAll(d,alldata,tmpdata);
// std::cout << GridLogMessage << "Grid_split: dim " <<d<<" ratio "<<ratio[d]<<" nvec "<<nvec<<" procs "<<split_grid->_processors[d]<<std::endl;
// for(int v=0;v<nvec;v++){
// std::cout << "Grid_split: alldata["<<v<<"] " << alldata[v] <<std::endl;
// std::cout << "Grid_split: tmpdata["<<v<<"] " << tmpdata[v] <<std::endl;
// }
//////////////////////////////////////////
//Local volume for this dimension is expanded by ratio of processor extents
// Number of vectors is decreased by same factor
// Rearrange to lexico for bigger volume
//////////////////////////////////////////
nvec /= ratio[d];
auto rdims = ldims; rdims[d] *= ratio[d];
auto rsites= lsites*ratio[d];
for(int v=0;v<nvec;v++){
@ -837,7 +854,9 @@ void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
int rmul=nvec*lsites;
int vmul= lsites;
alldata[rsite] = tmpdata[lsite+r*rmul+v*vmul];
// if ( lsite==0 ) {
// std::cout << "Grid_split: grow alldata["<<rsite<<"] " << alldata[rsite] << " <- tmpdata["<< lsite+r*rmul+v*vmul<<"] "<<tmpdata[lsite+r*rmul+v*vmul] <<std::endl;
// }
}
}
}
@ -850,7 +869,6 @@ void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
}
}
}
vectorizeFromLexOrdArray(alldata,split);
}
@ -908,8 +926,8 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
ratio[d] = full_grid->_processors[d]/ split_grid->_processors[d];
}
int lsites = full_grid->lSites();
Integer sz = lsites * nvector;
uint64_t lsites = full_grid->lSites();
uint64_t sz = lsites * nvector;
std::vector<Sobj> tmpdata(sz);
std::vector<Sobj> alldata(sz);
std::vector<Sobj> scalardata(lsites);
@ -926,10 +944,12 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
lsites = split_grid->lSites();
std::vector<int> ldims = split_grid->_ldimensions;
for(int d=ndim-1;d>=0;d--){
// for(int d=ndim-1;d>=0;d--){
for(int d=0;d<ndim;d++){
if ( ratio[d] != 1 ) {
if ( split_grid->_processors[d] > 1 ) {
tmpdata = alldata;
split_grid->AllToAll(d,tmpdata,alldata);
@ -975,13 +995,11 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
lsites = full_grid->lSites();
for(int v=0;v<nvector;v++){
assert(v<full.size());
parallel_for(int site=0;site<lsites;site++){
scalardata[site] = alldata[v*lsites+site];
}
assert(v<full.size());
vectorizeFromLexOrdArray(scalardata,full[v]);
}
}

View File

@ -50,7 +50,7 @@ namespace Grid {
return (status==0) ? res.get() : name ;
}
GridStopWatch Logger::StopWatch;
GridStopWatch Logger::GlobalStopWatch;
int Logger::timestamp;
std::ostream Logger::devnull(0);
@ -59,13 +59,15 @@ void GridLogTimestamp(int on){
}
Colours GridLogColours(0);
GridLogger GridLogError(1, "Error", GridLogColours, "RED");
GridLogger GridLogIRL (1, "IRL" , GridLogColours, "NORMAL");
GridLogger GridLogSolver (1, "Solver", GridLogColours, "NORMAL");
GridLogger GridLogError (1, "Error" , GridLogColours, "RED");
GridLogger GridLogWarning(1, "Warning", GridLogColours, "YELLOW");
GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL");
GridLogger GridLogDebug(1, "Debug", GridLogColours, "PURPLE");
GridLogger GridLogDebug (1, "Debug", GridLogColours, "PURPLE");
GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN");
GridLogger GridLogIterative(1, "Iterative", GridLogColours, "BLUE");
GridLogger GridLogIntegrator(1, "Integrator", GridLogColours, "BLUE");
GridLogger GridLogIterative (1, "Iterative", GridLogColours, "BLUE");
GridLogger GridLogIntegrator (1, "Integrator", GridLogColours, "BLUE");
void GridLogConfigure(std::vector<std::string> &logstreams) {
GridLogError.Active(0);

View File

@ -85,12 +85,15 @@ class Logger {
protected:
Colours &Painter;
int active;
int timing_mode;
static int timestamp;
std::string name, topName;
std::string COLOUR;
public:
static GridStopWatch StopWatch;
static GridStopWatch GlobalStopWatch;
GridStopWatch LocalStopWatch;
GridStopWatch *StopWatch;
static std::ostream devnull;
std::string background() {return Painter.colour["NORMAL"];}
@ -101,22 +104,38 @@ public:
name(nm),
topName(topNm),
Painter(col_class),
COLOUR(col) {} ;
timing_mode(0),
COLOUR(col)
{
StopWatch = & GlobalStopWatch;
};
void Active(int on) {active = on;};
int isActive(void) {return active;};
static void Timestamp(int on) {timestamp = on;};
void Reset(void) {
StopWatch->Reset();
StopWatch->Start();
}
// Record the requested timing mode. A non-zero value additionally switches
// this logger onto its own LocalStopWatch and restarts that watch.
// NOTE(review): passing 0 only records the mode; the stopwatch pointer is
// left wherever it was (it is not switched back to GlobalStopWatch) --
// confirm this is intended.
void TimingMode(int on) {
  timing_mode = on;
  if ( !on ) return;
  StopWatch = &LocalStopWatch;
  Reset();
}
friend std::ostream& operator<< (std::ostream& stream, Logger& log){
if ( log.active ) {
stream << log.background()<< std::setw(8) << std::left << log.topName << log.background()<< " : ";
stream << log.colour() << std::setw(10) << std::left << log.name << log.background() << " : ";
stream << log.background()<< std::left << log.topName << log.background()<< " : ";
stream << log.colour() << std::left << log.name << log.background() << " : ";
if ( log.timestamp ) {
StopWatch.Stop();
GridTime now = StopWatch.Elapsed();
StopWatch.Start();
stream << log.evidence()<< now << log.background() << " : " ;
log.StopWatch->Stop();
GridTime now = log.StopWatch->Elapsed();
if ( log.timing_mode==1 ) log.StopWatch->Reset();
log.StopWatch->Start();
stream << log.evidence()<< std::setw(6)<<now << log.background() << " : " ;
}
stream << log.colour();
return stream;
@ -135,6 +154,8 @@ public:
void GridLogConfigure(std::vector<std::string> &logstreams);
extern GridLogger GridLogIRL;
extern GridLogger GridLogSolver;
extern GridLogger GridLogError;
extern GridLogger GridLogWarning;
extern GridLogger GridLogMessage;

View File

@ -261,7 +261,7 @@ class BinaryIO {
GridBase *grid,
std::vector<fobj> &iodata,
std::string file,
int offset,
Integer offset,
const std::string &format, int control,
uint32_t &nersc_csum,
uint32_t &scidac_csuma,
@ -356,7 +356,7 @@ class BinaryIO {
if ( (control & BINARYIO_LEXICOGRAPHIC) && (nrank > 1) ) {
#ifdef USE_MPI_IO
std::cout<< GridLogMessage<< "MPI read I/O "<< file<< std::endl;
std::cout<< GridLogMessage<<"IOobject: MPI read I/O "<< file<< std::endl;
ierr=MPI_File_open(grid->communicator,(char *) file.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &fh); assert(ierr==0);
ierr=MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL); assert(ierr==0);
ierr=MPI_File_read_all(fh, &iodata[0], 1, localArray, &status); assert(ierr==0);
@ -367,7 +367,7 @@ class BinaryIO {
assert(0);
#endif
} else {
std::cout << GridLogMessage << "C++ read I/O " << file << " : "
std::cout << GridLogMessage <<"IOobject: C++ read I/O " << file << " : "
<< iodata.size() * sizeof(fobj) << " bytes" << std::endl;
std::ifstream fin;
fin.open(file, std::ios::binary | std::ios::in);
@ -413,9 +413,9 @@ class BinaryIO {
timer.Start();
if ( (control & BINARYIO_LEXICOGRAPHIC) && (nrank > 1) ) {
#ifdef USE_MPI_IO
std::cout << GridLogMessage << "MPI write I/O " << file << std::endl;
std::cout << GridLogMessage <<"IOobject: MPI write I/O " << file << std::endl;
ierr = MPI_File_open(grid->communicator, (char *)file.c_str(), MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fh);
std::cout << GridLogMessage << "Checking for errors" << std::endl;
// std::cout << GridLogMessage << "Checking for errors" << std::endl;
if (ierr != MPI_SUCCESS)
{
char error_string[BUFSIZ];
@ -444,48 +444,56 @@ class BinaryIO {
assert(0);
#endif
} else {
std::cout << GridLogMessage << "IOobject: C++ write I/O " << file << " : "
<< iodata.size() * sizeof(fobj) << " bytes" << std::endl;
std::ofstream fout;
fout.exceptions ( std::fstream::failbit | std::fstream::badbit );
try {
fout.open(file,std::ios::binary|std::ios::out|std::ios::in);
} catch (const std::fstream::failure& exc) {
std::cout << GridLogError << "Error in opening the file " << file << " for output" <<std::endl;
std::cout << GridLogError << "Exception description: " << exc.what() << std::endl;
std::cout << GridLogError << "Probable cause: wrong path, inaccessible location "<< std::endl;
#ifdef USE_MPI_IO
MPI_Abort(MPI_COMM_WORLD,1);
#else
exit(1);
#endif
}
std::cout << GridLogMessage<< "C++ write I/O "<< file<<" : "
<< iodata.size()*sizeof(fobj)<<" bytes"<<std::endl;
if ( control & BINARYIO_MASTER_APPEND ) {
fout.seekp(0,fout.end);
} else {
fout.seekp(offset+myrank*lsites*sizeof(fobj));
fout.exceptions ( std::fstream::failbit | std::fstream::badbit );
try {
fout.open(file,std::ios::binary|std::ios::out|std::ios::in);
} catch (const std::fstream::failure& exc) {
std::cout << GridLogError << "Error in opening the file " << file << " for output" <<std::endl;
std::cout << GridLogError << "Exception description: " << exc.what() << std::endl;
std::cout << GridLogError << "Probable cause: wrong path, inaccessible location "<< std::endl;
#ifdef USE_MPI_IO
MPI_Abort(MPI_COMM_WORLD,1);
#else
exit(1);
#endif
}
if ( control & BINARYIO_MASTER_APPEND ) {
try {
fout.seekp(0,fout.end);
} catch (const std::fstream::failure& exc) {
std::cout << "Exception in seeking file end " << file << std::endl;
}
} else {
try {
fout.seekp(offset+myrank*lsites*sizeof(fobj));
} catch (const std::fstream::failure& exc) {
std::cout << "Exception in seeking file " << file <<" offset "<< offset << std::endl;
}
}
try {
fout.write((char *)&iodata[0],iodata.size()*sizeof(fobj));//assert( fout.fail()==0);
}
catch (const std::fstream::failure& exc) {
std::cout << "Exception in writing file " << file << std::endl;
std::cout << GridLogError << "Exception description: "<< exc.what() << std::endl;
#ifdef USE_MPI_IO
MPI_Abort(MPI_COMM_WORLD,1);
#else
exit(1);
#endif
}
try {
fout.write((char *)&iodata[0],iodata.size()*sizeof(fobj));//assert( fout.fail()==0);
}
catch (const std::fstream::failure& exc) {
std::cout << "Exception in writing file " << file << std::endl;
std::cout << GridLogError << "Exception description: "<< exc.what() << std::endl;
#ifdef USE_MPI_IO
MPI_Abort(MPI_COMM_WORLD,1);
#else
exit(1);
#endif
}
fout.close();
}
timer.Stop();
}
}
timer.Stop();
}
std::cout<<GridLogMessage<<"IOobject: ";
if ( control & BINARYIO_READ) std::cout << " read ";
else std::cout << " write ";
@ -515,7 +523,7 @@ class BinaryIO {
static inline void readLatticeObject(Lattice<vobj> &Umu,
std::string file,
munger munge,
int offset,
Integer offset,
const std::string &format,
uint32_t &nersc_csum,
uint32_t &scidac_csuma,
@ -552,7 +560,7 @@ class BinaryIO {
static inline void writeLatticeObject(Lattice<vobj> &Umu,
std::string file,
munger munge,
int offset,
Integer offset,
const std::string &format,
uint32_t &nersc_csum,
uint32_t &scidac_csuma,
@ -589,7 +597,7 @@ class BinaryIO {
static inline void readRNG(GridSerialRNG &serial,
GridParallelRNG &parallel,
std::string file,
int offset,
Integer offset,
uint32_t &nersc_csum,
uint32_t &scidac_csuma,
uint32_t &scidac_csumb)
@ -651,7 +659,7 @@ class BinaryIO {
static inline void writeRNG(GridSerialRNG &serial,
GridParallelRNG &parallel,
std::string file,
int offset,
Integer offset,
uint32_t &nersc_csum,
uint32_t &scidac_csuma,
uint32_t &scidac_csumb)

View File

@ -147,7 +147,7 @@ namespace QCD {
_scidacRecord = sr;
std::cout << GridLogMessage << "Build SciDAC datatype " <<sr.datatype<<std::endl;
// std::cout << GridLogMessage << "Build SciDAC datatype " <<sr.datatype<<std::endl;
}
///////////////////////////////////////////////////////
@ -159,7 +159,7 @@ namespace QCD {
uint32_t scidac_checksumb = stoull(scidacChecksum_.sumb,0,16);
if ( scidac_csuma !=scidac_checksuma) return 0;
if ( scidac_csumb !=scidac_checksumb) return 0;
return 1;
return 1;
}
////////////////////////////////////////////////////////////////////////////////////
@ -224,7 +224,7 @@ class GridLimeReader : public BinaryIO {
assert(PayloadSize == file_bytes);// Must match or user error
off_t offset= ftell(File);
uint64_t offset= ftello(File);
// std::cout << " ReadLatticeObject from offset "<<offset << std::endl;
BinarySimpleMunger<sobj,sobj> munge;
BinaryIO::readLatticeObject< vobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
@ -237,7 +237,7 @@ class GridLimeReader : public BinaryIO {
/////////////////////////////////////////////
// Verify checksums
/////////////////////////////////////////////
scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb);
assert(scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb)==1);
return;
}
}
@ -253,16 +253,13 @@ class GridLimeReader : public BinaryIO {
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
// std::cout << GridLogMessage<< " readLimeObject seeking "<< record_name <<" found record :" <<limeReaderType(LimeR) <<std::endl;
uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration)
if ( !strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) {
// std::cout << GridLogMessage<< " readLimeObject matches ! " << record_name <<std::endl;
std::vector<char> xmlc(nbytes+1,'\0');
limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);
// std::cout << GridLogMessage<< " readLimeObject matches XML " << &xmlc[0] <<std::endl;
XmlReader RD(&xmlc[0],"");
@ -332,7 +329,7 @@ class GridLimeWriter : public BinaryIO {
err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0);
err=limeWriterCloseRecord(LimeW); assert(err>=0);
limeDestroyHeader(h);
// std::cout << " File offset is now"<<ftell(File) << std::endl;
// std::cout << " File offset is now"<<ftello(File) << std::endl;
}
////////////////////////////////////////////
// Write a generic lattice field and csum
@ -349,7 +346,6 @@ class GridLimeWriter : public BinaryIO {
uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites;
createLimeRecordHeader(record_name, 0, 0, PayloadSize);
// std::cout << "W sizeof(sobj)" <<sizeof(sobj)<<std::endl;
// std::cout << "W Gsites " <<field._grid->_gsites<<std::endl;
// std::cout << "W Payload expected " <<PayloadSize<<std::endl;
@ -361,18 +357,20 @@ class GridLimeWriter : public BinaryIO {
// These are both buffered, so why I think this code is right is as follows.
//
// i) write record header to FILE *File, telegraphing the size.
// ii) ftell reads the offset from FILE *File .
// ii) ftello reads the offset from FILE *File .
// iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk.
// Closes iostream and flushes.
// iv) fseek on FILE * to end of this disjoint section.
// v) Continue writing scidac record.
////////////////////////////////////////////////////////////////////
off_t offset = ftell(File);
uint64_t offset = ftello(File);
// std::cout << " Writing to offset "<<offset << std::endl;
std::string format = getFormatString<vobj>();
BinarySimpleMunger<sobj,sobj> munge;
BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
// fseek(File,0,SEEK_END); offset = ftello(File);std::cout << " offset now "<<offset << std::endl;
err=limeWriterCloseRecord(LimeW); assert(err>=0);
////////////////////////////////////////
// Write checksum element, propagating forward from the BinaryIO
// Always pair a checksum with a binary object, and close message
@ -382,7 +380,7 @@ class GridLimeWriter : public BinaryIO {
std::stringstream streamb; streamb << std::hex << scidac_csumb;
checksum.suma= streama.str();
checksum.sumb= streamb.str();
std::cout << GridLogMessage<<" writing scidac checksums "<<std::hex<<scidac_csuma<<"/"<<scidac_csumb<<std::dec<<std::endl;
// std::cout << GridLogMessage<<" writing scidac checksums "<<std::hex<<scidac_csuma<<"/"<<scidac_csumb<<std::dec<<std::endl;
writeLimeObject(0,1,checksum,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
}
};
@ -642,7 +640,7 @@ class IldgReader : public GridLimeReader {
// Copy out the string
std::vector<char> xmlc(nbytes+1,'\0');
limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);
std::cout << GridLogMessage<< "Non binary record :" <<limeReaderType(LimeR) <<std::endl; //<<"\n"<<(&xmlc[0])<<std::endl;
// std::cout << GridLogMessage<< "Non binary record :" <<limeReaderType(LimeR) <<std::endl; //<<"\n"<<(&xmlc[0])<<std::endl;
//////////////////////////////////
// ILDG format record
@ -686,7 +684,7 @@ class IldgReader : public GridLimeReader {
std::string xmls(&xmlc[0]);
// is it a USQCD info field
if ( xmls.find(std::string("usqcdInfo")) != std::string::npos ) {
std::cout << GridLogMessage<<"...found a usqcdInfo field"<<std::endl;
// std::cout << GridLogMessage<<"...found a usqcdInfo field"<<std::endl;
XmlReader RD(&xmlc[0],"");
read(RD,"usqcdInfo",usqcdInfo_);
found_usqcdInfo = 1;
@ -704,8 +702,7 @@ class IldgReader : public GridLimeReader {
// Binary data
/////////////////////////////////
std::cout << GridLogMessage << "ILDG Binary record found : " ILDG_BINARY_DATA << std::endl;
off_t offset= ftell(File);
uint64_t offset= ftello(File);
if ( format == std::string("IEEE64BIG") ) {
GaugeSimpleMunger<dobj, sobj> munge;
BinaryIO::readLatticeObject< vobj, dobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);

View File

@ -70,8 +70,8 @@ XmlReader::XmlReader(const char *xmlstring,string toplev) : fileName_("")
pugi::xml_parse_result result;
result = doc_.load_string(xmlstring);
if ( !result ) {
cerr << "XML error description: " << result.description() << "\n";
cerr << "XML error offset : " << result.offset << "\n";
cerr << "XML error description (from char *): " << result.description() << "\nXML\n"<< xmlstring << "\n";
cerr << "XML error offset (from char *) " << result.offset << "\nXML\n"<< xmlstring <<"\n";
abort();
}
if ( toplev == std::string("") ) {
@ -87,8 +87,8 @@ XmlReader::XmlReader(const string &fileName,string toplev) : fileName_(fileName)
pugi::xml_parse_result result;
result = doc_.load_file(fileName_.c_str());
if ( !result ) {
cerr << "XML error description: " << result.description() << "\n";
cerr << "XML error offset : " << result.offset << "\n";
cerr << "XML error description: " << result.description() <<" "<< fileName_ <<"\n";
cerr << "XML error offset : " << result.offset <<" "<< fileName_ <<"\n";
abort();
}
if ( toplev == std::string("") ) {

View File

@ -51,7 +51,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define PARALLEL_CRITICAL
#endif
#define parallel_region PARALLEL_REGION
#define parallel_for PARALLEL_FOR_LOOP for
#define parallel_for_internal PARALLEL_FOR_LOOP_INTERN for
#define parallel_for_nest2 PARALLEL_NESTED_LOOP2 for
namespace Grid {

View File

@ -208,7 +208,7 @@ static int Grid_is_initialised = 0;
void Grid_init(int *argc,char ***argv)
{
GridLogger::StopWatch.Start();
GridLogger::GlobalStopWatch.Start();
std::string arg;

View File

@ -26,6 +26,25 @@ namespace Grid{
}
}
// Flatten a coordinate into a linear index in which the LAST dimension runs
// fastest: index = coor[nd-1] + dims[nd-1]*(coor[nd-2] + dims[nd-2]*(...)).
// The number of dimensions is taken from dims; the result is written to index.
static inline void IndexFromCoorReversed (const std::vector<int>& coor,int &index,const std::vector<int> &dims){
  int weight = 1;   // product of the extents of all dimensions already visited
  index = 0;
  for (int d = dims.size(); d-- > 0; ) {
    index += weight * coor[d];
    weight *= dims[d];
  }
}
// Expand a linear index into a coordinate, peeling dimensions off starting
// from the LAST one (which therefore runs fastest). coor is resized to the
// number of entries in dims before being filled.
static inline void CoorFromIndexReversed (std::vector<int>& coor,int index,const std::vector<int> &dims){
  const int ndim = dims.size();
  coor.resize(ndim);
  int remaining = index;   // part of the index not yet assigned to a dimension
  for (int d = ndim; d-- > 0; ) {
    coor[d]   = remaining % dims[d];
    remaining = remaining / dims[d];
  }
}
};
}

View File

@ -37,8 +37,15 @@ RealD InverseApproximation(RealD x){
RealD SqrtApproximation(RealD x){
return std::sqrt(x);
}
// Target function x^(-1/32).
RealD Approximation32(RealD x){
  const RealD exponent = -1.0/32.0;
  return std::pow(x,exponent);
}
// Target function x^(-1/2).
RealD Approximation2(RealD x){
  const RealD exponent = -1.0/2.0;
  return std::pow(x,exponent);
}
// Step function: 1.0 for x below 10.0, 0.0 otherwise.
// (A separate x<0.1 test is unnecessary: any such x is also below 10.0.)
RealD StepFunction(RealD x){
  return ( x<10.0 ) ? 1.0 : 0.0;
}
@ -56,7 +63,6 @@ int main (int argc, char ** argv)
Chebyshev<LatticeFermion> ChebyInv(lo,hi,2000,InverseApproximation);
{
std::ofstream of("chebyinv");
ChebyInv.csv(of);
@ -78,7 +84,6 @@ int main (int argc, char ** argv)
ChebyStep.JacksonSmooth();
{
std::ofstream of("chebystepjack");
ChebyStep.csv(of);
@ -100,5 +105,30 @@ int main (int argc, char ** argv)
ChebyNE.csv(of);
}
lo=0.0;
hi=4.0;
Chebyshev<LatticeFermion> Cheby32(lo,hi,2000,Approximation32);
{
std::ofstream of("cheby32");
Cheby32.csv(of);
}
Cheby32.JacksonSmooth();
{
std::ofstream of("cheby32jack");
Cheby32.csv(of);
}
Chebyshev<LatticeFermion> ChebySqrt(lo,hi,2000,Approximation2);
{
std::ofstream of("chebysqrt");
ChebySqrt.csv(of);
}
ChebySqrt.JacksonSmooth();
{
std::ofstream of("chebysqrtjack");
ChebySqrt.csv(of);
}
Grid_finalize();
}

View File

@ -38,11 +38,11 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Testing Remez"<<std::endl;
double lo=0.01;
double hi=1.0;
double lo=1.0e-3;
double hi=5.0;
int precision=64;
int degree=10;
AlgRemez remez(0.001,1.0,precision);
int degree=16;
AlgRemez remez(lo,hi,precision);
////////////////////////////////////////
// sqrt and inverse sqrt
@ -50,21 +50,50 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Generating degree "<<degree<<" for x^(1/2)"<<std::endl;
remez.generateApprox(degree,1,2);
MultiShiftFunction Sqrt(remez,1.0,false);
MultiShiftFunction InvSqrt(remez,1.0,true);
MultiShiftFunction Root2(remez,1.0,false);
MultiShiftFunction InvRoot2(remez,1.0,true);
std::cout<<GridLogMessage << "Generating degree "<<degree<<" for x^(1/4)"<<std::endl;
remez.generateApprox(degree,1,4);
MultiShiftFunction SqrtSqrt(remez,1.0,false);
MultiShiftFunction InvSqrtSqrt(remez,1.0,true);
MultiShiftFunction Root4(remez,1.0,false);
MultiShiftFunction InvRoot4(remez,1.0,true);
std::cout<<GridLogMessage << "Generating degree "<<degree<<" for x^(1/8)"<<std::endl;
remez.generateApprox(degree,1,8);
MultiShiftFunction Root8(remez,1.0,false);
MultiShiftFunction InvRoot8(remez,1.0,true);
std::cout<<GridLogMessage << "Generating degree "<<degree<<" for x^(1/16)"<<std::endl;
remez.generateApprox(degree,1,16);
MultiShiftFunction Root16(remez,1.0,false);
MultiShiftFunction InvRoot16(remez,1.0,true);
std::cout<<GridLogMessage << "Generating degree "<<degree<<" for x^(1/32)"<<std::endl;
remez.generateApprox(degree,1,32);
MultiShiftFunction Root32(remez,1.0,false);
MultiShiftFunction InvRoot32(remez,1.0,true);
ofstream gnuplot(std::string("Sqrt.gnu"),std::ios::out|std::ios::trunc);
Sqrt.gnuplot(gnuplot);
ofstream gnuplot(std::string("Root2.gnu"),std::ios::out|std::ios::trunc);
Root2.gnuplot(gnuplot);
ofstream gnuplot_i2(std::string("InvRoot2.gnu"),std::ios::out|std::ios::trunc);
InvRoot2.gnuplot(gnuplot_i2);
ofstream gnuplot_i4(std::string("InvRoot4.gnu"),std::ios::out|std::ios::trunc);
InvRoot4.gnuplot(gnuplot_i4);
ofstream gnuplot_i8(std::string("InvRoot8.gnu"),std::ios::out|std::ios::trunc);
InvRoot8.gnuplot(gnuplot_i8);
ofstream gnuplot_i16(std::string("InvRoot16.gnu"),std::ios::out|std::ios::trunc);
InvRoot16.gnuplot(gnuplot_i16);
ofstream gnuplot_i32(std::string("InvRoot32.gnu"),std::ios::out|std::ios::trunc);
InvRoot32.gnuplot(gnuplot_i32);
ofstream gnuplot_inv(std::string("InvSqrt.gnu"),std::ios::out|std::ios::trunc);
InvSqrt.gnuplot(gnuplot);
double x=0.6789;
double sx=std::sqrt(x);
@ -72,10 +101,10 @@ int main (int argc, char ** argv)
double isx=1.0/sx;
double issx=1.0/ssx;
double asx =Sqrt.approx(x);
double assx =SqrtSqrt.approx(x);
double aisx =InvSqrt.approx(x);
double aissx=InvSqrtSqrt.approx(x);
double asx =Root2.approx(x);
double assx =Root4.approx(x);
double aisx =InvRoot2.approx(x);
double aissx=InvRoot4.approx(x);
std::cout<<GridLogMessage << "x^(1/2) : "<<sx<<" "<<asx<<std::endl;
std::cout<<GridLogMessage << "x^(1/4) : "<<ssx<<" "<<assx<<std::endl;

View File

@ -0,0 +1,81 @@
namespace Grid {
template<class Field>
class BasisFieldVector {
public:
int _Nm;
typedef typename Field::scalar_type Coeff_t;
typedef typename Field::vector_type vCoeff_t;
typedef typename Field::vector_object vobj;
typedef typename vobj::scalar_object sobj;
std::vector<Field> _v; // _Nfull vectors
void report(int n,GridBase* value) {
std::cout << GridLogMessage << "BasisFieldVector allocated:\n";
std::cout << GridLogMessage << " Delta N = " << n << "\n";
std::cout << GridLogMessage << " Size of full vectors (size) = " <<
((double)n*sizeof(vobj)*value->oSites() / 1024./1024./1024.) << " GB\n";
std::cout << GridLogMessage << " Size = " << _v.size() << " Capacity = " << _v.capacity() << std::endl;
value->Barrier();
#ifdef __linux
if (value->IsBoss()) {
system("cat /proc/meminfo");
}
#endif
value->Barrier();
}
BasisFieldVector(int Nm,GridBase* value) : _Nm(Nm), _v(Nm,value) {
report(Nm,value);
}
~BasisFieldVector() {
}
Field& operator[](int i) {
return _v[i];
}
void orthogonalize(Field& w, int k) {
basisOrthogonalize(_v,w,k);
}
void rotate(Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm) {
basisRotate(_v,Qt,j0,j1,k0,k1,Nm);
}
size_t size() const {
return _Nm;
}
void resize(int n) {
if (n > _Nm)
_v.reserve(n);
_v.resize(n,_v[0]._grid);
if (n < _Nm)
_v.shrink_to_fit();
report(n - _Nm,_v[0]._grid);
_Nm = n;
}
void sortInPlace(std::vector<RealD>& sort_vals, bool reverse) {
basisSortInPlace(_v,sort_vals,reverse);
}
void deflate(const std::vector<RealD>& eval,const Field& src_orig,Field& result) {
basisDeflate(_v,eval,src_orig,result);
}
};
}

View File

@ -21,7 +21,14 @@
(ortho krylov low poly); and then fix up lowest say 200 eigenvalues by 1 run with high-degree poly (600 could be enough)
*/
#include <Grid/Grid.h>
#include <Grid/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
/////////////////////////////////////////////////////////////////////////////
// The following are now decoupled from the Lanczos and deal with grids.
// Safe to replace functionality
/////////////////////////////////////////////////////////////////////////////
#include "BlockedGrid.h"
#include "FieldBasisVector.h"
#include "BlockProjector.h"
#include "FieldVectorIO.h"
#include "Params.h"
@ -93,19 +100,6 @@ void write_history(char* fn, std::vector<RealD>& hist) {
fclose(f);
}
// LinearFunction adapter: applying it evaluates the stored operator function
// _poly on the stored linear operator _Linop, i.e. out = _poly(_Linop, in).
// Both are held by reference.
template<typename Field>
class FunctionHermOp : public LinearFunction<Field> {
public:
  OperatorFunction<Field>   & _poly;
  LinearOperatorBase<Field> & _Linop;

  FunctionHermOp(OperatorFunction<Field> & polynomial,LinearOperatorBase<Field>& op)
    : _poly(polynomial),
      _Linop(op)
  {}

  void operator()(const Field& in, Field& out) {
    _poly(_Linop,in,out);
  }
};
template<typename Field>
class CheckpointedLinearFunction : public LinearFunction<Field> {
@ -261,19 +255,6 @@ public:
}
};
// LinearFunction adapter: applying it calls HermOp(in,out) on the stored
// linear operator, which is held by reference.
template<typename Field>
class PlainHermOp : public LinearFunction<Field> {
public:
  LinearOperatorBase<Field> & _Linop;

  PlainHermOp(LinearOperatorBase<Field>& op)
    : _Linop(op)
  {}

  void operator()(const Field& in, Field& out) {
    _Linop.HermOp(in,out);
  }
};
template<typename vtype, int N > using CoarseSiteFieldGeneral = iScalar< iVector<vtype, N> >;
template<int N> using CoarseSiteFieldD = CoarseSiteFieldGeneral< vComplexD, N >;
template<int N> using CoarseSiteFieldF = CoarseSiteFieldGeneral< vComplexF, N >;
@ -319,7 +300,7 @@ void CoarseGridLanczos(BlockProjector<Field>& pr,RealD alpha2,RealD beta,int Npo
Op2 = &Op2plain;
}
ProjectedHermOp<CoarseLatticeFermion<Nstop1>,LatticeFermion> Op2nopoly(pr,HermOp);
BlockImplicitlyRestartedLanczos<CoarseLatticeFermion<Nstop1> > IRL2(*Op2,*Op2,Nstop2,Nk2,Nm2,resid2,betastp2,MaxIt,MinRes2);
ImplicitlyRestartedLanczos<CoarseLatticeFermion<Nstop1> > IRL2(*Op2,*Op2,Nstop2,Nk2,Nm2,resid2,MaxIt,betastp2,MinRes2);
src_coarse = 1.0;
@ -350,7 +331,7 @@ void CoarseGridLanczos(BlockProjector<Field>& pr,RealD alpha2,RealD beta,int Npo
) {
IRL2.calc(eval2,coef,src_coarse,Nconv,true,SkipTest2);
IRL2.calc(eval2,coef._v,src_coarse,Nconv,true);
coef.resize(Nstop2);
eval2.resize(Nstop2);
@ -450,6 +431,7 @@ void CoarseGridLanczos(BlockProjector<Field>& pr,RealD alpha2,RealD beta,int Npo
auto result = src_orig;
// undeflated solve
std::cout << GridLogMessage << " Undeflated solve "<<std::endl;
result = zero;
CG(HermOp, src_orig, result);
// if (UCoarseGrid->IsBoss())
@ -457,6 +439,7 @@ void CoarseGridLanczos(BlockProjector<Field>& pr,RealD alpha2,RealD beta,int Npo
// CG.ResHistory.clear();
// deflated solve with all eigenvectors
std::cout << GridLogMessage << " Deflated solve with all evectors"<<std::endl;
result = zero;
pr.deflate(coef,eval2,Nstop2,src_orig,result);
CG(HermOp, src_orig, result);
@ -465,6 +448,7 @@ void CoarseGridLanczos(BlockProjector<Field>& pr,RealD alpha2,RealD beta,int Npo
// CG.ResHistory.clear();
// deflated solve with non-blocked eigenvectors
std::cout << GridLogMessage << " Deflated solve with non-blocked evectors"<<std::endl;
result = zero;
pr.deflate(coef,eval1,Nstop1,src_orig,result);
CG(HermOp, src_orig, result);
@ -473,6 +457,7 @@ void CoarseGridLanczos(BlockProjector<Field>& pr,RealD alpha2,RealD beta,int Npo
// CG.ResHistory.clear();
// deflated solve with all eigenvectors and original eigenvalues from proj
std::cout << GridLogMessage << " Deflated solve with all eigenvectors and original eigenvalues from proj"<<std::endl;
result = zero;
pr.deflate(coef,eval3,Nstop2,src_orig,result);
CG(HermOp, src_orig, result);
@ -641,7 +626,7 @@ int main (int argc, char ** argv) {
}
// First round of Lanczos to get low mode basis
BlockImplicitlyRestartedLanczos<LatticeFermion> IRL1(Op1,Op1test,Nstop1,Nk1,Nm1,resid1,betastp1,MaxIt,MinRes1);
ImplicitlyRestartedLanczos<LatticeFermion> IRL1(Op1,Op1test,Nstop1,Nk1,Nm1,resid1,MaxIt,betastp1,MinRes1);
int Nconv;
char tag[1024];
@ -650,7 +635,7 @@ int main (int argc, char ** argv) {
if (simple_krylov_basis) {
quick_krylov_basis(evec,src,Op1,Nstop1);
} else {
IRL1.calc(eval1,evec,src,Nconv,false,1);
IRL1.calc(eval1,evec._v,src,Nconv,false);
}
evec.resize(Nstop1); // and throw away superfluous
eval1.resize(Nstop1);

View File

@ -0,0 +1,254 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_dwf_compressed_lanczos_reorg.cc
Copyright (C) 2017
Author: Leans heavily on Christoph Lehner's code
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
/*
* Reimplement the badly named "multigrid" lanczos as compressed Lanczos using the features
* in Grid that were intended to be used to support blocked Aggregates, from
*/
#include <Grid/Grid.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
#include <Grid/algorithms/iterative/LocalCoherenceLanczos.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
/////////////////////////////////////////////////////////////////////////////
// LocalCoherenceLanczos extended with checkpointing: the fine subspace
// vectors and the coarse eigenvectors are written to / read from files via
// ScidacWriter / ScidacReader, and the matching eigenvalues are stored in
// XML files under the tag "evals".
/////////////////////////////////////////////////////////////////////////////
template<class Fobj,class CComplex,int nbasis>
class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos<Fobj,CComplex,nbasis>
{
public:
typedef iVector<CComplex,nbasis > CoarseSiteVector;
typedef Lattice<CoarseSiteVector> CoarseField;
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
typedef Lattice<Fobj> FineField;
// All arguments are forwarded unchanged to the LocalCoherenceLanczos base.
LocalCoherenceLanczosScidac(GridBase *FineGrid,GridBase *CoarseGrid,
LinearOperatorBase<FineField> &FineOp,
int checkerboard)
// Base constructor
: LocalCoherenceLanczos<Fobj,CComplex,nbasis>(FineGrid,CoarseGrid,FineOp,checkerboard)
{};
// Write the nbasis fine subspace vectors to evecs_file (one Scidac field
// record each, in index order) and evals_fine to evals_file.
// Asserts that the subspace holds exactly nbasis vectors.
void checkpointFine(std::string evecs_file,std::string evals_file)
{
assert(this->_Aggregate.subspace.size()==nbasis);
emptyUserRecord record;
Grid::QCD::ScidacWriter WR;
WR.open(evecs_file);
for(int k=0;k<nbasis;k++) {
WR.writeScidacFieldRecord(this->_Aggregate.subspace[k],record);
}
WR.close();
XmlWriter WRx(evals_file);
write(WRx,"evals",this->evals_fine);
}
// Inverse of checkpointFine: size evals_fine and the subspace to nbasis,
// read the eigenvalues from evals_file (asserting nbasis were read), then
// read nbasis field records from evecs_file into the subspace.
void checkpointFineRestore(std::string evecs_file,std::string evals_file)
{
this->evals_fine.resize(nbasis);
this->_Aggregate.subspace.resize(nbasis,this->_FineGrid);
std::cout << GridLogIRL<< "checkpointFineRestore: Reading evals from "<<evals_file<<std::endl;
XmlReader RDx(evals_file);
read(RDx,"evals",this->evals_fine);
assert(this->evals_fine.size()==nbasis);
std::cout << GridLogIRL<< "checkpointFineRestore: Reading evecs from "<<evecs_file<<std::endl;
emptyUserRecord record;
Grid::QCD::ScidacReader RD ;
RD.open(evecs_file);
for(int k=0;k<nbasis;k++) {
// The checkerboard is set on the destination field before the record is read into it.
this->_Aggregate.subspace[k].checkerboard=this->_checkerboard;
RD.readScidacFieldRecord(this->_Aggregate.subspace[k],record);
}
RD.close();
}
// Write every vector currently in evec_coarse to evecs_file (one Scidac
// field record each, in index order) and evals_coarse to evals_file.
void checkpointCoarse(std::string evecs_file,std::string evals_file)
{
int n = this->evec_coarse.size();
emptyUserRecord record;
Grid::QCD::ScidacWriter WR;
WR.open(evecs_file);
for(int k=0;k<n;k++) {
WR.writeScidacFieldRecord(this->evec_coarse[k],record);
}
WR.close();
XmlWriter WRx(evals_file);
write(WRx,"evals",this->evals_coarse);
}
// Inverse of checkpointCoarse for nvec vectors: size evals_coarse and
// evec_coarse to nvec, read the eigenvalues from evals_file (asserting nvec
// were read), then read nvec field records from evecs_file.
void checkpointCoarseRestore(std::string evecs_file,std::string evals_file,int nvec)
{
std::cout << "resizing coarse vecs to " << nvec<< std::endl;
this->evals_coarse.resize(nvec);
this->evec_coarse.resize(nvec,this->_CoarseGrid);
std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evals from "<<evals_file<<std::endl;
XmlReader RDx(evals_file);
read(RDx,"evals",this->evals_coarse);
assert(this->evals_coarse.size()==nvec);
emptyUserRecord record;
std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evecs from "<<evecs_file<<std::endl;
Grid::QCD::ScidacReader RD ;
RD.open(evecs_file);
for(int k=0;k<nvec;k++) {
RD.readScidacFieldRecord(this->evec_coarse[k],record);
}
RD.close();
}
};
// Driver for the two-level ("compressed") Lanczos:
//  1. write Params_template.xml, then read the run parameters from ./Params.xml,
//  2. build the fine 4d/5d grids and a blocked coarse grid,
//  3. compute (doFine) and/or restore (doFineRead) the fine eigenvectors and orthogonalise them,
//  4. optionally run and checkpoint the coarse Lanczos (doCoarse),
//  5. optionally re-read the coarse checkpoint and test it (doCoarseRead).
int main (int argc, char ** argv) {
Grid_init(&argc,&argv);
GridLogIRL.TimingMode(1);
LocalCoherenceLanczosParams Params;
// Write a template parameter file (10 omega entries, 5 block sizes) before reading the real one.
{
Params.omega.resize(10);
Params.blockSize.resize(5);
XmlWriter writer("Params_template.xml");
write(writer,"Params",Params);
std::cout << GridLogMessage << " Written Params_template.xml" <<std::endl;
}
// Load the actual run parameters; this overwrites the template values set above.
{
XmlReader reader(std::string("./Params.xml"));
read(reader, "Params", Params);
}
// The fifth-dimension extent is taken from the number of omega coefficients supplied.
int Ls = (int)Params.omega.size();
RealD mass = Params.mass;
RealD M5 = Params.M5;
std::vector<int> blockSize = Params.blockSize;
// Fine grids: 4d (gauge) and 5d (fermion), each with a red-black partner.
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
GridDefaultSimd(Nd,vComplex::Nsimd()),
GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
std::vector<int> fineLatt = GridDefaultLatt();
int dims=fineLatt.size();
// blockSize needs one entry per lattice dimension plus a final one for the fifth dimension.
assert(blockSize.size()==dims+1);
std::vector<int> coarseLatt(dims);
// NOTE(review): coarseLatt5d is declared but never used in this function.
std::vector<int> coarseLatt5d ;
// Each fine extent must be an exact multiple of its block size.
for (int d=0;d<coarseLatt.size();d++){
coarseLatt[d] = fineLatt[d]/blockSize[d]; assert(coarseLatt[d]*blockSize[d]==fineLatt[d]);
}
std::cout << GridLogMessage<< " 5d coarse lattice is ";
for (int i=0;i<coarseLatt.size();i++){
std::cout << coarseLatt[i]<<"x";
}
// Coarse fifth-dimension extent; Ls must likewise be an exact multiple of its block size.
int cLs = Ls/blockSize[dims]; assert(cLs*blockSize[dims]==Ls);
std::cout << cLs<<std::endl;
GridCartesian * CoarseGrid4 = SpaceTimeGrid::makeFourDimGrid(coarseLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
// NOTE(review): CoarseGrid4rb is created but never used in this function.
GridRedBlackCartesian * CoarseGrid4rb = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid4);
GridCartesian * CoarseGrid5 = SpaceTimeGrid::makeFiveDimGrid(cLs,CoarseGrid4);
// NOTE(review): the red-black coarse 5d grid is built with makeFourDimRedBlackGrid applied to the
// 5d grid, whereas the fine grid uses makeFiveDimRedBlackGrid(Ls,UGrid) — confirm this is intended.
GridRedBlackCartesian * CoarseGrid5rb = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid5);
// Gauge field, read in NERSC format from the file named by Params.config.
LatticeGaugeField Umu(UGrid);
FieldMetaData header;
NerscIO::readConfiguration(Umu,header,Params.config);
std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt() << " Ls: " << Ls << std::endl;
// ZMobius EO Operator, wrapped in SchurDiagTwoOperator to give the operator handed to the Lanczos.
// NOTE(review): the meaning of the trailing 1.,0. arguments is not visible here — confirm against ZMobiusFermion.
ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, Params.omega,1.,0.);
SchurDiagTwoOperator<ZMobiusFermionR,LatticeFermion> HermOp(Ddwf);
// Eigenvector storage: Lanczos parameters for the fine (suffix 1) and coarse (suffix 2) levels.
LanczosParams fine =Params.FineParams;
LanczosParams coarse=Params.CoarseParams;
const int Ns1 = fine.Nstop; const int Ns2 = coarse.Nstop;
const int Nk1 = fine.Nk; const int Nk2 = coarse.Nk;
const int Nm1 = fine.Nm; const int Nm2 = coarse.Nm;
std::cout << GridLogMessage << "Keep " << fine.Nstop << " fine vectors" << std::endl;
std::cout << GridLogMessage << "Keep " << coarse.Nstop << " coarse vectors" << std::endl;
assert(Nm2 >= Nm1);
// nbasis is a compile-time template argument, so the run-time fine Nstop must match it exactly.
const int nbasis= 60;
assert(nbasis==Ns1);
// The solver works on the fine red-black 5d grid with the Odd checkerboard.
LocalCoherenceLanczosScidac<vSpinColourVector,vTComplex,nbasis> _LocalCoherenceLanczos(FrbGrid,CoarseGrid5rb,HermOp,Odd);
std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl;
// The fine vectors must come from somewhere: either computed now or restored from a checkpoint.
assert( (Params.doFine)||(Params.doFineRead));
// Compute the fine eigenvectors, checkpoint them, test them at 100x the fine residual, then orthogonalise.
if ( Params.doFine ) {
std::cout << GridLogMessage << "Performing fine grid IRL Nstop "<< Ns1 << " Nk "<<Nk1<<" Nm "<<Nm1<< std::endl;
_LocalCoherenceLanczos.calcFine(fine.Cheby,
fine.Nstop,fine.Nk,fine.Nm,
fine.resid,fine.MaxIt,
fine.betastp,fine.MinRes);
std::cout << GridLogIRL<<"Checkpointing Fine evecs"<<std::endl;
_LocalCoherenceLanczos.checkpointFine(std::string("evecs.scidac"),std::string("evals.xml"));
_LocalCoherenceLanczos.testFine(fine.resid*100.0); // Coarse check
_LocalCoherenceLanczos.Orthogonalise();
}
// Restore the fine eigenvectors from the checkpoint files, then apply the same test and orthogonalisation.
// If doFine is also set, this runs in addition to the block above.
if ( Params.doFineRead ) {
_LocalCoherenceLanczos.checkpointFineRestore(std::string("evecs.scidac"),std::string("evals.xml"));
_LocalCoherenceLanczos.testFine(fine.resid*100.0); // Coarse check
_LocalCoherenceLanczos.Orthogonalise();
}
// Run the coarse-grid Lanczos and checkpoint its eigenvectors and eigenvalues.
if ( Params.doCoarse ) {
std::cout << GridLogMessage << "Orthogonalising " << nbasis<<" Nm "<<Nm2<< std::endl;
std::cout << GridLogMessage << "Performing coarse grid IRL Nstop "<< Ns2<< " Nk "<<Nk2<<" Nm "<<Nm2<< std::endl;
_LocalCoherenceLanczos.calcCoarse(coarse.Cheby,Params.Smoother,Params.coarse_relax_tol,
coarse.Nstop, coarse.Nk,coarse.Nm,
coarse.resid, coarse.MaxIt,
coarse.betastp,coarse.MinRes);
std::cout << GridLogIRL<<"Checkpointing coarse evecs"<<std::endl;
_LocalCoherenceLanczos.checkpointCoarse(std::string("evecs.coarse.scidac"),std::string("evals.coarse.xml"));
}
// Re-read coarse.Nstop coarse vectors from the checkpoint and test them at 100x the coarse residual.
if ( Params.doCoarseRead ) {
// Verify we can reread ???
_LocalCoherenceLanczos.checkpointCoarseRestore(std::string("evecs.coarse.scidac"),std::string("evals.coarse.xml"),coarse.Nstop);
_LocalCoherenceLanczos.testCoarse(coarse.resid*100.0,Params.Smoother,Params.coarse_relax_tol); // Coarse check
}
Grid_finalize();
}

View File

@ -84,11 +84,12 @@ int main (int argc, char ** argv)
std::vector<double> Coeffs { 0.,-1.};
Polynomial<FermionField> PolyX(Coeffs);
Chebyshev<FermionField> Cheb(0.2,5.,11);
// ChebyshevLanczos<LatticeFermion> Cheb(9.,1.,0.,20);
// Cheb.csv(std::cout);
// exit(-24);
ImplicitlyRestartedLanczos<FermionField> IRL(HermOp,Cheb,Nstop,Nk,Nm,resid,MaxIt);
Chebyshev<FermionField> Cheby(0.2,5.,11);
FunctionHermOp<FermionField> OpCheby(Cheby,HermOp);
PlainHermOp<FermionField> Op (HermOp);
ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby,Op,Nstop,Nk,Nm,resid,MaxIt);
std::vector<RealD> eval(Nm);

View File

@ -119,12 +119,13 @@ int main (int argc, char ** argv)
RealD beta = 0.1;
RealD mu = 0.0;
int order = 11;
ChebyshevLanczos<LatticeComplex> Cheby(alpha,beta,mu,order);
Chebyshev<LatticeComplex> Cheby(alpha,beta,order);
std::ofstream file("cheby.dat");
Cheby.csv(file);
HermOpOperatorFunction<LatticeComplex> X;
DumbOperator<LatticeComplex> HermOp(grid);
FunctionHermOp<LatticeComplex> OpCheby(Cheby,HermOp);
PlainHermOp<LatticeComplex> Op(HermOp);
const int Nk = 40;
const int Nm = 80;
@ -133,8 +134,9 @@ int main (int argc, char ** argv)
int Nconv;
RealD eresid = 1.0e-6;
ImplicitlyRestartedLanczos<LatticeComplex> IRL(HermOp,X,Nk,Nk,Nm,eresid,Nit);
ImplicitlyRestartedLanczos<LatticeComplex> ChebyIRL(HermOp,Cheby,Nk,Nk,Nm,eresid,Nit);
ImplicitlyRestartedLanczos<LatticeComplex> IRL(Op,Op,Nk,Nk,Nm,eresid,Nit);
ImplicitlyRestartedLanczos<LatticeComplex> ChebyIRL(OpCheby,Op,Nk,Nk,Nm,eresid,Nit);
LatticeComplex src(grid); gaussian(RNG,src);
{

View File

@ -86,9 +86,12 @@ int main(int argc, char** argv) {
std::vector<double> Coeffs{0, 1.};
Polynomial<FermionField> PolyX(Coeffs);
Chebyshev<FermionField> Cheb(0.0, 10., 12);
ImplicitlyRestartedLanczos<FermionField> IRL(HermOp, PolyX, Nstop, Nk, Nm,
resid, MaxIt);
Chebyshev<FermionField> Cheby(0.0, 10., 12);
FunctionHermOp<FermionField> OpCheby(Cheby,HermOp);
PlainHermOp<FermionField> Op (HermOp);
ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby, Op, Nstop, Nk, Nm, resid, MaxIt);
std::vector<RealD> eval(Nm);
FermionField src(FGrid);

View File

@ -555,13 +555,13 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Calling Aggregation class to build subspace" <<std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermDefOp(Ddwf);
Subspace Aggregates(Coarse5d,FGrid);
Subspace Aggregates(Coarse5d,FGrid,0);
// Aggregates.CreateSubspace(RNG5,HermDefOp,nbasis);
assert ( (nbasis & 0x1)==0);
int nb=nbasis/2;
std::cout<<GridLogMessage << " nbasis/2 = "<<nb<<std::endl;
// Aggregates.CreateSubspace(RNG5,HermDefOp,nb);
Aggregates.CreateSubspaceLanczos(RNG5,HermDefOp,nb);
Aggregates.CreateSubspace(RNG5,HermDefOp,nb);
// Aggregates.CreateSubspaceLanczos(RNG5,HermDefOp,nb);
for(int n=0;n<nb;n++){
G5R5(Aggregates.subspace[n+nb],Aggregates.subspace[n]);
std::cout<<GridLogMessage<<n<<" subspace "<<norm2(Aggregates.subspace[n+nb])<<" "<<norm2(Aggregates.subspace[n]) <<std::endl;

View File

@ -52,15 +52,28 @@ int main (int argc, char ** argv)
GridRedBlackCartesian * rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
int nrhs = UGrid->RankCount() ;
/////////////////////////////////////////////
// Split into 1^4 mpi communicators
/////////////////////////////////////////////
for(int i=0;i<argc;i++){
if(std::string(argv[i]) == "--split"){
for(int k=0;k<mpi_layout.size();k++){
std::stringstream ss;
ss << argv[i+1+k];
ss >> mpi_split[k];
}
break;
}
}
int nrhs = 1;
int me;
for(int i=0;i<mpi_layout.size();i++) nrhs *= (mpi_layout[i]/mpi_split[i]);
GridCartesian * SGrid = new GridCartesian(GridDefaultLatt(),
GridDefaultSimd(Nd,vComplex::Nsimd()),
mpi_split,
*UGrid);
*UGrid,me);
GridCartesian * SFGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,SGrid);
GridRedBlackCartesian * SrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(SGrid);
@ -70,7 +83,6 @@ int main (int argc, char ** argv)
// Set up the problem as a 4d spreadout job
///////////////////////////////////////////////
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(UGrid ); pRNG.SeedFixedIntegers(seeds);
GridParallelRNG pRNG5(FGrid); pRNG5.SeedFixedIntegers(seeds);
std::vector<FermionField> src(nrhs,FGrid);
@ -93,7 +105,7 @@ int main (int argc, char ** argv)
emptyUserRecord record;
std::string file("./scratch.scidac");
std::string filef("./scratch.scidac.ferm");
int me = UGrid->ThisRank();
LatticeGaugeField s_Umu(SGrid);
FermionField s_src(SFGrid);
FermionField s_src_split(SFGrid);
@ -169,7 +181,7 @@ int main (int argc, char ** argv)
for(int n=0;n<nrhs;n++){
FGrid->Barrier();
if ( n==me ) {
std::cerr << GridLogMessage<<"Split "<< me << " " << norm2(s_src_split) << " " << norm2(s_src)<< " diff " << norm2(s_tmp)<<std::endl;
std::cout << GridLogMessage<<"Split "<< me << " " << norm2(s_src_split) << " " << norm2(s_src)<< " diff " << norm2(s_tmp)<<std::endl;
}
FGrid->Barrier();
}
@ -190,7 +202,7 @@ int main (int argc, char ** argv)
MdagMLinearOperator<DomainWallFermionR,FermionField> HermOp(Ddwf);
MdagMLinearOperator<DomainWallFermionR,FermionField> HermOpCk(Dchk);
ConjugateGradient<FermionField> CG((1.0e-8/(me+1)),10000);
ConjugateGradient<FermionField> CG((1.0e-5/(me+1)),10000);
s_res = zero;
CG(HermOp,s_src,s_res);
@ -218,7 +230,6 @@ int main (int argc, char ** argv)
std::cout << " diff " <<tmp<<std::endl;
}
*/
std::cout << GridLogMessage<< "Checking the residuals"<<std::endl;
for(int n=0;n<nrhs;n++){
HermOpCk.HermOp(result[n],tmp); tmp = tmp - src[n];

View File

@ -1,4 +1,4 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -47,20 +47,36 @@ int main (int argc, char ** argv)
std::vector<int> mpi_layout = GridDefaultMpi();
std::vector<int> mpi_split (mpi_layout.size(),1);
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
GridDefaultSimd(Nd,vComplex::Nsimd()),
GridDefaultMpi());
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
int nrhs = UGrid->RankCount() ;
/////////////////////////////////////////////
// Split into 1^4 mpi communicators
/////////////////////////////////////////////
for(int i=0;i<argc;i++){
if(std::string(argv[i]) == "--split"){
for(int k=0;k<mpi_layout.size();k++){
std::stringstream ss;
ss << argv[i+1+k];
ss >> mpi_split[k];
}
break;
}
}
int nrhs = 1;
int me;
for(int i=0;i<mpi_layout.size();i++) nrhs *= (mpi_layout[i]/mpi_split[i]);
GridCartesian * SGrid = new GridCartesian(GridDefaultLatt(),
GridDefaultSimd(Nd,vComplex::Nsimd()),
mpi_split,
*UGrid);
*UGrid,me);
GridCartesian * SFGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,SGrid);
GridRedBlackCartesian * SrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(SGrid);
@ -78,16 +94,46 @@ int main (int argc, char ** argv)
std::vector<FermionField> result(nrhs,FGrid);
FermionField tmp(FGrid);
for(int s=0;s<nrhs;s++) random(pRNG5,src[s]);
for(int s=0;s<nrhs;s++) result[s]=zero;
#undef LEXICO_TEST
#ifdef LEXICO_TEST
{
LatticeFermion lex(FGrid); lex = zero;
LatticeFermion ftmp(FGrid);
Integer stride =10000;
double nrm;
LatticeComplex coor(FGrid);
for(int d=0;d<5;d++){
LatticeCoordinate(coor,d);
ftmp = stride;
ftmp = ftmp * coor;
lex = lex + ftmp;
stride=stride/10;
}
for(int s=0;s<nrhs;s++) {
src[s]=lex;
ftmp = 1000*1000*s;
src[s] = src[s] + ftmp;
}
}
#else
for(int s=0;s<nrhs;s++) {
random(pRNG5,src[s]);
tmp = 100.0*s;
src[s] = (src[s] * 0.1) + tmp;
std::cout << " src ]"<<s<<"] "<<norm2(src[s])<<std::endl;
}
#endif
for(int n =0 ; n< nrhs ; n++) {
std::cout << " src"<<n<<"\n"<< src[n] <<std::endl;
}
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu);
/////////////////
// MPI only sends
/////////////////
int me = UGrid->ThisRank();
LatticeGaugeField s_Umu(SGrid);
FermionField s_src(SFGrid);
FermionField s_tmp(SFGrid);
@ -98,6 +144,36 @@ int main (int argc, char ** argv)
///////////////////////////////////////////////////////////////
Grid_split (Umu,s_Umu);
Grid_split (src,s_src);
std::cout << " split rank " <<me << " s_src "<<norm2(s_src)<<std::endl;
std::cout << " s_src\n "<< s_src <<std::endl;
#ifdef LEXICO_TEST
FermionField s_src_tmp(SFGrid);
FermionField s_src_diff(SFGrid);
{
LatticeFermion lex(SFGrid); lex = zero;
LatticeFermion ftmp(SFGrid);
Integer stride =10000;
double nrm;
LatticeComplex coor(SFGrid);
for(int d=0;d<5;d++){
LatticeCoordinate(coor,d);
ftmp = stride;
ftmp = ftmp * coor;
lex = lex + ftmp;
stride=stride/10;
}
s_src_tmp=lex;
ftmp = 1000*1000*me;
s_src_tmp = s_src_tmp + ftmp;
}
s_src_diff = s_src_tmp - s_src;
std::cout << " s_src_diff " << norm2(s_src_diff)<<std::endl;
std::cout << " s_src \n" << s_src << std::endl;
std::cout << " s_src_tmp \n" << s_src_tmp << std::endl;
std::cout << " s_src_diff \n" << s_src_diff << std::endl;
#endif
///////////////////////////////////////////////////////////////
// Set up N-solvers as trivially parallel
@ -113,10 +189,11 @@ int main (int argc, char ** argv)
MdagMLinearOperator<DomainWallFermionR,FermionField> HermOp(Ddwf);
MdagMLinearOperator<DomainWallFermionR,FermionField> HermOpCk(Dchk);
ConjugateGradient<FermionField> CG((1.0e-8/(me+1)),10000);
ConjugateGradient<FermionField> CG((1.0e-5),10000);
s_res = zero;
CG(HermOp,s_src,s_res);
std::cout << " s_res norm "<<norm2(s_res)<<std::endl;
/////////////////////////////////////////////////////////////
// Report how long they all took
/////////////////////////////////////////////////////////////
@ -134,10 +211,12 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage<< "Unsplitting the result"<<std::endl;
Grid_unsplit(result,s_res);
std::cout << GridLogMessage<< "Checking the residuals"<<std::endl;
for(int n=0;n<nrhs;n++){
std::cout << " res["<<n<<"] norm "<<norm2(result[n])<<std::endl;
HermOpCk.HermOp(result[n],tmp); tmp = tmp - src[n];
std::cout << GridLogMessage<<" resid["<<n<<"] "<< norm2(tmp)<<std::endl;
std::cout << GridLogMessage<<" resid["<<n<<"] "<< norm2(tmp)/norm2(src[n])<<std::endl;
}
Grid_finalize();

View File

@ -47,7 +47,9 @@ int main (int argc, char ** argv)
std::vector<int> mpi_layout = GridDefaultMpi();
std::vector<int> mpi_split (mpi_layout.size(),1);
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
GridDefaultSimd(Nd,vComplex::Nsimd()),
GridDefaultMpi());
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
@ -57,10 +59,11 @@ int main (int argc, char ** argv)
/////////////////////////////////////////////
// Split into 1^4 mpi communicators
/////////////////////////////////////////////
int me;
GridCartesian * SGrid = new GridCartesian(GridDefaultLatt(),
GridDefaultSimd(Nd,vComplex::Nsimd()),
mpi_split,
*UGrid);
*UGrid,me);
GridCartesian * SFGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,SGrid);
GridRedBlackCartesian * SrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(SGrid);
@ -89,8 +92,6 @@ int main (int argc, char ** argv)
/////////////////
// MPI only sends
/////////////////
int me = UGrid->ThisRank();
LatticeGaugeField s_Umu(SGrid);
FermionField s_src(SFGrid);
FermionField s_src_e(SFrbGrid);

View File

@ -0,0 +1,157 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_dwf_mrhs_cg.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/algorithms/iterative/BlockConjugateGradient.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
typedef typename DomainWallFermionR::FermionField FermionField;
typedef typename DomainWallFermionR::ComplexField ComplexField;
typename DomainWallFermionR::ImplParams params;
const int Ls=4;
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
std::vector<int> mpi_split (mpi_layout.size(),1);
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
/////////////////////////////////////////////
// Split into 1^4 mpi communicators
/////////////////////////////////////////////
for(int i=0;i<argc;i++){
if(std::string(argv[i]) == "--split"){
for(int k=0;k<mpi_layout.size();k++){
std::stringstream ss;
ss << argv[i+1+k];
ss >> mpi_split[k];
}
break;
}
}
int nrhs = 1;
for(int i=0;i<mpi_layout.size();i++) nrhs *= (mpi_layout[i]/mpi_split[i]);
GridCartesian * SGrid = new GridCartesian(GridDefaultLatt(),
GridDefaultSimd(Nd,vComplex::Nsimd()),
mpi_split,
*UGrid);
GridCartesian * SFGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,SGrid);
GridRedBlackCartesian * SrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(SGrid);
GridRedBlackCartesian * SFrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,SGrid);
///////////////////////////////////////////////
// Set up the problem as a 4d spreadout job
///////////////////////////////////////////////
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(UGrid ); pRNG.SeedFixedIntegers(seeds);
GridParallelRNG pRNG5(FGrid); pRNG5.SeedFixedIntegers(seeds);
std::vector<FermionField> src(nrhs,FGrid);
std::vector<FermionField> src_chk(nrhs,FGrid);
std::vector<FermionField> result(nrhs,FGrid);
FermionField tmp(FGrid);
for(int s=0;s<nrhs;s++) random(pRNG5,src[s]);
for(int s=0;s<nrhs;s++) result[s]=zero;
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu);
/////////////////
// MPI only sends
/////////////////
int me = UGrid->ThisRank();
LatticeGaugeField s_Umu(SGrid);
FermionField s_src(SFGrid);
FermionField s_tmp(SFGrid);
FermionField s_res(SFGrid);
///////////////////////////////////////////////////////////////
// split the source out using MPI instead of I/O
///////////////////////////////////////////////////////////////
Grid_split (Umu,s_Umu);
Grid_split (src,s_src);
///////////////////////////////////////////////////////////////
// Set up N-solvers as trivially parallel
///////////////////////////////////////////////////////////////
RealD mass=0.01;
RealD M5=1.8;
DomainWallFermionR Dchk(Umu,*FGrid,*FrbGrid,*UGrid,*rbGrid,mass,M5);
DomainWallFermionR Ddwf(s_Umu,*SFGrid,*SFrbGrid,*SGrid,*SrbGrid,mass,M5);
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
std::cout << GridLogMessage << " Calling DWF CG "<<std::endl;
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
MdagMLinearOperator<DomainWallFermionR,FermionField> HermOp(Ddwf);
MdagMLinearOperator<DomainWallFermionR,FermionField> HermOpCk(Dchk);
ConjugateGradient<FermionField> CG((1.0e-8/(me+1)),10000);
s_res = zero;
CG(HermOp,s_src,s_res);
/////////////////////////////////////////////////////////////
// Report how long they all took
/////////////////////////////////////////////////////////////
std::vector<uint32_t> iterations(nrhs,0);
iterations[me] = CG.IterationsToComplete;
for(int n=0;n<nrhs;n++){
UGrid->GlobalSum(iterations[n]);
std::cout << GridLogMessage<<" Rank "<<n<<" "<< iterations[n]<<" CG iterations"<<std::endl;
}
/////////////////////////////////////////////////////////////
// Gather and residual check on the results
/////////////////////////////////////////////////////////////
std::cout << GridLogMessage<< "Unsplitting the result"<<std::endl;
Grid_unsplit(result,s_res);
std::cout << GridLogMessage<< "Checking the residuals"<<std::endl;
for(int n=0;n<nrhs;n++){
HermOpCk.HermOp(result[n],tmp); tmp = tmp - src[n];
std::cout << GridLogMessage<<" resid["<<n<<"] "<< norm2(tmp)<<std::endl;
}
Grid_finalize();
}

View File

@ -48,7 +48,6 @@ struct scal {
int main (int argc, char ** argv)
{
typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField;
typename ImprovedStaggeredFermionR::ImplParams params;
Grid_init(&argc,&argv);

View File

@ -0,0 +1,76 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_wilson_cg_schur.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
// Thin aggregate wrapping a single value of the template type in the
// public member `internal`.
template<typename ValueType>
struct scal
{
  ValueType internal;
};
// Gamma-algebra identifiers for the four directions, listed in the order X, Y, Z, T
// so that Gmu[0..3] selects GammaX, GammaY, GammaZ, GammaT respectively.
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
Gamma::Algebra::GammaZ,
Gamma::Algebra::GammaT
};
int main (int argc, char ** argv)
{
typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
typename ImprovedStaggeredFermionR::ImplParams params;
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
FermionField src(&Grid); random(pRNG,src);
FermionField result(&Grid); result=zero;
FermionField resid(&Grid);
RealD mass=0.1;
ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass);
ConjugateGradient<FermionField> CG(1.0e-8,10000);
SchurRedBlackStaggeredSolve<FermionField> SchurSolver(CG);
SchurSolver(Ds,src,result);
Grid_finalize();
}