mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
develop pull
This commit is contained in:
commit
4e864e56c9
@ -35,6 +35,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
#include <Grid/algorithms/approx/Zolotarev.h>
|
#include <Grid/algorithms/approx/Zolotarev.h>
|
||||||
#include <Grid/algorithms/approx/Chebyshev.h>
|
#include <Grid/algorithms/approx/Chebyshev.h>
|
||||||
|
#include <Grid/algorithms/approx/JacobiPolynomial.h>
|
||||||
#include <Grid/algorithms/approx/Remez.h>
|
#include <Grid/algorithms/approx/Remez.h>
|
||||||
#include <Grid/algorithms/approx/MultiShiftFunction.h>
|
#include <Grid/algorithms/approx/MultiShiftFunction.h>
|
||||||
#include <Grid/algorithms/approx/Forecast.h>
|
#include <Grid/algorithms/approx/Forecast.h>
|
||||||
|
@ -1,3 +1,14 @@
|
|||||||
|
// blockZaxpy in bockPromote - 3s, 5%
|
||||||
|
// noncoalesced linalg in Preconditionoer ~ 3s 5%
|
||||||
|
// Lancos tuning or replace 10-20s ~ 25%, open ended
|
||||||
|
// setup tuning 5s ~ 8%
|
||||||
|
// -- e.g. ordermin, orderstep tunables.
|
||||||
|
// MdagM path without norm in LinOp code. few seconds
|
||||||
|
|
||||||
|
// Mdir calc blocking kernels
|
||||||
|
// Fuse kernels in blockMaskedInnerProduct
|
||||||
|
// preallocate Vectors in Cayley 5D ~ few percent few seconds
|
||||||
|
|
||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
@ -34,8 +45,35 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
|
template<class vobj,class CComplex>
|
||||||
|
inline void blockMaskedInnerProduct(Lattice<CComplex> &CoarseInner1,
|
||||||
|
Lattice<CComplex> &CoarseInner2,
|
||||||
|
const Lattice<decltype(innerProduct(vobj(),vobj()))> &FineMask1,
|
||||||
|
const Lattice<decltype(innerProduct(vobj(),vobj()))> &FineMask2,
|
||||||
|
const Lattice<vobj> &fineX,
|
||||||
|
const Lattice<vobj> &fineY)
|
||||||
|
{
|
||||||
|
typedef decltype(innerProduct(vobj(),vobj())) dotp;
|
||||||
|
|
||||||
|
GridBase *coarse(CoarseInner1.Grid());
|
||||||
|
GridBase *fine (fineX.Grid());
|
||||||
|
|
||||||
|
Lattice<dotp> fine_inner(fine); fine_inner.Checkerboard() = fineX.Checkerboard();
|
||||||
|
Lattice<dotp> fine_inner_msk(fine);
|
||||||
|
|
||||||
|
// Multiply could be fused with innerProduct
|
||||||
|
// Single block sum kernel could do both masks.
|
||||||
|
fine_inner = localInnerProduct(fineX,fineY);
|
||||||
|
|
||||||
|
mult(fine_inner_msk, fine_inner,FineMask1);
|
||||||
|
blockSum(CoarseInner1,fine_inner_msk);
|
||||||
|
|
||||||
|
mult(fine_inner_msk, fine_inner,FineMask2);
|
||||||
|
blockSum(CoarseInner2,fine_inner_msk);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class Geometry {
|
class Geometry {
|
||||||
// int dimension;
|
|
||||||
public:
|
public:
|
||||||
int npoint;
|
int npoint;
|
||||||
std::vector<int> directions ;
|
std::vector<int> directions ;
|
||||||
@ -52,10 +90,10 @@ public:
|
|||||||
directions.resize(npoint);
|
directions.resize(npoint);
|
||||||
displacements.resize(npoint);
|
displacements.resize(npoint);
|
||||||
for(int d=0;d<_d;d++){
|
for(int d=0;d<_d;d++){
|
||||||
directions[2*d ] = d+base;
|
directions[d ] = d+base;
|
||||||
directions[2*d+1] = d+base;
|
directions[d+_d] = d+base;
|
||||||
displacements[2*d ] = +1;
|
displacements[d ] = +1;
|
||||||
displacements[2*d+1] = -1;
|
displacements[d+_d]= -1;
|
||||||
}
|
}
|
||||||
directions [2*_d]=0;
|
directions [2*_d]=0;
|
||||||
displacements[2*_d]=0;
|
displacements[2*_d]=0;
|
||||||
@ -63,7 +101,7 @@ public:
|
|||||||
//// report back
|
//// report back
|
||||||
std::cout<<GridLogMessage<<"directions :";
|
std::cout<<GridLogMessage<<"directions :";
|
||||||
for(int d=0;d<npoint;d++) std::cout<< directions[d]<< " ";
|
for(int d=0;d<npoint;d++) std::cout<< directions[d]<< " ";
|
||||||
std::cout <<std::endl;
|
std::cout<<std::endl;
|
||||||
std::cout<<GridLogMessage<<"displacements :";
|
std::cout<<GridLogMessage<<"displacements :";
|
||||||
for(int d=0;d<npoint;d++) std::cout<< displacements[d]<< " ";
|
for(int d=0;d<npoint;d++) std::cout<< displacements[d]<< " ";
|
||||||
std::cout<<std::endl;
|
std::cout<<std::endl;
|
||||||
@ -115,10 +153,10 @@ public:
|
|||||||
|
|
||||||
void Orthogonalise(void){
|
void Orthogonalise(void){
|
||||||
CoarseScalar InnerProd(CoarseGrid);
|
CoarseScalar InnerProd(CoarseGrid);
|
||||||
std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
|
std::cout << GridLogMessage <<" Block Gramm-Schmidt pass 1"<<std::endl;
|
||||||
blockOrthogonalise(InnerProd,subspace);
|
|
||||||
std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
|
|
||||||
blockOrthogonalise(InnerProd,subspace);
|
blockOrthogonalise(InnerProd,subspace);
|
||||||
|
// std::cout << GridLogMessage <<" Block Gramm-Schmidt pass 2"<<std::endl; // Really have to do twice? Yuck
|
||||||
|
// blockOrthogonalise(InnerProd,subspace);
|
||||||
// std::cout << GridLogMessage <<" Gramm-Schmidt checking orthogonality"<<std::endl;
|
// std::cout << GridLogMessage <<" Gramm-Schmidt checking orthogonality"<<std::endl;
|
||||||
// CheckOrthogonal();
|
// CheckOrthogonal();
|
||||||
}
|
}
|
||||||
@ -128,7 +166,7 @@ public:
|
|||||||
for(int i=0;i<nbasis;i++){
|
for(int i=0;i<nbasis;i++){
|
||||||
blockProject(iProj,subspace[i],subspace);
|
blockProject(iProj,subspace[i],subspace);
|
||||||
eProj=Zero();
|
eProj=Zero();
|
||||||
thread_for(ss, CoarseGrid->oSites(),{
|
accelerator_for(ss, CoarseGrid->oSites(),1,{
|
||||||
eProj[ss](i)=CComplex(1.0);
|
eProj[ss](i)=CComplex(1.0);
|
||||||
});
|
});
|
||||||
eProj=eProj - iProj;
|
eProj=eProj - iProj;
|
||||||
@ -146,61 +184,9 @@ public:
|
|||||||
void CreateSubspaceRandom(GridParallelRNG &RNG){
|
void CreateSubspaceRandom(GridParallelRNG &RNG){
|
||||||
for(int i=0;i<nbasis;i++){
|
for(int i=0;i<nbasis;i++){
|
||||||
random(RNG,subspace[i]);
|
random(RNG,subspace[i]);
|
||||||
std::cout<<GridLogMessage<<" norm subspace["<<i<<"] "<<norm2(subspace[i])<<std::endl;
|
|
||||||
}
|
}
|
||||||
Orthogonalise();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
virtual void CreateSubspaceLanczos(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis)
|
|
||||||
{
|
|
||||||
// Run a Lanczos with sloppy convergence
|
|
||||||
const int Nstop = nn;
|
|
||||||
const int Nk = nn+20;
|
|
||||||
const int Np = nn+20;
|
|
||||||
const int Nm = Nk+Np;
|
|
||||||
const int MaxIt= 10000;
|
|
||||||
RealD resid = 1.0e-3;
|
|
||||||
|
|
||||||
Chebyshev<FineField> Cheb(0.5,64.0,21);
|
|
||||||
ImplicitlyRestartedLanczos<FineField> IRL(hermop,Cheb,Nstop,Nk,Nm,resid,MaxIt);
|
|
||||||
// IRL.lock = 1;
|
|
||||||
|
|
||||||
FineField noise(FineGrid); gaussian(RNG,noise);
|
|
||||||
FineField tmp(FineGrid);
|
|
||||||
std::vector<RealD> eval(Nm);
|
|
||||||
std::vector<FineField> evec(Nm,FineGrid);
|
|
||||||
|
|
||||||
int Nconv;
|
|
||||||
IRL.calc(eval,evec,
|
|
||||||
noise,
|
|
||||||
Nconv);
|
|
||||||
|
|
||||||
// pull back nn vectors
|
|
||||||
for(int b=0;b<nn;b++){
|
|
||||||
|
|
||||||
subspace[b] = evec[b];
|
|
||||||
|
|
||||||
std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl;
|
|
||||||
|
|
||||||
hermop.Op(subspace[b],tmp);
|
|
||||||
std::cout<<GridLogMessage << "filtered["<<b<<"] <f|MdagM|f> "<<norm2(tmp)<<std::endl;
|
|
||||||
|
|
||||||
noise = tmp - sqrt(eval[b])*subspace[b] ;
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage << " lambda_"<<b<<" = "<< eval[b] <<" ; [ M - Lambda ]_"<<b<<" vec_"<<b<<" = " <<norm2(noise)<<std::endl;
|
|
||||||
|
|
||||||
noise = tmp + eval[b]*subspace[b] ;
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage << " lambda_"<<b<<" = "<< eval[b] <<" ; [ M - Lambda ]_"<<b<<" vec_"<<b<<" = " <<norm2(noise)<<std::endl;
|
|
||||||
|
|
||||||
}
|
|
||||||
Orthogonalise();
|
|
||||||
for(int b=0;b<nn;b++){
|
|
||||||
std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
virtual void CreateSubspace(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) {
|
virtual void CreateSubspace(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) {
|
||||||
|
|
||||||
RealD scale;
|
RealD scale;
|
||||||
@ -232,54 +218,316 @@ public:
|
|||||||
subspace[b] = noise;
|
subspace[b] = noise;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Orthogonalise();
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void CreateSubspaceChebyshev(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) {
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// World of possibilities here. But have tried quite a lot of experiments (250+ jobs run on Summit)
|
||||||
|
// and this is the best I found
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
#if 1
|
||||||
|
virtual void CreateSubspaceChebyshev(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,
|
||||||
|
int nn,
|
||||||
|
double hi,
|
||||||
|
double lo,
|
||||||
|
int orderfilter,
|
||||||
|
int ordermin,
|
||||||
|
int orderstep,
|
||||||
|
double filterlo
|
||||||
|
) {
|
||||||
|
|
||||||
RealD scale;
|
RealD scale;
|
||||||
|
|
||||||
|
FineField noise(FineGrid);
|
||||||
|
FineField Mn(FineGrid);
|
||||||
|
FineField tmp(FineGrid);
|
||||||
|
|
||||||
Chebyshev<FineField> Cheb(0.1,64.0,900);
|
// New normalised noise
|
||||||
|
gaussian(RNG,noise);
|
||||||
|
scale = std::pow(norm2(noise),-0.5);
|
||||||
|
noise=noise*scale;
|
||||||
|
|
||||||
|
// Initial matrix element
|
||||||
|
hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl;
|
||||||
|
|
||||||
|
int b =0;
|
||||||
|
{
|
||||||
|
// Filter
|
||||||
|
Chebyshev<FineField> Cheb(lo,hi,orderfilter);
|
||||||
|
Cheb(hermop,noise,Mn);
|
||||||
|
// normalise
|
||||||
|
scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale;
|
||||||
|
subspace[b] = Mn;
|
||||||
|
hermop.Op(Mn,tmp);
|
||||||
|
std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl;
|
||||||
|
b++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate a full sequence of Chebyshevs
|
||||||
|
{
|
||||||
|
lo=filterlo;
|
||||||
|
noise=Mn;
|
||||||
|
|
||||||
|
FineField T0(FineGrid); T0 = noise;
|
||||||
|
FineField T1(FineGrid);
|
||||||
|
FineField T2(FineGrid);
|
||||||
|
FineField y(FineGrid);
|
||||||
|
|
||||||
|
FineField *Tnm = &T0;
|
||||||
|
FineField *Tn = &T1;
|
||||||
|
FineField *Tnp = &T2;
|
||||||
|
|
||||||
|
// Tn=T1 = (xscale M + mscale)in
|
||||||
|
RealD xscale = 2.0/(hi-lo);
|
||||||
|
RealD mscale = -(hi+lo)/(hi-lo);
|
||||||
|
hermop.HermOp(T0,y);
|
||||||
|
T1=y*xscale+noise*mscale;
|
||||||
|
|
||||||
|
for(int n=2;n<=ordermin+orderstep*(nn-2);n++){
|
||||||
|
|
||||||
|
hermop.HermOp(*Tn,y);
|
||||||
|
|
||||||
|
auto y_v = y.View();
|
||||||
|
auto Tn_v = Tn->View();
|
||||||
|
auto Tnp_v = Tnp->View();
|
||||||
|
auto Tnm_v = Tnm->View();
|
||||||
|
const int Nsimd = CComplex::Nsimd();
|
||||||
|
accelerator_forNB(ss, FineGrid->oSites(), Nsimd, {
|
||||||
|
coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss));
|
||||||
|
coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss));
|
||||||
|
});
|
||||||
|
|
||||||
|
// Possible more fine grained control is needed than a linear sweep,
|
||||||
|
// but huge productivity gain if this is simple algorithm and not a tunable
|
||||||
|
int m =1;
|
||||||
|
if ( n>=ordermin ) m=n-ordermin;
|
||||||
|
if ( (m%orderstep)==0 ) {
|
||||||
|
Mn=*Tnp;
|
||||||
|
scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale;
|
||||||
|
subspace[b] = Mn;
|
||||||
|
hermop.Op(Mn,tmp);
|
||||||
|
std::cout<<GridLogMessage << n<<" filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl;
|
||||||
|
b++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cycle pointers to avoid copies
|
||||||
|
FineField *swizzle = Tnm;
|
||||||
|
Tnm =Tn;
|
||||||
|
Tn =Tnp;
|
||||||
|
Tnp =swizzle;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert(b==nn);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if 0
|
||||||
|
virtual void CreateSubspaceChebyshev(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,
|
||||||
|
int nn,
|
||||||
|
double hi,
|
||||||
|
double lo,
|
||||||
|
int orderfilter,
|
||||||
|
int ordermin,
|
||||||
|
int orderstep,
|
||||||
|
double filterlo
|
||||||
|
) {
|
||||||
|
|
||||||
|
RealD scale;
|
||||||
|
|
||||||
FineField noise(FineGrid);
|
FineField noise(FineGrid);
|
||||||
FineField Mn(FineGrid);
|
FineField Mn(FineGrid);
|
||||||
|
FineField tmp(FineGrid);
|
||||||
|
FineField combined(FineGrid);
|
||||||
|
|
||||||
for(int b=0;b<nn;b++){
|
// New normalised noise
|
||||||
|
gaussian(RNG,noise);
|
||||||
|
scale = std::pow(norm2(noise),-0.5);
|
||||||
|
noise=noise*scale;
|
||||||
|
|
||||||
gaussian(RNG,noise);
|
// Initial matrix element
|
||||||
scale = std::pow(norm2(noise),-0.5);
|
hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl;
|
||||||
noise=noise*scale;
|
|
||||||
|
|
||||||
hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise ["<<b<<"] <n|MdagM|n> "<<norm2(Mn)<<std::endl;
|
|
||||||
|
|
||||||
Cheb(hermop,noise,Mn);
|
|
||||||
|
|
||||||
scale = std::pow(norm2(Mn),-0.5);
|
|
||||||
Mn=Mn*scale;
|
|
||||||
subspace[b] = Mn;
|
|
||||||
|
|
||||||
hermop.Op(Mn,noise); std::cout<<GridLogMessage << "filtered["<<b<<"] <f|MdagM|f> "<<norm2(noise)<<std::endl;
|
|
||||||
|
|
||||||
|
int b =0;
|
||||||
|
#define FILTERb(llo,hhi,oorder) \
|
||||||
|
{ \
|
||||||
|
Chebyshev<FineField> Cheb(llo,hhi,oorder); \
|
||||||
|
Cheb(hermop,noise,Mn); \
|
||||||
|
scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale; \
|
||||||
|
subspace[b] = Mn; \
|
||||||
|
hermop.Op(Mn,tmp); \
|
||||||
|
std::cout<<GridLogMessage << oorder<< " Cheb filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; \
|
||||||
|
b++; \
|
||||||
}
|
}
|
||||||
|
|
||||||
Orthogonalise();
|
// JacobiPolynomial<FineField> Cheb(0.002,60.0,1500,-0.5,3.5); \
|
||||||
|
|
||||||
|
RealD alpha=-0.8;
|
||||||
|
RealD beta =-0.8;
|
||||||
|
#define FILTER(llo,hhi,oorder) \
|
||||||
|
{ \
|
||||||
|
Chebyshev<FineField> Cheb(llo,hhi,oorder); \
|
||||||
|
/* JacobiPolynomial<FineField> Cheb(0.0,60.0,oorder,alpha,beta);*/\
|
||||||
|
Cheb(hermop,noise,Mn); \
|
||||||
|
scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale; \
|
||||||
|
subspace[b] = Mn; \
|
||||||
|
hermop.Op(Mn,tmp); \
|
||||||
|
std::cout<<GridLogMessage << oorder<< "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; \
|
||||||
|
b++; \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define FILTERc(llo,hhi,oorder) \
|
||||||
|
{ \
|
||||||
|
Chebyshev<FineField> Cheb(llo,hhi,oorder); \
|
||||||
|
Cheb(hermop,noise,combined); \
|
||||||
|
}
|
||||||
|
|
||||||
|
double node = 0.000;
|
||||||
|
FILTERb(lo,hi,orderfilter);// 0
|
||||||
|
// FILTERc(node,hi,51);// 0
|
||||||
|
noise = Mn;
|
||||||
|
int base = 0;
|
||||||
|
int mult = 100;
|
||||||
|
FILTER(node,hi,base+1*mult);
|
||||||
|
FILTER(node,hi,base+2*mult);
|
||||||
|
FILTER(node,hi,base+3*mult);
|
||||||
|
FILTER(node,hi,base+4*mult);
|
||||||
|
FILTER(node,hi,base+5*mult);
|
||||||
|
FILTER(node,hi,base+6*mult);
|
||||||
|
FILTER(node,hi,base+7*mult);
|
||||||
|
FILTER(node,hi,base+8*mult);
|
||||||
|
FILTER(node,hi,base+9*mult);
|
||||||
|
FILTER(node,hi,base+10*mult);
|
||||||
|
FILTER(node,hi,base+11*mult);
|
||||||
|
FILTER(node,hi,base+12*mult);
|
||||||
|
FILTER(node,hi,base+13*mult);
|
||||||
|
FILTER(node,hi,base+14*mult);
|
||||||
|
FILTER(node,hi,base+15*mult);
|
||||||
|
assert(b==nn);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
virtual void CreateSubspaceChebyshev(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,
|
||||||
|
int nn,
|
||||||
|
double hi,
|
||||||
|
double lo,
|
||||||
|
int orderfilter,
|
||||||
|
int ordermin,
|
||||||
|
int orderstep,
|
||||||
|
double filterlo
|
||||||
|
) {
|
||||||
|
|
||||||
|
RealD scale;
|
||||||
|
|
||||||
|
FineField noise(FineGrid);
|
||||||
|
FineField Mn(FineGrid);
|
||||||
|
FineField tmp(FineGrid);
|
||||||
|
FineField combined(FineGrid);
|
||||||
|
|
||||||
|
// New normalised noise
|
||||||
|
gaussian(RNG,noise);
|
||||||
|
scale = std::pow(norm2(noise),-0.5);
|
||||||
|
noise=noise*scale;
|
||||||
|
|
||||||
|
// Initial matrix element
|
||||||
|
hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl;
|
||||||
|
|
||||||
|
int b =0;
|
||||||
|
{
|
||||||
|
Chebyshev<FineField> JacobiPoly(0.005,60.,1500);
|
||||||
|
// JacobiPolynomial<FineField> JacobiPoly(0.002,60.0,1500,-0.5,3.5);
|
||||||
|
//JacobiPolynomial<FineField> JacobiPoly(0.03,60.0,500,-0.5,3.5);
|
||||||
|
// JacobiPolynomial<FineField> JacobiPoly(0.00,60.0,1000,-0.5,3.5);
|
||||||
|
JacobiPoly(hermop,noise,Mn);
|
||||||
|
scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale;
|
||||||
|
subspace[b] = Mn;
|
||||||
|
hermop.Op(Mn,tmp);
|
||||||
|
std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl;
|
||||||
|
b++;
|
||||||
|
// scale = std::pow(norm2(tmp),-0.5); tmp=tmp*scale;
|
||||||
|
// subspace[b] = tmp; b++;
|
||||||
|
// }
|
||||||
|
}
|
||||||
|
|
||||||
|
#define FILTER(lambda) \
|
||||||
|
{ \
|
||||||
|
hermop.HermOp(subspace[0],tmp); \
|
||||||
|
tmp = tmp - lambda *subspace[0]; \
|
||||||
|
scale = std::pow(norm2(tmp),-0.5); \
|
||||||
|
tmp=tmp*scale; \
|
||||||
|
subspace[b] = tmp; \
|
||||||
|
hermop.Op(subspace[b],tmp); \
|
||||||
|
std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; \
|
||||||
|
b++; \
|
||||||
|
}
|
||||||
|
// scale = std::pow(norm2(tmp),-0.5); tmp=tmp*scale;
|
||||||
|
// subspace[b] = tmp; b++;
|
||||||
|
// }
|
||||||
|
|
||||||
|
FILTER(2.0e-5);
|
||||||
|
FILTER(2.0e-4);
|
||||||
|
FILTER(4.0e-4);
|
||||||
|
FILTER(8.0e-4);
|
||||||
|
FILTER(8.0e-4);
|
||||||
|
|
||||||
|
FILTER(2.0e-3);
|
||||||
|
FILTER(3.0e-3);
|
||||||
|
FILTER(4.0e-3);
|
||||||
|
FILTER(5.0e-3);
|
||||||
|
FILTER(6.0e-3);
|
||||||
|
|
||||||
|
FILTER(2.5e-3);
|
||||||
|
FILTER(3.5e-3);
|
||||||
|
FILTER(4.5e-3);
|
||||||
|
FILTER(5.5e-3);
|
||||||
|
FILTER(6.5e-3);
|
||||||
|
|
||||||
|
// FILTER(6.0e-5);//6
|
||||||
|
// FILTER(7.0e-5);//8
|
||||||
|
// FILTER(8.0e-5);//9
|
||||||
|
// FILTER(9.0e-5);//3
|
||||||
|
|
||||||
|
/*
|
||||||
|
// FILTER(1.0e-4);//10
|
||||||
|
FILTER(2.0e-4);//11
|
||||||
|
// FILTER(3.0e-4);//12
|
||||||
|
// FILTER(4.0e-4);//13
|
||||||
|
FILTER(5.0e-4);//14
|
||||||
|
|
||||||
|
FILTER(6.0e-3);//4
|
||||||
|
FILTER(7.0e-4);//1
|
||||||
|
FILTER(8.0e-4);//7
|
||||||
|
FILTER(9.0e-4);//15
|
||||||
|
FILTER(1.0e-3);//2
|
||||||
|
|
||||||
|
FILTER(2.0e-3);//2
|
||||||
|
FILTER(3.0e-3);//2
|
||||||
|
FILTER(4.0e-3);//2
|
||||||
|
FILTER(5.0e-3);//2
|
||||||
|
FILTER(6.0e-3);//2
|
||||||
|
|
||||||
|
FILTER(7.0e-3);//2
|
||||||
|
FILTER(8.0e-3);//2
|
||||||
|
FILTER(1.0e-2);//2
|
||||||
|
*/
|
||||||
|
std::cout << GridLogMessage <<"Jacobi filtering done" <<std::endl;
|
||||||
|
assert(b==nn);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// Fine Object == (per site) type of fine field
|
// Fine Object == (per site) type of fine field
|
||||||
// nbasis == number of deflation vectors
|
// nbasis == number of deflation vectors
|
||||||
template<class Fobj,class CComplex,int nbasis>
|
template<class Fobj,class CComplex,int nbasis>
|
||||||
class CoarsenedMatrix : public SparseMatrixBase<Lattice<iVector<CComplex,nbasis > > > {
|
class CoarsenedMatrix : public SparseMatrixBase<Lattice<iVector<CComplex,nbasis > > > {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
typedef iVector<CComplex,nbasis > siteVector;
|
typedef iVector<CComplex,nbasis > siteVector;
|
||||||
|
typedef Lattice<CComplex > CoarseComplexField;
|
||||||
typedef Lattice<siteVector> CoarseVector;
|
typedef Lattice<siteVector> CoarseVector;
|
||||||
typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix;
|
typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix;
|
||||||
|
typedef iMatrix<CComplex,nbasis > Cobj;
|
||||||
typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field
|
typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field
|
||||||
typedef Lattice<Fobj > FineField;
|
typedef Lattice<Fobj > FineField;
|
||||||
|
|
||||||
@ -294,7 +542,6 @@ public:
|
|||||||
|
|
||||||
std::vector<CoarseMatrix> A;
|
std::vector<CoarseMatrix> A;
|
||||||
|
|
||||||
|
|
||||||
///////////////////////
|
///////////////////////
|
||||||
// Interface
|
// Interface
|
||||||
///////////////////////
|
///////////////////////
|
||||||
@ -305,33 +552,71 @@ public:
|
|||||||
conformable(_grid,in.Grid());
|
conformable(_grid,in.Grid());
|
||||||
conformable(in.Grid(),out.Grid());
|
conformable(in.Grid(),out.Grid());
|
||||||
|
|
||||||
RealD Nin = norm2(in);
|
// RealD Nin = norm2(in);
|
||||||
SimpleCompressor<siteVector> compressor;
|
SimpleCompressor<siteVector> compressor;
|
||||||
|
|
||||||
|
double comms_usec = -usecond();
|
||||||
Stencil.HaloExchange(in,compressor);
|
Stencil.HaloExchange(in,compressor);
|
||||||
|
comms_usec += usecond();
|
||||||
|
|
||||||
auto in_v = in.View();
|
auto in_v = in.View();
|
||||||
auto out_v = out.View();
|
auto out_v = out.View();
|
||||||
thread_for(ss,Grid()->oSites(),{
|
typedef LatticeView<Cobj> Aview;
|
||||||
siteVector res = Zero();
|
|
||||||
siteVector nbr;
|
Vector<Aview> AcceleratorViewContainer;
|
||||||
|
for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer.push_back(A[p].View());
|
||||||
|
Aview *Aview_p = & AcceleratorViewContainer[0];
|
||||||
|
|
||||||
|
const int Nsimd = CComplex::Nsimd();
|
||||||
|
typedef decltype(coalescedRead(in_v[0])) calcVector;
|
||||||
|
typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
|
||||||
|
|
||||||
|
GridStopWatch ArithmeticTimer;
|
||||||
|
int osites=Grid()->oSites();
|
||||||
|
// double flops = osites*Nsimd*nbasis*nbasis*8.0*geom.npoint;
|
||||||
|
// double bytes = osites*nbasis*nbasis*geom.npoint*sizeof(CComplex);
|
||||||
|
double usecs =-usecond();
|
||||||
|
// assert(geom.npoint==9);
|
||||||
|
|
||||||
|
accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
|
||||||
|
int ss = sss/nbasis;
|
||||||
|
int b = sss%nbasis;
|
||||||
|
calcComplex res = Zero();
|
||||||
|
calcVector nbr;
|
||||||
int ptype;
|
int ptype;
|
||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
|
|
||||||
|
int lane=SIMTlane(Nsimd);
|
||||||
for(int point=0;point<geom.npoint;point++){
|
for(int point=0;point<geom.npoint;point++){
|
||||||
|
|
||||||
SE=Stencil.GetEntry(ptype,point,ss);
|
SE=Stencil.GetEntry(ptype,point,ss);
|
||||||
|
|
||||||
if(SE->_is_local&&SE->_permute) {
|
if(SE->_is_local) {
|
||||||
permute(nbr,in_v[SE->_offset],ptype);
|
nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute,lane);
|
||||||
} else if(SE->_is_local) {
|
|
||||||
nbr = in_v[SE->_offset];
|
|
||||||
} else {
|
} else {
|
||||||
nbr = Stencil.CommBuf()[SE->_offset];
|
nbr = coalescedRead(Stencil.CommBuf()[SE->_offset],lane);
|
||||||
|
}
|
||||||
|
synchronise();
|
||||||
|
|
||||||
|
for(int bb=0;bb<nbasis;bb++) {
|
||||||
|
res = res + coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb);
|
||||||
}
|
}
|
||||||
auto A_point = A[point].View();
|
|
||||||
res = res + A_point[ss]*nbr;
|
|
||||||
}
|
}
|
||||||
vstream(out_v[ss],res);
|
coalescedWrite(out_v[ss](b),res,lane);
|
||||||
});
|
});
|
||||||
|
usecs +=usecond();
|
||||||
|
|
||||||
|
double nrm_usec=-usecond();
|
||||||
RealD Nout= norm2(out);
|
RealD Nout= norm2(out);
|
||||||
|
nrm_usec+=usecond();
|
||||||
|
|
||||||
|
/*
|
||||||
|
std::cout << GridLogMessage << "\tNorm " << nrm_usec << " us" <<std::endl;
|
||||||
|
std::cout << GridLogMessage << "\tHalo " << comms_usec << " us" <<std::endl;
|
||||||
|
std::cout << GridLogMessage << "\tMatrix " << usecs << " us" <<std::endl;
|
||||||
|
std::cout << GridLogMessage << "\t mflop/s " << flops/usecs<<std::endl;
|
||||||
|
std::cout << GridLogMessage << "\t MB/s " << bytes/usecs<<std::endl;
|
||||||
|
*/
|
||||||
return Nout;
|
return Nout;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -349,25 +634,54 @@ public:
|
|||||||
return norm2(out);
|
return norm2(out);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
void MdirComms(const CoarseVector &in)
|
||||||
void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){
|
{
|
||||||
|
|
||||||
conformable(_grid,in.Grid());
|
|
||||||
conformable(in.Grid(),out.Grid());
|
|
||||||
|
|
||||||
SimpleCompressor<siteVector> compressor;
|
SimpleCompressor<siteVector> compressor;
|
||||||
Stencil.HaloExchange(in,compressor);
|
Stencil.HaloExchange(in,compressor);
|
||||||
|
}
|
||||||
|
void MdirCalc(const CoarseVector &in, CoarseVector &out, int point)
|
||||||
|
{
|
||||||
|
conformable(_grid,in.Grid());
|
||||||
|
conformable(_grid,out.Grid());
|
||||||
|
|
||||||
auto point = [dir, disp](){
|
typedef LatticeView<Cobj> Aview;
|
||||||
if(dir == 0 and disp == 0)
|
Vector<Aview> AcceleratorViewContainer;
|
||||||
return 8;
|
for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer.push_back(A[p].View());
|
||||||
else
|
Aview *Aview_p = & AcceleratorViewContainer[0];
|
||||||
return (4 * dir + 1 - disp) / 2;
|
|
||||||
}();
|
|
||||||
|
|
||||||
auto out_v = out.View();
|
auto out_v = out.View();
|
||||||
auto in_v = in.View();
|
auto in_v = in.View();
|
||||||
thread_for(ss,Grid()->oSites(),{
|
|
||||||
|
const int Nsimd = CComplex::Nsimd();
|
||||||
|
typedef decltype(coalescedRead(in_v[0])) calcVector;
|
||||||
|
typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
|
||||||
|
|
||||||
|
accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
|
||||||
|
int ss = sss/nbasis;
|
||||||
|
int b = sss%nbasis;
|
||||||
|
calcComplex res = Zero();
|
||||||
|
calcVector nbr;
|
||||||
|
int ptype;
|
||||||
|
StencilEntry *SE;
|
||||||
|
|
||||||
|
int lane=SIMTlane(Nsimd);
|
||||||
|
SE=Stencil.GetEntry(ptype,point,ss);
|
||||||
|
|
||||||
|
if(SE->_is_local) {
|
||||||
|
nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute,lane);
|
||||||
|
} else {
|
||||||
|
nbr = coalescedRead(Stencil.CommBuf()[SE->_offset],lane);
|
||||||
|
}
|
||||||
|
synchronise();
|
||||||
|
|
||||||
|
for(int bb=0;bb<nbasis;bb++) {
|
||||||
|
res = res + coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb);
|
||||||
|
}
|
||||||
|
coalescedWrite(out_v[ss](b),res,lane);
|
||||||
|
});
|
||||||
|
#if 0
|
||||||
|
accelerator_for(ss,Grid()->oSites(),1,{
|
||||||
|
|
||||||
siteVector res = Zero();
|
siteVector res = Zero();
|
||||||
siteVector nbr;
|
siteVector nbr;
|
||||||
int ptype;
|
int ptype;
|
||||||
@ -382,16 +696,65 @@ public:
|
|||||||
} else {
|
} else {
|
||||||
nbr = Stencil.CommBuf()[SE->_offset];
|
nbr = Stencil.CommBuf()[SE->_offset];
|
||||||
}
|
}
|
||||||
|
synchronise();
|
||||||
|
|
||||||
auto A_point = A[point].View();
|
res = res + Aview_p[point][ss]*nbr;
|
||||||
res = res + A_point[ss]*nbr;
|
|
||||||
|
|
||||||
vstream(out_v[ss],res);
|
out_v[ss]=res;
|
||||||
});
|
});
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
void MdirAll(const CoarseVector &in,std::vector<CoarseVector> &out)
|
||||||
|
{
|
||||||
|
this->MdirComms(in);
|
||||||
|
int ndir=geom.npoint-1;
|
||||||
|
if ((out.size()!=ndir)&&(out.size()!=ndir+1)) {
|
||||||
|
std::cout <<"MdirAll out size "<< out.size()<<std::endl;
|
||||||
|
std::cout <<"MdirAll ndir "<< ndir<<std::endl;
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
for(int p=0;p<ndir;p++){
|
||||||
|
MdirCalc(in,out[p],p);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){
|
||||||
|
|
||||||
|
this->MdirComms(in);
|
||||||
|
|
||||||
|
int ndim = in.Grid()->Nd();
|
||||||
|
|
||||||
|
//////////////
|
||||||
|
// 4D action like wilson
|
||||||
|
// 0+ => 0
|
||||||
|
// 0- => 1
|
||||||
|
// 1+ => 2
|
||||||
|
// 1- => 3
|
||||||
|
// etc..
|
||||||
|
//////////////
|
||||||
|
// 5D action like DWF
|
||||||
|
// 1+ => 0
|
||||||
|
// 1- => 1
|
||||||
|
// 2+ => 2
|
||||||
|
// 2- => 3
|
||||||
|
// etc..
|
||||||
|
auto point = [dir, disp, ndim](){
|
||||||
|
if(dir == 0 and disp == 0)
|
||||||
|
return 8;
|
||||||
|
else if ( ndim==4 ) {
|
||||||
|
return (4 * dir + 1 - disp) / 2;
|
||||||
|
} else {
|
||||||
|
return (4 * (dir-1) + 1 - disp) / 2;
|
||||||
|
}
|
||||||
|
}();
|
||||||
|
|
||||||
|
MdirCalc(in,out,point);
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
void Mdiag(const CoarseVector &in, CoarseVector &out){
|
void Mdiag(const CoarseVector &in, CoarseVector &out)
|
||||||
Mdir(in, out, 0, 0); // use the self coupling (= last) point of the stencil
|
{
|
||||||
|
int point=geom.npoint-1;
|
||||||
|
MdirCalc(in, out, point); // No comms
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -401,25 +764,44 @@ public:
|
|||||||
geom(CoarseGrid._ndimension),
|
geom(CoarseGrid._ndimension),
|
||||||
hermitian(hermitian_),
|
hermitian(hermitian_),
|
||||||
Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements,0),
|
Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements,0),
|
||||||
A(geom.npoint,&CoarseGrid)
|
A(geom.npoint,&CoarseGrid)
|
||||||
{
|
{
|
||||||
};
|
};
|
||||||
|
|
||||||
void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop,
|
void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop,
|
||||||
Aggregation<Fobj,CComplex,nbasis> & Subspace){
|
Aggregation<Fobj,CComplex,nbasis> & Subspace)
|
||||||
|
{
|
||||||
|
typedef Lattice<typename Fobj::tensor_reduced> FineComplexField;
|
||||||
|
typedef typename Fobj::scalar_type scalar_type;
|
||||||
|
|
||||||
FineField iblock(FineGrid); // contributions from within this block
|
FineComplexField one(FineGrid); one=scalar_type(1.0,0.0);
|
||||||
FineField oblock(FineGrid); // contributions from outwith this block
|
FineComplexField zero(FineGrid); zero=scalar_type(0.0,0.0);
|
||||||
|
|
||||||
|
std::vector<FineComplexField> masks(geom.npoint,FineGrid);
|
||||||
|
FineComplexField imask(FineGrid); // contributions from within this block
|
||||||
|
FineComplexField omask(FineGrid); // contributions from outwith this block
|
||||||
|
|
||||||
|
FineComplexField evenmask(FineGrid);
|
||||||
|
FineComplexField oddmask(FineGrid);
|
||||||
|
|
||||||
FineField phi(FineGrid);
|
FineField phi(FineGrid);
|
||||||
FineField tmp(FineGrid);
|
FineField tmp(FineGrid);
|
||||||
FineField zz(FineGrid); zz=Zero();
|
FineField zz(FineGrid); zz=Zero();
|
||||||
FineField Mphi(FineGrid);
|
FineField Mphi(FineGrid);
|
||||||
|
FineField Mphie(FineGrid);
|
||||||
|
FineField Mphio(FineGrid);
|
||||||
|
std::vector<FineField> Mphi_p(geom.npoint,FineGrid);
|
||||||
|
|
||||||
Lattice<iScalar<vInteger> > coor(FineGrid);
|
Lattice<iScalar<vInteger> > coor (FineGrid);
|
||||||
|
Lattice<iScalar<vInteger> > bcoor(FineGrid);
|
||||||
|
Lattice<iScalar<vInteger> > bcb (FineGrid);
|
||||||
|
|
||||||
CoarseVector iProj(Grid());
|
CoarseVector iProj(Grid());
|
||||||
CoarseVector oProj(Grid());
|
CoarseVector oProj(Grid());
|
||||||
|
CoarseVector SelfProj(Grid());
|
||||||
|
CoarseComplexField iZProj(Grid());
|
||||||
|
CoarseComplexField oZProj(Grid());
|
||||||
|
|
||||||
CoarseScalar InnerProd(Grid());
|
CoarseScalar InnerProd(Grid());
|
||||||
|
|
||||||
// Orthogonalise the subblocks over the basis
|
// Orthogonalise the subblocks over the basis
|
||||||
@ -428,69 +810,114 @@ public:
|
|||||||
// Compute the matrix elements of linop between this orthonormal
|
// Compute the matrix elements of linop between this orthonormal
|
||||||
// set of vectors.
|
// set of vectors.
|
||||||
int self_stencil=-1;
|
int self_stencil=-1;
|
||||||
for(int p=0;p<geom.npoint;p++){
|
for(int p=0;p<geom.npoint;p++)
|
||||||
|
{
|
||||||
|
int dir = geom.directions[p];
|
||||||
|
int disp = geom.displacements[p];
|
||||||
A[p]=Zero();
|
A[p]=Zero();
|
||||||
if( geom.displacements[p]==0){
|
if( geom.displacements[p]==0){
|
||||||
self_stencil=p;
|
self_stencil=p;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Integer block=(FineGrid->_rdimensions[dir])/(Grid()->_rdimensions[dir]);
|
||||||
|
|
||||||
|
LatticeCoordinate(coor,dir);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////
|
||||||
|
// Work out even and odd block checkerboarding for fast diagonal term
|
||||||
|
///////////////////////////////////////////////////////
|
||||||
|
if ( disp==1 ) {
|
||||||
|
bcb = bcb + div(coor,block);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( disp==0 ) {
|
||||||
|
masks[p]= Zero();
|
||||||
|
} else if ( disp==1 ) {
|
||||||
|
masks[p] = where(mod(coor,block)==(block-1),one,zero);
|
||||||
|
} else if ( disp==-1 ) {
|
||||||
|
masks[p] = where(mod(coor,block)==(Integer)0,one,zero);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
evenmask = where(mod(bcb,2)==(Integer)0,one,zero);
|
||||||
|
oddmask = one-evenmask;
|
||||||
|
|
||||||
assert(self_stencil!=-1);
|
assert(self_stencil!=-1);
|
||||||
|
|
||||||
for(int i=0;i<nbasis;i++){
|
for(int i=0;i<nbasis;i++){
|
||||||
|
|
||||||
phi=Subspace.subspace[i];
|
phi=Subspace.subspace[i];
|
||||||
|
|
||||||
std::cout<<GridLogMessage<<"("<<i<<").."<<std::endl;
|
// std::cout << GridLogMessage<< "CoarsenMatrix vector "<<i << std::endl;
|
||||||
|
linop.OpDirAll(phi,Mphi_p);
|
||||||
|
linop.OpDiag (phi,Mphi_p[geom.npoint-1]);
|
||||||
|
|
||||||
for(int p=0;p<geom.npoint;p++){
|
for(int p=0;p<geom.npoint;p++){
|
||||||
|
|
||||||
|
Mphi = Mphi_p[p];
|
||||||
|
|
||||||
int dir = geom.directions[p];
|
int dir = geom.directions[p];
|
||||||
int disp = geom.displacements[p];
|
int disp = geom.displacements[p];
|
||||||
|
|
||||||
Integer block=(FineGrid->_rdimensions[dir])/(Grid()->_rdimensions[dir]);
|
if ( (disp==-1) || (!hermitian ) ) {
|
||||||
|
|
||||||
LatticeCoordinate(coor,dir);
|
////////////////////////////////////////////////////////////////////////
|
||||||
|
// Pick out contributions coming from this cell and neighbour cell
|
||||||
|
////////////////////////////////////////////////////////////////////////
|
||||||
|
omask = masks[p];
|
||||||
|
imask = one-omask;
|
||||||
|
|
||||||
if ( disp==0 ){
|
|
||||||
linop.OpDiag(phi,Mphi);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
linop.OpDir(phi,Mphi,dir,disp);
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
// Pick out contributions coming from this cell and neighbour cell
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
if ( disp==0 ) {
|
|
||||||
iblock = Mphi;
|
|
||||||
oblock = Zero();
|
|
||||||
} else if ( disp==1 ) {
|
|
||||||
oblock = where(mod(coor,block)==(block-1),Mphi,zz);
|
|
||||||
iblock = where(mod(coor,block)!=(block-1),Mphi,zz);
|
|
||||||
} else if ( disp==-1 ) {
|
|
||||||
oblock = where(mod(coor,block)==(Integer)0,Mphi,zz);
|
|
||||||
iblock = where(mod(coor,block)!=(Integer)0,Mphi,zz);
|
|
||||||
} else {
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
Subspace.ProjectToSubspace(iProj,iblock);
|
|
||||||
Subspace.ProjectToSubspace(oProj,oblock);
|
|
||||||
// blockProject(iProj,iblock,Subspace.subspace);
|
|
||||||
// blockProject(oProj,oblock,Subspace.subspace);
|
|
||||||
auto iProj_v = iProj.View() ;
|
|
||||||
auto oProj_v = oProj.View() ;
|
|
||||||
auto A_p = A[p].View();
|
|
||||||
auto A_self = A[self_stencil].View();
|
|
||||||
thread_for(ss, Grid()->oSites(),{
|
|
||||||
for(int j=0;j<nbasis;j++){
|
for(int j=0;j<nbasis;j++){
|
||||||
if( disp!= 0 ) {
|
|
||||||
A_p[ss](j,i) = oProj_v[ss](j);
|
blockMaskedInnerProduct(iZProj,oZProj,imask,omask,Subspace.subspace[j],Mphi);
|
||||||
}
|
|
||||||
A_self[ss](j,i) = A_self[ss](j,i) + iProj_v[ss](j);
|
auto iZProj_v = iZProj.View() ;
|
||||||
|
auto oZProj_v = oZProj.View() ;
|
||||||
|
auto A_p = A[p].View();
|
||||||
|
auto A_self = A[self_stencil].View();
|
||||||
|
|
||||||
|
accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////
|
||||||
|
// Faster alternate self coupling.. use hermiticity to save 2x
|
||||||
|
///////////////////////////////////////////
|
||||||
|
{
|
||||||
|
mult(tmp,phi,evenmask); linop.Op(tmp,Mphie);
|
||||||
|
mult(tmp,phi,oddmask ); linop.Op(tmp,Mphio);
|
||||||
|
|
||||||
|
// tmp = Mphie*evenmask + Mphio*oddmask;
|
||||||
|
{
|
||||||
|
auto tmp_ = tmp.View();
|
||||||
|
auto evenmask_ = evenmask.View();
|
||||||
|
auto oddmask_ = oddmask.View();
|
||||||
|
auto Mphie_ = Mphie.View();
|
||||||
|
auto Mphio_ = Mphio.View();
|
||||||
|
accelerator_for(ss, FineGrid->oSites(), Fobj::Nsimd(),{
|
||||||
|
coalescedWrite(tmp_[ss],evenmask_(ss)*Mphie_(ss) + oddmask_(ss)*Mphio_(ss));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
blockProject(SelfProj,tmp,Subspace.subspace);
|
||||||
|
|
||||||
|
auto SelfProj_ = SelfProj.View();
|
||||||
|
auto A_self = A[self_stencil].View();
|
||||||
|
accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{
|
||||||
|
for(int j=0;j<nbasis;j++){
|
||||||
|
coalescedWrite(A_self[ss](j,i), SelfProj_(ss)(j));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if(hermitian) {
|
||||||
|
std::cout << GridLogMessage << " ForceHermitian "<<std::endl;
|
||||||
|
ForceHermitian();
|
||||||
|
}
|
||||||
|
// AssertHermitian();
|
||||||
|
// ForceDiagonal();
|
||||||
|
}
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
@ -513,17 +940,26 @@ public:
|
|||||||
std::cout<<GridLogMessage<< iProj <<std::endl;
|
std::cout<<GridLogMessage<< iProj <<std::endl;
|
||||||
std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl;
|
std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl;
|
||||||
#endif
|
#endif
|
||||||
// ForceHermitian();
|
|
||||||
// AssertHermitian();
|
|
||||||
// ForceDiagonal();
|
|
||||||
}
|
|
||||||
|
|
||||||
void ForceHermitian(void) {
|
void ForceHermitian(void) {
|
||||||
for(int d=0;d<4;d++){
|
CoarseMatrix Diff (Grid());
|
||||||
int dd=d+1;
|
for(int p=0;p<geom.npoint;p++){
|
||||||
A[2*d] = adj(Cshift(A[2*d+1],dd,1));
|
int dir = geom.directions[p];
|
||||||
|
int disp = geom.displacements[p];
|
||||||
|
if(disp==-1) {
|
||||||
|
// Find the opposite link
|
||||||
|
for(int pp=0;pp<geom.npoint;pp++){
|
||||||
|
int dirp = geom.directions[pp];
|
||||||
|
int dispp = geom.displacements[pp];
|
||||||
|
if ( (dirp==dir) && (dispp==1) ){
|
||||||
|
// Diff = adj(Cshift(A[p],dir,1)) - A[pp];
|
||||||
|
// std::cout << GridLogMessage<<" Replacing stencil leg "<<pp<<" with leg "<<p<< " diff "<<norm2(Diff) <<std::endl;
|
||||||
|
A[pp] = adj(Cshift(A[p],dir,1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// A[8] = 0.5*(A[8] + adj(A[8]));
|
|
||||||
}
|
}
|
||||||
void AssertHermitian(void) {
|
void AssertHermitian(void) {
|
||||||
CoarseMatrix AA (Grid());
|
CoarseMatrix AA (Grid());
|
||||||
|
@ -47,6 +47,7 @@ public:
|
|||||||
// Support for coarsening to a multigrid
|
// Support for coarsening to a multigrid
|
||||||
virtual void OpDiag (const Field &in, Field &out) = 0; // Abstract base
|
virtual void OpDiag (const Field &in, Field &out) = 0; // Abstract base
|
||||||
virtual void OpDir (const Field &in, Field &out,int dir,int disp) = 0; // Abstract base
|
virtual void OpDir (const Field &in, Field &out,int dir,int disp) = 0; // Abstract base
|
||||||
|
virtual void OpDirAll (const Field &in, std::vector<Field> &out) = 0; // Abstract base
|
||||||
|
|
||||||
virtual void Op (const Field &in, Field &out) = 0; // Abstract base
|
virtual void Op (const Field &in, Field &out) = 0; // Abstract base
|
||||||
virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base
|
virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base
|
||||||
@ -83,6 +84,9 @@ public:
|
|||||||
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||||
_Mat.Mdir(in,out,dir,disp);
|
_Mat.Mdir(in,out,dir,disp);
|
||||||
}
|
}
|
||||||
|
void OpDirAll (const Field &in, std::vector<Field> &out){
|
||||||
|
_Mat.MdirAll(in,out);
|
||||||
|
};
|
||||||
void Op (const Field &in, Field &out){
|
void Op (const Field &in, Field &out){
|
||||||
_Mat.M(in,out);
|
_Mat.M(in,out);
|
||||||
}
|
}
|
||||||
@ -93,8 +97,7 @@ public:
|
|||||||
_Mat.MdagM(in,out,n1,n2);
|
_Mat.MdagM(in,out,n1,n2);
|
||||||
}
|
}
|
||||||
void HermOp(const Field &in, Field &out){
|
void HermOp(const Field &in, Field &out){
|
||||||
RealD n1,n2;
|
_Mat.MdagM(in,out);
|
||||||
HermOpAndNorm(in,out,n1,n2);
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -116,6 +119,9 @@ public:
|
|||||||
_Mat.Mdir(in,out,dir,disp);
|
_Mat.Mdir(in,out,dir,disp);
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
void OpDirAll (const Field &in, std::vector<Field> &out){
|
||||||
|
assert(0);
|
||||||
|
};
|
||||||
void Op (const Field &in, Field &out){
|
void Op (const Field &in, Field &out){
|
||||||
_Mat.M(in,out);
|
_Mat.M(in,out);
|
||||||
assert(0);
|
assert(0);
|
||||||
@ -154,6 +160,9 @@ public:
|
|||||||
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||||
_Mat.Mdir(in,out,dir,disp);
|
_Mat.Mdir(in,out,dir,disp);
|
||||||
}
|
}
|
||||||
|
void OpDirAll (const Field &in, std::vector<Field> &out){
|
||||||
|
_Mat.MdirAll(in,out);
|
||||||
|
};
|
||||||
void Op (const Field &in, Field &out){
|
void Op (const Field &in, Field &out){
|
||||||
_Mat.M(in,out);
|
_Mat.M(in,out);
|
||||||
}
|
}
|
||||||
@ -162,7 +171,6 @@ public:
|
|||||||
}
|
}
|
||||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||||
_Mat.M(in,out);
|
_Mat.M(in,out);
|
||||||
|
|
||||||
ComplexD dot= innerProduct(in,out); n1=real(dot);
|
ComplexD dot= innerProduct(in,out); n1=real(dot);
|
||||||
n2=norm2(out);
|
n2=norm2(out);
|
||||||
}
|
}
|
||||||
@ -183,6 +191,9 @@ public:
|
|||||||
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||||
_Mat.Mdir(in,out,dir,disp);
|
_Mat.Mdir(in,out,dir,disp);
|
||||||
}
|
}
|
||||||
|
void OpDirAll (const Field &in, std::vector<Field> &out){
|
||||||
|
_Mat.MdirAll(in,out);
|
||||||
|
};
|
||||||
void Op (const Field &in, Field &out){
|
void Op (const Field &in, Field &out){
|
||||||
_Mat.M(in,out);
|
_Mat.M(in,out);
|
||||||
}
|
}
|
||||||
@ -234,6 +245,9 @@ public:
|
|||||||
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
void OpDirAll (const Field &in, std::vector<Field> &out){
|
||||||
|
assert(0);
|
||||||
|
};
|
||||||
};
|
};
|
||||||
template<class Matrix,class Field>
|
template<class Matrix,class Field>
|
||||||
class SchurDiagMooeeOperator : public SchurOperatorBase<Field> {
|
class SchurDiagMooeeOperator : public SchurOperatorBase<Field> {
|
||||||
|
@ -45,8 +45,13 @@ public:
|
|||||||
ni=M(in,tmp);
|
ni=M(in,tmp);
|
||||||
no=Mdag(tmp,out);
|
no=Mdag(tmp,out);
|
||||||
}
|
}
|
||||||
|
virtual void MdagM(const Field &in, Field &out) {
|
||||||
|
RealD ni, no;
|
||||||
|
MdagM(in,out,ni,no);
|
||||||
|
}
|
||||||
virtual void Mdiag (const Field &in, Field &out)=0;
|
virtual void Mdiag (const Field &in, Field &out)=0;
|
||||||
virtual void Mdir (const Field &in, Field &out,int dir, int disp)=0;
|
virtual void Mdir (const Field &in, Field &out,int dir, int disp)=0;
|
||||||
|
virtual void MdirAll (const Field &in, std::vector<Field> &out)=0;
|
||||||
};
|
};
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
@ -56,12 +61,12 @@ template<class Field> class CheckerBoardedSparseMatrixBase : public SparseMatrix
|
|||||||
public:
|
public:
|
||||||
virtual GridBase *RedBlackGrid(void)=0;
|
virtual GridBase *RedBlackGrid(void)=0;
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
// Query the even even properties to make algorithmic decisions
|
// Query the even even properties to make algorithmic decisions
|
||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
virtual RealD Mass(void) { return 0.0; };
|
virtual RealD Mass(void) { return 0.0; };
|
||||||
virtual int ConstEE(void) { return 1; }; // Disable assumptions unless overridden
|
virtual int ConstEE(void) { return 1; }; // Disable assumptions unless overridden
|
||||||
virtual int isTrivialEE(void) { return 0; }; // by a derived class that knows better
|
virtual int isTrivialEE(void) { return 0; }; // by a derived class that knows better
|
||||||
|
|
||||||
// half checkerboard operaions
|
// half checkerboard operaions
|
||||||
virtual void Meooe (const Field &in, Field &out)=0;
|
virtual void Meooe (const Field &in, Field &out)=0;
|
||||||
|
@ -95,6 +95,24 @@ public:
|
|||||||
Coeffs[order-1] = 1.;
|
Coeffs[order-1] = 1.;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// PB - more efficient low pass drops high modes above the low as 1/x uses all Chebyshev's.
|
||||||
|
// Similar kick effect below the threshold as Lanczos filter approach
|
||||||
|
void InitLowPass(RealD _lo,RealD _hi,int _order)
|
||||||
|
{
|
||||||
|
lo=_lo;
|
||||||
|
hi=_hi;
|
||||||
|
order=_order;
|
||||||
|
|
||||||
|
if(order < 2) exit(-1);
|
||||||
|
Coeffs.resize(order);
|
||||||
|
for(int j=0;j<order;j++){
|
||||||
|
RealD k=(order-1.0);
|
||||||
|
RealD s=std::cos( j*M_PI*(k+0.5)/order );
|
||||||
|
Coeffs[j] = s * 2.0/order;
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
void Init(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD))
|
void Init(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD))
|
||||||
{
|
{
|
||||||
lo=_lo;
|
lo=_lo;
|
||||||
@ -234,20 +252,20 @@ public:
|
|||||||
RealD xscale = 2.0/(hi-lo);
|
RealD xscale = 2.0/(hi-lo);
|
||||||
RealD mscale = -(hi+lo)/(hi-lo);
|
RealD mscale = -(hi+lo)/(hi-lo);
|
||||||
Linop.HermOp(T0,y);
|
Linop.HermOp(T0,y);
|
||||||
T1=y*xscale+in*mscale;
|
axpby(T1,xscale,mscale,y,in);
|
||||||
|
|
||||||
// sum = .5 c[0] T0 + c[1] T1
|
// sum = .5 c[0] T0 + c[1] T1
|
||||||
out = (0.5*Coeffs[0])*T0 + Coeffs[1]*T1;
|
// out = ()*T0 + Coeffs[1]*T1;
|
||||||
|
axpby(out,0.5*Coeffs[0],Coeffs[1],T0,T1);
|
||||||
for(int n=2;n<order;n++){
|
for(int n=2;n<order;n++){
|
||||||
|
|
||||||
Linop.HermOp(*Tn,y);
|
Linop.HermOp(*Tn,y);
|
||||||
|
// y=xscale*y+mscale*(*Tn);
|
||||||
y=xscale*y+mscale*(*Tn);
|
// *Tnp=2.0*y-(*Tnm);
|
||||||
|
// out=out+Coeffs[n]* (*Tnp);
|
||||||
*Tnp=2.0*y-(*Tnm);
|
axpby(y,xscale,mscale,y,(*Tn));
|
||||||
|
axpby(*Tnp,2.0,-1.0,y,(*Tnm));
|
||||||
out=out+Coeffs[n]* (*Tnp);
|
axpy(out,Coeffs[n],*Tnp,out);
|
||||||
|
|
||||||
// Cycle pointers to avoid copies
|
// Cycle pointers to avoid copies
|
||||||
Field *swizzle = Tnm;
|
Field *swizzle = Tnm;
|
||||||
Tnm =Tn;
|
Tnm =Tn;
|
||||||
|
129
Grid/algorithms/approx/JacobiPolynomial.h
Normal file
129
Grid/algorithms/approx/JacobiPolynomial.h
Normal file
@ -0,0 +1,129 @@
|
|||||||
|
#ifndef GRID_JACOBIPOLYNOMIAL_H
|
||||||
|
#define GRID_JACOBIPOLYNOMIAL_H
|
||||||
|
|
||||||
|
#include <Grid/algorithms/LinearOperator.h>
|
||||||
|
|
||||||
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
|
template<class Field>
|
||||||
|
class JacobiPolynomial : public OperatorFunction<Field> {
|
||||||
|
private:
|
||||||
|
using OperatorFunction<Field>::operator();
|
||||||
|
|
||||||
|
int order;
|
||||||
|
RealD hi;
|
||||||
|
RealD lo;
|
||||||
|
RealD alpha;
|
||||||
|
RealD beta;
|
||||||
|
|
||||||
|
public:
|
||||||
|
void csv(std::ostream &out){
|
||||||
|
csv(out,lo,hi);
|
||||||
|
}
|
||||||
|
void csv(std::ostream &out,RealD llo,RealD hhi){
|
||||||
|
RealD diff = hhi-llo;
|
||||||
|
RealD delta = diff*1.0e-5;
|
||||||
|
for (RealD x=llo-delta; x<=hhi; x+=delta) {
|
||||||
|
RealD f = approx(x);
|
||||||
|
out<< x<<" "<<f <<std::endl;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
JacobiPolynomial(){};
|
||||||
|
JacobiPolynomial(RealD _lo,RealD _hi,int _order,RealD _alpha, RealD _beta)
|
||||||
|
{
|
||||||
|
lo=_lo;
|
||||||
|
hi=_hi;
|
||||||
|
alpha=_alpha;
|
||||||
|
beta=_beta;
|
||||||
|
order=_order;
|
||||||
|
};
|
||||||
|
|
||||||
|
RealD approx(RealD x) // Convenience for plotting the approximation
|
||||||
|
{
|
||||||
|
RealD Tn;
|
||||||
|
RealD Tnm;
|
||||||
|
RealD Tnp;
|
||||||
|
|
||||||
|
RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo));
|
||||||
|
|
||||||
|
RealD T0=1.0;
|
||||||
|
RealD T1=(alpha-beta)*0.5+(alpha+beta+2.0)*0.5*y;
|
||||||
|
|
||||||
|
Tn =T1;
|
||||||
|
Tnm=T0;
|
||||||
|
for(int n=2;n<=order;n++){
|
||||||
|
RealD cnp = 2.0*n*(n+alpha+beta)*(2.0*n-2.0+alpha+beta);
|
||||||
|
RealD cny = (2.0*n-2.0+alpha+beta)*(2.0*n-1.0+alpha+beta)*(2.0*n+alpha+beta);
|
||||||
|
RealD cn1 = (2.0*n+alpha+beta-1.0)*(alpha*alpha-beta*beta);
|
||||||
|
RealD cnm = - 2.0*(n+alpha-1.0)*(n+beta-1.0)*(2.0*n+alpha+beta);
|
||||||
|
Tnp= ( cny * y *Tn + cn1 * Tn + cnm * Tnm )/ cnp;
|
||||||
|
Tnm=Tn;
|
||||||
|
Tn =Tnp;
|
||||||
|
}
|
||||||
|
return Tnp;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Implement the required interface
|
||||||
|
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
|
||||||
|
GridBase *grid=in.Grid();
|
||||||
|
|
||||||
|
int vol=grid->gSites();
|
||||||
|
|
||||||
|
Field T0(grid);
|
||||||
|
Field T1(grid);
|
||||||
|
Field T2(grid);
|
||||||
|
Field y(grid);
|
||||||
|
|
||||||
|
|
||||||
|
Field *Tnm = &T0;
|
||||||
|
Field *Tn = &T1;
|
||||||
|
Field *Tnp = &T2;
|
||||||
|
|
||||||
|
// RealD T0=1.0;
|
||||||
|
T0=in;
|
||||||
|
|
||||||
|
// RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo));
|
||||||
|
// = x * 2/(hi-lo) - (hi+lo)/(hi-lo)
|
||||||
|
Linop.HermOp(T0,y);
|
||||||
|
RealD xscale = 2.0/(hi-lo);
|
||||||
|
RealD mscale = -(hi+lo)/(hi-lo);
|
||||||
|
Linop.HermOp(T0,y);
|
||||||
|
y=y*xscale+in*mscale;
|
||||||
|
|
||||||
|
// RealD T1=(alpha-beta)*0.5+(alpha+beta+2.0)*0.5*y;
|
||||||
|
RealD halfAmB = (alpha-beta)*0.5;
|
||||||
|
RealD halfApBp2= (alpha+beta+2.0)*0.5;
|
||||||
|
T1 = halfAmB * in + halfApBp2*y;
|
||||||
|
|
||||||
|
for(int n=2;n<=order;n++){
|
||||||
|
|
||||||
|
Linop.HermOp(*Tn,y);
|
||||||
|
y=xscale*y+mscale*(*Tn);
|
||||||
|
|
||||||
|
RealD cnp = 2.0*n*(n+alpha+beta)*(2.0*n-2.0+alpha+beta);
|
||||||
|
RealD cny = (2.0*n-2.0+alpha+beta)*(2.0*n-1.0+alpha+beta)*(2.0*n+alpha+beta);
|
||||||
|
RealD cn1 = (2.0*n+alpha+beta-1.0)*(alpha*alpha-beta*beta);
|
||||||
|
RealD cnm = - 2.0*(n+alpha-1.0)*(n+beta-1.0)*(2.0*n+alpha+beta);
|
||||||
|
|
||||||
|
// Tnp= ( cny * y *Tn + cn1 * Tn + cnm * Tnm )/ cnp;
|
||||||
|
cny=cny/cnp;
|
||||||
|
cn1=cn1/cnp;
|
||||||
|
cn1=cn1/cnp;
|
||||||
|
cnm=cnm/cnp;
|
||||||
|
|
||||||
|
*Tnp=cny*y + cn1 *(*Tn) + cnm * (*Tnm);
|
||||||
|
|
||||||
|
// Cycle pointers to avoid copies
|
||||||
|
Field *swizzle = Tnm;
|
||||||
|
Tnm =Tn;
|
||||||
|
Tn =Tnp;
|
||||||
|
Tnp =swizzle;
|
||||||
|
}
|
||||||
|
out=*Tnp;
|
||||||
|
|
||||||
|
}
|
||||||
|
};
|
||||||
|
NAMESPACE_END(Grid);
|
||||||
|
#endif
|
@ -72,7 +72,6 @@ public:
|
|||||||
RealD guess = norm2(psi);
|
RealD guess = norm2(psi);
|
||||||
assert(std::isnan(guess) == 0);
|
assert(std::isnan(guess) == 0);
|
||||||
|
|
||||||
|
|
||||||
Linop.HermOpAndNorm(psi, mmp, d, b);
|
Linop.HermOpAndNorm(psi, mmp, d, b);
|
||||||
|
|
||||||
r = src - mmp;
|
r = src - mmp;
|
||||||
@ -154,18 +153,18 @@ public:
|
|||||||
RealD resnorm = std::sqrt(norm2(p));
|
RealD resnorm = std::sqrt(norm2(p));
|
||||||
RealD true_residual = resnorm / srcnorm;
|
RealD true_residual = resnorm / srcnorm;
|
||||||
|
|
||||||
std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k << std::endl;
|
std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k
|
||||||
std::cout << GridLogMessage << "\tComputed residual " << std::sqrt(cp / ssq)<<std::endl;
|
<< "\tComputed residual " << std::sqrt(cp / ssq)
|
||||||
std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
|
<< "\tTrue residual " << true_residual
|
||||||
std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
|
<< "\tTarget " << Tolerance << std::endl;
|
||||||
|
|
||||||
std::cout << GridLogMessage << "Time breakdown "<<std::endl;
|
std::cout << GridLogIterative << "Time breakdown "<<std::endl;
|
||||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
std::cout << GridLogIterative << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
std::cout << GridLogIterative << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||||
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
|
std::cout << GridLogIterative << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
|
||||||
std::cout << GridLogMessage << "\tInner " << InnerTimer.Elapsed() <<std::endl;
|
std::cout << GridLogIterative << "\tInner " << InnerTimer.Elapsed() <<std::endl;
|
||||||
std::cout << GridLogMessage << "\tAxpyNorm " << AxpyNormTimer.Elapsed() <<std::endl;
|
std::cout << GridLogIterative << "\tAxpyNorm " << AxpyNormTimer.Elapsed() <<std::endl;
|
||||||
std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
|
std::cout << GridLogIterative << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
|
||||||
|
|
||||||
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
|
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
|
||||||
|
|
||||||
|
@ -43,6 +43,11 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
template<class Field>
|
template<class Field>
|
||||||
void basisOrthogonalize(std::vector<Field> &basis,Field &w,int k)
|
void basisOrthogonalize(std::vector<Field> &basis,Field &w,int k)
|
||||||
{
|
{
|
||||||
|
// If assume basis[j] are already orthonormal,
|
||||||
|
// can take all inner products in parallel saving 2x bandwidth
|
||||||
|
// Save 3x bandwidth on the second line of loop.
|
||||||
|
// perhaps 2.5x speed up.
|
||||||
|
// 2x overall in Multigrid Lanczos
|
||||||
for(int j=0; j<k; ++j){
|
for(int j=0; j<k; ++j){
|
||||||
auto ip = innerProduct(basis[j],w);
|
auto ip = innerProduct(basis[j],w);
|
||||||
w = w - ip*basis[j];
|
w = w - ip*basis[j];
|
||||||
@ -282,7 +287,7 @@ public:
|
|||||||
RealD _eresid, // resid in lmdue deficit
|
RealD _eresid, // resid in lmdue deficit
|
||||||
int _MaxIter, // Max iterations
|
int _MaxIter, // Max iterations
|
||||||
RealD _betastp=0.0, // if beta(k) < betastp: converged
|
RealD _betastp=0.0, // if beta(k) < betastp: converged
|
||||||
int _MinRestart=1, int _orth_period = 1,
|
int _MinRestart=0, int _orth_period = 1,
|
||||||
IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) :
|
IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) :
|
||||||
SimpleTester(HermOp), _PolyOp(PolyOp), _HermOp(HermOp), _Tester(Tester),
|
SimpleTester(HermOp), _PolyOp(PolyOp), _HermOp(HermOp), _Tester(Tester),
|
||||||
Nstop(_Nstop) , Nk(_Nk), Nm(_Nm),
|
Nstop(_Nstop) , Nk(_Nk), Nm(_Nm),
|
||||||
@ -347,7 +352,7 @@ until convergence
|
|||||||
GridBase *grid = src.Grid();
|
GridBase *grid = src.Grid();
|
||||||
assert(grid == evec[0].Grid());
|
assert(grid == evec[0].Grid());
|
||||||
|
|
||||||
GridLogIRL.TimingMode(1);
|
// GridLogIRL.TimingMode(1);
|
||||||
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
|
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
|
||||||
std::cout << GridLogIRL <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 / "<< MaxIter<< std::endl;
|
std::cout << GridLogIRL <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 / "<< MaxIter<< std::endl;
|
||||||
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
|
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
|
||||||
@ -577,11 +582,11 @@ until convergence
|
|||||||
/* Saad PP. 195
|
/* Saad PP. 195
|
||||||
1. Choose an initial vector v1 of 2-norm unity. Set β1 ≡ 0, v0 ≡ 0
|
1. Choose an initial vector v1 of 2-norm unity. Set β1 ≡ 0, v0 ≡ 0
|
||||||
2. For k = 1,2,...,m Do:
|
2. For k = 1,2,...,m Do:
|
||||||
3. wk:=Avk−βkv_{k−1}
|
3. wk:=Avk - b_k v_{k-1}
|
||||||
4. αk:=(wk,vk) //
|
4. ak:=(wk,vk) //
|
||||||
5. wk:=wk−αkvk // wk orthog vk
|
5. wk:=wk-akvk // wk orthog vk
|
||||||
6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
|
6. bk+1 := ||wk||_2. If b_k+1 = 0 then Stop
|
||||||
7. vk+1 := wk/βk+1
|
7. vk+1 := wk/b_k+1
|
||||||
8. EndDo
|
8. EndDo
|
||||||
*/
|
*/
|
||||||
void step(std::vector<RealD>& lmd,
|
void step(std::vector<RealD>& lmd,
|
||||||
@ -589,6 +594,7 @@ until convergence
|
|||||||
std::vector<Field>& evec,
|
std::vector<Field>& evec,
|
||||||
Field& w,int Nm,int k)
|
Field& w,int Nm,int k)
|
||||||
{
|
{
|
||||||
|
std::cout<<GridLogIRL << "Lanczos step " <<k<<std::endl;
|
||||||
const RealD tiny = 1.0e-20;
|
const RealD tiny = 1.0e-20;
|
||||||
assert( k< Nm );
|
assert( k< Nm );
|
||||||
|
|
||||||
@ -600,20 +606,20 @@ until convergence
|
|||||||
|
|
||||||
if(k>0) w -= lme[k-1] * evec[k-1];
|
if(k>0) w -= lme[k-1] * evec[k-1];
|
||||||
|
|
||||||
ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk)
|
ComplexD zalph = innerProduct(evec_k,w);
|
||||||
RealD alph = real(zalph);
|
RealD alph = real(zalph);
|
||||||
|
|
||||||
w = w - alph * evec_k;// 5. wk:=wk−αkvk
|
w = w - alph * evec_k;
|
||||||
|
|
||||||
RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
|
RealD beta = normalise(w);
|
||||||
// 7. vk+1 := wk/βk+1
|
|
||||||
|
|
||||||
lmd[k] = alph;
|
lmd[k] = alph;
|
||||||
lme[k] = beta;
|
lme[k] = beta;
|
||||||
|
|
||||||
if (k>0 && k % orth_period == 0) {
|
if ( (k>0) && ( (k % orth_period) == 0 )) {
|
||||||
|
std::cout<<GridLogIRL << "Orthogonalising " <<k<<std::endl;
|
||||||
orthogonalize(w,evec,k); // orthonormalise
|
orthogonalize(w,evec,k); // orthonormalise
|
||||||
std::cout<<GridLogIRL << "Orthogonalised " <<std::endl;
|
std::cout<<GridLogIRL << "Orthogonalised " <<k<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(k < Nm-1) evec[k+1] = w;
|
if(k < Nm-1) evec[k+1] = w;
|
||||||
@ -621,6 +627,8 @@ until convergence
|
|||||||
std::cout<<GridLogIRL << "alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl;
|
std::cout<<GridLogIRL << "alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl;
|
||||||
if ( beta < tiny )
|
if ( beta < tiny )
|
||||||
std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl;
|
std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl;
|
||||||
|
|
||||||
|
std::cout<<GridLogIRL << "Lanczos step complete " <<k<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void diagonalize_Eigen(std::vector<RealD>& lmd, std::vector<RealD>& lme,
|
void diagonalize_Eigen(std::vector<RealD>& lmd, std::vector<RealD>& lme,
|
||||||
|
@ -33,25 +33,29 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Take a matrix and form an NE solver calling a Herm solver
|
// Take a matrix and form an NE solver calling a Herm solver
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
template<class Field> class NormalEquations : public OperatorFunction<Field>{
|
template<class Field> class NormalEquations {
|
||||||
private:
|
private:
|
||||||
SparseMatrixBase<Field> & _Matrix;
|
SparseMatrixBase<Field> & _Matrix;
|
||||||
OperatorFunction<Field> & _HermitianSolver;
|
OperatorFunction<Field> & _HermitianSolver;
|
||||||
|
LinearFunction<Field> & _Guess;
|
||||||
public:
|
public:
|
||||||
|
|
||||||
/////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////
|
||||||
// Wrap the usual normal equations trick
|
// Wrap the usual normal equations trick
|
||||||
/////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////
|
||||||
NormalEquations(SparseMatrixBase<Field> &Matrix, OperatorFunction<Field> &HermitianSolver)
|
NormalEquations(SparseMatrixBase<Field> &Matrix, OperatorFunction<Field> &HermitianSolver,
|
||||||
: _Matrix(Matrix), _HermitianSolver(HermitianSolver) {};
|
LinearFunction<Field> &Guess)
|
||||||
|
: _Matrix(Matrix), _HermitianSolver(HermitianSolver), _Guess(Guess) {};
|
||||||
|
|
||||||
void operator() (const Field &in, Field &out){
|
void operator() (const Field &in, Field &out){
|
||||||
|
|
||||||
Field src(in.Grid());
|
Field src(in.Grid());
|
||||||
|
Field tmp(in.Grid());
|
||||||
|
|
||||||
|
MdagMLinearOperator<SparseMatrixBase<Field>,Field> MdagMOp(_Matrix);
|
||||||
_Matrix.Mdag(in,src);
|
_Matrix.Mdag(in,src);
|
||||||
_HermitianSolver(src,out); // Mdag M out = Mdag in
|
_Guess(src,out);
|
||||||
|
_HermitianSolver(MdagMOp,src,out); // Mdag M out = Mdag in
|
||||||
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -30,12 +30,12 @@ template<class Field> class PowerMethod
|
|||||||
RealD vden = norm2(src_n);
|
RealD vden = norm2(src_n);
|
||||||
RealD na = vnum/vden;
|
RealD na = vnum/vden;
|
||||||
|
|
||||||
if ( (fabs(evalMaxApprox/na - 1.0) < 0.01) || (i==_MAX_ITER_EST_-1) ) {
|
if ( (fabs(evalMaxApprox/na - 1.0) < 0.001) || (i==_MAX_ITER_EST_-1) ) {
|
||||||
evalMaxApprox = na;
|
evalMaxApprox = na;
|
||||||
|
std::cout << GridLogMessage << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
|
||||||
return evalMaxApprox;
|
return evalMaxApprox;
|
||||||
}
|
}
|
||||||
evalMaxApprox = na;
|
evalMaxApprox = na;
|
||||||
std::cout << GridLogMessage << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
|
|
||||||
src_n = tmp;
|
src_n = tmp;
|
||||||
}
|
}
|
||||||
assert(0);
|
assert(0);
|
||||||
|
@ -38,10 +38,11 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
|
#define GCRLogLevel std::cout << GridLogMessage <<std::string(level,'\t')<< " Level "<<level<<" "
|
||||||
|
|
||||||
template<class Field>
|
template<class Field>
|
||||||
class PrecGeneralisedConjugateResidual : public OperatorFunction<Field> {
|
class PrecGeneralisedConjugateResidual : public LinearFunction<Field> {
|
||||||
public:
|
public:
|
||||||
using OperatorFunction<Field>::operator();
|
|
||||||
|
|
||||||
RealD Tolerance;
|
RealD Tolerance;
|
||||||
Integer MaxIterations;
|
Integer MaxIterations;
|
||||||
@ -49,23 +50,29 @@ public:
|
|||||||
int mmax;
|
int mmax;
|
||||||
int nstep;
|
int nstep;
|
||||||
int steps;
|
int steps;
|
||||||
|
int level;
|
||||||
GridStopWatch PrecTimer;
|
GridStopWatch PrecTimer;
|
||||||
GridStopWatch MatTimer;
|
GridStopWatch MatTimer;
|
||||||
GridStopWatch LinalgTimer;
|
GridStopWatch LinalgTimer;
|
||||||
|
|
||||||
LinearFunction<Field> &Preconditioner;
|
LinearFunction<Field> &Preconditioner;
|
||||||
|
LinearOperatorBase<Field> &Linop;
|
||||||
|
|
||||||
PrecGeneralisedConjugateResidual(RealD tol,Integer maxit,LinearFunction<Field> &Prec,int _mmax,int _nstep) :
|
void Level(int lv) { level=lv; };
|
||||||
|
|
||||||
|
PrecGeneralisedConjugateResidual(RealD tol,Integer maxit,LinearOperatorBase<Field> &_Linop,LinearFunction<Field> &Prec,int _mmax,int _nstep) :
|
||||||
Tolerance(tol),
|
Tolerance(tol),
|
||||||
MaxIterations(maxit),
|
MaxIterations(maxit),
|
||||||
|
Linop(_Linop),
|
||||||
Preconditioner(Prec),
|
Preconditioner(Prec),
|
||||||
mmax(_mmax),
|
mmax(_mmax),
|
||||||
nstep(_nstep)
|
nstep(_nstep)
|
||||||
{
|
{
|
||||||
|
level=1;
|
||||||
verbose=1;
|
verbose=1;
|
||||||
};
|
};
|
||||||
|
|
||||||
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
|
void operator() (const Field &src, Field &psi){
|
||||||
|
|
||||||
psi=Zero();
|
psi=Zero();
|
||||||
RealD cp, ssq,rsq;
|
RealD cp, ssq,rsq;
|
||||||
@ -84,9 +91,9 @@ public:
|
|||||||
steps=0;
|
steps=0;
|
||||||
for(int k=0;k<MaxIterations;k++){
|
for(int k=0;k<MaxIterations;k++){
|
||||||
|
|
||||||
cp=GCRnStep(Linop,src,psi,rsq);
|
cp=GCRnStep(src,psi,rsq);
|
||||||
|
|
||||||
std::cout<<GridLogMessage<<"VPGCR("<<mmax<<","<<nstep<<") "<< steps <<" steps cp = "<<cp<<" target "<<rsq <<std::endl;
|
GCRLogLevel <<"PGCR("<<mmax<<","<<nstep<<") "<< steps <<" steps cp = "<<cp<<" target "<<rsq <<std::endl;
|
||||||
|
|
||||||
if(cp<rsq) {
|
if(cp<rsq) {
|
||||||
|
|
||||||
@ -95,24 +102,26 @@ public:
|
|||||||
Linop.HermOp(psi,r);
|
Linop.HermOp(psi,r);
|
||||||
axpy(r,-1.0,src,r);
|
axpy(r,-1.0,src,r);
|
||||||
RealD tr = norm2(r);
|
RealD tr = norm2(r);
|
||||||
std::cout<<GridLogMessage<<"PrecGeneralisedConjugateResidual: Converged on iteration " <<steps
|
GCRLogLevel<<"PGCR: Converged on iteration " <<steps
|
||||||
<< " computed residual "<<sqrt(cp/ssq)
|
<< " computed residual "<<sqrt(cp/ssq)
|
||||||
<< " true residual " <<sqrt(tr/ssq)
|
<< " true residual " <<sqrt(tr/ssq)
|
||||||
<< " target " <<Tolerance <<std::endl;
|
<< " target " <<Tolerance <<std::endl;
|
||||||
|
|
||||||
std::cout<<GridLogMessage<<"VPGCR Time elapsed: Total "<< SolverTimer.Elapsed() <<std::endl;
|
GCRLogLevel<<"PGCR Time elapsed: Total "<< SolverTimer.Elapsed() <<std::endl;
|
||||||
std::cout<<GridLogMessage<<"VPGCR Time elapsed: Precon "<< PrecTimer.Elapsed() <<std::endl;
|
/*
|
||||||
std::cout<<GridLogMessage<<"VPGCR Time elapsed: Matrix "<< MatTimer.Elapsed() <<std::endl;
|
GCRLogLevel<<"PGCR Time elapsed: Precon "<< PrecTimer.Elapsed() <<std::endl;
|
||||||
std::cout<<GridLogMessage<<"VPGCR Time elapsed: Linalg "<< LinalgTimer.Elapsed() <<std::endl;
|
GCRLogLevel<<"PGCR Time elapsed: Matrix "<< MatTimer.Elapsed() <<std::endl;
|
||||||
|
GCRLogLevel<<"PGCR Time elapsed: Linalg "<< LinalgTimer.Elapsed() <<std::endl;
|
||||||
|
*/
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
std::cout<<GridLogMessage<<"Variable Preconditioned GCR did not converge"<<std::endl;
|
GCRLogLevel<<"Variable Preconditioned GCR did not converge"<<std::endl;
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
RealD GCRnStep(LinearOperatorBase<Field> &Linop,const Field &src, Field &psi,RealD rsq){
|
RealD GCRnStep(const Field &src, Field &psi,RealD rsq){
|
||||||
|
|
||||||
RealD cp;
|
RealD cp;
|
||||||
RealD a, b;
|
RealD a, b;
|
||||||
@ -134,9 +143,7 @@ public:
|
|||||||
std::vector<Field> p(mmax,grid);
|
std::vector<Field> p(mmax,grid);
|
||||||
std::vector<RealD> qq(mmax);
|
std::vector<RealD> qq(mmax);
|
||||||
|
|
||||||
std::cout<<GridLogIterative<< " ************** "<< std::endl;
|
GCRLogLevel<< "PGCR nStep("<<nstep<<")"<<std::endl;
|
||||||
std::cout<<GridLogIterative<< " GCRnStep("<<nstep<<")"<<std::endl;
|
|
||||||
std::cout<<GridLogIterative<< " ************** "<< std::endl;
|
|
||||||
|
|
||||||
//////////////////////////////////
|
//////////////////////////////////
|
||||||
// initial guess x0 is taken as nonzero.
|
// initial guess x0 is taken as nonzero.
|
||||||
@ -150,35 +157,15 @@ public:
|
|||||||
LinalgTimer.Start();
|
LinalgTimer.Start();
|
||||||
r=src-Az;
|
r=src-Az;
|
||||||
LinalgTimer.Stop();
|
LinalgTimer.Stop();
|
||||||
std::cout<<GridLogIterative<< " GCRnStep true residual r = src - A psi "<<norm2(r) <<std::endl;
|
GCRLogLevel<< "PGCR true residual r = src - A psi "<<norm2(r) <<std::endl;
|
||||||
|
|
||||||
/////////////////////
|
/////////////////////
|
||||||
// p = Prec(r)
|
// p = Prec(r)
|
||||||
/////////////////////
|
/////////////////////
|
||||||
|
|
||||||
std::cout<<GridLogIterative<< " GCRnStep apply preconditioner z= M^-1 r "<< std::endl;
|
|
||||||
std::cout<<GridLogIterative<< " --------------------------------------- "<< std::endl;
|
|
||||||
PrecTimer.Start();
|
PrecTimer.Start();
|
||||||
Preconditioner(r,z);
|
Preconditioner(r,z);
|
||||||
PrecTimer.Stop();
|
PrecTimer.Stop();
|
||||||
std::cout<<GridLogIterative<< " --------------------------------------- "<< std::endl;
|
|
||||||
std::cout<<GridLogIterative<< " GCRnStep called Preconditioner z "<< norm2(z) <<std::endl;
|
|
||||||
|
|
||||||
// MatTimer.Start();
|
|
||||||
// Linop.HermOp(z,tmp);
|
|
||||||
// MatTimer.Stop();
|
|
||||||
|
|
||||||
// LinalgTimer.Start();
|
|
||||||
// ttmp=tmp;
|
|
||||||
// tmp=tmp-r;
|
|
||||||
// LinalgTimer.Stop();
|
|
||||||
|
|
||||||
/*
|
|
||||||
std::cout<<GridLogMessage<<r<<std::endl;
|
|
||||||
std::cout<<GridLogMessage<<z<<std::endl;
|
|
||||||
std::cout<<GridLogMessage<<ttmp<<std::endl;
|
|
||||||
std::cout<<GridLogMessage<<tmp<<std::endl;
|
|
||||||
*/
|
|
||||||
|
|
||||||
MatTimer.Start();
|
MatTimer.Start();
|
||||||
Linop.HermOpAndNorm(z,Az,zAz,zAAz);
|
Linop.HermOpAndNorm(z,Az,zAz,zAAz);
|
||||||
@ -190,7 +177,6 @@ public:
|
|||||||
p[0]= z;
|
p[0]= z;
|
||||||
q[0]= Az;
|
q[0]= Az;
|
||||||
qq[0]= zAAz;
|
qq[0]= zAAz;
|
||||||
std::cout<<GridLogIterative<< " GCRnStep p0=z, q0 = A p0 " <<std::endl;
|
|
||||||
|
|
||||||
cp =norm2(r);
|
cp =norm2(r);
|
||||||
LinalgTimer.Stop();
|
LinalgTimer.Stop();
|
||||||
@ -212,20 +198,16 @@ public:
|
|||||||
cp = axpy_norm(r,-a,q[peri_k],r);
|
cp = axpy_norm(r,-a,q[peri_k],r);
|
||||||
LinalgTimer.Stop();
|
LinalgTimer.Stop();
|
||||||
|
|
||||||
std::cout<<GridLogMessage<< " VPGCR_step["<<steps<<"] resid " << cp << " target " <<rsq<<std::endl;
|
GCRLogLevel<< "PGCR step["<<steps<<"] resid " << cp << " target " <<rsq<<std::endl;
|
||||||
|
|
||||||
if((k==nstep-1)||(cp<rsq)){
|
if((k==nstep-1)||(cp<rsq)){
|
||||||
return cp;
|
return cp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::cout<<GridLogIterative<< " GCRnStep apply preconditioner z= M^-1 r "<< std::endl;
|
|
||||||
std::cout<<GridLogIterative<< " --------------------------------------- "<< std::endl;
|
|
||||||
PrecTimer.Start();
|
PrecTimer.Start();
|
||||||
Preconditioner(r,z);// solve Az = r
|
Preconditioner(r,z);// solve Az = r
|
||||||
PrecTimer.Stop();
|
PrecTimer.Stop();
|
||||||
std::cout<<GridLogIterative<< " --------------------------------------- "<< std::endl;
|
|
||||||
std::cout<<GridLogIterative<< " GCRnStep called Preconditioner z "<< norm2(z) <<std::endl;
|
|
||||||
|
|
||||||
MatTimer.Start();
|
MatTimer.Start();
|
||||||
Linop.HermOpAndNorm(z,Az,zAz,zAAz);
|
Linop.HermOpAndNorm(z,Az,zAz,zAAz);
|
||||||
|
@ -6,6 +6,12 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
MemoryStats *MemoryProfiler::stats = nullptr;
|
MemoryStats *MemoryProfiler::stats = nullptr;
|
||||||
bool MemoryProfiler::debug = false;
|
bool MemoryProfiler::debug = false;
|
||||||
|
|
||||||
|
#ifdef GRID_NVCC
|
||||||
|
#define SMALL_LIMIT (0)
|
||||||
|
#else
|
||||||
|
#define SMALL_LIMIT (4096)
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef POINTER_CACHE
|
#ifdef POINTER_CACHE
|
||||||
int PointerCache::victim;
|
int PointerCache::victim;
|
||||||
|
|
||||||
@ -13,7 +19,7 @@ PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::Ncache];
|
|||||||
|
|
||||||
void *PointerCache::Insert(void *ptr,size_t bytes) {
|
void *PointerCache::Insert(void *ptr,size_t bytes) {
|
||||||
|
|
||||||
if (bytes < 4096 ) return ptr;
|
if (bytes < SMALL_LIMIT ) return ptr;
|
||||||
|
|
||||||
#ifdef GRID_OMP
|
#ifdef GRID_OMP
|
||||||
assert(omp_in_parallel()==0);
|
assert(omp_in_parallel()==0);
|
||||||
@ -50,7 +56,7 @@ void *PointerCache::Insert(void *ptr,size_t bytes) {
|
|||||||
|
|
||||||
void *PointerCache::Lookup(size_t bytes) {
|
void *PointerCache::Lookup(size_t bytes) {
|
||||||
|
|
||||||
if (bytes < 4096 ) return NULL;
|
if (bytes < SMALL_LIMIT ) return NULL;
|
||||||
|
|
||||||
#ifdef GRID_OMP
|
#ifdef GRID_OMP
|
||||||
assert(omp_in_parallel()==0);
|
assert(omp_in_parallel()==0);
|
||||||
|
@ -49,8 +49,13 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
#ifdef POINTER_CACHE
|
#ifdef POINTER_CACHE
|
||||||
class PointerCache {
|
class PointerCache {
|
||||||
private:
|
private:
|
||||||
|
/*Pinning pages is costly*/
|
||||||
|
/*Could maintain separate large and small allocation caches*/
|
||||||
|
#ifdef GRID_NVCC
|
||||||
|
static const int Ncache=128;
|
||||||
|
#else
|
||||||
static const int Ncache=8;
|
static const int Ncache=8;
|
||||||
|
#endif
|
||||||
static int victim;
|
static int victim;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@ -63,7 +68,6 @@ private:
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
|
||||||
static void *Insert(void *ptr,size_t bytes) ;
|
static void *Insert(void *ptr,size_t bytes) ;
|
||||||
static void *Lookup(size_t bytes) ;
|
static void *Lookup(size_t bytes) ;
|
||||||
|
|
||||||
@ -170,6 +174,7 @@ public:
|
|||||||
// Unified (managed) memory
|
// Unified (managed) memory
|
||||||
////////////////////////////////////
|
////////////////////////////////////
|
||||||
if ( ptr == (_Tp *) NULL ) {
|
if ( ptr == (_Tp *) NULL ) {
|
||||||
|
// printf(" alignedAllocater cache miss %ld bytes ",bytes); BACKTRACEFP(stdout);
|
||||||
auto err = cudaMallocManaged((void **)&ptr,bytes);
|
auto err = cudaMallocManaged((void **)&ptr,bytes);
|
||||||
if( err != cudaSuccess ) {
|
if( err != cudaSuccess ) {
|
||||||
ptr = (_Tp *) NULL;
|
ptr = (_Tp *) NULL;
|
||||||
|
@ -37,19 +37,18 @@ template<class iobj> inline void LatticeCoordinate(Lattice<iobj> &l,int mu)
|
|||||||
GridBase *grid = l.Grid();
|
GridBase *grid = l.Grid();
|
||||||
int Nsimd = grid->iSites();
|
int Nsimd = grid->iSites();
|
||||||
|
|
||||||
Coordinate gcoor;
|
|
||||||
ExtractBuffer<scalar_type> mergebuf(Nsimd);
|
|
||||||
|
|
||||||
vector_type vI;
|
|
||||||
auto l_v = l.View();
|
auto l_v = l.View();
|
||||||
for(int o=0;o<grid->oSites();o++){
|
thread_for( o, grid->oSites(), {
|
||||||
|
vector_type vI;
|
||||||
|
Coordinate gcoor;
|
||||||
|
ExtractBuffer<scalar_type> mergebuf(Nsimd);
|
||||||
for(int i=0;i<grid->iSites();i++){
|
for(int i=0;i<grid->iSites();i++){
|
||||||
grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor);
|
grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor);
|
||||||
mergebuf[i]=(Integer)gcoor[mu];
|
mergebuf[i]=(Integer)gcoor[mu];
|
||||||
}
|
}
|
||||||
merge<vector_type,scalar_type>(vI,mergebuf);
|
merge<vector_type,scalar_type>(vI,mergebuf);
|
||||||
l_v[o]=vI;
|
l_v[o]=vI;
|
||||||
}
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
// LatticeCoordinate();
|
// LatticeCoordinate();
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/lattice/Lattice_transfer.h
|
Source file: ./lib/lattice/Lattice_transfer.h
|
||||||
@ -83,12 +82,35 @@ template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Latti
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<class vobj,class CComplex,int nbasis>
|
template<class vobj,class CComplex,int nbasis>
|
||||||
inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
||||||
|
const Lattice<vobj> &fineData,
|
||||||
|
const std::vector<Lattice<vobj> > &Basis)
|
||||||
|
{
|
||||||
|
GridBase * fine = fineData.Grid();
|
||||||
|
GridBase * coarse= coarseData.Grid();
|
||||||
|
|
||||||
|
Lattice<CComplex> ip(coarse);
|
||||||
|
|
||||||
|
// auto fineData_ = fineData.View();
|
||||||
|
auto coarseData_ = coarseData.View();
|
||||||
|
auto ip_ = ip.View();
|
||||||
|
for(int v=0;v<nbasis;v++) {
|
||||||
|
blockInnerProduct(ip,Basis[v],fineData);
|
||||||
|
accelerator_for( sc, coarse->oSites(), vobj::Nsimd(), {
|
||||||
|
coalescedWrite(coarseData_[sc](v),ip_(sc));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class vobj,class CComplex,int nbasis>
|
||||||
|
inline void blockProject1(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
||||||
const Lattice<vobj> &fineData,
|
const Lattice<vobj> &fineData,
|
||||||
const std::vector<Lattice<vobj> > &Basis)
|
const std::vector<Lattice<vobj> > &Basis)
|
||||||
{
|
{
|
||||||
|
typedef iVector<CComplex,nbasis > coarseSiteData;
|
||||||
|
coarseSiteData elide;
|
||||||
|
typedef decltype(coalescedRead(elide)) ScalarComplex;
|
||||||
GridBase * fine = fineData.Grid();
|
GridBase * fine = fineData.Grid();
|
||||||
GridBase * coarse= coarseData.Grid();
|
GridBase * coarse= coarseData.Grid();
|
||||||
int _ndimension = coarse->_ndimension;
|
int _ndimension = coarse->_ndimension;
|
||||||
@ -106,26 +128,40 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
|||||||
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
|
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
|
||||||
assert(block_r[d]*coarse->_rdimensions[d] == fine->_rdimensions[d]);
|
assert(block_r[d]*coarse->_rdimensions[d] == fine->_rdimensions[d]);
|
||||||
}
|
}
|
||||||
|
int blockVol = fine->oSites()/coarse->oSites();
|
||||||
|
|
||||||
coarseData=Zero();
|
coarseData=Zero();
|
||||||
|
|
||||||
auto fineData_ = fineData.View();
|
auto fineData_ = fineData.View();
|
||||||
auto coarseData_ = coarseData.View();
|
auto coarseData_ = coarseData.View();
|
||||||
// Loop over coars parallel, and then loop over fine associated with coarse.
|
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
thread_for( sf, fine->oSites(), {
|
// To make this lock free, loop over coars parallel, and then loop over fine associated with coarse.
|
||||||
int sc;
|
// Otherwise do fine inner product per site, and make the update atomic
|
||||||
Coordinate coor_c(_ndimension);
|
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
Coordinate coor_f(_ndimension);
|
accelerator_for( sci, nbasis*coarse->oSites(), vobj::Nsimd(), {
|
||||||
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
|
|
||||||
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
|
|
||||||
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
|
|
||||||
|
|
||||||
thread_critical {
|
auto sc=sci/nbasis;
|
||||||
for(int i=0;i<nbasis;i++) {
|
auto i=sci%nbasis;
|
||||||
auto Basis_ = Basis[i].View();
|
auto Basis_ = Basis[i].View();
|
||||||
coarseData_[sc](i)=coarseData_[sc](i) + innerProduct(Basis_[sf],fineData_[sf]);
|
|
||||||
}
|
Coordinate coor_c(_ndimension);
|
||||||
|
Lexicographic::CoorFromIndex(coor_c,sc,coarse->_rdimensions); // Block coordinate
|
||||||
|
|
||||||
|
int sf;
|
||||||
|
decltype(innerProduct(Basis_(sf),fineData_(sf))) reduce=Zero();
|
||||||
|
|
||||||
|
for(int sb=0;sb<blockVol;sb++){
|
||||||
|
|
||||||
|
Coordinate coor_b(_ndimension);
|
||||||
|
Coordinate coor_f(_ndimension);
|
||||||
|
|
||||||
|
Lexicographic::CoorFromIndex(coor_b,sb,block_r);
|
||||||
|
for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d]+coor_b[d];
|
||||||
|
Lexicographic::IndexFromCoor(coor_f,sf,fine->_rdimensions);
|
||||||
|
|
||||||
|
reduce=reduce+innerProduct(Basis_(sf),fineData_(sf));
|
||||||
}
|
}
|
||||||
|
coalescedWrite(coarseData_[sc](i),reduce);
|
||||||
});
|
});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -160,7 +196,7 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
|
|||||||
auto fineY_ = fineY.View();
|
auto fineY_ = fineY.View();
|
||||||
auto coarseA_= coarseA.View();
|
auto coarseA_= coarseA.View();
|
||||||
|
|
||||||
thread_for(sf, fine->oSites(), {
|
accelerator_for(sf, fine->oSites(), CComplex::Nsimd(), {
|
||||||
|
|
||||||
int sc;
|
int sc;
|
||||||
Coordinate coor_c(_ndimension);
|
Coordinate coor_c(_ndimension);
|
||||||
@ -171,7 +207,7 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
|
|||||||
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
|
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
|
||||||
|
|
||||||
// z = A x + y
|
// z = A x + y
|
||||||
fineZ_[sf]=coarseA_[sc]*fineX_[sf]+fineY_[sf];
|
coalescedWrite(fineZ_[sf],coarseA_(sc)*fineX_(sf)+fineY_(sf));
|
||||||
|
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -196,7 +232,7 @@ inline void blockInnerProduct(Lattice<CComplex> &CoarseInner,
|
|||||||
|
|
||||||
fine_inner = localInnerProduct(fineX,fineY);
|
fine_inner = localInnerProduct(fineX,fineY);
|
||||||
blockSum(coarse_inner,fine_inner);
|
blockSum(coarse_inner,fine_inner);
|
||||||
thread_for(ss, coarse->oSites(),{
|
accelerator_for(ss, coarse->oSites(), 1, {
|
||||||
CoarseInner_[ss] = coarse_inner_[ss];
|
CoarseInner_[ss] = coarse_inner_[ss];
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -226,23 +262,29 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
|
|||||||
for(int d=0 ; d<_ndimension;d++){
|
for(int d=0 ; d<_ndimension;d++){
|
||||||
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
|
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
|
||||||
}
|
}
|
||||||
|
int blockVol = fine->oSites()/coarse->oSites();
|
||||||
|
|
||||||
// Turn this around to loop threaded over sc and interior loop
|
// Turn this around to loop threaded over sc and interior loop
|
||||||
// over sf would thread better
|
// over sf would thread better
|
||||||
coarseData=Zero();
|
|
||||||
auto coarseData_ = coarseData.View();
|
auto coarseData_ = coarseData.View();
|
||||||
auto fineData_ = fineData.View();
|
auto fineData_ = fineData.View();
|
||||||
|
|
||||||
thread_for(sf,fine->oSites(),{
|
accelerator_for(sc,coarse->oSites(),1,{
|
||||||
int sc;
|
|
||||||
|
// One thread per sub block
|
||||||
Coordinate coor_c(_ndimension);
|
Coordinate coor_c(_ndimension);
|
||||||
Coordinate coor_f(_ndimension);
|
Lexicographic::CoorFromIndex(coor_c,sc,coarse->_rdimensions); // Block coordinate
|
||||||
|
coarseData_[sc]=Zero();
|
||||||
|
|
||||||
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
|
for(int sb=0;sb<blockVol;sb++){
|
||||||
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
|
|
||||||
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
|
int sf;
|
||||||
|
Coordinate coor_b(_ndimension);
|
||||||
|
Coordinate coor_f(_ndimension);
|
||||||
|
Lexicographic::CoorFromIndex(coor_b,sb,block_r); // Block sub coordinate
|
||||||
|
for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d];
|
||||||
|
Lexicographic::IndexFromCoor(coor_f,sf,fine->_rdimensions);
|
||||||
|
|
||||||
thread_critical {
|
|
||||||
coarseData_[sc]=coarseData_[sc]+fineData_[sf];
|
coarseData_[sc]=coarseData_[sc]+fineData_[sf];
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -296,6 +338,7 @@ inline void blockOrthogonalise(Lattice<CComplex> &ip,std::vector<Lattice<vobj> >
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
template<class vobj,class CComplex,int nbasis>
|
template<class vobj,class CComplex,int nbasis>
|
||||||
inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
|
inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
|
||||||
Lattice<vobj> &fineData,
|
Lattice<vobj> &fineData,
|
||||||
@ -321,7 +364,7 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
|
|||||||
auto coarseData_ = coarseData.View();
|
auto coarseData_ = coarseData.View();
|
||||||
|
|
||||||
// Loop with a cache friendly loop ordering
|
// Loop with a cache friendly loop ordering
|
||||||
thread_for(sf,fine->oSites(),{
|
accelerator_for(sf,fine->oSites(),1,{
|
||||||
int sc;
|
int sc;
|
||||||
Coordinate coor_c(_ndimension);
|
Coordinate coor_c(_ndimension);
|
||||||
Coordinate coor_f(_ndimension);
|
Coordinate coor_f(_ndimension);
|
||||||
@ -332,13 +375,35 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
|
|||||||
|
|
||||||
for(int i=0;i<nbasis;i++) {
|
for(int i=0;i<nbasis;i++) {
|
||||||
auto basis_ = Basis[i].View();
|
auto basis_ = Basis[i].View();
|
||||||
if(i==0) fineData_[sf]=coarseData_[sc](i) *basis_[sf];
|
if(i==0) fineData_[sf]=coarseData_[sc](i) *basis_[sf]);
|
||||||
else fineData_[sf]=fineData_[sf]+coarseData_[sc](i)*basis_[sf];
|
else fineData_[sf]=fineData_[sf]+coarseData_[sc](i)*basis_[sf]);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
return;
|
return;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
template<class vobj,class CComplex,int nbasis>
|
||||||
|
inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
|
||||||
|
Lattice<vobj> &fineData,
|
||||||
|
const std::vector<Lattice<vobj> > &Basis)
|
||||||
|
{
|
||||||
|
GridBase * fine = fineData.Grid();
|
||||||
|
GridBase * coarse= coarseData.Grid();
|
||||||
|
|
||||||
|
fineData=Zero();
|
||||||
|
for(int i=0;i<nbasis;i++) {
|
||||||
|
Lattice<iScalar<CComplex> > ip = PeekIndex<0>(coarseData,i);
|
||||||
|
Lattice<CComplex> cip(coarse);
|
||||||
|
auto cip_ = cip.View();
|
||||||
|
auto ip_ = ip.View();
|
||||||
|
accelerator_forNB(sc,coarse->oSites(),CComplex::Nsimd(),{
|
||||||
|
coalescedWrite(cip_[sc], ip_(sc)());
|
||||||
|
});
|
||||||
|
blockZAXPY<vobj,CComplex >(fineData,cip,Basis[i],fineData);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// Useful for precision conversion, or indeed anything where an operator= does a conversion on scalars.
|
// Useful for precision conversion, or indeed anything where an operator= does a conversion on scalars.
|
||||||
// Simd layouts need not match since we use peek/poke Local
|
// Simd layouts need not match since we use peek/poke Local
|
||||||
|
@ -101,7 +101,8 @@ public:
|
|||||||
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||||
|
|
||||||
// Efficient support for multigrid coarsening
|
// Efficient support for multigrid coarsening
|
||||||
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
|
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
|
||||||
|
virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out);
|
||||||
|
|
||||||
void Meooe5D (const FermionField &in, FermionField &out);
|
void Meooe5D (const FermionField &in, FermionField &out);
|
||||||
void MeooeDag5D (const FermionField &in, FermionField &out);
|
void MeooeDag5D (const FermionField &in, FermionField &out);
|
||||||
|
@ -62,14 +62,15 @@ public:
|
|||||||
|
|
||||||
// Efficient support for multigrid coarsening
|
// Efficient support for multigrid coarsening
|
||||||
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
|
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
|
||||||
|
virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out);
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
// Physical surface field utilities
|
// Physical surface field utilities
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
// virtual void Dminus(const FermionField &psi, FermionField &chi); // Inherit trivial case
|
// virtual void Dminus(const FermionField &psi, FermionField &chi); // Inherit trivial case
|
||||||
// virtual void DminusDag(const FermionField &psi, FermionField &chi); // Inherit trivial case
|
// virtual void DminusDag(const FermionField &psi, FermionField &chi); // Inherit trivial case
|
||||||
virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
|
virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
|
||||||
virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d);
|
virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d);
|
||||||
|
|
||||||
// Constructors
|
// Constructors
|
||||||
ContinuedFractionFermion5D(GaugeField &_Umu,
|
ContinuedFractionFermion5D(GaugeField &_Umu,
|
||||||
|
@ -89,6 +89,7 @@ public:
|
|||||||
|
|
||||||
virtual void Mdiag (const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's
|
virtual void Mdiag (const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's
|
||||||
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
|
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
|
||||||
|
virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
|
||||||
|
|
||||||
|
|
||||||
virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);};
|
virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);};
|
||||||
|
@ -103,6 +103,7 @@ public:
|
|||||||
// Multigrid assistance; force term uses too
|
// Multigrid assistance; force term uses too
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
void Mdir(const FermionField &in, FermionField &out, int dir, int disp);
|
void Mdir(const FermionField &in, FermionField &out, int dir, int disp);
|
||||||
|
void MdirAll(const FermionField &in, std::vector<FermionField> &out);
|
||||||
void DhopDir(const FermionField &in, FermionField &out, int dir, int disp);
|
void DhopDir(const FermionField &in, FermionField &out, int dir, int disp);
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
|
@ -86,7 +86,8 @@ public:
|
|||||||
void MooeeDag (const FermionField &in, FermionField &out);
|
void MooeeDag (const FermionField &in, FermionField &out);
|
||||||
void MooeeInvDag (const FermionField &in, FermionField &out);
|
void MooeeInvDag (const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
|
void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
|
||||||
|
void MdirAll(const FermionField &in, std::vector<FermionField> &out);
|
||||||
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
|
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
|
||||||
|
|
||||||
// These can be overridden by fancy 5d chiral action
|
// These can be overridden by fancy 5d chiral action
|
||||||
|
@ -67,12 +67,13 @@ public:
|
|||||||
|
|
||||||
// Efficient support for multigrid coarsening
|
// Efficient support for multigrid coarsening
|
||||||
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
|
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
|
||||||
|
virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out);
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
// Physical surface field utilities
|
// Physical surface field utilities
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
|
virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
|
||||||
virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d);
|
virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d);
|
||||||
|
|
||||||
// Constructors
|
// Constructors
|
||||||
PartialFractionFermion5D(GaugeField &_Umu,
|
PartialFractionFermion5D(GaugeField &_Umu,
|
||||||
|
@ -115,9 +115,10 @@ public:
|
|||||||
// Multigrid assistance; force term uses too
|
// Multigrid assistance; force term uses too
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
void Mdir(const FermionField &in, FermionField &out, int dir, int disp);
|
void Mdir(const FermionField &in, FermionField &out, int dir, int disp);
|
||||||
|
void MdirAll(const FermionField &in, std::vector<FermionField> &out);
|
||||||
void DhopDir(const FermionField &in, FermionField &out, int dir, int disp);
|
void DhopDir(const FermionField &in, FermionField &out, int dir, int disp);
|
||||||
void DhopDirDisp(const FermionField &in, FermionField &out, int dirdisp,
|
void DhopDirAll(const FermionField &in, std::vector<FermionField> &out);
|
||||||
int gamma, int dag);
|
void DhopDirCalc(const FermionField &in, FermionField &out, int dirdisp,int gamma, int dag);
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
// Extra methods added by derived
|
// Extra methods added by derived
|
||||||
|
@ -111,15 +111,16 @@ public:
|
|||||||
virtual void MooeeDag (const FermionField &in, FermionField &out){assert(0);};
|
virtual void MooeeDag (const FermionField &in, FermionField &out){assert(0);};
|
||||||
virtual void MooeeInvDag (const FermionField &in, FermionField &out){assert(0);};
|
virtual void MooeeInvDag (const FermionField &in, FermionField &out){assert(0);};
|
||||||
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
|
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
|
||||||
|
virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
|
||||||
|
|
||||||
// These can be overridden by fancy 5d chiral action
|
// These can be overridden by fancy 5d chiral action
|
||||||
virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||||
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||||
virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||||
|
|
||||||
void MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
|
void MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
|
||||||
void MomentumSpacePropagatorHt(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
|
void MomentumSpacePropagatorHt(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
|
||||||
void MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
|
void MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
|
||||||
|
|
||||||
// Implement hopping term non-hermitian hopping term; half cb or both
|
// Implement hopping term non-hermitian hopping term; half cb or both
|
||||||
// Implement s-diagonal DW
|
// Implement s-diagonal DW
|
||||||
@ -131,6 +132,9 @@ public:
|
|||||||
// add a DhopComm
|
// add a DhopComm
|
||||||
// -- suboptimal interface will presently trigger multiple comms.
|
// -- suboptimal interface will presently trigger multiple comms.
|
||||||
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
|
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
|
||||||
|
void DhopDirAll(const FermionField &in,std::vector<FermionField> &out);
|
||||||
|
void DhopDirComms(const FermionField &in);
|
||||||
|
void DhopDirCalc(const FermionField &in, FermionField &out,int point);
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
// New methods added
|
// New methods added
|
||||||
|
@ -60,6 +60,9 @@ public:
|
|||||||
int Ls, int Nsite, const FermionField &in, FermionField &out,
|
int Ls, int Nsite, const FermionField &in, FermionField &out,
|
||||||
int interior=1,int exterior=1) ;
|
int interior=1,int exterior=1) ;
|
||||||
|
|
||||||
|
static void DhopDirAll( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls,
|
||||||
|
int Nsite, const FermionField &in, std::vector<FermionField> &out) ;
|
||||||
|
|
||||||
static void DhopDirKernel(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf,
|
static void DhopDirKernel(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf,
|
||||||
int Ls, int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma);
|
int Ls, int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma);
|
||||||
|
|
||||||
@ -100,9 +103,18 @@ public:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
static accelerator void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,
|
static accelerator_inline void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma);
|
||||||
|
|
||||||
|
static accelerator_inline void DhopDirXp(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
|
||||||
|
static accelerator_inline void DhopDirYp(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
|
||||||
|
static accelerator_inline void DhopDirZp(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
|
||||||
|
static accelerator_inline void DhopDirTp(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
|
||||||
|
static accelerator_inline void DhopDirXm(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
|
||||||
|
static accelerator_inline void DhopDirYm(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
|
||||||
|
static accelerator_inline void DhopDirZm(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
|
||||||
|
static accelerator_inline void DhopDirTm(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
|
||||||
|
|
||||||
// Specialised variants
|
// Specialised variants
|
||||||
static accelerator void GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
static accelerator void GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
@ -54,6 +54,14 @@ public:
|
|||||||
_Mat.Mdir(in,tmp,dir,disp);
|
_Mat.Mdir(in,tmp,dir,disp);
|
||||||
G5R5(out,tmp);
|
G5R5(out,tmp);
|
||||||
}
|
}
|
||||||
|
void OpDirAll(const Field &in, std::vector<Field> &out) {
|
||||||
|
Field tmp(in.Grid());
|
||||||
|
_Mat.MdirAll(in,out);
|
||||||
|
for(int p=0;p<out.size();p++) {
|
||||||
|
tmp=out[p];
|
||||||
|
G5R5(out[p],tmp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||||
|
|
||||||
@ -96,6 +104,12 @@ public:
|
|||||||
_Mat.Mdir(in,tmp,dir,disp);
|
_Mat.Mdir(in,tmp,dir,disp);
|
||||||
out=g5*tmp;
|
out=g5*tmp;
|
||||||
}
|
}
|
||||||
|
void OpDirAll(const Field &in, std::vector<Field> &out) {
|
||||||
|
_Mat.MdirAll(in,out);
|
||||||
|
for(int p=0;p<out.size();p++) {
|
||||||
|
out[p]=g5*out[p];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||||
|
|
||||||
|
@ -389,6 +389,14 @@ void CayleyFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,in
|
|||||||
Meo5D(psi,tmp);
|
Meo5D(psi,tmp);
|
||||||
this->DhopDir(tmp,chi,dir,disp);
|
this->DhopDir(tmp,chi,dir,disp);
|
||||||
}
|
}
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MdirAll(const FermionField &psi, std::vector<FermionField> &out)
|
||||||
|
{
|
||||||
|
FermionField tmp(psi.Grid());
|
||||||
|
Meo5D(psi,tmp);
|
||||||
|
this->DhopDirAll(tmp,out);
|
||||||
|
}
|
||||||
|
|
||||||
// force terms; five routines; default to Dhop on diagonal
|
// force terms; five routines; default to Dhop on diagonal
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
void CayleyFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
||||||
|
@ -143,6 +143,25 @@ void ContinuedFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionFi
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
|
void ContinuedFractionFermion5D<Impl>::MdirAll (const FermionField &psi, std::vector<FermionField> &chi)
|
||||||
|
{
|
||||||
|
int Ls = this->Ls;
|
||||||
|
|
||||||
|
this->DhopDirAll(psi,chi); // Dslash on diagonal. g5 Dslash is hermitian
|
||||||
|
|
||||||
|
for(int p=0;p<chi.size();p++){
|
||||||
|
int sign=1;
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
if ( s==(Ls-1) ){
|
||||||
|
ag5xpby_ssp(chi[p],Beta[s]*ZoloHiInv,chi[p],0.0,chi[p],s,s);
|
||||||
|
} else {
|
||||||
|
ag5xpby_ssp(chi[p],cc[s]*Beta[s]*sign*ZoloHiInv,chi[p],0.0,chi[p],s,s);
|
||||||
|
}
|
||||||
|
sign=-sign;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
template<class Impl>
|
||||||
void ContinuedFractionFermion5D<Impl>::Meooe (const FermionField &psi, FermionField &chi)
|
void ContinuedFractionFermion5D<Impl>::Meooe (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
|
@ -538,10 +538,16 @@ void ImprovedStaggeredFermion5D<Impl>::ZeroCounters(void)
|
|||||||
// Implement the general interface. Here we use SAME mass on all slices
|
// Implement the general interface. Here we use SAME mass on all slices
|
||||||
/////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion5D<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp) {
|
void ImprovedStaggeredFermion5D<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp)
|
||||||
|
{
|
||||||
DhopDir(in, out, dir, disp);
|
DhopDir(in, out, dir, disp);
|
||||||
}
|
}
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
|
void ImprovedStaggeredFermion5D<Impl>::MdirAll(const FermionField &in, std::vector<FermionField> &out)
|
||||||
|
{
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
template <class Impl>
|
||||||
RealD ImprovedStaggeredFermion5D<Impl>::M(const FermionField &in, FermionField &out) {
|
RealD ImprovedStaggeredFermion5D<Impl>::M(const FermionField &in, FermionField &out) {
|
||||||
out.Checkerboard() = in.Checkerboard();
|
out.Checkerboard() = in.Checkerboard();
|
||||||
Dhop(in, out, DaggerNo);
|
Dhop(in, out, DaggerNo);
|
||||||
|
@ -362,12 +362,19 @@ void ImprovedStaggeredFermion<Impl>::DhopEO(const FermionField &in, FermionField
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp) {
|
void ImprovedStaggeredFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp)
|
||||||
|
{
|
||||||
DhopDir(in, out, dir, disp);
|
DhopDir(in, out, dir, disp);
|
||||||
}
|
}
|
||||||
|
template <class Impl>
|
||||||
|
void ImprovedStaggeredFermion<Impl>::MdirAll(const FermionField &in, std::vector<FermionField> &out)
|
||||||
|
{
|
||||||
|
assert(0); // Not implemented yet
|
||||||
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp) {
|
void ImprovedStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp)
|
||||||
|
{
|
||||||
|
|
||||||
Compressor compressor;
|
Compressor compressor;
|
||||||
Stencil.HaloExchange(in, compressor);
|
Stencil.HaloExchange(in, compressor);
|
||||||
@ -380,6 +387,7 @@ void ImprovedStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionFiel
|
|||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
|
void ImprovedStaggeredFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
|
||||||
DoubledGaugeField &U,
|
DoubledGaugeField &U,
|
||||||
@ -404,7 +412,6 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st
|
|||||||
#ifdef GRID_OMP
|
#ifdef GRID_OMP
|
||||||
Compressor compressor;
|
Compressor compressor;
|
||||||
int len = U.Grid()->oSites();
|
int len = U.Grid()->oSites();
|
||||||
const int LLs = 1;
|
|
||||||
|
|
||||||
DhopTotalTime -= usecond();
|
DhopTotalTime -= usecond();
|
||||||
|
|
||||||
|
@ -31,7 +31,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void PartialFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
|
void PartialFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
|
||||||
// this does both dag and undag but is trivial; make a common helper routing
|
// this does both dag and undag but is trivial; make a common helper routing
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
@ -45,8 +45,25 @@ void PartialFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionFiel
|
|||||||
ag5xpby_ssp(chi, scale,chi,0.0,chi,s+1,s+1);
|
ag5xpby_ssp(chi, scale,chi,0.0,chi,s+1,s+1);
|
||||||
}
|
}
|
||||||
ag5xpby_ssp(chi,p[nblock]*scale/amax,chi,0.0,chi,Ls-1,Ls-1);
|
ag5xpby_ssp(chi,p[nblock]*scale/amax,chi,0.0,chi,Ls-1,Ls-1);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
template<class Impl>
|
||||||
|
void PartialFractionFermion5D<Impl>::MdirAll (const FermionField &psi, std::vector<FermionField> &chi){
|
||||||
|
// this does both dag and undag but is trivial; make a common helper routing
|
||||||
|
int Ls = this->Ls;
|
||||||
|
|
||||||
|
this->DhopDirAll(psi,chi);
|
||||||
|
|
||||||
|
for(int point=0;point<chi.size();point++){
|
||||||
|
int nblock=(Ls-1)/2;
|
||||||
|
for(int b=0;b<nblock;b++){
|
||||||
|
int s = 2*b;
|
||||||
|
ag5xpby_ssp(chi[point],-scale,chi[point],0.0,chi[point],s,s);
|
||||||
|
ag5xpby_ssp(chi[point], scale,chi[point],0.0,chi[point],s+1,s+1);
|
||||||
|
}
|
||||||
|
ag5xpby_ssp(chi[point],p[nblock]*scale/amax,chi[point],0.0,chi[point],Ls-1,Ls-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void PartialFractionFermion5D<Impl>::Meooe_internal(const FermionField &psi, FermionField &chi,int dag)
|
void PartialFractionFermion5D<Impl>::Meooe_internal(const FermionField &psi, FermionField &chi,int dag)
|
||||||
{
|
{
|
||||||
|
@ -241,6 +241,15 @@ void WilsonFermion5D<Impl>::DhopDir(const FermionField &in, FermionField &out,in
|
|||||||
Kernels::DhopDirKernel(Stencil,Umu,Stencil.CommBuf(),Ls,Nsite,in,out,dirdisp,gamma);
|
Kernels::DhopDirKernel(Stencil,Umu,Stencil.CommBuf(),Ls,Nsite,in,out,dirdisp,gamma);
|
||||||
|
|
||||||
};
|
};
|
||||||
|
template<class Impl>
|
||||||
|
void WilsonFermion5D<Impl>::DhopDirAll(const FermionField &in, std::vector<FermionField> &out)
|
||||||
|
{
|
||||||
|
Compressor compressor(DaggerNo);
|
||||||
|
Stencil.HaloExchange(in,compressor);
|
||||||
|
uint64_t Nsite = Umu.Grid()->oSites();
|
||||||
|
Kernels::DhopDirAll(Stencil,Umu,Stencil.CommBuf(),Ls,Nsite,in,out);
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
||||||
|
@ -319,28 +319,51 @@ void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int d
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp) {
|
void WilsonFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp)
|
||||||
|
{
|
||||||
DhopDir(in, out, dir, disp);
|
DhopDir(in, out, dir, disp);
|
||||||
}
|
}
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::MdirAll(const FermionField &in, std::vector<FermionField> &out)
|
||||||
|
{
|
||||||
|
DhopDirAll(in, out);
|
||||||
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp)
|
void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp)
|
||||||
{
|
{
|
||||||
|
Compressor compressor(DaggerNo);
|
||||||
|
Stencil.HaloExchange(in, compressor);
|
||||||
|
|
||||||
int skip = (disp == 1) ? 0 : 1;
|
int skip = (disp == 1) ? 0 : 1;
|
||||||
int dirdisp = dir + skip * 4;
|
int dirdisp = dir + skip * 4;
|
||||||
int gamma = dir + (1 - skip) * 4;
|
int gamma = dir + (1 - skip) * 4;
|
||||||
|
|
||||||
DhopDirDisp(in, out, dirdisp, gamma, DaggerNo);
|
DhopDirCalc(in, out, dirdisp, gamma, DaggerNo);
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,int dirdisp, int gamma, int dag)
|
void WilsonFermion<Impl>::DhopDirAll(const FermionField &in, std::vector<FermionField> &out)
|
||||||
{
|
{
|
||||||
Compressor compressor(dag);
|
Compressor compressor(DaggerNo);
|
||||||
|
|
||||||
Stencil.HaloExchange(in, compressor);
|
Stencil.HaloExchange(in, compressor);
|
||||||
|
|
||||||
|
assert((out.size()==8)||(out.size()==9));
|
||||||
|
for(int dir=0;dir<Nd;dir++){
|
||||||
|
for(int disp=-1;disp<=1;disp+=2){
|
||||||
|
|
||||||
|
int skip = (disp == 1) ? 0 : 1;
|
||||||
|
int dirdisp = dir + skip * 4;
|
||||||
|
int gamma = dir + (1 - skip) * 4;
|
||||||
|
|
||||||
|
DhopDirCalc(in, out[dirdisp], dirdisp, gamma, DaggerNo);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::DhopDirCalc(const FermionField &in, FermionField &out,int dirdisp, int gamma, int dag)
|
||||||
|
{
|
||||||
int Ls=1;
|
int Ls=1;
|
||||||
int Nsite=in.oSites();
|
uint64_t Nsite=in.oSites();
|
||||||
Kernels::DhopDirKernel(Stencil, Umu, Stencil.CommBuf(), Ls, Nsite, in, out, dirdisp, gamma);
|
Kernels::DhopDirKernel(Stencil, Umu, Stencil.CommBuf(), Ls, Nsite, in, out, dirdisp, gamma);
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -348,7 +371,8 @@ template <class Impl>
|
|||||||
void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
|
void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
|
||||||
DoubledGaugeField &U,
|
DoubledGaugeField &U,
|
||||||
const FermionField &in,
|
const FermionField &in,
|
||||||
FermionField &out, int dag) {
|
FermionField &out, int dag)
|
||||||
|
{
|
||||||
#ifdef GRID_OMP
|
#ifdef GRID_OMP
|
||||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute )
|
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute )
|
||||||
DhopInternalOverlappedComms(st,lo,U,in,out,dag);
|
DhopInternalOverlappedComms(st,lo,U,in,out,dag);
|
||||||
|
@ -91,8 +91,7 @@ accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip)
|
|||||||
} \
|
} \
|
||||||
synchronise();
|
synchronise();
|
||||||
|
|
||||||
#define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon) \
|
#define GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,Recon) \
|
||||||
if (gamma == Dir) { \
|
|
||||||
if (SE->_is_local ) { \
|
if (SE->_is_local ) { \
|
||||||
int perm= SE->_permute; \
|
int perm= SE->_permute; \
|
||||||
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
|
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
|
||||||
@ -102,10 +101,14 @@ accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip)
|
|||||||
} \
|
} \
|
||||||
synchronise(); \
|
synchronise(); \
|
||||||
Impl::multLink(Uchi, U[sU], chi, dir, SE, st); \
|
Impl::multLink(Uchi, U[sU], chi, dir, SE, st); \
|
||||||
Recon(result, Uchi); \
|
Recon(result, Uchi);
|
||||||
synchronise(); \
|
|
||||||
|
#define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon) \
|
||||||
|
if (gamma == Dir) { \
|
||||||
|
GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,Recon); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
// All legs kernels ; comms then compute
|
// All legs kernels ; comms then compute
|
||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
@ -284,6 +287,35 @@ void WilsonKernels<Impl>::GenericDhopSiteExt(StencilView &st, DoubledGaugeField
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define DhopDirMacro(Dir,spProj,spRecon) \
|
||||||
|
template <class Impl> \
|
||||||
|
void WilsonKernels<Impl>::DhopDir##Dir(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF, \
|
||||||
|
int sU, const FermionFieldView &in, FermionFieldView &out, int dir) \
|
||||||
|
{ \
|
||||||
|
typedef decltype(coalescedRead(buf[0])) calcHalfSpinor; \
|
||||||
|
typedef decltype(coalescedRead(in[0])) calcSpinor; \
|
||||||
|
calcHalfSpinor chi; \
|
||||||
|
calcSpinor result; \
|
||||||
|
calcHalfSpinor Uchi; \
|
||||||
|
StencilEntry *SE; \
|
||||||
|
int ptype; \
|
||||||
|
const int Nsimd = SiteHalfSpinor::Nsimd(); \
|
||||||
|
const int lane=SIMTlane(Nsimd); \
|
||||||
|
\
|
||||||
|
SE = st.GetEntry(ptype, dir, sF); \
|
||||||
|
GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,spRecon); \
|
||||||
|
coalescedWrite(out[sF], result,lane); \
|
||||||
|
}
|
||||||
|
|
||||||
|
DhopDirMacro(Xp,spProjXp,spReconXp);
|
||||||
|
DhopDirMacro(Yp,spProjYp,spReconYp);
|
||||||
|
DhopDirMacro(Zp,spProjZp,spReconZp);
|
||||||
|
DhopDirMacro(Tp,spProjTp,spReconTp);
|
||||||
|
DhopDirMacro(Xm,spProjXm,spReconXm);
|
||||||
|
DhopDirMacro(Ym,spProjYm,spReconYm);
|
||||||
|
DhopDirMacro(Zm,spProjZm,spReconZm);
|
||||||
|
DhopDirMacro(Tm,spProjTm,spReconTm);
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF,
|
void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF,
|
||||||
int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int gamma)
|
int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int gamma)
|
||||||
@ -299,18 +331,7 @@ void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,Si
|
|||||||
const int lane=SIMTlane(Nsimd);
|
const int lane=SIMTlane(Nsimd);
|
||||||
|
|
||||||
SE = st.GetEntry(ptype, dir, sF);
|
SE = st.GetEntry(ptype, dir, sF);
|
||||||
if (gamma == Xp) {
|
GENERIC_DHOPDIR_LEG(Xp,spProjXp,spReconXp);
|
||||||
if (SE->_is_local ) {
|
|
||||||
int perm= SE->_permute;
|
|
||||||
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane);
|
|
||||||
spProjXp(chi,tmp);
|
|
||||||
} else {
|
|
||||||
chi = coalescedRead(buf[SE->_offset],lane);
|
|
||||||
}
|
|
||||||
Impl::multLink(Uchi, U[sU], chi, dir, SE, st);
|
|
||||||
spReconXp(result, Uchi);
|
|
||||||
}
|
|
||||||
|
|
||||||
GENERIC_DHOPDIR_LEG(Yp,spProjYp,spReconYp);
|
GENERIC_DHOPDIR_LEG(Yp,spProjYp,spReconYp);
|
||||||
GENERIC_DHOPDIR_LEG(Zp,spProjZp,spReconZp);
|
GENERIC_DHOPDIR_LEG(Zp,spProjZp,spReconZp);
|
||||||
GENERIC_DHOPDIR_LEG(Tp,spProjTp,spReconTp);
|
GENERIC_DHOPDIR_LEG(Tp,spProjTp,spReconTp);
|
||||||
@ -321,6 +342,38 @@ void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,Si
|
|||||||
coalescedWrite(out[sF], result,lane);
|
coalescedWrite(out[sF], result,lane);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonKernels<Impl>::DhopDirAll( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls,
|
||||||
|
int Nsite, const FermionField &in, std::vector<FermionField> &out)
|
||||||
|
{
|
||||||
|
auto U_v = U.View();
|
||||||
|
auto in_v = in.View();
|
||||||
|
auto st_v = st.View();
|
||||||
|
|
||||||
|
auto out_Xm = out[0].View();
|
||||||
|
auto out_Ym = out[1].View();
|
||||||
|
auto out_Zm = out[2].View();
|
||||||
|
auto out_Tm = out[3].View();
|
||||||
|
auto out_Xp = out[4].View();
|
||||||
|
auto out_Yp = out[5].View();
|
||||||
|
auto out_Zp = out[6].View();
|
||||||
|
auto out_Tp = out[7].View();
|
||||||
|
|
||||||
|
accelerator_forNB(sss,Nsite*Ls,Simd::Nsimd(),{
|
||||||
|
int sU=sss/Ls;
|
||||||
|
int sF =sss;
|
||||||
|
DhopDirXm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Xm,0);
|
||||||
|
DhopDirYm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Ym,1);
|
||||||
|
DhopDirZm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Zm,2);
|
||||||
|
DhopDirTm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Tm,3);
|
||||||
|
DhopDirXp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Xp,4);
|
||||||
|
DhopDirYp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Yp,5);
|
||||||
|
DhopDirZp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Zp,6);
|
||||||
|
DhopDirTp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Tp,7);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls,
|
void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls,
|
||||||
int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma)
|
int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma)
|
||||||
@ -332,13 +385,32 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
|
|||||||
auto in_v = in.View();
|
auto in_v = in.View();
|
||||||
auto out_v = out.View();
|
auto out_v = out.View();
|
||||||
auto st_v = st.View();
|
auto st_v = st.View();
|
||||||
accelerator_for(ss,Nsite,Simd::Nsimd(),{
|
#define LoopBody(Dir) \
|
||||||
for(int s=0;s<Ls;s++){
|
case Dir : \
|
||||||
int sU=ss;
|
accelerator_forNB(ss,Nsite,Simd::Nsimd(),{ \
|
||||||
int sF = s+Ls*sU;
|
for(int s=0;s<Ls;s++){ \
|
||||||
DhopDirK(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_v,dirdisp,gamma);
|
int sU=ss; \
|
||||||
}
|
int sF = s+Ls*sU; \
|
||||||
});
|
DhopDir##Dir(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_v,dirdisp);\
|
||||||
|
} \
|
||||||
|
}); \
|
||||||
|
break;
|
||||||
|
|
||||||
|
switch(gamma){
|
||||||
|
LoopBody(Xp);
|
||||||
|
LoopBody(Yp);
|
||||||
|
LoopBody(Zp);
|
||||||
|
LoopBody(Tp);
|
||||||
|
|
||||||
|
LoopBody(Xm);
|
||||||
|
LoopBody(Ym);
|
||||||
|
LoopBody(Zm);
|
||||||
|
LoopBody(Tm);
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
#undef LoopBody
|
||||||
}
|
}
|
||||||
|
|
||||||
#define KERNEL_CALLNB(A) \
|
#define KERNEL_CALLNB(A) \
|
||||||
|
@ -92,6 +92,7 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
void Mdir(const GaugeField&, GaugeField&, int, int){ assert(0);}
|
void Mdir(const GaugeField&, GaugeField&, int, int){ assert(0);}
|
||||||
|
void MdirAll(const GaugeField&, std::vector<GaugeField> &){ assert(0);}
|
||||||
void Mdiag(const GaugeField&, GaugeField&){ assert(0);}
|
void Mdiag(const GaugeField&, GaugeField&){ assert(0);}
|
||||||
|
|
||||||
void ImportGauge(const GaugeField& _U) {
|
void ImportGauge(const GaugeField& _U) {
|
||||||
|
@ -403,6 +403,10 @@ namespace Optimization {
|
|||||||
accelerator_inline GpuVectorRD operator()(GpuVectorRD a, GpuVectorRD b){
|
accelerator_inline GpuVectorRD operator()(GpuVectorRD a, GpuVectorRD b){
|
||||||
return a/b;
|
return a/b;
|
||||||
}
|
}
|
||||||
|
accelerator_inline GpuVectorI operator()(GpuVectorI a, GpuVectorI b){
|
||||||
|
return a/b;
|
||||||
|
}
|
||||||
|
|
||||||
// Danger -- element wise divide fro complex, not complex div.
|
// Danger -- element wise divide fro complex, not complex div.
|
||||||
// See Grid_vector_types.h lines around 735, applied after "toReal"
|
// See Grid_vector_types.h lines around 735, applied after "toReal"
|
||||||
accelerator_inline GpuVectorCF operator()(GpuVectorCF a, GpuVectorCF b){
|
accelerator_inline GpuVectorCF operator()(GpuVectorCF a, GpuVectorCF b){
|
||||||
|
@ -628,6 +628,7 @@ void Grid_debug_handler_init(void)
|
|||||||
sigaction(SIGSEGV,&sa,NULL);
|
sigaction(SIGSEGV,&sa,NULL);
|
||||||
sigaction(SIGTRAP,&sa,NULL);
|
sigaction(SIGTRAP,&sa,NULL);
|
||||||
sigaction(SIGBUS,&sa,NULL);
|
sigaction(SIGBUS,&sa,NULL);
|
||||||
|
sigaction(SIGUSR2,&sa,NULL);
|
||||||
|
|
||||||
feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO);
|
feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO);
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
# Grid [![Teamcity status](http://ci.cliath.ph.ed.ac.uk/app/rest/builds/aggregated/strob:(buildType:(affectedProject(id:Grid)),branch:name:develop)/statusIcon.svg)](http://ci.cliath.ph.ed.ac.uk/project.html?projectId=Grid&tab=projectOverview) [![Travis status](https://travis-ci.org/paboyle/Grid.svg?branch=develop)](https://travis-ci.org/paboyle/Grid)
|
# Grid [![Teamcity status](http://ci.cliath.ph.ed.ac.uk/app/rest/builds/aggregated/strob:(buildType:(affectedProject(id:GridBasedSoftware_Grid)),branch:name:develop)/statusIcon.svg)](http://ci.cliath.ph.ed.ac.uk/project.html?projectId=GridBasedSoftware_Grid&tab=projectOverview) [![Travis status](https://travis-ci.org/paboyle/Grid.svg?branch=develop)](https://travis-ci.org/paboyle/Grid)
|
||||||
|
|
||||||
**Data parallel C++ mathematical object library.**
|
**Data parallel C++ mathematical object library.**
|
||||||
|
|
||||||
|
@ -280,7 +280,8 @@ case ${CXX} in
|
|||||||
# CXXLD="nvcc -v -link"
|
# CXXLD="nvcc -v -link"
|
||||||
CXX="nvcc -x cu "
|
CXX="nvcc -x cu "
|
||||||
CXXLD="nvcc -link"
|
CXXLD="nvcc -link"
|
||||||
CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing -Xcompiler -Wno-unusable-partial-specialization --expt-extended-lambda --expt-relaxed-constexpr"
|
# CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing -Xcompiler -Wno-unusable-partial-specialization --expt-extended-lambda --expt-relaxed-constexpr"
|
||||||
|
CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr"
|
||||||
if test $ac_openmp = yes; then
|
if test $ac_openmp = yes; then
|
||||||
CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp"
|
CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp"
|
||||||
fi
|
fi
|
||||||
|
@ -73,6 +73,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Support for coarsening to a multigrid
|
// Support for coarsening to a multigrid
|
||||||
|
void OpDirAll (const Field &in, std::vector<Field> &out){};
|
||||||
void OpDiag (const Field &in, Field &out) {};
|
void OpDiag (const Field &in, Field &out) {};
|
||||||
void OpDir (const Field &in, Field &out,int dir,int disp){};
|
void OpDir (const Field &in, Field &out,int dir,int disp){};
|
||||||
|
|
||||||
|
@ -1,4 +1,6 @@
|
|||||||
/*************************************************************************************
|
|
||||||
|
|
||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
@ -29,323 +31,174 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#include <Grid/Grid.h>
|
#include <Grid/Grid.h>
|
||||||
#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
|
#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
|
||||||
//#include <algorithms/iterative/PrecConjugateResidual.h>
|
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
;
|
/* Params
|
||||||
|
* Grid:
|
||||||
|
* block1(4)
|
||||||
|
* block2(4)
|
||||||
|
*
|
||||||
|
* Subspace
|
||||||
|
* * Fine : Subspace(nbasis,hi,lo,order,first,step) -- 32, 60,0.02,500,100,100
|
||||||
|
* * Coarse: Subspace(nbasis,hi,lo,order,first,step) -- 32, 18,0.02,500,100,100
|
||||||
|
|
||||||
class myclass: Serializable {
|
* Smoother:
|
||||||
public:
|
* * Fine: Cheby(hi, lo, order) -- 60,0.5,10
|
||||||
|
* * Coarse: Cheby(hi, lo, order) -- 12,0.1,4
|
||||||
GRID_SERIALIZABLE_CLASS_MEMBERS(myclass,
|
|
||||||
int, domaindecompose,
|
|
||||||
int, domainsize,
|
|
||||||
int, order,
|
|
||||||
int, Ls,
|
|
||||||
double, mq,
|
|
||||||
double, lo,
|
|
||||||
double, hi,
|
|
||||||
int, steps);
|
|
||||||
|
|
||||||
myclass(){};
|
|
||||||
|
|
||||||
};
|
|
||||||
myclass params;
|
|
||||||
|
|
||||||
|
* Lanczos:
|
||||||
|
* CoarseCoarse IRL( Nk, Nm, Nstop, poly(lo,hi,order)) 24,36,24,0.002,4.0,61
|
||||||
|
*/
|
||||||
RealD InverseApproximation(RealD x){
|
RealD InverseApproximation(RealD x){
|
||||||
return 1.0/x;
|
return 1.0/x;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Fobj,class CComplex,int nbasis, class Matrix>
|
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef LinearOperatorBase<Field> FineOperator;
|
||||||
|
Matrix & _SmootherMatrix;
|
||||||
|
FineOperator & _SmootherOperator;
|
||||||
|
|
||||||
|
Chebyshev<Field> Cheby;
|
||||||
|
|
||||||
|
ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator,Matrix &SmootherMatrix) :
|
||||||
|
_SmootherOperator(SmootherOperator),
|
||||||
|
_SmootherMatrix(SmootherMatrix),
|
||||||
|
Cheby(_lo,_hi,_ord,InverseApproximation)
|
||||||
|
{};
|
||||||
|
|
||||||
|
void operator() (const Field &in, Field &out)
|
||||||
|
{
|
||||||
|
Field tmp(in.Grid());
|
||||||
|
MdagMLinearOperator<Matrix,Field> MdagMOp(_SmootherMatrix);
|
||||||
|
_SmootherOperator.AdjOp(in,tmp);
|
||||||
|
Cheby(MdagMOp,tmp,out);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef LinearOperatorBase<Field> FineOperator;
|
||||||
|
Matrix & SmootherMatrix;
|
||||||
|
FineOperator & SmootherOperator;
|
||||||
|
RealD tol;
|
||||||
|
RealD shift;
|
||||||
|
int maxit;
|
||||||
|
|
||||||
|
MirsSmoother(RealD _shift,RealD _tol,int _maxit,FineOperator &_SmootherOperator,Matrix &_SmootherMatrix) :
|
||||||
|
shift(_shift),tol(_tol),maxit(_maxit),
|
||||||
|
SmootherOperator(_SmootherOperator),
|
||||||
|
SmootherMatrix(_SmootherMatrix)
|
||||||
|
{};
|
||||||
|
|
||||||
|
void operator() (const Field &in, Field &out)
|
||||||
|
{
|
||||||
|
ZeroGuesser<Field> Guess;
|
||||||
|
ConjugateGradient<Field> CG(tol,maxit,false);
|
||||||
|
|
||||||
|
Field src(in.Grid());
|
||||||
|
|
||||||
|
ShiftedMdagMLinearOperator<SparseMatrixBase<Field>,Field> MdagMOp(SmootherMatrix,shift);
|
||||||
|
SmootherOperator.AdjOp(in,src);
|
||||||
|
Guess(src,out);
|
||||||
|
CG(MdagMOp,src,out);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver>
|
||||||
class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > {
|
class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
|
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
|
||||||
typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;
|
typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;
|
||||||
|
|
||||||
typedef typename Aggregation<Fobj,CComplex,nbasis>::siteVector siteVector;
|
|
||||||
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseScalar CoarseScalar;
|
|
||||||
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
|
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
|
||||||
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix;
|
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix;
|
||||||
typedef typename Aggregation<Fobj,CComplex,nbasis>::FineField FineField;
|
typedef typename Aggregation<Fobj,CComplex,nbasis>::FineField FineField;
|
||||||
typedef LinearOperatorBase<FineField> FineOperator;
|
typedef LinearOperatorBase<FineField> FineOperator;
|
||||||
|
typedef LinearFunction <FineField> FineSmoother;
|
||||||
|
|
||||||
Aggregates & _Aggregates;
|
Aggregates & _Aggregates;
|
||||||
CoarseOperator & _CoarseOperator;
|
CoarseOperator & _CoarseOperator;
|
||||||
Matrix & _FineMatrix;
|
Matrix & _FineMatrix;
|
||||||
FineOperator & _FineOperator;
|
FineOperator & _FineOperator;
|
||||||
Matrix & _SmootherMatrix;
|
Guesser & _Guess;
|
||||||
FineOperator & _SmootherOperator;
|
FineSmoother & _Smoother;
|
||||||
|
CoarseSolver & _CoarseSolve;
|
||||||
|
|
||||||
|
int level; void Level(int lv) {level = lv; };
|
||||||
|
|
||||||
|
#define GridLogLevel std::cout << GridLogMessage <<std::string(level,'\t')<< " Level "<<level <<" "
|
||||||
|
|
||||||
// Constructor
|
|
||||||
MultiGridPreconditioner(Aggregates &Agg, CoarseOperator &Coarse,
|
MultiGridPreconditioner(Aggregates &Agg, CoarseOperator &Coarse,
|
||||||
FineOperator &Fine,Matrix &FineMatrix,
|
FineOperator &Fine,Matrix &FineMatrix,
|
||||||
FineOperator &Smooth,Matrix &SmootherMatrix)
|
FineSmoother &Smoother,
|
||||||
|
Guesser &Guess_,
|
||||||
|
CoarseSolver &CoarseSolve_)
|
||||||
: _Aggregates(Agg),
|
: _Aggregates(Agg),
|
||||||
_CoarseOperator(Coarse),
|
_CoarseOperator(Coarse),
|
||||||
_FineOperator(Fine),
|
_FineOperator(Fine),
|
||||||
_FineMatrix(FineMatrix),
|
_FineMatrix(FineMatrix),
|
||||||
_SmootherOperator(Smooth),
|
_Smoother(Smoother),
|
||||||
_SmootherMatrix(SmootherMatrix)
|
_Guess(Guess_),
|
||||||
|
_CoarseSolve(CoarseSolve_),
|
||||||
|
level(1) { }
|
||||||
|
|
||||||
|
virtual void operator()(const FineField &in, FineField & out)
|
||||||
{
|
{
|
||||||
}
|
|
||||||
|
|
||||||
void PowerMethod(const FineField &in) {
|
|
||||||
|
|
||||||
FineField p1(in.Grid());
|
|
||||||
FineField p2(in.Grid());
|
|
||||||
|
|
||||||
MdagMLinearOperator<Matrix,FineField> fMdagMOp(_FineMatrix);
|
|
||||||
|
|
||||||
p1=in;
|
|
||||||
for(int i=0;i<20;i++){
|
|
||||||
RealD absp1=std::sqrt(norm2(p1));
|
|
||||||
fMdagMOp.HermOp(p1,p2);// this is the G5 herm bit
|
|
||||||
// _FineOperator.Op(p1,p2);// this is the G5 herm bit
|
|
||||||
RealD absp2=std::sqrt(norm2(p2));
|
|
||||||
if(i%10==9)
|
|
||||||
std::cout<<GridLogMessage << "Power method on mdagm "<<i<<" " << absp2/absp1<<std::endl;
|
|
||||||
p1=p2*(1.0/std::sqrt(absp2));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void operator()(const FineField &in, FineField & out) {
|
|
||||||
operatorCheby(in,out);
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
// ADEF2: [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
|
|
||||||
// ADEF1: [MP+Q ] in =M [1 - A Q] in + Q in
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
#if 1
|
|
||||||
void operatorADEF2(const FineField &in, FineField & out) {
|
|
||||||
|
|
||||||
CoarseVector Csrc(_CoarseOperator.Grid());
|
CoarseVector Csrc(_CoarseOperator.Grid());
|
||||||
CoarseVector Ctmp(_CoarseOperator.Grid());
|
|
||||||
CoarseVector Csol(_CoarseOperator.Grid());
|
CoarseVector Csol(_CoarseOperator.Grid());
|
||||||
|
|
||||||
ConjugateGradient<CoarseVector> CG(1.0e-10,100000);
|
|
||||||
ConjugateGradient<FineField> fCG(3.0e-2,1000);
|
|
||||||
|
|
||||||
HermitianLinearOperator<CoarseOperator,CoarseVector> HermOp(_CoarseOperator);
|
|
||||||
MdagMLinearOperator<CoarseOperator,CoarseVector> MdagMOp(_CoarseOperator);
|
|
||||||
MdagMLinearOperator<Matrix,FineField> fMdagMOp(_FineMatrix);
|
|
||||||
|
|
||||||
FineField tmp(in.Grid());
|
|
||||||
FineField res(in.Grid());
|
|
||||||
FineField Min(in.Grid());
|
|
||||||
|
|
||||||
// Monitor completeness of low mode space
|
|
||||||
_Aggregates.ProjectToSubspace (Csrc,in);
|
|
||||||
_Aggregates.PromoteFromSubspace(Csrc,out);
|
|
||||||
std::cout<<GridLogMessage<<"Coarse Grid Preconditioner\nCompleteness in: "<<std::sqrt(norm2(out)/norm2(in))<<std::endl;
|
|
||||||
|
|
||||||
// [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
|
|
||||||
_FineOperator.Op(in,tmp);// this is the G5 herm bit
|
|
||||||
fCG(fMdagMOp,tmp,Min); // solves MdagM = g5 M g5M
|
|
||||||
|
|
||||||
// Monitor completeness of low mode space
|
|
||||||
_Aggregates.ProjectToSubspace (Csrc,Min);
|
|
||||||
_Aggregates.PromoteFromSubspace(Csrc,out);
|
|
||||||
std::cout<<GridLogMessage<<"Completeness Min: "<<std::sqrt(norm2(out)/norm2(Min))<<std::endl;
|
|
||||||
|
|
||||||
_FineOperator.Op(Min,tmp);
|
|
||||||
tmp = in - tmp; // in - A Min
|
|
||||||
|
|
||||||
Csol=Zero();
|
|
||||||
_Aggregates.ProjectToSubspace (Csrc,tmp);
|
|
||||||
HermOp.AdjOp(Csrc,Ctmp);// Normal equations
|
|
||||||
CG(MdagMOp,Ctmp,Csol);
|
|
||||||
|
|
||||||
HermOp.Op(Csol,Ctmp);
|
|
||||||
Ctmp=Ctmp-Csrc;
|
|
||||||
std::cout<<GridLogMessage<<"coarse space true residual "<<std::sqrt(norm2(Ctmp)/norm2(Csrc))<<std::endl;
|
|
||||||
_Aggregates.PromoteFromSubspace(Csol,out);
|
|
||||||
|
|
||||||
_FineOperator.Op(out,res);
|
|
||||||
res=res-tmp;
|
|
||||||
std::cout<<GridLogMessage<<"promoted sol residual "<<std::sqrt(norm2(res)/norm2(tmp))<<std::endl;
|
|
||||||
_Aggregates.ProjectToSubspace (Csrc,res);
|
|
||||||
std::cout<<GridLogMessage<<"coarse space proj of residual "<<norm2(Csrc)<<std::endl;
|
|
||||||
|
|
||||||
|
|
||||||
out = out+Min; // additive coarse space correction
|
|
||||||
// out = Min; // no additive coarse space correction
|
|
||||||
|
|
||||||
_FineOperator.Op(out,tmp);
|
|
||||||
tmp=tmp-in; // tmp is new residual
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage<< " Preconditioner in " << norm2(in)<<std::endl;
|
|
||||||
std::cout<<GridLogMessage<< " Preconditioner out " << norm2(out)<<std::endl;
|
|
||||||
std::cout<<GridLogMessage<<"preconditioner thinks residual is "<<std::sqrt(norm2(tmp)/norm2(in))<<std::endl;
|
|
||||||
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
// ADEF1: [MP+Q ] in =M [1 - A Q] in + Q in
|
|
||||||
#if 1
|
|
||||||
void operatorADEF1(const FineField &in, FineField & out) {
|
|
||||||
|
|
||||||
CoarseVector Csrc(_CoarseOperator.Grid());
|
|
||||||
CoarseVector Ctmp(_CoarseOperator.Grid());
|
|
||||||
CoarseVector Csol(_CoarseOperator.Grid()); Csol=Zero();
|
|
||||||
|
|
||||||
ConjugateGradient<CoarseVector> CG(1.0e-10,100000);
|
|
||||||
ConjugateGradient<FineField> fCG(3.0e-2,1000);
|
|
||||||
|
|
||||||
HermitianLinearOperator<CoarseOperator,CoarseVector> HermOp(_CoarseOperator);
|
|
||||||
MdagMLinearOperator<CoarseOperator,CoarseVector> MdagMOp(_CoarseOperator);
|
|
||||||
ShiftedMdagMLinearOperator<Matrix,FineField> fMdagMOp(_FineMatrix,0.1);
|
|
||||||
|
|
||||||
FineField tmp(in.Grid());
|
|
||||||
FineField res(in.Grid());
|
|
||||||
FineField Qin(in.Grid());
|
|
||||||
|
|
||||||
// Monitor completeness of low mode space
|
|
||||||
// _Aggregates.ProjectToSubspace (Csrc,in);
|
|
||||||
// _Aggregates.PromoteFromSubspace(Csrc,out);
|
|
||||||
// std::cout<<GridLogMessage<<"Coarse Grid Preconditioner\nCompleteness in: "<<std::sqrt(norm2(out)/norm2(in))<<std::endl;
|
|
||||||
|
|
||||||
_Aggregates.ProjectToSubspace (Csrc,in);
|
|
||||||
HermOp.AdjOp(Csrc,Ctmp);// Normal equations
|
|
||||||
CG(MdagMOp,Ctmp,Csol);
|
|
||||||
_Aggregates.PromoteFromSubspace(Csol,Qin);
|
|
||||||
|
|
||||||
// Qin=0;
|
|
||||||
_FineOperator.Op(Qin,tmp);// A Q in
|
|
||||||
tmp = in - tmp; // in - A Q in
|
|
||||||
|
|
||||||
_FineOperator.Op(tmp,res);// this is the G5 herm bit
|
|
||||||
fCG(fMdagMOp,res,out); // solves MdagM = g5 M g5M
|
|
||||||
|
|
||||||
out = out + Qin;
|
|
||||||
|
|
||||||
_FineOperator.Op(out,tmp);
|
|
||||||
tmp=tmp-in; // tmp is new residual
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage<<"preconditioner thinks residual is "<<std::sqrt(norm2(tmp)/norm2(in))<<std::endl;
|
|
||||||
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void SmootherTest (const FineField & in){
|
|
||||||
|
|
||||||
FineField vec1(in.Grid());
|
|
||||||
FineField vec2(in.Grid());
|
|
||||||
RealD lo[3] = { 0.5, 1.0, 2.0};
|
|
||||||
|
|
||||||
// MdagMLinearOperator<Matrix,FineField> fMdagMOp(_FineMatrix);
|
|
||||||
ShiftedMdagMLinearOperator<Matrix,FineField> fMdagMOp(_SmootherMatrix,0.0);
|
|
||||||
|
|
||||||
RealD Ni,r;
|
|
||||||
|
|
||||||
Ni = norm2(in);
|
|
||||||
|
|
||||||
for(int ilo=0;ilo<3;ilo++){
|
|
||||||
for(int ord=5;ord<50;ord*=2){
|
|
||||||
|
|
||||||
std::cout << " lo "<<lo[ilo]<<" order "<<ord<<std::endl;
|
|
||||||
|
|
||||||
_SmootherOperator.AdjOp(in,vec1);
|
|
||||||
|
|
||||||
Chebyshev<FineField> Cheby (lo[ilo],70.0,ord,InverseApproximation);
|
|
||||||
Cheby(fMdagMOp,vec1,vec2); // solves MdagM = g5 M g5M
|
|
||||||
|
|
||||||
_FineOperator.Op(vec2,vec1);// this is the G5 herm bit
|
|
||||||
vec1 = in - vec1; // tmp = in - A Min
|
|
||||||
r=norm2(vec1);
|
|
||||||
std::cout<<GridLogMessage << "Smoother resid "<<std::sqrt(r/Ni)<<std::endl;
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void operatorCheby(const FineField &in, FineField & out) {
|
|
||||||
|
|
||||||
CoarseVector Csrc(_CoarseOperator.Grid());
|
|
||||||
CoarseVector Ctmp(_CoarseOperator.Grid());
|
|
||||||
CoarseVector Csol(_CoarseOperator.Grid()); Csol=Zero();
|
|
||||||
|
|
||||||
ConjugateGradient<CoarseVector> CG(3.0e-3,100000);
|
|
||||||
|
|
||||||
HermitianLinearOperator<CoarseOperator,CoarseVector> HermOp(_CoarseOperator);
|
|
||||||
MdagMLinearOperator<CoarseOperator,CoarseVector> MdagMOp(_CoarseOperator);
|
|
||||||
// MdagMLinearOperator<Matrix,FineField> fMdagMOp(_FineMatrix);
|
|
||||||
ShiftedMdagMLinearOperator<Matrix,FineField> fMdagMOp(_SmootherMatrix,0.0);
|
|
||||||
|
|
||||||
FineField vec1(in.Grid());
|
FineField vec1(in.Grid());
|
||||||
FineField vec2(in.Grid());
|
FineField vec2(in.Grid());
|
||||||
|
|
||||||
Chebyshev<FineField> Cheby (params.lo,params.hi,params.order,InverseApproximation);
|
double t;
|
||||||
Chebyshev<FineField> ChebyAccu(params.lo,params.hi,params.order,InverseApproximation);
|
// Fine Smoother
|
||||||
|
t=-usecond();
|
||||||
|
_Smoother(in,out);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Smoother took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
_Aggregates.ProjectToSubspace (Csrc,in);
|
// Update the residual
|
||||||
|
_FineOperator.Op(out,vec1); sub(vec1, in ,vec1);
|
||||||
// _Aggregates.PromoteFromSubspace(Csrc,out);
|
|
||||||
// std::cout<<GridLogMessage<<"Completeness: "<<std::sqrt(norm2(out)/norm2(in))<<std::endl;
|
|
||||||
|
|
||||||
// ofstream fout("smoother");
|
|
||||||
// Cheby.csv(fout);
|
|
||||||
|
|
||||||
// V11 multigrid.
|
|
||||||
// Use a fixed chebyshev and hope hermiticity helps.
|
|
||||||
|
|
||||||
// To make a working smoother for indefinite operator
|
|
||||||
// must multiply by "Mdag" (ouch loses all low mode content)
|
|
||||||
// and apply to poly approx of (mdagm)^-1.
|
|
||||||
// so that we end up with an odd polynomial.
|
|
||||||
|
|
||||||
RealD Ni = norm2(in);
|
|
||||||
|
|
||||||
_SmootherOperator.AdjOp(in,vec1);// this is the G5 herm bit
|
|
||||||
ChebyAccu(fMdagMOp,vec1,out); // solves MdagM = g5 M g5M
|
|
||||||
|
|
||||||
// Update with residual for out
|
|
||||||
_FineOperator.Op(out,vec1);// this is the G5 herm bit
|
|
||||||
vec1 = in - vec1; // tmp = in - A Min
|
|
||||||
|
|
||||||
RealD r = norm2(vec1);
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Smoother resid "<<std::sqrt(r/Ni)<< " " << r << " " << Ni <<std::endl;
|
|
||||||
|
|
||||||
|
// Fine to Coarse
|
||||||
|
t=-usecond();
|
||||||
_Aggregates.ProjectToSubspace (Csrc,vec1);
|
_Aggregates.ProjectToSubspace (Csrc,vec1);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Project to coarse took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
HermOp.AdjOp(Csrc,Ctmp);// Normal equations // This appears to be zero.
|
// Coarse correction
|
||||||
CG(MdagMOp,Ctmp,Csol);
|
t=-usecond();
|
||||||
_Aggregates.PromoteFromSubspace(Csol,vec1); // Ass^{-1} [in - A Min]_s
|
_CoarseSolve(Csrc,Csol);
|
||||||
// Q = Q[in - A Min]
|
t+=usecond();
|
||||||
out = out+vec1;
|
GridLogLevel << "Coarse solve took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
// Three preconditioner smoothing -- hermitian if C3 = C1
|
// Coarse to Fine
|
||||||
// Recompute error
|
t=-usecond();
|
||||||
_FineOperator.Op(out,vec1);// this is the G5 herm bit
|
_Aggregates.PromoteFromSubspace(Csol,vec1);
|
||||||
vec1 = in - vec1; // tmp = in - A Min
|
add(out,out,vec1);
|
||||||
r=norm2(vec1);
|
t+=usecond();
|
||||||
|
GridLogLevel << "Promote to this level took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Coarse resid "<<std::sqrt(r/Ni)<<std::endl;
|
// Residual
|
||||||
|
_FineOperator.Op(out,vec1); sub(vec1 ,in , vec1);
|
||||||
|
|
||||||
// Reapply smoother
|
// Fine Smoother
|
||||||
_SmootherOperator.Op(vec1,vec2); // this is the G5 herm bit
|
t=-usecond();
|
||||||
ChebyAccu(fMdagMOp,vec2,vec1); // solves MdagM = g5 M g5M
|
_Smoother(vec1,vec2);
|
||||||
|
t+=usecond();
|
||||||
out =out+vec1;
|
GridLogLevel << "Smoother took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
vec1 = in - vec1; // tmp = in - A Min
|
|
||||||
r=norm2(vec1);
|
|
||||||
std::cout<<GridLogMessage << "Smoother resid "<<std::sqrt(r/Ni)<<std::endl;
|
|
||||||
|
|
||||||
|
add( out,out,vec2);
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
int main (int argc, char ** argv)
|
int main (int argc, char ** argv)
|
||||||
{
|
{
|
||||||
Grid_init(&argc,&argv);
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
XmlReader RD("params.xml");
|
const int Ls=16;
|
||||||
read(RD,"params",params);
|
|
||||||
std::cout<<"Params: Order "<<params.order<<"["<<params.lo<<","<<params.hi<<"]"<< " steps "<<params.steps<<std::endl;
|
|
||||||
|
|
||||||
const int Ls=params.Ls;
|
|
||||||
|
|
||||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
@ -357,14 +210,22 @@ int main (int argc, char ** argv)
|
|||||||
// Construct a coarsened grid; utility for this?
|
// Construct a coarsened grid; utility for this?
|
||||||
///////////////////////////////////////////////////
|
///////////////////////////////////////////////////
|
||||||
std::vector<int> block ({2,2,2,2});
|
std::vector<int> block ({2,2,2,2});
|
||||||
|
std::vector<int> blockc ({2,2,2,2});
|
||||||
const int nbasis= 32;
|
const int nbasis= 32;
|
||||||
|
const int nbasisc= 32;
|
||||||
auto clatt = GridDefaultLatt();
|
auto clatt = GridDefaultLatt();
|
||||||
for(int d=0;d<clatt.size();d++){
|
for(int d=0;d<clatt.size();d++){
|
||||||
clatt[d] = clatt[d]/block[d];
|
clatt[d] = clatt[d]/block[d];
|
||||||
}
|
}
|
||||||
|
auto cclatt = clatt;
|
||||||
|
for(int d=0;d<clatt.size();d++){
|
||||||
|
cclatt[d] = clatt[d]/blockc[d];
|
||||||
|
}
|
||||||
|
|
||||||
GridCartesian *Coarse4d = SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
|
GridCartesian *Coarse4d = SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
|
||||||
GridCartesian *Coarse5d = SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
|
GridCartesian *Coarse5d = SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
|
||||||
|
GridCartesian *CoarseCoarse4d = SpaceTimeGrid::makeFourDimGrid(cclatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
|
||||||
|
GridCartesian *CoarseCoarse5d = SpaceTimeGrid::makeFiveDimGrid(1,CoarseCoarse4d);
|
||||||
|
|
||||||
std::vector<int> seeds4({1,2,3,4});
|
std::vector<int> seeds4({1,2,3,4});
|
||||||
std::vector<int> seeds5({5,6,7,8});
|
std::vector<int> seeds5({5,6,7,8});
|
||||||
@ -372,186 +233,167 @@ int main (int argc, char ** argv)
|
|||||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||||
GridParallelRNG CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds);
|
GridParallelRNG CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds);
|
||||||
|
|
||||||
Gamma g5(Gamma::Algebra::Gamma5);
|
|
||||||
|
|
||||||
LatticeFermion src(FGrid); gaussian(RNG5,src);// src=src+g5*src;
|
LatticeFermion src(FGrid); gaussian(RNG5,src);// src=src+g5*src;
|
||||||
LatticeFermion result(FGrid); result=Zero();
|
LatticeFermion result(FGrid);
|
||||||
LatticeFermion ref(FGrid); ref=Zero();
|
|
||||||
LatticeFermion tmp(FGrid);
|
|
||||||
LatticeFermion err(FGrid);
|
|
||||||
LatticeGaugeField Umu(UGrid);
|
LatticeGaugeField Umu(UGrid);
|
||||||
LatticeGaugeField UmuDD(UGrid);
|
|
||||||
LatticeColourMatrix U(UGrid);
|
|
||||||
LatticeColourMatrix zz(UGrid);
|
|
||||||
|
|
||||||
FieldMetaData header;
|
FieldMetaData header;
|
||||||
std::string file("./ckpoint_lat.4000");
|
std::string file("./ckpoint_lat.4000");
|
||||||
NerscIO::readConfiguration(Umu,header,file);
|
NerscIO::readConfiguration(Umu,header,file);
|
||||||
|
|
||||||
|
|
||||||
if ( params.domaindecompose ) {
|
|
||||||
Lattice<iScalar<vInteger> > coor(UGrid);
|
|
||||||
zz=Zero();
|
|
||||||
for(int mu=0;mu<Nd;mu++){
|
|
||||||
LatticeCoordinate(coor,mu);
|
|
||||||
U = PeekIndex<LorentzIndex>(Umu,mu);
|
|
||||||
U = where(mod(coor,params.domainsize)==(Integer)0,zz,U);
|
|
||||||
PokeIndex<LorentzIndex>(UmuDD,U,mu);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
UmuDD = Umu;
|
|
||||||
}
|
|
||||||
// SU3::ColdConfiguration(RNG4,Umu);
|
|
||||||
// SU3::TepidConfiguration(RNG4,Umu);
|
|
||||||
// SU3::HotConfiguration(RNG4,Umu);
|
|
||||||
// Umu=Zero();
|
|
||||||
|
|
||||||
RealD mass=params.mq;
|
|
||||||
RealD M5=1.8;
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
std::cout<<GridLogMessage << "Building g5R5 hermitian DWF operator" <<std::endl;
|
std::cout<<GridLogMessage << "Building g5R5 hermitian DWF operator" <<std::endl;
|
||||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
RealD mass=0.001;
|
||||||
|
RealD M5=1.8;
|
||||||
DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||||
DomainWallFermionR DdwfDD(UmuDD,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
|
||||||
|
|
||||||
typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
|
typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
|
||||||
typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> CoarseOperator;
|
typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> CoarseOperator;
|
||||||
typedef CoarseOperator::CoarseVector CoarseVector;
|
typedef CoarseOperator::CoarseVector CoarseVector;
|
||||||
|
typedef CoarseOperator::siteVector siteVector;
|
||||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
std::cout<<GridLogMessage << "Calling Aggregation class to build subspace" <<std::endl;
|
std::cout<<GridLogMessage << "Calling Aggregation class to build subspace" <<std::endl;
|
||||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermDefOp(Ddwf);
|
MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermDefOp(Ddwf);
|
||||||
Subspace Aggregates(Coarse5d,FGrid,0);
|
|
||||||
// Aggregates.CreateSubspace(RNG5,HermDefOp,nbasis);
|
|
||||||
assert ( (nbasis & 0x1)==0);
|
|
||||||
int nb=nbasis/2;
|
|
||||||
std::cout<<GridLogMessage << " nbasis/2 = "<<nb<<std::endl;
|
|
||||||
// Aggregates.CreateSubspace(RNG5,HermDefOp,nb);
|
|
||||||
// Aggregates.CreateSubspaceLanczos(RNG5,HermDefOp,nb);
|
|
||||||
Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb);
|
|
||||||
for(int n=0;n<nb;n++){
|
|
||||||
G5R5(Aggregates.subspace[n+nb],Aggregates.subspace[n]);
|
|
||||||
std::cout<<GridLogMessage<<n<<" subspace "<<norm2(Aggregates.subspace[n+nb])<<" "<<norm2(Aggregates.subspace[n]) <<std::endl;
|
|
||||||
}
|
|
||||||
for(int n=0;n<nbasis;n++){
|
|
||||||
std::cout<<GridLogMessage << "vec["<<n<<"] = "<<norm2(Aggregates.subspace[n]) <<std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
// for(int i=0;i<nbasis;i++){
|
Subspace Aggregates(Coarse5d,FGrid,0);
|
||||||
// result = Aggregates.subspace[i];
|
|
||||||
// Aggregates.subspace[i]=result+g5*result;
|
assert ( (nbasis & 0x1)==0);
|
||||||
// }
|
{
|
||||||
result=Zero();
|
int nb=nbasis/2;
|
||||||
|
Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.02,500,100,100,0.0);
|
||||||
|
for(int n=0;n<nb;n++){
|
||||||
|
G5R5(Aggregates.subspace[n+nb],Aggregates.subspace[n]);
|
||||||
|
}
|
||||||
|
LatticeFermion A(FGrid);
|
||||||
|
LatticeFermion B(FGrid);
|
||||||
|
for(int n=0;n<nb;n++){
|
||||||
|
A = Aggregates.subspace[n];
|
||||||
|
B = Aggregates.subspace[n+nb];
|
||||||
|
Aggregates.subspace[n] = A+B; // 1+G5 // eigen value of G5R5 is +1
|
||||||
|
Aggregates.subspace[n+nb]= A-B; // 1-G5 // eigen value of G5R5 is -1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
std::cout<<GridLogMessage << "Building coarse representation of Indef operator" <<std::endl;
|
std::cout<<GridLogMessage << "Building coarse representation of Indef operator" <<std::endl;
|
||||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> Level1Op;
|
||||||
|
typedef CoarsenedMatrix<siteVector,iScalar<vTComplex>,nbasisc> Level2Op;
|
||||||
|
|
||||||
Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> HermIndefOp(Ddwf);
|
Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> HermIndefOp(Ddwf);
|
||||||
Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> HermIndefOpDD(DdwfDD);
|
|
||||||
CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LDOp(*Coarse5d,1); // Hermitian matrix
|
Level1Op LDOp(*Coarse5d,1); LDOp.CoarsenOperator(FGrid,HermIndefOp,Aggregates);
|
||||||
LDOp.CoarsenOperator(FGrid,HermIndefOp,Aggregates);
|
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////
|
||||||
|
// Deflate the course space. Recursive multigrid?
|
||||||
|
//////////////////////////////////////////////////
|
||||||
|
typedef Aggregation<siteVector,iScalar<vTComplex>,nbasisc> CoarseSubspace;
|
||||||
|
CoarseSubspace CoarseAggregates(CoarseCoarse5d,Coarse5d,0);
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
std::cout<<GridLogMessage << "Testing some coarse space solvers " <<std::endl;
|
std::cout<<GridLogMessage << "Build deflation space in coarse operator "<< std::endl;
|
||||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
CoarseVector c_src (Coarse5d);
|
|
||||||
CoarseVector c_res (Coarse5d);
|
|
||||||
gaussian(CRNG,c_src);
|
|
||||||
c_res=Zero();
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
|
||||||
std::cout<<GridLogMessage << "Solving posdef-CG on coarse space "<< std::endl;
|
|
||||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
|
||||||
MdagMLinearOperator<CoarseOperator,CoarseVector> PosdefLdop(LDOp);
|
MdagMLinearOperator<CoarseOperator,CoarseVector> PosdefLdop(LDOp);
|
||||||
ConjugateGradient<CoarseVector> CG(1.0e-6,100000);
|
{
|
||||||
CG(PosdefLdop,c_src,c_res);
|
int nb=nbasisc/2;
|
||||||
|
CoarseAggregates.CreateSubspaceChebyshev(CRNG,PosdefLdop,nb,12.0,0.02,500,100,100,0.0);
|
||||||
|
for(int n=0;n<nb;n++){
|
||||||
|
auto subspace = CoarseAggregates.subspace[n].View();
|
||||||
|
auto subspace_g5 = CoarseAggregates.subspace[n+nb].View();
|
||||||
|
for(int nn=0;nn<nb;nn++){
|
||||||
|
for(int site=0;site<Coarse5d->oSites();site++){
|
||||||
|
subspace_g5[site](nn) = subspace[site](nn);
|
||||||
|
subspace_g5[site](nn+nb)=-subspace[site](nn+nb);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Level2Op L2Op(*CoarseCoarse5d,1); // Hermitian matrix
|
||||||
|
typedef Level2Op::CoarseVector CoarseCoarseVector;
|
||||||
|
HermitianLinearOperator<Level1Op,CoarseVector> L1LinOp(LDOp);
|
||||||
|
L2Op.CoarsenOperator(Coarse5d,L1LinOp,CoarseAggregates);
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
|
||||||
std::cout<<GridLogMessage << "Solving indef-MCR on coarse space "<< std::endl;
|
|
||||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
|
||||||
// HermitianLinearOperator<CoarseOperator,CoarseVector> HermIndefLdop(LDOp);
|
|
||||||
// ConjugateResidual<CoarseVector> MCR(1.0e-6,100000);
|
|
||||||
// MCR(HermIndefLdop,c_src,c_res);
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
std::cout<<GridLogMessage << "Building deflation preconditioner "<< std::endl;
|
std::cout<<GridLogMessage << " Running CoarseCoarse grid Lanczos "<< std::endl;
|
||||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
|
||||||
MultiGridPreconditioner <vSpinColourVector,vTComplex,nbasis,DomainWallFermionR> Precon (Aggregates, LDOp,
|
MdagMLinearOperator<Level2Op,CoarseCoarseVector> IRLHermOpL2(L2Op);
|
||||||
HermIndefOp,Ddwf,
|
Chebyshev<CoarseCoarseVector> IRLChebyL2(0.001,4.2,71);
|
||||||
HermIndefOp,Ddwf);
|
FunctionHermOp<CoarseCoarseVector> IRLOpChebyL2(IRLChebyL2,IRLHermOpL2);
|
||||||
|
PlainHermOp<CoarseCoarseVector> IRLOpL2 (IRLHermOpL2);
|
||||||
|
int cNk=24;
|
||||||
|
int cNm=36;
|
||||||
|
int cNstop=24;
|
||||||
|
ImplicitlyRestartedLanczos<CoarseCoarseVector> IRLL2(IRLOpChebyL2,IRLOpL2,cNstop,cNk,cNm,1.0e-3,20);
|
||||||
|
|
||||||
// MultiGridPreconditioner <vSpinColourVector,vTComplex,nbasis,DomainWallFermionR> PreconDD(Aggregates, LDOp,
|
int cNconv;
|
||||||
// HermIndefOp,Ddwf,
|
std::vector<RealD> eval2(cNm);
|
||||||
// HermIndefOpDD,DdwfDD);
|
std::vector<CoarseCoarseVector> evec2(cNm,CoarseCoarse5d);
|
||||||
// TrivialPrecon<LatticeFermion> simple;
|
CoarseCoarseVector cc_src(CoarseCoarse5d); cc_src=1.0;
|
||||||
|
IRLL2.calc(eval2,evec2,cc_src,cNconv);
|
||||||
|
|
||||||
|
ConjugateGradient<CoarseCoarseVector> CoarseCoarseCG(0.1,1000);
|
||||||
|
DeflatedGuesser<CoarseCoarseVector> DeflCoarseCoarseGuesser(evec2,eval2);
|
||||||
|
NormalEquations<CoarseCoarseVector> DeflCoarseCoarseCGNE(L2Op,CoarseCoarseCG,DeflCoarseCoarseGuesser);
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
std::cout<<GridLogMessage << "Testing smoother efficacy"<< std::endl;
|
std::cout<<GridLogMessage << "Building 3 level Multigrid "<< std::endl;
|
||||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
Precon.SmootherTest(src);
|
typedef MultiGridPreconditioner<vSpinColourVector, vTComplex,nbasis, DomainWallFermionR,DeflatedGuesser<CoarseVector> , NormalEquations<CoarseVector> > TwoLevelMG;
|
||||||
|
typedef MultiGridPreconditioner<siteVector,iScalar<vTComplex>,nbasisc,Level1Op, DeflatedGuesser<CoarseCoarseVector>, NormalEquations<CoarseCoarseVector> > CoarseMG;
|
||||||
|
typedef MultiGridPreconditioner<vSpinColourVector, vTComplex,nbasis, DomainWallFermionR,ZeroGuesser<CoarseVector>, LinearFunction<CoarseVector> > ThreeLevelMG;
|
||||||
|
|
||||||
// std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
// MultiGrid preconditioner acting on the coarse space <-> coarsecoarse space
|
||||||
// std::cout<<GridLogMessage << "Testing DD smoother efficacy"<< std::endl;
|
ChebyshevSmoother<CoarseVector, Level1Op > CoarseSmoother(0.1,12.0,3,L1LinOp,LDOp);
|
||||||
// std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(0.5,60.0,10,HermIndefOp,Ddwf);
|
||||||
// PreconDD.SmootherTest(src);
|
|
||||||
|
|
||||||
// std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
// MirsSmoother<CoarseVector, Level1Op > CoarseCGSmoother(0.1,0.1,4,L1LinOp,LDOp);
|
||||||
// std::cout<<GridLogMessage << "Testing SAP smoother efficacy"<< std::endl;
|
// MirsSmoother<LatticeFermion,DomainWallFermionR> FineCGSmoother(0.0,0.01,8,HermIndefOp,Ddwf);
|
||||||
// std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
|
||||||
// PreconDD.SAP(src,result);
|
|
||||||
|
|
||||||
// std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
CoarseMG Level2Precon (CoarseAggregates, L2Op,
|
||||||
// std::cout<<GridLogMessage << "Unprec CG "<< std::endl;
|
L1LinOp,LDOp,
|
||||||
// std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
CoarseSmoother,
|
||||||
|
DeflCoarseCoarseGuesser,
|
||||||
|
DeflCoarseCoarseCGNE);
|
||||||
|
Level2Precon.Level(2);
|
||||||
|
|
||||||
// TrivialPrecon<LatticeFermion> simple;
|
// PGCR Applying this solver to solve the coarse space problem
|
||||||
// ConjugateGradient<LatticeFermion> fCG(1.0e-8,100000);
|
PrecGeneralisedConjugateResidual<CoarseVector> l2PGCR(0.1, 100, L1LinOp,Level2Precon,16,16);
|
||||||
// fCG(HermDefOp,src,result);
|
l2PGCR.Level(2);
|
||||||
|
|
||||||
|
// Wrap the 2nd level solver in a MultiGrid preconditioner acting on the fine space
|
||||||
|
ZeroGuesser<CoarseVector> CoarseZeroGuesser;
|
||||||
|
ThreeLevelMG ThreeLevelPrecon(Aggregates, LDOp,
|
||||||
|
HermIndefOp,Ddwf,
|
||||||
|
FineSmoother,
|
||||||
|
CoarseZeroGuesser,
|
||||||
|
l2PGCR);
|
||||||
|
ThreeLevelPrecon.Level(1);
|
||||||
|
|
||||||
|
// Apply the fine-coarse-coarsecoarse 2 deep MG preconditioner in an outer PGCR on the fine fgrid
|
||||||
|
PrecGeneralisedConjugateResidual<LatticeFermion> l1PGCR(1.0e-8,1000,HermIndefOp,ThreeLevelPrecon,16,16);
|
||||||
|
l1PGCR.Level(1);
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
std::cout<<GridLogMessage << "Red Black Prec CG "<< std::endl;
|
std::cout<<GridLogMessage << "Calling 3 level Multigrid "<< std::endl;
|
||||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
LatticeFermion src_o(FrbGrid);
|
|
||||||
LatticeFermion result_o(FrbGrid);
|
|
||||||
pickCheckerboard(Odd,src_o,src);
|
|
||||||
result_o=Zero();
|
|
||||||
SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> HermOpEO(Ddwf);
|
|
||||||
ConjugateGradient<LatticeFermion> pCG(1.0e-8,10000);
|
|
||||||
// pCG(HermOpEO,src_o,result_o);
|
|
||||||
|
|
||||||
// std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
|
||||||
// std::cout<<GridLogMessage << "Testing GCR on indef matrix "<< std::endl;
|
|
||||||
// std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
|
||||||
// PrecGeneralisedConjugateResidual<LatticeFermion> UPGCR(1.0e-8,100000,simple,8,128);
|
|
||||||
// UPGCR(HermIndefOp,src,result);
|
|
||||||
|
|
||||||
|
|
||||||
/// Get themax eval
|
|
||||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
|
||||||
std::cout<<GridLogMessage <<" Applying power method to find spectral range "<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
|
||||||
Precon.PowerMethod(src);
|
|
||||||
|
|
||||||
|
|
||||||
// std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
|
||||||
// std::cout<<GridLogMessage << "Building a two level DDPGCR "<< std::endl;
|
|
||||||
// std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
|
||||||
// PrecGeneralisedConjugateResidual<LatticeFermion> PGCRDD(1.0e-8,100000,PreconDD,8,128);
|
|
||||||
// result=Zero();
|
|
||||||
// std::cout<<GridLogMessage<<"checking norm src "<<norm2(src)<<std::endl;
|
|
||||||
// PGCRDD(HermIndefOp,src,result);
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
|
||||||
std::cout<<GridLogMessage << "Building a two level PGCR "<< std::endl;
|
|
||||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
|
||||||
PrecGeneralisedConjugateResidual<LatticeFermion> PGCR(1.0e-8,100000,Precon,8,8);
|
|
||||||
std::cout<<GridLogMessage<<"checking norm src "<<norm2(src)<<std::endl;
|
|
||||||
result=Zero();
|
result=Zero();
|
||||||
PGCR(HermIndefOp,src,result);
|
l1PGCR(src,result);
|
||||||
|
|
||||||
|
CoarseVector c_src(Coarse5d); c_src=1.0;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << " Fine PowerMethod "<< std::endl;
|
||||||
|
PowerMethod<LatticeFermion> PM; PM(HermDefOp,src);
|
||||||
|
std::cout<<GridLogMessage << " Coarse PowerMethod "<< std::endl;
|
||||||
|
PowerMethod<CoarseVector> cPM; cPM(PosdefLdop,c_src);
|
||||||
|
std::cout<<GridLogMessage << " CoarseCoarse PowerMethod "<< std::endl;
|
||||||
|
PowerMethod<CoarseCoarseVector> ccPM; ccPM(IRLHermOpL2,cc_src);
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
std::cout<<GridLogMessage << "Done "<< std::endl;
|
std::cout<<GridLogMessage << "Done "<< std::endl;
|
||||||
|
Loading…
Reference in New Issue
Block a user