mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	Merge branch 'develop' into feature/zmobius_paramcompute
This commit is contained in:
		@@ -35,6 +35,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
#include <Grid/algorithms/approx/Zolotarev.h>
 | 
			
		||||
#include <Grid/algorithms/approx/Chebyshev.h>
 | 
			
		||||
#include <Grid/algorithms/approx/JacobiPolynomial.h>
 | 
			
		||||
#include <Grid/algorithms/approx/Remez.h>
 | 
			
		||||
#include <Grid/algorithms/approx/MultiShiftFunction.h>
 | 
			
		||||
#include <Grid/algorithms/approx/Forecast.h>
 | 
			
		||||
@@ -43,11 +44,13 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
#include <Grid/algorithms/iterative/Deflation.h>
 | 
			
		||||
#include <Grid/algorithms/iterative/ConjugateGradient.h>
 | 
			
		||||
#include <Grid/algorithms/iterative/BiCGSTAB.h>
 | 
			
		||||
#include <Grid/algorithms/iterative/ConjugateResidual.h>
 | 
			
		||||
#include <Grid/algorithms/iterative/NormalEquations.h>
 | 
			
		||||
#include <Grid/algorithms/iterative/SchurRedBlack.h>
 | 
			
		||||
#include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h>
 | 
			
		||||
#include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
 | 
			
		||||
#include <Grid/algorithms/iterative/BiCGSTABMixedPrec.h>
 | 
			
		||||
#include <Grid/algorithms/iterative/BlockConjugateGradient.h>
 | 
			
		||||
#include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h>
 | 
			
		||||
#include <Grid/algorithms/iterative/MinimalResidual.h>
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,14 @@
 | 
			
		||||
    // blockZaxpy in bockPromote - 3s, 5%
 | 
			
		||||
    // noncoalesced linalg in Preconditionoer ~ 3s 5%
 | 
			
		||||
    // Lancos tuning or replace 10-20s ~ 25%, open ended
 | 
			
		||||
    // setup tuning   5s  ~  8%
 | 
			
		||||
    //    -- e.g. ordermin, orderstep tunables.
 | 
			
		||||
    // MdagM path without norm in LinOp code.     few seconds
 | 
			
		||||
 | 
			
		||||
    // Mdir calc blocking kernels
 | 
			
		||||
    // Fuse kernels in blockMaskedInnerProduct
 | 
			
		||||
    // preallocate Vectors in Cayley 5D ~ few percent few seconds
 | 
			
		||||
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
@@ -34,15 +45,36 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
NAMESPACE_BEGIN(Grid);
 | 
			
		||||
 | 
			
		||||
template<class vobj,class CComplex>
 | 
			
		||||
inline void blockMaskedInnerProduct(Lattice<CComplex> &CoarseInner,
 | 
			
		||||
				    const Lattice<decltype(innerProduct(vobj(),vobj()))> &FineMask,
 | 
			
		||||
				    const Lattice<vobj> &fineX,
 | 
			
		||||
				    const Lattice<vobj> &fineY)
 | 
			
		||||
{
 | 
			
		||||
  typedef decltype(innerProduct(vobj(),vobj())) dotp;
 | 
			
		||||
 | 
			
		||||
  GridBase *coarse(CoarseInner.Grid());
 | 
			
		||||
  GridBase *fine  (fineX.Grid());
 | 
			
		||||
 | 
			
		||||
  Lattice<dotp> fine_inner(fine); fine_inner.Checkerboard() = fineX.Checkerboard();
 | 
			
		||||
  Lattice<dotp> fine_inner_msk(fine);
 | 
			
		||||
 | 
			
		||||
  // Multiply could be fused with innerProduct
 | 
			
		||||
  // Single block sum kernel could do both masks.
 | 
			
		||||
  fine_inner = localInnerProduct(fineX,fineY);
 | 
			
		||||
  mult(fine_inner_msk, fine_inner,FineMask);
 | 
			
		||||
  blockSum(CoarseInner,fine_inner_msk);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Geometry {
 | 
			
		||||
  //    int dimension;
 | 
			
		||||
public:
 | 
			
		||||
  int npoint;
 | 
			
		||||
  std::vector<int> directions   ;
 | 
			
		||||
  std::vector<int> displacements;
 | 
			
		||||
 | 
			
		||||
  Geometry(int _d)  {
 | 
			
		||||
  
 | 
			
		||||
    
 | 
			
		||||
    int base = (_d==5) ? 1:0;
 | 
			
		||||
 | 
			
		||||
    // make coarse grid stencil for 4d , not 5d
 | 
			
		||||
@@ -52,10 +84,10 @@ public:
 | 
			
		||||
    directions.resize(npoint);
 | 
			
		||||
    displacements.resize(npoint);
 | 
			
		||||
    for(int d=0;d<_d;d++){
 | 
			
		||||
      directions[2*d  ] = d+base;
 | 
			
		||||
      directions[2*d+1] = d+base;
 | 
			
		||||
      displacements[2*d  ] = +1;
 | 
			
		||||
      displacements[2*d+1] = -1;
 | 
			
		||||
      directions[d   ] = d+base;
 | 
			
		||||
      directions[d+_d] = d+base;
 | 
			
		||||
      displacements[d  ] = +1;
 | 
			
		||||
      displacements[d+_d]= -1;
 | 
			
		||||
    }
 | 
			
		||||
    directions   [2*_d]=0;
 | 
			
		||||
    displacements[2*_d]=0;
 | 
			
		||||
@@ -63,7 +95,7 @@ public:
 | 
			
		||||
    //// report back
 | 
			
		||||
    std::cout<<GridLogMessage<<"directions    :";
 | 
			
		||||
    for(int d=0;d<npoint;d++) std::cout<< directions[d]<< " ";
 | 
			
		||||
    std::cout <<std::endl;
 | 
			
		||||
    std::cout<<std::endl;
 | 
			
		||||
    std::cout<<GridLogMessage<<"displacements :";
 | 
			
		||||
    for(int d=0;d<npoint;d++) std::cout<< displacements[d]<< " ";
 | 
			
		||||
    std::cout<<std::endl;
 | 
			
		||||
@@ -115,10 +147,10 @@ public:
 | 
			
		||||
  
 | 
			
		||||
  void Orthogonalise(void){
 | 
			
		||||
    CoarseScalar InnerProd(CoarseGrid); 
 | 
			
		||||
    std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
 | 
			
		||||
    blockOrthogonalise(InnerProd,subspace);
 | 
			
		||||
    std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
 | 
			
		||||
    std::cout << GridLogMessage <<" Block Gramm-Schmidt pass 1"<<std::endl;
 | 
			
		||||
    blockOrthogonalise(InnerProd,subspace);
 | 
			
		||||
    //    std::cout << GridLogMessage <<" Block Gramm-Schmidt pass 2"<<std::endl; // Really have to do twice? Yuck
 | 
			
		||||
    //    blockOrthogonalise(InnerProd,subspace);
 | 
			
		||||
    //      std::cout << GridLogMessage <<" Gramm-Schmidt checking orthogonality"<<std::endl;
 | 
			
		||||
    //      CheckOrthogonal();
 | 
			
		||||
  } 
 | 
			
		||||
@@ -128,7 +160,7 @@ public:
 | 
			
		||||
    for(int i=0;i<nbasis;i++){
 | 
			
		||||
      blockProject(iProj,subspace[i],subspace);
 | 
			
		||||
      eProj=Zero(); 
 | 
			
		||||
      thread_for(ss, CoarseGrid->oSites(),{
 | 
			
		||||
      accelerator_for(ss, CoarseGrid->oSites(),1,{
 | 
			
		||||
	eProj[ss](i)=CComplex(1.0);
 | 
			
		||||
      });
 | 
			
		||||
      eProj=eProj - iProj;
 | 
			
		||||
@@ -146,61 +178,9 @@ public:
 | 
			
		||||
  void CreateSubspaceRandom(GridParallelRNG &RNG){
 | 
			
		||||
    for(int i=0;i<nbasis;i++){
 | 
			
		||||
      random(RNG,subspace[i]);
 | 
			
		||||
      std::cout<<GridLogMessage<<" norm subspace["<<i<<"] "<<norm2(subspace[i])<<std::endl;
 | 
			
		||||
    }
 | 
			
		||||
    Orthogonalise();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /*
 | 
			
		||||
    virtual void CreateSubspaceLanczos(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) 
 | 
			
		||||
    {
 | 
			
		||||
    // Run a Lanczos with sloppy convergence
 | 
			
		||||
    const int Nstop = nn;
 | 
			
		||||
    const int Nk = nn+20;
 | 
			
		||||
    const int Np = nn+20;
 | 
			
		||||
    const int Nm = Nk+Np;
 | 
			
		||||
    const int MaxIt= 10000;
 | 
			
		||||
    RealD resid = 1.0e-3;
 | 
			
		||||
 | 
			
		||||
    Chebyshev<FineField> Cheb(0.5,64.0,21);
 | 
			
		||||
    ImplicitlyRestartedLanczos<FineField> IRL(hermop,Cheb,Nstop,Nk,Nm,resid,MaxIt);
 | 
			
		||||
    //	IRL.lock = 1;
 | 
			
		||||
 | 
			
		||||
    FineField noise(FineGrid); gaussian(RNG,noise);
 | 
			
		||||
    FineField tmp(FineGrid); 
 | 
			
		||||
    std::vector<RealD>     eval(Nm);
 | 
			
		||||
    std::vector<FineField> evec(Nm,FineGrid);
 | 
			
		||||
 | 
			
		||||
    int Nconv;
 | 
			
		||||
    IRL.calc(eval,evec,
 | 
			
		||||
    noise,
 | 
			
		||||
    Nconv);
 | 
			
		||||
 | 
			
		||||
    // pull back nn vectors
 | 
			
		||||
    for(int b=0;b<nn;b++){
 | 
			
		||||
 | 
			
		||||
    subspace[b]   = evec[b];
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl;
 | 
			
		||||
 | 
			
		||||
    hermop.Op(subspace[b],tmp); 
 | 
			
		||||
    std::cout<<GridLogMessage << "filtered["<<b<<"] <f|MdagM|f> "<<norm2(tmp)<<std::endl;
 | 
			
		||||
 | 
			
		||||
    noise = tmp -  sqrt(eval[b])*subspace[b] ;
 | 
			
		||||
 | 
			
		||||
    std::cout<<GridLogMessage << " lambda_"<<b<<" = "<< eval[b] <<"  ;  [ M - Lambda ]_"<<b<<" vec_"<<b<<"  = " <<norm2(noise)<<std::endl;
 | 
			
		||||
 | 
			
		||||
    noise = tmp +  eval[b]*subspace[b] ;
 | 
			
		||||
 | 
			
		||||
    std::cout<<GridLogMessage << " lambda_"<<b<<" = "<< eval[b] <<"  ;  [ M - Lambda ]_"<<b<<" vec_"<<b<<"  = " <<norm2(noise)<<std::endl;
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
    Orthogonalise();
 | 
			
		||||
    for(int b=0;b<nn;b++){
 | 
			
		||||
    std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl;
 | 
			
		||||
    }
 | 
			
		||||
    }
 | 
			
		||||
  */
 | 
			
		||||
  virtual void CreateSubspace(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) {
 | 
			
		||||
 | 
			
		||||
    RealD scale;
 | 
			
		||||
@@ -232,54 +212,316 @@ public:
 | 
			
		||||
      subspace[b]   = noise;
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    Orthogonalise();
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  virtual void CreateSubspaceChebyshev(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) {
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // World of possibilities here. But have tried quite a lot of experiments (250+ jobs run on Summit)
 | 
			
		||||
  // and this is the best I found
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
#if 1
 | 
			
		||||
  virtual void CreateSubspaceChebyshev(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,
 | 
			
		||||
				       int nn,
 | 
			
		||||
				       double hi,
 | 
			
		||||
				       double lo,
 | 
			
		||||
				       int orderfilter,
 | 
			
		||||
				       int ordermin,
 | 
			
		||||
				       int orderstep,
 | 
			
		||||
				       double filterlo
 | 
			
		||||
				       ) {
 | 
			
		||||
 | 
			
		||||
    RealD scale;
 | 
			
		||||
 | 
			
		||||
    FineField noise(FineGrid);
 | 
			
		||||
    FineField Mn(FineGrid);
 | 
			
		||||
    FineField tmp(FineGrid);
 | 
			
		||||
 | 
			
		||||
    Chebyshev<FineField> Cheb(0.1,64.0,900);
 | 
			
		||||
    // New normalised noise
 | 
			
		||||
    gaussian(RNG,noise);
 | 
			
		||||
    scale = std::pow(norm2(noise),-0.5); 
 | 
			
		||||
    noise=noise*scale;
 | 
			
		||||
 | 
			
		||||
    // Initial matrix element
 | 
			
		||||
    hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl;
 | 
			
		||||
 | 
			
		||||
    int b =0;
 | 
			
		||||
    {
 | 
			
		||||
      // Filter
 | 
			
		||||
      Chebyshev<FineField> Cheb(lo,hi,orderfilter);
 | 
			
		||||
      Cheb(hermop,noise,Mn);
 | 
			
		||||
      // normalise
 | 
			
		||||
      scale = std::pow(norm2(Mn),-0.5); 	Mn=Mn*scale;
 | 
			
		||||
      subspace[b]   = Mn;
 | 
			
		||||
      hermop.Op(Mn,tmp); 
 | 
			
		||||
      std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl;
 | 
			
		||||
      b++;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Generate a full sequence of Chebyshevs
 | 
			
		||||
    {
 | 
			
		||||
      lo=filterlo;
 | 
			
		||||
      noise=Mn;
 | 
			
		||||
 | 
			
		||||
      FineField T0(FineGrid); T0 = noise;  
 | 
			
		||||
      FineField T1(FineGrid); 
 | 
			
		||||
      FineField T2(FineGrid);
 | 
			
		||||
      FineField y(FineGrid);
 | 
			
		||||
      
 | 
			
		||||
      FineField *Tnm = &T0;
 | 
			
		||||
      FineField *Tn  = &T1;
 | 
			
		||||
      FineField *Tnp = &T2;
 | 
			
		||||
 | 
			
		||||
      // Tn=T1 = (xscale M + mscale)in
 | 
			
		||||
      RealD xscale = 2.0/(hi-lo);
 | 
			
		||||
      RealD mscale = -(hi+lo)/(hi-lo);
 | 
			
		||||
      hermop.HermOp(T0,y);
 | 
			
		||||
      T1=y*xscale+noise*mscale;
 | 
			
		||||
 | 
			
		||||
      for(int n=2;n<=ordermin+orderstep*(nn-2);n++){
 | 
			
		||||
	
 | 
			
		||||
	hermop.HermOp(*Tn,y);
 | 
			
		||||
 | 
			
		||||
	auto y_v = y.View();
 | 
			
		||||
	auto Tn_v = Tn->View();
 | 
			
		||||
	auto Tnp_v = Tnp->View();
 | 
			
		||||
	auto Tnm_v = Tnm->View();
 | 
			
		||||
	const int Nsimd = CComplex::Nsimd();
 | 
			
		||||
	accelerator_forNB(ss, FineGrid->oSites(), Nsimd, {
 | 
			
		||||
	  coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss));
 | 
			
		||||
	  coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss));
 | 
			
		||||
        });
 | 
			
		||||
 | 
			
		||||
	// Possible more fine grained control is needed than a linear sweep,
 | 
			
		||||
	// but huge productivity gain if this is simple algorithm and not a tunable
 | 
			
		||||
	int m =1;
 | 
			
		||||
	if ( n>=ordermin ) m=n-ordermin;
 | 
			
		||||
	if ( (m%orderstep)==0 ) { 
 | 
			
		||||
	  Mn=*Tnp;
 | 
			
		||||
	  scale = std::pow(norm2(Mn),-0.5);         Mn=Mn*scale;
 | 
			
		||||
	  subspace[b] = Mn;
 | 
			
		||||
	  hermop.Op(Mn,tmp); 
 | 
			
		||||
	  std::cout<<GridLogMessage << n<<" filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl;
 | 
			
		||||
	  b++;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Cycle pointers to avoid copies
 | 
			
		||||
	FineField *swizzle = Tnm;
 | 
			
		||||
	Tnm    =Tn;
 | 
			
		||||
	Tn     =Tnp;
 | 
			
		||||
	Tnp    =swizzle;
 | 
			
		||||
	  
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    assert(b==nn);
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
#if 0
 | 
			
		||||
  virtual void CreateSubspaceChebyshev(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,
 | 
			
		||||
				       int nn,
 | 
			
		||||
				       double hi,
 | 
			
		||||
				       double lo,
 | 
			
		||||
				       int orderfilter,
 | 
			
		||||
				       int ordermin,
 | 
			
		||||
				       int orderstep,
 | 
			
		||||
				       double filterlo
 | 
			
		||||
				       ) {
 | 
			
		||||
 | 
			
		||||
    RealD scale;
 | 
			
		||||
 | 
			
		||||
    FineField noise(FineGrid);
 | 
			
		||||
    FineField Mn(FineGrid);
 | 
			
		||||
    FineField tmp(FineGrid);
 | 
			
		||||
    FineField combined(FineGrid);
 | 
			
		||||
 | 
			
		||||
    for(int b=0;b<nn;b++){
 | 
			
		||||
	
 | 
			
		||||
      gaussian(RNG,noise);
 | 
			
		||||
      scale = std::pow(norm2(noise),-0.5); 
 | 
			
		||||
      noise=noise*scale;
 | 
			
		||||
    // New normalised noise
 | 
			
		||||
    gaussian(RNG,noise);
 | 
			
		||||
    scale = std::pow(norm2(noise),-0.5); 
 | 
			
		||||
    noise=noise*scale;
 | 
			
		||||
 | 
			
		||||
      hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise   ["<<b<<"] <n|MdagM|n> "<<norm2(Mn)<<std::endl;
 | 
			
		||||
    // Initial matrix element
 | 
			
		||||
    hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl;
 | 
			
		||||
 | 
			
		||||
      Cheb(hermop,noise,Mn);
 | 
			
		||||
    int b =0;
 | 
			
		||||
#define FILTERb(llo,hhi,oorder)						\
 | 
			
		||||
    {									\
 | 
			
		||||
      Chebyshev<FineField> Cheb(llo,hhi,oorder);			\
 | 
			
		||||
      Cheb(hermop,noise,Mn);						\
 | 
			
		||||
      scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale;			\
 | 
			
		||||
      subspace[b]   = Mn;						\
 | 
			
		||||
      hermop.Op(Mn,tmp);						\
 | 
			
		||||
      std::cout<<GridLogMessage << oorder<< " Cheb filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; \
 | 
			
		||||
      b++;								\
 | 
			
		||||
    }									
 | 
			
		||||
 | 
			
		||||
      scale = std::pow(norm2(Mn),-0.5); 
 | 
			
		||||
      Mn=Mn*scale;
 | 
			
		||||
      subspace[b]   = Mn;
 | 
			
		||||
    //      JacobiPolynomial<FineField> Cheb(0.002,60.0,1500,-0.5,3.5);	\
 | 
			
		||||
 | 
			
		||||
      hermop.Op(Mn,noise); std::cout<<GridLogMessage << "filtered["<<b<<"] <f|MdagM|f> "<<norm2(noise)<<std::endl;
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    Orthogonalise();
 | 
			
		||||
    RealD alpha=-0.8;
 | 
			
		||||
    RealD beta =-0.8;
 | 
			
		||||
#define FILTER(llo,hhi,oorder)						\
 | 
			
		||||
    {									\
 | 
			
		||||
      Chebyshev<FineField> Cheb(llo,hhi,oorder);			\
 | 
			
		||||
      /* JacobiPolynomial<FineField> Cheb(0.0,60.0,oorder,alpha,beta);*/\
 | 
			
		||||
      Cheb(hermop,noise,Mn);						\
 | 
			
		||||
      scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale;			\
 | 
			
		||||
      subspace[b]   = Mn;						\
 | 
			
		||||
      hermop.Op(Mn,tmp);						\
 | 
			
		||||
      std::cout<<GridLogMessage << oorder<< "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; \
 | 
			
		||||
      b++;								\
 | 
			
		||||
    }									
 | 
			
		||||
    
 | 
			
		||||
#define FILTERc(llo,hhi,oorder)				\
 | 
			
		||||
    {							\
 | 
			
		||||
      Chebyshev<FineField> Cheb(llo,hhi,oorder);	\
 | 
			
		||||
      Cheb(hermop,noise,combined);			\
 | 
			
		||||
    }									
 | 
			
		||||
 | 
			
		||||
    double node = 0.000;
 | 
			
		||||
    FILTERb(lo,hi,orderfilter);// 0
 | 
			
		||||
    //    FILTERc(node,hi,51);// 0
 | 
			
		||||
    noise = Mn;
 | 
			
		||||
    int base = 0;
 | 
			
		||||
    int mult = 100;
 | 
			
		||||
    FILTER(node,hi,base+1*mult);
 | 
			
		||||
    FILTER(node,hi,base+2*mult);
 | 
			
		||||
    FILTER(node,hi,base+3*mult);
 | 
			
		||||
    FILTER(node,hi,base+4*mult);
 | 
			
		||||
    FILTER(node,hi,base+5*mult);
 | 
			
		||||
    FILTER(node,hi,base+6*mult);
 | 
			
		||||
    FILTER(node,hi,base+7*mult);
 | 
			
		||||
    FILTER(node,hi,base+8*mult);
 | 
			
		||||
    FILTER(node,hi,base+9*mult);
 | 
			
		||||
    FILTER(node,hi,base+10*mult);
 | 
			
		||||
    FILTER(node,hi,base+11*mult);
 | 
			
		||||
    FILTER(node,hi,base+12*mult);
 | 
			
		||||
    FILTER(node,hi,base+13*mult);
 | 
			
		||||
    FILTER(node,hi,base+14*mult);
 | 
			
		||||
    FILTER(node,hi,base+15*mult);
 | 
			
		||||
    assert(b==nn);
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if 0
 | 
			
		||||
  virtual void CreateSubspaceChebyshev(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,
 | 
			
		||||
				       int nn,
 | 
			
		||||
				       double hi,
 | 
			
		||||
				       double lo,
 | 
			
		||||
				       int orderfilter,
 | 
			
		||||
				       int ordermin,
 | 
			
		||||
				       int orderstep,
 | 
			
		||||
				       double filterlo
 | 
			
		||||
				       ) {
 | 
			
		||||
 | 
			
		||||
    RealD scale;
 | 
			
		||||
 | 
			
		||||
    FineField noise(FineGrid);
 | 
			
		||||
    FineField Mn(FineGrid);
 | 
			
		||||
    FineField tmp(FineGrid);
 | 
			
		||||
    FineField combined(FineGrid);
 | 
			
		||||
 | 
			
		||||
    // New normalised noise
 | 
			
		||||
    gaussian(RNG,noise);
 | 
			
		||||
    scale = std::pow(norm2(noise),-0.5); 
 | 
			
		||||
    noise=noise*scale;
 | 
			
		||||
 | 
			
		||||
    // Initial matrix element
 | 
			
		||||
    hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl;
 | 
			
		||||
 | 
			
		||||
    int b =0;
 | 
			
		||||
    {						
 | 
			
		||||
      Chebyshev<FineField> JacobiPoly(0.005,60.,1500);
 | 
			
		||||
      //      JacobiPolynomial<FineField> JacobiPoly(0.002,60.0,1500,-0.5,3.5);
 | 
			
		||||
      //JacobiPolynomial<FineField> JacobiPoly(0.03,60.0,500,-0.5,3.5);
 | 
			
		||||
      //      JacobiPolynomial<FineField> JacobiPoly(0.00,60.0,1000,-0.5,3.5);
 | 
			
		||||
      JacobiPoly(hermop,noise,Mn);
 | 
			
		||||
      scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale;
 | 
			
		||||
      subspace[b]   = Mn;
 | 
			
		||||
      hermop.Op(Mn,tmp);
 | 
			
		||||
      std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; 
 | 
			
		||||
      b++;
 | 
			
		||||
      //      scale = std::pow(norm2(tmp),-0.5);     tmp=tmp*scale;
 | 
			
		||||
      //      subspace[b]   = tmp;      b++;
 | 
			
		||||
      //    }									
 | 
			
		||||
    }									
 | 
			
		||||
 | 
			
		||||
#define FILTER(lambda)						\
 | 
			
		||||
    {								\
 | 
			
		||||
      hermop.HermOp(subspace[0],tmp);				\
 | 
			
		||||
      tmp = tmp - lambda *subspace[0];				\
 | 
			
		||||
      scale = std::pow(norm2(tmp),-0.5);			\
 | 
			
		||||
      tmp=tmp*scale;							\
 | 
			
		||||
      subspace[b]   = tmp;						\
 | 
			
		||||
      hermop.Op(subspace[b],tmp);					\
 | 
			
		||||
      std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; \
 | 
			
		||||
      b++;								\
 | 
			
		||||
    }									
 | 
			
		||||
    //      scale = std::pow(norm2(tmp),-0.5);     tmp=tmp*scale;
 | 
			
		||||
    //      subspace[b]   = tmp;      b++;
 | 
			
		||||
    //    }									
 | 
			
		||||
 | 
			
		||||
    FILTER(2.0e-5);
 | 
			
		||||
    FILTER(2.0e-4);
 | 
			
		||||
    FILTER(4.0e-4);
 | 
			
		||||
    FILTER(8.0e-4);
 | 
			
		||||
    FILTER(8.0e-4);
 | 
			
		||||
 | 
			
		||||
    FILTER(2.0e-3);
 | 
			
		||||
    FILTER(3.0e-3);
 | 
			
		||||
    FILTER(4.0e-3);
 | 
			
		||||
    FILTER(5.0e-3);
 | 
			
		||||
    FILTER(6.0e-3);
 | 
			
		||||
 | 
			
		||||
    FILTER(2.5e-3);
 | 
			
		||||
    FILTER(3.5e-3);
 | 
			
		||||
    FILTER(4.5e-3);
 | 
			
		||||
    FILTER(5.5e-3);
 | 
			
		||||
    FILTER(6.5e-3);
 | 
			
		||||
 | 
			
		||||
    //    FILTER(6.0e-5);//6
 | 
			
		||||
    //    FILTER(7.0e-5);//8
 | 
			
		||||
    //    FILTER(8.0e-5);//9
 | 
			
		||||
    //    FILTER(9.0e-5);//3
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
    //    FILTER(1.0e-4);//10
 | 
			
		||||
    FILTER(2.0e-4);//11
 | 
			
		||||
    //   FILTER(3.0e-4);//12
 | 
			
		||||
    //    FILTER(4.0e-4);//13
 | 
			
		||||
    FILTER(5.0e-4);//14
 | 
			
		||||
 | 
			
		||||
    FILTER(6.0e-3);//4
 | 
			
		||||
    FILTER(7.0e-4);//1
 | 
			
		||||
    FILTER(8.0e-4);//7
 | 
			
		||||
    FILTER(9.0e-4);//15
 | 
			
		||||
    FILTER(1.0e-3);//2
 | 
			
		||||
 | 
			
		||||
    FILTER(2.0e-3);//2
 | 
			
		||||
    FILTER(3.0e-3);//2
 | 
			
		||||
    FILTER(4.0e-3);//2
 | 
			
		||||
    FILTER(5.0e-3);//2
 | 
			
		||||
    FILTER(6.0e-3);//2
 | 
			
		||||
 | 
			
		||||
    FILTER(7.0e-3);//2
 | 
			
		||||
    FILTER(8.0e-3);//2
 | 
			
		||||
    FILTER(1.0e-2);//2
 | 
			
		||||
    */
 | 
			
		||||
    std::cout << GridLogMessage <<"Jacobi filtering done" <<std::endl;
 | 
			
		||||
    assert(b==nn);
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Fine Object == (per site) type of fine field
 | 
			
		||||
// nbasis      == number of deflation vectors
 | 
			
		||||
template<class Fobj,class CComplex,int nbasis>
 | 
			
		||||
class CoarsenedMatrix : public SparseMatrixBase<Lattice<iVector<CComplex,nbasis > > >  {
 | 
			
		||||
public:
 | 
			
		||||
    
 | 
			
		||||
  typedef iVector<CComplex,nbasis >             siteVector;
 | 
			
		||||
  typedef iVector<CComplex,nbasis >           siteVector;
 | 
			
		||||
  typedef Lattice<CComplex >                  CoarseComplexField;
 | 
			
		||||
  typedef Lattice<siteVector>                 CoarseVector;
 | 
			
		||||
  typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix;
 | 
			
		||||
 | 
			
		||||
  typedef iMatrix<CComplex,nbasis >  Cobj;
 | 
			
		||||
  typedef Lattice< CComplex >   CoarseScalar; // used for inner products on fine field
 | 
			
		||||
  typedef Lattice<Fobj >        FineField;
 | 
			
		||||
 | 
			
		||||
@@ -293,7 +535,6 @@ public:
 | 
			
		||||
  CartesianStencil<siteVector,siteVector,int> Stencil; 
 | 
			
		||||
 | 
			
		||||
  std::vector<CoarseMatrix> A;
 | 
			
		||||
 | 
			
		||||
      
 | 
			
		||||
  ///////////////////////
 | 
			
		||||
  // Interface
 | 
			
		||||
@@ -305,33 +546,71 @@ public:
 | 
			
		||||
    conformable(_grid,in.Grid());
 | 
			
		||||
    conformable(in.Grid(),out.Grid());
 | 
			
		||||
 | 
			
		||||
    RealD Nin = norm2(in);
 | 
			
		||||
    //    RealD Nin = norm2(in);
 | 
			
		||||
    SimpleCompressor<siteVector> compressor;
 | 
			
		||||
 | 
			
		||||
    double comms_usec = -usecond();
 | 
			
		||||
    Stencil.HaloExchange(in,compressor);
 | 
			
		||||
    comms_usec += usecond();
 | 
			
		||||
 | 
			
		||||
    auto in_v = in.View();
 | 
			
		||||
    auto out_v = out.View();
 | 
			
		||||
    thread_for(ss,Grid()->oSites(),{
 | 
			
		||||
      siteVector res = Zero();
 | 
			
		||||
      siteVector nbr;
 | 
			
		||||
    typedef LatticeView<Cobj> Aview;
 | 
			
		||||
 | 
			
		||||
    Vector<Aview> AcceleratorViewContainer;
 | 
			
		||||
    for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer.push_back(A[p].View());
 | 
			
		||||
    Aview *Aview_p = & AcceleratorViewContainer[0];
 | 
			
		||||
 | 
			
		||||
    const int Nsimd = CComplex::Nsimd();
 | 
			
		||||
    typedef decltype(coalescedRead(in_v[0])) calcVector;
 | 
			
		||||
    typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
 | 
			
		||||
 | 
			
		||||
    GridStopWatch ArithmeticTimer;
 | 
			
		||||
    int osites=Grid()->oSites();
 | 
			
		||||
    //    double flops = osites*Nsimd*nbasis*nbasis*8.0*geom.npoint;
 | 
			
		||||
    //    double bytes = osites*nbasis*nbasis*geom.npoint*sizeof(CComplex);
 | 
			
		||||
    double usecs =-usecond();
 | 
			
		||||
    // assert(geom.npoint==9);
 | 
			
		||||
 | 
			
		||||
    accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
 | 
			
		||||
      int ss = sss/nbasis;
 | 
			
		||||
      int b  = sss%nbasis;
 | 
			
		||||
      calcComplex res = Zero();
 | 
			
		||||
      calcVector nbr;
 | 
			
		||||
      int ptype;
 | 
			
		||||
      StencilEntry *SE;
 | 
			
		||||
 | 
			
		||||
      int lane=SIMTlane(Nsimd);
 | 
			
		||||
      for(int point=0;point<geom.npoint;point++){
 | 
			
		||||
 | 
			
		||||
	SE=Stencil.GetEntry(ptype,point,ss);
 | 
			
		||||
	  
 | 
			
		||||
	if(SE->_is_local&&SE->_permute) { 
 | 
			
		||||
	  permute(nbr,in_v[SE->_offset],ptype);
 | 
			
		||||
	} else if(SE->_is_local) { 
 | 
			
		||||
	  nbr = in_v[SE->_offset];
 | 
			
		||||
	if(SE->_is_local) { 
 | 
			
		||||
	  nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute,lane);
 | 
			
		||||
	} else {
 | 
			
		||||
	  nbr = Stencil.CommBuf()[SE->_offset];
 | 
			
		||||
	  nbr = coalescedRead(Stencil.CommBuf()[SE->_offset],lane);
 | 
			
		||||
	}
 | 
			
		||||
	synchronise();
 | 
			
		||||
 | 
			
		||||
	for(int bb=0;bb<nbasis;bb++) {
 | 
			
		||||
	  res = res + coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb);
 | 
			
		||||
	}
 | 
			
		||||
	auto A_point = A[point].View();
 | 
			
		||||
	res = res + A_point[ss]*nbr;
 | 
			
		||||
      }
 | 
			
		||||
      vstream(out_v[ss],res);
 | 
			
		||||
      coalescedWrite(out_v[ss](b),res,lane);
 | 
			
		||||
    });
 | 
			
		||||
    usecs +=usecond();
 | 
			
		||||
 | 
			
		||||
    double nrm_usec=-usecond();
 | 
			
		||||
    RealD Nout= norm2(out);
 | 
			
		||||
    nrm_usec+=usecond();
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
        std::cout << GridLogMessage << "\tNorm        " << nrm_usec << " us" <<std::endl;
 | 
			
		||||
        std::cout << GridLogMessage << "\tHalo        " << comms_usec << " us" <<std::endl;
 | 
			
		||||
        std::cout << GridLogMessage << "\tMatrix      " << usecs << " us" <<std::endl;
 | 
			
		||||
        std::cout << GridLogMessage << "\t  mflop/s   " << flops/usecs<<std::endl;
 | 
			
		||||
        std::cout << GridLogMessage << "\t  MB/s      " << bytes/usecs<<std::endl;
 | 
			
		||||
    */
 | 
			
		||||
    return Nout;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
@@ -349,25 +628,54 @@ public:
 | 
			
		||||
      return norm2(out);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){
 | 
			
		||||
    
 | 
			
		||||
    conformable(_grid,in.Grid());
 | 
			
		||||
    conformable(in.Grid(),out.Grid());
 | 
			
		||||
    
 | 
			
		||||
  void MdirComms(const CoarseVector &in)
 | 
			
		||||
  {
 | 
			
		||||
    SimpleCompressor<siteVector> compressor;
 | 
			
		||||
    Stencil.HaloExchange(in,compressor);
 | 
			
		||||
    
 | 
			
		||||
    auto point = [dir, disp](){
 | 
			
		||||
      if(dir == 0 and disp == 0)
 | 
			
		||||
	return 8;
 | 
			
		||||
      else
 | 
			
		||||
	return (4 * dir + 1 - disp) / 2;
 | 
			
		||||
    }();
 | 
			
		||||
  }
 | 
			
		||||
  void MdirCalc(const CoarseVector &in, CoarseVector &out, int point)
 | 
			
		||||
  {
 | 
			
		||||
    conformable(_grid,in.Grid());
 | 
			
		||||
    conformable(_grid,out.Grid());
 | 
			
		||||
 | 
			
		||||
    typedef LatticeView<Cobj> Aview;
 | 
			
		||||
    Vector<Aview> AcceleratorViewContainer;
 | 
			
		||||
    for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer.push_back(A[p].View());
 | 
			
		||||
    Aview *Aview_p = & AcceleratorViewContainer[0];
 | 
			
		||||
 | 
			
		||||
    auto out_v = out.View();
 | 
			
		||||
    auto in_v  = in.View();
 | 
			
		||||
    thread_for(ss,Grid()->oSites(),{
 | 
			
		||||
 | 
			
		||||
    const int Nsimd = CComplex::Nsimd();
 | 
			
		||||
    typedef decltype(coalescedRead(in_v[0])) calcVector;
 | 
			
		||||
    typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
 | 
			
		||||
 | 
			
		||||
    accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
 | 
			
		||||
      int ss = sss/nbasis;
 | 
			
		||||
      int b  = sss%nbasis;
 | 
			
		||||
      calcComplex res = Zero();
 | 
			
		||||
      calcVector nbr;
 | 
			
		||||
      int ptype;
 | 
			
		||||
      StencilEntry *SE;
 | 
			
		||||
 | 
			
		||||
      int lane=SIMTlane(Nsimd);
 | 
			
		||||
      SE=Stencil.GetEntry(ptype,point,ss);
 | 
			
		||||
	  
 | 
			
		||||
      if(SE->_is_local) { 
 | 
			
		||||
	nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute,lane);
 | 
			
		||||
      } else {
 | 
			
		||||
	nbr = coalescedRead(Stencil.CommBuf()[SE->_offset],lane);
 | 
			
		||||
      }
 | 
			
		||||
      synchronise();
 | 
			
		||||
 | 
			
		||||
      for(int bb=0;bb<nbasis;bb++) {
 | 
			
		||||
	res = res + coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb);
 | 
			
		||||
      }
 | 
			
		||||
      coalescedWrite(out_v[ss](b),res,lane);
 | 
			
		||||
    });
 | 
			
		||||
#if 0
 | 
			
		||||
    accelerator_for(ss,Grid()->oSites(),1,{
 | 
			
		||||
 | 
			
		||||
      siteVector res = Zero();
 | 
			
		||||
      siteVector nbr;
 | 
			
		||||
      int ptype;
 | 
			
		||||
@@ -382,16 +690,65 @@ public:
 | 
			
		||||
      } else {
 | 
			
		||||
	nbr = Stencil.CommBuf()[SE->_offset];
 | 
			
		||||
      }
 | 
			
		||||
      synchronise();
 | 
			
		||||
 | 
			
		||||
      auto A_point = A[point].View();
 | 
			
		||||
      res = res + A_point[ss]*nbr;
 | 
			
		||||
      res = res + Aview_p[point][ss]*nbr;
 | 
			
		||||
      
 | 
			
		||||
      vstream(out_v[ss],res);
 | 
			
		||||
      out_v[ss]=res;
 | 
			
		||||
    });
 | 
			
		||||
#endif
 | 
			
		||||
  }
 | 
			
		||||
  void MdirAll(const CoarseVector &in,std::vector<CoarseVector> &out)
 | 
			
		||||
  {
 | 
			
		||||
    this->MdirComms(in);
 | 
			
		||||
    int ndir=geom.npoint-1;
 | 
			
		||||
    if ((out.size()!=ndir)&&(out.size()!=ndir+1)) { 
 | 
			
		||||
      std::cout <<"MdirAll out size "<< out.size()<<std::endl;
 | 
			
		||||
      std::cout <<"MdirAll ndir "<< ndir<<std::endl;
 | 
			
		||||
      assert(0);
 | 
			
		||||
    }
 | 
			
		||||
    for(int p=0;p<ndir;p++){
 | 
			
		||||
      MdirCalc(in,out[p],p);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
  void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){
 | 
			
		||||
 | 
			
		||||
    this->MdirComms(in);
 | 
			
		||||
 | 
			
		||||
    int ndim = in.Grid()->Nd();
 | 
			
		||||
 | 
			
		||||
    //////////////
 | 
			
		||||
    // 4D action like wilson
 | 
			
		||||
    // 0+ => 0 
 | 
			
		||||
    // 0- => 1
 | 
			
		||||
    // 1+ => 2 
 | 
			
		||||
    // 1- => 3
 | 
			
		||||
    // etc..
 | 
			
		||||
    //////////////
 | 
			
		||||
    // 5D action like DWF
 | 
			
		||||
    // 1+ => 0 
 | 
			
		||||
    // 1- => 1
 | 
			
		||||
    // 2+ => 2 
 | 
			
		||||
    // 2- => 3
 | 
			
		||||
    // etc..
 | 
			
		||||
    auto point = [dir, disp, ndim](){
 | 
			
		||||
      if(dir == 0 and disp == 0)
 | 
			
		||||
	return 8;
 | 
			
		||||
      else if ( ndim==4 ) { 
 | 
			
		||||
	return (4 * dir + 1 - disp) / 2;
 | 
			
		||||
      } else { 
 | 
			
		||||
	return (4 * (dir-1) + 1 - disp) / 2;
 | 
			
		||||
      }
 | 
			
		||||
    }();
 | 
			
		||||
 | 
			
		||||
    MdirCalc(in,out,point);
 | 
			
		||||
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  void Mdiag(const CoarseVector &in, CoarseVector &out){
 | 
			
		||||
    Mdir(in, out, 0, 0); // use the self coupling (= last) point of the stencil
 | 
			
		||||
  void Mdiag(const CoarseVector &in, CoarseVector &out)
 | 
			
		||||
  {
 | 
			
		||||
    int point=geom.npoint-1;
 | 
			
		||||
    MdirCalc(in, out, point); // No comms
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
@@ -401,25 +758,44 @@ public:
 | 
			
		||||
    geom(CoarseGrid._ndimension),
 | 
			
		||||
    hermitian(hermitian_),
 | 
			
		||||
    Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements,0),
 | 
			
		||||
    A(geom.npoint,&CoarseGrid)
 | 
			
		||||
      A(geom.npoint,&CoarseGrid)
 | 
			
		||||
  {
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop,
 | 
			
		||||
		       Aggregation<Fobj,CComplex,nbasis> & Subspace){
 | 
			
		||||
		       Aggregation<Fobj,CComplex,nbasis> & Subspace)
 | 
			
		||||
  {
 | 
			
		||||
    typedef Lattice<typename Fobj::tensor_reduced> FineComplexField;
 | 
			
		||||
    typedef typename Fobj::scalar_type scalar_type;
 | 
			
		||||
 | 
			
		||||
    FineField iblock(FineGrid); // contributions from within this block
 | 
			
		||||
    FineField oblock(FineGrid); // contributions from outwith this block
 | 
			
		||||
    FineComplexField one(FineGrid); one=scalar_type(1.0,0.0);
 | 
			
		||||
    FineComplexField zero(FineGrid); zero=scalar_type(0.0,0.0);
 | 
			
		||||
 | 
			
		||||
    std::vector<FineComplexField> masks(geom.npoint,FineGrid);
 | 
			
		||||
    FineComplexField imask(FineGrid); // contributions from within this block
 | 
			
		||||
    FineComplexField omask(FineGrid); // contributions from outwith this block
 | 
			
		||||
 | 
			
		||||
    FineComplexField evenmask(FineGrid);
 | 
			
		||||
    FineComplexField oddmask(FineGrid); 
 | 
			
		||||
 | 
			
		||||
    FineField     phi(FineGrid);
 | 
			
		||||
    FineField     tmp(FineGrid);
 | 
			
		||||
    FineField     zz(FineGrid); zz=Zero();
 | 
			
		||||
    FineField    Mphi(FineGrid);
 | 
			
		||||
    FineField    Mphie(FineGrid);
 | 
			
		||||
    FineField    Mphio(FineGrid);
 | 
			
		||||
    std::vector<FineField>     Mphi_p(geom.npoint,FineGrid);
 | 
			
		||||
 | 
			
		||||
    Lattice<iScalar<vInteger> > coor(FineGrid);
 | 
			
		||||
    Lattice<iScalar<vInteger> > coor (FineGrid);
 | 
			
		||||
    Lattice<iScalar<vInteger> > bcoor(FineGrid);
 | 
			
		||||
    Lattice<iScalar<vInteger> > bcb  (FineGrid); bcb = Zero();
 | 
			
		||||
 | 
			
		||||
    CoarseVector iProj(Grid()); 
 | 
			
		||||
    CoarseVector oProj(Grid()); 
 | 
			
		||||
    CoarseVector SelfProj(Grid()); 
 | 
			
		||||
    CoarseComplexField iZProj(Grid()); 
 | 
			
		||||
    CoarseComplexField oZProj(Grid()); 
 | 
			
		||||
 | 
			
		||||
    CoarseScalar InnerProd(Grid()); 
 | 
			
		||||
 | 
			
		||||
    // Orthogonalise the subblocks over the basis
 | 
			
		||||
@@ -428,69 +804,117 @@ public:
 | 
			
		||||
    // Compute the matrix elements of linop between this orthonormal
 | 
			
		||||
    // set of vectors.
 | 
			
		||||
    int self_stencil=-1;
 | 
			
		||||
    for(int p=0;p<geom.npoint;p++){ 
 | 
			
		||||
    for(int p=0;p<geom.npoint;p++)
 | 
			
		||||
    { 
 | 
			
		||||
      int dir   = geom.directions[p];
 | 
			
		||||
      int disp  = geom.displacements[p];
 | 
			
		||||
      A[p]=Zero();
 | 
			
		||||
      if( geom.displacements[p]==0){
 | 
			
		||||
	self_stencil=p;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      Integer block=(FineGrid->_rdimensions[dir])/(Grid()->_rdimensions[dir]);
 | 
			
		||||
 | 
			
		||||
      LatticeCoordinate(coor,dir);
 | 
			
		||||
 | 
			
		||||
      ///////////////////////////////////////////////////////
 | 
			
		||||
      // Work out even and odd block checkerboarding for fast diagonal term
 | 
			
		||||
      ///////////////////////////////////////////////////////
 | 
			
		||||
      if ( disp==1 ) {
 | 
			
		||||
	bcb   = bcb + div(coor,block);
 | 
			
		||||
      }
 | 
			
		||||
	
 | 
			
		||||
      if ( disp==0 ) {
 | 
			
		||||
	  masks[p]= Zero();
 | 
			
		||||
      } else if ( disp==1 ) {
 | 
			
		||||
	masks[p] = where(mod(coor,block)==(block-1),one,zero);
 | 
			
		||||
      } else if ( disp==-1 ) {
 | 
			
		||||
	masks[p] = where(mod(coor,block)==(Integer)0,one,zero);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    evenmask = where(mod(bcb,2)==(Integer)0,one,zero);
 | 
			
		||||
    oddmask  = one-evenmask;
 | 
			
		||||
 | 
			
		||||
    assert(self_stencil!=-1);
 | 
			
		||||
 | 
			
		||||
    for(int i=0;i<nbasis;i++){
 | 
			
		||||
 | 
			
		||||
      phi=Subspace.subspace[i];
 | 
			
		||||
	
 | 
			
		||||
      std::cout<<GridLogMessage<<"("<<i<<").."<<std::endl;
 | 
			
		||||
 | 
			
		||||
      //      std::cout << GridLogMessage<< "CoarsenMatrix vector "<<i << std::endl;
 | 
			
		||||
      linop.OpDirAll(phi,Mphi_p);
 | 
			
		||||
      linop.OpDiag  (phi,Mphi_p[geom.npoint-1]);
 | 
			
		||||
 | 
			
		||||
      for(int p=0;p<geom.npoint;p++){ 
 | 
			
		||||
 | 
			
		||||
	Mphi = Mphi_p[p];
 | 
			
		||||
 | 
			
		||||
	int dir   = geom.directions[p];
 | 
			
		||||
	int disp  = geom.displacements[p];
 | 
			
		||||
 | 
			
		||||
	Integer block=(FineGrid->_rdimensions[dir])/(Grid()->_rdimensions[dir]);
 | 
			
		||||
	if ( (disp==-1) || (!hermitian ) ) {
 | 
			
		||||
 | 
			
		||||
	LatticeCoordinate(coor,dir);
 | 
			
		||||
 | 
			
		||||
	if ( disp==0 ){
 | 
			
		||||
	  linop.OpDiag(phi,Mphi);
 | 
			
		||||
	}
 | 
			
		||||
	else  {
 | 
			
		||||
	  linop.OpDir(phi,Mphi,dir,disp); 
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	////////////////////////////////////////////////////////////////////////
 | 
			
		||||
	// Pick out contributions coming from this cell and neighbour cell
 | 
			
		||||
	////////////////////////////////////////////////////////////////////////
 | 
			
		||||
	if ( disp==0 ) {
 | 
			
		||||
	  iblock = Mphi;
 | 
			
		||||
	  oblock = Zero();
 | 
			
		||||
	} else if ( disp==1 ) {
 | 
			
		||||
	  oblock = where(mod(coor,block)==(block-1),Mphi,zz);
 | 
			
		||||
	  iblock = where(mod(coor,block)!=(block-1),Mphi,zz);
 | 
			
		||||
	} else if ( disp==-1 ) {
 | 
			
		||||
	  oblock = where(mod(coor,block)==(Integer)0,Mphi,zz);
 | 
			
		||||
	  iblock = where(mod(coor,block)!=(Integer)0,Mphi,zz);
 | 
			
		||||
	} else {
 | 
			
		||||
	  assert(0);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	Subspace.ProjectToSubspace(iProj,iblock);
 | 
			
		||||
	Subspace.ProjectToSubspace(oProj,oblock);
 | 
			
		||||
	//	  blockProject(iProj,iblock,Subspace.subspace);
 | 
			
		||||
	//	  blockProject(oProj,oblock,Subspace.subspace);
 | 
			
		||||
	auto iProj_v = iProj.View() ;
 | 
			
		||||
	auto oProj_v = oProj.View() ;
 | 
			
		||||
	auto A_p     =  A[p].View();
 | 
			
		||||
	auto A_self  = A[self_stencil].View();
 | 
			
		||||
	thread_for(ss, Grid()->oSites(),{
 | 
			
		||||
	  ////////////////////////////////////////////////////////////////////////
 | 
			
		||||
	  // Pick out contributions coming from this cell and neighbour cell
 | 
			
		||||
	  ////////////////////////////////////////////////////////////////////////
 | 
			
		||||
	  omask = masks[p];
 | 
			
		||||
	  imask = one-omask;
 | 
			
		||||
	
 | 
			
		||||
	  for(int j=0;j<nbasis;j++){
 | 
			
		||||
	    if( disp!= 0 ) {
 | 
			
		||||
	      A_p[ss](j,i) = oProj_v[ss](j);
 | 
			
		||||
	    }
 | 
			
		||||
	    A_self[ss](j,i) =	A_self[ss](j,i) + iProj_v[ss](j);
 | 
			
		||||
	    
 | 
			
		||||
	    blockMaskedInnerProduct(oZProj,omask,Subspace.subspace[j],Mphi);
 | 
			
		||||
	    
 | 
			
		||||
	    auto iZProj_v = iZProj.View() ;
 | 
			
		||||
	    auto oZProj_v = oZProj.View() ;
 | 
			
		||||
	    auto A_p     =  A[p].View();
 | 
			
		||||
	    auto A_self  = A[self_stencil].View();
 | 
			
		||||
 | 
			
		||||
	    accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });
 | 
			
		||||
	    //      if( disp!= 0 ) { accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });}
 | 
			
		||||
	    //	    accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_self[ss](j,i),A_self(ss)(j,i)+iZProj_v(ss)); });
 | 
			
		||||
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      ///////////////////////////////////////////
 | 
			
		||||
      // Faster alternate self coupling.. use hermiticity to save 2x
 | 
			
		||||
      ///////////////////////////////////////////
 | 
			
		||||
      {
 | 
			
		||||
	mult(tmp,phi,evenmask);  linop.Op(tmp,Mphie);
 | 
			
		||||
	mult(tmp,phi,oddmask );  linop.Op(tmp,Mphio);
 | 
			
		||||
 | 
			
		||||
	{
 | 
			
		||||
	  auto tmp_      = tmp.View();
 | 
			
		||||
	  auto evenmask_ = evenmask.View();
 | 
			
		||||
	  auto oddmask_  =  oddmask.View();
 | 
			
		||||
	  auto Mphie_    =  Mphie.View();
 | 
			
		||||
	  auto Mphio_    =  Mphio.View();
 | 
			
		||||
	  accelerator_for(ss, FineGrid->oSites(), Fobj::Nsimd(),{ 
 | 
			
		||||
	      coalescedWrite(tmp_[ss],evenmask_(ss)*Mphie_(ss) + oddmask_(ss)*Mphio_(ss));
 | 
			
		||||
	    });
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	blockProject(SelfProj,tmp,Subspace.subspace);
 | 
			
		||||
 | 
			
		||||
	auto SelfProj_ = SelfProj.View();
 | 
			
		||||
	auto A_self  = A[self_stencil].View();
 | 
			
		||||
 | 
			
		||||
	accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{
 | 
			
		||||
	  for(int j=0;j<nbasis;j++){
 | 
			
		||||
	    coalescedWrite(A_self[ss](j,i), SelfProj_(ss)(j));
 | 
			
		||||
	  }
 | 
			
		||||
	});
 | 
			
		||||
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    if(hermitian) {
 | 
			
		||||
      std::cout << GridLogMessage << " ForceHermitian, new code "<<std::endl;
 | 
			
		||||
      ForceHermitian();
 | 
			
		||||
    }
 | 
			
		||||
      // AssertHermitian();
 | 
			
		||||
      // ForceDiagonal();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
#if 0
 | 
			
		||||
    ///////////////////////////
 | 
			
		||||
@@ -513,17 +937,26 @@ public:
 | 
			
		||||
    std::cout<<GridLogMessage<< iProj <<std::endl;
 | 
			
		||||
    std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl;
 | 
			
		||||
#endif
 | 
			
		||||
      //      ForceHermitian();
 | 
			
		||||
      // AssertHermitian();
 | 
			
		||||
      // ForceDiagonal();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  void ForceHermitian(void) {
 | 
			
		||||
    for(int d=0;d<4;d++){
 | 
			
		||||
      int dd=d+1;
 | 
			
		||||
      A[2*d] = adj(Cshift(A[2*d+1],dd,1));
 | 
			
		||||
    CoarseMatrix Diff  (Grid());
 | 
			
		||||
    for(int p=0;p<geom.npoint;p++){
 | 
			
		||||
      int dir   = geom.directions[p];
 | 
			
		||||
      int disp  = geom.displacements[p];
 | 
			
		||||
      if(disp==-1) {
 | 
			
		||||
	// Find the opposite link
 | 
			
		||||
	for(int pp=0;pp<geom.npoint;pp++){
 | 
			
		||||
	  int dirp   = geom.directions[pp];
 | 
			
		||||
	  int dispp  = geom.displacements[pp];
 | 
			
		||||
	  if ( (dirp==dir) && (dispp==1) ){
 | 
			
		||||
	    //	    Diff = adj(Cshift(A[p],dir,1)) - A[pp]; 
 | 
			
		||||
	    //	    std::cout << GridLogMessage<<" Replacing stencil leg "<<pp<<" with leg "<<p<< " diff "<<norm2(Diff) <<std::endl;
 | 
			
		||||
	    A[pp] = adj(Cshift(A[p],dir,1));
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    //      A[8] = 0.5*(A[8] + adj(A[8]));
 | 
			
		||||
  }
 | 
			
		||||
  void AssertHermitian(void) {
 | 
			
		||||
    CoarseMatrix AA    (Grid());
 | 
			
		||||
 
 | 
			
		||||
@@ -47,6 +47,7 @@ public:
 | 
			
		||||
  // Support for coarsening to a multigrid
 | 
			
		||||
  virtual void OpDiag (const Field &in, Field &out) = 0; // Abstract base
 | 
			
		||||
  virtual void OpDir  (const Field &in, Field &out,int dir,int disp) = 0; // Abstract base
 | 
			
		||||
  virtual void OpDirAll  (const Field &in, std::vector<Field> &out) = 0; // Abstract base
 | 
			
		||||
 | 
			
		||||
  virtual void Op     (const Field &in, Field &out) = 0; // Abstract base
 | 
			
		||||
  virtual void AdjOp  (const Field &in, Field &out) = 0; // Abstract base
 | 
			
		||||
@@ -83,6 +84,9 @@ public:
 | 
			
		||||
  void OpDir  (const Field &in, Field &out,int dir,int disp) {
 | 
			
		||||
    _Mat.Mdir(in,out,dir,disp);
 | 
			
		||||
  }
 | 
			
		||||
  void OpDirAll  (const Field &in, std::vector<Field> &out){
 | 
			
		||||
    _Mat.MdirAll(in,out);
 | 
			
		||||
  };
 | 
			
		||||
  void Op     (const Field &in, Field &out){
 | 
			
		||||
    _Mat.M(in,out);
 | 
			
		||||
  }
 | 
			
		||||
@@ -93,8 +97,7 @@ public:
 | 
			
		||||
    _Mat.MdagM(in,out,n1,n2);
 | 
			
		||||
  }
 | 
			
		||||
  void HermOp(const Field &in, Field &out){
 | 
			
		||||
    RealD n1,n2;
 | 
			
		||||
    HermOpAndNorm(in,out,n1,n2);
 | 
			
		||||
    _Mat.MdagM(in,out);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
@@ -116,6 +119,9 @@ public:
 | 
			
		||||
    _Mat.Mdir(in,out,dir,disp);
 | 
			
		||||
    assert(0);
 | 
			
		||||
  }
 | 
			
		||||
  void OpDirAll  (const Field &in, std::vector<Field> &out){
 | 
			
		||||
    assert(0);
 | 
			
		||||
  };
 | 
			
		||||
  void Op     (const Field &in, Field &out){
 | 
			
		||||
    _Mat.M(in,out);
 | 
			
		||||
    assert(0);
 | 
			
		||||
@@ -154,6 +160,9 @@ public:
 | 
			
		||||
  void OpDir  (const Field &in, Field &out,int dir,int disp) {
 | 
			
		||||
    _Mat.Mdir(in,out,dir,disp);
 | 
			
		||||
  }
 | 
			
		||||
  void OpDirAll  (const Field &in, std::vector<Field> &out){
 | 
			
		||||
    _Mat.MdirAll(in,out);
 | 
			
		||||
  };
 | 
			
		||||
  void Op     (const Field &in, Field &out){
 | 
			
		||||
    _Mat.M(in,out);
 | 
			
		||||
  }
 | 
			
		||||
@@ -162,7 +171,6 @@ public:
 | 
			
		||||
  }
 | 
			
		||||
  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
 | 
			
		||||
    _Mat.M(in,out);
 | 
			
		||||
	
 | 
			
		||||
    ComplexD dot= innerProduct(in,out); n1=real(dot);
 | 
			
		||||
    n2=norm2(out);
 | 
			
		||||
  }
 | 
			
		||||
@@ -183,6 +191,9 @@ public:
 | 
			
		||||
  void OpDir  (const Field &in, Field &out,int dir,int disp) {
 | 
			
		||||
    _Mat.Mdir(in,out,dir,disp);
 | 
			
		||||
  }
 | 
			
		||||
  void OpDirAll  (const Field &in, std::vector<Field> &out){
 | 
			
		||||
    _Mat.MdirAll(in,out);
 | 
			
		||||
  };
 | 
			
		||||
  void Op     (const Field &in, Field &out){
 | 
			
		||||
    _Mat.M(in,out);
 | 
			
		||||
  }
 | 
			
		||||
@@ -234,6 +245,9 @@ public:
 | 
			
		||||
      void OpDir  (const Field &in, Field &out,int dir,int disp) {
 | 
			
		||||
	assert(0);
 | 
			
		||||
      }
 | 
			
		||||
      void OpDirAll  (const Field &in, std::vector<Field> &out){
 | 
			
		||||
	assert(0);
 | 
			
		||||
      };
 | 
			
		||||
    };
 | 
			
		||||
    template<class Matrix,class Field>
 | 
			
		||||
    class SchurDiagMooeeOperator :  public SchurOperatorBase<Field> {
 | 
			
		||||
@@ -320,9 +334,135 @@ public:
 | 
			
		||||
	return axpy_norm(out,-1.0,tmp,in);
 | 
			
		||||
      }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    template<class Field>
 | 
			
		||||
    class NonHermitianSchurOperatorBase :  public LinearOperatorBase<Field> 
 | 
			
		||||
    {
 | 
			
		||||
      public:
 | 
			
		||||
        virtual RealD Mpc      (const Field& in, Field& out) = 0;
 | 
			
		||||
        virtual RealD MpcDag   (const Field& in, Field& out) = 0;
 | 
			
		||||
        virtual void  MpcDagMpc(const Field& in, Field& out, RealD& ni, RealD& no) {
 | 
			
		||||
          Field tmp(in.Grid());
 | 
			
		||||
          tmp.Checkerboard() = in.Checkerboard();
 | 
			
		||||
	        ni = Mpc(in,tmp);
 | 
			
		||||
	        no = MpcDag(tmp,out);
 | 
			
		||||
        }
 | 
			
		||||
        virtual void HermOpAndNorm(const Field& in, Field& out, RealD& n1, RealD& n2) {
 | 
			
		||||
          assert(0);
 | 
			
		||||
        }
 | 
			
		||||
        virtual void HermOp(const Field& in, Field& out) {
 | 
			
		||||
          assert(0);
 | 
			
		||||
        }
 | 
			
		||||
        void Op(const Field& in, Field& out) {
 | 
			
		||||
          Mpc(in, out);
 | 
			
		||||
        }
 | 
			
		||||
        void AdjOp(const Field& in, Field& out) { 
 | 
			
		||||
          MpcDag(in, out);
 | 
			
		||||
        }
 | 
			
		||||
        // Support for coarsening to a multigrid
 | 
			
		||||
        void OpDiag(const Field& in, Field& out) {
 | 
			
		||||
          assert(0); // must coarsen the unpreconditioned system
 | 
			
		||||
        }
 | 
			
		||||
        void OpDir(const Field& in, Field& out, int dir, int disp) {
 | 
			
		||||
          assert(0);
 | 
			
		||||
        }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    template<class Matrix, class Field>
 | 
			
		||||
    class NonHermitianSchurDiagMooeeOperator :  public NonHermitianSchurOperatorBase<Field> 
 | 
			
		||||
    {
 | 
			
		||||
      public:
 | 
			
		||||
        Matrix& _Mat;
 | 
			
		||||
        NonHermitianSchurDiagMooeeOperator(Matrix& Mat): _Mat(Mat){};
 | 
			
		||||
        virtual RealD Mpc(const Field& in, Field& out) {
 | 
			
		||||
          Field tmp(in.Grid());
 | 
			
		||||
          tmp.Checkerboard() = !in.Checkerboard();
 | 
			
		||||
 | 
			
		||||
  	      _Mat.Meooe(in, tmp);
 | 
			
		||||
	        _Mat.MooeeInv(tmp, out);
 | 
			
		||||
	        _Mat.Meooe(out, tmp);
 | 
			
		||||
 | 
			
		||||
	        _Mat.Mooee(in, out);
 | 
			
		||||
	
 | 
			
		||||
          return axpy_norm(out, -1.0, tmp, out);
 | 
			
		||||
        }
 | 
			
		||||
        virtual RealD MpcDag(const Field& in, Field& out) {
 | 
			
		||||
	        Field tmp(in.Grid());
 | 
			
		||||
 | 
			
		||||
	        _Mat.MeooeDag(in, tmp);
 | 
			
		||||
          _Mat.MooeeInvDag(tmp, out);
 | 
			
		||||
	        _Mat.MeooeDag(out, tmp);
 | 
			
		||||
 | 
			
		||||
	        _Mat.MooeeDag(in, out);
 | 
			
		||||
	
 | 
			
		||||
          return axpy_norm(out, -1.0, tmp, out);
 | 
			
		||||
      }
 | 
			
		||||
    };
 | 
			
		||||
    
 | 
			
		||||
    template<class Matrix,class Field>
 | 
			
		||||
    class NonHermitianSchurDiagOneOperator : public NonHermitianSchurOperatorBase<Field> 
 | 
			
		||||
    {
 | 
			
		||||
      protected:
 | 
			
		||||
        Matrix &_Mat;
 | 
			
		||||
    
 | 
			
		||||
      public:
 | 
			
		||||
        NonHermitianSchurDiagOneOperator (Matrix& Mat): _Mat(Mat){};
 | 
			
		||||
        virtual RealD Mpc(const Field& in, Field& out) {
 | 
			
		||||
	        Field tmp(in.Grid());
 | 
			
		||||
 | 
			
		||||
	        _Mat.Meooe(in, out);
 | 
			
		||||
	        _Mat.MooeeInv(out, tmp);
 | 
			
		||||
	        _Mat.Meooe(tmp, out);
 | 
			
		||||
	        _Mat.MooeeInv(out, tmp);
 | 
			
		||||
 | 
			
		||||
	        return axpy_norm(out, -1.0, tmp, in);
 | 
			
		||||
        }
 | 
			
		||||
        virtual RealD MpcDag(const Field& in, Field& out) {
 | 
			
		||||
	        Field tmp(in.Grid());
 | 
			
		||||
 | 
			
		||||
	        _Mat.MooeeInvDag(in, out);
 | 
			
		||||
	        _Mat.MeooeDag(out, tmp);
 | 
			
		||||
	        _Mat.MooeeInvDag(tmp, out);
 | 
			
		||||
	        _Mat.MeooeDag(out, tmp);
 | 
			
		||||
 | 
			
		||||
	        return axpy_norm(out, -1.0, tmp, in);
 | 
			
		||||
        }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    template<class Matrix, class Field>
 | 
			
		||||
    class NonHermitianSchurDiagTwoOperator : public NonHermitianSchurOperatorBase<Field> 
 | 
			
		||||
    {
 | 
			
		||||
      protected:
 | 
			
		||||
        Matrix& _Mat;
 | 
			
		||||
    
 | 
			
		||||
      public:
 | 
			
		||||
        NonHermitianSchurDiagTwoOperator(Matrix& Mat): _Mat(Mat){};
 | 
			
		||||
 | 
			
		||||
        virtual RealD Mpc(const Field& in, Field& out) {
 | 
			
		||||
          Field tmp(in.Grid());
 | 
			
		||||
 | 
			
		||||
	        _Mat.MooeeInv(in, out);
 | 
			
		||||
	        _Mat.Meooe(out, tmp);
 | 
			
		||||
	        _Mat.MooeeInv(tmp, out);
 | 
			
		||||
	        _Mat.Meooe(out, tmp);
 | 
			
		||||
 | 
			
		||||
	        return axpy_norm(out, -1.0, tmp, in);
 | 
			
		||||
        }
 | 
			
		||||
        virtual RealD MpcDag(const Field& in, Field& out) {
 | 
			
		||||
	        Field tmp(in.Grid());
 | 
			
		||||
 | 
			
		||||
          _Mat.MeooeDag(in, out);
 | 
			
		||||
          _Mat.MooeeInvDag(out, tmp);
 | 
			
		||||
          _Mat.MeooeDag(tmp, out);
 | 
			
		||||
          _Mat.MooeeInvDag(out, tmp);
 | 
			
		||||
 | 
			
		||||
          return axpy_norm(out, -1.0, tmp, in);
 | 
			
		||||
        }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    ///////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Left  handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) psi = eta  -->  ( 1 - Moo^-1 Moe Mee^-1 Meo ) psi = Moo^-1 eta
 | 
			
		||||
    // Right handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) Moo^-1 Moo psi = eta  -->  ( 1 - Moe Mee^-1 Meo ) Moo^-1 phi=eta ; psi = Moo^-1 phi
 | 
			
		||||
    // Right handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) Moo^-1 Moo psi = eta  -->  ( 1 - Moe Mee^-1 Meo Moo^-1) phi=eta ; psi = Moo^-1 phi
 | 
			
		||||
    ///////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    template<class Matrix,class Field> using SchurDiagOneRH = SchurDiagTwoOperator<Matrix,Field> ;
 | 
			
		||||
    template<class Matrix,class Field> using SchurDiagOneLH = SchurDiagOneOperator<Matrix,Field> ;
 | 
			
		||||
 
 | 
			
		||||
@@ -45,8 +45,13 @@ public:
 | 
			
		||||
    ni=M(in,tmp);
 | 
			
		||||
    no=Mdag(tmp,out);
 | 
			
		||||
  }
 | 
			
		||||
  virtual void  MdagM(const Field &in, Field &out) {
 | 
			
		||||
    RealD ni, no;
 | 
			
		||||
    MdagM(in,out,ni,no);
 | 
			
		||||
  }
 | 
			
		||||
  virtual  void Mdiag    (const Field &in, Field &out)=0;
 | 
			
		||||
  virtual  void Mdir     (const Field &in, Field &out,int dir, int disp)=0;
 | 
			
		||||
  virtual  void MdirAll  (const Field &in, std::vector<Field> &out)=0;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
@@ -56,12 +61,12 @@ template<class Field> class CheckerBoardedSparseMatrixBase : public SparseMatrix
 | 
			
		||||
public:
 | 
			
		||||
  virtual GridBase *RedBlackGrid(void)=0;
 | 
			
		||||
 | 
			
		||||
      //////////////////////////////////////////////////////////////////////
 | 
			
		||||
      // Query the even even properties to make algorithmic decisions
 | 
			
		||||
      //////////////////////////////////////////////////////////////////////
 | 
			
		||||
      virtual RealD  Mass(void)        { return 0.0; };
 | 
			
		||||
      virtual int    ConstEE(void)     { return 1; }; // Disable assumptions unless overridden
 | 
			
		||||
      virtual int    isTrivialEE(void) { return 0; }; // by a derived class that knows better
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Query the even even properties to make algorithmic decisions
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////
 | 
			
		||||
  virtual RealD  Mass(void)        { return 0.0; };
 | 
			
		||||
  virtual int    ConstEE(void)     { return 1; }; // Disable assumptions unless overridden
 | 
			
		||||
  virtual int    isTrivialEE(void) { return 0; }; // by a derived class that knows better
 | 
			
		||||
 | 
			
		||||
  // half checkerboard operaions
 | 
			
		||||
  virtual  void Meooe    (const Field &in, Field &out)=0;
 | 
			
		||||
 
 | 
			
		||||
@@ -94,6 +94,24 @@ public:
 | 
			
		||||
    Coeffs.assign(0.,order);
 | 
			
		||||
    Coeffs[order-1] = 1.;
 | 
			
		||||
  };
 | 
			
		||||
  
 | 
			
		||||
  // PB - more efficient low pass drops high modes above the low as 1/x uses all Chebyshev's.
 | 
			
		||||
  // Similar kick effect below the threshold as Lanczos filter approach
 | 
			
		||||
  void InitLowPass(RealD _lo,RealD _hi,int _order)
 | 
			
		||||
  {
 | 
			
		||||
    lo=_lo;
 | 
			
		||||
    hi=_hi;
 | 
			
		||||
    order=_order;
 | 
			
		||||
      
 | 
			
		||||
    if(order < 2) exit(-1);
 | 
			
		||||
    Coeffs.resize(order);
 | 
			
		||||
    for(int j=0;j<order;j++){
 | 
			
		||||
      RealD k=(order-1.0);
 | 
			
		||||
      RealD s=std::cos( j*M_PI*(k+0.5)/order );
 | 
			
		||||
      Coeffs[j] = s * 2.0/order;
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  void Init(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD))
 | 
			
		||||
  {
 | 
			
		||||
@@ -234,20 +252,20 @@ public:
 | 
			
		||||
    RealD xscale = 2.0/(hi-lo);
 | 
			
		||||
    RealD mscale = -(hi+lo)/(hi-lo);
 | 
			
		||||
    Linop.HermOp(T0,y);
 | 
			
		||||
    T1=y*xscale+in*mscale;
 | 
			
		||||
    axpby(T1,xscale,mscale,y,in);
 | 
			
		||||
 | 
			
		||||
    // sum = .5 c[0] T0 + c[1] T1
 | 
			
		||||
    out = (0.5*Coeffs[0])*T0 + Coeffs[1]*T1;
 | 
			
		||||
    //    out = ()*T0 + Coeffs[1]*T1;
 | 
			
		||||
    axpby(out,0.5*Coeffs[0],Coeffs[1],T0,T1);
 | 
			
		||||
    for(int n=2;n<order;n++){
 | 
			
		||||
	
 | 
			
		||||
      Linop.HermOp(*Tn,y);
 | 
			
		||||
 | 
			
		||||
      y=xscale*y+mscale*(*Tn);
 | 
			
		||||
 | 
			
		||||
      *Tnp=2.0*y-(*Tnm);
 | 
			
		||||
 | 
			
		||||
      out=out+Coeffs[n]* (*Tnp);
 | 
			
		||||
 | 
			
		||||
      //     y=xscale*y+mscale*(*Tn);
 | 
			
		||||
      //      *Tnp=2.0*y-(*Tnm);
 | 
			
		||||
      //      out=out+Coeffs[n]* (*Tnp);
 | 
			
		||||
      axpby(y,xscale,mscale,y,(*Tn));
 | 
			
		||||
      axpby(*Tnp,2.0,-1.0,y,(*Tnm));
 | 
			
		||||
      axpy(out,Coeffs[n],*Tnp,out);
 | 
			
		||||
      // Cycle pointers to avoid copies
 | 
			
		||||
      Field *swizzle = Tnm;
 | 
			
		||||
      Tnm    =Tn;
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										129
									
								
								Grid/algorithms/approx/JacobiPolynomial.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										129
									
								
								Grid/algorithms/approx/JacobiPolynomial.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,129 @@
 | 
			
		||||
#ifndef GRID_JACOBIPOLYNOMIAL_H
 | 
			
		||||
#define GRID_JACOBIPOLYNOMIAL_H
 | 
			
		||||
 | 
			
		||||
#include <Grid/algorithms/LinearOperator.h>
 | 
			
		||||
 | 
			
		||||
NAMESPACE_BEGIN(Grid);
 | 
			
		||||
 | 
			
		||||
template<class Field>
 | 
			
		||||
class JacobiPolynomial : public OperatorFunction<Field> {
 | 
			
		||||
 private:
 | 
			
		||||
  using OperatorFunction<Field>::operator();
 | 
			
		||||
 | 
			
		||||
  int order;
 | 
			
		||||
  RealD hi;
 | 
			
		||||
  RealD lo;
 | 
			
		||||
  RealD alpha;
 | 
			
		||||
  RealD beta;
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  void csv(std::ostream &out){
 | 
			
		||||
    csv(out,lo,hi);
 | 
			
		||||
  }
 | 
			
		||||
  void csv(std::ostream &out,RealD llo,RealD hhi){
 | 
			
		||||
    RealD diff = hhi-llo;
 | 
			
		||||
    RealD delta = diff*1.0e-5;
 | 
			
		||||
    for (RealD x=llo-delta; x<=hhi; x+=delta) {
 | 
			
		||||
      RealD f = approx(x);
 | 
			
		||||
      out<< x<<" "<<f <<std::endl;
 | 
			
		||||
    }
 | 
			
		||||
    return;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  JacobiPolynomial(){};
 | 
			
		||||
  JacobiPolynomial(RealD _lo,RealD _hi,int _order,RealD _alpha, RealD _beta)
 | 
			
		||||
  {
 | 
			
		||||
      lo=_lo;
 | 
			
		||||
      hi=_hi;
 | 
			
		||||
      alpha=_alpha;
 | 
			
		||||
      beta=_beta;
 | 
			
		||||
      order=_order;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  RealD approx(RealD x) // Convenience for plotting the approximation                                                       
 | 
			
		||||
  {
 | 
			
		||||
    RealD Tn;
 | 
			
		||||
    RealD Tnm;
 | 
			
		||||
    RealD Tnp;
 | 
			
		||||
 | 
			
		||||
    RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo));
 | 
			
		||||
 | 
			
		||||
    RealD T0=1.0;
 | 
			
		||||
    RealD T1=(alpha-beta)*0.5+(alpha+beta+2.0)*0.5*y;
 | 
			
		||||
 | 
			
		||||
    Tn =T1;
 | 
			
		||||
    Tnm=T0;
 | 
			
		||||
    for(int n=2;n<=order;n++){
 | 
			
		||||
      RealD cnp = 2.0*n*(n+alpha+beta)*(2.0*n-2.0+alpha+beta);
 | 
			
		||||
      RealD cny = (2.0*n-2.0+alpha+beta)*(2.0*n-1.0+alpha+beta)*(2.0*n+alpha+beta);
 | 
			
		||||
      RealD cn1 = (2.0*n+alpha+beta-1.0)*(alpha*alpha-beta*beta);
 | 
			
		||||
      RealD cnm = - 2.0*(n+alpha-1.0)*(n+beta-1.0)*(2.0*n+alpha+beta);
 | 
			
		||||
      Tnp= ( cny * y *Tn + cn1 * Tn + cnm * Tnm )/ cnp;
 | 
			
		||||
      Tnm=Tn;
 | 
			
		||||
      Tn =Tnp;
 | 
			
		||||
    }
 | 
			
		||||
    return Tnp;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  // Implement the required interface                                                                                       
 | 
			
		||||
  void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
 | 
			
		||||
    GridBase *grid=in.Grid();
 | 
			
		||||
 | 
			
		||||
    int vol=grid->gSites();
 | 
			
		||||
 | 
			
		||||
    Field T0(grid);
 | 
			
		||||
    Field T1(grid);
 | 
			
		||||
    Field T2(grid);
 | 
			
		||||
    Field y(grid);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    Field *Tnm = &T0;
 | 
			
		||||
    Field *Tn  = &T1;
 | 
			
		||||
    Field *Tnp = &T2;
 | 
			
		||||
 | 
			
		||||
    //    RealD T0=1.0;                                                                                                     
 | 
			
		||||
    T0=in;
 | 
			
		||||
 | 
			
		||||
    //    RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo));                                                                           
 | 
			
		||||
    //           = x * 2/(hi-lo) - (hi+lo)/(hi-lo)                                                                          
 | 
			
		||||
    Linop.HermOp(T0,y);
 | 
			
		||||
    RealD xscale = 2.0/(hi-lo);
 | 
			
		||||
    RealD mscale = -(hi+lo)/(hi-lo);
 | 
			
		||||
    Linop.HermOp(T0,y);
 | 
			
		||||
    y=y*xscale+in*mscale;
 | 
			
		||||
 | 
			
		||||
    // RealD T1=(alpha-beta)*0.5+(alpha+beta+2.0)*0.5*y;
 | 
			
		||||
    RealD halfAmB  = (alpha-beta)*0.5;
 | 
			
		||||
    RealD halfApBp2= (alpha+beta+2.0)*0.5;
 | 
			
		||||
    T1 = halfAmB * in + halfApBp2*y;
 | 
			
		||||
 | 
			
		||||
    for(int n=2;n<=order;n++){
 | 
			
		||||
 | 
			
		||||
      Linop.HermOp(*Tn,y);
 | 
			
		||||
      y=xscale*y+mscale*(*Tn);
 | 
			
		||||
 | 
			
		||||
      RealD cnp = 2.0*n*(n+alpha+beta)*(2.0*n-2.0+alpha+beta);
 | 
			
		||||
      RealD cny = (2.0*n-2.0+alpha+beta)*(2.0*n-1.0+alpha+beta)*(2.0*n+alpha+beta);
 | 
			
		||||
      RealD cn1 = (2.0*n+alpha+beta-1.0)*(alpha*alpha-beta*beta);
 | 
			
		||||
      RealD cnm = - 2.0*(n+alpha-1.0)*(n+beta-1.0)*(2.0*n+alpha+beta);
 | 
			
		||||
 | 
			
		||||
      //      Tnp= ( cny * y *Tn + cn1 * Tn + cnm * Tnm )/ cnp;                                                             
 | 
			
		||||
      cny=cny/cnp;
 | 
			
		||||
      cn1=cn1/cnp;
 | 
			
		||||
      cn1=cn1/cnp;
 | 
			
		||||
      cnm=cnm/cnp;
 | 
			
		||||
 | 
			
		||||
      *Tnp=cny*y + cn1 *(*Tn) + cnm * (*Tnm);
 | 
			
		||||
 | 
			
		||||
      // Cycle pointers to avoid copies                                                                                     
 | 
			
		||||
      Field *swizzle = Tnm;
 | 
			
		||||
      Tnm    =Tn;
 | 
			
		||||
      Tn     =Tnp;
 | 
			
		||||
      Tnp    =swizzle;
 | 
			
		||||
    }
 | 
			
		||||
    out=*Tnp;
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
NAMESPACE_END(Grid);
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										222
									
								
								Grid/algorithms/iterative/BiCGSTAB.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										222
									
								
								Grid/algorithms/iterative/BiCGSTAB.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,222 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/algorithms/iterative/BiCGSTAB.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: juettner <juettner@soton.ac.uk>
 | 
			
		||||
Author: David Murphy <djmurphy@mit.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#ifndef GRID_BICGSTAB_H
 | 
			
		||||
#define GRID_BICGSTAB_H
 | 
			
		||||
 | 
			
		||||
NAMESPACE_BEGIN(Grid);
 | 
			
		||||
 | 
			
		||||
/////////////////////////////////////////////////////////////
 | 
			
		||||
// Base classes for iterative processes based on operators
 | 
			
		||||
// single input vec, single output vec.
 | 
			
		||||
/////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
template <class Field>
 | 
			
		||||
class BiCGSTAB : public OperatorFunction<Field> 
 | 
			
		||||
{
 | 
			
		||||
  public:
 | 
			
		||||
    using OperatorFunction<Field>::operator();
 | 
			
		||||
    
 | 
			
		||||
    bool ErrorOnNoConverge;  // throw an assert when the CG fails to converge.
 | 
			
		||||
                             // Defaults true.
 | 
			
		||||
    RealD Tolerance;
 | 
			
		||||
    Integer MaxIterations;
 | 
			
		||||
    Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
 | 
			
		||||
  
 | 
			
		||||
    BiCGSTAB(RealD tol, Integer maxit, bool err_on_no_conv = true) : 
 | 
			
		||||
      Tolerance(tol), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv){};
 | 
			
		||||
 | 
			
		||||
    void operator()(LinearOperatorBase<Field>& Linop, const Field& src, Field& psi) 
 | 
			
		||||
    {
 | 
			
		||||
      psi.Checkerboard() = src.Checkerboard();
 | 
			
		||||
      conformable(psi, src);
 | 
			
		||||
 | 
			
		||||
      RealD cp(0), rho(1), rho_prev(0), alpha(1), beta(0), omega(1);
 | 
			
		||||
      RealD a(0), bo(0), b(0), ssq(0);
 | 
			
		||||
 | 
			
		||||
      Field p(src);
 | 
			
		||||
      Field r(src);
 | 
			
		||||
      Field rhat(src);
 | 
			
		||||
      Field v(src);
 | 
			
		||||
      Field s(src);
 | 
			
		||||
      Field t(src);
 | 
			
		||||
      Field h(src);
 | 
			
		||||
 | 
			
		||||
      v = Zero();
 | 
			
		||||
      p = Zero();
 | 
			
		||||
 | 
			
		||||
      // Initial residual computation & set up
 | 
			
		||||
      RealD guess = norm2(psi);
 | 
			
		||||
      assert(std::isnan(guess) == 0);
 | 
			
		||||
    
 | 
			
		||||
      Linop.Op(psi, v);
 | 
			
		||||
      b = norm2(v);
 | 
			
		||||
 | 
			
		||||
      r = src - v;
 | 
			
		||||
      rhat = r;
 | 
			
		||||
      a = norm2(r);
 | 
			
		||||
      ssq = norm2(src);
 | 
			
		||||
 | 
			
		||||
      std::cout << GridLogIterative << std::setprecision(8) << "BiCGSTAB: guess " << guess << std::endl;
 | 
			
		||||
      std::cout << GridLogIterative << std::setprecision(8) << "BiCGSTAB:   src " << ssq << std::endl;
 | 
			
		||||
      std::cout << GridLogIterative << std::setprecision(8) << "BiCGSTAB:    mp " << b << std::endl;
 | 
			
		||||
      std::cout << GridLogIterative << std::setprecision(8) << "BiCGSTAB:     r " << a << std::endl;
 | 
			
		||||
 | 
			
		||||
      RealD rsq = Tolerance * Tolerance * ssq;
 | 
			
		||||
 | 
			
		||||
      // Check if guess is really REALLY good :)
 | 
			
		||||
      if(a <= rsq){ return; }
 | 
			
		||||
 | 
			
		||||
      std::cout << GridLogIterative << std::setprecision(8) << "BiCGSTAB: k=0 residual " << a << " target " << rsq << std::endl;
 | 
			
		||||
 | 
			
		||||
      GridStopWatch LinalgTimer;
 | 
			
		||||
      GridStopWatch InnerTimer;
 | 
			
		||||
      GridStopWatch AxpyNormTimer;
 | 
			
		||||
      GridStopWatch LinearCombTimer;
 | 
			
		||||
      GridStopWatch MatrixTimer;
 | 
			
		||||
      GridStopWatch SolverTimer;
 | 
			
		||||
 | 
			
		||||
      SolverTimer.Start();
 | 
			
		||||
      int k;
 | 
			
		||||
      for (k = 1; k <= MaxIterations; k++) 
 | 
			
		||||
      {
 | 
			
		||||
        rho_prev = rho;
 | 
			
		||||
 | 
			
		||||
        LinalgTimer.Start();
 | 
			
		||||
        InnerTimer.Start();
 | 
			
		||||
        ComplexD Crho  = innerProduct(rhat,r);
 | 
			
		||||
        InnerTimer.Stop();
 | 
			
		||||
        rho = Crho.real();
 | 
			
		||||
 | 
			
		||||
        beta = (rho / rho_prev) * (alpha / omega);
 | 
			
		||||
 | 
			
		||||
        LinearCombTimer.Start();
 | 
			
		||||
        bo = beta * omega;
 | 
			
		||||
        auto p_v = p.View();
 | 
			
		||||
        auto r_v = r.View();
 | 
			
		||||
        auto v_v = v.View();
 | 
			
		||||
        accelerator_for(ss, p_v.size(), Field::vector_object::Nsimd(),{
 | 
			
		||||
          coalescedWrite(p_v[ss], beta*p_v(ss) - bo*v_v(ss) + r_v(ss));
 | 
			
		||||
        });
 | 
			
		||||
        LinearCombTimer.Stop();
 | 
			
		||||
        LinalgTimer.Stop();
 | 
			
		||||
 | 
			
		||||
        MatrixTimer.Start();
 | 
			
		||||
        Linop.Op(p,v);
 | 
			
		||||
        MatrixTimer.Stop();
 | 
			
		||||
 | 
			
		||||
        LinalgTimer.Start();
 | 
			
		||||
        InnerTimer.Start();
 | 
			
		||||
        ComplexD Calpha = innerProduct(rhat,v);
 | 
			
		||||
        InnerTimer.Stop();
 | 
			
		||||
        alpha = rho / Calpha.real();
 | 
			
		||||
 | 
			
		||||
        LinearCombTimer.Start();
 | 
			
		||||
        auto h_v = h.View();
 | 
			
		||||
        auto psi_v = psi.View();
 | 
			
		||||
        accelerator_for(ss, h_v.size(), Field::vector_object::Nsimd(),{
 | 
			
		||||
          coalescedWrite(h_v[ss], alpha*p_v(ss) + psi_v(ss));
 | 
			
		||||
        });
 | 
			
		||||
        
 | 
			
		||||
        auto s_v = s.View();
 | 
			
		||||
        accelerator_for(ss, s_v.size(), Field::vector_object::Nsimd(),{
 | 
			
		||||
          coalescedWrite(s_v[ss], -alpha*v_v(ss) + r_v(ss));
 | 
			
		||||
        });
 | 
			
		||||
        LinearCombTimer.Stop();
 | 
			
		||||
        LinalgTimer.Stop();
 | 
			
		||||
 | 
			
		||||
        MatrixTimer.Start();
 | 
			
		||||
        Linop.Op(s,t);
 | 
			
		||||
        MatrixTimer.Stop();
 | 
			
		||||
 | 
			
		||||
        LinalgTimer.Start();
 | 
			
		||||
        InnerTimer.Start();
 | 
			
		||||
        ComplexD Comega = innerProduct(t,s);
 | 
			
		||||
        InnerTimer.Stop();
 | 
			
		||||
        omega = Comega.real() / norm2(t);
 | 
			
		||||
 | 
			
		||||
        LinearCombTimer.Start();
 | 
			
		||||
        auto t_v = t.View();
 | 
			
		||||
        accelerator_for(ss, psi_v.size(), Field::vector_object::Nsimd(),{
 | 
			
		||||
          coalescedWrite(psi_v[ss], h_v(ss) + omega * s_v(ss));
 | 
			
		||||
          coalescedWrite(r_v[ss], -omega * t_v(ss) + s_v(ss));
 | 
			
		||||
        });
 | 
			
		||||
        LinearCombTimer.Stop();
 | 
			
		||||
 | 
			
		||||
        cp = norm2(r);
 | 
			
		||||
        LinalgTimer.Stop();
 | 
			
		||||
 | 
			
		||||
        std::cout << GridLogIterative << "BiCGSTAB: Iteration " << k << " residual " << sqrt(cp/ssq) << " target " << Tolerance << std::endl;
 | 
			
		||||
 | 
			
		||||
        // Stopping condition
 | 
			
		||||
        if(cp <= rsq) 
 | 
			
		||||
        {
 | 
			
		||||
          SolverTimer.Stop();
 | 
			
		||||
          Linop.Op(psi, v);
 | 
			
		||||
          p = v - src;
 | 
			
		||||
 | 
			
		||||
          RealD srcnorm = sqrt(norm2(src));
 | 
			
		||||
          RealD resnorm = sqrt(norm2(p));
 | 
			
		||||
          RealD true_residual = resnorm / srcnorm;
 | 
			
		||||
 | 
			
		||||
          std::cout << GridLogMessage << "BiCGSTAB Converged on iteration " << k << std::endl;
 | 
			
		||||
          std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp/ssq) << std::endl;
 | 
			
		||||
          std::cout << GridLogMessage << "\tTrue residual " << true_residual << std::endl;
 | 
			
		||||
          std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
 | 
			
		||||
 | 
			
		||||
          std::cout << GridLogMessage << "Time breakdown " << std::endl;
 | 
			
		||||
          std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() << std::endl;
 | 
			
		||||
          std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() << std::endl;
 | 
			
		||||
          std::cout << GridLogMessage << "\tLinalg     " << LinalgTimer.Elapsed() << std::endl;
 | 
			
		||||
          std::cout << GridLogMessage << "\tInner      " << InnerTimer.Elapsed() << std::endl;
 | 
			
		||||
          std::cout << GridLogMessage << "\tAxpyNorm   " << AxpyNormTimer.Elapsed() << std::endl;
 | 
			
		||||
          std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() << std::endl;
 | 
			
		||||
 | 
			
		||||
          if(ErrorOnNoConverge){ assert(true_residual / Tolerance < 10000.0); }
 | 
			
		||||
 | 
			
		||||
          IterationsToComplete = k;	
 | 
			
		||||
 | 
			
		||||
          return;
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      std::cout << GridLogMessage << "BiCGSTAB did NOT converge" << std::endl;
 | 
			
		||||
 | 
			
		||||
      if(ErrorOnNoConverge){ assert(0); }
 | 
			
		||||
      IterationsToComplete = k;
 | 
			
		||||
    }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
NAMESPACE_END(Grid);
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										158
									
								
								Grid/algorithms/iterative/BiCGSTABMixedPrec.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										158
									
								
								Grid/algorithms/iterative/BiCGSTABMixedPrec.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,158 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/algorithms/iterative/BiCGSTABMixedPrec.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Christopher Kelly <ckelly@phys.columbia.edu>
 | 
			
		||||
Author: David Murphy <djmurphy@mit.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#ifndef GRID_BICGSTAB_MIXED_PREC_H
 | 
			
		||||
#define GRID_BICGSTAB_MIXED_PREC_H
 | 
			
		||||
 | 
			
		||||
NAMESPACE_BEGIN(Grid);
 | 
			
		||||
 | 
			
		||||
// Mixed precision restarted defect correction BiCGSTAB
 | 
			
		||||
template<class FieldD, class FieldF, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0, typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0> 
 | 
			
		||||
class MixedPrecisionBiCGSTAB : public LinearFunction<FieldD> 
 | 
			
		||||
{
 | 
			
		||||
  public:                                                
 | 
			
		||||
    RealD   Tolerance;
 | 
			
		||||
    RealD   InnerTolerance; // Initial tolerance for inner CG. Defaults to Tolerance but can be changed
 | 
			
		||||
    Integer MaxInnerIterations;
 | 
			
		||||
    Integer MaxOuterIterations;
 | 
			
		||||
    GridBase* SinglePrecGrid; // Grid for single-precision fields
 | 
			
		||||
    RealD OuterLoopNormMult; // Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
 | 
			
		||||
    LinearOperatorBase<FieldF> &Linop_f;
 | 
			
		||||
    LinearOperatorBase<FieldD> &Linop_d;
 | 
			
		||||
 | 
			
		||||
    Integer TotalInnerIterations; //Number of inner CG iterations
 | 
			
		||||
    Integer TotalOuterIterations; //Number of restarts
 | 
			
		||||
    Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
 | 
			
		||||
 | 
			
		||||
    //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
 | 
			
		||||
    LinearFunction<FieldF> *guesser;
 | 
			
		||||
    
 | 
			
		||||
    MixedPrecisionBiCGSTAB(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, 
 | 
			
		||||
        LinearOperatorBase<FieldF>& _Linop_f, LinearOperatorBase<FieldD>& _Linop_d) : 
 | 
			
		||||
      Linop_f(_Linop_f), Linop_d(_Linop_d), Tolerance(tol), InnerTolerance(tol), MaxInnerIterations(maxinnerit), 
 | 
			
		||||
      MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid), OuterLoopNormMult(100.), guesser(NULL) {};
 | 
			
		||||
 | 
			
		||||
    void useGuesser(LinearFunction<FieldF>& g){
 | 
			
		||||
      guesser = &g;
 | 
			
		||||
    }
 | 
			
		||||
  
 | 
			
		||||
    void operator() (const FieldD& src_d_in, FieldD& sol_d)
 | 
			
		||||
    {
 | 
			
		||||
      TotalInnerIterations = 0;
 | 
			
		||||
    
 | 
			
		||||
      GridStopWatch TotalTimer;
 | 
			
		||||
      TotalTimer.Start();
 | 
			
		||||
      
 | 
			
		||||
      int cb = src_d_in.Checkerboard();
 | 
			
		||||
      sol_d.Checkerboard() = cb;
 | 
			
		||||
      
 | 
			
		||||
      RealD src_norm = norm2(src_d_in);
 | 
			
		||||
      RealD stop = src_norm * Tolerance*Tolerance;
 | 
			
		||||
 | 
			
		||||
      GridBase* DoublePrecGrid = src_d_in.Grid();
 | 
			
		||||
      FieldD tmp_d(DoublePrecGrid);
 | 
			
		||||
      tmp_d.Checkerboard() = cb;
 | 
			
		||||
      
 | 
			
		||||
      FieldD tmp2_d(DoublePrecGrid);
 | 
			
		||||
      tmp2_d.Checkerboard() = cb;
 | 
			
		||||
      
 | 
			
		||||
      FieldD src_d(DoublePrecGrid);
 | 
			
		||||
      src_d = src_d_in; //source for next inner iteration, computed from residual during operation
 | 
			
		||||
      
 | 
			
		||||
      RealD inner_tol = InnerTolerance;
 | 
			
		||||
      
 | 
			
		||||
      FieldF src_f(SinglePrecGrid);
 | 
			
		||||
      src_f.Checkerboard() = cb;
 | 
			
		||||
      
 | 
			
		||||
      FieldF sol_f(SinglePrecGrid);
 | 
			
		||||
      sol_f.Checkerboard() = cb;
 | 
			
		||||
      
 | 
			
		||||
      BiCGSTAB<FieldF> CG_f(inner_tol, MaxInnerIterations);
 | 
			
		||||
      CG_f.ErrorOnNoConverge = false;
 | 
			
		||||
 | 
			
		||||
      GridStopWatch InnerCGtimer;
 | 
			
		||||
 | 
			
		||||
      GridStopWatch PrecChangeTimer;
 | 
			
		||||
      
 | 
			
		||||
      Integer &outer_iter = TotalOuterIterations; //so it will be equal to the final iteration count
 | 
			
		||||
        
 | 
			
		||||
      for(outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++)
 | 
			
		||||
      {
 | 
			
		||||
        // Compute double precision rsd and also new RHS vector.
 | 
			
		||||
        Linop_d.Op(sol_d, tmp_d);
 | 
			
		||||
        RealD norm = axpy_norm(src_d, -1., tmp_d, src_d_in); //src_d is residual vector
 | 
			
		||||
        
 | 
			
		||||
        std::cout << GridLogMessage << "MixedPrecisionBiCGSTAB: Outer iteration " << outer_iter << " residual " << norm << " target " << stop << std::endl;
 | 
			
		||||
 | 
			
		||||
        if(norm < OuterLoopNormMult * stop){
 | 
			
		||||
          std::cout << GridLogMessage << "MixedPrecisionBiCGSTAB: Outer iteration converged on iteration " << outer_iter << std::endl;
 | 
			
		||||
          break;
 | 
			
		||||
        }
 | 
			
		||||
        while(norm * inner_tol * inner_tol < stop){ inner_tol *= 2; } // inner_tol = sqrt(stop/norm) ??
 | 
			
		||||
 | 
			
		||||
        PrecChangeTimer.Start();
 | 
			
		||||
        precisionChange(src_f, src_d);
 | 
			
		||||
        PrecChangeTimer.Stop();
 | 
			
		||||
        
 | 
			
		||||
        sol_f = Zero();
 | 
			
		||||
 | 
			
		||||
        //Optionally improve inner solver guess (eg using known eigenvectors)
 | 
			
		||||
        if(guesser != NULL){ (*guesser)(src_f, sol_f); }
 | 
			
		||||
 | 
			
		||||
        //Inner CG
 | 
			
		||||
        CG_f.Tolerance = inner_tol;
 | 
			
		||||
        InnerCGtimer.Start();
 | 
			
		||||
        CG_f(Linop_f, src_f, sol_f);
 | 
			
		||||
        InnerCGtimer.Stop();
 | 
			
		||||
        TotalInnerIterations += CG_f.IterationsToComplete;
 | 
			
		||||
        
 | 
			
		||||
        //Convert sol back to double and add to double prec solution
 | 
			
		||||
        PrecChangeTimer.Start();
 | 
			
		||||
        precisionChange(tmp_d, sol_f);
 | 
			
		||||
        PrecChangeTimer.Stop();
 | 
			
		||||
        
 | 
			
		||||
        axpy(sol_d, 1.0, tmp_d, sol_d);
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      //Final trial CG
 | 
			
		||||
      std::cout << GridLogMessage << "MixedPrecisionBiCGSTAB: Starting final patch-up double-precision solve" << std::endl;
 | 
			
		||||
      
 | 
			
		||||
      BiCGSTAB<FieldD> CG_d(Tolerance, MaxInnerIterations);
 | 
			
		||||
      CG_d(Linop_d, src_d_in, sol_d);
 | 
			
		||||
      TotalFinalStepIterations = CG_d.IterationsToComplete;
 | 
			
		||||
 | 
			
		||||
      TotalTimer.Stop();
 | 
			
		||||
      std::cout << GridLogMessage << "MixedPrecisionBiCGSTAB: Inner CG iterations " << TotalInnerIterations << " Restarts " << TotalOuterIterations << " Final CG iterations " << TotalFinalStepIterations << std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "MixedPrecisionBiCGSTAB: Total time " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
NAMESPACE_END(Grid);
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -52,6 +52,7 @@ class BlockConjugateGradient : public OperatorFunction<Field> {
 | 
			
		||||
  Integer MaxIterations;
 | 
			
		||||
  Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
 | 
			
		||||
  Integer PrintInterval; //GridLogMessages or Iterative
 | 
			
		||||
  RealD TrueResidual;
 | 
			
		||||
  
 | 
			
		||||
  BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true)
 | 
			
		||||
    : Tolerance(tol), CGtype(cgtype),   blockDim(_Orthog),  MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv),PrintInterval(100)
 | 
			
		||||
@@ -306,7 +307,8 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
 | 
			
		||||
 | 
			
		||||
      Linop.HermOp(X, AD);
 | 
			
		||||
      AD = AD-B;
 | 
			
		||||
      std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AD)/norm2(B)) <<std::endl;
 | 
			
		||||
      TrueResidual = std::sqrt(norm2(AD)/norm2(B));
 | 
			
		||||
      std::cout << GridLogMessage <<"\tTrue residual is " << TrueResidual <<std::endl;
 | 
			
		||||
 | 
			
		||||
      std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl;
 | 
			
		||||
@@ -442,7 +444,8 @@ void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &
 | 
			
		||||
 | 
			
		||||
      Linop.HermOp(Psi, AP);
 | 
			
		||||
      AP = AP-Src;
 | 
			
		||||
      std::cout <<GridLogMessage << "\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;
 | 
			
		||||
      TrueResidual = std::sqrt(norm2(AP)/norm2(Src));
 | 
			
		||||
      std::cout <<GridLogMessage << "\tTrue residual is " << TrueResidual <<std::endl;
 | 
			
		||||
 | 
			
		||||
      std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl;
 | 
			
		||||
@@ -653,7 +656,7 @@ void BlockCGrQsolveVec(LinearOperatorBase<Field> &Linop, const std::vector<Field
 | 
			
		||||
      if ( rr > max_resid ) max_resid = rr;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogIterative << "\t Block Iteration "<<k<<" ave resid "<< sqrt(rrsum/sssum) << " max "<< sqrt(max_resid) <<std::endl;
 | 
			
		||||
    std::cout << GridLogIterative << "\t Block Iteration "<<k<<" ave resid "<< std::sqrt(rrsum/sssum) << " max "<< std::sqrt(max_resid) <<std::endl;
 | 
			
		||||
 | 
			
		||||
    if ( max_resid < Tolerance*Tolerance ) { 
 | 
			
		||||
 | 
			
		||||
@@ -668,7 +671,8 @@ void BlockCGrQsolveVec(LinearOperatorBase<Field> &Linop, const std::vector<Field
 | 
			
		||||
 | 
			
		||||
      for(int b=0;b<Nblock;b++) Linop.HermOp(X[b], AD[b]);
 | 
			
		||||
      for(int b=0;b<Nblock;b++) AD[b] = AD[b]-B[b];
 | 
			
		||||
      std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(normv(AD)/normv(B)) <<std::endl;
 | 
			
		||||
      TrueResidual = std::sqrt(normv(AD)/normv(B));
 | 
			
		||||
      std::cout << GridLogMessage << "\tTrue residual is " << TrueResidual <<std::endl;
 | 
			
		||||
 | 
			
		||||
      std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl;
 | 
			
		||||
 
 | 
			
		||||
@@ -49,6 +49,7 @@ public:
 | 
			
		||||
  RealD Tolerance;
 | 
			
		||||
  Integer MaxIterations;
 | 
			
		||||
  Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
 | 
			
		||||
  RealD TrueResidual;
 | 
			
		||||
  
 | 
			
		||||
  ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
 | 
			
		||||
    : Tolerance(tol),
 | 
			
		||||
@@ -71,7 +72,6 @@ public:
 | 
			
		||||
    // Initial residual computation & set up
 | 
			
		||||
    RealD guess = norm2(psi);
 | 
			
		||||
    assert(std::isnan(guess) == 0);
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
    Linop.HermOpAndNorm(psi, mmp, d, b);
 | 
			
		||||
    
 | 
			
		||||
@@ -82,6 +82,14 @@ public:
 | 
			
		||||
    cp = a;
 | 
			
		||||
    ssq = norm2(src);
 | 
			
		||||
 | 
			
		||||
    // Handle trivial case of zero src
 | 
			
		||||
    if (ssq == 0.){
 | 
			
		||||
      psi = Zero();
 | 
			
		||||
      IterationsToComplete = 1;
 | 
			
		||||
      TrueResidual = 0.;
 | 
			
		||||
      return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: guess " << guess << std::endl;
 | 
			
		||||
    std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient:   src " << ssq << std::endl;
 | 
			
		||||
    std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient:    mp " << d << std::endl;
 | 
			
		||||
@@ -93,6 +101,7 @@ public:
 | 
			
		||||
 | 
			
		||||
    // Check if guess is really REALLY good :)
 | 
			
		||||
    if (cp <= rsq) {
 | 
			
		||||
      TrueResidual = std::sqrt(a/ssq);
 | 
			
		||||
      std::cout << GridLogMessage << "ConjugateGradient guess is converged already " << std::endl;
 | 
			
		||||
      IterationsToComplete = 0;	
 | 
			
		||||
      return;
 | 
			
		||||
@@ -142,7 +151,7 @@ public:
 | 
			
		||||
      LinalgTimer.Stop();
 | 
			
		||||
 | 
			
		||||
      std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
 | 
			
		||||
                << " residual^2 " << sqrt(cp/ssq) << " target " << Tolerance << std::endl;
 | 
			
		||||
                << " residual " << sqrt(cp/ssq) << " target " << Tolerance << std::endl;
 | 
			
		||||
 | 
			
		||||
      // Stopping condition
 | 
			
		||||
      if (cp <= rsq) {
 | 
			
		||||
@@ -154,26 +163,33 @@ public:
 | 
			
		||||
        RealD resnorm = std::sqrt(norm2(p));
 | 
			
		||||
        RealD true_residual = resnorm / srcnorm;
 | 
			
		||||
 | 
			
		||||
        std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k << std::endl;
 | 
			
		||||
        std::cout << GridLogMessage << "\tComputed residual " << std::sqrt(cp / ssq)<<std::endl;
 | 
			
		||||
	std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
 | 
			
		||||
	std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
 | 
			
		||||
        std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k 
 | 
			
		||||
		  << "\tComputed residual " << std::sqrt(cp / ssq)
 | 
			
		||||
		  << "\tTrue residual " << true_residual
 | 
			
		||||
		  << "\tTarget " << Tolerance << std::endl;
 | 
			
		||||
 | 
			
		||||
        std::cout << GridLogMessage << "Time breakdown "<<std::endl;
 | 
			
		||||
	std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout << GridLogMessage << "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout << GridLogMessage << "\tInner      " << InnerTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout << GridLogMessage << "\tAxpyNorm   " << AxpyNormTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
 | 
			
		||||
        std::cout << GridLogIterative << "Time breakdown "<<std::endl;
 | 
			
		||||
	std::cout << GridLogIterative << "\tElapsed    " << SolverTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout << GridLogIterative << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout << GridLogIterative << "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout << GridLogIterative << "\tInner      " << InnerTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout << GridLogIterative << "\tAxpyNorm   " << AxpyNormTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout << GridLogIterative << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
 | 
			
		||||
 | 
			
		||||
        if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
 | 
			
		||||
 | 
			
		||||
	IterationsToComplete = k;	
 | 
			
		||||
	TrueResidual = true_residual;
 | 
			
		||||
 | 
			
		||||
        return;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    // Failed. Calculate true residual before giving up                                                         
 | 
			
		||||
    Linop.HermOpAndNorm(psi, mmp, d, qq);
 | 
			
		||||
    p = mmp - src;
 | 
			
		||||
 | 
			
		||||
    TrueResidual = sqrt(norm2(p)/ssq);
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage << "ConjugateGradient did NOT converge "<<k<<" / "<< MaxIterations<< std::endl;
 | 
			
		||||
 | 
			
		||||
    if (ErrorOnNoConverge) assert(0);
 | 
			
		||||
 
 | 
			
		||||
@@ -46,15 +46,19 @@ public:
 | 
			
		||||
 | 
			
		||||
  RealD   Tolerance;
 | 
			
		||||
  Integer MaxIterations;
 | 
			
		||||
    Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
 | 
			
		||||
  Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
 | 
			
		||||
  std::vector<int> IterationsToCompleteShift;  // Iterations for this shift
 | 
			
		||||
  int verbose;
 | 
			
		||||
  MultiShiftFunction shifts;
 | 
			
		||||
  std::vector<RealD> TrueResidualShift;
 | 
			
		||||
 | 
			
		||||
  ConjugateGradientMultiShift(Integer maxit,MultiShiftFunction &_shifts) : 
 | 
			
		||||
    MaxIterations(maxit),
 | 
			
		||||
    shifts(_shifts)
 | 
			
		||||
  { 
 | 
			
		||||
    verbose=1;
 | 
			
		||||
    IterationsToCompleteShift.resize(_shifts.order);
 | 
			
		||||
    TrueResidualShift.resize(_shifts.order);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void operator() (LinearOperatorBase<Field> &Linop, const Field &src, Field &psi)
 | 
			
		||||
@@ -125,6 +129,17 @@ public:
 | 
			
		||||
    // Residuals "r" are src
 | 
			
		||||
    // First search direction "p" is also src
 | 
			
		||||
    cp = norm2(src);
 | 
			
		||||
 | 
			
		||||
    // Handle trivial case of zero src.
 | 
			
		||||
    if( cp == 0. ){
 | 
			
		||||
      for(int s=0;s<nshift;s++){
 | 
			
		||||
	psi[s] = Zero();
 | 
			
		||||
	IterationsToCompleteShift[s] = 1;
 | 
			
		||||
	TrueResidualShift[s] = 0.;
 | 
			
		||||
      }
 | 
			
		||||
      return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    for(int s=0;s<nshift;s++){
 | 
			
		||||
      rsq[s] = cp * mresidual[s] * mresidual[s];
 | 
			
		||||
      std::cout<<GridLogMessage<<"ConjugateGradientMultiShift: shift "<<s
 | 
			
		||||
@@ -270,6 +285,7 @@ public:
 | 
			
		||||
      for(int s=0;s<nshift;s++){
 | 
			
		||||
      
 | 
			
		||||
	if ( (!converged[s]) ){
 | 
			
		||||
	  IterationsToCompleteShift[s] = k;
 | 
			
		||||
	
 | 
			
		||||
	  RealD css  = c * z[s][iz]* z[s][iz];
 | 
			
		||||
	
 | 
			
		||||
@@ -299,7 +315,8 @@ public:
 | 
			
		||||
	  axpy(r,-alpha[s],src,tmp);
 | 
			
		||||
	  RealD rn = norm2(r);
 | 
			
		||||
	  RealD cn = norm2(src);
 | 
			
		||||
	  std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl;
 | 
			
		||||
	  TrueResidualShift[s] = std::sqrt(rn/cn);
 | 
			
		||||
	  std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<< TrueResidualShift[s] <<std::endl;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
      std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
 | 
			
		||||
 
 | 
			
		||||
@@ -43,6 +43,11 @@ NAMESPACE_BEGIN(Grid);
 | 
			
		||||
template<class Field>
 | 
			
		||||
void basisOrthogonalize(std::vector<Field> &basis,Field &w,int k) 
 | 
			
		||||
{
 | 
			
		||||
  // If assume basis[j] are already orthonormal,
 | 
			
		||||
  // can take all inner products in parallel saving 2x bandwidth
 | 
			
		||||
  // Save 3x bandwidth on the second line of loop.
 | 
			
		||||
  // perhaps 2.5x speed up.
 | 
			
		||||
  // 2x overall in Multigrid Lanczos  
 | 
			
		||||
  for(int j=0; j<k; ++j){
 | 
			
		||||
    auto ip = innerProduct(basis[j],w);
 | 
			
		||||
    w = w - ip*basis[j];
 | 
			
		||||
@@ -54,16 +59,15 @@ void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, i
 | 
			
		||||
{
 | 
			
		||||
  typedef decltype(basis[0].View()) View;
 | 
			
		||||
  auto tmp_v = basis[0].View();
 | 
			
		||||
  std::vector<View> basis_v(basis.size(),tmp_v);
 | 
			
		||||
  Vector<View> basis_v(basis.size(),tmp_v);
 | 
			
		||||
  typedef typename Field::vector_object vobj;
 | 
			
		||||
  GridBase* grid = basis[0].Grid();
 | 
			
		||||
      
 | 
			
		||||
 | 
			
		||||
  for(int k=0;k<basis.size();k++){
 | 
			
		||||
    basis_v[k] = basis[k].View();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
#if 0
 | 
			
		||||
  std::vector < vobj , commAllocator<vobj> > Bt(thread_max() * Nm); // Thread private
 | 
			
		||||
 | 
			
		||||
  thread_region
 | 
			
		||||
  {
 | 
			
		||||
    vobj* B = Bt.data() + Nm * thread_num();
 | 
			
		||||
@@ -81,24 +85,89 @@ void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, i
 | 
			
		||||
      }
 | 
			
		||||
    });
 | 
			
		||||
  }
 | 
			
		||||
#else
 | 
			
		||||
 | 
			
		||||
  int nrot = j1-j0;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  uint64_t oSites   =grid->oSites();
 | 
			
		||||
  uint64_t siteBlock=(grid->oSites()+nrot-1)/nrot; // Maximum 1 additional vector overhead
 | 
			
		||||
 | 
			
		||||
  //  printf("BasisRotate %d %d nrot %d siteBlock %d\n",j0,j1,nrot,siteBlock);
 | 
			
		||||
 | 
			
		||||
  Vector <vobj> Bt(siteBlock * nrot); 
 | 
			
		||||
  auto Bp=&Bt[0];
 | 
			
		||||
 | 
			
		||||
  // GPU readable copy of Eigen matrix
 | 
			
		||||
  Vector<double> Qt_jv(Nm*Nm);
 | 
			
		||||
  double *Qt_p = & Qt_jv[0];
 | 
			
		||||
  for(int k=0;k<Nm;++k){
 | 
			
		||||
    for(int j=0;j<Nm;++j){
 | 
			
		||||
      Qt_p[j*Nm+k]=Qt(j,k);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Block the loop to keep storage footprint down
 | 
			
		||||
  vobj zz=Zero();
 | 
			
		||||
  for(uint64_t s=0;s<oSites;s+=siteBlock){
 | 
			
		||||
 | 
			
		||||
    // remaining work in this block
 | 
			
		||||
    int ssites=MIN(siteBlock,oSites-s);
 | 
			
		||||
 | 
			
		||||
    // zero out the accumulators
 | 
			
		||||
    accelerator_for(ss,siteBlock*nrot,vobj::Nsimd(),{
 | 
			
		||||
	auto z=coalescedRead(zz);
 | 
			
		||||
	coalescedWrite(Bp[ss],z);
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
    accelerator_for(sj,ssites*nrot,vobj::Nsimd(),{
 | 
			
		||||
	
 | 
			
		||||
      int j =sj%nrot;
 | 
			
		||||
      int jj  =j0+j;
 | 
			
		||||
      int ss =sj/nrot;
 | 
			
		||||
      int sss=ss+s;
 | 
			
		||||
 | 
			
		||||
      for(int k=k0; k<k1; ++k){
 | 
			
		||||
	auto tmp = coalescedRead(Bp[ss*nrot+j]);
 | 
			
		||||
	coalescedWrite(Bp[ss*nrot+j],tmp+ Qt_p[jj*Nm+k] * coalescedRead(basis_v[k][sss]));
 | 
			
		||||
      }
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
    accelerator_for(sj,ssites*nrot,vobj::Nsimd(),{
 | 
			
		||||
      int j =sj%nrot;
 | 
			
		||||
      int jj  =j0+j;
 | 
			
		||||
      int ss =sj/nrot;
 | 
			
		||||
      int sss=ss+s;
 | 
			
		||||
      coalescedWrite(basis_v[jj][sss],coalescedRead(Bp[ss*nrot+j]));
 | 
			
		||||
    });
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Extract a single rotated vector
 | 
			
		||||
template<class Field>
 | 
			
		||||
void basisRotateJ(Field &result,std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm) 
 | 
			
		||||
{
 | 
			
		||||
  typedef decltype(basis[0].View()) View;
 | 
			
		||||
  typedef typename Field::vector_object vobj;
 | 
			
		||||
  GridBase* grid = basis[0].Grid();
 | 
			
		||||
 | 
			
		||||
  result.Checkerboard() = basis[0].Checkerboard();
 | 
			
		||||
  auto result_v=result.View();
 | 
			
		||||
  thread_for(ss, grid->oSites(),{
 | 
			
		||||
    vobj B = Zero();
 | 
			
		||||
  Vector<View> basis_v(basis.size(),result_v);
 | 
			
		||||
  for(int k=0;k<basis.size();k++){
 | 
			
		||||
    basis_v[k] = basis[k].View();
 | 
			
		||||
  }
 | 
			
		||||
  vobj zz=Zero();
 | 
			
		||||
  Vector<double> Qt_jv(Nm);
 | 
			
		||||
  double * Qt_j = & Qt_jv[0];
 | 
			
		||||
  for(int k=0;k<Nm;++k) Qt_j[k]=Qt(j,k);
 | 
			
		||||
  accelerator_for(ss, grid->oSites(),vobj::Nsimd(),{
 | 
			
		||||
    auto B=coalescedRead(zz);
 | 
			
		||||
    for(int k=k0; k<k1; ++k){
 | 
			
		||||
      auto basis_k = basis[k].View();
 | 
			
		||||
      B +=Qt(j,k) * basis_k[ss];
 | 
			
		||||
      B +=Qt_j[k] * coalescedRead(basis_v[k][ss]);
 | 
			
		||||
    }
 | 
			
		||||
    result_v[ss] = B;
 | 
			
		||||
    coalescedWrite(result_v[ss], B);
 | 
			
		||||
  });
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -282,7 +351,7 @@ public:
 | 
			
		||||
			    RealD _eresid, // resid in lmdue deficit 
 | 
			
		||||
			    int _MaxIter, // Max iterations
 | 
			
		||||
			    RealD _betastp=0.0, // if beta(k) < betastp: converged
 | 
			
		||||
			    int _MinRestart=1, int _orth_period = 1,
 | 
			
		||||
			    int _MinRestart=0, int _orth_period = 1,
 | 
			
		||||
			    IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) :
 | 
			
		||||
    SimpleTester(HermOp), _PolyOp(PolyOp),      _HermOp(HermOp), _Tester(Tester),
 | 
			
		||||
    Nstop(_Nstop)  ,      Nk(_Nk),      Nm(_Nm),
 | 
			
		||||
@@ -298,7 +367,7 @@ public:
 | 
			
		||||
			       RealD _eresid, // resid in lmdue deficit 
 | 
			
		||||
			       int _MaxIter, // Max iterations
 | 
			
		||||
			       RealD _betastp=0.0, // if beta(k) < betastp: converged
 | 
			
		||||
			       int _MinRestart=1, int _orth_period = 1,
 | 
			
		||||
			       int _MinRestart=0, int _orth_period = 1,
 | 
			
		||||
			       IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) :
 | 
			
		||||
    SimpleTester(HermOp),  _PolyOp(PolyOp),      _HermOp(HermOp), _Tester(SimpleTester),
 | 
			
		||||
    Nstop(_Nstop)  ,      Nk(_Nk),      Nm(_Nm),
 | 
			
		||||
@@ -347,7 +416,7 @@ until convergence
 | 
			
		||||
    GridBase *grid = src.Grid();
 | 
			
		||||
    assert(grid == evec[0].Grid());
 | 
			
		||||
    
 | 
			
		||||
    GridLogIRL.TimingMode(1);
 | 
			
		||||
    //    GridLogIRL.TimingMode(1);
 | 
			
		||||
    std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
 | 
			
		||||
    std::cout << GridLogIRL <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 /  "<< MaxIter<< std::endl;
 | 
			
		||||
    std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
 | 
			
		||||
@@ -372,14 +441,17 @@ until convergence
 | 
			
		||||
    {
 | 
			
		||||
      auto src_n = src;
 | 
			
		||||
      auto tmp = src;
 | 
			
		||||
      std::cout << GridLogIRL << " IRL source norm " << norm2(src) << std::endl;
 | 
			
		||||
      const int _MAX_ITER_IRL_MEVAPP_ = 50;
 | 
			
		||||
      for (int i=0;i<_MAX_ITER_IRL_MEVAPP_;i++) {
 | 
			
		||||
	normalise(src_n);
 | 
			
		||||
	_HermOp(src_n,tmp);
 | 
			
		||||
	//	std::cout << GridLogMessage<< tmp<<std::endl; exit(0);
 | 
			
		||||
	//	std::cout << GridLogIRL << " _HermOp " << norm2(tmp) << std::endl;
 | 
			
		||||
	RealD vnum = real(innerProduct(src_n,tmp)); // HermOp.
 | 
			
		||||
	RealD vden = norm2(src_n);
 | 
			
		||||
	RealD na = vnum/vden;
 | 
			
		||||
	if (fabs(evalMaxApprox/na - 1.0) < 0.05)
 | 
			
		||||
	if (fabs(evalMaxApprox/na - 1.0) < 0.0001)
 | 
			
		||||
	  i=_MAX_ITER_IRL_MEVAPP_;
 | 
			
		||||
	evalMaxApprox = na;
 | 
			
		||||
	std::cout << GridLogIRL << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
 | 
			
		||||
@@ -577,11 +649,11 @@ until convergence
 | 
			
		||||
/* Saad PP. 195
 | 
			
		||||
1. Choose an initial vector v1 of 2-norm unity. Set β1 ≡ 0, v0 ≡ 0
 | 
			
		||||
2. For k = 1,2,...,m Do:
 | 
			
		||||
3. wk:=Avk−βkv_{k−1}      
 | 
			
		||||
4. αk:=(wk,vk)       // 
 | 
			
		||||
5. wk:=wk−αkvk       // wk orthog vk 
 | 
			
		||||
6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
 | 
			
		||||
7. vk+1 := wk/βk+1
 | 
			
		||||
3. wk:=Avk - b_k v_{k-1}      
 | 
			
		||||
4. ak:=(wk,vk)       // 
 | 
			
		||||
5. wk:=wk-akvk       // wk orthog vk 
 | 
			
		||||
6. bk+1 := ||wk||_2. If b_k+1 = 0 then Stop
 | 
			
		||||
7. vk+1 := wk/b_k+1
 | 
			
		||||
8. EndDo
 | 
			
		||||
 */
 | 
			
		||||
  void step(std::vector<RealD>& lmd,
 | 
			
		||||
@@ -589,6 +661,7 @@ until convergence
 | 
			
		||||
	    std::vector<Field>& evec,
 | 
			
		||||
	    Field& w,int Nm,int k)
 | 
			
		||||
  {
 | 
			
		||||
    std::cout<<GridLogIRL << "Lanczos step " <<k<<std::endl;
 | 
			
		||||
    const RealD tiny = 1.0e-20;
 | 
			
		||||
    assert( k< Nm );
 | 
			
		||||
 | 
			
		||||
@@ -600,20 +673,20 @@ until convergence
 | 
			
		||||
 | 
			
		||||
    if(k>0) w -= lme[k-1] * evec[k-1];
 | 
			
		||||
 | 
			
		||||
    ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk)
 | 
			
		||||
    ComplexD zalph = innerProduct(evec_k,w);
 | 
			
		||||
    RealD     alph = real(zalph);
 | 
			
		||||
 | 
			
		||||
    w = w - alph * evec_k;// 5. wk:=wk−αkvk
 | 
			
		||||
    w = w - alph * evec_k;
 | 
			
		||||
 | 
			
		||||
    RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
 | 
			
		||||
    // 7. vk+1 := wk/βk+1
 | 
			
		||||
    RealD beta = normalise(w); 
 | 
			
		||||
 | 
			
		||||
    lmd[k] = alph;
 | 
			
		||||
    lme[k] = beta;
 | 
			
		||||
 | 
			
		||||
    if (k>0 && k % orth_period == 0) {
 | 
			
		||||
    if ( (k>0) && ( (k % orth_period) == 0 )) {
 | 
			
		||||
      std::cout<<GridLogIRL << "Orthogonalising " <<k<<std::endl;
 | 
			
		||||
      orthogonalize(w,evec,k); // orthonormalise
 | 
			
		||||
      std::cout<<GridLogIRL << "Orthogonalised " <<std::endl;
 | 
			
		||||
      std::cout<<GridLogIRL << "Orthogonalised " <<k<<std::endl;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(k < Nm-1) evec[k+1] = w;
 | 
			
		||||
@@ -621,6 +694,8 @@ until convergence
 | 
			
		||||
    std::cout<<GridLogIRL << "alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl;
 | 
			
		||||
    if ( beta < tiny ) 
 | 
			
		||||
      std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl;
 | 
			
		||||
 | 
			
		||||
    std::cout<<GridLogIRL << "Lanczos step complete " <<k<<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void diagonalize_Eigen(std::vector<RealD>& lmd, std::vector<RealD>& lme, 
 | 
			
		||||
 
 | 
			
		||||
@@ -33,26 +33,78 @@ NAMESPACE_BEGIN(Grid);
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Take a matrix and form an NE solver calling a Herm solver
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
template<class Field> class NormalEquations : public OperatorFunction<Field>{
 | 
			
		||||
template<class Field> class NormalEquations {
 | 
			
		||||
private:
 | 
			
		||||
  SparseMatrixBase<Field> & _Matrix;
 | 
			
		||||
  OperatorFunction<Field> & _HermitianSolver;
 | 
			
		||||
 | 
			
		||||
  LinearFunction<Field>   & _Guess;
 | 
			
		||||
public:
 | 
			
		||||
 | 
			
		||||
  /////////////////////////////////////////////////////
 | 
			
		||||
  // Wrap the usual normal equations trick
 | 
			
		||||
  /////////////////////////////////////////////////////
 | 
			
		||||
  NormalEquations(SparseMatrixBase<Field> &Matrix, OperatorFunction<Field> &HermitianSolver) 
 | 
			
		||||
    :  _Matrix(Matrix), _HermitianSolver(HermitianSolver) {}; 
 | 
			
		||||
 NormalEquations(SparseMatrixBase<Field> &Matrix, OperatorFunction<Field> &HermitianSolver,
 | 
			
		||||
		 LinearFunction<Field> &Guess) 
 | 
			
		||||
   :  _Matrix(Matrix), _HermitianSolver(HermitianSolver), _Guess(Guess) {}; 
 | 
			
		||||
 | 
			
		||||
  void operator() (const Field &in, Field &out){
 | 
			
		||||
 
 | 
			
		||||
    Field src(in.Grid());
 | 
			
		||||
    Field tmp(in.Grid());
 | 
			
		||||
 | 
			
		||||
    MdagMLinearOperator<SparseMatrixBase<Field>,Field> MdagMOp(_Matrix);
 | 
			
		||||
    _Matrix.Mdag(in,src);
 | 
			
		||||
    _HermitianSolver(src,out);  // Mdag M out = Mdag in
 | 
			
		||||
    _Guess(src,out);
 | 
			
		||||
    _HermitianSolver(MdagMOp,src,out);  // Mdag M out = Mdag in
 | 
			
		||||
 | 
			
		||||
  }     
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template<class Field> class HPDSolver {
 | 
			
		||||
private:
 | 
			
		||||
  LinearOperatorBase<Field> & _Matrix;
 | 
			
		||||
  OperatorFunction<Field> & _HermitianSolver;
 | 
			
		||||
  LinearFunction<Field>   & _Guess;
 | 
			
		||||
public:
 | 
			
		||||
 | 
			
		||||
  /////////////////////////////////////////////////////
 | 
			
		||||
  // Wrap the usual normal equations trick
 | 
			
		||||
  /////////////////////////////////////////////////////
 | 
			
		||||
 HPDSolver(LinearOperatorBase<Field> &Matrix,
 | 
			
		||||
	   OperatorFunction<Field> &HermitianSolver,
 | 
			
		||||
	   LinearFunction<Field> &Guess) 
 | 
			
		||||
   :  _Matrix(Matrix), _HermitianSolver(HermitianSolver), _Guess(Guess) {}; 
 | 
			
		||||
 | 
			
		||||
  void operator() (const Field &in, Field &out){
 | 
			
		||||
 
 | 
			
		||||
    _Guess(in,out);
 | 
			
		||||
    _HermitianSolver(_Matrix,in,out);  // Mdag M out = Mdag in
 | 
			
		||||
 | 
			
		||||
  }     
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template<class Field> class MdagMSolver {
 | 
			
		||||
private:
 | 
			
		||||
  SparseMatrixBase<Field> & _Matrix;
 | 
			
		||||
  OperatorFunction<Field> & _HermitianSolver;
 | 
			
		||||
  LinearFunction<Field>   & _Guess;
 | 
			
		||||
public:
 | 
			
		||||
 | 
			
		||||
  /////////////////////////////////////////////////////
 | 
			
		||||
  // Wrap the usual normal equations trick
 | 
			
		||||
  /////////////////////////////////////////////////////
 | 
			
		||||
 MdagMSolver(SparseMatrixBase<Field> &Matrix, OperatorFunction<Field> &HermitianSolver,
 | 
			
		||||
	     LinearFunction<Field> &Guess) 
 | 
			
		||||
   :  _Matrix(Matrix), _HermitianSolver(HermitianSolver), _Guess(Guess) {}; 
 | 
			
		||||
 | 
			
		||||
  void operator() (const Field &in, Field &out){
 | 
			
		||||
 
 | 
			
		||||
    MdagMLinearOperator<SparseMatrixBase<Field>,Field> MdagMOp(_Matrix);
 | 
			
		||||
    _Guess(in,out);
 | 
			
		||||
 | 
			
		||||
    _HermitianSolver(MdagMOp,in,out);  // Mdag M out = Mdag in
 | 
			
		||||
 | 
			
		||||
  }     
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -30,12 +30,12 @@ template<class Field> class PowerMethod
 | 
			
		||||
      RealD vden = norm2(src_n); 
 | 
			
		||||
      RealD na = vnum/vden; 
 | 
			
		||||
      
 | 
			
		||||
      if ( (fabs(evalMaxApprox/na - 1.0) < 0.01) || (i==_MAX_ITER_EST_-1) ) { 
 | 
			
		||||
      if ( (fabs(evalMaxApprox/na - 1.0) < 0.001) || (i==_MAX_ITER_EST_-1) ) { 
 | 
			
		||||
 	evalMaxApprox = na; 
 | 
			
		||||
	std::cout << GridLogMessage << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
 | 
			
		||||
 	return evalMaxApprox; 
 | 
			
		||||
      } 
 | 
			
		||||
      evalMaxApprox = na; 
 | 
			
		||||
      std::cout << GridLogMessage << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
 | 
			
		||||
      src_n = tmp;
 | 
			
		||||
    }
 | 
			
		||||
    assert(0);
 | 
			
		||||
 
 | 
			
		||||
@@ -38,10 +38,11 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
NAMESPACE_BEGIN(Grid);
 | 
			
		||||
 | 
			
		||||
#define GCRLogLevel std::cout << GridLogMessage <<std::string(level,'\t')<< " Level "<<level<<" " 
 | 
			
		||||
 | 
			
		||||
template<class Field>
 | 
			
		||||
class PrecGeneralisedConjugateResidual : public OperatorFunction<Field> {
 | 
			
		||||
class PrecGeneralisedConjugateResidual : public LinearFunction<Field> {
 | 
			
		||||
public:                                                
 | 
			
		||||
  using OperatorFunction<Field>::operator();
 | 
			
		||||
 | 
			
		||||
  RealD   Tolerance;
 | 
			
		||||
  Integer MaxIterations;
 | 
			
		||||
@@ -49,23 +50,29 @@ public:
 | 
			
		||||
  int mmax;
 | 
			
		||||
  int nstep;
 | 
			
		||||
  int steps;
 | 
			
		||||
  int level;
 | 
			
		||||
  GridStopWatch PrecTimer;
 | 
			
		||||
  GridStopWatch MatTimer;
 | 
			
		||||
  GridStopWatch LinalgTimer;
 | 
			
		||||
 | 
			
		||||
  LinearFunction<Field> &Preconditioner;
 | 
			
		||||
  LinearFunction<Field>     &Preconditioner;
 | 
			
		||||
  LinearOperatorBase<Field> &Linop;
 | 
			
		||||
 | 
			
		||||
  PrecGeneralisedConjugateResidual(RealD tol,Integer maxit,LinearFunction<Field> &Prec,int _mmax,int _nstep) : 
 | 
			
		||||
  void Level(int lv) { level=lv; };
 | 
			
		||||
 | 
			
		||||
  PrecGeneralisedConjugateResidual(RealD tol,Integer maxit,LinearOperatorBase<Field> &_Linop,LinearFunction<Field> &Prec,int _mmax,int _nstep) : 
 | 
			
		||||
    Tolerance(tol), 
 | 
			
		||||
    MaxIterations(maxit),
 | 
			
		||||
    Linop(_Linop),
 | 
			
		||||
    Preconditioner(Prec),
 | 
			
		||||
    mmax(_mmax),
 | 
			
		||||
    nstep(_nstep)
 | 
			
		||||
  { 
 | 
			
		||||
    level=1;
 | 
			
		||||
    verbose=1;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
 | 
			
		||||
  void operator() (const Field &src, Field &psi){
 | 
			
		||||
 | 
			
		||||
    psi=Zero();
 | 
			
		||||
    RealD cp, ssq,rsq;
 | 
			
		||||
@@ -84,9 +91,9 @@ public:
 | 
			
		||||
    steps=0;
 | 
			
		||||
    for(int k=0;k<MaxIterations;k++){
 | 
			
		||||
 | 
			
		||||
      cp=GCRnStep(Linop,src,psi,rsq);
 | 
			
		||||
      cp=GCRnStep(src,psi,rsq);
 | 
			
		||||
 | 
			
		||||
      std::cout<<GridLogMessage<<"VPGCR("<<mmax<<","<<nstep<<") "<< steps <<" steps cp = "<<cp<<" target "<<rsq <<std::endl;
 | 
			
		||||
      GCRLogLevel <<"PGCR("<<mmax<<","<<nstep<<") "<< steps <<" steps cp = "<<cp<<" target "<<rsq <<std::endl;
 | 
			
		||||
 | 
			
		||||
      if(cp<rsq) {
 | 
			
		||||
 | 
			
		||||
@@ -95,24 +102,26 @@ public:
 | 
			
		||||
	Linop.HermOp(psi,r);
 | 
			
		||||
	axpy(r,-1.0,src,r);
 | 
			
		||||
	RealD tr = norm2(r);
 | 
			
		||||
	std::cout<<GridLogMessage<<"PrecGeneralisedConjugateResidual: Converged on iteration " <<steps
 | 
			
		||||
	GCRLogLevel<<"PGCR: Converged on iteration " <<steps
 | 
			
		||||
		 << " computed residual "<<sqrt(cp/ssq)
 | 
			
		||||
		 << " true residual "    <<sqrt(tr/ssq)
 | 
			
		||||
		 << " target "           <<Tolerance <<std::endl;
 | 
			
		||||
 | 
			
		||||
	std::cout<<GridLogMessage<<"VPGCR Time elapsed: Total  "<< SolverTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout<<GridLogMessage<<"VPGCR Time elapsed: Precon "<<   PrecTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout<<GridLogMessage<<"VPGCR Time elapsed: Matrix "<<    MatTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout<<GridLogMessage<<"VPGCR Time elapsed: Linalg "<< LinalgTimer.Elapsed() <<std::endl;
 | 
			
		||||
	GCRLogLevel<<"PGCR Time elapsed: Total  "<< SolverTimer.Elapsed() <<std::endl;
 | 
			
		||||
	/*
 | 
			
		||||
	  GCRLogLevel<<"PGCR Time elapsed: Precon "<<   PrecTimer.Elapsed() <<std::endl;
 | 
			
		||||
	  GCRLogLevel<<"PGCR Time elapsed: Matrix "<<    MatTimer.Elapsed() <<std::endl;
 | 
			
		||||
	  GCRLogLevel<<"PGCR Time elapsed: Linalg "<< LinalgTimer.Elapsed() <<std::endl;
 | 
			
		||||
	*/
 | 
			
		||||
	return;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
    std::cout<<GridLogMessage<<"Variable Preconditioned GCR did not converge"<<std::endl;
 | 
			
		||||
    assert(0);
 | 
			
		||||
    GCRLogLevel<<"Variable Preconditioned GCR did not converge"<<std::endl;
 | 
			
		||||
    //    assert(0);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  RealD GCRnStep(LinearOperatorBase<Field> &Linop,const Field &src, Field &psi,RealD rsq){
 | 
			
		||||
  RealD GCRnStep(const Field &src, Field &psi,RealD rsq){
 | 
			
		||||
 | 
			
		||||
    RealD cp;
 | 
			
		||||
    RealD a, b;
 | 
			
		||||
@@ -134,9 +143,7 @@ public:
 | 
			
		||||
    std::vector<Field> p(mmax,grid);
 | 
			
		||||
    std::vector<RealD> qq(mmax);
 | 
			
		||||
      
 | 
			
		||||
    std::cout<<GridLogIterative<< " ************** "<< std::endl;
 | 
			
		||||
    std::cout<<GridLogIterative<< "   GCRnStep("<<nstep<<")"<<std::endl;
 | 
			
		||||
    std::cout<<GridLogIterative<< " ************** "<< std::endl;
 | 
			
		||||
    GCRLogLevel<< "PGCR nStep("<<nstep<<")"<<std::endl;
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////
 | 
			
		||||
    // initial guess x0 is taken as nonzero.
 | 
			
		||||
@@ -150,35 +157,15 @@ public:
 | 
			
		||||
    LinalgTimer.Start();
 | 
			
		||||
    r=src-Az;
 | 
			
		||||
    LinalgTimer.Stop();
 | 
			
		||||
    std::cout<<GridLogIterative<< " GCRnStep true residual r = src - A psi   "<<norm2(r) <<std::endl;
 | 
			
		||||
    GCRLogLevel<< "PGCR true residual r = src - A psi   "<<norm2(r) <<std::endl;
 | 
			
		||||
    
 | 
			
		||||
    /////////////////////
 | 
			
		||||
    // p = Prec(r)
 | 
			
		||||
    /////////////////////
 | 
			
		||||
 | 
			
		||||
    std::cout<<GridLogIterative<< " GCRnStep apply preconditioner z= M^-1 r "<< std::endl;
 | 
			
		||||
    std::cout<<GridLogIterative<< " --------------------------------------- "<< std::endl;
 | 
			
		||||
    PrecTimer.Start();
 | 
			
		||||
    Preconditioner(r,z);
 | 
			
		||||
    PrecTimer.Stop();
 | 
			
		||||
    std::cout<<GridLogIterative<< " --------------------------------------- "<< std::endl;
 | 
			
		||||
    std::cout<<GridLogIterative<< " GCRnStep called Preconditioner z "<< norm2(z) <<std::endl;
 | 
			
		||||
 | 
			
		||||
    //    MatTimer.Start();
 | 
			
		||||
    //    Linop.HermOp(z,tmp); 
 | 
			
		||||
    //    MatTimer.Stop();
 | 
			
		||||
 | 
			
		||||
    //    LinalgTimer.Start();
 | 
			
		||||
    //    ttmp=tmp;
 | 
			
		||||
    //    tmp=tmp-r;
 | 
			
		||||
    //    LinalgTimer.Stop();
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
      std::cout<<GridLogMessage<<r<<std::endl;
 | 
			
		||||
      std::cout<<GridLogMessage<<z<<std::endl;
 | 
			
		||||
      std::cout<<GridLogMessage<<ttmp<<std::endl;
 | 
			
		||||
      std::cout<<GridLogMessage<<tmp<<std::endl;
 | 
			
		||||
    */
 | 
			
		||||
 | 
			
		||||
    MatTimer.Start();
 | 
			
		||||
    Linop.HermOpAndNorm(z,Az,zAz,zAAz); 
 | 
			
		||||
@@ -190,7 +177,6 @@ public:
 | 
			
		||||
    p[0]= z;
 | 
			
		||||
    q[0]= Az;
 | 
			
		||||
    qq[0]= zAAz;
 | 
			
		||||
    std::cout<<GridLogIterative<< " GCRnStep p0=z, q0 = A p0 " <<std::endl;
 | 
			
		||||
    
 | 
			
		||||
    cp =norm2(r);
 | 
			
		||||
    LinalgTimer.Stop();
 | 
			
		||||
@@ -212,20 +198,16 @@ public:
 | 
			
		||||
      cp = axpy_norm(r,-a,q[peri_k],r);
 | 
			
		||||
      LinalgTimer.Stop();
 | 
			
		||||
 | 
			
		||||
      std::cout<<GridLogMessage<< " VPGCR_step["<<steps<<"]  resid " << cp << " target " <<rsq<<std::endl; 
 | 
			
		||||
      GCRLogLevel<< "PGCR step["<<steps<<"]  resid " << cp << " target " <<rsq<<std::endl; 
 | 
			
		||||
 | 
			
		||||
      if((k==nstep-1)||(cp<rsq)){
 | 
			
		||||
	return cp;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
      std::cout<<GridLogIterative<< " GCRnStep apply preconditioner z= M^-1 r "<< std::endl;
 | 
			
		||||
      std::cout<<GridLogIterative<< " --------------------------------------- "<< std::endl;
 | 
			
		||||
      PrecTimer.Start();
 | 
			
		||||
      Preconditioner(r,z);// solve Az = r
 | 
			
		||||
      PrecTimer.Stop();
 | 
			
		||||
      std::cout<<GridLogIterative<< " --------------------------------------- "<< std::endl;
 | 
			
		||||
      std::cout<<GridLogIterative<< " GCRnStep called Preconditioner z "<< norm2(z) <<std::endl;
 | 
			
		||||
 | 
			
		||||
      MatTimer.Start();
 | 
			
		||||
      Linop.HermOpAndNorm(z,Az,zAz,zAAz);
 | 
			
		||||
 
 | 
			
		||||
@@ -405,6 +405,70 @@ namespace Grid {
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  template<class Field> class NonHermitianSchurRedBlackDiagMooeeSolve : public SchurRedBlackBase<Field> 
 | 
			
		||||
  {
 | 
			
		||||
    public:
 | 
			
		||||
      typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
 | 
			
		||||
 | 
			
		||||
      NonHermitianSchurRedBlackDiagMooeeSolve(OperatorFunction<Field>& RBSolver, const bool initSubGuess = false,
 | 
			
		||||
          const bool _solnAsInitGuess = false)  
 | 
			
		||||
      : SchurRedBlackBase<Field>(RBSolver, initSubGuess, _solnAsInitGuess) {};
 | 
			
		||||
 | 
			
		||||
      //////////////////////////////////////////////////////
 | 
			
		||||
      // Override RedBlack specialisation
 | 
			
		||||
      //////////////////////////////////////////////////////
 | 
			
		||||
      virtual void RedBlackSource(Matrix& _Matrix, const Field& src, Field& src_e, Field& src_o)
 | 
			
		||||
      {
 | 
			
		||||
        GridBase* grid  = _Matrix.RedBlackGrid();
 | 
			
		||||
        GridBase* fgrid = _Matrix.Grid();
 | 
			
		||||
 | 
			
		||||
        Field  tmp(grid);
 | 
			
		||||
        Field Mtmp(grid);
 | 
			
		||||
 | 
			
		||||
        pickCheckerboard(Even, src_e, src);
 | 
			
		||||
        pickCheckerboard(Odd , src_o, src);
 | 
			
		||||
 | 
			
		||||
        /////////////////////////////////////////////////////
 | 
			
		||||
        // src_o = Mdag * (source_o - Moe MeeInv source_e)
 | 
			
		||||
        /////////////////////////////////////////////////////
 | 
			
		||||
        _Matrix.MooeeInv(src_e, tmp);   assert(   tmp.Checkerboard() == Even );
 | 
			
		||||
        _Matrix.Meooe   (tmp, Mtmp);    assert(  Mtmp.Checkerboard() == Odd  );     
 | 
			
		||||
        src_o -= Mtmp;                  assert( src_o.Checkerboard() == Odd  );     
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      virtual void RedBlackSolution(Matrix& _Matrix, const Field& sol_o, const Field& src_e, Field& sol)
 | 
			
		||||
      {
 | 
			
		||||
        GridBase* grid  = _Matrix.RedBlackGrid();
 | 
			
		||||
        GridBase* fgrid = _Matrix.Grid();
 | 
			
		||||
 | 
			
		||||
        Field     tmp(grid);
 | 
			
		||||
        Field   sol_e(grid);
 | 
			
		||||
        Field src_e_i(grid);
 | 
			
		||||
        
 | 
			
		||||
        ///////////////////////////////////////////////////
 | 
			
		||||
        // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
 | 
			
		||||
        ///////////////////////////////////////////////////
 | 
			
		||||
        _Matrix.Meooe(sol_o, tmp);         assert(     tmp.Checkerboard() == Even );
 | 
			
		||||
        src_e_i = src_e - tmp;             assert( src_e_i.Checkerboard() == Even );
 | 
			
		||||
        _Matrix.MooeeInv(src_e_i, sol_e);  assert(   sol_e.Checkerboard() == Even );
 | 
			
		||||
       
 | 
			
		||||
        setCheckerboard(sol, sol_e); assert( sol_e.Checkerboard() == Even );
 | 
			
		||||
        setCheckerboard(sol, sol_o); assert( sol_o.Checkerboard() == Odd  );
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      virtual void RedBlackSolve(Matrix& _Matrix, const Field& src_o, Field& sol_o)
 | 
			
		||||
      {
 | 
			
		||||
        NonHermitianSchurDiagMooeeOperator<Matrix,Field> _OpEO(_Matrix);
 | 
			
		||||
        this->_HermitianRBSolver(_OpEO, src_o, sol_o);  assert(sol_o.Checkerboard() == Odd);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      virtual void RedBlackSolve(Matrix& _Matrix, const std::vector<Field>& src_o, std::vector<Field>& sol_o)
 | 
			
		||||
      {
 | 
			
		||||
        NonHermitianSchurDiagMooeeOperator<Matrix,Field> _OpEO(_Matrix);
 | 
			
		||||
        this->_HermitianRBSolver(_OpEO, src_o, sol_o); 
 | 
			
		||||
      }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Site diagonal is identity, right preconditioned by Mee^inv
 | 
			
		||||
  // ( 1 - Meo Moo^inv Moe Mee^inv  ) phi =( 1 - Meo Moo^inv Moe Mee^inv  ) Mee psi =  = eta  = eta
 | 
			
		||||
@@ -482,5 +546,76 @@ namespace Grid {
 | 
			
		||||
      this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); 
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  template<class Field> class NonHermitianSchurRedBlackDiagTwoSolve : public SchurRedBlackBase<Field> 
 | 
			
		||||
  {
 | 
			
		||||
    public:
 | 
			
		||||
      typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
 | 
			
		||||
 | 
			
		||||
      /////////////////////////////////////////////////////
 | 
			
		||||
      // Wrap the usual normal equations Schur trick
 | 
			
		||||
      /////////////////////////////////////////////////////
 | 
			
		||||
      NonHermitianSchurRedBlackDiagTwoSolve(OperatorFunction<Field>& RBSolver, const bool initSubGuess = false,
 | 
			
		||||
          const bool _solnAsInitGuess = false)  
 | 
			
		||||
      : SchurRedBlackBase<Field>(RBSolver, initSubGuess, _solnAsInitGuess) {};
 | 
			
		||||
 | 
			
		||||
      virtual void RedBlackSource(Matrix& _Matrix, const Field& src, Field& src_e, Field& src_o)
 | 
			
		||||
      {
 | 
			
		||||
        GridBase* grid  = _Matrix.RedBlackGrid();
 | 
			
		||||
        GridBase* fgrid = _Matrix.Grid();
 | 
			
		||||
 | 
			
		||||
        Field  tmp(grid);
 | 
			
		||||
        Field Mtmp(grid);
 | 
			
		||||
 | 
			
		||||
        pickCheckerboard(Even, src_e, src);
 | 
			
		||||
        pickCheckerboard(Odd , src_o, src);
 | 
			
		||||
      
 | 
			
		||||
        /////////////////////////////////////////////////////
 | 
			
		||||
        // src_o = Mdag * (source_o - Moe MeeInv source_e)
 | 
			
		||||
        /////////////////////////////////////////////////////
 | 
			
		||||
        _Matrix.MooeeInv(src_e, tmp);   assert(   tmp.Checkerboard() == Even );
 | 
			
		||||
        _Matrix.Meooe   (tmp, Mtmp);    assert(  Mtmp.Checkerboard() == Odd  );     
 | 
			
		||||
        src_o -= Mtmp;                  assert( src_o.Checkerboard() == Odd  );     
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      virtual void RedBlackSolution(Matrix& _Matrix, const Field& sol_o, const Field& src_e, Field& sol)
 | 
			
		||||
      {
 | 
			
		||||
        GridBase* grid  = _Matrix.RedBlackGrid();
 | 
			
		||||
        GridBase* fgrid = _Matrix.Grid();
 | 
			
		||||
 | 
			
		||||
        Field sol_o_i(grid);
 | 
			
		||||
        Field     tmp(grid);
 | 
			
		||||
        Field   sol_e(grid);
 | 
			
		||||
 | 
			
		||||
        ////////////////////////////////////////////////
 | 
			
		||||
        // MooeeInv due to pecond
 | 
			
		||||
        ////////////////////////////////////////////////
 | 
			
		||||
        _Matrix.MooeeInv(sol_o, tmp);
 | 
			
		||||
        sol_o_i = tmp;
 | 
			
		||||
 | 
			
		||||
        ///////////////////////////////////////////////////
 | 
			
		||||
        // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
 | 
			
		||||
        ///////////////////////////////////////////////////
 | 
			
		||||
        _Matrix.Meooe(sol_o_i, tmp);    assert(   tmp.Checkerboard() == Even );
 | 
			
		||||
        tmp = src_e - tmp;              assert( src_e.Checkerboard() == Even );
 | 
			
		||||
        _Matrix.MooeeInv(tmp, sol_e);   assert( sol_e.Checkerboard() == Even );
 | 
			
		||||
       
 | 
			
		||||
        setCheckerboard(sol, sol_e);    assert(   sol_e.Checkerboard() == Even );
 | 
			
		||||
        setCheckerboard(sol, sol_o_i);  assert( sol_o_i.Checkerboard() == Odd  );
 | 
			
		||||
      };
 | 
			
		||||
 | 
			
		||||
      virtual void RedBlackSolve(Matrix& _Matrix, const Field& src_o, Field& sol_o)
 | 
			
		||||
      {
 | 
			
		||||
        NonHermitianSchurDiagTwoOperator<Matrix,Field> _OpEO(_Matrix);
 | 
			
		||||
        this->_HermitianRBSolver(_OpEO, src_o, sol_o);
 | 
			
		||||
      };
 | 
			
		||||
 | 
			
		||||
      virtual void RedBlackSolve(Matrix& _Matrix, const std::vector<Field>& src_o,  std::vector<Field>& sol_o)
 | 
			
		||||
      {
 | 
			
		||||
        NonHermitianSchurDiagTwoOperator<Matrix,Field> _OpEO(_Matrix);
 | 
			
		||||
        this->_HermitianRBSolver(_OpEO, src_o, sol_o); 
 | 
			
		||||
      }
 | 
			
		||||
  };
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user