mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 21:44:33 +00:00 
			
		
		
		
	develop pull
This commit is contained in:
		@@ -35,6 +35,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
#include <Grid/algorithms/approx/Zolotarev.h>
 | 
			
		||||
#include <Grid/algorithms/approx/Chebyshev.h>
 | 
			
		||||
#include <Grid/algorithms/approx/JacobiPolynomial.h>
 | 
			
		||||
#include <Grid/algorithms/approx/Remez.h>
 | 
			
		||||
#include <Grid/algorithms/approx/MultiShiftFunction.h>
 | 
			
		||||
#include <Grid/algorithms/approx/Forecast.h>
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,14 @@
 | 
			
		||||
    // blockZaxpy in bockPromote - 3s, 5%
 | 
			
		||||
    // noncoalesced linalg in Preconditionoer ~ 3s 5%
 | 
			
		||||
    // Lancos tuning or replace 10-20s ~ 25%, open ended
 | 
			
		||||
    // setup tuning   5s  ~  8%
 | 
			
		||||
    //    -- e.g. ordermin, orderstep tunables.
 | 
			
		||||
    // MdagM path without norm in LinOp code.     few seconds
 | 
			
		||||
 | 
			
		||||
    // Mdir calc blocking kernels
 | 
			
		||||
    // Fuse kernels in blockMaskedInnerProduct
 | 
			
		||||
    // preallocate Vectors in Cayley 5D ~ few percent few seconds
 | 
			
		||||
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
@@ -34,15 +45,42 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
NAMESPACE_BEGIN(Grid);
 | 
			
		||||
 | 
			
		||||
template<class vobj,class CComplex>
 | 
			
		||||
inline void blockMaskedInnerProduct(Lattice<CComplex> &CoarseInner1,
 | 
			
		||||
				    Lattice<CComplex> &CoarseInner2,
 | 
			
		||||
				    const Lattice<decltype(innerProduct(vobj(),vobj()))> &FineMask1,
 | 
			
		||||
				    const Lattice<decltype(innerProduct(vobj(),vobj()))> &FineMask2,
 | 
			
		||||
				    const Lattice<vobj> &fineX,
 | 
			
		||||
				    const Lattice<vobj> &fineY)
 | 
			
		||||
{
 | 
			
		||||
  typedef decltype(innerProduct(vobj(),vobj())) dotp;
 | 
			
		||||
 | 
			
		||||
  GridBase *coarse(CoarseInner1.Grid());
 | 
			
		||||
  GridBase *fine  (fineX.Grid());
 | 
			
		||||
 | 
			
		||||
  Lattice<dotp> fine_inner(fine); fine_inner.Checkerboard() = fineX.Checkerboard();
 | 
			
		||||
  Lattice<dotp> fine_inner_msk(fine);
 | 
			
		||||
 | 
			
		||||
  // Multiply could be fused with innerProduct
 | 
			
		||||
  // Single block sum kernel could do both masks.
 | 
			
		||||
  fine_inner = localInnerProduct(fineX,fineY);
 | 
			
		||||
 | 
			
		||||
  mult(fine_inner_msk, fine_inner,FineMask1);
 | 
			
		||||
  blockSum(CoarseInner1,fine_inner_msk);
 | 
			
		||||
 | 
			
		||||
  mult(fine_inner_msk, fine_inner,FineMask2);
 | 
			
		||||
  blockSum(CoarseInner2,fine_inner_msk);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Geometry {
 | 
			
		||||
  //    int dimension;
 | 
			
		||||
public:
 | 
			
		||||
  int npoint;
 | 
			
		||||
  std::vector<int> directions   ;
 | 
			
		||||
  std::vector<int> displacements;
 | 
			
		||||
 | 
			
		||||
  Geometry(int _d)  {
 | 
			
		||||
  
 | 
			
		||||
    
 | 
			
		||||
    int base = (_d==5) ? 1:0;
 | 
			
		||||
 | 
			
		||||
    // make coarse grid stencil for 4d , not 5d
 | 
			
		||||
@@ -52,10 +90,10 @@ public:
 | 
			
		||||
    directions.resize(npoint);
 | 
			
		||||
    displacements.resize(npoint);
 | 
			
		||||
    for(int d=0;d<_d;d++){
 | 
			
		||||
      directions[2*d  ] = d+base;
 | 
			
		||||
      directions[2*d+1] = d+base;
 | 
			
		||||
      displacements[2*d  ] = +1;
 | 
			
		||||
      displacements[2*d+1] = -1;
 | 
			
		||||
      directions[d   ] = d+base;
 | 
			
		||||
      directions[d+_d] = d+base;
 | 
			
		||||
      displacements[d  ] = +1;
 | 
			
		||||
      displacements[d+_d]= -1;
 | 
			
		||||
    }
 | 
			
		||||
    directions   [2*_d]=0;
 | 
			
		||||
    displacements[2*_d]=0;
 | 
			
		||||
@@ -63,7 +101,7 @@ public:
 | 
			
		||||
    //// report back
 | 
			
		||||
    std::cout<<GridLogMessage<<"directions    :";
 | 
			
		||||
    for(int d=0;d<npoint;d++) std::cout<< directions[d]<< " ";
 | 
			
		||||
    std::cout <<std::endl;
 | 
			
		||||
    std::cout<<std::endl;
 | 
			
		||||
    std::cout<<GridLogMessage<<"displacements :";
 | 
			
		||||
    for(int d=0;d<npoint;d++) std::cout<< displacements[d]<< " ";
 | 
			
		||||
    std::cout<<std::endl;
 | 
			
		||||
@@ -115,10 +153,10 @@ public:
 | 
			
		||||
  
 | 
			
		||||
  void Orthogonalise(void){
 | 
			
		||||
    CoarseScalar InnerProd(CoarseGrid); 
 | 
			
		||||
    std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
 | 
			
		||||
    blockOrthogonalise(InnerProd,subspace);
 | 
			
		||||
    std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
 | 
			
		||||
    std::cout << GridLogMessage <<" Block Gramm-Schmidt pass 1"<<std::endl;
 | 
			
		||||
    blockOrthogonalise(InnerProd,subspace);
 | 
			
		||||
    //    std::cout << GridLogMessage <<" Block Gramm-Schmidt pass 2"<<std::endl; // Really have to do twice? Yuck
 | 
			
		||||
    //    blockOrthogonalise(InnerProd,subspace);
 | 
			
		||||
    //      std::cout << GridLogMessage <<" Gramm-Schmidt checking orthogonality"<<std::endl;
 | 
			
		||||
    //      CheckOrthogonal();
 | 
			
		||||
  } 
 | 
			
		||||
@@ -128,7 +166,7 @@ public:
 | 
			
		||||
    for(int i=0;i<nbasis;i++){
 | 
			
		||||
      blockProject(iProj,subspace[i],subspace);
 | 
			
		||||
      eProj=Zero(); 
 | 
			
		||||
      thread_for(ss, CoarseGrid->oSites(),{
 | 
			
		||||
      accelerator_for(ss, CoarseGrid->oSites(),1,{
 | 
			
		||||
	eProj[ss](i)=CComplex(1.0);
 | 
			
		||||
      });
 | 
			
		||||
      eProj=eProj - iProj;
 | 
			
		||||
@@ -146,61 +184,9 @@ public:
 | 
			
		||||
  void CreateSubspaceRandom(GridParallelRNG &RNG){
 | 
			
		||||
    for(int i=0;i<nbasis;i++){
 | 
			
		||||
      random(RNG,subspace[i]);
 | 
			
		||||
      std::cout<<GridLogMessage<<" norm subspace["<<i<<"] "<<norm2(subspace[i])<<std::endl;
 | 
			
		||||
    }
 | 
			
		||||
    Orthogonalise();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /*
 | 
			
		||||
    virtual void CreateSubspaceLanczos(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) 
 | 
			
		||||
    {
 | 
			
		||||
    // Run a Lanczos with sloppy convergence
 | 
			
		||||
    const int Nstop = nn;
 | 
			
		||||
    const int Nk = nn+20;
 | 
			
		||||
    const int Np = nn+20;
 | 
			
		||||
    const int Nm = Nk+Np;
 | 
			
		||||
    const int MaxIt= 10000;
 | 
			
		||||
    RealD resid = 1.0e-3;
 | 
			
		||||
 | 
			
		||||
    Chebyshev<FineField> Cheb(0.5,64.0,21);
 | 
			
		||||
    ImplicitlyRestartedLanczos<FineField> IRL(hermop,Cheb,Nstop,Nk,Nm,resid,MaxIt);
 | 
			
		||||
    //	IRL.lock = 1;
 | 
			
		||||
 | 
			
		||||
    FineField noise(FineGrid); gaussian(RNG,noise);
 | 
			
		||||
    FineField tmp(FineGrid); 
 | 
			
		||||
    std::vector<RealD>     eval(Nm);
 | 
			
		||||
    std::vector<FineField> evec(Nm,FineGrid);
 | 
			
		||||
 | 
			
		||||
    int Nconv;
 | 
			
		||||
    IRL.calc(eval,evec,
 | 
			
		||||
    noise,
 | 
			
		||||
    Nconv);
 | 
			
		||||
 | 
			
		||||
    // pull back nn vectors
 | 
			
		||||
    for(int b=0;b<nn;b++){
 | 
			
		||||
 | 
			
		||||
    subspace[b]   = evec[b];
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl;
 | 
			
		||||
 | 
			
		||||
    hermop.Op(subspace[b],tmp); 
 | 
			
		||||
    std::cout<<GridLogMessage << "filtered["<<b<<"] <f|MdagM|f> "<<norm2(tmp)<<std::endl;
 | 
			
		||||
 | 
			
		||||
    noise = tmp -  sqrt(eval[b])*subspace[b] ;
 | 
			
		||||
 | 
			
		||||
    std::cout<<GridLogMessage << " lambda_"<<b<<" = "<< eval[b] <<"  ;  [ M - Lambda ]_"<<b<<" vec_"<<b<<"  = " <<norm2(noise)<<std::endl;
 | 
			
		||||
 | 
			
		||||
    noise = tmp +  eval[b]*subspace[b] ;
 | 
			
		||||
 | 
			
		||||
    std::cout<<GridLogMessage << " lambda_"<<b<<" = "<< eval[b] <<"  ;  [ M - Lambda ]_"<<b<<" vec_"<<b<<"  = " <<norm2(noise)<<std::endl;
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
    Orthogonalise();
 | 
			
		||||
    for(int b=0;b<nn;b++){
 | 
			
		||||
    std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl;
 | 
			
		||||
    }
 | 
			
		||||
    }
 | 
			
		||||
  */
 | 
			
		||||
  virtual void CreateSubspace(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) {
 | 
			
		||||
 | 
			
		||||
    RealD scale;
 | 
			
		||||
@@ -232,54 +218,316 @@ public:
 | 
			
		||||
      subspace[b]   = noise;
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    Orthogonalise();
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  virtual void CreateSubspaceChebyshev(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) {
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // World of possibilities here. But have tried quite a lot of experiments (250+ jobs run on Summit)
 | 
			
		||||
  // and this is the best I found
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
#if 1
 | 
			
		||||
  virtual void CreateSubspaceChebyshev(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,
 | 
			
		||||
				       int nn,
 | 
			
		||||
				       double hi,
 | 
			
		||||
				       double lo,
 | 
			
		||||
				       int orderfilter,
 | 
			
		||||
				       int ordermin,
 | 
			
		||||
				       int orderstep,
 | 
			
		||||
				       double filterlo
 | 
			
		||||
				       ) {
 | 
			
		||||
 | 
			
		||||
    RealD scale;
 | 
			
		||||
 | 
			
		||||
    FineField noise(FineGrid);
 | 
			
		||||
    FineField Mn(FineGrid);
 | 
			
		||||
    FineField tmp(FineGrid);
 | 
			
		||||
 | 
			
		||||
    Chebyshev<FineField> Cheb(0.1,64.0,900);
 | 
			
		||||
    // New normalised noise
 | 
			
		||||
    gaussian(RNG,noise);
 | 
			
		||||
    scale = std::pow(norm2(noise),-0.5); 
 | 
			
		||||
    noise=noise*scale;
 | 
			
		||||
 | 
			
		||||
    // Initial matrix element
 | 
			
		||||
    hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl;
 | 
			
		||||
 | 
			
		||||
    int b =0;
 | 
			
		||||
    {
 | 
			
		||||
      // Filter
 | 
			
		||||
      Chebyshev<FineField> Cheb(lo,hi,orderfilter);
 | 
			
		||||
      Cheb(hermop,noise,Mn);
 | 
			
		||||
      // normalise
 | 
			
		||||
      scale = std::pow(norm2(Mn),-0.5); 	Mn=Mn*scale;
 | 
			
		||||
      subspace[b]   = Mn;
 | 
			
		||||
      hermop.Op(Mn,tmp); 
 | 
			
		||||
      std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl;
 | 
			
		||||
      b++;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Generate a full sequence of Chebyshevs
 | 
			
		||||
    {
 | 
			
		||||
      lo=filterlo;
 | 
			
		||||
      noise=Mn;
 | 
			
		||||
 | 
			
		||||
      FineField T0(FineGrid); T0 = noise;  
 | 
			
		||||
      FineField T1(FineGrid); 
 | 
			
		||||
      FineField T2(FineGrid);
 | 
			
		||||
      FineField y(FineGrid);
 | 
			
		||||
      
 | 
			
		||||
      FineField *Tnm = &T0;
 | 
			
		||||
      FineField *Tn  = &T1;
 | 
			
		||||
      FineField *Tnp = &T2;
 | 
			
		||||
 | 
			
		||||
      // Tn=T1 = (xscale M + mscale)in
 | 
			
		||||
      RealD xscale = 2.0/(hi-lo);
 | 
			
		||||
      RealD mscale = -(hi+lo)/(hi-lo);
 | 
			
		||||
      hermop.HermOp(T0,y);
 | 
			
		||||
      T1=y*xscale+noise*mscale;
 | 
			
		||||
 | 
			
		||||
      for(int n=2;n<=ordermin+orderstep*(nn-2);n++){
 | 
			
		||||
	
 | 
			
		||||
	hermop.HermOp(*Tn,y);
 | 
			
		||||
 | 
			
		||||
	auto y_v = y.View();
 | 
			
		||||
	auto Tn_v = Tn->View();
 | 
			
		||||
	auto Tnp_v = Tnp->View();
 | 
			
		||||
	auto Tnm_v = Tnm->View();
 | 
			
		||||
	const int Nsimd = CComplex::Nsimd();
 | 
			
		||||
	accelerator_forNB(ss, FineGrid->oSites(), Nsimd, {
 | 
			
		||||
	  coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss));
 | 
			
		||||
	  coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss));
 | 
			
		||||
        });
 | 
			
		||||
 | 
			
		||||
	// Possible more fine grained control is needed than a linear sweep,
 | 
			
		||||
	// but huge productivity gain if this is simple algorithm and not a tunable
 | 
			
		||||
	int m =1;
 | 
			
		||||
	if ( n>=ordermin ) m=n-ordermin;
 | 
			
		||||
	if ( (m%orderstep)==0 ) { 
 | 
			
		||||
	  Mn=*Tnp;
 | 
			
		||||
	  scale = std::pow(norm2(Mn),-0.5);         Mn=Mn*scale;
 | 
			
		||||
	  subspace[b] = Mn;
 | 
			
		||||
	  hermop.Op(Mn,tmp); 
 | 
			
		||||
	  std::cout<<GridLogMessage << n<<" filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl;
 | 
			
		||||
	  b++;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Cycle pointers to avoid copies
 | 
			
		||||
	FineField *swizzle = Tnm;
 | 
			
		||||
	Tnm    =Tn;
 | 
			
		||||
	Tn     =Tnp;
 | 
			
		||||
	Tnp    =swizzle;
 | 
			
		||||
	  
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    assert(b==nn);
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
#if 0
 | 
			
		||||
  virtual void CreateSubspaceChebyshev(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,
 | 
			
		||||
				       int nn,
 | 
			
		||||
				       double hi,
 | 
			
		||||
				       double lo,
 | 
			
		||||
				       int orderfilter,
 | 
			
		||||
				       int ordermin,
 | 
			
		||||
				       int orderstep,
 | 
			
		||||
				       double filterlo
 | 
			
		||||
				       ) {
 | 
			
		||||
 | 
			
		||||
    RealD scale;
 | 
			
		||||
 | 
			
		||||
    FineField noise(FineGrid);
 | 
			
		||||
    FineField Mn(FineGrid);
 | 
			
		||||
    FineField tmp(FineGrid);
 | 
			
		||||
    FineField combined(FineGrid);
 | 
			
		||||
 | 
			
		||||
    for(int b=0;b<nn;b++){
 | 
			
		||||
	
 | 
			
		||||
      gaussian(RNG,noise);
 | 
			
		||||
      scale = std::pow(norm2(noise),-0.5); 
 | 
			
		||||
      noise=noise*scale;
 | 
			
		||||
    // New normalised noise
 | 
			
		||||
    gaussian(RNG,noise);
 | 
			
		||||
    scale = std::pow(norm2(noise),-0.5); 
 | 
			
		||||
    noise=noise*scale;
 | 
			
		||||
 | 
			
		||||
      hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise   ["<<b<<"] <n|MdagM|n> "<<norm2(Mn)<<std::endl;
 | 
			
		||||
    // Initial matrix element
 | 
			
		||||
    hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl;
 | 
			
		||||
 | 
			
		||||
      Cheb(hermop,noise,Mn);
 | 
			
		||||
    int b =0;
 | 
			
		||||
#define FILTERb(llo,hhi,oorder)						\
 | 
			
		||||
    {									\
 | 
			
		||||
      Chebyshev<FineField> Cheb(llo,hhi,oorder);			\
 | 
			
		||||
      Cheb(hermop,noise,Mn);						\
 | 
			
		||||
      scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale;			\
 | 
			
		||||
      subspace[b]   = Mn;						\
 | 
			
		||||
      hermop.Op(Mn,tmp);						\
 | 
			
		||||
      std::cout<<GridLogMessage << oorder<< " Cheb filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; \
 | 
			
		||||
      b++;								\
 | 
			
		||||
    }									
 | 
			
		||||
 | 
			
		||||
      scale = std::pow(norm2(Mn),-0.5); 
 | 
			
		||||
      Mn=Mn*scale;
 | 
			
		||||
      subspace[b]   = Mn;
 | 
			
		||||
    //      JacobiPolynomial<FineField> Cheb(0.002,60.0,1500,-0.5,3.5);	\
 | 
			
		||||
 | 
			
		||||
      hermop.Op(Mn,noise); std::cout<<GridLogMessage << "filtered["<<b<<"] <f|MdagM|f> "<<norm2(noise)<<std::endl;
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    Orthogonalise();
 | 
			
		||||
    RealD alpha=-0.8;
 | 
			
		||||
    RealD beta =-0.8;
 | 
			
		||||
#define FILTER(llo,hhi,oorder)						\
 | 
			
		||||
    {									\
 | 
			
		||||
      Chebyshev<FineField> Cheb(llo,hhi,oorder);			\
 | 
			
		||||
      /* JacobiPolynomial<FineField> Cheb(0.0,60.0,oorder,alpha,beta);*/\
 | 
			
		||||
      Cheb(hermop,noise,Mn);						\
 | 
			
		||||
      scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale;			\
 | 
			
		||||
      subspace[b]   = Mn;						\
 | 
			
		||||
      hermop.Op(Mn,tmp);						\
 | 
			
		||||
      std::cout<<GridLogMessage << oorder<< "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; \
 | 
			
		||||
      b++;								\
 | 
			
		||||
    }									
 | 
			
		||||
    
 | 
			
		||||
#define FILTERc(llo,hhi,oorder)				\
 | 
			
		||||
    {							\
 | 
			
		||||
      Chebyshev<FineField> Cheb(llo,hhi,oorder);	\
 | 
			
		||||
      Cheb(hermop,noise,combined);			\
 | 
			
		||||
    }									
 | 
			
		||||
 | 
			
		||||
    double node = 0.000;
 | 
			
		||||
    FILTERb(lo,hi,orderfilter);// 0
 | 
			
		||||
    //    FILTERc(node,hi,51);// 0
 | 
			
		||||
    noise = Mn;
 | 
			
		||||
    int base = 0;
 | 
			
		||||
    int mult = 100;
 | 
			
		||||
    FILTER(node,hi,base+1*mult);
 | 
			
		||||
    FILTER(node,hi,base+2*mult);
 | 
			
		||||
    FILTER(node,hi,base+3*mult);
 | 
			
		||||
    FILTER(node,hi,base+4*mult);
 | 
			
		||||
    FILTER(node,hi,base+5*mult);
 | 
			
		||||
    FILTER(node,hi,base+6*mult);
 | 
			
		||||
    FILTER(node,hi,base+7*mult);
 | 
			
		||||
    FILTER(node,hi,base+8*mult);
 | 
			
		||||
    FILTER(node,hi,base+9*mult);
 | 
			
		||||
    FILTER(node,hi,base+10*mult);
 | 
			
		||||
    FILTER(node,hi,base+11*mult);
 | 
			
		||||
    FILTER(node,hi,base+12*mult);
 | 
			
		||||
    FILTER(node,hi,base+13*mult);
 | 
			
		||||
    FILTER(node,hi,base+14*mult);
 | 
			
		||||
    FILTER(node,hi,base+15*mult);
 | 
			
		||||
    assert(b==nn);
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if 0
 | 
			
		||||
  virtual void CreateSubspaceChebyshev(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,
 | 
			
		||||
				       int nn,
 | 
			
		||||
				       double hi,
 | 
			
		||||
				       double lo,
 | 
			
		||||
				       int orderfilter,
 | 
			
		||||
				       int ordermin,
 | 
			
		||||
				       int orderstep,
 | 
			
		||||
				       double filterlo
 | 
			
		||||
				       ) {
 | 
			
		||||
 | 
			
		||||
    RealD scale;
 | 
			
		||||
 | 
			
		||||
    FineField noise(FineGrid);
 | 
			
		||||
    FineField Mn(FineGrid);
 | 
			
		||||
    FineField tmp(FineGrid);
 | 
			
		||||
    FineField combined(FineGrid);
 | 
			
		||||
 | 
			
		||||
    // New normalised noise
 | 
			
		||||
    gaussian(RNG,noise);
 | 
			
		||||
    scale = std::pow(norm2(noise),-0.5); 
 | 
			
		||||
    noise=noise*scale;
 | 
			
		||||
 | 
			
		||||
    // Initial matrix element
 | 
			
		||||
    hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl;
 | 
			
		||||
 | 
			
		||||
    int b =0;
 | 
			
		||||
    {						
 | 
			
		||||
      Chebyshev<FineField> JacobiPoly(0.005,60.,1500);
 | 
			
		||||
      //      JacobiPolynomial<FineField> JacobiPoly(0.002,60.0,1500,-0.5,3.5);
 | 
			
		||||
      //JacobiPolynomial<FineField> JacobiPoly(0.03,60.0,500,-0.5,3.5);
 | 
			
		||||
      //      JacobiPolynomial<FineField> JacobiPoly(0.00,60.0,1000,-0.5,3.5);
 | 
			
		||||
      JacobiPoly(hermop,noise,Mn);
 | 
			
		||||
      scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale;
 | 
			
		||||
      subspace[b]   = Mn;
 | 
			
		||||
      hermop.Op(Mn,tmp);
 | 
			
		||||
      std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; 
 | 
			
		||||
      b++;
 | 
			
		||||
      //      scale = std::pow(norm2(tmp),-0.5);     tmp=tmp*scale;
 | 
			
		||||
      //      subspace[b]   = tmp;      b++;
 | 
			
		||||
      //    }									
 | 
			
		||||
    }									
 | 
			
		||||
 | 
			
		||||
#define FILTER(lambda)						\
 | 
			
		||||
    {								\
 | 
			
		||||
      hermop.HermOp(subspace[0],tmp);				\
 | 
			
		||||
      tmp = tmp - lambda *subspace[0];				\
 | 
			
		||||
      scale = std::pow(norm2(tmp),-0.5);			\
 | 
			
		||||
      tmp=tmp*scale;							\
 | 
			
		||||
      subspace[b]   = tmp;						\
 | 
			
		||||
      hermop.Op(subspace[b],tmp);					\
 | 
			
		||||
      std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; \
 | 
			
		||||
      b++;								\
 | 
			
		||||
    }									
 | 
			
		||||
    //      scale = std::pow(norm2(tmp),-0.5);     tmp=tmp*scale;
 | 
			
		||||
    //      subspace[b]   = tmp;      b++;
 | 
			
		||||
    //    }									
 | 
			
		||||
 | 
			
		||||
    FILTER(2.0e-5);
 | 
			
		||||
    FILTER(2.0e-4);
 | 
			
		||||
    FILTER(4.0e-4);
 | 
			
		||||
    FILTER(8.0e-4);
 | 
			
		||||
    FILTER(8.0e-4);
 | 
			
		||||
 | 
			
		||||
    FILTER(2.0e-3);
 | 
			
		||||
    FILTER(3.0e-3);
 | 
			
		||||
    FILTER(4.0e-3);
 | 
			
		||||
    FILTER(5.0e-3);
 | 
			
		||||
    FILTER(6.0e-3);
 | 
			
		||||
 | 
			
		||||
    FILTER(2.5e-3);
 | 
			
		||||
    FILTER(3.5e-3);
 | 
			
		||||
    FILTER(4.5e-3);
 | 
			
		||||
    FILTER(5.5e-3);
 | 
			
		||||
    FILTER(6.5e-3);
 | 
			
		||||
 | 
			
		||||
    //    FILTER(6.0e-5);//6
 | 
			
		||||
    //    FILTER(7.0e-5);//8
 | 
			
		||||
    //    FILTER(8.0e-5);//9
 | 
			
		||||
    //    FILTER(9.0e-5);//3
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
    //    FILTER(1.0e-4);//10
 | 
			
		||||
    FILTER(2.0e-4);//11
 | 
			
		||||
    //   FILTER(3.0e-4);//12
 | 
			
		||||
    //    FILTER(4.0e-4);//13
 | 
			
		||||
    FILTER(5.0e-4);//14
 | 
			
		||||
 | 
			
		||||
    FILTER(6.0e-3);//4
 | 
			
		||||
    FILTER(7.0e-4);//1
 | 
			
		||||
    FILTER(8.0e-4);//7
 | 
			
		||||
    FILTER(9.0e-4);//15
 | 
			
		||||
    FILTER(1.0e-3);//2
 | 
			
		||||
 | 
			
		||||
    FILTER(2.0e-3);//2
 | 
			
		||||
    FILTER(3.0e-3);//2
 | 
			
		||||
    FILTER(4.0e-3);//2
 | 
			
		||||
    FILTER(5.0e-3);//2
 | 
			
		||||
    FILTER(6.0e-3);//2
 | 
			
		||||
 | 
			
		||||
    FILTER(7.0e-3);//2
 | 
			
		||||
    FILTER(8.0e-3);//2
 | 
			
		||||
    FILTER(1.0e-2);//2
 | 
			
		||||
    */
 | 
			
		||||
    std::cout << GridLogMessage <<"Jacobi filtering done" <<std::endl;
 | 
			
		||||
    assert(b==nn);
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Fine Object == (per site) type of fine field
 | 
			
		||||
// nbasis      == number of deflation vectors
 | 
			
		||||
template<class Fobj,class CComplex,int nbasis>
 | 
			
		||||
class CoarsenedMatrix : public SparseMatrixBase<Lattice<iVector<CComplex,nbasis > > >  {
 | 
			
		||||
public:
 | 
			
		||||
    
 | 
			
		||||
  typedef iVector<CComplex,nbasis >             siteVector;
 | 
			
		||||
  typedef iVector<CComplex,nbasis >           siteVector;
 | 
			
		||||
  typedef Lattice<CComplex >                  CoarseComplexField;
 | 
			
		||||
  typedef Lattice<siteVector>                 CoarseVector;
 | 
			
		||||
  typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix;
 | 
			
		||||
 | 
			
		||||
  typedef iMatrix<CComplex,nbasis >  Cobj;
 | 
			
		||||
  typedef Lattice< CComplex >   CoarseScalar; // used for inner products on fine field
 | 
			
		||||
  typedef Lattice<Fobj >        FineField;
 | 
			
		||||
 | 
			
		||||
@@ -293,7 +541,6 @@ public:
 | 
			
		||||
  CartesianStencil<siteVector,siteVector,int> Stencil; 
 | 
			
		||||
 | 
			
		||||
  std::vector<CoarseMatrix> A;
 | 
			
		||||
 | 
			
		||||
      
 | 
			
		||||
  ///////////////////////
 | 
			
		||||
  // Interface
 | 
			
		||||
@@ -305,33 +552,71 @@ public:
 | 
			
		||||
    conformable(_grid,in.Grid());
 | 
			
		||||
    conformable(in.Grid(),out.Grid());
 | 
			
		||||
 | 
			
		||||
    RealD Nin = norm2(in);
 | 
			
		||||
    //    RealD Nin = norm2(in);
 | 
			
		||||
    SimpleCompressor<siteVector> compressor;
 | 
			
		||||
 | 
			
		||||
    double comms_usec = -usecond();
 | 
			
		||||
    Stencil.HaloExchange(in,compressor);
 | 
			
		||||
    comms_usec += usecond();
 | 
			
		||||
 | 
			
		||||
    auto in_v = in.View();
 | 
			
		||||
    auto out_v = out.View();
 | 
			
		||||
    thread_for(ss,Grid()->oSites(),{
 | 
			
		||||
      siteVector res = Zero();
 | 
			
		||||
      siteVector nbr;
 | 
			
		||||
    typedef LatticeView<Cobj> Aview;
 | 
			
		||||
 | 
			
		||||
    Vector<Aview> AcceleratorViewContainer;
 | 
			
		||||
    for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer.push_back(A[p].View());
 | 
			
		||||
    Aview *Aview_p = & AcceleratorViewContainer[0];
 | 
			
		||||
 | 
			
		||||
    const int Nsimd = CComplex::Nsimd();
 | 
			
		||||
    typedef decltype(coalescedRead(in_v[0])) calcVector;
 | 
			
		||||
    typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
 | 
			
		||||
 | 
			
		||||
    GridStopWatch ArithmeticTimer;
 | 
			
		||||
    int osites=Grid()->oSites();
 | 
			
		||||
    //    double flops = osites*Nsimd*nbasis*nbasis*8.0*geom.npoint;
 | 
			
		||||
    //    double bytes = osites*nbasis*nbasis*geom.npoint*sizeof(CComplex);
 | 
			
		||||
    double usecs =-usecond();
 | 
			
		||||
    // assert(geom.npoint==9);
 | 
			
		||||
 | 
			
		||||
    accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
 | 
			
		||||
      int ss = sss/nbasis;
 | 
			
		||||
      int b  = sss%nbasis;
 | 
			
		||||
      calcComplex res = Zero();
 | 
			
		||||
      calcVector nbr;
 | 
			
		||||
      int ptype;
 | 
			
		||||
      StencilEntry *SE;
 | 
			
		||||
 | 
			
		||||
      int lane=SIMTlane(Nsimd);
 | 
			
		||||
      for(int point=0;point<geom.npoint;point++){
 | 
			
		||||
 | 
			
		||||
	SE=Stencil.GetEntry(ptype,point,ss);
 | 
			
		||||
	  
 | 
			
		||||
	if(SE->_is_local&&SE->_permute) { 
 | 
			
		||||
	  permute(nbr,in_v[SE->_offset],ptype);
 | 
			
		||||
	} else if(SE->_is_local) { 
 | 
			
		||||
	  nbr = in_v[SE->_offset];
 | 
			
		||||
	if(SE->_is_local) { 
 | 
			
		||||
	  nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute,lane);
 | 
			
		||||
	} else {
 | 
			
		||||
	  nbr = Stencil.CommBuf()[SE->_offset];
 | 
			
		||||
	  nbr = coalescedRead(Stencil.CommBuf()[SE->_offset],lane);
 | 
			
		||||
	}
 | 
			
		||||
	synchronise();
 | 
			
		||||
 | 
			
		||||
	for(int bb=0;bb<nbasis;bb++) {
 | 
			
		||||
	  res = res + coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb);
 | 
			
		||||
	}
 | 
			
		||||
	auto A_point = A[point].View();
 | 
			
		||||
	res = res + A_point[ss]*nbr;
 | 
			
		||||
      }
 | 
			
		||||
      vstream(out_v[ss],res);
 | 
			
		||||
      coalescedWrite(out_v[ss](b),res,lane);
 | 
			
		||||
    });
 | 
			
		||||
    usecs +=usecond();
 | 
			
		||||
 | 
			
		||||
    double nrm_usec=-usecond();
 | 
			
		||||
    RealD Nout= norm2(out);
 | 
			
		||||
    nrm_usec+=usecond();
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
        std::cout << GridLogMessage << "\tNorm        " << nrm_usec << " us" <<std::endl;
 | 
			
		||||
        std::cout << GridLogMessage << "\tHalo        " << comms_usec << " us" <<std::endl;
 | 
			
		||||
        std::cout << GridLogMessage << "\tMatrix      " << usecs << " us" <<std::endl;
 | 
			
		||||
        std::cout << GridLogMessage << "\t  mflop/s   " << flops/usecs<<std::endl;
 | 
			
		||||
        std::cout << GridLogMessage << "\t  MB/s      " << bytes/usecs<<std::endl;
 | 
			
		||||
    */
 | 
			
		||||
    return Nout;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
@@ -349,25 +634,54 @@ public:
 | 
			
		||||
      return norm2(out);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){
 | 
			
		||||
    
 | 
			
		||||
    conformable(_grid,in.Grid());
 | 
			
		||||
    conformable(in.Grid(),out.Grid());
 | 
			
		||||
    
 | 
			
		||||
  void MdirComms(const CoarseVector &in)
 | 
			
		||||
  {
 | 
			
		||||
    SimpleCompressor<siteVector> compressor;
 | 
			
		||||
    Stencil.HaloExchange(in,compressor);
 | 
			
		||||
    
 | 
			
		||||
    auto point = [dir, disp](){
 | 
			
		||||
      if(dir == 0 and disp == 0)
 | 
			
		||||
	return 8;
 | 
			
		||||
      else
 | 
			
		||||
	return (4 * dir + 1 - disp) / 2;
 | 
			
		||||
    }();
 | 
			
		||||
  }
 | 
			
		||||
  void MdirCalc(const CoarseVector &in, CoarseVector &out, int point)
 | 
			
		||||
  {
 | 
			
		||||
    conformable(_grid,in.Grid());
 | 
			
		||||
    conformable(_grid,out.Grid());
 | 
			
		||||
 | 
			
		||||
    typedef LatticeView<Cobj> Aview;
 | 
			
		||||
    Vector<Aview> AcceleratorViewContainer;
 | 
			
		||||
    for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer.push_back(A[p].View());
 | 
			
		||||
    Aview *Aview_p = & AcceleratorViewContainer[0];
 | 
			
		||||
 | 
			
		||||
    auto out_v = out.View();
 | 
			
		||||
    auto in_v  = in.View();
 | 
			
		||||
    thread_for(ss,Grid()->oSites(),{
 | 
			
		||||
 | 
			
		||||
    const int Nsimd = CComplex::Nsimd();
 | 
			
		||||
    typedef decltype(coalescedRead(in_v[0])) calcVector;
 | 
			
		||||
    typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
 | 
			
		||||
 | 
			
		||||
    accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
 | 
			
		||||
      int ss = sss/nbasis;
 | 
			
		||||
      int b  = sss%nbasis;
 | 
			
		||||
      calcComplex res = Zero();
 | 
			
		||||
      calcVector nbr;
 | 
			
		||||
      int ptype;
 | 
			
		||||
      StencilEntry *SE;
 | 
			
		||||
 | 
			
		||||
      int lane=SIMTlane(Nsimd);
 | 
			
		||||
      SE=Stencil.GetEntry(ptype,point,ss);
 | 
			
		||||
	  
 | 
			
		||||
      if(SE->_is_local) { 
 | 
			
		||||
	nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute,lane);
 | 
			
		||||
      } else {
 | 
			
		||||
	nbr = coalescedRead(Stencil.CommBuf()[SE->_offset],lane);
 | 
			
		||||
      }
 | 
			
		||||
      synchronise();
 | 
			
		||||
 | 
			
		||||
      for(int bb=0;bb<nbasis;bb++) {
 | 
			
		||||
	res = res + coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb);
 | 
			
		||||
      }
 | 
			
		||||
      coalescedWrite(out_v[ss](b),res,lane);
 | 
			
		||||
    });
 | 
			
		||||
#if 0
 | 
			
		||||
    accelerator_for(ss,Grid()->oSites(),1,{
 | 
			
		||||
 | 
			
		||||
      siteVector res = Zero();
 | 
			
		||||
      siteVector nbr;
 | 
			
		||||
      int ptype;
 | 
			
		||||
@@ -382,16 +696,65 @@ public:
 | 
			
		||||
      } else {
 | 
			
		||||
	nbr = Stencil.CommBuf()[SE->_offset];
 | 
			
		||||
      }
 | 
			
		||||
      synchronise();
 | 
			
		||||
 | 
			
		||||
      auto A_point = A[point].View();
 | 
			
		||||
      res = res + A_point[ss]*nbr;
 | 
			
		||||
      res = res + Aview_p[point][ss]*nbr;
 | 
			
		||||
      
 | 
			
		||||
      vstream(out_v[ss],res);
 | 
			
		||||
      out_v[ss]=res;
 | 
			
		||||
    });
 | 
			
		||||
#endif
 | 
			
		||||
  }
 | 
			
		||||
  void MdirAll(const CoarseVector &in,std::vector<CoarseVector> &out)
 | 
			
		||||
  {
 | 
			
		||||
    this->MdirComms(in);
 | 
			
		||||
    int ndir=geom.npoint-1;
 | 
			
		||||
    if ((out.size()!=ndir)&&(out.size()!=ndir+1)) { 
 | 
			
		||||
      std::cout <<"MdirAll out size "<< out.size()<<std::endl;
 | 
			
		||||
      std::cout <<"MdirAll ndir "<< ndir<<std::endl;
 | 
			
		||||
      assert(0);
 | 
			
		||||
    }
 | 
			
		||||
    for(int p=0;p<ndir;p++){
 | 
			
		||||
      MdirCalc(in,out[p],p);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
  void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){
 | 
			
		||||
 | 
			
		||||
    this->MdirComms(in);
 | 
			
		||||
 | 
			
		||||
    int ndim = in.Grid()->Nd();
 | 
			
		||||
 | 
			
		||||
    //////////////
 | 
			
		||||
    // 4D action like wilson
 | 
			
		||||
    // 0+ => 0 
 | 
			
		||||
    // 0- => 1
 | 
			
		||||
    // 1+ => 2 
 | 
			
		||||
    // 1- => 3
 | 
			
		||||
    // etc..
 | 
			
		||||
    //////////////
 | 
			
		||||
    // 5D action like DWF
 | 
			
		||||
    // 1+ => 0 
 | 
			
		||||
    // 1- => 1
 | 
			
		||||
    // 2+ => 2 
 | 
			
		||||
    // 2- => 3
 | 
			
		||||
    // etc..
 | 
			
		||||
    auto point = [dir, disp, ndim](){
 | 
			
		||||
      if(dir == 0 and disp == 0)
 | 
			
		||||
	return 8;
 | 
			
		||||
      else if ( ndim==4 ) { 
 | 
			
		||||
	return (4 * dir + 1 - disp) / 2;
 | 
			
		||||
      } else { 
 | 
			
		||||
	return (4 * (dir-1) + 1 - disp) / 2;
 | 
			
		||||
      }
 | 
			
		||||
    }();
 | 
			
		||||
 | 
			
		||||
    MdirCalc(in,out,point);
 | 
			
		||||
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  void Mdiag(const CoarseVector &in, CoarseVector &out){
 | 
			
		||||
    Mdir(in, out, 0, 0); // use the self coupling (= last) point of the stencil
 | 
			
		||||
  void Mdiag(const CoarseVector &in, CoarseVector &out)
 | 
			
		||||
  {
 | 
			
		||||
    int point=geom.npoint-1;
 | 
			
		||||
    MdirCalc(in, out, point); // No comms
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
@@ -401,25 +764,44 @@ public:
 | 
			
		||||
    geom(CoarseGrid._ndimension),
 | 
			
		||||
    hermitian(hermitian_),
 | 
			
		||||
    Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements,0),
 | 
			
		||||
    A(geom.npoint,&CoarseGrid)
 | 
			
		||||
      A(geom.npoint,&CoarseGrid)
 | 
			
		||||
  {
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop,
 | 
			
		||||
		       Aggregation<Fobj,CComplex,nbasis> & Subspace){
 | 
			
		||||
		       Aggregation<Fobj,CComplex,nbasis> & Subspace)
 | 
			
		||||
  {
 | 
			
		||||
    typedef Lattice<typename Fobj::tensor_reduced> FineComplexField;
 | 
			
		||||
    typedef typename Fobj::scalar_type scalar_type;
 | 
			
		||||
 | 
			
		||||
    FineField iblock(FineGrid); // contributions from within this block
 | 
			
		||||
    FineField oblock(FineGrid); // contributions from outwith this block
 | 
			
		||||
    FineComplexField one(FineGrid); one=scalar_type(1.0,0.0);
 | 
			
		||||
    FineComplexField zero(FineGrid); zero=scalar_type(0.0,0.0);
 | 
			
		||||
 | 
			
		||||
    std::vector<FineComplexField> masks(geom.npoint,FineGrid);
 | 
			
		||||
    FineComplexField imask(FineGrid); // contributions from within this block
 | 
			
		||||
    FineComplexField omask(FineGrid); // contributions from outwith this block
 | 
			
		||||
 | 
			
		||||
    FineComplexField evenmask(FineGrid);
 | 
			
		||||
    FineComplexField oddmask(FineGrid); 
 | 
			
		||||
 | 
			
		||||
    FineField     phi(FineGrid);
 | 
			
		||||
    FineField     tmp(FineGrid);
 | 
			
		||||
    FineField     zz(FineGrid); zz=Zero();
 | 
			
		||||
    FineField    Mphi(FineGrid);
 | 
			
		||||
    FineField    Mphie(FineGrid);
 | 
			
		||||
    FineField    Mphio(FineGrid);
 | 
			
		||||
    std::vector<FineField>     Mphi_p(geom.npoint,FineGrid);
 | 
			
		||||
 | 
			
		||||
    Lattice<iScalar<vInteger> > coor(FineGrid);
 | 
			
		||||
    Lattice<iScalar<vInteger> > coor (FineGrid);
 | 
			
		||||
    Lattice<iScalar<vInteger> > bcoor(FineGrid);
 | 
			
		||||
    Lattice<iScalar<vInteger> > bcb  (FineGrid);
 | 
			
		||||
 | 
			
		||||
    CoarseVector iProj(Grid()); 
 | 
			
		||||
    CoarseVector oProj(Grid()); 
 | 
			
		||||
    CoarseVector SelfProj(Grid()); 
 | 
			
		||||
    CoarseComplexField iZProj(Grid()); 
 | 
			
		||||
    CoarseComplexField oZProj(Grid()); 
 | 
			
		||||
 | 
			
		||||
    CoarseScalar InnerProd(Grid()); 
 | 
			
		||||
 | 
			
		||||
    // Orthogonalise the subblocks over the basis
 | 
			
		||||
@@ -428,69 +810,114 @@ public:
 | 
			
		||||
    // Compute the matrix elements of linop between this orthonormal
 | 
			
		||||
    // set of vectors.
 | 
			
		||||
    int self_stencil=-1;
 | 
			
		||||
    for(int p=0;p<geom.npoint;p++){ 
 | 
			
		||||
    for(int p=0;p<geom.npoint;p++)
 | 
			
		||||
    { 
 | 
			
		||||
      int dir   = geom.directions[p];
 | 
			
		||||
      int disp  = geom.displacements[p];
 | 
			
		||||
      A[p]=Zero();
 | 
			
		||||
      if( geom.displacements[p]==0){
 | 
			
		||||
	self_stencil=p;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      Integer block=(FineGrid->_rdimensions[dir])/(Grid()->_rdimensions[dir]);
 | 
			
		||||
 | 
			
		||||
      LatticeCoordinate(coor,dir);
 | 
			
		||||
 | 
			
		||||
      ///////////////////////////////////////////////////////
 | 
			
		||||
      // Work out even and odd block checkerboarding for fast diagonal term
 | 
			
		||||
      ///////////////////////////////////////////////////////
 | 
			
		||||
      if ( disp==1 ) {
 | 
			
		||||
	bcb   = bcb + div(coor,block);
 | 
			
		||||
      }
 | 
			
		||||
	
 | 
			
		||||
      if ( disp==0 ) {
 | 
			
		||||
	  masks[p]= Zero();
 | 
			
		||||
      } else if ( disp==1 ) {
 | 
			
		||||
	masks[p] = where(mod(coor,block)==(block-1),one,zero);
 | 
			
		||||
      } else if ( disp==-1 ) {
 | 
			
		||||
	masks[p] = where(mod(coor,block)==(Integer)0,one,zero);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    evenmask = where(mod(bcb,2)==(Integer)0,one,zero);
 | 
			
		||||
    oddmask  = one-evenmask;
 | 
			
		||||
 | 
			
		||||
    assert(self_stencil!=-1);
 | 
			
		||||
 | 
			
		||||
    for(int i=0;i<nbasis;i++){
 | 
			
		||||
 | 
			
		||||
      phi=Subspace.subspace[i];
 | 
			
		||||
	
 | 
			
		||||
      std::cout<<GridLogMessage<<"("<<i<<").."<<std::endl;
 | 
			
		||||
 | 
			
		||||
      //      std::cout << GridLogMessage<< "CoarsenMatrix vector "<<i << std::endl;
 | 
			
		||||
      linop.OpDirAll(phi,Mphi_p);
 | 
			
		||||
      linop.OpDiag  (phi,Mphi_p[geom.npoint-1]);
 | 
			
		||||
 | 
			
		||||
      for(int p=0;p<geom.npoint;p++){ 
 | 
			
		||||
 | 
			
		||||
	Mphi = Mphi_p[p];
 | 
			
		||||
 | 
			
		||||
	int dir   = geom.directions[p];
 | 
			
		||||
	int disp  = geom.displacements[p];
 | 
			
		||||
 | 
			
		||||
	Integer block=(FineGrid->_rdimensions[dir])/(Grid()->_rdimensions[dir]);
 | 
			
		||||
	if ( (disp==-1) || (!hermitian ) ) {
 | 
			
		||||
 | 
			
		||||
	LatticeCoordinate(coor,dir);
 | 
			
		||||
 | 
			
		||||
	if ( disp==0 ){
 | 
			
		||||
	  linop.OpDiag(phi,Mphi);
 | 
			
		||||
	}
 | 
			
		||||
	else  {
 | 
			
		||||
	  linop.OpDir(phi,Mphi,dir,disp); 
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	////////////////////////////////////////////////////////////////////////
 | 
			
		||||
	// Pick out contributions coming from this cell and neighbour cell
 | 
			
		||||
	////////////////////////////////////////////////////////////////////////
 | 
			
		||||
	if ( disp==0 ) {
 | 
			
		||||
	  iblock = Mphi;
 | 
			
		||||
	  oblock = Zero();
 | 
			
		||||
	} else if ( disp==1 ) {
 | 
			
		||||
	  oblock = where(mod(coor,block)==(block-1),Mphi,zz);
 | 
			
		||||
	  iblock = where(mod(coor,block)!=(block-1),Mphi,zz);
 | 
			
		||||
	} else if ( disp==-1 ) {
 | 
			
		||||
	  oblock = where(mod(coor,block)==(Integer)0,Mphi,zz);
 | 
			
		||||
	  iblock = where(mod(coor,block)!=(Integer)0,Mphi,zz);
 | 
			
		||||
	} else {
 | 
			
		||||
	  assert(0);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	Subspace.ProjectToSubspace(iProj,iblock);
 | 
			
		||||
	Subspace.ProjectToSubspace(oProj,oblock);
 | 
			
		||||
	//	  blockProject(iProj,iblock,Subspace.subspace);
 | 
			
		||||
	//	  blockProject(oProj,oblock,Subspace.subspace);
 | 
			
		||||
	auto iProj_v = iProj.View() ;
 | 
			
		||||
	auto oProj_v = oProj.View() ;
 | 
			
		||||
	auto A_p     =  A[p].View();
 | 
			
		||||
	auto A_self  = A[self_stencil].View();
 | 
			
		||||
	thread_for(ss, Grid()->oSites(),{
 | 
			
		||||
	  ////////////////////////////////////////////////////////////////////////
 | 
			
		||||
	  // Pick out contributions coming from this cell and neighbour cell
 | 
			
		||||
	  ////////////////////////////////////////////////////////////////////////
 | 
			
		||||
	  omask = masks[p];
 | 
			
		||||
	  imask = one-omask;
 | 
			
		||||
	
 | 
			
		||||
	  for(int j=0;j<nbasis;j++){
 | 
			
		||||
	    if( disp!= 0 ) {
 | 
			
		||||
	      A_p[ss](j,i) = oProj_v[ss](j);
 | 
			
		||||
	    }
 | 
			
		||||
	    A_self[ss](j,i) =	A_self[ss](j,i) + iProj_v[ss](j);
 | 
			
		||||
	    
 | 
			
		||||
	    blockMaskedInnerProduct(iZProj,oZProj,imask,omask,Subspace.subspace[j],Mphi);
 | 
			
		||||
	    
 | 
			
		||||
	    auto iZProj_v = iZProj.View() ;
 | 
			
		||||
	    auto oZProj_v = oZProj.View() ;
 | 
			
		||||
	    auto A_p     =  A[p].View();
 | 
			
		||||
	    auto A_self  = A[self_stencil].View();
 | 
			
		||||
 | 
			
		||||
	    accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });
 | 
			
		||||
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      ///////////////////////////////////////////
 | 
			
		||||
      // Faster alternate self coupling.. use hermiticity to save 2x
 | 
			
		||||
      ///////////////////////////////////////////
 | 
			
		||||
      {
 | 
			
		||||
	mult(tmp,phi,evenmask);  linop.Op(tmp,Mphie);
 | 
			
		||||
	mult(tmp,phi,oddmask );   linop.Op(tmp,Mphio);
 | 
			
		||||
 | 
			
		||||
	//	tmp = Mphie*evenmask + Mphio*oddmask;
 | 
			
		||||
	{
 | 
			
		||||
	  auto tmp_      = tmp.View();
 | 
			
		||||
	  auto evenmask_ = evenmask.View();
 | 
			
		||||
	  auto oddmask_  =  oddmask.View();
 | 
			
		||||
	  auto Mphie_    =  Mphie.View();
 | 
			
		||||
	  auto Mphio_    =  Mphio.View();
 | 
			
		||||
	  accelerator_for(ss, FineGrid->oSites(), Fobj::Nsimd(),{ 
 | 
			
		||||
	      coalescedWrite(tmp_[ss],evenmask_(ss)*Mphie_(ss) + oddmask_(ss)*Mphio_(ss));
 | 
			
		||||
	    });
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	blockProject(SelfProj,tmp,Subspace.subspace);
 | 
			
		||||
 | 
			
		||||
	auto SelfProj_ = SelfProj.View();
 | 
			
		||||
	auto A_self  = A[self_stencil].View();
 | 
			
		||||
	accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{
 | 
			
		||||
	  for(int j=0;j<nbasis;j++){
 | 
			
		||||
	    coalescedWrite(A_self[ss](j,i), SelfProj_(ss)(j));
 | 
			
		||||
	  }
 | 
			
		||||
	});
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    if(hermitian) {
 | 
			
		||||
      std::cout << GridLogMessage << " ForceHermitian "<<std::endl;
 | 
			
		||||
      ForceHermitian();
 | 
			
		||||
    }
 | 
			
		||||
      // AssertHermitian();
 | 
			
		||||
      // ForceDiagonal();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
#if 0
 | 
			
		||||
    ///////////////////////////
 | 
			
		||||
@@ -513,17 +940,26 @@ public:
 | 
			
		||||
    std::cout<<GridLogMessage<< iProj <<std::endl;
 | 
			
		||||
    std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl;
 | 
			
		||||
#endif
 | 
			
		||||
      //      ForceHermitian();
 | 
			
		||||
      // AssertHermitian();
 | 
			
		||||
      // ForceDiagonal();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  void ForceHermitian(void) {
 | 
			
		||||
    for(int d=0;d<4;d++){
 | 
			
		||||
      int dd=d+1;
 | 
			
		||||
      A[2*d] = adj(Cshift(A[2*d+1],dd,1));
 | 
			
		||||
    CoarseMatrix Diff  (Grid());
 | 
			
		||||
    for(int p=0;p<geom.npoint;p++){
 | 
			
		||||
      int dir   = geom.directions[p];
 | 
			
		||||
      int disp  = geom.displacements[p];
 | 
			
		||||
      if(disp==-1) {
 | 
			
		||||
	// Find the opposite link
 | 
			
		||||
	for(int pp=0;pp<geom.npoint;pp++){
 | 
			
		||||
	  int dirp   = geom.directions[pp];
 | 
			
		||||
	  int dispp  = geom.displacements[pp];
 | 
			
		||||
	  if ( (dirp==dir) && (dispp==1) ){
 | 
			
		||||
	    //	    Diff = adj(Cshift(A[p],dir,1)) - A[pp]; 
 | 
			
		||||
	    //	    std::cout << GridLogMessage<<" Replacing stencil leg "<<pp<<" with leg "<<p<< " diff "<<norm2(Diff) <<std::endl;
 | 
			
		||||
	    A[pp] = adj(Cshift(A[p],dir,1));
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    //      A[8] = 0.5*(A[8] + adj(A[8]));
 | 
			
		||||
  }
 | 
			
		||||
  void AssertHermitian(void) {
 | 
			
		||||
    CoarseMatrix AA    (Grid());
 | 
			
		||||
 
 | 
			
		||||
@@ -47,6 +47,7 @@ public:
 | 
			
		||||
  // Support for coarsening to a multigrid
 | 
			
		||||
  virtual void OpDiag (const Field &in, Field &out) = 0; // Abstract base
 | 
			
		||||
  virtual void OpDir  (const Field &in, Field &out,int dir,int disp) = 0; // Abstract base
 | 
			
		||||
  virtual void OpDirAll  (const Field &in, std::vector<Field> &out) = 0; // Abstract base
 | 
			
		||||
 | 
			
		||||
  virtual void Op     (const Field &in, Field &out) = 0; // Abstract base
 | 
			
		||||
  virtual void AdjOp  (const Field &in, Field &out) = 0; // Abstract base
 | 
			
		||||
@@ -83,6 +84,9 @@ public:
 | 
			
		||||
  void OpDir  (const Field &in, Field &out,int dir,int disp) {
 | 
			
		||||
    _Mat.Mdir(in,out,dir,disp);
 | 
			
		||||
  }
 | 
			
		||||
  void OpDirAll  (const Field &in, std::vector<Field> &out){
 | 
			
		||||
    _Mat.MdirAll(in,out);
 | 
			
		||||
  };
 | 
			
		||||
  void Op     (const Field &in, Field &out){
 | 
			
		||||
    _Mat.M(in,out);
 | 
			
		||||
  }
 | 
			
		||||
@@ -93,8 +97,7 @@ public:
 | 
			
		||||
    _Mat.MdagM(in,out,n1,n2);
 | 
			
		||||
  }
 | 
			
		||||
  void HermOp(const Field &in, Field &out){
 | 
			
		||||
    RealD n1,n2;
 | 
			
		||||
    HermOpAndNorm(in,out,n1,n2);
 | 
			
		||||
    _Mat.MdagM(in,out);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
@@ -116,6 +119,9 @@ public:
 | 
			
		||||
    _Mat.Mdir(in,out,dir,disp);
 | 
			
		||||
    assert(0);
 | 
			
		||||
  }
 | 
			
		||||
  void OpDirAll  (const Field &in, std::vector<Field> &out){
 | 
			
		||||
    assert(0);
 | 
			
		||||
  };
 | 
			
		||||
  void Op     (const Field &in, Field &out){
 | 
			
		||||
    _Mat.M(in,out);
 | 
			
		||||
    assert(0);
 | 
			
		||||
@@ -154,6 +160,9 @@ public:
 | 
			
		||||
  void OpDir  (const Field &in, Field &out,int dir,int disp) {
 | 
			
		||||
    _Mat.Mdir(in,out,dir,disp);
 | 
			
		||||
  }
 | 
			
		||||
  void OpDirAll  (const Field &in, std::vector<Field> &out){
 | 
			
		||||
    _Mat.MdirAll(in,out);
 | 
			
		||||
  };
 | 
			
		||||
  void Op     (const Field &in, Field &out){
 | 
			
		||||
    _Mat.M(in,out);
 | 
			
		||||
  }
 | 
			
		||||
@@ -162,7 +171,6 @@ public:
 | 
			
		||||
  }
 | 
			
		||||
  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
 | 
			
		||||
    _Mat.M(in,out);
 | 
			
		||||
	
 | 
			
		||||
    ComplexD dot= innerProduct(in,out); n1=real(dot);
 | 
			
		||||
    n2=norm2(out);
 | 
			
		||||
  }
 | 
			
		||||
@@ -183,6 +191,9 @@ public:
 | 
			
		||||
  void OpDir  (const Field &in, Field &out,int dir,int disp) {
 | 
			
		||||
    _Mat.Mdir(in,out,dir,disp);
 | 
			
		||||
  }
 | 
			
		||||
  void OpDirAll  (const Field &in, std::vector<Field> &out){
 | 
			
		||||
    _Mat.MdirAll(in,out);
 | 
			
		||||
  };
 | 
			
		||||
  void Op     (const Field &in, Field &out){
 | 
			
		||||
    _Mat.M(in,out);
 | 
			
		||||
  }
 | 
			
		||||
@@ -234,6 +245,9 @@ public:
 | 
			
		||||
      void OpDir  (const Field &in, Field &out,int dir,int disp) {
 | 
			
		||||
	assert(0);
 | 
			
		||||
      }
 | 
			
		||||
      void OpDirAll  (const Field &in, std::vector<Field> &out){
 | 
			
		||||
	assert(0);
 | 
			
		||||
      };
 | 
			
		||||
    };
 | 
			
		||||
    template<class Matrix,class Field>
 | 
			
		||||
    class SchurDiagMooeeOperator :  public SchurOperatorBase<Field> {
 | 
			
		||||
 
 | 
			
		||||
@@ -45,8 +45,13 @@ public:
 | 
			
		||||
    ni=M(in,tmp);
 | 
			
		||||
    no=Mdag(tmp,out);
 | 
			
		||||
  }
 | 
			
		||||
  virtual void  MdagM(const Field &in, Field &out) {
 | 
			
		||||
    RealD ni, no;
 | 
			
		||||
    MdagM(in,out,ni,no);
 | 
			
		||||
  }
 | 
			
		||||
  virtual  void Mdiag    (const Field &in, Field &out)=0;
 | 
			
		||||
  virtual  void Mdir     (const Field &in, Field &out,int dir, int disp)=0;
 | 
			
		||||
  virtual  void MdirAll  (const Field &in, std::vector<Field> &out)=0;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
@@ -56,12 +61,12 @@ template<class Field> class CheckerBoardedSparseMatrixBase : public SparseMatrix
 | 
			
		||||
public:
 | 
			
		||||
  virtual GridBase *RedBlackGrid(void)=0;
 | 
			
		||||
 | 
			
		||||
      //////////////////////////////////////////////////////////////////////
 | 
			
		||||
      // Query the even even properties to make algorithmic decisions
 | 
			
		||||
      //////////////////////////////////////////////////////////////////////
 | 
			
		||||
      virtual RealD  Mass(void)        { return 0.0; };
 | 
			
		||||
      virtual int    ConstEE(void)     { return 1; }; // Disable assumptions unless overridden
 | 
			
		||||
      virtual int    isTrivialEE(void) { return 0; }; // by a derived class that knows better
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Query the even even properties to make algorithmic decisions
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////
 | 
			
		||||
  virtual RealD  Mass(void)        { return 0.0; };
 | 
			
		||||
  virtual int    ConstEE(void)     { return 1; }; // Disable assumptions unless overridden
 | 
			
		||||
  virtual int    isTrivialEE(void) { return 0; }; // by a derived class that knows better
 | 
			
		||||
 | 
			
		||||
  // half checkerboard operaions
 | 
			
		||||
  virtual  void Meooe    (const Field &in, Field &out)=0;
 | 
			
		||||
 
 | 
			
		||||
@@ -94,6 +94,24 @@ public:
 | 
			
		||||
    Coeffs.assign(0.,order);
 | 
			
		||||
    Coeffs[order-1] = 1.;
 | 
			
		||||
  };
 | 
			
		||||
  
 | 
			
		||||
  // PB - more efficient low pass drops high modes above the low as 1/x uses all Chebyshev's.
 | 
			
		||||
  // Similar kick effect below the threshold as Lanczos filter approach
 | 
			
		||||
  void InitLowPass(RealD _lo,RealD _hi,int _order)
 | 
			
		||||
  {
 | 
			
		||||
    lo=_lo;
 | 
			
		||||
    hi=_hi;
 | 
			
		||||
    order=_order;
 | 
			
		||||
      
 | 
			
		||||
    if(order < 2) exit(-1);
 | 
			
		||||
    Coeffs.resize(order);
 | 
			
		||||
    for(int j=0;j<order;j++){
 | 
			
		||||
      RealD k=(order-1.0);
 | 
			
		||||
      RealD s=std::cos( j*M_PI*(k+0.5)/order );
 | 
			
		||||
      Coeffs[j] = s * 2.0/order;
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  void Init(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD))
 | 
			
		||||
  {
 | 
			
		||||
@@ -234,20 +252,20 @@ public:
 | 
			
		||||
    RealD xscale = 2.0/(hi-lo);
 | 
			
		||||
    RealD mscale = -(hi+lo)/(hi-lo);
 | 
			
		||||
    Linop.HermOp(T0,y);
 | 
			
		||||
    T1=y*xscale+in*mscale;
 | 
			
		||||
    axpby(T1,xscale,mscale,y,in);
 | 
			
		||||
 | 
			
		||||
    // sum = .5 c[0] T0 + c[1] T1
 | 
			
		||||
    out = (0.5*Coeffs[0])*T0 + Coeffs[1]*T1;
 | 
			
		||||
    //    out = ()*T0 + Coeffs[1]*T1;
 | 
			
		||||
    axpby(out,0.5*Coeffs[0],Coeffs[1],T0,T1);
 | 
			
		||||
    for(int n=2;n<order;n++){
 | 
			
		||||
	
 | 
			
		||||
      Linop.HermOp(*Tn,y);
 | 
			
		||||
 | 
			
		||||
      y=xscale*y+mscale*(*Tn);
 | 
			
		||||
 | 
			
		||||
      *Tnp=2.0*y-(*Tnm);
 | 
			
		||||
 | 
			
		||||
      out=out+Coeffs[n]* (*Tnp);
 | 
			
		||||
 | 
			
		||||
      //     y=xscale*y+mscale*(*Tn);
 | 
			
		||||
      //      *Tnp=2.0*y-(*Tnm);
 | 
			
		||||
      //      out=out+Coeffs[n]* (*Tnp);
 | 
			
		||||
      axpby(y,xscale,mscale,y,(*Tn));
 | 
			
		||||
      axpby(*Tnp,2.0,-1.0,y,(*Tnm));
 | 
			
		||||
      axpy(out,Coeffs[n],*Tnp,out);
 | 
			
		||||
      // Cycle pointers to avoid copies
 | 
			
		||||
      Field *swizzle = Tnm;
 | 
			
		||||
      Tnm    =Tn;
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										129
									
								
								Grid/algorithms/approx/JacobiPolynomial.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										129
									
								
								Grid/algorithms/approx/JacobiPolynomial.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,129 @@
 | 
			
		||||
#ifndef GRID_JACOBIPOLYNOMIAL_H
 | 
			
		||||
#define GRID_JACOBIPOLYNOMIAL_H
 | 
			
		||||
 | 
			
		||||
#include <Grid/algorithms/LinearOperator.h>
 | 
			
		||||
 | 
			
		||||
NAMESPACE_BEGIN(Grid);
 | 
			
		||||
 | 
			
		||||
template<class Field>
 | 
			
		||||
class JacobiPolynomial : public OperatorFunction<Field> {
 | 
			
		||||
 private:
 | 
			
		||||
  using OperatorFunction<Field>::operator();
 | 
			
		||||
 | 
			
		||||
  int order;
 | 
			
		||||
  RealD hi;
 | 
			
		||||
  RealD lo;
 | 
			
		||||
  RealD alpha;
 | 
			
		||||
  RealD beta;
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  void csv(std::ostream &out){
 | 
			
		||||
    csv(out,lo,hi);
 | 
			
		||||
  }
 | 
			
		||||
  void csv(std::ostream &out,RealD llo,RealD hhi){
 | 
			
		||||
    RealD diff = hhi-llo;
 | 
			
		||||
    RealD delta = diff*1.0e-5;
 | 
			
		||||
    for (RealD x=llo-delta; x<=hhi; x+=delta) {
 | 
			
		||||
      RealD f = approx(x);
 | 
			
		||||
      out<< x<<" "<<f <<std::endl;
 | 
			
		||||
    }
 | 
			
		||||
    return;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  JacobiPolynomial(){};
 | 
			
		||||
  JacobiPolynomial(RealD _lo,RealD _hi,int _order,RealD _alpha, RealD _beta)
 | 
			
		||||
  {
 | 
			
		||||
      lo=_lo;
 | 
			
		||||
      hi=_hi;
 | 
			
		||||
      alpha=_alpha;
 | 
			
		||||
      beta=_beta;
 | 
			
		||||
      order=_order;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  RealD approx(RealD x) // Convenience for plotting the approximation                                                       
 | 
			
		||||
  {
 | 
			
		||||
    RealD Tn;
 | 
			
		||||
    RealD Tnm;
 | 
			
		||||
    RealD Tnp;
 | 
			
		||||
 | 
			
		||||
    RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo));
 | 
			
		||||
 | 
			
		||||
    RealD T0=1.0;
 | 
			
		||||
    RealD T1=(alpha-beta)*0.5+(alpha+beta+2.0)*0.5*y;
 | 
			
		||||
 | 
			
		||||
    Tn =T1;
 | 
			
		||||
    Tnm=T0;
 | 
			
		||||
    for(int n=2;n<=order;n++){
 | 
			
		||||
      RealD cnp = 2.0*n*(n+alpha+beta)*(2.0*n-2.0+alpha+beta);
 | 
			
		||||
      RealD cny = (2.0*n-2.0+alpha+beta)*(2.0*n-1.0+alpha+beta)*(2.0*n+alpha+beta);
 | 
			
		||||
      RealD cn1 = (2.0*n+alpha+beta-1.0)*(alpha*alpha-beta*beta);
 | 
			
		||||
      RealD cnm = - 2.0*(n+alpha-1.0)*(n+beta-1.0)*(2.0*n+alpha+beta);
 | 
			
		||||
      Tnp= ( cny * y *Tn + cn1 * Tn + cnm * Tnm )/ cnp;
 | 
			
		||||
      Tnm=Tn;
 | 
			
		||||
      Tn =Tnp;
 | 
			
		||||
    }
 | 
			
		||||
    return Tnp;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  // Implement the required interface                                                                                       
 | 
			
		||||
  void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
 | 
			
		||||
    GridBase *grid=in.Grid();
 | 
			
		||||
 | 
			
		||||
    int vol=grid->gSites();
 | 
			
		||||
 | 
			
		||||
    Field T0(grid);
 | 
			
		||||
    Field T1(grid);
 | 
			
		||||
    Field T2(grid);
 | 
			
		||||
    Field y(grid);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    Field *Tnm = &T0;
 | 
			
		||||
    Field *Tn  = &T1;
 | 
			
		||||
    Field *Tnp = &T2;
 | 
			
		||||
 | 
			
		||||
    //    RealD T0=1.0;                                                                                                     
 | 
			
		||||
    T0=in;
 | 
			
		||||
 | 
			
		||||
    //    RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo));                                                                           
 | 
			
		||||
    //           = x * 2/(hi-lo) - (hi+lo)/(hi-lo)                                                                          
 | 
			
		||||
    Linop.HermOp(T0,y);
 | 
			
		||||
    RealD xscale = 2.0/(hi-lo);
 | 
			
		||||
    RealD mscale = -(hi+lo)/(hi-lo);
 | 
			
		||||
    Linop.HermOp(T0,y);
 | 
			
		||||
    y=y*xscale+in*mscale;
 | 
			
		||||
 | 
			
		||||
    // RealD T1=(alpha-beta)*0.5+(alpha+beta+2.0)*0.5*y;
 | 
			
		||||
    RealD halfAmB  = (alpha-beta)*0.5;
 | 
			
		||||
    RealD halfApBp2= (alpha+beta+2.0)*0.5;
 | 
			
		||||
    T1 = halfAmB * in + halfApBp2*y;
 | 
			
		||||
 | 
			
		||||
    for(int n=2;n<=order;n++){
 | 
			
		||||
 | 
			
		||||
      Linop.HermOp(*Tn,y);
 | 
			
		||||
      y=xscale*y+mscale*(*Tn);
 | 
			
		||||
 | 
			
		||||
      RealD cnp = 2.0*n*(n+alpha+beta)*(2.0*n-2.0+alpha+beta);
 | 
			
		||||
      RealD cny = (2.0*n-2.0+alpha+beta)*(2.0*n-1.0+alpha+beta)*(2.0*n+alpha+beta);
 | 
			
		||||
      RealD cn1 = (2.0*n+alpha+beta-1.0)*(alpha*alpha-beta*beta);
 | 
			
		||||
      RealD cnm = - 2.0*(n+alpha-1.0)*(n+beta-1.0)*(2.0*n+alpha+beta);
 | 
			
		||||
 | 
			
		||||
      //      Tnp= ( cny * y *Tn + cn1 * Tn + cnm * Tnm )/ cnp;                                                             
 | 
			
		||||
      cny=cny/cnp;
 | 
			
		||||
      cn1=cn1/cnp;
 | 
			
		||||
      cn1=cn1/cnp;
 | 
			
		||||
      cnm=cnm/cnp;
 | 
			
		||||
 | 
			
		||||
      *Tnp=cny*y + cn1 *(*Tn) + cnm * (*Tnm);
 | 
			
		||||
 | 
			
		||||
      // Cycle pointers to avoid copies                                                                                     
 | 
			
		||||
      Field *swizzle = Tnm;
 | 
			
		||||
      Tnm    =Tn;
 | 
			
		||||
      Tn     =Tnp;
 | 
			
		||||
      Tnp    =swizzle;
 | 
			
		||||
    }
 | 
			
		||||
    out=*Tnp;
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
NAMESPACE_END(Grid);
 | 
			
		||||
#endif
 | 
			
		||||
@@ -71,7 +71,6 @@ public:
 | 
			
		||||
    // Initial residual computation & set up
 | 
			
		||||
    RealD guess = norm2(psi);
 | 
			
		||||
    assert(std::isnan(guess) == 0);
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
    Linop.HermOpAndNorm(psi, mmp, d, b);
 | 
			
		||||
    
 | 
			
		||||
@@ -154,18 +153,18 @@ public:
 | 
			
		||||
        RealD resnorm = std::sqrt(norm2(p));
 | 
			
		||||
        RealD true_residual = resnorm / srcnorm;
 | 
			
		||||
 | 
			
		||||
        std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k << std::endl;
 | 
			
		||||
        std::cout << GridLogMessage << "\tComputed residual " << std::sqrt(cp / ssq)<<std::endl;
 | 
			
		||||
	std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
 | 
			
		||||
	std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
 | 
			
		||||
        std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k 
 | 
			
		||||
		  << "\tComputed residual " << std::sqrt(cp / ssq)
 | 
			
		||||
		  << "\tTrue residual " << true_residual
 | 
			
		||||
		  << "\tTarget " << Tolerance << std::endl;
 | 
			
		||||
 | 
			
		||||
        std::cout << GridLogMessage << "Time breakdown "<<std::endl;
 | 
			
		||||
	std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout << GridLogMessage << "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout << GridLogMessage << "\tInner      " << InnerTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout << GridLogMessage << "\tAxpyNorm   " << AxpyNormTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
 | 
			
		||||
        std::cout << GridLogIterative << "Time breakdown "<<std::endl;
 | 
			
		||||
	std::cout << GridLogIterative << "\tElapsed    " << SolverTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout << GridLogIterative << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout << GridLogIterative << "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout << GridLogIterative << "\tInner      " << InnerTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout << GridLogIterative << "\tAxpyNorm   " << AxpyNormTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout << GridLogIterative << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
 | 
			
		||||
 | 
			
		||||
        if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -43,6 +43,11 @@ NAMESPACE_BEGIN(Grid);
 | 
			
		||||
template<class Field>
 | 
			
		||||
void basisOrthogonalize(std::vector<Field> &basis,Field &w,int k) 
 | 
			
		||||
{
 | 
			
		||||
  // If assume basis[j] are already orthonormal,
 | 
			
		||||
  // can take all inner products in parallel saving 2x bandwidth
 | 
			
		||||
  // Save 3x bandwidth on the second line of loop.
 | 
			
		||||
  // perhaps 2.5x speed up.
 | 
			
		||||
  // 2x overall in Multigrid Lanczos  
 | 
			
		||||
  for(int j=0; j<k; ++j){
 | 
			
		||||
    auto ip = innerProduct(basis[j],w);
 | 
			
		||||
    w = w - ip*basis[j];
 | 
			
		||||
@@ -282,7 +287,7 @@ public:
 | 
			
		||||
			    RealD _eresid, // resid in lmdue deficit 
 | 
			
		||||
			    int _MaxIter, // Max iterations
 | 
			
		||||
			    RealD _betastp=0.0, // if beta(k) < betastp: converged
 | 
			
		||||
			    int _MinRestart=1, int _orth_period = 1,
 | 
			
		||||
			    int _MinRestart=0, int _orth_period = 1,
 | 
			
		||||
			    IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) :
 | 
			
		||||
    SimpleTester(HermOp), _PolyOp(PolyOp),      _HermOp(HermOp), _Tester(Tester),
 | 
			
		||||
    Nstop(_Nstop)  ,      Nk(_Nk),      Nm(_Nm),
 | 
			
		||||
@@ -347,7 +352,7 @@ until convergence
 | 
			
		||||
    GridBase *grid = src.Grid();
 | 
			
		||||
    assert(grid == evec[0].Grid());
 | 
			
		||||
    
 | 
			
		||||
    GridLogIRL.TimingMode(1);
 | 
			
		||||
    //    GridLogIRL.TimingMode(1);
 | 
			
		||||
    std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
 | 
			
		||||
    std::cout << GridLogIRL <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 /  "<< MaxIter<< std::endl;
 | 
			
		||||
    std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
 | 
			
		||||
@@ -577,11 +582,11 @@ until convergence
 | 
			
		||||
/* Saad PP. 195
 | 
			
		||||
1. Choose an initial vector v1 of 2-norm unity. Set β1 ≡ 0, v0 ≡ 0
 | 
			
		||||
2. For k = 1,2,...,m Do:
 | 
			
		||||
3. wk:=Avk−βkv_{k−1}      
 | 
			
		||||
4. αk:=(wk,vk)       // 
 | 
			
		||||
5. wk:=wk−αkvk       // wk orthog vk 
 | 
			
		||||
6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
 | 
			
		||||
7. vk+1 := wk/βk+1
 | 
			
		||||
3. wk:=Avk - b_k v_{k-1}      
 | 
			
		||||
4. ak:=(wk,vk)       // 
 | 
			
		||||
5. wk:=wk-akvk       // wk orthog vk 
 | 
			
		||||
6. bk+1 := ||wk||_2. If b_k+1 = 0 then Stop
 | 
			
		||||
7. vk+1 := wk/b_k+1
 | 
			
		||||
8. EndDo
 | 
			
		||||
 */
 | 
			
		||||
  void step(std::vector<RealD>& lmd,
 | 
			
		||||
@@ -589,6 +594,7 @@ until convergence
 | 
			
		||||
	    std::vector<Field>& evec,
 | 
			
		||||
	    Field& w,int Nm,int k)
 | 
			
		||||
  {
 | 
			
		||||
    std::cout<<GridLogIRL << "Lanczos step " <<k<<std::endl;
 | 
			
		||||
    const RealD tiny = 1.0e-20;
 | 
			
		||||
    assert( k< Nm );
 | 
			
		||||
 | 
			
		||||
@@ -600,20 +606,20 @@ until convergence
 | 
			
		||||
 | 
			
		||||
    if(k>0) w -= lme[k-1] * evec[k-1];
 | 
			
		||||
 | 
			
		||||
    ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk)
 | 
			
		||||
    ComplexD zalph = innerProduct(evec_k,w);
 | 
			
		||||
    RealD     alph = real(zalph);
 | 
			
		||||
 | 
			
		||||
    w = w - alph * evec_k;// 5. wk:=wk−αkvk
 | 
			
		||||
    w = w - alph * evec_k;
 | 
			
		||||
 | 
			
		||||
    RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
 | 
			
		||||
    // 7. vk+1 := wk/βk+1
 | 
			
		||||
    RealD beta = normalise(w); 
 | 
			
		||||
 | 
			
		||||
    lmd[k] = alph;
 | 
			
		||||
    lme[k] = beta;
 | 
			
		||||
 | 
			
		||||
    if (k>0 && k % orth_period == 0) {
 | 
			
		||||
    if ( (k>0) && ( (k % orth_period) == 0 )) {
 | 
			
		||||
      std::cout<<GridLogIRL << "Orthogonalising " <<k<<std::endl;
 | 
			
		||||
      orthogonalize(w,evec,k); // orthonormalise
 | 
			
		||||
      std::cout<<GridLogIRL << "Orthogonalised " <<std::endl;
 | 
			
		||||
      std::cout<<GridLogIRL << "Orthogonalised " <<k<<std::endl;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(k < Nm-1) evec[k+1] = w;
 | 
			
		||||
@@ -621,6 +627,8 @@ until convergence
 | 
			
		||||
    std::cout<<GridLogIRL << "alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl;
 | 
			
		||||
    if ( beta < tiny ) 
 | 
			
		||||
      std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl;
 | 
			
		||||
 | 
			
		||||
    std::cout<<GridLogIRL << "Lanczos step complete " <<k<<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void diagonalize_Eigen(std::vector<RealD>& lmd, std::vector<RealD>& lme, 
 | 
			
		||||
 
 | 
			
		||||
@@ -33,26 +33,30 @@ NAMESPACE_BEGIN(Grid);
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Take a matrix and form an NE solver calling a Herm solver
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
template<class Field> class NormalEquations : public OperatorFunction<Field>{
 | 
			
		||||
template<class Field> class NormalEquations {
 | 
			
		||||
private:
 | 
			
		||||
  SparseMatrixBase<Field> & _Matrix;
 | 
			
		||||
  OperatorFunction<Field> & _HermitianSolver;
 | 
			
		||||
 | 
			
		||||
  LinearFunction<Field>   & _Guess;
 | 
			
		||||
public:
 | 
			
		||||
 | 
			
		||||
  /////////////////////////////////////////////////////
 | 
			
		||||
  // Wrap the usual normal equations trick
 | 
			
		||||
  /////////////////////////////////////////////////////
 | 
			
		||||
  NormalEquations(SparseMatrixBase<Field> &Matrix, OperatorFunction<Field> &HermitianSolver) 
 | 
			
		||||
    :  _Matrix(Matrix), _HermitianSolver(HermitianSolver) {}; 
 | 
			
		||||
 NormalEquations(SparseMatrixBase<Field> &Matrix, OperatorFunction<Field> &HermitianSolver,
 | 
			
		||||
		 LinearFunction<Field> &Guess) 
 | 
			
		||||
   :  _Matrix(Matrix), _HermitianSolver(HermitianSolver), _Guess(Guess) {}; 
 | 
			
		||||
 | 
			
		||||
  void operator() (const Field &in, Field &out){
 | 
			
		||||
 
 | 
			
		||||
    Field src(in.Grid());
 | 
			
		||||
    Field tmp(in.Grid());
 | 
			
		||||
 | 
			
		||||
    MdagMLinearOperator<SparseMatrixBase<Field>,Field> MdagMOp(_Matrix);
 | 
			
		||||
    _Matrix.Mdag(in,src);
 | 
			
		||||
    _HermitianSolver(src,out);  // Mdag M out = Mdag in
 | 
			
		||||
 
 | 
			
		||||
    _Guess(src,out);
 | 
			
		||||
    _HermitianSolver(MdagMOp,src,out);  // Mdag M out = Mdag in
 | 
			
		||||
 | 
			
		||||
  }     
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -30,12 +30,12 @@ template<class Field> class PowerMethod
 | 
			
		||||
      RealD vden = norm2(src_n); 
 | 
			
		||||
      RealD na = vnum/vden; 
 | 
			
		||||
      
 | 
			
		||||
      if ( (fabs(evalMaxApprox/na - 1.0) < 0.01) || (i==_MAX_ITER_EST_-1) ) { 
 | 
			
		||||
      if ( (fabs(evalMaxApprox/na - 1.0) < 0.001) || (i==_MAX_ITER_EST_-1) ) { 
 | 
			
		||||
 	evalMaxApprox = na; 
 | 
			
		||||
	std::cout << GridLogMessage << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
 | 
			
		||||
 	return evalMaxApprox; 
 | 
			
		||||
      } 
 | 
			
		||||
      evalMaxApprox = na; 
 | 
			
		||||
      std::cout << GridLogMessage << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
 | 
			
		||||
      src_n = tmp;
 | 
			
		||||
    }
 | 
			
		||||
    assert(0);
 | 
			
		||||
 
 | 
			
		||||
@@ -38,10 +38,11 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
NAMESPACE_BEGIN(Grid);
 | 
			
		||||
 | 
			
		||||
#define GCRLogLevel std::cout << GridLogMessage <<std::string(level,'\t')<< " Level "<<level<<" " 
 | 
			
		||||
 | 
			
		||||
template<class Field>
 | 
			
		||||
class PrecGeneralisedConjugateResidual : public OperatorFunction<Field> {
 | 
			
		||||
class PrecGeneralisedConjugateResidual : public LinearFunction<Field> {
 | 
			
		||||
public:                                                
 | 
			
		||||
  using OperatorFunction<Field>::operator();
 | 
			
		||||
 | 
			
		||||
  RealD   Tolerance;
 | 
			
		||||
  Integer MaxIterations;
 | 
			
		||||
@@ -49,23 +50,29 @@ public:
 | 
			
		||||
  int mmax;
 | 
			
		||||
  int nstep;
 | 
			
		||||
  int steps;
 | 
			
		||||
  int level;
 | 
			
		||||
  GridStopWatch PrecTimer;
 | 
			
		||||
  GridStopWatch MatTimer;
 | 
			
		||||
  GridStopWatch LinalgTimer;
 | 
			
		||||
 | 
			
		||||
  LinearFunction<Field> &Preconditioner;
 | 
			
		||||
  LinearFunction<Field>     &Preconditioner;
 | 
			
		||||
  LinearOperatorBase<Field> &Linop;
 | 
			
		||||
 | 
			
		||||
  PrecGeneralisedConjugateResidual(RealD tol,Integer maxit,LinearFunction<Field> &Prec,int _mmax,int _nstep) : 
 | 
			
		||||
  void Level(int lv) { level=lv; };
 | 
			
		||||
 | 
			
		||||
  PrecGeneralisedConjugateResidual(RealD tol,Integer maxit,LinearOperatorBase<Field> &_Linop,LinearFunction<Field> &Prec,int _mmax,int _nstep) : 
 | 
			
		||||
    Tolerance(tol), 
 | 
			
		||||
    MaxIterations(maxit),
 | 
			
		||||
    Linop(_Linop),
 | 
			
		||||
    Preconditioner(Prec),
 | 
			
		||||
    mmax(_mmax),
 | 
			
		||||
    nstep(_nstep)
 | 
			
		||||
  { 
 | 
			
		||||
    level=1;
 | 
			
		||||
    verbose=1;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
 | 
			
		||||
  void operator() (const Field &src, Field &psi){
 | 
			
		||||
 | 
			
		||||
    psi=Zero();
 | 
			
		||||
    RealD cp, ssq,rsq;
 | 
			
		||||
@@ -84,9 +91,9 @@ public:
 | 
			
		||||
    steps=0;
 | 
			
		||||
    for(int k=0;k<MaxIterations;k++){
 | 
			
		||||
 | 
			
		||||
      cp=GCRnStep(Linop,src,psi,rsq);
 | 
			
		||||
      cp=GCRnStep(src,psi,rsq);
 | 
			
		||||
 | 
			
		||||
      std::cout<<GridLogMessage<<"VPGCR("<<mmax<<","<<nstep<<") "<< steps <<" steps cp = "<<cp<<" target "<<rsq <<std::endl;
 | 
			
		||||
      GCRLogLevel <<"PGCR("<<mmax<<","<<nstep<<") "<< steps <<" steps cp = "<<cp<<" target "<<rsq <<std::endl;
 | 
			
		||||
 | 
			
		||||
      if(cp<rsq) {
 | 
			
		||||
 | 
			
		||||
@@ -95,24 +102,26 @@ public:
 | 
			
		||||
	Linop.HermOp(psi,r);
 | 
			
		||||
	axpy(r,-1.0,src,r);
 | 
			
		||||
	RealD tr = norm2(r);
 | 
			
		||||
	std::cout<<GridLogMessage<<"PrecGeneralisedConjugateResidual: Converged on iteration " <<steps
 | 
			
		||||
	GCRLogLevel<<"PGCR: Converged on iteration " <<steps
 | 
			
		||||
		 << " computed residual "<<sqrt(cp/ssq)
 | 
			
		||||
		 << " true residual "    <<sqrt(tr/ssq)
 | 
			
		||||
		 << " target "           <<Tolerance <<std::endl;
 | 
			
		||||
 | 
			
		||||
	std::cout<<GridLogMessage<<"VPGCR Time elapsed: Total  "<< SolverTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout<<GridLogMessage<<"VPGCR Time elapsed: Precon "<<   PrecTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout<<GridLogMessage<<"VPGCR Time elapsed: Matrix "<<    MatTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout<<GridLogMessage<<"VPGCR Time elapsed: Linalg "<< LinalgTimer.Elapsed() <<std::endl;
 | 
			
		||||
	GCRLogLevel<<"PGCR Time elapsed: Total  "<< SolverTimer.Elapsed() <<std::endl;
 | 
			
		||||
	/*
 | 
			
		||||
	  GCRLogLevel<<"PGCR Time elapsed: Precon "<<   PrecTimer.Elapsed() <<std::endl;
 | 
			
		||||
	  GCRLogLevel<<"PGCR Time elapsed: Matrix "<<    MatTimer.Elapsed() <<std::endl;
 | 
			
		||||
	  GCRLogLevel<<"PGCR Time elapsed: Linalg "<< LinalgTimer.Elapsed() <<std::endl;
 | 
			
		||||
	*/
 | 
			
		||||
	return;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
    std::cout<<GridLogMessage<<"Variable Preconditioned GCR did not converge"<<std::endl;
 | 
			
		||||
    GCRLogLevel<<"Variable Preconditioned GCR did not converge"<<std::endl;
 | 
			
		||||
    assert(0);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  RealD GCRnStep(LinearOperatorBase<Field> &Linop,const Field &src, Field &psi,RealD rsq){
 | 
			
		||||
  RealD GCRnStep(const Field &src, Field &psi,RealD rsq){
 | 
			
		||||
 | 
			
		||||
    RealD cp;
 | 
			
		||||
    RealD a, b;
 | 
			
		||||
@@ -134,9 +143,7 @@ public:
 | 
			
		||||
    std::vector<Field> p(mmax,grid);
 | 
			
		||||
    std::vector<RealD> qq(mmax);
 | 
			
		||||
      
 | 
			
		||||
    std::cout<<GridLogIterative<< " ************** "<< std::endl;
 | 
			
		||||
    std::cout<<GridLogIterative<< "   GCRnStep("<<nstep<<")"<<std::endl;
 | 
			
		||||
    std::cout<<GridLogIterative<< " ************** "<< std::endl;
 | 
			
		||||
    GCRLogLevel<< "PGCR nStep("<<nstep<<")"<<std::endl;
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////
 | 
			
		||||
    // initial guess x0 is taken as nonzero.
 | 
			
		||||
@@ -150,35 +157,15 @@ public:
 | 
			
		||||
    LinalgTimer.Start();
 | 
			
		||||
    r=src-Az;
 | 
			
		||||
    LinalgTimer.Stop();
 | 
			
		||||
    std::cout<<GridLogIterative<< " GCRnStep true residual r = src - A psi   "<<norm2(r) <<std::endl;
 | 
			
		||||
    GCRLogLevel<< "PGCR true residual r = src - A psi   "<<norm2(r) <<std::endl;
 | 
			
		||||
    
 | 
			
		||||
    /////////////////////
 | 
			
		||||
    // p = Prec(r)
 | 
			
		||||
    /////////////////////
 | 
			
		||||
 | 
			
		||||
    std::cout<<GridLogIterative<< " GCRnStep apply preconditioner z= M^-1 r "<< std::endl;
 | 
			
		||||
    std::cout<<GridLogIterative<< " --------------------------------------- "<< std::endl;
 | 
			
		||||
    PrecTimer.Start();
 | 
			
		||||
    Preconditioner(r,z);
 | 
			
		||||
    PrecTimer.Stop();
 | 
			
		||||
    std::cout<<GridLogIterative<< " --------------------------------------- "<< std::endl;
 | 
			
		||||
    std::cout<<GridLogIterative<< " GCRnStep called Preconditioner z "<< norm2(z) <<std::endl;
 | 
			
		||||
 | 
			
		||||
    //    MatTimer.Start();
 | 
			
		||||
    //    Linop.HermOp(z,tmp); 
 | 
			
		||||
    //    MatTimer.Stop();
 | 
			
		||||
 | 
			
		||||
    //    LinalgTimer.Start();
 | 
			
		||||
    //    ttmp=tmp;
 | 
			
		||||
    //    tmp=tmp-r;
 | 
			
		||||
    //    LinalgTimer.Stop();
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
      std::cout<<GridLogMessage<<r<<std::endl;
 | 
			
		||||
      std::cout<<GridLogMessage<<z<<std::endl;
 | 
			
		||||
      std::cout<<GridLogMessage<<ttmp<<std::endl;
 | 
			
		||||
      std::cout<<GridLogMessage<<tmp<<std::endl;
 | 
			
		||||
    */
 | 
			
		||||
 | 
			
		||||
    MatTimer.Start();
 | 
			
		||||
    Linop.HermOpAndNorm(z,Az,zAz,zAAz); 
 | 
			
		||||
@@ -190,7 +177,6 @@ public:
 | 
			
		||||
    p[0]= z;
 | 
			
		||||
    q[0]= Az;
 | 
			
		||||
    qq[0]= zAAz;
 | 
			
		||||
    std::cout<<GridLogIterative<< " GCRnStep p0=z, q0 = A p0 " <<std::endl;
 | 
			
		||||
    
 | 
			
		||||
    cp =norm2(r);
 | 
			
		||||
    LinalgTimer.Stop();
 | 
			
		||||
@@ -212,20 +198,16 @@ public:
 | 
			
		||||
      cp = axpy_norm(r,-a,q[peri_k],r);
 | 
			
		||||
      LinalgTimer.Stop();
 | 
			
		||||
 | 
			
		||||
      std::cout<<GridLogMessage<< " VPGCR_step["<<steps<<"]  resid " << cp << " target " <<rsq<<std::endl; 
 | 
			
		||||
      GCRLogLevel<< "PGCR step["<<steps<<"]  resid " << cp << " target " <<rsq<<std::endl; 
 | 
			
		||||
 | 
			
		||||
      if((k==nstep-1)||(cp<rsq)){
 | 
			
		||||
	return cp;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
      std::cout<<GridLogIterative<< " GCRnStep apply preconditioner z= M^-1 r "<< std::endl;
 | 
			
		||||
      std::cout<<GridLogIterative<< " --------------------------------------- "<< std::endl;
 | 
			
		||||
      PrecTimer.Start();
 | 
			
		||||
      Preconditioner(r,z);// solve Az = r
 | 
			
		||||
      PrecTimer.Stop();
 | 
			
		||||
      std::cout<<GridLogIterative<< " --------------------------------------- "<< std::endl;
 | 
			
		||||
      std::cout<<GridLogIterative<< " GCRnStep called Preconditioner z "<< norm2(z) <<std::endl;
 | 
			
		||||
 | 
			
		||||
      MatTimer.Start();
 | 
			
		||||
      Linop.HermOpAndNorm(z,Az,zAz,zAAz);
 | 
			
		||||
 
 | 
			
		||||
@@ -6,6 +6,12 @@ NAMESPACE_BEGIN(Grid);
 | 
			
		||||
MemoryStats *MemoryProfiler::stats = nullptr;
 | 
			
		||||
bool         MemoryProfiler::debug = false;
 | 
			
		||||
 | 
			
		||||
#ifdef GRID_NVCC
 | 
			
		||||
#define SMALL_LIMIT (0)
 | 
			
		||||
#else
 | 
			
		||||
#define SMALL_LIMIT (4096)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef POINTER_CACHE
 | 
			
		||||
int PointerCache::victim;
 | 
			
		||||
 | 
			
		||||
@@ -13,7 +19,7 @@ PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::Ncache];
 | 
			
		||||
 | 
			
		||||
void *PointerCache::Insert(void *ptr,size_t bytes) {
 | 
			
		||||
 | 
			
		||||
  if (bytes < 4096 ) return ptr;
 | 
			
		||||
  if (bytes < SMALL_LIMIT ) return ptr;
 | 
			
		||||
 | 
			
		||||
#ifdef GRID_OMP
 | 
			
		||||
  assert(omp_in_parallel()==0);
 | 
			
		||||
@@ -50,7 +56,7 @@ void *PointerCache::Insert(void *ptr,size_t bytes) {
 | 
			
		||||
 | 
			
		||||
void *PointerCache::Lookup(size_t bytes) {
 | 
			
		||||
 | 
			
		||||
  if (bytes < 4096 ) return NULL;
 | 
			
		||||
  if (bytes < SMALL_LIMIT ) return NULL;
 | 
			
		||||
 | 
			
		||||
#ifdef GRID_OMP
 | 
			
		||||
  assert(omp_in_parallel()==0);
 | 
			
		||||
 
 | 
			
		||||
@@ -49,8 +49,13 @@ NAMESPACE_BEGIN(Grid);
 | 
			
		||||
#ifdef POINTER_CACHE
 | 
			
		||||
class PointerCache {
 | 
			
		||||
private:
 | 
			
		||||
 | 
			
		||||
/*Pinning pages is costly*/
 | 
			
		||||
/*Could maintain separate large and small allocation caches*/
 | 
			
		||||
#ifdef GRID_NVCC 
 | 
			
		||||
  static const int Ncache=128;
 | 
			
		||||
#else
 | 
			
		||||
  static const int Ncache=8;
 | 
			
		||||
#endif
 | 
			
		||||
  static int victim;
 | 
			
		||||
 | 
			
		||||
  typedef struct { 
 | 
			
		||||
@@ -63,7 +68,6 @@ private:
 | 
			
		||||
 | 
			
		||||
public:
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  static void *Insert(void *ptr,size_t bytes) ;
 | 
			
		||||
  static void *Lookup(size_t bytes) ;
 | 
			
		||||
 | 
			
		||||
@@ -170,13 +174,14 @@ public:
 | 
			
		||||
    // Unified (managed) memory
 | 
			
		||||
    ////////////////////////////////////
 | 
			
		||||
    if ( ptr == (_Tp *) NULL ) {
 | 
			
		||||
      //      printf(" alignedAllocater cache miss %ld bytes ",bytes);      BACKTRACEFP(stdout);
 | 
			
		||||
      auto err = cudaMallocManaged((void **)&ptr,bytes);
 | 
			
		||||
      if( err != cudaSuccess ) {
 | 
			
		||||
	ptr = (_Tp *) NULL;
 | 
			
		||||
	std::cerr << " cudaMallocManaged failed for " << bytes<<" bytes " <<cudaGetErrorString(err)<< std::endl;
 | 
			
		||||
	assert(0);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    } 
 | 
			
		||||
    assert( ptr != (_Tp *)NULL);
 | 
			
		||||
#else 
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 
 | 
			
		||||
@@ -37,19 +37,18 @@ template<class iobj> inline void LatticeCoordinate(Lattice<iobj> &l,int mu)
 | 
			
		||||
  GridBase *grid = l.Grid();
 | 
			
		||||
  int Nsimd = grid->iSites();
 | 
			
		||||
 | 
			
		||||
  Coordinate gcoor;
 | 
			
		||||
  ExtractBuffer<scalar_type> mergebuf(Nsimd);
 | 
			
		||||
 | 
			
		||||
  vector_type vI;
 | 
			
		||||
  auto l_v = l.View();
 | 
			
		||||
  for(int o=0;o<grid->oSites();o++){
 | 
			
		||||
  thread_for( o, grid->oSites(), {
 | 
			
		||||
    vector_type vI;
 | 
			
		||||
    Coordinate gcoor;
 | 
			
		||||
    ExtractBuffer<scalar_type> mergebuf(Nsimd);
 | 
			
		||||
    for(int i=0;i<grid->iSites();i++){
 | 
			
		||||
      grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor);
 | 
			
		||||
      mergebuf[i]=(Integer)gcoor[mu];
 | 
			
		||||
    }
 | 
			
		||||
    merge<vector_type,scalar_type>(vI,mergebuf);
 | 
			
		||||
    l_v[o]=vI;
 | 
			
		||||
  }
 | 
			
		||||
  });
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// LatticeCoordinate();
 | 
			
		||||
 
 | 
			
		||||
@@ -1,5 +1,4 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/lattice/Lattice_transfer.h
 | 
			
		||||
@@ -83,12 +82,35 @@ template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Latti
 | 
			
		||||
  });
 | 
			
		||||
}
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
template<class vobj,class CComplex,int nbasis>
 | 
			
		||||
inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
 | 
			
		||||
			  const             Lattice<vobj>   &fineData,
 | 
			
		||||
			  const std::vector<Lattice<vobj> > &Basis)
 | 
			
		||||
{
 | 
			
		||||
  GridBase * fine  = fineData.Grid();
 | 
			
		||||
  GridBase * coarse= coarseData.Grid();
 | 
			
		||||
 | 
			
		||||
  Lattice<CComplex> ip(coarse); 
 | 
			
		||||
 | 
			
		||||
  //  auto fineData_   = fineData.View();
 | 
			
		||||
  auto coarseData_ = coarseData.View();
 | 
			
		||||
  auto ip_         = ip.View();
 | 
			
		||||
  for(int v=0;v<nbasis;v++) {
 | 
			
		||||
    blockInnerProduct(ip,Basis[v],fineData);
 | 
			
		||||
    accelerator_for( sc, coarse->oSites(), vobj::Nsimd(), {
 | 
			
		||||
	coalescedWrite(coarseData_[sc](v),ip_(sc));
 | 
			
		||||
      });
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class vobj,class CComplex,int nbasis>
 | 
			
		||||
inline void blockProject1(Lattice<iVector<CComplex,nbasis > > &coarseData,
 | 
			
		||||
			 const             Lattice<vobj>   &fineData,
 | 
			
		||||
			 const std::vector<Lattice<vobj> > &Basis)
 | 
			
		||||
{
 | 
			
		||||
  typedef iVector<CComplex,nbasis > coarseSiteData;
 | 
			
		||||
  coarseSiteData elide;
 | 
			
		||||
  typedef decltype(coalescedRead(elide)) ScalarComplex;
 | 
			
		||||
  GridBase * fine  = fineData.Grid();
 | 
			
		||||
  GridBase * coarse= coarseData.Grid();
 | 
			
		||||
  int  _ndimension = coarse->_ndimension;
 | 
			
		||||
@@ -106,26 +128,40 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
 | 
			
		||||
    block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
 | 
			
		||||
    assert(block_r[d]*coarse->_rdimensions[d] == fine->_rdimensions[d]);
 | 
			
		||||
  }
 | 
			
		||||
  int blockVol = fine->oSites()/coarse->oSites();
 | 
			
		||||
 | 
			
		||||
  coarseData=Zero();
 | 
			
		||||
 | 
			
		||||
  auto fineData_   = fineData.View();
 | 
			
		||||
  auto coarseData_ = coarseData.View();
 | 
			
		||||
  // Loop over coars parallel, and then loop over fine associated with coarse.
 | 
			
		||||
  thread_for( sf, fine->oSites(), {
 | 
			
		||||
    int sc;
 | 
			
		||||
    Coordinate coor_c(_ndimension);
 | 
			
		||||
    Coordinate coor_f(_ndimension);
 | 
			
		||||
    Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
 | 
			
		||||
    for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
 | 
			
		||||
    Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // To make this lock free, loop over coars parallel, and then loop over fine associated with coarse.
 | 
			
		||||
  // Otherwise do fine inner product per site, and make the update atomic
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  accelerator_for( sci, nbasis*coarse->oSites(), vobj::Nsimd(), {
 | 
			
		||||
 | 
			
		||||
    thread_critical {
 | 
			
		||||
      for(int i=0;i<nbasis;i++) {
 | 
			
		||||
	auto Basis_      = Basis[i].View();
 | 
			
		||||
	coarseData_[sc](i)=coarseData_[sc](i) + innerProduct(Basis_[sf],fineData_[sf]);
 | 
			
		||||
      }
 | 
			
		||||
    auto sc=sci/nbasis;
 | 
			
		||||
    auto i=sci%nbasis;
 | 
			
		||||
    auto Basis_      = Basis[i].View();
 | 
			
		||||
 | 
			
		||||
    Coordinate coor_c(_ndimension);
 | 
			
		||||
    Lexicographic::CoorFromIndex(coor_c,sc,coarse->_rdimensions);  // Block coordinate
 | 
			
		||||
 | 
			
		||||
    int sf;
 | 
			
		||||
    decltype(innerProduct(Basis_(sf),fineData_(sf))) reduce=Zero();
 | 
			
		||||
 | 
			
		||||
    for(int sb=0;sb<blockVol;sb++){
 | 
			
		||||
 | 
			
		||||
      Coordinate coor_b(_ndimension);
 | 
			
		||||
      Coordinate coor_f(_ndimension);
 | 
			
		||||
 | 
			
		||||
      Lexicographic::CoorFromIndex(coor_b,sb,block_r);
 | 
			
		||||
      for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d]+coor_b[d];
 | 
			
		||||
      Lexicographic::IndexFromCoor(coor_f,sf,fine->_rdimensions);
 | 
			
		||||
      
 | 
			
		||||
      reduce=reduce+innerProduct(Basis_(sf),fineData_(sf));
 | 
			
		||||
    }
 | 
			
		||||
    coalescedWrite(coarseData_[sc](i),reduce);
 | 
			
		||||
  });
 | 
			
		||||
  return;
 | 
			
		||||
}
 | 
			
		||||
@@ -160,7 +196,7 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
 | 
			
		||||
  auto fineY_  = fineY.View();
 | 
			
		||||
  auto coarseA_= coarseA.View();
 | 
			
		||||
 | 
			
		||||
  thread_for(sf, fine->oSites(), {
 | 
			
		||||
  accelerator_for(sf, fine->oSites(), CComplex::Nsimd(), {
 | 
			
		||||
    
 | 
			
		||||
    int sc;
 | 
			
		||||
    Coordinate coor_c(_ndimension);
 | 
			
		||||
@@ -171,7 +207,7 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
 | 
			
		||||
    Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
 | 
			
		||||
 | 
			
		||||
    // z = A x + y
 | 
			
		||||
    fineZ_[sf]=coarseA_[sc]*fineX_[sf]+fineY_[sf];
 | 
			
		||||
    coalescedWrite(fineZ_[sf],coarseA_(sc)*fineX_(sf)+fineY_(sf));
 | 
			
		||||
 | 
			
		||||
  });
 | 
			
		||||
 | 
			
		||||
@@ -196,7 +232,7 @@ inline void blockInnerProduct(Lattice<CComplex> &CoarseInner,
 | 
			
		||||
 | 
			
		||||
  fine_inner = localInnerProduct(fineX,fineY);
 | 
			
		||||
  blockSum(coarse_inner,fine_inner);
 | 
			
		||||
  thread_for(ss, coarse->oSites(),{
 | 
			
		||||
  accelerator_for(ss, coarse->oSites(), 1, {
 | 
			
		||||
    CoarseInner_[ss] = coarse_inner_[ss];
 | 
			
		||||
  });
 | 
			
		||||
}
 | 
			
		||||
@@ -226,23 +262,29 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
 | 
			
		||||
  for(int d=0 ; d<_ndimension;d++){
 | 
			
		||||
    block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
 | 
			
		||||
  }
 | 
			
		||||
  int blockVol = fine->oSites()/coarse->oSites();
 | 
			
		||||
 | 
			
		||||
  // Turn this around to loop threaded over sc and interior loop 
 | 
			
		||||
  // over sf would thread better
 | 
			
		||||
  coarseData=Zero();
 | 
			
		||||
  auto coarseData_ = coarseData.View();
 | 
			
		||||
  auto fineData_   = fineData.View();
 | 
			
		||||
 | 
			
		||||
  thread_for(sf,fine->oSites(),{
 | 
			
		||||
    int sc;
 | 
			
		||||
  accelerator_for(sc,coarse->oSites(),1,{
 | 
			
		||||
 | 
			
		||||
    // One thread per sub block
 | 
			
		||||
    Coordinate coor_c(_ndimension);
 | 
			
		||||
    Coordinate coor_f(_ndimension);
 | 
			
		||||
    
 | 
			
		||||
    Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
 | 
			
		||||
    for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
 | 
			
		||||
    Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
 | 
			
		||||
    
 | 
			
		||||
    thread_critical { 
 | 
			
		||||
    Lexicographic::CoorFromIndex(coor_c,sc,coarse->_rdimensions);  // Block coordinate
 | 
			
		||||
    coarseData_[sc]=Zero();
 | 
			
		||||
 | 
			
		||||
    for(int sb=0;sb<blockVol;sb++){
 | 
			
		||||
      
 | 
			
		||||
      int sf;
 | 
			
		||||
      Coordinate coor_b(_ndimension);
 | 
			
		||||
      Coordinate coor_f(_ndimension);
 | 
			
		||||
      Lexicographic::CoorFromIndex(coor_b,sb,block_r);               // Block sub coordinate
 | 
			
		||||
      for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d];
 | 
			
		||||
      Lexicographic::IndexFromCoor(coor_f,sf,fine->_rdimensions);
 | 
			
		||||
 | 
			
		||||
      coarseData_[sc]=coarseData_[sc]+fineData_[sf];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -296,6 +338,7 @@ inline void blockOrthogonalise(Lattice<CComplex> &ip,std::vector<Lattice<vobj> >
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#if 0
 | 
			
		||||
template<class vobj,class CComplex,int nbasis>
 | 
			
		||||
inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
 | 
			
		||||
			 Lattice<vobj>   &fineData,
 | 
			
		||||
@@ -321,7 +364,7 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
 | 
			
		||||
  auto coarseData_ = coarseData.View();
 | 
			
		||||
 | 
			
		||||
  // Loop with a cache friendly loop ordering
 | 
			
		||||
  thread_for(sf,fine->oSites(),{
 | 
			
		||||
  accelerator_for(sf,fine->oSites(),1,{
 | 
			
		||||
    int sc;
 | 
			
		||||
    Coordinate coor_c(_ndimension);
 | 
			
		||||
    Coordinate coor_f(_ndimension);
 | 
			
		||||
@@ -332,13 +375,35 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
 | 
			
		||||
 | 
			
		||||
    for(int i=0;i<nbasis;i++) {
 | 
			
		||||
      auto basis_ = Basis[i].View();
 | 
			
		||||
      if(i==0) fineData_[sf]=coarseData_[sc](i) *basis_[sf];
 | 
			
		||||
      else     fineData_[sf]=fineData_[sf]+coarseData_[sc](i)*basis_[sf];
 | 
			
		||||
      if(i==0) fineData_[sf]=coarseData_[sc](i) *basis_[sf]);
 | 
			
		||||
      else     fineData_[sf]=fineData_[sf]+coarseData_[sc](i)*basis_[sf]);
 | 
			
		||||
    }
 | 
			
		||||
  });
 | 
			
		||||
  return;
 | 
			
		||||
  
 | 
			
		||||
}
 | 
			
		||||
#else
 | 
			
		||||
template<class vobj,class CComplex,int nbasis>
 | 
			
		||||
inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
 | 
			
		||||
			 Lattice<vobj>   &fineData,
 | 
			
		||||
			 const std::vector<Lattice<vobj> > &Basis)
 | 
			
		||||
{
 | 
			
		||||
  GridBase * fine  = fineData.Grid();
 | 
			
		||||
  GridBase * coarse= coarseData.Grid();
 | 
			
		||||
 | 
			
		||||
  fineData=Zero();
 | 
			
		||||
  for(int i=0;i<nbasis;i++) {
 | 
			
		||||
    Lattice<iScalar<CComplex> > ip = PeekIndex<0>(coarseData,i);
 | 
			
		||||
    Lattice<CComplex> cip(coarse);
 | 
			
		||||
    auto cip_ = cip.View();
 | 
			
		||||
    auto  ip_ =  ip.View();
 | 
			
		||||
    accelerator_forNB(sc,coarse->oSites(),CComplex::Nsimd(),{
 | 
			
		||||
	coalescedWrite(cip_[sc], ip_(sc)());
 | 
			
		||||
    });
 | 
			
		||||
    blockZAXPY<vobj,CComplex >(fineData,cip,Basis[i],fineData);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
// Useful for precision conversion, or indeed anything where an operator= does a conversion on scalars.
 | 
			
		||||
// Simd layouts need not match since we use peek/poke Local
 | 
			
		||||
 
 | 
			
		||||
@@ -101,7 +101,8 @@ public:
 | 
			
		||||
  virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
 | 
			
		||||
 | 
			
		||||
  // Efficient support for multigrid coarsening
 | 
			
		||||
  virtual void  Mdir (const FermionField &in, FermionField &out,int dir,int disp);
 | 
			
		||||
  virtual void  Mdir   (const FermionField &in, FermionField &out,int dir,int disp);
 | 
			
		||||
  virtual void  MdirAll(const FermionField &in, std::vector<FermionField> &out);
 | 
			
		||||
 | 
			
		||||
  void   Meooe5D       (const FermionField &in, FermionField &out);
 | 
			
		||||
  void   MeooeDag5D    (const FermionField &in, FermionField &out);
 | 
			
		||||
 
 | 
			
		||||
@@ -62,14 +62,15 @@ public:
 | 
			
		||||
 | 
			
		||||
  // Efficient support for multigrid coarsening
 | 
			
		||||
  virtual void  Mdir (const FermionField &in, FermionField &out,int dir,int disp);
 | 
			
		||||
  virtual void  MdirAll(const FermionField &in, std::vector<FermionField> &out);
 | 
			
		||||
 | 
			
		||||
      ///////////////////////////////////////////////////////////////
 | 
			
		||||
      // Physical surface field utilities
 | 
			
		||||
      ///////////////////////////////////////////////////////////////
 | 
			
		||||
      //      virtual void Dminus(const FermionField &psi, FermionField &chi);     // Inherit trivial case
 | 
			
		||||
      //      virtual void DminusDag(const FermionField &psi, FermionField &chi);  // Inherit trivial case
 | 
			
		||||
      virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
 | 
			
		||||
      virtual void ImportPhysicalFermionSource  (const FermionField &input4d,FermionField &imported5d);
 | 
			
		||||
  ///////////////////////////////////////////////////////////////
 | 
			
		||||
  // Physical surface field utilities
 | 
			
		||||
  ///////////////////////////////////////////////////////////////
 | 
			
		||||
  //      virtual void Dminus(const FermionField &psi, FermionField &chi);     // Inherit trivial case
 | 
			
		||||
  //      virtual void DminusDag(const FermionField &psi, FermionField &chi);  // Inherit trivial case
 | 
			
		||||
  virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
 | 
			
		||||
  virtual void ImportPhysicalFermionSource  (const FermionField &input4d,FermionField &imported5d);
 | 
			
		||||
 | 
			
		||||
  // Constructors
 | 
			
		||||
  ContinuedFractionFermion5D(GaugeField &_Umu,
 | 
			
		||||
 
 | 
			
		||||
@@ -89,6 +89,7 @@ public:
 | 
			
		||||
 | 
			
		||||
  virtual void  Mdiag  (const FermionField &in, FermionField &out) { Mooee(in,out);};   // Same as Mooee applied to both CB's
 | 
			
		||||
  virtual void  Mdir   (const FermionField &in, FermionField &out,int dir,int disp)=0;   // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
 | 
			
		||||
  virtual void  MdirAll(const FermionField &in, std::vector<FermionField> &out)=0;   // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
      virtual void  MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);};
 | 
			
		||||
 
 | 
			
		||||
@@ -103,6 +103,7 @@ public:
 | 
			
		||||
  // Multigrid assistance; force term uses too
 | 
			
		||||
  ///////////////////////////////////////////////////////////////
 | 
			
		||||
  void Mdir(const FermionField &in, FermionField &out, int dir, int disp);
 | 
			
		||||
  void MdirAll(const FermionField &in, std::vector<FermionField> &out);
 | 
			
		||||
  void DhopDir(const FermionField &in, FermionField &out, int dir, int disp);
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////////////////////////////
 | 
			
		||||
 
 | 
			
		||||
@@ -86,7 +86,8 @@ public:
 | 
			
		||||
  void   MooeeDag    (const FermionField &in, FermionField &out);
 | 
			
		||||
  void   MooeeInvDag (const FermionField &in, FermionField &out);
 | 
			
		||||
 | 
			
		||||
  void   Mdir   (const FermionField &in, FermionField &out,int dir,int disp);
 | 
			
		||||
  void Mdir   (const FermionField &in, FermionField &out,int dir,int disp);
 | 
			
		||||
  void MdirAll(const FermionField &in, std::vector<FermionField> &out);
 | 
			
		||||
  void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
 | 
			
		||||
 | 
			
		||||
  // These can be overridden by fancy 5d chiral action
 | 
			
		||||
 
 | 
			
		||||
@@ -67,12 +67,13 @@ public:
 | 
			
		||||
 | 
			
		||||
  // Efficient support for multigrid coarsening
 | 
			
		||||
  virtual void  Mdir (const FermionField &in, FermionField &out,int dir,int disp);
 | 
			
		||||
  virtual void  MdirAll(const FermionField &in, std::vector<FermionField> &out);
 | 
			
		||||
 | 
			
		||||
      ///////////////////////////////////////////////////////////////
 | 
			
		||||
      // Physical surface field utilities
 | 
			
		||||
      ///////////////////////////////////////////////////////////////
 | 
			
		||||
      virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
 | 
			
		||||
      virtual void ImportPhysicalFermionSource  (const FermionField &input4d,FermionField &imported5d);
 | 
			
		||||
  ///////////////////////////////////////////////////////////////
 | 
			
		||||
  // Physical surface field utilities
 | 
			
		||||
  ///////////////////////////////////////////////////////////////
 | 
			
		||||
  virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
 | 
			
		||||
  virtual void ImportPhysicalFermionSource  (const FermionField &input4d,FermionField &imported5d);
 | 
			
		||||
 | 
			
		||||
  // Constructors
 | 
			
		||||
  PartialFractionFermion5D(GaugeField &_Umu,
 | 
			
		||||
 
 | 
			
		||||
@@ -115,9 +115,10 @@ public:
 | 
			
		||||
  // Multigrid assistance; force term uses too
 | 
			
		||||
  ///////////////////////////////////////////////////////////////
 | 
			
		||||
  void Mdir(const FermionField &in, FermionField &out, int dir, int disp);
 | 
			
		||||
  void MdirAll(const FermionField &in, std::vector<FermionField> &out);
 | 
			
		||||
  void DhopDir(const FermionField &in, FermionField &out, int dir, int disp);
 | 
			
		||||
  void DhopDirDisp(const FermionField &in, FermionField &out, int dirdisp,
 | 
			
		||||
                   int gamma, int dag);
 | 
			
		||||
  void DhopDirAll(const FermionField &in, std::vector<FermionField> &out);
 | 
			
		||||
  void DhopDirCalc(const FermionField &in, FermionField &out, int dirdisp,int gamma, int dag);
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////////////////////////////
 | 
			
		||||
  // Extra methods added by derived
 | 
			
		||||
 
 | 
			
		||||
@@ -111,15 +111,16 @@ public:
 | 
			
		||||
  virtual void   MooeeDag    (const FermionField &in, FermionField &out){assert(0);};
 | 
			
		||||
  virtual void   MooeeInvDag (const FermionField &in, FermionField &out){assert(0);};
 | 
			
		||||
  virtual void   Mdir   (const FermionField &in, FermionField &out,int dir,int disp){assert(0);};   // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
 | 
			
		||||
  virtual void   MdirAll(const FermionField &in, std::vector<FermionField> &out){assert(0);};   // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
 | 
			
		||||
 | 
			
		||||
  // These can be overridden by fancy 5d chiral action
 | 
			
		||||
  virtual void DhopDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
 | 
			
		||||
  virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
 | 
			
		||||
  virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
 | 
			
		||||
 | 
			
		||||
      void MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
 | 
			
		||||
      void MomentumSpacePropagatorHt(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
 | 
			
		||||
      void MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
 | 
			
		||||
  void MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
 | 
			
		||||
  void MomentumSpacePropagatorHt(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
 | 
			
		||||
  void MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
 | 
			
		||||
 | 
			
		||||
  // Implement hopping term non-hermitian hopping term; half cb or both
 | 
			
		||||
  // Implement s-diagonal DW
 | 
			
		||||
@@ -131,6 +132,9 @@ public:
 | 
			
		||||
  // add a DhopComm
 | 
			
		||||
  // -- suboptimal interface will presently trigger multiple comms.
 | 
			
		||||
  void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
 | 
			
		||||
  void DhopDirAll(const FermionField &in,std::vector<FermionField> &out);
 | 
			
		||||
  void DhopDirComms(const FermionField &in);
 | 
			
		||||
  void DhopDirCalc(const FermionField &in, FermionField &out,int point);
 | 
			
		||||
    
 | 
			
		||||
  ///////////////////////////////////////////////////////////////
 | 
			
		||||
  // New methods added 
 | 
			
		||||
 
 | 
			
		||||
@@ -60,6 +60,9 @@ public:
 | 
			
		||||
			    int Ls, int Nsite, const FermionField &in, FermionField &out,
 | 
			
		||||
			    int interior=1,int exterior=1) ;
 | 
			
		||||
 | 
			
		||||
  static void DhopDirAll( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls,
 | 
			
		||||
			  int Nsite, const FermionField &in, std::vector<FermionField> &out) ;
 | 
			
		||||
 | 
			
		||||
  static void DhopDirKernel(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf,
 | 
			
		||||
			    int Ls, int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma);
 | 
			
		||||
 | 
			
		||||
@@ -100,8 +103,17 @@ public:
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
 | 
			
		||||
  static accelerator void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,
 | 
			
		||||
  static accelerator_inline void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,
 | 
			
		||||
				   int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma);
 | 
			
		||||
 | 
			
		||||
  static accelerator_inline void DhopDirXp(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
 | 
			
		||||
  static accelerator_inline void DhopDirYp(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
 | 
			
		||||
  static accelerator_inline void DhopDirZp(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
 | 
			
		||||
  static accelerator_inline void DhopDirTp(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
 | 
			
		||||
  static accelerator_inline void DhopDirXm(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
 | 
			
		||||
  static accelerator_inline void DhopDirYm(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
 | 
			
		||||
  static accelerator_inline void DhopDirZm(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
 | 
			
		||||
  static accelerator_inline void DhopDirTm(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
 | 
			
		||||
      
 | 
			
		||||
  // Specialised variants
 | 
			
		||||
  static accelerator void GenericDhopSite(StencilView &st,  DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
 | 
			
		||||
 
 | 
			
		||||
@@ -54,6 +54,14 @@ public:
 | 
			
		||||
    _Mat.Mdir(in,tmp,dir,disp);
 | 
			
		||||
    G5R5(out,tmp);
 | 
			
		||||
  }
 | 
			
		||||
  void OpDirAll(const Field &in, std::vector<Field> &out) {
 | 
			
		||||
    Field tmp(in.Grid());
 | 
			
		||||
    _Mat.MdirAll(in,out);
 | 
			
		||||
    for(int p=0;p<out.size();p++) {
 | 
			
		||||
      tmp=out[p];
 | 
			
		||||
      G5R5(out[p],tmp);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
 | 
			
		||||
 | 
			
		||||
@@ -96,6 +104,12 @@ public:
 | 
			
		||||
    _Mat.Mdir(in,tmp,dir,disp);
 | 
			
		||||
    out=g5*tmp;
 | 
			
		||||
  }
 | 
			
		||||
  void OpDirAll(const Field &in, std::vector<Field> &out) {
 | 
			
		||||
    _Mat.MdirAll(in,out);
 | 
			
		||||
    for(int p=0;p<out.size();p++) {
 | 
			
		||||
      out[p]=g5*out[p];
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -389,6 +389,14 @@ void  CayleyFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,in
 | 
			
		||||
  Meo5D(psi,tmp);
 | 
			
		||||
  this->DhopDir(tmp,chi,dir,disp);
 | 
			
		||||
}
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void  CayleyFermion5D<Impl>::MdirAll(const FermionField &psi, std::vector<FermionField> &out)
 | 
			
		||||
{
 | 
			
		||||
  FermionField tmp(psi.Grid());
 | 
			
		||||
  Meo5D(psi,tmp);
 | 
			
		||||
  this->DhopDirAll(tmp,out);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// force terms; five routines; default to Dhop on diagonal
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::MDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
 | 
			
		||||
 
 | 
			
		||||
@@ -143,6 +143,25 @@ void  ContinuedFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionFi
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void  ContinuedFractionFermion5D<Impl>::MdirAll (const FermionField &psi, std::vector<FermionField> &chi)
 | 
			
		||||
{
 | 
			
		||||
  int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
  this->DhopDirAll(psi,chi); // Dslash on diagonal. g5 Dslash is hermitian
 | 
			
		||||
 | 
			
		||||
  for(int p=0;p<chi.size();p++){
 | 
			
		||||
    int sign=1;
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
      if ( s==(Ls-1) ){
 | 
			
		||||
	ag5xpby_ssp(chi[p],Beta[s]*ZoloHiInv,chi[p],0.0,chi[p],s,s);
 | 
			
		||||
      } else {
 | 
			
		||||
	ag5xpby_ssp(chi[p],cc[s]*Beta[s]*sign*ZoloHiInv,chi[p],0.0,chi[p],s,s);
 | 
			
		||||
      }
 | 
			
		||||
      sign=-sign; 
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void   ContinuedFractionFermion5D<Impl>::Meooe       (const FermionField &psi, FermionField &chi)
 | 
			
		||||
{
 | 
			
		||||
  int Ls = this->Ls;
 | 
			
		||||
 
 | 
			
		||||
@@ -538,10 +538,16 @@ void ImprovedStaggeredFermion5D<Impl>::ZeroCounters(void)
 | 
			
		||||
// Implement the general interface. Here we use SAME mass on all slices
 | 
			
		||||
/////////////////////////////////////////////////////////////////////////
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void ImprovedStaggeredFermion5D<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp) {
 | 
			
		||||
void ImprovedStaggeredFermion5D<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp) 
 | 
			
		||||
{
 | 
			
		||||
  DhopDir(in, out, dir, disp);
 | 
			
		||||
}
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void ImprovedStaggeredFermion5D<Impl>::MdirAll(const FermionField &in, std::vector<FermionField> &out) 
 | 
			
		||||
{
 | 
			
		||||
  assert(0);
 | 
			
		||||
}
 | 
			
		||||
template <class Impl>
 | 
			
		||||
RealD ImprovedStaggeredFermion5D<Impl>::M(const FermionField &in, FermionField &out) {
 | 
			
		||||
  out.Checkerboard() = in.Checkerboard();
 | 
			
		||||
  Dhop(in, out, DaggerNo);
 | 
			
		||||
 
 | 
			
		||||
@@ -362,12 +362,19 @@ void ImprovedStaggeredFermion<Impl>::DhopEO(const FermionField &in, FermionField
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void ImprovedStaggeredFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp) {
 | 
			
		||||
void ImprovedStaggeredFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp) 
 | 
			
		||||
{
 | 
			
		||||
  DhopDir(in, out, dir, disp);
 | 
			
		||||
}
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void ImprovedStaggeredFermion<Impl>::MdirAll(const FermionField &in, std::vector<FermionField> &out) 
 | 
			
		||||
{
 | 
			
		||||
  assert(0); // Not implemented yet
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void ImprovedStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp) {
 | 
			
		||||
void ImprovedStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp) 
 | 
			
		||||
{
 | 
			
		||||
 | 
			
		||||
  Compressor compressor;
 | 
			
		||||
  Stencil.HaloExchange(in, compressor);
 | 
			
		||||
@@ -380,6 +387,7 @@ void ImprovedStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionFiel
 | 
			
		||||
  });
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void ImprovedStaggeredFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
 | 
			
		||||
						  DoubledGaugeField &U,
 | 
			
		||||
@@ -404,7 +412,6 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st
 | 
			
		||||
#ifdef GRID_OMP
 | 
			
		||||
  Compressor compressor; 
 | 
			
		||||
  int len =  U.Grid()->oSites();
 | 
			
		||||
  const int LLs =  1;
 | 
			
		||||
 | 
			
		||||
  DhopTotalTime   -= usecond();
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -31,7 +31,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
NAMESPACE_BEGIN(Grid);
 | 
			
		||||
 | 
			
		||||
template<class Impl>
 | 
			
		||||
 template<class Impl>
 | 
			
		||||
void  PartialFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
 | 
			
		||||
  // this does both dag and undag but is trivial; make a common helper routing
 | 
			
		||||
  int Ls = this->Ls;
 | 
			
		||||
@@ -45,8 +45,25 @@ void  PartialFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionFiel
 | 
			
		||||
    ag5xpby_ssp(chi, scale,chi,0.0,chi,s+1,s+1); 
 | 
			
		||||
  }
 | 
			
		||||
  ag5xpby_ssp(chi,p[nblock]*scale/amax,chi,0.0,chi,Ls-1,Ls-1);
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void  PartialFractionFermion5D<Impl>::MdirAll (const FermionField &psi, std::vector<FermionField> &chi){
 | 
			
		||||
  // this does both dag and undag but is trivial; make a common helper routing
 | 
			
		||||
  int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
  this->DhopDirAll(psi,chi);
 | 
			
		||||
 | 
			
		||||
  for(int point=0;point<chi.size();point++){
 | 
			
		||||
    int nblock=(Ls-1)/2;
 | 
			
		||||
    for(int b=0;b<nblock;b++){
 | 
			
		||||
      int s = 2*b;
 | 
			
		||||
      ag5xpby_ssp(chi[point],-scale,chi[point],0.0,chi[point],s,s); 
 | 
			
		||||
      ag5xpby_ssp(chi[point], scale,chi[point],0.0,chi[point],s+1,s+1); 
 | 
			
		||||
    }
 | 
			
		||||
    ag5xpby_ssp(chi[point],p[nblock]*scale/amax,chi[point],0.0,chi[point],Ls-1,Ls-1);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void   PartialFractionFermion5D<Impl>::Meooe_internal(const FermionField &psi, FermionField &chi,int dag)
 | 
			
		||||
{
 | 
			
		||||
 
 | 
			
		||||
@@ -241,6 +241,15 @@ void WilsonFermion5D<Impl>::DhopDir(const FermionField &in, FermionField &out,in
 | 
			
		||||
  Kernels::DhopDirKernel(Stencil,Umu,Stencil.CommBuf(),Ls,Nsite,in,out,dirdisp,gamma);
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void WilsonFermion5D<Impl>::DhopDirAll(const FermionField &in, std::vector<FermionField> &out)
 | 
			
		||||
{
 | 
			
		||||
  Compressor compressor(DaggerNo);
 | 
			
		||||
  Stencil.HaloExchange(in,compressor);
 | 
			
		||||
  uint64_t Nsite = Umu.Grid()->oSites();
 | 
			
		||||
  Kernels::DhopDirAll(Stencil,Umu,Stencil.CommBuf(),Ls,Nsite,in,out);
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
 | 
			
		||||
 
 | 
			
		||||
@@ -319,28 +319,51 @@ void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int d
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void WilsonFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp) {
 | 
			
		||||
void WilsonFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp) 
 | 
			
		||||
{
 | 
			
		||||
  DhopDir(in, out, dir, disp);
 | 
			
		||||
}
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void WilsonFermion<Impl>::MdirAll(const FermionField &in, std::vector<FermionField> &out) 
 | 
			
		||||
{
 | 
			
		||||
  DhopDirAll(in, out);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp) 
 | 
			
		||||
{
 | 
			
		||||
  Compressor compressor(DaggerNo);
 | 
			
		||||
  Stencil.HaloExchange(in, compressor);
 | 
			
		||||
 | 
			
		||||
  int skip = (disp == 1) ? 0 : 1;
 | 
			
		||||
  int dirdisp = dir + skip * 4;
 | 
			
		||||
  int gamma = dir + (1 - skip) * 4;
 | 
			
		||||
 | 
			
		||||
  DhopDirDisp(in, out, dirdisp, gamma, DaggerNo);
 | 
			
		||||
  DhopDirCalc(in, out, dirdisp, gamma, DaggerNo);
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,int dirdisp, int gamma, int dag) 
 | 
			
		||||
void WilsonFermion<Impl>::DhopDirAll(const FermionField &in, std::vector<FermionField> &out) 
 | 
			
		||||
{
 | 
			
		||||
  Compressor compressor(dag);
 | 
			
		||||
 | 
			
		||||
  Compressor compressor(DaggerNo);
 | 
			
		||||
  Stencil.HaloExchange(in, compressor);
 | 
			
		||||
 | 
			
		||||
  assert((out.size()==8)||(out.size()==9)); 
 | 
			
		||||
  for(int dir=0;dir<Nd;dir++){
 | 
			
		||||
    for(int disp=-1;disp<=1;disp+=2){
 | 
			
		||||
 | 
			
		||||
      int skip = (disp == 1) ? 0 : 1;
 | 
			
		||||
      int dirdisp = dir + skip * 4;
 | 
			
		||||
      int gamma = dir + (1 - skip) * 4;
 | 
			
		||||
 | 
			
		||||
      DhopDirCalc(in, out[dirdisp], dirdisp, gamma, DaggerNo);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void WilsonFermion<Impl>::DhopDirCalc(const FermionField &in, FermionField &out,int dirdisp, int gamma, int dag) 
 | 
			
		||||
{
 | 
			
		||||
  int Ls=1;
 | 
			
		||||
  int Nsite=in.oSites();
 | 
			
		||||
  uint64_t Nsite=in.oSites();
 | 
			
		||||
  Kernels::DhopDirKernel(Stencil, Umu, Stencil.CommBuf(), Ls, Nsite, in, out, dirdisp, gamma);
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
@@ -348,7 +371,8 @@ template <class Impl>
 | 
			
		||||
void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
 | 
			
		||||
                                       DoubledGaugeField &U,
 | 
			
		||||
                                       const FermionField &in,
 | 
			
		||||
                                       FermionField &out, int dag) {
 | 
			
		||||
                                       FermionField &out, int dag) 
 | 
			
		||||
{
 | 
			
		||||
#ifdef GRID_OMP
 | 
			
		||||
  if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute )
 | 
			
		||||
    DhopInternalOverlappedComms(st,lo,U,in,out,dag);
 | 
			
		||||
 
 | 
			
		||||
@@ -91,8 +91,7 @@ accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip)
 | 
			
		||||
  }								\
 | 
			
		||||
  synchronise();						
 | 
			
		||||
 | 
			
		||||
#define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon)			\
 | 
			
		||||
  if (gamma == Dir) {						\
 | 
			
		||||
#define GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,Recon)		\
 | 
			
		||||
    if (SE->_is_local ) {					\
 | 
			
		||||
      int perm= SE->_permute;					\
 | 
			
		||||
      auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane);	\
 | 
			
		||||
@@ -102,10 +101,14 @@ accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip)
 | 
			
		||||
    }								\
 | 
			
		||||
    synchronise();						\
 | 
			
		||||
    Impl::multLink(Uchi, U[sU], chi, dir, SE, st);		\
 | 
			
		||||
    Recon(result, Uchi);					\
 | 
			
		||||
    synchronise();						\
 | 
			
		||||
    Recon(result, Uchi);					
 | 
			
		||||
 | 
			
		||||
#define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon)			\
 | 
			
		||||
  if (gamma == Dir) {						\
 | 
			
		||||
    GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,Recon);			\
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // All legs kernels ; comms then compute
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////
 | 
			
		||||
@@ -284,7 +287,36 @@ void WilsonKernels<Impl>::GenericDhopSiteExt(StencilView &st,  DoubledGaugeField
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
#define DhopDirMacro(Dir,spProj,spRecon)	\
 | 
			
		||||
  template <class Impl>							\
 | 
			
		||||
  void WilsonKernels<Impl>::DhopDir##Dir(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF, \
 | 
			
		||||
					 int sU, const FermionFieldView &in, FermionFieldView &out, int dir) \
 | 
			
		||||
  {									\
 | 
			
		||||
  typedef decltype(coalescedRead(buf[0])) calcHalfSpinor;		\
 | 
			
		||||
  typedef decltype(coalescedRead(in[0]))  calcSpinor;			\
 | 
			
		||||
  calcHalfSpinor chi;							\
 | 
			
		||||
  calcSpinor result;							\
 | 
			
		||||
  calcHalfSpinor Uchi;							\
 | 
			
		||||
  StencilEntry *SE;							\
 | 
			
		||||
  int ptype;								\
 | 
			
		||||
  const int Nsimd = SiteHalfSpinor::Nsimd();				\
 | 
			
		||||
  const int lane=SIMTlane(Nsimd);					\
 | 
			
		||||
									\
 | 
			
		||||
  SE = st.GetEntry(ptype, dir, sF);					\
 | 
			
		||||
  GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,spRecon);				\
 | 
			
		||||
  coalescedWrite(out[sF], result,lane);					\
 | 
			
		||||
  }									
 | 
			
		||||
 | 
			
		||||
DhopDirMacro(Xp,spProjXp,spReconXp);
 | 
			
		||||
DhopDirMacro(Yp,spProjYp,spReconYp);
 | 
			
		||||
DhopDirMacro(Zp,spProjZp,spReconZp);
 | 
			
		||||
DhopDirMacro(Tp,spProjTp,spReconTp);
 | 
			
		||||
DhopDirMacro(Xm,spProjXm,spReconXm);
 | 
			
		||||
DhopDirMacro(Ym,spProjYm,spReconYm);
 | 
			
		||||
DhopDirMacro(Zm,spProjZm,spReconZm);
 | 
			
		||||
DhopDirMacro(Tm,spProjTm,spReconTm);
 | 
			
		||||
 | 
			
		||||
template <class Impl> 
 | 
			
		||||
void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF,
 | 
			
		||||
				    int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int gamma) 
 | 
			
		||||
{
 | 
			
		||||
@@ -299,18 +331,7 @@ void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,Si
 | 
			
		||||
  const int lane=SIMTlane(Nsimd);
 | 
			
		||||
 | 
			
		||||
  SE = st.GetEntry(ptype, dir, sF);
 | 
			
		||||
  if (gamma == Xp) {						
 | 
			
		||||
    if (SE->_is_local ) {					
 | 
			
		||||
      int perm= SE->_permute;					
 | 
			
		||||
      auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane);	
 | 
			
		||||
      spProjXp(chi,tmp);						
 | 
			
		||||
    } else {							
 | 
			
		||||
      chi = coalescedRead(buf[SE->_offset],lane);			
 | 
			
		||||
    }								
 | 
			
		||||
    Impl::multLink(Uchi, U[sU], chi, dir, SE, st);		
 | 
			
		||||
    spReconXp(result, Uchi);					
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  GENERIC_DHOPDIR_LEG(Xp,spProjXp,spReconXp);
 | 
			
		||||
  GENERIC_DHOPDIR_LEG(Yp,spProjYp,spReconYp);
 | 
			
		||||
  GENERIC_DHOPDIR_LEG(Zp,spProjZp,spReconZp);
 | 
			
		||||
  GENERIC_DHOPDIR_LEG(Tp,spProjTp,spReconTp);
 | 
			
		||||
@@ -321,6 +342,38 @@ void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,Si
 | 
			
		||||
  coalescedWrite(out[sF], result,lane);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void WilsonKernels<Impl>::DhopDirAll( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls,
 | 
			
		||||
				      int Nsite, const FermionField &in, std::vector<FermionField> &out) 
 | 
			
		||||
{
 | 
			
		||||
   auto U_v   = U.View();
 | 
			
		||||
   auto in_v  = in.View();
 | 
			
		||||
   auto st_v  = st.View();
 | 
			
		||||
 | 
			
		||||
   auto out_Xm = out[0].View();
 | 
			
		||||
   auto out_Ym = out[1].View();
 | 
			
		||||
   auto out_Zm = out[2].View();
 | 
			
		||||
   auto out_Tm = out[3].View();
 | 
			
		||||
   auto out_Xp = out[4].View();
 | 
			
		||||
   auto out_Yp = out[5].View();
 | 
			
		||||
   auto out_Zp = out[6].View();
 | 
			
		||||
   auto out_Tp = out[7].View();
 | 
			
		||||
 | 
			
		||||
   accelerator_forNB(sss,Nsite*Ls,Simd::Nsimd(),{
 | 
			
		||||
      int sU=sss/Ls;				
 | 
			
		||||
      int sF =sss;				
 | 
			
		||||
      DhopDirXm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Xm,0);
 | 
			
		||||
      DhopDirYm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Ym,1);
 | 
			
		||||
      DhopDirZm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Zm,2);
 | 
			
		||||
      DhopDirTm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Tm,3);
 | 
			
		||||
      DhopDirXp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Xp,4);
 | 
			
		||||
      DhopDirYp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Yp,5);
 | 
			
		||||
      DhopDirZp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Zp,6);
 | 
			
		||||
      DhopDirTp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Tp,7);
 | 
			
		||||
   });
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls,
 | 
			
		||||
					 int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma) 
 | 
			
		||||
@@ -332,13 +385,32 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
 | 
			
		||||
   auto in_v  = in.View();
 | 
			
		||||
   auto out_v = out.View();
 | 
			
		||||
   auto st_v  = st.View();
 | 
			
		||||
   accelerator_for(ss,Nsite,Simd::Nsimd(),{
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
      int sU=ss;
 | 
			
		||||
      int sF = s+Ls*sU; 
 | 
			
		||||
      DhopDirK(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_v,dirdisp,gamma);
 | 
			
		||||
    }
 | 
			
		||||
  });
 | 
			
		||||
#define LoopBody(Dir)				\
 | 
			
		||||
   case Dir :			\
 | 
			
		||||
     accelerator_forNB(ss,Nsite,Simd::Nsimd(),{	\
 | 
			
		||||
       for(int s=0;s<Ls;s++){			\
 | 
			
		||||
	 int sU=ss;				\
 | 
			
		||||
	 int sF = s+Ls*sU;						\
 | 
			
		||||
	 DhopDir##Dir(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_v,dirdisp);\
 | 
			
		||||
       }							       \
 | 
			
		||||
       });							       \
 | 
			
		||||
     break;
 | 
			
		||||
 | 
			
		||||
   switch(gamma){
 | 
			
		||||
   LoopBody(Xp);
 | 
			
		||||
   LoopBody(Yp);
 | 
			
		||||
   LoopBody(Zp);
 | 
			
		||||
   LoopBody(Tp);
 | 
			
		||||
 | 
			
		||||
   LoopBody(Xm);
 | 
			
		||||
   LoopBody(Ym);
 | 
			
		||||
   LoopBody(Zm);
 | 
			
		||||
   LoopBody(Tm);
 | 
			
		||||
   default:
 | 
			
		||||
     assert(0);
 | 
			
		||||
     break;
 | 
			
		||||
   }
 | 
			
		||||
#undef LoopBody
 | 
			
		||||
} 
 | 
			
		||||
 | 
			
		||||
#define KERNEL_CALLNB(A) \
 | 
			
		||||
 
 | 
			
		||||
@@ -92,6 +92,7 @@ public:
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  void Mdir(const GaugeField&, GaugeField&, int, int){ assert(0);}
 | 
			
		||||
  void MdirAll(const GaugeField&, std::vector<GaugeField> &){ assert(0);}
 | 
			
		||||
  void Mdiag(const GaugeField&, GaugeField&){ assert(0);}
 | 
			
		||||
 | 
			
		||||
  void ImportGauge(const GaugeField& _U) {
 | 
			
		||||
 
 | 
			
		||||
@@ -403,6 +403,10 @@ namespace Optimization {
 | 
			
		||||
    accelerator_inline GpuVectorRD operator()(GpuVectorRD a, GpuVectorRD b){
 | 
			
		||||
      return a/b;
 | 
			
		||||
    }
 | 
			
		||||
    accelerator_inline GpuVectorI operator()(GpuVectorI a, GpuVectorI b){
 | 
			
		||||
      return a/b;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Danger -- element wise divide fro complex, not complex div. 
 | 
			
		||||
    // See Grid_vector_types.h lines around 735, applied after "toReal"
 | 
			
		||||
    accelerator_inline GpuVectorCF operator()(GpuVectorCF a, GpuVectorCF b){
 | 
			
		||||
 
 | 
			
		||||
@@ -628,6 +628,7 @@ void Grid_debug_handler_init(void)
 | 
			
		||||
  sigaction(SIGSEGV,&sa,NULL);
 | 
			
		||||
  sigaction(SIGTRAP,&sa,NULL);
 | 
			
		||||
  sigaction(SIGBUS,&sa,NULL);
 | 
			
		||||
  sigaction(SIGUSR2,&sa,NULL);
 | 
			
		||||
 | 
			
		||||
  feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO);
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,4 +1,4 @@
 | 
			
		||||
# Grid [),branch:name:develop)/statusIcon.svg)](http://ci.cliath.ph.ed.ac.uk/project.html?projectId=Grid&tab=projectOverview) [](https://travis-ci.org/paboyle/Grid)
 | 
			
		||||
# Grid [),branch:name:develop)/statusIcon.svg)](http://ci.cliath.ph.ed.ac.uk/project.html?projectId=GridBasedSoftware_Grid&tab=projectOverview) [](https://travis-ci.org/paboyle/Grid)
 | 
			
		||||
 | 
			
		||||
**Data parallel C++ mathematical object library.**
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -280,7 +280,8 @@ case ${CXX} in
 | 
			
		||||
#    CXXLD="nvcc -v -link"
 | 
			
		||||
    CXX="nvcc -x cu "
 | 
			
		||||
    CXXLD="nvcc -link"
 | 
			
		||||
    CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing -Xcompiler -Wno-unusable-partial-specialization --expt-extended-lambda --expt-relaxed-constexpr"
 | 
			
		||||
#    CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing -Xcompiler -Wno-unusable-partial-specialization --expt-extended-lambda --expt-relaxed-constexpr"
 | 
			
		||||
    CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr"
 | 
			
		||||
    if test $ac_openmp = yes; then
 | 
			
		||||
       CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp"
 | 
			
		||||
    fi
 | 
			
		||||
 
 | 
			
		||||
@@ -73,6 +73,7 @@ public:
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Support for coarsening to a multigrid
 | 
			
		||||
  void OpDirAll  (const Field &in, std::vector<Field> &out){};
 | 
			
		||||
  void OpDiag (const Field &in, Field &out) {};
 | 
			
		||||
  void OpDir  (const Field &in, Field &out,int dir,int disp){};
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,4 +1,6 @@
 | 
			
		||||
   /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
@@ -29,323 +31,174 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
 | 
			
		||||
//#include <algorithms/iterative/PrecConjugateResidual.h>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
 ;
 | 
			
		||||
/* Params
 | 
			
		||||
 * Grid: 
 | 
			
		||||
 * block1(4)
 | 
			
		||||
 * block2(4)
 | 
			
		||||
 * 
 | 
			
		||||
 * Subspace
 | 
			
		||||
 * * Fine  : Subspace(nbasis,hi,lo,order,first,step) -- 32, 60,0.02,500,100,100
 | 
			
		||||
 * * Coarse: Subspace(nbasis,hi,lo,order,first,step) -- 32, 18,0.02,500,100,100
 | 
			
		||||
 | 
			
		||||
class myclass: Serializable {
 | 
			
		||||
public:
 | 
			
		||||
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(myclass,
 | 
			
		||||
			  int, domaindecompose,
 | 
			
		||||
			  int, domainsize,
 | 
			
		||||
			  int, order,
 | 
			
		||||
			  int, Ls,
 | 
			
		||||
			  double, mq,
 | 
			
		||||
			  double, lo,
 | 
			
		||||
			  double, hi,
 | 
			
		||||
			  int, steps);
 | 
			
		||||
 | 
			
		||||
  myclass(){};
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
myclass params;
 | 
			
		||||
 * Smoother:
 | 
			
		||||
 * * Fine: Cheby(hi, lo, order)            --  60,0.5,10
 | 
			
		||||
 * * Coarse: Cheby(hi, lo, order)          --  12,0.1,4
 | 
			
		||||
 | 
			
		||||
 * Lanczos:
 | 
			
		||||
 * CoarseCoarse IRL( Nk, Nm, Nstop, poly(lo,hi,order))   24,36,24,0.002,4.0,61 
 | 
			
		||||
 */
 | 
			
		||||
RealD InverseApproximation(RealD x){
 | 
			
		||||
  return 1.0/x;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Fobj,class CComplex,int nbasis, class Matrix>
 | 
			
		||||
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
  typedef LinearOperatorBase<Field>                            FineOperator;
 | 
			
		||||
  Matrix         & _SmootherMatrix;
 | 
			
		||||
  FineOperator   & _SmootherOperator;
 | 
			
		||||
  
 | 
			
		||||
  Chebyshev<Field> Cheby;
 | 
			
		||||
 | 
			
		||||
  ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator,Matrix &SmootherMatrix) :
 | 
			
		||||
    _SmootherOperator(SmootherOperator),
 | 
			
		||||
    _SmootherMatrix(SmootherMatrix),
 | 
			
		||||
    Cheby(_lo,_hi,_ord,InverseApproximation)
 | 
			
		||||
  {};
 | 
			
		||||
 | 
			
		||||
  void operator() (const Field &in, Field &out) 
 | 
			
		||||
  {
 | 
			
		||||
    Field tmp(in.Grid());
 | 
			
		||||
    MdagMLinearOperator<Matrix,Field>   MdagMOp(_SmootherMatrix); 
 | 
			
		||||
    _SmootherOperator.AdjOp(in,tmp);
 | 
			
		||||
    Cheby(MdagMOp,tmp,out);         
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field>
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
  typedef LinearOperatorBase<Field>                            FineOperator;
 | 
			
		||||
  Matrix         & SmootherMatrix;
 | 
			
		||||
  FineOperator   & SmootherOperator;
 | 
			
		||||
  RealD tol;
 | 
			
		||||
  RealD shift;
 | 
			
		||||
  int   maxit;
 | 
			
		||||
 | 
			
		||||
  MirsSmoother(RealD _shift,RealD _tol,int _maxit,FineOperator &_SmootherOperator,Matrix &_SmootherMatrix) :
 | 
			
		||||
    shift(_shift),tol(_tol),maxit(_maxit),
 | 
			
		||||
    SmootherOperator(_SmootherOperator),
 | 
			
		||||
    SmootherMatrix(_SmootherMatrix)
 | 
			
		||||
  {};
 | 
			
		||||
 | 
			
		||||
  void operator() (const Field &in, Field &out) 
 | 
			
		||||
  {
 | 
			
		||||
    ZeroGuesser<Field> Guess;
 | 
			
		||||
    ConjugateGradient<Field>  CG(tol,maxit,false);
 | 
			
		||||
 
 | 
			
		||||
    Field src(in.Grid());
 | 
			
		||||
 | 
			
		||||
    ShiftedMdagMLinearOperator<SparseMatrixBase<Field>,Field> MdagMOp(SmootherMatrix,shift);
 | 
			
		||||
    SmootherOperator.AdjOp(in,src);
 | 
			
		||||
    Guess(src,out);
 | 
			
		||||
    CG(MdagMOp,src,out); 
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver>
 | 
			
		||||
class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > {
 | 
			
		||||
public:
 | 
			
		||||
 | 
			
		||||
  typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
 | 
			
		||||
  typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;
 | 
			
		||||
 | 
			
		||||
  typedef typename Aggregation<Fobj,CComplex,nbasis>::siteVector     siteVector;
 | 
			
		||||
  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseScalar CoarseScalar;
 | 
			
		||||
  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
 | 
			
		||||
  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix;
 | 
			
		||||
  typedef typename Aggregation<Fobj,CComplex,nbasis>::FineField    FineField;
 | 
			
		||||
  typedef LinearOperatorBase<FineField>                            FineOperator;
 | 
			
		||||
  typedef LinearFunction    <FineField>                            FineSmoother;
 | 
			
		||||
 | 
			
		||||
  Aggregates     & _Aggregates;
 | 
			
		||||
  CoarseOperator & _CoarseOperator;
 | 
			
		||||
  Matrix         & _FineMatrix;
 | 
			
		||||
  FineOperator   & _FineOperator;
 | 
			
		||||
  Matrix         & _SmootherMatrix;
 | 
			
		||||
  FineOperator   & _SmootherOperator;
 | 
			
		||||
  Guesser        & _Guess;
 | 
			
		||||
  FineSmoother   & _Smoother;
 | 
			
		||||
  CoarseSolver   & _CoarseSolve;
 | 
			
		||||
 | 
			
		||||
  int    level;  void Level(int lv) {level = lv; };
 | 
			
		||||
 | 
			
		||||
#define GridLogLevel std::cout << GridLogMessage <<std::string(level,'\t')<< " Level "<<level <<" "
 | 
			
		||||
 | 
			
		||||
  // Constructor
 | 
			
		||||
  MultiGridPreconditioner(Aggregates &Agg, CoarseOperator &Coarse, 
 | 
			
		||||
			  FineOperator &Fine,Matrix &FineMatrix,
 | 
			
		||||
			  FineOperator &Smooth,Matrix &SmootherMatrix) 
 | 
			
		||||
			  FineSmoother &Smoother,
 | 
			
		||||
			  Guesser &Guess_,
 | 
			
		||||
			  CoarseSolver &CoarseSolve_)
 | 
			
		||||
    : _Aggregates(Agg),
 | 
			
		||||
      _CoarseOperator(Coarse),
 | 
			
		||||
      _FineOperator(Fine),
 | 
			
		||||
      _FineMatrix(FineMatrix),
 | 
			
		||||
      _SmootherOperator(Smooth),
 | 
			
		||||
      _SmootherMatrix(SmootherMatrix)
 | 
			
		||||
      _Smoother(Smoother),
 | 
			
		||||
      _Guess(Guess_),
 | 
			
		||||
      _CoarseSolve(CoarseSolve_),
 | 
			
		||||
      level(1)  {  }
 | 
			
		||||
 | 
			
		||||
  virtual void operator()(const FineField &in, FineField & out) 
 | 
			
		||||
  {
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void PowerMethod(const FineField &in) {
 | 
			
		||||
 | 
			
		||||
    FineField p1(in.Grid());
 | 
			
		||||
    FineField p2(in.Grid());
 | 
			
		||||
 | 
			
		||||
    MdagMLinearOperator<Matrix,FineField>   fMdagMOp(_FineMatrix);
 | 
			
		||||
 | 
			
		||||
    p1=in;
 | 
			
		||||
    for(int i=0;i<20;i++){
 | 
			
		||||
      RealD absp1=std::sqrt(norm2(p1));
 | 
			
		||||
      fMdagMOp.HermOp(p1,p2);// this is the G5 herm bit      
 | 
			
		||||
      //      _FineOperator.Op(p1,p2);// this is the G5 herm bit      
 | 
			
		||||
      RealD absp2=std::sqrt(norm2(p2));
 | 
			
		||||
      if(i%10==9)
 | 
			
		||||
	std::cout<<GridLogMessage << "Power method on mdagm "<<i<<" " << absp2/absp1<<std::endl;
 | 
			
		||||
      p1=p2*(1.0/std::sqrt(absp2));
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void operator()(const FineField &in, FineField & out) {
 | 
			
		||||
    operatorCheby(in,out);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // ADEF2: [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
 | 
			
		||||
    // ADEF1: [MP+Q ] in =M [1 - A Q] in + Q in  
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////
 | 
			
		||||
#if 1
 | 
			
		||||
  void operatorADEF2(const FineField &in, FineField & out) {
 | 
			
		||||
 | 
			
		||||
    CoarseVector Csrc(_CoarseOperator.Grid());
 | 
			
		||||
    CoarseVector Ctmp(_CoarseOperator.Grid());
 | 
			
		||||
    CoarseVector Csol(_CoarseOperator.Grid());
 | 
			
		||||
 | 
			
		||||
    ConjugateGradient<CoarseVector>  CG(1.0e-10,100000);
 | 
			
		||||
    ConjugateGradient<FineField>    fCG(3.0e-2,1000);
 | 
			
		||||
 | 
			
		||||
    HermitianLinearOperator<CoarseOperator,CoarseVector>  HermOp(_CoarseOperator);
 | 
			
		||||
    MdagMLinearOperator<CoarseOperator,CoarseVector>     MdagMOp(_CoarseOperator);
 | 
			
		||||
    MdagMLinearOperator<Matrix,FineField>               fMdagMOp(_FineMatrix);
 | 
			
		||||
 | 
			
		||||
    FineField tmp(in.Grid());
 | 
			
		||||
    FineField res(in.Grid());
 | 
			
		||||
    FineField Min(in.Grid());
 | 
			
		||||
 | 
			
		||||
    // Monitor completeness of low mode space
 | 
			
		||||
    _Aggregates.ProjectToSubspace  (Csrc,in);
 | 
			
		||||
    _Aggregates.PromoteFromSubspace(Csrc,out);
 | 
			
		||||
    std::cout<<GridLogMessage<<"Coarse Grid Preconditioner\nCompleteness in: "<<std::sqrt(norm2(out)/norm2(in))<<std::endl;
 | 
			
		||||
 | 
			
		||||
    // [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
 | 
			
		||||
    _FineOperator.Op(in,tmp);// this is the G5 herm bit
 | 
			
		||||
    fCG(fMdagMOp,tmp,Min);    // solves  MdagM = g5 M g5M
 | 
			
		||||
 | 
			
		||||
    // Monitor completeness of low mode space
 | 
			
		||||
    _Aggregates.ProjectToSubspace  (Csrc,Min);
 | 
			
		||||
    _Aggregates.PromoteFromSubspace(Csrc,out);
 | 
			
		||||
    std::cout<<GridLogMessage<<"Completeness Min: "<<std::sqrt(norm2(out)/norm2(Min))<<std::endl;
 | 
			
		||||
 | 
			
		||||
    _FineOperator.Op(Min,tmp);
 | 
			
		||||
    tmp = in - tmp;   // in - A Min
 | 
			
		||||
 | 
			
		||||
    Csol=Zero();
 | 
			
		||||
    _Aggregates.ProjectToSubspace  (Csrc,tmp);
 | 
			
		||||
    HermOp.AdjOp(Csrc,Ctmp);// Normal equations
 | 
			
		||||
    CG(MdagMOp,Ctmp,Csol);
 | 
			
		||||
 | 
			
		||||
    HermOp.Op(Csol,Ctmp);
 | 
			
		||||
    Ctmp=Ctmp-Csrc;
 | 
			
		||||
    std::cout<<GridLogMessage<<"coarse space true residual "<<std::sqrt(norm2(Ctmp)/norm2(Csrc))<<std::endl;
 | 
			
		||||
    _Aggregates.PromoteFromSubspace(Csol,out);
 | 
			
		||||
 | 
			
		||||
    _FineOperator.Op(out,res);
 | 
			
		||||
    res=res-tmp;
 | 
			
		||||
    std::cout<<GridLogMessage<<"promoted sol residual "<<std::sqrt(norm2(res)/norm2(tmp))<<std::endl;
 | 
			
		||||
    _Aggregates.ProjectToSubspace  (Csrc,res);
 | 
			
		||||
    std::cout<<GridLogMessage<<"coarse space proj of residual "<<norm2(Csrc)<<std::endl;
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
    out = out+Min; // additive coarse space correction
 | 
			
		||||
    //    out = Min; // no additive coarse space correction
 | 
			
		||||
 | 
			
		||||
    _FineOperator.Op(out,tmp);
 | 
			
		||||
    tmp=tmp-in;         // tmp is new residual
 | 
			
		||||
 | 
			
		||||
    std::cout<<GridLogMessage<< " Preconditioner in  " << norm2(in)<<std::endl; 
 | 
			
		||||
    std::cout<<GridLogMessage<< " Preconditioner out " << norm2(out)<<std::endl; 
 | 
			
		||||
    std::cout<<GridLogMessage<<"preconditioner thinks residual is "<<std::sqrt(norm2(tmp)/norm2(in))<<std::endl;
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
  // ADEF1: [MP+Q ] in =M [1 - A Q] in + Q in  
 | 
			
		||||
#if 1
 | 
			
		||||
  void operatorADEF1(const FineField &in, FineField & out) {
 | 
			
		||||
 | 
			
		||||
    CoarseVector Csrc(_CoarseOperator.Grid());
 | 
			
		||||
    CoarseVector Ctmp(_CoarseOperator.Grid());
 | 
			
		||||
    CoarseVector Csol(_CoarseOperator.Grid()); Csol=Zero();
 | 
			
		||||
 | 
			
		||||
    ConjugateGradient<CoarseVector>  CG(1.0e-10,100000);
 | 
			
		||||
    ConjugateGradient<FineField>    fCG(3.0e-2,1000);
 | 
			
		||||
 | 
			
		||||
    HermitianLinearOperator<CoarseOperator,CoarseVector>  HermOp(_CoarseOperator);
 | 
			
		||||
    MdagMLinearOperator<CoarseOperator,CoarseVector>     MdagMOp(_CoarseOperator);
 | 
			
		||||
    ShiftedMdagMLinearOperator<Matrix,FineField>        fMdagMOp(_FineMatrix,0.1);
 | 
			
		||||
 | 
			
		||||
    FineField tmp(in.Grid());
 | 
			
		||||
    FineField res(in.Grid());
 | 
			
		||||
    FineField Qin(in.Grid());
 | 
			
		||||
 | 
			
		||||
    // Monitor completeness of low mode space
 | 
			
		||||
    //    _Aggregates.ProjectToSubspace  (Csrc,in);
 | 
			
		||||
    //    _Aggregates.PromoteFromSubspace(Csrc,out);
 | 
			
		||||
    //    std::cout<<GridLogMessage<<"Coarse Grid Preconditioner\nCompleteness in: "<<std::sqrt(norm2(out)/norm2(in))<<std::endl;
 | 
			
		||||
    
 | 
			
		||||
    _Aggregates.ProjectToSubspace  (Csrc,in);
 | 
			
		||||
    HermOp.AdjOp(Csrc,Ctmp);// Normal equations
 | 
			
		||||
    CG(MdagMOp,Ctmp,Csol);
 | 
			
		||||
    _Aggregates.PromoteFromSubspace(Csol,Qin);
 | 
			
		||||
 | 
			
		||||
    //    Qin=0;
 | 
			
		||||
    _FineOperator.Op(Qin,tmp);// A Q in
 | 
			
		||||
    tmp = in - tmp;            // in - A Q in
 | 
			
		||||
 | 
			
		||||
    _FineOperator.Op(tmp,res);// this is the G5 herm bit
 | 
			
		||||
    fCG(fMdagMOp,res,out);    // solves  MdagM = g5 M g5M
 | 
			
		||||
 | 
			
		||||
    out = out + Qin;
 | 
			
		||||
 | 
			
		||||
    _FineOperator.Op(out,tmp);
 | 
			
		||||
    tmp=tmp-in;         // tmp is new residual
 | 
			
		||||
 | 
			
		||||
    std::cout<<GridLogMessage<<"preconditioner thinks residual is "<<std::sqrt(norm2(tmp)/norm2(in))<<std::endl;
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  void SmootherTest (const FineField & in){
 | 
			
		||||
    
 | 
			
		||||
    FineField vec1(in.Grid());
 | 
			
		||||
    FineField vec2(in.Grid());
 | 
			
		||||
    RealD lo[3] = { 0.5, 1.0, 2.0};
 | 
			
		||||
 | 
			
		||||
    //    MdagMLinearOperator<Matrix,FineField>        fMdagMOp(_FineMatrix);
 | 
			
		||||
    ShiftedMdagMLinearOperator<Matrix,FineField> fMdagMOp(_SmootherMatrix,0.0);
 | 
			
		||||
 | 
			
		||||
    RealD Ni,r;
 | 
			
		||||
 | 
			
		||||
    Ni = norm2(in);
 | 
			
		||||
 | 
			
		||||
    for(int ilo=0;ilo<3;ilo++){
 | 
			
		||||
      for(int ord=5;ord<50;ord*=2){
 | 
			
		||||
 | 
			
		||||
	std::cout << " lo "<<lo[ilo]<<" order "<<ord<<std::endl;
 | 
			
		||||
 | 
			
		||||
	_SmootherOperator.AdjOp(in,vec1);
 | 
			
		||||
 | 
			
		||||
	Chebyshev<FineField> Cheby  (lo[ilo],70.0,ord,InverseApproximation);
 | 
			
		||||
	Cheby(fMdagMOp,vec1,vec2);    // solves  MdagM = g5 M g5M
 | 
			
		||||
 | 
			
		||||
	_FineOperator.Op(vec2,vec1);// this is the G5 herm bit
 | 
			
		||||
	vec1  = in - vec1;   // tmp  = in - A Min
 | 
			
		||||
	r=norm2(vec1);
 | 
			
		||||
	std::cout<<GridLogMessage << "Smoother resid "<<std::sqrt(r/Ni)<<std::endl;
 | 
			
		||||
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void operatorCheby(const FineField &in, FineField & out) {
 | 
			
		||||
 | 
			
		||||
    CoarseVector Csrc(_CoarseOperator.Grid());
 | 
			
		||||
    CoarseVector Ctmp(_CoarseOperator.Grid());
 | 
			
		||||
    CoarseVector Csol(_CoarseOperator.Grid()); Csol=Zero();
 | 
			
		||||
 | 
			
		||||
    ConjugateGradient<CoarseVector>  CG(3.0e-3,100000);
 | 
			
		||||
 | 
			
		||||
    HermitianLinearOperator<CoarseOperator,CoarseVector>  HermOp(_CoarseOperator);
 | 
			
		||||
    MdagMLinearOperator<CoarseOperator,CoarseVector>     MdagMOp(_CoarseOperator);
 | 
			
		||||
    //    MdagMLinearOperator<Matrix,FineField>        fMdagMOp(_FineMatrix);
 | 
			
		||||
    ShiftedMdagMLinearOperator<Matrix,FineField> fMdagMOp(_SmootherMatrix,0.0);
 | 
			
		||||
 | 
			
		||||
    CoarseVector Csol(_CoarseOperator.Grid()); 
 | 
			
		||||
    FineField vec1(in.Grid());
 | 
			
		||||
    FineField vec2(in.Grid());
 | 
			
		||||
 | 
			
		||||
    Chebyshev<FineField> Cheby    (params.lo,params.hi,params.order,InverseApproximation);
 | 
			
		||||
    Chebyshev<FineField> ChebyAccu(params.lo,params.hi,params.order,InverseApproximation);
 | 
			
		||||
    double t;
 | 
			
		||||
    // Fine Smoother
 | 
			
		||||
    t=-usecond();
 | 
			
		||||
    _Smoother(in,out);
 | 
			
		||||
    t+=usecond();
 | 
			
		||||
    GridLogLevel << "Smoother took "<< t/1000.0<< "ms" <<std::endl;
 | 
			
		||||
 | 
			
		||||
    _Aggregates.ProjectToSubspace  (Csrc,in);
 | 
			
		||||
    // Update the residual
 | 
			
		||||
    _FineOperator.Op(out,vec1);  sub(vec1, in ,vec1);   
 | 
			
		||||
 | 
			
		||||
    //    _Aggregates.PromoteFromSubspace(Csrc,out);
 | 
			
		||||
    //    std::cout<<GridLogMessage<<"Completeness: "<<std::sqrt(norm2(out)/norm2(in))<<std::endl;
 | 
			
		||||
    
 | 
			
		||||
    //    ofstream fout("smoother");
 | 
			
		||||
    //    Cheby.csv(fout);
 | 
			
		||||
 | 
			
		||||
    // V11 multigrid.
 | 
			
		||||
    // Use a fixed chebyshev and hope hermiticity helps.
 | 
			
		||||
 | 
			
		||||
    // To make a working smoother for indefinite operator
 | 
			
		||||
    // must multiply by "Mdag" (ouch loses all low mode content)
 | 
			
		||||
    // and apply to poly approx of (mdagm)^-1.
 | 
			
		||||
    // so that we end up with an odd polynomial.
 | 
			
		||||
 | 
			
		||||
    RealD Ni = norm2(in);
 | 
			
		||||
 | 
			
		||||
    _SmootherOperator.AdjOp(in,vec1);// this is the G5 herm bit
 | 
			
		||||
    ChebyAccu(fMdagMOp,vec1,out);    // solves  MdagM = g5 M g5M
 | 
			
		||||
 | 
			
		||||
    // Update with residual for out
 | 
			
		||||
    _FineOperator.Op(out,vec1);// this is the G5 herm bit
 | 
			
		||||
    vec1  = in - vec1;   // tmp  = in - A Min
 | 
			
		||||
 | 
			
		||||
    RealD r = norm2(vec1);
 | 
			
		||||
 | 
			
		||||
    std::cout<<GridLogMessage << "Smoother resid "<<std::sqrt(r/Ni)<< " " << r << " " << Ni <<std::endl;
 | 
			
		||||
    
 | 
			
		||||
    // Fine to Coarse 
 | 
			
		||||
    t=-usecond();
 | 
			
		||||
    _Aggregates.ProjectToSubspace  (Csrc,vec1);
 | 
			
		||||
    
 | 
			
		||||
    HermOp.AdjOp(Csrc,Ctmp);// Normal equations // This appears to be zero.
 | 
			
		||||
    CG(MdagMOp,Ctmp,Csol);
 | 
			
		||||
    _Aggregates.PromoteFromSubspace(Csol,vec1); // Ass^{-1} [in - A Min]_s
 | 
			
		||||
                                                // Q = Q[in - A Min]  
 | 
			
		||||
    out = out+vec1;
 | 
			
		||||
    t+=usecond();
 | 
			
		||||
    GridLogLevel << "Project to coarse took "<< t/1000.0<< "ms" <<std::endl;
 | 
			
		||||
 | 
			
		||||
    // Three preconditioner smoothing -- hermitian if C3 = C1
 | 
			
		||||
    // Recompute error
 | 
			
		||||
    _FineOperator.Op(out,vec1);// this is the G5 herm bit
 | 
			
		||||
    vec1  = in - vec1;   // tmp  = in - A Min
 | 
			
		||||
    r=norm2(vec1);
 | 
			
		||||
    // Coarse correction
 | 
			
		||||
    t=-usecond();
 | 
			
		||||
    _CoarseSolve(Csrc,Csol);
 | 
			
		||||
    t+=usecond();
 | 
			
		||||
    GridLogLevel << "Coarse solve took "<< t/1000.0<< "ms" <<std::endl;
 | 
			
		||||
 | 
			
		||||
    std::cout<<GridLogMessage << "Coarse resid "<<std::sqrt(r/Ni)<<std::endl;
 | 
			
		||||
    // Coarse to Fine
 | 
			
		||||
    t=-usecond();
 | 
			
		||||
    _Aggregates.PromoteFromSubspace(Csol,vec1); 
 | 
			
		||||
    add(out,out,vec1);
 | 
			
		||||
    t+=usecond();
 | 
			
		||||
    GridLogLevel << "Promote to this level took "<< t/1000.0<< "ms" <<std::endl;
 | 
			
		||||
 | 
			
		||||
    // Reapply smoother
 | 
			
		||||
    _SmootherOperator.Op(vec1,vec2);  // this is the G5 herm bit
 | 
			
		||||
    ChebyAccu(fMdagMOp,vec2,vec1);    // solves  MdagM = g5 M g5M
 | 
			
		||||
    // Residual
 | 
			
		||||
    _FineOperator.Op(out,vec1);  sub(vec1 ,in , vec1);  
 | 
			
		||||
 | 
			
		||||
    out =out+vec1;
 | 
			
		||||
    vec1  = in - vec1;   // tmp  = in - A Min
 | 
			
		||||
    r=norm2(vec1);
 | 
			
		||||
    std::cout<<GridLogMessage << "Smoother resid "<<std::sqrt(r/Ni)<<std::endl;
 | 
			
		||||
    // Fine Smoother
 | 
			
		||||
    t=-usecond();
 | 
			
		||||
    _Smoother(vec1,vec2);
 | 
			
		||||
    t+=usecond();
 | 
			
		||||
    GridLogLevel << "Smoother took "<< t/1000.0<< "ms" <<std::endl;
 | 
			
		||||
 | 
			
		||||
    add( out,out,vec2);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
int main (int argc, char ** argv)
 | 
			
		||||
{
 | 
			
		||||
  Grid_init(&argc,&argv);
 | 
			
		||||
 | 
			
		||||
  XmlReader RD("params.xml");
 | 
			
		||||
  read(RD,"params",params);
 | 
			
		||||
  std::cout<<"Params: Order "<<params.order<<"["<<params.lo<<","<<params.hi<<"]"<< " steps "<<params.steps<<std::endl;
 | 
			
		||||
 | 
			
		||||
  const int Ls=params.Ls;
 | 
			
		||||
  const int Ls=16;
 | 
			
		||||
 | 
			
		||||
  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
 | 
			
		||||
  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
 | 
			
		||||
@@ -357,14 +210,22 @@ int main (int argc, char ** argv)
 | 
			
		||||
  // Construct a coarsened grid; utility for this?
 | 
			
		||||
  ///////////////////////////////////////////////////
 | 
			
		||||
  std::vector<int> block ({2,2,2,2});
 | 
			
		||||
  std::vector<int> blockc ({2,2,2,2});
 | 
			
		||||
  const int nbasis= 32;
 | 
			
		||||
 | 
			
		||||
  const int nbasisc= 32;
 | 
			
		||||
  auto clatt = GridDefaultLatt();
 | 
			
		||||
  for(int d=0;d<clatt.size();d++){
 | 
			
		||||
    clatt[d] = clatt[d]/block[d];
 | 
			
		||||
  }
 | 
			
		||||
  auto cclatt = clatt;
 | 
			
		||||
  for(int d=0;d<clatt.size();d++){
 | 
			
		||||
    cclatt[d] = clatt[d]/blockc[d];
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
 | 
			
		||||
  GridCartesian *Coarse5d =  SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
 | 
			
		||||
  GridCartesian *CoarseCoarse4d =  SpaceTimeGrid::makeFourDimGrid(cclatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
 | 
			
		||||
  GridCartesian *CoarseCoarse5d =  SpaceTimeGrid::makeFiveDimGrid(1,CoarseCoarse4d);
 | 
			
		||||
 | 
			
		||||
  std::vector<int> seeds4({1,2,3,4});
 | 
			
		||||
  std::vector<int> seeds5({5,6,7,8});
 | 
			
		||||
@@ -372,186 +233,167 @@ int main (int argc, char ** argv)
 | 
			
		||||
  GridParallelRNG          RNG5(FGrid);   RNG5.SeedFixedIntegers(seeds5);
 | 
			
		||||
  GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4);
 | 
			
		||||
  GridParallelRNG          CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds);
 | 
			
		||||
 | 
			
		||||
  Gamma g5(Gamma::Algebra::Gamma5);
 | 
			
		||||
 | 
			
		||||
  LatticeFermion    src(FGrid); gaussian(RNG5,src);// src=src+g5*src;
 | 
			
		||||
  LatticeFermion result(FGrid); result=Zero();
 | 
			
		||||
  LatticeFermion    ref(FGrid); ref=Zero();
 | 
			
		||||
  LatticeFermion    tmp(FGrid);
 | 
			
		||||
  LatticeFermion    err(FGrid);
 | 
			
		||||
  LatticeFermion result(FGrid); 
 | 
			
		||||
  LatticeGaugeField Umu(UGrid); 
 | 
			
		||||
  LatticeGaugeField UmuDD(UGrid); 
 | 
			
		||||
  LatticeColourMatrix U(UGrid);
 | 
			
		||||
  LatticeColourMatrix zz(UGrid);
 | 
			
		||||
 | 
			
		||||
  FieldMetaData header;
 | 
			
		||||
  std::string file("./ckpoint_lat.4000");
 | 
			
		||||
  NerscIO::readConfiguration(Umu,header,file);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  if ( params.domaindecompose ) { 
 | 
			
		||||
    Lattice<iScalar<vInteger> > coor(UGrid);
 | 
			
		||||
    zz=Zero();
 | 
			
		||||
    for(int mu=0;mu<Nd;mu++){
 | 
			
		||||
      LatticeCoordinate(coor,mu);
 | 
			
		||||
      U = PeekIndex<LorentzIndex>(Umu,mu);
 | 
			
		||||
      U = where(mod(coor,params.domainsize)==(Integer)0,zz,U);
 | 
			
		||||
      PokeIndex<LorentzIndex>(UmuDD,U,mu);
 | 
			
		||||
    }
 | 
			
		||||
  } else { 
 | 
			
		||||
    UmuDD = Umu;
 | 
			
		||||
  }
 | 
			
		||||
  //  SU3::ColdConfiguration(RNG4,Umu);
 | 
			
		||||
  //  SU3::TepidConfiguration(RNG4,Umu);
 | 
			
		||||
  //  SU3::HotConfiguration(RNG4,Umu);
 | 
			
		||||
  //  Umu=Zero();
 | 
			
		||||
 | 
			
		||||
  RealD mass=params.mq;
 | 
			
		||||
  RealD M5=1.8;
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Building g5R5 hermitian DWF operator" <<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  RealD mass=0.001;
 | 
			
		||||
  RealD M5=1.8;
 | 
			
		||||
  DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
 | 
			
		||||
  DomainWallFermionR DdwfDD(UmuDD,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
 | 
			
		||||
 | 
			
		||||
  typedef Aggregation<vSpinColourVector,vTComplex,nbasis>              Subspace;
 | 
			
		||||
  typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis>          CoarseOperator;
 | 
			
		||||
  typedef CoarseOperator::CoarseVector                                 CoarseVector;
 | 
			
		||||
 | 
			
		||||
  typedef CoarseOperator::siteVector siteVector;
 | 
			
		||||
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Calling Aggregation class to build subspace" <<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermDefOp(Ddwf);
 | 
			
		||||
  Subspace Aggregates(Coarse5d,FGrid,0);
 | 
			
		||||
  //  Aggregates.CreateSubspace(RNG5,HermDefOp,nbasis);
 | 
			
		||||
  assert ( (nbasis & 0x1)==0);
 | 
			
		||||
  int nb=nbasis/2;
 | 
			
		||||
  std::cout<<GridLogMessage << " nbasis/2 = "<<nb<<std::endl;
 | 
			
		||||
  //  Aggregates.CreateSubspace(RNG5,HermDefOp,nb);
 | 
			
		||||
  //  Aggregates.CreateSubspaceLanczos(RNG5,HermDefOp,nb);
 | 
			
		||||
  Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb);
 | 
			
		||||
  for(int n=0;n<nb;n++){
 | 
			
		||||
    G5R5(Aggregates.subspace[n+nb],Aggregates.subspace[n]);
 | 
			
		||||
    std::cout<<GridLogMessage<<n<<" subspace "<<norm2(Aggregates.subspace[n+nb])<<" "<<norm2(Aggregates.subspace[n]) <<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
  for(int n=0;n<nbasis;n++){
 | 
			
		||||
    std::cout<<GridLogMessage << "vec["<<n<<"] = "<<norm2(Aggregates.subspace[n])  <<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
//  for(int i=0;i<nbasis;i++){
 | 
			
		||||
//    result =     Aggregates.subspace[i];
 | 
			
		||||
//    Aggregates.subspace[i]=result+g5*result;
 | 
			
		||||
//  }
 | 
			
		||||
  result=Zero();
 | 
			
		||||
  Subspace Aggregates(Coarse5d,FGrid,0);
 | 
			
		||||
 | 
			
		||||
  assert ( (nbasis & 0x1)==0);
 | 
			
		||||
  {
 | 
			
		||||
    int nb=nbasis/2;
 | 
			
		||||
    Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.02,500,100,100,0.0);
 | 
			
		||||
    for(int n=0;n<nb;n++){
 | 
			
		||||
      G5R5(Aggregates.subspace[n+nb],Aggregates.subspace[n]);
 | 
			
		||||
    }
 | 
			
		||||
    LatticeFermion A(FGrid);
 | 
			
		||||
    LatticeFermion B(FGrid);
 | 
			
		||||
    for(int n=0;n<nb;n++){
 | 
			
		||||
      A = Aggregates.subspace[n];
 | 
			
		||||
      B = Aggregates.subspace[n+nb];
 | 
			
		||||
      Aggregates.subspace[n]   = A+B; // 1+G5 // eigen value of G5R5 is +1
 | 
			
		||||
      Aggregates.subspace[n+nb]= A-B; // 1-G5 // eigen value of G5R5 is -1
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Building coarse representation of Indef operator" <<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis>    Level1Op;
 | 
			
		||||
  typedef CoarsenedMatrix<siteVector,iScalar<vTComplex>,nbasisc> Level2Op;
 | 
			
		||||
 | 
			
		||||
  Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> HermIndefOp(Ddwf);
 | 
			
		||||
  Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> HermIndefOpDD(DdwfDD);
 | 
			
		||||
  CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LDOp(*Coarse5d,1); // Hermitian matrix
 | 
			
		||||
  LDOp.CoarsenOperator(FGrid,HermIndefOp,Aggregates);
 | 
			
		||||
 | 
			
		||||
  Level1Op LDOp(*Coarse5d,1); LDOp.CoarsenOperator(FGrid,HermIndefOp,Aggregates);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////////////////
 | 
			
		||||
  // Deflate the course space. Recursive multigrid?
 | 
			
		||||
  //////////////////////////////////////////////////
 | 
			
		||||
  typedef Aggregation<siteVector,iScalar<vTComplex>,nbasisc>                   CoarseSubspace;
 | 
			
		||||
  CoarseSubspace CoarseAggregates(CoarseCoarse5d,Coarse5d,0);
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Testing some coarse space solvers  " <<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Build deflation space in coarse operator "<< std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  CoarseVector c_src (Coarse5d);
 | 
			
		||||
  CoarseVector c_res (Coarse5d);
 | 
			
		||||
  gaussian(CRNG,c_src);
 | 
			
		||||
  c_res=Zero();
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Solving posdef-CG on coarse space "<< std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  MdagMLinearOperator<CoarseOperator,CoarseVector> PosdefLdop(LDOp);
 | 
			
		||||
  ConjugateGradient<CoarseVector> CG(1.0e-6,100000);
 | 
			
		||||
  CG(PosdefLdop,c_src,c_res);
 | 
			
		||||
  {
 | 
			
		||||
    int nb=nbasisc/2;
 | 
			
		||||
    CoarseAggregates.CreateSubspaceChebyshev(CRNG,PosdefLdop,nb,12.0,0.02,500,100,100,0.0);
 | 
			
		||||
    for(int n=0;n<nb;n++){
 | 
			
		||||
      auto subspace    = CoarseAggregates.subspace[n].View();
 | 
			
		||||
      auto subspace_g5 = CoarseAggregates.subspace[n+nb].View();
 | 
			
		||||
      for(int nn=0;nn<nb;nn++){
 | 
			
		||||
	for(int site=0;site<Coarse5d->oSites();site++){
 | 
			
		||||
	  subspace_g5[site](nn)   = subspace[site](nn);
 | 
			
		||||
	  subspace_g5[site](nn+nb)=-subspace[site](nn+nb);
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  Level2Op L2Op(*CoarseCoarse5d,1); // Hermitian matrix
 | 
			
		||||
  typedef Level2Op::CoarseVector CoarseCoarseVector;
 | 
			
		||||
  HermitianLinearOperator<Level1Op,CoarseVector> L1LinOp(LDOp);
 | 
			
		||||
  L2Op.CoarsenOperator(Coarse5d,L1LinOp,CoarseAggregates);
 | 
			
		||||
 | 
			
		||||
    std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
    std::cout<<GridLogMessage << "Solving indef-MCR on coarse space "<< std::endl;
 | 
			
		||||
    std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
//    HermitianLinearOperator<CoarseOperator,CoarseVector> HermIndefLdop(LDOp);
 | 
			
		||||
//    ConjugateResidual<CoarseVector> MCR(1.0e-6,100000);
 | 
			
		||||
//    MCR(HermIndefLdop,c_src,c_res);
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Building deflation preconditioner "<< std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << " Running CoarseCoarse grid Lanczos "<< std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
 | 
			
		||||
  MultiGridPreconditioner <vSpinColourVector,vTComplex,nbasis,DomainWallFermionR> Precon  (Aggregates, LDOp,
 | 
			
		||||
											   HermIndefOp,Ddwf,
 | 
			
		||||
											   HermIndefOp,Ddwf);
 | 
			
		||||
  MdagMLinearOperator<Level2Op,CoarseCoarseVector> IRLHermOpL2(L2Op);
 | 
			
		||||
  Chebyshev<CoarseCoarseVector> IRLChebyL2(0.001,4.2,71);
 | 
			
		||||
  FunctionHermOp<CoarseCoarseVector> IRLOpChebyL2(IRLChebyL2,IRLHermOpL2);
 | 
			
		||||
  PlainHermOp<CoarseCoarseVector> IRLOpL2    (IRLHermOpL2);
 | 
			
		||||
  int cNk=24;
 | 
			
		||||
  int cNm=36;
 | 
			
		||||
  int cNstop=24;
 | 
			
		||||
  ImplicitlyRestartedLanczos<CoarseCoarseVector> IRLL2(IRLOpChebyL2,IRLOpL2,cNstop,cNk,cNm,1.0e-3,20);
 | 
			
		||||
 | 
			
		||||
  //  MultiGridPreconditioner <vSpinColourVector,vTComplex,nbasis,DomainWallFermionR> PreconDD(Aggregates, LDOp,
 | 
			
		||||
  //											   HermIndefOp,Ddwf,
 | 
			
		||||
  //											   HermIndefOpDD,DdwfDD);
 | 
			
		||||
  //  TrivialPrecon<LatticeFermion> simple;
 | 
			
		||||
  int cNconv;
 | 
			
		||||
  std::vector<RealD>          eval2(cNm);
 | 
			
		||||
  std::vector<CoarseCoarseVector>   evec2(cNm,CoarseCoarse5d);
 | 
			
		||||
  CoarseCoarseVector cc_src(CoarseCoarse5d); cc_src=1.0;
 | 
			
		||||
  IRLL2.calc(eval2,evec2,cc_src,cNconv);
 | 
			
		||||
 | 
			
		||||
  ConjugateGradient<CoarseCoarseVector>  CoarseCoarseCG(0.1,1000);
 | 
			
		||||
  DeflatedGuesser<CoarseCoarseVector> DeflCoarseCoarseGuesser(evec2,eval2);
 | 
			
		||||
  NormalEquations<CoarseCoarseVector> DeflCoarseCoarseCGNE(L2Op,CoarseCoarseCG,DeflCoarseCoarseGuesser);
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Testing smoother efficacy"<< std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Building 3 level Multigrid            "<< std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  Precon.SmootherTest(src);
 | 
			
		||||
  typedef MultiGridPreconditioner<vSpinColourVector,  vTComplex,nbasis, DomainWallFermionR,DeflatedGuesser<CoarseVector> , NormalEquations<CoarseVector> >   TwoLevelMG;
 | 
			
		||||
  typedef MultiGridPreconditioner<siteVector,iScalar<vTComplex>,nbasisc,Level1Op, DeflatedGuesser<CoarseCoarseVector>, NormalEquations<CoarseCoarseVector> > CoarseMG;
 | 
			
		||||
  typedef MultiGridPreconditioner<vSpinColourVector,  vTComplex,nbasis, DomainWallFermionR,ZeroGuesser<CoarseVector>, LinearFunction<CoarseVector> >     ThreeLevelMG;
 | 
			
		||||
 | 
			
		||||
  //  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  //  std::cout<<GridLogMessage << "Testing DD smoother efficacy"<< std::endl;
 | 
			
		||||
  //  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  //  PreconDD.SmootherTest(src);
 | 
			
		||||
  // MultiGrid preconditioner acting on the coarse space <-> coarsecoarse space
 | 
			
		||||
  ChebyshevSmoother<CoarseVector,  Level1Op >        CoarseSmoother(0.1,12.0,3,L1LinOp,LDOp);
 | 
			
		||||
  ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(0.5,60.0,10,HermIndefOp,Ddwf);
 | 
			
		||||
 | 
			
		||||
  //  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  //  std::cout<<GridLogMessage << "Testing SAP smoother efficacy"<< std::endl;
 | 
			
		||||
  //  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  //  PreconDD.SAP(src,result);
 | 
			
		||||
  //  MirsSmoother<CoarseVector,  Level1Op >        CoarseCGSmoother(0.1,0.1,4,L1LinOp,LDOp);
 | 
			
		||||
  //  MirsSmoother<LatticeFermion,DomainWallFermionR> FineCGSmoother(0.0,0.01,8,HermIndefOp,Ddwf);
 | 
			
		||||
 | 
			
		||||
  //  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  //  std::cout<<GridLogMessage << "Unprec CG "<< std::endl;
 | 
			
		||||
  //  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
 | 
			
		||||
  //  TrivialPrecon<LatticeFermion> simple;
 | 
			
		||||
  //  ConjugateGradient<LatticeFermion> fCG(1.0e-8,100000);
 | 
			
		||||
  //  fCG(HermDefOp,src,result);
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Red Black Prec CG "<< std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  LatticeFermion    src_o(FrbGrid);
 | 
			
		||||
  LatticeFermion result_o(FrbGrid);
 | 
			
		||||
  pickCheckerboard(Odd,src_o,src);
 | 
			
		||||
  result_o=Zero();
 | 
			
		||||
  SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> HermOpEO(Ddwf);
 | 
			
		||||
  ConjugateGradient<LatticeFermion> pCG(1.0e-8,10000);
 | 
			
		||||
  //  pCG(HermOpEO,src_o,result_o);
 | 
			
		||||
 | 
			
		||||
  //  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  //  std::cout<<GridLogMessage << "Testing GCR on indef matrix "<< std::endl;
 | 
			
		||||
  //  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  //  PrecGeneralisedConjugateResidual<LatticeFermion> UPGCR(1.0e-8,100000,simple,8,128);
 | 
			
		||||
  //  UPGCR(HermIndefOp,src,result);
 | 
			
		||||
  CoarseMG Level2Precon (CoarseAggregates, L2Op,
 | 
			
		||||
			 L1LinOp,LDOp,
 | 
			
		||||
			 CoarseSmoother,
 | 
			
		||||
			 DeflCoarseCoarseGuesser,
 | 
			
		||||
			 DeflCoarseCoarseCGNE);
 | 
			
		||||
  Level2Precon.Level(2);
 | 
			
		||||
 | 
			
		||||
  // PGCR Applying this solver to solve the coarse space problem
 | 
			
		||||
  PrecGeneralisedConjugateResidual<CoarseVector>  l2PGCR(0.1, 100, L1LinOp,Level2Precon,16,16);
 | 
			
		||||
  l2PGCR.Level(2);
 | 
			
		||||
  
 | 
			
		||||
  /// Get themax eval
 | 
			
		||||
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage <<" Applying power method to find spectral range      "<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  Precon.PowerMethod(src);
 | 
			
		||||
  // Wrap the 2nd level solver in a MultiGrid preconditioner acting on the fine space
 | 
			
		||||
  ZeroGuesser<CoarseVector> CoarseZeroGuesser;
 | 
			
		||||
  ThreeLevelMG ThreeLevelPrecon(Aggregates, LDOp,
 | 
			
		||||
				HermIndefOp,Ddwf,
 | 
			
		||||
				FineSmoother,
 | 
			
		||||
				CoarseZeroGuesser,
 | 
			
		||||
				l2PGCR);
 | 
			
		||||
  ThreeLevelPrecon.Level(1);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  //  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  //  std::cout<<GridLogMessage << "Building a two level DDPGCR "<< std::endl;
 | 
			
		||||
  //  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  //  PrecGeneralisedConjugateResidual<LatticeFermion> PGCRDD(1.0e-8,100000,PreconDD,8,128);
 | 
			
		||||
  //  result=Zero();
 | 
			
		||||
  //  std::cout<<GridLogMessage<<"checking norm src "<<norm2(src)<<std::endl;
 | 
			
		||||
  //  PGCRDD(HermIndefOp,src,result);
 | 
			
		||||
  // Apply the fine-coarse-coarsecoarse 2 deep MG preconditioner in an outer PGCR on the fine fgrid
 | 
			
		||||
  PrecGeneralisedConjugateResidual<LatticeFermion> l1PGCR(1.0e-8,1000,HermIndefOp,ThreeLevelPrecon,16,16);
 | 
			
		||||
  l1PGCR.Level(1);
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Building a two level PGCR "<< std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Calling 3 level Multigrid            "<< std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  PrecGeneralisedConjugateResidual<LatticeFermion> PGCR(1.0e-8,100000,Precon,8,8);
 | 
			
		||||
  std::cout<<GridLogMessage<<"checking norm src "<<norm2(src)<<std::endl;
 | 
			
		||||
  result=Zero();
 | 
			
		||||
  PGCR(HermIndefOp,src,result);
 | 
			
		||||
  l1PGCR(src,result);
 | 
			
		||||
 | 
			
		||||
  CoarseVector c_src(Coarse5d); c_src=1.0;
 | 
			
		||||
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << " Fine        PowerMethod           "<< std::endl;
 | 
			
		||||
  PowerMethod<LatticeFermion>       PM;   PM(HermDefOp,src);
 | 
			
		||||
  std::cout<<GridLogMessage << " Coarse       PowerMethod           "<< std::endl;
 | 
			
		||||
  PowerMethod<CoarseVector>        cPM;  cPM(PosdefLdop,c_src);
 | 
			
		||||
  std::cout<<GridLogMessage << " CoarseCoarse PowerMethod           "<< std::endl;
 | 
			
		||||
  PowerMethod<CoarseCoarseVector> ccPM; ccPM(IRLHermOpL2,cc_src);
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Done "<< std::endl;
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user