mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 21:44:33 +00:00 
			
		
		
		
	Some small steps towards a multigrid
This commit is contained in:
		@@ -1,4 +1,4 @@
 | 
			
		||||
 | 
			
		||||
HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/CoarsenedMatrix.h ./algorithms/iterative/AdefGeneric.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/PrecGeneralisedConjugateResidual.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./GridConfig.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_comparison_utils.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_unary.h ./lattice/Lattice_where.h ./Lattice.h ./parallelIO/NerscIO.h ./qcd/action/Actions.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/GaugeActionBase.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/QCD.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/SUn.h ./qcd/utils/WilsonLoops.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_empty.h ./simd/Grid_neon.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Grid_vector_unops.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_determinant.h ./tensors/Tensor_exp.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_inner.h ./tensors/Tensor_logical.h ./tensors/Tensor_outer.h ./tensors/Tensor_peek.h ./tensors/Tensor_poke.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./tensors/Tensor_unary.h ./Tensors.h ./Threads.h
 | 
			
		||||
HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/CoarsenedMatrix.h ./algorithms/iterative/AdefGeneric.h ./algorithms/iterative/BfmHDCG.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/PrecGeneralisedConjugateResidual.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/Preconditioner.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./GridConfig.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_comparison_utils.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_unary.h ./lattice/Lattice_where.h ./Lattice.h ./parallelIO/NerscIO.h ./qcd/action/Actions.h ./qcd/action/DiffAction.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/GaugeActionBase.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/QCD.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/SUn.h ./qcd/utils/WilsonLoops.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_empty.h ./simd/Grid_neon.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Grid_vector_unops.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_determinant.h ./tensors/Tensor_exp.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_inner.h ./tensors/Tensor_logical.h ./tensors/Tensor_outer.h ./tensors/Tensor_peek.h ./tensors/Tensor_poke.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./tensors/Tensor_unary.h ./Tensors.h ./Threads.h
 | 
			
		||||
 | 
			
		||||
CCFILES=./algorithms/approx/MultiShiftFunction.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./GridInit.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/spin/Dirac.cc ./qcd/utils/SpaceTimeGrid.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc
 | 
			
		||||
 
 | 
			
		||||
@@ -85,10 +85,6 @@ namespace Grid {
 | 
			
		||||
    void Orthogonalise(void){
 | 
			
		||||
      CoarseScalar InnerProd(CoarseGrid); 
 | 
			
		||||
      blockOrthogonalise(InnerProd,subspace);
 | 
			
		||||
#if 1
 | 
			
		||||
      //      CheckOrthogonal();
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
    } 
 | 
			
		||||
    void CheckOrthogonal(void){
 | 
			
		||||
      CoarseVector iProj(CoarseGrid); 
 | 
			
		||||
@@ -125,7 +121,7 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
      RealD scale;
 | 
			
		||||
 | 
			
		||||
      ConjugateGradient<FineField> CG(1.0e-4,10000);
 | 
			
		||||
      ConjugateGradient<FineField> CG(1.0e-3,10000);
 | 
			
		||||
      FineField noise(FineGrid);
 | 
			
		||||
      FineField Mn(FineGrid);
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -15,7 +15,7 @@ public:
 | 
			
		||||
    Integer MaxIterations;
 | 
			
		||||
    int verbose;
 | 
			
		||||
    ConjugateGradient(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) { 
 | 
			
		||||
      verbose=0;
 | 
			
		||||
      verbose=1;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -48,11 +48,11 @@ namespace Grid {
 | 
			
		||||
	if(cp<rsq) {
 | 
			
		||||
	  Linop.HermOp(psi,r);
 | 
			
		||||
	  axpy(r,-1.0,src,r);
 | 
			
		||||
	  RealD true_resid = norm2(r);
 | 
			
		||||
	  RealD tr = norm2(r);
 | 
			
		||||
	  std::cout<<"PrecGeneralisedConjugateResidual: Converged on iteration " <<steps
 | 
			
		||||
		   << " computed residual "<<sqrt(cp/ssq)
 | 
			
		||||
	           << " true residual "<<true_resid
 | 
			
		||||
	           << " target "       <<Tolerance <<std::endl;
 | 
			
		||||
	           << " true residual "    <<sqrt(tr/ssq)
 | 
			
		||||
	           << " target "           <<Tolerance <<std::endl;
 | 
			
		||||
	  return;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -87,8 +87,8 @@ void WilsonFermion5D::DhopDir(const LatticeFermion &in, LatticeFermion &out,int
 | 
			
		||||
{
 | 
			
		||||
  int dir = dir5-1; // Maps to the ordering above in "directions" that is passed to stencil
 | 
			
		||||
                    // we drop off the innermost fifth dimension
 | 
			
		||||
  assert( (disp==1)||(disp==-1) );
 | 
			
		||||
  assert( (dir>=0)&&(dir<4) ); //must do x,y,z or t;
 | 
			
		||||
  //  assert( (disp==1)||(disp==-1) );
 | 
			
		||||
  //  assert( (dir>=0)&&(dir<4) ); //must do x,y,z or t;
 | 
			
		||||
 | 
			
		||||
  WilsonCompressor compressor(DaggerNo);
 | 
			
		||||
  Stencil.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(in,comm_buf,compressor);
 | 
			
		||||
@@ -100,7 +100,7 @@ void WilsonFermion5D::DhopDir(const LatticeFermion &in, LatticeFermion &out,int
 | 
			
		||||
  assert(dirdisp<=7);
 | 
			
		||||
  assert(dirdisp>=0);
 | 
			
		||||
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
//PARALLEL_FOR_LOOP
 | 
			
		||||
  for(int ss=0;ss<Umu._grid->oSites();ss++){
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
      int sU=ss;
 | 
			
		||||
@@ -114,7 +114,7 @@ void WilsonFermion5D::DhopInternal(CartesianStencil & st, LebesgueOrder &lo,
 | 
			
		||||
				   LatticeDoubledGaugeField & U,
 | 
			
		||||
			   const LatticeFermion &in, LatticeFermion &out,int dag)
 | 
			
		||||
{
 | 
			
		||||
  assert((dag==DaggerNo) ||(dag==DaggerYes));
 | 
			
		||||
  //  assert((dag==DaggerNo) ||(dag==DaggerYes));
 | 
			
		||||
 | 
			
		||||
  WilsonCompressor compressor(dag);
 | 
			
		||||
 | 
			
		||||
@@ -127,29 +127,32 @@ void WilsonFermion5D::DhopInternal(CartesianStencil & st, LebesgueOrder &lo,
 | 
			
		||||
  // - 8 linear access unit stride streams per thread for Fermion for hw prefetchable.
 | 
			
		||||
  if ( dag == DaggerYes ) {
 | 
			
		||||
    if( HandOptDslash ) {
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
#pragma parallel for
 | 
			
		||||
      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
			
		||||
	for(int s=0;s<Ls;s++){
 | 
			
		||||
	  //int sU=lo.Reorder(ss);
 | 
			
		||||
	  int sU=ss;
 | 
			
		||||
	  int sF = s+Ls*sU;
 | 
			
		||||
	  DiracOptHand::DhopSiteDag(st,U,comm_buf,sF,sU,in,out);
 | 
			
		||||
	{
 | 
			
		||||
	  for(int s=0;s<Ls;s++){
 | 
			
		||||
	    int sU=ss;
 | 
			
		||||
	    int sF = s+Ls*sU;
 | 
			
		||||
	    DiracOptHand::DhopSiteDag(st,U,comm_buf,sF,sU,in,out);
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
    } else { 
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
#pragma parallel for
 | 
			
		||||
      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
			
		||||
	for(int s=0;s<Ls;s++){
 | 
			
		||||
	  //	  int sU=lo.Reorder(ss);
 | 
			
		||||
	  int sU=ss;
 | 
			
		||||
	  int sF = s+Ls*sU;
 | 
			
		||||
	  DiracOpt::DhopSiteDag(st,U,comm_buf,sF,sU,in,out);
 | 
			
		||||
	{
 | 
			
		||||
	  int sd;
 | 
			
		||||
	  for(sd=0;sd<Ls;sd++){
 | 
			
		||||
	    int sU=ss;
 | 
			
		||||
	    int sF = sd+Ls*sU;
 | 
			
		||||
	    DiracOpt::DhopSiteDag(st,U,comm_buf,sF,sU,in,out);
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  } else {
 | 
			
		||||
    if( HandOptDslash ) {
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
#pragma parallel for
 | 
			
		||||
      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
			
		||||
	for(int s=0;s<Ls;s++){
 | 
			
		||||
	  //	  int sU=lo.Reorder(ss);
 | 
			
		||||
@@ -160,7 +163,7 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    } else { 
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
#pragma parallel for
 | 
			
		||||
      for(int ss=0;ss<U._grid->oSites();ss++){
 | 
			
		||||
	for(int s=0;s<Ls;s++){
 | 
			
		||||
	  //	  int sU=lo.Reorder(ss);
 | 
			
		||||
 
 | 
			
		||||
@@ -166,7 +166,7 @@ public:
 | 
			
		||||
    su2SubGroupIndex(i0,i1,su2_index);
 | 
			
		||||
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
    for(int ss=0;ss!=grid->oSites();ss++){
 | 
			
		||||
    for(int ss=0;ss<grid->oSites();ss++){
 | 
			
		||||
      subgroup._odata[ss]()()(0,0) = source._odata[ss]()()(i0,i0);
 | 
			
		||||
      subgroup._odata[ss]()()(0,1) = source._odata[ss]()()(i0,i1);
 | 
			
		||||
      subgroup._odata[ss]()()(1,0) = source._odata[ss]()()(i1,i0);
 | 
			
		||||
@@ -201,7 +201,7 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
 | 
			
		||||
    dest = 1.0; // start out with identity
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
    for(int ss=0;ss!=grid->oSites();ss++){
 | 
			
		||||
    for(int ss=0;ss<grid->oSites();ss++){
 | 
			
		||||
      dest._odata[ss]()()(i0,i0) = subgroup._odata[ss]()()(0,0);
 | 
			
		||||
      dest._odata[ss]()()(i0,i1) = subgroup._odata[ss]()()(0,1);
 | 
			
		||||
      dest._odata[ss]()()(i1,i0) = subgroup._odata[ss]()()(1,0);
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user