PartialFraction Hw with Zolo and Tanh approx converged under CG and passed EO breakdown

and hermiticity tests.
2026-07-17 23:53:27 +01:00 · 2015-06-04 13:28:37 +01:00
parent 21d193b1c8
commit 63a61fcc2a
21 changed files with 501 additions and 56 deletions
@@ -1 +1 @@
-/usr/share/automake-1.14/INSTALL
+/opt/local/share/automake-1.15/INSTALL
@@ -2574,7 +2574,7 @@ test -n "$target_alias" &&
    NONENONEs,x,x, &&
  program_prefix=${target_alias}-

-am__api_version='1.14'
+am__api_version='1.15'

 # Find a good install program.  We prefer a C program (faster),
 # so one script is as good as another.  But avoid the broken or
@@ -2746,8 +2746,8 @@ test "$program_suffix" != NONE &&
 ac_script='s/[\\$]/&&/g;s/;s,x,x,$//'
 program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"`

-# expand $ac_aux_dir to an absolute path
-am_aux_dir=`cd $ac_aux_dir && pwd`
+# Expand $ac_aux_dir to an absolute path.
+am_aux_dir=`cd "$ac_aux_dir" && pwd`

 if test x"${MISSING+set}" != xset; then
  case $am_aux_dir in
@@ -2766,7 +2766,7 @@ else
 $as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;}
 fi

-if test x"${install_sh}" != xset; then
+if test x"${install_sh+set}" != xset; then
  case $am_aux_dir in
  *\ * | *\	*)
    install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
@@ -3094,8 +3094,8 @@ MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"}
 # <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
 mkdir_p='$(MKDIR_P)'

-# We need awk for the "check" target.  The system "awk" is bad on
-# some platforms.
+# We need awk for the "check" target (and possibly the TAP driver).  The
+# system "awk" is bad on some platforms.
 # Always define AMTAR for backward compatibility.  Yes, it's still used
 # in the wild :-(  We should find a proper way to deprecate it ...
 AMTAR='$${TAR-tar}'
@@ -3154,6 +3154,7 @@ END
 fi


+
 ac_config_headers="$ac_config_headers lib/GridConfig.h"

 # Check whether --enable-silent-rules was given.
@@ -6656,9 +6657,6 @@ fi



-
-
-
 # Check whether --enable-simd was given.
 if test "${enable_simd+set}" = set; then :
  enableval=$enable_simd; \
@@ -6673,21 +6671,21 @@ supported=no
 case ${ac_SIMD} in
     SSE4)
       echo Configuring for SSE4
-       if test x"$ax_cv_support_ssse3_ext" = x"yes"; then
+
 $as_echo "#define SSE4 1" >>confdefs.h

-       supported=yes
+       if test x"$ax_cv_support_ssse3_ext" = x"yes"; then           supported=yes
       else
-	{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Your processor does not support SSE4 instructions" >&5
+  	{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Your processor does not support SSE4 instructions" >&5
 $as_echo "$as_me: WARNING: Your processor does not support SSE4 instructions" >&2;}
       fi
     ;;
     AVX)
       echo Configuring for AVX
-       if test x"$ax_cv_support_avx_ext" = x"yes"; then
+
 $as_echo "#define AVX1 1" >>confdefs.h

-       supported=yes
+       if test x"$ax_cv_support_avx_ext" = x"yes"; then         supported=yes
       else
       	{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Your processor does not support AVX instructions" >&5
 $as_echo "$as_me: WARNING: Your processor does not support AVX instructions" >&2;}
@@ -6695,10 +6693,10 @@ $as_echo "$as_me: WARNING: Your processor does not support AVX instructions" >&2
     ;;
     AVX2)
       echo Configuring for AVX2
-       if test x"$ax_cv_support_avx2_ext" = x"yes"; then
+
 $as_echo "#define AVX2 1" >>confdefs.h

-       supported=yes
+       if test x"$ax_cv_support_avx2_ext" = x"yes"; then         supported=yes
       else
       { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Your processor does not support AVX2 instructions" >&5
 $as_echo "$as_me: WARNING: Your processor does not support AVX2 instructions" >&2;}
@@ -66,9 +66,6 @@ Please install or provide the correct path to your installation
 Info at: http://www.mpfr.org/)])


-
-
-
 AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVX2|AVX512|MIC],\
 	[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, MIC])],\
 	[ac_SIMD=${enable_simd}],[ac_SIMD=AVX2])
@@ -78,17 +75,17 @@ supported=no
 case ${ac_SIMD} in
     SSE4)
       echo Configuring for SSE4
-       if test x"$ax_cv_support_ssse3_ext" = x"yes"; then  dnl minimal support for SSE4
       AC_DEFINE([SSE4],[1],[SSE4] )
-       supported=yes
+       if test x"$ax_cv_support_ssse3_ext" = x"yes"; then  dnl minimal support for SSE4
+         supported=yes
       else
-	AC_MSG_WARN([Your processor does not support SSE4 instructions])
+  	AC_MSG_WARN([Your processor does not support SSE4 instructions])
       fi
     ;;
     AVX)
       echo Configuring for AVX
-       if test x"$ax_cv_support_avx_ext" = x"yes"; then  dnl minimal support for AVX
       AC_DEFINE([AVX1],[1],[AVX] )
+       if test x"$ax_cv_support_avx_ext" = x"yes"; then  dnl minimal support for AVX
       supported=yes			  
       else
       	AC_MSG_WARN([Your processor does not support AVX instructions])
@@ -96,8 +93,8 @@ case ${ac_SIMD} in
     ;;
     AVX2)
       echo Configuring for AVX2
-       if test x"$ax_cv_support_avx2_ext" = x"yes"; then  dnl minimal support for AVX2
       AC_DEFINE([AVX2],[1],[AVX2] )
+       if test x"$ax_cv_support_avx2_ext" = x"yes"; then  dnl minimal support for AVX2
       supported=yes
       else
       AC_MSG_WARN([Your processor does not support AVX2 instructions])
@@ -1,4 +1,4 @@

-HFILES=./Cshift.h ./simd/Grid_avx.h ./simd/Grid_vector_types.h ./simd/Grid_sse4.h ./simd/Grid_avx512.h ./simd/Old/Grid_vRealD.h ./simd/Old/Grid_vComplexD.h ./simd/Old/Grid_vInteger.h ./simd/Old/Grid_vComplexF.h ./simd/Old/Grid_vRealF.h ./simd/Grid_qpx.h ./Tensors.h ./Algorithms.h ./communicator/Communicator_base.h ./lattice/Lattice_rng.h ./lattice/Lattice_reduction.h ./lattice/Lattice_transfer.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_comparison.h ./lattice/Lattice_overload.h ./lattice/Lattice_reality.h ./lattice/Lattice_local.h ./lattice/Lattice_conformable.h ./lattice/Lattice_where.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_ET.h ./lattice/Lattice_transpose.h ./lattice/Lattice_trace.h ./Stencil.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_poke.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_class.h ./tensors/Tensor_transpose.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_reality.h ./tensors/Tensor_trace.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_outer.h ./tensors/Tensor_inner.h ./tensors/Tensor_traits.h ./tensors/Tensor_Ta.h ./tensors/Tensor_peek.h ./tensors/Tensor_arith.h ./tensors/Tensor_extract_merge.h ./Communicator.h ./Cartesian.h ./parallelIO/NerscIO.h ./qcd/QCD.h ./qcd/SpaceTimeGrid.h ./qcd/LinalgUtils.h ./qcd/TwoSpinor.h ./qcd/action/Actions.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/Dirac.h ./cshift/Cshift_common.h ./cshift/Cshift_none.h ./cshift/Cshift_mpi.h ./Simd.h ./GridConfig.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_red_black.h ./cartesian/Cartesian_full.h ./AlignedAllocator.h ./Lattice.h ./Threads.h ./Comparison.h ./Grid.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/Zolotarev.h ./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Remez.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./stencil/Lebesgue.h
+HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./Comparison.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./GridConfig.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_where.h ./Lattice.h ./parallelIO/NerscIO.h ./qcd/action/Actions.h ./qcd/action/DiffAction.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/Dirac.h ./qcd/LinalgUtils.h ./qcd/QCD.h ./qcd/SpaceTimeGrid.h ./qcd/TwoSpinor.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Old/Grid_vComplexD.h ./simd/Old/Grid_vComplexF.h ./simd/Old/Grid_vInteger.h ./simd/Old/Grid_vRealD.h ./simd/Old/Grid_vRealF.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_inner.h ./tensors/Tensor_outer.h ./tensors/Tensor_peek.h ./tensors/Tensor_poke.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./Tensors.h ./Threads.h

-CCFILES=./qcd/SpaceTimeGrid.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/Dirac.cc ./GridInit.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc
+CCFILES=./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./GridInit.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/Dirac.cc ./qcd/SpaceTimeGrid.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc
@@ -293,7 +293,7 @@ static void sncndnFK(INTERNAL_PRECISION u, INTERNAL_PRECISION k,
 * Set type = 0 for the Zolotarev approximation, which is zero at x = 0, and
 * type = 1 for the approximation which is infinite at x = 0. */

-zolotarev_data* grid_zolotarev(PRECISION epsilon, int n, int type) {
+zolotarev_data* zolotarev(PRECISION epsilon, int n, int type) {
  INTERNAL_PRECISION A, c, cp, kp, ksq, sn, cn, dn, Kp, Kj, z, z0, t, M, F,
    l, invlambda, xi, xisq, *tv, s, opl;
  int m, czero, ts;
@@ -414,7 +414,19 @@ zolotarev_data* grid_zolotarev(PRECISION epsilon, int n, int type) {
  return zd;
 }

-zolotarev_data* grid_higham(PRECISION epsilon, int n) {
+
+void zolotarev_free(zolotarev_data *zdata)
+{
+    free(zdata -> a);
+    free(zdata -> ap);
+    free(zdata -> alpha);
+    free(zdata -> beta);
+    free(zdata -> gamma);
+    free(zdata);
+}
+
+
+zolotarev_data* higham(PRECISION epsilon, int n) {
  INTERNAL_PRECISION A, M, c, cp, z, z0, t, epssq;
  int m, czero;
  zolotarev_data *zd;
@@ -77,8 +77,9 @@ typedef struct {
 * zolotarev_data structure. The arguments must satisfy the constraints that
 * epsilon > 0, n > 0, and type = 0 or 1. */

-ZOLOTAREV_DATA* grid_higham(PRECISION epsilon, int n) ;
-ZOLOTAREV_DATA* grid_zolotarev(PRECISION epsilon, int n, int type);
+ZOLOTAREV_DATA* higham(PRECISION epsilon, int n) ;
+ZOLOTAREV_DATA* zolotarev(PRECISION epsilon, int n, int type);
+void zolotarev_free(zolotarev_data *zdata);
 #endif

 #ifdef __cplusplus
@@ -62,6 +62,8 @@
 // Partial fraction
 //////////////////////
 #include <qcd/action/fermion/PartialFractionFermion5D.h>
+#include <qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h>
+#include <qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h>


    // Chroma interface defining FermionAction
@@ -9,6 +9,16 @@ namespace Grid {
    }
    void ContinuedFractionFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata)
    {
+      // How to check Ls matches??
+      //      std::cout << Ls << " Ls"<<std::endl;
+      //      std::cout << zdata->n  << " - n"<<std::endl;
+      //      std::cout << zdata->da << " -da "<<std::endl;
+      //      std::cout << zdata->db << " -db"<<std::endl;
+      //      std::cout << zdata->dn << " -dn"<<std::endl;
+      //      std::cout << zdata->dd << " -dd"<<std::endl;
+
+      assert(zdata->db==Ls);// Beta has Ls coeffs
+
      R=(1+this->mass)/(1-this->mass);

      Beta.resize(Ls);
@@ -29,7 +39,7 @@ namespace Grid {


      ZoloHiInv =1.0/zolo_hi;
-      double dw_diag = (4.0-M5)*ZoloHiInv;
+      dw_diag = (4.0-M5)*ZoloHiInv;
    
      See.resize(Ls);
      Aee.resize(Ls);
@@ -105,8 +115,6 @@ namespace Grid {
    }
    void   ContinuedFractionFermion5D::Mooee       (const LatticeFermion &psi, LatticeFermion &chi)
    {
-      double dw_diag = (4.0-M5)*ZoloHiInv;
-    
      int sign=1;
      for(int s=0;s<Ls;s++){
 	if ( s==0 ) {
@@ -37,9 +37,8 @@ namespace Grid {
      void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale);
      void SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata);;

-      Approx::zolotarev_data *zdata;
-
      // Cont frac
+      RealD dw_diag;
      RealD mass;
      RealD R;
      RealD ZoloHiInv;
@@ -29,13 +29,14 @@ namespace Grid {
      {
 	RealD eps = 1.0;

-	Approx::zolotarev_data *zdata = Approx::grid_higham(eps,this->Ls);// eps is ignored for higham
+	Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham
 	assert(zdata->n==this->Ls);
 	
 	std::cout << "DomainWallFermion with Ls="<<Ls<<std::endl;
 	// Call base setter
 	this->CayleyFermion5D::SetCoefficientsTanh(zdata,1.0,0.0);
- 
+
+	Approx::zolotarev_free(zdata);
      }

    };
@@ -31,11 +31,13 @@ namespace Grid {
 	RealD eps = 1.0;

 	std::cout << "MobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<Ls<<" Tanh approx"<<std::endl;
-	Approx::zolotarev_data *zdata = Approx::grid_higham(eps,this->Ls);// eps is ignored for higham
+	Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham
 	assert(zdata->n==this->Ls);
 	
 	// Call base setter
 	this->CayleyFermion5D::SetCoefficientsTanh(zdata,b,c);
+
+	Approx::zolotarev_free(zdata);
 
      }

@@ -31,7 +31,7 @@ namespace Grid {
      {
 	RealD eps = lo/hi;

-	Approx::zolotarev_data *zdata = Approx::grid_zolotarev(eps,this->Ls,0);// eps is ignored for higham
+	Approx::zolotarev_data *zdata = Approx::zolotarev(eps,this->Ls,0);
 	assert(zdata->n==this->Ls);

 	std::cout << "MobiusZolotarevFermion (b="<<b<<",c="<<c<<") with Ls= "<<Ls<<" Zolotarev range ["<<lo<<","<<hi<<"]"<<std::endl;
@@ -39,6 +39,7 @@ namespace Grid {
 	// Call base setter
 	this->CayleyFermion5D::SetCoefficientsZolotarev(hi,zdata,b,c);
 
+	Approx::zolotarev_free(zdata);
      }

    };
@@ -30,8 +30,9 @@ namespace Grid {
 	{
 	  assert((Ls&0x1)==1); // Odd Ls required
 	  int nrational=Ls-1;// Even rational order
-	  zdata = Approx::grid_higham(1.0,nrational);// eps is ignored for higham
+	  Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational);// eps is ignored for higham
 	  SetCoefficientsTanh(zdata,scale);
+	  Approx::zolotarev_free(zdata);
 	}
    };
  }
@@ -30,12 +30,12 @@ namespace Grid {
 	{
 	  assert((Ls&0x1)==1); // Odd Ls required

-	  int nrational=Ls-1;// Even rational order
+	  int nrational=Ls;// Odd rational order
 	  RealD eps = lo/hi;

-	  Approx::zolotarev_data *zdata = Approx::grid_zolotarev(eps,nrational,0);
-
+	  Approx::zolotarev_data *zdata = Approx::zolotarev(eps,nrational,0);
 	  SetCoefficientsZolotarev(hi,zdata);
+	  Approx::zolotarev_free(zdata);

 	}
    };
@@ -0,0 +1,40 @@
+#ifndef OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H
+#define OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H
+
+#include <Grid.h>
+
+namespace Grid {
+
+  namespace QCD {
+
+    class OverlapWilsonPartialFractionTanhFermion : public PartialFractionFermion5D
+    {
+    public:
+
+      virtual void   Instantiatable(void){};
+      // Constructors
+    OverlapWilsonPartialFractionTanhFermion(LatticeGaugeField &_Umu,
+					    GridCartesian         &FiveDimGrid,
+					    GridRedBlackCartesian &FiveDimRedBlackGrid,
+					    GridCartesian         &FourDimGrid,
+					    GridRedBlackCartesian &FourDimRedBlackGrid,
+					    RealD _mass,RealD _M5,
+					    RealD scale) :
+      
+      // b+c=scale, b-c = 0 <=> b =c = scale/2
+      PartialFractionFermion5D(_Umu,
+			       FiveDimGrid,
+			       FiveDimRedBlackGrid,
+			       FourDimGrid,
+			       FourDimRedBlackGrid,_mass,_M5)
+	{
+	  assert((Ls&0x1)==1); // Odd Ls required
+	  int nrational=Ls-1;// Even rational order
+	  Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational);// eps is ignored for higham
+	  SetCoefficientsTanh(zdata,scale);
+	  Approx::zolotarev_free(zdata);
+	}
+    };
+  }
+}
+#endif
@@ -0,0 +1,44 @@
+#ifndef OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H
+#define OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H
+
+#include <Grid.h>
+
+namespace Grid {
+
+  namespace QCD {
+
+    class OverlapWilsonPartialFractionZolotarevFermion : public PartialFractionFermion5D
+    {
+    public:
+
+      virtual void   Instantiatable(void){};
+      // Constructors
+    OverlapWilsonPartialFractionZolotarevFermion(LatticeGaugeField &_Umu,
+					  GridCartesian         &FiveDimGrid,
+					  GridRedBlackCartesian &FiveDimRedBlackGrid,
+					  GridCartesian         &FourDimGrid,
+					  GridRedBlackCartesian &FourDimRedBlackGrid,
+					  RealD _mass,RealD _M5,
+					  RealD lo,RealD hi):
+      
+      // b+c=scale, b-c = 0 <=> b =c = scale/2
+      PartialFractionFermion5D(_Umu,
+			       FiveDimGrid,
+			       FiveDimRedBlackGrid,
+			       FourDimGrid,
+			       FourDimRedBlackGrid,_mass,_M5)
+	{
+	  assert((Ls&0x1)==1); // Odd Ls required
+
+	  int nrational=Ls;// Odd rational order
+	  RealD eps = lo/hi;
+
+	  Approx::zolotarev_data *zdata = Approx::zolotarev(eps,nrational,0);
+	  SetCoefficientsZolotarev(hi,zdata);
+	  Approx::zolotarev_free(zdata);
+
+	}
+    };
+  }
+}
+#endif
@@ -1 +1,310 @@
+#include <Grid.h>
+namespace Grid {
+  namespace QCD {
+
+    void   PartialFractionFermion5D::Meooe_internal(const LatticeFermion &psi, LatticeFermion &chi,int dag)
+    {
+      // this does both dag and undag but is trivial; make a common helper routing
+      int sign = dag ? (-1) : 1;
+
+      if ( psi.checkerboard == Odd ) {
+	DhopEO(psi,chi,DaggerNo);
+      } else {
+	DhopOE(psi,chi,DaggerNo);
+      }
+
+      int nblock=(Ls-1)/2;
+      for(int b=0;b<nblock;b++){
+	int s = 2*b;
+	ag5xpby_ssp(chi,-scale,chi,0.0,chi,s,s); 
+	ag5xpby_ssp(chi, scale,chi,0.0,chi,s+1,s+1); 
+      }
+      ag5xpby_ssp(chi,p[nblock]*scale/amax,chi,0.0,chi,Ls-1,Ls-1);
+    }
+
+    void   PartialFractionFermion5D::Mooee_internal(const LatticeFermion &psi, LatticeFermion &chi,int dag)
+    {
+      // again dag and undag are trivially related
+      int sign = dag ? (-1) : 1;
+      
+      int nblock=(Ls-1)/2;
+      for(int b=0;b<nblock;b++){
+	
+	int s = 2*b;
+	RealD pp = p[nblock-1-b];
+	RealD qq = q[nblock-1-b];
+	
+	// Do each 2x2 block aligned at s and multiplies Dw site diagonal by G5 so Hw
+	ag5xpby_ssp(chi,-dw_diag*scale,psi,amax*sqrt(qq)*scale,psi, s  ,s+1); 
+	ag5xpby_ssp(chi, dw_diag*scale,psi,amax*sqrt(qq)*scale,psi, s+1,s);
+	axpby_ssp  (chi, 1.0, chi,sqrt(amax*pp)*scale*sign,psi,s+1,Ls-1);
+      }
+      
+      {
+	RealD R=(1+mass)/(1-mass);
+	//R g5 psi[Ls-1] + p[0] H
+	ag5xpbg5y_ssp(chi,R*scale,psi,p[nblock]*scale*dw_diag/amax,psi,Ls-1,Ls-1);
+	
+	for(int b=0;b<nblock;b++){
+	  int s = 2*b+1;
+	  RealD pp = p[nblock-1-b];
+	  axpby_ssp(chi,1.0,chi,-sqrt(amax*pp)*scale*sign,psi,Ls-1,s);
+	}
+      }
+    }
+
+    void   PartialFractionFermion5D::MooeeInv_internal(const LatticeFermion &psi, LatticeFermion &chi,int dag)
+    {
+      int sign = dag ? (-1) : 1;
+
+      LatticeFermion tmp(psi._grid);
+      
+      ///////////////////////////////////////////////////////////////////////////////////////
+      //Linv
+      ///////////////////////////////////////////////////////////////////////////////////////
+      int nblock=(Ls-1)/2;
+
+      axpy(chi,0.0,psi,psi); // Identity piece
+      
+      for(int b=0;b<nblock;b++){
+	int s = 2*b;
+	RealD pp = p[nblock-1-b];
+	RealD qq = q[nblock-1-b];
+	RealD coeff1=sign*sqrt(amax*amax*amax*pp*qq) / ( dw_diag*dw_diag + amax*amax* qq);
+	RealD coeff2=sign*sqrt(amax*pp)*dw_diag / ( dw_diag*dw_diag + amax*amax* qq); // Implicit g5 here
+	axpby_ssp  (chi,1.0,chi,coeff1,psi,Ls-1,s);
+	axpbg5y_ssp(chi,1.0,chi,coeff2,psi,Ls-1,s+1);
+      }
+      
+      ///////////////////////////////////////////////////////////////////////////////////////
+      //Dinv (note D isn't really diagonal -- just diagonal enough that we can still invert)
+      // Compute Seeinv (coeff of gamma5)
+      ///////////////////////////////////////////////////////////////////////////////////////
+      RealD R=(1+mass)/(1-mass);
+      RealD Seeinv = R + p[nblock]*dw_diag/amax;
+      for(int b=0;b<nblock;b++){
+	Seeinv += p[nblock-1-b]*dw_diag/amax / ( dw_diag*dw_diag/amax/amax + q[nblock-1-b]);
+      }    
+      Seeinv = 1.0/Seeinv;
+      
+      for(int b=0;b<nblock;b++){
+	int s = 2*b;
+	RealD pp = p[nblock-1-b];
+	RealD qq = q[nblock-1-b];
+	RealD coeff1=dw_diag / ( dw_diag*dw_diag + amax*amax* qq); // Implicit g5 here
+	RealD coeff2=amax*sqrt(qq) / ( dw_diag*dw_diag + amax*amax* qq);
+	ag5xpby_ssp  (tmp,-coeff1,chi,coeff2,chi,s,s+1);
+	ag5xpby_ssp  (tmp, coeff1,chi,coeff2,chi,s+1,s);
+      }
+      ag5xpby_ssp  (tmp, Seeinv,chi,0.0,chi,Ls-1,Ls-1);
+      
+      ///////////////////////////////////////////////////////////////////////////////////////
+      // Uinv
+      ///////////////////////////////////////////////////////////////////////////////////////
+      for(int b=0;b<nblock;b++){
+	int s = 2*b;
+	RealD pp = p[nblock-1-b];
+	RealD qq = q[nblock-1-b];
+	RealD coeff1=-sign*sqrt(amax*amax*amax*pp*qq) / ( dw_diag*dw_diag + amax*amax* qq);
+	RealD coeff2=-sign*sqrt(amax*pp)*dw_diag / ( dw_diag*dw_diag + amax*amax* qq); // Implicit g5 here
+	axpby_ssp  (chi,1.0/scale,tmp,coeff1/scale,tmp,s,Ls-1);
+	axpbg5y_ssp(chi,1.0/scale,tmp,coeff2/scale,tmp,s+1,Ls-1);
+      }
+      axpby_ssp  (chi, 1.0/scale,tmp,0.0,tmp,Ls-1,Ls-1);
+    }
+
+    void   PartialFractionFermion5D::M_internal(const LatticeFermion &psi, LatticeFermion &chi,int dag)
+    {
+      LatticeFermion D(psi._grid);
+  
+      int sign = dag ? (-1) : 1;
+
+      // For partial frac Hw case (b5=c5=1) chroma quirkily computes
+      //
+      // Conventions for partfrac appear to be a mess.
+      // Tony's Nara lectures have
+      //
+      // BlockDiag(  H/p_i  1             | 1       )    
+      //          (  1      p_i H / q_i^2 | 0       )  
+      //           ---------------------------------
+      //           ( -1      0                | R  +p0 H  )
+      //
+      //Chroma     ( -2H    2sqrt(q_i)    |   0         )
+      //           (2 sqrt(q_i)   2H      |  2 sqrt(p_i) )
+      //           ---------------------------------
+      //           ( 0     -2 sqrt(p_i)   |  2 R gamma_5 + p0 2H
+      //
+      // Edwards/Joo/Kennedy/Wenger
+      //
+      // Here, the "beta's" selected by chroma to scale the unphysical bulk constraint fields
+      // incorporate the approx scale factor. This is obtained by propagating the
+      // scale on "H" out to the off diagonal elements as follows:
+      //
+      // BlockDiag(  H/p_i  1             | 1       ) 
+      //          (  1      p_i H / q_i^2 | 0       )  
+      //           ---------------------------------
+      //          ( -1      0                | R  + p_0 H  )
+      //
+      // becomes:
+      // BlockDiag(  H/ sp_i  1               | 1             ) 
+      //          (  1      sp_i H / s^2q_i^2 | 0             )  
+      //           ---------------------------------
+      //           ( -1      0                | R + p_0/s H   )
+      //
+      //
+      // This is implemented in Chroma by
+      //           p0' = p0/approxMax
+      //           p_i' = p_i*approxMax
+      //           q_i' = q_i*approxMax*approxMax
+      //
+      // After the equivalence transform is applied the matrix becomes
+      // 
+      //Chroma     ( -2H    sqrt(q'_i)    |   0         )
+      //           (sqrt(q'_i)   2H       |   sqrt(p'_i) )
+      //           ---------------------------------
+      //           ( 0     -sqrt(p'_i)    |  2 R gamma_5 + p'0 2H
+      //
+      //     =     ( -2H    sqrt(q_i)amax    |   0              )
+      //           (sqrt(q_i)amax   2H       |   sqrt(p_i*amax) )
+      //           ---------------------------------
+      //           ( 0     -sqrt(p_i)*amax   |  2 R gamma_5 + p0/amax 2H
+      //
+
+      DW(psi,D,DaggerNo); 
+
+      int nblock=(Ls-1)/2;
+      for(int b=0;b<nblock;b++){
+	
+	int s = 2*b;
+	double pp = p[nblock-1-b];
+	double qq = q[nblock-1-b];
+	
+	// Do each 2x2 block aligned at s and
+	ag5xpby_ssp(chi,-1.0*scale,D,amax*sqrt(qq)*scale,psi, s  ,s+1); // Multiplies Dw by G5 so Hw
+	ag5xpby_ssp(chi, 1.0*scale,D,amax*sqrt(qq)*scale,psi, s+1,s);
+	
+	// Pick up last column
+	axpby_ssp  (chi, 1.0, chi,sqrt(amax*pp)*scale*sign,psi,s+1,Ls-1);
+      }
+	
+      {
+	double R=(1+this->mass)/(1-this->mass);
+	//R g5 psi[Ls] + p[0] H
+	ag5xpbg5y_ssp(chi,R*scale,psi,p[nblock]*scale/amax,D,Ls-1,Ls-1);
+	
+	for(int b=0;b<nblock;b++){
+	  int s = 2*b+1;
+	  double pp = p[nblock-1-b];
+	  axpby_ssp(chi,1.0,chi,-sqrt(amax*pp)*scale*sign,psi,Ls-1,s);
+	}
+      }
+
+    }
+
+    RealD  PartialFractionFermion5D::M    (const LatticeFermion &in, LatticeFermion &out)
+    {
+      M_internal(in,out,DaggerNo);
+      return norm2(out);
+    }
+    RealD  PartialFractionFermion5D::Mdag (const LatticeFermion &in, LatticeFermion &out)
+    {
+      M_internal(in,out,DaggerYes);
+      return norm2(out);
+    }
+
+    void PartialFractionFermion5D::Meooe       (const LatticeFermion &in, LatticeFermion &out)
+    {
+      Meooe_internal(in,out,DaggerNo);
+    }
+    void PartialFractionFermion5D::MeooeDag    (const LatticeFermion &in, LatticeFermion &out)
+    {
+      Meooe_internal(in,out,DaggerYes);
+    }
+    void PartialFractionFermion5D::Mooee       (const LatticeFermion &in, LatticeFermion &out)
+    {
+      Mooee_internal(in,out,DaggerNo);
+    }
+    void PartialFractionFermion5D::MooeeDag    (const LatticeFermion &in, LatticeFermion &out)
+    {
+      Mooee_internal(in,out,DaggerYes);
+    }
+
+    void PartialFractionFermion5D::MooeeInv    (const LatticeFermion &in, LatticeFermion &out)
+    {
+      MooeeInv_internal(in,out,DaggerNo);
+    }
+    void PartialFractionFermion5D::MooeeInvDag (const LatticeFermion &in, LatticeFermion &out)
+    {
+      MooeeInv_internal(in,out,DaggerYes);
+    }
+
+    void  PartialFractionFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale){
+      SetCoefficientsZolotarev(1.0/scale,zdata);
+    }
+    void  PartialFractionFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata){
+
+      // check on degree matching
+      //      std::cout << Ls << " Ls"<<std::endl;
+      //      std::cout << zdata->n  << " - n"<<std::endl;
+      //      std::cout << zdata->da << " -da "<<std::endl;
+      //      std::cout << zdata->db << " -db"<<std::endl;
+      //      std::cout << zdata->dn << " -dn"<<std::endl;
+      //      std::cout << zdata->dd << " -dd"<<std::endl;
+      assert(Ls == (2*zdata->da -1) );
+
+      // Part frac
+      //      RealD R;
+      R=(1+mass)/(1-mass);
+      dw_diag = (4.0-M5);
+
+      //      std::vector<RealD> p; 
+      //      std::vector<RealD> q;
+      p.resize(zdata->da);
+      q.resize(zdata->dd);
+	
+      for(int n=0;n<zdata->da;n++){
+	p[n] = zdata -> alpha[n];
+      }
+      for(int n=0;n<zdata->dd;n++){
+	q[n] = -zdata -> ap[n];
+      }
+      
+      scale= part_frac_chroma_convention ? 2.0 : 1.0; // Chroma conventions annoy me
+
+      amax=zolo_hi;
+    }
+
+      // Constructors
+    PartialFractionFermion5D::PartialFractionFermion5D(LatticeGaugeField &_Umu,
+						       GridCartesian         &FiveDimGrid,
+						       GridRedBlackCartesian &FiveDimRedBlackGrid,
+						       GridCartesian         &FourDimGrid,
+						       GridRedBlackCartesian &FourDimRedBlackGrid,
+						       RealD _mass,RealD M5) :
+      WilsonFermion5D(_Umu,
+		      FiveDimGrid, FiveDimRedBlackGrid,
+		      FourDimGrid, FourDimRedBlackGrid,M5),
+      mass(_mass)
+
+    {
+      assert((Ls&0x1)==1); // Odd Ls required
+      int nrational=Ls-1;
+
+
+      Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational);
+
+      // NB: chroma uses a cast to "float" for the zolotarev range(!?).
+      // this creates a real difference in the operator which I do not like but we can replicate here
+      // to demonstrate compatibility
+      //      RealD eps = (zolo_lo / zolo_hi);
+      //      zdata = bfm_zolotarev(eps,nrational,0);
+      
+      SetCoefficientsTanh(zdata,1.0);
+
+      Approx::zolotarev_free(zdata);
+
+    }
+ 
+ }
+}

@@ -9,6 +9,13 @@ namespace Grid {
    {
    public:

+      const int part_frac_chroma_convention=1;
+
+      void   Meooe_internal(const LatticeFermion &in, LatticeFermion &out,int dag);
+      void   Mooee_internal(const LatticeFermion &in, LatticeFermion &out,int dag);
+      void   MooeeInv_internal(const LatticeFermion &in, LatticeFermion &out,int dag);
+      void   M_internal(const LatticeFermion &in, LatticeFermion &out,int dag);
+
      // override multiply
      virtual RealD  M    (const LatticeFermion &in, LatticeFermion &out);
      virtual RealD  Mdag (const LatticeFermion &in, LatticeFermion &out);
@@ -21,16 +28,7 @@ namespace Grid {
      virtual void   MooeeInv    (const LatticeFermion &in, LatticeFermion &out);
      virtual void   MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);

-    private:
-
-      virtual void PartialFractionCoefficients(void);
-
-      Approx::zolotarev_data *zdata;
-
-      // Part frac
-      double R;
-      std::vector<double> p; 
-      std::vector<double> q;
+      virtual void   Instantiatable(void) =0; // ensure no make-eee

      // Constructors
      PartialFractionFermion5D(LatticeGaugeField &_Umu,
@@ -40,6 +38,20 @@ namespace Grid {
 				    GridRedBlackCartesian &FourDimRedBlackGrid,
 				    RealD _mass,RealD M5);

+    protected:
+
+      virtual void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale);
+      virtual void SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata);
+
+      // Part frac
+      RealD mass;
+      RealD dw_diag;
+      RealD R;
+      RealD amax;
+      RealD scale;
+      std::vector<double> p; 
+      std::vector<double> q;
+
    };


@@ -1,5 +1,5 @@
-#ifndef  GRID_QCD_DWF_H
-#define  GRID_QCD_DWF_H
+#ifndef  GRID_QCD_WILSON_FERMION_5D_H
+#define  GRID_QCD_WILSON_FERMION_5D_H

 namespace Grid {

@@ -82,6 +82,16 @@ int main (int argc, char ** argv)
  OverlapWilsonContFracZolotarevFermion Dcfz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,6.0);
  TestCGinversions<OverlapWilsonContFracZolotarevFermion>(Dcfz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);

+
+  std::cout <<"OverlapWilsonPartialFractionTanhFermion  test"<<std::endl;
+  OverlapWilsonPartialFractionTanhFermion Dpf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
+  TestCGinversions<OverlapWilsonPartialFractionTanhFermion>(Dpf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+
+  std::cout <<"OverlapWilsonPartialFractionZolotarevFermion  test"<<std::endl;
+  OverlapWilsonPartialFractionZolotarevFermion Dpfz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,6.0);
+  TestCGinversions<OverlapWilsonPartialFractionZolotarevFermion>(Dpfz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+
+
  Grid_finalize();
 }
 template<class What> 
@@ -57,6 +57,14 @@ int main (int argc, char ** argv)
  OverlapWilsonContFracZolotarevFermion Dcfz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,6.0);
  TestWhat<OverlapWilsonContFracZolotarevFermion>(Dcfz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);

+  std::cout <<"OverlapWilsonPartialFractionTanhFermion  test"<<std::endl;
+  OverlapWilsonPartialFractionTanhFermion Dpf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
+  TestWhat<OverlapWilsonPartialFractionTanhFermion>(Dpf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+
+  std::cout <<"OverlapWilsonPartialFractionZolotarevFermion  test"<<std::endl;
+  OverlapWilsonPartialFractionZolotarevFermion Dpfz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,6.0);
+  TestWhat<OverlapWilsonPartialFractionZolotarevFermion>(Dpfz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+
  Grid_finalize();
 }