1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

OverlappedComm for Staggered 5D and 4D.

This commit is contained in:
Azusa Yamaguchi 2018-02-22 12:50:09 +00:00
parent 97b9c6f03d
commit 0f468e2179
11 changed files with 269 additions and 71 deletions

View File

@ -54,6 +54,7 @@ class ConjugateGradient : public OperatorFunction<Field> {
void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
psi.checkerboard = src.checkerboard;
conformable(psi, src);
@ -101,7 +102,7 @@ class ConjugateGradient : public OperatorFunction<Field> {
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
for (k = 1; k <= MaxIterations*1000; k++) {
c = cp;
MatrixTimer.Start();
@ -109,8 +110,9 @@ class ConjugateGradient : public OperatorFunction<Field> {
MatrixTimer.Stop();
LinalgTimer.Start();
// RealD qqck = norm2(mmp);
// ComplexD dck = innerProduct(p,mmp);
// AA
// RealD qqck = norm2(mmp);
// ComplexD dck = innerProduct(p,mmp);
a = c / d;
b_pred = a * (a * qq - d) / c;

View File

@ -689,7 +689,12 @@ class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation:
inline void loadLinkElement(Simd &reg, ref &memory) {
reg = memory;
}
inline void InsertGaugeField(DoubledGaugeField &U_ds,
const GaugeLinkField &U,int mu)
{
PokeIndex<LorentzIndex>(U_ds, U, mu);
}
inline void DoubleStore(GridBase *GaugeGrid,
DoubledGaugeField &UUUds, // for Naik term
DoubledGaugeField &Uds,
@ -728,8 +733,10 @@ class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation:
U = U *phases;
Udag = Udag *phases;
PokeIndex<LorentzIndex>(Uds, U, mu);
PokeIndex<LorentzIndex>(Uds, Udag, mu + 4);
InsertGaugeField(Uds,U,mu);
InsertGaugeField(Uds,Udag,mu+4);
// PokeIndex<LorentzIndex>(Uds, U, mu);
// PokeIndex<LorentzIndex>(Uds, Udag, mu + 4);
// 3 hop based on thin links. Crazy huh ?
U = PeekIndex<LorentzIndex>(Uthin, mu);
@ -741,8 +748,8 @@ class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation:
UUU = UUU *phases;
UUUdag = UUUdag *phases;
PokeIndex<LorentzIndex>(UUUds, UUU, mu);
PokeIndex<LorentzIndex>(UUUds, UUUdag, mu+4);
InsertGaugeField(UUUds,UUU,mu);
InsertGaugeField(UUUds,UUUdag,mu+4);
}
}
@ -834,6 +841,23 @@ class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation:
mac(&phi(), &UU(), &chi());
}
inline void InsertGaugeField(DoubledGaugeField &U_ds,const GaugeLinkField &U,int mu)
{
GridBase *GaugeGrid = U_ds._grid;
parallel_for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) {
SiteScalarGaugeLink ScalarU;
SiteDoubledGaugeField ScalarUds;
std::vector<int> lcoor;
GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
peekLocalSite(ScalarUds, U_ds, lcoor);
peekLocalSite(ScalarU, U, lcoor);
ScalarUds(mu) = ScalarU();
}
}
inline void DoubleStore(GridBase *GaugeGrid,
DoubledGaugeField &UUUds, // for Naik term
DoubledGaugeField &Uds,
@ -875,23 +899,8 @@ class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation:
U = U *phases;
Udag = Udag *phases;
for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) {
SiteScalarGaugeLink ScalarU;
SiteDoubledGaugeField ScalarUds;
std::vector<int> lcoor;
GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
peekLocalSite(ScalarUds, Uds, lcoor);
peekLocalSite(ScalarU, U, lcoor);
ScalarUds(mu) = ScalarU();
peekLocalSite(ScalarU, Udag, lcoor);
ScalarUds(mu + 4) = ScalarU();
pokeLocalSite(ScalarUds, Uds, lcoor);
}
InsertGaugeField(Uds,U,mu);
InsertGaugeField(Uds,Udag,mu+4);
// 3 hop based on thin links. Crazy huh ?
U = PeekIndex<LorentzIndex>(Uthin, mu);
@ -903,24 +912,8 @@ class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation:
UUU = UUU *phases;
UUUdag = UUUdag *phases;
for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) {
SiteScalarGaugeLink ScalarU;
SiteDoubledGaugeField ScalarUds;
std::vector<int> lcoor;
GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
peekLocalSite(ScalarUds, UUUds, lcoor);
peekLocalSite(ScalarU, UUU, lcoor);
ScalarUds(mu) = ScalarU();
peekLocalSite(ScalarU, UUUdag, lcoor);
ScalarUds(mu + 4) = ScalarU();
pokeLocalSite(ScalarUds, UUUds, lcoor);
}
InsertGaugeField(UUUds,UUU,mu);
InsertGaugeField(UUUds,UUUdag,mu+4);
}
}

View File

@ -44,6 +44,7 @@ ImprovedStaggeredFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1, 3, 3,
template <class Impl>
ImprovedStaggeredFermion<Impl>::ImprovedStaggeredFermion(GridCartesian &Fgrid, GridRedBlackCartesian &Hgrid,
RealD _mass,
RealD _c1, RealD _c2,RealD _u0,
const ImplParams &p)
: Kernels(p),
_grid(&Fgrid),
@ -62,6 +63,16 @@ ImprovedStaggeredFermion<Impl>::ImprovedStaggeredFermion(GridCartesian &Fgrid, G
UUUmuOdd(&Hgrid) ,
_tmp(&Hgrid)
{
int vol4;
int LLs=1;
c1=_c1;
c2=_c2;
u0=_u0;
vol4= _grid->oSites();
Stencil.BuildSurfaceList(LLs,vol4);
vol4= _cbgrid->oSites();
StencilEven.BuildSurfaceList(LLs,vol4);
StencilOdd.BuildSurfaceList(LLs,vol4);
}
template <class Impl>
@ -69,18 +80,15 @@ ImprovedStaggeredFermion<Impl>::ImprovedStaggeredFermion(GaugeField &_Uthin, Gau
GridRedBlackCartesian &Hgrid, RealD _mass,
RealD _c1, RealD _c2,RealD _u0,
const ImplParams &p)
: ImprovedStaggeredFermion(Fgrid,Hgrid,_mass,p)
: ImprovedStaggeredFermion(Fgrid,Hgrid,_mass,_c1,_c2,_u0,p)
{
c1=_c1;
c2=_c2;
u0=_u0;
ImportGauge(_Uthin,_Ufat);
}
template <class Impl>
ImprovedStaggeredFermion<Impl>::ImprovedStaggeredFermion(GaugeField &_Uthin,GaugeField &_Utriple, GaugeField &_Ufat, GridCartesian &Fgrid,
ImprovedStaggeredFermion<Impl>::ImprovedStaggeredFermion(GaugeField &_Utriple, GaugeField &_Ufat, GridCartesian &Fgrid,
GridRedBlackCartesian &Hgrid, RealD _mass,
const ImplParams &p)
: ImprovedStaggeredFermion(Fgrid,Hgrid,_mass,p)
: ImprovedStaggeredFermion(Fgrid,Hgrid,_mass,1.0,1.0,1.0,p)
{
ImportGaugeSimple(_Utriple,_Ufat);
}
@ -374,20 +382,116 @@ void ImprovedStaggeredFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder
DoubledGaugeField &U,
DoubledGaugeField &UUU,
const FermionField &in,
FermionField &out, int dag) {
FermionField &out, int dag)
{
#ifdef GRID_OMP
if ( StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsAndCompute )
DhopInternalOverlappedComms(st,lo,U,UUU,in,out,dag);
else
#endif
DhopInternalSerialComms(st,lo,U,UUU,in,out,dag);
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,
DoubledGaugeField &UUU,
const FermionField &in,
FermionField &out, int dag)
{
#ifdef GRID_OMP
Compressor compressor;
int len = U._grid->oSites();
const int LLs = 1;
st.Prepare();
st.HaloGather(in,compressor);
st.CommsMergeSHM(compressor);
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Ugly explicit thread mapping introduced for OPA reasons.
//////////////////////////////////////////////////////////////////////////////////////////////////////
#pragma omp parallel
{
int tid = omp_get_thread_num();
int nthreads = omp_get_num_threads();
int ncomms = CartesianCommunicator::nCommThreads;
if (ncomms == -1) ncomms = 1;
assert(nthreads > ncomms);
if (tid >= ncomms) {
nthreads -= ncomms;
int ttid = tid - ncomms;
int n = len;
int chunk = n / nthreads;
int rem = n % nthreads;
int myblock, myn;
if (ttid < rem) {
myblock = ttid * chunk + ttid;
myn = chunk+1;
} else {
myblock = ttid*chunk + rem;
myn = chunk;
}
// do the compute
if (dag == DaggerYes) {
for (int ss = myblock; ss < myblock+myn; ++ss) {
int sU = ss;
// Interior = 1; Exterior = 0; must implement for staggered
Kernels::DhopSiteDag(st,lo,U,UUU,st.CommBuf(),1,sU,in,out,1,0);
}
} else {
for (int ss = myblock; ss < myblock+myn; ++ss) {
// Interior = 1; Exterior = 0;
int sU = ss;
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),1,sU,in,out,1,0);
}
}
} else {
st.CommunicateThreaded();
}
}
// First to enter, last to leave timing
st.CommsMerge(compressor);
if (dag == DaggerYes) {
int sz=st.surface_list.size();
parallel_for (int ss = 0; ss < sz; ss++) {
int sU = st.surface_list[ss];
Kernels::DhopSiteDag(st,lo,U,UUU,st.CommBuf(),1,sU,in,out,0,1);
}
} else {
int sz=st.surface_list.size();
parallel_for (int ss = 0; ss < sz; ss++) {
int sU = st.surface_list[ss];
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),1,sU,in,out,0,1);
}
}
#else
assert(0);
#endif
}
template <class Impl>
void ImprovedStaggeredFermion<Impl>::DhopInternalSerialComms(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,
DoubledGaugeField &UUU,
const FermionField &in,
FermionField &out, int dag)
{
assert((dag == DaggerNo) || (dag == DaggerYes));
Compressor compressor;
st.HaloExchange(in, compressor);
if (dag == DaggerYes) {
PARALLEL_FOR_LOOP
for (int sss = 0; sss < in._grid->oSites(); sss++) {
parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out);
}
} else {
PARALLEL_FOR_LOOP
for (int sss = 0; sss < in._grid->oSites(); sss++) {
parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DhopSite(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out);
}
}

View File

@ -105,21 +105,31 @@ class ImprovedStaggeredFermion : public StaggeredKernels<Impl>, public ImprovedS
void DhopInternal(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU,
const FermionField &in, FermionField &out, int dag);
void DhopInternalSerialComms(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU,
const FermionField &in, FermionField &out, int dag);
void DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU,
const FermionField &in, FermionField &out, int dag);
// Constructor
ImprovedStaggeredFermion(GaugeField &_Uthin, GaugeField &_Ufat, GridCartesian &Fgrid,
GridRedBlackCartesian &Hgrid, RealD _mass,
RealD _c1=9.0/8.0, RealD _c2=-1.0/24.0,RealD _u0=1.0,
RealD _c1, RealD _c2,RealD _u0,
const ImplParams &p = ImplParams());
ImprovedStaggeredFermion(GaugeField &_Uthin, GaugeField &_Utriple, GaugeField &_Ufat, GridCartesian &Fgrid,
//////////////////////////////////////////////////////////////////////////
// MILC constructor no coefficients; premultiply links by desired scaling
//////////////////////////////////////////////////////////////////////////
ImprovedStaggeredFermion(GaugeField &_Utriple, GaugeField &_Ufat, GridCartesian &Fgrid,
GridRedBlackCartesian &Hgrid, RealD _mass,
const ImplParams &p = ImplParams());
//////////////////////////////////////////////////////////////////////////
// A don't initialise the gauge field constructor; largely internal
//////////////////////////////////////////////////////////////////////////
ImprovedStaggeredFermion(GridCartesian &Fgrid, GridRedBlackCartesian &Hgrid, RealD _mass,
RealD _c1, RealD _c2,RealD _u0,
const ImplParams &p = ImplParams());
// DoubleStore impl dependent
void ImportGaugeSimple(const GaugeField &_Utriple, const GaugeField &_Ufat);
void ImportGauge(const GaugeField &_Uthin, const GaugeField &_Ufat);

View File

@ -41,8 +41,7 @@ ImprovedStaggeredFermion5DStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1, 3,
// 5d lattice for DWF.
template<class Impl>
ImprovedStaggeredFermion5D<Impl>::ImprovedStaggeredFermion5D(GaugeField &_Uthin,GaugeField &_Ufat,
GridCartesian &FiveDimGrid,
ImprovedStaggeredFermion5D<Impl>::ImprovedStaggeredFermion5D(GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
@ -121,18 +120,43 @@ ImprovedStaggeredFermion5D<Impl>::ImprovedStaggeredFermion5D(GaugeField &_Uthin,
assert(FiveDimGrid._simd_layout[0] ==1);
}
// Allocate the required comms buffer
ImportGauge(_Uthin,_Ufat);
int LLs = FiveDimGrid._rdimensions[0];
int vol4= FourDimGrid.oSites();
Stencil.BuildSurfaceList(LLs,vol4);
vol4=FourDimRedBlackGrid.oSites();
StencilEven.BuildSurfaceList(LLs,vol4);
StencilOdd.BuildSurfaceList(LLs,vol4);
StencilOdd.BuildSurfaceList(LLs,vol4);
}
template<class Impl>
ImprovedStaggeredFermion5D<Impl>::ImprovedStaggeredFermion5D(GaugeField &_Uthin,GaugeField &_Ufat,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,
RealD _c1,RealD _c2, RealD _u0,
const ImplParams &p) :
ImprovedStaggeredFermion5D(FiveDimGrid,FiveDimRedBlackGrid,
FourDimGrid,FourDimRedBlackGrid,
_mass,_c1,_c2,_u0,p)
{
ImportGauge(_Uthin,_Ufat);
}
template<class Impl>
ImprovedStaggeredFermion5D<Impl>::ImprovedStaggeredFermion5D(GaugeField &_Utriple,GaugeField &_Ufat,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,
const ImplParams &p) :
ImprovedStaggeredFermion5D(FiveDimGrid,FiveDimRedBlackGrid,
FourDimGrid,FourDimRedBlackGrid,
_mass,1.0,1.0,1.0,p)
{
ImportGaugeSimple(_Utriple,_Ufat);
}
template <class Impl>
@ -140,6 +164,35 @@ void ImprovedStaggeredFermion5D<Impl>::ImportGauge(const GaugeField &_Uthin)
{
ImportGauge(_Uthin,_Uthin);
};
///////////////////////////////////////////////////
// For MILC use; pass three link U's and 1 link U
///////////////////////////////////////////////////
template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::ImportGaugeSimple(const GaugeField &_Utriple,const GaugeField &_Ufat)
{
/////////////////////////////////////////////////////////////////
// Trivial import; phases and fattening and such like preapplied
/////////////////////////////////////////////////////////////////
for (int mu = 0; mu < Nd; mu++) {
auto U = PeekIndex<LorentzIndex>(_Utriple, mu);
Impl::InsertGaugeField(UUUmu,U,mu);
U = adj( Cshift(U, mu, -3));
Impl::InsertGaugeField(UUUmu,-U,mu+4);
U = PeekIndex<LorentzIndex>(_Ufat, mu);
Impl::InsertGaugeField(Umu,U,mu);
U = adj( Cshift(U, mu, -1));
Impl::InsertGaugeField(Umu,-U,mu+4);
}
pickCheckerboard(Even, UmuEven, Umu);
pickCheckerboard(Odd, UmuOdd , Umu);
pickCheckerboard(Even, UUUmuEven,UUUmu);
pickCheckerboard(Odd, UUUmuOdd, UUUmu);
}
template<class Impl>
void ImprovedStaggeredFermion5D<Impl>::ImportGauge(const GaugeField &_Uthin,const GaugeField &_Ufat)
{

View File

@ -139,6 +139,15 @@ namespace QCD {
// Constructors
// -- No Gauge field
ImprovedStaggeredFermion5D(GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
double _mass,
RealD _c1, RealD _c2,RealD _u0,
const ImplParams &p= ImplParams());
// -- Thin link and fat link, with coefficients
ImprovedStaggeredFermion5D(GaugeField &_Uthin,
GaugeField &_Ufat,
GridCartesian &FiveDimGrid,
@ -146,12 +155,24 @@ namespace QCD {
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
double _mass,
RealD _c1=9.0/8.0, RealD _c2=-1.0/24.0,RealD _u0=1.0,
RealD _c1, RealD _c2,RealD _u0,
const ImplParams &p= ImplParams());
////////////////////////////////////////////////////////////////////////////////////////////////
// MILC constructor ; triple links, no rescale factors; must be externally pre multiplied
////////////////////////////////////////////////////////////////////////////////////////////////
ImprovedStaggeredFermion5D(GaugeField &_Utriple,
GaugeField &_Ufat,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
double _mass,
const ImplParams &p= ImplParams());
// DoubleStore
void ImportGauge(const GaugeField &_U);
void ImportGauge(const GaugeField &_Uthin,const GaugeField &_Ufat);
void ImportGaugeSimple(const GaugeField &_Uthin,const GaugeField &_Ufat);
///////////////////////////////////////////////////////////////
// Data members require to support the functionality

View File

@ -75,7 +75,10 @@ int main (int argc, char ** argv)
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu);
RealD mass=0.003;
ImprovedStaggeredFermion5DR Ds(Umu,Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass);
RealD c1=9.0/8.0;
RealD c2=-1.0/24.0;
RealD u0=1.0;
ImprovedStaggeredFermion5DR Ds(Umu,Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,c1,c2,u0);
SchurStaggeredOperator<ImprovedStaggeredFermion5DR,FermionField> HermOp(Ds);
ConjugateGradient<FermionField> CG(1.0e-8,10000);

View File

@ -74,7 +74,10 @@ int main (int argc, char ** argv)
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu);
RealD mass=0.003;
ImprovedStaggeredFermion5DR Ds(Umu,Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass);
RealD c1=9.0/8.0;
RealD c2=-1.0/24.0;
RealD u0=1.0;
ImprovedStaggeredFermion5DR Ds(Umu,Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,c1,c2,u0);
MdagMLinearOperator<ImprovedStaggeredFermion5DR,FermionField> HermOp(Ds);
ConjugateGradient<FermionField> CG(1.0e-8,10000);
@ -86,7 +89,7 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
std::cout << GridLogMessage << " Calling 4d CG "<<std::endl;
std::cout << GridLogMessage << "****************************************************************** "<<std::endl;
ImprovedStaggeredFermionR Ds4d(Umu,Umu,*UGrid,*UrbGrid,mass);
ImprovedStaggeredFermionR Ds4d(Umu,Umu,*UGrid,*UrbGrid,mass,c1,c2,u0);
MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp4d(Ds4d);
FermionField src4d(UGrid); random(pRNG,src4d);
FermionField result4d(UGrid); result4d=zero;

View File

@ -71,7 +71,10 @@ int main (int argc, char ** argv)
}
RealD mass=0.003;
ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass);
RealD c1=9.0/8.0;
RealD c2=-1.0/24.0;
RealD u0=1.0;
ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
FermionField res_o(&RBGrid);
FermionField src_o(&RBGrid);

View File

@ -65,7 +65,10 @@ int main (int argc, char ** argv)
FermionField resid(&Grid);
RealD mass=0.1;
ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass);
RealD c1=9.0/8.0;
RealD c2=-1.0/24.0;
RealD u0=1.0;
ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
ConjugateGradient<FermionField> CG(1.0e-8,10000);
SchurRedBlackStaggeredSolve<FermionField> SchurSolver(CG);

View File

@ -73,7 +73,10 @@ int main (int argc, char ** argv)
}
RealD mass=0.1;
ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass);
RealD c1=9.0/8.0;
RealD c2=-1.0/24.0;
RealD u0=1.0;
ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0);
MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds);
ConjugateGradient<FermionField> CG(1.0e-6,10000);