diff --git a/benchmarks/Benchmark_dwf.cc b/benchmarks/Benchmark_dwf.cc index f94a3e38..f86fd072 100644 --- a/benchmarks/Benchmark_dwf.cc +++ b/benchmarks/Benchmark_dwf.cc @@ -82,7 +82,7 @@ int main (int argc, char ** argv) DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); std::cout< header file. */ #undef HAVE_ENDIAN_H +/* Define to 1 if you have the header file. */ +#undef HAVE_EXECINFO_H + /* Support FMA3 (Fused Multiply-Add) instructions */ #undef HAVE_FMA @@ -134,9 +137,6 @@ /* Define to the one symbol short name of this package. */ #undef PACKAGE_TARNAME -/* Define to the home page for this package. */ -#undef PACKAGE_URL - /* Define to the version of this package. */ #undef PACKAGE_VERSION diff --git a/lib/Init.cc b/lib/Init.cc index e427fe18..ab3b5571 100644 --- a/lib/Init.cc +++ b/lib/Init.cc @@ -17,7 +17,6 @@ #define __X86_64 - #ifdef HAVE_EXECINFO_H #include #endif diff --git a/lib/PerfCount.h b/lib/PerfCount.h index 264d571e..c379639d 100644 --- a/lib/PerfCount.h +++ b/lib/PerfCount.h @@ -5,22 +5,27 @@ #include #include #include + #include + +#ifdef __linux__ #include #include - +#endif namespace Grid { +#ifdef __linux__ static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) { - int ret; + int ret=0; ret = syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); return ret; } +#endif class PerformanceCounter { @@ -63,7 +68,6 @@ public: int PCT; - struct perf_event_attr pe; long long count; int fd; uint64_t elapsed; @@ -74,15 +78,19 @@ public: } PerformanceCounter(int _pct) { +#ifdef __linux__ assert(_pct>=0); assert(_pct void SizeSquare(DenseMatrix & mat, int &N) assert(N==M); } +template void Resize(DenseVector & mat, int N) { + mat.resize(N); +} template void Resize(DenseMatrix & mat, int N, int M) { mat.resize(N); for(int i=0;i const RealD small = 1.0e-16; public: int lock; - int converged; + int get; int Niter; + int converged; int Nk; // Number of converged sought int Np; // Np -- Number of spare vecs in kryloc space @@ -59,6 +59,7 @@ public: // Sanity checked this routine (step) against Saad. ///////////////////////// void RitzMatrix(DenseVector& evec,int k){ + if(1) return; GridBase *grid = evec[0]._grid; @@ -451,8 +452,9 @@ until convergence std::cout << " -- Nconv = "<< Nconv << "\n"; } - + ///////////////////////////////////////////////// // Adapted from Rudy's lanczos factor routine + ///////////////////////////////////////////////// int Lanczos_Factor(int start, int end, int cont, DenseVector & bq, Field &bf, @@ -546,10 +548,16 @@ until convergence std::cout << "alpha = " << alpha << " fnorm = " << fnorm << '\n'; ///Iterative refinement of orthogonality V = [ bq[0] bq[1] ... bq[M] ] -#if 0 int re = 0; + // FIXME undefined params; how set in Rudy's code + int ref =0; + Real rho = 1.0e-8; + while( re == ref || (sqbt < rho * bck && re < 5) ){ + Field tmp2(grid); + Field tmp1(grid); + //bex = V^dag bf DenseVector bex(j+1); for(int k=0;k 1) std::cout << "orthagonality refined " << re << " times" < H; Resize(H,Nm,Nm); - Resize(evals,Nm,Nm); + Resize(evals,Nm); Resize(evecs,Nm); int ff = Lanczos_Factor(0, M, cont, bq,bf,H); // 0--M to begin with @@ -702,7 +710,6 @@ until convergence RealD beta; Householder_vector(ck, 0, 2, v, beta); - Householder_mult(H,v,beta,0,lock_num+0,lock_num+2,0); Householder_mult(H,v,beta,0,lock_num+0,lock_num+2,1); ///Accumulate eigenvector @@ -758,11 +765,11 @@ until convergence RealD resid_nrm= norm2(bf); if(!lock) converged = 0; - +#if 0 for(int i = SS - lock_num - 1; i >= SS - Nk && i >= 0; --i){ RealD diff = 0; - diff = abs(tevecs[i][Nm - 1 - lock_num]) * resid_nrm; + diff = abs( tevecs[i][Nm - 1 - lock_num] ) * resid_nrm; std::cout << "residual estimate " << SS-1-i << " " << diff << " of (" << tevals[i] << ")" << std::endl; @@ -785,53 +792,29 @@ until convergence break; } } +#endif std::cout << "Got " << converged << " so far " < goodval(get); + ///Check + void Check(DenseVector &evals, + DenseVector > &evecs) { + + DenseVector goodval(this->get); + EigenSort(evals,evecs); int NM = Nm; - int Nget = this->get; - S **V; - V = new S* [NM]; - RealD *QZ; - QZ = new RealD [NM*NM]; + DenseVector< DenseVector > V; Size(V,NM); + DenseVector QZ(NM*NM); + for(int i = 0; i < NM; i++){ for(int j = 0; j < NM; j++){ - - QZ[i*NM+j] = this->evecs[i][j]; - - int f_size_cb = 24*dop.cbLs*dop.node_cbvol; - - for(int cb = this->prec; cb < 2; cb++){ - for(int i = 0; i < NM; i++){ - V[i] = (S*)(this->bq[i][cb]); - - const int m0 = 4 * 4; // this is new code - assert(m0 % 16 == 0); // see the reason in VtimesQ.C - - const int row_per_thread = f_size_cb / (bfmarg::threads); - { - - { - DenseVector vrow_tmp0(m0*NM); - DenseVector vrow_tmp1(m0*NM); - RealD *row_tmp0 = vrow_tmp0.data(); - RealD *row_tmp1 = vrow_tmp1.data(); - VtimesQ(QZ, NM, V, row_tmp0, row_tmp1, id * row_per_thread, m0, (id + 1) * row_per_thread); - } - } - } - } + // evecs[i][j]; } } } -#endif /** @@ -1020,4 +1003,4 @@ static void Lock(DenseMatrix &H, ///Hess mtx } #endif -#endif + diff --git a/lib/lattice/Lattice_unary.h b/lib/lattice/Lattice_unary.h index 6d72ef31..c5698751 100644 --- a/lib/lattice/Lattice_unary.h +++ b/lib/lattice/Lattice_unary.h @@ -24,6 +24,17 @@ PARALLEL_FOR_LOOP return ret; } + template Lattice div(const Lattice &rhs,Integer y){ + Lattice ret(rhs._grid); + ret.checkerboard = rhs.checkerboard; + conformable(ret,rhs); +PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss++){ + ret._odata[ss]=div(rhs._odata[ss],y); + } + return ret; + } + template Lattice expMat(const Lattice &rhs, ComplexD alpha, Integer Nexp = DEFAULT_MAT_EXP){ Lattice ret(rhs._grid); ret.checkerboard = rhs.checkerboard; diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index 9b336374..410aa629 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -266,11 +266,8 @@ void WilsonFermion5D::DhopInternal(StencilImpl & st, LebesgueOrder &lo, if( this->HandOptDslash ) { #pragma omp parallel for schedule(static) for(int ss=0;ssoSites();ss++){ + int sU=ss; for(int s=0;sHandOptDslash ) { + /* -#pragma omp parallel for +#pragma omp parallel for schedule(static) for(int t=0;toSites();ss++){ - for(int s=0;soSites();ss++){ + int sU=ss; + for(int s=0;soSites();ss++){ + int sU=ss; for(int s=0;s > &buf, int sF,int sU,const FermionField &in, FermionField &out,int dirdisp,int gamma); #if defined(AVX512) || defined(IMCI) - void DiracOptAsmDhopSite(CartesianStencil &st,DoubledGaugeField &U, + void DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, int sF,int sU,const FermionField &in, FermionField &out,uint64_t *); #else - void DiracOptAsmDhopSite(CartesianStencil &st,DoubledGaugeField &U, + void DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, int sF,int sU,const FermionField &in, FermionField &out,uint64_t *p){ DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3 diff --git a/lib/serialisation/BinaryIO.cc b/lib/serialisation/BinaryIO.cc index eb3f9833..f546ddb4 100644 --- a/lib/serialisation/BinaryIO.cc +++ b/lib/serialisation/BinaryIO.cc @@ -1,15 +1,14 @@ #include -using namespace Grid; -using namespace std; +namespace Grid { // Writer implementation /////////////////////////////////////////////////////// -BinaryWriter::BinaryWriter(const string &fileName) -: file_(fileName, ios::binary|ios::out) +BinaryWriter::BinaryWriter(const std::string &fileName) +: file_(fileName, std::ios::binary|std::ios::out) {} template <> -void BinaryWriter::writeDefault(const string &s, const string &output) +void BinaryWriter::writeDefault(const std::string &s, const std::string &output) { uint64_t sz = output.size(); @@ -21,12 +20,12 @@ void BinaryWriter::writeDefault(const string &s, const string &output) } // Reader implementation /////////////////////////////////////////////////////// -BinaryReader::BinaryReader(const string &fileName) -: file_(fileName, ios::binary|ios::in) +BinaryReader::BinaryReader(const std::string &fileName) +: file_(fileName, std::ios::binary|std::ios::in) {} template <> -void BinaryReader::readDefault(const string &s, string &output) +void BinaryReader::readDefault(const std::string &s, std::string &output) { uint64_t sz; @@ -34,3 +33,4 @@ void BinaryReader::readDefault(const string &s, string &output) output.reserve(sz); file_.read((char *)output.data(), sz); } +} diff --git a/lib/serialisation/TextIO.cc b/lib/serialisation/TextIO.cc index 9d88b26a..8b9c4f01 100644 --- a/lib/serialisation/TextIO.cc +++ b/lib/serialisation/TextIO.cc @@ -1,14 +1,12 @@ #include -using namespace Grid; -using namespace std; - +namespace Grid { // Writer implementation /////////////////////////////////////////////////////// -TextWriter::TextWriter(const string &fileName) -: file_(fileName, ios::out) +TextWriter::TextWriter(const std::string &fileName) +: file_(fileName, std::ios::out) {} -void TextWriter::push(const string &s) +void TextWriter::push(const std::string &s) { level_++; }; @@ -27,11 +25,11 @@ void TextWriter::indent(void) }; // Reader implementation /////////////////////////////////////////////////////// -TextReader::TextReader(const string &fileName) -: file_(fileName, ios::in) +TextReader::TextReader(const std::string &fileName) +: file_(fileName, std::ios::in) {} -void TextReader::push(const string &s) +void TextReader::push(const std::string &s) { level_++; }; @@ -50,17 +48,18 @@ void TextReader::checkIndent(void) file_.get(c); if (c != '\t') { - cerr << "mismatch on tab " << c << " level " << level_; - cerr << " i "<< i < -void TextReader::readDefault(const string &s, string &output) +void TextReader::readDefault(const std::string &s, std::string &output) { checkIndent(); output.clear(); getline(file_, output); } +} diff --git a/lib/serialisation/XmlIO.cc b/lib/serialisation/XmlIO.cc index 8dd93de7..b5c6e5bd 100644 --- a/lib/serialisation/XmlIO.cc +++ b/lib/serialisation/XmlIO.cc @@ -1,10 +1,8 @@ #include -using namespace Grid; -using namespace std; - +namespace Grid { // Writer implementation /////////////////////////////////////////////////////// -XmlWriter::XmlWriter(const string &fileName) +XmlWriter::XmlWriter(const std::string &fileName) : fileName_(fileName) { node_ = doc_.append_child(); @@ -16,7 +14,7 @@ XmlWriter::~XmlWriter(void) doc_.save_file(fileName_.c_str(), " "); } -void XmlWriter::push(const string &s) +void XmlWriter::push(const std::string &s) { node_ = node_.append_child(s.c_str()); } @@ -27,22 +25,22 @@ void XmlWriter::pop(void) } // Reader implementation /////////////////////////////////////////////////////// -XmlReader::XmlReader(const string &fileName) +XmlReader::XmlReader(const std::string &fileName) : fileName_(fileName) { pugi::xml_parse_result result = doc_.load_file(fileName_.c_str()); if ( !result ) { - cerr << "XML error description: " << result.description() << "\n"; - cerr << "XML error offset : " << result.offset << "\n"; - abort(); + std::cerr << "XML error description: " << result.description() << "\n"; + std::cerr << "XML error offset : " << result.offset << "\n"; + std::abort(); } node_ = doc_.child("grid"); } -void XmlReader::push(const string &s) +void XmlReader::push(const std::string &s) { node_ = node_.child(s.c_str()); } @@ -53,7 +51,8 @@ void XmlReader::pop(void) } template <> -void XmlReader::readDefault(const string &s, string &output) +void XmlReader::readDefault(const std::string &s, std::string &output) { output = node_.child(s.c_str()).first_child().value(); } +} diff --git a/lib/serialisation/XmlIO.h b/lib/serialisation/XmlIO.h index ba8af167..45fc6fac 100644 --- a/lib/serialisation/XmlIO.h +++ b/lib/serialisation/XmlIO.h @@ -96,6 +96,7 @@ namespace Grid node_.child("elem").set_name("elem-done"); i++; } + // assert( is.tellg()==-1); pop(); } diff --git a/lib/simd/Grid_vector_unops.h b/lib/simd/Grid_vector_unops.h index e4942937..540a54f5 100644 --- a/lib/simd/Grid_vector_unops.h +++ b/lib/simd/Grid_vector_unops.h @@ -67,6 +67,14 @@ namespace Grid { } }; + template struct DivIntFunctor { + Integer y; + DivIntFunctor(Integer _y) : y(_y) {}; + scalar operator()(const scalar &a) const { + return Integer(a)/y; + } + }; + template struct RealFunctor { scalar operator()(const scalar &a) const { return real(a); @@ -131,6 +139,10 @@ namespace Grid { inline Grid_simd mod(const Grid_simd &r,Integer y) { return SimdApply(ModIntFunctor(y),r); } + template < class S, class V > + inline Grid_simd div(const Grid_simd &r,Integer y) { + return SimdApply(DivIntFunctor(y),r); + } //////////////////////////////////////////////////////////////////////////// // Allows us to assign into **conformable** real vectors from complex //////////////////////////////////////////////////////////////////////////// diff --git a/lib/tensors/Tensor_unary.h b/lib/tensors/Tensor_unary.h index 045097a3..d2c3fae4 100644 --- a/lib/tensors/Tensor_unary.h +++ b/lib/tensors/Tensor_unary.h @@ -111,7 +111,7 @@ template inline auto toComplex(const iMatrix &z) -> type return ret; } - +BINARY_RSCALAR(div,Integer); BINARY_RSCALAR(mod,Integer); BINARY_RSCALAR(pow,RealD); diff --git a/scripts/configure-commands b/scripts/configure-commands index 07506d27..a3599d1f 100755 --- a/scripts/configure-commands +++ b/scripts/configure-commands @@ -59,7 +59,7 @@ clang-avx2) CXX=clang++ ../../configure --enable-simd=AVX2 CXXFLAGS="-mavx2 -mfma -O3 -std=c++11" LIBS="-lgmp -lmpfr" --enable-comms=none ;; clang-avx-openmp) -CXX=clang-omp++ ../../configure --enable-simd=AVX CXXFLAGS="-mavx -fopenmp -O3 -std=c++11" LDFLAGS="-fopenmp" LIBS="-lgmp -lmpfr" --enable-comms=none +CXX=clang-omp++ ../../configure --enable-precision=double --enable-simd=AVX CXXFLAGS="-mavx -fopenmp -O3 -std=c++11" LDFLAGS="-fopenmp" LIBS="-lgmp -lmpfr" --enable-comms=none ;; clang-xc30) CXX=$HOME/Clang/install/bin/clang++ ../../configure --enable-simd=AVX CXXFLAGS="-mavx -O3 -std=c++11 -I/opt/gcc/4.9.2/snos/include/g++/x86_64-suse-linux/ -I/opt/gcc/4.9.2/snos/include/g++/ " LDFLAGS="" LIBS="-lgmp -lmpfr" --enable-comms=none diff --git a/tests/Make.inc b/tests/Make.inc index ae1faca3..e0a17529 100644 --- a/tests/Make.inc +++ b/tests/Make.inc @@ -1,5 +1,5 @@ -bin_PROGRAMS = Test_GaugeAction Test_cayley_cg Test_cayley_coarsen_support Test_cayley_even_odd Test_cayley_ldop_cr Test_cf_coarsen_support Test_cf_cr_unprec Test_cheby Test_contfrac_cg Test_contfrac_even_odd Test_contfrac_force Test_cshift Test_cshift_red_black Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_cr_unprec Test_dwf_even_odd Test_dwf_force Test_dwf_fpgcr Test_dwf_hdcr Test_gamma Test_hmc_EODWFRatio Test_hmc_EOWilsonFermionGauge Test_hmc_EOWilsonRatio Test_hmc_WilsonFermionGauge Test_hmc_WilsonGauge Test_hmc_WilsonRatio Test_lie_generators Test_main Test_multishift_sqrt Test_nersc_io Test_partfrac_force Test_quenched_update Test_remez Test_rhmc_EOWilson1p1 Test_rhmc_EOWilsonRatio Test_rhmc_Wilson1p1 Test_rhmc_WilsonRatio Test_rng Test_rng_fixed Test_serialisation Test_simd Test_stencil Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_cr_unprec Test_wilson_even_odd Test_wilson_force Test_wilson_force_phiMdagMphi Test_wilson_force_phiMphi +bin_PROGRAMS = Test_GaugeAction Test_cayley_cg Test_cayley_coarsen_support Test_cayley_even_odd Test_cayley_ldop_cr Test_cf_coarsen_support Test_cf_cr_unprec Test_cheby Test_contfrac_cg Test_contfrac_even_odd Test_contfrac_force Test_cshift Test_cshift_red_black Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_cr_unprec Test_dwf_even_odd Test_dwf_force Test_dwf_fpgcr Test_dwf_hdcr Test_gamma Test_hmc_EODWFRatio Test_hmc_EOWilsonFermionGauge Test_hmc_EOWilsonRatio Test_hmc_WilsonFermionGauge Test_hmc_WilsonGauge Test_hmc_WilsonRatio Test_lie_generators Test_main Test_multishift_sqrt Test_nersc_io Test_partfrac_force Test_quenched_update Test_remez Test_rhmc_EOWilson1p1 Test_rhmc_EOWilsonRatio Test_rhmc_Wilson1p1 Test_rhmc_WilsonRatio Test_rng Test_rng_fixed Test_serialisation Test_simd Test_stencil Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_cr_unprec Test_wilson_even_odd Test_wilson_force Test_wilson_force_phiMdagMphi Test_wilson_force_phiMphi Test_synthetic_lanczos Test_GaugeAction_SOURCES=Test_GaugeAction.cc diff --git a/tests/Test_cheby.cc b/tests/Test_cheby.cc index 08897c90..81b08816 100644 --- a/tests/Test_cheby.cc +++ b/tests/Test_cheby.cc @@ -57,5 +57,21 @@ int main (int argc, char ** argv) ChebyStep.csv(of); } + lo=-8; + hi=8; + Chebyshev ChebyIndefInv(lo,hi,40,InverseApproximation); + { + std::ofstream of("chebyindefinv"); + ChebyIndefInv.csv(of); + } + + lo=0; + hi=64; + Chebyshev ChebyNE(lo,hi,40,InverseApproximation); + { + std::ofstream of("chebyNE"); + ChebyNE.csv(of); + } + Grid_finalize(); } diff --git a/tests/Test_dwf_hdcr.cc b/tests/Test_dwf_hdcr.cc index 94b8481a..006d0e5e 100644 --- a/tests/Test_dwf_hdcr.cc +++ b/tests/Test_dwf_hdcr.cc @@ -6,6 +6,22 @@ using namespace std; using namespace Grid; using namespace Grid::QCD; +class myclass: Serializable { +public: + + GRID_DECL_CLASS_MEMBERS(myclass, + int, domaindecompose, + int, domainsize, + int, order, + double, lo, + double, hi, + int, steps); + + myclass(){}; + +}; +myclass params; + RealD InverseApproximation(RealD x){ return 1.0/x; } @@ -26,15 +42,21 @@ public: Aggregates & _Aggregates; CoarseOperator & _CoarseOperator; - Matrix & _Matrix; + Matrix & _FineMatrix; FineOperator & _FineOperator; + Matrix & _SmootherMatrix; + FineOperator & _SmootherOperator; // Constructor - MultiGridPreconditioner(Aggregates &Agg, CoarseOperator &Coarse, FineOperator &Fine,Matrix &FineMatrix) + MultiGridPreconditioner(Aggregates &Agg, CoarseOperator &Coarse, + FineOperator &Fine,Matrix &FineMatrix, + FineOperator &Smooth,Matrix &SmootherMatrix) : _Aggregates(Agg), _CoarseOperator(Coarse), _FineOperator(Fine), - _Matrix(FineMatrix) + _FineMatrix(FineMatrix), + _SmootherOperator(Smooth), + _SmootherMatrix(SmootherMatrix) { } @@ -43,7 +65,7 @@ public: FineField p1(in._grid); FineField p2(in._grid); - MdagMLinearOperator fMdagMOp(_Matrix); + MdagMLinearOperator fMdagMOp(_FineMatrix); p1=in; RealD absp2; @@ -58,74 +80,20 @@ public: } } -#if 0 void operator()(const FineField &in, FineField & out) { - - FineField Min(in._grid); - FineField tmp(in._grid); - - CoarseVector Csrc(_CoarseOperator.Grid()); - CoarseVector Ctmp(_CoarseOperator.Grid()); - CoarseVector Csol(_CoarseOperator.Grid()); - - // Monitor completeness of low mode space - _Aggregates.ProjectToSubspace (Csrc,in); - _Aggregates.PromoteFromSubspace(Csrc,out); - std::cout< fCG(1.0e-3,1000); - ConjugateGradient CG(1.0e-8,100000); - - //////////////////////////////////////////////////////////////////////// - // ADEF2: [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min] - //////////////////////////////////////////////////////////////////////// - - // Smoothing step, followed by coarse grid correction - MdagMLinearOperator MdagMOp(_Matrix); - - Min=in; - std::cout< MdagMOp(_CoarseOperator); - HermitianLinearOperator HermOp(_CoarseOperator); - Csol=zero; - _Aggregates.ProjectToSubspace (Csrc,out); - HermOp.AdjOp(Csrc,Ctmp);// Normal equations - CG(MdagMOp ,Ctmp,Csol); - _Aggregates.PromoteFromSubspace(Csol,out); - - out = Min + out;; - */ - + if ( params.domaindecompose ) { + operatorSAP(in,out); + } else { + operatorCheby(in,out); + } } -#endif //////////////////////////////////////////////////////////////////////// // ADEF2: [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min] // ADEF1: [MP+Q ] in =M [1 - A Q] in + Q in //////////////////////////////////////////////////////////////////////// -#if 0 - void operator()(const FineField &in, FineField & out) { +#if 1 + void operatorADEF2(const FineField &in, FineField & out) { CoarseVector Csrc(_CoarseOperator.Grid()); CoarseVector Ctmp(_CoarseOperator.Grid()); @@ -136,7 +104,7 @@ public: HermitianLinearOperator HermOp(_CoarseOperator); MdagMLinearOperator MdagMOp(_CoarseOperator); - MdagMLinearOperator fMdagMOp(_Matrix); + MdagMLinearOperator fMdagMOp(_FineMatrix); FineField tmp(in._grid); FineField res(in._grid); @@ -189,8 +157,8 @@ public: } #endif // ADEF1: [MP+Q ] in =M [1 - A Q] in + Q in -#if 0 - void operator()(const FineField &in, FineField & out) { +#if 1 + void operatorADEF1(const FineField &in, FineField & out) { CoarseVector Csrc(_CoarseOperator.Grid()); CoarseVector Ctmp(_CoarseOperator.Grid()); @@ -201,7 +169,7 @@ public: HermitianLinearOperator HermOp(_CoarseOperator); MdagMLinearOperator MdagMOp(_CoarseOperator); - ShiftedMdagMLinearOperator fMdagMOp(_Matrix,0.1); + ShiftedMdagMLinearOperator fMdagMOp(_FineMatrix,0.1); FineField tmp(in._grid); FineField res(in._grid); @@ -234,14 +202,79 @@ public: } #endif + void SAP (const FineField & src,FineField & psi){ + + Lattice > coor(src._grid); + Lattice > subset(src._grid); + + FineField r(src._grid); + FineField zz(src._grid); zz=zero; + FineField vec1(src._grid); + FineField vec2(src._grid); + + const Integer block=params.domainsize; + + subset=zero; + for(int mu=0;mu fMdagMOp(_SmootherMatrix,0.0); + Chebyshev Cheby (params.lo,params.hi,params.order,InverseApproximation); + + RealD resid; + for(int i=0;i 20*71 = 1400 matmuls. +// 2*71 = 140 comms. + + // Even domain solve + r= where(subset==(Integer)0,r,zz); + _SmootherOperator.AdjOp(r,vec1); + Cheby(fMdagMOp,vec1,vec2); // solves MdagM = g5 M g5M + psi = psi + vec2; + + // Odd domain residual + _FineOperator.Op(psi,vec1);// this is the G5 herm bit + r= src - vec1 ; + r= where(subset==(Integer)1,r,zz); + + resid = norm2(r) /norm2(src); + std::cout << "SAP "< fMdagMOp(_Matrix); - ShiftedMdagMLinearOperator fMdagMOp(_Matrix,0.5); + // MdagMLinearOperator fMdagMOp(_FineMatrix); + ShiftedMdagMLinearOperator fMdagMOp(_SmootherMatrix,0.0); RealD Ni,r; @@ -250,7 +283,7 @@ public: for(int ilo=0;ilo<3;ilo++){ for(int ord=5;ord<50;ord*=2){ - _FineOperator.AdjOp(in,vec1); + _SmootherOperator.AdjOp(in,vec1); Chebyshev Cheby (lo[ilo],70.0,ord,InverseApproximation); Cheby(fMdagMOp,vec1,vec2); // solves MdagM = g5 M g5M @@ -264,7 +297,7 @@ public: } } - void operator()(const FineField &in, FineField & out) { + void operatorCheby(const FineField &in, FineField & out) { CoarseVector Csrc(_CoarseOperator.Grid()); CoarseVector Ctmp(_CoarseOperator.Grid()); @@ -275,18 +308,18 @@ public: HermitianLinearOperator HermOp(_CoarseOperator); MdagMLinearOperator MdagMOp(_CoarseOperator); - // MdagMLinearOperator fMdagMOp(_Matrix); - ShiftedMdagMLinearOperator fMdagMOp(_Matrix,0.0); + // MdagMLinearOperator fMdagMOp(_FineMatrix); + ShiftedMdagMLinearOperator fMdagMOp(_SmootherMatrix,0.0); FineField vec1(in._grid); FineField vec2(in._grid); // Chebyshev Cheby (0.5,70.0,30,InverseApproximation); // Chebyshev ChebyAccu(0.5,70.0,30,InverseApproximation); - Chebyshev Cheby (2.0,70.0,10,InverseApproximation); - Chebyshev ChebyAccu(2.0,70.0,10,InverseApproximation); - Cheby.JacksonSmooth(); - ChebyAccu.JacksonSmooth(); + Chebyshev Cheby (2.0,70.0,15,InverseApproximation); + Chebyshev ChebyAccu(2.0,70.0,15,InverseApproximation); + // Cheby.JacksonSmooth(); + // ChebyAccu.JacksonSmooth(); _Aggregates.ProjectToSubspace (Csrc,in); _Aggregates.PromoteFromSubspace(Csrc,out); @@ -305,7 +338,7 @@ public: RealD Ni = norm2(in); - _FineOperator.AdjOp(in,vec1);// this is the G5 herm bit + _SmootherOperator.AdjOp(in,vec1);// this is the G5 herm bit ChebyAccu(fMdagMOp,vec1,out); // solves MdagM = g5 M g5M std::cout< CG(1.0e-3,100000); + + HermitianLinearOperator HermOp(_CoarseOperator); + MdagMLinearOperator MdagMOp(_CoarseOperator); + + FineField vec1(in._grid); + FineField vec2(in._grid); + + _Aggregates.ProjectToSubspace (Csrc,in); + _Aggregates.PromoteFromSubspace(Csrc,out); + std::cout< > coor(UGrid); + zz=zero; + for(int mu=0;mu(Umu,mu); + U = where(mod(coor,params.domainsize)==(Integer)0,zz,U); + PokeIndex(UmuDD,U,mu); + } + } else { + UmuDD = Umu; + } // SU3::ColdConfiguration(RNG4,Umu); // SU3::TepidConfiguration(RNG4,Umu); // SU3::HotConfiguration(RNG4,Umu); @@ -402,6 +517,7 @@ int main (int argc, char ** argv) std::cout< HermIndefOp(Ddwf); + Gamma5R5HermitianLinearOperator HermIndefOpDD(DdwfDD); CoarsenedMatrix LDOp(*Coarse5d); LDOp.CoarsenOperator(FGrid,HermIndefOp,Aggregates); @@ -467,7 +584,13 @@ int main (int argc, char ** argv) std::cout< Precon(Aggregates, LDOp,HermIndefOp,Ddwf); + MultiGridPreconditioner Precon (Aggregates, LDOp, + HermIndefOp,Ddwf, + HermIndefOp,Ddwf); + + MultiGridPreconditioner PreconDD(Aggregates, LDOp, + HermIndefOp,Ddwf, + HermIndefOpDD,DdwfDD); TrivialPrecon simple; std::cout< simple; // ConjugateGradient fCG(1.0e-8,100000); // fCG(HermDefOp,src,result); @@ -496,12 +630,22 @@ int main (int argc, char ** argv) std::cout< PGCRDD(1.0e-8,100000,PreconDD,8,128); + result=zero; + std::cout< PGCR(1.0e-8,100000,Precon,8,128); - std::cout< PGCR(1.0e-8,100000,Precon,8,128); + // std::cout< > Stencil; for(int dir=0;dir<4;dir++){ for(int disp=0;disp directions(npoint,dir); std::vector displacements(npoint,disp); - CartesianStencil myStencil(&Fine,npoint,0,directions,displacements); + Stencil myStencil(&Fine,npoint,0,directions,displacements); std::vector ocoor(4); for(int o=0;o directions(npoint,dir); std::vector displacements(npoint,disp); - CartesianStencil EStencil(&rbFine,npoint,Even,directions,displacements); - CartesianStencil OStencil(&rbFine,npoint,Odd,directions,displacements); + Stencil EStencil(&rbFine,npoint,Even,directions,displacements); + Stencil OStencil(&rbFine,npoint,Odd,directions,displacements); std::vector ocoor(4); for(int o=0;o Cheby(alpha,beta,mu,order); - - std::ofstream file("pooh.dat"); + std::ofstream file("cheby.dat"); Cheby.csv(file); HermOpOperatorFunction X; @@ -114,9 +117,9 @@ int main (int argc, char ** argv) } { - std::vector eval(Nm); - std::vector evec(Nm,grid); - ChebyIRL.calc(eval,evec,src, Nconv); + // std::vector eval(Nm); + // std::vector evec(Nm,grid); + // ChebyIRL.calc(eval,evec,src, Nconv); } Grid_finalize();