diff --git a/Grid/algorithms/approx/Chebyshev.h b/Grid/algorithms/approx/Chebyshev.h index 584ed1d5..857f4168 100644 --- a/Grid/algorithms/approx/Chebyshev.h +++ b/Grid/algorithms/approx/Chebyshev.h @@ -240,12 +240,14 @@ public: Field T0(grid); T0 = in; Field T1(grid); Field T2(grid); + Field Tout(grid); Field y(grid); Field *Tnm = &T0; Field *Tn = &T1; Field *Tnp = &T2; + std::cout << GridLogMessage << "Chebyshev() starts"< &Linop, // op LinearOperatorBase &SLinop, // op GridRedBlackCartesian * FrbGrid, @@ -262,8 +262,8 @@ public: int Nbatch = R/Nevec_acc; assert( R%Nevec_acc == 0 ); - Glog << "nBatch, Nevec_acc, R, Nu = " - << Nbatch << "," << Nevec_acc << "," << R << "," << Nu << std::endl; +// Glog << "nBatch, Nevec_acc, R, Nu = " +// << Nbatch << "," << Nevec_acc << "," << R << "," << Nu << std::endl; #if 0 // a trivial test for (int col=0; colshow_decomposition(); - printf("GRID_CUDA\n"); +// printf("GRID_CUDA\n"); // set eigenvector buffers for the cuBLAS calls //const uint64_t nsimd = grid->Nsimd(); const uint64_t sites = grid->lSites(); cudaStat = cudaMallocManaged((void **)&w_acc, Nu*sites*12*sizeof(CUDA_COMPLEX)); - Glog << "w_acc= "< 0 && MaxIter > 1) Np /= MaxIter; int Nblock_p = Np/Nu; + for(int i=0;i< evec.size();i++) evec[0].Advise()=AdviseInfrequentUse; Glog << std::string(74,'*') << std::endl; Glog << fname + " starting iteration 0 / "<< MaxIter<< std::endl; @@ -879,10 +879,10 @@ private: assert((Nu%mrhs)==0); std::vector in(mrhs,f_grid); - Field s_in(sf_grid); - Field s_out(sf_grid); + Field s_in(sf_grid); + Field s_out(sf_grid); // unnecessary copy. Can or should it be avoided? -int k_start = 0; + int k_start = 0; while ( k_start < Nu) { Glog << "k_start= "< eval(JP.Nm); std::vector src(JP.Nu,FrbGrid); if (0) { +// in case RNG is too slow std::cout << GridLogMessage << "Using RNG5"<_processor,re); src_tmp=re; pickCheckerboard(Odd,src[i],src_tmp); }