diff --git a/Grid/algorithms/CoarsenedMatrix.h b/Grid/algorithms/CoarsenedMatrix.h index 9cffaed0..c56b3840 100644 --- a/Grid/algorithms/CoarsenedMatrix.h +++ b/Grid/algorithms/CoarsenedMatrix.h @@ -49,13 +49,11 @@ inline void blockMaskedInnerProduct(Lattice &CoarseInner, Lattice fine_inner_msk(fine); // Multiply could be fused with innerProduct - // Single block sum kernel could do both masks. fine_inner = localInnerProduct(fineX,fineY); mult(fine_inner_msk, fine_inner,FineMask); blockSum(CoarseInner,fine_inner_msk); } - class Geometry { public: int npoint; @@ -80,8 +78,12 @@ public: } directions [2*_d]=0; displacements[2*_d]=0; - } + std::cout < @@ -285,6 +287,8 @@ public: /////////////////////// GridBase * Grid(void) { return _grid; }; // this is all the linalg routines need to know + virtual std::vector Directions(void) { return geom.directions; }; + virtual std::vector Displacements(void){ return geom.displacements; }; void M (const CoarseVector &in, CoarseVector &out) { conformable(_grid,in.Grid()); @@ -418,32 +422,17 @@ public: int ndim = in.Grid()->Nd(); - ////////////// - // 4D action like wilson - // 0+ => 0 - // 0- => 1 - // 1+ => 2 - // 1- => 3 - // etc.. - ////////////// - // 5D action like DWF - // 1+ => 0 - // 1- => 1 - // 2+ => 2 - // 2- => 3 - // etc.. - auto point = [dir, disp, ndim](){ - if(dir == 0 and disp == 0) - return 8; - else if ( ndim==4 ) { - return (4 * dir + 1 - disp) / 2; - } else { - return (4 * (dir-1) + 1 - disp) / 2; - } - }(); + int point=-1; + for(int p=0;pM(Cin,Cout); - std::cout << GridLogMessage<< " Cout "< > &linop, @@ -547,13 +536,20 @@ public: CoarseScalar InnerProd(Grid()); - size_t free,total; - cudaMemGetInfo(&free,&total); std::cout << "ForceHermitian "<oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); }); - + if ( lhermitian && (disp==-1) ) { + for(int pp=0;pp = * + int dirp = geom.directions[pp]; + int dispp = geom.displacements[pp]; + if ( (dirp==dir) && (dispp==1) ){ + auto sft = conjugate(Cshift(oZProj,dir,1)); + autoView( sft_v , sft , AcceleratorWrite); + autoView( A_pp , A[pp], AcceleratorWrite); + accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_pp[ss](i,j),sft_v(ss)); }); + } + } + } } } } @@ -664,17 +681,11 @@ public: } } - if(lhermitian) { - std::cout << GridLogMessage << " ForceHermitian, new code "<