From bdccb0c91f951fadc138fe69880e60a98bfcc594 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 6 Feb 2020 17:26:55 -0500 Subject: [PATCH] Working 2 types of decomposition --- Grid/algorithms/CoarsenedMatrix.h | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/Grid/algorithms/CoarsenedMatrix.h b/Grid/algorithms/CoarsenedMatrix.h index 01b0da4d..dd304a95 100644 --- a/Grid/algorithms/CoarsenedMatrix.h +++ b/Grid/algorithms/CoarsenedMatrix.h @@ -46,10 +46,8 @@ Author: paboyle NAMESPACE_BEGIN(Grid); template -inline void blockMaskedInnerProduct(Lattice &CoarseInner1, - Lattice &CoarseInner2, - const Lattice &FineMask1, - const Lattice &FineMask2, +inline void blockMaskedInnerProduct(Lattice &CoarseInner, + const Lattice &FineMask, const Lattice &fineX, const Lattice &fineY) { @@ -64,12 +62,8 @@ inline void blockMaskedInnerProduct(Lattice &CoarseInner1, // Multiply could be fused with innerProduct // Single block sum kernel could do both masks. fine_inner = localInnerProduct(fineX,fineY); - - mult(fine_inner_msk, fine_inner,FineMask1); - blockSum(CoarseInner1,fine_inner_msk); - - mult(fine_inner_msk, fine_inner,FineMask2); - blockSum(CoarseInner2,fine_inner_msk); + mult(fine_inner_msk, fine_inner,FineMask); + blockSum(CoarseInner,fine_inner_msk); } @@ -794,7 +788,7 @@ public: Lattice > coor (FineGrid); Lattice > bcoor(FineGrid); - Lattice > bcb (FineGrid); + Lattice > bcb (FineGrid); bcb = Zero(); CoarseVector iProj(Grid()); CoarseVector oProj(Grid()); @@ -868,7 +862,7 @@ public: for(int j=0;joSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); }); + // if( disp!= 0 ) { accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });} + // accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_self[ss](j,i),A_self(ss)(j,i)+iZProj_v(ss)); }); } } @@ -886,9 +882,8 @@ public: /////////////////////////////////////////// { mult(tmp,phi,evenmask); linop.Op(tmp,Mphie); - mult(tmp,phi,oddmask ); linop.Op(tmp,Mphio); + mult(tmp,phi,oddmask ); linop.Op(tmp,Mphio); - // tmp = Mphie*evenmask + Mphio*oddmask; { auto tmp_ = tmp.View(); auto evenmask_ = evenmask.View(); @@ -904,15 +899,17 @@ public: auto SelfProj_ = SelfProj.View(); auto A_self = A[self_stencil].View(); + accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ for(int j=0;j