diff --git a/Grid/algorithms/multigrid/GeneralCoarsenedMatrixMultiRHS.h b/Grid/algorithms/multigrid/GeneralCoarsenedMatrixMultiRHS.h index 1da968bd..132fcbf8 100644 --- a/Grid/algorithms/multigrid/GeneralCoarsenedMatrixMultiRHS.h +++ b/Grid/algorithms/multigrid/GeneralCoarsenedMatrixMultiRHS.h @@ -94,26 +94,27 @@ public: int ghost_zone=0; for(int32_t point = 0 ; point < geom.npoint; point++){ int i=s*geom.npoint+point; - if( Stencil._entries[i]._permute ) { + if( Stencil._entries[i]._wrap ) { ghost_zone=1; } } + // std::cout << "site " <oSites()<oSites()*geom.npoint==StencilTmp.size()); acceleratorCopyToDevice(&StencilTmp[0],&StencilMasked[0],sizeof(GeneralStencilEntryReordered)*StencilTmp.size()); CopyMatrix(); @@ -198,9 +199,9 @@ public: bytes = 1.0*osites*sizeof(siteMatrix)*npoint/pin.Grid()->GlobalDimensions()[0] + 2.0*osites*sizeof(siteVector)*npoint; - std::cout << " osites "<LocalDimensions()<LocalDimensions()<LocalDimensions()<LocalDimensions()<GlobalDimensions()[0] + 2.0*osites*sizeof(siteVector)*npoint; - std::cout << " osites "<LocalDimensions()<LocalDimensions()<LocalDimensions()<LocalDimensions()<_input; + int32_t s = SE->_input; // site of padded int32_t snbr= SE->_offset; - std::cout << " unpadded " << ss<<" padded " << s<< " point "< &From,Lattice & To,Coordinate Fro #endif }); t_acc+=usecond(); - std::cout << " localCopyRegion cpu " < inline void ScatterSlice(const cshiftVector &buf, accelerator_for(ss, face_ovol/simd[dim],Nsimd,{ // scalar layout won't coalesce - int blane=acceleratorSIMTlane(Nsimd); // buffer lane - int olane=blane%rNsimd; // reduced lattice lane - int obit =blane/rNsimd; +#ifdef GRID_SIMT + { + int blane=acceleratorSIMTlane(Nsimd); // buffer lane +#else + for(int blane=0;blane inline void GatherSlice(cshiftVector &buf, accelerator_for(ss, face_ovol/simd[dim],Nsimd,{ // scalar layout won't coalesce - int blane=acceleratorSIMTlane(Nsimd); // buffer lane - int olane=blane%rNsimd; // reduced lattice lane - int obit =blane/rNsimd; +#ifdef GRID_SIMT + { + int blane=acceleratorSIMTlane(Nsimd); // buffer lane +#else + for(int blane=0;blane_fdimensions[d]; int rd = grid->_rdimensions[d]; + int ld = grid->_ldimensions[d]; int ly = grid->_simd_layout[d]; assert((ly==1)||(ly==2)||(ly==grid->Nsimd())); @@ -116,6 +119,10 @@ public: int shift = (shifts[ii][d]+fd)%fd; // make it strictly positive 0.. L-1 int x = Coor[d]; // x in [0... rd-1] as an oSite + if ( (x + shift)%fd != (x+shift)%ld ){ + SE._wrap = 1; + } + int permute_dim = grid->PermuteDim(d); int permute_slice=0; if(permute_dim){