From 1292d595634172e28158f99d31863d63267ca5ac Mon Sep 17 00:00:00 2001 From: Daniel Richtmann Date: Thu, 11 Jun 2020 13:16:00 +0200 Subject: [PATCH 1/3] Add a typedef + broaden interface of CMat --- Grid/algorithms/CoarsenedMatrix.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Grid/algorithms/CoarsenedMatrix.h b/Grid/algorithms/CoarsenedMatrix.h index 8d184aea..76950baf 100644 --- a/Grid/algorithms/CoarsenedMatrix.h +++ b/Grid/algorithms/CoarsenedMatrix.h @@ -268,6 +268,21 @@ public: typedef iMatrix Cobj; typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field typedef Lattice FineField; + typedef CoarseVector FermionField; + + // enrich interface + void Meooe(CoarseVector const& in, CoarseVector& out) { assert(0); } + void MeooeDag(CoarseVector const& in, CoarseVector& out) { assert(0); } + void Mooee(CoarseVector const& in, CoarseVector& out) { assert(0); } + void MooeeDag(CoarseVector const& in, CoarseVector& out) { assert(0); } + void MooeeInv(CoarseVector const& in, CoarseVector& out) { assert(0); } + void MooeeInvDag(CoarseVector const& in, CoarseVector& out) { assert(0); } + void Dminus(CoarseVector const& in, CoarseVector& out) { out = in; } + void DminusDag(CoarseVector const& in, CoarseVector& out) { out = in; } + void ImportPhysicalFermionSource(CoarseVector const& input, CoarseVector& imported) { imported = input; } + void ImportUnphysicalFermion(CoarseVector const& input, CoarseVector& imported) { imported = input; } + void ExportPhysicalFermionSolution(CoarseVector const& solution, CoarseVector& exported) { exported = solution; }; + void ExportPhysicalFermionSource(CoarseVector const& solution, CoarseVector& exported) { exported = solution; }; //////////////////// // Data members From dd1ba266b269b093a8bcd9bf815438d9896d7148 Mon Sep 17 00:00:00 2001 From: Daniel Richtmann Date: Fri, 17 Jul 2020 11:58:02 +0200 Subject: [PATCH 2/3] Fix mapping between dir + disp and point in CMat --- 
Grid/algorithms/CoarsenedMatrix.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Grid/algorithms/CoarsenedMatrix.h b/Grid/algorithms/CoarsenedMatrix.h index 76950baf..d18fba43 100644 --- a/Grid/algorithms/CoarsenedMatrix.h +++ b/Grid/algorithms/CoarsenedMatrix.h @@ -432,25 +432,25 @@ public: ////////////// // 4D action like wilson - // 0+ => 0 - // 0- => 1 - // 1+ => 2 - // 1- => 3 + // 0+ => 0 + // 0- => 4 + // 1+ => 1 + // 1- => 5 // etc.. ////////////// // 5D action like DWF - // 1+ => 0 - // 1- => 1 - // 2+ => 2 - // 2- => 3 + // 1+ => 0 + // 1- => 4 + // 2+ => 1 + // 2- => 5 // etc.. auto point = [dir, disp, ndim](){ if(dir == 0 and disp == 0) return 8; else if ( ndim==4 ) { - return (4 * dir + 1 - disp) / 2; + return (1 - disp) / 2 * 4 + dir; } else { - return (4 * (dir-1) + 1 - disp) / 2; + return (1 - disp) / 2 * 4 + dir - 1; } }(); From b2087f14c48e881ab041bbaef7f6c444c88a895d Mon Sep 17 00:00:00 2001 From: Daniel Richtmann Date: Mon, 24 Aug 2020 16:54:36 +0200 Subject: [PATCH 3/3] Fix CoarsenedMatrix regarding illegal memory accesses Need a reference to geom since the lambda copies the this pointer which points to host memory, see - https://docs.nvidia.com/cuda/cuda-c-programming-guide/#star-this-capture - https://devblogs.nvidia.com/new-compiler-features-cuda-8/ --- Grid/algorithms/CoarsenedMatrix.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Grid/algorithms/CoarsenedMatrix.h b/Grid/algorithms/CoarsenedMatrix.h index d18fba43..ba40535c 100644 --- a/Grid/algorithms/CoarsenedMatrix.h +++ b/Grid/algorithms/CoarsenedMatrix.h @@ -310,6 +310,8 @@ public: Stencil.HaloExchange(in,compressor); autoView( in_v , in, AcceleratorRead); autoView( out_v , out, AcceleratorWrite); + autoView( Stencil_v , Stencil, AcceleratorRead); + auto& geom_v = geom; typedef LatticeView Aview; Vector AcceleratorViewContainer; @@ -331,14 +333,14 @@ public: int ptype; StencilEntry *SE; - for(int 
point=0;point<geom.npoint;point++){ + for(int point=0;point<geom_v.npoint;point++){ - SE=Stencil.GetEntry(ptype,point,ss); + SE=Stencil_v.GetEntry(ptype,point,ss); if(SE->_is_local) { nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); } else { - nbr = coalescedRead(Stencil.CommBuf()[SE->_offset]); + nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]); } acceleratorSynchronise(); @@ -382,6 +384,7 @@ public: autoView( out_v , out, AcceleratorWrite); autoView( in_v , in, AcceleratorRead); + autoView( Stencil_v , Stencil, AcceleratorRead); const int Nsimd = CComplex::Nsimd(); typedef decltype(coalescedRead(in_v[0])) calcVector; @@ -395,12 +398,12 @@ public: int ptype; StencilEntry *SE; - SE=Stencil.GetEntry(ptype,point,ss); + SE=Stencil_v.GetEntry(ptype,point,ss); if(SE->_is_local) { nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); } else { - nbr = coalescedRead(Stencil.CommBuf()[SE->_offset]); + nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]); } acceleratorSynchronise();