From 2c22db841ae77087905423060ca6e31f80016436 Mon Sep 17 00:00:00 2001 From: "Henrique B.R" Date: Thu, 2 Apr 2020 17:38:47 +0100 Subject: [PATCH 01/86] Added momentum scaling to scalar HMC theories in order to follow UKQCD/CPS conventions --- Grid/qcd/action/scalar/ScalarImpl.h | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/Grid/qcd/action/scalar/ScalarImpl.h b/Grid/qcd/action/scalar/ScalarImpl.h index febb315e..203e1824 100644 --- a/Grid/qcd/action/scalar/ScalarImpl.h +++ b/Grid/qcd/action/scalar/ScalarImpl.h @@ -1,5 +1,13 @@ #pragma once +#define CPS_MD_TIME + +#ifdef CPS_MD_TIME +#define HMC_MOMENTUM_DENOMINATOR (2.0) +#else +#define HMC_MOMENTUM_DENOMINATOR (1.0) +#endif + NAMESPACE_BEGIN(Grid); template @@ -20,13 +28,17 @@ public: typedef Field PropagatorField; static inline void generate_momenta(Field& P, GridParallelRNG& pRNG){ + RealD scale = ::sqrt(HMC_MOMENTUM_DENOMINATOR); + // CPS and UKQCD conventions not yet implemented for U(1) scalars. gaussian(pRNG, P); + P *= scale; } static inline Field projectForce(Field& P){return P;} static inline void update_field(Field& P, Field& U, double ep) { U += P*ep; + std::cout << "Field updated. 
Epsilon = " << std::setprecision(10) << ep << std::endl; } static inline RealD FieldSquareNorm(Field& U) { @@ -66,7 +78,7 @@ public: } static void FreePropagator(const Field &in, Field &out, - const Field &momKernel) + const Field &momKernel) { FFT fft((GridCartesian *)in.Grid()); Field inFT(in.Grid()); @@ -139,14 +151,17 @@ public: static inline void generate_momenta(Field &P, GridParallelRNG &pRNG) { + RealD scale = ::sqrt(HMC_MOMENTUM_DENOMINATOR); // Being consistent with CPS and UKQCD conventions #ifndef USE_FFT_ACCELERATION Group::GaussianFundamentalLieAlgebraMatrix(pRNG, P); + #else Field Pgaussian(P.Grid()), Pp(P.Grid()); ComplexField p2(P.Grid()); p2 = zero; RealD M = FFT_MASS; - + + Group::GaussianFundamentalLieAlgebraMatrix(pRNG, Pgaussian); FFT theFFT((GridCartesian*)P.Grid()); @@ -156,8 +171,8 @@ public: p2 = sqrt(p2); Pp *= p2; theFFT.FFT_all_dim(P, Pp, FFT::backward); - #endif //USE_FFT_ACCELERATION + P *= scale; } static inline Field projectForce(Field& P) {return P;} @@ -166,7 +181,8 @@ public: { #ifndef USE_FFT_ACCELERATION double t0=usecond(); - U += P*ep; + U += P*ep; + std::cout << "Field updated. 
Epsilon = " << std::setprecision(10) << ep << std::endl; double t1=usecond(); double total_time = (t1-t0)/1e6; std::cout << GridLogIntegrator << "Total time for updating field (s) : " << total_time << std::endl; From 28a1fcaaffa1c644d75208aa4fa435b454f99a29 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 5 May 2020 11:13:27 -0700 Subject: [PATCH 02/86] First compile against SYCL --- Grid/Grid_Eigen_Dense.h | 6 +- Grid/allocator/AlignedAllocator.cc | 2 +- Grid/allocator/AlignedAllocator.h | 32 ++++----- Grid/communicator/SharedMemoryMPI.cc | 15 ++-- Grid/lattice/Lattice_base.h | 4 +- Grid/lattice/Lattice_reduction.h | 8 +-- Grid/perfmon/PerfCount.h | 3 +- Grid/pugixml/pugixml.cc | 2 +- Grid/qcd/action/fermion/Fermion.h | 2 +- Grid/qcd/action/fermion/GparityWilsonImpl.h | 2 +- .../CayleyFermion5DImplementation.h | 6 +- .../WilsonKernelsImplementation.h | 41 ++++++----- Grid/serialisation/Serialisation.h | 2 +- Grid/simd/Grid_gpu_vec.h | 4 +- Grid/simd/Simd.h | 4 +- Grid/stencil/Stencil.h | 2 +- Grid/tensors/Tensor_SIMT.h | 12 +++- Grid/tensors/Tensor_exp.h | 2 +- Grid/threads/Pragmas.h | 53 ++++++++++++-- Grid/util/Init.cc | 71 +++++++++++++++++-- benchmarks/Benchmark_dwf.cc | 2 +- benchmarks/Benchmark_su3_gpu.cc | 2 +- configure.ac | 13 ++++ ..._WilsonMixedRepresentationsFermionGauge.cc | 2 +- 24 files changed, 205 insertions(+), 87 deletions(-) diff --git a/Grid/Grid_Eigen_Dense.h b/Grid/Grid_Eigen_Dense.h index 9d779e05..d7119358 100644 --- a/Grid/Grid_Eigen_Dense.h +++ b/Grid/Grid_Eigen_Dense.h @@ -15,12 +15,12 @@ #ifdef __NVCC__ #pragma push #pragma diag_suppress code_is_unreachable -#pragma push_macro("__CUDA_ARCH__") +#pragma push_macro("GRID_SIMT") #pragma push_macro("__NVCC__") #pragma push_macro("__CUDACC__") #undef __NVCC__ #undef __CUDACC__ -#undef __CUDA_ARCH__ +#undef GRID_SIMT #define __NVCC__REDEFINE__ #endif @@ -41,7 +41,7 @@ #ifdef __NVCC__REDEFINE__ #pragma pop_macro("__CUDACC__") #pragma pop_macro("__NVCC__") -#pragma 
pop_macro("__CUDA_ARCH__") +#pragma pop_macro("GRID_SIMT") #pragma pop #endif diff --git a/Grid/allocator/AlignedAllocator.cc b/Grid/allocator/AlignedAllocator.cc index d53c4dc2..18854c95 100644 --- a/Grid/allocator/AlignedAllocator.cc +++ b/Grid/allocator/AlignedAllocator.cc @@ -6,7 +6,7 @@ NAMESPACE_BEGIN(Grid); MemoryStats *MemoryProfiler::stats = nullptr; bool MemoryProfiler::debug = false; -#ifdef GRID_NVCC +#ifdef GRID_CUDA #define SMALL_LIMIT (0) #else #define SMALL_LIMIT (4096) diff --git a/Grid/allocator/AlignedAllocator.h b/Grid/allocator/AlignedAllocator.h index 8c189be8..56f937f5 100644 --- a/Grid/allocator/AlignedAllocator.h +++ b/Grid/allocator/AlignedAllocator.h @@ -51,11 +51,8 @@ class PointerCache { private: /*Pinning pages is costly*/ /*Could maintain separate large and small allocation caches*/ -#ifdef GRID_NVCC + static const int Ncache=128; -#else - static const int Ncache=8; -#endif static int victim; typedef struct { @@ -169,7 +166,7 @@ public: pointer ptr = nullptr; #endif -#ifdef GRID_NVCC +#ifdef GRID_CUDA //////////////////////////////////// // Unified (managed) memory //////////////////////////////////// @@ -183,7 +180,13 @@ public: } } assert( ptr != (_Tp *)NULL); -#else +#endif + +#ifdef GRID_SYCL + if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) malloc_shared(bytes,*theGridAccelerator); +#endif + +#if ( !defined(GRID_CUDA)) && (!defined(GRID_SYCL)) ////////////////////////////////////////////////////////////////////////////////////////// // 2MB align; could make option probably doesn't need configurability ////////////////////////////////////////////////////////////////////////////////////////// @@ -193,14 +196,6 @@ public: if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN,bytes); #endif assert( ptr != (_Tp *)NULL); - - ////////////////////////////////////////////////// - // First touch optimise in threaded loop - ////////////////////////////////////////////////// - uint64_t *cp = (uint64_t *)ptr; - 
thread_for(n,bytes/sizeof(uint64_t), { // need only one touch per page - cp[n]=0; - }); #endif return ptr; } @@ -216,9 +211,14 @@ public: pointer __freeme = __p; #endif -#ifdef GRID_NVCC +#ifdef GRID_CUDA if ( __freeme ) cudaFree((void *)__freeme); -#else +#endif +#ifdef GRID_SYCL + if ( __freeme ) free((void *)__freeme,*theGridAccelerator); +#endif + +#if ( !defined(GRID_CUDA)) && (!defined(GRID_SYCL)) #ifdef HAVE_MM_MALLOC_H if ( __freeme ) _mm_free((void *)__freeme); #else diff --git a/Grid/communicator/SharedMemoryMPI.cc b/Grid/communicator/SharedMemoryMPI.cc index ed465252..1d62b8b7 100644 --- a/Grid/communicator/SharedMemoryMPI.cc +++ b/Grid/communicator/SharedMemoryMPI.cc @@ -29,7 +29,7 @@ Author: Peter Boyle #include #include -#ifdef GRID_NVCC +#ifdef GRID_CUDA #include #endif @@ -413,7 +413,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) //////////////////////////////////////////////////////////////////////////////////////////// // Hugetlbfs mapping intended //////////////////////////////////////////////////////////////////////////////////////////// -#ifdef GRID_NVCC +#ifdef GRID_CUDA void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) { void * ShmCommBuf ; @@ -433,13 +433,6 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) ////////////////////////////////////////////////////////////////////////////////////////////////////////// // cudaDeviceGetP2PAttribute(&perfRank, cudaDevP2PAttrPerformanceRank, device1, device2); -#ifdef GRID_IBM_SUMMIT - // IBM Jsrun makes cuda Device numbering screwy and not match rank - std::cout << "IBM Summit or similar - NOT setting device to WorldShmRank"<_odata[i]); } #else accelerator_inline const vobj & operator()(size_t i) const { return this->_odata[i]; } @@ -211,7 +211,7 @@ public: LatticeView accessor(*( (LatticeAccelerator *) this)); return accessor; } - + ~Lattice() { if ( this->_odata_size ) { dealloc(); diff --git 
a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index 3c5b03e5..1f06ac66 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -24,7 +24,7 @@ Author: paboyle #include -#ifdef GRID_NVCC +#ifdef GRID_CUDA #include #endif @@ -67,7 +67,7 @@ inline typename vobj::scalar_object sum_cpu(const vobj *arg, Integer osites) template inline typename vobj::scalar_object sum(const vobj *arg, Integer osites) { -#ifdef GRID_NVCC +#ifdef GRID_CUDA return sum_gpu(arg,osites); #else return sum_cpu(arg,osites); @@ -108,7 +108,7 @@ inline ComplexD innerProduct(const Lattice &left,const Lattice &righ const uint64_t nsimd = grid->Nsimd(); const uint64_t sites = grid->oSites(); -#ifdef GRID_NVCC +#ifdef GRID_CUDA // GPU - SIMT lane compliance... typedef decltype(innerProduct(left_v[0],right_v[0])) inner_t; Vector inner_tmp(sites); @@ -174,7 +174,7 @@ axpby_norm_fast(Lattice &z,sobj a,sobj b,const Lattice &x,const Latt const uint64_t nsimd = grid->Nsimd(); const uint64_t sites = grid->oSites(); -#ifdef GRID_NVCC +#ifdef GRID_CUDA // GPU typedef decltype(innerProduct(x_v[0],y_v[0])) inner_t; Vector inner_tmp(sites); diff --git a/Grid/perfmon/PerfCount.h b/Grid/perfmon/PerfCount.h index 1e2a9528..dd25b41e 100644 --- a/Grid/perfmon/PerfCount.h +++ b/Grid/perfmon/PerfCount.h @@ -44,7 +44,7 @@ Author: paboyle #include #endif #ifdef __x86_64__ -#ifdef GRID_NVCC +#ifdef GRID_CUDA accelerator_inline uint64_t __rdtsc(void) { return 0; } accelerator_inline uint64_t __rdpmc(int ) { return 0; } #else @@ -112,7 +112,6 @@ class PerformanceCounter { private: typedef struct { - public: uint32_t type; uint64_t config; const char *name; diff --git a/Grid/pugixml/pugixml.cc b/Grid/pugixml/pugixml.cc index e7b395ad..45e6496a 100644 --- a/Grid/pugixml/pugixml.cc +++ b/Grid/pugixml/pugixml.cc @@ -12773,7 +12773,7 @@ namespace pugi #undef PUGI__THROW_ERROR #undef PUGI__CHECK_ERROR -#ifdef GRID_NVCC +#ifdef GRID_CUDA #pragma pop #endif diff --git 
a/Grid/qcd/action/fermion/Fermion.h b/Grid/qcd/action/fermion/Fermion.h index fb6f18bb..af5bebcc 100644 --- a/Grid/qcd/action/fermion/Fermion.h +++ b/Grid/qcd/action/fermion/Fermion.h @@ -286,7 +286,7 @@ typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermion5DR; typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermion5DF; typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermion5DD; -#ifndef GRID_NVCC +#ifndef GRID_CUDA typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermionVec5dR; typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermionVec5dF; typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermionVec5dD; diff --git a/Grid/qcd/action/fermion/GparityWilsonImpl.h b/Grid/qcd/action/fermion/GparityWilsonImpl.h index 0b147b3f..77381ff9 100644 --- a/Grid/qcd/action/fermion/GparityWilsonImpl.h +++ b/Grid/qcd/action/fermion/GparityWilsonImpl.h @@ -96,7 +96,7 @@ public: int sl = St._simd_layout[direction]; Coordinate icoor; -#ifdef __CUDA_ARCH__ +#ifdef GRID_SIMT _Spinor tmp; const int Nsimd =SiteDoubledGaugeField::Nsimd(); diff --git a/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h index c80d2425..082e4b73 100644 --- a/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h @@ -180,7 +180,7 @@ template void CayleyFermion5D::CayleyReport(void) std::cout << GridLogMessage << "#### MooeeInv calls report " << std::endl; std::cout << GridLogMessage << "CayleyFermion5D Number of MooeeInv Calls : " << MooeeInvCalls << std::endl; std::cout << GridLogMessage << "CayleyFermion5D ComputeTime/Calls : " << MooeeInvTime / MooeeInvCalls << " us" << std::endl; -#ifdef GRID_NVCC +#ifdef GRID_CUDA RealD mflops = ( -16.*Nc*Ns+this->Ls*(1.+18.*Nc*Ns) )*volume*MooeeInvCalls/MooeeInvTime/2; // 2 for red black counting std::cout << GridLogMessage << "Average mflops/s per call : " << 
mflops << std::endl; std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl; @@ -644,7 +644,7 @@ void CayleyFermion5D::ContractConservedCurrent( PropagatorField &q_in_1, Current curr_type, unsigned int mu) { -#ifndef GRID_NVCC +#ifndef GRID_CUDA Gamma::Algebra Gmu [] = { Gamma::Algebra::GammaX, Gamma::Algebra::GammaY, @@ -828,7 +828,7 @@ void CayleyFermion5D::SeqConservedCurrent(PropagatorField &q_in, } #endif -#ifndef GRID_NVCC +#ifndef GRID_CUDA //////////////////////////////////////////////// // GENERAL CAYLEY CASE //////////////////////////////////////////////// diff --git a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h index 1fff4f5a..c164a1f2 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h @@ -39,9 +39,10 @@ NAMESPACE_BEGIN(Grid); // Generic implementation; move to different file? 
//////////////////////////////////////////// +/* accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip) { -#ifdef __CUDA_ARCH__ +#ifdef GRID_SIMT static_assert(sizeof(StencilEntry)==sizeof(uint4),"Unexpected Stencil Entry Size"); uint4 * mem_pun = (uint4 *)mem; // force 128 bit loads uint4 * chip_pun = (uint4 *)&chip; @@ -51,7 +52,8 @@ accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip) #endif return; } - +*/ + #define GENERIC_STENCIL_LEG(Dir,spProj,Recon) \ SE = st.GetEntry(ptype, Dir, sF); \ if (SE->_is_local) { \ @@ -358,18 +360,18 @@ void WilsonKernels::DhopDirAll( StencilImpl &st, DoubledGaugeField &U,Site auto out_Yp = out[5].View(); auto out_Zp = out[6].View(); auto out_Tp = out[7].View(); - + auto CBp=st.CommBuf(); accelerator_forNB(sss,Nsite*Ls,Simd::Nsimd(),{ int sU=sss/Ls; int sF =sss; - DhopDirXm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Xm,0); - DhopDirYm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Ym,1); - DhopDirZm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Zm,2); - DhopDirTm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Tm,3); - DhopDirXp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Xp,4); - DhopDirYp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Yp,5); - DhopDirZp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Zp,6); - DhopDirTp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Tp,7); + DhopDirXm(st_v,U_v,CBp,sF,sU,in_v,out_Xm,0); + DhopDirYm(st_v,U_v,CBp,sF,sU,in_v,out_Ym,1); + DhopDirZm(st_v,U_v,CBp,sF,sU,in_v,out_Zm,2); + DhopDirTm(st_v,U_v,CBp,sF,sU,in_v,out_Tm,3); + DhopDirXp(st_v,U_v,CBp,sF,sU,in_v,out_Xp,4); + DhopDirYp(st_v,U_v,CBp,sF,sU,in_v,out_Yp,5); + DhopDirZp(st_v,U_v,CBp,sF,sU,in_v,out_Zp,6); + DhopDirTp(st_v,U_v,CBp,sF,sU,in_v,out_Tp,7); }); } @@ -385,13 +387,14 @@ void WilsonKernels::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S auto in_v = in.View(); auto out_v = out.View(); auto st_v = st.View(); + auto CBp=st.CommBuf(); #define LoopBody(Dir) \ - case Dir : \ + case Dir : \ accelerator_forNB(ss,Nsite,Simd::Nsimd(),{ \ for(int 
s=0;s::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField if( interior && exterior ) { if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSite); return;} -#ifndef GRID_NVCC +#ifndef GRID_CUDA if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSite); return;} if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSite); printf("."); return;} #endif } else if( interior ) { if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALLNB(GenericDhopSiteInt); return;} -#ifndef GRID_NVCC +#ifndef GRID_CUDA if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALLNB(HandDhopSiteInt); return;} if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); printf("-"); return;} #endif } else if( exterior ) { if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteExt); return;} -#ifndef GRID_NVCC +#ifndef GRID_CUDA if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteExt); return;} if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); printf("+"); return;} #endif @@ -473,19 +476,19 @@ void WilsonKernels::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField if( interior && exterior ) { if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDag); return;} -#ifndef GRID_NVCC +#ifndef GRID_CUDA if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDag); return;} if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDag); return;} #endif } else if( interior ) { if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDagInt); return;} -#ifndef GRID_NVCC +#ifndef GRID_CUDA if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagInt); return;} if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagInt); return;} #endif } else if( exterior ) { if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDagExt); return;} -#ifndef GRID_NVCC 
+#ifndef GRID_CUDA if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagExt); return;} if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagExt); return;} #endif diff --git a/Grid/serialisation/Serialisation.h b/Grid/serialisation/Serialisation.h index c95226b1..177a65f9 100644 --- a/Grid/serialisation/Serialisation.h +++ b/Grid/serialisation/Serialisation.h @@ -36,7 +36,7 @@ Author: Peter Boyle #include "BinaryIO.h" #include "TextIO.h" #include "XmlIO.h" -#ifndef GRID_NVCC +#ifndef GRID_CUDA #include "JSON_IO.h" #endif diff --git a/Grid/simd/Grid_gpu_vec.h b/Grid/simd/Grid_gpu_vec.h index 4584fb36..0bff4c2f 100644 --- a/Grid/simd/Grid_gpu_vec.h +++ b/Grid/simd/Grid_gpu_vec.h @@ -142,7 +142,7 @@ typedef GpuVector GpuVectorI; accelerator_inline float half2float(half h) { float f; -#ifdef __CUDA_ARCH__ +#ifdef GRID_SIMT f = __half2float(h); #else //f = __half2float(h); @@ -156,7 +156,7 @@ accelerator_inline float half2float(half h) accelerator_inline half float2half(float f) { half h; -#ifdef __CUDA_ARCH__ +#ifdef GRID_SIMT h = __float2half(f); #else Grid_half hh = sfw_float_to_half(f); diff --git a/Grid/simd/Simd.h b/Grid/simd/Simd.h index bc8cd2fd..80f7c2e7 100644 --- a/Grid/simd/Simd.h +++ b/Grid/simd/Simd.h @@ -31,7 +31,7 @@ directory #ifndef GRID_SIMD_H #define GRID_SIMD_H -#ifdef GRID_NVCC +#ifdef GRID_CUDA #include #endif @@ -65,7 +65,7 @@ typedef RealD Real; typedef RealF Real; #endif -#ifdef GRID_NVCC +#ifdef GRID_CUDA typedef thrust::complex ComplexF; typedef thrust::complex ComplexD; typedef thrust::complex Complex; diff --git a/Grid/stencil/Stencil.h b/Grid/stencil/Stencil.h index 37b866cb..7a200ba6 100644 --- a/Grid/stencil/Stencil.h +++ b/Grid/stencil/Stencil.h @@ -107,7 +107,7 @@ void Gather_plane_exchange_table(Vector >& table,const Lattic } struct StencilEntry { -#ifdef GRID_NVCC +#ifdef GRID_CUDA uint64_t _byte_offset; // 8 bytes uint32_t _offset; // 4 bytes #else diff --git a/Grid/tensors/Tensor_SIMT.h 
b/Grid/tensors/Tensor_SIMT.h index c18b8484..c92edc82 100644 --- a/Grid/tensors/Tensor_SIMT.h +++ b/Grid/tensors/Tensor_SIMT.h @@ -34,14 +34,16 @@ NAMESPACE_BEGIN(Grid); //accelerator_inline void SIMTsynchronise(void) accelerator_inline void synchronise(void) { -#ifdef __CUDA_ARCH__ +#ifdef GRID_SIMT +#ifdef GRID_CUDA // __syncthreads(); __syncwarp(); +#endif #endif return; } -#ifndef __CUDA_ARCH__ +#ifndef GRID_SIMT ////////////////////////////////////////// // Trivial mapping of vectors on host ////////////////////////////////////////// @@ -75,7 +77,13 @@ void coalescedWriteNonTemporal(vobj & __restrict__ vec,const vobj & __restrict__ vstream(vec, extracted); } #else +#ifdef GRID_CUDA accelerator_inline int SIMTlane(int Nsimd) { return threadIdx.y; } // CUDA specific +#endif +#ifdef GRID_SYCL +//accelerator_inline int SIMTlane(int Nsimd) { return __spirv_BuiltInGlobalInvocationId[2]; } //SYCL specific +accelerator_inline int SIMTlane(int Nsimd) { return __spirv::initLocalInvocationId<3, cl::sycl::id<3>>()[2]; } // SYCL specific +#endif ////////////////////////////////////////// // Extract and insert slices on the GPU diff --git a/Grid/tensors/Tensor_exp.h b/Grid/tensors/Tensor_exp.h index 11d37f9c..0a1d6389 100644 --- a/Grid/tensors/Tensor_exp.h +++ b/Grid/tensors/Tensor_exp.h @@ -55,7 +55,7 @@ template accelerator_inline iVector Exponentiate(c // Specialisation: Cayley-Hamilton exponential for SU(3) -#ifndef GRID_NVCC +#ifndef GRID_CUDA template::TensorLevel == 0>::type * =nullptr> accelerator_inline iMatrix Exponentiate(const iMatrix &arg, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP ) { diff --git a/Grid/threads/Pragmas.h b/Grid/threads/Pragmas.h index d05f8ee9..45eca253 100644 --- a/Grid/threads/Pragmas.h +++ b/Grid/threads/Pragmas.h @@ -68,16 +68,17 @@ Author: paboyle ////////////////////////////////////////////////////////////////////////////////// -// Accelerator primitives; fall back to threading +// Accelerator primitives; fall back to threading if not 
CUDA or SYCL ////////////////////////////////////////////////////////////////////////////////// -#ifdef __NVCC__ -#define GRID_NVCC -#endif -#ifdef GRID_NVCC +#ifdef GRID_CUDA extern uint32_t gpu_threads; +#ifdef __CUDA_ARCH__ +#define GRID_SIMT +#endif + #define accelerator __host__ __device__ #define accelerator_inline __host__ __device__ inline @@ -123,7 +124,47 @@ void LambdaApplySIMT(uint64_t Isites, uint64_t Osites, lambda Lambda) accelerator_forNB(iterator, num, nsimd, { __VA_ARGS__ } ); \ accelerator_barrier(dummy); -#else +#endif + +#ifdef GRID_SYCL + +#ifdef __SYCL_DEVICE_ONLY__ +#define GRID_SIMT +#endif + +#include +#include + +extern cl::sycl::queue *theGridAccelerator; + +extern uint32_t gpu_threads; + +#define accelerator +#define accelerator_inline strong_inline + +#define accelerator_forNB(iterator,num,nsimd, ... ) \ + theGridAccelerator->submit([&](cl::sycl::handler &cgh) { \ + cl::sycl::range<3> local {gpu_threads,1,nsimd}; \ + cl::sycl::range<3> global{(unsigned long)num,1,(unsigned long)nsimd}; \ + cgh.parallel_for( \ + cl::sycl::nd_range<3>(global,local), \ + [=] (cl::sycl::nd_item<3> item) mutable { \ + auto iterator = item.get_global_id(0); \ + auto lane = item.get_global_id(2); \ + { __VA_ARGS__ }; \ + }); \ + }); + +#define accelerator_barrier(dummy) theGridAccelerator->wait(); + +#define accelerator_for( iterator, num, nsimd, ... 
) \ + accelerator_forNB(iterator, num, nsimd, { __VA_ARGS__ } ); \ + accelerator_barrier(dummy); + + +#endif + +#if ( (!defined(GRID_SYCL)) && (!defined(GRID_CUDA)) ) #define accelerator #define accelerator_inline strong_inline diff --git a/Grid/util/Init.cc b/Grid/util/Init.cc index 570f4234..442c51d3 100644 --- a/Grid/util/Init.cc +++ b/Grid/util/Init.cc @@ -74,6 +74,10 @@ feenableexcept (unsigned int excepts) #endif uint32_t gpu_threads=8; +#ifdef GRID_SYCL +cl::sycl::queue *theGridAccelerator; +#endif + NAMESPACE_BEGIN(Grid); @@ -194,7 +198,7 @@ void GridParseLayout(char **argv,int argc, } if( GridCmdOptionExists(argv,argv+argc,"--gpu-threads") ){ std::vector gputhreads(0); -#ifndef GRID_NVCC +#ifndef GRID_CUDA std::cout << GridLogWarning << "'--gpu-threads' option used but Grid was" << " not compiled with GPU support" << std::endl; #endif @@ -281,12 +285,10 @@ void GridBanner(void) printed=1; } } -#ifdef GRID_NVCC +#ifdef GRID_CUDA cudaDeviceProp *gpu_props; -#endif void GridGpuInit(void) { -#ifdef GRID_NVCC int nDevices = 1; cudaGetDeviceCount(&nDevices); gpu_props = new cudaDeviceProp[nDevices]; @@ -335,11 +337,70 @@ void GridGpuInit(void) // GPU_PROP(singleToDoublePrecisionPerfRatio); } } +#ifdef GRID_IBM_SUMMIT + // IBM Jsrun makes cuda Device numbering screwy and not match rank + if ( world_rank == 0 ) printf("GpuInit: IBM Summit or similar - NOT setting device to node rank\n"); +#else + if ( world_rank == 0 ) printf("GpuInit: setting device to node rank\n"); + cudaSetDevice(rank); +#endif + if ( world_rank == 0 ) printf("GpuInit: ================================================\n"); +} +#endif +#ifdef GRID_SYCL +void GridGpuInit(void) +{ + int nDevices = 1; + cl::sycl::gpu_selector selector; + cl::sycl::device selectedDevice { selector }; + theGridAccelerator = new sycl::queue (selectedDevice); + + char * localRankStr = NULL; + int rank = 0, world_rank=0; +#define ENV_LOCAL_RANK_OMPI "OMPI_COMM_WORLD_LOCAL_RANK" +#define ENV_LOCAL_RANK_MVAPICH 
"MV2_COMM_WORLD_LOCAL_RANK" +#define ENV_RANK_OMPI "OMPI_COMM_WORLD_RANK" +#define ENV_RANK_MVAPICH "MV2_COMM_WORLD_RANK" + // We extract the local rank initialization using an environment variable + if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL) + { + rank = atoi(localRankStr); + } + if ((localRankStr = getenv(ENV_LOCAL_RANK_MVAPICH)) != NULL) + { + rank = atoi(localRankStr); + } + if ((localRankStr = getenv(ENV_RANK_OMPI )) != NULL) { world_rank = atoi(localRankStr);} + if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);} + + if ( world_rank == 0 ) { + GridBanner(); + } + /* + for (int i = 0; i < nDevices; i++) { + +#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("GpuInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory); +#define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d"); + + cudaGetDeviceProperties(&gpu_props[i], i); + if ( world_rank == 0) { + cudaDeviceProp prop; + prop = gpu_props[i]; + printf("GpuInit: ========================\n"); + printf("GpuInit: Device Number : %d\n", i); + printf("GpuInit: ========================\n"); + printf("GpuInit: Device identifier: %s\n", prop.name); + } + } + */ if ( world_rank == 0 ) { printf("GpuInit: ================================================\n"); } -#endif } +#endif +#if (!defined(GRID_CUDA)) && (!defined(GRID_SYCL)) +void GridGpuInit(void){} +#endif void Grid_init(int *argc,char ***argv) { diff --git a/benchmarks/Benchmark_dwf.cc b/benchmarks/Benchmark_dwf.cc index 4d6b026f..a3146cbc 100644 --- a/benchmarks/Benchmark_dwf.cc +++ b/benchmarks/Benchmark_dwf.cc @@ -21,7 +21,7 @@ *************************************************************************************/ /* END LEGAL */ #include -#ifdef GRID_NVCC +#ifdef GRID_CUDA #define CUDA_PROFILE #endif diff --git a/benchmarks/Benchmark_su3_gpu.cc b/benchmarks/Benchmark_su3_gpu.cc index c87f501e..181d5894 100644 --- a/benchmarks/Benchmark_su3_gpu.cc +++ b/benchmarks/Benchmark_su3_gpu.cc @@ 
-41,7 +41,7 @@ int main (int argc, char ** argv) #define LADD (8) int64_t Nwarm=20; - int64_t Nloop=500; + int64_t Nloop=50; Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); Coordinate mpi_layout = GridDefaultMpi(); diff --git a/configure.ac b/configure.ac index 93b32002..cf5ca85b 100644 --- a/configure.ac +++ b/configure.ac @@ -147,6 +147,19 @@ case ${ac_SUMMIT} in AC_DEFINE([GRID_IBM_SUMMIT],[1],[Let JSRUN manage the GPU device allocation]);; esac +############### SYCL +AC_ARG_ENABLE([sycl], + [AC_HELP_STRING([--enable-sycl=yes|no], [enable SYCL])], + [ac_JSRUN=${enable_sycl}], [ac_SYCL=no]) +case ${ac_SYCL} in + no);; + yes) + AC_DEFINE([GRID_SYCL],[1],[Use SYCL offload]);; + *) + AC_DEFINE([GRID_SYCL],[1],[Use SYCL offload]);; +esac + + ############### Intel libraries AC_ARG_ENABLE([mkl], [AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])], diff --git a/tests/hmc/Test_hmc_WilsonMixedRepresentationsFermionGauge.cc b/tests/hmc/Test_hmc_WilsonMixedRepresentationsFermionGauge.cc index 6fa90f32..3b8cdda6 100644 --- a/tests/hmc/Test_hmc_WilsonMixedRepresentationsFermionGauge.cc +++ b/tests/hmc/Test_hmc_WilsonMixedRepresentationsFermionGauge.cc @@ -35,7 +35,7 @@ directory int main(int argc, char **argv) { -#ifndef GRID_NVCC +#ifndef GRID_CUDA using namespace Grid; From f8b8e000902e83eb474474edf42f21df1cf1cfa4 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 8 May 2020 06:23:55 -0700 Subject: [PATCH 03/86] Systematise the accelerator primitives and locate to Grid/threads/Accelerator.h / Accelerator.cc Aim to reduce the amount of cuda and other code variations floating around all over the place. 
Will move GpuInit into Accelerator.cc from Init.cc Need to worry about SharedMemoryMPI.cc and the Peer2Peer windows --- Grid/GridCore.h | 2 +- Grid/algorithms/CoarsenedMatrix.h | 386 +----------------- .../iterative/ImplicitlyRestartedLanczos.h | 10 +- Grid/allocator/AlignedAllocator.h | 67 +-- Grid/lattice/Lattice_rng.h | 4 +- Grid/qcd/action/fermion/GparityWilsonImpl.h | 2 +- .../WilsonKernelsImplementation.h | 26 +- Grid/tensors/Tensor_SIMT.h | 27 +- Grid/threads/Accelerator.cc | 10 + Grid/threads/Accelerator.h | 345 ++++++++++++++++ Grid/threads/Pragmas.h | 149 +------ Grid/threads/ThreadReduction.h | 127 ++++++ Grid/threads/Threads.h | 120 ++---- 13 files changed, 557 insertions(+), 718 deletions(-) create mode 100644 Grid/threads/Accelerator.cc create mode 100644 Grid/threads/Accelerator.h create mode 100644 Grid/threads/ThreadReduction.h diff --git a/Grid/GridCore.h b/Grid/GridCore.h index a48d2d49..f7c1267a 100644 --- a/Grid/GridCore.h +++ b/Grid/GridCore.h @@ -49,7 +49,7 @@ Author: paboyle #include #include #include -#include +#include #include #include #include diff --git a/Grid/algorithms/CoarsenedMatrix.h b/Grid/algorithms/CoarsenedMatrix.h index a6b01986..8e5c91a7 100644 --- a/Grid/algorithms/CoarsenedMatrix.h +++ b/Grid/algorithms/CoarsenedMatrix.h @@ -1,14 +1,3 @@ - // blockZaxpy in bockPromote - 3s, 5% - // noncoalesced linalg in Preconditionoer ~ 3s 5% - // Lancos tuning or replace 10-20s ~ 25%, open ended - // setup tuning 5s ~ 8% - // -- e.g. ordermin, orderstep tunables. - // MdagM path without norm in LinOp code. 
few seconds - - // Mdir calc blocking kernels - // Fuse kernels in blockMaskedInnerProduct - // preallocate Vectors in Cayley 5D ~ few percent few seconds - /************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -91,34 +80,7 @@ public: } directions [2*_d]=0; displacements[2*_d]=0; - - //// report back - std::cout< GetDelta(int point) { - std::vector delta(dimension,0); - delta[directions[point]] = displacements[point]; - return delta; - }; - */ }; @@ -149,25 +111,7 @@ public: CoarseScalar InnerProd(CoarseGrid); std::cout << GridLogMessage <<" Block Gramm-Schmidt pass 1"<oSites(),1,{ - eProj[ss](i)=CComplex(1.0); - }); - eProj=eProj - iProj; - std::cout< &hermop,int nn=nbasis) { - - RealD scale; - - ConjugateGradient CG(1.0e-2,100,false); - FineField noise(FineGrid); - FineField Mn(FineGrid); - - for(int b=0;b "< "< &hermop, int nn, double hi, @@ -313,201 +219,6 @@ public: } assert(b==nn); } -#endif -#if 0 - virtual void CreateSubspaceChebyshev(GridParallelRNG &RNG,LinearOperatorBase &hermop, - int nn, - double hi, - double lo, - int orderfilter, - int ordermin, - int orderstep, - double filterlo - ) { - - RealD scale; - - FineField noise(FineGrid); - FineField Mn(FineGrid); - FineField tmp(FineGrid); - FineField combined(FineGrid); - - // New normalised noise - gaussian(RNG,noise); - scale = std::pow(norm2(noise),-0.5); - noise=noise*scale; - - // Initial matrix element - hermop.Op(noise,Mn); std::cout< "< Cheb(llo,hhi,oorder); \ - Cheb(hermop,noise,Mn); \ - scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale; \ - subspace[b] = Mn; \ - hermop.Op(Mn,tmp); \ - std::cout< "< Cheb(0.002,60.0,1500,-0.5,3.5); \ - - RealD alpha=-0.8; - RealD beta =-0.8; -#define FILTER(llo,hhi,oorder) \ - { \ - Chebyshev Cheb(llo,hhi,oorder); \ - /* JacobiPolynomial Cheb(0.0,60.0,oorder,alpha,beta);*/\ - Cheb(hermop,noise,Mn); \ - scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale; \ - subspace[b] = Mn; \ - 
hermop.Op(Mn,tmp); \ - std::cout< "< Cheb(llo,hhi,oorder); \ - Cheb(hermop,noise,combined); \ - } - - double node = 0.000; - FILTERb(lo,hi,orderfilter);// 0 - // FILTERc(node,hi,51);// 0 - noise = Mn; - int base = 0; - int mult = 100; - FILTER(node,hi,base+1*mult); - FILTER(node,hi,base+2*mult); - FILTER(node,hi,base+3*mult); - FILTER(node,hi,base+4*mult); - FILTER(node,hi,base+5*mult); - FILTER(node,hi,base+6*mult); - FILTER(node,hi,base+7*mult); - FILTER(node,hi,base+8*mult); - FILTER(node,hi,base+9*mult); - FILTER(node,hi,base+10*mult); - FILTER(node,hi,base+11*mult); - FILTER(node,hi,base+12*mult); - FILTER(node,hi,base+13*mult); - FILTER(node,hi,base+14*mult); - FILTER(node,hi,base+15*mult); - assert(b==nn); - } -#endif - -#if 0 - virtual void CreateSubspaceChebyshev(GridParallelRNG &RNG,LinearOperatorBase &hermop, - int nn, - double hi, - double lo, - int orderfilter, - int ordermin, - int orderstep, - double filterlo - ) { - - RealD scale; - - FineField noise(FineGrid); - FineField Mn(FineGrid); - FineField tmp(FineGrid); - FineField combined(FineGrid); - - // New normalised noise - gaussian(RNG,noise); - scale = std::pow(norm2(noise),-0.5); - noise=noise*scale; - - // Initial matrix element - hermop.Op(noise,Mn); std::cout< "< JacobiPoly(0.005,60.,1500); - // JacobiPolynomial JacobiPoly(0.002,60.0,1500,-0.5,3.5); - //JacobiPolynomial JacobiPoly(0.03,60.0,500,-0.5,3.5); - // JacobiPolynomial JacobiPoly(0.00,60.0,1000,-0.5,3.5); - JacobiPoly(hermop,noise,Mn); - scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale; - subspace[b] = Mn; - hermop.Op(Mn,tmp); - std::cout< "< "<_is_local) { - nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute,lane); + nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); } else { - nbr = coalescedRead(Stencil.CommBuf()[SE->_offset],lane); + nbr = coalescedRead(Stencil.CommBuf()[SE->_offset]); } - synchronise(); + acceleratorSynchronise(); for(int bb=0;bb_is_local) { - nbr = 
coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute,lane); + nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); } else { - nbr = coalescedRead(Stencil.CommBuf()[SE->_offset],lane); + nbr = coalescedRead(Stencil.CommBuf()[SE->_offset]); } - synchronise(); + acceleratorSynchronise(); for(int bb=0;bboSites(),1,{ - - siteVector res = Zero(); - siteVector nbr; - int ptype; - StencilEntry *SE; - - SE=Stencil.GetEntry(ptype,point,ss); - - if(SE->_is_local&&SE->_permute) { - permute(nbr,in_v[SE->_offset],ptype); - } else if(SE->_is_local) { - nbr = in_v[SE->_offset]; - } else { - nbr = Stencil.CommBuf()[SE->_offset]; - } - synchronise(); - - res = res + Aview_p[point][ss]*nbr; - - out_v[ss]=res; - }); -#endif } void MdirAll(const CoarseVector &in,std::vector &out) { @@ -912,33 +590,8 @@ public: std::cout << GridLogMessage << " ForceHermitian, new code "< bc(FineGrid->_ndimension,0); - - blockPick(Grid(),phi,tmp,bc); // Pick out a block - linop.Op(tmp,Mphi); // Apply big dop - blockProject(iProj,Mphi,Subspace.subspace); // project it and print it - std::cout< &basis,Eigen::MatrixXd& Qt,int j0, int j1, i typedef decltype(basis[0].View()) View; auto tmp_v = basis[0].View(); Vector basis_v(basis.size(),tmp_v); + View *basis_vp = &basis_v[0]; typedef typename Field::vector_object vobj; GridBase* grid = basis[0].Grid(); @@ -129,7 +130,7 @@ void basisRotate(std::vector &basis,Eigen::MatrixXd& Qt,int j0, int j1, i for(int k=k0; k &basis,Eigen::MatrixXd& Qt,int j0, int j1, i int jj =j0+j; int ss =sj/nrot; int sss=ss+s; - coalescedWrite(basis_v[jj][sss],coalescedRead(Bp[ss*nrot+j])); + coalescedWrite(basis_vp[jj][sss],coalescedRead(Bp[ss*nrot+j])); }); } #endif @@ -155,6 +156,7 @@ void basisRotateJ(Field &result,std::vector &basis,Eigen::MatrixXd& Qt,in result.Checkerboard() = basis[0].Checkerboard(); auto result_v=result.View(); Vector basis_v(basis.size(),result_v); + View * basis_vp = &basis_v[0]; for(int k=0;k &basis,Eigen::MatrixXd& Qt,in double * Qt_j = & 
Qt_jv[0]; for(int k=0;koSites(),vobj::Nsimd(),{ - auto B=coalescedRead(basis_v[k0][ss]); + auto B=coalescedRead(basis_vp[k0][ss]); B=Zero(); for(int k=k0; k #ifndef GRID_ALIGNED_ALLOCATOR_H #define GRID_ALIGNED_ALLOCATOR_H -#ifdef HAVE_MALLOC_MALLOC_H -#include -#endif -#ifdef HAVE_MALLOC_H -#include -#endif - -#ifdef HAVE_MM_MALLOC_H -#include -#endif - -#define POINTER_CACHE -#define GRID_ALLOC_ALIGN (2*1024*1024) NAMESPACE_BEGIN(Grid); -// Move control to configure.ac and Config.h? +/*Move control to configure.ac and Config.h*/ +#define POINTER_CACHE +/*Pinning pages is costly*/ +/*Could maintain separate large and small allocation caches*/ #ifdef POINTER_CACHE class PointerCache { private: -/*Pinning pages is costly*/ -/*Could maintain separate large and small allocation caches*/ static const int Ncache=128; static int victim; @@ -159,44 +147,16 @@ public: size_type bytes = __n*sizeof(_Tp); profilerAllocate(bytes); - #ifdef POINTER_CACHE _Tp *ptr = (_Tp *) PointerCache::Lookup(bytes); #else pointer ptr = nullptr; #endif -#ifdef GRID_CUDA - //////////////////////////////////// - // Unified (managed) memory - //////////////////////////////////// - if ( ptr == (_Tp *) NULL ) { - // printf(" alignedAllocater cache miss %ld bytes ",bytes); BACKTRACEFP(stdout); - auto err = cudaMallocManaged((void **)&ptr,bytes); - if( err != cudaSuccess ) { - ptr = (_Tp *) NULL; - std::cerr << " cudaMallocManaged failed for " << bytes<<" bytes " < seeders(Nthread); for(int t=0;t_offset],lane); \ } \ - synchronise(); \ + acceleratorSynchronise(); \ Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \ Recon(result, Uchi); @@ -76,12 +76,12 @@ accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip) } else if ( st.same_node[Dir] ) { \ chi = coalescedRead(buf[SE->_offset],lane); \ } \ - synchronise(); \ + acceleratorSynchronise(); \ if (SE->_is_local || st.same_node[Dir] ) { \ Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \ Recon(result, Uchi); \ } \ - synchronise(); + 
acceleratorSynchronise(); #define GENERIC_STENCIL_LEG_EXT(Dir,spProj,Recon) \ SE = st.GetEntry(ptype, Dir, sF); \ @@ -91,7 +91,7 @@ accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip) Recon(result, Uchi); \ nmu++; \ } \ - synchronise(); + acceleratorSynchronise(); #define GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,Recon) \ if (SE->_is_local ) { \ @@ -101,7 +101,7 @@ accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip) } else { \ chi = coalescedRead(buf[SE->_offset],lane); \ } \ - synchronise(); \ + acceleratorSynchronise(); \ Impl::multLink(Uchi, U[sU], chi, dir, SE, st); \ Recon(result, Uchi); @@ -128,7 +128,7 @@ void WilsonKernels::GenericDhopSiteDag(StencilView &st, DoubledGaugeFieldV StencilEntry *SE; int ptype; const int Nsimd = SiteHalfSpinor::Nsimd(); - const int lane=SIMTlane(Nsimd); + const int lane=acceleratorSIMTlane(Nsimd); GENERIC_STENCIL_LEG(Xp,spProjXp,spReconXp); GENERIC_STENCIL_LEG(Yp,spProjYp,accumReconYp); GENERIC_STENCIL_LEG(Zp,spProjZp,accumReconZp); @@ -155,7 +155,7 @@ void WilsonKernels::GenericDhopSite(StencilView &st, DoubledGaugeFieldView int ptype; const int Nsimd = SiteHalfSpinor::Nsimd(); - const int lane=SIMTlane(Nsimd); + const int lane=acceleratorSIMTlane(Nsimd); GENERIC_STENCIL_LEG(Xm,spProjXp,spReconXp); GENERIC_STENCIL_LEG(Ym,spProjYp,accumReconYp); GENERIC_STENCIL_LEG(Zm,spProjZp,accumReconZp); @@ -183,7 +183,7 @@ void WilsonKernels::GenericDhopSiteDagInt(StencilView &st, DoubledGaugeFi StencilEntry *SE; int ptype; const int Nsimd = SiteHalfSpinor::Nsimd(); - const int lane=SIMTlane(Nsimd); + const int lane=acceleratorSIMTlane(Nsimd); result=Zero(); GENERIC_STENCIL_LEG_INT(Xp,spProjXp,accumReconXp); @@ -205,7 +205,7 @@ void WilsonKernels::GenericDhopSiteInt(StencilView &st, DoubledGaugeField typedef decltype(coalescedRead(buf[0])) calcHalfSpinor; typedef decltype(coalescedRead(in[0])) calcSpinor; const int Nsimd = SiteHalfSpinor::Nsimd(); - const int lane=SIMTlane(Nsimd); + const int 
lane=acceleratorSIMTlane(Nsimd); calcHalfSpinor chi; // calcHalfSpinor *chi_p; @@ -241,7 +241,7 @@ void WilsonKernels::GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFi int ptype; int nmu=0; const int Nsimd = SiteHalfSpinor::Nsimd(); - const int lane=SIMTlane(Nsimd); + const int lane=acceleratorSIMTlane(Nsimd); result=Zero(); GENERIC_STENCIL_LEG_EXT(Xp,spProjXp,accumReconXp); GENERIC_STENCIL_LEG_EXT(Yp,spProjYp,accumReconYp); @@ -272,7 +272,7 @@ void WilsonKernels::GenericDhopSiteExt(StencilView &st, DoubledGaugeField int ptype; int nmu=0; const int Nsimd = SiteHalfSpinor::Nsimd(); - const int lane=SIMTlane(Nsimd); + const int lane=acceleratorSIMTlane(Nsimd); result=Zero(); GENERIC_STENCIL_LEG_EXT(Xm,spProjXp,accumReconXp); GENERIC_STENCIL_LEG_EXT(Ym,spProjYp,accumReconYp); @@ -302,7 +302,7 @@ void WilsonKernels::GenericDhopSiteExt(StencilView &st, DoubledGaugeField StencilEntry *SE; \ int ptype; \ const int Nsimd = SiteHalfSpinor::Nsimd(); \ - const int lane=SIMTlane(Nsimd); \ + const int lane=acceleratorSIMTlane(Nsimd); \ \ SE = st.GetEntry(ptype, dir, sF); \ GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,spRecon); \ @@ -330,7 +330,7 @@ void WilsonKernels::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,Si StencilEntry *SE; int ptype; const int Nsimd = SiteHalfSpinor::Nsimd(); - const int lane=SIMTlane(Nsimd); + const int lane=acceleratorSIMTlane(Nsimd); SE = st.GetEntry(ptype, dir, sF); GENERIC_DHOPDIR_LEG(Xp,spProjXp,spReconXp); diff --git a/Grid/tensors/Tensor_SIMT.h b/Grid/tensors/Tensor_SIMT.h index c92edc82..ec57a679 100644 --- a/Grid/tensors/Tensor_SIMT.h +++ b/Grid/tensors/Tensor_SIMT.h @@ -31,24 +31,11 @@ Author: Peter Boyle NAMESPACE_BEGIN(Grid); -//accelerator_inline void SIMTsynchronise(void) -accelerator_inline void synchronise(void) -{ -#ifdef GRID_SIMT -#ifdef GRID_CUDA -// __syncthreads(); - __syncwarp(); -#endif -#endif - return; -} #ifndef GRID_SIMT ////////////////////////////////////////// // Trivial mapping of vectors on host 
////////////////////////////////////////// -accelerator_inline int SIMTlane(int Nsimd) { return 0; } // CUDA specific - template accelerator_inline vobj coalescedRead(const vobj & __restrict__ vec,int lane=0) { @@ -68,7 +55,6 @@ vobj coalescedReadPermute(const vobj & __restrict__ vec,int ptype,int doperm,int template accelerator_inline void coalescedWrite(vobj & __restrict__ vec,const vobj & __restrict__ extracted,int lane=0) { - // vstream(vec, extracted); vec = extracted; } template accelerator_inline @@ -77,31 +63,24 @@ void coalescedWriteNonTemporal(vobj & __restrict__ vec,const vobj & __restrict__ vstream(vec, extracted); } #else -#ifdef GRID_CUDA -accelerator_inline int SIMTlane(int Nsimd) { return threadIdx.y; } // CUDA specific -#endif -#ifdef GRID_SYCL -//accelerator_inline int SIMTlane(int Nsimd) { return __spirv_BuiltInGlobalInvocationId[2]; } //SYCL specific -accelerator_inline int SIMTlane(int Nsimd) { return __spirv::initLocalInvocationId<3, cl::sycl::id<3>>()[2]; } // SYCL specific -#endif ////////////////////////////////////////// // Extract and insert slices on the GPU ////////////////////////////////////////// template accelerator_inline -typename vobj::scalar_object coalescedRead(const vobj & __restrict__ vec,int lane=SIMTlane(vobj::Nsimd())) +typename vobj::scalar_object coalescedRead(const vobj & __restrict__ vec,int lane=acceleratorSIMTlane(vobj::Nsimd())) { return extractLane(lane,vec); } template accelerator_inline -typename vobj::scalar_object coalescedReadPermute(const vobj & __restrict__ vec,int ptype,int doperm,int lane=SIMTlane(vobj::Nsimd())) +typename vobj::scalar_object coalescedReadPermute(const vobj & __restrict__ vec,int ptype,int doperm,int lane=acceleratorSIMTlane(vobj::Nsimd())) { int mask = vobj::Nsimd() >> (ptype + 1); int plane= doperm ? 
lane ^ mask : lane; return extractLane(plane,vec); } template accelerator_inline -void coalescedWrite(vobj & __restrict__ vec,const typename vobj::scalar_object & __restrict__ extracted,int lane=SIMTlane(vobj::Nsimd())) +void coalescedWrite(vobj & __restrict__ vec,const typename vobj::scalar_object & __restrict__ extracted,int lane=acceleratorSIMTlane(vobj::Nsimd())) { insertLane(lane,vec,extracted); } diff --git a/Grid/threads/Accelerator.cc b/Grid/threads/Accelerator.cc new file mode 100644 index 00000000..4f2198f8 --- /dev/null +++ b/Grid/threads/Accelerator.cc @@ -0,0 +1,10 @@ +#include + +NAMESPACE_BEGIN(Grid); +uint32_t accelerator_threads; +uint32_t acceleratorThreads(void) {return accelerator_threads;}; +void acceleratorThreads(uint32_t t) {accelerator_threads = t;}; +#ifdef GRID_SYCL +cl::sycl::queue *theGridAccelerator; +#endif +NAMESPACE_END(Grid); diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h new file mode 100644 index 00000000..dc854388 --- /dev/null +++ b/Grid/threads/Accelerator.h @@ -0,0 +1,345 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/Accelerator.h + + Copyright (C) 2015 + +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once +NAMESPACE_BEGIN(Grid); + +////////////////////////////////////////////////////////////////////////////////// +// Accelerator primitives; fall back to threading if not CUDA or SYCL +////////////////////////////////////////////////////////////////////////////////// +// +// Function attributes +// +// accelerator +// accelerator_inline +// +// Parallel looping +// +// accelerator_for +// accelerator_forNB +// uint32_t accelerator_barrier(); // device synchronise +// +// Parallelism control: Number of threads in thread block is acceleratorThreads*Nsimd +// +// uint32_t acceleratorThreads(void); +// void acceleratorThreads(uint32_t); +// +// Warp control and info: +// +// void acceleratorSynchronise(void); // synch warp etc.. +// int acceleratorSIMTlane(int Nsimd); +// +// Memory management: +// +// void *acceleratorAllocShared(size_t bytes); +// void acceleratorFreeShared(void *ptr); +// +// void *acceleratorAllocDevice(size_t bytes); +// void acceleratorFreeDevice(void *ptr); +// +// void *acceleratorCopyToDevice(void *from,void *to,size_t bytes); +// void *acceleratorCopyFromDevice(void *from,void *to,size_t bytes); +// +////////////////////////////////////////////////////////////////////////////////// + +uint32_t acceleratorThreads(void); +void acceleratorThreads(uint32_t); + +////////////////////////////////////////////// +// CUDA acceleration +////////////////////////////////////////////// +#ifdef GRID_CUDA + +#ifdef __CUDA_ARCH__ +#define GRID_SIMT +#endif + +#define accelerator __host__ __device__ +#define accelerator_inline __host__ __device__ inline + +#define accelerator_barrier(dummy) \ + { \ + cudaDeviceSynchronize(); \ + cudaError err = cudaGetLastError(); \ + if ( cudaSuccess != err ) { \ + printf("Cuda error %s \n", cudaGetErrorString( err )); \ + 
puts(__FILE__); \ + printf("Line %d\n",__LINE__); \ + exit(0); \ + } \ + } + +#define accelerator_forNB( iterator, num, nsimd, ... ) \ + { \ + typedef uint64_t Iterator; \ + auto lambda = [=] accelerator (Iterator lane,Iterator iterator) mutable { \ + __VA_ARGS__; \ + }; \ + dim3 cu_threads(acceleratorThreads(),nsimd); \ + dim3 cu_blocks ((num+acceleratorThreads()-1)/acceleratorThreads()); \ + LambdaApply<<>>(nsimd,num,lambda); \ + } + +#define accelerator_for( iterator, num, nsimd, ... ) \ + accelerator_forNB(iterator, num, nsimd, { __VA_ARGS__ } ); \ + accelerator_barrier(dummy); + +inline void *acceleratorAllocShared(size_t bytes) +{ + void *ptr=NULL; + auto err = cudaMallocManaged((void **)&ptr,bytes); + if( err != cudaSuccess ) { + ptr = (_Tp *) NULL; + printf(" cudaMallocManaged failed for %d %s \n",bytes,cudaGetErrorString(err)); + } + return ptr; +}; +inline void *acceleratorAllocDevice(size_t bytes) +{ + void *ptr=NULL; + auto err = cudaMalloc((void **)&ptr,bytes); + if( err != cudaSuccess ) { + ptr = (_Tp *) NULL; + printf(" cudaMalloc failed for %d %s \n",bytes,cudaGetErrorString(err)); + } + return ptr; +}; +inline void acceleratorFreeShared(void *ptr){ cudaFree(ptr);}; +inline void acceleratorFreeDevice(void *ptr){ cudaFree(ptr);}; + +template __global__ +void LambdaApply(uint64_t Isites, uint64_t Osites, lambda Lambda) +{ + uint64_t isite = threadIdx.y; + uint64_t osite = threadIdx.x+blockDim.x*blockIdx.x; + if ( (osite +#include +NAMESPACE_BEGIN(Grid); + +extern cl::sycl::queue *theGridAccelerator; + +#ifdef __SYCL_DEVICE_ONLY__ +#define GRID_SIMT +#endif + +#define accelerator +#define accelerator_inline strong_inline + +#define accelerator_forNB(iterator,num,nsimd, ... 
) \ + theGridAccelerator->submit([&](cl::sycl::handler &cgh) { \ + cl::sycl::range<3> local {acceleratorThreads(),1,nsimd}; \ + cl::sycl::range<3> global{(unsigned long)num,1,(unsigned long)nsimd}; \ + cgh.parallel_for( \ + cl::sycl::nd_range<3>(global,local), \ + [=] (cl::sycl::nd_item<3> item) mutable { \ + auto iterator = item.get_global_id(0); \ + auto lane = item.get_global_id(2); \ + { __VA_ARGS__ }; \ + }); \ + }); + +#define accelerator_barrier(dummy) theGridAccelerator->wait(); + +#define accelerator_for( iterator, num, nsimd, ... ) \ + accelerator_forNB(iterator, num, nsimd, { __VA_ARGS__ } ); \ + accelerator_barrier(dummy); + +inline void *acceleratorAllocShared(size_t bytes){ return malloc_shared(bytes,*theGridAccelerator);}; +inline void *acceleratorAllocDevice(size_t bytes){ return malloc_device(bytes,*theGridAccelerator);}; +inline void acceleratorFreeShared(void *ptr){free(ptr,*theGridAccelerator);}; +inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);}; + +#endif + +////////////////////////////////////////////// +// HIP acceleration +////////////////////////////////////////////// +#ifdef GRID_HIP + +#ifdef __HIP_DEVICE_COMPILE__ +#define GRID_SIMT +#endif + +#define accelerator __host__ __device__ +#define accelerator_inline __host__ __device__ inline +#define accelerator_barrier(dummy) \ + { \ + hipDeviceSynchronize(); \ + auto err = hipGetLastError(); \ + if ( err != hipSuccess ) { \ + printf("HIP error %s \n", hipGetErrorString( err )); \ + puts(__FILE__); \ + printf("Line %d\n",__LINE__); \ + exit(0); \ + } \ + } + +#define accelerator_forNB( iterator, num, nsimd, ... 
) \ + { \ + typedef uint64_t Iterator; \ + auto lambda = [=] accelerator (Iterator lane,Iterator iterator) mutable { \ + __VA_ARGS__; \ + }; \ + dim3 hip_threads(acceleratorThreads(),nsimd); \ + dim3 hip_blocks ((num+acceleratorThreads()-1)/acceleratorThreads()); \ + hipLaunchKernelGGL(LambdaApply,hip_blocks,hip_threads,0,0,num,simd,lambda);\ + } + +#define accelerator_for( iterator, num, nsimd, ... ) \ + accelerator_forNB(iterator, num, nsimd, { __VA_ARGS__ } ); \ + accelerator_barrier(dummy); + +inline void *acceleratorAllocShared(size_t bytes) +{ + void *ptr=NULL; + auto err = hipMallocManaged((void **)&ptr,bytes); + if( err != hipSuccess ) { + ptr = (_Tp *) NULL; + printf(" hipMallocManaged failed for %d %s \n",bytes,hipGetErrorString(err)); + } + return ptr; +}; +inline void *acceleratorAllocDevice(size_t bytes) +{ + void *ptr=NULL; + auto err = hipMalloc((void **)&ptr,bytes); + if( err != hipSuccess ) { + ptr = (_Tp *) NULL; + printf(" hipMalloc failed for %d %s \n",bytes,hipGetErrorString(err)); + } + return ptr; +}; +inline void acceleratorFreeShared(void *ptr){ hipFree(ptr);}; +inline void acceleratorFreeDevice(void *ptr){ hipFree(ptr);}; + +template __global__ +void LambdaApply(uint64_t Isites, uint64_t Osites, lambda Lambda) +{ + uint64_t isite = hipThreadIdx_y; + uint64_t osite = hipThreadIdx_x + hipBlockDim_x*hipBlockIdx_x; + if ( (osite +#endif +#ifdef HAVE_MALLOC_H +#include +#endif +#ifdef HAVE_MM_MALLOC_H +#include +#endif + +#ifdef HAVE_MM_MALLOC_H +inline void *acceleratorAllocShared(size_t bytes){return _mm_malloc(bytes,GRID_ALLOC_ALIGN);}; +inline void *acceleratorAllocDevice(size_t bytes){return _mm_malloc(bytes,GRID_ALLOC_ALIGN);}; +inline void acceleratorFreeShared(void *ptr){_mm_free(ptr);}; +inline void acceleratorFreeDevice(void *ptr){_mm_free(ptr);}; +#else +inline void *acceleratorAllocShared(size_t bytes){return memalign(GRID_ALLOC_ALIGN,bytes);}; +inline void *acceleratorAllocDevice(size_t bytes){return 
memalign(GRID_ALLOC_ALIGN,bytes);}; +inline void acceleratorFreeShared(void *ptr){free(ptr);}; +inline void acceleratorFreeDevice(void *ptr){free(ptr);}; +#endif + + +#endif // CPU target + +/////////////////////////////////////////////////// +// Synchronise across local threads for divergence resynch +/////////////////////////////////////////////////// +accelerator_inline void acceleratorSynchronise(void) +{ +#ifdef GRID_SIMT +#ifdef GRID_CUDA + __syncwarp(); +#endif +#ifdef GRID_SYCL + // No barrier call on SYCL?? // Option get __spir:: stuff to do warp barrier +#endif +#ifdef GRID_HIP + __syncthreads(); +#endif +#endif + return; +} + +//////////////////////////////////////////////////// +// Address subvectors on accelerators +//////////////////////////////////////////////////// +#ifdef GRID_SIMT + +#ifdef GRID_CUDA +accelerator_inline int acceleratorSIMTlane(int Nsimd) { return threadIdx.y; } // CUDA specific +#endif +#ifdef GRID_SYCL +accelerator_inline int acceleratorSIMTlane(int Nsimd) { return __spirv::initLocalInvocationId<3, cl::sycl::id<3>>()[2]; } // SYCL specific +#endif +#ifdef GRID_HIP +accelerator_inline int acceleratorSIMTlane(int Nsimd) { return hipThreadIdx_y; } // HIP specific +#endif + +#else + +accelerator_inline int acceleratorSIMTlane(int Nsimd) { return 0; } // CUDA specific + +#endif + +NAMESPACE_END(Grid); diff --git a/Grid/threads/Pragmas.h b/Grid/threads/Pragmas.h index 45eca253..f311bf9b 100644 --- a/Grid/threads/Pragmas.h +++ b/Grid/threads/Pragmas.h @@ -2,7 +2,7 @@ Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/Threads.h + Source file: ./lib/Pragmas.h Copyright (C) 2015 @@ -28,148 +28,5 @@ Author: paboyle /* END LEGAL */ #pragma once -#ifndef MAX -#define MAX(x,y) ((x)>(y)?(x):(y)) -#define MIN(x,y) ((x)>(y)?(y):(x)) -#endif - -#define strong_inline __attribute__((always_inline)) inline -#define UNROLL _Pragma("unroll") - -////////////////////////////////////////////////////////////////////////////////// -// 
New primitives; explicit host thread calls, and accelerator data parallel calls -////////////////////////////////////////////////////////////////////////////////// - -#ifdef _OPENMP -#define GRID_OMP -#include -#endif - -#ifdef GRID_OMP -#define DO_PRAGMA_(x) _Pragma (#x) -#define DO_PRAGMA(x) DO_PRAGMA_(x) -#define thread_num(a) omp_get_thread_num() -#define thread_max(a) omp_get_max_threads() -#else -#define DO_PRAGMA_(x) -#define DO_PRAGMA(x) -#define thread_num(a) (0) -#define thread_max(a) (1) -#endif - -#define thread_for( i, num, ... ) DO_PRAGMA(omp parallel for schedule(static)) for ( uint64_t i=0;i __global__ -void LambdaApplySIMT(uint64_t Isites, uint64_t Osites, lambda Lambda) -{ - uint64_t isite = threadIdx.y; - uint64_t osite = threadIdx.x+blockDim.x*blockIdx.x; - if ( (osite >>(nsimd,num,lambda); \ - } - -// Copy the for_each_n style ; Non-blocking variant (default -#define accelerator_for( iterator, num, nsimd, ... ) \ - accelerator_forNB(iterator, num, nsimd, { __VA_ARGS__ } ); \ - accelerator_barrier(dummy); - -#endif - -#ifdef GRID_SYCL - -#ifdef __SYCL_DEVICE_ONLY__ -#define GRID_SIMT -#endif - -#include -#include - -extern cl::sycl::queue *theGridAccelerator; - -extern uint32_t gpu_threads; - -#define accelerator -#define accelerator_inline strong_inline - -#define accelerator_forNB(iterator,num,nsimd, ... ) \ - theGridAccelerator->submit([&](cl::sycl::handler &cgh) { \ - cl::sycl::range<3> local {gpu_threads,1,nsimd}; \ - cl::sycl::range<3> global{(unsigned long)num,1,(unsigned long)nsimd}; \ - cgh.parallel_for( \ - cl::sycl::nd_range<3>(global,local), \ - [=] (cl::sycl::nd_item<3> item) mutable { \ - auto iterator = item.get_global_id(0); \ - auto lane = item.get_global_id(2); \ - { __VA_ARGS__ }; \ - }); \ - }); - -#define accelerator_barrier(dummy) theGridAccelerator->wait(); - -#define accelerator_for( iterator, num, nsimd, ... 
) \ - accelerator_forNB(iterator, num, nsimd, { __VA_ARGS__ } ); \ - accelerator_barrier(dummy); - - -#endif - -#if ( (!defined(GRID_SYCL)) && (!defined(GRID_CUDA)) ) - -#define accelerator -#define accelerator_inline strong_inline -#define accelerator_for(iterator,num,nsimd, ... ) thread_for(iterator, num, { __VA_ARGS__ }); -#define accelerator_forNB(iterator,num,nsimd, ... ) thread_for(iterator, num, { __VA_ARGS__ }); -#define accelerator_barrier(dummy) - -#endif +#include +#include diff --git a/Grid/threads/ThreadReduction.h b/Grid/threads/ThreadReduction.h new file mode 100644 index 00000000..f0d24d50 --- /dev/null +++ b/Grid/threads/ThreadReduction.h @@ -0,0 +1,127 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/ThreadReduction.h + + Copyright (C) 2015 + +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + +// Introduce a class to gain deterministic bit reproducible reduction. +// make static; perhaps just a namespace is required. 
+NAMESPACE_BEGIN(Grid); + +class GridThread { +public: + static int _threads; + static int _hyperthreads; + static int _cores; + + static void SetCores(int cr) { +#ifdef GRID_OMP + _cores = cr; +#else + _cores = 1; +#endif + } + static void SetThreads(int thr) { +#ifdef GRID_OMP + _threads = MIN(thr,omp_get_max_threads()) ; + omp_set_num_threads(_threads); +#else + _threads = 1; +#endif + }; + static void SetMaxThreads(void) { +#ifdef GRID_OMP + _threads = omp_get_max_threads(); + omp_set_num_threads(_threads); +#else + _threads = 1; +#endif + }; + static int GetHyperThreads(void) { assert(_threads%_cores ==0); return _threads/_cores; }; + static int GetCores(void) { return _cores; }; + static int GetThreads(void) { return _threads; }; + static int SumArraySize(void) {return _threads;}; + + static void GetWork(int nwork, int me, int & mywork, int & myoff){ + GetWork(nwork,me,mywork,myoff,_threads); + } + static void GetWork(int nwork, int me, int & mywork, int & myoff,int units){ + int basework = nwork/units; + int backfill = units-(nwork%units); + if ( me >= units ) { + mywork = myoff = 0; + } else { + mywork = (nwork+me)/units; + myoff = basework * me; + if ( me > backfill ) + myoff+= (me-backfill); + } + return; + }; + + static void GetWorkBarrier(int nwork, int &me, int & mywork, int & myoff){ + me = ThreadBarrier(); + GetWork(nwork,me,mywork,myoff); + }; + + static int ThreadBarrier(void) { +#ifdef GRID_OMP +#pragma omp barrier + return omp_get_thread_num(); +#else + return 0; +#endif + }; + + template static void ThreadSum( std::vector &sum_array,obj &val,int me){ + sum_array[me] = val; + val=Zero(); + ThreadBarrier(); + for(int i=0;i<_threads;i++) val+= sum_array[i]; + ThreadBarrier(); + } + + static void bcopy(const void *src, void *dst, size_t len) { +#ifdef GRID_OMP +#pragma omp parallel + { + const char *c_src =(char *) src; + char *c_dest=(char *) dst; + int me,mywork,myoff; + GridThread::GetWorkBarrier(len,me, mywork,myoff); + 
bcopy(&c_src[myoff],&c_dest[myoff],mywork); + } +#else + bcopy(src,dst,len); +#endif + } + + +}; + +NAMESPACE_END(Grid); + diff --git a/Grid/threads/Threads.h b/Grid/threads/Threads.h index 29cae060..84989853 100644 --- a/Grid/threads/Threads.h +++ b/Grid/threads/Threads.h @@ -28,101 +28,41 @@ Author: paboyle /* END LEGAL */ #pragma once +#ifndef MAX +#define MAX(x,y) ((x)>(y)?(x):(y)) +#define MIN(x,y) ((x)>(y)?(y):(x)) +#endif -// Introduce a class to gain deterministic bit reproducible reduction. -// make static; perhaps just a namespace is required. -NAMESPACE_BEGIN(Grid); +#define strong_inline __attribute__((always_inline)) inline +#define UNROLL _Pragma("unroll") -class GridThread { -public: - static int _threads; - static int _hyperthreads; - static int _cores; +////////////////////////////////////////////////////////////////////////////////// +// New primitives; explicit host thread calls, and accelerator data parallel calls +////////////////////////////////////////////////////////////////////////////////// + +#ifdef _OPENMP +#define GRID_OMP +#include +#endif - static void SetCores(int cr) { #ifdef GRID_OMP - _cores = cr; +#define DO_PRAGMA_(x) _Pragma (#x) +#define DO_PRAGMA(x) DO_PRAGMA_(x) +#define thread_num(a) omp_get_thread_num() +#define thread_max(a) omp_get_max_threads() #else - _cores = 1; +#define DO_PRAGMA_(x) +#define DO_PRAGMA(x) +#define thread_num(a) (0) +#define thread_max(a) (1) #endif - } - static void SetThreads(int thr) { -#ifdef GRID_OMP - _threads = MIN(thr,omp_get_max_threads()) ; - omp_set_num_threads(_threads); -#else - _threads = 1; -#endif - }; - static void SetMaxThreads(void) { -#ifdef GRID_OMP - _threads = omp_get_max_threads(); - omp_set_num_threads(_threads); -#else - _threads = 1; -#endif - }; - static int GetHyperThreads(void) { assert(_threads%_cores ==0); return _threads/_cores; }; - static int GetCores(void) { return _cores; }; - static int GetThreads(void) { return _threads; }; - static int SumArraySize(void) {return 
_threads;}; - static void GetWork(int nwork, int me, int & mywork, int & myoff){ - GetWork(nwork,me,mywork,myoff,_threads); - } - static void GetWork(int nwork, int me, int & mywork, int & myoff,int units){ - int basework = nwork/units; - int backfill = units-(nwork%units); - if ( me >= units ) { - mywork = myoff = 0; - } else { - mywork = (nwork+me)/units; - myoff = basework * me; - if ( me > backfill ) - myoff+= (me-backfill); - } - return; - }; - - static void GetWorkBarrier(int nwork, int &me, int & mywork, int & myoff){ - me = ThreadBarrier(); - GetWork(nwork,me,mywork,myoff); - }; - - static int ThreadBarrier(void) { -#ifdef GRID_OMP -#pragma omp barrier - return omp_get_thread_num(); -#else - return 0; -#endif - }; - - template static void ThreadSum( std::vector &sum_array,obj &val,int me){ - sum_array[me] = val; - val=Zero(); - ThreadBarrier(); - for(int i=0;i<_threads;i++) val+= sum_array[i]; - ThreadBarrier(); - } - - static void bcopy(const void *src, void *dst, size_t len) { -#ifdef GRID_OMP -#pragma omp parallel - { - const char *c_src =(char *) src; - char *c_dest=(char *) dst; - int me,mywork,myoff; - GridThread::GetWorkBarrier(len,me, mywork,myoff); - bcopy(&c_src[myoff],&c_dest[myoff],mywork); - } -#else - bcopy(src,dst,len); -#endif - } - - -}; - -NAMESPACE_END(Grid); +#define thread_for( i, num, ... 
) DO_PRAGMA(omp parallel for schedule(static)) for ( uint64_t i=0;i Date: Fri, 8 May 2020 13:14:12 -0400 Subject: [PATCH 04/86] NVCC compile fixes --- Grid/Grid_Eigen_Dense.h | 11 ++++++----- Grid/threads/Accelerator.h | 12 ++++++++---- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/Grid/Grid_Eigen_Dense.h b/Grid/Grid_Eigen_Dense.h index d7119358..3aec81b6 100644 --- a/Grid/Grid_Eigen_Dense.h +++ b/Grid/Grid_Eigen_Dense.h @@ -12,25 +12,26 @@ #endif /* NVCC save and restore compile environment*/ -#ifdef __NVCC__ +#ifdef GRID_CUDA #pragma push #pragma diag_suppress code_is_unreachable -#pragma push_macro("GRID_SIMT") +#pragma push_macro("__CUDA_ARCH__") #pragma push_macro("__NVCC__") #pragma push_macro("__CUDACC__") +#undef __CUDA_ARCH__ #undef __NVCC__ #undef __CUDACC__ -#undef GRID_SIMT #define __NVCC__REDEFINE__ #endif /* SYCL save and restore compile environment*/ -#ifdef __SYCL_DEVICE_ONLY__ +#ifdef GRID_SYCL #pragma push #pragma push_macro("__SYCL_DEVICE_ONLY__") #undef __SYCL_DEVICE_ONLY__ -#undef EIGEN_USE_SYCL #define EIGEN_DONT_VECTORIZE +//#undef EIGEN_USE_SYCL +#define __SYCL__REDEFINE__ #endif diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h index dc854388..ec20d8c9 100644 --- a/Grid/threads/Accelerator.h +++ b/Grid/threads/Accelerator.h @@ -73,6 +73,10 @@ void acceleratorThreads(uint32_t); ////////////////////////////////////////////// // CUDA acceleration ////////////////////////////////////////////// +#ifdef __NVCC__ +#define GRID_CUDA +#endif + #ifdef GRID_CUDA #ifdef __CUDA_ARCH__ @@ -114,7 +118,7 @@ inline void *acceleratorAllocShared(size_t bytes) void *ptr=NULL; auto err = cudaMallocManaged((void **)&ptr,bytes); if( err != cudaSuccess ) { - ptr = (_Tp *) NULL; + ptr = (void *) NULL; printf(" cudaMallocManaged failed for %d %s \n",bytes,cudaGetErrorString(err)); } return ptr; @@ -124,7 +128,7 @@ inline void *acceleratorAllocDevice(size_t bytes) void *ptr=NULL; auto err = cudaMalloc((void **)&ptr,bytes); if( err 
!= cudaSuccess ) { - ptr = (_Tp *) NULL; + ptr = (void *) NULL; printf(" cudaMalloc failed for %d %s \n",bytes,cudaGetErrorString(err)); } return ptr; @@ -232,7 +236,7 @@ inline void *acceleratorAllocShared(size_t bytes) void *ptr=NULL; auto err = hipMallocManaged((void **)&ptr,bytes); if( err != hipSuccess ) { - ptr = (_Tp *) NULL; + ptr = (void *) NULL; printf(" hipMallocManaged failed for %d %s \n",bytes,hipGetErrorString(err)); } return ptr; @@ -242,7 +246,7 @@ inline void *acceleratorAllocDevice(size_t bytes) void *ptr=NULL; auto err = hipMalloc((void **)&ptr,bytes); if( err != hipSuccess ) { - ptr = (_Tp *) NULL; + ptr = (void *) NULL; printf(" hipMalloc failed for %d %s \n",bytes,hipGetErrorString(err)); } return ptr; From bbbee5660d036a8a39b48f46f75bf790b65b7aa7 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sun, 10 May 2020 05:28:09 -0400 Subject: [PATCH 05/86] First compiile on HiP --- Grid/Grid_Eigen_Dense.h | 2 +- Grid/allocator/AlignedAllocator.h | 2 +- Grid/lattice/Lattice_reduction.h | 2 +- .../CayleyFermion5DImplementation.h | 7 ++-- Grid/serialisation/Serialisation.h | 2 +- Grid/simd/Grid_gpu_vec.h | 5 +++ Grid/simd/Simd.h | 4 +- Grid/threads/Accelerator.h | 8 ++-- configure.ac | 37 +++++++++++++------ 9 files changed, 44 insertions(+), 25 deletions(-) diff --git a/Grid/Grid_Eigen_Dense.h b/Grid/Grid_Eigen_Dense.h index 3aec81b6..9556c03d 100644 --- a/Grid/Grid_Eigen_Dense.h +++ b/Grid/Grid_Eigen_Dense.h @@ -12,7 +12,7 @@ #endif /* NVCC save and restore compile environment*/ -#ifdef GRID_CUDA +#ifdef __NVCC__ #pragma push #pragma diag_suppress code_is_unreachable #pragma push_macro("__CUDA_ARCH__") diff --git a/Grid/allocator/AlignedAllocator.h b/Grid/allocator/AlignedAllocator.h index 7921c415..a29c8bcb 100644 --- a/Grid/allocator/AlignedAllocator.h +++ b/Grid/allocator/AlignedAllocator.h @@ -155,7 +155,7 @@ public: if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) acceleratorAllocShared(bytes); - assert( ptr != (_Tp *)NULL); + assert( ( (_Tp*)ptr != 
(_Tp *)NULL ) ); return ptr; } diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index 1f06ac66..997affe8 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -38,7 +38,7 @@ inline typename vobj::scalar_object sum_cpu(const vobj *arg, Integer osites) { typedef typename vobj::scalar_object sobj; - const int Nsimd = vobj::Nsimd(); + // const int Nsimd = vobj::Nsimd(); const int nthread = GridThread::GetThreads(); Vector sumarray(nthread); diff --git a/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h index 082e4b73..7542dd34 100644 --- a/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h @@ -644,7 +644,7 @@ void CayleyFermion5D::ContractConservedCurrent( PropagatorField &q_in_1, Current curr_type, unsigned int mu) { -#ifndef GRID_CUDA +#if (!defined(GRID_CUDA)) && (!defined(GRID_HIP)) Gamma::Algebra Gmu [] = { Gamma::Algebra::GammaX, Gamma::Algebra::GammaY, @@ -779,9 +779,9 @@ void CayleyFermion5D::SeqConservedCurrent(PropagatorField &q_in, assert(mu>=0); assert(mu::SeqConservedCurrent(PropagatorField &q_in, } #endif -#ifndef GRID_CUDA +#if (!defined(GRID_CUDA)) && (!defined(GRID_HIP)) + int tshift = (mu == Nd-1) ? 
1 : 0; //////////////////////////////////////////////// // GENERAL CAYLEY CASE //////////////////////////////////////////////// diff --git a/Grid/serialisation/Serialisation.h b/Grid/serialisation/Serialisation.h index 177a65f9..e14120af 100644 --- a/Grid/serialisation/Serialisation.h +++ b/Grid/serialisation/Serialisation.h @@ -36,7 +36,7 @@ Author: Peter Boyle #include "BinaryIO.h" #include "TextIO.h" #include "XmlIO.h" -#ifndef GRID_CUDA +#if (!defined(GRID_CUDA)) && (!defined(GRID_HIP)) #include "JSON_IO.h" #endif diff --git a/Grid/simd/Grid_gpu_vec.h b/Grid/simd/Grid_gpu_vec.h index 0bff4c2f..aa7e385c 100644 --- a/Grid/simd/Grid_gpu_vec.h +++ b/Grid/simd/Grid_gpu_vec.h @@ -32,7 +32,12 @@ Author: Peter Boyle */ //---------------------------------------------------------------------- +#ifdef GRID_CUDA #include +#endif +#ifdef GRID_HIP +#include +#endif namespace Grid { diff --git a/Grid/simd/Simd.h b/Grid/simd/Simd.h index 80f7c2e7..37aee2ed 100644 --- a/Grid/simd/Simd.h +++ b/Grid/simd/Simd.h @@ -31,7 +31,7 @@ directory #ifndef GRID_SIMD_H #define GRID_SIMD_H -#ifdef GRID_CUDA +#if defined(GRID_CUDA) || defined(GRID_HIP) #include #endif @@ -65,7 +65,7 @@ typedef RealD Real; typedef RealF Real; #endif -#ifdef GRID_CUDA +#if defined(GRID_CUDA) || defined(GRID_HIP) typedef thrust::complex ComplexF; typedef thrust::complex ComplexD; typedef thrust::complex Complex; diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h index ec20d8c9..6f2e0b04 100644 --- a/Grid/threads/Accelerator.h +++ b/Grid/threads/Accelerator.h @@ -73,9 +73,6 @@ void acceleratorThreads(uint32_t); ////////////////////////////////////////////// // CUDA acceleration ////////////////////////////////////////////// -#ifdef __NVCC__ -#define GRID_CUDA -#endif #ifdef GRID_CUDA @@ -197,6 +194,9 @@ inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);}; // HIP acceleration ////////////////////////////////////////////// #ifdef GRID_HIP +NAMESPACE_END(Grid); +#include 
+NAMESPACE_BEGIN(Grid); #ifdef __HIP_DEVICE_COMPILE__ #define GRID_SIMT @@ -224,7 +224,7 @@ inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);}; }; \ dim3 hip_threads(acceleratorThreads(),nsimd); \ dim3 hip_blocks ((num+acceleratorThreads()-1)/acceleratorThreads()); \ - hipLaunchKernelGGL(LambdaApply,hip_blocks,hip_threads,0,0,num,simd,lambda);\ + hipLaunchKernelGGL(LambdaApply,hip_blocks,hip_threads,0,0,num,nsimd,lambda);\ } #define accelerator_for( iterator, num, nsimd, ... ) \ diff --git a/configure.ac b/configure.ac index cf5ca85b..f9ea03fc 100644 --- a/configure.ac +++ b/configure.ac @@ -138,7 +138,7 @@ esac ############### SUMMIT JSRUN AC_ARG_ENABLE([summit], [AC_HELP_STRING([--enable-summit=yes|no], [enable IBMs jsrun resource manager for SUMMIT])], - [ac_JSRUN=${enable_summit}], [ac_SUMMIT=no]) + [ac_SUMMIT=${enable_summit}], [ac_SUMMIT=no]) case ${ac_SUMMIT} in no);; yes) @@ -148,18 +148,26 @@ case ${ac_SUMMIT} in esac ############### SYCL -AC_ARG_ENABLE([sycl], - [AC_HELP_STRING([--enable-sycl=yes|no], [enable SYCL])], - [ac_JSRUN=${enable_sycl}], [ac_SYCL=no]) -case ${ac_SYCL} in - no);; - yes) +AC_ARG_ENABLE([accelerator], + [AC_HELP_STRING([--enable-accelerator=cuda|sycl|hip|none], [enable none,cuda,sycl,hip acceleration])], + [ac_ACCELERATOR=${enable_accelerator}], [ac_ACCELERATOR=none]) +case ${ac_ACCELERATOR} in + cuda) + echo CUDA acceleration + AC_DEFINE([GRID_CUDA],[1],[Use CUDA offload]);; + sycl) + echo SYCL acceleration AC_DEFINE([GRID_SYCL],[1],[Use SYCL offload]);; + hip) + echo HIP acceleration + AC_DEFINE([GRID_HIP],[1],[Use HIP offload]);; + none) + echo NO acceleration + ;; *) - AC_DEFINE([GRID_SYCL],[1],[Use SYCL offload]);; + AC_MSG_ERROR(["Acceleration not suppoorted ${ac_ACCELERATOR}"]);; esac - ############### Intel libraries AC_ARG_ENABLE([mkl], [AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])], @@ -289,16 +297,20 @@ esac ##################### Compiler dependent choices 
case ${CXX} in nvcc) -# CXX="nvcc -keep -v -x cu " -# CXXLD="nvcc -v -link" CXX="nvcc -x cu " CXXLD="nvcc -link" -# CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing -Xcompiler -Wno-unusable-partial-specialization --expt-extended-lambda --expt-relaxed-constexpr" CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr" if test $ac_openmp = yes; then CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp" fi ;; + hipcc) + CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr" + CXXLD=${CXX} + if test $ac_openmp = yes; then + CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp" + fi + ;; *) CXXLD=${CXX} CXXFLAGS="$CXXFLAGS -fno-strict-aliasing" @@ -599,6 +611,7 @@ compiler version : ${ax_cv_gxx_version} ----- BUILD OPTIONS ----------------------------------- SIMD : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG} Threading : ${ac_openmp} +Acceleration : ${ac_ACCELERATOR} Communications type : ${comms_type} Shared memory allocator : ${ac_SHM} Shared memory mmap path : ${ac_SHMPATH} From 8c31c065b53d8fafb7ae5d52969f7eca07837413 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 11 May 2020 17:00:30 -0400 Subject: [PATCH 06/86] Keep the Vector fixed to protect it from realloc --- Grid/cshift/Cshift_table.cc | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 Grid/cshift/Cshift_table.cc diff --git a/Grid/cshift/Cshift_table.cc b/Grid/cshift/Cshift_table.cc new file mode 100644 index 00000000..d46e51c0 --- /dev/null +++ b/Grid/cshift/Cshift_table.cc @@ -0,0 +1,4 @@ +#include +NAMESPACE_BEGIN(Grid); +Vector > Cshift_table; +NAMESPACE_END(Grid); From 07c0c02f8c1d58605150c4729f7d1b3b32416045 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 11 May 2020 17:02:01 -0400 Subject: [PATCH 07/86] Speed up Cshift --- Grid/allocator/AlignedAllocator.h | 10 +- Grid/cartesian/Cartesian_base.h | 1 + Grid/cartesian/Cartesian_full.h | 4 + Grid/cartesian/Cartesian_red_black.h | 18 ++- Grid/cshift/Cshift_common.h | 85 
+++++++------ Grid/threads/Accelerator.cc | 182 ++++++++++++++++++++++++++- Grid/threads/Accelerator.h | 178 ++++++++++++++------------ Grid/threads/Threads.h | 6 + Grid/util/Init.cc | 139 +------------------- tests/core/Test_cshift_red_black.cc | 2 +- tests/core/Test_cshift_rotate.cc | 1 + tests/core/Test_main.cc | 12 +- 12 files changed, 373 insertions(+), 265 deletions(-) diff --git a/Grid/allocator/AlignedAllocator.h b/Grid/allocator/AlignedAllocator.h index a29c8bcb..c8742d3e 100644 --- a/Grid/allocator/AlignedAllocator.h +++ b/Grid/allocator/AlignedAllocator.h @@ -29,7 +29,6 @@ Author: Peter Boyle #ifndef GRID_ALIGNED_ALLOCATOR_H #define GRID_ALIGNED_ALLOCATOR_H - NAMESPACE_BEGIN(Grid); /*Move control to configure.ac and Config.h*/ @@ -157,6 +156,15 @@ public: assert( ( (_Tp*)ptr != (_Tp *)NULL ) ); +#if 0 + size_type page_size=4096; + size_type pages = (bytes+page_size-1)/page_size; + uint8_t *bp = (uint8_t *)ptr; + + accelerator_for(pg,pages,1,{ + bp[pg*page_size]=0; + }); +#endif return ptr; } diff --git a/Grid/cartesian/Cartesian_base.h b/Grid/cartesian/Cartesian_base.h index 87472cc9..ae1fd1fd 100644 --- a/Grid/cartesian/Cartesian_base.h +++ b/Grid/cartesian/Cartesian_base.h @@ -81,6 +81,7 @@ public: bool _isCheckerBoarded; int LocallyPeriodic; + Coordinate _checker_dim_mask; public: diff --git a/Grid/cartesian/Cartesian_full.h b/Grid/cartesian/Cartesian_full.h index c083817b..31a67bf0 100644 --- a/Grid/cartesian/Cartesian_full.h +++ b/Grid/cartesian/Cartesian_full.h @@ -38,6 +38,7 @@ class GridCartesian: public GridBase { public: int dummy; + Coordinate _checker_dim_mask; virtual int CheckerBoardFromOindexTable (int Oindex) { return 0; } @@ -104,6 +105,7 @@ public: _ldimensions.resize(_ndimension); _rdimensions.resize(_ndimension); _simd_layout.resize(_ndimension); + _checker_dim_mask.resize(_ndimension);; _lstart.resize(_ndimension); _lend.resize(_ndimension); @@ -114,6 +116,8 @@ public: for (int d = 0; d < _ndimension; d++) { + _checker_dim_mask[d]=0; 
+ _fdimensions[d] = dimensions[d]; // Global dimensions _gdimensions[d] = _fdimensions[d]; // Global dimensions _simd_layout[d] = simd_layout[d]; diff --git a/Grid/cartesian/Cartesian_red_black.h b/Grid/cartesian/Cartesian_red_black.h index 34f763d2..b71981f5 100644 --- a/Grid/cartesian/Cartesian_red_black.h +++ b/Grid/cartesian/Cartesian_red_black.h @@ -35,12 +35,28 @@ static const int CbRed =0; static const int CbBlack=1; static const int Even =CbRed; static const int Odd =CbBlack; + +accelerator_inline int RedBlackCheckerBoardFromOindex (int oindex, Coordinate &rdim, Coordinate &chk_dim_msk) +{ + int nd=rdim.size(); + Coordinate coor(nd); + + Lexicographic::CoorFromIndex(coor,oindex,rdim); + + int linear=0; + for(int d=0;d _checker_board; diff --git a/Grid/cshift/Cshift_common.h b/Grid/cshift/Cshift_common.h index 954342cb..fe9afc62 100644 --- a/Grid/cshift/Cshift_common.h +++ b/Grid/cshift/Cshift_common.h @@ -29,6 +29,8 @@ Author: Peter Boyle NAMESPACE_BEGIN(Grid); +extern Vector > Cshift_table; + /////////////////////////////////////////////////////////////////// // Gather for when there is no need to SIMD split /////////////////////////////////////////////////////////////////// @@ -46,7 +48,8 @@ Gather_plane_simple (const Lattice &rhs,commVector &buffer,int dimen int e2=rhs.Grid()->_slice_block[dimension]; int ent = 0; - static Vector > table; table.resize(e1*e2); + if(Cshift_table.size()_slice_stride[dimension]; auto rhs_v = rhs.View(); @@ -55,7 +58,7 @@ Gather_plane_simple (const Lattice &rhs,commVector &buffer,int dimen for(int b=0;b(off+bo+b,so+o+b); + Cshift_table[ent++] = std::pair(off+bo+b,so+o+b); } } } else { @@ -65,13 +68,15 @@ Gather_plane_simple (const Lattice &rhs,commVector &buffer,int dimen int o = n*stride; int ocb=1<CheckerBoardFromOindex(o+b); if ( ocb &cbmask ) { - table[ent++]=std::pair (off+bo++,so+o+b); + Cshift_table[ent++]=std::pair (off+bo++,so+o+b); } } } } - thread_for(i,ent,{ - buffer[table[i].first]=rhs_v[table[i].second]; + auto 
buffer_p = & buffer[0]; + auto table = &Cshift_table[0]; + accelerator_for(i,ent,1,{ + buffer_p[table[i].first]=rhs_v[table[i].second]; }); } @@ -97,34 +102,36 @@ Gather_plane_extract(const Lattice &rhs, auto rhs_v = rhs.View(); if ( cbmask ==0x3){ - thread_for_collapse(2,n,e1,{ - for(int b=0;b(temp,pointers,offset); - } - }); + }); } else { - // Case of SIMD split AND checker dim cannot currently be hit, except in - // Test_cshift_red_black code. - std::cout << " Dense packed buffer WARNING " <_rdimensions; + Coordinate cdm =rhs.Grid()->_checker_dim_mask; + std::cout << " Dense packed buffer WARNING " <CheckerBoardFromOindex(o+b); + int oindex = o+b; + + int cb = RedBlackCheckerBoardFromOindex(oindex, rdim, cdm); + + int ocb=1<(temp,pointers,offset); } - } - }); + }); } } @@ -145,7 +152,8 @@ template void Scatter_plane_simple (Lattice &rhs,commVector_slice_block[dimension]; int stride=rhs.Grid()->_slice_stride[dimension]; - static std::vector > table; table.resize(e1*e2); + if(Cshift_table.size() void Scatter_plane_simple (Lattice &rhs,commVector_slice_stride[dimension]; int bo =n*rhs.Grid()->_slice_block[dimension]; - table[ent++] = std::pair(so+o+b,bo+b); + Cshift_table[ent++] = std::pair(so+o+b,bo+b); } } @@ -165,15 +173,17 @@ template void Scatter_plane_simple (Lattice &rhs,commVector_slice_stride[dimension]; int ocb=1<CheckerBoardFromOindex(o+b);// Could easily be a table lookup if ( ocb & cbmask ) { - table[ent++]=std::pair (so+o+b,bo++); + Cshift_table[ent++]=std::pair (so+o+b,bo++); } } } } auto rhs_v = rhs.View(); - thread_for(i,ent,{ - rhs_v[table[i].first]=buffer[table[i].second]; + auto buffer_p = & buffer[0]; + auto table = &Cshift_table[0]; + accelerator_for(i,ent,1,{ + rhs_v[table[i].first]=buffer_p[table[i].second]; }); } @@ -195,13 +205,11 @@ template void Scatter_plane_merge(Lattice &rhs,ExtractPointerA if(cbmask ==0x3 ) { auto rhs_v = rhs.View(); - thread_for_collapse(2,n,e1,{ - for(int b=0;b_slice_stride[dimension]; int offset = 
b+n*rhs.Grid()->_slice_block[dimension]; merge(rhs_v[so+o+b],pointers,offset); - } - }); + }); } else { // Case of SIMD split AND checker dim cannot currently be hit, except in @@ -225,6 +233,7 @@ template void Scatter_plane_merge(Lattice &rhs,ExtractPointerA ////////////////////////////////////////////////////// // local to node block strided copies ////////////////////////////////////////////////////// + template void Copy_plane(Lattice& lhs,const Lattice &rhs, int dimension,int lplane,int rplane,int cbmask) { int rd = rhs.Grid()->_rdimensions[dimension]; @@ -239,14 +248,16 @@ template void Copy_plane(Lattice& lhs,const Lattice &rhs int e1=rhs.Grid()->_slice_nblock[dimension]; // clearly loop invariant for icpc int e2=rhs.Grid()->_slice_block[dimension]; int stride = rhs.Grid()->_slice_stride[dimension]; - static std::vector > table; table.resize(e1*e2); + + if(Cshift_table.size()(lo+o,ro+o); + Cshift_table[ent++] = std::pair(lo+o,ro+o); } } } else { @@ -255,7 +266,7 @@ template void Copy_plane(Lattice& lhs,const Lattice &rhs int o =n*stride+b; int ocb=1<CheckerBoardFromOindex(o); if ( ocb&cbmask ) { - table[ent++] = std::pair(lo+o,ro+o); + Cshift_table[ent++] = std::pair(lo+o,ro+o); } } } @@ -263,7 +274,8 @@ template void Copy_plane(Lattice& lhs,const Lattice &rhs auto rhs_v = rhs.View(); auto lhs_v = lhs.View(); - thread_for(i,ent,{ + auto table = &Cshift_table[0]; + accelerator_for(i,ent,1,{ lhs_v[table[i].first]=rhs_v[table[i].second]; }); @@ -271,7 +283,6 @@ template void Copy_plane(Lattice& lhs,const Lattice &rhs template void Copy_plane_permute(Lattice& lhs,const Lattice &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type) { - int rd = rhs.Grid()->_rdimensions[dimension]; if ( !rhs.Grid()->CheckerBoarded(dimension) ) { @@ -285,27 +296,29 @@ template void Copy_plane_permute(Lattice& lhs,const Lattice_slice_block [dimension]; int stride = rhs.Grid()->_slice_stride[dimension]; - static std::vector > table; table.resize(e1*e2); + 
if(Cshift_table.size()(lo+o+b,ro+o+b); + Cshift_table[ent++] = std::pair(lo+o+b,ro+o+b); }} } else { for(int n=0;nCheckerBoardFromOindex(o+b); - if ( ocb&cbmask ) table[ent++] = std::pair(lo+o+b,ro+o+b); + if ( ocb&cbmask ) Cshift_table[ent++] = std::pair(lo+o+b,ro+o+b); }} } auto rhs_v = rhs.View(); auto lhs_v = lhs.View(); - thread_for(i,ent,{ + auto table = &Cshift_table[0]; + accelerator_for(i,ent,1,{ permute(lhs_v[table[i].first],rhs_v[table[i].second],permute_type); }); } diff --git a/Grid/threads/Accelerator.cc b/Grid/threads/Accelerator.cc index 4f2198f8..18cc406d 100644 --- a/Grid/threads/Accelerator.cc +++ b/Grid/threads/Accelerator.cc @@ -1,10 +1,186 @@ #include NAMESPACE_BEGIN(Grid); -uint32_t accelerator_threads; +uint32_t accelerator_threads=8; uint32_t acceleratorThreads(void) {return accelerator_threads;}; void acceleratorThreads(uint32_t t) {accelerator_threads = t;}; -#ifdef GRID_SYCL -cl::sycl::queue *theGridAccelerator; + +#ifdef GRID_CUDA +cudaDeviceProp *gpu_props; +void acceleratorInit(void) +{ + int nDevices = 1; + cudaGetDeviceCount(&nDevices); + gpu_props = new cudaDeviceProp[nDevices]; + + char * localRankStr = NULL; + int rank = 0, world_rank=0; +#define ENV_LOCAL_RANK_OMPI "OMPI_COMM_WORLD_LOCAL_RANK" +#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK" +#define ENV_RANK_OMPI "OMPI_COMM_WORLD_RANK" +#define ENV_RANK_MVAPICH "MV2_COMM_WORLD_RANK" + // We extract the local rank initialization using an environment variable + if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL) + { + rank = atoi(localRankStr); + } + if ((localRankStr = getenv(ENV_LOCAL_RANK_MVAPICH)) != NULL) + { + rank = atoi(localRankStr); + } + if ((localRankStr = getenv(ENV_RANK_OMPI )) != NULL) { world_rank = atoi(localRankStr);} + if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);} + + for (int i = 0; i < nDevices; i++) { + +#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("GpuInit: " #canMapHostMemory ": " FMT" 
\n",prop.canMapHostMemory); +#define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d"); + + cudaGetDeviceProperties(&gpu_props[i], i); + if ( world_rank == 0) { + cudaDeviceProp prop; + prop = gpu_props[i]; + printf("GpuInit: ========================\n"); + printf("GpuInit: Device Number : %d\n", i); + printf("GpuInit: ========================\n"); + printf("GpuInit: Device identifier: %s\n", prop.name); + + GPU_PROP(managedMemory); + GPU_PROP(isMultiGpuBoard); + GPU_PROP(warpSize); + // GPU_PROP(unifiedAddressing); + // GPU_PROP(l2CacheSize); + // GPU_PROP(singleToDoublePrecisionPerfRatio); + } + } +#ifdef GRID_IBM_SUMMIT + // IBM Jsrun makes cuda Device numbering screwy and not match rank + if ( world_rank == 0 ) printf("GpuInit: IBM Summit or similar - NOT setting device to node rank\n"); +#else + if ( world_rank == 0 ) printf("GpuInit: setting device to node rank\n"); + cudaSetDevice(rank); #endif + if ( world_rank == 0 ) printf("GpuInit: ================================================\n"); +} +#endif + +#ifdef GRID_HIP +hipDeviceProp_t *gpu_props; +void acceleratorInit(void) +{ + int nDevices = 1; + hipGetDeviceCount(&nDevices); + gpu_props = new hipDeviceProp_t[nDevices]; + + char * localRankStr = NULL; + int rank = 0, world_rank=0; +#define ENV_LOCAL_RANK_OMPI "OMPI_COMM_WORLD_LOCAL_RANK" +#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK" +#define ENV_RANK_OMPI "OMPI_COMM_WORLD_RANK" +#define ENV_RANK_MVAPICH "MV2_COMM_WORLD_RANK" + // We extract the local rank initialization using an environment variable + if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL) + { + rank = atoi(localRankStr); + } + if ((localRankStr = getenv(ENV_LOCAL_RANK_MVAPICH)) != NULL) + { + rank = atoi(localRankStr); + } + if ((localRankStr = getenv(ENV_RANK_OMPI )) != NULL) { world_rank = atoi(localRankStr);} + if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);} + + for (int i = 0; i < nDevices; i++) { + +#define 
GPU_PROP_FMT(canMapHostMemory,FMT) printf("GpuInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory); +#define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d"); + + hipGetDeviceProperties(&gpu_props[i], i); + if ( world_rank == 0) { + hipDeviceProp_t prop; + prop = gpu_props[i]; + printf("GpuInit: ========================\n"); + printf("GpuInit: Device Number : %d\n", i); + printf("GpuInit: ========================\n"); + printf("GpuInit: Device identifier: %s\n", prop.name); + + // GPU_PROP(managedMemory); + GPU_PROP(isMultiGpuBoard); + GPU_PROP(warpSize); + // GPU_PROP(unifiedAddressing); + // GPU_PROP(l2CacheSize); + // GPU_PROP(singleToDoublePrecisionPerfRatio); + } + } +#ifdef GRID_IBM_SUMMIT + // IBM Jsrun makes cuda Device numbering screwy and not match rank + if ( world_rank == 0 ) printf("GpuInit: IBM Summit or similar - NOT setting device to node rank\n"); +#else + if ( world_rank == 0 ) printf("GpuInit: setting device to node rank\n"); + cudaSetDevice(rank); +#endif + if ( world_rank == 0 ) printf("GpuInit: ================================================\n"); +} +#endif + + +#ifdef GRID_SYCL + +cl::sycl::queue *theGridAccelerator; + +void acceleratorInit(void) +{ + int nDevices = 1; + cl::sycl::gpu_selector selector; + cl::sycl::device selectedDevice { selector }; + theGridAccelerator = new sycl::queue (selectedDevice); + + char * localRankStr = NULL; + int rank = 0, world_rank=0; +#define ENV_LOCAL_RANK_OMPI "OMPI_COMM_WORLD_LOCAL_RANK" +#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK" +#define ENV_RANK_OMPI "OMPI_COMM_WORLD_RANK" +#define ENV_RANK_MVAPICH "MV2_COMM_WORLD_RANK" + // We extract the local rank initialization using an environment variable + if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL) + { + rank = atoi(localRankStr); + } + if ((localRankStr = getenv(ENV_LOCAL_RANK_MVAPICH)) != NULL) + { + rank = atoi(localRankStr); + } + if ((localRankStr = getenv(ENV_RANK_OMPI )) != NULL) { world_rank = 
atoi(localRankStr);} + if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);} + + if ( world_rank == 0 ) { + GridBanner(); + } + /* + for (int i = 0; i < nDevices; i++) { + +#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("GpuInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory); +#define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d"); + + cudaGetDeviceProperties(&gpu_props[i], i); + if ( world_rank == 0) { + cudaDeviceProp prop; + prop = gpu_props[i]; + printf("GpuInit: ========================\n"); + printf("GpuInit: Device Number : %d\n", i); + printf("GpuInit: ========================\n"); + printf("GpuInit: Device identifier: %s\n", prop.name); + } + } + */ + if ( world_rank == 0 ) { + printf("GpuInit: ================================================\n"); + } +} +#endif + +#if (!defined(GRID_CUDA)) && (!defined(GRID_SYCL))&& (!defined(GRID_HIP)) +void acceleratorInit(void){} +#endif + NAMESPACE_END(Grid); diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h index 6f2e0b04..1569b22b 100644 --- a/Grid/threads/Accelerator.h +++ b/Grid/threads/Accelerator.h @@ -51,6 +51,7 @@ NAMESPACE_BEGIN(Grid); // // Warp control and info: // +// acceleratorInit; // void acceleratorSynchronise(void); // synch warp etc.. // int acceleratorSIMTlane(int Nsimd); // @@ -69,6 +70,7 @@ NAMESPACE_BEGIN(Grid); uint32_t acceleratorThreads(void); void acceleratorThreads(uint32_t); +void acceleratorInit(void); ////////////////////////////////////////////// // CUDA acceleration @@ -83,6 +85,32 @@ void acceleratorThreads(uint32_t); #define accelerator __host__ __device__ #define accelerator_inline __host__ __device__ inline +accelerator_inline int acceleratorSIMTlane(int Nsimd) { return threadIdx.x; } // CUDA specific + +#define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... 
) \ + { \ + typedef uint64_t Iterator; \ + auto lambda = [=] accelerator \ + (Iterator lane,Iterator iter1,Iterator iter2) mutable { \ + __VA_ARGS__; \ + }; \ + int nt=acceleratorThreads(); \ + dim3 cu_threads(nsimd,acceleratorThreads(),1); \ + dim3 cu_blocks (1,(num1+nt-1)/nt,num2); \ + LambdaApply<<>>(nsimd,num1,num2,lambda); \ + } + +template __global__ +void LambdaApply(uint64_t num1, uint64_t num2, uint64_t num3, lambda Lambda) +{ + uint64_t x = threadIdx.x;//+ blockDim.x*blockIdx.x; + uint64_t y = threadIdx.y + blockDim.y*blockIdx.y; + uint64_t z = threadIdx.z + blockDim.z*blockIdx.z; + if ( (x < num1) && (y>>(nsimd,num,lambda); \ - } - -#define accelerator_for( iterator, num, nsimd, ... ) \ - accelerator_forNB(iterator, num, nsimd, { __VA_ARGS__ } ); \ - accelerator_barrier(dummy); - inline void *acceleratorAllocShared(size_t bytes) { void *ptr=NULL; @@ -133,15 +145,6 @@ inline void *acceleratorAllocDevice(size_t bytes) inline void acceleratorFreeShared(void *ptr){ cudaFree(ptr);}; inline void acceleratorFreeDevice(void *ptr){ cudaFree(ptr);}; -template __global__ -void LambdaApply(uint64_t Isites, uint64_t Osites, lambda Lambda) -{ - uint64_t isite = threadIdx.y; - uint64_t osite = threadIdx.x+blockDim.x*blockIdx.x; - if ( (osite >()[0]; } // SYCL specific + +#define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... 
) \ theGridAccelerator->submit([&](cl::sycl::handler &cgh) { \ - cl::sycl::range<3> local {acceleratorThreads(),1,nsimd}; \ - cl::sycl::range<3> global{(unsigned long)num,1,(unsigned long)nsimd}; \ + int nt=acceleratorThreads(); \ + unsigned long unum1 = num1; \ + unsigned long unum2 = num2; \ + cl::sycl::range<3> local {nsimd,nt,1}; \ + cl::sycl::range<3> global{nsimd,unum1,unum2}; \ cgh.parallel_for( \ cl::sycl::nd_range<3>(global,local), \ [=] (cl::sycl::nd_item<3> item) mutable { \ - auto iterator = item.get_global_id(0); \ - auto lane = item.get_global_id(2); \ + auto lane = item.get_global_id(0); \ + auto iter1 = item.get_global_id(1); \ + auto iter2 = item.get_global_id(2); \ { __VA_ARGS__ }; \ }); \ }); +dim3 cu_threads(nsimd,acceleratorThreads(),1); \ +dim3 cu_blocks (1,(num1+nt-1)/n,num2); \ #define accelerator_barrier(dummy) theGridAccelerator->wait(); -#define accelerator_for( iterator, num, nsimd, ... ) \ - accelerator_forNB(iterator, num, nsimd, { __VA_ARGS__ } ); \ - accelerator_barrier(dummy); - inline void *acceleratorAllocShared(size_t bytes){ return malloc_shared(bytes,*theGridAccelerator);}; inline void *acceleratorAllocDevice(size_t bytes){ return malloc_device(bytes,*theGridAccelerator);}; inline void acceleratorFreeShared(void *ptr){free(ptr,*theGridAccelerator);}; @@ -204,33 +211,49 @@ NAMESPACE_BEGIN(Grid); #define accelerator __host__ __device__ #define accelerator_inline __host__ __device__ inline + +/*These routines define mapping from thread grid to loop & vector lane indexing */ +accelerator_inline int acceleratorSIMTlane(int Nsimd) { return hipThreadIdx_x; } // HIP specific + +#define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... 
) \ + { \ + typedef uint64_t Iterator; \ + auto lambda = [=] accelerator \ + (Iterator lane,Iterator iter1,Iterator iter2 ) mutable { \ + { __VA_ARGS__;} \ + }; \ + int nt=acceleratorThreads(); \ + dim3 hip_threads(nsimd,nt,1); \ + dim3 hip_blocks (1,(num1+nt-1)/nt,num2); \ + hipLaunchKernelGGL(LambdaApply,hip_blocks,hip_threads, \ + 0,0, \ + nsimd,num1,num2,lambda); \ + } + + +template __global__ +void LambdaApply(uint64_t numx, uint64_t numy, uint64_t numz, lambda Lambda) +{ + uint64_t x = hipThreadIdx_x;//+ hipBlockDim_x*hipBlockIdx_x; + uint64_t y = hipThreadIdx_y + hipBlockDim_y*hipBlockIdx_y; + uint64_t z = hipThreadIdx_z + hipBlockDim_z*hipBlockIdx_z; + if ( (x < numx) && (y __global__ -void LambdaApply(uint64_t Isites, uint64_t Osites, lambda Lambda) -{ - uint64_t isite = hipThreadIdx_y; - uint64_t osite = hipThreadIdx_x + hipBlockDim_x*hipBlockIdx_x; - if ( (osite @@ -303,7 +337,6 @@ inline void acceleratorFreeShared(void *ptr){free(ptr);}; inline void acceleratorFreeDevice(void *ptr){free(ptr);}; #endif - #endif // CPU target /////////////////////////////////////////////////// @@ -325,25 +358,4 @@ accelerator_inline void acceleratorSynchronise(void) return; } -//////////////////////////////////////////////////// -// Address subvectors on accelerators -//////////////////////////////////////////////////// -#ifdef GRID_SIMT - -#ifdef GRID_CUDA -accelerator_inline int acceleratorSIMTlane(int Nsimd) { return threadIdx.y; } // CUDA specific -#endif -#ifdef GRID_SYCL -accelerator_inline int acceleratorSIMTlane(int Nsimd) { return __spirv::initLocalInvocationId<3, cl::sycl::id<3>>()[2]; } // SYCL specific -#endif -#ifdef GRID_HIP -accelerator_inline int acceleratorSIMTlane(int Nsimd) { return hipThreadIdx_y; } // HIP specific -#endif - -#else - -accelerator_inline int acceleratorSIMTlane(int Nsimd) { return 0; } // CUDA specific - -#endif - NAMESPACE_END(Grid); diff --git a/Grid/threads/Threads.h b/Grid/threads/Threads.h index 84989853..a9fa13ea 100644 --- 
a/Grid/threads/Threads.h +++ b/Grid/threads/Threads.h @@ -58,6 +58,12 @@ Author: paboyle #endif #define thread_for( i, num, ... ) DO_PRAGMA(omp parallel for schedule(static)) for ( uint64_t i=0;i gputhreads(0); -#ifndef GRID_CUDA - std::cout << GridLogWarning << "'--gpu-threads' option used but Grid was" - << " not compiled with GPU support" << std::endl; -#endif - arg= GridCmdOptionPayload(argv,argv+argc,"--gpu-threads"); + arg= GridCmdOptionPayload(argv,argv+argc,"--accelerator-threads"); GridCmdOptionIntVector(arg,gputhreads); assert(gputhreads.size()==1); - gpu_threads=gputhreads[0]; + acceleratorThreads(gputhreads[0]); } if( GridCmdOptionExists(argv,argv+argc,"--cores") ){ @@ -245,8 +235,6 @@ static int Grid_is_initialised; ///////////////////////////////////////////////////////// void GridBanner(void) { - static int printed =0; - if( !printed ) { std::cout < Date: Tue, 12 May 2020 07:01:23 -0400 Subject: [PATCH 08/86] no automatic prefetching for now --- Grid/lattice/Lattice_base.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Grid/lattice/Lattice_base.h b/Grid/lattice/Lattice_base.h index 74525cc1..284190ba 100644 --- a/Grid/lattice/Lattice_base.h +++ b/Grid/lattice/Lattice_base.h @@ -267,14 +267,14 @@ public: LatticeView AcceleratorView(int mode = ViewReadWrite) const { LatticeView accessor(*( (LatticeAccelerator *) this)); - accessor.AcceleratorPrefetch(mode); + //accessor.AcceleratorPrefetch(mode); return accessor; } LatticeView HostView(int mode = ViewReadWrite) const { LatticeView accessor(*( (LatticeAccelerator *) this)); - accessor.HostPrefetch(mode); + //accessor.HostPrefetch(mode); return accessor; } From d24d8e8398ebb9a8e65fcadbccda09ab7a89e3a7 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 12 May 2020 10:35:49 -0400 Subject: [PATCH 09/86] Use X-direction as more bits meaningful on CUDA. 2^31-1 shoulddd always bee enough for SIMD and thread reduced local volume e.g. 32*2^31 = 2^36 = (2^9)^4 or 512^4 ias big enough. 
Where 32 is gpu_threads * Nsimd = 8*4 --- Grid/threads/Accelerator.h | 43 ++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h index 1569b22b..0a5103a2 100644 --- a/Grid/threads/Accelerator.h +++ b/Grid/threads/Accelerator.h @@ -85,27 +85,27 @@ void acceleratorInit(void); #define accelerator __host__ __device__ #define accelerator_inline __host__ __device__ inline -accelerator_inline int acceleratorSIMTlane(int Nsimd) { return threadIdx.x; } // CUDA specific +accelerator_inline int acceleratorSIMTlane(int Nsimd) { return threadIdx.z; } // CUDA specific #define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... ) \ { \ typedef uint64_t Iterator; \ auto lambda = [=] accelerator \ - (Iterator lane,Iterator iter1,Iterator iter2) mutable { \ + (Iterator iter1,Iterator iter2,Iterator lane) mutable { \ __VA_ARGS__; \ }; \ int nt=acceleratorThreads(); \ - dim3 cu_threads(nsimd,acceleratorThreads(),1); \ - dim3 cu_blocks (1,(num1+nt-1)/nt,num2); \ - LambdaApply<<>>(nsimd,num1,num2,lambda); \ + dim3 cu_threads(acceleratorThreads(),1,nsimd); \ + dim3 cu_blocks ((num1+nt-1)/nt,num2,1); \ + LambdaApply<<>>(num1,num2,nsimd,lambda); \ } template __global__ void LambdaApply(uint64_t num1, uint64_t num2, uint64_t num3, lambda Lambda) { - uint64_t x = threadIdx.x;//+ blockDim.x*blockIdx.x; + uint64_t x = threadIdx.x + blockDim.x*blockIdx.x; uint64_t y = threadIdx.y + blockDim.y*blockIdx.y; - uint64_t z = threadIdx.z + blockDim.z*blockIdx.z; + uint64_t z = threadIdx.z; if ( (x < num1) && (y>()[0]; } // SYCL specific +accelerator_inline int acceleratorSIMTlane(int Nsimd) { return __spirv::initLocalInvocationId<3, cl::sycl::id<3>>()[2]; } // SYCL specific #define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... 
) \ theGridAccelerator->submit([&](cl::sycl::handler &cgh) { \ int nt=acceleratorThreads(); \ unsigned long unum1 = num1; \ unsigned long unum2 = num2; \ - cl::sycl::range<3> local {nsimd,nt,1}; \ - cl::sycl::range<3> global{nsimd,unum1,unum2}; \ + cl::sycl::range<3> local {nt,1,nsimd}; \ + cl::sycl::range<3> global{unum1,unum2,nsimd}; \ cgh.parallel_for( \ cl::sycl::nd_range<3>(global,local), \ [=] (cl::sycl::nd_item<3> item) mutable { \ - auto lane = item.get_global_id(0); \ - auto iter1 = item.get_global_id(1); \ - auto iter2 = item.get_global_id(2); \ + auto iter1 = item.get_global_id(0); \ + auto iter2 = item.get_global_id(1); \ + auto lane = item.get_global_id(2); \ { __VA_ARGS__ }; \ }); \ }); -dim3 cu_threads(nsimd,acceleratorThreads(),1); \ -dim3 cu_blocks (1,(num1+nt-1)/n,num2); \ #define accelerator_barrier(dummy) theGridAccelerator->wait(); @@ -213,30 +211,29 @@ NAMESPACE_BEGIN(Grid); #define accelerator_inline __host__ __device__ inline /*These routines define mapping from thread grid to loop & vector lane indexing */ -accelerator_inline int acceleratorSIMTlane(int Nsimd) { return hipThreadIdx_x; } // HIP specific +accelerator_inline int acceleratorSIMTlane(int Nsimd) { return hipThreadIdx_z; } // HIP specific #define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... 
) \ { \ typedef uint64_t Iterator; \ auto lambda = [=] accelerator \ - (Iterator lane,Iterator iter1,Iterator iter2 ) mutable { \ + (Iterator iter1,Iterator iter2,Iterator lane ) mutable { \ { __VA_ARGS__;} \ }; \ int nt=acceleratorThreads(); \ - dim3 hip_threads(nsimd,nt,1); \ - dim3 hip_blocks (1,(num1+nt-1)/nt,num2); \ + dim3 hip_threads(nt,1,nsimd); \ + dim3 hip_blocks ((num1+nt-1)/nt,num2,1); \ hipLaunchKernelGGL(LambdaApply,hip_blocks,hip_threads, \ 0,0, \ - nsimd,num1,num2,lambda); \ + num1,num2,nsimd,lambda); \ } - template __global__ void LambdaApply(uint64_t numx, uint64_t numy, uint64_t numz, lambda Lambda) { - uint64_t x = hipThreadIdx_x;//+ hipBlockDim_x*hipBlockIdx_x; + uint64_t x = hipThreadIdx_x + hipBlockDim_x*hipBlockIdx_x; uint64_t y = hipThreadIdx_y + hipBlockDim_y*hipBlockIdx_y; - uint64_t z = hipThreadIdx_z + hipBlockDim_z*hipBlockIdx_z; + uint64_t z = hipThreadIdx_z ;//+ hipBlockDim_z*hipBlockIdx_z; if ( (x < numx) && (y Date: Tue, 12 May 2020 20:03:37 -0400 Subject: [PATCH 10/86] Dependence fix --- Grid/Makefile.am | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Grid/Makefile.am b/Grid/Makefile.am index b88ea4f2..f1fa462e 100644 --- a/Grid/Makefile.am +++ b/Grid/Makefile.am @@ -21,7 +21,7 @@ if BUILD_HDF5 extra_headers+=serialisation/Hdf5Type.h endif -all: version-cache +all: version-cache Version.h version-cache: @if [ `git status --porcelain | grep -v '??' 
| wc -l` -gt 0 ]; then\ @@ -42,7 +42,7 @@ version-cache: fi;\ rm -f vertmp -Version.h: +Version.h: version-cache cp version-cache Version.h .PHONY: version-cache From ebb60330c90e085d30a799fb4176cf3faa2635cb Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sun, 17 May 2020 16:34:25 -0400 Subject: [PATCH 11/86] Automatic data motion options beginning --- Grid/GridCore.h | 2 +- Grid/allocator/AlignedAllocator.h | 127 +--------- Grid/allocator/AllocationCache.cc | 159 ++++++++++++ Grid/allocator/AllocationCache.h | 93 +++++++ Grid/allocator/Allocator.h | 4 + Grid/allocator/MemoryCacheDeviceMem.cc | 338 +++++++++++++++++++++++++ Grid/allocator/MemoryCacheShared.cc | 27 ++ Grid/allocator/MemoryStats.cc | 67 +++++ Grid/allocator/MemoryStats.h | 95 +++++++ Grid/lattice/Lattice_ET.h | 70 ++++- Grid/lattice/Lattice_base.h | 79 +++++- Grid/threads/Accelerator.h | 31 ++- Grid/util/Init.cc | 2 + configure.ac | 17 +- 14 files changed, 963 insertions(+), 148 deletions(-) create mode 100644 Grid/allocator/AllocationCache.cc create mode 100644 Grid/allocator/AllocationCache.h create mode 100644 Grid/allocator/Allocator.h create mode 100644 Grid/allocator/MemoryCacheDeviceMem.cc create mode 100644 Grid/allocator/MemoryCacheShared.cc create mode 100644 Grid/allocator/MemoryStats.cc create mode 100644 Grid/allocator/MemoryStats.h diff --git a/Grid/GridCore.h b/Grid/GridCore.h index f7c1267a..2209f960 100644 --- a/Grid/GridCore.h +++ b/Grid/GridCore.h @@ -47,7 +47,7 @@ Author: paboyle #include #include #include -#include +#include #include #include #include diff --git a/Grid/allocator/AlignedAllocator.h b/Grid/allocator/AlignedAllocator.h index c8742d3e..c3a32cd3 100644 --- a/Grid/allocator/AlignedAllocator.h +++ b/Grid/allocator/AlignedAllocator.h @@ -26,102 +26,10 @@ Author: Peter Boyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#ifndef 
GRID_ALIGNED_ALLOCATOR_H -#define GRID_ALIGNED_ALLOCATOR_H +#pragma once NAMESPACE_BEGIN(Grid); -/*Move control to configure.ac and Config.h*/ -#define POINTER_CACHE -/*Pinning pages is costly*/ -/*Could maintain separate large and small allocation caches*/ -#ifdef POINTER_CACHE -class PointerCache { -private: - - static const int Ncache=128; - static int victim; - - typedef struct { - void *address; - size_t bytes; - int valid; - } PointerCacheEntry; - - static PointerCacheEntry Entries[Ncache]; - -public: - - static void *Insert(void *ptr,size_t bytes) ; - static void *Lookup(size_t bytes) ; - -}; -#endif - -std::string sizeString(size_t bytes); - -struct MemoryStats -{ - size_t totalAllocated{0}, maxAllocated{0}, - currentlyAllocated{0}, totalFreed{0}; -}; - -class MemoryProfiler -{ -public: - static MemoryStats *stats; - static bool debug; -}; - -#define memString(bytes) std::to_string(bytes) + " (" + sizeString(bytes) + ")" -#define profilerDebugPrint \ - if (MemoryProfiler::stats) \ - { \ - auto s = MemoryProfiler::stats; \ - std::cout << GridLogDebug << "[Memory debug] Stats " << MemoryProfiler::stats << std::endl; \ - std::cout << GridLogDebug << "[Memory debug] total : " << memString(s->totalAllocated) \ - << std::endl; \ - std::cout << GridLogDebug << "[Memory debug] max : " << memString(s->maxAllocated) \ - << std::endl; \ - std::cout << GridLogDebug << "[Memory debug] current: " << memString(s->currentlyAllocated) \ - << std::endl; \ - std::cout << GridLogDebug << "[Memory debug] freed : " << memString(s->totalFreed) \ - << std::endl; \ - } - -#define profilerAllocate(bytes) \ - if (MemoryProfiler::stats) \ - { \ - auto s = MemoryProfiler::stats; \ - s->totalAllocated += (bytes); \ - s->currentlyAllocated += (bytes); \ - s->maxAllocated = std::max(s->maxAllocated, s->currentlyAllocated); \ - } \ - if (MemoryProfiler::debug) \ - { \ - std::cout << GridLogDebug << "[Memory debug] allocating " << memString(bytes) << std::endl; \ - profilerDebugPrint; \ - } 
- -#define profilerFree(bytes) \ - if (MemoryProfiler::stats) \ - { \ - auto s = MemoryProfiler::stats; \ - s->totalFreed += (bytes); \ - s->currentlyAllocated -= (bytes); \ - } \ - if (MemoryProfiler::debug) \ - { \ - std::cout << GridLogDebug << "[Memory debug] freeing " << memString(bytes) << std::endl; \ - profilerDebugPrint; \ - } - -void check_huge_pages(void *Buf,uint64_t BYTES); - -//////////////////////////////////////////////////////////////////// -// A lattice of something, but assume the something is SIMDized. -//////////////////////////////////////////////////////////////////// - template class alignedAllocator { public: @@ -144,42 +52,23 @@ public: pointer allocate(size_type __n, const void* _p= 0) { size_type bytes = __n*sizeof(_Tp); + profilerAllocate(bytes); -#ifdef POINTER_CACHE - _Tp *ptr = (_Tp *) PointerCache::Lookup(bytes); -#else - pointer ptr = nullptr; -#endif - - if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) acceleratorAllocShared(bytes); - + _Tp *ptr = (_Tp*) AllocationCache::CpuAllocate(bytes); + assert( ( (_Tp*)ptr != (_Tp *)NULL ) ); -#if 0 - size_type page_size=4096; - size_type pages = (bytes+page_size-1)/page_size; - uint8_t *bp = (uint8_t *)ptr; - - accelerator_for(pg,pages,1,{ - bp[pg*page_size]=0; - }); -#endif return ptr; } - void deallocate(pointer __p, size_type __n) { + void deallocate(pointer __p, size_type __n) + { size_type bytes = __n * sizeof(_Tp); profilerFree(bytes); -#ifdef POINTER_CACHE - pointer __freeme = (pointer)PointerCache::Insert((void *)__p,bytes); -#else - pointer __freeme = __p; -#endif - - if ( __freeme ) acceleratorFreeShared((void *)__freeme); + AllocationCache::CpuFree((void *)__p,bytes); } // FIXME: hack for the copy constructor, eventually it must be avoided @@ -201,4 +90,4 @@ template using Matrix = std::vector + +NAMESPACE_BEGIN(Grid); + +/*Allocation types, saying which pointer cache should be used*/ +#define Cpu (0) +#define CpuSmall (1) +#define Acc (2) +#define AccSmall (3) + 
+////////////////////////////////////////////////////////////////////// +// Data tables for recently freed pooiniter caches +////////////////////////////////////////////////////////////////////// +AllocationCache::AllocationCacheEntry AllocationCache::Entries[AllocationCache::NallocType][AllocationCache::NallocCacheMax]; +int AllocationCache::Victim[AllocationCache::NallocType]; +int AllocationCache::Ncache[AllocationCache::NallocType]; + +////////////////////////////////////////////////////////////////////// +// Actual allocation and deallocation utils +////////////////////////////////////////////////////////////////////// +void *AllocationCache::AcceleratorAllocate(size_t bytes) +{ + void *ptr = (void *) Lookup(bytes,Acc); + + if ( ptr == (void *) NULL ) + ptr = (void *) acceleratorAllocDevice(bytes); + + return ptr; +} +void AllocationCache::AcceleratorFree (void *ptr,size_t bytes) +{ + void *__freeme = Insert(ptr,bytes,Acc); + + if ( __freeme ) acceleratorFreeShared(__freeme); +} +void *AllocationCache::CpuAllocate(size_t bytes) +{ + void *ptr = (void *) Lookup(bytes,Cpu); + + if ( ptr == (void *) NULL ) { + ptr = (void *) acceleratorAllocShared(bytes); + // std::cout <<"CpuAllocate: allocated pointer "<=0){ Evict(e); } + + // If present remove entry and free accelerator too. + // Can we ever hit a free event with a view still in scope? 
+ void *__freeme = Insert(ptr,bytes,Cpu); + // std::cout <<"CpuFree cached pointer "<=0) && (Nc < NallocCacheMax)) { + Ncache[Cpu]=Nc; + Ncache[Acc]=Nc; + } + } + + str= getenv("GRID_ALLOC_NCACHE_SMALL"); + if ( str ) { + Nc = atoi(str); + if ( (Nc>=0) && (Nc < NallocCacheMax)) { + Ncache[CpuSmall]=Nc; + Ncache[AccSmall]=Nc; + } + } +} + +void *AllocationCache::Insert(void *ptr,size_t bytes,int type) +{ + bool small = (bytes < GRID_ALLOC_SMALL_LIMIT); + int cache = type + small; + return Insert(ptr,bytes,Entries[cache],Ncache[cache],Victim[cache]); +} +void *AllocationCache::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim) +{ + assert(ncache>0); +#ifdef GRID_OMP + assert(omp_in_parallel()==0); +#endif + + void * ret = NULL; + int v = -1; + + for(int e=0;e0); +#ifdef GRID_OMP + assert(omp_in_parallel()==0); +#endif + for(int e=0;e +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + +NAMESPACE_BEGIN(Grid); + +// Move control to configure.ac and Config.h? 
+ +#define ALLOCATION_CACHE +#define GRID_ALLOC_ALIGN (2*1024*1024) +#define GRID_ALLOC_SMALL_LIMIT (4096) + +/*Pinning pages is costly*/ + +class AllocationCache { +private: + + //////////////////////////////////////////////////////////// + // For caching recently freed allocations + //////////////////////////////////////////////////////////// + typedef struct { + void *address; + size_t bytes; + int valid; + } AllocationCacheEntry; + + static const int NallocCacheMax=128; + static const int NallocType=4; + static AllocationCacheEntry Entries[NallocType][NallocCacheMax]; + static int Victim[NallocType]; + static int Ncache[NallocType]; + + ///////////////////////////////////////////////// + // Free pool + ///////////////////////////////////////////////// + static void *Insert(void *ptr,size_t bytes,int type) ; + static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim) ; + static void *Lookup(size_t bytes,int type) ; + static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache) ; + + ///////////////////////////////////////////////// + // Internal device view + ///////////////////////////////////////////////// + static void *AcceleratorAllocate(size_t bytes); + static void AcceleratorFree (void *ptr,size_t bytes); + static int ViewVictim(void); + static void Evict(int e); + static void Flush(int e); + static void Clone(int e); + static int CpuViewLookup(void *CpuPtr); + static int AccViewLookup(void *AccPtr); + +public: + static void Init(void); + + static void AccViewClose(void* AccPtr); + static void CpuViewClose(void* CpuPtr); + static void *AccViewOpen(void* CpuPtr,size_t bytes,int mode,int transient); + static void *CpuViewOpen(void* CpuPtr,size_t bytes,int mode,int transient); + + static void *CpuAllocate(size_t bytes); + static void CpuFree (void *ptr,size_t bytes); +}; + +NAMESPACE_END(Grid); + + diff --git a/Grid/allocator/Allocator.h b/Grid/allocator/Allocator.h new file mode 100644 index 
00000000..9eaec8f6 --- /dev/null +++ b/Grid/allocator/Allocator.h @@ -0,0 +1,4 @@ +#pragma once +#include +#include +#include diff --git a/Grid/allocator/MemoryCacheDeviceMem.cc b/Grid/allocator/MemoryCacheDeviceMem.cc new file mode 100644 index 00000000..e46d48af --- /dev/null +++ b/Grid/allocator/MemoryCacheDeviceMem.cc @@ -0,0 +1,338 @@ +#include +#ifndef GRID_UNIFIED + +#warning "Using explicit device memory copies" +NAMESPACE_BEGIN(Grid); +#define dprintf(...) + +//////////////////////////////////////////////////////////// +// For caching copies of data on device +//////////////////////////////////////////////////////////// +const int NaccCacheMax=128; + +typedef struct { + void *CpuPtr; + void *AccPtr; + size_t bytes; + uint32_t transient; + uint32_t state; + uint32_t accLock; + uint32_t cpuLock; +} AcceleratorViewEntry; + +#define Write (1) +#define Read (2) +#define WriteDiscard (3) +////////////////////////////////////////////////////////////////////// +// Data tables for ViewCache +////////////////////////////////////////////////////////////////////// +static AcceleratorViewEntry AccCache[NaccCacheMax]; +static int AccCacheVictim; // Base for round robin search +static int NaccCache = 8; + +//////////////////////////////////// +// Priority ordering for unlocked entries +// Empty +// CpuDirty +// Consistent +// AccDirty +//////////////////////////////////// +#define Empty (0x0) /*Entry unoccupied */ +#define CpuDirty (0x1) /*CPU copy is golden, Acc buffer MAY not be allocated*/ +#define Consistent (0x2) /*ACC copy AND CPU copy are valid */ +#define AccDirty (0x4) /*ACC copy is golden */ +#define EvictNext (0x8) /*Priority for eviction*/ + +int AllocationCache::ViewVictim(void) +{ + int prioEmpty =-1; + int prioCpuDirty =-1; + int prioConsistent =-1; + int prioAccDirty =-1; + int prioCpuDirtyEN =-1; + int prioConsistentEN =-1; + int prioAccDirtyEN =-1; + + int victim=-1; + + // round robin priority search of unlocked entries offset from current victim + 
for(int ep=0;ep= 0 ) victim = prioAccDirty; + if ( prioConsistent >= 0 ) victim = prioConsistent; + if ( prioCpuDirty >= 0 ) victim = prioCpuDirty; + if ( prioAccDirtyEN >= 0 ) victim = prioAccDirtyEN; + if ( prioConsistentEN >= 0 ) victim = prioConsistentEN; + if ( prioCpuDirtyEN >= 0 ) victim = prioCpuDirtyEN; + if ( prioEmpty >= 0 ) victim = prioEmpty; /*Highest prio is winner*/ + + assert(victim >= 0); // Must succeed/ + dprintf("AllocationCacheDeviceMem: Selected victim cache entry %d\n",victim); + + // advance victim pointer + AccCacheVictim=(AccCacheVictim+1)%NaccCache; + dprintf("AllocationCacheDeviceMem: victim pointer now %d / %d\n",AccCacheVictim,NaccCache); + + return victim; +} +///////////////////////////////////////////////// +// Accelerator cache motion +///////////////////////////////////////////////// +void AllocationCache::Evict(int e) // Make CPU consistent, remove from Accelerator, remove entry +{ + if(AccCache[e].state!=Empty){ + dprintf("AllocationCache: Evict(%d) %llx,%llxn",e,(uint64_t)AccCache[e].AccPtr,(uint64_t)AccCache[e].CpuPtr); + assert(AccCache[e].accLock==0); + assert(AccCache[e].cpuLock==0); + if(AccCache[e].state==AccDirty) { + Flush(e); + } + assert(AccCache[e].CpuPtr!=NULL); + if(AccCache[e].AccPtr) { + dprintf("AllocationCache: Free(%d) %llx\n",e,(uint64_t)AccCache[e].AccPtr); + AcceleratorFree(AccCache[e].AccPtr,AccCache[e].bytes); + } + } + AccCache[e].AccPtr=NULL; + AccCache[e].CpuPtr=NULL; + AccCache[e].bytes=0; + AccCache[e].state=Empty; + AccCache[e].accLock=0; + AccCache[e].cpuLock=0; +} +void AllocationCache::Flush(int e)// Copy back from a dirty device state and mark consistent. 
Do not remove +{ + dprintf("AllocationCache: Flush(%d) %llx -> %llx\n",e,(uint64_t)AccCache[e].AccPtr,(uint64_t)AccCache[e].CpuPtr); + assert(AccCache[e].state==AccDirty); + assert(AccCache[e].cpuLock==0); + assert(AccCache[e].accLock==0); + assert(AccCache[e].AccPtr!=NULL); + assert(AccCache[e].CpuPtr!=NULL); + acceleratorCopyFromDevice(AccCache[e].AccPtr,AccCache[e].CpuPtr,AccCache[e].bytes); + AccCache[e].state=Consistent; +} +void AllocationCache::Clone(int e)// Copy from CPU, mark consistent. Allocate if necessary +{ + assert(AccCache[e].state==CpuDirty); + assert(AccCache[e].cpuLock==0); + assert(AccCache[e].accLock==0); + assert(AccCache[e].CpuPtr!=NULL); + if(AccCache[e].AccPtr==NULL){ + AccCache[e].AccPtr=AcceleratorAllocate(AccCache[e].bytes); + } + dprintf("AllocationCache: Clone(%d) %llx <- %llx\n",e,(uint64_t)AccCache[e].AccPtr,(uint64_t)AccCache[e].CpuPtr); + acceleratorCopyToDevice(AccCache[e].CpuPtr,AccCache[e].AccPtr,AccCache[e].bytes); + AccCache[e].state=Consistent; +} +///////////////////////////////////////////////////////////////////////////////// +// View management +///////////////////////////////////////////////////////////////////////////////// +void *AllocationCache::AccViewOpen(void* CpuPtr,size_t bytes,int mode,int transient) +{ + //////////////////////////////////////////////////////////////////////////// + // Find if present, otherwise get or force an empty + //////////////////////////////////////////////////////////////////////////// + int e=CpuViewLookup(CpuPtr); + if(e==-1) { + e = ViewVictim(); + Evict(e); // Does copy back if necessary, frees accelerator pointer if not null, sets to empty + } + + assert(AccCache[e].cpuLock==0); // Programming error + + if(AccCache[e].state!=Empty) { + assert(AccCache[e].CpuPtr == CpuPtr); + assert(AccCache[e].bytes==bytes); + } +/* + * State transitions and actions + * + * Action State StateNext Flush Clone + * + * AccRead Empty Consistent - Y + * AccWrite Empty AccDirty - Y + * AccRead CpuDirty 
Consistent - Y + * AccWrite CpuDirty AccDirty - Y + * AccRead Consistent Consistent - - + * AccWrite Consistent AccDirty - - + * AccRead AccDirty AccDirty - - + * AccWrite AccDirty AccDirty - - + */ + if(AccCache[e].state==Empty) { + AccCache[e].CpuPtr = CpuPtr; + AccCache[e].AccPtr = NULL; + AccCache[e].bytes = bytes; + AccCache[e].state = CpuDirty; // Cpu starts primary + Clone(e); + if(mode==Write) + AccCache[e].state = AccDirty; // Empty + AccWrite=> AccDirty + else + AccCache[e].state = Consistent; // Empty + AccRead => Consistent + AccCache[e].accLock= 1; + } else if(AccCache[e].state&CpuDirty ){ + Clone(e); + if(mode==Write) + AccCache[e].state = AccDirty; // CpuDirty + AccWrite=> AccDirty + else + AccCache[e].state = Consistent; // CpuDirty + AccRead => Consistent + AccCache[e].accLock++; + } else if(AccCache[e].state&Consistent) { + if(mode==Write) + AccCache[e].state = AccDirty; // Consistent + AccWrite=> AccDirty + else + AccCache[e].state = Consistent; // Consistent + AccRead => Consistent + AccCache[e].accLock++; + } else if(AccCache[e].state&AccDirty) { + if(mode==Write) + AccCache[e].state = AccDirty; // AccDirty + AccWrite=> AccDirty + else + AccCache[e].state = AccDirty; // AccDirty + AccRead => AccDirty + AccCache[e].accLock++; + } else { + assert(0); + } + + AccCache[e].transient= transient? 
EvictNext : 0; + + return AccCache[e].AccPtr; +} +/* + * Action State StateNext Flush Clone + * + * CpuRead Empty CpuDirty - - + * CpuWrite Empty CpuDirty - - + * CpuRead CpuDirty CpuDirty - - + * CpuWrite CpuDirty CpuDirty - - + * CpuRead Consistent Consistent - - + * CpuWrite Consistent CpuDirty - - + * CpuRead AccDirty Consistent Y - + * CpuWrite AccDirty CpuDirty Y - + */ +//////////////////////////////////// +// look up & decrement lock count +//////////////////////////////////// +void AllocationCache::AccViewClose(void* AccPtr) +{ + int e=AccViewLookup(AccPtr); + assert(e!=-1); + assert(AccCache[e].cpuLock==0); + assert(AccCache[e].accLock>0); + AccCache[e].accLock--; +} +void AllocationCache::CpuViewClose(void* CpuPtr) +{ + int e=CpuViewLookup(CpuPtr); + assert(e!=-1); + assert(AccCache[e].cpuLock>0); + assert(AccCache[e].accLock==0); + AccCache[e].cpuLock--; +} +void *AllocationCache::CpuViewOpen(void* CpuPtr,size_t bytes,int mode,int transient) +{ + //////////////////////////////////////////////////////////////////////////// + // Find if present, otherwise get or force an empty + //////////////////////////////////////////////////////////////////////////// + int e=CpuViewLookup(CpuPtr); + if(e==-1) { + e = ViewVictim(); + Evict(e); // Does copy back if necessary, frees accelerator pointer if not null, sets to empty + } + + assert(AccCache[e].accLock==0); // Programming error + + if(AccCache[e].state!=Empty) { + assert(AccCache[e].CpuPtr == CpuPtr); + assert(AccCache[e].bytes==bytes); + } + + if(AccCache[e].state==Empty) { + AccCache[e].CpuPtr = CpuPtr; + AccCache[e].AccPtr = NULL; + AccCache[e].bytes = bytes; + AccCache[e].state = CpuDirty; // Empty + CpuRead/CpuWrite => CpuDirty + AccCache[e].accLock= 0; + AccCache[e].cpuLock= 1; + } else if(AccCache[e].state==CpuDirty ){ + // AccPtr dont care, deferred allocate + AccCache[e].state = CpuDirty; // CpuDirty +CpuRead/CpuWrite => CpuDirty + AccCache[e].cpuLock++; + } else if(AccCache[e].state==Consistent) { + 
assert(AccCache[e].AccPtr != NULL); + if(mode==Write) + AccCache[e].state = CpuDirty; // Consistent +CpuWrite => CpuDirty + else + AccCache[e].state = Consistent; // Consistent +CpuRead => Consistent + AccCache[e].cpuLock++; + } else if(AccCache[e].state==AccDirty) { + assert(AccCache[e].AccPtr != NULL); + Flush(e); + if(mode==Write) AccCache[e].state = CpuDirty; // AccDirty +CpuWrite => CpuDirty, Flush + else AccCache[e].state = Consistent; // AccDirty +CpuRead => Consistent, Flush + AccCache[e].cpuLock++; + } else { + assert(0); // should be unreachable + } + + AccCache[e].transient= transient? EvictNext : 0; + + return AccCache[e].CpuPtr; +} + +////////////////////////////////////////////////////////////////////////////// +//loop round robin over entries checking acc pointer +////////////////////////////////////////////////////////////////////////////// +int AllocationCache::CpuViewLookup(void *CpuPtr) +{ + assert(CpuPtr!=NULL); + for(int e=0;e +#ifdef GRID_UNIFIED + +#warning "Grid is assuming unified virtual memory address space" +NAMESPACE_BEGIN(Grid); +///////////////////////////////////////////////////////////////////////////////// +// View management is 1:1 address space mapping +///////////////////////////////////////////////////////////////////////////////// + +void *AllocationCache::CpuViewOpen(void* CpuPtr,size_t bytes,int mode,int transient) { return CpuPtr; } +void *AllocationCache::AccViewOpen(void* CpuPtr,size_t bytes,int mode,int transient) { return CpuPtr; } +void AllocationCache::AccViewClose(void* AccPtr){} +void AllocationCache::CpuViewClose(void* CpuPtr){} + +///////////////////////////////////// +// Dummy stubs +///////////////////////////////////// +int AllocationCache::ViewVictim(void) { assert(0); return 0;} +void AllocationCache::Evict(int e) { assert(0);} +void AllocationCache::Flush(int e) { assert(0);} +void AllocationCache::Clone(int e) { assert(0);} + +int AllocationCache::CpuViewLookup(void *CpuPtr){assert(0); return 0;} +int 
AllocationCache::AccViewLookup(void *AccPtr){assert(0); return 0;} + +NAMESPACE_END(Grid); +#endif diff --git a/Grid/allocator/MemoryStats.cc b/Grid/allocator/MemoryStats.cc new file mode 100644 index 00000000..0d1707d9 --- /dev/null +++ b/Grid/allocator/MemoryStats.cc @@ -0,0 +1,67 @@ +#include +#include + +NAMESPACE_BEGIN(Grid); + +MemoryStats *MemoryProfiler::stats = nullptr; +bool MemoryProfiler::debug = false; + +void check_huge_pages(void *Buf,uint64_t BYTES) +{ +#ifdef __linux__ + int fd = open("/proc/self/pagemap", O_RDONLY); + assert(fd >= 0); + const int page_size = 4096; + uint64_t virt_pfn = (uint64_t)Buf / page_size; + off_t offset = sizeof(uint64_t) * virt_pfn; + uint64_t npages = (BYTES + page_size-1) / page_size; + uint64_t pagedata[npages]; + uint64_t ret = lseek(fd, offset, SEEK_SET); + assert(ret == offset); + ret = ::read(fd, pagedata, sizeof(uint64_t)*npages); + assert(ret == sizeof(uint64_t) * npages); + int nhugepages = npages / 512; + int n4ktotal, nnothuge; + n4ktotal = 0; + nnothuge = 0; + for (int i = 0; i < nhugepages; ++i) { + uint64_t baseaddr = (pagedata[i*512] & 0x7fffffffffffffULL) * page_size; + for (int j = 0; j < 512; ++j) { + uint64_t pageaddr = (pagedata[i*512+j] & 0x7fffffffffffffULL) * page_size; + ++n4ktotal; + if (pageaddr != baseaddr + j * page_size) + ++nnothuge; + } + } + int rank = CartesianCommunicator::RankWorld(); + printf("rank %d Allocated %d 4k pages, %d not in huge pages\n", rank, n4ktotal, nnothuge); +#endif +} + +std::string sizeString(const size_t bytes) +{ + constexpr unsigned int bufSize = 256; + const char *suffixes[7] = {"", "K", "M", "G", "T", "P", "E"}; + char buf[256]; + size_t s = 0; + double count = bytes; + + while (count >= 1024 && s < 7) + { + s++; + count /= 1024; + } + if (count - floor(count) == 0.0) + { + snprintf(buf, bufSize, "%d %sB", (int)count, suffixes[s]); + } + else + { + snprintf(buf, bufSize, "%.1f %sB", count, suffixes[s]); + } + + return std::string(buf); +} + +NAMESPACE_END(Grid); 
+ diff --git a/Grid/allocator/MemoryStats.h b/Grid/allocator/MemoryStats.h new file mode 100644 index 00000000..156c9747 --- /dev/null +++ b/Grid/allocator/MemoryStats.h @@ -0,0 +1,95 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/MemoryStats.h + + Copyright (C) 2015 + +Author: Azusa Yamaguchi +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + + +NAMESPACE_BEGIN(Grid); + +std::string sizeString(size_t bytes); + +struct MemoryStats +{ + size_t totalAllocated{0}, maxAllocated{0}, + currentlyAllocated{0}, totalFreed{0}; +}; + +class MemoryProfiler +{ +public: + static MemoryStats *stats; + static bool debug; +}; + +#define memString(bytes) std::to_string(bytes) + " (" + sizeString(bytes) + ")" +#define profilerDebugPrint \ + if (MemoryProfiler::stats) \ + { \ + auto s = MemoryProfiler::stats; \ + std::cout << GridLogDebug << "[Memory debug] Stats " << MemoryProfiler::stats << std::endl; \ + std::cout << GridLogDebug << "[Memory debug] total : " << memString(s->totalAllocated) \ + << std::endl; \ + std::cout << GridLogDebug << "[Memory debug] max : " << memString(s->maxAllocated) \ + << std::endl; \ + std::cout << GridLogDebug << "[Memory debug] current: " << memString(s->currentlyAllocated) \ + << std::endl; \ + std::cout << GridLogDebug << "[Memory debug] freed : " << memString(s->totalFreed) \ + << std::endl; \ + } + +#define profilerAllocate(bytes) \ + if (MemoryProfiler::stats) \ + { \ + auto s = MemoryProfiler::stats; \ + s->totalAllocated += (bytes); \ + s->currentlyAllocated += (bytes); \ + s->maxAllocated = std::max(s->maxAllocated, s->currentlyAllocated); \ + } \ + if (MemoryProfiler::debug) \ + { \ + std::cout << GridLogDebug << "[Memory debug] allocating " << memString(bytes) << std::endl; \ + profilerDebugPrint; \ + } + +#define profilerFree(bytes) \ + if (MemoryProfiler::stats) \ + { \ + auto s = MemoryProfiler::stats; \ + s->totalFreed += (bytes); \ + s->currentlyAllocated -= (bytes); \ + } \ + if (MemoryProfiler::debug) \ + { \ + std::cout << GridLogDebug << "[Memory debug] freeing " << memString(bytes) << std::endl; \ + profilerDebugPrint; \ + } + +void check_huge_pages(void 
*Buf,uint64_t BYTES); + +NAMESPACE_END(Grid); + diff --git a/Grid/lattice/Lattice_ET.h b/Grid/lattice/Lattice_ET.h index cf7147b9..b8abd199 100644 --- a/Grid/lattice/Lattice_ET.h +++ b/Grid/lattice/Lattice_ET.h @@ -87,7 +87,7 @@ sobj eval(const uint64_t ss, const sobj &arg) } template accelerator_inline -const lobj & eval(const uint64_t ss, const LatticeView &arg) +const lobj & eval(const uint64_t ss, const LatticeExprView &arg) { return arg[ss]; } @@ -179,16 +179,12 @@ inline void CBFromExpression(int &cb, const T1 &lat) // Lattice leaf cb = lat.Checkerboard(); } template ::value, T1>::type * = nullptr> -inline void CBFromExpression(int &cb, const T1 ¬lat) // non-lattice leaf -{ -} - +inline void CBFromExpression(int &cb, const T1 ¬lat) {} // non-lattice leaf template inline void CBFromExpression(int &cb,const LatticeUnaryExpression &expr) { CBFromExpression(cb, expr.arg1); // recurse AST } - template inline void CBFromExpression(int &cb,const LatticeBinaryExpression &expr) { @@ -203,6 +199,68 @@ inline void CBFromExpression(int &cb, const LatticeTrinaryExpression::value, T1>::type * = nullptr> +inline void ExpressionViewOpen(T1 &lat) // Lattice leaf +{ + lat.AcceleratorViewOpen(); +} +template ::value, T1>::type * = nullptr> + inline void ExpressionViewOpen(T1 ¬lat) {} + +template inline +void ExpressionViewOpen(LatticeUnaryExpression &expr) +{ + ExpressionViewOpen(expr.arg1); // recurse AST +} + +template inline +void ExpressionViewOpen(LatticeBinaryExpression &expr) +{ + ExpressionViewOpen(expr.arg1); // recurse AST + ExpressionViewOpen(expr.arg2); // recurse AST +} +template +inline void ExpressionViewOpen(LatticeTrinaryExpression &expr) +{ + ExpressionViewOpen(expr.arg1); // recurse AST + ExpressionViewOpen(expr.arg2); // recurse AST + ExpressionViewOpen(expr.arg3); // recurse AST +} + +////////////////////////////////////////////////////////////////////////// +// ViewClose +////////////////////////////////////////////////////////////////////////// +template 
::value, T1>::type * = nullptr> +inline void ExpressionViewClose( T1 &lat) // Lattice leaf +{ + lat.AcceleratorViewClose(); +} +template ::value, T1>::type * = nullptr> +inline void ExpressionViewClose(T1 ¬lat) {} + +template inline +void ExpressionViewClose(LatticeUnaryExpression &expr) +{ + ExpressionViewClose(expr.arg1); // recurse AST +} +template inline +void ExpressionViewClose(LatticeBinaryExpression &expr) +{ + ExpressionViewClose(expr.arg1); // recurse AST + ExpressionViewClose(expr.arg2); // recurse AST +} +template +inline void ExpressionViewClose(LatticeTrinaryExpression &expr) +{ + ExpressionViewClose(expr.arg1); // recurse AST + ExpressionViewClose(expr.arg2); // recurse AST + ExpressionViewClose(expr.arg3); // recurse AST +} + //////////////////////////////////////////// // Unary operators and funcs //////////////////////////////////////////// diff --git a/Grid/lattice/Lattice_base.h b/Grid/lattice/Lattice_base.h index 6a8664d4..76622275 100644 --- a/Grid/lattice/Lattice_base.h +++ b/Grid/lattice/Lattice_base.h @@ -83,11 +83,9 @@ public: // The copy constructor for this will need to be used by device lambda functions ///////////////////////////////////////////////////////////////////////////////////////// template -class LatticeView : public LatticeAccelerator +class LatticeExprView : public LatticeAccelerator { public: - - // Rvalue #ifdef GRID_SIMT accelerator_inline const typename vobj::scalar_object operator()(size_t i) const { return coalescedRead(this->_odata[i]); } @@ -102,11 +100,65 @@ public: accelerator_inline uint64_t end(void) const { return this->_odata_size; }; accelerator_inline uint64_t size(void) const { return this->_odata_size; }; - LatticeView(const LatticeAccelerator &refer_to_me) : LatticeAccelerator (refer_to_me) + // Non accelerator functions + LatticeExprView(const LatticeAccelerator &refer_to_me) : LatticeAccelerator (refer_to_me){} + ~LatticeExprView(){} + + void AcceleratorViewOpen(void) + { // Translate the pointer, could 
save a copy. Could use a "Handle" and not save _odata originally in base + void *cpu_ptr=this->_odata; + // std::cout << "AccViewOpen "<_odata <_odata=(vobj *)AllocationCache::AccViewOpen(this->_odata,this->_odata_size*sizeof(vobj),1,0); + } + void AcceleratorViewClose(void) + { // Inform the manager + // std::cout << "View Close"<_odata<_odata); + } + void CpuViewOpen(void) + { // Translate the pointer + void *cpu_ptr=this->_odata; + // std::cout << "CpuViewOpen "<_odata <_odata=(vobj *)AllocationCache::CpuViewOpen(cpu_ptr,this->_odata_size*sizeof(vobj),1,0); + } + void CpuViewClose(void) + { // Inform the manager + // std::cout << "CpuViewClose"<_odata<_odata); + } + +}; +// UserView constructor,destructor updates view manager +// Non-copyable object??? Second base with copy/= deleted? +template +class LatticeView : public LatticeExprView +{ +public: + // Rvalue + /* +#ifdef GRID_SIMT + accelerator_inline const typename vobj::scalar_object operator()(size_t i) const { return coalescedRead(this->_odata[i]); } +#else + accelerator_inline const vobj & operator()(size_t i) const { return this->_odata[i]; } +#endif + + accelerator_inline const vobj & operator[](size_t i) const { return this->_odata[i]; }; + accelerator_inline vobj & operator[](size_t i) { return this->_odata[i]; }; + + accelerator_inline uint64_t begin(void) const { return 0;}; + accelerator_inline uint64_t end(void) const { return this->_odata_size; }; + accelerator_inline uint64_t size(void) const { return this->_odata_size; }; + */ + LatticeView(const LatticeAccelerator &refer_to_me) : LatticeExprView (refer_to_me) { + this->AcceleratorViewOpen(); + } + ~LatticeView(){ + this->AcceleratorViewClose(); } }; + ///////////////////////////////////////////////////////////////////////////////////////// // Lattice expression types used by ET to assemble the AST // @@ -120,7 +172,7 @@ template using is_lattice = std::is_base_of; template using is_lattice_expr = std::is_base_of; template struct ViewMapBase { 
typedef T Type; }; -template struct ViewMapBase { typedef LatticeView Type; }; +template struct ViewMapBase { typedef LatticeExprView Type; }; template using ViewMap = ViewMapBase::value >; template @@ -231,12 +283,15 @@ public: CBFromExpression(cb,expr); assert( (cb==Odd) || (cb==Even)); this->checkerboard=cb; - + + auto exprCopy = expr; + ExpressionViewOpen(exprCopy); auto me = View(); accelerator_for(ss,me.size(),1,{ - auto tmp = eval(ss,expr); + auto tmp = eval(ss,exprCopy); vstream(me[ss],tmp); }); + ExpressionViewClose(exprCopy); return *this; } template inline Lattice & operator=(const LatticeBinaryExpression &expr) @@ -251,11 +306,14 @@ public: assert( (cb==Odd) || (cb==Even)); this->checkerboard=cb; + auto exprCopy = expr; + ExpressionViewOpen(exprCopy); auto me = View(); accelerator_for(ss,me.size(),1,{ - auto tmp = eval(ss,expr); + auto tmp = eval(ss,exprCopy); vstream(me[ss],tmp); }); + ExpressionViewClose(exprCopy); return *this; } template inline Lattice & operator=(const LatticeTrinaryExpression &expr) @@ -269,11 +327,14 @@ public: CBFromExpression(cb,expr); assert( (cb==Odd) || (cb==Even)); this->checkerboard=cb; + auto exprCopy = expr; + ExpressionViewOpen(exprCopy); auto me = View(); accelerator_for(ss,me.size(),1,{ - auto tmp = eval(ss,expr); + auto tmp = eval(ss,exprCopy); vstream(me[ss],tmp); }); + ExpressionViewClose(exprCopy); return *this; } //GridFromExpression is tricky to do diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h index 0a5103a2..5da4e21e 100644 --- a/Grid/threads/Accelerator.h +++ b/Grid/threads/Accelerator.h @@ -27,6 +27,17 @@ Author: paboyle *************************************************************************************/ /* END LEGAL */ #pragma once + +#ifdef HAVE_MALLOC_MALLOC_H +#include +#endif +#ifdef HAVE_MALLOC_H +#include +#endif +#ifdef HAVE_MM_MALLOC_H +#include +#endif + NAMESPACE_BEGIN(Grid); ////////////////////////////////////////////////////////////////////////////////// @@ -144,8 +155,8 
@@ inline void *acceleratorAllocDevice(size_t bytes) }; inline void acceleratorFreeShared(void *ptr){ cudaFree(ptr);}; inline void acceleratorFreeDevice(void *ptr){ cudaFree(ptr);}; - - +inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { cudaMemcpy(to,from,bytes, cudaMemcpyHostToDevice);} +inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);} #endif ////////////////////////////////////////////// @@ -192,6 +203,8 @@ inline void *acceleratorAllocShared(size_t bytes){ return malloc_shared(bytes,*t inline void *acceleratorAllocDevice(size_t bytes){ return malloc_device(bytes,*theGridAccelerator);}; inline void acceleratorFreeShared(void *ptr){free(ptr,*theGridAccelerator);}; inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);}; +inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();} +inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();} #endif @@ -275,6 +288,8 @@ inline void *acceleratorAllocDevice(size_t bytes) inline void acceleratorFreeShared(void *ptr){ hipFree(ptr);}; inline void acceleratorFreeDevice(void *ptr){ hipFree(ptr);}; +inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { cudaMemcpy(to,from,bytes, cudaMemcpyHostToDevice);} +inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);} #endif @@ -311,16 +326,8 @@ inline void acceleratorFreeDevice(void *ptr){ hipFree(ptr);}; #define accelerator_for2d(iter1, num1, iter2, num2, nsimd, ... 
) thread_for2d(iter1,num1,iter2,num2,{ __VA_ARGS__ }); accelerator_inline int acceleratorSIMTlane(int Nsimd) { return 0; } // CUDA specific - -#ifdef HAVE_MALLOC_MALLOC_H -#include -#endif -#ifdef HAVE_MALLOC_H -#include -#endif -#ifdef HAVE_MM_MALLOC_H -#include -#endif +inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { memcpy(to,from,bytes);} +inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ memcpy(to,from,bytes);} #ifdef HAVE_MM_MALLOC_H inline void *acceleratorAllocShared(size_t bytes){return _mm_malloc(bytes,GRID_ALLOC_ALIGN);}; diff --git a/Grid/util/Init.cc b/Grid/util/Init.cc index f1ab6551..97ac7dc9 100644 --- a/Grid/util/Init.cc +++ b/Grid/util/Init.cc @@ -286,6 +286,8 @@ void Grid_init(int *argc,char ***argv) ////////////////////////////////////////////////////////// acceleratorInit(); // Must come first to set device prior to MPI init due to Omnipath Driver + AllocationCache::Init(); + if( GridCmdOptionExists(*argv,*argv+*argc,"--shm") ){ int MB; arg= GridCmdOptionPayload(*argv,*argv+*argc,"--shm"); diff --git a/configure.ac b/configure.ac index f9ea03fc..74d37605 100644 --- a/configure.ac +++ b/configure.ac @@ -147,7 +147,7 @@ case ${ac_SUMMIT} in AC_DEFINE([GRID_IBM_SUMMIT],[1],[Let JSRUN manage the GPU device allocation]);; esac -############### SYCL +############### SYCL/CUDA/HIP/none AC_ARG_ENABLE([accelerator], [AC_HELP_STRING([--enable-accelerator=cuda|sycl|hip|none], [enable none,cuda,sycl,hip acceleration])], [ac_ACCELERATOR=${enable_accelerator}], [ac_ACCELERATOR=none]) @@ -168,6 +168,20 @@ case ${ac_ACCELERATOR} in AC_MSG_ERROR(["Acceleration not suppoorted ${ac_ACCELERATOR}"]);; esac +############### UNIFIED MEMORY +AC_ARG_ENABLE([unified], + [AC_HELP_STRING([--enable-unified=yes|no], [enable unified address space for accelerator loops])], + [ac_UNIFIED=${enable_unified}], [ac_UNIFIED=yes]) +case ${ac_UNIFIED} in + yes) + echo Unified memory for accelerator loops + AC_DEFINE([GRID_UVM],[1],[Use 
unified address space]);; + no) + echo Manual memory copy for accelerator loops;; + *) + AC_MSG_ERROR(["Unified virtual memory option not suppoorted ${ac_UNIFIED}"]);; +esac + ############### Intel libraries AC_ARG_ENABLE([mkl], [AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])], @@ -612,6 +626,7 @@ compiler version : ${ax_cv_gxx_version} SIMD : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG} Threading : ${ac_openmp} Acceleration : ${ac_ACCELERATOR} +Unified virtual memory : ${ac_UNIFIED} Communications type : ${comms_type} Shared memory allocator : ${ac_SHM} Shared memory mmap path : ${ac_SHMPATH} From a7635fd5ba250e95483005c6988b30b27980d928 Mon Sep 17 00:00:00 2001 From: Christoph Lehner Date: Mon, 18 May 2020 17:52:26 -0400 Subject: [PATCH 12/86] summit mem --- Grid/GridCore.h | 1 + Grid/allocator/AlignedAllocator.h | 9 +- Grid/allocator/GridMemoryManager.cc | 131 ++++++++++++++++++++++++++++ Grid/allocator/GridMemoryManager.h | 42 +++++++++ Grid/lattice/Lattice_base.h | 39 ++++----- Grid/lattice/Lattice_transfer.h | 2 +- 6 files changed, 197 insertions(+), 27 deletions(-) create mode 100644 Grid/allocator/GridMemoryManager.cc create mode 100644 Grid/allocator/GridMemoryManager.h diff --git a/Grid/GridCore.h b/Grid/GridCore.h index a48d2d49..495a81e1 100644 --- a/Grid/GridCore.h +++ b/Grid/GridCore.h @@ -47,6 +47,7 @@ Author: paboyle #include #include #include +#include #include #include #include diff --git a/Grid/allocator/AlignedAllocator.h b/Grid/allocator/AlignedAllocator.h index 77167299..600b7097 100644 --- a/Grid/allocator/AlignedAllocator.h +++ b/Grid/allocator/AlignedAllocator.h @@ -178,12 +178,13 @@ public: //////////////////////////////////// if ( ptr == (_Tp *) NULL ) { // printf(" alignedAllocater cache miss %ld bytes ",bytes); BACKTRACEFP(stdout); - auto err = cudaMallocManaged((void **)&ptr,bytes); - if( err != cudaSuccess ) { + // auto err = +gridMallocManaged((void **)&ptr,bytes); +/*if( err != cudaSuccess ) { ptr = (_Tp *) 
NULL; std::cerr << " cudaMallocManaged failed for " << bytes<<" bytes " < + +NAMESPACE_BEGIN(Grid); + +#define _GRID_MEM_PAGE_SIZE 4096 +void* _grid_mem_base = 0; +size_t _grid_mem_pages; +struct _grid_mem_range { + size_t page_start, page_end; +}; +std::vector<_grid_mem_range> _grid_mem_avail; +std::map _grid_mem_alloc; + +void gridMemoryInit() { + size_t free,total; + cudaMemGetInfo(&free,&total); + + char* ev = getenv("GRID_DEVICE_BYTES_FOR_CACHE"); + if (ev) { + long bytes; + assert(sscanf(ev,"%ld",&bytes)==1); + free -= bytes; + } + + _grid_mem_pages = free / _GRID_MEM_PAGE_SIZE; + size_t sz = _grid_mem_pages * _GRID_MEM_PAGE_SIZE; + + assert(cudaSuccess == cudaMallocManaged(&_grid_mem_base,sz)); + + int target; + cudaGetDevice(&target); + cudaMemAdvise(_grid_mem_base,sz,cudaMemAdviseSetPreferredLocation,target); + + assert(cudaSuccess == cudaMemset(_grid_mem_base,0,sz)); // touch on device + std::cout << GridLogMessage << "gridMemoryInit: " << sz << " bytes" << std::endl; + + _grid_mem_avail.push_back( { 0, _grid_mem_pages } ); +} + +void gridMallocManaged(void** pp, size_t sz) { + + if (_grid_mem_avail.empty()) + gridMemoryInit(); + + size_t pages = (sz + _GRID_MEM_PAGE_SIZE - 1) / _GRID_MEM_PAGE_SIZE; + // find free block + size_t m; + for (m=0;m<_grid_mem_avail.size();m++) { + auto & b = _grid_mem_avail[m]; + if (b.page_end - b.page_start >= pages) + break; + } + if (m == _grid_mem_avail.size()) { + std::cout << GridLogMessage << "Out of memory" << std::endl; + assert(0); + } + *pp = (char*)_grid_mem_base + _GRID_MEM_PAGE_SIZE*_grid_mem_avail[m].page_start; + _grid_mem_alloc[*pp] = { _grid_mem_avail[m].page_start, _grid_mem_avail[m].page_start + pages }; + _grid_mem_avail[m].page_start += pages; +} + +void gridFree(void* p) { + + if (_grid_mem_avail.empty()) + gridMemoryInit(); + + auto & alloc = _grid_mem_alloc[p]; + if (alloc.page_start == alloc.page_end) { + free(p); + //cudaFreeHost(p); + } else { + // can we enlarge existing one? 
+ for (size_t m=0;m<_grid_mem_avail.size();m++) { + auto & b = _grid_mem_avail[m]; + if (b.page_start == alloc.page_end) { + b.page_start = alloc.page_start; + return; + } + if (b.page_end == alloc.page_start) { + b.page_end = alloc.page_end; + return; + } + } + // fragment memory + _grid_mem_avail.push_back( alloc ); + } + _grid_mem_alloc.erase(p); +} + +void gridAcceleratorPrefetch(void* p, size_t sz) { + + auto & alloc = _grid_mem_alloc[p]; + if (alloc.page_start == alloc.page_end) // pinned to host + return; + + int target; + cudaGetDevice(&target); + cudaMemPrefetchAsync(p,sz,target); +} + +void gridMemGetInfo(size_t* pfree, size_t* ptotal) { + + if (_grid_mem_avail.empty()) + gridMemoryInit(); + + *ptotal = _grid_mem_pages * _GRID_MEM_PAGE_SIZE; + *pfree = 0; + for (auto & a : _grid_mem_avail) + *pfree += (a.page_end - a.page_start) * _GRID_MEM_PAGE_SIZE; +} + +void gridMoveToHost(void** pp) { + + if (_grid_mem_avail.empty()) + gridMemoryInit(); + + auto & alloc = _grid_mem_alloc[*pp]; + if (alloc.page_start == alloc.page_end) // already on host + return; + + size_t sz = (alloc.page_end - alloc.page_start) * _GRID_MEM_PAGE_SIZE; + void*pn; + //assert(cudaSuccess == cudaMallocHost(&pn,sz)); + pn = malloc(sz); + memcpy(pn,*pp,sz); + gridFree(*pp); + *pp = pn; + _grid_mem_alloc[pn] = { 0,0 }; +} + +NAMESPACE_END(Grid); diff --git a/Grid/allocator/GridMemoryManager.h b/Grid/allocator/GridMemoryManager.h new file mode 100644 index 00000000..9e619301 --- /dev/null +++ b/Grid/allocator/GridMemoryManager.h @@ -0,0 +1,42 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/GridMemoryManager.h + + Copyright (C) 2020 + +Author: Christoph Lehner + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + 
(at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef GRID_MEMORY_MANAGER_H +#define GRID_MEMORY_MANAGER_H + +NAMESPACE_BEGIN(Grid); + +void gridMemoryInit(); +void gridMallocManaged(void** pp, size_t sz); +void gridMoveToHost(void** pp); +void gridAcceleratorPrefetch(void* p, size_t sz); +void gridMemGetInfo(size_t* pfree, size_t* ptotal); +void gridFree(void* p); + +NAMESPACE_END(Grid); + +#endif diff --git a/Grid/lattice/Lattice_base.h b/Grid/lattice/Lattice_base.h index 284190ba..42e9e50a 100644 --- a/Grid/lattice/Lattice_base.h +++ b/Grid/lattice/Lattice_base.h @@ -97,33 +97,14 @@ public: else grid = _grid; }; - accelerator_inline void Advise(int advise) { -#ifdef GRID_NVCC -#ifndef __CUDA_ARCH__ // only on host - if (advise & AdviseInfrequentUse) { - cudaMemAdvise(_odata,_odata_size*sizeof(vobj),cudaMemAdviseSetPreferredLocation,cudaCpuDeviceId); - } - if (advise & AdviseReadMostly) { - cudaMemAdvise(_odata,_odata_size*sizeof(vobj),cudaMemAdviseSetReadMostly,-1); - } -#endif -#endif - }; - accelerator_inline void AcceleratorPrefetch(int accessMode = ViewReadWrite) { // will use accessMode in future -#ifdef GRID_NVCC -#ifndef __CUDA_ARCH__ // only on host - int target; - cudaGetDevice(&target); - cudaMemPrefetchAsync(_odata,_odata_size*sizeof(vobj),target); -#endif -#endif + gridAcceleratorPrefetch(_odata,_odata_size*sizeof(vobj)); }; 
accelerator_inline void HostPrefetch(int accessMode = ViewReadWrite) { // will use accessMode in future #ifdef GRID_NVCC #ifndef __CUDA_ARCH__ // only on host - cudaMemPrefetchAsync(_odata,_odata_size*sizeof(vobj),cudaCpuDeviceId); + //cudaMemPrefetchAsync(_odata,_odata_size*sizeof(vobj),cudaCpuDeviceId); #endif #endif }; @@ -246,13 +227,27 @@ private: dealloc(); this->_odata_size = size; - if ( size ) + if ( size ) this->_odata = alloc.allocate(this->_odata_size); else this->_odata = nullptr; } } public: + + void Advise(int advise) { +#ifdef GRID_NVCC +#ifndef __CUDA_ARCH__ // only on host + if (advise & AdviseInfrequentUse) { + gridMoveToHost((void**)&this->_odata); + } + if (advise & AdviseReadMostly) { + //cudaMemAdvise(_odata,_odata_size*sizeof(vobj),cudaMemAdviseSetReadMostly,-1); + } +#endif +#endif + }; + ///////////////////////////////////////////////////////////////////////////////// // Return a view object that may be dereferenced in site loops. // The view is trivially copy constructible and may be copied to an accelerator device diff --git a/Grid/lattice/Lattice_transfer.h b/Grid/lattice/Lattice_transfer.h index c23ddcdc..e12ef787 100644 --- a/Grid/lattice/Lattice_transfer.h +++ b/Grid/lattice/Lattice_transfer.h @@ -96,7 +96,7 @@ accelerator_inline void convertType(ComplexF & out, const std::complex & out = in; } -#ifdef __CUDA_ARCH__ +#ifdef GRID_NVCC accelerator_inline void convertType(vComplexF & out, const ComplexF & in) { ((ComplexF*)&out)[SIMTlane(vComplexF::Nsimd())] = in; } From 7860a50f70a6084dd281ca7e69aca917c47ddfa3 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 21 May 2020 16:13:16 -0400 Subject: [PATCH 13/86] Make view specify where and drive data motion - first cut. 
This is a compile tiime option --enable-unified=yes/no --- Grid/GridStd.h | 1 + Grid/algorithms/CoarsenedMatrix.h | 42 ++-- Grid/algorithms/FFT.h | 3 +- Grid/algorithms/iterative/BiCGSTAB.h | 14 +- Grid/algorithms/iterative/ConjugateGradient.h | 6 +- .../iterative/ImplicitlyRestartedLanczos.h | 14 +- Grid/allocator/AlignedAllocator.cc | 2 +- Grid/allocator/AllocationCache.cc | 21 +- Grid/allocator/AllocationCache.h | 43 +++- Grid/allocator/MemoryCacheDeviceMem.cc | 157 +++++++++---- Grid/allocator/MemoryCacheShared.cc | 27 +-- Grid/cshift/Cshift_common.h | 18 +- Grid/lattice/Lattice.h | 1 + Grid/lattice/Lattice_ET.h | 8 +- Grid/lattice/Lattice_arith.h | 68 +++--- Grid/lattice/Lattice_base.h | 210 ++---------------- Grid/lattice/Lattice_comparison.h | 14 +- Grid/lattice/Lattice_coordinate.h | 20 +- Grid/lattice/Lattice_local.h | 16 +- Grid/lattice/Lattice_matrix_reduction.h | 14 +- Grid/lattice/Lattice_peekpoke.h | 24 +- Grid/lattice/Lattice_reality.h | 8 +- Grid/lattice/Lattice_reduction.h | 43 ++-- Grid/lattice/Lattice_rng.h | 2 +- Grid/lattice/Lattice_trace.h | 8 +- Grid/lattice/Lattice_transfer.h | 59 +++-- Grid/lattice/Lattice_transpose.h | 8 +- Grid/lattice/Lattice_unary.h | 16 +- Grid/qcd/action/fermion/GparityWilsonImpl.h | 18 +- Grid/qcd/action/fermion/WilsonCloverFermion.h | 24 +- Grid/qcd/action/fermion/WilsonImpl.h | 12 +- .../implementation/CayleyFermion5Dcache.h | 20 +- .../implementation/CayleyFermion5Dvec.h | 24 +- .../DomainWallEOFAFermionCache.h | 20 +- ...ImprovedStaggeredFermion5DImplementation.h | 32 +-- .../ImprovedStaggeredFermionImplementation.h | 40 ++-- .../implementation/MobiusEOFAFermionCache.h | 40 ++-- .../WilsonFermionImplementation.h | 22 +- .../WilsonKernelsImplementation.h | 46 ++-- Grid/qcd/action/gauge/GaugeImplTypes.h | 8 +- .../action/scalar/ScalarInteractionAction.h | 8 +- Grid/qcd/smearing/GaugeConfiguration.h | 20 +- Grid/qcd/utils/A2Autils.h | 50 ++--- Grid/qcd/utils/BaryonUtils.h | 26 +-- Grid/qcd/utils/LinalgUtils.h | 48 
++-- Grid/qcd/utils/SUn.h | 10 +- Grid/stencil/Stencil.h | 47 +++- Grid/threads/Accelerator.h | 24 +- 48 files changed, 688 insertions(+), 718 deletions(-) diff --git a/Grid/GridStd.h b/Grid/GridStd.h index 16cfcf50..ecb561ea 100644 --- a/Grid/GridStd.h +++ b/Grid/GridStd.h @@ -6,6 +6,7 @@ /////////////////// #include #include +#include #include #include #include diff --git a/Grid/algorithms/CoarsenedMatrix.h b/Grid/algorithms/CoarsenedMatrix.h index 8e5c91a7..4493d740 100644 --- a/Grid/algorithms/CoarsenedMatrix.h +++ b/Grid/algorithms/CoarsenedMatrix.h @@ -186,10 +186,10 @@ public: hermop.HermOp(*Tn,y); - auto y_v = y.View(); - auto Tn_v = Tn->View(); - auto Tnp_v = Tnp->View(); - auto Tnm_v = Tnm->View(); + auto y_v = y.View(AcceleratorWrite); + auto Tn_v = Tn->View(AcceleratorWrite); + auto Tnp_v = Tnp->View(AcceleratorWrite); + auto Tnm_v = Tnm->View(AcceleratorWrite); const int Nsimd = CComplex::Nsimd(); accelerator_forNB(ss, FineGrid->oSites(), Nsimd, { coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss)); @@ -264,12 +264,12 @@ public: Stencil.HaloExchange(in,compressor); comms_usec += usecond(); - auto in_v = in.View(); - auto out_v = out.View(); + auto in_v = in.View(AcceleratorRead); + auto out_v = out.View(AcceleratorWrite); typedef LatticeView Aview; Vector AcceleratorViewContainer; - for(int p=0;p Aview; Vector AcceleratorViewContainer; - for(int p=0;poSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); }); // if( disp!= 0 ) { accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });} @@ -563,11 +563,11 @@ public: mult(tmp,phi,oddmask ); linop.Op(tmp,Mphio); { - auto tmp_ = tmp.View(); - auto evenmask_ = evenmask.View(); - auto oddmask_ = oddmask.View(); - auto Mphie_ = Mphie.View(); - auto Mphio_ = Mphio.View(); + auto tmp_ = tmp.View(AcceleratorWrite); + auto evenmask_ = evenmask.View(AcceleratorRead); + auto oddmask_ = oddmask.View(AcceleratorRead); + auto Mphie_ = 
Mphie.View(AcceleratorRead); + auto Mphio_ = Mphio.View(AcceleratorRead); accelerator_for(ss, FineGrid->oSites(), Fobj::Nsimd(),{ coalescedWrite(tmp_[ss],evenmask_(ss)*Mphie_(ss) + oddmask_(ss)*Mphio_(ss)); }); @@ -575,8 +575,8 @@ public: blockProject(SelfProj,tmp,Subspace.subspace); - auto SelfProj_ = SelfProj.View(); - auto A_self = A[self_stencil].View(); + auto SelfProj_ = SelfProj.View(AcceleratorRead); + auto A_self = A[self_stencil].View(AcceleratorWrite); accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ for(int j=0;j pgbuf(&pencil_g); - auto pgbuf_v = pgbuf.View(); + auto pgbuf_v = pgbuf.View(CpuWrite); typedef typename FFTW::FFTW_scalar FFTW_scalar; typedef typename FFTW::FFTW_plan FFTW_plan; diff --git a/Grid/algorithms/iterative/BiCGSTAB.h b/Grid/algorithms/iterative/BiCGSTAB.h index 3a7be1ef..04328a77 100644 --- a/Grid/algorithms/iterative/BiCGSTAB.h +++ b/Grid/algorithms/iterative/BiCGSTAB.h @@ -122,9 +122,9 @@ class BiCGSTAB : public OperatorFunction LinearCombTimer.Start(); bo = beta * omega; - auto p_v = p.View(); - auto r_v = r.View(); - auto v_v = v.View(); + auto p_v = p.View(AcceleratorWrite); + auto r_v = r.View(AcceleratorWrite); + auto v_v = v.View(AcceleratorWrite); accelerator_for(ss, p_v.size(), Field::vector_object::Nsimd(),{ coalescedWrite(p_v[ss], beta*p_v(ss) - bo*v_v(ss) + r_v(ss)); }); @@ -142,13 +142,13 @@ class BiCGSTAB : public OperatorFunction alpha = rho / Calpha.real(); LinearCombTimer.Start(); - auto h_v = h.View(); - auto psi_v = psi.View(); + auto h_v = h.View(AcceleratorWrite); + auto psi_v = psi.View(AcceleratorWrite); accelerator_for(ss, h_v.size(), Field::vector_object::Nsimd(),{ coalescedWrite(h_v[ss], alpha*p_v(ss) + psi_v(ss)); }); - auto s_v = s.View(); + auto s_v = s.View(AcceleratorWrite); accelerator_for(ss, s_v.size(), Field::vector_object::Nsimd(),{ coalescedWrite(s_v[ss], -alpha*v_v(ss) + r_v(ss)); }); @@ -166,7 +166,7 @@ class BiCGSTAB : public OperatorFunction omega = Comega.real() / norm2(t); 
LinearCombTimer.Start(); - auto t_v = t.View(); + auto t_v = t.View(AcceleratorWrite); accelerator_for(ss, psi_v.size(), Field::vector_object::Nsimd(),{ coalescedWrite(psi_v[ss], h_v(ss) + omega * s_v(ss)); coalescedWrite(r_v[ss], -omega * t_v(ss) + s_v(ss)); diff --git a/Grid/algorithms/iterative/ConjugateGradient.h b/Grid/algorithms/iterative/ConjugateGradient.h index 3a2544b5..d40fee7b 100644 --- a/Grid/algorithms/iterative/ConjugateGradient.h +++ b/Grid/algorithms/iterative/ConjugateGradient.h @@ -140,9 +140,9 @@ public: b = cp / c; LinearCombTimer.Start(); - auto psi_v = psi.View(); - auto p_v = p.View(); - auto r_v = r.View(); + auto psi_v = psi.View(AcceleratorWrite); + auto p_v = p.View(AcceleratorWrite); + auto r_v = r.View(AcceleratorWrite); accelerator_for(ss,p_v.size(), Field::vector_object::Nsimd(),{ coalescedWrite(psi_v[ss], a * p_v(ss) + psi_v(ss)); coalescedWrite(p_v[ss] , b * p_v(ss) + r_v (ss)); diff --git a/Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h b/Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h index 49190663..05ed8586 100644 --- a/Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ -57,17 +57,17 @@ void basisOrthogonalize(std::vector &basis,Field &w,int k) template void basisRotate(std::vector &basis,Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm) { - typedef decltype(basis[0].View()) View; - auto tmp_v = basis[0].View(); + typedef decltype(basis[0].View(CpuWrite)) View; + auto tmp_v = basis[0].View(CpuWrite); Vector basis_v(basis.size(),tmp_v); View *basis_vp = &basis_v[0]; typedef typename Field::vector_object vobj; GridBase* grid = basis[0].Grid(); for(int k=0;k > Bt(thread_max() * Nm); // Thread private thread_region { @@ -149,16 +149,16 @@ void basisRotate(std::vector &basis,Eigen::MatrixXd& Qt,int j0, int j1, i template void basisRotateJ(Field &result,std::vector &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm) { - typedef 
decltype(basis[0].View()) View; + typedef decltype(basis[0].View(AcceleratorWrite)) View; typedef typename Field::vector_object vobj; GridBase* grid = basis[0].Grid(); result.Checkerboard() = basis[0].Checkerboard(); - auto result_v=result.View(); + auto result_v=result.View(AcceleratorWrite); Vector basis_v(basis.size(),result_v); View * basis_vp = &basis_v[0]; for(int k=0;k Qt_jv(Nm); double * Qt_j = & Qt_jv[0]; diff --git a/Grid/allocator/AlignedAllocator.cc b/Grid/allocator/AlignedAllocator.cc index 18854c95..399f1939 100644 --- a/Grid/allocator/AlignedAllocator.cc +++ b/Grid/allocator/AlignedAllocator.cc @@ -12,7 +12,7 @@ bool MemoryProfiler::debug = false; #define SMALL_LIMIT (4096) #endif -#ifdef POINTER_CACHE +#ifdef ALLOCATION_CACHE int PointerCache::victim; PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::Ncache]; diff --git a/Grid/allocator/AllocationCache.cc b/Grid/allocator/AllocationCache.cc index a7aeea80..dc32affd 100644 --- a/Grid/allocator/AllocationCache.cc +++ b/Grid/allocator/AllocationCache.cc @@ -22,8 +22,10 @@ void *AllocationCache::AcceleratorAllocate(size_t bytes) { void *ptr = (void *) Lookup(bytes,Acc); - if ( ptr == (void *) NULL ) + if ( ptr == (void *) NULL ) { ptr = (void *) acceleratorAllocDevice(bytes); + // std::cout <<"AcceleratorAllocate: allocated Accelerator pointer "<=0){ Evict(e); } + if(e>=0){ Discard(e); } // If present remove entry and free accelerator too. // Can we ever hit a free event with a view still in scope? @@ -90,13 +90,18 @@ void AllocationCache::Init(void) Ncache[AccSmall]=Nc; } } + std::cout << "MemoryManager::Init() SMALL "< -#ifndef GRID_UNIFIED +#ifndef GRID_UVM #warning "Using explicit device memory copies" NAMESPACE_BEGIN(Grid); -#define dprintf(...) 
+#define dprintf //////////////////////////////////////////////////////////// // For caching copies of data on device @@ -20,15 +20,12 @@ typedef struct { uint32_t cpuLock; } AcceleratorViewEntry; -#define Write (1) -#define Read (2) -#define WriteDiscard (3) ////////////////////////////////////////////////////////////////////// // Data tables for ViewCache ////////////////////////////////////////////////////////////////////// static AcceleratorViewEntry AccCache[NaccCacheMax]; static int AccCacheVictim; // Base for round robin search -static int NaccCache = 8; +static int NaccCache = 32; //////////////////////////////////// // Priority ordering for unlocked entries @@ -68,7 +65,7 @@ int AllocationCache::ViewVictim(void) if ( locks==0 ) { - if( s==Empty ) { prioEmpty = e; dprintf("Empty");} + if( s==Empty ) { prioEmpty = e; dprintf("Empty"); } if( t == EvictNext ) { if( s==CpuDirty ) { prioCpuDirtyEN = e; dprintf("CpuDirty Transient");} @@ -97,21 +94,42 @@ int AllocationCache::ViewVictim(void) if ( prioEmpty >= 0 ) victim = prioEmpty; /*Highest prio is winner*/ assert(victim >= 0); // Must succeed/ - dprintf("AllocationCacheDeviceMem: Selected victim cache entry %d\n",victim); + dprintf("AllocationCacheDeviceMem: Selected victim cache entry %d\n",victim); // advance victim pointer AccCacheVictim=(AccCacheVictim+1)%NaccCache; - dprintf("AllocationCacheDeviceMem: victim pointer now %d / %d\n",AccCacheVictim,NaccCache); + dprintf("AllocationCacheDeviceMem: victim pointer now %d / %d\n",AccCacheVictim,NaccCache); return victim; } ///////////////////////////////////////////////// // Accelerator cache motion ///////////////////////////////////////////////// + +void AllocationCache::Discard(int e) // remove from Accelerator, remove entry, without flush +{ + if(AccCache[e].state!=Empty){ + dprintf("AllocationCache: Discard(%d) %llx,%llx\n",e,(uint64_t)AccCache[e].AccPtr,(uint64_t)AccCache[e].CpuPtr); + assert(AccCache[e].accLock==0); + assert(AccCache[e].cpuLock==0); + 
assert(AccCache[e].CpuPtr!=NULL); + if(AccCache[e].AccPtr) { + dprintf("AllocationCache: Free(%d) %llx\n",e,(uint64_t)AccCache[e].AccPtr); + AcceleratorFree(AccCache[e].AccPtr,AccCache[e].bytes); + } + } + AccCache[e].AccPtr=NULL; + AccCache[e].CpuPtr=NULL; + AccCache[e].bytes=0; + AccCache[e].state=Empty; + AccCache[e].accLock=0; + AccCache[e].cpuLock=0; +} + void AllocationCache::Evict(int e) // Make CPU consistent, remove from Accelerator, remove entry { if(AccCache[e].state!=Empty){ - dprintf("AllocationCache: Evict(%d) %llx,%llxn",e,(uint64_t)AccCache[e].AccPtr,(uint64_t)AccCache[e].CpuPtr); + dprintf("AllocationCache: Evict(%d) %llx,%llx\n",e,(uint64_t)AccCache[e].AccPtr,(uint64_t)AccCache[e].CpuPtr); assert(AccCache[e].accLock==0); assert(AccCache[e].cpuLock==0); if(AccCache[e].state==AccDirty) { @@ -119,7 +137,7 @@ void AllocationCache::Evict(int e) // Make CPU consistent, remove from Accelerat } assert(AccCache[e].CpuPtr!=NULL); if(AccCache[e].AccPtr) { - dprintf("AllocationCache: Free(%d) %llx\n",e,(uint64_t)AccCache[e].AccPtr); + dprintf("AllocationCache: Free(%d) %llx\n",e,(uint64_t)AccCache[e].AccPtr); AcceleratorFree(AccCache[e].AccPtr,AccCache[e].bytes); } } @@ -132,7 +150,7 @@ void AllocationCache::Evict(int e) // Make CPU consistent, remove from Accelerat } void AllocationCache::Flush(int e)// Copy back from a dirty device state and mark consistent. Do not remove { - dprintf("AllocationCache: Flush(%d) %llx -> %llx\n",e,(uint64_t)AccCache[e].AccPtr,(uint64_t)AccCache[e].CpuPtr); + // printf("AllocationCache: Flush(%d) %llx -> %llx\n",e,(uint64_t)AccCache[e].AccPtr,(uint64_t)AccCache[e].CpuPtr); fflush(stdout); assert(AccCache[e].state==AccDirty); assert(AccCache[e].cpuLock==0); assert(AccCache[e].accLock==0); @@ -150,14 +168,50 @@ void AllocationCache::Clone(int e)// Copy from CPU, mark consistent. 
Allocate if if(AccCache[e].AccPtr==NULL){ AccCache[e].AccPtr=AcceleratorAllocate(AccCache[e].bytes); } - dprintf("AllocationCache: Clone(%d) %llx <- %llx\n",e,(uint64_t)AccCache[e].AccPtr,(uint64_t)AccCache[e].CpuPtr); + // printf("AllocationCache: Clone(%d) %llx <- %llx\n",e,(uint64_t)AccCache[e].AccPtr,(uint64_t)AccCache[e].CpuPtr); fflush(stdout); acceleratorCopyToDevice(AccCache[e].CpuPtr,AccCache[e].AccPtr,AccCache[e].bytes); AccCache[e].state=Consistent; } + +void AllocationCache::CpuDiscard(int e)// Mark accelerator dirty without copy. Allocate if necessary +{ + assert(AccCache[e].state!=Empty); + assert(AccCache[e].cpuLock==0); + assert(AccCache[e].accLock==0); + assert(AccCache[e].CpuPtr!=NULL); + if(AccCache[e].AccPtr==NULL){ + AccCache[e].AccPtr=AcceleratorAllocate(AccCache[e].bytes); + } + // printf("AllocationCache: CpuDiscard(%d) %llx <- %llx\n",e,(uint64_t)AccCache[e].AccPtr,(uint64_t)AccCache[e].CpuPtr); fflush(stdout); + // acceleratorCopyToDevice(AccCache[e].CpuPtr,AccCache[e].AccPtr,AccCache[e].bytes); + AccCache[e].state=AccDirty; +} + ///////////////////////////////////////////////////////////////////////////////// // View management ///////////////////////////////////////////////////////////////////////////////// -void *AllocationCache::AccViewOpen(void* CpuPtr,size_t bytes,int mode,int transient) +void AllocationCache::ViewClose(void* Ptr,ViewMode mode) +{ + if( (mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard) ){ + AcceleratorViewClose(Ptr); + } else if( (mode==CpuRead)||(mode==CpuWrite)){ + CpuViewClose(Ptr); + } else { + assert(0); + } +} +void *AllocationCache::ViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint) +{ + if( (mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard) ){ + return AcceleratorViewOpen(CpuPtr,bytes,mode,hint); + } else if( (mode==CpuRead)||(mode==CpuWrite)){ + return CpuViewOpen(CpuPtr,bytes,mode,hint); + } else { + assert(0); + return nullptr; 
+ } +} +void *AllocationCache::AcceleratorViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint) { //////////////////////////////////////////////////////////////////////////// // Find if present, otherwise get or force an empty @@ -165,9 +219,11 @@ void *AllocationCache::AccViewOpen(void* CpuPtr,size_t bytes,int mode,int transi int e=CpuViewLookup(CpuPtr); if(e==-1) { e = ViewVictim(); + dprintf("AcceleratorViewOpen Victim is %d\n",e); Evict(e); // Does copy back if necessary, frees accelerator pointer if not null, sets to empty } + assert((mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard)); assert(AccCache[e].cpuLock==0); // Programming error if(AccCache[e].state!=Empty) { @@ -193,35 +249,50 @@ void *AllocationCache::AccViewOpen(void* CpuPtr,size_t bytes,int mode,int transi AccCache[e].AccPtr = NULL; AccCache[e].bytes = bytes; AccCache[e].state = CpuDirty; // Cpu starts primary - Clone(e); - if(mode==Write) - AccCache[e].state = AccDirty; // Empty + AccWrite=> AccDirty - else + if(mode==AcceleratorWriteDiscard){ + CpuDiscard(e); + AccCache[e].state = AccDirty; // Empty + AcceleratorWrite=> AccDirty + } else if(mode==AcceleratorWrite){ + Clone(e); + AccCache[e].state = AccDirty; // Empty + AcceleratorWrite=> AccDirty + } else { + Clone(e); AccCache[e].state = Consistent; // Empty + AccRead => Consistent + } AccCache[e].accLock= 1; - } else if(AccCache[e].state&CpuDirty ){ - Clone(e); - if(mode==Write) - AccCache[e].state = AccDirty; // CpuDirty + AccWrite=> AccDirty - else + // printf("Copied Empy entry %d into device accLock %d\n",e,AccCache[e].accLock); + } else if(AccCache[e].state==CpuDirty ){ + if(mode==AcceleratorWriteDiscard) { + CpuDiscard(e); + AccCache[e].state = AccDirty; // CpuDirty + AcceleratorWrite=> AccDirty + } else if(mode==AcceleratorWrite) { + Clone(e); + AccCache[e].state = AccDirty; // CpuDirty + AcceleratorWrite=> AccDirty + } else { + Clone(e); AccCache[e].state = Consistent; // CpuDirty + AccRead 
=> Consistent + } AccCache[e].accLock++; - } else if(AccCache[e].state&Consistent) { - if(mode==Write) - AccCache[e].state = AccDirty; // Consistent + AccWrite=> AccDirty + // printf("Copied CpuDirty entry %d into device accLock %d\n",e,AccCache[e].accLock); + } else if(AccCache[e].state==Consistent) { + if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard)) + AccCache[e].state = AccDirty; // Consistent + AcceleratorWrite=> AccDirty else AccCache[e].state = Consistent; // Consistent + AccRead => Consistent AccCache[e].accLock++; - } else if(AccCache[e].state&AccDirty) { - if(mode==Write) - AccCache[e].state = AccDirty; // AccDirty + AccWrite=> AccDirty + // printf("Consistent entry %d into device accLock %d\n",e,AccCache[e].accLock); + } else if(AccCache[e].state==AccDirty) { + if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard)) + AccCache[e].state = AccDirty; // AccDirty + AcceleratorWrite=> AccDirty else AccCache[e].state = AccDirty; // AccDirty + AccRead => AccDirty AccCache[e].accLock++; + // printf("AccDirty entry %d into device accLock %d\n",e,AccCache[e].accLock); } else { assert(0); } + int transient =hint; AccCache[e].transient= transient? 
EvictNext : 0; return AccCache[e].AccPtr; @@ -241,12 +312,18 @@ void *AllocationCache::AccViewOpen(void* CpuPtr,size_t bytes,int mode,int transi //////////////////////////////////// // look up & decrement lock count //////////////////////////////////// -void AllocationCache::AccViewClose(void* AccPtr) +void AllocationCache::AcceleratorViewClose(void* AccPtr) { - int e=AccViewLookup(AccPtr); + int e=CpuViewLookup(AccPtr); + // printf("AccView close %d lock %d \n",e,AccCache[e].accLock); + if(e==-1) exit(0); + if(AccCache[e].cpuLock!=0) exit(0); + if(AccCache[e].accLock==0) exit(0); + /* assert(e!=-1); assert(AccCache[e].cpuLock==0); assert(AccCache[e].accLock>0); + */ AccCache[e].accLock--; } void AllocationCache::CpuViewClose(void* CpuPtr) @@ -257,7 +334,7 @@ void AllocationCache::CpuViewClose(void* CpuPtr) assert(AccCache[e].accLock==0); AccCache[e].cpuLock--; } -void *AllocationCache::CpuViewOpen(void* CpuPtr,size_t bytes,int mode,int transient) +void *AllocationCache::CpuViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise transient) { //////////////////////////////////////////////////////////////////////////// // Find if present, otherwise get or force an empty @@ -265,9 +342,11 @@ void *AllocationCache::CpuViewOpen(void* CpuPtr,size_t bytes,int mode,int transi int e=CpuViewLookup(CpuPtr); if(e==-1) { e = ViewVictim(); + dprintf("CpuViewOpen Victim is %d\n",e); Evict(e); // Does copy back if necessary, frees accelerator pointer if not null, sets to empty } + assert((mode==CpuRead)||(mode==CpuWrite)); assert(AccCache[e].accLock==0); // Programming error if(AccCache[e].state!=Empty) { @@ -288,7 +367,7 @@ void *AllocationCache::CpuViewOpen(void* CpuPtr,size_t bytes,int mode,int transi AccCache[e].cpuLock++; } else if(AccCache[e].state==Consistent) { assert(AccCache[e].AccPtr != NULL); - if(mode==Write) + if(mode==CpuWrite) AccCache[e].state = CpuDirty; // Consistent +CpuWrite => CpuDirty else AccCache[e].state = Consistent; // Consistent +CpuRead => 
Consistent @@ -296,7 +375,7 @@ void *AllocationCache::CpuViewOpen(void* CpuPtr,size_t bytes,int mode,int transi } else if(AccCache[e].state==AccDirty) { assert(AccCache[e].AccPtr != NULL); Flush(e); - if(mode==Write) AccCache[e].state = CpuDirty; // AccDirty +CpuWrite => CpuDirty, Flush + if(mode==CpuWrite) AccCache[e].state = CpuDirty; // AccDirty +CpuWrite => CpuDirty, Flush else AccCache[e].state = Consistent; // AccDirty +CpuRead => Consistent, Flush AccCache[e].cpuLock++; } else { @@ -321,16 +400,6 @@ int AllocationCache::CpuViewLookup(void *CpuPtr) } return -1; } -int AllocationCache::AccViewLookup(void *AccPtr) -{ - assert(AccPtr!=NULL); - for(int e=0;e -#ifdef GRID_UNIFIED +#ifdef GRID_UVM #warning "Grid is assuming unified virtual memory address space" NAMESPACE_BEGIN(Grid); @@ -7,21 +7,22 @@ NAMESPACE_BEGIN(Grid); // View management is 1:1 address space mapping ///////////////////////////////////////////////////////////////////////////////// -void *AllocationCache::CpuViewOpen(void* CpuPtr,size_t bytes,int mode,int transient) { return CpuPtr; } -void *AllocationCache::AccViewOpen(void* CpuPtr,size_t bytes,int mode,int transient) { return CpuPtr; } -void AllocationCache::AccViewClose(void* AccPtr){} -void AllocationCache::CpuViewClose(void* CpuPtr){} - +void AllocationCache::AcceleratorViewClose(void* AccPtr){}; +void *AllocationCache::AcceleratorViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint){ return CpuPtr; } +void AllocationCache::CpuViewClose(void* Ptr){}; +void *AllocationCache::CpuViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint){ return CpuPtr; } +int AllocationCache::CpuViewLookup(void *CpuPtr){ return 0;} ///////////////////////////////////// // Dummy stubs ///////////////////////////////////// -int AllocationCache::ViewVictim(void) { assert(0); return 0;} -void AllocationCache::Evict(int e) { assert(0);} -void AllocationCache::Flush(int e) { assert(0);} -void AllocationCache::Clone(int e) { assert(0);} - -int 
AllocationCache::CpuViewLookup(void *CpuPtr){assert(0); return 0;} -int AllocationCache::AccViewLookup(void *AccPtr){assert(0); return 0;} +void AllocationCache::CpuDiscard(int e) { return;} +void AllocationCache::Discard(int e) { return;} +void AllocationCache::Evict(int e) { return; } +void AllocationCache::Flush(int e) { assert(0);} +void AllocationCache::Clone(int e) { assert(0);} +int AllocationCache::ViewVictim(void) { assert(0); return 0;} +void AllocationCache::ViewClose(void* AccPtr,ViewMode mode){}; +void *AllocationCache::ViewOpen (void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint){return CpuPtr;}; NAMESPACE_END(Grid); #endif diff --git a/Grid/cshift/Cshift_common.h b/Grid/cshift/Cshift_common.h index fe9afc62..1c99e797 100644 --- a/Grid/cshift/Cshift_common.h +++ b/Grid/cshift/Cshift_common.h @@ -52,7 +52,6 @@ Gather_plane_simple (const Lattice &rhs,commVector &buffer,int dimen int stride=rhs.Grid()->_slice_stride[dimension]; - auto rhs_v = rhs.View(); if ( cbmask == 0x3 ) { for(int n=0;n &rhs,commVector &buffer,int dimen } } } + auto rhs_v = rhs.View(AcceleratorRead); auto buffer_p = & buffer[0]; auto table = &Cshift_table[0]; accelerator_for(i,ent,1,{ @@ -100,7 +100,7 @@ Gather_plane_extract(const Lattice &rhs, int e2=rhs.Grid()->_slice_block[dimension]; int n1=rhs.Grid()->_slice_stride[dimension]; - auto rhs_v = rhs.View(); + auto rhs_v = rhs.View(AcceleratorRead); if ( cbmask ==0x3){ accelerator_for2d(n,e1,b,e2,1,{ int o = n*n1; @@ -179,7 +179,7 @@ template void Scatter_plane_simple (Lattice &rhs,commVector void Scatter_plane_merge(Lattice &rhs,ExtractPointerA int e2=rhs.Grid()->_slice_block[dimension]; if(cbmask ==0x3 ) { - auto rhs_v = rhs.View(); + auto rhs_v = rhs.View(AcceleratorWrite); accelerator_for2d(n,e1,b,e2,1,{ int o = n*rhs.Grid()->_slice_stride[dimension]; int offset = b+n*rhs.Grid()->_slice_block[dimension]; @@ -216,7 +216,7 @@ template void Scatter_plane_merge(Lattice &rhs,ExtractPointerA // Test_cshift_red_black code. 
// std::cout << "Scatter_plane merge assert(0); think this is buggy FIXME "<< std::endl;// think this is buggy FIXME std::cout<<" Unthreaded warning -- buffer is not densely packed ??"<_slice_stride[dimension]; @@ -272,8 +272,8 @@ template void Copy_plane(Lattice& lhs,const Lattice &rhs } } - auto rhs_v = rhs.View(); - auto lhs_v = lhs.View(); + auto rhs_v = rhs.View(AcceleratorRead); + auto lhs_v = lhs.View(AcceleratorWrite); auto table = &Cshift_table[0]; accelerator_for(i,ent,1,{ lhs_v[table[i].first]=rhs_v[table[i].second]; @@ -315,8 +315,8 @@ template void Copy_plane_permute(Lattice& lhs,const Lattice *************************************************************************************/ /* END LEGAL */ #pragma once +#include #include #include #include diff --git a/Grid/lattice/Lattice_ET.h b/Grid/lattice/Lattice_ET.h index b8abd199..b4f196b6 100644 --- a/Grid/lattice/Lattice_ET.h +++ b/Grid/lattice/Lattice_ET.h @@ -91,12 +91,16 @@ const lobj & eval(const uint64_t ss, const LatticeExprView &arg) { return arg[ss]; } + +// What needs this? 
+#if 1 template accelerator_inline const lobj & eval(const uint64_t ss, const Lattice &arg) { auto view = arg.View(); return view[ss]; } +#endif /////////////////////////////////////////////////// // handle nodes in syntax tree- eval one operand @@ -206,7 +210,7 @@ inline void CBFromExpression(int &cb, const LatticeTrinaryExpression::value, T1>::type * = nullptr> inline void ExpressionViewOpen(T1 &lat) // Lattice leaf { - lat.AcceleratorViewOpen(); + lat.ViewOpen(AcceleratorRead); } template ::value, T1>::type * = nullptr> inline void ExpressionViewOpen(T1 ¬lat) {} @@ -237,7 +241,7 @@ inline void ExpressionViewOpen(LatticeTrinaryExpression &expr) template ::value, T1>::type * = nullptr> inline void ExpressionViewClose( T1 &lat) // Lattice leaf { - lat.AcceleratorViewClose(); + lat.ViewClose(); } template ::value, T1>::type * = nullptr> inline void ExpressionViewClose(T1 ¬lat) {} diff --git a/Grid/lattice/Lattice_arith.h b/Grid/lattice/Lattice_arith.h index 3543d6aa..b1252952 100644 --- a/Grid/lattice/Lattice_arith.h +++ b/Grid/lattice/Lattice_arith.h @@ -36,9 +36,9 @@ NAMESPACE_BEGIN(Grid); template inline void mult(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ ret.Checkerboard() = lhs.Checkerboard(); - auto ret_v = ret.View(); - auto lhs_v = lhs.View(); - auto rhs_v = rhs.View(); + auto ret_v = ret.View(AcceleratorWrite); + auto lhs_v = lhs.View(AcceleratorRead); + auto rhs_v = rhs.View(AcceleratorRead); conformable(ret,rhs); conformable(lhs,rhs); accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ @@ -55,9 +55,9 @@ void mac(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ ret.Checkerboard() = lhs.Checkerboard(); conformable(ret,rhs); conformable(lhs,rhs); - auto ret_v = ret.View(); - auto lhs_v = lhs.View(); - auto rhs_v = rhs.View(); + auto ret_v = ret.View(AcceleratorWrite); + auto lhs_v = lhs.View(AcceleratorRead); + auto rhs_v = rhs.View(AcceleratorRead); accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto 
lhs_t=lhs_v(ss); @@ -72,9 +72,9 @@ void sub(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ ret.Checkerboard() = lhs.Checkerboard(); conformable(ret,rhs); conformable(lhs,rhs); - auto ret_v = ret.View(); - auto lhs_v = lhs.View(); - auto rhs_v = rhs.View(); + auto ret_v = ret.View(AcceleratorWrite); + auto lhs_v = lhs.View(AcceleratorRead); + auto rhs_v = rhs.View(AcceleratorRead); accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto lhs_t=lhs_v(ss); @@ -88,9 +88,9 @@ void add(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ ret.Checkerboard() = lhs.Checkerboard(); conformable(ret,rhs); conformable(lhs,rhs); - auto ret_v = ret.View(); - auto lhs_v = lhs.View(); - auto rhs_v = rhs.View(); + auto ret_v = ret.View(AcceleratorWrite); + auto lhs_v = lhs.View(AcceleratorRead); + auto rhs_v = rhs.View(AcceleratorRead); accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto lhs_t=lhs_v(ss); @@ -107,8 +107,8 @@ template inline void mult(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ ret.Checkerboard() = lhs.Checkerboard(); conformable(lhs,ret); - auto ret_v = ret.View(); - auto lhs_v = lhs.View(); + auto ret_v = ret.View(AcceleratorWrite); + auto lhs_v = lhs.View(AcceleratorRead); accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; mult(&tmp,&lhs_v(ss),&rhs); @@ -120,8 +120,8 @@ template inline void mac(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ ret.Checkerboard() = lhs.Checkerboard(); conformable(ret,lhs); - auto ret_v = ret.View(); - auto lhs_v = lhs.View(); + auto ret_v = ret.View(AcceleratorWrite); + auto lhs_v = lhs.View(AcceleratorRead); accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto lhs_t=lhs_v(ss); @@ -134,8 +134,8 @@ template inline void sub(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ ret.Checkerboard() = lhs.Checkerboard(); conformable(ret,lhs); - auto ret_v = ret.View(); - auto lhs_v = 
lhs.View(); + auto ret_v = ret.View(AcceleratorWrite); + auto lhs_v = lhs.View(AcceleratorRead); accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto lhs_t=lhs_v(ss); @@ -147,8 +147,8 @@ template inline void add(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ ret.Checkerboard() = lhs.Checkerboard(); conformable(lhs,ret); - auto ret_v = ret.View(); - auto lhs_v = lhs.View(); + auto ret_v = ret.View(AcceleratorWrite); + auto lhs_v = lhs.View(AcceleratorRead); accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto lhs_t=lhs_v(ss); @@ -164,8 +164,8 @@ template inline void mult(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ ret.Checkerboard() = rhs.Checkerboard(); conformable(ret,rhs); - auto ret_v = ret.View(); - auto rhs_v = lhs.View(); + auto ret_v = ret.View(AcceleratorWrite); + auto rhs_v = lhs.View(AcceleratorRead); accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto rhs_t=rhs_v(ss); @@ -178,8 +178,8 @@ template inline void mac(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ ret.Checkerboard() = rhs.Checkerboard(); conformable(ret,rhs); - auto ret_v = ret.View(); - auto rhs_v = lhs.View(); + auto ret_v = ret.View(AcceleratorWrite); + auto rhs_v = lhs.View(AcceleratorRead); accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto rhs_t=rhs_v(ss); @@ -192,8 +192,8 @@ template inline void sub(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ ret.Checkerboard() = rhs.Checkerboard(); conformable(ret,rhs); - auto ret_v = ret.View(); - auto rhs_v = lhs.View(); + auto ret_v = ret.View(AcceleratorWrite); + auto rhs_v = lhs.View(AcceleratorRead); accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto rhs_t=rhs_v(ss); @@ -205,8 +205,8 @@ template inline void add(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ ret.Checkerboard() = rhs.Checkerboard(); conformable(ret,rhs); - auto ret_v = 
ret.View(); - auto rhs_v = lhs.View(); + auto ret_v = ret.View(AcceleratorWrite); + auto rhs_v = lhs.View(AcceleratorRead); accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto rhs_t=rhs_v(ss); @@ -220,9 +220,9 @@ void axpy(Lattice &ret,sobj a,const Lattice &x,const Lattice & ret.Checkerboard() = x.Checkerboard(); conformable(ret,x); conformable(x,y); - auto ret_v = ret.View(); - auto x_v = x.View(); - auto y_v = y.View(); + auto ret_v = ret.View(AcceleratorWrite); + auto x_v = x.View(AcceleratorRead); + auto y_v = y.View(AcceleratorRead); accelerator_for(ss,x_v.size(),vobj::Nsimd(),{ auto tmp = a*x_v(ss)+y_v(ss); coalescedWrite(ret_v[ss],tmp); @@ -233,9 +233,9 @@ void axpby(Lattice &ret,sobj a,sobj b,const Lattice &x,const Lattice ret.Checkerboard() = x.Checkerboard(); conformable(ret,x); conformable(x,y); - auto ret_v = ret.View(); - auto x_v = x.View(); - auto y_v = y.View(); + auto ret_v = ret.View(AcceleratorWrite); + auto x_v = x.View(AcceleratorRead); + auto y_v = y.View(AcceleratorRead); accelerator_for(ss,x_v.size(),vobj::Nsimd(),{ auto tmp = a*x_v(ss)+b*y_v(ss); coalescedWrite(ret_v[ss],tmp); diff --git a/Grid/lattice/Lattice_base.h b/Grid/lattice/Lattice_base.h index 76622275..17f84d44 100644 --- a/Grid/lattice/Lattice_base.h +++ b/Grid/lattice/Lattice_base.h @@ -28,6 +28,7 @@ See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ + #pragma once #define STREAMING_STORES @@ -36,181 +37,6 @@ NAMESPACE_BEGIN(Grid); extern int GridCshiftPermuteMap[4][16]; -/////////////////////////////////////////////////////////////////// -// Base class which can be used by traits to pick up behaviour -/////////////////////////////////////////////////////////////////// -class LatticeBase {}; - -///////////////////////////////////////////////////////////////////////////////////////// -// Conformable checks; 
same instance of Grid required -///////////////////////////////////////////////////////////////////////////////////////// -void accelerator_inline conformable(GridBase *lhs,GridBase *rhs) -{ - assert(lhs == rhs); -} - -//////////////////////////////////////////////////////////////////////////// -// Minimal base class containing only data valid to access from accelerator -// _odata will be a managed pointer in CUDA -//////////////////////////////////////////////////////////////////////////// -// Force access to lattice through a view object. -// prevents writing of code that will not offload to GPU, but perhaps annoyingly -// strict since host could could in principle direct access through the lattice object -// Need to decide programming model. -#define LATTICE_VIEW_STRICT -template class LatticeAccelerator : public LatticeBase -{ -protected: - GridBase *_grid; - int checkerboard; - vobj *_odata; // A managed pointer - uint64_t _odata_size; -public: - accelerator_inline LatticeAccelerator() : checkerboard(0), _odata(nullptr), _odata_size(0), _grid(nullptr) { }; - accelerator_inline uint64_t oSites(void) const { return _odata_size; }; - accelerator_inline int Checkerboard(void) const { return checkerboard; }; - accelerator_inline int &Checkerboard(void) { return this->checkerboard; }; // can assign checkerboard on a container, not a view - accelerator_inline void Conformable(GridBase * &grid) const - { - if (grid) conformable(grid, _grid); - else grid = _grid; - }; -}; - -///////////////////////////////////////////////////////////////////////////////////////// -// A View class which provides accessor to the data. 
-// This will be safe to call from accelerator_for and is trivially copy constructible -// The copy constructor for this will need to be used by device lambda functions -///////////////////////////////////////////////////////////////////////////////////////// -template -class LatticeExprView : public LatticeAccelerator -{ -public: - // Rvalue -#ifdef GRID_SIMT - accelerator_inline const typename vobj::scalar_object operator()(size_t i) const { return coalescedRead(this->_odata[i]); } -#else - accelerator_inline const vobj & operator()(size_t i) const { return this->_odata[i]; } -#endif - - accelerator_inline const vobj & operator[](size_t i) const { return this->_odata[i]; }; - accelerator_inline vobj & operator[](size_t i) { return this->_odata[i]; }; - - accelerator_inline uint64_t begin(void) const { return 0;}; - accelerator_inline uint64_t end(void) const { return this->_odata_size; }; - accelerator_inline uint64_t size(void) const { return this->_odata_size; }; - - // Non accelerator functions - LatticeExprView(const LatticeAccelerator &refer_to_me) : LatticeAccelerator (refer_to_me){} - ~LatticeExprView(){} - - void AcceleratorViewOpen(void) - { // Translate the pointer, could save a copy. Could use a "Handle" and not save _odata originally in base - void *cpu_ptr=this->_odata; - // std::cout << "AccViewOpen "<_odata <_odata=(vobj *)AllocationCache::AccViewOpen(this->_odata,this->_odata_size*sizeof(vobj),1,0); - } - void AcceleratorViewClose(void) - { // Inform the manager - // std::cout << "View Close"<_odata<_odata); - } - void CpuViewOpen(void) - { // Translate the pointer - void *cpu_ptr=this->_odata; - // std::cout << "CpuViewOpen "<_odata <_odata=(vobj *)AllocationCache::CpuViewOpen(cpu_ptr,this->_odata_size*sizeof(vobj),1,0); - } - void CpuViewClose(void) - { // Inform the manager - // std::cout << "CpuViewClose"<_odata<_odata); - } - -}; -// UserView constructor,destructor updates view manager -// Non-copyable object??? 
Second base with copy/= deleted? -template -class LatticeView : public LatticeExprView -{ -public: - // Rvalue - /* -#ifdef GRID_SIMT - accelerator_inline const typename vobj::scalar_object operator()(size_t i) const { return coalescedRead(this->_odata[i]); } -#else - accelerator_inline const vobj & operator()(size_t i) const { return this->_odata[i]; } -#endif - - accelerator_inline const vobj & operator[](size_t i) const { return this->_odata[i]; }; - accelerator_inline vobj & operator[](size_t i) { return this->_odata[i]; }; - - accelerator_inline uint64_t begin(void) const { return 0;}; - accelerator_inline uint64_t end(void) const { return this->_odata_size; }; - accelerator_inline uint64_t size(void) const { return this->_odata_size; }; - */ - LatticeView(const LatticeAccelerator &refer_to_me) : LatticeExprView (refer_to_me) - { - this->AcceleratorViewOpen(); - } - ~LatticeView(){ - this->AcceleratorViewClose(); - } -}; - - -///////////////////////////////////////////////////////////////////////////////////////// -// Lattice expression types used by ET to assemble the AST -// -// Need to be able to detect code paths according to the whether a lattice object or not -// so introduce some trait type things -///////////////////////////////////////////////////////////////////////////////////////// - -class LatticeExpressionBase {}; - -template using is_lattice = std::is_base_of; -template using is_lattice_expr = std::is_base_of; - -template struct ViewMapBase { typedef T Type; }; -template struct ViewMapBase { typedef LatticeExprView Type; }; -template using ViewMap = ViewMapBase::value >; - -template -class LatticeUnaryExpression : public LatticeExpressionBase -{ -public: - typedef typename ViewMap<_T1>::Type T1; - Op op; - T1 arg1; - LatticeUnaryExpression(Op _op,const _T1 &_arg1) : op(_op), arg1(_arg1) {}; -}; - -template -class LatticeBinaryExpression : public LatticeExpressionBase -{ -public: - typedef typename ViewMap<_T1>::Type T1; - typedef typename 
ViewMap<_T2>::Type T2; - Op op; - T1 arg1; - T2 arg2; - LatticeBinaryExpression(Op _op,const _T1 &_arg1,const _T2 &_arg2) : op(_op), arg1(_arg1), arg2(_arg2) {}; -}; - -template -class LatticeTrinaryExpression : public LatticeExpressionBase -{ -public: - typedef typename ViewMap<_T1>::Type T1; - typedef typename ViewMap<_T2>::Type T2; - typedef typename ViewMap<_T3>::Type T3; - Op op; - T1 arg1; - T2 arg2; - T3 arg3; - LatticeTrinaryExpression(Op _op,const _T1 &_arg1,const _T2 &_arg2,const _T3 &_arg3) : op(_op), arg1(_arg1), arg2(_arg2), arg3(_arg3) {}; -}; - ///////////////////////////////////////////////////////////////////////////////////////// // The real lattice class, with normal copy and assignment semantics. // This contains extra (host resident) grid pointer data that may be accessed by host code @@ -253,14 +79,20 @@ private: } } public: + ///////////////////////////////////////////////////////////////////////////////// + // Can use to make accelerator dirty without copy from host ; useful for temporaries "dont care" prev contents + ///////////////////////////////////////////////////////////////////////////////// + void SetViewMode(ViewMode mode) { + LatticeView accessor(*( (LatticeAccelerator *) this),mode); + } ///////////////////////////////////////////////////////////////////////////////// // Return a view object that may be dereferenced in site loops. 
// The view is trivially copy constructible and may be copied to an accelerator device // in device lambdas ///////////////////////////////////////////////////////////////////////////////// - LatticeView View (void) const + LatticeView View (ViewMode mode) const { - LatticeView accessor(*( (LatticeAccelerator *) this)); + LatticeView accessor(*( (LatticeAccelerator *) this),mode); return accessor; } @@ -286,7 +118,7 @@ public: auto exprCopy = expr; ExpressionViewOpen(exprCopy); - auto me = View(); + auto me = View(AcceleratorWriteDiscard); accelerator_for(ss,me.size(),1,{ auto tmp = eval(ss,exprCopy); vstream(me[ss],tmp); @@ -308,7 +140,7 @@ public: auto exprCopy = expr; ExpressionViewOpen(exprCopy); - auto me = View(); + auto me = View(AcceleratorWriteDiscard); accelerator_for(ss,me.size(),1,{ auto tmp = eval(ss,exprCopy); vstream(me[ss],tmp); @@ -329,7 +161,7 @@ public: this->checkerboard=cb; auto exprCopy = expr; ExpressionViewOpen(exprCopy); - auto me = View(); + auto me = View(AcceleratorWriteDiscard); accelerator_for(ss,me.size(),1,{ auto tmp = eval(ss,exprCopy); vstream(me[ss],tmp); @@ -385,9 +217,9 @@ public: } template inline Lattice & operator = (const sobj & r){ - auto me = View(); - thread_for(ss,me.size(),{ - me[ss] = r; + auto me = View(AcceleratorWriteDiscard); + accelerator_for(ss,me.size(),1,{ + me[ss]= r; }); return *this; } @@ -398,11 +230,12 @@ public: /////////////////////////////////////////// // user defined constructor /////////////////////////////////////////// - Lattice(GridBase *grid) { + Lattice(GridBase *grid,ViewMode mode=AcceleratorWriteDiscard) { this->_grid = grid; resize(this->_grid->oSites()); assert((((uint64_t)&this->_odata[0])&0xF) ==0); this->checkerboard=0; + SetViewMode(mode); } // virtual ~Lattice(void) = default; @@ -418,7 +251,6 @@ public: // copy constructor /////////////////////////////////////////// Lattice(const Lattice& r){ - // std::cout << "Lattice constructor(const Lattice &) "<_grid = r.Grid(); 
resize(this->_grid->oSites()); *this = r; @@ -441,8 +273,8 @@ public: typename std::enable_if::value,int>::type i=0; conformable(*this,r); this->checkerboard = r.Checkerboard(); - auto me = View(); - auto him= r.View(); + auto me = View(AcceleratorWriteDiscard); + auto him= r.View(AcceleratorRead); accelerator_for(ss,me.size(),vobj::Nsimd(),{ coalescedWrite(me[ss],him(ss)); }); @@ -455,8 +287,8 @@ public: inline Lattice & operator = (const Lattice & r){ this->checkerboard = r.Checkerboard(); conformable(*this,r); - auto me = View(); - auto him= r.View(); + auto me = View(AcceleratorWriteDiscard); + auto him= r.View(AcceleratorRead); accelerator_for(ss,me.size(),vobj::Nsimd(),{ coalescedWrite(me[ss],him(ss)); }); diff --git a/Grid/lattice/Lattice_comparison.h b/Grid/lattice/Lattice_comparison.h index bbed2ef5..17a61750 100644 --- a/Grid/lattice/Lattice_comparison.h +++ b/Grid/lattice/Lattice_comparison.h @@ -78,9 +78,9 @@ template inline Lattice LLComparison(vfunctor op,const Lattice &lhs,const Lattice &rhs) { Lattice ret(rhs.Grid()); - auto lhs_v = lhs.View(); - auto rhs_v = rhs.View(); - auto ret_v = ret.View(); + auto lhs_v = lhs.View(CpuRead); + auto rhs_v = rhs.View(CpuRead); + auto ret_v = ret.View(CpuWrite); thread_for( ss, rhs_v.size(), { ret_v[ss]=op(lhs_v[ss],rhs_v[ss]); }); @@ -93,8 +93,8 @@ template inline Lattice LSComparison(vfunctor op,const Lattice &lhs,const robj &rhs) { Lattice ret(lhs.Grid()); - auto lhs_v = lhs.View(); - auto ret_v = ret.View(); + auto lhs_v = lhs.View(CpuRead); + auto ret_v = ret.View(CpuWrite); thread_for( ss, lhs_v.size(), { ret_v[ss]=op(lhs_v[ss],rhs); }); @@ -107,8 +107,8 @@ template inline Lattice SLComparison(vfunctor op,const lobj &lhs,const Lattice &rhs) { Lattice ret(rhs.Grid()); - auto rhs_v = rhs.View(); - auto ret_v = ret.View(); + auto rhs_v = rhs.View(CpuRead); + auto ret_v = ret.View(CpuWrite); thread_for( ss, rhs_v.size(), { ret_v[ss]=op(lhs,rhs_v[ss]); }); diff --git a/Grid/lattice/Lattice_coordinate.h 
b/Grid/lattice/Lattice_coordinate.h index a1abe58d..b8e73b25 100644 --- a/Grid/lattice/Lattice_coordinate.h +++ b/Grid/lattice/Lattice_coordinate.h @@ -37,7 +37,7 @@ template inline void LatticeCoordinate(Lattice &l,int mu) GridBase *grid = l.Grid(); int Nsimd = grid->iSites(); - auto l_v = l.View(); + auto l_v = l.View(CpuWrite); thread_for( o, grid->oSites(), { vector_type vI; Coordinate gcoor; @@ -51,23 +51,5 @@ template inline void LatticeCoordinate(Lattice &l,int mu) }); }; -// LatticeCoordinate(); -// FIXME for debug; deprecate this; made obscelete by -template void lex_sites(Lattice &l){ - auto l_v = l.View(); - Real *v_ptr = (Real *)&l_v[0]; - size_t o_len = l.Grid()->oSites(); - size_t v_len = sizeof(vobj)/sizeof(vRealF); - size_t vec_len = vRealF::Nsimd(); - - for(int i=0;i inline auto localNorm2 (const Lattice &rhs)-> Lattice { Lattice ret(rhs.Grid()); - auto rhs_v = rhs.View(); - auto ret_v = ret.View(); + auto rhs_v = rhs.View(AcceleratorRead); + auto ret_v = ret.View(AcceleratorWrite); accelerator_for(ss,rhs_v.size(),vobj::Nsimd(),{ coalescedWrite(ret_v[ss],innerProduct(rhs_v(ss),rhs_v(ss))); }); @@ -56,9 +56,9 @@ template inline auto localInnerProduct (const Lattice &lhs,const Lattice &rhs) -> Lattice { Lattice ret(rhs.Grid()); - auto lhs_v = lhs.View(); - auto rhs_v = rhs.View(); - auto ret_v = ret.View(); + auto lhs_v = lhs.View(AcceleratorRead); + auto rhs_v = rhs.View(AcceleratorRead); + auto ret_v = ret.View(AcceleratorWrite); accelerator_for(ss,rhs_v.size(),vobj::Nsimd(),{ coalescedWrite(ret_v[ss],innerProduct(lhs_v(ss),rhs_v(ss))); }); @@ -73,9 +73,9 @@ inline auto outerProduct (const Lattice &lhs,const Lattice &rhs) -> Latt typedef decltype(coalescedRead(ll())) sll; typedef decltype(coalescedRead(rr())) srr; Lattice ret(rhs.Grid()); - auto lhs_v = lhs.View(); - auto rhs_v = rhs.View(); - auto ret_v = ret.View(); + auto lhs_v = lhs.View(AcceleratorRead); + auto rhs_v = rhs.View(AcceleratorRead); + auto ret_v = ret.View(AcceleratorWrite); 
accelerator_for(ss,rhs_v.size(),1,{ // FIXME had issues with scalar version of outer // Use vector [] operator and don't read coalesce this loop diff --git a/Grid/lattice/Lattice_matrix_reduction.h b/Grid/lattice/Lattice_matrix_reduction.h index 0980ad8a..88de5210 100644 --- a/Grid/lattice/Lattice_matrix_reduction.h +++ b/Grid/lattice/Lattice_matrix_reduction.h @@ -51,9 +51,9 @@ static void sliceMaddMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice int block =FullGrid->_slice_block [Orthog]; int nblock=FullGrid->_slice_nblock[Orthog]; int ostride=FullGrid->_ostride[Orthog]; - auto X_v = X.View(); - auto Y_v = Y.View(); - auto R_v = R.View(); + auto X_v = X.View(CpuRead); + auto Y_v = Y.View(CpuRead); + auto R_v = R.View(CpuWrite); thread_region { std::vector s_x(Nblock); @@ -97,8 +97,8 @@ static void sliceMulMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice< int nblock=FullGrid->_slice_nblock[Orthog]; int ostride=FullGrid->_ostride[Orthog]; - auto X_v = X.View(); - auto R_v = R.View(); + auto X_v = X.View(CpuRead); + auto R_v = R.View(CpuWrite); thread_region { @@ -156,8 +156,8 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice int ostride=FullGrid->_ostride[Orthog]; typedef typename vobj::vector_typeD vector_typeD; - auto lhs_v = lhs.View(); - auto rhs_v = rhs.View(); + auto lhs_v = lhs.View(CpuRead); + auto rhs_v = rhs.View(CpuRead); thread_region { std::vector Left(Nblock); std::vector Right(Nblock); diff --git a/Grid/lattice/Lattice_peekpoke.h b/Grid/lattice/Lattice_peekpoke.h index 8f649bd7..af98c07b 100644 --- a/Grid/lattice/Lattice_peekpoke.h +++ b/Grid/lattice/Lattice_peekpoke.h @@ -46,8 +46,8 @@ auto PeekIndex(const Lattice &lhs,int i) -> Lattice(vobj(),i))> ret(lhs.Grid()); ret.Checkerboard()=lhs.Checkerboard(); - auto ret_v = ret.View(); - auto lhs_v = lhs.View(); + auto ret_v = ret.View(CpuWrite); + auto lhs_v = lhs.View(CpuRead); thread_for( ss, lhs_v.size(), { ret_v[ss] = peekIndex(lhs_v[ss],i); }); @@ -58,8 +58,8 @@ 
auto PeekIndex(const Lattice &lhs,int i,int j) -> Lattice(vobj(),i,j))> ret(lhs.Grid()); ret.Checkerboard()=lhs.Checkerboard(); - auto ret_v = ret.View(); - auto lhs_v = lhs.View(); + auto ret_v = ret.View(CpuWrite); + auto lhs_v = lhs.View(CpuRead); thread_for( ss, lhs_v.size(), { ret_v[ss] = peekIndex(lhs_v[ss],i,j); }); @@ -72,8 +72,8 @@ auto PeekIndex(const Lattice &lhs,int i,int j) -> Lattice void PokeIndex(Lattice &lhs,const Lattice(vobj(),0))> & rhs,int i) { - auto rhs_v = rhs.View(); - auto lhs_v = lhs.View(); + auto rhs_v = rhs.View(CpuRead); + auto lhs_v = lhs.View(CpuWrite); thread_for( ss, lhs_v.size(), { pokeIndex(lhs_v[ss],rhs_v[ss],i); }); @@ -81,8 +81,8 @@ void PokeIndex(Lattice &lhs,const Lattice(vobj() template void PokeIndex(Lattice &lhs,const Lattice(vobj(),0,0))> & rhs,int i,int j) { - auto rhs_v = rhs.View(); - auto lhs_v = lhs.View(); + auto rhs_v = rhs.View(CpuRead); + auto lhs_v = lhs.View(CpuWrite); thread_for( ss, lhs_v.size(), { pokeIndex(lhs_v[ss],rhs_v[ss],i,j); }); @@ -111,7 +111,7 @@ void pokeSite(const sobj &s,Lattice &l,const Coordinate &site){ // extract-modify-merge cycle is easiest way and this is not perf critical ExtractBuffer buf(Nsimd); - auto l_v = l.View(); + auto l_v = l.View(CpuWrite); if ( rank == grid->ThisRank() ) { extract(l_v[odx],buf); buf[idx] = s; @@ -141,7 +141,7 @@ void peekSite(sobj &s,const Lattice &l,const Coordinate &site){ grid->GlobalCoorToRankIndex(rank,odx,idx,site); ExtractBuffer buf(Nsimd); - auto l_v = l.View(); + auto l_v = l.View(CpuWrite); extract(l_v[odx],buf); s = buf[idx]; @@ -173,7 +173,7 @@ inline void peekLocalSite(sobj &s,const Lattice &l,Coordinate &site){ idx= grid->iIndex(site); odx= grid->oIndex(site); - auto l_v = l.View(); + auto l_v = l.View(CpuRead); scalar_type * vp = (scalar_type *)&l_v[odx]; scalar_type * pt = (scalar_type *)&s; @@ -202,7 +202,7 @@ inline void pokeLocalSite(const sobj &s,Lattice &l,Coordinate &site){ idx= grid->iIndex(site); odx= grid->oIndex(site); - auto l_v = 
l.View(); + auto l_v = l.View(CpuWrite); scalar_type * vp = (scalar_type *)&l_v[odx]; scalar_type * pt = (scalar_type *)&s; for(int w=0;w inline Lattice adj(const Lattice &lhs){ Lattice ret(lhs.Grid()); - auto lhs_v = lhs.View(); - auto ret_v = ret.View(); + auto lhs_v = lhs.View(AcceleratorRead); + auto ret_v = ret.View(AcceleratorWrite); accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), { coalescedWrite(ret_v[ss], adj(lhs_v(ss))); }); @@ -50,8 +50,8 @@ template inline Lattice adj(const Lattice &lhs){ template inline Lattice conjugate(const Lattice &lhs){ Lattice ret(lhs.Grid()); - auto lhs_v = lhs.View(); - auto ret_v = ret.View(); + auto lhs_v = lhs.View(AcceleratorRead); + auto ret_v = ret.View(AcceleratorWrite); accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), { coalescedWrite( ret_v[ss] , conjugate(lhs_v(ss))); }); diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index 997affe8..99d799b6 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -76,7 +76,7 @@ inline typename vobj::scalar_object sum(const vobj *arg, Integer osites) template inline typename vobj::scalar_object sum(const Lattice &arg) { - auto arg_v = arg.View(); + auto arg_v = arg.View(AcceleratorRead); Integer osites = arg.Grid()->oSites(); auto ssum= sum(&arg_v[0],osites); arg.Grid()->GlobalSum(ssum); @@ -102,8 +102,8 @@ inline ComplexD innerProduct(const Lattice &left,const Lattice &righ GridBase *grid = left.Grid(); // Might make all code paths go this way. 
- auto left_v = left.View(); - auto right_v=right.View(); + auto left_v = left.View(AcceleratorRead); + auto right_v=right.View(AcceleratorRead); const uint64_t nsimd = grid->Nsimd(); const uint64_t sites = grid->oSites(); @@ -167,9 +167,9 @@ axpby_norm_fast(Lattice &z,sobj a,sobj b,const Lattice &x,const Latt GridBase *grid = x.Grid(); - auto x_v=x.View(); - auto y_v=y.View(); - auto z_v=z.View(); + auto x_v=x.View(AcceleratorRead); + auto y_v=y.View(AcceleratorRead); + auto z_v=z.View(AcceleratorWrite); const uint64_t nsimd = grid->Nsimd(); const uint64_t sites = grid->oSites(); @@ -271,7 +271,7 @@ template inline void sliceSum(const Lattice &Data,std::vector< // sum over reduced dimension planes, breaking out orthog dir // Parallel over orthog direction - auto Data_v=Data.View(); + auto Data_v=Data.View(CpuRead); thread_for( r,rd, { int so=r*grid->_ostride[orthogdim]; // base offset for start of plane for(int n=0;n & result, const Latti int e2= grid->_slice_block [orthogdim]; int stride=grid->_slice_stride[orthogdim]; - auto lhv=lhs.View(); - auto rhv=rhs.View(); + auto lhv=lhs.View(CpuRead); + auto rhv=rhs.View(CpuRead); thread_for( r,rd,{ int so=r*grid->_ostride[orthogdim]; // base offset for start of plane @@ -457,14 +457,12 @@ static void sliceMaddVector(Lattice &R,std::vector &a,const Lattice tensor_reduced at; at=av; - auto Rv=R.View(); - auto Xv=X.View(); - auto Yv=Y.View(); - thread_for_collapse(2, n, e1, { - for(int b=0;b &R,Eigen::MatrixXcd &aa,const Lattice int nblock=FullGrid->_slice_nblock[Orthog]; int ostride=FullGrid->_ostride[Orthog]; - auto X_v=X.View(); - auto Y_v=Y.View(); - auto R_v=R.View(); + auto X_v=X.View(CpuRead); + auto Y_v=Y.View(CpuRead); + auto R_v=R.View(CpuWrite); thread_region { Vector s_x(Nblock); @@ -564,13 +562,14 @@ static void sliceMulMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice< // int nl=1; //FIXME package in a convenient iterator + // thread_for2d_in_region //Should loop over a plane orthogonal to direction 
"Orthog" int stride=FullGrid->_slice_stride[Orthog]; int block =FullGrid->_slice_block [Orthog]; int nblock=FullGrid->_slice_nblock[Orthog]; int ostride=FullGrid->_ostride[Orthog]; - auto R_v = R.View(); - auto X_v = X.View(); + auto R_v = R.View(CpuWrite); + auto X_v = X.View(CpuRead); thread_region { std::vector s_x(Nblock); @@ -628,8 +627,8 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice typedef typename vobj::vector_typeD vector_typeD; - auto lhs_v=lhs.View(); - auto rhs_v=rhs.View(); + auto lhs_v=lhs.View(CpuRead); + auto rhs_v=rhs.View(CpuRead); thread_region { std::vector Left(Nblock); diff --git a/Grid/lattice/Lattice_rng.h b/Grid/lattice/Lattice_rng.h index 04b74873..e5da8d35 100644 --- a/Grid/lattice/Lattice_rng.h +++ b/Grid/lattice/Lattice_rng.h @@ -375,7 +375,7 @@ public: int osites = _grid->oSites(); // guaranteed to be <= l.Grid()->oSites() by a factor multiplicity int words = sizeof(scalar_object) / sizeof(scalar_type); - auto l_v = l.View(); + auto l_v = l.View(CpuWrite); thread_for( ss, osites, { ExtractBuffer buf(Nsimd); for (int m = 0; m < multiplicity; m++) { // Draw from same generator multiplicity times diff --git a/Grid/lattice/Lattice_trace.h b/Grid/lattice/Lattice_trace.h index 93444e0c..8d1f85bd 100644 --- a/Grid/lattice/Lattice_trace.h +++ b/Grid/lattice/Lattice_trace.h @@ -41,8 +41,8 @@ template inline auto trace(const Lattice &lhs) -> Lattice { Lattice ret(lhs.Grid()); - auto ret_v = ret.View(); - auto lhs_v = lhs.View(); + auto ret_v = ret.View(AcceleratorWrite); + auto lhs_v = lhs.View(AcceleratorRead); accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), { coalescedWrite(ret_v[ss], trace(lhs_v(ss))); }); @@ -56,8 +56,8 @@ template inline auto TraceIndex(const Lattice &lhs) -> Lattice(vobj()))> { Lattice(vobj()))> ret(lhs.Grid()); - auto ret_v = ret.View(); - auto lhs_v = lhs.View(); + auto ret_v = ret.View(AcceleratorWrite); + auto lhs_v = lhs.View(AcceleratorRead); accelerator_for( ss, lhs_v.size(), 
vobj::Nsimd(), { coalescedWrite(ret_v[ss], traceIndex(lhs_v(ss))); }); diff --git a/Grid/lattice/Lattice_transfer.h b/Grid/lattice/Lattice_transfer.h index c80e7db2..9e98d111 100644 --- a/Grid/lattice/Lattice_transfer.h +++ b/Grid/lattice/Lattice_transfer.h @@ -49,8 +49,8 @@ inline void subdivides(GridBase *coarse,GridBase *fine) template inline void pickCheckerboard(int cb,Lattice &half,const Lattice &full){ half.Checkerboard() = cb; - auto half_v = half.View(); - auto full_v = full.View(); + auto half_v = half.View(CpuWrite); + auto full_v = full.View(CpuRead); thread_for(ss, full.Grid()->oSites(),{ int cbos; Coordinate coor; @@ -65,8 +65,8 @@ template inline void pickCheckerboard(int cb,Lattice &half,con } template inline void setCheckerboard(Lattice &full,const Lattice &half){ int cb = half.Checkerboard(); - auto half_v = half.View(); - auto full_v = full.View(); + auto half_v = half.View(CpuRead); + auto full_v = full.View(CpuWrite); thread_for(ss,full.Grid()->oSites(),{ Coordinate coor; @@ -92,9 +92,8 @@ inline void blockProject(Lattice > &coarseData, Lattice ip(coarse); - // auto fineData_ = fineData.View(); - auto coarseData_ = coarseData.View(); - auto ip_ = ip.View(); + auto coarseData_ = coarseData.View(AcceleratorWrite); + auto ip_ = ip.View(AcceleratorWrite); for(int v=0;voSites(), vobj::Nsimd(), { @@ -102,7 +101,7 @@ inline void blockProject(Lattice > &coarseData, }); } } - +#if 0 template inline void blockProject1(Lattice > &coarseData, const Lattice &fineData, @@ -132,8 +131,8 @@ inline void blockProject1(Lattice > &coarseData, coarseData=Zero(); - auto fineData_ = fineData.View(); - auto coarseData_ = coarseData.View(); + auto fineData_ = fineData.View(AcceleratorRead); + auto coarseData_ = coarseData.View(AcceleratorWrite); //////////////////////////////////////////////////////////////////////////////////////////////////////// // To make this lock free, loop over coars parallel, and then loop over fine associated with coarse. 
// Otherwise do fine inner product per site, and make the update atomic @@ -142,7 +141,7 @@ inline void blockProject1(Lattice > &coarseData, auto sc=sci/nbasis; auto i=sci%nbasis; - auto Basis_ = Basis[i].View(); + auto Basis_ = Basis[i].View(AcceleratorRead); Coordinate coor_c(_ndimension); Lexicographic::CoorFromIndex(coor_c,sc,coarse->_rdimensions); // Block coordinate @@ -165,6 +164,7 @@ inline void blockProject1(Lattice > &coarseData, }); return; } +#endif template inline void blockZAXPY(Lattice &fineZ, @@ -191,10 +191,10 @@ inline void blockZAXPY(Lattice &fineZ, assert(block_r[d]*coarse->_rdimensions[d]==fine->_rdimensions[d]); } - auto fineZ_ = fineZ.View(); - auto fineX_ = fineX.View(); - auto fineY_ = fineY.View(); - auto coarseA_= coarseA.View(); + auto fineZ_ = fineZ.View(AcceleratorWrite); + auto fineX_ = fineX.View(AcceleratorRead); + auto fineY_ = fineY.View(AcceleratorRead); + auto coarseA_= coarseA.View(AcceleratorRead); accelerator_for(sf, fine->oSites(), CComplex::Nsimd(), { @@ -227,11 +227,10 @@ inline void blockInnerProduct(Lattice &CoarseInner, Lattice coarse_inner(coarse); // Precision promotion? 
- auto CoarseInner_ = CoarseInner.View(); - auto coarse_inner_ = coarse_inner.View(); - fine_inner = localInnerProduct(fineX,fineY); blockSum(coarse_inner,fine_inner); + auto CoarseInner_ = CoarseInner.View(AcceleratorWrite); + auto coarse_inner_ = coarse_inner.View(AcceleratorRead); accelerator_for(ss, coarse->oSites(), 1, { CoarseInner_[ss] = coarse_inner_[ss]; }); @@ -266,8 +265,8 @@ inline void blockSum(Lattice &coarseData,const Lattice &fineData) // Turn this around to loop threaded over sc and interior loop // over sf would thread better - auto coarseData_ = coarseData.View(); - auto fineData_ = fineData.View(); + auto coarseData_ = coarseData.View(AcceleratorWrite); + auto fineData_ = fineData.View(AcceleratorRead); accelerator_for(sc,coarse->oSites(),1,{ @@ -360,8 +359,8 @@ inline void blockPromote(const Lattice > &coarseData, for(int d=0 ; d<_ndimension;d++){ block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d]; } - auto fineData_ = fineData.View(); - auto coarseData_ = coarseData.View(); + auto fineData_ = fineData.View(AcceleratorWrite); + auto coarseData_ = coarseData.View(AcceleratorRead); // Loop with a cache friendly loop ordering accelerator_for(sf,fine->oSites(),1,{ @@ -374,7 +373,7 @@ inline void blockPromote(const Lattice > &coarseData, Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); for(int i=0;i > &coarseData, for(int i=0;i > ip = PeekIndex<0>(coarseData,i); Lattice cip(coarse); - auto cip_ = cip.View(); - auto ip_ = ip.View(); + auto cip_ = cip.View(AcceleratorWrite); + auto ip_ = ip.View(AcceleratorRead); accelerator_forNB(sc,coarse->oSites(),CComplex::Nsimd(),{ coalescedWrite(cip_[sc], ip_(sc)()); }); @@ -470,8 +469,8 @@ void localCopyRegion(const Lattice &From,Lattice & To,Coordinate Fro Coordinate rdt = Tg->_rdimensions; Coordinate ist = Tg->_istride; Coordinate ost = Tg->_ostride; - auto t_v = To.View(); - auto f_v = From.View(); + auto t_v = To.View(AcceleratorWrite); + auto f_v = From.View(AcceleratorRead); 
accelerator_for(idx,Fg->lSites(),1,{ sobj s; Coordinate Fcoor(nd); @@ -718,7 +717,7 @@ unvectorizeToLexOrdArray(std::vector &out, const Lattice &in) } //loop over outer index - auto in_v = in.View(); + auto in_v = in.View(CpuRead); thread_for(in_oidx,in_grid->oSites(),{ //Assemble vector of pointers to output elements ExtractPointerArray out_ptrs(in_nsimd); @@ -811,7 +810,7 @@ vectorizeFromLexOrdArray( std::vector &in, Lattice &out) icoor[lane].resize(ndim); grid->iCoorFromIindex(icoor[lane],lane); } - auto out_v = out.View(); + auto out_v = out.View(CpuWrite); thread_for(oidx, grid->oSites(),{ //Assemble vector of pointers to output elements ExtractPointerArray ptrs(nsimd); @@ -914,7 +913,7 @@ void precisionChange(Lattice &out, const Lattice &in) std::vector in_slex_conv(in_grid->lSites()); unvectorizeToLexOrdArray(in_slex_conv, in); - auto out_v = out.View(); + auto out_v = out.View(CpuWrite); thread_for(out_oidx,out_grid->oSites(),{ Coordinate out_ocoor(ndim); out_grid->oCoorFromOindex(out_ocoor, out_oidx); diff --git a/Grid/lattice/Lattice_transpose.h b/Grid/lattice/Lattice_transpose.h index 9b0b3483..c17a808b 100644 --- a/Grid/lattice/Lattice_transpose.h +++ b/Grid/lattice/Lattice_transpose.h @@ -41,8 +41,8 @@ NAMESPACE_BEGIN(Grid); template inline Lattice transpose(const Lattice &lhs){ Lattice ret(lhs.Grid()); - auto ret_v = ret.View(); - auto lhs_v = lhs.View(); + auto ret_v = ret.View(AcceleratorWrite); + auto lhs_v = lhs.View(AcceleratorRead); accelerator_for(ss,lhs_v.size(),vobj::Nsimd(),{ coalescedWrite(ret_v[ss], transpose(lhs_v(ss))); }); @@ -56,8 +56,8 @@ template inline auto TransposeIndex(const Lattice &lhs) -> Lattice(vobj()))> { Lattice(vobj()))> ret(lhs.Grid()); - auto ret_v = ret.View(); - auto lhs_v = lhs.View(); + auto ret_v = ret.View(AcceleratorWrite); + auto lhs_v = lhs.View(AcceleratorRead); accelerator_for(ss,lhs_v.size(),vobj::Nsimd(),{ coalescedWrite(ret_v[ss] , transposeIndex(lhs_v(ss))); }); diff --git a/Grid/lattice/Lattice_unary.h 
b/Grid/lattice/Lattice_unary.h index 591afe72..10aa7472 100644 --- a/Grid/lattice/Lattice_unary.h +++ b/Grid/lattice/Lattice_unary.h @@ -35,8 +35,8 @@ NAMESPACE_BEGIN(Grid); template Lattice pow(const Lattice &rhs_i,RealD y){ Lattice ret_i(rhs_i.Grid()); - auto rhs = rhs_i.View(); - auto ret = ret_i.View(); + auto rhs = rhs_i.View(AcceleratorRead); + auto ret = ret_i.View(AcceleratorWrite); ret.Checkerboard() = rhs.Checkerboard(); accelerator_for(ss,rhs.size(),1,{ ret[ss]=pow(rhs[ss],y); @@ -45,8 +45,8 @@ template Lattice pow(const Lattice &rhs_i,RealD y){ } template Lattice mod(const Lattice &rhs_i,Integer y){ Lattice ret_i(rhs_i.Grid()); - auto rhs = rhs_i.View(); - auto ret = ret_i.View(); + auto rhs = rhs_i.View(AcceleratorRead); + auto ret = ret_i.View(AcceleratorWrite); ret.Checkerboard() = rhs.Checkerboard(); accelerator_for(ss,rhs.size(),obj::Nsimd(),{ coalescedWrite(ret[ss],mod(rhs(ss),y)); @@ -56,8 +56,8 @@ template Lattice mod(const Lattice &rhs_i,Integer y){ template Lattice div(const Lattice &rhs_i,Integer y){ Lattice ret_i(rhs_i.Grid()); - auto ret = ret_i.View(); - auto rhs = rhs_i.View(); + auto ret = ret_i.View(AcceleratorWrite); + auto rhs = rhs_i.View(AcceleratorRead); ret.Checkerboard() = rhs_i.Checkerboard(); accelerator_for(ss,rhs.size(),obj::Nsimd(),{ coalescedWrite(ret[ss],div(rhs(ss),y)); @@ -67,8 +67,8 @@ template Lattice div(const Lattice &rhs_i,Integer y){ template Lattice expMat(const Lattice &rhs_i, RealD alpha, Integer Nexp = DEFAULT_MAT_EXP){ Lattice ret_i(rhs_i.Grid()); - auto rhs = rhs_i.View(); - auto ret = ret_i.View(); + auto rhs = rhs_i.View(AcceleratorRead); + auto ret = ret_i.View(AcceleratorWrite); ret.Checkerboard() = rhs.Checkerboard(); accelerator_for(ss,rhs.size(),obj::Nsimd(),{ coalescedWrite(ret[ss],Exponentiate(rhs(ss),alpha, Nexp)); diff --git a/Grid/qcd/action/fermion/GparityWilsonImpl.h b/Grid/qcd/action/fermion/GparityWilsonImpl.h index 47d1a861..a8ae90ec 100644 --- a/Grid/qcd/action/fermion/GparityWilsonImpl.h 
+++ b/Grid/qcd/action/fermion/GparityWilsonImpl.h @@ -233,10 +233,10 @@ public: Uconj = where(coor==neglink,-Uconj,Uconj); } - auto U_v = U.View(); - auto Uds_v = Uds.View(); - auto Uconj_v = Uconj.View(); - auto Utmp_v= Utmp.View(); + auto U_v = U.View(CpuRead); + auto Uds_v = Uds.View(CpuWrite); + auto Uconj_v = Uconj.View(CpuRead); + auto Utmp_v= Utmp.View(CpuWrite); thread_foreach(ss,U_v,{ Uds_v[ss](0)(mu) = U_v[ss](); Uds_v[ss](1)(mu) = Uconj_v[ss](); @@ -272,8 +272,8 @@ public: GaugeLinkField link(mat.Grid()); // use lorentz for flavour as hack. auto tmp = TraceIndex(outerProduct(Btilde, A)); - auto link_v = link.View(); - auto tmp_v = tmp.View(); + auto link_v = link.View(CpuWrite); + auto tmp_v = tmp.View(CpuRead); thread_foreach(ss,tmp_v,{ link_v[ss]() = tmp_v[ss](0, 0) + conjugate(tmp_v[ss](1, 1)); }); @@ -306,9 +306,9 @@ public: GaugeLinkField tmp(mat.Grid()); tmp = Zero(); - auto tmp_v = tmp.View(); - auto Atilde_v = Atilde.View(); - auto Btilde_v = Btilde.View(); + auto tmp_v = tmp.View(CpuWrite); + auto Atilde_v = Atilde.View(CpuRead); + auto Btilde_v = Btilde.View(CpuRead); thread_for(ss,tmp.Grid()->oSites(),{ for (int s = 0; s < Ls; s++) { int sF = s + Ls * ss; diff --git a/Grid/qcd/action/fermion/WilsonCloverFermion.h b/Grid/qcd/action/fermion/WilsonCloverFermion.h index 3847b0d9..05143551 100644 --- a/Grid/qcd/action/fermion/WilsonCloverFermion.h +++ b/Grid/qcd/action/fermion/WilsonCloverFermion.h @@ -264,8 +264,8 @@ private: { CloverFieldType T(F.Grid()); T = Zero(); - auto T_v = T.View(); - auto F_v = F.View(); + auto T_v = T.View(CpuWrite); + auto F_v = F.View(CpuRead); thread_for(i, CloverTerm.Grid()->oSites(), { T_v[i]()(0, 1) = timesMinusI(F_v[i]()()); @@ -282,8 +282,8 @@ private: CloverFieldType T(F.Grid()); T = Zero(); - auto T_v = T.View(); - auto F_v = F.View(); + auto T_v = T.View(CpuWrite); + auto F_v = F.View(CpuRead); thread_for(i, CloverTerm.Grid()->oSites(), { T_v[i]()(0, 1) = -F_v[i]()(); @@ -300,8 +300,8 @@ private: 
CloverFieldType T(F.Grid()); T = Zero(); - auto T_v = T.View(); - auto F_v = F.View(); + auto T_v = T.View(CpuWrite); + auto F_v = F.View(CpuRead); thread_for(i, CloverTerm.Grid()->oSites(), { T_v[i]()(0, 0) = timesMinusI(F_v[i]()()); @@ -318,8 +318,8 @@ private: CloverFieldType T(F.Grid()); T = Zero(); - auto T_v = T.View(); - auto F_v = F.View(); + auto T_v = T.View(CpuWrite); + auto F_v = F.View(CpuRead); thread_for(i, CloverTerm.Grid()->oSites(), { T_v[i]()(0, 1) = timesI(F_v[i]()()); @@ -336,8 +336,8 @@ private: CloverFieldType T(F.Grid()); T = Zero(); - auto T_v = T.View(); - auto F_v = F.View(); + auto T_v = T.View(CpuWrite); + auto F_v = F.View(CpuRead); thread_for(i, CloverTerm.Grid()->oSites(), { T_v[i]()(0, 1) = -(F_v[i]()()); @@ -355,8 +355,8 @@ private: T = Zero(); - auto T_v = T.View(); - auto F_v = F.View(); + auto T_v = T.View(CpuWrite); + auto F_v = F.View(CpuRead); thread_for(i, CloverTerm.Grid()->oSites(), { T_v[i]()(0, 0) = timesI(F_v[i]()()); diff --git a/Grid/qcd/action/fermion/WilsonImpl.h b/Grid/qcd/action/fermion/WilsonImpl.h index e78023cf..356d0941 100644 --- a/Grid/qcd/action/fermion/WilsonImpl.h +++ b/Grid/qcd/action/fermion/WilsonImpl.h @@ -106,9 +106,9 @@ public: const _SpinorField & phi, int mu) { - auto out_v= out.View(); - auto phi_v= phi.View(); - auto Umu_v= Umu.View(); + auto out_v= out.View(CpuWrite); + auto phi_v= phi.View(CpuRead); + auto Umu_v= Umu.View(CpuRead); thread_for(sss,out.Grid()->oSites(),{ multLink(out_v[sss],Umu_v[sss],phi_v[sss],mu); }); @@ -191,9 +191,9 @@ public: int Ls=Btilde.Grid()->_fdimensions[0]; GaugeLinkField tmp(mat.Grid()); tmp = Zero(); - auto tmp_v = tmp.View(); - auto Btilde_v = Btilde.View(); - auto Atilde_v = Atilde.View(); + auto tmp_v = tmp.View(CpuWrite); + auto Btilde_v = Btilde.View(CpuRead); + auto Atilde_v = Atilde.View(CpuRead); thread_for(sss,tmp.Grid()->oSites(),{ int sU=sss; for(int s=0;s::M5D(const FermionField &psi_i, chi_i.Checkerboard()=psi_i.Checkerboard(); GridBase 
*grid=psi_i.Grid(); - auto psi = psi_i.View(); - auto phi = phi_i.View(); - auto chi = chi_i.View(); + auto psi = psi_i.View(AcceleratorRead); + auto phi = phi_i.View(AcceleratorRead); + auto chi = chi_i.View(AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); auto pdiag = &diag[0]; @@ -93,9 +93,9 @@ CayleyFermion5D::M5Ddag(const FermionField &psi_i, { chi_i.Checkerboard()=psi_i.Checkerboard(); GridBase *grid=psi_i.Grid(); - auto psi = psi_i.View(); - auto phi = phi_i.View(); - auto chi = chi_i.View(); + auto psi = psi_i.View(AcceleratorRead); + auto phi = phi_i.View(AcceleratorRead); + auto chi = chi_i.View(AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); auto pdiag = &diag[0]; @@ -131,8 +131,8 @@ CayleyFermion5D::MooeeInv (const FermionField &psi_i, FermionField &chi chi_i.Checkerboard()=psi_i.Checkerboard(); GridBase *grid=psi_i.Grid(); - auto psi = psi_i.View(); - auto chi = chi_i.View(); + auto psi = psi_i.View(AcceleratorRead); + auto chi = chi_i.View(AcceleratorWrite); int Ls=this->Ls; @@ -193,8 +193,8 @@ CayleyFermion5D::MooeeInvDag (const FermionField &psi_i, FermionField &chi GridBase *grid=psi_i.Grid(); int Ls=this->Ls; - auto psi = psi_i.View(); - auto chi = chi_i.View(); + auto psi = psi_i.View(AcceleratorRead); + auto chi = chi_i.View(AcceleratorWrite); auto plee = & lee [0]; auto pdee = & dee [0]; diff --git a/Grid/qcd/action/fermion/implementation/CayleyFermion5Dvec.h b/Grid/qcd/action/fermion/implementation/CayleyFermion5Dvec.h index 034ce642..079ea481 100644 --- a/Grid/qcd/action/fermion/implementation/CayleyFermion5Dvec.h +++ b/Grid/qcd/action/fermion/implementation/CayleyFermion5Dvec.h @@ -65,9 +65,9 @@ CayleyFermion5D::M5D(const FermionField &psi_i, EnableIf sfinae=0; chi_i.Checkerboard()=psi_i.Checkerboard(); GridBase *grid=psi_i.Grid(); - auto psi = psi_i.View(); - auto phi = phi_i.View(); - auto chi = chi_i.View(); + auto psi = psi_i.View(CpuRead); + auto phi = phi_i.View(CpuRead); + auto chi = 
chi_i.View(CpuWrite); int Ls = this->Ls; int LLs = grid->_rdimensions[0]; const int nsimd= Simd::Nsimd(); @@ -213,9 +213,9 @@ CayleyFermion5D::M5Ddag(const FermionField &psi_i, EnableIf sfinae=0; chi_i.Checkerboard()=psi_i.Checkerboard(); GridBase *grid=psi_i.Grid(); - auto psi=psi_i.View(); - auto phi=phi_i.View(); - auto chi=chi_i.View(); + auto psi=psi_i.View(CpuRead); + auto phi=phi_i.View(CpuRead); + auto chi=chi_i.View(CpuWrite); int Ls = this->Ls; int LLs = grid->_rdimensions[0]; int nsimd= Simd::Nsimd(); @@ -357,8 +357,8 @@ CayleyFermion5D::MooeeInternalAsm(const FermionField &psi_i, FermionField Vector > &Matm) { EnableIf sfinae=0; - auto psi = psi_i.View(); - auto chi = chi_i.View(); + auto psi = psi_i.View(CpuRead); + auto chi = chi_i.View(CpuWrite); #ifndef AVX512 { SiteHalfSpinor BcastP; @@ -535,8 +535,8 @@ CayleyFermion5D::MooeeInternalZAsm(const FermionField &psi_i, FermionField EnableIf sfinae=0; #ifndef AVX512 { - auto psi = psi_i.View(); - auto chi = chi_i.View(); + auto psi = psi_i.View(CpuRead); + auto chi = chi_i.View(CpuWrite); SiteHalfSpinor BcastP; SiteHalfSpinor BcastM; @@ -586,8 +586,8 @@ CayleyFermion5D::MooeeInternalZAsm(const FermionField &psi_i, FermionField } #else { - auto psi = psi_i.View(); - auto chi = chi_i.View(); + auto psi = psi_i.View(CpuRead); + auto chi = chi_i.View(CpuWrite); // pointers // MASK_REGS; #define Chi_00 %zmm0 diff --git a/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h b/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h index 46d3fa1f..100eb0d2 100644 --- a/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h +++ b/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h @@ -46,9 +46,9 @@ void DomainWallEOFAFermion::M5D(const FermionField& psi_i, const FermionFi chi_i.Checkerboard() = psi_i.Checkerboard(); int Ls = this->Ls; GridBase* grid = psi_i.Grid(); - auto phi = phi_i.View(); - auto psi = psi_i.View(); - auto chi = chi_i.View(); + auto phi = 
phi_i.View(AcceleratorRead); + auto psi = psi_i.View(AcceleratorRead); + auto chi = chi_i.View(AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); auto pdiag = &diag[0]; auto pupper = &upper[0]; @@ -82,9 +82,9 @@ void DomainWallEOFAFermion::M5Ddag(const FermionField& psi_i, const Fermio GridBase* grid = psi_i.Grid(); int Ls = this->Ls; - auto psi = psi_i.View(); - auto phi = phi_i.View(); - auto chi = chi_i.View(); + auto psi = psi_i.View(AcceleratorRead); + auto phi = phi_i.View(AcceleratorRead); + auto chi = chi_i.View(AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); auto pdiag = &diag[0]; auto pupper = &upper[0]; @@ -116,8 +116,8 @@ void DomainWallEOFAFermion::MooeeInv(const FermionField& psi_i, FermionFie { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase* grid = psi_i.Grid(); - auto psi=psi_i.View(); - auto chi=chi_i.View(); + auto psi=psi_i.View(AcceleratorRead); + auto chi=chi_i.View(AcceleratorWrite); int Ls = this->Ls; auto plee = & this->lee[0]; @@ -172,8 +172,8 @@ void DomainWallEOFAFermion::MooeeInvDag(const FermionField& psi_i, Fermion { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase* grid = psi_i.Grid(); - auto psi = psi_i.View(); - auto chi = chi_i.View(); + auto psi = psi_i.View(AcceleratorRead); + auto chi = chi_i.View(AcceleratorWrite); int Ls = this->Ls; auto plee = & this->lee[0]; diff --git a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h index 23692d49..01d5578f 100644 --- a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h @@ -221,10 +221,10 @@ void ImprovedStaggeredFermion5D::DhopDir(const FermionField &in, FermionFi Compressor compressor; Stencil.HaloExchange(in,compressor); - auto Umu_v = Umu.View(); - auto UUUmu_v = UUUmu.View(); - auto in_v = in.View(); - 
auto out_v = out.View(); + auto Umu_v = Umu.View(CpuRead); + auto UUUmu_v = UUUmu.View(CpuRead); + auto in_v = in.View(CpuRead); + auto out_v = out.View(CpuWrite); thread_for( ss,Umu.Grid()->oSites(),{ for(int s=0;s::DhopInternalOverlappedComms(StencilImpl & } // do the compute - auto U_v = U.View(); - auto UUU_v = UUU.View(); - auto in_v = in.View(); - auto out_v = out.View(); + auto U_v = U.View(CpuRead); + auto UUU_v = UUU.View(CpuRead); + auto in_v = in.View(CpuRead); + auto out_v = out.View(CpuWrite); if (dag == DaggerYes) { for (int ss = myblock; ss < myblock+myn; ++ss) { @@ -376,10 +376,10 @@ void ImprovedStaggeredFermion5D::DhopInternalOverlappedComms(StencilImpl & DhopComputeTime2-=usecond(); - auto U_v = U.View(); - auto UUU_v = UUU.View(); - auto in_v = in.View(); - auto out_v = out.View(); + auto U_v = U.View(CpuRead); + auto UUU_v = UUU.View(CpuRead); + auto in_v = in.View(CpuRead); + auto out_v = out.View(CpuWrite); if (dag == DaggerYes) { int sz=st.surface_list.size(); thread_for( ss,sz,{ @@ -418,10 +418,10 @@ void ImprovedStaggeredFermion5D::DhopInternalSerialComms(StencilImpl & st, DhopComputeTime -= usecond(); // Dhop takes the 4d grid from U, and makes a 5d index for fermion - auto U_v = U.View(); - auto UUU_v = UUU.View(); - auto in_v = in.View(); - auto out_v = out.View(); + auto U_v = U.View(CpuRead); + auto UUU_v = UUU.View(CpuRead); + auto in_v = in.View(CpuRead); + auto out_v = out.View(CpuWrite); if (dag == DaggerYes) { thread_for( ss,U.Grid()->oSites(),{ int sU=ss; diff --git a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h index 37675da0..1e59c4e7 100644 --- a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h @@ -250,10 +250,10 @@ void ImprovedStaggeredFermion::DerivInternal(StencilImpl &st, DoubledGauge 
//////////////////////// // Call the single hop //////////////////////// - auto U_v = U.View(); - auto UUU_v = UUU.View(); - auto B_v = B.View(); - auto Btilde_v = Btilde.View(); + auto U_v = U.View(CpuRead); + auto UUU_v = UUU.View(CpuRead); + auto B_v = B.View(CpuWrite); + auto Btilde_v = Btilde.View(CpuWrite); thread_for(sss,B.Grid()->oSites(),{ Kernels::DhopDirKernel(st, U_v, UUU_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu,1); }); @@ -378,10 +378,10 @@ void ImprovedStaggeredFermion::DhopDir(const FermionField &in, FermionFiel Compressor compressor; Stencil.HaloExchange(in, compressor); - auto Umu_v = Umu.View(); - auto UUUmu_v = UUUmu.View(); - auto in_v = in.View(); - auto out_v = out.View(); + auto Umu_v = Umu.View(CpuRead); + auto UUUmu_v = UUUmu.View(CpuRead); + auto in_v = in.View(CpuRead); + auto out_v = out.View(CpuWrite); thread_for( sss, in.Grid()->oSites(),{ Kernels::DhopDirKernel(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dir, disp); }); @@ -449,10 +449,10 @@ void ImprovedStaggeredFermion::DhopInternalOverlappedComms(StencilImpl &st } // do the compute - auto U_v = U.View(); - auto UUU_v = UUU.View(); - auto in_v = in.View(); - auto out_v = out.View(); + auto U_v = U.View(CpuRead); + auto UUU_v = UUU.View(CpuRead); + auto in_v = in.View(CpuRead); + auto out_v = out.View(CpuWrite); if (dag == DaggerYes) { for (int ss = myblock; ss < myblock+myn; ++ss) { int sU = ss; @@ -479,10 +479,10 @@ void ImprovedStaggeredFermion::DhopInternalOverlappedComms(StencilImpl &st DhopComputeTime2 -= usecond(); { - auto U_v = U.View(); - auto UUU_v = UUU.View(); - auto in_v = in.View(); - auto out_v = out.View(); + auto U_v = U.View(CpuRead); + auto UUU_v = UUU.View(CpuRead); + auto in_v = in.View(CpuRead); + auto out_v = out.View(CpuWrite); if (dag == DaggerYes) { int sz=st.surface_list.size(); thread_for(ss,sz,{ @@ -520,10 +520,10 @@ void ImprovedStaggeredFermion::DhopInternalSerialComms(StencilImpl &st, Le st.HaloExchange(in, compressor); 
DhopCommTime += usecond(); - auto U_v = U.View(); - auto UUU_v = UUU.View(); - auto in_v = in.View(); - auto out_v = out.View(); + auto U_v = U.View(CpuRead); + auto UUU_v = UUU.View(CpuRead); + auto in_v = in.View(CpuRead); + auto out_v = out.View(CpuWrite); DhopComputeTime -= usecond(); if (dag == DaggerYes) { thread_for(sss, in.Grid()->oSites(),{ diff --git a/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h b/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h index f74c7a51..ed7be056 100644 --- a/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h +++ b/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h @@ -44,9 +44,9 @@ void MobiusEOFAFermion::M5D(const FermionField &psi_i, const FermionField chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); int Ls = this->Ls; - auto psi = psi_i.View(); - auto phi = phi_i.View(); - auto chi = chi_i.View(); + auto psi = psi_i.View(AcceleratorRead); + auto phi = phi_i.View(AcceleratorRead); + auto chi = chi_i.View(AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); @@ -84,9 +84,9 @@ void MobiusEOFAFermion::M5D_shift(const FermionField &psi_i, const Fermion chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); int Ls = this->Ls; - auto psi = psi_i.View(); - auto phi = phi_i.View(); - auto chi = chi_i.View(); + auto psi = psi_i.View(AcceleratorRead); + auto phi = phi_i.View(AcceleratorRead); + auto chi = chi_i.View(AcceleratorWrite); auto pm = this->pm; int shift_s = (pm == 1) ? 
(Ls-1) : 0; // s-component modified by shift operator @@ -132,9 +132,9 @@ void MobiusEOFAFermion::M5Ddag(const FermionField &psi_i, const FermionFie chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); int Ls = this->Ls; - auto psi = psi_i.View(); - auto phi = phi_i.View(); - auto chi = chi_i.View(); + auto psi = psi_i.View(AcceleratorRead); + auto phi = phi_i.View(AcceleratorRead); + auto chi = chi_i.View(AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); @@ -174,9 +174,9 @@ void MobiusEOFAFermion::M5Ddag_shift(const FermionField &psi_i, const Ferm GridBase *grid = psi_i.Grid(); int Ls = this->Ls; int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator - auto psi = psi_i.View(); - auto phi = phi_i.View(); - auto chi = chi_i.View(); + auto psi = psi_i.View(AcceleratorRead); + auto phi = phi_i.View(AcceleratorRead); + auto chi = chi_i.View(AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); @@ -226,8 +226,8 @@ void MobiusEOFAFermion::MooeeInv(const FermionField &psi_i, FermionField & chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); int Ls = this->Ls; - auto psi = psi_i.View(); - auto chi = chi_i.View(); + auto psi = psi_i.View(AcceleratorRead); + auto chi = chi_i.View(AcceleratorWrite); auto plee = & this->lee [0]; auto pdee = & this->dee [0]; @@ -286,8 +286,8 @@ void MobiusEOFAFermion::MooeeInv_shift(const FermionField &psi_i, FermionF chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); int Ls = this->Ls; - auto psi = psi_i.View(); - auto chi = chi_i.View(); + auto psi = psi_i.View(AcceleratorRead); + auto chi = chi_i.View(AcceleratorWrite); auto pm = this->pm; auto plee = & this->lee [0]; @@ -354,8 +354,8 @@ void MobiusEOFAFermion::MooeeInvDag(const FermionField &psi_i, FermionFiel chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); int Ls = this->Ls; - auto psi = psi_i.View(); - auto chi = chi_i.View(); 
+ auto psi = psi_i.View(AcceleratorRead); + auto chi = chi_i.View(AcceleratorWrite); auto plee = & this->lee [0]; auto pdee = & this->dee [0]; @@ -410,8 +410,8 @@ void MobiusEOFAFermion::MooeeInvDag_shift(const FermionField &psi_i, Fermi { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); - auto psi = psi_i.View(); - auto chi = chi_i.View(); + auto psi = psi_i.View(AcceleratorRead); + auto chi = chi_i.View(AcceleratorWrite); int Ls = this->Ls; auto pm = this->pm; diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h index be05fcf8..9e492831 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h @@ -475,12 +475,12 @@ void WilsonFermion::ContractConservedCurrent(PropagatorField &q_in_1, // Inefficient comms method but not performance critical. tmp1 = Cshift(q_in_1, mu, 1); tmp2 = Cshift(q_in_2, mu, 1); - auto tmp1_v = tmp1.View(); - auto tmp2_v = tmp2.View(); - auto q_in_1_v=q_in_1.View(); - auto q_in_2_v=q_in_2.View(); - auto q_out_v = q_out.View(); - auto Umu_v = Umu.View(); + auto tmp1_v = tmp1.View(CpuWrite); + auto tmp2_v = tmp2.View(CpuWrite); + auto q_in_1_v=q_in_1.View(CpuRead); + auto q_in_2_v=q_in_2.View(CpuRead); + auto q_out_v = q_out.View(CpuRead); + auto Umu_v = Umu.View(CpuRead); thread_for(sU, Umu.Grid()->oSites(),{ Kernels::ContractConservedCurrentSiteFwd(tmp1_v[sU], q_in_2_v[sU], @@ -526,11 +526,11 @@ void WilsonFermion::SeqConservedCurrent(PropagatorField &q_in, tmp = lattice_cmplx*q_in; tmpBwd = Cshift(tmp, mu, -1); - auto coords_v = coords.View(); - auto tmpFwd_v = tmpFwd.View(); - auto tmpBwd_v = tmpBwd.View(); - auto Umu_v = Umu.View(); - auto q_out_v = q_out.View(); + auto coords_v = coords.View(CpuRead); + auto tmpFwd_v = tmpFwd.View(CpuRead); + auto tmpBwd_v = tmpBwd.View(CpuRead); + auto Umu_v = Umu.View(CpuRead); + 
auto q_out_v = q_out.View(CpuWrite); thread_for(sU, Umu.Grid()->oSites(), { diff --git a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h index 14a2ec9e..587bf42c 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h @@ -348,18 +348,18 @@ template void WilsonKernels::DhopDirAll( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls, int Nsite, const FermionField &in, std::vector &out) { - auto U_v = U.View(); - auto in_v = in.View(); - auto st_v = st.View(); + auto U_v = U.View(AcceleratorRead); + auto in_v = in.View(AcceleratorRead); + auto st_v = st.View(AcceleratorRead); - auto out_Xm = out[0].View(); - auto out_Ym = out[1].View(); - auto out_Zm = out[2].View(); - auto out_Tm = out[3].View(); - auto out_Xp = out[4].View(); - auto out_Yp = out[5].View(); - auto out_Zp = out[6].View(); - auto out_Tp = out[7].View(); + auto out_Xm = out[0].View(AcceleratorWrite); + auto out_Ym = out[1].View(AcceleratorWrite); + auto out_Zm = out[2].View(AcceleratorWrite); + auto out_Tm = out[3].View(AcceleratorWrite); + auto out_Xp = out[4].View(AcceleratorWrite); + auto out_Yp = out[5].View(AcceleratorWrite); + auto out_Zp = out[6].View(AcceleratorWrite); + auto out_Tp = out[7].View(AcceleratorWrite); auto CBp=st.CommBuf(); accelerator_forNB(sss,Nsite*Ls,Simd::Nsimd(),{ int sU=sss/Ls; @@ -383,10 +383,10 @@ void WilsonKernels::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S assert(dirdisp<=7); assert(dirdisp>=0); - auto U_v = U.View(); - auto in_v = in.View(); - auto out_v = out.View(); - auto st_v = st.View(); + auto U_v = U.View(AcceleratorRead); + auto in_v = in.View(AcceleratorRead); + auto out_v = out.View(AcceleratorWrite); + auto st_v = st.View(AcceleratorRead); auto CBp=st.CommBuf(); #define LoopBody(Dir) \ case Dir : \ @@ -438,10 +438,10 @@ void 
WilsonKernels::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField int Ls, int Nsite, const FermionField &in, FermionField &out, int interior,int exterior) { - auto U_v = U.View(); - auto in_v = in.View(); - auto out_v = out.View(); - auto st_v = st.View(); + auto U_v = U.View(AcceleratorRead); + auto in_v = in.View(AcceleratorRead); + auto out_v = out.View(AcceleratorWrite); + auto st_v = st.View(AcceleratorRead); if( interior && exterior ) { if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSite); return;} @@ -469,10 +469,10 @@ void WilsonKernels::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField int Ls, int Nsite, const FermionField &in, FermionField &out, int interior,int exterior) { - auto U_v = U.View(); - auto in_v = in.View(); - auto out_v = out.View(); - auto st_v = st.View(); + auto U_v = U.View(AcceleratorRead); + auto in_v = in.View(AcceleratorRead); + auto out_v = out.View(AcceleratorWrite); + auto st_v = st.View(AcceleratorRead); if( interior && exterior ) { if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDag); return;} diff --git a/Grid/qcd/action/gauge/GaugeImplTypes.h b/Grid/qcd/action/gauge/GaugeImplTypes.h index b9a5296d..79549dcb 100644 --- a/Grid/qcd/action/gauge/GaugeImplTypes.h +++ b/Grid/qcd/action/gauge/GaugeImplTypes.h @@ -86,8 +86,8 @@ public: // Move this elsewhere? 
FIXME static inline void AddLink(Field &U, LinkField &W, int mu) { // U[mu] += W - auto U_v = U.View(); - auto W_v = W.View(); + auto U_v = U.View(CpuWrite); + auto W_v = W.View(CpuRead); thread_for( ss, U.Grid()->oSites(), { U_v[ss](mu) = U_v[ss](mu) + W_v[ss](); }); @@ -131,8 +131,8 @@ public: //static std::chrono::duration diff; //auto start = std::chrono::high_resolution_clock::now(); - auto U_v = U.View(); - auto P_v = P.View(); + auto U_v = U.View(CpuWrite); + auto P_v = P.View(CpuRead); thread_for(ss, P.Grid()->oSites(),{ for (int mu = 0; mu < Nd; mu++) { U_v[ss](mu) = ProjectOnGroup(Exponentiate(P_v[ss](mu), ep, Nexp) * U_v[ss](mu)); diff --git a/Grid/qcd/action/scalar/ScalarInteractionAction.h b/Grid/qcd/action/scalar/ScalarInteractionAction.h index 3be84480..7ac85d56 100644 --- a/Grid/qcd/action/scalar/ScalarInteractionAction.h +++ b/Grid/qcd/action/scalar/ScalarInteractionAction.h @@ -89,8 +89,8 @@ public: action = (2.0 * Ndim + mass_square) * phisquared - lambda * phisquared * phisquared; - auto p_v = p.View(); - auto action_v = action.View(); + auto p_v = p.View(CpuRead); + auto action_v = action.View(CpuWrite); for (int mu = 0; mu < Ndim; mu++) { // pshift = Cshift(p, mu, +1); // not efficient, implement with stencils @@ -146,8 +146,8 @@ public: for (int point = 0; point < npoint; point++) { - auto p_v = p.View(); - auto force_v = force.View(); + auto p_v = p.View(CpuRead); + auto force_v = force.View(CpuWrite); int permute_type; StencilEntry *SE; diff --git a/Grid/qcd/smearing/GaugeConfiguration.h b/Grid/qcd/smearing/GaugeConfiguration.h index f4d00c72..0ff7fc25 100644 --- a/Grid/qcd/smearing/GaugeConfiguration.h +++ b/Grid/qcd/smearing/GaugeConfiguration.h @@ -49,7 +49,7 @@ public: private: const unsigned int smearingLevels; - Smear_Stout StoutSmearing; + Smear_Stout *StoutSmearing; std::vector SmearedSet; // Member functions @@ -72,7 +72,7 @@ private: previous_u = *ThinLinks; for (int smearLvl = 0; smearLvl < smearingLevels; ++smearLvl) { - 
StoutSmearing.smear(SmearedSet[smearLvl], previous_u); + StoutSmearing->smear(SmearedSet[smearLvl], previous_u); previous_u = SmearedSet[smearLvl]; // For debug purposes @@ -93,7 +93,7 @@ private: GaugeLinkField SigmaKPrime_mu(grid); GaugeLinkField GaugeKmu(grid), Cmu(grid); - StoutSmearing.BaseSmear(C, GaugeK); + StoutSmearing->BaseSmear(C, GaugeK); SigmaK = Zero(); iLambda = Zero(); @@ -107,7 +107,7 @@ private: pokeLorentz(SigmaK, SigmaKPrime_mu * e_iQ + adj(Cmu) * iLambda_mu, mu); pokeLorentz(iLambda, iLambda_mu, mu); } - StoutSmearing.derivative(SigmaK, iLambda, + StoutSmearing->derivative(SigmaK, iLambda, GaugeK); // derivative of SmearBase return SigmaK; } @@ -144,14 +144,14 @@ private: // Exponential iQ2 = iQ * iQ; iQ3 = iQ * iQ2; - StoutSmearing.set_uw(u, w, iQ2, iQ3); - StoutSmearing.set_fj(f0, f1, f2, u, w); + StoutSmearing->set_uw(u, w, iQ2, iQ3); + StoutSmearing->set_fj(f0, f1, f2, u, w); e_iQ = f0 * unity + timesMinusI(f1) * iQ - f2 * iQ2; // Getting B1, B2, Gamma and Lambda // simplify this part, reduntant calculations in set_fj - xi0 = StoutSmearing.func_xi0(w); - xi1 = StoutSmearing.func_xi1(w); + xi0 = StoutSmearing->func_xi0(w); + xi1 = StoutSmearing->func_xi1(w); u2 = u * u; w2 = w * w; cosw = cos(w); @@ -219,7 +219,7 @@ public: /* Standard constructor */ SmearedConfiguration(GridCartesian* UGrid, unsigned int Nsmear, Smear_Stout& Stout) - : smearingLevels(Nsmear), StoutSmearing(Stout), ThinLinks(NULL) + : smearingLevels(Nsmear), StoutSmearing(&Stout), ThinLinks(NULL) { for (unsigned int i = 0; i < smearingLevels; ++i) SmearedSet.push_back(*(new GaugeField(UGrid))); @@ -227,7 +227,7 @@ public: /*! 
For just thin links */ SmearedConfiguration() - : smearingLevels(0), StoutSmearing(), SmearedSet(), ThinLinks(NULL) {} + : smearingLevels(0), StoutSmearing(nullptr), SmearedSet(), ThinLinks(NULL) {} // attach the smeared routines to the thin links U and fill the smeared set void set_Field(GaugeField &U) diff --git a/Grid/qcd/utils/A2Autils.h b/Grid/qcd/utils/A2Autils.h index c7c7d329..7ad496b7 100644 --- a/Grid/qcd/utils/A2Autils.h +++ b/Grid/qcd/utils/A2Autils.h @@ -185,13 +185,13 @@ void A2Autils::MesonField(TensorType &mat, for(int i=0;i::MesonField(TensorType &mat, int base = Nmom*i+Nmom*Lblock*j+Nmom*Lblock*Rblock*r; for ( int m=0;m::PionFieldXX(Eigen::Tensor &mat, for(int i=0;i::PionFieldXX(Eigen::Tensor &mat, } for(int j=0;j::PionFieldWVmom(Eigen::Tensor &mat, for(int i=0;i::PionFieldWVmom(Eigen::Tensor &mat, int base = Nmom*i+Nmom*Lblock*j+Nmom*Lblock*Rblock*r; for ( int m=0;m::AslashField(TensorType &mat, for(int i=0;i::AslashField(TensorType &mat, for ( int m=0;m::ContractWWVV(std::vector &WWVV, for(int d_o=0;d_o::ContractWWVV(std::vector &WWVV, thread_for(ss,grid->oSites(),{ for(int d_o=0;d_o::OuterProductWWVV(PropagatorField &WWVV, const vobj &rhs, const int Ns, const int ss) { - auto WWVV_v = WWVV.View(); + auto WWVV_v = WWVV.View(CpuWrite); for (int s1 = 0; s1 < Ns; s1++){ for (int s2 = 0; s2 < Ns; s2++){ WWVV_v[ss]()(s1,s2)(0, 0) += lhs()(s1)(0) * rhs()(s2)(0); @@ -1122,10 +1122,10 @@ void A2Autils::ContractFourQuarkColourDiagonal(const PropagatorField &WWV GridBase *grid = WWVV0.Grid(); - auto WWVV0_v = WWVV0.View(); - auto WWVV1_v = WWVV1.View(); - auto O_trtr_v= O_trtr.View(); - auto O_fig8_v= O_fig8.View(); + auto WWVV0_v = WWVV0.View(CpuRead); + auto WWVV1_v = WWVV1.View(CpuRead); + auto O_trtr_v= O_trtr.View(CpuWrite); + auto O_fig8_v= O_fig8.View(CpuWrite); thread_for(ss,grid->oSites(),{ typedef typename ComplexField::vector_object vobj; @@ -1166,10 +1166,10 @@ void A2Autils::ContractFourQuarkColourMix(const PropagatorField &WWVV0, GridBase 
*grid = WWVV0.Grid(); - auto WWVV0_v = WWVV0.View(); - auto WWVV1_v = WWVV1.View(); - auto O_trtr_v= O_trtr.View(); - auto O_fig8_v= O_fig8.View(); + auto WWVV0_v = WWVV0.View(CpuRead); + auto WWVV1_v = WWVV1.View(CpuRead); + auto O_trtr_v= O_trtr.View(CpuWrite); + auto O_fig8_v= O_fig8.View(CpuWrite); thread_for(ss,grid->oSites(),{ diff --git a/Grid/qcd/utils/BaryonUtils.h b/Grid/qcd/utils/BaryonUtils.h index d65b9176..d45fd93d 100644 --- a/Grid/qcd/utils/BaryonUtils.h +++ b/Grid/qcd/utils/BaryonUtils.h @@ -273,10 +273,10 @@ void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, for (int ie=0; ie < 6 ; ie++) wick_contraction[ie] = (quarks_left[0] == quarks_right[epsilon[ie][0]] && quarks_left[1] == quarks_right[epsilon[ie][1]] && quarks_left[2] == quarks_right[epsilon[ie][2]]) ? 1 : 0; - auto vbaryon_corr= baryon_corr.View(); - auto v1 = q1_left.View(); - auto v2 = q2_left.View(); - auto v3 = q3_left.View(); + auto vbaryon_corr= baryon_corr.View(CpuWrite); + auto v1 = q1_left.View(CpuRead); + auto v2 = q2_left.View(CpuRead); + auto v3 = q3_left.View(CpuRead); // accelerator_for(ss, grid->oSites(), grid->Nsimd(), { thread_for(ss,grid->oSites(),{ @@ -560,10 +560,10 @@ void BaryonUtils::Sigma_to_Nucleon_Eye(const PropagatorField &qq_loop, { GridBase *grid = qs_ti.Grid(); - auto vcorr= stn_corr.View(); - auto vq_loop = qq_loop.View(); - auto vd_tf = qd_tf.View(); - auto vs_ti = qs_ti.View(); + auto vcorr= stn_corr.View(CpuWrite); + auto vq_loop = qq_loop.View(CpuRead); + auto vd_tf = qd_tf.View(CpuRead); + auto vs_ti = qs_ti.View(CpuRead); // accelerator_for(ss, grid->oSites(), grid->Nsimd(), { thread_for(ss,grid->oSites(),{ @@ -597,11 +597,11 @@ void BaryonUtils::Sigma_to_Nucleon_NonEye(const PropagatorField &qq_ti, { GridBase *grid = qs_ti.Grid(); - auto vcorr= stn_corr.View(); - auto vq_ti = qq_ti.View(); - auto vq_tf = qq_tf.View(); - auto vd_tf = qd_tf.View(); - auto vs_ti = qs_ti.View(); + auto vcorr= stn_corr.View(CpuWrite); + auto vq_ti = 
qq_ti.View(CpuRead); + auto vq_tf = qq_tf.View(CpuRead); + auto vd_tf = qd_tf.View(CpuRead); + auto vs_ti = qs_ti.View(CpuRead); // accelerator_for(ss, grid->oSites(), grid->Nsimd(), { thread_for(ss,grid->oSites(),{ diff --git a/Grid/qcd/utils/LinalgUtils.h b/Grid/qcd/utils/LinalgUtils.h index 56f8f164..0adbfabf 100644 --- a/Grid/qcd/utils/LinalgUtils.h +++ b/Grid/qcd/utils/LinalgUtils.h @@ -47,8 +47,8 @@ void axpibg5x(Lattice &z,const Lattice &x,Coeff a,Coeff b) GridBase *grid=x.Grid(); Gamma G5(Gamma::Algebra::Gamma5); - auto x_v = x.View(); - auto z_v = z.View(); + auto x_v = x.View(AcceleratorRead); + auto z_v = z.View(AcceleratorWrite); accelerator_for( ss, x_v.size(),vobj::Nsimd(), { auto tmp = a*x_v(ss) + G5*(b*timesI(x_v(ss))); coalescedWrite(z_v[ss],tmp); @@ -63,9 +63,9 @@ void axpby_ssp(Lattice &z, Coeff a,const Lattice &x,Coeff b,const La conformable(x,z); GridBase *grid=x.Grid(); int Ls = grid->_rdimensions[0]; - auto x_v = x.View(); - auto y_v = y.View(); - auto z_v = z.View(); + auto x_v = x.View(AcceleratorRead); + auto y_v = y.View(AcceleratorRead); + auto z_v = z.View(AcceleratorWrite); // FIXME -- need a new class of accelerator_loop to implement this // uint64_t nloop = grid->oSites()/Ls; @@ -85,9 +85,9 @@ void ag5xpby_ssp(Lattice &z,Coeff a,const Lattice &x,Coeff b,const L GridBase *grid=x.Grid(); int Ls = grid->_rdimensions[0]; Gamma G5(Gamma::Algebra::Gamma5); - auto x_v = x.View(); - auto y_v = y.View(); - auto z_v = z.View(); + auto x_v = x.View(AcceleratorRead); + auto y_v = y.View(AcceleratorRead); + auto z_v = z.View(AcceleratorWrite); uint64_t nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,vobj::Nsimd(),{ uint64_t ss = sss*Ls; @@ -104,9 +104,9 @@ void axpbg5y_ssp(Lattice &z,Coeff a,const Lattice &x,Coeff b,const L conformable(x,z); GridBase *grid=x.Grid(); int Ls = grid->_rdimensions[0]; - auto x_v = x.View(); - auto y_v = y.View(); - auto z_v = z.View(); + auto x_v = x.View(AcceleratorRead); + auto y_v = y.View(AcceleratorRead); + 
auto z_v = z.View(AcceleratorWrite); Gamma G5(Gamma::Algebra::Gamma5); uint64_t nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,vobj::Nsimd(),{ @@ -125,9 +125,9 @@ void ag5xpbg5y_ssp(Lattice &z,Coeff a,const Lattice &x,Coeff b,const GridBase *grid=x.Grid(); int Ls = grid->_rdimensions[0]; - auto x_v = x.View(); - auto y_v = y.View(); - auto z_v = z.View(); + auto x_v = x.View(AcceleratorRead); + auto y_v = y.View(AcceleratorRead); + auto z_v = z.View(AcceleratorWrite); Gamma G5(Gamma::Algebra::Gamma5); uint64_t nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,vobj::Nsimd(),{ @@ -147,9 +147,9 @@ void axpby_ssp_pminus(Lattice &z,Coeff a,const Lattice &x,Coeff b,co GridBase *grid=x.Grid(); int Ls = grid->_rdimensions[0]; - auto x_v = x.View(); - auto y_v = y.View(); - auto z_v = z.View(); + auto x_v = x.View(AcceleratorRead); + auto y_v = y.View(AcceleratorRead); + auto z_v = z.View(AcceleratorWrite); uint64_t nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,vobj::Nsimd(),{ uint64_t ss = sss*Ls; @@ -168,9 +168,9 @@ void axpby_ssp_pplus(Lattice &z,Coeff a,const Lattice &x,Coeff b,con conformable(x,z); GridBase *grid=x.Grid(); int Ls = grid->_rdimensions[0]; - auto x_v = x.View(); - auto y_v = y.View(); - auto z_v = z.View(); + auto x_v = x.View(AcceleratorRead); + auto y_v = y.View(AcceleratorRead); + auto z_v = z.View(AcceleratorWrite); uint64_t nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,vobj::Nsimd(),{ uint64_t ss = sss*Ls; @@ -189,8 +189,8 @@ void G5R5(Lattice &z,const Lattice &x) conformable(x,z); int Ls = grid->_rdimensions[0]; Gamma G5(Gamma::Algebra::Gamma5); - auto x_v = x.View(); - auto z_v = z.View(); + auto x_v = x.View(AcceleratorRead); + auto z_v = z.View(AcceleratorWrite); uint64_t nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,vobj::Nsimd(),{ uint64_t ss = sss*Ls; @@ -222,8 +222,8 @@ void G5C(Lattice> &z, const LatticeoSites(),CComplex::Nsimd(), { for(int n = 0; n < nb; ++n) { diff --git a/Grid/qcd/utils/SUn.h 
b/Grid/qcd/utils/SUn.h index 7ad80d00..5f98f926 100644 --- a/Grid/qcd/utils/SUn.h +++ b/Grid/qcd/utils/SUn.h @@ -222,9 +222,9 @@ public: conformable(subgroup, Determinant); int i0, i1; su2SubGroupIndex(i0, i1, su2_index); - auto subgroup_v = subgroup.View(); - auto source_v = source.View(); - auto Determinant_v = Determinant.View(); + auto subgroup_v = subgroup.View(CpuWrite); + auto source_v = source.View(CpuRead); + auto Determinant_v = Determinant.View(CpuWrite); thread_for(ss, grid->oSites(), { @@ -257,8 +257,8 @@ public: su2SubGroupIndex(i0, i1, su2_index); dest = 1.0; // start out with identity - auto dest_v = dest.View(); - auto subgroup_v = subgroup.View(); + auto dest_v = dest.View(CpuWrite); + auto subgroup_v = subgroup.View(CpuRead); thread_for(ss, grid->oSites(), { dest_v[ss]()()(i0, i0) = subgroup_v[ss]()()(0, 0); diff --git a/Grid/stencil/Stencil.h b/Grid/stencil/Stencil.h index 7a200ba6..d70bac93 100644 --- a/Grid/stencil/Stencil.h +++ b/Grid/stencil/Stencil.h @@ -67,7 +67,7 @@ void Gather_plane_simple_table (Vector >& table,const Lattice { int num=table.size(); std::pair *table_v = & table[0]; - auto rhs_v = rhs.View(); + auto rhs_v = rhs.View(AcceleratorRead); accelerator_forNB( i,num, vobj::Nsimd(), { typedef decltype(coalescedRead(buffer[0])) compressed_t; compressed_t tmp_c; @@ -94,7 +94,7 @@ void Gather_plane_exchange_table(Vector >& table,const Lattic int num=table.size()/2; int so = plane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane - auto rhs_v = rhs.View(); + auto rhs_v = rhs.View(AcceleratorRead); auto p0=&pointers[0][0]; auto p1=&pointers[1][0]; auto tp=&table[0]; @@ -122,7 +122,7 @@ struct StencilEntry { // Could pack to 8 + 4 + 4 = 128 bit and use template -class CartesianStencilView { +class CartesianStencilAccelerator { public: typedef AcceleratorVector StencilVector; @@ -130,14 +130,15 @@ class CartesianStencilView { //////////////////////////////////////// // Basic Grid and stencil info 
//////////////////////////////////////// - int _checkerboard; - int _npoints; // Move to template param? + int _checkerboard; + int _npoints; // Move to template param? + int _osites; StencilVector _directions; StencilVector _distances; StencilVector _comm_buf_size; StencilVector _permute_type; StencilVector same_node; - Coordinate _simd_layout; + Coordinate _simd_layout; Parameters parameters; StencilEntry* _entries_p; cobj* u_recv_buf_p; @@ -175,13 +176,37 @@ class CartesianStencilView { { Lexicographic::CoorFromIndex(coor,lane,this->_simd_layout); } +}; + +template +class CartesianStencilView : public CartesianStencilAccelerator +{ + std::shared_ptr Deleter; + public: + // + CartesianStencilView (const CartesianStencilView &refer_to_me) + : CartesianStencilAccelerator(refer_to_me), Deleter(refer_to_me.Deleter) + { } + CartesianStencilView (const CartesianStencilAccelerator &refer_to_me,ViewMode mode) + : CartesianStencilAccelerator(refer_to_me), Deleter(new MemViewDeleter) + { + Deleter->cpu_ptr =(void *)this->_entries_p; + Deleter->mode = mode; + this->_entries_p =(StencilEntry *) + + AllocationCache::ViewOpen(this->_entries_p, + this->_npoints*this->_osites*sizeof(StencilEntry), + mode, + AdviseDefault); + } }; + //////////////////////////////////////// // The Stencil Class itself //////////////////////////////////////// template -class CartesianStencil : public CartesianStencilView { // Stencil runs along coordinate axes only; NO diagonal fill in. +class CartesianStencil : public CartesianStencilAccelerator { // Stencil runs along coordinate axes only; NO diagonal fill in. 
public: typedef typename cobj::vector_type vector_type; @@ -226,8 +251,8 @@ public: // Generalise as required later if needed //////////////////////////////////////////////////////////////////////// - View_type View(void) const { - View_type accessor(*( (View_type *) this)); + View_type View(ViewMode mode) const { + View_type accessor(*( (View_type *) this),mode); return accessor; } @@ -662,9 +687,9 @@ public: _unified_buffer_size=0; surface_list.resize(0); - int osites = _grid->oSites(); + this->_osites = _grid->oSites(); - _entries.resize(this->_npoints* osites); + _entries.resize(this->_npoints* this->_osites); this->_entries_p = &_entries[0]; for(int ii=0;ii>()[2]; } // SYCL specific +accelerator_inline int acceleratorSIMTlane(int Nsimd) { +#ifdef GRID_SIMT + return __spirv::initLocalInvocationId<3, cl::sycl::id<3>>()[2]; +#else + return 0; +#endif +} // SYCL specific #define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... ) \ theGridAccelerator->submit([&](cl::sycl::handler &cgh) { \ @@ -224,7 +236,13 @@ NAMESPACE_BEGIN(Grid); #define accelerator_inline __host__ __device__ inline /*These routines define mapping from thread grid to loop & vector lane indexing */ -accelerator_inline int acceleratorSIMTlane(int Nsimd) { return hipThreadIdx_z; } // HIP specific +accelerator_inline int acceleratorSIMTlane(int Nsimd) { +#ifdef GRID_SIMT + return hipThreadIdx_z; +#else + return 0; +#endif +} // HIP specific #define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... 
) \ { \ From a7abda89e237459cf850094378f1d0f43a51b590 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 21 May 2020 16:13:59 -0400 Subject: [PATCH 14/86] View location & access mode --- benchmarks/Benchmark_ITT.cc | 10 ++++----- benchmarks/Benchmark_dwf.cc | 12 +++++------ benchmarks/Benchmark_dwf_sweep.cc | 4 ++-- benchmarks/Benchmark_memory_asynch.cc | 2 +- benchmarks/Benchmark_memory_bandwidth.cc | 2 +- benchmarks/Benchmark_meson_field.cc | 18 ++++++++-------- benchmarks/Benchmark_su3_gpu.cc | 26 ++++++++++++------------ benchmarks/Benchmark_wilson.cc | 20 +++++++++--------- 8 files changed, 47 insertions(+), 47 deletions(-) diff --git a/benchmarks/Benchmark_ITT.cc b/benchmarks/Benchmark_ITT.cc index 7ad4a147..adfd4b38 100644 --- a/benchmarks/Benchmark_ITT.cc +++ b/benchmarks/Benchmark_ITT.cc @@ -252,9 +252,9 @@ public: double start=usecond(); for(int i=0;i U(4,FGrid); - auto Umu_v = Umu.View(); - auto Umu5d_v = Umu5d.View(); + auto Umu_v = Umu.View(CpuRead); + auto Umu5d_v = Umu5d.View(CpuWrite); for(int ss=0;ssoSites();ss++){ for(int s=0;s U(4,FGrid); { - auto Umu5d_v = Umu5d.View(); - auto Umu_v = Umu.View(); + auto Umu5d_v = Umu5d.View(CpuWrite); + auto Umu_v = Umu.View(CpuRead); for(int ss=0;ssoSites();ss++){ for(int s=0;s & latt4, int Ls, int threads,int report ) LatticeGaugeField Umu5d(FGrid); // replicate across fifth dimension - auto Umu5d_v = Umu5d.View(); - auto Umu_v = Umu.View(); + auto Umu5d_v = Umu5d.View(CpuWrite); + auto Umu_v = Umu.View(CpuRead); for(int ss=0;ssoSites();ss++){ for(int s=0;s > &mat, for(int b=0;b > &mat, for(int b=0;b > &mat int ss= so+n*stride+b; for(int i=0;i > &m for(int i=0;i > &m // Trigger unroll for ( int m=0;m Date: Thu, 21 May 2020 16:14:20 -0400 Subject: [PATCH 15/86] View locatoin and access mode --- tests/Test_general_stencil.cc | 8 ++++---- tests/Test_stencil.cc | 16 ++++++++-------- tests/core/Test_staggered5D.cc | 13 +++++++------ tests/debug/Test_cayley_mres.cc | 8 ++++---- tests/forces/Test_contfrac_force.cc 
| 6 +++--- tests/forces/Test_dwf_force.cc | 6 +++--- tests/forces/Test_dwf_force_eofa.cc | 6 +++--- tests/forces/Test_dwf_gpforce.cc | 6 +++--- tests/forces/Test_dwf_gpforce_eofa.cc | 6 +++--- tests/forces/Test_gp_plaq_force.cc | 6 +++--- tests/forces/Test_gp_rect_force.cc | 6 +++--- tests/forces/Test_gpdwf_force.cc | 6 +++--- tests/forces/Test_gpwilson_force.cc | 6 +++--- tests/forces/Test_mobius_force.cc | 6 +++--- tests/forces/Test_mobius_force_eofa.cc | 6 +++--- tests/forces/Test_mobius_gpforce_eofa.cc | 6 +++--- tests/forces/Test_partfrac_force.cc | 6 +++--- tests/forces/Test_rect_force.cc | 6 +++--- tests/forces/Test_wilson_force.cc | 6 +++--- tests/forces/Test_wilsonclover_force.cc | 6 +++--- tests/forces/Test_zmobius_force.cc | 6 +++--- tests/solver/Test_dwf_hdcr.cc | 6 ++---- 22 files changed, 76 insertions(+), 77 deletions(-) diff --git a/tests/Test_general_stencil.cc b/tests/Test_general_stencil.cc index f03677d0..27d3f032 100644 --- a/tests/Test_general_stencil.cc +++ b/tests/Test_general_stencil.cc @@ -107,8 +107,8 @@ int main(int argc, char ** argv) // Implement a stencil code that should agree with cshift! 
for(int i=0;ioSites();i++){ auto SE = gStencil.GetEntry(0,i); - auto check = Check.View(); - auto foo = Foo.View(); + auto check = Check.View(CpuWrite); + auto foo = Foo.View(CpuRead); // Encapsulate in a general wrapper check[i] = foo[SE->_offset]; auto tmp=check[i]; @@ -147,8 +147,8 @@ int main(int argc, char ** argv) }}}} if (nrm > 1.0e-4) { - auto check = Check.View(); - auto bar = Bar.View(); + auto check = Check.View(CpuRead); + auto bar = Bar.View(CpuRead); for(int i=0;i_is_local && SE->_permute ) permute(check[i],foo[SE->_offset],permute_type); else if (SE->_is_local) @@ -151,8 +151,8 @@ int main(int argc, char ** argv) { }}}} if (nrm > 1.0e-4) { - auto check = Check.View(); - auto bar = Bar.View(); + auto check = Check.View(CpuRead); + auto bar = Bar.View(CpuRead); for(int i=0;i " <_offset << " "<< SE->_is_local<_is_local && SE->_permute ) permute(ocheck[i],efoo[SE->_offset],permute_type); else if (SE->_is_local) @@ -226,8 +226,8 @@ int main(int argc, char ** argv) { SE = OStencil.GetEntry(permute_type,0,i); // std::cout << "ODD source "<< i<<" -> " <_offset << " "<< SE->_is_local<_is_local && SE->_permute ) permute(echeck[i],ofoo[SE->_offset],permute_type); else if (SE->_is_local) diff --git a/tests/core/Test_staggered5D.cc b/tests/core/Test_staggered5D.cc index 7055d183..402e69d5 100644 --- a/tests/core/Test_staggered5D.cc +++ b/tests/core/Test_staggered5D.cc @@ -88,14 +88,15 @@ int main (int argc, char ** argv) // replicate across fifth dimension //////////////////////////////////// LatticeGaugeField Umu5d(FGrid); - auto umu5d = Umu5d.View(); - auto umu = Umu.View(); - for(int ss=0;ssoSites();ss++){ - for(int s=0;soSites();ss++){ + for(int s=0;s U(4,FGrid); for(int mu=0;mu U(4,FGrid); { - auto Umu5d_v = Umu5d.View(); - auto Umu_v = Umu.View(); + auto Umu5d_v = Umu5d.View(CpuWrite); + auto Umu_v = Umu.View(CpuRead); for(int ss=0;ssoSites();ss++){ for(int s=0;soSites(),{ uint64_t ss= sss*Ls; typedef vSpinColourVector spinor; diff --git 
a/tests/forces/Test_contfrac_force.cc b/tests/forces/Test_contfrac_force.cc index 4eeb8c27..4c3a3f53 100644 --- a/tests/forces/Test_contfrac_force.cc +++ b/tests/forces/Test_contfrac_force.cc @@ -98,9 +98,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto mom_v = mom.View(); - auto Uprime_v = Uprime.View(); - auto U_v = U.View(); + auto mom_v = mom.View(CpuRead); + auto U_v = U.View(CpuRead); + auto Uprime_v = Uprime.View(CpuWrite); thread_foreach( i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) diff --git a/tests/forces/Test_dwf_force.cc b/tests/forces/Test_dwf_force.cc index 009f50b3..fea867e6 100644 --- a/tests/forces/Test_dwf_force.cc +++ b/tests/forces/Test_dwf_force.cc @@ -100,9 +100,9 @@ int main (int argc, char ** argv) // fourth order exponential approx - auto mom_v = mom.View(); - auto U_v = U.View(); - auto Uprime_v = Uprime.View(); + auto mom_v = mom.View(CpuRead); + auto U_v = U.View(CpuRead); + auto Uprime_v = Uprime.View(CpuWrite); thread_foreach( i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) diff --git a/tests/forces/Test_dwf_force_eofa.cc b/tests/forces/Test_dwf_force_eofa.cc index 670e7589..5b864279 100644 --- a/tests/forces/Test_dwf_force_eofa.cc +++ b/tests/forces/Test_dwf_force_eofa.cc @@ -110,9 +110,9 @@ int main (int argc, char** argv) PokeIndex(mom, mommu, mu); // fourth order exponential approx - auto mom_v = mom.View(); - auto U_v = U.View(); - auto Uprime_v = Uprime.View(); + auto mom_v = mom.View(CpuRead); + auto U_v = U.View(CpuRead); + auto Uprime_v = Uprime.View(CpuWrite); thread_foreach(i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) diff --git a/tests/forces/Test_dwf_gpforce.cc b/tests/forces/Test_dwf_gpforce.cc index d762e22a..a0743edc 100644 --- a/tests/forces/Test_dwf_gpforce.cc +++ b/tests/forces/Test_dwf_gpforce.cc @@ -119,9 +119,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto mom_v = mom.View(); - auto U_v = U.View(); - auto Uprime_v = 
Uprime.View(); + auto mom_v = mom.View(CpuRead); + auto U_v = U.View(CpuRead); + auto Uprime_v = Uprime.View(CpuWrite); thread_foreach(i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) diff --git a/tests/forces/Test_dwf_gpforce_eofa.cc b/tests/forces/Test_dwf_gpforce_eofa.cc index 66ae9dcf..69b9adec 100644 --- a/tests/forces/Test_dwf_gpforce_eofa.cc +++ b/tests/forces/Test_dwf_gpforce_eofa.cc @@ -114,9 +114,9 @@ int main (int argc, char** argv) PokeIndex(mom, mommu, mu); // fourth order exponential approx - auto mom_v = mom.View(); - auto U_v = U.View(); - auto Uprime_v = Uprime.View(); + auto mom_v = mom.View(CpuRead); + auto U_v = U.View(CpuRead); + auto Uprime_v = Uprime.View(CpuWrite); thread_foreach(i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) diff --git a/tests/forces/Test_gp_plaq_force.cc b/tests/forces/Test_gp_plaq_force.cc index c4e214bb..5de7ddb7 100644 --- a/tests/forces/Test_gp_plaq_force.cc +++ b/tests/forces/Test_gp_plaq_force.cc @@ -85,9 +85,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto Uprime_v = Uprime.View(); - auto U_v = U.View(); - auto mom_v = mom.View(); + auto Uprime_v = Uprime.View(CpuWrite); + auto U_v = U.View(CpuRead); + auto mom_v = mom.View(CpuRead); thread_foreach(i,mom_v,{ // exp(pmu dt) * Umu Uprime_v[i](mu) = U_v[i](mu) + mom_v[i](mu)*U_v[i](mu)*dt ; }); diff --git a/tests/forces/Test_gp_rect_force.cc b/tests/forces/Test_gp_rect_force.cc index 2573af6a..026ce60f 100644 --- a/tests/forces/Test_gp_rect_force.cc +++ b/tests/forces/Test_gp_rect_force.cc @@ -87,9 +87,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto mom_v = mom.View(); - auto Uprime_v= Uprime.View(); - auto U_v = U.View(); + auto mom_v = mom.View(CpuRead); + auto Uprime_v= Uprime.View(CpuWrite); + auto U_v = U.View(CpuRead); thread_foreach(i,mom_v,{ // exp(pmu dt) * Umu Uprime_v[i](mu) = U_v[i](mu) + mom_v[i](mu)*U_v[i](mu)*dt ; }); diff --git 
a/tests/forces/Test_gpdwf_force.cc b/tests/forces/Test_gpdwf_force.cc index 09a1dc4b..22927d01 100644 --- a/tests/forces/Test_gpdwf_force.cc +++ b/tests/forces/Test_gpdwf_force.cc @@ -105,9 +105,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto U_v = U.View(); - auto mom_v = mom.View(); - auto Uprime_v = Uprime.View(); + auto U_v = U.View(CpuRead); + auto mom_v = mom.View(CpuRead); + auto Uprime_v = Uprime.View(CpuWrite); thread_foreach(i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) + mom_v[i](mu)*U_v[i](mu)*dt diff --git a/tests/forces/Test_gpwilson_force.cc b/tests/forces/Test_gpwilson_force.cc index cd30d898..41c4641d 100644 --- a/tests/forces/Test_gpwilson_force.cc +++ b/tests/forces/Test_gpwilson_force.cc @@ -99,9 +99,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto mom_v = mom.View(); - auto U_v = U.View(); - auto Uprime_v = Uprime.View(); + auto mom_v = mom.View(CpuRead); + auto U_v = U.View(CpuRead); + auto Uprime_v = Uprime.View(CpuWrite); thread_foreach(i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) diff --git a/tests/forces/Test_mobius_force.cc b/tests/forces/Test_mobius_force.cc index a1c4e930..daab4149 100644 --- a/tests/forces/Test_mobius_force.cc +++ b/tests/forces/Test_mobius_force.cc @@ -101,9 +101,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto U_v = U.View(); - auto mom_v = mom.View(); - auto Uprime_v = Uprime.View(); + auto U_v = U.View(CpuRead); + auto mom_v = mom.View(CpuRead); + auto Uprime_v = Uprime.View(CpuWrite); thread_foreach(i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) + mom_v[i](mu)*U_v[i](mu)*dt diff --git a/tests/forces/Test_mobius_force_eofa.cc b/tests/forces/Test_mobius_force_eofa.cc index f71e2d41..7a8d4cf8 100644 --- a/tests/forces/Test_mobius_force_eofa.cc +++ b/tests/forces/Test_mobius_force_eofa.cc @@ -112,9 +112,9 @@ int main (int argc, char** argv) PokeIndex(mom, mommu, mu); // 
fourth order exponential approx - auto mom_v = mom.View(); - auto U_v = U.View(); - auto Uprime_v = Uprime.View(); + auto mom_v = mom.View(CpuRead); + auto U_v = U.View(CpuRead); + auto Uprime_v = Uprime.View(CpuWrite); thread_foreach(i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) diff --git a/tests/forces/Test_mobius_gpforce_eofa.cc b/tests/forces/Test_mobius_gpforce_eofa.cc index 4975f36d..13de233b 100644 --- a/tests/forces/Test_mobius_gpforce_eofa.cc +++ b/tests/forces/Test_mobius_gpforce_eofa.cc @@ -115,9 +115,9 @@ int main (int argc, char** argv) SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg PokeIndex(mom, mommu, mu); - auto U_v = U.View(); - auto mom_v = mom.View(); - auto Uprime_v = Uprime.View(); + auto U_v = U.View(CpuRead); + auto mom_v = mom.View(CpuRead); + auto Uprime_v = Uprime.View(CpuWrite); // fourth order exponential approx thread_foreach( i, mom_v,{ Uprime_v[i](mu) = U_v[i](mu) + mom_v[i](mu)*U_v[i](mu)*dt + mom_v[i](mu) *mom_v[i](mu) *U_v[i](mu)*(dt*dt/2.0) diff --git a/tests/forces/Test_partfrac_force.cc b/tests/forces/Test_partfrac_force.cc index 3ea2c6aa..9292274e 100644 --- a/tests/forces/Test_partfrac_force.cc +++ b/tests/forces/Test_partfrac_force.cc @@ -101,9 +101,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto U_v = U.View(); - auto mom_v = mom.View(); - auto Uprime_v = Uprime.View(); + auto U_v = U.View(CpuRead); + auto mom_v = mom.View(CpuRead); + auto Uprime_v = Uprime.View(CpuWrite); thread_foreach(i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) + mom_v[i](mu)*U_v[i](mu)*dt diff --git a/tests/forces/Test_rect_force.cc b/tests/forces/Test_rect_force.cc index 9a78de24..909068c2 100644 --- a/tests/forces/Test_rect_force.cc +++ b/tests/forces/Test_rect_force.cc @@ -87,9 +87,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto Uprime_v = Uprime.View(); - auto U_v = U.View(); - auto 
mom_v = mom.View(); + auto Uprime_v = Uprime.View(CpuWrite); + auto U_v = U.View(CpuRead); + auto mom_v = mom.View(CpuRead); thread_foreach(i,mom_v,{ // exp(pmu dt) * Umu Uprime_v[i](mu) = U_v[i](mu) + mom_v[i](mu)*U_v[i](mu)*dt ; }); diff --git a/tests/forces/Test_wilson_force.cc b/tests/forces/Test_wilson_force.cc index 47f1516a..397dc40c 100644 --- a/tests/forces/Test_wilson_force.cc +++ b/tests/forces/Test_wilson_force.cc @@ -105,9 +105,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto U_v = U.View(); - auto mom_v = mom.View(); - auto Uprime_v = Uprime.View(); + auto U_v = U.View(CpuRead); + auto mom_v = mom.View(CpuRead); + auto Uprime_v = Uprime.View(CpuWrite); thread_foreach( i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu); Uprime_v[i](mu) += mom_v[i](mu)*U_v[i](mu)*dt ; diff --git a/tests/forces/Test_wilsonclover_force.cc b/tests/forces/Test_wilsonclover_force.cc index d9ace23c..ff664e19 100644 --- a/tests/forces/Test_wilsonclover_force.cc +++ b/tests/forces/Test_wilsonclover_force.cc @@ -105,9 +105,9 @@ int main(int argc, char **argv) Hmom -= real(sum(trace(mommu * mommu))); PokeIndex(mom, mommu, mu); - auto Uprime_v = Uprime.View(); - auto U_v = U.View(); - auto mom_v = mom.View(); + auto Uprime_v = Uprime.View(CpuWrite); + auto U_v = U.View(CpuRead); + auto mom_v = mom.View(CpuRead); thread_foreach(ss,mom_v, { Uprime_v[ss]._internal[mu] = ProjectOnGroup(Exponentiate(mom_v[ss]._internal[mu], dt, 12) * U_v[ss]._internal[mu]); diff --git a/tests/forces/Test_zmobius_force.cc b/tests/forces/Test_zmobius_force.cc index 2730885f..2ed12acd 100644 --- a/tests/forces/Test_zmobius_force.cc +++ b/tests/forces/Test_zmobius_force.cc @@ -114,9 +114,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto mom_v = mom.View(); - auto U_v = U.View(); - auto Uprime_v = Uprime.View(); + auto mom_v = mom.View(CpuRead); + auto U_v = U.View(CpuRead); + auto Uprime_v = 
Uprime.View(CpuWrite); thread_foreach(i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) diff --git a/tests/solver/Test_dwf_hdcr.cc b/tests/solver/Test_dwf_hdcr.cc index 873530ff..f93af852 100644 --- a/tests/solver/Test_dwf_hdcr.cc +++ b/tests/solver/Test_dwf_hdcr.cc @@ -1,5 +1,3 @@ - - /************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -302,8 +300,8 @@ int main (int argc, char ** argv) int nb=nbasisc/2; CoarseAggregates.CreateSubspaceChebyshev(CRNG,PosdefLdop,nb,12.0,0.02,500,100,100,0.0); for(int n=0;noSites();site++){ subspace_g5[site](nn) = subspace[site](nn); From 8285e415747d71b1c655203f91867096f0f8d46c Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 21 May 2020 16:14:41 -0400 Subject: [PATCH 16/86] View location / access mode --- Grid/lattice/Lattice_view.h | 174 ++++++++++++++++++++++++++++++++++++ 1 file changed, 174 insertions(+) create mode 100644 Grid/lattice/Lattice_view.h diff --git a/Grid/lattice/Lattice_view.h b/Grid/lattice/Lattice_view.h new file mode 100644 index 00000000..183423f7 --- /dev/null +++ b/Grid/lattice/Lattice_view.h @@ -0,0 +1,174 @@ +#pragma once +NAMESPACE_BEGIN(Grid); +/////////////////////////////////////////////////////////////////// +// Base class which can be used by traits to pick up behaviour +/////////////////////////////////////////////////////////////////// +class LatticeBase {}; + +///////////////////////////////////////////////////////////////////////////////////////// +// Conformable checks; same instance of Grid required +///////////////////////////////////////////////////////////////////////////////////////// +void accelerator_inline conformable(GridBase *lhs,GridBase *rhs) +{ + assert(lhs == rhs); +} + +//////////////////////////////////////////////////////////////////////////// +// Minimal base class containing only data valid to access from accelerator +// _odata will be a managed pointer in CUDA 
+//////////////////////////////////////////////////////////////////////////// +// Force access to lattice through a view object. +// prevents writing of code that will not offload to GPU, but perhaps annoyingly +// strict since host could could in principle direct access through the lattice object +// Need to decide programming model. +#define LATTICE_VIEW_STRICT +template class LatticeAccelerator : public LatticeBase +{ +protected: + GridBase *_grid; + int checkerboard; + vobj *_odata; // A managed pointer + uint64_t _odata_size; +public: + accelerator_inline LatticeAccelerator() : checkerboard(0), _odata(nullptr), _odata_size(0), _grid(nullptr) { }; + accelerator_inline uint64_t oSites(void) const { return _odata_size; }; + accelerator_inline int Checkerboard(void) const { return checkerboard; }; + accelerator_inline int &Checkerboard(void) { return this->checkerboard; }; // can assign checkerboard on a container, not a view + accelerator_inline void Conformable(GridBase * &grid) const + { + if (grid) conformable(grid, _grid); + else grid = _grid; + }; +}; + +///////////////////////////////////////////////////////////////////////////////////////// +// A View class which provides accessor to the data. 
+// This will be safe to call from accelerator_for and is trivially copy constructible +// The copy constructor for this will need to be used by device lambda functions +///////////////////////////////////////////////////////////////////////////////////////// +template +class LatticeExprView : public LatticeAccelerator +{ +public: + // Rvalue + ViewMode mode; + void * cpu_ptr; +#ifdef GRID_SIMT + accelerator_inline const typename vobj::scalar_object operator()(size_t i) const { + return coalescedRead(this->_odata[i]); + } +#else + accelerator_inline const vobj & operator()(size_t i) const { return this->_odata[i]; } +#endif + + accelerator_inline const vobj & operator[](size_t i) const { return this->_odata[i]; }; + accelerator_inline vobj & operator[](size_t i) { return this->_odata[i]; }; + + accelerator_inline uint64_t begin(void) const { return 0;}; + accelerator_inline uint64_t end(void) const { return this->_odata_size; }; + accelerator_inline uint64_t size(void) const { return this->_odata_size; }; + + LatticeExprView(const LatticeAccelerator &refer_to_me) : LatticeAccelerator (refer_to_me){} + + // Host functions + void ViewOpen(ViewMode mode) + { // Translate the pointer, could save a copy. Could use a "Handle" and not save _odata originally in base + // std::cout << "View Open"<_odata<cpu_ptr = (void *)this->_odata; + this->mode = mode; + this->_odata =(vobj *) + AllocationCache::ViewOpen(this->cpu_ptr, + this->_odata_size*sizeof(vobj), + mode, + AdviseDefault); + } + void ViewClose(void) + { // Inform the manager + // std::cout << "View Close"<cpu_ptr<cpu_ptr,this->mode); + } + +}; + + +/////////////////////////////////////////////////////////////////////// +// An object to be stored in a shared_ptr to clean up after last view. +// UserView constructor,destructor updates view manager +// Non-copyable object??? Second base with copy/= deleted? 
+/////////////////////////////////////////////////////////////////////// +class MemViewDeleter { + public: + void *cpu_ptr; + ViewMode mode; + ~MemViewDeleter(){ + AllocationCache::ViewClose(cpu_ptr,mode); + } +}; +template +class LatticeView : public LatticeExprView +{ + std::shared_ptr Deleter; +public: + LatticeView(const LatticeView &orig) : LatticeExprView(orig) { } + LatticeView(const LatticeAccelerator &refer_to_me,ViewMode mode) : + LatticeExprView (refer_to_me), Deleter(new MemViewDeleter) + { + // std::cout << "FIXME - copy shared pointer? View Open in LatticeView"<_odata<ViewOpen(mode); + Deleter->cpu_ptr = this->cpu_ptr; + Deleter->mode = mode; + } +}; + +///////////////////////////////////////////////////////////////////////////////////////// +// Lattice expression types used by ET to assemble the AST +// +// Need to be able to detect code paths according to the whether a lattice object or not +// so introduce some trait type things +///////////////////////////////////////////////////////////////////////////////////////// + +class LatticeExpressionBase {}; + +template using is_lattice = std::is_base_of; +template using is_lattice_expr = std::is_base_of; + +template struct ViewMapBase { typedef T Type; }; +template struct ViewMapBase { typedef LatticeExprView Type; }; +template using ViewMap = ViewMapBase::value >; + +template +class LatticeUnaryExpression : public LatticeExpressionBase +{ +public: + typedef typename ViewMap<_T1>::Type T1; + Op op; + T1 arg1; + LatticeUnaryExpression(Op _op,const _T1 &_arg1) : op(_op), arg1(_arg1) {}; +}; + +template +class LatticeBinaryExpression : public LatticeExpressionBase +{ +public: + typedef typename ViewMap<_T1>::Type T1; + typedef typename ViewMap<_T2>::Type T2; + Op op; + T1 arg1; + T2 arg2; + LatticeBinaryExpression(Op _op,const _T1 &_arg1,const _T2 &_arg2) : op(_op), arg1(_arg1), arg2(_arg2) {}; +}; + +template +class LatticeTrinaryExpression : public LatticeExpressionBase +{ +public: + typedef typename 
ViewMap<_T1>::Type T1; + typedef typename ViewMap<_T2>::Type T2; + typedef typename ViewMap<_T3>::Type T3; + Op op; + T1 arg1; + T2 arg2; + T3 arg3; + LatticeTrinaryExpression(Op _op,const _T1 &_arg1,const _T2 &_arg2,const _T3 &_arg3) : op(_op), arg1(_arg1), arg2(_arg2), arg3(_arg3) {}; +}; +NAMESPACE_END(Grid); From 556da86ac3ec908682c7f0adfc06ac566bf92ef0 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sun, 24 May 2020 13:41:58 -0400 Subject: [PATCH 17/86] HIP fp16 --- Grid/simd/Grid_gpu_vec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Grid/simd/Grid_gpu_vec.h b/Grid/simd/Grid_gpu_vec.h index aa7e385c..b9c6a81b 100644 --- a/Grid/simd/Grid_gpu_vec.h +++ b/Grid/simd/Grid_gpu_vec.h @@ -36,7 +36,7 @@ Author: Peter Boyle #include #endif #ifdef GRID_HIP -#include +#include #endif namespace Grid { From 92b342a477e3bf4e629aa7b613b06f85c4faafc5 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sun, 24 May 2020 13:50:28 -0400 Subject: [PATCH 18/86] Hip reduction too --- Grid/lattice/Lattice_reduction_gpu.h | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/Grid/lattice/Lattice_reduction_gpu.h b/Grid/lattice/Lattice_reduction_gpu.h index c5d75356..5f490507 100644 --- a/Grid/lattice/Lattice_reduction_gpu.h +++ b/Grid/lattice/Lattice_reduction_gpu.h @@ -1,7 +1,13 @@ NAMESPACE_BEGIN(Grid); -#define WARP_SIZE 32 +#ifdef GRID_HIP +extern hipDeviceProp_t *gpu_props; +#endif +#ifdef GRID_CUDA extern cudaDeviceProp *gpu_props; +#endif + +#define WARP_SIZE 32 __device__ unsigned int retirementCount = 0; template @@ -19,7 +25,12 @@ template void getNumBlocksAndThreads(const Iterator n, const size_t sizeofsobj, Iterator &threads, Iterator &blocks) { int device; +#ifdef GRID_CUDA cudaGetDevice(&device); +#endif +#ifdef GRID_HIP + hipGetDevice(&device); +#endif Iterator warpSize = gpu_props[device].warpSize; Iterator sharedMemPerBlock = gpu_props[device].sharedMemPerBlock; @@ -147,7 +158,7 @@ __global__ void 
reduceKernel(const vobj *lat, sobj *buffer, Iterator n) { sobj *smem = (sobj *)shmem_pointer; // wait until all outstanding memory instructions in this thread are finished - __threadfence(); + acceleratorFence(); if (tid==0) { unsigned int ticket = atomicInc(&retirementCount, gridDim.x); @@ -156,8 +167,8 @@ __global__ void reduceKernel(const vobj *lat, sobj *buffer, Iterator n) { } // each thread must read the correct value of amLast - __syncthreads(); - + acceleratorSynchroniseAll(); + if (amLast) { // reduce buffer[0], ..., buffer[gridDim.x-1] Iterator i = tid; @@ -199,13 +210,7 @@ inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites) sobj *buffer_v = &buffer[0]; reduceKernel<<< numBlocks, numThreads, smemSize >>>(lat, buffer_v, size); - cudaDeviceSynchronize(); - - cudaError err = cudaGetLastError(); - if ( cudaSuccess != err ) { - printf("Cuda error %s\n",cudaGetErrorString( err )); - exit(0); - } + accelerator_barrier(); auto result = buffer_v[0]; return result; } From 32be2b13d31bb639eb6890979ccce09795ed5200 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sun, 24 May 2020 14:00:55 -0400 Subject: [PATCH 19/86] Updates for HiP --- Grid/threads/Accelerator.cc | 46 ++++++++++++++++++------------------- Grid/threads/Accelerator.h | 40 +++++++++++++++++++++++++++++--- 2 files changed, 60 insertions(+), 26 deletions(-) diff --git a/Grid/threads/Accelerator.cc b/Grid/threads/Accelerator.cc index 18cc406d..1cecfe0e 100644 --- a/Grid/threads/Accelerator.cc +++ b/Grid/threads/Accelerator.cc @@ -33,17 +33,17 @@ void acceleratorInit(void) for (int i = 0; i < nDevices; i++) { -#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("GpuInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory); +#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("AcceleratorCudaInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory); #define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d"); cudaGetDeviceProperties(&gpu_props[i], i); if ( 
world_rank == 0) { cudaDeviceProp prop; prop = gpu_props[i]; - printf("GpuInit: ========================\n"); - printf("GpuInit: Device Number : %d\n", i); - printf("GpuInit: ========================\n"); - printf("GpuInit: Device identifier: %s\n", prop.name); + printf("AcceleratorCudaInit: ========================\n"); + printf("AcceleratorCudaInit: Device Number : %d\n", i); + printf("AcceleratorCudaInit: ========================\n"); + printf("AcceleratorCudaInit: Device identifier: %s\n", prop.name); GPU_PROP(managedMemory); GPU_PROP(isMultiGpuBoard); @@ -55,12 +55,12 @@ void acceleratorInit(void) } #ifdef GRID_IBM_SUMMIT // IBM Jsrun makes cuda Device numbering screwy and not match rank - if ( world_rank == 0 ) printf("GpuInit: IBM Summit or similar - NOT setting device to node rank\n"); + if ( world_rank == 0 ) printf("AcceleratorCudaInit: IBM Summit or similar - NOT setting device to node rank\n"); #else - if ( world_rank == 0 ) printf("GpuInit: setting device to node rank\n"); + if ( world_rank == 0 ) printf("AcceleratorCudaInit: setting device to node rank\n"); cudaSetDevice(rank); #endif - if ( world_rank == 0 ) printf("GpuInit: ================================================\n"); + if ( world_rank == 0 ) printf("AcceleratorCudaInit: ================================================\n"); } #endif @@ -92,17 +92,17 @@ void acceleratorInit(void) for (int i = 0; i < nDevices; i++) { -#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("GpuInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory); +#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("AcceleratorHipInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory); #define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d"); hipGetDeviceProperties(&gpu_props[i], i); if ( world_rank == 0) { hipDeviceProp_t prop; prop = gpu_props[i]; - printf("GpuInit: ========================\n"); - printf("GpuInit: Device Number : %d\n", i); - printf("GpuInit: ========================\n"); - 
printf("GpuInit: Device identifier: %s\n", prop.name); + printf("AcceleratorHipInit: ========================\n"); + printf("AcceleratorHipInit: Device Number : %d\n", i); + printf("AcceleratorHipInit: ========================\n"); + printf("AcceleratorHipInit: Device identifier: %s\n", prop.name); // GPU_PROP(managedMemory); GPU_PROP(isMultiGpuBoard); @@ -114,12 +114,12 @@ void acceleratorInit(void) } #ifdef GRID_IBM_SUMMIT // IBM Jsrun makes cuda Device numbering screwy and not match rank - if ( world_rank == 0 ) printf("GpuInit: IBM Summit or similar - NOT setting device to node rank\n"); + if ( world_rank == 0 ) printf("AcceleratorHipInit: IBM Summit or similar - NOT setting device to node rank\n"); #else - if ( world_rank == 0 ) printf("GpuInit: setting device to node rank\n"); - cudaSetDevice(rank); + if ( world_rank == 0 ) printf("AcceleratorHipInit: setting device to node rank\n"); + hipSetDevice(rank); #endif - if ( world_rank == 0 ) printf("GpuInit: ================================================\n"); + if ( world_rank == 0 ) printf("AcceleratorHipInit: ================================================\n"); } #endif @@ -159,22 +159,22 @@ void acceleratorInit(void) /* for (int i = 0; i < nDevices; i++) { -#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("GpuInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory); +#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("AcceleratorSyclInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory); #define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d"); cudaGetDeviceProperties(&gpu_props[i], i); if ( world_rank == 0) { cudaDeviceProp prop; prop = gpu_props[i]; - printf("GpuInit: ========================\n"); - printf("GpuInit: Device Number : %d\n", i); - printf("GpuInit: ========================\n"); - printf("GpuInit: Device identifier: %s\n", prop.name); + printf("AcceleratorSyclInit: ========================\n"); + printf("AcceleratorSyclInit: Device Number : %d\n", i); + 
printf("AcceleratorSyclInit: ========================\n"); + printf("AcceleratorSyclInit: Device identifier: %s\n", prop.name); } } */ if ( world_rank == 0 ) { - printf("GpuInit: ================================================\n"); + printf("AcceleratorSyclInit: ================================================\n"); } } #endif diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h index e352bde0..b5c828aa 100644 --- a/Grid/threads/Accelerator.h +++ b/Grid/threads/Accelerator.h @@ -284,6 +284,7 @@ void LambdaApply(uint64_t numx, uint64_t numy, uint64_t numz, lambda Lambda) inline void *acceleratorAllocShared(size_t bytes) { +#if 0 void *ptr=NULL; auto err = hipMallocManaged((void **)&ptr,bytes); if( err != hipSuccess ) { @@ -291,6 +292,9 @@ inline void *acceleratorAllocShared(size_t bytes) printf(" hipMallocManaged failed for %d %s \n",bytes,hipGetErrorString(err)); } return ptr; +#else + return malloc(bytes); +#endif }; inline void *acceleratorAllocDevice(size_t bytes) @@ -304,10 +308,10 @@ inline void *acceleratorAllocDevice(size_t bytes) return ptr; }; -inline void acceleratorFreeShared(void *ptr){ hipFree(ptr);}; +inline void acceleratorFreeShared(void *ptr){ free(ptr);}; inline void acceleratorFreeDevice(void *ptr){ hipFree(ptr);}; -inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { cudaMemcpy(to,from,bytes, cudaMemcpyHostToDevice);} -inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);} +inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { hipMemcpy(to,from,bytes, hipMemcpyHostToDevice);} +inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ hipMemcpy(to,from,bytes, hipMemcpyDeviceToHost);} #endif @@ -379,5 +383,35 @@ accelerator_inline void acceleratorSynchronise(void) #endif return; } +accelerator_inline void acceleratorSynchroniseAll(void) +{ +#ifdef GRID_SIMT +#ifdef GRID_CUDA + __syncthreads(); +#endif +#ifdef 
GRID_SYCL + // No barrier call on SYCL?? // Option get __spir:: stuff to do warp barrier +#endif +#ifdef GRID_HIP + __syncthreads(); +#endif +#endif + return; +} +accelerator_inline void acceleratorFence(void) +{ +#ifdef GRID_SIMT +#ifdef GRID_CUDA + __threadfence(); +#endif +#ifdef GRID_SYCL + // FIXMEE +#endif +#ifdef GRID_HIP + __threadfence(); +#endif +#endif + return; +} NAMESPACE_END(Grid); From c7519a237a44eeb7b8a26076ebe97c2821bd968f Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sun, 24 May 2020 14:02:47 -0400 Subject: [PATCH 20/86] Assertions fail on HIP foor unknown reasons - dedbugging --- Grid/util/Coordinate.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Grid/util/Coordinate.h b/Grid/util/Coordinate.h index c319a14d..7f1d31c0 100644 --- a/Grid/util/Coordinate.h +++ b/Grid/util/Coordinate.h @@ -52,14 +52,14 @@ public: accelerator_inline size_type size(void) const { return _size; }; accelerator_inline void clear(void) { resize(0);} accelerator_inline void resize(size_type sz) { +#ifndef GRID_HIP assert(sz>=0); assert(sz<=MaxEntries); +#endif _size = sz; } accelerator_inline void resize(size_type sz,const value &val) { - assert(sz>=0); - assert(sz<=MaxEntries); - _size = sz; + resize(sz); for(int s=0;s ©me) { From d1f1ccc70534609f330da292f84dec6e3ae55171 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sun, 24 May 2020 21:18:49 -0400 Subject: [PATCH 21/86] HIP changes --- Grid/lattice/Lattice_reduction.h | 44 ++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index 99d799b6..2d50fdd7 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -24,7 +24,7 @@ Author: paboyle #include -#ifdef GRID_CUDA +#if defined(GRID_CUDA)||defined(GRID_HIP) #include #endif @@ -64,21 +64,29 @@ inline typename vobj::scalar_object sum_cpu(const vobj *arg, Integer osites) return ssum; } +/* template 
inline typename vobj::scalar_object sum(const vobj *arg, Integer osites) { -#ifdef GRID_CUDA +#if defined(GRID_CUDA)||defined(GRID_HIP) return sum_gpu(arg,osites); #else return sum_cpu(arg,osites); #endif } +*/ template inline typename vobj::scalar_object sum(const Lattice &arg) { +#if defined(GRID_CUDA) auto arg_v = arg.View(AcceleratorRead); Integer osites = arg.Grid()->oSites(); - auto ssum= sum(&arg_v[0],osites); + auto ssum= sum_gpu(&arg_v[0],osites); +#else + auto arg_v = arg.View(CpuRead); + Integer osites = arg.Grid()->oSites(); + auto ssum= sum_cpu(&arg_v[0],osites); +#endif arg.Grid()->GlobalSum(ssum); return ssum; } @@ -101,14 +109,14 @@ inline ComplexD innerProduct(const Lattice &left,const Lattice &righ GridBase *grid = left.Grid(); + const uint64_t nsimd = grid->Nsimd(); + const uint64_t sites = grid->oSites(); + +#if defined(GRID_CUDA) // Might make all code paths go this way. auto left_v = left.View(AcceleratorRead); auto right_v=right.View(AcceleratorRead); - const uint64_t nsimd = grid->Nsimd(); - const uint64_t sites = grid->oSites(); - -#ifdef GRID_CUDA // GPU - SIMT lane compliance... typedef decltype(innerProduct(left_v[0],right_v[0])) inner_t; Vector inner_tmp(sites); @@ -125,17 +133,21 @@ inline ComplexD innerProduct(const Lattice &left,const Lattice &righ // Need a sumD that sums in double nrm = TensorRemove(sumD_gpu(inner_tmp_v,sites)); #else + // Might make all code paths go this way. 
+ auto left_v = left.View(CpuRead); + auto right_v=right.View(CpuRead); + // CPU typedef decltype(innerProductD(left_v[0],right_v[0])) inner_t; Vector inner_tmp(sites); auto inner_tmp_v = &inner_tmp[0]; - accelerator_for( ss, sites, nsimd,{ + thread_for( ss, sites,{ auto x_l = left_v[ss]; auto y_l = right_v[ss]; inner_tmp_v[ss]=innerProductD(x_l,y_l); }) - nrm = TensorRemove(sum(inner_tmp_v,sites)); + nrm = TensorRemove(sum_cpu(inner_tmp_v,sites)); #endif grid->GlobalSum(nrm); @@ -167,15 +179,15 @@ axpby_norm_fast(Lattice &z,sobj a,sobj b,const Lattice &x,const Latt GridBase *grid = x.Grid(); + const uint64_t nsimd = grid->Nsimd(); + const uint64_t sites = grid->oSites(); + +#if defined(GRID_CUDA)||defined(GRID_HIP) + // GPU auto x_v=x.View(AcceleratorRead); auto y_v=y.View(AcceleratorRead); auto z_v=z.View(AcceleratorWrite); - const uint64_t nsimd = grid->Nsimd(); - const uint64_t sites = grid->oSites(); - -#ifdef GRID_CUDA - // GPU typedef decltype(innerProduct(x_v[0],y_v[0])) inner_t; Vector inner_tmp(sites); auto inner_tmp_v = &inner_tmp[0]; @@ -188,6 +200,10 @@ axpby_norm_fast(Lattice &z,sobj a,sobj b,const Lattice &x,const Latt nrm = real(TensorRemove(sumD_gpu(inner_tmp_v,sites))); #else + auto x_v=x.View(AcceleratorRead); + auto y_v=y.View(AcceleratorRead); + auto z_v=z.View(AcceleratorWrite); + // CPU typedef decltype(innerProductD(x_v[0],y_v[0])) inner_t; Vector inner_tmp(sites); From cb0d1b3399e1fc05b8f4f8b6b6b61dba53b934e2 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sun, 24 May 2020 21:27:00 -0400 Subject: [PATCH 22/86] hopefullly fix buildd fail --- Grid/lattice/Lattice_reduction.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index 2d50fdd7..f2b0dc43 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -64,7 +64,7 @@ inline typename vobj::scalar_object sum_cpu(const vobj *arg, Integer osites) return ssum; } -/* + template 
inline typename vobj::scalar_object sum(const vobj *arg, Integer osites) { @@ -74,7 +74,7 @@ inline typename vobj::scalar_object sum(const vobj *arg, Integer osites) return sum_cpu(arg,osites); #endif } -*/ + template inline typename vobj::scalar_object sum(const Lattice &arg) { From 7bc0166c1c1648b4961f4fa5c46294c3f8084cff Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 25 May 2020 08:34:19 -0700 Subject: [PATCH 23/86] SYCLL maknig happy - must avoid non ttrivial copy constructors --- Grid/lattice/Lattice_base.h | 4 ++-- Grid/lattice/Lattice_view.h | 13 +++++++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/Grid/lattice/Lattice_base.h b/Grid/lattice/Lattice_base.h index 17f84d44..9aae3333 100644 --- a/Grid/lattice/Lattice_base.h +++ b/Grid/lattice/Lattice_base.h @@ -217,8 +217,8 @@ public: } template inline Lattice & operator = (const sobj & r){ - auto me = View(AcceleratorWriteDiscard); - accelerator_for(ss,me.size(),1,{ + auto me = View(CpuWrite); + thread_for(ss,me.size(),{ me[ss]= r; }); return *this; diff --git a/Grid/lattice/Lattice_view.h b/Grid/lattice/Lattice_view.h index 183423f7..b5887d50 100644 --- a/Grid/lattice/Lattice_view.h +++ b/Grid/lattice/Lattice_view.h @@ -107,10 +107,18 @@ class MemViewDeleter { template class LatticeView : public LatticeExprView { +#ifndef GRID_UVM std::shared_ptr Deleter; +#endif public: - LatticeView(const LatticeView &orig) : LatticeExprView(orig) { } - LatticeView(const LatticeAccelerator &refer_to_me,ViewMode mode) : +#ifdef GRID_UVM + LatticeView(const LatticeAccelerator &refer_to_me,ViewMode mode) : + LatticeExprView (refer_to_me) + { + } +#else + LatticeView(const LatticeView &orig) : LatticeExprView(orig) { } + LatticeView(const LatticeAccelerator &refer_to_me,ViewMode mode) : LatticeExprView (refer_to_me), Deleter(new MemViewDeleter) { // std::cout << "FIXME - copy shared pointer? 
View Open in LatticeView"<_odata<cpu_ptr = this->cpu_ptr; Deleter->mode = mode; } +#endif }; ///////////////////////////////////////////////////////////////////////////////////////// From 949ac3cd2474ee47072791a23d5453ca8a19dcd9 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 25 May 2020 08:35:28 -0700 Subject: [PATCH 24/86] Must avoid non-trivial copy constructors --- Grid/stencil/Stencil.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Grid/stencil/Stencil.h b/Grid/stencil/Stencil.h index d70bac93..3bb80cfe 100644 --- a/Grid/stencil/Stencil.h +++ b/Grid/stencil/Stencil.h @@ -181,9 +181,15 @@ class CartesianStencilAccelerator { template class CartesianStencilView : public CartesianStencilAccelerator { +#ifndef GRID_UVM std::shared_ptr Deleter; +#endif public: // +#ifdef GRID_UVM + CartesianStencilView (const CartesianStencilAccelerator &refer_to_me,ViewMode mode) + : CartesianStencilAccelerator(refer_to_me){}; +#else CartesianStencilView (const CartesianStencilView &refer_to_me) : CartesianStencilAccelerator(refer_to_me), Deleter(refer_to_me.Deleter) { } @@ -199,7 +205,8 @@ class CartesianStencilView : public CartesianStencilAccelerator Date: Mon, 25 May 2020 08:35:56 -0700 Subject: [PATCH 25/86] Sycl happier --- Grid/threads/Accelerator.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/Grid/threads/Accelerator.cc b/Grid/threads/Accelerator.cc index 1cecfe0e..ae02b437 100644 --- a/Grid/threads/Accelerator.cc +++ b/Grid/threads/Accelerator.cc @@ -153,9 +153,6 @@ void acceleratorInit(void) if ((localRankStr = getenv(ENV_RANK_OMPI )) != NULL) { world_rank = atoi(localRankStr);} if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);} - if ( world_rank == 0 ) { - GridBanner(); - } /* for (int i = 0; i < nDevices; i++) { From ee63721badd3e7da0dd4584a2bf146d7010d3f16 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 25 May 2020 08:36:24 -0700 Subject: [PATCH 26/86] int unhappiness sycl fix --- 
Grid/threads/Accelerator.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h index b5c828aa..1cb6d637 100644 --- a/Grid/threads/Accelerator.h +++ b/Grid/threads/Accelerator.h @@ -194,13 +194,13 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) { #define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... ) \ theGridAccelerator->submit([&](cl::sycl::handler &cgh) { \ - int nt=acceleratorThreads(); \ + unsigned long nt=acceleratorThreads(); \ unsigned long unum1 = num1; \ unsigned long unum2 = num2; \ cl::sycl::range<3> local {nt,1,nsimd}; \ - cl::sycl::range<3> global{unum1,unum2,nsimd}; \ + cl::sycl::range<3> global{unum1,unum2,nsimd}; \ cgh.parallel_for( \ - cl::sycl::nd_range<3>(global,local), \ + cl::sycl::nd_range<3>(global,local), \ [=] (cl::sycl::nd_item<3> item) mutable { \ auto iter1 = item.get_global_id(0); \ auto iter2 = item.get_global_id(1); \ From cf2938688ae68db7c2389a72bd12abaa2dfc1266 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 25 May 2020 08:36:53 -0700 Subject: [PATCH 27/86] Sycl unhappy fix --- benchmarks/Benchmark_ITT.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmarks/Benchmark_ITT.cc b/benchmarks/Benchmark_ITT.cc index adfd4b38..08e25668 100644 --- a/benchmarks/Benchmark_ITT.cc +++ b/benchmarks/Benchmark_ITT.cc @@ -242,9 +242,9 @@ public: Vec rn ; random(sRNG,rn); - LatticeVec z(&Grid); z=rn; - LatticeVec x(&Grid); x=rn; - LatticeVec y(&Grid); y=rn; + LatticeVec z(&Grid); z=Zero(); + LatticeVec x(&Grid); x=Zero(); + LatticeVec y(&Grid); y=Zero(); double a=2.0; uint64_t Nloop=NLOOP; From 006cc8a8f151f36e73f671e98769a1f9bd9374e2 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 28 May 2020 08:33:06 -0400 Subject: [PATCH 28/86] Staggereed move to accelerator --- Grid/qcd/action/fermion/Fermion.h | 5 + .../fermion/ImprovedStaggeredFermion5D.h | 4 +- .../action/fermion/NaiveStaggeredFermion.h | 194 +++++++ 
Grid/qcd/action/fermion/StaggeredKernels.h | 53 +- ...ImprovedStaggeredFermion5DImplementation.h | 126 +---- .../ImprovedStaggeredFermionImplementation.h | 102 +--- .../NaiveStaggeredFermionImplementation.h | 499 ++++++++++++++++++ .../implementation/StaggeredKernelsAsm.h | 43 +- .../implementation/StaggeredKernelsHand.h | 42 +- .../StaggeredKernelsImplementation.h | 222 +++++--- .../WilsonKernelsImplementation.h | 4 + .../NaiveStaggeredFermionInstantiation.cc | 36 ++ ...iveStaggeredFermionInstantiation.cc.master | 37 ++ ...geredFermionInstantiationStaggeredImplD.cc | 1 + ...geredFermionInstantiationStaggeredImplF.cc | 1 + .../instantiation/generate_instantiations.sh | 1 + benchmarks/Benchmark_staggered.cc | 23 - tests/core/Test_staggered_naive.cc | 282 ++++++++++ 18 files changed, 1319 insertions(+), 356 deletions(-) create mode 100644 Grid/qcd/action/fermion/NaiveStaggeredFermion.h create mode 100644 Grid/qcd/action/fermion/implementation/NaiveStaggeredFermionImplementation.h create mode 100644 Grid/qcd/action/fermion/instantiation/NaiveStaggeredFermionInstantiation.cc create mode 100644 Grid/qcd/action/fermion/instantiation/NaiveStaggeredFermionInstantiation.cc.master create mode 120000 Grid/qcd/action/fermion/instantiation/StaggeredImplD/NaiveStaggeredFermionInstantiationStaggeredImplD.cc create mode 120000 Grid/qcd/action/fermion/instantiation/StaggeredImplF/NaiveStaggeredFermionInstantiationStaggeredImplF.cc create mode 100644 tests/core/Test_staggered_naive.cc diff --git a/Grid/qcd/action/fermion/Fermion.h b/Grid/qcd/action/fermion/Fermion.h index af5bebcc..16252340 100644 --- a/Grid/qcd/action/fermion/Fermion.h +++ b/Grid/qcd/action/fermion/Fermion.h @@ -57,6 +57,7 @@ NAMESPACE_CHECK(WilsonClover); #include // 5d base used by all 5d overlap types NAMESPACE_CHECK(Wilson5D); +#include #include #include NAMESPACE_CHECK(Staggered); @@ -282,6 +283,10 @@ typedef ImprovedStaggeredFermion ImprovedStaggeredFermionR; typedef ImprovedStaggeredFermion 
ImprovedStaggeredFermionF; typedef ImprovedStaggeredFermion ImprovedStaggeredFermionD; +typedef NaiveStaggeredFermion NaiveStaggeredFermionR; +typedef NaiveStaggeredFermion NaiveStaggeredFermionF; +typedef NaiveStaggeredFermion NaiveStaggeredFermionD; + typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermion5DR; typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermion5DF; typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermion5DD; diff --git a/Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h b/Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h index 0ce1c701..2578c288 100644 --- a/Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h +++ b/Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h @@ -62,8 +62,8 @@ public: double DhopCalls; double DhopCommTime; double DhopComputeTime; - double DhopComputeTime2; - double DhopFaceTime; + double DhopComputeTime2; + double DhopFaceTime; /////////////////////////////////////////////////////////////// // Implement the abstract base diff --git a/Grid/qcd/action/fermion/NaiveStaggeredFermion.h b/Grid/qcd/action/fermion/NaiveStaggeredFermion.h new file mode 100644 index 00000000..8715f3c2 --- /dev/null +++ b/Grid/qcd/action/fermion/NaiveStaggeredFermion.h @@ -0,0 +1,194 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/ImprovedStaggered.h + +Copyright (C) 2015 + +Author: Azusa Yamaguchi, Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ + /* END LEGAL */ +#ifndef GRID_QCD_NAIVE_STAG_FERMION_H +#define GRID_QCD_NAIVE_STAG_FERMION_H + +NAMESPACE_BEGIN(Grid); + +class NaiveStaggeredFermionStatic { +public: + static const std::vector directions; + static const std::vector displacements; + static const int npoint = 8; +}; + +template +class NaiveStaggeredFermion : public StaggeredKernels, public NaiveStaggeredFermionStatic { +public: + INHERIT_IMPL_TYPES(Impl); + typedef StaggeredKernels Kernels; + + FermionField _tmp; + FermionField &tmp(void) { return _tmp; } + + //////////////////////////////////////// + // Performance monitoring + //////////////////////////////////////// + void Report(void); + void ZeroCounters(void); + double DhopTotalTime; + double DhopCalls; + double DhopCommTime; + double DhopComputeTime; + double DhopComputeTime2; + double DhopFaceTime; + + /////////////////////////////////////////////////////////////// + // Implement the abstract base + /////////////////////////////////////////////////////////////// + GridBase *GaugeGrid(void) { return _grid; } + GridBase *GaugeRedBlackGrid(void) { return _cbgrid; } + GridBase *FermionGrid(void) { return _grid; } + GridBase *FermionRedBlackGrid(void) { return _cbgrid; } + + ////////////////////////////////////////////////////////////////// + // override multiply; cut number routines if pass dagger argument + // and also make interface more uniformly consistent + ////////////////////////////////////////////////////////////////// + RealD M(const FermionField &in, FermionField &out); + RealD Mdag(const FermionField &in, FermionField &out); + + 
///////////////////////////////////////////////////////// + // half checkerboard operations + ///////////////////////////////////////////////////////// + void Meooe(const FermionField &in, FermionField &out); + void MeooeDag(const FermionField &in, FermionField &out); + void Mooee(const FermionField &in, FermionField &out); + void MooeeDag(const FermionField &in, FermionField &out); + void MooeeInv(const FermionField &in, FermionField &out); + void MooeeInvDag(const FermionField &in, FermionField &out); + + //////////////////////// + // Derivative interface + //////////////////////// + // Interface calls an internal routine + void DhopDeriv (GaugeField &mat, const FermionField &U, const FermionField &V, int dag); + void DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag); + void DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag); + + /////////////////////////////////////////////////////////////// + // non-hermitian hopping term; half cb or both + /////////////////////////////////////////////////////////////// + void Dhop (const FermionField &in, FermionField &out, int dag); + void DhopOE(const FermionField &in, FermionField &out, int dag); + void DhopEO(const FermionField &in, FermionField &out, int dag); + + /////////////////////////////////////////////////////////////// + // Multigrid assistance; force term uses too + /////////////////////////////////////////////////////////////// + void Mdir(const FermionField &in, FermionField &out, int dir, int disp); + void MdirAll(const FermionField &in, std::vector &out); + void DhopDir(const FermionField &in, FermionField &out, int dir, int disp); + + /////////////////////////////////////////////////////////////// + // Extra methods added by derived + /////////////////////////////////////////////////////////////// + void DerivInternal(StencilImpl &st, + DoubledGaugeField &U, + GaugeField &mat, + const FermionField &A, const FermionField &B, int dag); + + void 
DhopInternal(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + const FermionField &in, FermionField &out, int dag); + void DhopInternalSerialComms(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + const FermionField &in, FermionField &out, int dag); + void DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + const FermionField &in, FermionField &out, int dag); + + ////////////////////////////////////////////////////////////////////////// + // Grid own interface Constructor + ////////////////////////////////////////////////////////////////////////// + NaiveStaggeredFermion(GaugeField &_U, GridCartesian &Fgrid, + GridRedBlackCartesian &Hgrid, RealD _mass, + RealD _c1, RealD _u0, + const ImplParams &p = ImplParams()); + NaiveStaggeredFermion(GridCartesian &Fgrid, + GridRedBlackCartesian &Hgrid, RealD _mass, + RealD _c1, RealD _u0, + const ImplParams &p = ImplParams()); + + // DoubleStore impl dependent + void ImportGauge (const GaugeField &_U ); + DoubledGaugeField &GetU(void) { return Umu ; } ; + void CopyGaugeCheckerboards(void); + + /////////////////////////////////////////////////////////////// + // Data members require to support the functionality + /////////////////////////////////////////////////////////////// + + // protected: +public: + // any other parameters of action ??? 
+ virtual int isTrivialEE(void) { return 1; }; + virtual RealD Mass(void) { return mass; } + RealD mass; + RealD u0; + RealD c1; + + GridBase *_grid; + GridBase *_cbgrid; + + // Defines the stencils for even and odd + StencilImpl Stencil; + StencilImpl StencilEven; + StencilImpl StencilOdd; + + // Copy of the gauge field , with even and odd subsets + DoubledGaugeField Umu; + DoubledGaugeField UmuEven; + DoubledGaugeField UmuOdd; + + LebesgueOrder Lebesgue; + LebesgueOrder LebesgueEvenOdd; + + /////////////////////////////////////////////////////////////// + // Conserved current utilities + /////////////////////////////////////////////////////////////// + void ContractConservedCurrent(PropagatorField &q_in_1, + PropagatorField &q_in_2, + PropagatorField &q_out, + PropagatorField &src, + Current curr_type, + unsigned int mu); + void SeqConservedCurrent(PropagatorField &q_in, + PropagatorField &q_out, + PropagatorField &srct, + Current curr_type, + unsigned int mu, + unsigned int tmin, + unsigned int tmax, + ComplexField &lattice_cmplx); +}; + +typedef NaiveStaggeredFermion NaiveStaggeredFermionF; +typedef NaiveStaggeredFermion NaiveStaggeredFermionD; + +NAMESPACE_END(Grid); + +#endif diff --git a/Grid/qcd/action/fermion/StaggeredKernels.h b/Grid/qcd/action/fermion/StaggeredKernels.h index 6ef0ab9d..0203dca2 100644 --- a/Grid/qcd/action/fermion/StaggeredKernels.h +++ b/Grid/qcd/action/fermion/StaggeredKernels.h @@ -47,23 +47,34 @@ template class StaggeredKernels : public FermionOperator , pub INHERIT_IMPL_TYPES(Impl); typedef FermionOperator Base; -public: - - void DhopDirKernel(StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf, - int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dir,int disp); + public: + + void DhopImproved(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeField &U, DoubledGaugeField &UUU, + const FermionField &in, FermionField &out, int dag, int interior,int exterior); + void 
DhopNaive(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeField &U, + const FermionField &in, FermionField &out, int dag, int interior,int exterior); + + void DhopDirKernel(StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dir,int disp); + protected: /////////////////////////////////////////////////////////////////////////////////////// // Generic Nc kernels /////////////////////////////////////////////////////////////////////////////////////// - void DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo, + template + void DhopSiteGeneric(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, const FermionFieldView &in, FermionFieldView &out,int dag); - void DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &lo, + template + void DhopSiteGenericInt(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, const FermionFieldView &in, FermionFieldView &out,int dag); - void DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &lo, + template + void DhopSiteGenericExt(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, const FermionFieldView &in, FermionFieldView &out,int dag); @@ -71,15 +82,18 @@ public: /////////////////////////////////////////////////////////////////////////////////////// // Nc=3 specific kernels /////////////////////////////////////////////////////////////////////////////////////// - void DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, + template + void DhopSiteHand(StencilView &st, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, const FermionFieldView &in, FermionFieldView &out,int dag); - void DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, + template + void DhopSiteHandInt(StencilView &st, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, 
SiteSpinor * buf, int LLs, int sU, const FermionFieldView &in, FermionFieldView &out,int dag); - void DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, + template + void DhopSiteHandExt(StencilView &st, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, const FermionFieldView &in, FermionFieldView &out,int dag); @@ -87,27 +101,10 @@ public: /////////////////////////////////////////////////////////////////////////////////////// // Asm Nc=3 specific kernels /////////////////////////////////////////////////////////////////////////////////////// - void DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, + void DhopSiteAsm(StencilView &st, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, const FermionFieldView &in, FermionFieldView &out,int dag); - /////////////////////////////////////////////////////////////////////////////////////////////////// - // Generic interface; fan out to right routine - /////////////////////////////////////////////////////////////////////////////////////////////////// - void DhopSite(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor * buf, int LLs, int sU, - const FermionFieldView &in, FermionFieldView &out, int interior=1,int exterior=1); - - void DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor * buf, int LLs, int sU, - const FermionFieldView &in, FermionFieldView &out, int interior=1,int exterior=1); - - void DhopSite(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor * buf, int LLs, int sU, - const FermionFieldView &in, FermionFieldView &out, int dag, int interior,int exterior); public: diff --git a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h index 01d5578f..58d2b368 100644 
--- a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h @@ -281,11 +281,9 @@ void ImprovedStaggeredFermion5D::DhopInternal(StencilImpl & st, LebesgueOr DoubledGaugeField & U,DoubledGaugeField & UUU, const FermionField &in, FermionField &out,int dag) { -#ifdef GRID_OMP if ( StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsAndCompute ) DhopInternalOverlappedComms(st,lo,U,UUU,in,out,dag); else -#endif DhopInternalSerialComms(st,lo,U,UUU,in,out,dag); } @@ -294,9 +292,7 @@ void ImprovedStaggeredFermion5D::DhopInternalOverlappedComms(StencilImpl & DoubledGaugeField & U,DoubledGaugeField & UUU, const FermionField &in, FermionField &out,int dag) { -#ifdef GRID_OMP // assert((dag==DaggerNo) ||(dag==DaggerYes)); - Compressor compressor; int LLs = in.Grid()->_rdimensions[0]; @@ -305,99 +301,42 @@ void ImprovedStaggeredFermion5D::DhopInternalOverlappedComms(StencilImpl & DhopFaceTime-=usecond(); st.Prepare(); st.HaloGather(in,compressor); + DhopFaceTime+=usecond(); + + DhopCommTime -=usecond(); + std::vector > requests; + st.CommunicateBegin(requests); + // st.HaloExchangeOptGather(in,compressor); // Wilson compressor + DhopFaceTime-=usecond(); st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms DhopFaceTime+=usecond(); - double ctime=0; - double ptime=0; - ////////////////////////////////////////////////////////////////////////////////////////////////////// - // Ugly explicit thread mapping introduced for OPA reasons. + // Remove explicit thread mapping introduced for OPA reasons. 
////////////////////////////////////////////////////////////////////////////////////////////////////// -#pragma omp parallel reduction(max:ctime) reduction(max:ptime) + DhopComputeTime-=usecond(); { - int tid = omp_get_thread_num(); - int nthreads = omp_get_num_threads(); - int ncomms = CartesianCommunicator::nCommThreads; - if (ncomms == -1) ncomms = 1; - assert(nthreads > ncomms); - if (tid >= ncomms) { - double start = usecond(); - nthreads -= ncomms; - int ttid = tid - ncomms; - int n = U.Grid()->oSites(); // 4d vol - int chunk = n / nthreads; - int rem = n % nthreads; - int myblock, myn; - if (ttid < rem) { - myblock = ttid * chunk + ttid; - myn = chunk+1; - } else { - myblock = ttid*chunk + rem; - myn = chunk; - } - - // do the compute - auto U_v = U.View(CpuRead); - auto UUU_v = UUU.View(CpuRead); - auto in_v = in.View(CpuRead); - auto out_v = out.View(CpuWrite); - - if (dag == DaggerYes) { - for (int ss = myblock; ss < myblock+myn; ++ss) { - int sU = ss; - // Interior = 1; Exterior = 0; must implement for staggered - Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,1,0); //<--------- - } - } else { - for (int ss = myblock; ss < myblock+myn; ++ss) { - // Interior = 1; Exterior = 0; - int sU = ss; - Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,1,0); //<------------ - } - } - ptime = usecond() - start; - } else { - double start = usecond(); - st.CommunicateThreaded(); - ctime = usecond() - start; - } + int interior=1; + int exterior=0; + Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior); } - DhopCommTime += ctime; - DhopComputeTime+=ptime; - - // First to enter, last to leave timing - st.CollateThreads(); + DhopComputeTime+=usecond(); DhopFaceTime-=usecond(); st.CommsMerge(compressor); DhopFaceTime+=usecond(); - DhopComputeTime2-=usecond(); + st.CommunicateComplete(requests); + DhopCommTime +=usecond(); - auto U_v = U.View(CpuRead); - auto UUU_v = UUU.View(CpuRead); - auto in_v = in.View(CpuRead); - auto 
out_v = out.View(CpuWrite); - if (dag == DaggerYes) { - int sz=st.surface_list.size(); - thread_for( ss,sz,{ - int sU = st.surface_list[ss]; - Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,0,1); //<---------- - }); - } else { - int sz=st.surface_list.size(); - thread_for( ss,sz,{ - int sU = st.surface_list[ss]; - Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,0,1);//<---------- - }); + DhopComputeTime2-=usecond(); + { + int interior=0; + int exterior=1; + Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior); } DhopComputeTime2+=usecond(); -#else - assert(0); -#endif - } template @@ -408,8 +347,6 @@ void ImprovedStaggeredFermion5D::DhopInternalSerialComms(StencilImpl & st, Compressor compressor; int LLs = in.Grid()->_rdimensions[0]; - - //double t1=usecond(); DhopTotalTime -= usecond(); DhopCommTime -= usecond(); @@ -418,28 +355,13 @@ void ImprovedStaggeredFermion5D::DhopInternalSerialComms(StencilImpl & st, DhopComputeTime -= usecond(); // Dhop takes the 4d grid from U, and makes a 5d index for fermion - auto U_v = U.View(CpuRead); - auto UUU_v = UUU.View(CpuRead); - auto in_v = in.View(CpuRead); - auto out_v = out.View(CpuWrite); - if (dag == DaggerYes) { - thread_for( ss,U.Grid()->oSites(),{ - int sU=ss; - Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), LLs, sU,in_v, out_v); - }); - } else { - thread_for( ss,U.Grid()->oSites(),{ - int sU=ss; - Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v); - }); + { + int interior=1; + int exterior=1; + Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior); } DhopComputeTime += usecond(); DhopTotalTime += usecond(); - //double t2=usecond(); - //std::cout << __FILE__ << " " << __func__ << " Total Time " << DhopTotalTime << std::endl; - //std::cout << __FILE__ << " " << __func__ << " Total Time Org " << t2-t1 << std::endl; - //std::cout << __FILE__ << " " << __func__ << " Comml Time " << DhopCommTime << std::endl; - //std::cout << __FILE__ << " 
" << __func__ << " Compute Time " << DhopComputeTime << std::endl; } /*CHANGE END*/ diff --git a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h index 1e59c4e7..64554100 100644 --- a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h @@ -395,11 +395,9 @@ void ImprovedStaggeredFermion::DhopInternal(StencilImpl &st, LebesgueOrder const FermionField &in, FermionField &out, int dag) { -#ifdef GRID_OMP if ( StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsAndCompute ) DhopInternalOverlappedComms(st,lo,U,UUU,in,out,dag); else -#endif DhopInternalSerialComms(st,lo,U,UUU,in,out,dag); } template @@ -409,7 +407,6 @@ void ImprovedStaggeredFermion::DhopInternalOverlappedComms(StencilImpl &st const FermionField &in, FermionField &out, int dag) { -#ifdef GRID_OMP Compressor compressor; int len = U.Grid()->oSites(); @@ -418,60 +415,30 @@ void ImprovedStaggeredFermion::DhopInternalOverlappedComms(StencilImpl &st DhopFaceTime -= usecond(); st.Prepare(); st.HaloGather(in,compressor); - st.CommsMergeSHM(compressor); DhopFaceTime += usecond(); + DhopCommTime -=usecond(); + std::vector > requests; + st.CommunicateBegin(requests); + + DhopFaceTime-=usecond(); + st.CommsMergeSHM(compressor); + DhopFaceTime+= usecond(); + ////////////////////////////////////////////////////////////////////////////////////////////////////// - // Ugly explicit thread mapping introduced for OPA reasons. 
+ // Removed explicit thread comms ////////////////////////////////////////////////////////////////////////////////////////////////////// DhopComputeTime -= usecond(); -#pragma omp parallel { - int tid = omp_get_thread_num(); - int nthreads = omp_get_num_threads(); - int ncomms = CartesianCommunicator::nCommThreads; - if (ncomms == -1) ncomms = 1; - assert(nthreads > ncomms); - - if (tid >= ncomms) { - nthreads -= ncomms; - int ttid = tid - ncomms; - int n = len; - int chunk = n / nthreads; - int rem = n % nthreads; - int myblock, myn; - if (ttid < rem) { - myblock = ttid * chunk + ttid; - myn = chunk+1; - } else { - myblock = ttid*chunk + rem; - myn = chunk; - } - - // do the compute - auto U_v = U.View(CpuRead); - auto UUU_v = UUU.View(CpuRead); - auto in_v = in.View(CpuRead); - auto out_v = out.View(CpuWrite); - if (dag == DaggerYes) { - for (int ss = myblock; ss < myblock+myn; ++ss) { - int sU = ss; - // Interior = 1; Exterior = 0; must implement for staggered - Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,1,0); - } - } else { - for (int ss = myblock; ss < myblock+myn; ++ss) { - // Interior = 1; Exterior = 0; - int sU = ss; - Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,1,0); - } - } - } else { - st.CommunicateThreaded(); - } + int interior=1; + int exterior=0; + Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior); } DhopComputeTime += usecond(); + st.CommunicateComplete(requests); + DhopCommTime +=usecond(); + // First to enter, last to leave timing DhopFaceTime -= usecond(); st.CommsMerge(compressor); @@ -479,28 +446,11 @@ void ImprovedStaggeredFermion::DhopInternalOverlappedComms(StencilImpl &st DhopComputeTime2 -= usecond(); { - auto U_v = U.View(CpuRead); - auto UUU_v = UUU.View(CpuRead); - auto in_v = in.View(CpuRead); - auto out_v = out.View(CpuWrite); - if (dag == DaggerYes) { - int sz=st.surface_list.size(); - thread_for(ss,sz,{ - int sU = st.surface_list[ss]; - 
Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,0,1); - }); - } else { - int sz=st.surface_list.size(); - thread_for(ss,sz,{ - int sU = st.surface_list[ss]; - Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,0,1); - }); - } + int interior=0; + int exterior=1; + Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior); } DhopComputeTime2 += usecond(); -#else - assert(0); -#endif } @@ -520,19 +470,11 @@ void ImprovedStaggeredFermion::DhopInternalSerialComms(StencilImpl &st, Le st.HaloExchange(in, compressor); DhopCommTime += usecond(); - auto U_v = U.View(CpuRead); - auto UUU_v = UUU.View(CpuRead); - auto in_v = in.View(CpuRead); - auto out_v = out.View(CpuWrite); DhopComputeTime -= usecond(); - if (dag == DaggerYes) { - thread_for(sss, in.Grid()->oSites(),{ - Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), 1, sss, in_v, out_v); - }); - } else { - thread_for(sss, in.Grid()->oSites(),{ - Kernels::DhopSite(st, lo, U_v, UUU_v, st.CommBuf(), 1, sss, in_v, out_v); - }); + { + int interior=1; + int exterior=1; + Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior); } DhopComputeTime += usecond(); DhopTotalTime += usecond(); diff --git a/Grid/qcd/action/fermion/implementation/NaiveStaggeredFermionImplementation.h b/Grid/qcd/action/fermion/implementation/NaiveStaggeredFermionImplementation.h new file mode 100644 index 00000000..ccd36f57 --- /dev/null +++ b/Grid/qcd/action/fermion/implementation/NaiveStaggeredFermionImplementation.h @@ -0,0 +1,499 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion.cc + +Copyright (C) 2015 + +Author: Azusa Yamaguchi, Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, 
or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +#pragma once + +NAMESPACE_BEGIN(Grid); + +///////////////////////////////// +// Constructor and gauge import +///////////////////////////////// + +template +NaiveStaggeredFermion::NaiveStaggeredFermion(GridCartesian &Fgrid, GridRedBlackCartesian &Hgrid, + RealD _mass, + RealD _c1, RealD _u0, + const ImplParams &p) + : Kernels(p), + _grid(&Fgrid), + _cbgrid(&Hgrid), + Stencil(&Fgrid, npoint, Even, directions, displacements,p), + StencilEven(&Hgrid, npoint, Even, directions, displacements,p), // source is Even + StencilOdd(&Hgrid, npoint, Odd, directions, displacements,p), // source is Odd + mass(_mass), + Lebesgue(_grid), + LebesgueEvenOdd(_cbgrid), + Umu(&Fgrid), + UmuEven(&Hgrid), + UmuOdd(&Hgrid), + _tmp(&Hgrid) +{ + int vol4; + int LLs=1; + c1=_c1; + u0=_u0; + vol4= _grid->oSites(); + Stencil.BuildSurfaceList(LLs,vol4); + vol4= _cbgrid->oSites(); + StencilEven.BuildSurfaceList(LLs,vol4); + StencilOdd.BuildSurfaceList(LLs,vol4); +} + +template +NaiveStaggeredFermion::NaiveStaggeredFermion(GaugeField &_U, GridCartesian &Fgrid, + GridRedBlackCartesian &Hgrid, RealD _mass, + RealD _c1, RealD _u0, + const ImplParams &p) + : NaiveStaggeredFermion(Fgrid,Hgrid,_mass,_c1,_u0,p) +{ + ImportGauge(_U); +} + +//////////////////////////////////////////////////////////// +// Momentum space 
propagator should be +// https://arxiv.org/pdf/hep-lat/9712010.pdf +// +// mom space action. +// gamma_mu i ( c1 sin pmu + c2 sin 3 pmu ) + m +// +// must track through staggered flavour/spin reduction in literature to +// turn to free propagator for the one component chi field, a la page 4/5 +// of above link to implmement fourier based solver. +//////////////////////////////////////////////////////////// + +template +void NaiveStaggeredFermion::CopyGaugeCheckerboards(void) +{ + pickCheckerboard(Even, UmuEven, Umu); + pickCheckerboard(Odd, UmuOdd , Umu); +} +template +void NaiveStaggeredFermion::ImportGauge(const GaugeField &_U) +{ + GaugeLinkField U(GaugeGrid()); + DoubledGaugeField _UUU(GaugeGrid()); + //////////////////////////////////////////////////////// + // Double Store should take two fields for Naik and one hop separately. + // Discard teh Naik as Naive + //////////////////////////////////////////////////////// + Impl::DoubleStore(GaugeGrid(), _UUU, Umu, _U, _U ); + + //////////////////////////////////////////////////////// + // Apply scale factors to get the right fermion Kinetic term + // Could pass coeffs into the double store to save work. 
+ // 0.5 ( U p(x+mu) - Udag(x-mu) p(x-mu) ) + //////////////////////////////////////////////////////// + for (int mu = 0; mu < Nd; mu++) { + + U = PeekIndex(Umu, mu); + PokeIndex(Umu, U*( 0.5*c1/u0), mu ); + + U = PeekIndex(Umu, mu+4); + PokeIndex(Umu, U*(-0.5*c1/u0), mu+4); + + } + + CopyGaugeCheckerboards(); +} + +///////////////////////////// +// Implement the interface +///////////////////////////// + +template +RealD NaiveStaggeredFermion::M(const FermionField &in, FermionField &out) { + out.Checkerboard() = in.Checkerboard(); + Dhop(in, out, DaggerNo); + return axpy_norm(out, mass, in, out); +} + +template +RealD NaiveStaggeredFermion::Mdag(const FermionField &in, FermionField &out) { + out.Checkerboard() = in.Checkerboard(); + Dhop(in, out, DaggerYes); + return axpy_norm(out, mass, in, out); +} + +template +void NaiveStaggeredFermion::Meooe(const FermionField &in, FermionField &out) { + if (in.Checkerboard() == Odd) { + DhopEO(in, out, DaggerNo); + } else { + DhopOE(in, out, DaggerNo); + } +} +template +void NaiveStaggeredFermion::MeooeDag(const FermionField &in, FermionField &out) { + if (in.Checkerboard() == Odd) { + DhopEO(in, out, DaggerYes); + } else { + DhopOE(in, out, DaggerYes); + } +} + +template +void NaiveStaggeredFermion::Mooee(const FermionField &in, FermionField &out) { + out.Checkerboard() = in.Checkerboard(); + typename FermionField::scalar_type scal(mass); + out = scal * in; +} + +template +void NaiveStaggeredFermion::MooeeDag(const FermionField &in, FermionField &out) { + out.Checkerboard() = in.Checkerboard(); + Mooee(in, out); +} + +template +void NaiveStaggeredFermion::MooeeInv(const FermionField &in, FermionField &out) { + out.Checkerboard() = in.Checkerboard(); + out = (1.0 / (mass)) * in; +} + +template +void NaiveStaggeredFermion::MooeeInvDag(const FermionField &in, FermionField &out) +{ + out.Checkerboard() = in.Checkerboard(); + MooeeInv(in, out); +} + +/////////////////////////////////// +// Internal 
+/////////////////////////////////// + +template +void NaiveStaggeredFermion::DerivInternal(StencilImpl &st, DoubledGaugeField &U, + GaugeField & mat, + const FermionField &A, const FermionField &B, int dag) +{ + assert((dag == DaggerNo) || (dag == DaggerYes)); + + Compressor compressor; + + FermionField Btilde(B.Grid()); + FermionField Atilde(B.Grid()); + Atilde = A; + + st.HaloExchange(B, compressor); + + for (int mu = 0; mu < Nd; mu++) { + + //////////////////////// + // Call the single hop + //////////////////////// + auto U_v = U.View(CpuRead); + auto B_v = B.View(CpuWrite); + auto Btilde_v = Btilde.View(CpuWrite); + thread_for(sss,B.Grid()->oSites(),{ + Kernels::DhopDirKernel(st, U_v, U_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu,1); + }); + + assert(0);// need to figure out the force interface with a blasted three link term. + + } +} + +template +void NaiveStaggeredFermion::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { + + conformable(U.Grid(), _grid); + conformable(U.Grid(), V.Grid()); + conformable(U.Grid(), mat.Grid()); + + mat.Checkerboard() = U.Checkerboard(); + + DerivInternal(Stencil, Umu, mat, U, V, dag); +} + +template +void NaiveStaggeredFermion::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { + + conformable(U.Grid(), _cbgrid); + conformable(U.Grid(), V.Grid()); + conformable(U.Grid(), mat.Grid()); + + assert(V.Checkerboard() == Even); + assert(U.Checkerboard() == Odd); + mat.Checkerboard() = Odd; + + DerivInternal(StencilEven, UmuOdd, mat, U, V, dag); +} + +template +void NaiveStaggeredFermion::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { + + conformable(U.Grid(), _cbgrid); + conformable(U.Grid(), V.Grid()); + conformable(U.Grid(), mat.Grid()); + + assert(V.Checkerboard() == Odd); + assert(U.Checkerboard() == Even); + mat.Checkerboard() = Even; + + DerivInternal(StencilOdd, UmuEven, mat, U, V, dag); +} + +template +void 
NaiveStaggeredFermion::Dhop(const FermionField &in, FermionField &out, int dag) +{ + DhopCalls+=2; + conformable(in.Grid(), _grid); // verifies full grid + conformable(in.Grid(), out.Grid()); + + out.Checkerboard() = in.Checkerboard(); + + DhopInternal(Stencil, Lebesgue, Umu, in, out, dag); +} + +template +void NaiveStaggeredFermion::DhopOE(const FermionField &in, FermionField &out, int dag) +{ + DhopCalls+=1; + conformable(in.Grid(), _cbgrid); // verifies half grid + conformable(in.Grid(), out.Grid()); // drops the cb check + + assert(in.Checkerboard() == Even); + out.Checkerboard() = Odd; + + DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, in, out, dag); +} + +template +void NaiveStaggeredFermion::DhopEO(const FermionField &in, FermionField &out, int dag) +{ + DhopCalls+=1; + conformable(in.Grid(), _cbgrid); // verifies half grid + conformable(in.Grid(), out.Grid()); // drops the cb check + + assert(in.Checkerboard() == Odd); + out.Checkerboard() = Even; + + DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, in, out, dag); +} + +template +void NaiveStaggeredFermion::Mdir(const FermionField &in, FermionField &out, int dir, int disp) +{ + DhopDir(in, out, dir, disp); +} +template +void NaiveStaggeredFermion::MdirAll(const FermionField &in, std::vector &out) +{ + assert(0); // Not implemented yet +} + +template +void NaiveStaggeredFermion::DhopDir(const FermionField &in, FermionField &out, int dir, int disp) +{ + + Compressor compressor; + Stencil.HaloExchange(in, compressor); + auto Umu_v = Umu.View(CpuRead); + auto in_v = in.View(CpuRead); + auto out_v = out.View(CpuWrite); + // thread_for( sss, in.Grid()->oSites(),{ + // Kernels::DhopDirKernel(Stencil, Umu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dir, disp); + // }); + assert(0); +}; + + +template +void NaiveStaggeredFermion::DhopInternal(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeField &U, + const FermionField &in, + FermionField &out, int dag) +{ + if ( StaggeredKernelsStatic::Comms == 
StaggeredKernelsStatic::CommsAndCompute ) + DhopInternalOverlappedComms(st,lo,U,in,out,dag); + else + DhopInternalSerialComms(st,lo,U,in,out,dag); +} +template +void NaiveStaggeredFermion::DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeField &U, + const FermionField &in, + FermionField &out, int dag) +{ + Compressor compressor; + int len = U.Grid()->oSites(); + + DhopTotalTime -= usecond(); + + DhopFaceTime -= usecond(); + st.Prepare(); + st.HaloGather(in,compressor); + DhopFaceTime += usecond(); + + DhopCommTime -=usecond(); + std::vector > requests; + st.CommunicateBegin(requests); + + DhopFaceTime-=usecond(); + st.CommsMergeSHM(compressor); + DhopFaceTime+= usecond(); + + ////////////////////////////////////////////////////////////////////////////////////////////////////// + // Removed explicit thread comms + ////////////////////////////////////////////////////////////////////////////////////////////////////// + DhopComputeTime -= usecond(); + { + int interior=1; + int exterior=0; + Kernels::DhopNaive(st,lo,U,in,out,dag,interior,exterior); + } + DhopComputeTime += usecond(); + + st.CommunicateComplete(requests); + DhopCommTime +=usecond(); + + // First to enter, last to leave timing + DhopFaceTime -= usecond(); + st.CommsMerge(compressor); + DhopFaceTime -= usecond(); + + DhopComputeTime2 -= usecond(); + { + int interior=0; + int exterior=1; + Kernels::DhopNaive(st,lo,U,in,out,dag,interior,exterior); + } + DhopComputeTime2 += usecond(); +} + +template +void NaiveStaggeredFermion::DhopInternalSerialComms(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeField &U, + const FermionField &in, + FermionField &out, int dag) +{ + assert((dag == DaggerNo) || (dag == DaggerYes)); + + DhopTotalTime -= usecond(); + + DhopCommTime -= usecond(); + Compressor compressor; + st.HaloExchange(in, compressor); + DhopCommTime += usecond(); + + DhopComputeTime -= usecond(); + { + int interior=1; + int exterior=1; + 
Kernels::DhopNaive(st,lo,U,in,out,dag,interior,exterior); + } + DhopComputeTime += usecond(); + DhopTotalTime += usecond(); +}; + + //////////////////////////////////////////////////////////////// + // Reporting + //////////////////////////////////////////////////////////////// +template +void NaiveStaggeredFermion::Report(void) +{ + Coordinate latt = _grid->GlobalDimensions(); + RealD volume = 1; for(int mu=0;mu_Nprocessors; + RealD NN = _grid->NodeCount(); + + std::cout << GridLogMessage << "#### Dhop calls report " << std::endl; + + std::cout << GridLogMessage << "NaiveStaggeredFermion Number of DhopEO Calls : " + << DhopCalls << std::endl; + std::cout << GridLogMessage << "NaiveStaggeredFermion TotalTime /Calls : " + << DhopTotalTime / DhopCalls << " us" << std::endl; + std::cout << GridLogMessage << "NaiveStaggeredFermion CommTime /Calls : " + << DhopCommTime / DhopCalls << " us" << std::endl; + std::cout << GridLogMessage << "NaiveStaggeredFermion ComputeTime/Calls : " + << DhopComputeTime / DhopCalls << " us" << std::endl; + + // Average the compute time + _grid->GlobalSum(DhopComputeTime); + DhopComputeTime/=NP; + + RealD mflops = 1154*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting + std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl; + std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl; + std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl; + + RealD Fullmflops = 1154*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting + std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl; + std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl; + std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl; + + std::cout << GridLogMessage << "NaiveStaggeredFermion Stencil" < +void 
NaiveStaggeredFermion::ZeroCounters(void) +{ + DhopCalls = 0; + DhopTotalTime = 0; + DhopCommTime = 0; + DhopComputeTime = 0; + DhopFaceTime = 0; + + Stencil.ZeroCounters(); + StencilEven.ZeroCounters(); + StencilOdd.ZeroCounters(); +} + + +//////////////////////////////////////////////////////// +// Conserved current - not yet implemented. +//////////////////////////////////////////////////////// +template +void NaiveStaggeredFermion::ContractConservedCurrent(PropagatorField &q_in_1, + PropagatorField &q_in_2, + PropagatorField &q_out, + PropagatorField &src, + Current curr_type, + unsigned int mu) +{ + assert(0); +} + +template +void NaiveStaggeredFermion::SeqConservedCurrent(PropagatorField &q_in, + PropagatorField &q_out, + PropagatorField &src, + Current curr_type, + unsigned int mu, + unsigned int tmin, + unsigned int tmax, + ComplexField &lattice_cmplx) +{ + assert(0); + +} + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/StaggeredKernelsAsm.h b/Grid/qcd/action/fermion/implementation/StaggeredKernelsAsm.h index 1a13e73a..63fd2a2f 100644 --- a/Grid/qcd/action/fermion/implementation/StaggeredKernelsAsm.h +++ b/Grid/qcd/action/fermion/implementation/StaggeredKernelsAsm.h @@ -618,10 +618,10 @@ Author: paboyle NAMESPACE_BEGIN(Grid); template -void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, +void StaggeredKernels::DhopSiteAsm(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, + SiteSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out,int dag) { assert(0); @@ -680,12 +680,13 @@ void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, gauge2 =(uint64_t)&UU[sU]( Z ); \ gauge3 =(uint64_t)&UU[sU]( T ); + // This is the single precision 5th direction vectorised kernel #include -template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, +template <> void StaggeredKernels::DhopSiteAsm(StencilView &st, 
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, + SiteSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out,int dag) { #ifdef AVX512 @@ -702,9 +703,10 @@ template <> void StaggeredKernels::DhopSiteAsm(StencilImpl StencilEntry *SE2; StencilEntry *SE3; - for(int s=0;s void StaggeredKernels::DhopSiteAsm(StencilImpl } #include -template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, +template <> void StaggeredKernels::DhopSiteAsm(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, + SiteSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dag) { #ifdef AVX512 @@ -756,8 +758,9 @@ template <> void StaggeredKernels::DhopSiteAsm(StencilImpl StencilEntry *SE2; StencilEntry *SE3; - for(int s=0;s void StaggeredKernels::DhopSiteAsm(StencilImpl // This is the single precision 5th direction vectorised kernel #include -template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, +template <> void StaggeredKernels::DhopSiteAsm(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, + SiteSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out,int dag) { #ifdef AVX512 @@ -841,9 +844,9 @@ template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, StencilEntry *SE2; StencilEntry *SE3; - for(int s=0;s void StaggeredKernels::DhopSiteAsm(StencilImpl &st, } #include -template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, +template <> void StaggeredKernels::DhopSiteAsm(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, + SiteSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out,int dag) { #ifdef AVX512 @@ -910,9 +913,9 @@ template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, StencilEntry *SE2; StencilEntry *SE3; - for(int s=0;s 
-void StaggeredKernels::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, +template +void StaggeredKernels::DhopSiteHand(StencilView &st, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, int sU, + SiteSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out,int dag) { typedef typename Simd::scalar_type S; @@ -181,8 +182,9 @@ void StaggeredKernels::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, StencilEntry *SE; int skew; - for(int s=0;s::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, HAND_STENCIL_LEG (U,Ym,2,skew,odd); HAND_STENCIL_LEG (U,Zm,1,skew,even); HAND_STENCIL_LEG (U,Tm,0,skew,odd); + if (Naik) { skew = 8; HAND_STENCIL_LEG(UUU,Xp,3,skew,even); HAND_STENCIL_LEG(UUU,Yp,2,skew,odd); @@ -202,7 +205,7 @@ void StaggeredKernels::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, HAND_STENCIL_LEG(UUU,Ym,2,skew,odd); HAND_STENCIL_LEG(UUU,Zm,1,skew,even); HAND_STENCIL_LEG(UUU,Tm,0,skew,odd); - + } if ( dag ) { result()()(0) = - even_0 - odd_0; result()()(1) = - even_1 - odd_1; @@ -218,9 +221,10 @@ void StaggeredKernels::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, template -void StaggeredKernels::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, +template +void StaggeredKernels::DhopSiteHandInt(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, int sU, + SiteSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out,int dag) { typedef typename Simd::scalar_type S; @@ -253,8 +257,9 @@ void StaggeredKernels::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, StencilEntry *SE; int skew; - for(int s=0;s::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, HAND_STENCIL_LEG_INT(U,Ym,2,skew,odd); HAND_STENCIL_LEG_INT(U,Zm,1,skew,even); HAND_STENCIL_LEG_INT(U,Tm,0,skew,odd); + if (Naik) { skew = 8; HAND_STENCIL_LEG_INT(UUU,Xp,3,skew,even); HAND_STENCIL_LEG_INT(UUU,Yp,2,skew,odd); @@ -277,7 +283,7 @@ void 
StaggeredKernels::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, HAND_STENCIL_LEG_INT(UUU,Ym,2,skew,odd); HAND_STENCIL_LEG_INT(UUU,Zm,1,skew,even); HAND_STENCIL_LEG_INT(UUU,Tm,0,skew,odd); - + } // Assume every site must be connected to at least one interior point. No 1^4 subvols. if ( dag ) { result()()(0) = - even_0 - odd_0; @@ -294,9 +300,10 @@ void StaggeredKernels::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, template -void StaggeredKernels::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, +template +void StaggeredKernels::DhopSiteHandExt(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, int sU, + SiteSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out,int dag) { typedef typename Simd::scalar_type S; @@ -329,8 +336,9 @@ void StaggeredKernels::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, StencilEntry *SE; int skew; - for(int s=0;s::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, HAND_STENCIL_LEG_EXT(U,Ym,2,skew,odd); HAND_STENCIL_LEG_EXT(U,Zm,1,skew,even); HAND_STENCIL_LEG_EXT(U,Tm,0,skew,odd); + if (Naik) { skew = 8; HAND_STENCIL_LEG_EXT(UUU,Xp,3,skew,even); HAND_STENCIL_LEG_EXT(UUU,Yp,2,skew,odd); @@ -353,7 +362,7 @@ void StaggeredKernels::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, HAND_STENCIL_LEG_EXT(UUU,Ym,2,skew,odd); HAND_STENCIL_LEG_EXT(UUU,Zm,1,skew,even); HAND_STENCIL_LEG_EXT(UUU,Tm,0,skew,odd); - + } // Add sum of all exterior connected stencil legs if ( nmu ) { if ( dag ) { @@ -370,6 +379,7 @@ void StaggeredKernels::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, } } +/* #define DHOP_SITE_HAND_INSTANTIATE(IMPL) \ template void StaggeredKernels::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, \ DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \ @@ -385,7 +395,7 @@ void StaggeredKernels::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \ SiteSpinor *buf, int LLs, int sU, \ const 
FermionFieldView &in, FermionFieldView &out, int dag); \ - +*/ #undef LOAD_CHI NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/StaggeredKernelsImplementation.h b/Grid/qcd/action/fermion/implementation/StaggeredKernelsImplementation.h index d301556c..d7abef27 100644 --- a/Grid/qcd/action/fermion/implementation/StaggeredKernelsImplementation.h +++ b/Grid/qcd/action/fermion/implementation/StaggeredKernelsImplementation.h @@ -37,9 +37,9 @@ NAMESPACE_BEGIN(Grid); if (SE->_is_local ) { \ if (SE->_permute) { \ chi_p = χ \ - permute(chi, in[SE->_offset], ptype); \ + permute(chi, in[SE->_offset], ptype); \ } else { \ - chi_p = &in[SE->_offset]; \ + chi_p = &in[SE->_offset]; \ } \ } else { \ chi_p = &buf[SE->_offset]; \ @@ -51,15 +51,15 @@ NAMESPACE_BEGIN(Grid); if (SE->_is_local ) { \ if (SE->_permute) { \ chi_p = χ \ - permute(chi, in[SE->_offset], ptype); \ + permute(chi, in[SE->_offset], ptype); \ } else { \ - chi_p = &in[SE->_offset]; \ + chi_p = &in[SE->_offset]; \ } \ } else if ( st.same_node[Dir] ) { \ chi_p = &buf[SE->_offset]; \ } \ if (SE->_is_local || st.same_node[Dir] ) { \ - multLink(Uchi, U[sU], *chi_p, Dir); \ + multLink(Uchi, U[sU], *chi_p, Dir); \ } #define GENERIC_STENCIL_LEG_EXT(U,Dir,skew,multLink) \ @@ -67,7 +67,7 @@ NAMESPACE_BEGIN(Grid); if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \ nmu++; \ chi_p = &buf[SE->_offset]; \ - multLink(Uchi, U[sU], *chi_p, Dir); \ + multLink(Uchi, U[sU], *chi_p, Dir); \ } template @@ -78,10 +78,12 @@ StaggeredKernels::StaggeredKernels(const ImplParams &p) : Base(p){}; // Int, Ext, Int+Ext cases for comms overlap //////////////////////////////////////////////////////////////////////////////////// template -void StaggeredKernels::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo, +template +void StaggeredKernels::DhopSiteGeneric(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, int sU, - const FermionFieldView &in, FermionFieldView &out, int dag) 
{ + SiteSpinor *buf, int sF, int sU, + const FermionFieldView &in, FermionFieldView &out, int dag) +{ const SiteSpinor *chi_p; SiteSpinor chi; SiteSpinor Uchi; @@ -89,8 +91,10 @@ void StaggeredKernels::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo, int ptype; int skew; - for(int s=0;s::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo, GENERIC_STENCIL_LEG(U,Ym,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG(U,Zm,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG(U,Tm,skew,Impl::multLinkAdd); + if ( Naik ) { skew=8; GENERIC_STENCIL_LEG(UUU,Xp,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG(UUU,Yp,skew,Impl::multLinkAdd); @@ -109,6 +114,7 @@ void StaggeredKernels::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo, GENERIC_STENCIL_LEG(UUU,Ym,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG(UUU,Zm,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG(UUU,Tm,skew,Impl::multLinkAdd); + } if ( dag ) { Uchi = - Uchi; } @@ -120,9 +126,10 @@ void StaggeredKernels::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo, // Only contributions from interior of our node /////////////////////////////////////////////////// template -void StaggeredKernels::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &lo, +template +void StaggeredKernels::DhopSiteGenericInt(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, int sU, + SiteSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out,int dag) { const SiteSpinor *chi_p; SiteSpinor chi; @@ -131,8 +138,9 @@ void StaggeredKernels::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder & int ptype; int skew ; - for(int s=0;s::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder & GENERIC_STENCIL_LEG_INT(U,Ym,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG_INT(U,Zm,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG_INT(U,Tm,skew,Impl::multLinkAdd); + if ( Naik ) { skew=8; GENERIC_STENCIL_LEG_INT(UUU,Xp,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG_INT(UUU,Yp,skew,Impl::multLinkAdd); @@ -152,6 +161,7 @@ void 
StaggeredKernels::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder & GENERIC_STENCIL_LEG_INT(UUU,Ym,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG_INT(UUU,Zm,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG_INT(UUU,Tm,skew,Impl::multLinkAdd); + } if ( dag ) { Uchi = - Uchi; } @@ -164,9 +174,10 @@ void StaggeredKernels::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder & // Only contributions from exterior of our node /////////////////////////////////////////////////// template -void StaggeredKernels::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &lo, +template +void StaggeredKernels::DhopSiteGenericExt(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, int sU, + SiteSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out,int dag) { const SiteSpinor *chi_p; // SiteSpinor chi; @@ -176,8 +187,9 @@ void StaggeredKernels::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder & int nmu=0; int skew ; - for(int s=0;s::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder & GENERIC_STENCIL_LEG_EXT(U,Ym,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG_EXT(U,Zm,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG_EXT(U,Tm,skew,Impl::multLinkAdd); + if ( Naik ) { skew=8; GENERIC_STENCIL_LEG_EXT(UUU,Xp,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG_EXT(UUU,Yp,skew,Impl::multLinkAdd); @@ -197,7 +210,7 @@ void StaggeredKernels::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder & GENERIC_STENCIL_LEG_EXT(UUU,Ym,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG_EXT(UUU,Zm,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG_EXT(UUU,Tm,skew,Impl::multLinkAdd); - + } if ( nmu ) { if ( dag ) { out[sF] = out[sF] - Uchi; @@ -211,72 +224,9 @@ void StaggeredKernels::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder & //////////////////////////////////////////////////////////////////////////////////// // Driving / wrapping routine to select right kernel //////////////////////////////////////////////////////////////////////////////////// - template -void 
StaggeredKernels::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, int sU, - const FermionFieldView &in, FermionFieldView &out, - int interior,int exterior) -{ - int dag=1; - DhopSite(st,lo,U,UUU,buf,LLs,sU,in,out,dag,interior,exterior); -}; - -template -void StaggeredKernels::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, int sU, - const FermionFieldView &in, FermionFieldView &out, - int interior,int exterior) -{ - int dag=0; - DhopSite(st,lo,U,UUU,buf,LLs,sU,in,out,dag,interior,exterior); -}; - -template -void StaggeredKernels::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, - int sU, const FermionFieldView &in, FermionFieldView &out, - int dag,int interior,int exterior) -{ - switch(Opt) { -#ifdef AVX512 - case OptInlineAsm: - if ( interior && exterior ) { - DhopSiteAsm(st,lo,U,UUU,buf,LLs,sU,in,out,dag); - } else { - std::cout << GridLogError << "Cannot overlap comms and compute with Staggered assembly"<::DhopDirKernel( StencilImpl &st, DoubledGaugeFieldVi assert(0); } +#define KERNEL_CALLNB(A,improved) \ + const uint64_t NN = Nsite*Ls; \ + accelerator_forNB( ss, NN, Simd::Nsimd(), { \ + int sF = ss; \ + int sU = ss/Ls; \ + ThisKernel:: template A(st_v,U_v,UUU_v,buf,sF,sU,in_v,out_v,dag); \ + }); + +#define KERNEL_CALL(A,improved) KERNEL_CALLNB(A,improved); accelerator_barrier(); + +#define ASM_CALL(A) \ + const uint64_t NN = Nsite*Ls; \ + thread_for( ss, NN, { \ + int sF = ss; \ + int sU = ss/Ls; \ + ThisKernel::A(st_v,U_v,UUU_v,buf,sF,sU,in_v,out_v,dag); \ + }); + +template +void StaggeredKernels::DhopImproved(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeField &U, DoubledGaugeField &UUU, + const FermionField &in, FermionField &out, int dag, int interior,int exterior) +{ + GridBase *FGrid=in.Grid(); + GridBase 
*UGrid=U.Grid(); + typedef StaggeredKernels ThisKernel; + auto UUU_v = UUU.View(AcceleratorRead); + auto U_v = U.View(AcceleratorRead); + auto in_v = in.View(AcceleratorRead); + auto out_v = out.View(AcceleratorWrite); + auto st_v = st.View(AcceleratorRead); + SiteSpinor * buf = st.CommBuf(); + + int Ls=1; + if(FGrid->Nd()==UGrid->Nd()+1){ + Ls = FGrid->_rdimensions[0]; + } + int Nsite = UGrid->oSites(); + + if( interior && exterior ) { + if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGeneric,1); return;} +#ifndef GRID_CUDA + if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHand,1); return;} + if (Opt == OptInlineAsm ) { ASM_CALL(DhopSiteAsm); return;} +#endif + } else if( interior ) { + if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericInt,1); return;} +#ifndef GRID_CUDA + if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandInt,1); return;} +#endif + } else if( exterior ) { + if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericExt,1); return;} +#ifndef GRID_CUDA + if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandExt,1); return;} +#endif + } + assert(0 && " Kernel optimisation case not covered "); +} +template +void StaggeredKernels::DhopNaive(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeField &U, + const FermionField &in, FermionField &out, int dag, int interior,int exterior) +{ + GridBase *FGrid=in.Grid(); + GridBase *UGrid=U.Grid(); + typedef StaggeredKernels ThisKernel; + auto UUU_v= U.View(AcceleratorRead); + auto U_v = U.View(AcceleratorRead); + auto in_v = in.View(AcceleratorRead); + auto out_v = out.View(AcceleratorWrite); + auto st_v = st.View(AcceleratorRead); + SiteSpinor * buf = st.CommBuf(); + + int Ls=1; + if(FGrid->Nd()==UGrid->Nd()+1){ + Ls = FGrid->_rdimensions[0]; + } + int Nsite = UGrid->oSites(); + + if( interior && exterior ) { + if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGeneric,0); return;} +#ifndef GRID_CUDA + if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHand,0); return;} +#endif + } else if( interior ) { + if (Opt == 
OptGeneric ) { KERNEL_CALL(DhopSiteGenericInt,0); return;} +#ifndef GRID_CUDA + if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandInt,0); return;} +#endif + } else if( exterior ) { + if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericExt,0); return;} +#ifndef GRID_CUDA + if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandExt,0); return;} +#endif + } +} + + +#undef KERNEL_CALLNB +#undef KERNEL_CALL +#undef ASM_CALL + NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h index 587bf42c..8f8c1063 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h @@ -496,5 +496,9 @@ void WilsonKernels::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField assert(0 && " Kernel optimisation case not covered "); } +#undef KERNEL_CALLNB +#undef KERNEL_CALL +#undef ASM_CALL + NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/NaiveStaggeredFermionInstantiation.cc b/Grid/qcd/action/fermion/instantiation/NaiveStaggeredFermionInstantiation.cc new file mode 100644 index 00000000..c424cb2d --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/NaiveStaggeredFermionInstantiation.cc @@ -0,0 +1,36 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion.cc + +Copyright (C) 2015 + +Author: Azusa Yamaguchi, Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ + /* END LEGAL */ +#include + +NAMESPACE_BEGIN(Grid); + +const std::vector NaiveStaggeredFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, 3}); +const std::vector NaiveStaggeredFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1}); + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/NaiveStaggeredFermionInstantiation.cc.master b/Grid/qcd/action/fermion/instantiation/NaiveStaggeredFermionInstantiation.cc.master new file mode 100644 index 00000000..75b75678 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/NaiveStaggeredFermionInstantiation.cc.master @@ -0,0 +1,37 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/NaiveStaggeredFermion.cc + +Copyright (C) 2015 + +Author: Azusa Yamaguchi, Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ + /* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class NaiveStaggeredFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/StaggeredImplD/NaiveStaggeredFermionInstantiationStaggeredImplD.cc b/Grid/qcd/action/fermion/instantiation/StaggeredImplD/NaiveStaggeredFermionInstantiationStaggeredImplD.cc new file mode 120000 index 00000000..42057f56 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/StaggeredImplD/NaiveStaggeredFermionInstantiationStaggeredImplD.cc @@ -0,0 +1 @@ +../NaiveStaggeredFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/StaggeredImplF/NaiveStaggeredFermionInstantiationStaggeredImplF.cc b/Grid/qcd/action/fermion/instantiation/StaggeredImplF/NaiveStaggeredFermionInstantiationStaggeredImplF.cc new file mode 120000 index 00000000..42057f56 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/StaggeredImplF/NaiveStaggeredFermionInstantiationStaggeredImplF.cc @@ -0,0 +1 @@ +../NaiveStaggeredFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh b/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh index 330dcfa8..72a9eaf9 100755 --- a/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh +++ b/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh @@ -88,6 +88,7 @@ done CC_LIST=" \ ImprovedStaggeredFermion5DInstantiation \ ImprovedStaggeredFermionInstantiation \ + NaiveStaggeredFermionInstantiation \ 
StaggeredKernelsInstantiation " for impl in $STAG_IMPL_LIST diff --git a/benchmarks/Benchmark_staggered.cc b/benchmarks/Benchmark_staggered.cc index 93086927..e2fcd8f2 100644 --- a/benchmarks/Benchmark_staggered.cc +++ b/benchmarks/Benchmark_staggered.cc @@ -87,26 +87,6 @@ int main (int argc, char ** argv) for(int mu=0;mu(Umu,mu); } - ref = Zero(); - /* - { // Naive wilson implementation - ref = Zero(); - for(int mu=0;mu +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; + ; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + Coordinate latt_size = GridDefaultLatt(); + Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + Coordinate mpi_layout = GridDefaultMpi(); + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); + + int threads = GridThread::GetThreads(); + std::cout< seeds({1,2,3,4}); + GridParallelRNG pRNG(&Grid); + pRNG.SeedFixedIntegers(seeds); + // pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); + + typedef typename NaiveStaggeredFermionR::FermionField FermionField; + typedef typename NaiveStaggeredFermionR::ComplexField ComplexField; + typename NaiveStaggeredFermionR::ImplParams params; + + FermionField src (&Grid); random(pRNG,src); + FermionField result(&Grid); result=Zero(); + FermionField ref(&Grid); ref=Zero(); + FermionField tmp(&Grid); tmp=Zero(); + FermionField err(&Grid); tmp=Zero(); + FermionField phi (&Grid); random(pRNG,phi); + FermionField chi (&Grid); random(pRNG,chi); + LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); + std::vector U(4,&Grid); + + + double volume=1; + for(int mu=0;mu(Umu,mu); + /* Debug force unit + U[mu] = 1.0; + PokeIndex(Umu,U[mu],mu); + */ + } + + ref = Zero(); + + RealD mass=0.1; + RealD c1=9.0/8.0; + RealD u0=1.0; + + { // Simple improved staggered implementation + ref = Zero(); + RealD c1tad = 0.5*c1/u0; + + Lattice > coor(&Grid); + + Lattice > x(&Grid); LatticeCoordinate(x,0); + Lattice > y(&Grid); LatticeCoordinate(y,1); + Lattice > z(&Grid); LatticeCoordinate(z,2); + Lattice > t(&Grid); LatticeCoordinate(t,3); + + Lattice > lin_z(&Grid); lin_z=x+y; + Lattice > lin_t(&Grid); lin_t=x+y+z; + + for(int mu=0;mu * = < chi | Deo^dag| phi> "< HermOpEO(Ds); 
+ HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); + HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); + + HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2); + HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2); + + pDce = innerProduct(phi_e,dchi_e); + pDco = innerProduct(phi_o,dchi_o); + cDpe = innerProduct(chi_e,dphi_e); + cDpo = innerProduct(chi_o,dphi_o); + + std::cout< Date: Thu, 28 May 2020 11:45:25 -0400 Subject: [PATCH 29/86] Accelerator inline --- Grid/qcd/action/fermion/StaggeredKernels.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Grid/qcd/action/fermion/StaggeredKernels.h b/Grid/qcd/action/fermion/StaggeredKernels.h index 0203dca2..30deee06 100644 --- a/Grid/qcd/action/fermion/StaggeredKernels.h +++ b/Grid/qcd/action/fermion/StaggeredKernels.h @@ -63,17 +63,17 @@ template class StaggeredKernels : public FermionOperator , pub /////////////////////////////////////////////////////////////////////////////////////// // Generic Nc kernels /////////////////////////////////////////////////////////////////////////////////////// - template + template accelerator_inline void DhopSiteGeneric(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, const FermionFieldView &in, FermionFieldView &out,int dag); - template + template accelerator_inline void DhopSiteGenericInt(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, const FermionFieldView &in, FermionFieldView &out,int dag); - template + template accelerator_inline void DhopSiteGenericExt(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, @@ -82,17 +82,17 @@ template class StaggeredKernels : public FermionOperator , pub /////////////////////////////////////////////////////////////////////////////////////// // Nc=3 specific kernels /////////////////////////////////////////////////////////////////////////////////////// - template + template accelerator_inline void 
DhopSiteHand(StencilView &st, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, const FermionFieldView &in, FermionFieldView &out,int dag); - template + template accelerator_inline void DhopSiteHandInt(StencilView &st, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, const FermionFieldView &in, FermionFieldView &out,int dag); - template + template accelerator_inline void DhopSiteHandExt(StencilView &st, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, From 9fcb47ee63246dc180963ce840c8da525238d5b3 Mon Sep 17 00:00:00 2001 From: Christoph Lehner Date: Tue, 2 Jun 2020 07:44:38 -0400 Subject: [PATCH 30/86] Explicit error message instead of infinite loop in GlobalSharedMemory::GetShmDims --- Grid/communicator/SharedMemoryMPI.cc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Grid/communicator/SharedMemoryMPI.cc b/Grid/communicator/SharedMemoryMPI.cc index ed465252..0de48cfe 100644 --- a/Grid/communicator/SharedMemoryMPI.cc +++ b/Grid/communicator/SharedMemoryMPI.cc @@ -170,17 +170,24 @@ void GlobalSharedMemory::GetShmDims(const Coordinate &WorldDims,Coordinate &ShmD std::vector primes({2,3,5}); int dim = 0; + int last_dim = ndimension - 1; int AutoShmSize = 1; while(AutoShmSize != WorldShmSize) { - for(int p=0;p Date: Wed, 3 Jun 2020 09:09:52 -0400 Subject: [PATCH 31/86] Reorg memory manager for O(1) hash table --- Grid/allocator/AlignedAllocator.h | 4 +- Grid/allocator/Allocator.h | 2 +- Grid/allocator/MemoryCacheShared.cc | 28 ------ .../{AllocationCache.cc => MemoryManager.cc} | 35 ++++--- .../{AllocationCache.h => MemoryManager.h} | 97 +++++++++++++++---- ...eMem.cc => MemoryManagerCacheDeviceMem.cc} | 48 ++++----- Grid/allocator/MemoryManagerShared.cc | 16 +++ 7 files changed, 137 insertions(+), 93 deletions(-) delete mode 100644 Grid/allocator/MemoryCacheShared.cc rename Grid/allocator/{AllocationCache.cc => 
MemoryManager.cc} (74%) rename Grid/allocator/{AllocationCache.h => MemoryManager.h} (59%) rename Grid/allocator/{MemoryCacheDeviceMem.cc => MemoryManagerCacheDeviceMem.cc} (86%) create mode 100644 Grid/allocator/MemoryManagerShared.cc diff --git a/Grid/allocator/AlignedAllocator.h b/Grid/allocator/AlignedAllocator.h index c3a32cd3..6c6dd7d8 100644 --- a/Grid/allocator/AlignedAllocator.h +++ b/Grid/allocator/AlignedAllocator.h @@ -55,7 +55,7 @@ public: profilerAllocate(bytes); - _Tp *ptr = (_Tp*) AllocationCache::CpuAllocate(bytes); + _Tp *ptr = (_Tp*) MemoryManager::CpuAllocate(bytes); assert( ( (_Tp*)ptr != (_Tp *)NULL ) ); @@ -68,7 +68,7 @@ public: profilerFree(bytes); - AllocationCache::CpuFree((void *)__p,bytes); + MemoryManager::CpuFree((void *)__p,bytes); } // FIXME: hack for the copy constructor, eventually it must be avoided diff --git a/Grid/allocator/Allocator.h b/Grid/allocator/Allocator.h index 9eaec8f6..589ea36f 100644 --- a/Grid/allocator/Allocator.h +++ b/Grid/allocator/Allocator.h @@ -1,4 +1,4 @@ #pragma once #include -#include +#include #include diff --git a/Grid/allocator/MemoryCacheShared.cc b/Grid/allocator/MemoryCacheShared.cc deleted file mode 100644 index d7592024..00000000 --- a/Grid/allocator/MemoryCacheShared.cc +++ /dev/null @@ -1,28 +0,0 @@ -#include -#ifdef GRID_UVM - -#warning "Grid is assuming unified virtual memory address space" -NAMESPACE_BEGIN(Grid); -///////////////////////////////////////////////////////////////////////////////// -// View management is 1:1 address space mapping -///////////////////////////////////////////////////////////////////////////////// - -void AllocationCache::AcceleratorViewClose(void* AccPtr){}; -void *AllocationCache::AcceleratorViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint){ return CpuPtr; } -void AllocationCache::CpuViewClose(void* Ptr){}; -void *AllocationCache::CpuViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint){ return CpuPtr; } -int 
AllocationCache::CpuViewLookup(void *CpuPtr){ return 0;} -///////////////////////////////////// -// Dummy stubs -///////////////////////////////////// -void AllocationCache::CpuDiscard(int e) { return;} -void AllocationCache::Discard(int e) { return;} -void AllocationCache::Evict(int e) { return; } -void AllocationCache::Flush(int e) { assert(0);} -void AllocationCache::Clone(int e) { assert(0);} -int AllocationCache::ViewVictim(void) { assert(0); return 0;} -void AllocationCache::ViewClose(void* AccPtr,ViewMode mode){}; -void *AllocationCache::ViewOpen (void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint){return CpuPtr;}; - -NAMESPACE_END(Grid); -#endif diff --git a/Grid/allocator/AllocationCache.cc b/Grid/allocator/MemoryManager.cc similarity index 74% rename from Grid/allocator/AllocationCache.cc rename to Grid/allocator/MemoryManager.cc index dc32affd..599cdaab 100644 --- a/Grid/allocator/AllocationCache.cc +++ b/Grid/allocator/MemoryManager.cc @@ -11,14 +11,14 @@ NAMESPACE_BEGIN(Grid); ////////////////////////////////////////////////////////////////////// // Data tables for recently freed pooiniter caches ////////////////////////////////////////////////////////////////////// -AllocationCache::AllocationCacheEntry AllocationCache::Entries[AllocationCache::NallocType][AllocationCache::NallocCacheMax]; -int AllocationCache::Victim[AllocationCache::NallocType]; -int AllocationCache::Ncache[AllocationCache::NallocType]; +MemoryManager::AllocationCacheEntry MemoryManager::Entries[MemoryManager::NallocType][MemoryManager::NallocCacheMax]; +int MemoryManager::Victim[MemoryManager::NallocType]; +int MemoryManager::Ncache[MemoryManager::NallocType]; ////////////////////////////////////////////////////////////////////// // Actual allocation and deallocation utils ////////////////////////////////////////////////////////////////////// -void *AllocationCache::AcceleratorAllocate(size_t bytes) +void *MemoryManager::AcceleratorAllocate(size_t bytes) { void *ptr = (void *) 
Lookup(bytes,Acc); @@ -29,13 +29,13 @@ void *AllocationCache::AcceleratorAllocate(size_t bytes) return ptr; } -void AllocationCache::AcceleratorFree (void *ptr,size_t bytes) +void MemoryManager::AcceleratorFree (void *ptr,size_t bytes) { void *__freeme = Insert(ptr,bytes,Acc); if ( __freeme ) acceleratorFreeDevice(__freeme); } -void *AllocationCache::CpuAllocate(size_t bytes) +void *MemoryManager::CpuAllocate(size_t bytes) { void *ptr = (void *) Lookup(bytes,Cpu); @@ -46,23 +46,19 @@ void *AllocationCache::CpuAllocate(size_t bytes) return ptr; } -void AllocationCache::CpuFree (void *ptr,size_t bytes) +void MemoryManager::CpuFree (void *_ptr,size_t bytes) { - // Look up in ViewCache - int e=CpuViewLookup(ptr); - if(e>=0){ Discard(e); } + NotifyDeletion(_ptr); // If present remove entry and free accelerator too. // Can we ever hit a free event with a view still in scope? - void *__freeme = Insert(ptr,bytes,Cpu); - // std::cout <<"CpuFree cached pointer "<0); #ifdef GRID_OMP @@ -139,7 +136,7 @@ void *AllocationCache::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entri return ret; } -void *AllocationCache::Lookup(size_t bytes,int type) +void *MemoryManager::Lookup(size_t bytes,int type) { #ifdef ALLOCATION_CACHE bool small = (bytes < GRID_ALLOC_SMALL_LIMIT); @@ -149,7 +146,8 @@ void *AllocationCache::Lookup(size_t bytes,int type) return NULL; #endif } -void *AllocationCache::Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache) + +void *MemoryManager::Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache) { assert(ncache>0); #ifdef GRID_OMP @@ -164,5 +162,6 @@ void *AllocationCache::Lookup(size_t bytes,AllocationCacheEntry *entries,int nca return NULL; } + NAMESPACE_END(Grid); diff --git a/Grid/allocator/AllocationCache.h b/Grid/allocator/MemoryManager.h similarity index 59% rename from Grid/allocator/AllocationCache.h rename to Grid/allocator/MemoryManager.h index d09c2b0e..e3339ce0 100644 --- a/Grid/allocator/AllocationCache.h +++ 
b/Grid/allocator/MemoryManager.h @@ -2,7 +2,7 @@ Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/AllocationCache.h + Source file: ./lib/MemoryManager.h Copyright (C) 2015 @@ -27,6 +27,8 @@ Author: Peter Boyle *************************************************************************************/ /* END LEGAL */ #pragma once +#include +#include NAMESPACE_BEGIN(Grid); @@ -65,7 +67,7 @@ enum ViewMode { CpuWriteDiscard = 0x10 // same for now }; -class AllocationCache { +class MemoryManager { private: //////////////////////////////////////////////////////////// @@ -87,36 +89,89 @@ private: // Free pool ///////////////////////////////////////////////// static void *Insert(void *ptr,size_t bytes,int type) ; - static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim) ; static void *Lookup(size_t bytes,int type) ; + static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim) ; static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache) ; - ///////////////////////////////////////////////// - // Internal device view - ///////////////////////////////////////////////// static void *AcceleratorAllocate(size_t bytes); static void AcceleratorFree (void *ptr,size_t bytes); - static int ViewVictim(void); - static void CpuDiscard(int e); - static void Discard(int e); - static void Evict(int e); - static void Flush(int e); - static void Clone(int e); - static int CpuViewLookup(void *CpuPtr); - // static int AccViewLookup(void *AccPtr); - static void AcceleratorViewClose(void* AccPtr); - static void *AcceleratorViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint); - static void CpuViewClose(void* Ptr); - static void *CpuViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint); -public: + public: static void Init(void); + static void *CpuAllocate(size_t bytes); + static void CpuFree (void *ptr,size_t bytes); + + private: + + 
//////////////////////////////////////////////////////// + // Footprint tracking + //////////////////////////////////////////////////////// + static uint64_t DeviceBytes; + static uint64_t DeviceLRUBytes; + static uint64_t DeviceMaxBytes; + static uint64_t HostToDeviceBytes; + static uint64_t DeviceToHostBytes; + +#ifndef GRID_UVM + ////////////////////////////////////////////////////////////////////// + // Data tables for ViewCache + ////////////////////////////////////////////////////////////////////// + typedef std::list LRU_t; + typedef typename LRU_t::iterator LRUiterator; + typedef struct { + int LRU_valid; + LRUiterator LRU_entry; + uint64_t CpuPtr; + uint64_t AccPtr; + size_t bytes; + uint32_t transient; + uint32_t state; + uint32_t accLock; + uint32_t cpuLock; + } AcceleratorViewEntry; + + typedef std::unordered_map AccViewTable_t; + typedef typename AccViewTable_t::iterator AccViewTableIterator ; + + static AccViewTable_t AccViewTable; + static LRU_t LRU; + static LRU_t LRU_transient; + + ///////////////////////////////////////////////// + // Device motion + ///////////////////////////////////////////////// + static void Create(uint64_t CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint); + static void EvictVictims(uint64_t bytes); // Frees up + static void Evict(AcceleratorViewEntry &AccCache); + static void Flush(AcceleratorViewEntry &AccCache); + static void Clone(AcceleratorViewEntry &AccCache); + static void AccDiscard(AcceleratorViewEntry &AccCache); + static void CpuDiscard(AcceleratorViewEntry &AccCache); + + // static void LRUupdate(AcceleratorViewEntry &AccCache); + static void LRUinsert(AcceleratorViewEntry &AccCache); + static void LRUremove(AcceleratorViewEntry &AccCache); + + // manage entries in the table + static int EntryPresent(uint64_t CpuPtr); + static void EntryCreate(uint64_t CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint); + static void EntryErase (uint64_t CpuPtr); + static AccViewTableIterator EntryLookup(uint64_t CpuPtr); + 
static void EntrySet (uint64_t CpuPtr,AcceleratorViewEntry &entry); + + static void AcceleratorViewClose(uint64_t AccPtr); + static uint64_t AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint); + static void CpuViewClose(uint64_t Ptr); + static uint64_t CpuViewOpen(uint64_t CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint); +#endif + static void NotifyDeletion(void * CpuPtr); + + public: + static void Print(void); static void ViewClose(void* AccPtr,ViewMode mode); static void *ViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint); - static void *CpuAllocate(size_t bytes); - static void CpuFree (void *ptr,size_t bytes); }; NAMESPACE_END(Grid); diff --git a/Grid/allocator/MemoryCacheDeviceMem.cc b/Grid/allocator/MemoryManagerCacheDeviceMem.cc similarity index 86% rename from Grid/allocator/MemoryCacheDeviceMem.cc rename to Grid/allocator/MemoryManagerCacheDeviceMem.cc index 6922a51a..f9804a9c 100644 --- a/Grid/allocator/MemoryCacheDeviceMem.cc +++ b/Grid/allocator/MemoryManagerCacheDeviceMem.cc @@ -1,3 +1,4 @@ +#if 0 #include #ifndef GRID_UVM @@ -40,7 +41,7 @@ static int NaccCache = 32; #define AccDirty (0x4) /*ACC copy is golden */ #define EvictNext (0x8) /*Priority for eviction*/ -int AllocationCache::ViewVictim(void) +int MemoryManager::ViewVictim(void) { int prioEmpty =-1; int prioCpuDirty =-1; @@ -55,7 +56,7 @@ int AllocationCache::ViewVictim(void) // round robin priority search of unlocked entries offset from current victim for(int ep=0;ep= 0 ) victim = prioEmpty; /*Highest prio is winner*/ assert(victim >= 0); // Must succeed/ - dprintf("AllocationCacheDeviceMem: Selected victim cache entry %d\n",victim); + dprintf("MemoryManagerDeviceMem: Selected victim cache entry %d\n",victim); // advance victim pointer AccCacheVictim=(AccCacheVictim+1)%NaccCache; - dprintf("AllocationCacheDeviceMem: victim pointer now %d / %d\n",AccCacheVictim,NaccCache); + dprintf("MemoryManagerDeviceMem: victim pointer now %d / 
%d\n",AccCacheVictim,NaccCache); return victim; } @@ -106,15 +107,15 @@ int AllocationCache::ViewVictim(void) // Accelerator cache motion ///////////////////////////////////////////////// -void AllocationCache::Discard(int e) // remove from Accelerator, remove entry, without flush +void MemoryManager::Discard(int e) // remove from Accelerator, remove entry, without flush { if(AccCache[e].state!=Empty){ - dprintf("AllocationCache: Discard(%d) %llx,%llx\n",e,(uint64_t)AccCache[e].AccPtr,(uint64_t)AccCache[e].CpuPtr); + dprintf("MemoryManager: Discard(%d) %llx,%llx\n",e,(uint64_t)AccCache[e].AccPtr,(uint64_t)AccCache[e].CpuPtr); assert(AccCache[e].accLock==0); assert(AccCache[e].cpuLock==0); assert(AccCache[e].CpuPtr!=NULL); if(AccCache[e].AccPtr) { - dprintf("AllocationCache: Free(%d) %llx\n",e,(uint64_t)AccCache[e].AccPtr); + dprintf("MemoryManager: Free(%d) %llx\n",e,(uint64_t)AccCache[e].AccPtr); AcceleratorFree(AccCache[e].AccPtr,AccCache[e].bytes); } } @@ -126,10 +127,10 @@ void AllocationCache::Discard(int e) // remove from Accelerator, remove entry, w AccCache[e].cpuLock=0; } -void AllocationCache::Evict(int e) // Make CPU consistent, remove from Accelerator, remove entry +void MemoryManager::Evict(int e) // Make CPU consistent, remove from Accelerator, remove entry { if(AccCache[e].state!=Empty){ - dprintf("AllocationCache: Evict(%d) %llx,%llx\n",e,(uint64_t)AccCache[e].AccPtr,(uint64_t)AccCache[e].CpuPtr); + dprintf("MemoryManager: Evict(%d) %llx,%llx\n",e,(uint64_t)AccCache[e].AccPtr,(uint64_t)AccCache[e].CpuPtr); assert(AccCache[e].accLock==0); assert(AccCache[e].cpuLock==0); if(AccCache[e].state==AccDirty) { @@ -137,7 +138,7 @@ void AllocationCache::Evict(int e) // Make CPU consistent, remove from Accelerat } assert(AccCache[e].CpuPtr!=NULL); if(AccCache[e].AccPtr) { - dprintf("AllocationCache: Free(%d) %llx\n",e,(uint64_t)AccCache[e].AccPtr); + dprintf("MemoryManager: Free(%d) %llx\n",e,(uint64_t)AccCache[e].AccPtr); 
AcceleratorFree(AccCache[e].AccPtr,AccCache[e].bytes); } } @@ -148,9 +149,9 @@ void AllocationCache::Evict(int e) // Make CPU consistent, remove from Accelerat AccCache[e].accLock=0; AccCache[e].cpuLock=0; } -void AllocationCache::Flush(int e)// Copy back from a dirty device state and mark consistent. Do not remove +void MemoryManager::Flush(int e)// Copy back from a dirty device state and mark consistent. Do not remove { - // printf("AllocationCache: Flush(%d) %llx -> %llx\n",e,(uint64_t)AccCache[e].AccPtr,(uint64_t)AccCache[e].CpuPtr); fflush(stdout); + // printf("MemoryManager: Flush(%d) %llx -> %llx\n",e,(uint64_t)AccCache[e].AccPtr,(uint64_t)AccCache[e].CpuPtr); fflush(stdout); assert(AccCache[e].state==AccDirty); assert(AccCache[e].cpuLock==0); assert(AccCache[e].accLock==0); @@ -159,7 +160,7 @@ void AllocationCache::Flush(int e)// Copy back from a dirty device state and mar acceleratorCopyFromDevice(AccCache[e].AccPtr,AccCache[e].CpuPtr,AccCache[e].bytes); AccCache[e].state=Consistent; } -void AllocationCache::Clone(int e)// Copy from CPU, mark consistent. Allocate if necessary +void MemoryManager::Clone(int e)// Copy from CPU, mark consistent. Allocate if necessary { assert(AccCache[e].state==CpuDirty); assert(AccCache[e].cpuLock==0); @@ -168,12 +169,12 @@ void AllocationCache::Clone(int e)// Copy from CPU, mark consistent. Allocate if if(AccCache[e].AccPtr==NULL){ AccCache[e].AccPtr=AcceleratorAllocate(AccCache[e].bytes); } - // printf("AllocationCache: Clone(%d) %llx <- %llx\n",e,(uint64_t)AccCache[e].AccPtr,(uint64_t)AccCache[e].CpuPtr); fflush(stdout); + // printf("MemoryManager: Clone(%d) %llx <- %llx\n",e,(uint64_t)AccCache[e].AccPtr,(uint64_t)AccCache[e].CpuPtr); fflush(stdout); acceleratorCopyToDevice(AccCache[e].CpuPtr,AccCache[e].AccPtr,AccCache[e].bytes); AccCache[e].state=Consistent; } -void AllocationCache::CpuDiscard(int e)// Mark accelerator dirty without copy. 
Allocate if necessary +void MemoryManager::CpuDiscard(int e)// Mark accelerator dirty without copy. Allocate if necessary { assert(AccCache[e].state!=Empty); assert(AccCache[e].cpuLock==0); @@ -182,7 +183,7 @@ void AllocationCache::CpuDiscard(int e)// Mark accelerator dirty without copy. A if(AccCache[e].AccPtr==NULL){ AccCache[e].AccPtr=AcceleratorAllocate(AccCache[e].bytes); } - // printf("AllocationCache: CpuDiscard(%d) %llx <- %llx\n",e,(uint64_t)AccCache[e].AccPtr,(uint64_t)AccCache[e].CpuPtr); fflush(stdout); + // printf("MemoryManager: CpuDiscard(%d) %llx <- %llx\n",e,(uint64_t)AccCache[e].AccPtr,(uint64_t)AccCache[e].CpuPtr); fflush(stdout); // acceleratorCopyToDevice(AccCache[e].CpuPtr,AccCache[e].AccPtr,AccCache[e].bytes); AccCache[e].state=AccDirty; } @@ -190,7 +191,7 @@ void AllocationCache::CpuDiscard(int e)// Mark accelerator dirty without copy. A ///////////////////////////////////////////////////////////////////////////////// // View management ///////////////////////////////////////////////////////////////////////////////// -void AllocationCache::ViewClose(void* Ptr,ViewMode mode) +void MemoryManager::ViewClose(void* Ptr,ViewMode mode) { if( (mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard) ){ AcceleratorViewClose(Ptr); @@ -200,7 +201,7 @@ void AllocationCache::ViewClose(void* Ptr,ViewMode mode) assert(0); } } -void *AllocationCache::ViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint) +void *MemoryManager::ViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint) { if( (mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard) ){ return AcceleratorViewOpen(CpuPtr,bytes,mode,hint); @@ -211,7 +212,7 @@ void *AllocationCache::ViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvi return nullptr; } } -void *AllocationCache::AcceleratorViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint) +void *MemoryManager::AcceleratorViewOpen(void* CpuPtr,size_t 
bytes,ViewMode mode,ViewAdvise hint) { //////////////////////////////////////////////////////////////////////////// // Find if present, otherwise get or force an empty @@ -312,7 +313,7 @@ void *AllocationCache::AcceleratorViewOpen(void* CpuPtr,size_t bytes,ViewMode mo //////////////////////////////////// // look up & decrement lock count //////////////////////////////////// -void AllocationCache::AcceleratorViewClose(void* AccPtr) +void MemoryManager::AcceleratorViewClose(void* AccPtr) { int e=CpuViewLookup(AccPtr); // printf("AccView close %d lock %d \n",e,AccCache[e].accLock); @@ -326,7 +327,7 @@ void AllocationCache::AcceleratorViewClose(void* AccPtr) */ AccCache[e].accLock--; } -void AllocationCache::CpuViewClose(void* CpuPtr) +void MemoryManager::CpuViewClose(void* CpuPtr) { int e=CpuViewLookup(CpuPtr); assert(e!=-1); @@ -334,7 +335,7 @@ void AllocationCache::CpuViewClose(void* CpuPtr) assert(AccCache[e].accLock==0); AccCache[e].cpuLock--; } -void *AllocationCache::CpuViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise transient) +void *MemoryManager::CpuViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise transient) { //////////////////////////////////////////////////////////////////////////// // Find if present, otherwise get or force an empty @@ -390,7 +391,7 @@ void *AllocationCache::CpuViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewA ////////////////////////////////////////////////////////////////////////////// //loop round robin over entries checking acc pointer ////////////////////////////////////////////////////////////////////////////// -int AllocationCache::CpuViewLookup(void *CpuPtr) +int MemoryManager::CpuViewLookup(void *CpuPtr) { assert(CpuPtr!=NULL); for(int e=0;e +#ifdef GRID_UVM + +#warning "Grid is assuming unified virtual memory address space" +NAMESPACE_BEGIN(Grid); +///////////////////////////////////////////////////////////////////////////////// +// View management is 1:1 address space mapping 
+///////////////////////////////////////////////////////////////////////////////// + +void MemoryManager::ViewClose(void* AccPtr,ViewMode mode){}; +void *MemoryManager::ViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint){ return CpuPtr; }; +void MemoryManager::Print(void){}; +void MemoryManager::NotifyDeletion(void *ptr){}; + +NAMESPACE_END(Grid); +#endif From 1c9f20b15eec600043326429b903245a0b140f95 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 3 Jun 2020 09:10:29 -0400 Subject: [PATCH 32/86] Views must be closed --- Grid/algorithms/iterative/ConjugateGradient.h | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/Grid/algorithms/iterative/ConjugateGradient.h b/Grid/algorithms/iterative/ConjugateGradient.h index d40fee7b..c8180a11 100644 --- a/Grid/algorithms/iterative/ConjugateGradient.h +++ b/Grid/algorithms/iterative/ConjugateGradient.h @@ -140,15 +140,17 @@ public: b = cp / c; LinearCombTimer.Start(); - auto psi_v = psi.View(AcceleratorWrite); - auto p_v = p.View(AcceleratorWrite); - auto r_v = r.View(AcceleratorWrite); - accelerator_for(ss,p_v.size(), Field::vector_object::Nsimd(),{ - coalescedWrite(psi_v[ss], a * p_v(ss) + psi_v(ss)); - coalescedWrite(p_v[ss] , b * p_v(ss) + r_v (ss)); - }); - LinearCombTimer.Stop(); - LinalgTimer.Stop(); + { + auto psi_v = psi.View(AcceleratorWrite); + auto p_v = p.View(AcceleratorWrite); + auto r_v = r.View(AcceleratorWrite); + accelerator_for(ss,p_v.size(), Field::vector_object::Nsimd(),{ + coalescedWrite(psi_v[ss], a * p_v(ss) + psi_v(ss)); + coalescedWrite(p_v[ss] , b * p_v(ss) + r_v (ss)); + }); + LinearCombTimer.Stop(); + LinalgTimer.Stop(); + } std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k << " residual " << sqrt(cp/ssq) << " target " << Tolerance << std::endl; From 8cfd5d2639746c534ede59f13f912fb81fb471c1 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 3 Jun 2020 09:11:28 -0400 Subject: [PATCH 33/86] Need lattice view --- 
Grid/lattice/Lattice_view.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Grid/lattice/Lattice_view.h b/Grid/lattice/Lattice_view.h index b5887d50..b12dd2b7 100644 --- a/Grid/lattice/Lattice_view.h +++ b/Grid/lattice/Lattice_view.h @@ -77,7 +77,7 @@ public: this->cpu_ptr = (void *)this->_odata; this->mode = mode; this->_odata =(vobj *) - AllocationCache::ViewOpen(this->cpu_ptr, + MemoryManager::ViewOpen(this->cpu_ptr, this->_odata_size*sizeof(vobj), mode, AdviseDefault); @@ -85,7 +85,7 @@ public: void ViewClose(void) { // Inform the manager // std::cout << "View Close"<cpu_ptr<cpu_ptr,this->mode); + MemoryManager::ViewClose(this->cpu_ptr,this->mode); } }; @@ -101,7 +101,7 @@ class MemViewDeleter { void *cpu_ptr; ViewMode mode; ~MemViewDeleter(){ - AllocationCache::ViewClose(cpu_ptr,mode); + MemoryManager::ViewClose(cpu_ptr,mode); } }; template From 0c3112cd943384063fcb1fec44ddf3135b6351b1 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 3 Jun 2020 09:11:51 -0400 Subject: [PATCH 34/86] Use view mechanism --- Grid/stencil/Stencil.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Grid/stencil/Stencil.h b/Grid/stencil/Stencil.h index 3bb80cfe..5602420b 100644 --- a/Grid/stencil/Stencil.h +++ b/Grid/stencil/Stencil.h @@ -200,7 +200,7 @@ class CartesianStencilView : public CartesianStencilAcceleratormode = mode; this->_entries_p =(StencilEntry *) - AllocationCache::ViewOpen(this->_entries_p, + MemoryManager::ViewOpen(this->_entries_p, this->_npoints*this->_osites*sizeof(StencilEntry), mode, AdviseDefault); From e93e12b6a4f62b9ed4c1e8c74a6468bea4aafe17 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 3 Jun 2020 09:12:11 -0400 Subject: [PATCH 35/86] More verbose SYCL setup --- Grid/threads/Accelerator.cc | 44 ++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/Grid/threads/Accelerator.cc b/Grid/threads/Accelerator.cc index ae02b437..fea812c7 100644 --- 
a/Grid/threads/Accelerator.cc +++ b/Grid/threads/Accelerator.cc @@ -153,24 +153,38 @@ void acceleratorInit(void) if ((localRankStr = getenv(ENV_RANK_OMPI )) != NULL) { world_rank = atoi(localRankStr);} if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);} - /* - for (int i = 0; i < nDevices; i++) { + auto devices = cl::sycl::device::get_devices(); + for(int d = 0;d().c_str()); + +#define GPU_PROP_FMT(prop,FMT) \ + printf("AcceleratorSyclInit: " #prop ": " FMT" \n",prop,devices[d].get_info()); + +#define GPU_PROP(prop) GPU_PROP_FMT(prop,"%d"); + + GPU_PROP_STR(vendor); + GPU_PROP_STR(version); + GPU_PROP_STR(device_type); + GPU_PROP_STR(max_compute_units); + GPU_PROP(native_vector_width_char); + GPU_PROP(native_vector_width_short); + GPU_PROP(native_vector_width_int); + GPU_PROP(native_vector_width_long); + GPU_PROP(native_vector_width_float); + GPU_PROP(native_vector_width_double); + GPU_PROP(native_vector_width_half); + GPU_PROP(address_bits); + GPU_PROP(half_fp_config); + GPU_PROP(single_fp_config); + GPU_PROP(double_fp_config); + GPU_PROP(global_mem_size); -#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("AcceleratorSyclInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory); -#define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d"); - - cudaGetDeviceProperties(&gpu_props[i], i); - if ( world_rank == 0) { - cudaDeviceProp prop; - prop = gpu_props[i]; - printf("AcceleratorSyclInit: ========================\n"); - printf("AcceleratorSyclInit: Device Number : %d\n", i); - printf("AcceleratorSyclInit: ========================\n"); - printf("AcceleratorSyclInit: Device identifier: %s\n", prop.name); - } } - */ if ( world_rank == 0 ) { + auto name = theGridAccelerator->get_device().get_info(); + printf("AcceleratorSyclInit: Selected device is %s\n",name.c_str()); printf("AcceleratorSyclInit: ================================================\n"); } } From fb559614ad232e6e38aa04dab52d2eec6af97451 Mon Sep 17 
00:00:00 2001 From: Peter Boyle Date: Wed, 3 Jun 2020 09:12:47 -0400 Subject: [PATCH 36/86] Initialise memory manager --- Grid/util/Init.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Grid/util/Init.cc b/Grid/util/Init.cc index 97ac7dc9..79eccb73 100644 --- a/Grid/util/Init.cc +++ b/Grid/util/Init.cc @@ -286,7 +286,7 @@ void Grid_init(int *argc,char ***argv) ////////////////////////////////////////////////////////// acceleratorInit(); // Must come first to set device prior to MPI init due to Omnipath Driver - AllocationCache::Init(); + MemoryManager::Init(); if( GridCmdOptionExists(*argv,*argv+*argc,"--shm") ){ int MB; From e3147881a974ea91f96d06c74590b62670e2da0f Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 3 Jun 2020 09:23:48 -0400 Subject: [PATCH 37/86] Cache scheme --- Grid/allocator/MemoryManagerCacheDevice.cc | 459 +++++++++++++++++++++ 1 file changed, 459 insertions(+) create mode 100644 Grid/allocator/MemoryManagerCacheDevice.cc diff --git a/Grid/allocator/MemoryManagerCacheDevice.cc b/Grid/allocator/MemoryManagerCacheDevice.cc new file mode 100644 index 00000000..3a956f56 --- /dev/null +++ b/Grid/allocator/MemoryManagerCacheDevice.cc @@ -0,0 +1,459 @@ +#include + +#ifndef GRID_UVM + +#warning "Using explicit device memory copies" +NAMESPACE_BEGIN(Grid); +#define dprintf(...) 
+ +//////////////////////////////////////////////////////////// +// For caching copies of data on device +//////////////////////////////////////////////////////////// +MemoryManager::AccViewTable_t MemoryManager::AccViewTable; +MemoryManager::LRU_t MemoryManager::LRU; +MemoryManager::LRU_t MemoryManager::LRU_transient; + +//////////////////////////////////////////////////////// +// Footprint tracking +//////////////////////////////////////////////////////// +uint64_t MemoryManager::DeviceBytes; +uint64_t MemoryManager::DeviceLRUBytes; +uint64_t MemoryManager::DeviceMaxBytes = 1024*1024*128; +uint64_t MemoryManager::HostToDeviceBytes; +uint64_t MemoryManager::DeviceToHostBytes; + +//////////////////////////////////// +// Priority ordering for unlocked entries +// Empty +// CpuDirty +// Consistent +// AccDirty +//////////////////////////////////// +#define Empty (0x0) /*Entry unoccupied */ +#define CpuDirty (0x1) /*CPU copy is golden, Acc buffer MAY not be allocated*/ +#define Consistent (0x2) /*ACC copy AND CPU copy are valid */ +#define AccDirty (0x4) /*ACC copy is golden */ +#define EvictNext (0x8) /*Priority for eviction*/ + +///////////////////////////////////////////////// +// Mechanics of data table maintenance +///////////////////////////////////////////////// +int MemoryManager::EntryPresent(uint64_t CpuPtr) +{ + if(AccViewTable.empty()) return 0; + + auto count = AccViewTable.count(CpuPtr); assert((count==0)||(count==1)); + return count; +} +void MemoryManager::EntryCreate(uint64_t CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint) +{ + assert(!EntryPresent(CpuPtr)); + AcceleratorViewEntry AccCache; + AccCache.CpuPtr = CpuPtr; + AccCache.AccPtr = (uint64_t)NULL; + AccCache.bytes = bytes; + AccCache.state = CpuDirty; + AccCache.LRU_valid=0; + AccCache.transient=0; + AccCache.accLock=0; + AccCache.cpuLock=0; + AccViewTable[CpuPtr] = AccCache; +} +MemoryManager::AccViewTableIterator MemoryManager::EntryLookup(uint64_t CpuPtr) +{ + assert(EntryPresent(CpuPtr)); 
+ auto AccCacheIterator = AccViewTable.find(CpuPtr); + assert(AccCacheIterator!=AccViewTable.end()); + return AccCacheIterator; +} +void MemoryManager::EntryErase(uint64_t CpuPtr) +{ + auto AccCache = EntryLookup(CpuPtr); + AccViewTable.erase(CpuPtr); +} +/* +void LRUupdate(AcceleratorViewEntry &AccCache) +{ + assert(0); // No such thing. Acc view removes + assert(AccCache.LRU_valid==1); + assert(AccCache.accLock==0); + assert(AccCache.cpuLock==0); + assert(AccCache.CpuPtr==(*AccCache.LRU_entry)); + LRU.erase(AccCache.LRU_entry); + LRU.push_front(AccCache.CpuPtr); + AccCache.LRU_entry = LRU.begin(); + AccCache.LRU_valid = 1; +} +*/ +void MemoryManager::LRUinsert(AcceleratorViewEntry &AccCache) +{ + assert(AccCache.LRU_valid==0); + LRU.push_front(AccCache.CpuPtr); + AccCache.LRU_entry = LRU.begin(); + AccCache.LRU_valid = 1; + DeviceLRUBytes+=AccCache.bytes; +} +void MemoryManager::LRUremove(AcceleratorViewEntry &AccCache) +{ + assert(AccCache.LRU_valid==1); + LRU.erase(AccCache.LRU_entry); + AccCache.LRU_valid = 0; + DeviceLRUBytes-=AccCache.bytes; +} +///////////////////////////////////////////////// +// Accelerator cache motion & consistency logic +///////////////////////////////////////////////// +void MemoryManager::AccDiscard(AcceleratorViewEntry &AccCache) +{ + /////////////////////////////////////////////////////////// + // Remove from Accelerator, remove entry, without flush + // Cannot be locked. If allocated Must be in LRU pool. 
+ /////////////////////////////////////////////////////////// + assert(AccCache.state!=Empty); + + dprintf("MemoryManager: Discard(%llx) %llx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr); + assert(AccCache.accLock==0); + assert(AccCache.cpuLock==0); + assert(AccCache.CpuPtr!=(uint64_t)NULL); + if(AccCache.AccPtr) { + AcceleratorFree((void *)AccCache.AccPtr,AccCache.bytes); + DeviceBytes -=AccCache.bytes; + LRUremove(AccCache); + dprintf("MemoryManager: Free(%llx) LRU %lld Total %lld\n",(uint64_t)AccCache.AccPtr,DeviceLRUBytes,DeviceBytes); + } + uint64_t CpuPtr = AccCache.CpuPtr; + EntryErase(CpuPtr); +} + +void MemoryManager::Evict(AcceleratorViewEntry &AccCache) +{ + /////////////////////////////////////////////////////////////////////////// + // Make CPU consistent, remove from Accelerator, remove entry + // Cannot be locked. If allocated must be in LRU pool. + /////////////////////////////////////////////////////////////////////////// + assert(AccCache.state!=Empty); + + dprintf("MemoryManager: Evict(%llx) %llx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr); + assert(AccCache.accLock==0); + assert(AccCache.cpuLock==0); + if(AccCache.state==AccDirty) { + Flush(AccCache); + } + assert(AccCache.CpuPtr!=(uint64_t)NULL); + if(AccCache.AccPtr) { + AcceleratorFree((void *)AccCache.AccPtr,AccCache.bytes); + DeviceBytes -=AccCache.bytes; + LRUremove(AccCache); + dprintf("MemoryManager: Free(%llx) footprint now %lld \n",(uint64_t)AccCache.AccPtr,DeviceBytes); + } + uint64_t CpuPtr = AccCache.CpuPtr; + EntryErase(CpuPtr); +} +void MemoryManager::Flush(AcceleratorViewEntry &AccCache) +{ + assert(AccCache.state==AccDirty); + assert(AccCache.cpuLock==0); + assert(AccCache.accLock==0); + assert(AccCache.AccPtr!=(uint64_t)NULL); + assert(AccCache.CpuPtr!=(uint64_t)NULL); + acceleratorCopyFromDevice((void *)AccCache.AccPtr,(void *)AccCache.CpuPtr,AccCache.bytes); + dprintf("MemoryManager: Flush %llx -> 
%llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout); + DeviceToHostBytes+=AccCache.bytes; + AccCache.state=Consistent; +} +void MemoryManager::Clone(AcceleratorViewEntry &AccCache) +{ + assert(AccCache.state==CpuDirty); + assert(AccCache.cpuLock==0); + assert(AccCache.accLock==0); + assert(AccCache.CpuPtr!=(uint64_t)NULL); + if(AccCache.AccPtr==(uint64_t)NULL){ + AccCache.AccPtr=(uint64_t)AcceleratorAllocate(AccCache.bytes); + DeviceBytes+=AccCache.bytes; + } + dprintf("MemoryManager: Clone %llx <- %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout); + acceleratorCopyToDevice((void *)AccCache.CpuPtr,(void *)AccCache.AccPtr,AccCache.bytes); + HostToDeviceBytes+=AccCache.bytes; + AccCache.state=Consistent; +} + +void MemoryManager::CpuDiscard(AcceleratorViewEntry &AccCache) +{ + assert(AccCache.state!=Empty); + assert(AccCache.cpuLock==0); + assert(AccCache.accLock==0); + assert(AccCache.CpuPtr!=(uint64_t)NULL); + if(AccCache.AccPtr==(uint64_t)NULL){ + AccCache.AccPtr=(uint64_t)AcceleratorAllocate(AccCache.bytes); + DeviceBytes+=AccCache.bytes; + } + AccCache.state=AccDirty; +} + +///////////////////////////////////////////////////////////////////////////////// +// View management +///////////////////////////////////////////////////////////////////////////////// +void MemoryManager::ViewClose(void* Ptr,ViewMode mode) +{ + if( (mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard) ){ + AcceleratorViewClose((uint64_t)Ptr); + } else if( (mode==CpuRead)||(mode==CpuWrite)){ + CpuViewClose((uint64_t)Ptr); + } else { + assert(0); + } +} +void *MemoryManager::ViewOpen(void* _CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint) +{ + uint64_t CpuPtr = (uint64_t)_CpuPtr; + if( (mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard) ){ + return (void *) AcceleratorViewOpen(CpuPtr,bytes,mode,hint); + } else if( (mode==CpuRead)||(mode==CpuWrite)){ + return (void 
*)CpuViewOpen(CpuPtr,bytes,mode,hint); + } else { + assert(0); + return NULL; + } +} +void MemoryManager::EvictVictims(uint64_t bytes) +{ + while(bytes+DeviceLRUBytes > DeviceMaxBytes){ + if ( DeviceLRUBytes > 0){ + assert(LRU.size()>0); + uint64_t victim = LRU.back(); + auto AccCacheIterator = EntryLookup(victim); + auto & AccCache = AccCacheIterator->second; + Evict(AccCache); + } + } +} +uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint) +{ + //////////////////////////////////////////////////////////////////////////// + // Find if present, otherwise get or force an empty + //////////////////////////////////////////////////////////////////////////// + if ( EntryPresent(CpuPtr)==0 ){ + EvictVictims(bytes); + EntryCreate(CpuPtr,bytes,mode,hint); + } + + auto AccCacheIterator = EntryLookup(CpuPtr); + auto & AccCache = AccCacheIterator->second; + + assert((mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard)); + + assert(AccCache.cpuLock==0); // Programming error + + if(AccCache.state!=Empty) { + assert(AccCache.CpuPtr == CpuPtr); + assert(AccCache.bytes ==bytes); + } +/* + * State transitions and actions + * + * Action State StateNext Flush Clone + * + * AccRead Empty Consistent - Y + * AccWrite Empty AccDirty - Y + * AccRead CpuDirty Consistent - Y + * AccWrite CpuDirty AccDirty - Y + * AccRead Consistent Consistent - - + * AccWrite Consistent AccDirty - - + * AccRead AccDirty AccDirty - - + * AccWrite AccDirty AccDirty - - + */ + if(AccCache.state==Empty) { + assert(AccCache.LRU_valid==0); + AccCache.CpuPtr = CpuPtr; + AccCache.AccPtr = (uint64_t)NULL; + AccCache.bytes = bytes; + AccCache.state = CpuDirty; // Cpu starts primary + if(mode==AcceleratorWriteDiscard){ + CpuDiscard(AccCache); + AccCache.state = AccDirty; // Empty + AcceleratorWrite=> AccDirty + } else if(mode==AcceleratorWrite){ + Clone(AccCache); + AccCache.state = AccDirty; // Empty + AcceleratorWrite=> AccDirty + } 
else { + Clone(AccCache); + AccCache.state = Consistent; // Empty + AccRead => Consistent + } + AccCache.accLock= 1; + } else if(AccCache.state==CpuDirty ){ + if(mode==AcceleratorWriteDiscard) { + CpuDiscard(AccCache); + AccCache.state = AccDirty; // CpuDirty + AcceleratorWrite=> AccDirty + } else if(mode==AcceleratorWrite) { + Clone(AccCache); + AccCache.state = AccDirty; // CpuDirty + AcceleratorWrite=> AccDirty + } else { + Clone(AccCache); + AccCache.state = Consistent; // CpuDirty + AccRead => Consistent + } + AccCache.accLock++; + // printf("Copied CpuDirty entry into device accLock %d\n",AccCache.accLock); + } else if(AccCache.state==Consistent) { + if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard)) + AccCache.state = AccDirty; // Consistent + AcceleratorWrite=> AccDirty + else + AccCache.state = Consistent; // Consistent + AccRead => Consistent + AccCache.accLock++; + // printf("Consistent entry into device accLock %d\n",AccCache.accLock); + } else if(AccCache.state==AccDirty) { + if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard)) + AccCache.state = AccDirty; // AccDirty + AcceleratorWrite=> AccDirty + else + AccCache.state = AccDirty; // AccDirty + AccRead => AccDirty + AccCache.accLock++; + // printf("AccDirty entry into device accLock %d\n",AccCache.accLock); + } else { + assert(0); + } + + // If view is opened on device remove from LRU + if(AccCache.LRU_valid==1){ + // must possibly remove from LRU as now locked on GPU + LRUremove(AccCache); + } + + int transient =hint; + AccCache.transient= transient? 
EvictNext : 0; + + return AccCache.AccPtr; +} +//////////////////////////////////// +// look up & decrement lock count +//////////////////////////////////// +void MemoryManager::AcceleratorViewClose(uint64_t CpuPtr) +{ + auto AccCacheIterator = EntryLookup(CpuPtr); + auto & AccCache = AccCacheIterator->second; + + assert(AccCache.cpuLock==0); + assert(AccCache.accLock>0); + + AccCache.accLock--; + + // Move to LRU queue if not locked and close on device + if(AccCache.accLock==0) { + LRUinsert(AccCache); + } +} +void MemoryManager::CpuViewClose(uint64_t CpuPtr) +{ + auto AccCacheIterator = EntryLookup(CpuPtr); + auto & AccCache = AccCacheIterator->second; + + assert(AccCache.cpuLock>0); + assert(AccCache.accLock==0); + + AccCache.cpuLock--; +} +/* + * Action State StateNext Flush Clone + * + * CpuRead Empty CpuDirty - - + * CpuWrite Empty CpuDirty - - + * CpuRead CpuDirty CpuDirty - - + * CpuWrite CpuDirty CpuDirty - - + * CpuRead Consistent Consistent - - + * CpuWrite Consistent CpuDirty - - + * CpuRead AccDirty Consistent Y - + * CpuWrite AccDirty CpuDirty Y - + */ +uint64_t MemoryManager::CpuViewOpen(uint64_t CpuPtr,size_t bytes,ViewMode mode,ViewAdvise transient) +{ + //////////////////////////////////////////////////////////////////////////// + // Find if present, otherwise get or force an empty + //////////////////////////////////////////////////////////////////////////// + if ( EntryPresent(CpuPtr)==0 ){ + EvictVictims(bytes); + EntryCreate(CpuPtr,bytes,mode,transient); + } + + auto AccCacheIterator = EntryLookup(CpuPtr); + auto & AccCache = AccCacheIterator->second; + + assert((mode==CpuRead)||(mode==CpuWrite)); + assert(AccCache.accLock==0); // Programming error + + if(AccCache.state!=Empty) { + assert(AccCache.CpuPtr == CpuPtr); + assert(AccCache.bytes==bytes); + } + + if(AccCache.state==Empty) { + AccCache.CpuPtr = CpuPtr; + AccCache.AccPtr = (uint64_t)NULL; + AccCache.bytes = bytes; + AccCache.state = CpuDirty; // Empty + CpuRead/CpuWrite => CpuDirty + 
AccCache.accLock= 0; + AccCache.cpuLock= 1; + } else if(AccCache.state==CpuDirty ){ + // AccPtr dont care, deferred allocate + AccCache.state = CpuDirty; // CpuDirty +CpuRead/CpuWrite => CpuDirty + AccCache.cpuLock++; + } else if(AccCache.state==Consistent) { + assert(AccCache.AccPtr != (uint64_t)NULL); + if(mode==CpuWrite) + AccCache.state = CpuDirty; // Consistent +CpuWrite => CpuDirty + else + AccCache.state = Consistent; // Consistent +CpuRead => Consistent + AccCache.cpuLock++; + } else if(AccCache.state==AccDirty) { + assert(AccCache.AccPtr != (uint64_t)NULL); + Flush(AccCache); + if(mode==CpuWrite) AccCache.state = CpuDirty; // AccDirty +CpuWrite => CpuDirty, Flush + else AccCache.state = Consistent; // AccDirty +CpuRead => Consistent, Flush + AccCache.cpuLock++; + } else { + assert(0); // should be unreachable + } + + AccCache.transient= transient? EvictNext : 0; + + return AccCache.CpuPtr; +} +void MemoryManager::NotifyDeletion(void *_ptr) +{ + // Look up in ViewCache + uint64_t ptr = (uint64_t)_ptr; + if(EntryPresent(ptr)) { + auto e = EntryLookup(ptr); + AccDiscard(e->second); + } +} +void MemoryManager::Print(void) +{ + std::cout << GridLogDebug << "--------------------------------------------" << std::endl; + std::cout << GridLogDebug << " Memory Manager " << std::endl; + std::cout << GridLogDebug << "--------------------------------------------" << std::endl; + std::cout << GridLogDebug << DeviceBytes << " bytes allocated on device " << std::endl; + std::cout << GridLogDebug << DeviceLRUBytes<< " bytes evictable on device " << std::endl; + std::cout << GridLogDebug << DeviceMaxBytes<< " bytes max on device " << std::endl; + std::cout << GridLogDebug << HostToDeviceBytes<< " bytes transfered to device " << std::endl; + std::cout << GridLogDebug << DeviceToHostBytes<< " bytes transfered from device " << std::endl; + std::cout << GridLogDebug << AccViewTable.size()<< " vectors " << std::endl; + std::cout << GridLogDebug << 
"--------------------------------------------" << std::endl; + for(auto it=AccViewTable.begin();it!=AccViewTable.end();it++){ + auto &AccCache = it->second; + + std::string str; + if ( AccCache.state==Empty ) str = std::string("Empty"); + if ( AccCache.state==CpuDirty ) str = std::string("CpuDirty"); + if ( AccCache.state==AccDirty ) str = std::string("AccDirty"); + if ( AccCache.state==Consistent)str = std::string("Consistent"); + + std::cout << GridLogDebug << " Cpu 0x"< Date: Wed, 3 Jun 2020 09:28:57 -0400 Subject: [PATCH 38/86] Better printing and logging --- Grid/allocator/MemoryManager.h | 2 + ...erCacheDevice.cc => MemoryManagerCache.cc} | 6 + Grid/allocator/MemoryManagerCacheDeviceMem.cc | 409 ------------------ 3 files changed, 8 insertions(+), 409 deletions(-) rename Grid/allocator/{MemoryManagerCacheDevice.cc => MemoryManagerCache.cc} (98%) delete mode 100644 Grid/allocator/MemoryManagerCacheDeviceMem.cc diff --git a/Grid/allocator/MemoryManager.h b/Grid/allocator/MemoryManager.h index e3339ce0..8c6a5af4 100644 --- a/Grid/allocator/MemoryManager.h +++ b/Grid/allocator/MemoryManager.h @@ -112,6 +112,8 @@ private: static uint64_t DeviceMaxBytes; static uint64_t HostToDeviceBytes; static uint64_t DeviceToHostBytes; + static uint64_t HostToDeviceXfer; + static uint64_t DeviceToHostXfer; #ifndef GRID_UVM ////////////////////////////////////////////////////////////////////// diff --git a/Grid/allocator/MemoryManagerCacheDevice.cc b/Grid/allocator/MemoryManagerCache.cc similarity index 98% rename from Grid/allocator/MemoryManagerCacheDevice.cc rename to Grid/allocator/MemoryManagerCache.cc index 3a956f56..d84f9a05 100644 --- a/Grid/allocator/MemoryManagerCacheDevice.cc +++ b/Grid/allocator/MemoryManagerCache.cc @@ -21,6 +21,8 @@ uint64_t MemoryManager::DeviceLRUBytes; uint64_t MemoryManager::DeviceMaxBytes = 1024*1024*128; uint64_t MemoryManager::HostToDeviceBytes; uint64_t MemoryManager::DeviceToHostBytes; +uint64_t MemoryManager::HostToDeviceXfer; +uint64_t 
MemoryManager::DeviceToHostXfer; //////////////////////////////////// // Priority ordering for unlocked entries @@ -159,6 +161,7 @@ void MemoryManager::Flush(AcceleratorViewEntry &AccCache) acceleratorCopyFromDevice((void *)AccCache.AccPtr,(void *)AccCache.CpuPtr,AccCache.bytes); dprintf("MemoryManager: Flush %llx -> %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout); DeviceToHostBytes+=AccCache.bytes; + DeviceToHostXfer++; AccCache.state=Consistent; } void MemoryManager::Clone(AcceleratorViewEntry &AccCache) @@ -174,6 +177,7 @@ void MemoryManager::Clone(AcceleratorViewEntry &AccCache) dprintf("MemoryManager: Clone %llx <- %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout); acceleratorCopyToDevice((void *)AccCache.CpuPtr,(void *)AccCache.AccPtr,AccCache.bytes); HostToDeviceBytes+=AccCache.bytes; + HostToDeviceXfer++; AccCache.state=Consistent; } @@ -431,6 +435,8 @@ void MemoryManager::Print(void) std::cout << GridLogDebug << DeviceBytes << " bytes allocated on device " << std::endl; std::cout << GridLogDebug << DeviceLRUBytes<< " bytes evictable on device " << std::endl; std::cout << GridLogDebug << DeviceMaxBytes<< " bytes max on device " << std::endl; + std::cout << GridLogDebug << HostToDeviceXfer << " transfers to device " << std::endl; + std::cout << GridLogDebug << DeviceToHostXfer << " transfers from device " << std::endl; std::cout << GridLogDebug << HostToDeviceBytes<< " bytes transfered to device " << std::endl; std::cout << GridLogDebug << DeviceToHostBytes<< " bytes transfered from device " << std::endl; std::cout << GridLogDebug << AccViewTable.size()<< " vectors " << std::endl; diff --git a/Grid/allocator/MemoryManagerCacheDeviceMem.cc b/Grid/allocator/MemoryManagerCacheDeviceMem.cc deleted file mode 100644 index f9804a9c..00000000 --- a/Grid/allocator/MemoryManagerCacheDeviceMem.cc +++ /dev/null @@ -1,409 +0,0 @@ -#if 0 -#include -#ifndef GRID_UVM - -#warning "Using explicit device memory copies" 
-NAMESPACE_BEGIN(Grid); -#define dprintf - -//////////////////////////////////////////////////////////// -// For caching copies of data on device -//////////////////////////////////////////////////////////// -const int NaccCacheMax=128; - -typedef struct { - void *CpuPtr; - void *AccPtr; - size_t bytes; - uint32_t transient; - uint32_t state; - uint32_t accLock; - uint32_t cpuLock; -} AcceleratorViewEntry; - -////////////////////////////////////////////////////////////////////// -// Data tables for ViewCache -////////////////////////////////////////////////////////////////////// -static AcceleratorViewEntry AccCache[NaccCacheMax]; -static int AccCacheVictim; // Base for round robin search -static int NaccCache = 32; - -//////////////////////////////////// -// Priority ordering for unlocked entries -// Empty -// CpuDirty -// Consistent -// AccDirty -//////////////////////////////////// -#define Empty (0x0) /*Entry unoccupied */ -#define CpuDirty (0x1) /*CPU copy is golden, Acc buffer MAY not be allocated*/ -#define Consistent (0x2) /*ACC copy AND CPU copy are valid */ -#define AccDirty (0x4) /*ACC copy is golden */ -#define EvictNext (0x8) /*Priority for eviction*/ - -int MemoryManager::ViewVictim(void) -{ - int prioEmpty =-1; - int prioCpuDirty =-1; - int prioConsistent =-1; - int prioAccDirty =-1; - int prioCpuDirtyEN =-1; - int prioConsistentEN =-1; - int prioAccDirtyEN =-1; - - int victim=-1; - - // round robin priority search of unlocked entries offset from current victim - for(int ep=0;ep= 0 ) victim = prioAccDirty; - if ( prioConsistent >= 0 ) victim = prioConsistent; - if ( prioCpuDirty >= 0 ) victim = prioCpuDirty; - if ( prioAccDirtyEN >= 0 ) victim = prioAccDirtyEN; - if ( prioConsistentEN >= 0 ) victim = prioConsistentEN; - if ( prioCpuDirtyEN >= 0 ) victim = prioCpuDirtyEN; - if ( prioEmpty >= 0 ) victim = prioEmpty; /*Highest prio is winner*/ - - assert(victim >= 0); // Must succeed/ - dprintf("MemoryManagerDeviceMem: Selected victim cache entry 
%d\n",victim); - - // advance victim pointer - AccCacheVictim=(AccCacheVictim+1)%NaccCache; - dprintf("MemoryManagerDeviceMem: victim pointer now %d / %d\n",AccCacheVictim,NaccCache); - - return victim; -} -///////////////////////////////////////////////// -// Accelerator cache motion -///////////////////////////////////////////////// - -void MemoryManager::Discard(int e) // remove from Accelerator, remove entry, without flush -{ - if(AccCache[e].state!=Empty){ - dprintf("MemoryManager: Discard(%d) %llx,%llx\n",e,(uint64_t)AccCache[e].AccPtr,(uint64_t)AccCache[e].CpuPtr); - assert(AccCache[e].accLock==0); - assert(AccCache[e].cpuLock==0); - assert(AccCache[e].CpuPtr!=NULL); - if(AccCache[e].AccPtr) { - dprintf("MemoryManager: Free(%d) %llx\n",e,(uint64_t)AccCache[e].AccPtr); - AcceleratorFree(AccCache[e].AccPtr,AccCache[e].bytes); - } - } - AccCache[e].AccPtr=NULL; - AccCache[e].CpuPtr=NULL; - AccCache[e].bytes=0; - AccCache[e].state=Empty; - AccCache[e].accLock=0; - AccCache[e].cpuLock=0; -} - -void MemoryManager::Evict(int e) // Make CPU consistent, remove from Accelerator, remove entry -{ - if(AccCache[e].state!=Empty){ - dprintf("MemoryManager: Evict(%d) %llx,%llx\n",e,(uint64_t)AccCache[e].AccPtr,(uint64_t)AccCache[e].CpuPtr); - assert(AccCache[e].accLock==0); - assert(AccCache[e].cpuLock==0); - if(AccCache[e].state==AccDirty) { - Flush(e); - } - assert(AccCache[e].CpuPtr!=NULL); - if(AccCache[e].AccPtr) { - dprintf("MemoryManager: Free(%d) %llx\n",e,(uint64_t)AccCache[e].AccPtr); - AcceleratorFree(AccCache[e].AccPtr,AccCache[e].bytes); - } - } - AccCache[e].AccPtr=NULL; - AccCache[e].CpuPtr=NULL; - AccCache[e].bytes=0; - AccCache[e].state=Empty; - AccCache[e].accLock=0; - AccCache[e].cpuLock=0; -} -void MemoryManager::Flush(int e)// Copy back from a dirty device state and mark consistent. 
Do not remove -{ - // printf("MemoryManager: Flush(%d) %llx -> %llx\n",e,(uint64_t)AccCache[e].AccPtr,(uint64_t)AccCache[e].CpuPtr); fflush(stdout); - assert(AccCache[e].state==AccDirty); - assert(AccCache[e].cpuLock==0); - assert(AccCache[e].accLock==0); - assert(AccCache[e].AccPtr!=NULL); - assert(AccCache[e].CpuPtr!=NULL); - acceleratorCopyFromDevice(AccCache[e].AccPtr,AccCache[e].CpuPtr,AccCache[e].bytes); - AccCache[e].state=Consistent; -} -void MemoryManager::Clone(int e)// Copy from CPU, mark consistent. Allocate if necessary -{ - assert(AccCache[e].state==CpuDirty); - assert(AccCache[e].cpuLock==0); - assert(AccCache[e].accLock==0); - assert(AccCache[e].CpuPtr!=NULL); - if(AccCache[e].AccPtr==NULL){ - AccCache[e].AccPtr=AcceleratorAllocate(AccCache[e].bytes); - } - // printf("MemoryManager: Clone(%d) %llx <- %llx\n",e,(uint64_t)AccCache[e].AccPtr,(uint64_t)AccCache[e].CpuPtr); fflush(stdout); - acceleratorCopyToDevice(AccCache[e].CpuPtr,AccCache[e].AccPtr,AccCache[e].bytes); - AccCache[e].state=Consistent; -} - -void MemoryManager::CpuDiscard(int e)// Mark accelerator dirty without copy. 
Allocate if necessary -{ - assert(AccCache[e].state!=Empty); - assert(AccCache[e].cpuLock==0); - assert(AccCache[e].accLock==0); - assert(AccCache[e].CpuPtr!=NULL); - if(AccCache[e].AccPtr==NULL){ - AccCache[e].AccPtr=AcceleratorAllocate(AccCache[e].bytes); - } - // printf("MemoryManager: CpuDiscard(%d) %llx <- %llx\n",e,(uint64_t)AccCache[e].AccPtr,(uint64_t)AccCache[e].CpuPtr); fflush(stdout); - // acceleratorCopyToDevice(AccCache[e].CpuPtr,AccCache[e].AccPtr,AccCache[e].bytes); - AccCache[e].state=AccDirty; -} - -///////////////////////////////////////////////////////////////////////////////// -// View management -///////////////////////////////////////////////////////////////////////////////// -void MemoryManager::ViewClose(void* Ptr,ViewMode mode) -{ - if( (mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard) ){ - AcceleratorViewClose(Ptr); - } else if( (mode==CpuRead)||(mode==CpuWrite)){ - CpuViewClose(Ptr); - } else { - assert(0); - } -} -void *MemoryManager::ViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint) -{ - if( (mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard) ){ - return AcceleratorViewOpen(CpuPtr,bytes,mode,hint); - } else if( (mode==CpuRead)||(mode==CpuWrite)){ - return CpuViewOpen(CpuPtr,bytes,mode,hint); - } else { - assert(0); - return nullptr; - } -} -void *MemoryManager::AcceleratorViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint) -{ - //////////////////////////////////////////////////////////////////////////// - // Find if present, otherwise get or force an empty - //////////////////////////////////////////////////////////////////////////// - int e=CpuViewLookup(CpuPtr); - if(e==-1) { - e = ViewVictim(); - dprintf("AcceleratorViewOpen Victim is %d\n",e); - Evict(e); // Does copy back if necessary, frees accelerator pointer if not null, sets to empty - } - - assert((mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard)); - 
assert(AccCache[e].cpuLock==0); // Programming error - - if(AccCache[e].state!=Empty) { - assert(AccCache[e].CpuPtr == CpuPtr); - assert(AccCache[e].bytes==bytes); - } -/* - * State transitions and actions - * - * Action State StateNext Flush Clone - * - * AccRead Empty Consistent - Y - * AccWrite Empty AccDirty - Y - * AccRead CpuDirty Consistent - Y - * AccWrite CpuDirty AccDirty - Y - * AccRead Consistent Consistent - - - * AccWrite Consistent AccDirty - - - * AccRead AccDirty AccDirty - - - * AccWrite AccDirty AccDirty - - - */ - if(AccCache[e].state==Empty) { - AccCache[e].CpuPtr = CpuPtr; - AccCache[e].AccPtr = NULL; - AccCache[e].bytes = bytes; - AccCache[e].state = CpuDirty; // Cpu starts primary - if(mode==AcceleratorWriteDiscard){ - CpuDiscard(e); - AccCache[e].state = AccDirty; // Empty + AcceleratorWrite=> AccDirty - } else if(mode==AcceleratorWrite){ - Clone(e); - AccCache[e].state = AccDirty; // Empty + AcceleratorWrite=> AccDirty - } else { - Clone(e); - AccCache[e].state = Consistent; // Empty + AccRead => Consistent - } - AccCache[e].accLock= 1; - // printf("Copied Empy entry %d into device accLock %d\n",e,AccCache[e].accLock); - } else if(AccCache[e].state==CpuDirty ){ - if(mode==AcceleratorWriteDiscard) { - CpuDiscard(e); - AccCache[e].state = AccDirty; // CpuDirty + AcceleratorWrite=> AccDirty - } else if(mode==AcceleratorWrite) { - Clone(e); - AccCache[e].state = AccDirty; // CpuDirty + AcceleratorWrite=> AccDirty - } else { - Clone(e); - AccCache[e].state = Consistent; // CpuDirty + AccRead => Consistent - } - AccCache[e].accLock++; - // printf("Copied CpuDirty entry %d into device accLock %d\n",e,AccCache[e].accLock); - } else if(AccCache[e].state==Consistent) { - if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard)) - AccCache[e].state = AccDirty; // Consistent + AcceleratorWrite=> AccDirty - else - AccCache[e].state = Consistent; // Consistent + AccRead => Consistent - AccCache[e].accLock++; - // printf("Consistent entry %d into 
device accLock %d\n",e,AccCache[e].accLock); - } else if(AccCache[e].state==AccDirty) { - if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard)) - AccCache[e].state = AccDirty; // AccDirty + AcceleratorWrite=> AccDirty - else - AccCache[e].state = AccDirty; // AccDirty + AccRead => AccDirty - AccCache[e].accLock++; - // printf("AccDirty entry %d into device accLock %d\n",e,AccCache[e].accLock); - } else { - assert(0); - } - - int transient =hint; - AccCache[e].transient= transient? EvictNext : 0; - - return AccCache[e].AccPtr; -} -/* - * Action State StateNext Flush Clone - * - * CpuRead Empty CpuDirty - - - * CpuWrite Empty CpuDirty - - - * CpuRead CpuDirty CpuDirty - - - * CpuWrite CpuDirty CpuDirty - - - * CpuRead Consistent Consistent - - - * CpuWrite Consistent CpuDirty - - - * CpuRead AccDirty Consistent Y - - * CpuWrite AccDirty CpuDirty Y - - */ -//////////////////////////////////// -// look up & decrement lock count -//////////////////////////////////// -void MemoryManager::AcceleratorViewClose(void* AccPtr) -{ - int e=CpuViewLookup(AccPtr); - // printf("AccView close %d lock %d \n",e,AccCache[e].accLock); - if(e==-1) exit(0); - if(AccCache[e].cpuLock!=0) exit(0); - if(AccCache[e].accLock==0) exit(0); - /* - assert(e!=-1); - assert(AccCache[e].cpuLock==0); - assert(AccCache[e].accLock>0); - */ - AccCache[e].accLock--; -} -void MemoryManager::CpuViewClose(void* CpuPtr) -{ - int e=CpuViewLookup(CpuPtr); - assert(e!=-1); - assert(AccCache[e].cpuLock>0); - assert(AccCache[e].accLock==0); - AccCache[e].cpuLock--; -} -void *MemoryManager::CpuViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise transient) -{ - //////////////////////////////////////////////////////////////////////////// - // Find if present, otherwise get or force an empty - //////////////////////////////////////////////////////////////////////////// - int e=CpuViewLookup(CpuPtr); - if(e==-1) { - e = ViewVictim(); - dprintf("CpuViewOpen Victim is %d\n",e); - Evict(e); // Does copy 
back if necessary, frees accelerator pointer if not null, sets to empty - } - - assert((mode==CpuRead)||(mode==CpuWrite)); - assert(AccCache[e].accLock==0); // Programming error - - if(AccCache[e].state!=Empty) { - assert(AccCache[e].CpuPtr == CpuPtr); - assert(AccCache[e].bytes==bytes); - } - - if(AccCache[e].state==Empty) { - AccCache[e].CpuPtr = CpuPtr; - AccCache[e].AccPtr = NULL; - AccCache[e].bytes = bytes; - AccCache[e].state = CpuDirty; // Empty + CpuRead/CpuWrite => CpuDirty - AccCache[e].accLock= 0; - AccCache[e].cpuLock= 1; - } else if(AccCache[e].state==CpuDirty ){ - // AccPtr dont care, deferred allocate - AccCache[e].state = CpuDirty; // CpuDirty +CpuRead/CpuWrite => CpuDirty - AccCache[e].cpuLock++; - } else if(AccCache[e].state==Consistent) { - assert(AccCache[e].AccPtr != NULL); - if(mode==CpuWrite) - AccCache[e].state = CpuDirty; // Consistent +CpuWrite => CpuDirty - else - AccCache[e].state = Consistent; // Consistent +CpuRead => Consistent - AccCache[e].cpuLock++; - } else if(AccCache[e].state==AccDirty) { - assert(AccCache[e].AccPtr != NULL); - Flush(e); - if(mode==CpuWrite) AccCache[e].state = CpuDirty; // AccDirty +CpuWrite => CpuDirty, Flush - else AccCache[e].state = Consistent; // AccDirty +CpuRead => Consistent, Flush - AccCache[e].cpuLock++; - } else { - assert(0); // should be unreachable - } - - AccCache[e].transient= transient? 
EvictNext : 0; - - return AccCache[e].CpuPtr; -} - -////////////////////////////////////////////////////////////////////////////// -//loop round robin over entries checking acc pointer -////////////////////////////////////////////////////////////////////////////// -int MemoryManager::CpuViewLookup(void *CpuPtr) -{ - assert(CpuPtr!=NULL); - for(int e=0;e Date: Wed, 3 Jun 2020 22:50:09 -0400 Subject: [PATCH 39/86] Accelerator loop use --- Grid/lattice/Lattice_reduction.h | 36 +++----------------------------- 1 file changed, 3 insertions(+), 33 deletions(-) diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index f2b0dc43..a3a1192d 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -78,7 +78,7 @@ inline typename vobj::scalar_object sum(const vobj *arg, Integer osites) template inline typename vobj::scalar_object sum(const Lattice &arg) { -#if defined(GRID_CUDA) +#if defined(GRID_CUDA)||defined(GRID_HIP) auto arg_v = arg.View(AcceleratorRead); Integer osites = arg.Grid()->oSites(); auto ssum= sum_gpu(&arg_v[0],osites); @@ -112,7 +112,6 @@ inline ComplexD innerProduct(const Lattice &left,const Lattice &righ const uint64_t nsimd = grid->Nsimd(); const uint64_t sites = grid->oSites(); -#if defined(GRID_CUDA) // Might make all code paths go this way. 
auto left_v = left.View(AcceleratorRead); auto right_v=right.View(AcceleratorRead); @@ -121,7 +120,6 @@ inline ComplexD innerProduct(const Lattice &left,const Lattice &righ typedef decltype(innerProduct(left_v[0],right_v[0])) inner_t; Vector inner_tmp(sites); auto inner_tmp_v = &inner_tmp[0]; - accelerator_for( ss, sites, nsimd,{ auto x_l = left_v(ss); @@ -131,22 +129,9 @@ inline ComplexD innerProduct(const Lattice &left,const Lattice &righ // This is in single precision and fails some tests // Need a sumD that sums in double +#if defined(GRID_CUDA)||defined(GRID_HIP) nrm = TensorRemove(sumD_gpu(inner_tmp_v,sites)); #else - // Might make all code paths go this way. - auto left_v = left.View(CpuRead); - auto right_v=right.View(CpuRead); - - // CPU - typedef decltype(innerProductD(left_v[0],right_v[0])) inner_t; - Vector inner_tmp(sites); - auto inner_tmp_v = &inner_tmp[0]; - - thread_for( ss, sites,{ - auto x_l = left_v[ss]; - auto y_l = right_v[ss]; - inner_tmp_v[ss]=innerProductD(x_l,y_l); - }) nrm = TensorRemove(sum_cpu(inner_tmp_v,sites)); #endif grid->GlobalSum(nrm); @@ -182,7 +167,6 @@ axpby_norm_fast(Lattice &z,sobj a,sobj b,const Lattice &x,const Latt const uint64_t nsimd = grid->Nsimd(); const uint64_t sites = grid->oSites(); -#if defined(GRID_CUDA)||defined(GRID_HIP) // GPU auto x_v=x.View(AcceleratorRead); auto y_v=y.View(AcceleratorRead); @@ -197,23 +181,9 @@ axpby_norm_fast(Lattice &z,sobj a,sobj b,const Lattice &x,const Latt coalescedWrite(inner_tmp_v[ss],innerProduct(tmp,tmp)); coalescedWrite(z_v[ss],tmp); }); - +#if defined(GRID_CUDA)||defined(GRID_HIP) nrm = real(TensorRemove(sumD_gpu(inner_tmp_v,sites))); #else - auto x_v=x.View(AcceleratorRead); - auto y_v=y.View(AcceleratorRead); - auto z_v=z.View(AcceleratorWrite); - - // CPU - typedef decltype(innerProductD(x_v[0],y_v[0])) inner_t; - Vector inner_tmp(sites); - auto inner_tmp_v = &inner_tmp[0]; - - accelerator_for( ss, sites, nsimd,{ - auto tmp = a*x_v(ss)+b*y_v(ss); - 
inner_tmp_v[ss]=innerProductD(tmp,tmp); - z_v[ss]=tmp; - }); // Already promoted to double nrm = real(TensorRemove(sum(inner_tmp_v,sites))); #endif From 0d95805cded3b8769ebe555349a87543a38564cf Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 3 Jun 2020 22:50:32 -0400 Subject: [PATCH 40/86] Print improvement --- Grid/allocator/MemoryManagerCache.cc | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/Grid/allocator/MemoryManagerCache.cc b/Grid/allocator/MemoryManagerCache.cc index d84f9a05..5820684c 100644 --- a/Grid/allocator/MemoryManagerCache.cc +++ b/Grid/allocator/MemoryManagerCache.cc @@ -430,7 +430,7 @@ void MemoryManager::NotifyDeletion(void *_ptr) void MemoryManager::Print(void) { std::cout << GridLogDebug << "--------------------------------------------" << std::endl; - std::cout << GridLogDebug << " Memory Manager " << std::endl; + std::cout << GridLogDebug << "Memory Manager " << std::endl; std::cout << GridLogDebug << "--------------------------------------------" << std::endl; std::cout << GridLogDebug << DeviceBytes << " bytes allocated on device " << std::endl; std::cout << GridLogDebug << DeviceLRUBytes<< " bytes evictable on device " << std::endl; @@ -441,6 +441,8 @@ void MemoryManager::Print(void) std::cout << GridLogDebug << DeviceToHostBytes<< " bytes transfered from device " << std::endl; std::cout << GridLogDebug << AccViewTable.size()<< " vectors " << std::endl; std::cout << GridLogDebug << "--------------------------------------------" << std::endl; + std::cout << GridLogDebug << "CpuAddr\t\tAccAddr\t\tState\t\tcpuLock\taccLock\tLRU_valid "<second; @@ -450,11 +452,11 @@ void MemoryManager::Print(void) if ( AccCache.state==AccDirty ) str = std::string("AccDirty"); if ( AccCache.state==Consistent)str = std::string("Consistent"); - std::cout << GridLogDebug << " Cpu 0x"< Date: Thu, 4 Jun 2020 14:58:03 -0400 Subject: [PATCH 41/86] Priintinig and device memory size detection --- Grid/allocator/MemoryManager.h | 4 
+--- Grid/allocator/MemoryManagerCache.cc | 2 +- Grid/threads/Accelerator.cc | 5 +++++ Grid/util/Init.cc | 12 ++++++++++++ 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/Grid/allocator/MemoryManager.h b/Grid/allocator/MemoryManager.h index 8c6a5af4..3ddd98b3 100644 --- a/Grid/allocator/MemoryManager.h +++ b/Grid/allocator/MemoryManager.h @@ -101,9 +101,6 @@ private: static void *CpuAllocate(size_t bytes); static void CpuFree (void *ptr,size_t bytes); - - private: - //////////////////////////////////////////////////////// // Footprint tracking //////////////////////////////////////////////////////// @@ -115,6 +112,7 @@ private: static uint64_t HostToDeviceXfer; static uint64_t DeviceToHostXfer; + private: #ifndef GRID_UVM ////////////////////////////////////////////////////////////////////// // Data tables for ViewCache diff --git a/Grid/allocator/MemoryManagerCache.cc b/Grid/allocator/MemoryManagerCache.cc index 5820684c..fa253c6c 100644 --- a/Grid/allocator/MemoryManagerCache.cc +++ b/Grid/allocator/MemoryManagerCache.cc @@ -439,7 +439,7 @@ void MemoryManager::Print(void) std::cout << GridLogDebug << DeviceToHostXfer << " transfers from device " << std::endl; std::cout << GridLogDebug << HostToDeviceBytes<< " bytes transfered to device " << std::endl; std::cout << GridLogDebug << DeviceToHostBytes<< " bytes transfered from device " << std::endl; - std::cout << GridLogDebug << AccViewTable.size()<< " vectors " << std::endl; + std::cout << GridLogDebug << AccViewTable.size()<< " vectors " << LRU.size()<<" evictable"<< std::endl; std::cout << GridLogDebug << "--------------------------------------------" << std::endl; std::cout << GridLogDebug << "CpuAddr\t\tAccAddr\t\tState\t\tcpuLock\taccLock\tLRU_valid "< Date: Thu, 4 Jun 2020 17:16:55 -0700 Subject: [PATCH 42/86] Decode of SYCL devices fix --- Grid/threads/Accelerator.cc | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/Grid/threads/Accelerator.cc 
b/Grid/threads/Accelerator.cc index 917c1c34..d049fd2f 100644 --- a/Grid/threads/Accelerator.cc +++ b/Grid/threads/Accelerator.cc @@ -36,7 +36,6 @@ void acceleratorInit(void) #define GPU_PROP_FMT(canMapHostMemory,FMT) printf("AcceleratorCudaInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory); #define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d"); - cudaGetDeviceProperties(&gpu_props[i], i); if ( world_rank == 0) { cudaDeviceProp prop; @@ -57,6 +56,8 @@ void acceleratorInit(void) } } MemoryManager::DeviceMaxBytes = (8*totalDeviceMem)/10; // Assume 80% ours +#undef GPU_PROP_FMT +#undef GPU_PROP #ifdef GRID_IBM_SUMMIT // IBM Jsrun makes cuda Device numbering screwy and not match rank @@ -117,6 +118,8 @@ void acceleratorInit(void) // GPU_PROP(singleToDoublePrecisionPerfRatio); } } +#undef GPU_PROP_FMT +#undef GPU_PROP #ifdef GRID_IBM_SUMMIT // IBM Jsrun makes cuda Device numbering screwy and not match rank if ( world_rank == 0 ) printf("AcceleratorHipInit: IBM Summit or similar - NOT setting device to node rank\n"); @@ -162,17 +165,18 @@ void acceleratorInit(void) for(int d = 0;d().c_str()); + printf("AcceleratorSyclInit: " #prop ": %s \n",devices[d].get_info().c_str()); #define GPU_PROP_FMT(prop,FMT) \ - printf("AcceleratorSyclInit: " #prop ": " FMT" \n",prop,devices[d].get_info()); + printf("AcceleratorSyclInit: " #prop ": " FMT" \n",devices[d].get_info()); -#define GPU_PROP(prop) GPU_PROP_FMT(prop,"%d"); +#define GPU_PROP(prop) GPU_PROP_FMT(prop,"%ld"); GPU_PROP_STR(vendor); GPU_PROP_STR(version); - GPU_PROP_STR(device_type); - GPU_PROP_STR(max_compute_units); + // GPU_PROP_STR(device_type); + /* + GPU_PROP(max_compute_units); GPU_PROP(native_vector_width_char); GPU_PROP(native_vector_width_short); GPU_PROP(native_vector_width_int); @@ -183,7 +187,8 @@ void acceleratorInit(void) GPU_PROP(address_bits); GPU_PROP(half_fp_config); GPU_PROP(single_fp_config); - GPU_PROP(double_fp_config); + */ + // GPU_PROP(double_fp_config); 
GPU_PROP(global_mem_size); } From 1a4c8c3387cbadf34885ad0380542e67cbfc82e5 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 5 Jun 2020 18:52:35 -0400 Subject: [PATCH 43/86] Global edit with change to View usage. autoView() creates a wrapper object that closes the view when scope closes. --- Grid/algorithms/Algorithms.h | 8 +- Grid/algorithms/CoarsenedMatrix.h | 53 ++++---- Grid/algorithms/FFT.h | 3 +- Grid/algorithms/iterative/BiCGSTAB.h | 56 ++++---- Grid/algorithms/iterative/ConjugateGradient.h | 12 +- .../iterative/ImplicitlyRestartedLanczos.h | 39 +++--- Grid/allocator/MemoryManager.h | 5 +- Grid/allocator/MemoryManagerCache.cc | 11 ++ Grid/allocator/MemoryManagerShared.cc | 1 + Grid/cshift/Cshift_common.h | 65 +++++----- Grid/lattice/Lattice_ET.h | 4 +- Grid/lattice/Lattice_arith.h | 68 +++++----- Grid/lattice/Lattice_base.h | 7 + Grid/lattice/Lattice_comparison.h | 14 +- Grid/lattice/Lattice_coordinate.h | 2 +- Grid/lattice/Lattice_local.h | 16 +-- Grid/lattice/Lattice_matrix_reduction.h | 14 +- Grid/lattice/Lattice_peekpoke.h | 24 ++-- Grid/lattice/Lattice_reality.h | 8 +- Grid/lattice/Lattice_reduction.h | 40 +++--- Grid/lattice/Lattice_rng.h | 2 +- Grid/lattice/Lattice_trace.h | 8 +- Grid/lattice/Lattice_transfer.h | 120 +++++------------- Grid/lattice/Lattice_transpose.h | 8 +- Grid/lattice/Lattice_unary.h | 16 +-- Grid/lattice/Lattice_view.h | 60 +++------ Grid/qcd/action/fermion/GparityWilsonImpl.h | 79 +++++++----- Grid/qcd/action/fermion/WilsonCloverFermion.h | 24 ++-- Grid/qcd/action/fermion/WilsonImpl.h | 29 +++-- .../implementation/CayleyFermion5Dcache.h | 20 +-- .../implementation/CayleyFermion5Dvec.h | 24 ++-- .../DomainWallEOFAFermionCache.h | 20 +-- ...ImprovedStaggeredFermion5DImplementation.h | 8 +- .../ImprovedStaggeredFermionImplementation.h | 16 +-- .../implementation/MobiusEOFAFermionCache.h | 40 +++--- .../NaiveStaggeredFermionImplementation.h | 12 +- .../StaggeredKernelsImplementation.h | 20 +-- .../WilsonFermionImplementation.h | 
22 ++-- .../WilsonKernelsImplementation.h | 46 +++---- Grid/qcd/action/gauge/GaugeImplTypes.h | 10 +- .../action/scalar/ScalarInteractionAction.h | 8 +- Grid/qcd/modules/Registration.h | 2 +- Grid/qcd/utils/A2Autils.h | 105 ++++++++------- Grid/qcd/utils/BaryonUtils.h | 27 ++-- Grid/qcd/utils/LinalgUtils.h | 48 +++---- Grid/qcd/utils/SUn.h | 20 +-- Grid/stencil/Stencil.h | 46 +++---- benchmarks/Benchmark_ITT.cc | 18 +-- benchmarks/Benchmark_dwf.cc | 12 +- benchmarks/Benchmark_dwf_sweep.cc | 12 +- benchmarks/Benchmark_memory_asynch.cc | 2 +- benchmarks/Benchmark_memory_bandwidth.cc | 2 - benchmarks/Benchmark_meson_field.cc | 18 +-- benchmarks/Benchmark_su3_gpu.cc | 26 ++-- benchmarks/Benchmark_wilson.cc | 20 +-- tests/Test_dwf_mixedcg_prec.cc | 8 +- tests/Test_general_stencil.cc | 9 +- tests/Test_stencil.cc | 16 +-- tests/core/Test_staggered5D.cc | 4 +- tests/debug/Test_cayley_mres.cc | 8 +- tests/forces/Test_contfrac_force.cc | 6 +- tests/forces/Test_dwf_force.cc | 6 +- tests/forces/Test_dwf_force_eofa.cc | 6 +- tests/forces/Test_dwf_gpforce.cc | 6 +- tests/forces/Test_dwf_gpforce_eofa.cc | 6 +- tests/forces/Test_gp_plaq_force.cc | 6 +- tests/forces/Test_gp_rect_force.cc | 6 +- tests/forces/Test_gpdwf_force.cc | 6 +- tests/forces/Test_gpwilson_force.cc | 6 +- tests/forces/Test_mobius_force.cc | 6 +- tests/forces/Test_mobius_force_eofa.cc | 6 +- tests/forces/Test_mobius_gpforce_eofa.cc | 6 +- tests/forces/Test_partfrac_force.cc | 6 +- tests/forces/Test_rect_force.cc | 6 +- tests/forces/Test_wilson_force.cc | 6 +- tests/forces/Test_wilsonclover_force.cc | 6 +- tests/forces/Test_zmobius_force.cc | 6 +- tests/solver/Test_dwf_hdcr.cc | 4 +- 78 files changed, 773 insertions(+), 778 deletions(-) diff --git a/Grid/algorithms/Algorithms.h b/Grid/algorithms/Algorithms.h index 48ea194b..7f27784b 100644 --- a/Grid/algorithms/Algorithms.h +++ b/Grid/algorithms/Algorithms.h @@ -29,9 +29,11 @@ Author: Peter Boyle #ifndef GRID_ALGORITHMS_H #define GRID_ALGORITHMS_H 
+NAMESPACE_CHECK(algorithms); #include #include #include +NAMESPACE_CHECK(SparseMatrix); #include #include @@ -41,10 +43,12 @@ Author: Peter Boyle #include #include #include - +NAMESPACE_CHECK(approx); #include #include +NAMESPACE_CHECK(ConjGrad); #include +NAMESPACE_CHECK(BiCGSTAB); #include #include #include @@ -62,7 +66,9 @@ Author: Peter Boyle #include #include +NAMESPACE_CHECK(PowerMethod); #include +NAMESPACE_CHECK(CoarsendMatrix); #include #endif diff --git a/Grid/algorithms/CoarsenedMatrix.h b/Grid/algorithms/CoarsenedMatrix.h index 4493d740..fb14ac32 100644 --- a/Grid/algorithms/CoarsenedMatrix.h +++ b/Grid/algorithms/CoarsenedMatrix.h @@ -186,10 +186,10 @@ public: hermop.HermOp(*Tn,y); - auto y_v = y.View(AcceleratorWrite); - auto Tn_v = Tn->View(AcceleratorWrite); - auto Tnp_v = Tnp->View(AcceleratorWrite); - auto Tnm_v = Tnm->View(AcceleratorWrite); + autoView( y_v , y, AcceleratorWrite); + autoView( Tn_v , (*Tn), AcceleratorWrite); + autoView( Tnp_v , (*Tnp), AcceleratorWrite); + autoView( Tnm_v , (*Tnm), AcceleratorWrite); const int Nsimd = CComplex::Nsimd(); accelerator_forNB(ss, FineGrid->oSites(), Nsimd, { coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss)); @@ -246,13 +246,14 @@ public: CartesianStencil Stencil; std::vector A; - + /////////////////////// // Interface /////////////////////// GridBase * Grid(void) { return _grid; }; // this is all the linalg routines need to know - RealD M (const CoarseVector &in, CoarseVector &out){ + RealD M (const CoarseVector &in, CoarseVector &out) + { conformable(_grid,in.Grid()); conformable(in.Grid(),out.Grid()); @@ -263,12 +264,13 @@ public: double comms_usec = -usecond(); Stencil.HaloExchange(in,compressor); comms_usec += usecond(); - - auto in_v = in.View(AcceleratorRead); - auto out_v = out.View(AcceleratorWrite); + + autoView( in_v , in, AcceleratorRead); + autoView( out_v , out, AcceleratorWrite); typedef LatticeView Aview; - + Vector AcceleratorViewContainer; + for(int p=0;p &out) { @@ -542,10 
+547,10 @@ public: blockMaskedInnerProduct(oZProj,omask,Subspace.subspace[j],Mphi); - auto iZProj_v = iZProj.View(AcceleratorRead) ; - auto oZProj_v = oZProj.View(AcceleratorRead) ; - auto A_p = A[p].View(AcceleratorWrite); - auto A_self = A[self_stencil].View(AcceleratorWrite); + autoView( iZProj_v , iZProj, AcceleratorRead) ; + autoView( oZProj_v , oZProj, AcceleratorRead) ; + autoView( A_p , A[p], AcceleratorWrite); + autoView( A_self , A[self_stencil], AcceleratorWrite); accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); }); // if( disp!= 0 ) { accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });} @@ -563,11 +568,11 @@ public: mult(tmp,phi,oddmask ); linop.Op(tmp,Mphio); { - auto tmp_ = tmp.View(AcceleratorWrite); - auto evenmask_ = evenmask.View(AcceleratorRead); - auto oddmask_ = oddmask.View(AcceleratorRead); - auto Mphie_ = Mphie.View(AcceleratorRead); - auto Mphio_ = Mphio.View(AcceleratorRead); + autoView( tmp_ , tmp, AcceleratorWrite); + autoView( evenmask_ , evenmask, AcceleratorRead); + autoView( oddmask_ , oddmask, AcceleratorRead); + autoView( Mphie_ , Mphie, AcceleratorRead); + autoView( Mphio_ , Mphio, AcceleratorRead); accelerator_for(ss, FineGrid->oSites(), Fobj::Nsimd(),{ coalescedWrite(tmp_[ss],evenmask_(ss)*Mphie_(ss) + oddmask_(ss)*Mphio_(ss)); }); @@ -575,8 +580,8 @@ public: blockProject(SelfProj,tmp,Subspace.subspace); - auto SelfProj_ = SelfProj.View(AcceleratorRead); - auto A_self = A[self_stencil].View(AcceleratorWrite); + autoView( SelfProj_ , SelfProj, AcceleratorRead); + autoView( A_self , A[self_stencil], AcceleratorWrite); accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ for(int j=0;j #endif #endif - NAMESPACE_BEGIN(Grid); template struct FFTW { }; @@ -190,7 +189,7 @@ public: typedef typename sobj::scalar_type scalar; Lattice pgbuf(&pencil_g); - auto pgbuf_v = pgbuf.View(CpuWrite); + autoView(pgbuf_v , pgbuf, CpuWrite); typedef 
typename FFTW::FFTW_scalar FFTW_scalar; typedef typename FFTW::FFTW_plan FFTW_plan; diff --git a/Grid/algorithms/iterative/BiCGSTAB.h b/Grid/algorithms/iterative/BiCGSTAB.h index 04328a77..f4e5cdda 100644 --- a/Grid/algorithms/iterative/BiCGSTAB.h +++ b/Grid/algorithms/iterative/BiCGSTAB.h @@ -122,12 +122,14 @@ class BiCGSTAB : public OperatorFunction LinearCombTimer.Start(); bo = beta * omega; - auto p_v = p.View(AcceleratorWrite); - auto r_v = r.View(AcceleratorWrite); - auto v_v = v.View(AcceleratorWrite); - accelerator_for(ss, p_v.size(), Field::vector_object::Nsimd(),{ - coalescedWrite(p_v[ss], beta*p_v(ss) - bo*v_v(ss) + r_v(ss)); - }); + { + autoView( p_v , p, AcceleratorWrite); + autoView( r_v , r, AcceleratorRead); + autoView( v_v , v, AcceleratorRead); + accelerator_for(ss, p_v.size(), Field::vector_object::Nsimd(),{ + coalescedWrite(p_v[ss], beta*p_v(ss) - bo*v_v(ss) + r_v(ss)); + }); + } LinearCombTimer.Stop(); LinalgTimer.Stop(); @@ -142,16 +144,20 @@ class BiCGSTAB : public OperatorFunction alpha = rho / Calpha.real(); LinearCombTimer.Start(); - auto h_v = h.View(AcceleratorWrite); - auto psi_v = psi.View(AcceleratorWrite); - accelerator_for(ss, h_v.size(), Field::vector_object::Nsimd(),{ - coalescedWrite(h_v[ss], alpha*p_v(ss) + psi_v(ss)); - }); - - auto s_v = s.View(AcceleratorWrite); - accelerator_for(ss, s_v.size(), Field::vector_object::Nsimd(),{ - coalescedWrite(s_v[ss], -alpha*v_v(ss) + r_v(ss)); - }); + { + autoView( p_v , p, AcceleratorRead); + autoView( r_v , r, AcceleratorRead); + autoView( v_v , v, AcceleratorRead); + autoView( psi_v,psi, AcceleratorRead); + autoView( h_v , h, AcceleratorWrite); + autoView( s_v , s, AcceleratorWrite); + accelerator_for(ss, h_v.size(), Field::vector_object::Nsimd(),{ + coalescedWrite(h_v[ss], alpha*p_v(ss) + psi_v(ss)); + }); + accelerator_for(ss, s_v.size(), Field::vector_object::Nsimd(),{ + coalescedWrite(s_v[ss], -alpha*v_v(ss) + r_v(ss)); + }); + } LinearCombTimer.Stop(); LinalgTimer.Stop(); @@ -166,13 
+172,19 @@ class BiCGSTAB : public OperatorFunction omega = Comega.real() / norm2(t); LinearCombTimer.Start(); - auto t_v = t.View(AcceleratorWrite); - accelerator_for(ss, psi_v.size(), Field::vector_object::Nsimd(),{ - coalescedWrite(psi_v[ss], h_v(ss) + omega * s_v(ss)); - coalescedWrite(r_v[ss], -omega * t_v(ss) + s_v(ss)); - }); + { + autoView( psi_v,psi, AcceleratorWrite); + autoView( r_v , r, AcceleratorWrite); + autoView( h_v , h, AcceleratorRead); + autoView( s_v , s, AcceleratorRead); + autoView( t_v , t, AcceleratorRead); + accelerator_for(ss, psi_v.size(), Field::vector_object::Nsimd(),{ + coalescedWrite(psi_v[ss], h_v(ss) + omega * s_v(ss)); + coalescedWrite(r_v[ss], -omega * t_v(ss) + s_v(ss)); + }); + } LinearCombTimer.Stop(); - + cp = norm2(r); LinalgTimer.Stop(); diff --git a/Grid/algorithms/iterative/ConjugateGradient.h b/Grid/algorithms/iterative/ConjugateGradient.h index c8180a11..14f3d306 100644 --- a/Grid/algorithms/iterative/ConjugateGradient.h +++ b/Grid/algorithms/iterative/ConjugateGradient.h @@ -141,16 +141,16 @@ public: LinearCombTimer.Start(); { - auto psi_v = psi.View(AcceleratorWrite); - auto p_v = p.View(AcceleratorWrite); - auto r_v = r.View(AcceleratorWrite); + autoView( psi_v , psi, AcceleratorWrite); + autoView( p_v , p, AcceleratorWrite); + autoView( r_v , r, AcceleratorWrite); accelerator_for(ss,p_v.size(), Field::vector_object::Nsimd(),{ coalescedWrite(psi_v[ss], a * p_v(ss) + psi_v(ss)); coalescedWrite(p_v[ss] , b * p_v(ss) + r_v (ss)); - }); - LinearCombTimer.Stop(); - LinalgTimer.Stop(); + }); } + LinearCombTimer.Stop(); + LinalgTimer.Stop(); std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k << " residual " << sqrt(cp/ssq) << " target " << Tolerance << std::endl; diff --git a/Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h b/Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h index 05ed8586..d2bec856 100644 --- a/Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ 
b/Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ -57,16 +57,17 @@ void basisOrthogonalize(std::vector &basis,Field &w,int k) template void basisRotate(std::vector &basis,Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm) { - typedef decltype(basis[0].View(CpuWrite)) View; - auto tmp_v = basis[0].View(CpuWrite); - Vector basis_v(basis.size(),tmp_v); - View *basis_vp = &basis_v[0]; - typedef typename Field::vector_object vobj; GridBase* grid = basis[0].Grid(); - for(int k=0;k basis_v; basis_v.reserve(basis.size()); + + for(int k=0;k > Bt(thread_max() * Nm); // Thread private thread_region @@ -142,6 +143,7 @@ void basisRotate(std::vector &basis,Eigen::MatrixXd& Qt,int j0, int j1, i coalescedWrite(basis_vp[jj][sss],coalescedRead(Bp[ss*nrot+j])); }); } + for(int k=0;k &basis,Eigen::MatrixXd& Qt,int j0, int j1, i template void basisRotateJ(Field &result,std::vector &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm) { - typedef decltype(basis[0].View(AcceleratorWrite)) View; - typedef typename Field::vector_object vobj; GridBase* grid = basis[0].Grid(); + typedef typename Field::vector_object vobj; + typedef decltype(basis[0].View(AcceleratorWrite)) View; result.Checkerboard() = basis[0].Checkerboard(); - auto result_v=result.View(AcceleratorWrite); - Vector basis_v(basis.size(),result_v); + + autoView(result_v,result, AcceleratorWrite); + Vector basis_v; basis_v.reserve(basis.size()); View * basis_vp = &basis_v[0]; - for(int k=0;k Qt_jv(Nm); - double * Qt_j = & Qt_jv[0]; + + for(int k=0;k Qt_jv(Nm); double * Qt_j = & Qt_jv[0]; + for(int k=0;koSites(),vobj::Nsimd(),{ auto B=coalescedRead(basis_vp[k0][ss]); B=Zero(); @@ -171,6 +175,7 @@ void basisRotateJ(Field &result,std::vector &basis,Eigen::MatrixXd& Qt,in } coalescedWrite(result_v[ss], B); }); + for(int k=0;k diff --git a/Grid/allocator/MemoryManager.h b/Grid/allocator/MemoryManager.h index 3ddd98b3..6e38d062 100644 --- a/Grid/allocator/MemoryManager.h +++ b/Grid/allocator/MemoryManager.h @@ 
-169,8 +169,9 @@ private: public: static void Print(void); - static void ViewClose(void* AccPtr,ViewMode mode); - static void *ViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint); + static int isOpen (void* CpuPtr); + static void ViewClose(void* CpuPtr,ViewMode mode); + static void *ViewOpen (void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint); }; diff --git a/Grid/allocator/MemoryManagerCache.cc b/Grid/allocator/MemoryManagerCache.cc index fa253c6c..a3408095 100644 --- a/Grid/allocator/MemoryManagerCache.cc +++ b/Grid/allocator/MemoryManagerCache.cc @@ -461,6 +461,17 @@ void MemoryManager::Print(void) std::cout << GridLogDebug << "--------------------------------------------" << std::endl; }; +int MemoryManager::isOpen (void* _CpuPtr) +{ + uint64_t CpuPtr = (uint64_t)_CpuPtr; + if ( EntryPresent(CpuPtr) ){ + auto AccCacheIterator = EntryLookup(CpuPtr); + auto & AccCache = AccCacheIterator->second; + return AccCache.cpuLock+AccCache.accLock; + } else { + return 0; + } +} NAMESPACE_END(Grid); diff --git a/Grid/allocator/MemoryManagerShared.cc b/Grid/allocator/MemoryManagerShared.cc index 0008add4..e7e67753 100644 --- a/Grid/allocator/MemoryManagerShared.cc +++ b/Grid/allocator/MemoryManagerShared.cc @@ -9,6 +9,7 @@ NAMESPACE_BEGIN(Grid); void MemoryManager::ViewClose(void* AccPtr,ViewMode mode){}; void *MemoryManager::ViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint){ return CpuPtr; }; +int MemoryManager::isOpen (void* CpuPtr) { return 0;} void MemoryManager::Print(void){}; void MemoryManager::NotifyDeletion(void *ptr){}; diff --git a/Grid/cshift/Cshift_common.h b/Grid/cshift/Cshift_common.h index 1c99e797..4de2bbe2 100644 --- a/Grid/cshift/Cshift_common.h +++ b/Grid/cshift/Cshift_common.h @@ -72,12 +72,14 @@ Gather_plane_simple (const Lattice &rhs,commVector &buffer,int dimen } } } - auto rhs_v = rhs.View(AcceleratorRead); - auto buffer_p = & buffer[0]; - auto table = &Cshift_table[0]; - accelerator_for(i,ent,1,{ - 
buffer_p[table[i].first]=rhs_v[table[i].second]; - }); + { + autoView(rhs_v , rhs, AcceleratorRead); + auto buffer_p = & buffer[0]; + auto table = &Cshift_table[0]; + accelerator_for(i,ent,1,{ + buffer_p[table[i].first]=rhs_v[table[i].second]; + }); + } } /////////////////////////////////////////////////////////////////// @@ -100,8 +102,8 @@ Gather_plane_extract(const Lattice &rhs, int e2=rhs.Grid()->_slice_block[dimension]; int n1=rhs.Grid()->_slice_stride[dimension]; - auto rhs_v = rhs.View(AcceleratorRead); if ( cbmask ==0x3){ + autoView(rhs_v , rhs, AcceleratorRead); accelerator_for2d(n,e1,b,e2,1,{ int o = n*n1; int offset = b+n*e2; @@ -110,8 +112,8 @@ Gather_plane_extract(const Lattice &rhs, extract(temp,pointers,offset); }); } else { + autoView(rhs_v , rhs, AcceleratorRead); - Coordinate rdim=rhs.Grid()->_rdimensions; Coordinate cdm =rhs.Grid()->_checker_dim_mask; std::cout << " Dense packed buffer WARNING " < void Scatter_plane_simple (Lattice &rhs,commVector void Scatter_plane_merge(Lattice &rhs,ExtractPointerA int e2=rhs.Grid()->_slice_block[dimension]; if(cbmask ==0x3 ) { - auto rhs_v = rhs.View(AcceleratorWrite); + autoView( rhs_v , rhs, AcceleratorWrite); accelerator_for2d(n,e1,b,e2,1,{ int o = n*rhs.Grid()->_slice_stride[dimension]; int offset = b+n*rhs.Grid()->_slice_block[dimension]; @@ -216,7 +220,7 @@ template void Scatter_plane_merge(Lattice &rhs,ExtractPointerA // Test_cshift_red_black code. 
// std::cout << "Scatter_plane merge assert(0); think this is buggy FIXME "<< std::endl;// think this is buggy FIXME std::cout<<" Unthreaded warning -- buffer is not densely packed ??"<_slice_stride[dimension]; @@ -272,13 +276,14 @@ template void Copy_plane(Lattice& lhs,const Lattice &rhs } } - auto rhs_v = rhs.View(AcceleratorRead); - auto lhs_v = lhs.View(AcceleratorWrite); - auto table = &Cshift_table[0]; - accelerator_for(i,ent,1,{ - lhs_v[table[i].first]=rhs_v[table[i].second]; - }); - + { + autoView(rhs_v , rhs, AcceleratorRead); + autoView(lhs_v , lhs, AcceleratorWrite); + auto table = &Cshift_table[0]; + accelerator_for(i,ent,1,{ + lhs_v[table[i].first]=rhs_v[table[i].second]; + }); + } } template void Copy_plane_permute(Lattice& lhs,const Lattice &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type) @@ -315,12 +320,14 @@ template void Copy_plane_permute(Lattice& lhs,const Lattice accelerator_inline -const lobj & eval(const uint64_t ss, const LatticeExprView &arg) +const lobj & eval(const uint64_t ss, const LatticeView &arg) { return arg[ss]; } // What needs this? 
+// Cannot be legal on accelerator +// Comparison must convert #if 1 template accelerator_inline const lobj & eval(const uint64_t ss, const Lattice &arg) diff --git a/Grid/lattice/Lattice_arith.h b/Grid/lattice/Lattice_arith.h index b1252952..c204af5c 100644 --- a/Grid/lattice/Lattice_arith.h +++ b/Grid/lattice/Lattice_arith.h @@ -36,9 +36,9 @@ NAMESPACE_BEGIN(Grid); template inline void mult(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ ret.Checkerboard() = lhs.Checkerboard(); - auto ret_v = ret.View(AcceleratorWrite); - auto lhs_v = lhs.View(AcceleratorRead); - auto rhs_v = rhs.View(AcceleratorRead); + autoView( ret_v , ret, AcceleratorWrite); + autoView( lhs_v , lhs, AcceleratorRead); + autoView( rhs_v , rhs, AcceleratorRead); conformable(ret,rhs); conformable(lhs,rhs); accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ @@ -55,9 +55,9 @@ void mac(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ ret.Checkerboard() = lhs.Checkerboard(); conformable(ret,rhs); conformable(lhs,rhs); - auto ret_v = ret.View(AcceleratorWrite); - auto lhs_v = lhs.View(AcceleratorRead); - auto rhs_v = rhs.View(AcceleratorRead); + autoView( ret_v , ret, AcceleratorWrite); + autoView( lhs_v , lhs, AcceleratorRead); + autoView( rhs_v , rhs, AcceleratorRead); accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto lhs_t=lhs_v(ss); @@ -72,9 +72,9 @@ void sub(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ ret.Checkerboard() = lhs.Checkerboard(); conformable(ret,rhs); conformable(lhs,rhs); - auto ret_v = ret.View(AcceleratorWrite); - auto lhs_v = lhs.View(AcceleratorRead); - auto rhs_v = rhs.View(AcceleratorRead); + autoView( ret_v , ret, AcceleratorWrite); + autoView( lhs_v , lhs, AcceleratorRead); + autoView( rhs_v , rhs, AcceleratorRead); accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto lhs_t=lhs_v(ss); @@ -88,9 +88,9 @@ void add(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ ret.Checkerboard() = 
lhs.Checkerboard(); conformable(ret,rhs); conformable(lhs,rhs); - auto ret_v = ret.View(AcceleratorWrite); - auto lhs_v = lhs.View(AcceleratorRead); - auto rhs_v = rhs.View(AcceleratorRead); + autoView( ret_v , ret, AcceleratorWrite); + autoView( lhs_v , lhs, AcceleratorRead); + autoView( rhs_v , rhs, AcceleratorRead); accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto lhs_t=lhs_v(ss); @@ -107,8 +107,8 @@ template inline void mult(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ ret.Checkerboard() = lhs.Checkerboard(); conformable(lhs,ret); - auto ret_v = ret.View(AcceleratorWrite); - auto lhs_v = lhs.View(AcceleratorRead); + autoView( ret_v , ret, AcceleratorWrite); + autoView( lhs_v , lhs, AcceleratorRead); accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; mult(&tmp,&lhs_v(ss),&rhs); @@ -120,8 +120,8 @@ template inline void mac(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ ret.Checkerboard() = lhs.Checkerboard(); conformable(ret,lhs); - auto ret_v = ret.View(AcceleratorWrite); - auto lhs_v = lhs.View(AcceleratorRead); + autoView( ret_v , ret, AcceleratorWrite); + autoView( lhs_v , lhs, AcceleratorRead); accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto lhs_t=lhs_v(ss); @@ -134,8 +134,8 @@ template inline void sub(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ ret.Checkerboard() = lhs.Checkerboard(); conformable(ret,lhs); - auto ret_v = ret.View(AcceleratorWrite); - auto lhs_v = lhs.View(AcceleratorRead); + autoView( ret_v , ret, AcceleratorWrite); + autoView( lhs_v , lhs, AcceleratorRead); accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto lhs_t=lhs_v(ss); @@ -147,8 +147,8 @@ template inline void add(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ ret.Checkerboard() = lhs.Checkerboard(); conformable(lhs,ret); - auto ret_v = ret.View(AcceleratorWrite); - auto lhs_v = lhs.View(AcceleratorRead); + 
autoView( ret_v , ret, AcceleratorWrite); + autoView( lhs_v , lhs, AcceleratorRead); accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto lhs_t=lhs_v(ss); @@ -164,8 +164,8 @@ template inline void mult(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ ret.Checkerboard() = rhs.Checkerboard(); conformable(ret,rhs); - auto ret_v = ret.View(AcceleratorWrite); - auto rhs_v = lhs.View(AcceleratorRead); + autoView( ret_v , ret, AcceleratorWrite); + autoView( rhs_v , lhs, AcceleratorRead); accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto rhs_t=rhs_v(ss); @@ -178,8 +178,8 @@ template inline void mac(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ ret.Checkerboard() = rhs.Checkerboard(); conformable(ret,rhs); - auto ret_v = ret.View(AcceleratorWrite); - auto rhs_v = lhs.View(AcceleratorRead); + autoView( ret_v , ret, AcceleratorWrite); + autoView( rhs_v , lhs, AcceleratorRead); accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto rhs_t=rhs_v(ss); @@ -192,8 +192,8 @@ template inline void sub(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ ret.Checkerboard() = rhs.Checkerboard(); conformable(ret,rhs); - auto ret_v = ret.View(AcceleratorWrite); - auto rhs_v = lhs.View(AcceleratorRead); + autoView( ret_v , ret, AcceleratorWrite); + autoView( rhs_v , lhs, AcceleratorRead); accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto rhs_t=rhs_v(ss); @@ -205,8 +205,8 @@ template inline void add(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ ret.Checkerboard() = rhs.Checkerboard(); conformable(ret,rhs); - auto ret_v = ret.View(AcceleratorWrite); - auto rhs_v = lhs.View(AcceleratorRead); + autoView( ret_v , ret, AcceleratorWrite); + autoView( rhs_v , lhs, AcceleratorRead); accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto rhs_t=rhs_v(ss); @@ -220,9 +220,9 @@ void axpy(Lattice &ret,sobj a,const 
Lattice &x,const Lattice & ret.Checkerboard() = x.Checkerboard(); conformable(ret,x); conformable(x,y); - auto ret_v = ret.View(AcceleratorWrite); - auto x_v = x.View(AcceleratorRead); - auto y_v = y.View(AcceleratorRead); + autoView( ret_v , ret, AcceleratorWrite); + autoView( x_v , x, AcceleratorRead); + autoView( y_v , y, AcceleratorRead); accelerator_for(ss,x_v.size(),vobj::Nsimd(),{ auto tmp = a*x_v(ss)+y_v(ss); coalescedWrite(ret_v[ss],tmp); @@ -233,9 +233,9 @@ void axpby(Lattice &ret,sobj a,sobj b,const Lattice &x,const Lattice ret.Checkerboard() = x.Checkerboard(); conformable(ret,x); conformable(x,y); - auto ret_v = ret.View(AcceleratorWrite); - auto x_v = x.View(AcceleratorRead); - auto y_v = y.View(AcceleratorRead); + autoView( ret_v , ret, AcceleratorWrite); + autoView( x_v , x, AcceleratorRead); + autoView( y_v , y, AcceleratorRead); accelerator_for(ss,x_v.size(),vobj::Nsimd(),{ auto tmp = a*x_v(ss)+b*y_v(ss); coalescedWrite(ret_v[ss],tmp); diff --git a/Grid/lattice/Lattice_base.h b/Grid/lattice/Lattice_base.h index 9aae3333..65f71441 100644 --- a/Grid/lattice/Lattice_base.h +++ b/Grid/lattice/Lattice_base.h @@ -84,6 +84,7 @@ public: ///////////////////////////////////////////////////////////////////////////////// void SetViewMode(ViewMode mode) { LatticeView accessor(*( (LatticeAccelerator *) this),mode); + accessor.ViewClose(); } ///////////////////////////////////////////////////////////////////////////////// // Return a view object that may be dereferenced in site loops. 
@@ -123,6 +124,7 @@ public: auto tmp = eval(ss,exprCopy); vstream(me[ss],tmp); }); + me.ViewClose(); ExpressionViewClose(exprCopy); return *this; } @@ -145,6 +147,7 @@ public: auto tmp = eval(ss,exprCopy); vstream(me[ss],tmp); }); + me.ViewClose(); ExpressionViewClose(exprCopy); return *this; } @@ -166,6 +169,7 @@ public: auto tmp = eval(ss,exprCopy); vstream(me[ss],tmp); }); + me.ViewClose(); ExpressionViewClose(exprCopy); return *this; } @@ -221,6 +225,7 @@ public: thread_for(ss,me.size(),{ me[ss]= r; }); + me.ViewClose(); return *this; } @@ -278,6 +283,7 @@ public: accelerator_for(ss,me.size(),vobj::Nsimd(),{ coalescedWrite(me[ss],him(ss)); }); + me.ViewClose(); him.ViewClose(); return *this; } @@ -292,6 +298,7 @@ public: accelerator_for(ss,me.size(),vobj::Nsimd(),{ coalescedWrite(me[ss],him(ss)); }); + me.ViewClose(); him.ViewClose(); return *this; } /////////////////////////////////////////// diff --git a/Grid/lattice/Lattice_comparison.h b/Grid/lattice/Lattice_comparison.h index 17a61750..6a29be94 100644 --- a/Grid/lattice/Lattice_comparison.h +++ b/Grid/lattice/Lattice_comparison.h @@ -78,9 +78,9 @@ template inline Lattice LLComparison(vfunctor op,const Lattice &lhs,const Lattice &rhs) { Lattice ret(rhs.Grid()); - auto lhs_v = lhs.View(CpuRead); - auto rhs_v = rhs.View(CpuRead); - auto ret_v = ret.View(CpuWrite); + autoView( lhs_v, lhs, CpuRead); + autoView( rhs_v, rhs, CpuRead); + autoView( ret_v, ret, CpuWrite); thread_for( ss, rhs_v.size(), { ret_v[ss]=op(lhs_v[ss],rhs_v[ss]); }); @@ -93,8 +93,8 @@ template inline Lattice LSComparison(vfunctor op,const Lattice &lhs,const robj &rhs) { Lattice ret(lhs.Grid()); - auto lhs_v = lhs.View(CpuRead); - auto ret_v = ret.View(CpuWrite); + autoView( lhs_v, lhs, CpuRead); + autoView( ret_v, ret, CpuWrite); thread_for( ss, lhs_v.size(), { ret_v[ss]=op(lhs_v[ss],rhs); }); @@ -107,8 +107,8 @@ template inline Lattice SLComparison(vfunctor op,const lobj &lhs,const Lattice &rhs) { Lattice ret(rhs.Grid()); - auto rhs_v = 
rhs.View(CpuRead); - auto ret_v = ret.View(CpuWrite); + autoView( rhs_v, rhs, CpuRead); + autoView( ret_v, ret, CpuWrite); thread_for( ss, rhs_v.size(), { ret_v[ss]=op(lhs,rhs_v[ss]); }); diff --git a/Grid/lattice/Lattice_coordinate.h b/Grid/lattice/Lattice_coordinate.h index b8e73b25..cd0f11ee 100644 --- a/Grid/lattice/Lattice_coordinate.h +++ b/Grid/lattice/Lattice_coordinate.h @@ -37,7 +37,7 @@ template inline void LatticeCoordinate(Lattice &l,int mu) GridBase *grid = l.Grid(); int Nsimd = grid->iSites(); - auto l_v = l.View(CpuWrite); + autoView(l_v, l, CpuWrite); thread_for( o, grid->oSites(), { vector_type vI; Coordinate gcoor; diff --git a/Grid/lattice/Lattice_local.h b/Grid/lattice/Lattice_local.h index e497a748..1b31e9b3 100644 --- a/Grid/lattice/Lattice_local.h +++ b/Grid/lattice/Lattice_local.h @@ -43,8 +43,8 @@ template inline auto localNorm2 (const Lattice &rhs)-> Lattice { Lattice ret(rhs.Grid()); - auto rhs_v = rhs.View(AcceleratorRead); - auto ret_v = ret.View(AcceleratorWrite); + autoView( rhs_v , rhs, AcceleratorRead); + autoView( ret_v , ret, AcceleratorWrite); accelerator_for(ss,rhs_v.size(),vobj::Nsimd(),{ coalescedWrite(ret_v[ss],innerProduct(rhs_v(ss),rhs_v(ss))); }); @@ -56,9 +56,9 @@ template inline auto localInnerProduct (const Lattice &lhs,const Lattice &rhs) -> Lattice { Lattice ret(rhs.Grid()); - auto lhs_v = lhs.View(AcceleratorRead); - auto rhs_v = rhs.View(AcceleratorRead); - auto ret_v = ret.View(AcceleratorWrite); + autoView( lhs_v , lhs, AcceleratorRead); + autoView( rhs_v , rhs, AcceleratorRead); + autoView( ret_v , ret, AcceleratorWrite); accelerator_for(ss,rhs_v.size(),vobj::Nsimd(),{ coalescedWrite(ret_v[ss],innerProduct(lhs_v(ss),rhs_v(ss))); }); @@ -73,9 +73,9 @@ inline auto outerProduct (const Lattice &lhs,const Lattice &rhs) -> Latt typedef decltype(coalescedRead(ll())) sll; typedef decltype(coalescedRead(rr())) srr; Lattice ret(rhs.Grid()); - auto lhs_v = lhs.View(AcceleratorRead); - auto rhs_v = 
rhs.View(AcceleratorRead); - auto ret_v = ret.View(AcceleratorWrite); + autoView( lhs_v , lhs, AcceleratorRead); + autoView( rhs_v , rhs, AcceleratorRead); + autoView( ret_v , ret, AcceleratorWrite); accelerator_for(ss,rhs_v.size(),1,{ // FIXME had issues with scalar version of outer // Use vector [] operator and don't read coalesce this loop diff --git a/Grid/lattice/Lattice_matrix_reduction.h b/Grid/lattice/Lattice_matrix_reduction.h index 88de5210..7c470fef 100644 --- a/Grid/lattice/Lattice_matrix_reduction.h +++ b/Grid/lattice/Lattice_matrix_reduction.h @@ -51,9 +51,9 @@ static void sliceMaddMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice int block =FullGrid->_slice_block [Orthog]; int nblock=FullGrid->_slice_nblock[Orthog]; int ostride=FullGrid->_ostride[Orthog]; - auto X_v = X.View(CpuRead); - auto Y_v = Y.View(CpuRead); - auto R_v = R.View(CpuWrite); + autoView( X_v , X, CpuRead); + autoView( Y_v , Y, CpuRead); + autoView( R_v , R, CpuWrite); thread_region { std::vector s_x(Nblock); @@ -97,8 +97,8 @@ static void sliceMulMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice< int nblock=FullGrid->_slice_nblock[Orthog]; int ostride=FullGrid->_ostride[Orthog]; - auto X_v = X.View(CpuRead); - auto R_v = R.View(CpuWrite); + autoView( X_v , X, CpuRead); + autoView( R_v , R, CpuWrite); thread_region { @@ -156,8 +156,8 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice int ostride=FullGrid->_ostride[Orthog]; typedef typename vobj::vector_typeD vector_typeD; - auto lhs_v = lhs.View(CpuRead); - auto rhs_v = rhs.View(CpuRead); + autoView( lhs_v , lhs, CpuRead); + autoView( rhs_v , rhs, CpuRead); thread_region { std::vector Left(Nblock); std::vector Right(Nblock); diff --git a/Grid/lattice/Lattice_peekpoke.h b/Grid/lattice/Lattice_peekpoke.h index af98c07b..2ec97b08 100644 --- a/Grid/lattice/Lattice_peekpoke.h +++ b/Grid/lattice/Lattice_peekpoke.h @@ -46,8 +46,8 @@ auto PeekIndex(const Lattice &lhs,int i) -> Lattice(vobj(),i))> 
ret(lhs.Grid()); ret.Checkerboard()=lhs.Checkerboard(); - auto ret_v = ret.View(CpuWrite); - auto lhs_v = lhs.View(CpuRead); + autoView( ret_v, ret, CpuWrite); + autoView( lhs_v, lhs, CpuRead); thread_for( ss, lhs_v.size(), { ret_v[ss] = peekIndex(lhs_v[ss],i); }); @@ -58,8 +58,8 @@ auto PeekIndex(const Lattice &lhs,int i,int j) -> Lattice(vobj(),i,j))> ret(lhs.Grid()); ret.Checkerboard()=lhs.Checkerboard(); - auto ret_v = ret.View(CpuWrite); - auto lhs_v = lhs.View(CpuRead); + autoView( ret_v, ret, CpuWrite); + autoView( lhs_v, lhs, CpuRead); thread_for( ss, lhs_v.size(), { ret_v[ss] = peekIndex(lhs_v[ss],i,j); }); @@ -72,8 +72,8 @@ auto PeekIndex(const Lattice &lhs,int i,int j) -> Lattice void PokeIndex(Lattice &lhs,const Lattice(vobj(),0))> & rhs,int i) { - auto rhs_v = rhs.View(CpuRead); - auto lhs_v = lhs.View(CpuWrite); + autoView( rhs_v, rhs, CpuRead); + autoView( lhs_v, lhs, CpuWrite); thread_for( ss, lhs_v.size(), { pokeIndex(lhs_v[ss],rhs_v[ss],i); }); @@ -81,8 +81,8 @@ void PokeIndex(Lattice &lhs,const Lattice(vobj() template void PokeIndex(Lattice &lhs,const Lattice(vobj(),0,0))> & rhs,int i,int j) { - auto rhs_v = rhs.View(CpuRead); - auto lhs_v = lhs.View(CpuWrite); + autoView( rhs_v, rhs, CpuRead); + autoView( lhs_v, lhs, CpuWrite); thread_for( ss, lhs_v.size(), { pokeIndex(lhs_v[ss],rhs_v[ss],i,j); }); @@ -111,7 +111,7 @@ void pokeSite(const sobj &s,Lattice &l,const Coordinate &site){ // extract-modify-merge cycle is easiest way and this is not perf critical ExtractBuffer buf(Nsimd); - auto l_v = l.View(CpuWrite); + autoView( l_v , l, CpuWrite); if ( rank == grid->ThisRank() ) { extract(l_v[odx],buf); buf[idx] = s; @@ -141,7 +141,7 @@ void peekSite(sobj &s,const Lattice &l,const Coordinate &site){ grid->GlobalCoorToRankIndex(rank,odx,idx,site); ExtractBuffer buf(Nsimd); - auto l_v = l.View(CpuWrite); + autoView( l_v , l, CpuWrite); extract(l_v[odx],buf); s = buf[idx]; @@ -173,7 +173,7 @@ inline void peekLocalSite(sobj &s,const Lattice &l,Coordinate 
&site){ idx= grid->iIndex(site); odx= grid->oIndex(site); - auto l_v = l.View(CpuRead); + autoView( l_v , l, CpuRead); scalar_type * vp = (scalar_type *)&l_v[odx]; scalar_type * pt = (scalar_type *)&s; @@ -202,7 +202,7 @@ inline void pokeLocalSite(const sobj &s,Lattice &l,Coordinate &site){ idx= grid->iIndex(site); odx= grid->oIndex(site); - auto l_v = l.View(CpuWrite); + autoView( l_v , l, CpuWrite); scalar_type * vp = (scalar_type *)&l_v[odx]; scalar_type * pt = (scalar_type *)&s; for(int w=0;w inline Lattice adj(const Lattice &lhs){ Lattice ret(lhs.Grid()); - auto lhs_v = lhs.View(AcceleratorRead); - auto ret_v = ret.View(AcceleratorWrite); + autoView( lhs_v, lhs, AcceleratorRead); + autoView( ret_v, ret, AcceleratorWrite); accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), { coalescedWrite(ret_v[ss], adj(lhs_v(ss))); }); @@ -50,8 +50,8 @@ template inline Lattice adj(const Lattice &lhs){ template inline Lattice conjugate(const Lattice &lhs){ Lattice ret(lhs.Grid()); - auto lhs_v = lhs.View(AcceleratorRead); - auto ret_v = ret.View(AcceleratorWrite); + autoView( lhs_v, lhs, AcceleratorRead); + autoView( ret_v, ret, AcceleratorWrite); accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), { coalescedWrite( ret_v[ss] , conjugate(lhs_v(ss))); }); diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index a3a1192d..16742947 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -79,11 +79,11 @@ template inline typename vobj::scalar_object sum(const Lattice &arg) { #if defined(GRID_CUDA)||defined(GRID_HIP) - auto arg_v = arg.View(AcceleratorRead); + autoView( arg_v, arg, AcceleratorRead); Integer osites = arg.Grid()->oSites(); auto ssum= sum_gpu(&arg_v[0],osites); #else - auto arg_v = arg.View(CpuRead); + autoView(arg_v, arg, CpuRead); Integer osites = arg.Grid()->oSites(); auto ssum= sum_cpu(&arg_v[0],osites); #endif @@ -113,8 +113,8 @@ inline ComplexD innerProduct(const Lattice &left,const Lattice &righ const 
uint64_t sites = grid->oSites(); // Might make all code paths go this way. - auto left_v = left.View(AcceleratorRead); - auto right_v=right.View(AcceleratorRead); + autoView( left_v , left, AcceleratorRead); + autoView( right_v,right, AcceleratorRead); // GPU - SIMT lane compliance... typedef decltype(innerProduct(left_v[0],right_v[0])) inner_t; @@ -168,9 +168,9 @@ axpby_norm_fast(Lattice &z,sobj a,sobj b,const Lattice &x,const Latt const uint64_t sites = grid->oSites(); // GPU - auto x_v=x.View(AcceleratorRead); - auto y_v=y.View(AcceleratorRead); - auto z_v=z.View(AcceleratorWrite); + autoView( x_v, x, AcceleratorRead); + autoView( y_v, y, AcceleratorRead); + autoView( z_v, z, AcceleratorWrite); typedef decltype(innerProduct(x_v[0],y_v[0])) inner_t; Vector inner_tmp(sites); @@ -257,7 +257,7 @@ template inline void sliceSum(const Lattice &Data,std::vector< // sum over reduced dimension planes, breaking out orthog dir // Parallel over orthog direction - auto Data_v=Data.View(CpuRead); + autoView( Data_v, Data, CpuRead); thread_for( r,rd, { int so=r*grid->_ostride[orthogdim]; // base offset for start of plane for(int n=0;n & result, const Latti int e2= grid->_slice_block [orthogdim]; int stride=grid->_slice_stride[orthogdim]; - auto lhv=lhs.View(CpuRead); - auto rhv=rhs.View(CpuRead); + autoView( lhv, lhs, CpuRead); + autoView( rhv, rhs, CpuRead); thread_for( r,rd,{ int so=r*grid->_ostride[orthogdim]; // base offset for start of plane @@ -443,9 +443,9 @@ static void sliceMaddVector(Lattice &R,std::vector &a,const Lattice tensor_reduced at; at=av; - auto Rv=R.View(CpuWrite); - auto Xv=X.View(CpuRead); - auto Yv=Y.View(CpuRead); + autoView( Rv, R, CpuWrite); + autoView( Xv, X, CpuRead); + autoView( Yv, Y, CpuRead); thread_for2d( n, e1, b,e2, { int ss= so+n*stride+b; Rv[ss] = at*Xv[ss]+Yv[ss]; @@ -501,9 +501,9 @@ static void sliceMaddMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice int nblock=FullGrid->_slice_nblock[Orthog]; int ostride=FullGrid->_ostride[Orthog]; 
- auto X_v=X.View(CpuRead); - auto Y_v=Y.View(CpuRead); - auto R_v=R.View(CpuWrite); + autoView( X_v, X, CpuRead); + autoView( Y_v, Y, CpuRead); + autoView( R_v, R, CpuWrite); thread_region { Vector s_x(Nblock); @@ -554,8 +554,8 @@ static void sliceMulMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice< int block =FullGrid->_slice_block [Orthog]; int nblock=FullGrid->_slice_nblock[Orthog]; int ostride=FullGrid->_ostride[Orthog]; - auto R_v = R.View(CpuWrite); - auto X_v = X.View(CpuRead); + autoView( R_v, R, CpuWrite); + autoView( X_v, X, CpuRead); thread_region { std::vector s_x(Nblock); @@ -613,8 +613,8 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice typedef typename vobj::vector_typeD vector_typeD; - auto lhs_v=lhs.View(CpuRead); - auto rhs_v=rhs.View(CpuRead); + autoView( lhs_v, lhs, CpuRead); + autoView( rhs_v, rhs, CpuRead); thread_region { std::vector Left(Nblock); diff --git a/Grid/lattice/Lattice_rng.h b/Grid/lattice/Lattice_rng.h index e5da8d35..e5e63716 100644 --- a/Grid/lattice/Lattice_rng.h +++ b/Grid/lattice/Lattice_rng.h @@ -375,7 +375,7 @@ public: int osites = _grid->oSites(); // guaranteed to be <= l.Grid()->oSites() by a factor multiplicity int words = sizeof(scalar_object) / sizeof(scalar_type); - auto l_v = l.View(CpuWrite); + autoView(l_v, l, CpuWrite); thread_for( ss, osites, { ExtractBuffer buf(Nsimd); for (int m = 0; m < multiplicity; m++) { // Draw from same generator multiplicity times diff --git a/Grid/lattice/Lattice_trace.h b/Grid/lattice/Lattice_trace.h index 8d1f85bd..b5d80ccc 100644 --- a/Grid/lattice/Lattice_trace.h +++ b/Grid/lattice/Lattice_trace.h @@ -41,8 +41,8 @@ template inline auto trace(const Lattice &lhs) -> Lattice { Lattice ret(lhs.Grid()); - auto ret_v = ret.View(AcceleratorWrite); - auto lhs_v = lhs.View(AcceleratorRead); + autoView(ret_v , ret, AcceleratorWrite); + autoView(lhs_v , lhs, AcceleratorRead); accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), { coalescedWrite(ret_v[ss], 
trace(lhs_v(ss))); }); @@ -56,8 +56,8 @@ template inline auto TraceIndex(const Lattice &lhs) -> Lattice(vobj()))> { Lattice(vobj()))> ret(lhs.Grid()); - auto ret_v = ret.View(AcceleratorWrite); - auto lhs_v = lhs.View(AcceleratorRead); + autoView( ret_v , ret, AcceleratorWrite); + autoView( lhs_v , lhs, AcceleratorRead); accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), { coalescedWrite(ret_v[ss], traceIndex(lhs_v(ss))); }); diff --git a/Grid/lattice/Lattice_transfer.h b/Grid/lattice/Lattice_transfer.h index 9e98d111..7362060a 100644 --- a/Grid/lattice/Lattice_transfer.h +++ b/Grid/lattice/Lattice_transfer.h @@ -46,11 +46,12 @@ inline void subdivides(GridBase *coarse,GridBase *fine) //////////////////////////////////////////////////////////////////////////////////////////// // remove and insert a half checkerboard //////////////////////////////////////////////////////////////////////////////////////////// -template inline void pickCheckerboard(int cb,Lattice &half,const Lattice &full){ +template inline void pickCheckerboard(int cb,Lattice &half,const Lattice &full) +{ half.Checkerboard() = cb; - auto half_v = half.View(CpuWrite); - auto full_v = full.View(CpuRead); + autoView( half_v, half, CpuWrite); + autoView( full_v, full, CpuRead); thread_for(ss, full.Grid()->oSites(),{ int cbos; Coordinate coor; @@ -63,10 +64,11 @@ template inline void pickCheckerboard(int cb,Lattice &half,con } }); } -template inline void setCheckerboard(Lattice &full,const Lattice &half){ +template inline void setCheckerboard(Lattice &full,const Lattice &half) +{ int cb = half.Checkerboard(); - auto half_v = half.View(CpuRead); - auto full_v = full.View(CpuWrite); + autoView( half_v , half, CpuRead); + autoView( full_v , full, CpuWrite); thread_for(ss,full.Grid()->oSites(),{ Coordinate coor; @@ -92,79 +94,15 @@ inline void blockProject(Lattice > &coarseData, Lattice ip(coarse); - auto coarseData_ = coarseData.View(AcceleratorWrite); - auto ip_ = ip.View(AcceleratorWrite); + autoView( 
coarseData_ , coarseData, AcceleratorWrite); + autoView( ip_ , ip, AcceleratorWrite); for(int v=0;voSites(), vobj::Nsimd(), { coalescedWrite(coarseData_[sc](v),ip_(sc)); - }); + }); } } -#if 0 -template -inline void blockProject1(Lattice > &coarseData, - const Lattice &fineData, - const std::vector > &Basis) -{ - typedef iVector coarseSiteData; - coarseSiteData elide; - typedef decltype(coalescedRead(elide)) ScalarComplex; - GridBase * fine = fineData.Grid(); - GridBase * coarse= coarseData.Grid(); - int _ndimension = coarse->_ndimension; - - // checks - assert( nbasis == Basis.size() ); - subdivides(coarse,fine); - for(int i=0;i_rdimensions[d] / coarse->_rdimensions[d]; - assert(block_r[d]*coarse->_rdimensions[d] == fine->_rdimensions[d]); - } - int blockVol = fine->oSites()/coarse->oSites(); - - coarseData=Zero(); - - auto fineData_ = fineData.View(AcceleratorRead); - auto coarseData_ = coarseData.View(AcceleratorWrite); - //////////////////////////////////////////////////////////////////////////////////////////////////////// - // To make this lock free, loop over coars parallel, and then loop over fine associated with coarse. 
- // Otherwise do fine inner product per site, and make the update atomic - //////////////////////////////////////////////////////////////////////////////////////////////////////// - accelerator_for( sci, nbasis*coarse->oSites(), vobj::Nsimd(), { - - auto sc=sci/nbasis; - auto i=sci%nbasis; - auto Basis_ = Basis[i].View(AcceleratorRead); - - Coordinate coor_c(_ndimension); - Lexicographic::CoorFromIndex(coor_c,sc,coarse->_rdimensions); // Block coordinate - - int sf; - decltype(innerProduct(Basis_(sf),fineData_(sf))) reduce=Zero(); - - for(int sb=0;sb_rdimensions); - - reduce=reduce+innerProduct(Basis_(sf),fineData_(sf)); - } - coalescedWrite(coarseData_[sc](i),reduce); - }); - return; -} -#endif template inline void blockZAXPY(Lattice &fineZ, @@ -191,10 +129,10 @@ inline void blockZAXPY(Lattice &fineZ, assert(block_r[d]*coarse->_rdimensions[d]==fine->_rdimensions[d]); } - auto fineZ_ = fineZ.View(AcceleratorWrite); - auto fineX_ = fineX.View(AcceleratorRead); - auto fineY_ = fineY.View(AcceleratorRead); - auto coarseA_= coarseA.View(AcceleratorRead); + autoView( fineZ_ , fineZ, AcceleratorWrite); + autoView( fineX_ , fineX, AcceleratorRead); + autoView( fineY_ , fineY, AcceleratorRead); + autoView( coarseA_, coarseA, AcceleratorRead); accelerator_for(sf, fine->oSites(), CComplex::Nsimd(), { @@ -229,8 +167,8 @@ inline void blockInnerProduct(Lattice &CoarseInner, // Precision promotion? 
fine_inner = localInnerProduct(fineX,fineY); blockSum(coarse_inner,fine_inner); - auto CoarseInner_ = CoarseInner.View(AcceleratorWrite); - auto coarse_inner_ = coarse_inner.View(AcceleratorRead); + autoView( CoarseInner_ , CoarseInner, AcceleratorWrite); + autoView( coarse_inner_ , coarse_inner, AcceleratorRead); accelerator_for(ss, coarse->oSites(), 1, { CoarseInner_[ss] = coarse_inner_[ss]; }); @@ -265,8 +203,8 @@ inline void blockSum(Lattice &coarseData,const Lattice &fineData) // Turn this around to loop threaded over sc and interior loop // over sf would thread better - auto coarseData_ = coarseData.View(AcceleratorWrite); - auto fineData_ = fineData.View(AcceleratorRead); + autoView( coarseData_ , coarseData, AcceleratorWrite); + autoView( fineData_ , fineData, AcceleratorRead); accelerator_for(sc,coarse->oSites(),1,{ @@ -359,8 +297,8 @@ inline void blockPromote(const Lattice > &coarseData, for(int d=0 ; d<_ndimension;d++){ block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d]; } - auto fineData_ = fineData.View(AcceleratorWrite); - auto coarseData_ = coarseData.View(AcceleratorRead); + autoView( fineData_ , fineData, AcceleratorWrite); + autoView( coarseData_ , coarseData, AcceleratorRead); // Loop with a cache friendly loop ordering accelerator_for(sf,fine->oSites(),1,{ @@ -373,7 +311,7 @@ inline void blockPromote(const Lattice > &coarseData, Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); for(int i=0;i > &coarseData, for(int i=0;i > ip = PeekIndex<0>(coarseData,i); Lattice cip(coarse); - auto cip_ = cip.View(AcceleratorWrite); - auto ip_ = ip.View(AcceleratorRead); + autoView( cip_ , cip, AcceleratorWrite); + autoView( ip_ , ip, AcceleratorRead); accelerator_forNB(sc,coarse->oSites(),CComplex::Nsimd(),{ coalescedWrite(cip_[sc], ip_(sc)()); }); @@ -469,8 +407,8 @@ void localCopyRegion(const Lattice &From,Lattice & To,Coordinate Fro Coordinate rdt = Tg->_rdimensions; Coordinate ist = Tg->_istride; Coordinate ost = Tg->_ostride; - auto 
t_v = To.View(AcceleratorWrite); - auto f_v = From.View(AcceleratorRead); + autoView( t_v , To, AcceleratorWrite); + autoView( f_v , From, AcceleratorRead); accelerator_for(idx,Fg->lSites(),1,{ sobj s; Coordinate Fcoor(nd); @@ -717,7 +655,7 @@ unvectorizeToLexOrdArray(std::vector &out, const Lattice &in) } //loop over outer index - auto in_v = in.View(CpuRead); + autoView( in_v , in, CpuRead); thread_for(in_oidx,in_grid->oSites(),{ //Assemble vector of pointers to output elements ExtractPointerArray out_ptrs(in_nsimd); @@ -810,7 +748,7 @@ vectorizeFromLexOrdArray( std::vector &in, Lattice &out) icoor[lane].resize(ndim); grid->iCoorFromIindex(icoor[lane],lane); } - auto out_v = out.View(CpuWrite); + autoView( out_v , out, CpuWrite); thread_for(oidx, grid->oSites(),{ //Assemble vector of pointers to output elements ExtractPointerArray ptrs(nsimd); @@ -913,7 +851,7 @@ void precisionChange(Lattice &out, const Lattice &in) std::vector in_slex_conv(in_grid->lSites()); unvectorizeToLexOrdArray(in_slex_conv, in); - auto out_v = out.View(CpuWrite); + autoView( out_v , out, CpuWrite); thread_for(out_oidx,out_grid->oSites(),{ Coordinate out_ocoor(ndim); out_grid->oCoorFromOindex(out_ocoor, out_oidx); diff --git a/Grid/lattice/Lattice_transpose.h b/Grid/lattice/Lattice_transpose.h index c17a808b..b11175dd 100644 --- a/Grid/lattice/Lattice_transpose.h +++ b/Grid/lattice/Lattice_transpose.h @@ -41,8 +41,8 @@ NAMESPACE_BEGIN(Grid); template inline Lattice transpose(const Lattice &lhs){ Lattice ret(lhs.Grid()); - auto ret_v = ret.View(AcceleratorWrite); - auto lhs_v = lhs.View(AcceleratorRead); + autoView( ret_v, ret, AcceleratorWrite); + autoView( lhs_v, lhs, AcceleratorRead); accelerator_for(ss,lhs_v.size(),vobj::Nsimd(),{ coalescedWrite(ret_v[ss], transpose(lhs_v(ss))); }); @@ -56,8 +56,8 @@ template inline auto TransposeIndex(const Lattice &lhs) -> Lattice(vobj()))> { Lattice(vobj()))> ret(lhs.Grid()); - auto ret_v = ret.View(AcceleratorWrite); - auto lhs_v = 
lhs.View(AcceleratorRead); + autoView( ret_v, ret, AcceleratorWrite); + autoView( lhs_v, lhs, AcceleratorRead); accelerator_for(ss,lhs_v.size(),vobj::Nsimd(),{ coalescedWrite(ret_v[ss] , transposeIndex(lhs_v(ss))); }); diff --git a/Grid/lattice/Lattice_unary.h b/Grid/lattice/Lattice_unary.h index 10aa7472..07424b3d 100644 --- a/Grid/lattice/Lattice_unary.h +++ b/Grid/lattice/Lattice_unary.h @@ -35,8 +35,8 @@ NAMESPACE_BEGIN(Grid); template Lattice pow(const Lattice &rhs_i,RealD y){ Lattice ret_i(rhs_i.Grid()); - auto rhs = rhs_i.View(AcceleratorRead); - auto ret = ret_i.View(AcceleratorWrite); + autoView( rhs, rhs_i, AcceleratorRead); + autoView( ret, ret_i, AcceleratorWrite); ret.Checkerboard() = rhs.Checkerboard(); accelerator_for(ss,rhs.size(),1,{ ret[ss]=pow(rhs[ss],y); @@ -45,8 +45,8 @@ template Lattice pow(const Lattice &rhs_i,RealD y){ } template Lattice mod(const Lattice &rhs_i,Integer y){ Lattice ret_i(rhs_i.Grid()); - auto rhs = rhs_i.View(AcceleratorRead); - auto ret = ret_i.View(AcceleratorWrite); + autoView( rhs , rhs_i, AcceleratorRead); + autoView( ret , ret_i, AcceleratorWrite); ret.Checkerboard() = rhs.Checkerboard(); accelerator_for(ss,rhs.size(),obj::Nsimd(),{ coalescedWrite(ret[ss],mod(rhs(ss),y)); @@ -56,8 +56,8 @@ template Lattice mod(const Lattice &rhs_i,Integer y){ template Lattice div(const Lattice &rhs_i,Integer y){ Lattice ret_i(rhs_i.Grid()); - auto ret = ret_i.View(AcceleratorWrite); - auto rhs = rhs_i.View(AcceleratorRead); + autoView( ret , ret_i, AcceleratorWrite); + autoView( rhs , rhs_i, AcceleratorRead); ret.Checkerboard() = rhs_i.Checkerboard(); accelerator_for(ss,rhs.size(),obj::Nsimd(),{ coalescedWrite(ret[ss],div(rhs(ss),y)); @@ -67,8 +67,8 @@ template Lattice div(const Lattice &rhs_i,Integer y){ template Lattice expMat(const Lattice &rhs_i, RealD alpha, Integer Nexp = DEFAULT_MAT_EXP){ Lattice ret_i(rhs_i.Grid()); - auto rhs = rhs_i.View(AcceleratorRead); - auto ret = ret_i.View(AcceleratorWrite); + autoView( rhs , rhs_i, 
AcceleratorRead); + autoView( ret , ret_i, AcceleratorWrite); ret.Checkerboard() = rhs.Checkerboard(); accelerator_for(ss,rhs.size(),obj::Nsimd(),{ coalescedWrite(ret[ss],Exponentiate(rhs(ss),alpha, Nexp)); diff --git a/Grid/lattice/Lattice_view.h b/Grid/lattice/Lattice_view.h index b12dd2b7..d21ab874 100644 --- a/Grid/lattice/Lattice_view.h +++ b/Grid/lattice/Lattice_view.h @@ -25,6 +25,7 @@ void accelerator_inline conformable(GridBase *lhs,GridBase *rhs) template class LatticeAccelerator : public LatticeBase { protected: + //public: GridBase *_grid; int checkerboard; vobj *_odata; // A managed pointer @@ -47,7 +48,7 @@ public: // The copy constructor for this will need to be used by device lambda functions ///////////////////////////////////////////////////////////////////////////////////////// template -class LatticeExprView : public LatticeAccelerator +class LatticeView : public LatticeAccelerator { public: // Rvalue @@ -68,7 +69,12 @@ public: accelerator_inline uint64_t end(void) const { return this->_odata_size; }; accelerator_inline uint64_t size(void) const { return this->_odata_size; }; - LatticeExprView(const LatticeAccelerator &refer_to_me) : LatticeAccelerator (refer_to_me){} + LatticeView(const LatticeAccelerator &refer_to_me) : LatticeAccelerator (refer_to_me){} + LatticeView(const LatticeView &refer_to_me) = default; // Trivially copyable + LatticeView(const LatticeAccelerator &refer_to_me,ViewMode mode) : LatticeAccelerator (refer_to_me) + { + this->ViewOpen(mode); + } // Host functions void ViewOpen(ViewMode mode) @@ -89,46 +95,20 @@ public: } }; - - -/////////////////////////////////////////////////////////////////////// -// An object to be stored in a shared_ptr to clean up after last view. -// UserView constructor,destructor updates view manager -// Non-copyable object??? Second base with copy/= deleted? 
-/////////////////////////////////////////////////////////////////////// -class MemViewDeleter { - public: - void *cpu_ptr; - ViewMode mode; - ~MemViewDeleter(){ - MemoryManager::ViewClose(cpu_ptr,mode); - } -}; -template -class LatticeView : public LatticeExprView +// Little autoscope assister +template +class ViewCloser { -#ifndef GRID_UVM - std::shared_ptr Deleter; -#endif -public: -#ifdef GRID_UVM - LatticeView(const LatticeAccelerator &refer_to_me,ViewMode mode) : - LatticeExprView (refer_to_me) - { - } -#else - LatticeView(const LatticeView &orig) : LatticeExprView(orig) { } - LatticeView(const LatticeAccelerator &refer_to_me,ViewMode mode) : - LatticeExprView (refer_to_me), Deleter(new MemViewDeleter) - { - // std::cout << "FIXME - copy shared pointer? View Open in LatticeView"<_odata<ViewOpen(mode); - Deleter->cpu_ptr = this->cpu_ptr; - Deleter->mode = mode; - } -#endif + View v; // Take a copy of view and call view close when I go out of scope automatically + public: + ViewCloser(View &_v) : v(_v) {}; + ~ViewCloser() { v.ViewClose(); } }; +#define autoView(l_v,l,mode) \ + auto l_v = l.View(mode); \ + ViewCloser _autoView##l_v(l_v); + ///////////////////////////////////////////////////////////////////////////////////////// // Lattice expression types used by ET to assemble the AST // @@ -142,7 +122,7 @@ template using is_lattice = std::is_base_of; template using is_lattice_expr = std::is_base_of; template struct ViewMapBase { typedef T Type; }; -template struct ViewMapBase { typedef LatticeExprView Type; }; +template struct ViewMapBase { typedef LatticeView Type; }; template using ViewMap = ViewMapBase::value >; template diff --git a/Grid/qcd/action/fermion/GparityWilsonImpl.h b/Grid/qcd/action/fermion/GparityWilsonImpl.h index a8ae90ec..0b726db9 100644 --- a/Grid/qcd/action/fermion/GparityWilsonImpl.h +++ b/Grid/qcd/action/fermion/GparityWilsonImpl.h @@ -232,15 +232,17 @@ public: if ( Params.twists[mu] ) { Uconj = where(coor==neglink,-Uconj,Uconj); } - - 
auto U_v = U.View(CpuRead); - auto Uds_v = Uds.View(CpuWrite); - auto Uconj_v = Uconj.View(CpuRead); - auto Utmp_v= Utmp.View(CpuWrite); - thread_foreach(ss,U_v,{ - Uds_v[ss](0)(mu) = U_v[ss](); - Uds_v[ss](1)(mu) = Uconj_v[ss](); - }); + + { + autoView( U_v , U, CpuRead); + autoView( Uconj_v , Uconj, CpuRead); + autoView( Uds_v , Uds, CpuWrite); + autoView( Utmp_v, Utmp, CpuWrite); + thread_foreach(ss,U_v,{ + Uds_v[ss](0)(mu) = U_v[ss](); + Uds_v[ss](1)(mu) = Uconj_v[ss](); + }); + } U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary Uconj = adj(Cshift(Uconj,mu,-1)); @@ -250,19 +252,25 @@ public: Utmp = where(coor==0,Uconj,Utmp); } - thread_foreach(ss,Utmp_v,{ - Uds_v[ss](0)(mu+4) = Utmp_v[ss](); - }); - + { + autoView( Uds_v , Uds, CpuWrite); + autoView( Utmp_v, Utmp, CpuWrite); + thread_foreach(ss,Utmp_v,{ + Uds_v[ss](0)(mu+4) = Utmp_v[ss](); + }); + } Utmp = Uconj; if ( Params.twists[mu] ) { Utmp = where(coor==0,U,Utmp); } - - thread_foreach(ss,Utmp_v,{ - Uds_v[ss](1)(mu+4) = Utmp_v[ss](); - }); - + + { + autoView( Uds_v , Uds, CpuWrite); + autoView( Utmp_v, Utmp, CpuWrite); + thread_foreach(ss,Utmp_v,{ + Uds_v[ss](1)(mu+4) = Utmp_v[ss](); + }); + } } } @@ -272,11 +280,14 @@ public: GaugeLinkField link(mat.Grid()); // use lorentz for flavour as hack. 
auto tmp = TraceIndex(outerProduct(Btilde, A)); - auto link_v = link.View(CpuWrite); - auto tmp_v = tmp.View(CpuRead); - thread_foreach(ss,tmp_v,{ - link_v[ss]() = tmp_v[ss](0, 0) + conjugate(tmp_v[ss](1, 1)); - }); + + { + autoView( link_v , link, CpuWrite); + autoView( tmp_v , tmp, CpuRead); + thread_foreach(ss,tmp_v,{ + link_v[ss]() = tmp_v[ss](0, 0) + conjugate(tmp_v[ss](1, 1)); + }); + } PokeIndex(mat, link, mu); return; } @@ -306,16 +317,18 @@ public: GaugeLinkField tmp(mat.Grid()); tmp = Zero(); - auto tmp_v = tmp.View(CpuWrite); - auto Atilde_v = Atilde.View(CpuRead); - auto Btilde_v = Btilde.View(CpuRead); - thread_for(ss,tmp.Grid()->oSites(),{ - for (int s = 0; s < Ls; s++) { - int sF = s + Ls * ss; - auto ttmp = traceIndex(outerProduct(Btilde_v[sF], Atilde_v[sF])); - tmp_v[ss]() = tmp_v[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1)); - } - }); + { + autoView( tmp_v , tmp, CpuWrite); + autoView( Atilde_v , Atilde, CpuRead); + autoView( Btilde_v , Btilde, CpuRead); + thread_for(ss,tmp.Grid()->oSites(),{ + for (int s = 0; s < Ls; s++) { + int sF = s + Ls * ss; + auto ttmp = traceIndex(outerProduct(Btilde_v[sF], Atilde_v[sF])); + tmp_v[ss]() = tmp_v[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1)); + } + }); + } PokeIndex(mat, tmp, mu); return; } diff --git a/Grid/qcd/action/fermion/WilsonCloverFermion.h b/Grid/qcd/action/fermion/WilsonCloverFermion.h index 05143551..aa8fb150 100644 --- a/Grid/qcd/action/fermion/WilsonCloverFermion.h +++ b/Grid/qcd/action/fermion/WilsonCloverFermion.h @@ -264,8 +264,8 @@ private: { CloverFieldType T(F.Grid()); T = Zero(); - auto T_v = T.View(CpuWrite); - auto F_v = F.View(CpuRead); + autoView(T_v,T,CpuWrite); + autoView(F_v,F,CpuRead); thread_for(i, CloverTerm.Grid()->oSites(), { T_v[i]()(0, 1) = timesMinusI(F_v[i]()()); @@ -282,8 +282,8 @@ private: CloverFieldType T(F.Grid()); T = Zero(); - auto T_v = T.View(CpuWrite); - auto F_v = F.View(CpuRead); + autoView(T_v, T,CpuWrite); + autoView(F_v, F,CpuRead); thread_for(i, 
CloverTerm.Grid()->oSites(), { T_v[i]()(0, 1) = -F_v[i]()(); @@ -300,8 +300,8 @@ private: CloverFieldType T(F.Grid()); T = Zero(); - auto T_v = T.View(CpuWrite); - auto F_v = F.View(CpuRead); + autoView(T_v,T,CpuWrite); + autoView(F_v,F,CpuRead); thread_for(i, CloverTerm.Grid()->oSites(), { T_v[i]()(0, 0) = timesMinusI(F_v[i]()()); @@ -318,8 +318,8 @@ private: CloverFieldType T(F.Grid()); T = Zero(); - auto T_v = T.View(CpuWrite); - auto F_v = F.View(CpuRead); + autoView( T_v , T, CpuWrite); + autoView( F_v , F, CpuRead); thread_for(i, CloverTerm.Grid()->oSites(), { T_v[i]()(0, 1) = timesI(F_v[i]()()); @@ -336,8 +336,8 @@ private: CloverFieldType T(F.Grid()); T = Zero(); - auto T_v = T.View(CpuWrite); - auto F_v = F.View(CpuRead); + autoView( T_v ,T,CpuWrite); + autoView( F_v ,F,CpuRead); thread_for(i, CloverTerm.Grid()->oSites(), { T_v[i]()(0, 1) = -(F_v[i]()()); @@ -355,8 +355,8 @@ private: T = Zero(); - auto T_v = T.View(CpuWrite); - auto F_v = F.View(CpuRead); + autoView( T_v , T,CpuWrite); + autoView( F_v , F,CpuRead); thread_for(i, CloverTerm.Grid()->oSites(), { T_v[i]()(0, 0) = timesI(F_v[i]()()); diff --git a/Grid/qcd/action/fermion/WilsonImpl.h b/Grid/qcd/action/fermion/WilsonImpl.h index 356d0941..b4afc69a 100644 --- a/Grid/qcd/action/fermion/WilsonImpl.h +++ b/Grid/qcd/action/fermion/WilsonImpl.h @@ -106,9 +106,9 @@ public: const _SpinorField & phi, int mu) { - auto out_v= out.View(CpuWrite); - auto phi_v= phi.View(CpuRead); - auto Umu_v= Umu.View(CpuRead); + autoView( out_v, out, CpuWrite); + autoView( phi_v, phi, CpuRead); + autoView( Umu_v, Umu, CpuRead); thread_for(sss,out.Grid()->oSites(),{ multLink(out_v[sss],Umu_v[sss],phi_v[sss],mu); }); @@ -191,18 +191,19 @@ public: int Ls=Btilde.Grid()->_fdimensions[0]; GaugeLinkField tmp(mat.Grid()); tmp = Zero(); - auto tmp_v = tmp.View(CpuWrite); - auto Btilde_v = Btilde.View(CpuRead); - auto Atilde_v = Atilde.View(CpuRead); - thread_for(sss,tmp.Grid()->oSites(),{ - int sU=sss; - for(int 
s=0;s(outerProduct(Btilde_v[sF],Atilde_v[sF])); // ordering here - } - }); + { + autoView( tmp_v , tmp, CpuWrite); + autoView( Btilde_v , Btilde, CpuRead); + autoView( Atilde_v , Atilde, CpuRead); + thread_for(sss,tmp.Grid()->oSites(),{ + int sU=sss; + for(int s=0;s(outerProduct(Btilde_v[sF],Atilde_v[sF])); // ordering here + } + }); + } PokeIndex(mat,tmp,mu); - } }; diff --git a/Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h b/Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h index 72940cda..d2537ccf 100644 --- a/Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h +++ b/Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h @@ -50,9 +50,9 @@ CayleyFermion5D::M5D(const FermionField &psi_i, chi_i.Checkerboard()=psi_i.Checkerboard(); GridBase *grid=psi_i.Grid(); - auto psi = psi_i.View(AcceleratorRead); - auto phi = phi_i.View(AcceleratorRead); - auto chi = chi_i.View(AcceleratorWrite); + autoView(psi , psi_i,AcceleratorRead); + autoView(phi , phi_i,AcceleratorRead); + autoView(chi , chi_i,AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); auto pdiag = &diag[0]; @@ -93,9 +93,9 @@ CayleyFermion5D::M5Ddag(const FermionField &psi_i, { chi_i.Checkerboard()=psi_i.Checkerboard(); GridBase *grid=psi_i.Grid(); - auto psi = psi_i.View(AcceleratorRead); - auto phi = phi_i.View(AcceleratorRead); - auto chi = chi_i.View(AcceleratorWrite); + autoView(psi , psi_i,AcceleratorRead); + autoView(phi , phi_i,AcceleratorRead); + autoView(chi , chi_i,AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); auto pdiag = &diag[0]; @@ -131,8 +131,8 @@ CayleyFermion5D::MooeeInv (const FermionField &psi_i, FermionField &chi chi_i.Checkerboard()=psi_i.Checkerboard(); GridBase *grid=psi_i.Grid(); - auto psi = psi_i.View(AcceleratorRead); - auto chi = chi_i.View(AcceleratorWrite); + autoView(psi , psi_i,AcceleratorRead); + autoView(chi , chi_i,AcceleratorWrite); int Ls=this->Ls; @@ -193,8 +193,8 @@ 
CayleyFermion5D::MooeeInvDag (const FermionField &psi_i, FermionField &chi GridBase *grid=psi_i.Grid(); int Ls=this->Ls; - auto psi = psi_i.View(AcceleratorRead); - auto chi = chi_i.View(AcceleratorWrite); + autoView(psi , psi_i,AcceleratorRead); + autoView(chi , chi_i,AcceleratorWrite); auto plee = & lee [0]; auto pdee = & dee [0]; diff --git a/Grid/qcd/action/fermion/implementation/CayleyFermion5Dvec.h b/Grid/qcd/action/fermion/implementation/CayleyFermion5Dvec.h index 079ea481..b54f63ad 100644 --- a/Grid/qcd/action/fermion/implementation/CayleyFermion5Dvec.h +++ b/Grid/qcd/action/fermion/implementation/CayleyFermion5Dvec.h @@ -65,9 +65,9 @@ CayleyFermion5D::M5D(const FermionField &psi_i, EnableIf sfinae=0; chi_i.Checkerboard()=psi_i.Checkerboard(); GridBase *grid=psi_i.Grid(); - auto psi = psi_i.View(CpuRead); - auto phi = phi_i.View(CpuRead); - auto chi = chi_i.View(CpuWrite); + autoView(psi, psi_i,CpuRead); + autoView(phi, phi_i,CpuRead); + autoView(chi, chi_i,CpuWrite); int Ls = this->Ls; int LLs = grid->_rdimensions[0]; const int nsimd= Simd::Nsimd(); @@ -213,9 +213,9 @@ CayleyFermion5D::M5Ddag(const FermionField &psi_i, EnableIf sfinae=0; chi_i.Checkerboard()=psi_i.Checkerboard(); GridBase *grid=psi_i.Grid(); - auto psi=psi_i.View(CpuRead); - auto phi=phi_i.View(CpuRead); - auto chi=chi_i.View(CpuWrite); + autoView(psi,psi_i,CpuRead); + autoView(phi,phi_i,CpuRead); + autoView(chi,chi_i,CpuWrite); int Ls = this->Ls; int LLs = grid->_rdimensions[0]; int nsimd= Simd::Nsimd(); @@ -357,8 +357,8 @@ CayleyFermion5D::MooeeInternalAsm(const FermionField &psi_i, FermionField Vector > &Matm) { EnableIf sfinae=0; - auto psi = psi_i.View(CpuRead); - auto chi = chi_i.View(CpuWrite); + autoView(psi , psi_i,CpuRead); + autoView(chi , chi_i,CpuWrite); #ifndef AVX512 { SiteHalfSpinor BcastP; @@ -535,8 +535,8 @@ CayleyFermion5D::MooeeInternalZAsm(const FermionField &psi_i, FermionField EnableIf sfinae=0; #ifndef AVX512 { - auto psi = psi_i.View(CpuRead); - auto chi = 
chi_i.View(CpuWrite); + autoView(psi , psi_i,CpuRead); + autoView(chi , chi_i,CpuWrite); SiteHalfSpinor BcastP; SiteHalfSpinor BcastM; @@ -586,8 +586,8 @@ CayleyFermion5D::MooeeInternalZAsm(const FermionField &psi_i, FermionField } #else { - auto psi = psi_i.View(CpuRead); - auto chi = chi_i.View(CpuWrite); + autoView(psi , psi_i,CpuRead); + autoView(chi , chi_i,CpuWrite); // pointers // MASK_REGS; #define Chi_00 %zmm0 diff --git a/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h b/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h index 100eb0d2..9a8454ef 100644 --- a/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h +++ b/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h @@ -46,9 +46,9 @@ void DomainWallEOFAFermion::M5D(const FermionField& psi_i, const FermionFi chi_i.Checkerboard() = psi_i.Checkerboard(); int Ls = this->Ls; GridBase* grid = psi_i.Grid(); - auto phi = phi_i.View(AcceleratorRead); - auto psi = psi_i.View(AcceleratorRead); - auto chi = chi_i.View(AcceleratorWrite); + autoView( phi , phi_i, AcceleratorRead); + autoView( psi , psi_i, AcceleratorRead); + autoView( chi , chi_i, AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); auto pdiag = &diag[0]; auto pupper = &upper[0]; @@ -82,9 +82,9 @@ void DomainWallEOFAFermion::M5Ddag(const FermionField& psi_i, const Fermio GridBase* grid = psi_i.Grid(); int Ls = this->Ls; - auto psi = psi_i.View(AcceleratorRead); - auto phi = phi_i.View(AcceleratorRead); - auto chi = chi_i.View(AcceleratorWrite); + autoView( psi , psi_i, AcceleratorRead); + autoView( phi , phi_i, AcceleratorRead); + autoView( chi , chi_i, AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); auto pdiag = &diag[0]; auto pupper = &upper[0]; @@ -116,8 +116,8 @@ void DomainWallEOFAFermion::MooeeInv(const FermionField& psi_i, FermionFie { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase* grid = psi_i.Grid(); - auto 
psi=psi_i.View(AcceleratorRead); - auto chi=chi_i.View(AcceleratorWrite); + autoView( psi, psi_i, AcceleratorRead); + autoView( chi, chi_i, AcceleratorWrite); int Ls = this->Ls; auto plee = & this->lee[0]; @@ -172,8 +172,8 @@ void DomainWallEOFAFermion::MooeeInvDag(const FermionField& psi_i, Fermion { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase* grid = psi_i.Grid(); - auto psi = psi_i.View(AcceleratorRead); - auto chi = chi_i.View(AcceleratorWrite); + autoView( psi, psi_i, AcceleratorRead); + autoView( chi, chi_i, AcceleratorWrite); int Ls = this->Ls; auto plee = & this->lee[0]; diff --git a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h index 58d2b368..87acca0e 100644 --- a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h @@ -221,10 +221,10 @@ void ImprovedStaggeredFermion5D::DhopDir(const FermionField &in, FermionFi Compressor compressor; Stencil.HaloExchange(in,compressor); - auto Umu_v = Umu.View(CpuRead); - auto UUUmu_v = UUUmu.View(CpuRead); - auto in_v = in.View(CpuRead); - auto out_v = out.View(CpuWrite); + autoView( Umu_v , Umu, CpuRead); + autoView( UUUmu_v , UUUmu, CpuRead); + autoView( in_v , in, CpuRead); + autoView( out_v , out, CpuWrite); thread_for( ss,Umu.Grid()->oSites(),{ for(int s=0;s::DerivInternal(StencilImpl &st, DoubledGauge //////////////////////// // Call the single hop //////////////////////// - auto U_v = U.View(CpuRead); - auto UUU_v = UUU.View(CpuRead); - auto B_v = B.View(CpuWrite); - auto Btilde_v = Btilde.View(CpuWrite); + autoView( U_v , U, CpuRead); + autoView( UUU_v , UUU, CpuRead); + autoView( B_v , B, CpuWrite); + autoView( Btilde_v , Btilde, CpuWrite); thread_for(sss,B.Grid()->oSites(),{ Kernels::DhopDirKernel(st, U_v, UUU_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu,1); }); @@ 
-378,10 +378,10 @@ void ImprovedStaggeredFermion::DhopDir(const FermionField &in, FermionFiel Compressor compressor; Stencil.HaloExchange(in, compressor); - auto Umu_v = Umu.View(CpuRead); - auto UUUmu_v = UUUmu.View(CpuRead); - auto in_v = in.View(CpuRead); - auto out_v = out.View(CpuWrite); + autoView( Umu_v , Umu, CpuRead); + autoView( UUUmu_v , UUUmu, CpuRead); + autoView( in_v , in, CpuRead); + autoView( out_v , out, CpuWrite); thread_for( sss, in.Grid()->oSites(),{ Kernels::DhopDirKernel(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dir, disp); }); diff --git a/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h b/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h index ed7be056..41b9170d 100644 --- a/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h +++ b/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h @@ -44,9 +44,9 @@ void MobiusEOFAFermion::M5D(const FermionField &psi_i, const FermionField chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); int Ls = this->Ls; - auto psi = psi_i.View(AcceleratorRead); - auto phi = phi_i.View(AcceleratorRead); - auto chi = chi_i.View(AcceleratorWrite); + autoView(psi , psi_i, AcceleratorRead); + autoView(phi , phi_i, AcceleratorRead); + autoView(chi , chi_i, AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); @@ -84,9 +84,9 @@ void MobiusEOFAFermion::M5D_shift(const FermionField &psi_i, const Fermion chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); int Ls = this->Ls; - auto psi = psi_i.View(AcceleratorRead); - auto phi = phi_i.View(AcceleratorRead); - auto chi = chi_i.View(AcceleratorWrite); + autoView(psi , psi_i, AcceleratorRead); + autoView(phi , phi_i, AcceleratorRead); + autoView(chi , chi_i, AcceleratorWrite); auto pm = this->pm; int shift_s = (pm == 1) ? 
(Ls-1) : 0; // s-component modified by shift operator @@ -132,9 +132,9 @@ void MobiusEOFAFermion::M5Ddag(const FermionField &psi_i, const FermionFie chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); int Ls = this->Ls; - auto psi = psi_i.View(AcceleratorRead); - auto phi = phi_i.View(AcceleratorRead); - auto chi = chi_i.View(AcceleratorWrite); + autoView(psi , psi_i, AcceleratorRead); + autoView(phi , phi_i, AcceleratorRead); + autoView(chi , chi_i, AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); @@ -174,9 +174,9 @@ void MobiusEOFAFermion::M5Ddag_shift(const FermionField &psi_i, const Ferm GridBase *grid = psi_i.Grid(); int Ls = this->Ls; int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator - auto psi = psi_i.View(AcceleratorRead); - auto phi = phi_i.View(AcceleratorRead); - auto chi = chi_i.View(AcceleratorWrite); + autoView(psi , psi_i, AcceleratorRead); + autoView(phi , phi_i, AcceleratorRead); + autoView(chi , chi_i, AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); @@ -226,8 +226,8 @@ void MobiusEOFAFermion::MooeeInv(const FermionField &psi_i, FermionField & chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); int Ls = this->Ls; - auto psi = psi_i.View(AcceleratorRead); - auto chi = chi_i.View(AcceleratorWrite); + autoView(psi , psi_i, AcceleratorRead); + autoView(chi , chi_i, AcceleratorWrite); auto plee = & this->lee [0]; auto pdee = & this->dee [0]; @@ -286,8 +286,8 @@ void MobiusEOFAFermion::MooeeInv_shift(const FermionField &psi_i, FermionF chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); int Ls = this->Ls; - auto psi = psi_i.View(AcceleratorRead); - auto chi = chi_i.View(AcceleratorWrite); + autoView(psi , psi_i, AcceleratorRead); + autoView(chi , chi_i, AcceleratorWrite); auto pm = this->pm; auto plee = & this->lee [0]; @@ -354,8 +354,8 @@ void MobiusEOFAFermion::MooeeInvDag(const FermionField &psi_i, 
FermionFiel chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); int Ls = this->Ls; - auto psi = psi_i.View(AcceleratorRead); - auto chi = chi_i.View(AcceleratorWrite); + autoView(psi , psi_i, AcceleratorRead); + autoView(chi , chi_i, AcceleratorWrite); auto plee = & this->lee [0]; auto pdee = & this->dee [0]; @@ -410,8 +410,8 @@ void MobiusEOFAFermion::MooeeInvDag_shift(const FermionField &psi_i, Fermi { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); - auto psi = psi_i.View(AcceleratorRead); - auto chi = chi_i.View(AcceleratorWrite); + autoView(psi , psi_i, AcceleratorRead); + autoView(chi , chi_i, AcceleratorWrite); int Ls = this->Ls; auto pm = this->pm; diff --git a/Grid/qcd/action/fermion/implementation/NaiveStaggeredFermionImplementation.h b/Grid/qcd/action/fermion/implementation/NaiveStaggeredFermionImplementation.h index ccd36f57..49696aa7 100644 --- a/Grid/qcd/action/fermion/implementation/NaiveStaggeredFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/NaiveStaggeredFermionImplementation.h @@ -208,9 +208,9 @@ void NaiveStaggeredFermion::DerivInternal(StencilImpl &st, DoubledGaugeFie //////////////////////// // Call the single hop //////////////////////// - auto U_v = U.View(CpuRead); - auto B_v = B.View(CpuWrite); - auto Btilde_v = Btilde.View(CpuWrite); + autoView( U_v , U, CpuRead); + autoView( B_v , B, CpuWrite); + autoView( Btilde_v , Btilde, CpuWrite); thread_for(sss,B.Grid()->oSites(),{ Kernels::DhopDirKernel(st, U_v, U_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu,1); }); @@ -315,9 +315,9 @@ void NaiveStaggeredFermion::DhopDir(const FermionField &in, FermionField & Compressor compressor; Stencil.HaloExchange(in, compressor); - auto Umu_v = Umu.View(CpuRead); - auto in_v = in.View(CpuRead); - auto out_v = out.View(CpuWrite); + autoView( Umu_v , Umu, CpuRead); + autoView( in_v , in, CpuRead); + autoView( out_v , out, CpuWrite); // thread_for( sss, in.Grid()->oSites(),{ // 
Kernels::DhopDirKernel(Stencil, Umu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dir, disp); // }); diff --git a/Grid/qcd/action/fermion/implementation/StaggeredKernelsImplementation.h b/Grid/qcd/action/fermion/implementation/StaggeredKernelsImplementation.h index d7abef27..141725a7 100644 --- a/Grid/qcd/action/fermion/implementation/StaggeredKernelsImplementation.h +++ b/Grid/qcd/action/fermion/implementation/StaggeredKernelsImplementation.h @@ -261,11 +261,11 @@ void StaggeredKernels::DhopImproved(StencilImpl &st, LebesgueOrder &lo, GridBase *FGrid=in.Grid(); GridBase *UGrid=U.Grid(); typedef StaggeredKernels ThisKernel; - auto UUU_v = UUU.View(AcceleratorRead); - auto U_v = U.View(AcceleratorRead); - auto in_v = in.View(AcceleratorRead); - auto out_v = out.View(AcceleratorWrite); - auto st_v = st.View(AcceleratorRead); + autoView( UUU_v , UUU, AcceleratorRead); + autoView( U_v , U, AcceleratorRead); + autoView( in_v , in, AcceleratorRead); + autoView( out_v , out, AcceleratorWrite); + autoView( st_v , st, AcceleratorRead); SiteSpinor * buf = st.CommBuf(); int Ls=1; @@ -301,11 +301,11 @@ void StaggeredKernels::DhopNaive(StencilImpl &st, LebesgueOrder &lo, GridBase *FGrid=in.Grid(); GridBase *UGrid=U.Grid(); typedef StaggeredKernels ThisKernel; - auto UUU_v= U.View(AcceleratorRead); - auto U_v = U.View(AcceleratorRead); - auto in_v = in.View(AcceleratorRead); - auto out_v = out.View(AcceleratorWrite); - auto st_v = st.View(AcceleratorRead); + autoView( UUU_v , U, AcceleratorRead); + autoView( U_v , U, AcceleratorRead); + autoView( in_v , in, AcceleratorRead); + autoView( out_v , out, AcceleratorWrite); + autoView( st_v , st, AcceleratorRead); SiteSpinor * buf = st.CommBuf(); int Ls=1; diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h index 9e492831..3db59b1d 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h +++ 
b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h @@ -475,12 +475,12 @@ void WilsonFermion::ContractConservedCurrent(PropagatorField &q_in_1, // Inefficient comms method but not performance critical. tmp1 = Cshift(q_in_1, mu, 1); tmp2 = Cshift(q_in_2, mu, 1); - auto tmp1_v = tmp1.View(CpuWrite); - auto tmp2_v = tmp2.View(CpuWrite); - auto q_in_1_v=q_in_1.View(CpuRead); - auto q_in_2_v=q_in_2.View(CpuRead); - auto q_out_v = q_out.View(CpuRead); - auto Umu_v = Umu.View(CpuRead); + autoView( tmp1_v , tmp1, CpuWrite); + autoView( tmp2_v , tmp2, CpuWrite); + autoView( q_in_1_v,q_in_1, CpuRead); + autoView( q_in_2_v,q_in_2, CpuRead); + autoView( q_out_v , q_out, CpuRead); + autoView( Umu_v , Umu, CpuRead); thread_for(sU, Umu.Grid()->oSites(),{ Kernels::ContractConservedCurrentSiteFwd(tmp1_v[sU], q_in_2_v[sU], @@ -526,11 +526,11 @@ void WilsonFermion::SeqConservedCurrent(PropagatorField &q_in, tmp = lattice_cmplx*q_in; tmpBwd = Cshift(tmp, mu, -1); - auto coords_v = coords.View(CpuRead); - auto tmpFwd_v = tmpFwd.View(CpuRead); - auto tmpBwd_v = tmpBwd.View(CpuRead); - auto Umu_v = Umu.View(CpuRead); - auto q_out_v = q_out.View(CpuWrite); + autoView( coords_v , coords, CpuRead); + autoView( tmpFwd_v , tmpFwd, CpuRead); + autoView( tmpBwd_v , tmpBwd, CpuRead); + autoView( Umu_v , Umu, CpuRead); + autoView( q_out_v , q_out, CpuWrite); thread_for(sU, Umu.Grid()->oSites(), { diff --git a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h index 8f8c1063..603be7ec 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h @@ -348,18 +348,18 @@ template void WilsonKernels::DhopDirAll( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls, int Nsite, const FermionField &in, std::vector &out) { - auto U_v = U.View(AcceleratorRead); - auto in_v = 
in.View(AcceleratorRead); - auto st_v = st.View(AcceleratorRead); + autoView(U_v ,U,AcceleratorRead); + autoView(in_v ,in,AcceleratorRead); + autoView(st_v ,st,AcceleratorRead); - auto out_Xm = out[0].View(AcceleratorWrite); - auto out_Ym = out[1].View(AcceleratorWrite); - auto out_Zm = out[2].View(AcceleratorWrite); - auto out_Tm = out[3].View(AcceleratorWrite); - auto out_Xp = out[4].View(AcceleratorWrite); - auto out_Yp = out[5].View(AcceleratorWrite); - auto out_Zp = out[6].View(AcceleratorWrite); - auto out_Tp = out[7].View(AcceleratorWrite); + autoView(out_Xm,out[0],AcceleratorWrite); + autoView(out_Ym,out[1],AcceleratorWrite); + autoView(out_Zm,out[2],AcceleratorWrite); + autoView(out_Tm,out[3],AcceleratorWrite); + autoView(out_Xp,out[4],AcceleratorWrite); + autoView(out_Yp,out[5],AcceleratorWrite); + autoView(out_Zp,out[6],AcceleratorWrite); + autoView(out_Tp,out[7],AcceleratorWrite); auto CBp=st.CommBuf(); accelerator_forNB(sss,Nsite*Ls,Simd::Nsimd(),{ int sU=sss/Ls; @@ -383,10 +383,10 @@ void WilsonKernels::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S assert(dirdisp<=7); assert(dirdisp>=0); - auto U_v = U.View(AcceleratorRead); - auto in_v = in.View(AcceleratorRead); - auto out_v = out.View(AcceleratorWrite); - auto st_v = st.View(AcceleratorRead); + autoView(U_v ,U ,AcceleratorRead); + autoView(in_v ,in ,AcceleratorRead); + autoView(out_v,out,AcceleratorWrite); + autoView(st_v ,st ,AcceleratorRead); auto CBp=st.CommBuf(); #define LoopBody(Dir) \ case Dir : \ @@ -438,10 +438,10 @@ void WilsonKernels::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField int Ls, int Nsite, const FermionField &in, FermionField &out, int interior,int exterior) { - auto U_v = U.View(AcceleratorRead); - auto in_v = in.View(AcceleratorRead); - auto out_v = out.View(AcceleratorWrite); - auto st_v = st.View(AcceleratorRead); + autoView(U_v , U,AcceleratorRead); + autoView(in_v , in,AcceleratorRead); + autoView(out_v,out,AcceleratorWrite); + autoView(st_v , 
st,AcceleratorRead); if( interior && exterior ) { if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSite); return;} @@ -469,10 +469,10 @@ void WilsonKernels::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField int Ls, int Nsite, const FermionField &in, FermionField &out, int interior,int exterior) { - auto U_v = U.View(AcceleratorRead); - auto in_v = in.View(AcceleratorRead); - auto out_v = out.View(AcceleratorWrite); - auto st_v = st.View(AcceleratorRead); + autoView(U_v ,U,AcceleratorRead); + autoView(in_v ,in,AcceleratorRead); + autoView(out_v,out,AcceleratorWrite); + autoView(st_v ,st,AcceleratorRead); if( interior && exterior ) { if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDag); return;} diff --git a/Grid/qcd/action/gauge/GaugeImplTypes.h b/Grid/qcd/action/gauge/GaugeImplTypes.h index 79549dcb..1368667e 100644 --- a/Grid/qcd/action/gauge/GaugeImplTypes.h +++ b/Grid/qcd/action/gauge/GaugeImplTypes.h @@ -86,8 +86,8 @@ public: // Move this elsewhere? 
FIXME static inline void AddLink(Field &U, LinkField &W, int mu) { // U[mu] += W - auto U_v = U.View(CpuWrite); - auto W_v = W.View(CpuRead); + autoView(U_v,U,CpuWrite); + autoView(W_v,W,CpuRead); thread_for( ss, U.Grid()->oSites(), { U_v[ss](mu) = U_v[ss](mu) + W_v[ss](); }); @@ -131,15 +131,15 @@ public: //static std::chrono::duration diff; //auto start = std::chrono::high_resolution_clock::now(); - auto U_v = U.View(CpuWrite); - auto P_v = P.View(CpuRead); + autoView(U_v,U,CpuWrite); + autoView(P_v,P,CpuRead); thread_for(ss, P.Grid()->oSites(),{ for (int mu = 0; mu < Nd; mu++) { U_v[ss](mu) = ProjectOnGroup(Exponentiate(P_v[ss](mu), ep, Nexp) * U_v[ss](mu)); } }); - //auto end = std::chrono::high_resolution_clock::now(); + //auto end = std::chrono::high_resolution_clock::now(); // diff += end - start; // std::cout << "Time to exponentiate matrix " << diff.count() << " s\n"; } diff --git a/Grid/qcd/action/scalar/ScalarInteractionAction.h b/Grid/qcd/action/scalar/ScalarInteractionAction.h index 7ac85d56..5a5f9251 100644 --- a/Grid/qcd/action/scalar/ScalarInteractionAction.h +++ b/Grid/qcd/action/scalar/ScalarInteractionAction.h @@ -89,8 +89,8 @@ public: action = (2.0 * Ndim + mass_square) * phisquared - lambda * phisquared * phisquared; - auto p_v = p.View(CpuRead); - auto action_v = action.View(CpuWrite); + autoView( p_v , p, CpuRead); + autoView( action_v , action, CpuWrite); for (int mu = 0; mu < Ndim; mu++) { // pshift = Cshift(p, mu, +1); // not efficient, implement with stencils @@ -146,8 +146,8 @@ public: for (int point = 0; point < npoint; point++) { - auto p_v = p.View(CpuRead); - auto force_v = force.View(CpuWrite); + autoView( p_v , p, CpuRead); + autoView( force_v , force, CpuWrite); int permute_type; StencilEntry *SE; diff --git a/Grid/qcd/modules/Registration.h b/Grid/qcd/modules/Registration.h index ec28f020..c1149b83 100644 --- a/Grid/qcd/modules/Registration.h +++ b/Grid/qcd/modules/Registration.h @@ -81,7 +81,7 @@ static Registrar, static 
Registrar< ConjugateGradientModule, HMC_SolverModuleFactory > __CGWFmodXMLInit("ConjugateGradient"); static Registrar< BiCGSTABModule, - HMC_SolverModuleFactory > __CGWFmodXMLInit("BiCGSTAB"); + HMC_SolverModuleFactory > __BiCGWFmodXMLInit("BiCGSTAB"); static Registrar< ConjugateResidualModule, HMC_SolverModuleFactory > __CRWFmodXMLInit("ConjugateResidual"); diff --git a/Grid/qcd/utils/A2Autils.h b/Grid/qcd/utils/A2Autils.h index 7ad496b7..b63d8571 100644 --- a/Grid/qcd/utils/A2Autils.h +++ b/Grid/qcd/utils/A2Autils.h @@ -185,13 +185,14 @@ void A2Autils::MesonField(TensorType &mat, for(int i=0;i::MesonField(TensorType &mat, int base = Nmom*i+Nmom*Lblock*j+Nmom*Lblock*Rblock*r; for ( int m=0;m::PionFieldXX(Eigen::Tensor &mat, for(int i=0;i::PionFieldXX(Eigen::Tensor &mat, } for(int j=0;j::PionFieldWVmom(Eigen::Tensor &mat, for(int i=0;i::PionFieldWVmom(Eigen::Tensor &mat, int base = Nmom*i+Nmom*Lblock*j+Nmom*Lblock*Rblock*r; for ( int m=0;m::AslashField(TensorType &mat, for(int i=0;i::AslashField(TensorType &mat, for ( int m=0;m::ContractWWVV(std::vector &WWVV, for(int d_o=0;d_o::ContractWWVV(std::vector &WWVV, thread_for(ss,grid->oSites(),{ for(int d_o=0;d_o::OuterProductWWVV(PropagatorField &WWVV, const vobj &rhs, const int Ns, const int ss) { - auto WWVV_v = WWVV.View(CpuWrite); + autoView(WWVV_v,WWVV,CpuWrite); for (int s1 = 0; s1 < Ns; s1++){ for (int s2 = 0; s2 < Ns; s2++){ WWVV_v[ss]()(s1,s2)(0, 0) += lhs()(s1)(0) * rhs()(s2)(0); @@ -1122,10 +1121,10 @@ void A2Autils::ContractFourQuarkColourDiagonal(const PropagatorField &WWV GridBase *grid = WWVV0.Grid(); - auto WWVV0_v = WWVV0.View(CpuRead); - auto WWVV1_v = WWVV1.View(CpuRead); - auto O_trtr_v= O_trtr.View(CpuWrite); - auto O_fig8_v= O_fig8.View(CpuWrite); + autoView(WWVV0_v , WWVV0,CpuRead); + autoView(WWVV1_v , WWVV1,CpuRead); + autoView(O_trtr_v, O_trtr,CpuWrite); + autoView(O_fig8_v, O_fig8,CpuWrite); thread_for(ss,grid->oSites(),{ typedef typename ComplexField::vector_object vobj; @@ -1166,10 +1165,10 
@@ void A2Autils::ContractFourQuarkColourMix(const PropagatorField &WWVV0, GridBase *grid = WWVV0.Grid(); - auto WWVV0_v = WWVV0.View(CpuRead); - auto WWVV1_v = WWVV1.View(CpuRead); - auto O_trtr_v= O_trtr.View(CpuWrite); - auto O_fig8_v= O_fig8.View(CpuWrite); + autoView( WWVV0_v , WWVV0,CpuRead); + autoView( WWVV1_v , WWVV1,CpuRead); + autoView( O_trtr_v, O_trtr,CpuWrite); + autoView( O_fig8_v, O_fig8,CpuWrite); thread_for(ss,grid->oSites(),{ diff --git a/Grid/qcd/utils/BaryonUtils.h b/Grid/qcd/utils/BaryonUtils.h index d45fd93d..32beac9c 100644 --- a/Grid/qcd/utils/BaryonUtils.h +++ b/Grid/qcd/utils/BaryonUtils.h @@ -273,10 +273,10 @@ void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, for (int ie=0; ie < 6 ; ie++) wick_contraction[ie] = (quarks_left[0] == quarks_right[epsilon[ie][0]] && quarks_left[1] == quarks_right[epsilon[ie][1]] && quarks_left[2] == quarks_right[epsilon[ie][2]]) ? 1 : 0; - auto vbaryon_corr= baryon_corr.View(CpuWrite); - auto v1 = q1_left.View(CpuRead); - auto v2 = q2_left.View(CpuRead); - auto v3 = q3_left.View(CpuRead); + autoView(vbaryon_corr, baryon_corr,CpuWrite); + autoView( v1 , q1_left, CpuRead); + autoView( v2 , q2_left, CpuRead); + autoView( v3 , q3_left, CpuRead); // accelerator_for(ss, grid->oSites(), grid->Nsimd(), { thread_for(ss,grid->oSites(),{ @@ -560,10 +560,10 @@ void BaryonUtils::Sigma_to_Nucleon_Eye(const PropagatorField &qq_loop, { GridBase *grid = qs_ti.Grid(); - auto vcorr= stn_corr.View(CpuWrite); - auto vq_loop = qq_loop.View(CpuRead); - auto vd_tf = qd_tf.View(CpuRead); - auto vs_ti = qs_ti.View(CpuRead); + autoView( vcorr, stn_corr, CpuWrite); + autoView( vq_loop , qq_loop, CpuRead); + autoView( vd_tf , qd_tf, CpuRead); + autoView( vs_ti , qs_ti, CpuRead); // accelerator_for(ss, grid->oSites(), grid->Nsimd(), { thread_for(ss,grid->oSites(),{ @@ -597,12 +597,11 @@ void BaryonUtils::Sigma_to_Nucleon_NonEye(const PropagatorField &qq_ti, { GridBase *grid = qs_ti.Grid(); - auto vcorr= 
stn_corr.View(CpuWrite); - auto vq_ti = qq_ti.View(CpuRead); - auto vq_tf = qq_tf.View(CpuRead); - auto vd_tf = qd_tf.View(CpuRead); - auto vs_ti = qs_ti.View(CpuRead); - + autoView( vcorr , stn_corr, CpuWrite); + autoView( vq_ti , qq_ti, CpuRead); + autoView( vq_tf , qq_tf, CpuRead); + autoView( vd_tf , qd_tf, CpuRead); + autoView( vs_ti , qs_ti, CpuRead); // accelerator_for(ss, grid->oSites(), grid->Nsimd(), { thread_for(ss,grid->oSites(),{ auto Dq_ti = vq_ti[ss]; diff --git a/Grid/qcd/utils/LinalgUtils.h b/Grid/qcd/utils/LinalgUtils.h index 0adbfabf..1e016e4e 100644 --- a/Grid/qcd/utils/LinalgUtils.h +++ b/Grid/qcd/utils/LinalgUtils.h @@ -47,8 +47,8 @@ void axpibg5x(Lattice &z,const Lattice &x,Coeff a,Coeff b) GridBase *grid=x.Grid(); Gamma G5(Gamma::Algebra::Gamma5); - auto x_v = x.View(AcceleratorRead); - auto z_v = z.View(AcceleratorWrite); + autoView(x_v, x, AcceleratorRead); + autoView(z_v, z, AcceleratorWrite); accelerator_for( ss, x_v.size(),vobj::Nsimd(), { auto tmp = a*x_v(ss) + G5*(b*timesI(x_v(ss))); coalescedWrite(z_v[ss],tmp); @@ -63,9 +63,9 @@ void axpby_ssp(Lattice &z, Coeff a,const Lattice &x,Coeff b,const La conformable(x,z); GridBase *grid=x.Grid(); int Ls = grid->_rdimensions[0]; - auto x_v = x.View(AcceleratorRead); - auto y_v = y.View(AcceleratorRead); - auto z_v = z.View(AcceleratorWrite); + autoView( x_v, x, AcceleratorRead); + autoView( y_v, y, AcceleratorRead); + autoView( z_v, z, AcceleratorWrite); // FIXME -- need a new class of accelerator_loop to implement this // uint64_t nloop = grid->oSites()/Ls; @@ -85,9 +85,9 @@ void ag5xpby_ssp(Lattice &z,Coeff a,const Lattice &x,Coeff b,const L GridBase *grid=x.Grid(); int Ls = grid->_rdimensions[0]; Gamma G5(Gamma::Algebra::Gamma5); - auto x_v = x.View(AcceleratorRead); - auto y_v = y.View(AcceleratorRead); - auto z_v = z.View(AcceleratorWrite); + autoView( x_v, x, AcceleratorRead); + autoView( y_v, y, AcceleratorRead); + autoView( z_v, z, AcceleratorWrite); uint64_t nloop = 
grid->oSites()/Ls; accelerator_for(sss,nloop,vobj::Nsimd(),{ uint64_t ss = sss*Ls; @@ -104,9 +104,9 @@ void axpbg5y_ssp(Lattice &z,Coeff a,const Lattice &x,Coeff b,const L conformable(x,z); GridBase *grid=x.Grid(); int Ls = grid->_rdimensions[0]; - auto x_v = x.View(AcceleratorRead); - auto y_v = y.View(AcceleratorRead); - auto z_v = z.View(AcceleratorWrite); + autoView( x_v, x, AcceleratorRead); + autoView( y_v, y, AcceleratorRead); + autoView( z_v, z, AcceleratorWrite); Gamma G5(Gamma::Algebra::Gamma5); uint64_t nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,vobj::Nsimd(),{ @@ -125,9 +125,9 @@ void ag5xpbg5y_ssp(Lattice &z,Coeff a,const Lattice &x,Coeff b,const GridBase *grid=x.Grid(); int Ls = grid->_rdimensions[0]; - auto x_v = x.View(AcceleratorRead); - auto y_v = y.View(AcceleratorRead); - auto z_v = z.View(AcceleratorWrite); + autoView( x_v, x, AcceleratorRead); + autoView( y_v, y, AcceleratorRead); + autoView( z_v, z, AcceleratorWrite); Gamma G5(Gamma::Algebra::Gamma5); uint64_t nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,vobj::Nsimd(),{ @@ -147,9 +147,9 @@ void axpby_ssp_pminus(Lattice &z,Coeff a,const Lattice &x,Coeff b,co GridBase *grid=x.Grid(); int Ls = grid->_rdimensions[0]; - auto x_v = x.View(AcceleratorRead); - auto y_v = y.View(AcceleratorRead); - auto z_v = z.View(AcceleratorWrite); + autoView( x_v, x, AcceleratorRead); + autoView( y_v, y, AcceleratorRead); + autoView( z_v, z, AcceleratorWrite); uint64_t nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,vobj::Nsimd(),{ uint64_t ss = sss*Ls; @@ -168,9 +168,9 @@ void axpby_ssp_pplus(Lattice &z,Coeff a,const Lattice &x,Coeff b,con conformable(x,z); GridBase *grid=x.Grid(); int Ls = grid->_rdimensions[0]; - auto x_v = x.View(AcceleratorRead); - auto y_v = y.View(AcceleratorRead); - auto z_v = z.View(AcceleratorWrite); + autoView( x_v, x, AcceleratorRead); + autoView( y_v, y, AcceleratorRead); + autoView( z_v, z, AcceleratorWrite); uint64_t nloop = grid->oSites()/Ls; 
accelerator_for(sss,nloop,vobj::Nsimd(),{ uint64_t ss = sss*Ls; @@ -189,8 +189,8 @@ void G5R5(Lattice &z,const Lattice &x) conformable(x,z); int Ls = grid->_rdimensions[0]; Gamma G5(Gamma::Algebra::Gamma5); - auto x_v = x.View(AcceleratorRead); - auto z_v = z.View(AcceleratorWrite); + autoView( x_v, x, AcceleratorRead); + autoView( z_v, z, AcceleratorWrite); uint64_t nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,vobj::Nsimd(),{ uint64_t ss = sss*Ls; @@ -222,8 +222,8 @@ void G5C(Lattice> &z, const LatticeoSites(),CComplex::Nsimd(), { for(int n = 0; n < nb; ++n) { diff --git a/Grid/qcd/utils/SUn.h b/Grid/qcd/utils/SUn.h index 5f98f926..476c3d40 100644 --- a/Grid/qcd/utils/SUn.h +++ b/Grid/qcd/utils/SUn.h @@ -222,10 +222,10 @@ public: conformable(subgroup, Determinant); int i0, i1; su2SubGroupIndex(i0, i1, su2_index); - auto subgroup_v = subgroup.View(CpuWrite); - auto source_v = source.View(CpuRead); - auto Determinant_v = Determinant.View(CpuWrite); + autoView( subgroup_v , subgroup,CpuWrite); + autoView( source_v , source,CpuRead); + autoView( Determinant_v , Determinant,CpuWrite); thread_for(ss, grid->oSites(), { subgroup_v[ss]()()(0, 0) = source_v[ss]()()(i0, i0); @@ -257,8 +257,8 @@ public: su2SubGroupIndex(i0, i1, su2_index); dest = 1.0; // start out with identity - auto dest_v = dest.View(CpuWrite); - auto subgroup_v = subgroup.View(CpuRead); + autoView( dest_v , dest, CpuWrite); + autoView( subgroup_v, subgroup, CpuRead); thread_for(ss, grid->oSites(), { dest_v[ss]()()(i0, i0) = subgroup_v[ss]()()(0, 0); @@ -266,6 +266,7 @@ public: dest_v[ss]()()(i1, i0) = subgroup_v[ss]()()(1, 0); dest_v[ss]()()(i1, i1) = subgroup_v[ss]()()(1, 1); }); + } /////////////////////////////////////////////// @@ -608,8 +609,8 @@ public: // reunitarise?? 
template - static void LieRandomize(GridParallelRNG &pRNG, LatticeMatrixType &out, - double scale = 1.0) { + static void LieRandomize(GridParallelRNG &pRNG, LatticeMatrixType &out, double scale = 1.0) + { GridBase *grid = out.Grid(); typedef typename LatticeMatrixType::vector_type vector_type; @@ -618,8 +619,7 @@ public: typedef iSinglet vTComplexType; typedef Lattice LatticeComplexType; - typedef typename GridTypeMapper< - typename LatticeMatrixType::vector_object>::scalar_object MatrixType; + typedef typename GridTypeMapper::scalar_object MatrixType; LatticeComplexType ca(grid); LatticeMatrixType lie(grid); @@ -629,6 +629,7 @@ public: MatrixType ta; lie = Zero(); + for (int a = 0; a < AdjointDimension; a++) { random(pRNG, ca); @@ -640,6 +641,7 @@ public: la = ci * ca * ta; lie = lie + la; // e^{i la ta} + } taExp(lie, out); } diff --git a/Grid/stencil/Stencil.h b/Grid/stencil/Stencil.h index 5602420b..3b9ae08e 100644 --- a/Grid/stencil/Stencil.h +++ b/Grid/stencil/Stencil.h @@ -67,6 +67,7 @@ void Gather_plane_simple_table (Vector >& table,const Lattice { int num=table.size(); std::pair *table_v = & table[0]; + auto rhs_v = rhs.View(AcceleratorRead); accelerator_forNB( i,num, vobj::Nsimd(), { typedef decltype(coalescedRead(buffer[0])) compressed_t; @@ -75,6 +76,7 @@ void Gather_plane_simple_table (Vector >& table,const Lattice compress.Compress(&tmp_c,0,rhs_v(so+table_v[i].second)); coalescedWrite(buffer[off+o],tmp_c); }); + rhs_v.ViewClose(); // Further optimisatoin: i) software prefetch the first element of the next table entry, prefetch the table } @@ -104,6 +106,7 @@ void Gather_plane_exchange_table(Vector >& table,const Lattic so+tp[2*j+1].second, type); }); + rhs_v.ViewClose(); } struct StencilEntry { @@ -181,31 +184,30 @@ class CartesianStencilAccelerator { template class CartesianStencilView : public CartesianStencilAccelerator { -#ifndef GRID_UVM - std::shared_ptr Deleter; -#endif + private: + int *closed; + StencilEntry *cpu_ptr; + ViewMode mode; public: 
- // -#ifdef GRID_UVM - CartesianStencilView (const CartesianStencilAccelerator &refer_to_me,ViewMode mode) - : CartesianStencilAccelerator(refer_to_me){}; -#else - CartesianStencilView (const CartesianStencilView &refer_to_me) - : CartesianStencilAccelerator(refer_to_me), Deleter(refer_to_me.Deleter) - { } - CartesianStencilView (const CartesianStencilAccelerator &refer_to_me,ViewMode mode) - : CartesianStencilAccelerator(refer_to_me), Deleter(new MemViewDeleter) - { - Deleter->cpu_ptr =(void *)this->_entries_p; - Deleter->mode = mode; - this->_entries_p =(StencilEntry *) + // default copy constructor + CartesianStencilView (const CartesianStencilView &refer_to_me) = default; + CartesianStencilView (const CartesianStencilAccelerator &refer_to_me,ViewMode _mode) + : CartesianStencilAccelerator(refer_to_me), + cpu_ptr(this->_entries_p), + mode(_mode) + { + this->_entries_p =(StencilEntry *) MemoryManager::ViewOpen(this->_entries_p, - this->_npoints*this->_osites*sizeof(StencilEntry), - mode, - AdviseDefault); - } -#endif + this->_npoints*this->_osites*sizeof(StencilEntry), + mode, + AdviseDefault); + } + + void ViewClose(void) + { + MemoryManager::ViewClose(this->cpu_ptr,this->mode); + } }; diff --git a/benchmarks/Benchmark_ITT.cc b/benchmarks/Benchmark_ITT.cc index 08e25668..0d904225 100644 --- a/benchmarks/Benchmark_ITT.cc +++ b/benchmarks/Benchmark_ITT.cc @@ -252,9 +252,9 @@ public: double start=usecond(); for(int i=0;i U(4,FGrid); - auto Umu_v = Umu.View(CpuRead); - auto Umu5d_v = Umu5d.View(CpuWrite); - for(int ss=0;ssoSites();ss++){ - for(int s=0;soSites();ss++){ + for(int s=0;s U(4,FGrid); { - auto Umu5d_v = Umu5d.View(CpuWrite); - auto Umu_v = Umu.View(CpuRead); + autoView( Umu5d_v, Umu5d, CpuWrite); + autoView( Umu_v , Umu , CpuRead); for(int ss=0;ssoSites();ss++){ for(int s=0;s & latt4, int Ls, int threads,int report ) LatticeGaugeField Umu5d(FGrid); // replicate across fifth dimension - auto Umu5d_v = Umu5d.View(CpuWrite); - auto Umu_v = 
Umu.View(CpuRead); - for(int ss=0;ssoSites();ss++){ - for(int s=0;soSites();ss++){ + for(int s=0;s > &mat, for(int b=0;b > &mat, for(int b=0;b > &mat int ss= so+n*stride+b; for(int i=0;i > &m for(int i=0;i > &m // Trigger unroll for ( int m=0;m using namespace std; using namespace Grid; - ; template struct scal { @@ -51,6 +50,7 @@ int main (int argc, char ** argv) std::cout << GridLogMessage << "::::: NB: to enable a quick bit reproducibility check use the --checksums flag. " << std::endl; + { GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi()); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); @@ -100,6 +100,8 @@ int main (int argc, char ** argv) ConjugateGradient CG(1.0e-8,10000); CG(HermOpEO,src_o,result_o_2); + MemoryManager::Print(); + LatticeFermionD diff_o(FrbGrid); RealD diff = axpy_norm(diff_o, -1.0, result_o, result_o_2); @@ -130,7 +132,9 @@ int main (int argc, char ** argv) std::cout << GridLogMessage << " CG checksums "<oSites();i++){ auto SE = gStencil.GetEntry(0,i); - auto check = Check.View(CpuWrite); - auto foo = Foo.View(CpuRead); - + autoView(check, Check, CpuWrite); + autoView( foo, Foo, CpuRead); // Encapsulate in a general wrapper check[i] = foo[SE->_offset]; auto tmp=check[i]; if (SE->_permute & 0x1 ) { permute(check[i],tmp,0); tmp=check[i];} @@ -147,8 +146,8 @@ int main(int argc, char ** argv) }}}} if (nrm > 1.0e-4) { - auto check = Check.View(CpuRead); - auto bar = Bar.View(CpuRead); + autoView( check , Check, CpuRead); + autoView( bar , Bar, CpuRead); for(int i=0;i_is_local && SE->_permute ) permute(check[i],foo[SE->_offset],permute_type); else if (SE->_is_local) @@ -151,8 +151,8 @@ int main(int argc, char ** argv) { }}}} if (nrm > 1.0e-4) { - auto check = Check.View(CpuRead); - auto bar = Bar.View(CpuRead); + autoView( check , Check, CpuRead); + autoView( bar , Bar, 
CpuRead); for(int i=0;i " <_offset << " "<< SE->_is_local<_is_local && SE->_permute ) permute(ocheck[i],efoo[SE->_offset],permute_type); else if (SE->_is_local) @@ -226,8 +226,8 @@ int main(int argc, char ** argv) { SE = OStencil.GetEntry(permute_type,0,i); // std::cout << "ODD source "<< i<<" -> " <_offset << " "<< SE->_is_local<_is_local && SE->_permute ) permute(echeck[i],ofoo[SE->_offset],permute_type); else if (SE->_is_local) diff --git a/tests/core/Test_staggered5D.cc b/tests/core/Test_staggered5D.cc index 402e69d5..e4cd007f 100644 --- a/tests/core/Test_staggered5D.cc +++ b/tests/core/Test_staggered5D.cc @@ -89,8 +89,8 @@ int main (int argc, char ** argv) //////////////////////////////////// LatticeGaugeField Umu5d(FGrid); { - auto umu5d = Umu5d.View(CpuWrite); - auto umu = Umu.View(CpuRead); + autoView(umu5d, Umu5d, CpuWrite); + autoView( umu, Umu , CpuRead); for(int ss=0;ssoSites();ss++){ for(int s=0;s U(4,FGrid); { - auto Umu5d_v = Umu5d.View(CpuWrite); - auto Umu_v = Umu.View(CpuRead); + autoView( Umu5d_v , Umu5d, CpuWrite); + autoView( Umu_v , Umu , CpuRead); for(int ss=0;ssoSites();ss++){ for(int s=0;soSites(),{ uint64_t ss= sss*Ls; typedef vSpinColourVector spinor; diff --git a/tests/forces/Test_contfrac_force.cc b/tests/forces/Test_contfrac_force.cc index 4c3a3f53..cb30faad 100644 --- a/tests/forces/Test_contfrac_force.cc +++ b/tests/forces/Test_contfrac_force.cc @@ -98,9 +98,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto mom_v = mom.View(CpuRead); - auto U_v = U.View(CpuRead); - auto Uprime_v = Uprime.View(CpuWrite); + autoView( mom_v, mom, CpuRead); + autoView( U_v , U, CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); thread_foreach( i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) diff --git a/tests/forces/Test_dwf_force.cc b/tests/forces/Test_dwf_force.cc index fea867e6..81a1b8c4 100644 --- a/tests/forces/Test_dwf_force.cc +++ b/tests/forces/Test_dwf_force.cc @@ -100,9 +100,9 @@ int main (int argc, 
char ** argv) // fourth order exponential approx - auto mom_v = mom.View(CpuRead); - auto U_v = U.View(CpuRead); - auto Uprime_v = Uprime.View(CpuWrite); + autoView( mom_v, mom, CpuRead); + autoView( U_v , U, CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); thread_foreach( i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) diff --git a/tests/forces/Test_dwf_force_eofa.cc b/tests/forces/Test_dwf_force_eofa.cc index 5b864279..0b0ba346 100644 --- a/tests/forces/Test_dwf_force_eofa.cc +++ b/tests/forces/Test_dwf_force_eofa.cc @@ -110,9 +110,9 @@ int main (int argc, char** argv) PokeIndex(mom, mommu, mu); // fourth order exponential approx - auto mom_v = mom.View(CpuRead); - auto U_v = U.View(CpuRead); - auto Uprime_v = Uprime.View(CpuWrite); + autoView( mom_v, mom, CpuRead); + autoView( U_v , U, CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); thread_foreach(i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) diff --git a/tests/forces/Test_dwf_gpforce.cc b/tests/forces/Test_dwf_gpforce.cc index a0743edc..b39fdd14 100644 --- a/tests/forces/Test_dwf_gpforce.cc +++ b/tests/forces/Test_dwf_gpforce.cc @@ -119,9 +119,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto mom_v = mom.View(CpuRead); - auto U_v = U.View(CpuRead); - auto Uprime_v = Uprime.View(CpuWrite); + autoView( mom_v, mom, CpuRead); + autoView( U_v , U, CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); thread_foreach(i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) diff --git a/tests/forces/Test_dwf_gpforce_eofa.cc b/tests/forces/Test_dwf_gpforce_eofa.cc index 69b9adec..58258a5e 100644 --- a/tests/forces/Test_dwf_gpforce_eofa.cc +++ b/tests/forces/Test_dwf_gpforce_eofa.cc @@ -114,9 +114,9 @@ int main (int argc, char** argv) PokeIndex(mom, mommu, mu); // fourth order exponential approx - auto mom_v = mom.View(CpuRead); - auto U_v = U.View(CpuRead); - auto Uprime_v = Uprime.View(CpuWrite); + autoView( mom_v, mom, CpuRead); + autoView( U_v , U, CpuRead); + autoView(Uprime_v, Uprime, 
CpuWrite); thread_foreach(i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) diff --git a/tests/forces/Test_gp_plaq_force.cc b/tests/forces/Test_gp_plaq_force.cc index 5de7ddb7..21f0b9d0 100644 --- a/tests/forces/Test_gp_plaq_force.cc +++ b/tests/forces/Test_gp_plaq_force.cc @@ -85,9 +85,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto Uprime_v = Uprime.View(CpuWrite); - auto U_v = U.View(CpuRead); - auto mom_v = mom.View(CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); + autoView( U_v , U, CpuRead); + autoView( mom_v, mom, CpuRead); thread_foreach(i,mom_v,{ // exp(pmu dt) * Umu Uprime_v[i](mu) = U_v[i](mu) + mom_v[i](mu)*U_v[i](mu)*dt ; }); diff --git a/tests/forces/Test_gp_rect_force.cc b/tests/forces/Test_gp_rect_force.cc index 026ce60f..bb4ea6de 100644 --- a/tests/forces/Test_gp_rect_force.cc +++ b/tests/forces/Test_gp_rect_force.cc @@ -87,9 +87,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto mom_v = mom.View(CpuRead); - auto Uprime_v= Uprime.View(CpuWrite); - auto U_v = U.View(CpuRead); + autoView( mom_v, mom, CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); + autoView( U_v , U, CpuRead); thread_foreach(i,mom_v,{ // exp(pmu dt) * Umu Uprime_v[i](mu) = U_v[i](mu) + mom_v[i](mu)*U_v[i](mu)*dt ; }); diff --git a/tests/forces/Test_gpdwf_force.cc b/tests/forces/Test_gpdwf_force.cc index 22927d01..bdc332d9 100644 --- a/tests/forces/Test_gpdwf_force.cc +++ b/tests/forces/Test_gpdwf_force.cc @@ -105,9 +105,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto U_v = U.View(CpuRead); - auto mom_v = mom.View(CpuRead); - auto Uprime_v = Uprime.View(CpuWrite); + autoView( U_v , U, CpuRead); + autoView( mom_v, mom, CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); thread_foreach(i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) + mom_v[i](mu)*U_v[i](mu)*dt diff --git a/tests/forces/Test_gpwilson_force.cc 
b/tests/forces/Test_gpwilson_force.cc index 41c4641d..1c85a5d9 100644 --- a/tests/forces/Test_gpwilson_force.cc +++ b/tests/forces/Test_gpwilson_force.cc @@ -99,9 +99,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto mom_v = mom.View(CpuRead); - auto U_v = U.View(CpuRead); - auto Uprime_v = Uprime.View(CpuWrite); + autoView( mom_v, mom, CpuRead); + autoView( U_v , U, CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); thread_foreach(i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) diff --git a/tests/forces/Test_mobius_force.cc b/tests/forces/Test_mobius_force.cc index daab4149..11e69652 100644 --- a/tests/forces/Test_mobius_force.cc +++ b/tests/forces/Test_mobius_force.cc @@ -101,9 +101,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto U_v = U.View(CpuRead); - auto mom_v = mom.View(CpuRead); - auto Uprime_v = Uprime.View(CpuWrite); + autoView( U_v , U, CpuRead); + autoView( mom_v, mom, CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); thread_foreach(i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) + mom_v[i](mu)*U_v[i](mu)*dt diff --git a/tests/forces/Test_mobius_force_eofa.cc b/tests/forces/Test_mobius_force_eofa.cc index 7a8d4cf8..f85501fa 100644 --- a/tests/forces/Test_mobius_force_eofa.cc +++ b/tests/forces/Test_mobius_force_eofa.cc @@ -112,9 +112,9 @@ int main (int argc, char** argv) PokeIndex(mom, mommu, mu); // fourth order exponential approx - auto mom_v = mom.View(CpuRead); - auto U_v = U.View(CpuRead); - auto Uprime_v = Uprime.View(CpuWrite); + autoView( mom_v, mom, CpuRead); + autoView( U_v , U, CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); thread_foreach(i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) diff --git a/tests/forces/Test_mobius_gpforce_eofa.cc b/tests/forces/Test_mobius_gpforce_eofa.cc index 13de233b..68163e63 100644 --- a/tests/forces/Test_mobius_gpforce_eofa.cc +++ b/tests/forces/Test_mobius_gpforce_eofa.cc @@ -115,9 +115,9 @@ int main (int argc, char** 
argv) SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg PokeIndex(mom, mommu, mu); - auto U_v = U.View(CpuRead); - auto mom_v = mom.View(CpuRead); - auto Uprime_v = Uprime.View(CpuWrite); + autoView( U_v , U, CpuRead); + autoView( mom_v, mom, CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); // fourth order exponential approx thread_foreach( i, mom_v,{ Uprime_v[i](mu) = U_v[i](mu) + mom_v[i](mu)*U_v[i](mu)*dt + mom_v[i](mu) *mom_v[i](mu) *U_v[i](mu)*(dt*dt/2.0) diff --git a/tests/forces/Test_partfrac_force.cc b/tests/forces/Test_partfrac_force.cc index 9292274e..17dce530 100644 --- a/tests/forces/Test_partfrac_force.cc +++ b/tests/forces/Test_partfrac_force.cc @@ -101,9 +101,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto U_v = U.View(CpuRead); - auto mom_v = mom.View(CpuRead); - auto Uprime_v = Uprime.View(CpuWrite); + autoView( U_v , U, CpuRead); + autoView( mom_v, mom, CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); thread_foreach(i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) + mom_v[i](mu)*U_v[i](mu)*dt diff --git a/tests/forces/Test_rect_force.cc b/tests/forces/Test_rect_force.cc index 909068c2..ed72f2c0 100644 --- a/tests/forces/Test_rect_force.cc +++ b/tests/forces/Test_rect_force.cc @@ -87,9 +87,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto Uprime_v = Uprime.View(CpuWrite); - auto U_v = U.View(CpuRead); - auto mom_v = mom.View(CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); + autoView( U_v , U, CpuRead); + autoView( mom_v, mom, CpuRead); thread_foreach(i,mom_v,{ // exp(pmu dt) * Umu Uprime_v[i](mu) = U_v[i](mu) + mom_v[i](mu)*U_v[i](mu)*dt ; }); diff --git a/tests/forces/Test_wilson_force.cc b/tests/forces/Test_wilson_force.cc index 397dc40c..c8b3a7f4 100644 --- a/tests/forces/Test_wilson_force.cc +++ b/tests/forces/Test_wilson_force.cc @@ -105,9 +105,9 @@ int main (int argc, char ** 
argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto U_v = U.View(CpuRead); - auto mom_v = mom.View(CpuRead); - auto Uprime_v = Uprime.View(CpuWrite); + autoView( U_v , U, CpuRead); + autoView( mom_v, mom, CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); thread_foreach( i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu); Uprime_v[i](mu) += mom_v[i](mu)*U_v[i](mu)*dt ; diff --git a/tests/forces/Test_wilsonclover_force.cc b/tests/forces/Test_wilsonclover_force.cc index ff664e19..f26f0ac9 100644 --- a/tests/forces/Test_wilsonclover_force.cc +++ b/tests/forces/Test_wilsonclover_force.cc @@ -105,9 +105,9 @@ int main(int argc, char **argv) Hmom -= real(sum(trace(mommu * mommu))); PokeIndex(mom, mommu, mu); - auto Uprime_v = Uprime.View(CpuWrite); - auto U_v = U.View(CpuRead); - auto mom_v = mom.View(CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); + autoView( U_v , U, CpuRead); + autoView( mom_v, mom, CpuRead); thread_foreach(ss,mom_v, { Uprime_v[ss]._internal[mu] = ProjectOnGroup(Exponentiate(mom_v[ss]._internal[mu], dt, 12) * U_v[ss]._internal[mu]); diff --git a/tests/forces/Test_zmobius_force.cc b/tests/forces/Test_zmobius_force.cc index 2ed12acd..e24ae601 100644 --- a/tests/forces/Test_zmobius_force.cc +++ b/tests/forces/Test_zmobius_force.cc @@ -114,9 +114,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto mom_v = mom.View(CpuRead); - auto U_v = U.View(CpuRead); - auto Uprime_v = Uprime.View(CpuWrite); + autoView( mom_v, mom, CpuRead); + autoView( U_v , U, CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); thread_foreach(i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) diff --git a/tests/solver/Test_dwf_hdcr.cc b/tests/solver/Test_dwf_hdcr.cc index f93af852..8e083231 100644 --- a/tests/solver/Test_dwf_hdcr.cc +++ b/tests/solver/Test_dwf_hdcr.cc @@ -300,8 +300,8 @@ int main (int argc, char ** argv) int nb=nbasisc/2; CoarseAggregates.CreateSubspaceChebyshev(CRNG,PosdefLdop,nb,12.0,0.02,500,100,100,0.0); for(int 
n=0;noSites();site++){ subspace_g5[site](nn) = subspace[site](nn); From 3111c0bd4feae2421b9cdae5176e5e0fbd058aee Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 5 Jun 2020 19:13:27 -0400 Subject: [PATCH 44/86] Single precisiono hardwire --- benchmarks/Benchmark_staggeredF.cc | 114 +++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 benchmarks/Benchmark_staggeredF.cc diff --git a/benchmarks/Benchmark_staggeredF.cc b/benchmarks/Benchmark_staggeredF.cc new file mode 100644 index 00000000..f7beed2d --- /dev/null +++ b/benchmarks/Benchmark_staggeredF.cc @@ -0,0 +1,114 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./benchmarks/Benchmark_staggered.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; + ; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + Coordinate latt_size = GridDefaultLatt(); + Coordinate simd_layout = GridDefaultSimd(Nd,vComplexF::Nsimd()); + Coordinate mpi_layout = GridDefaultMpi(); + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); + + int threads = GridThread::GetThreads(); + std::cout< seeds({1,2,3,4}); + GridParallelRNG pRNG(&Grid); + pRNG.SeedFixedIntegers(seeds); + // pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); + + typedef typename ImprovedStaggeredFermionF::FermionField FermionField; + typename ImprovedStaggeredFermionF::ImplParams params; + + FermionField src (&Grid); random(pRNG,src); + FermionField result(&Grid); result=Zero(); + FermionField ref(&Grid); ref=Zero(); + FermionField tmp(&Grid); tmp=Zero(); + FermionField err(&Grid); tmp=Zero(); + LatticeGaugeFieldF Umu(&Grid); random(pRNG,Umu); + std::vector U(4,&Grid); + + double volume=1; + for(int mu=0;mu(Umu,U[nn],nn); + } +#endif + + for(int mu=0;mu(Umu,mu); + } + + RealD mass=0.1; + RealD c1=9.0/8.0; + RealD c2=-1.0/24.0; + RealD u0=1.0; + ImprovedStaggeredFermionF Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0,params); + + std::cout< %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout); + // dprintf("MemoryManager: Flush %llx -> %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout); DeviceToHostBytes+=AccCache.bytes; DeviceToHostXfer++; AccCache.state=Consistent; @@ -174,7 +164,7 @@ void MemoryManager::Clone(AcceleratorViewEntry &AccCache) AccCache.AccPtr=(uint64_t)AcceleratorAllocate(AccCache.bytes); DeviceBytes+=AccCache.bytes; } - dprintf("MemoryManager: Clone %llx <- 
%llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout); + // dprintf("MemoryManager: Clone %llx <- %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout); acceleratorCopyToDevice((void *)AccCache.CpuPtr,(void *)AccCache.AccPtr,AccCache.bytes); HostToDeviceBytes+=AccCache.bytes; HostToDeviceXfer++; From 616d3dd7376883e3dcca2078eef60330b30b27ab Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 8 Jun 2020 18:57:41 -0400 Subject: [PATCH 49/86] CCommpile updates --- Grid/allocator/MemoryManager.cc | 19 ++++++++++++++++++- Grid/allocator/MemoryManagerCache.cc | 2 +- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/Grid/allocator/MemoryManager.cc b/Grid/allocator/MemoryManager.cc index 599cdaab..17850333 100644 --- a/Grid/allocator/MemoryManager.cc +++ b/Grid/allocator/MemoryManager.cc @@ -86,7 +86,24 @@ void MemoryManager::Init(void) Ncache[AccSmall]=Nc; } } - std::cout << "MemoryManager::Init() SMALL "< Date: Wed, 10 Jun 2020 12:57:55 -0400 Subject: [PATCH 50/86] Offload more loops --- Grid/qcd/action/fermion/WilsonCloverFermion.h | 37 ++++---- Grid/qcd/action/fermion/WilsonImpl.h | 16 ++-- .../WilsonFermionImplementation.h | 84 +------------------ 3 files changed, 29 insertions(+), 108 deletions(-) diff --git a/Grid/qcd/action/fermion/WilsonCloverFermion.h b/Grid/qcd/action/fermion/WilsonCloverFermion.h index aa8fb150..a2755389 100644 --- a/Grid/qcd/action/fermion/WilsonCloverFermion.h +++ b/Grid/qcd/action/fermion/WilsonCloverFermion.h @@ -258,15 +258,16 @@ private: CloverFieldType CloverTermDagEven, CloverTermDagOdd; // Clover term Dag EO CloverFieldType CloverTermInvDagEven, CloverTermInvDagOdd; // Clover term Inv Dag EO + public: // eventually these can be compressed into 6x6 blocks instead of the 12x12 // using the DeGrand-Rossi basis for the gamma matrices CloverFieldType fillCloverYZ(const GaugeLinkField &F) { CloverFieldType T(F.Grid()); T = Zero(); - autoView(T_v,T,CpuWrite); - 
autoView(F_v,F,CpuRead); - thread_for(i, CloverTerm.Grid()->oSites(), + autoView(T_v,T,AcceleratorWrite); + autoView(F_v,F,AcceleratorRead); + accelerator_for(i, CloverTerm.Grid()->oSites(),1, { T_v[i]()(0, 1) = timesMinusI(F_v[i]()()); T_v[i]()(1, 0) = timesMinusI(F_v[i]()()); @@ -282,9 +283,9 @@ private: CloverFieldType T(F.Grid()); T = Zero(); - autoView(T_v, T,CpuWrite); - autoView(F_v, F,CpuRead); - thread_for(i, CloverTerm.Grid()->oSites(), + autoView(T_v, T,AcceleratorWrite); + autoView(F_v, F,AcceleratorRead); + accelerator_for(i, CloverTerm.Grid()->oSites(),1, { T_v[i]()(0, 1) = -F_v[i]()(); T_v[i]()(1, 0) = F_v[i]()(); @@ -300,9 +301,9 @@ private: CloverFieldType T(F.Grid()); T = Zero(); - autoView(T_v,T,CpuWrite); - autoView(F_v,F,CpuRead); - thread_for(i, CloverTerm.Grid()->oSites(), + autoView(T_v,T,AcceleratorWrite); + autoView(F_v,F,AcceleratorRead); + accelerator_for(i, CloverTerm.Grid()->oSites(),1, { T_v[i]()(0, 0) = timesMinusI(F_v[i]()()); T_v[i]()(1, 1) = timesI(F_v[i]()()); @@ -318,9 +319,9 @@ private: CloverFieldType T(F.Grid()); T = Zero(); - autoView( T_v , T, CpuWrite); - autoView( F_v , F, CpuRead); - thread_for(i, CloverTerm.Grid()->oSites(), + autoView( T_v , T, AcceleratorWrite); + autoView( F_v , F, AcceleratorRead); + accelerator_for(i, CloverTerm.Grid()->oSites(),1, { T_v[i]()(0, 1) = timesI(F_v[i]()()); T_v[i]()(1, 0) = timesI(F_v[i]()()); @@ -336,9 +337,9 @@ private: CloverFieldType T(F.Grid()); T = Zero(); - autoView( T_v ,T,CpuWrite); - autoView( F_v ,F,CpuRead); - thread_for(i, CloverTerm.Grid()->oSites(), + autoView( T_v ,T,AcceleratorWrite); + autoView( F_v ,F,AcceleratorRead); + accelerator_for(i, CloverTerm.Grid()->oSites(),1, { T_v[i]()(0, 1) = -(F_v[i]()()); T_v[i]()(1, 0) = (F_v[i]()()); @@ -355,9 +356,9 @@ private: T = Zero(); - autoView( T_v , T,CpuWrite); - autoView( F_v , F,CpuRead); - thread_for(i, CloverTerm.Grid()->oSites(), + autoView( T_v , T,AcceleratorWrite); + autoView( F_v , F,AcceleratorRead); + 
accelerator_for(i, CloverTerm.Grid()->oSites(),1, { T_v[i]()(0, 0) = timesI(F_v[i]()()); T_v[i]()(1, 1) = timesMinusI(F_v[i]()()); diff --git a/Grid/qcd/action/fermion/WilsonImpl.h b/Grid/qcd/action/fermion/WilsonImpl.h index b4afc69a..52e1ee00 100644 --- a/Grid/qcd/action/fermion/WilsonImpl.h +++ b/Grid/qcd/action/fermion/WilsonImpl.h @@ -106,10 +106,10 @@ public: const _SpinorField & phi, int mu) { - autoView( out_v, out, CpuWrite); - autoView( phi_v, phi, CpuRead); - autoView( Umu_v, Umu, CpuRead); - thread_for(sss,out.Grid()->oSites(),{ + autoView( out_v, out, AcceleratorWrite); + autoView( phi_v, phi, AcceleratorRead); + autoView( Umu_v, Umu, AcceleratorRead); + accelerator_for(sss,out.Grid()->oSites(),1,{ multLink(out_v[sss],Umu_v[sss],phi_v[sss],mu); }); } @@ -192,10 +192,10 @@ public: GaugeLinkField tmp(mat.Grid()); tmp = Zero(); { - autoView( tmp_v , tmp, CpuWrite); - autoView( Btilde_v , Btilde, CpuRead); - autoView( Atilde_v , Atilde, CpuRead); - thread_for(sss,tmp.Grid()->oSites(),{ + autoView( tmp_v , tmp, AcceleratorWrite); + autoView( Btilde_v , Btilde, AcceleratorRead); + autoView( Atilde_v , Atilde, AcceleratorRead); + accelerator_for(sss,tmp.Grid()->oSites(),1,{ int sU=sss; for(int s=0;s::ContractConservedCurrent(PropagatorField &q_in_1, conformable(_grid, q_in_1.Grid()); conformable(_grid, q_in_2.Grid()); conformable(_grid, q_out.Grid()); -#if 0 - PropagatorField tmp1(_grid), tmp2(_grid); - q_out = Zero(); - - // Forward, need q1(x + mu), q2(x). Backward, need q1(x), q2(x + mu). - // Inefficient comms method but not performance critical. 
- tmp1 = Cshift(q_in_1, mu, 1); - tmp2 = Cshift(q_in_2, mu, 1); - autoView( tmp1_v , tmp1, CpuWrite); - autoView( tmp2_v , tmp2, CpuWrite); - autoView( q_in_1_v,q_in_1, CpuRead); - autoView( q_in_2_v,q_in_2, CpuRead); - autoView( q_out_v , q_out, CpuRead); - autoView( Umu_v , Umu, CpuRead); - thread_for(sU, Umu.Grid()->oSites(),{ - Kernels::ContractConservedCurrentSiteFwd(tmp1_v[sU], - q_in_2_v[sU], - q_out_v[sU], - Umu_v, sU, mu); - Kernels::ContractConservedCurrentSiteBwd(q_in_1_v[sU], - tmp2_v[sU], - q_out_v[sU], - Umu_v, sU, mu); - }); -#else -#endif + assert(0); } @@ -508,62 +483,7 @@ void WilsonFermion::SeqConservedCurrent(PropagatorField &q_in, { conformable(_grid, q_in.Grid()); conformable(_grid, q_out.Grid()); -#if 0 - - // Lattice> ph(_grid), coor(_grid); - Complex i(0.0,1.0); - PropagatorField tmpFwd(_grid), tmpBwd(_grid), tmp(_grid); - unsigned int tshift = (mu == Tp) ? 1 : 0; - unsigned int LLt = GridDefaultLatt()[Tp]; - - q_out = Zero(); - LatticeInteger coords(_grid); - LatticeCoordinate(coords, Tp); - - // Need q(x + mu) and q(x - mu). - tmp = Cshift(q_in, mu, 1); - tmpFwd = tmp*lattice_cmplx; - tmp = lattice_cmplx*q_in; - tmpBwd = Cshift(tmp, mu, -1); - - autoView( coords_v , coords, CpuRead); - autoView( tmpFwd_v , tmpFwd, CpuRead); - autoView( tmpBwd_v , tmpBwd, CpuRead); - autoView( Umu_v , Umu, CpuRead); - autoView( q_out_v , q_out, CpuWrite); - - thread_for(sU, Umu.Grid()->oSites(), { - - // Compute the sequential conserved current insertion only if our simd - // object contains a timeslice we need. - vPredicate t_mask; - t_mask() = ((coords_v[sU] >= tmin) && (coords_v[sU] <= tmax)); - Integer timeSlices = Reduce(t_mask()); - - if (timeSlices > 0) { - Kernels::SeqConservedCurrentSiteFwd(tmpFwd_v[sU], - q_out_v[sU], - Umu_v, sU, mu, t_mask); - } - - // Repeat for backward direction. 
- t_mask() = ((coords_v[sU] >= (tmin + tshift)) && - (coords_v[sU] <= (tmax + tshift))); - - //if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3) - unsigned int t0 = 0; - if((tmax==LLt-1) && (tshift==1)) t_mask() = (t_mask() || (coords_v[sU] == t0 )); - - timeSlices = Reduce(t_mask()); - - if (timeSlices > 0) { - Kernels::SeqConservedCurrentSiteBwd(tmpBwd_v[sU], - q_out_v[sU], - Umu_v, sU, mu, t_mask); - } - }); -#else -#endif + assert(0); } NAMESPACE_END(Grid); From a7ffc61e82770c141538debfb352c989931cf0de Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 10 Jun 2020 19:58:33 -0400 Subject: [PATCH 51/86] acceleratorSIMTlane() --- Grid/lattice/Lattice_transfer.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Grid/lattice/Lattice_transfer.h b/Grid/lattice/Lattice_transfer.h index 44d7674f..ad2d07cb 100644 --- a/Grid/lattice/Lattice_transfer.h +++ b/Grid/lattice/Lattice_transfer.h @@ -99,13 +99,13 @@ accelerator_inline void convertType(ComplexF & out, const std::complex & #ifdef GRID_SIMT accelerator_inline void convertType(vComplexF & out, const ComplexF & in) { - ((ComplexF*)&out)[SIMTlane(vComplexF::Nsimd())] = in; + ((ComplexF*)&out)[acceleratorSIMTlane(vComplexF::Nsimd())] = in; } accelerator_inline void convertType(vComplexD & out, const ComplexD & in) { - ((ComplexD*)&out)[SIMTlane(vComplexD::Nsimd())] = in; + ((ComplexD*)&out)[acceleratorSIMTlane(vComplexD::Nsimd())] = in; } accelerator_inline void convertType(vComplexD2 & out, const ComplexD & in) { - ((ComplexD*)&out)[SIMTlane(vComplexD::Nsimd()*2)] = in; + ((ComplexD*)&out)[acceleratorSIMTlane(vComplexD::Nsimd()*2)] = in; } #endif From 237ce925401497227718d325c7c0915c9a176456 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 10 Jun 2020 19:59:11 -0400 Subject: [PATCH 52/86] Offload loops --- Grid/qcd/action/gauge/GaugeImplTypes.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git 
a/Grid/qcd/action/gauge/GaugeImplTypes.h b/Grid/qcd/action/gauge/GaugeImplTypes.h index 1368667e..9b7d5a60 100644 --- a/Grid/qcd/action/gauge/GaugeImplTypes.h +++ b/Grid/qcd/action/gauge/GaugeImplTypes.h @@ -86,9 +86,9 @@ public: // Move this elsewhere? FIXME static inline void AddLink(Field &U, LinkField &W, int mu) { // U[mu] += W - autoView(U_v,U,CpuWrite); - autoView(W_v,W,CpuRead); - thread_for( ss, U.Grid()->oSites(), { + autoView(U_v,U,AcceleratorWrite); + autoView(W_v,W,AcceleratorRead); + accelerator_for( ss, U.Grid()->oSites(), 1, { U_v[ss](mu) = U_v[ss](mu) + W_v[ss](); }); } @@ -131,14 +131,13 @@ public: //static std::chrono::duration diff; //auto start = std::chrono::high_resolution_clock::now(); - autoView(U_v,U,CpuWrite); - autoView(P_v,P,CpuRead); - thread_for(ss, P.Grid()->oSites(),{ + autoView(U_v,U,AcceleratorWrite); + autoView(P_v,P,AcceleratorRead); + accelerator_for(ss, P.Grid()->oSites(),1,{ for (int mu = 0; mu < Nd; mu++) { U_v[ss](mu) = ProjectOnGroup(Exponentiate(P_v[ss](mu), ep, Nexp) * U_v[ss](mu)); } }); - //auto end = std::chrono::high_resolution_clock::now(); // diff += end - start; // std::cout << "Time to exponentiate matrix " << diff.count() << " s\n"; From 84c19587e7de6c2de69d9137eb17047ee89f7df4 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 10 Jun 2020 19:59:31 -0400 Subject: [PATCH 53/86] Offload --- Grid/lattice/Lattice_peekpoke.h | 34 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/Grid/lattice/Lattice_peekpoke.h b/Grid/lattice/Lattice_peekpoke.h index 2ec97b08..39ab8e30 100644 --- a/Grid/lattice/Lattice_peekpoke.h +++ b/Grid/lattice/Lattice_peekpoke.h @@ -46,9 +46,9 @@ auto PeekIndex(const Lattice &lhs,int i) -> Lattice(vobj(),i))> ret(lhs.Grid()); ret.Checkerboard()=lhs.Checkerboard(); - autoView( ret_v, ret, CpuWrite); - autoView( lhs_v, lhs, CpuRead); - thread_for( ss, lhs_v.size(), { + autoView( ret_v, ret, AcceleratorWrite); + autoView( lhs_v, lhs, 
AcceleratorRead); + accelerator_for( ss, lhs_v.size(), 1, { ret_v[ss] = peekIndex(lhs_v[ss],i); }); return ret; @@ -58,9 +58,9 @@ auto PeekIndex(const Lattice &lhs,int i,int j) -> Lattice(vobj(),i,j))> ret(lhs.Grid()); ret.Checkerboard()=lhs.Checkerboard(); - autoView( ret_v, ret, CpuWrite); - autoView( lhs_v, lhs, CpuRead); - thread_for( ss, lhs_v.size(), { + autoView( ret_v, ret, AcceleratorWrite); + autoView( lhs_v, lhs, AcceleratorRead); + accelerator_for( ss, lhs_v.size(), 1, { ret_v[ss] = peekIndex(lhs_v[ss],i,j); }); return ret; @@ -72,18 +72,18 @@ auto PeekIndex(const Lattice &lhs,int i,int j) -> Lattice void PokeIndex(Lattice &lhs,const Lattice(vobj(),0))> & rhs,int i) { - autoView( rhs_v, rhs, CpuRead); - autoView( lhs_v, lhs, CpuWrite); - thread_for( ss, lhs_v.size(), { + autoView( rhs_v, rhs, AcceleratorRead); + autoView( lhs_v, lhs, AcceleratorWrite); + accelerator_for( ss, lhs_v.size(), 1, { pokeIndex(lhs_v[ss],rhs_v[ss],i); }); } template void PokeIndex(Lattice &lhs,const Lattice(vobj(),0,0))> & rhs,int i,int j) { - autoView( rhs_v, rhs, CpuRead); - autoView( lhs_v, lhs, CpuWrite); - thread_for( ss, lhs_v.size(), { + autoView( rhs_v, rhs, AcceleratorRead); + autoView( lhs_v, lhs, AcceleratorWrite); + accelerator_for( ss, lhs_v.size(), 1, { pokeIndex(lhs_v[ss],rhs_v[ss],i,j); }); } @@ -151,13 +151,12 @@ void peekSite(sobj &s,const Lattice &l,const Coordinate &site){ return; }; - ////////////////////////////////////////////////////////// // Peek a scalar object from the SIMD array ////////////////////////////////////////////////////////// template -inline void peekLocalSite(sobj &s,const Lattice &l,Coordinate &site){ - +inline void peekLocalSite(sobj &s,const Lattice &l,Coordinate &site) +{ GridBase *grid = l.Grid(); typedef typename vobj::scalar_type scalar_type; @@ -185,8 +184,8 @@ inline void peekLocalSite(sobj &s,const Lattice &l,Coordinate &site){ }; template -inline void pokeLocalSite(const sobj &s,Lattice &l,Coordinate &site){ - +inline void 
pokeLocalSite(const sobj &s,Lattice &l,Coordinate &site) +{ GridBase *grid=l.Grid(); typedef typename vobj::scalar_type scalar_type; @@ -208,7 +207,6 @@ inline void pokeLocalSite(const sobj &s,Lattice &l,Coordinate &site){ for(int w=0;w Date: Wed, 10 Jun 2020 20:00:00 -0400 Subject: [PATCH 54/86] Keep on GPU --- Grid/qcd/utils/SUn.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Grid/qcd/utils/SUn.h b/Grid/qcd/utils/SUn.h index 476c3d40..6f50af04 100644 --- a/Grid/qcd/utils/SUn.h +++ b/Grid/qcd/utils/SUn.h @@ -223,10 +223,10 @@ public: int i0, i1; su2SubGroupIndex(i0, i1, su2_index); - autoView( subgroup_v , subgroup,CpuWrite); - autoView( source_v , source,CpuRead); - autoView( Determinant_v , Determinant,CpuWrite); - thread_for(ss, grid->oSites(), { + autoView( subgroup_v , subgroup,AcceleratorWrite); + autoView( source_v , source,AcceleratorRead); + autoView( Determinant_v , Determinant,AcceleratorWrite); + accelerator_for(ss, grid->oSites(), 1, { subgroup_v[ss]()()(0, 0) = source_v[ss]()()(i0, i0); subgroup_v[ss]()()(0, 1) = source_v[ss]()()(i0, i1); @@ -257,9 +257,9 @@ public: su2SubGroupIndex(i0, i1, su2_index); dest = 1.0; // start out with identity - autoView( dest_v , dest, CpuWrite); - autoView( subgroup_v, subgroup, CpuRead); - thread_for(ss, grid->oSites(), + autoView( dest_v , dest, AcceleratorWrite); + autoView( subgroup_v, subgroup, AcceleratorRead); + acccelerator_for(ss, grid->oSites(),1, { dest_v[ss]()()(i0, i0) = subgroup_v[ss]()()(0, 0); dest_v[ss]()()(i0, i1) = subgroup_v[ss]()()(0, 1); From 32b2b59be40a0f6619df5eac88f0e3dc3e3e6fd0 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 10 Jun 2020 20:36:26 -0400 Subject: [PATCH 55/86] Offload --- Grid/qcd/utils/SUn.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Grid/qcd/utils/SUn.h b/Grid/qcd/utils/SUn.h index 6f50af04..0cc0cc1a 100644 --- a/Grid/qcd/utils/SUn.h +++ b/Grid/qcd/utils/SUn.h @@ -259,7 +259,7 @@ public: dest = 1.0; // start out 
with identity autoView( dest_v , dest, AcceleratorWrite); autoView( subgroup_v, subgroup, AcceleratorRead); - acccelerator_for(ss, grid->oSites(),1, + accelerator_for(ss, grid->oSites(),1, { dest_v[ss]()()(i0, i0) = subgroup_v[ss]()()(0, 0); dest_v[ss]()()(i0, i1) = subgroup_v[ss]()()(0, 1); From 244c003a1bd5316e80cfdac38959d7ff7528bb0b Mon Sep 17 00:00:00 2001 From: Raoul Hodgson Date: Fri, 12 Jun 2020 11:00:25 +0100 Subject: [PATCH 56/86] Updated Baryon code --- Grid/qcd/utils/BaryonUtils.h | 40 +++++++++++++++--------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/Grid/qcd/utils/BaryonUtils.h b/Grid/qcd/utils/BaryonUtils.h index 23267270..3384d273 100644 --- a/Grid/qcd/utils/BaryonUtils.h +++ b/Grid/qcd/utils/BaryonUtils.h @@ -7,6 +7,7 @@ Copyright (C) 2019 Author: Felix Erben + Author: Raoul Hodgson This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -58,7 +59,7 @@ public: const Gamma GammaA_right, const Gamma GammaB_right, const int parity, - const int * wick_contractions, + const bool * wick_contractions, robj &result); public: static void ContractBaryons(const PropagatorField &q1_left, @@ -68,8 +69,7 @@ public: const Gamma GammaB_left, const Gamma GammaA_right, const Gamma GammaB_right, - const char * quarks_left, - const char * quarks_right, + const bool* wick_contractions, const int parity, ComplexField &baryon_corr); template @@ -80,9 +80,9 @@ public: const Gamma GammaB_left, const Gamma GammaA_right, const Gamma GammaB_right, - const char * quarks_left, - const char * quarks_right, + const int wick_contraction, const int parity, + const int nt, robj &result); private: template @@ -173,7 +173,7 @@ void BaryonUtils::baryon_site(const mobj &D1, const Gamma GammaA_right, const Gamma GammaB_right, const int parity, - const int * wick_contraction, + const bool * wick_contraction, robj &result) { @@ -279,8 +279,7 @@ void 
BaryonUtils::ContractBaryons(const PropagatorField &q1_left, const Gamma GammaB_left, const Gamma GammaA_right, const Gamma GammaB_right, - const char * quarks_left, - const char * quarks_right, + const bool* wick_contractions, const int parity, ComplexField &baryon_corr) { @@ -288,7 +287,6 @@ void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, assert(Ns==4 && "Baryon code only implemented for N_spin = 4"); assert(Nc==3 && "Baryon code only implemented for N_colour = 3"); - std::cout << "Contraction <" << quarks_right[0] << quarks_right[1] << quarks_right[2] << "|" << quarks_left[0] << quarks_left[1] << quarks_left[2] << ">" << std::endl; std::cout << "GammaA (left) " << (GammaA_left.g) << std::endl; std::cout << "GammaB (left) " << (GammaB_left.g) << std::endl; std::cout << "GammaA (right) " << (GammaA_right.g) << std::endl; @@ -298,10 +296,6 @@ void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, GridBase *grid = q1_left.Grid(); - int wick_contraction[6]; - for (int ie=0; ie < 6 ; ie++) - wick_contraction[ie] = (quarks_left[0] == quarks_right[epsilon[ie][0]] && quarks_left[1] == quarks_right[epsilon[ie][1]] && quarks_left[2] == quarks_right[epsilon[ie][2]]) ? 
1 : 0; - auto vbaryon_corr= baryon_corr.View(); auto v1 = q1_left.View(); auto v2 = q2_left.View(); @@ -311,10 +305,10 @@ void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, bytes += grid->oSites() * (432.*sizeof(vComplex) + 126.*sizeof(int) + 36.*sizeof(Real)); for (int ie=0; ie < 6 ; ie++){ if(ie==0 or ie==3){ - bytes += grid->oSites() * (4.*sizeof(int) + 4752.*sizeof(vComplex)) * wick_contraction[ie]; + bytes += grid->oSites() * (4.*sizeof(int) + 4752.*sizeof(vComplex)) * wick_contractions[ie]; } else{ - bytes += grid->oSites() * (64.*sizeof(int) + 5184.*sizeof(vComplex)) * wick_contraction[ie]; + bytes += grid->oSites() * (64.*sizeof(int) + 5184.*sizeof(vComplex)) * wick_contractions[ie]; } } Real t=0.; @@ -325,7 +319,7 @@ void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, auto D2 = v2[ss]; auto D3 = v3[ss]; vobj result=Zero(); - baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result); + baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contractions,result); vbaryon_corr[ss] = result; } );//end loop over lattice sites @@ -343,16 +337,15 @@ void BaryonUtils::ContractBaryons_Sliced(const mobj &D1, const Gamma GammaB_left, const Gamma GammaA_right, const Gamma GammaB_right, - const char * quarks_left, - const char * quarks_right, + const int wick_contraction, const int parity, + const int nt, robj &result) { assert(Ns==4 && "Baryon code only implemented for N_spin = 4"); assert(Nc==3 && "Baryon code only implemented for N_colour = 3"); - std::cout << "Contraction <" << quarks_right[0] << quarks_right[1] << quarks_right[2] << "|" << quarks_left[0] << quarks_left[1] << quarks_left[2] << ">" << std::endl; std::cout << "GammaA (left) " << (GammaA_left.g) << std::endl; std::cout << "GammaB (left) " << (GammaB_left.g) << std::endl; std::cout << "GammaA (right) " << (GammaA_right.g) << std::endl; @@ -360,12 +353,13 @@ void BaryonUtils::ContractBaryons_Sliced(const 
mobj &D1, assert(parity==1 || parity == -1 && "Parity must be +1 or -1"); - int wick_contraction[6]; + bool wick_contractions[6]; for (int ie=0; ie < 6 ; ie++) - wick_contraction[ie] = (quarks_left[0] == quarks_right[epsilon[ie][0]] && quarks_left[1] == quarks_right[epsilon[ie][1]] && quarks_left[2] == quarks_right[epsilon[ie][2]]) ? 1 : 0; + wick_contractions[ie] = (ie == wick_contraction); - result=Zero(); - baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result); + for (int t=0; t Date: Fri, 12 Jun 2020 11:35:52 +0100 Subject: [PATCH 57/86] Added Baryon3pt code --- Grid/qcd/utils/BaryonUtils.h | 408 +++++++++++++++++++++++++++++++++++ 1 file changed, 408 insertions(+) diff --git a/Grid/qcd/utils/BaryonUtils.h b/Grid/qcd/utils/BaryonUtils.h index 23267270..a6f8b78d 100644 --- a/Grid/qcd/utils/BaryonUtils.h +++ b/Grid/qcd/utils/BaryonUtils.h @@ -84,6 +84,55 @@ public: const char * quarks_right, const int parity, robj &result); + private: + template + static void Baryon_Gamma_3pt_Group1_Site( + const mobj &Dq1_ti, + const mobj2 &Dq2_spec, + const mobj2 &Dq3_spec, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result); + + template + static void Baryon_Gamma_3pt_Group2_Site( + const mobj2 &Dq1_spec, + const mobj &Dq2_ti, + const mobj2 &Dq3_spec, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result); + + template + static void Baryon_Gamma_3pt_Group3_Site( + const mobj2 &Dq1_spec, + const mobj2 &Dq2_spec, + const mobj &Dq3_ti, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result); + public: + template + static void Baryon_Gamma_3pt( + const PropagatorField &q_ti, + const mobj &Dq_spec1, + const mobj &Dq_spec2, + const PropagatorField &q_tf, + int group, + int wick_contraction, + const Gamma GammaJ, 
+ const Gamma GammaBi, + const Gamma GammaBf, + SpinMatrixField &stn_corr); private: template static void Sigma_to_Nucleon_Q1_Eye_site(const mobj &Dq_loop, @@ -371,6 +420,365 @@ void BaryonUtils::ContractBaryons_Sliced(const mobj &D1, /*********************************************************************** * End of Baryon 2pt-function code. * * * + * The following code is for baryonGamma3pt function * + **********************************************************************/ + +/* Dq1_ti is a quark line from t_i to t_J + * Dq2_spec is a quark line from t_i to t_f + * Dq3_spec is a quark line from t_i to t_f + * Dq4_tf is a quark line from t_f to t_J */ +template +template +void BaryonUtils::Baryon_Gamma_3pt_Group1_Site( + const mobj &Dq1_ti, + const mobj2 &Dq2_spec, + const mobj2 &Dq3_spec, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result) +{ + Gamma g5(Gamma::Algebra::Gamma5); + + auto adjD4_g_D1 = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq1_ti; + auto Gf_adjD4_g_D1 = GammaBf * adjD4_g_D1; + auto D2_Gi = Dq2_spec * GammaBi; + auto Gf_D2_Gi = GammaBf * D2_Gi; + auto Gf_D3 = GammaBf * Dq3_spec; + + int a_f, b_f, c_f; + int a_i, b_i, c_i; + + Complex ee; + + auto D2_Gi_ab_aa = D2_Gi ()(0,0)(0,0); + auto Gf_D3_ab_bb = Gf_D3 ()(0,0)(0,0); + auto Gf_D2_Gi_ab_ba = Gf_D2_Gi ()(0,0)(0,0); + auto ee_adjD4_g_D1_ag_ac = ee * adjD4_g_D1 ()(0,0)(0,0); + auto ee_Gf_adjD4_g_D1_ag_bc = ee * Gf_adjD4_g_D1()(0,0)(0,0); + auto Dq3_spec_ab_ab = Dq3_spec ()(0,0)(0,0); + auto ee_adjD4_g_D1_gg_cc = ee * adjD4_g_D1 ()(0,0)(0,0); + auto Dq3_spec_gb_cb = Dq3_spec ()(0,0)(0,0); + auto D2_Gi_gb_ca = D2_Gi ()(0,0)(0,0); + + for (int ie_f=0; ie_f < 6 ; ie_f++){ + a_f = epsilon[ie_f][0]; //a + b_f = epsilon[ie_f][1]; //b + c_f = epsilon[ie_f][2]; //c + for (int ie_i=0; ie_i < 6 ; ie_i++){ + a_i = epsilon[ie_i][0]; //a' + b_i = epsilon[ie_i][1]; //b' + c_i = epsilon[ie_i][2]; //c' + + ee = epsilon_sgn[ie_f] * 
epsilon_sgn[ie_i]; + + for (int alpha_f=0; alpha_f +template +void BaryonUtils::Baryon_Gamma_3pt_Group2_Site( + const mobj2 &Dq1_spec, + const mobj &Dq2_ti, + const mobj2 &Dq3_spec, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result) +{ + Gamma g5(Gamma::Algebra::Gamma5); + + auto adjD4_g_D2_Gi = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq2_ti * GammaBi; + auto Gf_adjD4_g_D2_Gi = GammaBf * adjD4_g_D2_Gi; + auto Gf_D1 = GammaBf * Dq1_spec; + auto Gf_D3 = GammaBf * Dq3_spec; + + int a_f, b_f, c_f; + int a_i, b_i, c_i; + + Complex ee; + + auto adjD4_g_D2_Gi_ab_aa = adjD4_g_D2_Gi ()(0,0)(0,0); + auto Gf_D3_ab_bb = Gf_D3 ()(0,0)(0,0); + auto Gf_adjD4_g_D2_Gi_ab_ba = Gf_adjD4_g_D2_Gi ()(0,0)(0,0); + auto Dq3_spec_ab_ab = Dq3_spec ()(0,0)(0,0); + auto ee_Dq1_spec_ag_ac = ee* Dq1_spec ()(0,0)(0,0); + auto ee_Gf_D1_ag_bc = ee*Gf_D1 ()(0,0)(0,0); + auto ee_Dq1_spec_gg_cc = ee*Dq1_spec ()(0,0)(0,0); + auto Dq3_spec_gb_cb = Dq3_spec ()(0,0)(0,0); + auto adjD4_g_D2_Gi_gb_ca = adjD4_g_D2_Gi ()(0,0)(0,0); + + for (int ie_f=0; ie_f < 6 ; ie_f++){ + a_f = epsilon[ie_f][0]; //a + b_f = epsilon[ie_f][1]; //b + c_f = epsilon[ie_f][2]; //c + for (int ie_i=0; ie_i < 6 ; ie_i++){ + a_i = epsilon[ie_i][0]; //a' + b_i = epsilon[ie_i][1]; //b' + c_i = epsilon[ie_i][2]; //c' + + ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; + + for (int alpha_f=0; alpha_f +template +void BaryonUtils::Baryon_Gamma_3pt_Group3_Site( + const mobj2 &Dq1_spec, + const mobj2 &Dq2_spec, + const mobj &Dq3_ti, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result) +{ + Gamma g5(Gamma::Algebra::Gamma5); + + auto adjD4_g_D3 = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq3_ti; + auto Gf_adjD4_g_D3 = GammaBf * adjD4_g_D3; + auto Gf_D1 = GammaBf * Dq1_spec; + auto D2_Gi = Dq2_spec * GammaBi; + auto Gf_D2_Gi = GammaBf * D2_Gi; + + int a_f, b_f, c_f; + int a_i, b_i, c_i; + + Complex ee; + + 
auto D2_Gi_ab_aa = D2_Gi ()(0,0)(0,0); + auto Gf_adjD4_g_D3_ab_bb = Gf_adjD4_g_D3 ()(0,0)(0,0); + auto Gf_D2_Gi_ab_ba = Gf_D2_Gi ()(0,0)(0,0); + auto adjD4_g_D3_ab_ab = adjD4_g_D3 ()(0,0)(0,0); + auto ee_Dq1_spec_ag_ac = ee * Dq1_spec ()(0,0)(0,0); + auto ee_Gf_D1_ag_bc = ee * Gf_D1 ()(0,0)(0,0); + auto ee_Dq1_spec_gg_cc = ee * Dq1_spec ()(0,0)(0,0); + auto adjD4_g_D3_gb_cb = adjD4_g_D3 ()(0,0)(0,0); + auto D2_Gi_gb_ca = D2_Gi ()(0,0)(0,0); + + for (int ie_f=0; ie_f < 6 ; ie_f++){ + a_f = epsilon[ie_f][0]; //a + b_f = epsilon[ie_f][1]; //b + c_f = epsilon[ie_f][2]; //c + for (int ie_i=0; ie_i < 6 ; ie_i++){ + a_i = epsilon[ie_i][0]; //a' + b_i = epsilon[ie_i][1]; //b' + c_i = epsilon[ie_i][2]; //c' + + ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; + + for (int alpha_f=0; alpha_f +template +void BaryonUtils::Baryon_Gamma_3pt( + const PropagatorField &q_ti, + const mobj &Dq_spec1, + const mobj &Dq_spec2, + const PropagatorField &q_tf, + int group, + int wick_contraction, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + SpinMatrixField &stn_corr) +{ + GridBase *grid = q_tf.Grid(); + + auto vcorr= stn_corr.View(); + auto vq_ti = q_ti.View(); + auto vq_tf = q_tf.View(); + + if (group ==1) { + thread_for(ss,grid->oSites(),{ + auto Dq_ti = vq_ti[ss]; + auto Dq_tf = vq_tf[ss]; + sobj result=Zero(); + Baryon_Gamma_3pt_Group1_Site(Dq_ti,Dq_spec1,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); + vcorr[ss] += result; + });//end loop over lattice sites + } else if (group == 2) { + thread_for(ss,grid->oSites(),{ + auto Dq_ti = vq_ti[ss]; + auto Dq_tf = vq_tf[ss]; + sobj result=Zero(); + Baryon_Gamma_3pt_Group2_Site(Dq_spec1,Dq_ti,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); + vcorr[ss] += result; + });//end loop over lattice sites + } else if (group == 3) { + thread_for(ss,grid->oSites(),{ + auto Dq_ti = vq_ti[ss]; + auto Dq_tf = vq_tf[ss]; + sobj result=Zero(); + 
Baryon_Gamma_3pt_Group3_Site(Dq_spec1,Dq_spec2,Dq_ti,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); + + vcorr[ss] += result; + });//end loop over lattice sites + } + +} + + +/*********************************************************************** + * End of BaryonGamma3pt-function code. * + * * * The following code is for Sigma -> N rare hypeon decays * **********************************************************************/ From 5f5807d60a7f6bcbc16dc5a25a157fb54cc6edcd Mon Sep 17 00:00:00 2001 From: Christoph Lehner Date: Fri, 12 Jun 2020 14:48:23 -0400 Subject: [PATCH 58/86] cleanup --- Grid/allocator/GridMemoryManager.cc | 145 ---------------------------- Grid/allocator/GridMemoryManager.h | 42 -------- Grid/lattice/Lattice_base.h | 13 --- 3 files changed, 200 deletions(-) delete mode 100644 Grid/allocator/GridMemoryManager.cc delete mode 100644 Grid/allocator/GridMemoryManager.h diff --git a/Grid/allocator/GridMemoryManager.cc b/Grid/allocator/GridMemoryManager.cc deleted file mode 100644 index 369f72f7..00000000 --- a/Grid/allocator/GridMemoryManager.cc +++ /dev/null @@ -1,145 +0,0 @@ -#include - -NAMESPACE_BEGIN(Grid); - -#define _GRID_MEM_PAGE_SIZE 4096 -void* _grid_mem_base = 0; -size_t _grid_mem_pages; -struct _grid_mem_range { - size_t page_start, page_end; -}; -std::vector<_grid_mem_range> _grid_mem_avail; -std::map _grid_mem_alloc; - -void gridMemoryInit() { -#ifdef GRID_NVCC - size_t free,total; - cudaMemGetInfo(&free,&total); - - char* ev = getenv("GRID_DEVICE_BYTES_FOR_CACHE"); - if (ev) { - long bytes; - assert(sscanf(ev,"%ld",&bytes)==1); - free -= bytes; - } - - _grid_mem_pages = free / _GRID_MEM_PAGE_SIZE; - size_t sz = _grid_mem_pages * _GRID_MEM_PAGE_SIZE; - - assert(cudaSuccess == cudaMallocManaged(&_grid_mem_base,sz)); - - int target; - cudaGetDevice(&target); - cudaMemAdvise(_grid_mem_base,sz,cudaMemAdviseSetPreferredLocation,target); - - assert(cudaSuccess == cudaMemset(_grid_mem_base,0,sz)); // touch on device - std::cout << 
GridLogMessage << "gridMemoryInit: " << sz << " bytes" << std::endl; - - _grid_mem_avail.push_back( { 0, _grid_mem_pages } ); -#endif -} - -void gridMallocManaged(void** pp, size_t sz) { -#ifdef GRID_NVCC - if (_grid_mem_avail.empty()) - gridMemoryInit(); - - size_t pages = (sz + _GRID_MEM_PAGE_SIZE - 1) / _GRID_MEM_PAGE_SIZE; - // find free block - size_t m; - for (m=0;m<_grid_mem_avail.size();m++) { - auto & b = _grid_mem_avail[m]; - if (b.page_end - b.page_start >= pages) - break; - } - if (m == _grid_mem_avail.size()) { - std::cout << GridLogMessage << "Out of memory" << std::endl; - assert(0); - } - *pp = (char*)_grid_mem_base + _GRID_MEM_PAGE_SIZE*_grid_mem_avail[m].page_start; - _grid_mem_alloc[*pp] = { _grid_mem_avail[m].page_start, _grid_mem_avail[m].page_start + pages }; - _grid_mem_avail[m].page_start += pages; -#else - *pp = malloc(sz); -#endif -} - -void gridFree(void* p) { -#ifdef GRID_NVCC - if (_grid_mem_avail.empty()) - gridMemoryInit(); - - auto & alloc = _grid_mem_alloc[p]; - if (alloc.page_start == alloc.page_end) { - free(p); - //cudaFreeHost(p); - } else { - // can we enlarge existing one? 
- for (size_t m=0;m<_grid_mem_avail.size();m++) { - auto & b = _grid_mem_avail[m]; - if (b.page_start == alloc.page_end) { - b.page_start = alloc.page_start; - return; - } - if (b.page_end == alloc.page_start) { - b.page_end = alloc.page_end; - return; - } - } - // fragment memory - _grid_mem_avail.push_back( alloc ); - } - _grid_mem_alloc.erase(p); -#else - free(p); -#endif -} - -void gridAcceleratorPrefetch(void* p, size_t sz) { -#ifdef GRID_NVCC - auto & alloc = _grid_mem_alloc[p]; - if (alloc.page_start == alloc.page_end) // pinned to host - return; - - int target; - cudaGetDevice(&target); - cudaMemPrefetchAsync(p,sz,target); -#endif -} - -void gridMemGetInfo(size_t* pfree, size_t* ptotal) { -#ifdef GRID_NVCC - if (_grid_mem_avail.empty()) - gridMemoryInit(); - - *ptotal = _grid_mem_pages * _GRID_MEM_PAGE_SIZE; - *pfree = 0; - for (auto & a : _grid_mem_avail) - *pfree += (a.page_end - a.page_start) * _GRID_MEM_PAGE_SIZE; -#else - *pfree = 0; - *ptotal = 0; -#endif -} - -void gridMoveToHost(void** pp) { -#ifdef GRID_NVCC - if (_grid_mem_avail.empty()) - gridMemoryInit(); - - auto & alloc = _grid_mem_alloc[*pp]; - if (alloc.page_start == alloc.page_end) // already on host - return; - - size_t sz = (alloc.page_end - alloc.page_start) * _GRID_MEM_PAGE_SIZE; - void*pn; - //assert(cudaSuccess == cudaMallocHost(&pn,sz)); - pn = malloc(sz); - memcpy(pn,*pp,sz); - gridFree(*pp); - *pp = pn; - _grid_mem_alloc[pn] = { 0,0 }; -#endif -} - -NAMESPACE_END(Grid); diff --git a/Grid/allocator/GridMemoryManager.h b/Grid/allocator/GridMemoryManager.h deleted file mode 100644 index 9e619301..00000000 --- a/Grid/allocator/GridMemoryManager.h +++ /dev/null @@ -1,42 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/GridMemoryManager.h - - Copyright (C) 2020 - -Author: Christoph Lehner - - This program is free software; you can redistribute it and/or modify - it 
under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#ifndef GRID_MEMORY_MANAGER_H -#define GRID_MEMORY_MANAGER_H - -NAMESPACE_BEGIN(Grid); - -void gridMemoryInit(); -void gridMallocManaged(void** pp, size_t sz); -void gridMoveToHost(void** pp); -void gridAcceleratorPrefetch(void* p, size_t sz); -void gridMemGetInfo(size_t* pfree, size_t* ptotal); -void gridFree(void* p); - -NAMESPACE_END(Grid); - -#endif diff --git a/Grid/lattice/Lattice_base.h b/Grid/lattice/Lattice_base.h index 668583a1..73b1b6a1 100644 --- a/Grid/lattice/Lattice_base.h +++ b/Grid/lattice/Lattice_base.h @@ -81,19 +81,6 @@ private: } public: - void Advise(int advise) { -#ifdef GRID_NVCC -#ifndef __CUDA_ARCH__ // only on host - if (advise & AdviseInfrequentUse) { - gridMoveToHost((void**)&this->_odata); - } - if (advise & AdviseReadMostly) { - //cudaMemAdvise(_odata,_odata_size*sizeof(vobj),cudaMemAdviseSetReadMostly,-1); - } -#endif -#endif - }; - ///////////////////////////////////////////////////////////////////////////////// // Can use to make accelerator dirty without copy from host ; useful for temporaries "dont care" prev contents ///////////////////////////////////////////////////////////////////////////////// From 
b5e87e8d9746ead5baffa477063d119232db3d8e Mon Sep 17 00:00:00 2001 From: Christoph Lehner Date: Fri, 12 Jun 2020 18:16:12 -0400 Subject: [PATCH 59/86] summit compile fixes --- Grid/algorithms/CoarsenedMatrix.h | 33 +++++++++++++++++++++++++++++++ Grid/allocator/MemoryManager.h | 2 +- Grid/lattice/Lattice_transfer.h | 19 +++++++++--------- Grid/lattice/Lattice_view.h | 7 +++++-- 4 files changed, 49 insertions(+), 12 deletions(-) diff --git a/Grid/algorithms/CoarsenedMatrix.h b/Grid/algorithms/CoarsenedMatrix.h index e56b39c5..8d184aea 100644 --- a/Grid/algorithms/CoarsenedMatrix.h +++ b/Grid/algorithms/CoarsenedMatrix.h @@ -120,6 +120,39 @@ public: blockPromote(CoarseVec,FineVec,subspace); } + virtual void CreateSubspace(GridParallelRNG &RNG,LinearOperatorBase &hermop,int nn=nbasis) { + + RealD scale; + + ConjugateGradient CG(1.0e-2,100,false); + FineField noise(FineGrid); + FineField Mn(FineGrid); + + for(int b=0;b "< "< & out, const Lattice & in) { //////////////////////////////////////////////////////////////////////////////////////////// template inline auto localInnerProductD(const Lattice &lhs,const Lattice &rhs) --> Lattice> +-> Lattice> { autoView( lhs_v , lhs, AcceleratorRead); autoView( rhs_v , rhs, AcceleratorRead); @@ -283,7 +283,7 @@ template Lattice coarse_inner(coarse); // Precision promotion - fine_inner = localInnerProductD(fineX,fineY); + fine_inner = localInnerProductD(fineX,fineY); blockSum(coarse_inner,fine_inner); { autoView( CoarseInner_ , CoarseInner,AcceleratorWrite); @@ -486,13 +486,14 @@ inline void blockPromote(const Lattice > &coarseData, for(int i=0;i > ip = PeekIndex<0>(coarseData,i); - Lattice cip(coarse); - autoView( cip_ , cip, AcceleratorWrite); - autoView( ip_ , ip, AcceleratorRead); - accelerator_forNB(sc,coarse->oSites(),CComplex::Nsimd(),{ - coalescedWrite(cip_[sc], ip_(sc)()); - }); - blockZAXPY(fineData,cip,Basis[i],fineData); + //Lattice cip(coarse); + //autoView( cip_ , cip, AcceleratorWrite); + //autoView( ip_ , ip, 
AcceleratorRead); + //accelerator_forNB(sc,coarse->oSites(),CComplex::Nsimd(),{ + // coalescedWrite(cip_[sc], ip_(sc)()); + // }); + //blockZAXPY(fineData,cip,Basis[i],fineData); + blockZAXPY(fineData,ip,Basis[i],fineData); } } #endif diff --git a/Grid/lattice/Lattice_view.h b/Grid/lattice/Lattice_view.h index d21ab874..a10acd87 100644 --- a/Grid/lattice/Lattice_view.h +++ b/Grid/lattice/Lattice_view.h @@ -30,11 +30,14 @@ protected: int checkerboard; vobj *_odata; // A managed pointer uint64_t _odata_size; + ViewAdvise advise; public: - accelerator_inline LatticeAccelerator() : checkerboard(0), _odata(nullptr), _odata_size(0), _grid(nullptr) { }; + accelerator_inline LatticeAccelerator() : checkerboard(0), _odata(nullptr), _odata_size(0), _grid(nullptr), advise(AdviseDefault) { }; accelerator_inline uint64_t oSites(void) const { return _odata_size; }; accelerator_inline int Checkerboard(void) const { return checkerboard; }; accelerator_inline int &Checkerboard(void) { return this->checkerboard; }; // can assign checkerboard on a container, not a view + accelerator_inline ViewAdvise Advise(void) const { return advise; }; + accelerator_inline ViewAdvise &Advise(void) { return this->advise; }; // can assign advise on a container, not a view accelerator_inline void Conformable(GridBase * &grid) const { if (grid) conformable(grid, _grid); @@ -86,7 +89,7 @@ public: MemoryManager::ViewOpen(this->cpu_ptr, this->_odata_size*sizeof(vobj), mode, - AdviseDefault); + this->advise); } void ViewClose(void) { // Inform the manager From 3dccd7aa2c376fe175d26c27c2704b21a971f138 Mon Sep 17 00:00:00 2001 From: Christoph Lehner Date: Sun, 14 Jun 2020 13:26:01 -0400 Subject: [PATCH 60/86] Catch edge case in SharedMemoryMPI::GetShmDims; Change default units to consistent MB in init args; Want last element not past last element in MemoryManagerCache.cc --- Grid/allocator/MemoryManagerCache.cc | 2 +- Grid/communicator/SharedMemoryMPI.cc | 2 +- Grid/util/Init.cc | 8 ++++---- 3 files 
changed, 6 insertions(+), 6 deletions(-) diff --git a/Grid/allocator/MemoryManagerCache.cc b/Grid/allocator/MemoryManagerCache.cc index 5e259a32..5dd7575e 100644 --- a/Grid/allocator/MemoryManagerCache.cc +++ b/Grid/allocator/MemoryManagerCache.cc @@ -77,7 +77,7 @@ void MemoryManager::LRUinsert(AcceleratorViewEntry &AccCache) assert(AccCache.LRU_valid==0); if (AccCache.transient) { LRU.push_back(AccCache.CpuPtr); - AccCache.LRU_entry = LRU.end(); + AccCache.LRU_entry = --LRU.end(); } else { LRU.push_front(AccCache.CpuPtr); AccCache.LRU_entry = LRU.begin(); diff --git a/Grid/communicator/SharedMemoryMPI.cc b/Grid/communicator/SharedMemoryMPI.cc index 31d6f46c..45fefc71 100644 --- a/Grid/communicator/SharedMemoryMPI.cc +++ b/Grid/communicator/SharedMemoryMPI.cc @@ -180,7 +180,7 @@ void GlobalSharedMemory::GetShmDims(const Coordinate &WorldDims,Coordinate &ShmD && divides(prime,WorldShmSize/AutoShmSize) ) { AutoShmSize*=prime; ShmDims[dim]*=prime; - last_dim = (dim + ndimension - 1) % ndimension; + last_dim = dim; break; } } diff --git a/Grid/util/Init.cc b/Grid/util/Init.cc index 01348384..cd85a784 100644 --- a/Grid/util/Init.cc +++ b/Grid/util/Init.cc @@ -297,11 +297,11 @@ void Grid_init(int *argc,char ***argv) } if( GridCmdOptionExists(*argv,*argv+*argc,"--device-mem") ){ - int GB; + int MB; arg= GridCmdOptionPayload(*argv,*argv+*argc,"--device-mem"); - GridCmdOptionInt(arg,GB); - uint64_t GB64 = GB; - MemoryManager::DeviceMaxBytes = GB64*1024LL*1024LL*1024LL; + GridCmdOptionInt(arg,MB); + uint64_t MB64 = MB; + MemoryManager::DeviceMaxBytes = MB64*1024LL*1024LL; } if( GridCmdOptionExists(*argv,*argv+*argc,"--hypercube") ){ From edf17708a813d4ee2c4765dc24bd7e4943c3e784 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 18 Jun 2020 22:41:06 -0400 Subject: [PATCH 61/86] Range improvement --- tests/hmc/Test_multishift_sqrt.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/hmc/Test_multishift_sqrt.cc b/tests/hmc/Test_multishift_sqrt.cc index 
5a6d8ba9..31697c12 100644 --- a/tests/hmc/Test_multishift_sqrt.cc +++ b/tests/hmc/Test_multishift_sqrt.cc @@ -104,7 +104,7 @@ int main (int argc, char ** argv) GridDefaultMpi()); double lo=0.001; - double hi=1.0; + double hi=20.0; int precision=64; int degree=10; AlgRemez remez(lo,hi,precision); From 1aa988b2af51e07862cbabd4aa3302a7ffef7f7e Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 19 Jun 2020 01:21:14 -0400 Subject: [PATCH 62/86] Comms overlap fix UVM case --- .../fermion/implementation/WilsonFermionImplementation.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h index fd81d322..f647bef8 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h @@ -67,7 +67,12 @@ WilsonFermion::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid, diag_mass = 4.0 + mass; } - + int vol4; + vol4=Fgrid.oSites(); + Stencil.BuildSurfaceList(1,vol4); + vol4=Hgrid.oSites(); + StencilEven.BuildSurfaceList(1,vol4); + StencilOdd.BuildSurfaceList(1,vol4); } template From 66005929af0eba50e811f2e0a96a3262dd665753 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 19 Jun 2020 12:50:54 -0400 Subject: [PATCH 63/86] Set up the cache size on all ranks --- Grid/threads/Accelerator.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Grid/threads/Accelerator.cc b/Grid/threads/Accelerator.cc index d049fd2f..ca46f119 100644 --- a/Grid/threads/Accelerator.cc +++ b/Grid/threads/Accelerator.cc @@ -37,9 +37,10 @@ void acceleratorInit(void) #define GPU_PROP_FMT(canMapHostMemory,FMT) printf("AcceleratorCudaInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory); #define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d"); cudaGetDeviceProperties(&gpu_props[i], i); + cudaDeviceProp prop; + prop = 
gpu_props[i]; + totalDeviceMem = prop.totalGlobalMem; if ( world_rank == 0) { - cudaDeviceProp prop; - prop = gpu_props[i]; printf("AcceleratorCudaInit: ========================\n"); printf("AcceleratorCudaInit: Device Number : %d\n", i); printf("AcceleratorCudaInit: ========================\n"); @@ -49,7 +50,6 @@ void acceleratorInit(void) GPU_PROP(managedMemory); GPU_PROP(isMultiGpuBoard); GPU_PROP(warpSize); - totalDeviceMem = prop.totalGlobalMem; // GPU_PROP(unifiedAddressing); // GPU_PROP(l2CacheSize); // GPU_PROP(singleToDoublePrecisionPerfRatio); From 11bc1aeadcf8f43c4e52af52e0fa8c1e7188d835 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 19 Jun 2020 14:30:35 -0400 Subject: [PATCH 64/86] TThread count defaultt to fastest --- Grid/threads/Accelerator.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Grid/threads/Accelerator.cc b/Grid/threads/Accelerator.cc index ca46f119..2c4ad9df 100644 --- a/Grid/threads/Accelerator.cc +++ b/Grid/threads/Accelerator.cc @@ -1,7 +1,7 @@ #include NAMESPACE_BEGIN(Grid); -uint32_t accelerator_threads=8; +uint32_t accelerator_threads=2; uint32_t acceleratorThreads(void) {return accelerator_threads;}; void acceleratorThreads(uint32_t t) {accelerator_threads = t;}; From b949cf6b12e7a88894344f7284c242aa3eb9eb4b Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 19 Jun 2020 17:13:27 -0400 Subject: [PATCH 65/86] PeekLocal needs a view to keep thread safe. 
ALLOCATION_CACHEE reenable --- Grid/algorithms/FFT.h | 9 ++-- Grid/allocator/MemoryManager.cc | 24 ++++++--- Grid/allocator/MemoryManager.h | 2 +- Grid/lattice/Lattice_peekpoke.h | 24 ++++----- Grid/lattice/Lattice_transfer.h | 32 ++++++----- Grid/lattice/Lattice_view.h | 2 + Grid/qcd/action/fermion/DomainWallVec5dImpl.h | 15 +++--- Grid/qcd/action/fermion/StaggeredVec5dImpl.h | 15 +----- .../WilsonCloverFermionImplementation.h | 53 ++++++++++--------- .../WilsonFermion5DImplementation.h | 34 +++++++----- Grid/util/Init.cc | 11 +++- 11 files changed, 125 insertions(+), 96 deletions(-) diff --git a/Grid/algorithms/FFT.h b/Grid/algorithms/FFT.h index 550186fc..765305d7 100644 --- a/Grid/algorithms/FFT.h +++ b/Grid/algorithms/FFT.h @@ -230,14 +230,15 @@ public: result = source; int pc = processor_coor[dim]; for(int p=0;plSites(),{ Coordinate cbuf(Nd); sobj s; sgrid->LocalIndexToLocalCoor(idx,cbuf); - peekLocalSite(s,result,cbuf); - cbuf[dim]+=((pc+p) % processors[dim])*L; - // cbuf[dim]+=p*L; - pokeLocalSite(s,pgbuf,cbuf); + peekLocalSite(s,r_v,cbuf); + acbuf[dim]+=((pc+p) % processors[dim])*L; + pokeLocalSite(s,p_v,cbuf); }); if (p != processors[dim] - 1) { result = Cshift(result,dim,L); diff --git a/Grid/allocator/MemoryManager.cc b/Grid/allocator/MemoryManager.cc index 17850333..6d638b60 100644 --- a/Grid/allocator/MemoryManager.cc +++ b/Grid/allocator/MemoryManager.cc @@ -86,23 +86,33 @@ void MemoryManager::Init(void) Ncache[AccSmall]=Nc; } } - std::cout << "MemoryManager::Init() setting up"< &l,const Coordinate &site){ ////////////////////////////////////////////////////////// // Peek a scalar object from the SIMD array ////////////////////////////////////////////////////////// +// Must be CPU read view template -inline void peekLocalSite(sobj &s,const Lattice &l,Coordinate &site) +inline void peekLocalSite(sobj &s,const LatticeView &l,Coordinate &site) { - GridBase *grid = l.Grid(); - + GridBase *grid = l.getGrid(); + assert(l.mode==CpuRead); typedef typename 
vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; int Nsimd = grid->Nsimd(); - assert( l.Checkerboard()== l.Grid()->CheckerBoard(site)); + assert( l.Checkerboard()== grid->CheckerBoard(site)); assert( sizeof(sobj)*Nsimd == sizeof(vobj)); static const int words=sizeof(vobj)/sizeof(vector_type); @@ -172,8 +173,7 @@ inline void peekLocalSite(sobj &s,const Lattice &l,Coordinate &site) idx= grid->iIndex(site); odx= grid->oIndex(site); - autoView( l_v , l, CpuRead); - scalar_type * vp = (scalar_type *)&l_v[odx]; + scalar_type * vp = (scalar_type *)&l[odx]; scalar_type * pt = (scalar_type *)&s; for(int w=0;w &l,Coordinate &site) return; }; - +// Must be CPU write view template -inline void pokeLocalSite(const sobj &s,Lattice &l,Coordinate &site) +inline void pokeLocalSite(const sobj &s,LatticeView &l,Coordinate &site) { - GridBase *grid=l.Grid(); + GridBase *grid=l.getGrid(); + assert(l.mode==CpuWrite); typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; int Nsimd = grid->Nsimd(); - assert( l.Checkerboard()== l.Grid()->CheckerBoard(site)); + assert( l.Checkerboard()== grid->CheckerBoard(site)); assert( sizeof(sobj)*Nsimd == sizeof(vobj)); static const int words=sizeof(vobj)/sizeof(vector_type); @@ -201,8 +202,7 @@ inline void pokeLocalSite(const sobj &s,Lattice &l,Coordinate &site) idx= grid->iIndex(site); odx= grid->oIndex(site); - autoView( l_v , l, CpuWrite); - scalar_type * vp = (scalar_type *)&l_v[odx]; + scalar_type * vp = (scalar_type *)&l[odx]; scalar_type * pt = (scalar_type *)&s; for(int w=0;w &in,Lattice &out) assert(ig->lSites() == og->lSites()); } + autoView(in_v,in,CpuRead); + autoView(out_v,out,CpuWrite); thread_for(idx, ig->lSites(),{ sobj s; ssobj ss; Coordinate lcoor(ni); ig->LocalIndexToLocalCoor(idx,lcoor); - peekLocalSite(s,in,lcoor); + peekLocalSite(s,in_v,lcoor); ss=s; - pokeLocalSite(ss,out,lcoor); + pokeLocalSite(ss,out_v,lcoor); }); } @@ -588,8 +590,6 @@ void 
localCopyRegion(const Lattice &From,Lattice & To,Coordinate Fro for(int w=0;w &lowDim,Lattice & higherDim,int slice } // the above should guarantee that the operations are local + autoView(lowDimv,lowDim,CpuRead); + autoView(higherDimv,higherDim,CpuWrite); thread_for(idx,lg->lSites(),{ sobj s; Coordinate lcoor(nl); @@ -632,8 +634,8 @@ void InsertSlice(const Lattice &lowDim,Lattice & higherDim,int slice hcoor[d]=lcoor[ddl++]; } } - peekLocalSite(s,lowDim,lcoor); - pokeLocalSite(s,higherDim,hcoor); + peekLocalSite(s,lowDimv,lcoor); + pokeLocalSite(s,higherDimv,hcoor); }); } @@ -661,6 +663,8 @@ void ExtractSlice(Lattice &lowDim,const Lattice & higherDim,int slic } } // the above should guarantee that the operations are local + autoView(lowDimv,lowDim,CpuWrite); + autoView(higherDimv,higherDim,CpuRead); thread_for(idx,lg->lSites(),{ sobj s; Coordinate lcoor(nl); @@ -673,8 +677,8 @@ void ExtractSlice(Lattice &lowDim,const Lattice & higherDim,int slic hcoor[d]=lcoor[ddl++]; } } - peekLocalSite(s,higherDim,hcoor); - pokeLocalSite(s,lowDim,lcoor); + peekLocalSite(s,higherDimv,hcoor); + pokeLocalSite(s,lowDimv,lcoor); }); } @@ -702,6 +706,8 @@ void InsertSliceLocal(const Lattice &lowDim, Lattice & higherDim,int } // the above should guarantee that the operations are local + autoView(lowDimv,lowDim,CpuRead); + autoView(higherDimv,higherDim,CpuWrite); thread_for(idx,lg->lSites(),{ sobj s; Coordinate lcoor(nl); @@ -710,8 +716,8 @@ void InsertSliceLocal(const Lattice &lowDim, Lattice & higherDim,int if( lcoor[orthog] == slice_lo ) { hcoor=lcoor; hcoor[orthog] = slice_hi; - peekLocalSite(s,lowDim,lcoor); - pokeLocalSite(s,higherDim,hcoor); + peekLocalSite(s,lowDimv,lcoor); + pokeLocalSite(s,higherDimv,hcoor); } }); } @@ -739,6 +745,8 @@ void ExtractSliceLocal(Lattice &lowDim,const Lattice & higherDim,int } // the above should guarantee that the operations are local + autoView(lowDimv,lowDim,CpuWrite); + autoView(higherDimv,higherDim,CpuRead); thread_for(idx,lg->lSites(),{ sobj 
s; Coordinate lcoor(nl); @@ -747,8 +755,8 @@ void ExtractSliceLocal(Lattice &lowDim,const Lattice & higherDim,int if( lcoor[orthog] == slice_lo ) { hcoor=lcoor; hcoor[orthog] = slice_hi; - peekLocalSite(s,higherDim,hcoor); - pokeLocalSite(s,lowDim,lcoor); + peekLocalSite(s,higherDimv,hcoor); + pokeLocalSite(s,lowDimv,lcoor); } }); } diff --git a/Grid/lattice/Lattice_view.h b/Grid/lattice/Lattice_view.h index a10acd87..3b76b921 100644 --- a/Grid/lattice/Lattice_view.h +++ b/Grid/lattice/Lattice_view.h @@ -43,6 +43,8 @@ public: if (grid) conformable(grid, _grid); else grid = _grid; }; + // Host only + GridBase * getGrid(void) const { return _grid; }; }; ///////////////////////////////////////////////////////////////////////////////////////// diff --git a/Grid/qcd/action/fermion/DomainWallVec5dImpl.h b/Grid/qcd/action/fermion/DomainWallVec5dImpl.h index 890c680b..0c8a0930 100644 --- a/Grid/qcd/action/fermion/DomainWallVec5dImpl.h +++ b/Grid/qcd/action/fermion/DomainWallVec5dImpl.h @@ -114,19 +114,22 @@ public: U = adj(Cshift(U, mu, -1)); PokeIndex(Uadj, U, mu); } - - for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) { + + autoView(Umu_v,Umu,CpuRead); + autoView(Uadj_v,Uadj,CpuRead); + autoView(Uds_v,Uds,CpuWrite); + thread_for( lidx, GaugeGrid->lSites(), { Coordinate lcoor; GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor); - peekLocalSite(ScalarUmu, Umu, lcoor); + peekLocalSite(ScalarUmu, Umu_v, lcoor); for (int mu = 0; mu < 4; mu++) ScalarUds(mu) = ScalarUmu(mu); - peekLocalSite(ScalarUmu, Uadj, lcoor); + peekLocalSite(ScalarUmu, Uadj_v, lcoor); for (int mu = 0; mu < 4; mu++) ScalarUds(mu + 4) = ScalarUmu(mu); - pokeLocalSite(ScalarUds, Uds, lcoor); - } + pokeLocalSite(ScalarUds, Uds_v, lcoor); + }); } inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,FermionField &A, int mu) diff --git a/Grid/qcd/action/fermion/StaggeredVec5dImpl.h b/Grid/qcd/action/fermion/StaggeredVec5dImpl.h index 2d4de18e..18fe993c 100644 --- 
a/Grid/qcd/action/fermion/StaggeredVec5dImpl.h +++ b/Grid/qcd/action/fermion/StaggeredVec5dImpl.h @@ -113,20 +113,7 @@ public: inline void InsertGaugeField(DoubledGaugeField &U_ds,const GaugeLinkField &U,int mu) { - GridBase *GaugeGrid = U_ds.Grid(); - thread_for(lidx, GaugeGrid->lSites(),{ - - SiteScalarGaugeLink ScalarU; - SiteDoubledGaugeField ScalarUds; - - Coordinate lcoor; - GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor); - peekLocalSite(ScalarUds, U_ds, lcoor); - - peekLocalSite(ScalarU, U, lcoor); - ScalarUds(mu) = ScalarU(); - - }); + assert(0); } inline void DoubleStore(GridBase *GaugeGrid, DoubledGaugeField &UUUds, // for Naik term diff --git a/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h index 36447153..df1bce7c 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h @@ -98,32 +98,35 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) Coordinate lcoor; typename SiteCloverType::scalar_object Qx = Zero(), Qxinv = Zero(); - for (int site = 0; site < lvol; site++) { - grid->LocalIndexToLocalCoor(site, lcoor); - EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); - peekLocalSite(Qx, CloverTerm, lcoor); - Qxinv = Zero(); - //if (csw!=0){ - for (int j = 0; j < Ns; j++) - for (int k = 0; k < Ns; k++) - for (int a = 0; a < DimRep; a++) - for (int b = 0; b < DimRep; b++){ - auto zz = Qx()(j, k)(a, b); - EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex(zz); - } - // if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl; - - EigenInvCloverOp = EigenCloverOp.inverse(); - //std::cout << EigenInvCloverOp << std::endl; - for (int j = 0; j < Ns; j++) - for (int k = 0; k < Ns; k++) - for (int a = 0; a < DimRep; a++) - for (int b = 0; b < DimRep; b++) - Qxinv()(j, k)(a, b) = 
EigenInvCloverOp(a + j * DimRep, b + k * DimRep); - // if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl; - // } - pokeLocalSite(Qxinv, CloverTermInv, lcoor); + autoView(CTv,CloverTerm,CpuRead); + autoView(CTIv,CloverTermInv,CpuWrite); + for (int site = 0; site < lvol; site++) { + grid->LocalIndexToLocalCoor(site, lcoor); + EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); + peekLocalSite(Qx, CTv, lcoor); + Qxinv = Zero(); + //if (csw!=0){ + for (int j = 0; j < Ns; j++) + for (int k = 0; k < Ns; k++) + for (int a = 0; a < DimRep; a++) + for (int b = 0; b < DimRep; b++){ + auto zz = Qx()(j, k)(a, b); + EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex(zz); + } + // if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl; + + EigenInvCloverOp = EigenCloverOp.inverse(); + //std::cout << EigenInvCloverOp << std::endl; + for (int j = 0; j < Ns; j++) + for (int k = 0; k < Ns; k++) + for (int a = 0; a < DimRep; a++) + for (int b = 0; b < DimRep; b++) + Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep); + // if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl; + // } + pokeLocalSite(Qxinv, CTIv, lcoor); + } } // Separate the even and odd parts diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h index 2a202a77..2cc308cc 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h @@ -580,16 +580,21 @@ void WilsonFermion5D::MomentumSpacePropagatorHt_5d(FermionField &out,const cosha = (one + W*W + sk) / (abs(W)*2.0); // FIXME Need a Lattice acosh - for(int idx=0;idx<_grid->lSites();idx++){ - Coordinate lcoor(Nd); - Tcomplex cc; - // RealD sgn; - _grid->LocalIndexToLocalCoor(idx,lcoor); - peekLocalSite(cc,cosha,lcoor); - 
assert((double)real(cc)>=1.0); - assert(fabs((double)imag(cc))<=1.0e-15); - cc = ScalComplex(::acosh(real(cc)),0.0); - pokeLocalSite(cc,a,lcoor); + + { + autoView(cosha_v,cosha,CpuRead); + autoView(a_v,a,CpuWrite); + for(int idx=0;idx<_grid->lSites();idx++){ + Coordinate lcoor(Nd); + Tcomplex cc; + // RealD sgn; + _grid->LocalIndexToLocalCoor(idx,lcoor); + peekLocalSite(cc,cosha_v,lcoor); + assert((double)real(cc)>=1.0); + assert(fabs((double)imag(cc))<=1.0e-15); + cc = ScalComplex(::acosh(real(cc)),0.0); + pokeLocalSite(cc,a_v,lcoor); + } } Wea = ( exp( a) * abs(W) ); @@ -775,17 +780,20 @@ void WilsonFermion5D::MomentumSpacePropagatorHt(FermionField &out,const Fe cosha = (one + W*W + sk) / (abs(W)*2.0); // FIXME Need a Lattice acosh + { + autoView(cosha_v,cosha,CpuRead); + autoView(a_v,a,CpuWrite); for(int idx=0;idx<_grid->lSites();idx++){ Coordinate lcoor(Nd); Tcomplex cc; // RealD sgn; _grid->LocalIndexToLocalCoor(idx,lcoor); - peekLocalSite(cc,cosha,lcoor); + peekLocalSite(cc,cosha_v,lcoor); assert((double)real(cc)>=1.0); assert(fabs((double)imag(cc))<=1.0e-15); cc = ScalComplex(::acosh(real(cc)),0.0); - pokeLocalSite(cc,a,lcoor); - } + pokeLocalSite(cc,a_v,lcoor); + }} Wea = ( exp( a) * abs(W) ); Wema= ( exp(-a) * abs(W) ); diff --git a/Grid/util/Init.cc b/Grid/util/Init.cc index cd85a784..e93f3046 100644 --- a/Grid/util/Init.cc +++ b/Grid/util/Init.cc @@ -286,8 +286,6 @@ void Grid_init(int *argc,char ***argv) ////////////////////////////////////////////////////////// acceleratorInit(); // Must come first to set device prior to MPI init due to Omnipath Driver - MemoryManager::Init(); - if( GridCmdOptionExists(*argv,*argv+*argc,"--shm") ){ int MB; arg= GridCmdOptionPayload(*argv,*argv+*argc,"--shm"); @@ -358,6 +356,15 @@ void Grid_init(int *argc,char ***argv) std::cout << GridLogMessage << "MPI is initialised and logging filters activated "< Date: Fri, 19 Jun 2020 17:36:05 -0400 Subject: [PATCH 66/86] Typo fix (excusee - my keyboard is starting to break) --- 
Grid/algorithms/FFT.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Grid/algorithms/FFT.h b/Grid/algorithms/FFT.h index 765305d7..1a3e1eba 100644 --- a/Grid/algorithms/FFT.h +++ b/Grid/algorithms/FFT.h @@ -237,7 +237,7 @@ public: sobj s; sgrid->LocalIndexToLocalCoor(idx,cbuf); peekLocalSite(s,r_v,cbuf); - acbuf[dim]+=((pc+p) % processors[dim])*L; + cbuf[dim]+=((pc+p) % processors[dim])*L; pokeLocalSite(s,p_v,cbuf); }); if (p != processors[dim] - 1) { From 1a74816c25d199fd3cfda5a960ffa849f6aaa693 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 19 Jun 2020 17:50:52 -0400 Subject: [PATCH 67/86] Hopeefully fixed --- Grid/algorithms/FFT.h | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/Grid/algorithms/FFT.h b/Grid/algorithms/FFT.h index 1a3e1eba..ad42f049 100644 --- a/Grid/algorithms/FFT.h +++ b/Grid/algorithms/FFT.h @@ -230,16 +230,18 @@ public: result = source; int pc = processor_coor[dim]; for(int p=0;plSites(),{ + { + autoView(r_v,result,CpuRead); + autoView(p_v,pgbuf,CpuWrite); + thread_for(idx, sgrid->lSites(),{ Coordinate cbuf(Nd); sobj s; sgrid->LocalIndexToLocalCoor(idx,cbuf); peekLocalSite(s,r_v,cbuf); cbuf[dim]+=((pc+p) % processors[dim])*L; pokeLocalSite(s,p_v,cbuf); - }); + }); + } if (p != processors[dim] - 1) { result = Cshift(result,dim,L); } @@ -268,15 +270,19 @@ public: flops+= flops_call*NN; // writing out result - thread_for(idx,sgrid->lSites(),{ + { + autoView(pgbuf_v,pgbuf,CpuRead); + autoView(result_v,result,CpuWrite); + thread_for(idx,sgrid->lSites(),{ Coordinate clbuf(Nd), cgbuf(Nd); sobj s; sgrid->LocalIndexToLocalCoor(idx,clbuf); cgbuf = clbuf; cgbuf[dim] = clbuf[dim]+L*pc; - peekLocalSite(s,pgbuf,cgbuf); - pokeLocalSite(s,result,clbuf); - }); + peekLocalSite(s,pgbuf_v,cgbuf); + pokeLocalSite(s,result_v,clbuf); + }); + } result = result*div; // destroying plan From 0d2f913a1a802c09d80d2fad28a360821b78dfe3 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sat, 20 Jun 2020 
09:37:31 -0400 Subject: [PATCH 68/86] String.h for linux --- Grid/threads/Accelerator.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h index 1cb6d637..5bf17072 100644 --- a/Grid/threads/Accelerator.h +++ b/Grid/threads/Accelerator.h @@ -28,6 +28,8 @@ Author: paboyle /* END LEGAL */ #pragma once +#include + #ifdef HAVE_MALLOC_MALLOC_H #include #endif From 6c5fa8dcd875bb9fb5e50b25adb8534711bdc4da Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sat, 20 Jun 2020 14:34:29 -0400 Subject: [PATCH 69/86] Aligned allocate on CPU put through this interface --- Grid/threads/Accelerator.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h index 5bf17072..74a3ea22 100644 --- a/Grid/threads/Accelerator.h +++ b/Grid/threads/Accelerator.h @@ -336,12 +336,11 @@ inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ hipMemc ////////////////////////////////////////////// // CPU Target - No accelerator just thread instead ////////////////////////////////////////////// +#define GRID_ALLOC_ALIGN (2*1024*1024) // 2MB aligned #if ( (!defined(GRID_SYCL)) && (!defined(GRID_CUDA)) && (!defined(GRID_HIP)) ) #undef GRID_SIMT -#define GRID_ALLOC_ALIGN (2*1024*1024) // 2MB aligned - #define accelerator #define accelerator_inline strong_inline #define accelerator_for(iterator,num,nsimd, ... 
) thread_for(iterator, num, { __VA_ARGS__ }); @@ -367,6 +366,14 @@ inline void acceleratorFreeDevice(void *ptr){free(ptr);}; #endif // CPU target +#ifdef HAVE_MM_MALLOC_H +inline void *acceleratorAllocCpu(size_t bytes){return _mm_malloc(bytes,GRID_ALLOC_ALIGN);}; +inline void acceleratorFreeCpu (void *ptr){_mm_free(ptr);}; +#else +inline void *acceleratorAllocCpu(size_t bytes){return memalign(GRID_ALLOC_ALIGN,bytes);}; +inline void acceleratorFreeCpu (void *ptr){free(ptr);}; +#endif + /////////////////////////////////////////////////// // Synchronise across local threads for divergence resynch /////////////////////////////////////////////////// From c48da35921035c45dee83d1cdb251a834b199831 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 22 Jun 2020 20:21:53 -0400 Subject: [PATCH 70/86] Memory Vector UVM and Lattice alignedAllocator separate --- ...GeneralisedConjugateResidualNonHermitian.h | 241 ++++++++++++++++++ Grid/allocator/AlignedAllocator.h | 62 ++++- Grid/allocator/MemoryManager.cc | 65 ++++- Grid/allocator/MemoryManager.h | 12 +- 4 files changed, 350 insertions(+), 30 deletions(-) create mode 100644 Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h diff --git a/Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h b/Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h new file mode 100644 index 00000000..22b7725e --- /dev/null +++ b/Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h @@ -0,0 +1,241 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/algorithms/iterative/PrecGeneralisedConjugateResidual.h + + Copyright (C) 2015 + +Author: Azusa Yamaguchi +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either 
version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef GRID_PREC_GCR_NON_HERM_H +#define GRID_PREC_GCR_NON_HERM_H + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +//VPGCR Abe and Zhang, 2005. +//INTERNATIONAL JOURNAL OF NUMERICAL ANALYSIS AND MODELING +//Computing and Information Volume 2, Number 2, Pages 147-161 +//NB. Likely not original reference since they are focussing on a preconditioner variant. 
+// but VPGCR was nicely written up in their paper +/////////////////////////////////////////////////////////////////////////////////////////////////////// +NAMESPACE_BEGIN(Grid); + +#define GCRLogLevel std::cout << GridLogMessage < +class PrecGeneralisedConjugateResidualNonHermitian : public LinearFunction { +public: + + RealD Tolerance; + Integer MaxIterations; + int verbose; + int mmax; + int nstep; + int steps; + int level; + GridStopWatch PrecTimer; + GridStopWatch MatTimer; + GridStopWatch LinalgTimer; + + LinearFunction &Preconditioner; + LinearOperatorBase &Linop; + + void Level(int lv) { level=lv; }; + + PrecGeneralisedConjugateResidualNonHermitian(RealD tol,Integer maxit,LinearOperatorBase &_Linop,LinearFunction &Prec,int _mmax,int _nstep) : + Tolerance(tol), + MaxIterations(maxit), + Linop(_Linop), + Preconditioner(Prec), + mmax(_mmax), + nstep(_nstep) + { + level=1; + verbose=1; + }; + + void operator() (const Field &src, Field &psi){ + + psi=Zero(); + RealD cp, ssq,rsq; + ssq=norm2(src); + rsq=Tolerance*Tolerance*ssq; + + Field r(src.Grid()); + + PrecTimer.Reset(); + MatTimer.Reset(); + LinalgTimer.Reset(); + + GridStopWatch SolverTimer; + SolverTimer.Start(); + + steps=0; + for(int k=0;k q(mmax,grid); + std::vector p(mmax,grid); + std::vector qq(mmax); + + GCRLogLevel<< "PGCR nStep("<(mmax-1))?(mmax-1):(kp); // if more than mmax done, we orthog all mmax history. 
+ for(int back=0;back=0); + + b=-real(innerProduct(q[peri_back],Az))/qq[peri_back]; + p[peri_kp]=p[peri_kp]+b*p[peri_back]; + q[peri_kp]=q[peri_kp]+b*q[peri_back]; + + } + qq[peri_kp]=norm2(q[peri_kp]); // could use axpy_norm + LinalgTimer.Stop(); + } + assert(0); // never reached + return cp; + } +}; +NAMESPACE_END(Grid); +#endif diff --git a/Grid/allocator/AlignedAllocator.h b/Grid/allocator/AlignedAllocator.h index 6c6dd7d8..ebb3162b 100644 --- a/Grid/allocator/AlignedAllocator.h +++ b/Grid/allocator/AlignedAllocator.h @@ -52,41 +52,79 @@ public: pointer allocate(size_type __n, const void* _p= 0) { size_type bytes = __n*sizeof(_Tp); - profilerAllocate(bytes); - _Tp *ptr = (_Tp*) MemoryManager::CpuAllocate(bytes); - assert( ( (_Tp*)ptr != (_Tp *)NULL ) ); - return ptr; } void deallocate(pointer __p, size_type __n) { size_type bytes = __n * sizeof(_Tp); - profilerFree(bytes); - MemoryManager::CpuFree((void *)__p,bytes); } + // FIXME: hack for the copy constructor, eventually it must be avoided + //void construct(pointer __p, const _Tp& __val) { new((void *)__p) _Tp(__val); }; + void construct(pointer __p, const _Tp& __val) { assert(0);}; + void construct(pointer __p) { }; + void destroy(pointer __p) { }; +}; +template inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; } +template inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; } + +template +class uvmAllocator { +public: + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + typedef _Tp* pointer; + typedef const _Tp* const_pointer; + typedef _Tp& reference; + typedef const _Tp& const_reference; + typedef _Tp value_type; + + template struct rebind { typedef uvmAllocator<_Tp1> other; }; + uvmAllocator() throw() { } + uvmAllocator(const uvmAllocator&) throw() { } + template uvmAllocator(const uvmAllocator<_Tp1>&) throw() { } + ~uvmAllocator() throw() { } + pointer address(reference __x) const { return 
&__x; } + size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); } + + pointer allocate(size_type __n, const void* _p= 0) + { + size_type bytes = __n*sizeof(_Tp); + profilerAllocate(bytes); + _Tp *ptr = (_Tp*) MemoryManager::SharedAllocate(bytes); + assert( ( (_Tp*)ptr != (_Tp *)NULL ) ); + return ptr; + } + + void deallocate(pointer __p, size_type __n) + { + size_type bytes = __n * sizeof(_Tp); + profilerFree(bytes); + MemoryManager::SharedFree((void *)__p,bytes); + } + // FIXME: hack for the copy constructor, eventually it must be avoided void construct(pointer __p, const _Tp& __val) { new((void *)__p) _Tp(__val); }; //void construct(pointer __p, const _Tp& __val) { }; void construct(pointer __p) { }; void destroy(pointer __p) { }; }; -template inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; } -template inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; } +template inline bool operator==(const uvmAllocator<_Tp>&, const uvmAllocator<_Tp>&){ return true; } +template inline bool operator!=(const uvmAllocator<_Tp>&, const uvmAllocator<_Tp>&){ return false; } //////////////////////////////////////////////////////////////////////////////// // Template typedefs //////////////////////////////////////////////////////////////////////////////// -template using commAllocator = alignedAllocator; -template using Vector = std::vector >; -template using commVector = std::vector >; -template using Matrix = std::vector > >; +template using commAllocator = uvmAllocator; +template using Vector = std::vector >; +template using commVector = std::vector >; +//template using Matrix = std::vector > >; NAMESPACE_END(Grid); diff --git a/Grid/allocator/MemoryManager.cc b/Grid/allocator/MemoryManager.cc index 6d638b60..fa60e820 100644 --- a/Grid/allocator/MemoryManager.cc +++ b/Grid/allocator/MemoryManager.cc @@ -7,6 +7,17 @@ NAMESPACE_BEGIN(Grid); #define CpuSmall (1) #define Acc 
(2) #define AccSmall (3) +#define Shared (4) +#define SharedSmall (5) +uint64_t total_shared; +uint64_t total_device; +uint64_t total_host;; +void MemoryManager::PrintBytes(void) +{ + std::cout << " MemoryManager : "<=0) && (Nc < NallocCacheMax)) { Ncache[Cpu]=Nc; Ncache[Acc]=Nc; + Ncache[Shared]=Nc; } } @@ -84,6 +122,7 @@ void MemoryManager::Init(void) if ( (Nc>=0) && (Nc < NallocCacheMax)) { Ncache[CpuSmall]=Nc; Ncache[AccSmall]=Nc; + Ncache[SharedSmall]=Nc; } } std::cout << GridLogMessage<< "MemoryManager::Init() setting up"< Date: Tue, 23 Jun 2020 11:10:26 +0100 Subject: [PATCH 71/86] Baryon bug fixes --- Grid/qcd/utils/BaryonUtils.h | 168 +++++++++++++++++------------------ 1 file changed, 84 insertions(+), 84 deletions(-) diff --git a/Grid/qcd/utils/BaryonUtils.h b/Grid/qcd/utils/BaryonUtils.h index 3384d273..297f6a2e 100644 --- a/Grid/qcd/utils/BaryonUtils.h +++ b/Grid/qcd/utils/BaryonUtils.h @@ -168,107 +168,107 @@ template void BaryonUtils::baryon_site(const mobj &D1, const mobj &D2, const mobj &D3, - const Gamma GammaA_left, - const Gamma GammaB_left, - const Gamma GammaA_right, - const Gamma GammaB_right, + const Gamma GammaA_i, + const Gamma GammaB_i, + const Gamma GammaA_f, + const Gamma GammaB_f, const int parity, const bool * wick_contraction, robj &result) { Gamma g4(Gamma::Algebra::GammaT); //needed for parity P_\pm = 0.5*(1 \pm \gamma_4) - auto gD1a = GammaA_left * GammaA_right * D1; - auto gD1b = GammaA_left * g4 * GammaA_right * D1; - auto pD1 = 0.5* (gD1a + (Real)parity * gD1b); - auto gD3 = GammaB_right * D3; - auto D2g = D2 * GammaB_left; - auto pD1g = pD1 * GammaB_left; - auto gD3g = gD3 * GammaB_left; + + auto D1_GAi = D1 * GammaA_i; + auto D1_GAi_g4 = D1_GAi * g4; + auto D1_GAi_P = 0.5*(D1_GAi + (Real)parity * D1_GAi_g4); + auto GAf_D1_GAi_P = GammaA_f * D1_GAi_P; + auto GBf_D1_GAi_P = GammaB_f * D1_GAi_P; - for (int ie_left=0; ie_left < 6 ; ie_left++){ - int a_left = epsilon[ie_left][0]; //a - int b_left = epsilon[ie_left][1]; //b - int 
c_left = epsilon[ie_left][2]; //c - for (int ie_right=0; ie_right < 6 ; ie_right++){ - int a_right = epsilon[ie_right][0]; //a' - int b_right = epsilon[ie_right][1]; //b' - int c_right = epsilon[ie_right][2]; //c' - Real ee = epsilon_sgn[ie_left] * epsilon_sgn[ie_right]; + auto D2_GBi = D2 * GammaB_i; + auto GBf_D2_GBi = GammaB_f * D2_GBi; + auto GAf_D2_GBi = GammaA_f * D2_GBi; + + auto GBf_D3 = GammaB_f * D3; + auto GAf_D3 = GammaA_f * D3; + + for (int ie_f=0; ie_f < 6 ; ie_f++){ + int a_f = epsilon[ie_f][0]; //a + int b_f = epsilon[ie_f][1]; //b + int c_f = epsilon[ie_f][2]; //c + for (int ie_i=0; ie_i < 6 ; ie_i++){ + int a_i = epsilon[ie_i][0]; //a' + int b_i = epsilon[ie_i][1]; //b' + int c_i = epsilon[ie_i][2]; //c' + + Real ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; //This is the \delta_{456}^{123} part - if (wick_contraction[0]){ - for (int gamma_left=0; gamma_left From 3e97a26f90b66876fa44fd72fd444efb8ea61cb9 Mon Sep 17 00:00:00 2001 From: Raoul Hodgson Date: Tue, 23 Jun 2020 11:35:32 +0100 Subject: [PATCH 72/86] BaryonGamm3pt threads -> accelerator --- Grid/qcd/utils/BaryonUtils.h | 96 +++++++++++++----------------------- 1 file changed, 33 insertions(+), 63 deletions(-) diff --git a/Grid/qcd/utils/BaryonUtils.h b/Grid/qcd/utils/BaryonUtils.h index a6f8b78d..d36c3cb8 100644 --- a/Grid/qcd/utils/BaryonUtils.h +++ b/Grid/qcd/utils/BaryonUtils.h @@ -451,17 +451,7 @@ void BaryonUtils::Baryon_Gamma_3pt_Group1_Site( int a_f, b_f, c_f; int a_i, b_i, c_i; - Complex ee; - - auto D2_Gi_ab_aa = D2_Gi ()(0,0)(0,0); - auto Gf_D3_ab_bb = Gf_D3 ()(0,0)(0,0); - auto Gf_D2_Gi_ab_ba = Gf_D2_Gi ()(0,0)(0,0); - auto ee_adjD4_g_D1_ag_ac = ee * adjD4_g_D1 ()(0,0)(0,0); - auto ee_Gf_adjD4_g_D1_ag_bc = ee * Gf_adjD4_g_D1()(0,0)(0,0); - auto Dq3_spec_ab_ab = Dq3_spec ()(0,0)(0,0); - auto ee_adjD4_g_D1_gg_cc = ee * adjD4_g_D1 ()(0,0)(0,0); - auto Dq3_spec_gb_cb = Dq3_spec ()(0,0)(0,0); - auto D2_Gi_gb_ca = D2_Gi ()(0,0)(0,0); + Real ee; for (int ie_f=0; ie_f < 6 ; ie_f++){ a_f = 
epsilon[ie_f][0]; //a @@ -476,18 +466,18 @@ void BaryonUtils::Baryon_Gamma_3pt_Group1_Site( for (int alpha_f=0; alpha_f::Baryon_Gamma_3pt_Group2_Site( int a_f, b_f, c_f; int a_i, b_i, c_i; - Complex ee; - - auto adjD4_g_D2_Gi_ab_aa = adjD4_g_D2_Gi ()(0,0)(0,0); - auto Gf_D3_ab_bb = Gf_D3 ()(0,0)(0,0); - auto Gf_adjD4_g_D2_Gi_ab_ba = Gf_adjD4_g_D2_Gi ()(0,0)(0,0); - auto Dq3_spec_ab_ab = Dq3_spec ()(0,0)(0,0); - auto ee_Dq1_spec_ag_ac = ee* Dq1_spec ()(0,0)(0,0); - auto ee_Gf_D1_ag_bc = ee*Gf_D1 ()(0,0)(0,0); - auto ee_Dq1_spec_gg_cc = ee*Dq1_spec ()(0,0)(0,0); - auto Dq3_spec_gb_cb = Dq3_spec ()(0,0)(0,0); - auto adjD4_g_D2_Gi_gb_ca = adjD4_g_D2_Gi ()(0,0)(0,0); + Real ee; for (int ie_f=0; ie_f < 6 ; ie_f++){ a_f = epsilon[ie_f][0]; //a @@ -577,18 +557,18 @@ void BaryonUtils::Baryon_Gamma_3pt_Group2_Site( for (int alpha_f=0; alpha_f::Baryon_Gamma_3pt_Group3_Site( int a_f, b_f, c_f; int a_i, b_i, c_i; - Complex ee; - - auto D2_Gi_ab_aa = D2_Gi ()(0,0)(0,0); - auto Gf_adjD4_g_D3_ab_bb = Gf_adjD4_g_D3 ()(0,0)(0,0); - auto Gf_D2_Gi_ab_ba = Gf_D2_Gi ()(0,0)(0,0); - auto adjD4_g_D3_ab_ab = adjD4_g_D3 ()(0,0)(0,0); - auto ee_Dq1_spec_ag_ac = ee * Dq1_spec ()(0,0)(0,0); - auto ee_Gf_D1_ag_bc = ee * Gf_D1 ()(0,0)(0,0); - auto ee_Dq1_spec_gg_cc = ee * Dq1_spec ()(0,0)(0,0); - auto adjD4_g_D3_gb_cb = adjD4_g_D3 ()(0,0)(0,0); - auto D2_Gi_gb_ca = D2_Gi ()(0,0)(0,0); + Real ee; for (int ie_f=0; ie_f < 6 ; ie_f++){ a_f = epsilon[ie_f][0]; //a @@ -678,18 +648,18 @@ void BaryonUtils::Baryon_Gamma_3pt_Group3_Site( for (int alpha_f=0; alpha_f::Baryon_Gamma_3pt( auto vq_tf = q_tf.View(); if (group ==1) { - thread_for(ss,grid->oSites(),{ + accelerator_for(ss, grid->oSites(), grid->Nsimd(), { auto Dq_ti = vq_ti[ss]; auto Dq_tf = vq_tf[ss]; sobj result=Zero(); @@ -755,7 +725,7 @@ void BaryonUtils::Baryon_Gamma_3pt( vcorr[ss] += result; });//end loop over lattice sites } else if (group == 2) { - thread_for(ss,grid->oSites(),{ + accelerator_for(ss, grid->oSites(), grid->Nsimd(), { auto 
Dq_ti = vq_ti[ss]; auto Dq_tf = vq_tf[ss]; sobj result=Zero(); @@ -763,7 +733,7 @@ void BaryonUtils::Baryon_Gamma_3pt( vcorr[ss] += result; });//end loop over lattice sites } else if (group == 3) { - thread_for(ss,grid->oSites(),{ + accelerator_for(ss, grid->oSites(), grid->Nsimd(), { auto Dq_ti = vq_ti[ss]; auto Dq_tf = vq_tf[ss]; sobj result=Zero(); From 4ef50ba31f2b39f39492c35b57ee9e4c10457f3b Mon Sep 17 00:00:00 2001 From: Raoul Hodgson Date: Tue, 23 Jun 2020 11:44:20 +0100 Subject: [PATCH 73/86] Baryon speedup --- Grid/qcd/utils/BaryonUtils.h | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/Grid/qcd/utils/BaryonUtils.h b/Grid/qcd/utils/BaryonUtils.h index 297f6a2e..9e9a6957 100644 --- a/Grid/qcd/utils/BaryonUtils.h +++ b/Grid/qcd/utils/BaryonUtils.h @@ -205,11 +205,12 @@ void BaryonUtils::baryon_site(const mobj &D1, //This is the \delta_{456}^{123} part if (wick_contraction[0]){ for (int rho=0; rho::baryon_site(const mobj &D1, if (wick_contraction[1]){ for (int rho=0; rho::baryon_site(const mobj &D1, if (wick_contraction[2]){ for (int rho=0; rho::baryon_site(const mobj &D1, //This is the \delta_{456}^{132} part if (wick_contraction[3]){ for (int rho=0; rho::baryon_site(const mobj &D1, if (wick_contraction[4]){ for (int rho=0; rho::baryon_site(const mobj &D1, if (wick_contraction[5]){ for (int rho=0; rho Date: Tue, 23 Jun 2020 10:24:21 -0400 Subject: [PATCH 74/86] Adding code under development --- tests/solver/Test_dwf_multigrid.cc | 594 +++++++++++++++++++++++++++++ tests/solver/Test_hw_multigrid.cc | 356 +++++++++++++++++ 2 files changed, 950 insertions(+) create mode 100644 tests/solver/Test_dwf_multigrid.cc create mode 100644 tests/solver/Test_hw_multigrid.cc diff --git a/tests/solver/Test_dwf_multigrid.cc b/tests/solver/Test_dwf_multigrid.cc new file mode 100644 index 00000000..9e11c160 --- /dev/null +++ b/tests/solver/Test_dwf_multigrid.cc @@ -0,0 +1,594 @@ 
+/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_dwf_hdcr.cc + + Copyright (C) 2015 + +Author: Antonin Portelli +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#include +#include +#include + +using namespace std; +using namespace Grid; +/* Params + * Grid: + * block1(4) + * block2(4) + * + * Subspace + * * Fine : Subspace(nbasis,hi,lo,order,first,step) -- 32, 60,0.02,500,100,100 + * * Coarse: Subspace(nbasis,hi,lo,order,first,step) -- 32, 18,0.02,500,100,100 + + * Smoother: + * * Fine: Cheby(hi, lo, order) -- 60,0.5,10 + * * Coarse: Cheby(hi, lo, order) -- 12,0.1,4 + + * Lanczos: + * CoarseCoarse IRL( Nk, Nm, Nstop, poly(lo,hi,order)) 24,36,24,0.002,4.0,61 + */ + +template class SolverWrapper : public LinearFunction { +private: + LinearOperatorBase & _Matrix; + OperatorFunction & _Solver; + LinearFunction & _Guess; +public: + + ///////////////////////////////////////////////////// + // Wrap the usual normal equations trick + 
///////////////////////////////////////////////////// + SolverWrapper(LinearOperatorBase &Matrix, + OperatorFunction &Solver, + LinearFunction &Guess) + : _Matrix(Matrix), _Solver(Solver), _Guess(Guess) {}; + + void operator() (const Field &in, Field &out){ + + _Guess(in,out); + _Solver(_Matrix,in,out); // Mdag M out = Mdag in + + } +}; + + +// Must use a non-hermitian solver +template +class PVdagMLinearOperator : public LinearOperatorBase { + Matrix &_Mat; + Matrix &_PV; +public: + PVdagMLinearOperator(Matrix &Mat,Matrix &PV): _Mat(Mat),_PV(PV){}; + + void OpDiag (const Field &in, Field &out) { + assert(0); + } + void OpDir (const Field &in, Field &out,int dir,int disp) { + assert(0); + } + void OpDirAll (const Field &in, std::vector &out){ + assert(0); + }; + void Op (const Field &in, Field &out){ + Field tmp(in.Grid()); + _Mat.M(in,tmp); + _PV.Mdag(tmp,out); + } + void AdjOp (const Field &in, Field &out){ + Field tmp(in.Grid()); + _PV.M(tmp,out); + _Mat.Mdag(in,tmp); + } + void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ + assert(0); + } + void HermOp(const Field &in, Field &out){ + assert(0); + } +}; + + +RealD InverseApproximation(RealD x){ + return 1.0/x; +} + +template class ChebyshevSmoother : public LinearFunction +{ +public: + typedef LinearOperatorBase FineOperator; + Matrix & _SmootherMatrix; + FineOperator & _SmootherOperator; + + Chebyshev Cheby; + + ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator,Matrix &SmootherMatrix) : + _SmootherOperator(SmootherOperator), + _SmootherMatrix(SmootherMatrix), + Cheby(_lo,_hi,_ord,InverseApproximation) + {}; + + void operator() (const Field &in, Field &out) + { + Field tmp(in.Grid()); + MdagMLinearOperator MdagMOp(_SmootherMatrix); + _SmootherOperator.AdjOp(in,tmp); + Cheby(MdagMOp,tmp,out); + } +}; + +template class MirsSmoother : public LinearFunction +{ +public: + typedef LinearOperatorBase FineOperator; + Matrix & SmootherMatrix; + FineOperator & 
SmootherOperator; + RealD tol; + RealD shift; + int maxit; + + MirsSmoother(RealD _shift,RealD _tol,int _maxit,FineOperator &_SmootherOperator,Matrix &_SmootherMatrix) : + shift(_shift),tol(_tol),maxit(_maxit), + SmootherOperator(_SmootherOperator), + SmootherMatrix(_SmootherMatrix) + {}; + + void operator() (const Field &in, Field &out) + { + ZeroGuesser Guess; + ConjugateGradient CG(tol,maxit,false); + + Field src(in.Grid()); + + ShiftedMdagMLinearOperator,Field> MdagMOp(SmootherMatrix,shift); + SmootherOperator.AdjOp(in,src); + Guess(src,out); + CG(MdagMOp,src,out); + } +}; + +#define GridLogLevel std::cout << GridLogMessage < +class HDCRPreconditioner : public LinearFunction< Lattice > { +public: + + typedef Aggregation Aggregates; + typedef CoarsenedMatrix CoarseOperator; + typedef typename Aggregation::CoarseVector CoarseVector; + typedef typename Aggregation::CoarseMatrix CoarseMatrix; + typedef typename Aggregation::FineField FineField; + typedef LinearOperatorBase FineOperator; + typedef LinearFunction FineSmoother; + + Aggregates & _Aggregates; + FineOperator & _FineOperator; + FineSmoother & _Smoother; + CoarseSolver & _CoarseSolve; + + int level; void Level(int lv) {level = lv; }; + + + HDCRPreconditioner(Aggregates &Agg, + FineOperator &Fine, + FineSmoother &Smoother, + CoarseSolver &CoarseSolve_) + : _Aggregates(Agg), + _FineOperator(Fine), + _Smoother(Smoother), + _CoarseSolve(CoarseSolve_), + level(1) { } + + virtual void operator()(const FineField &in, FineField & out) + { + auto CoarseGrid = _Aggregates.CoarseGrid; + CoarseVector Csrc(CoarseGrid); + CoarseVector Csol(CoarseGrid); + FineField vec1(in.Grid()); + FineField vec2(in.Grid()); + + double t; + // Fine Smoother + t=-usecond(); + _Smoother(in,out); + t+=usecond(); + GridLogLevel << "Smoother took "<< t/1000.0<< "ms" < +class MultiGridPreconditioner : public LinearFunction< Lattice > { +public: + + typedef Aggregation Aggregates; + typedef CoarsenedMatrix CoarseOperator; + typedef typename 
Aggregation::CoarseVector CoarseVector; + typedef typename Aggregation::CoarseMatrix CoarseMatrix; + typedef typename Aggregation::FineField FineField; + typedef LinearOperatorBase FineOperator; + typedef LinearFunction FineSmoother; + + Aggregates & _Aggregates; + CoarseOperator & _CoarseOperator; + FineOperator & _FineOperator; + Guesser & _Guess; + FineSmoother & _Smoother; + CoarseSolver & _CoarseSolve; + + int level; void Level(int lv) {level = lv; }; + + + MultiGridPreconditioner(Aggregates &Agg, CoarseOperator &Coarse, + FineOperator &Fine, + FineSmoother &Smoother, + Guesser &Guess_, + CoarseSolver &CoarseSolve_) + : _Aggregates(Agg), + _CoarseOperator(Coarse), + _FineOperator(Fine), + _Smoother(Smoother), + _Guess(Guess_), + _CoarseSolve(CoarseSolve_), + level(1) { } + + virtual void operator()(const FineField &in, FineField & out) + { + CoarseVector Csrc(_CoarseOperator.Grid()); + CoarseVector Csol(_CoarseOperator.Grid()); + FineField vec1(in.Grid()); + FineField vec2(in.Grid()); + + double t; + // Fine Smoother + t=-usecond(); + _Smoother(in,out); + t+=usecond(); + GridLogLevel << "Smoother took "<< t/1000.0<< "ms" < block ({2,2,2,2}); + std::vector blockc ({2,2,2,2}); + const int nbasis= 32; + const int nbasisc= 32; + auto clatt = GridDefaultLatt(); + for(int d=0;d seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + std::vector cseeds({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + GridParallelRNG CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds); + LatticeFermion src(FGrid); gaussian(RNG5,src);// src=src+g5*src; + LatticeFermion result(FGrid); + LatticeGaugeField Umu(UGrid); + + FieldMetaData header; + std::string file("./ckpoint_lat.4000"); + NerscIO::readConfiguration(Umu,header,file); + + std::cout< Subspace; + typedef CoarsenedMatrix CoarseOperator; + typedef CoarseOperator::CoarseVector CoarseVector; + typedef CoarseOperator::siteVector siteVector; + 
std::cout< HermDefOp(Ddwf); + + Subspace Aggregates(Coarse5d,FGrid,0); + + assert ( (nbasis & 0x1)==0); + { + int nb=nbasis/2; + Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.02,500,100,100,0.0); + for(int n=0;n Level1Op; + typedef CoarsenedMatrix,nbasisc> Level2Op; + + Gamma5R5HermitianLinearOperator HermIndefOp(Ddwf); + Gamma5R5HermitianLinearOperator HermIndefOpPV(Dpv); + + std::cout< CoarseBiCGSTAB(tol,MaxIt); + ConjugateGradient CoarseCG(tol,MaxIt); + // GeneralisedMinimalResidual CoarseGMRES(tol,MaxIt,20); + + BiCGSTAB FineBiCGSTAB(tol,MaxIt); + ConjugateGradient FineCG(tol,MaxIt); + // GeneralisedMinimalResidual FineGMRES(tol,MaxIt,20); + + MdagMLinearOperator FineMdagM(Ddwf); // M^\dag M + PVdagMLinearOperator FinePVdagM(Ddwf,Dpv);// M_{pv}^\dag M + SchurDiagMooeeOperator FineDiagMooee(Ddwf); // M_ee - Meo Moo^-1 Moe + SchurDiagOneOperator FineDiagOne(Ddwf); // 1 - M_ee^{-1} Meo Moo^{-1} Moe e + + MdagMLinearOperator CoarseMdagM(LDOp); + PVdagMLinearOperator CoarsePVdagM(LDOp,LDOpPV); + + std::cout< IRLCheby(0.03,12.0,71); // 1 iter + FunctionHermOp IRLOpCheby(IRLCheby,CoarseMdagM); + PlainHermOp IRLOp (CoarseMdagM); + int Nk=64; + int Nm=128; + int Nstop=Nk; + ImplicitlyRestartedLanczos IRL(IRLOpCheby,IRLOp,Nstop,Nk,Nm,1.0e-3,20); + + int Nconv; + std::vector eval(Nm); + std::vector evec(Nm,Coarse5d); + IRL.calc(eval,evec,c_src,Nconv); + + std::cout< DeflCoarseGuesser(evec,eval); + NormalEquations DeflCoarseCGNE (LDOp,CoarseCG,DeflCoarseGuesser); + c_res=Zero(); + DeflCoarseCGNE(c_src,c_res); + + + std::cout< CoarseMgridCG(0.001,1000); + ChebyshevSmoother FineSmoother(0.5,60.0,10,HermIndefOp,Ddwf); + + typedef HDCRPreconditioner > TwoLevelHDCR; + TwoLevelHDCR TwoLevelPrecon(Aggregates, + HermIndefOp, + FineSmoother, + DeflCoarseCGNE); + TwoLevelPrecon.Level(1); + // PrecGeneralisedConjugateResidual l1PGCR(1.0e-8,100,HermIndefOp,TwoLevelPrecon,16,16); + PrecGeneralisedConjugateResidualNonHermitian 
l1PGCR(1.0e-8,100,HermIndefOp,TwoLevelPrecon,16,16); + l1PGCR.Level(1); + + f_res=Zero(); + + CoarseCG.Tolerance=0.02; + l1PGCR(f_src,f_res); + + std::cout< CoarseMgridBiCGSTAB(0.01,1000); + BiCGSTAB FineMgridBiCGSTAB(0.0,24); + ZeroGuesser CoarseZeroGuesser; + ZeroGuesser FineZeroGuesser; + + SolverWrapper FineBiCGSmoother( FinePVdagM, FineMgridBiCGSTAB, FineZeroGuesser); + SolverWrapper CoarsePVdagMSolver(CoarsePVdagM,CoarseMgridBiCGSTAB,CoarseZeroGuesser); + typedef HDCRPreconditioner > TwoLevelMG; + + TwoLevelMG _TwoLevelMG(Aggregates, + FinePVdagM, + FineBiCGSmoother, + CoarsePVdagMSolver); + _TwoLevelMG.Level(1); + + PrecGeneralisedConjugateResidualNonHermitian pvPGCR(1.0e-8,100,FinePVdagM,_TwoLevelMG,16,16); + pvPGCR.Level(1); + + f_res=Zero(); + pvPGCR(f_src,f_res); + + std::cout< +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#include +//#include +#include + +using namespace std; +using namespace Grid; +/* Params + * Grid: + * block1(4) + * block2(4) + * + * Subspace + * * Fine : Subspace(nbasis,hi,lo,order,first,step) -- 32, 60,0.02,500,100,100 + * * Coarse: Subspace(nbasis,hi,lo,order,first,step) -- 32, 18,0.02,500,100,100 + + * Smoother: + * * Fine: Cheby(hi, lo, order) -- 60,0.5,10 + * * Coarse: Cheby(hi, lo, order) -- 12,0.1,4 + + * Lanczos: + * CoarseCoarse IRL( Nk, Nm, Nstop, poly(lo,hi,order)) 24,36,24,0.002,4.0,61 + */ + +template class SolverWrapper : public LinearFunction { +private: + LinearOperatorBase & _Matrix; + OperatorFunction & _Solver; + LinearFunction & _Guess; +public: + + ///////////////////////////////////////////////////// + // Wrap the usual normal equations trick + ///////////////////////////////////////////////////// + SolverWrapper(LinearOperatorBase &Matrix, + OperatorFunction &Solver, + LinearFunction &Guess) + : _Matrix(Matrix), _Solver(Solver), _Guess(Guess) {}; + + void operator() (const Field &in, Field &out){ + + _Guess(in,out); + _Solver(_Matrix,in,out); // Mdag M out = Mdag in + + } +}; + + +// Must use a non-hermitian solver +template +class PVdagMLinearOperator : public LinearOperatorBase { + Matrix &_Mat; + Matrix &_PV; +public: + PVdagMLinearOperator(Matrix &Mat,Matrix &PV): _Mat(Mat),_PV(PV){}; + + void OpDiag (const Field &in, Field &out) { + assert(0); + } + void OpDir (const Field &in, Field &out,int dir,int disp) { + assert(0); + } + void OpDirAll (const Field &in, std::vector &out){ + assert(0); + }; + void Op (const Field &in, Field &out){ + Field tmp(in.Grid()); + _Mat.M(in,tmp); + _PV.Mdag(tmp,out); + } + void AdjOp (const Field &in, Field &out){ + Field tmp(in.Grid()); + _PV.M(tmp,out); + _Mat.Mdag(in,tmp); + } + void 
HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ + assert(0); + } + void HermOp(const Field &in, Field &out){ + assert(0); + } +}; + + +RealD InverseApproximation(RealD x){ + return 1.0/x; +} + +template class ChebyshevSmoother : public LinearFunction +{ +public: + typedef LinearOperatorBase FineOperator; + Matrix & _SmootherMatrix; + FineOperator & _SmootherOperator; + + Chebyshev Cheby; + + ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator,Matrix &SmootherMatrix) : + _SmootherOperator(SmootherOperator), + _SmootherMatrix(SmootherMatrix), + Cheby(_lo,_hi,_ord,InverseApproximation) + {}; + + void operator() (const Field &in, Field &out) + { + Field tmp(in.Grid()); + MdagMLinearOperator MdagMOp(_SmootherMatrix); + _SmootherOperator.AdjOp(in,tmp); + Cheby(MdagMOp,tmp,out); + } +}; + +template class MirsSmoother : public LinearFunction +{ +public: + typedef LinearOperatorBase FineOperator; + Matrix & SmootherMatrix; + FineOperator & SmootherOperator; + RealD tol; + RealD shift; + int maxit; + + MirsSmoother(RealD _shift,RealD _tol,int _maxit,FineOperator &_SmootherOperator,Matrix &_SmootherMatrix) : + shift(_shift),tol(_tol),maxit(_maxit), + SmootherOperator(_SmootherOperator), + SmootherMatrix(_SmootherMatrix) + {}; + + void operator() (const Field &in, Field &out) + { + ZeroGuesser Guess; + ConjugateGradient CG(tol,maxit,false); + + Field src(in.Grid()); + + ShiftedMdagMLinearOperator,Field> MdagMOp(SmootherMatrix,shift); + SmootherOperator.AdjOp(in,src); + Guess(src,out); + CG(MdagMOp,src,out); + } +}; + +#define GridLogLevel std::cout << GridLogMessage < +class HDCRPreconditioner : public LinearFunction< Lattice > { +public: + + typedef Aggregation Aggregates; + typedef CoarsenedMatrix CoarseOperator; + typedef typename Aggregation::CoarseVector CoarseVector; + typedef typename Aggregation::CoarseMatrix CoarseMatrix; + typedef typename Aggregation::FineField FineField; + typedef LinearOperatorBase FineOperator; + typedef 
LinearFunction FineSmoother; + + Aggregates & _Aggregates; + FineOperator & _FineOperator; + FineSmoother & _Smoother; + CoarseSolver & _CoarseSolve; + + int level; void Level(int lv) {level = lv; }; + + + HDCRPreconditioner(Aggregates &Agg, + FineOperator &Fine, + FineSmoother &Smoother, + CoarseSolver &CoarseSolve_) + : _Aggregates(Agg), + _FineOperator(Fine), + _Smoother(Smoother), + _CoarseSolve(CoarseSolve_), + level(1) { } + + virtual void operator()(const FineField &in, FineField & out) + { + auto CoarseGrid = _Aggregates.CoarseGrid; + CoarseVector Csrc(CoarseGrid); + CoarseVector Csol(CoarseGrid); + FineField vec1(in.Grid()); + FineField vec2(in.Grid()); + + double t; + // Fine Smoother + t=-usecond(); + _Smoother(in,out); + t+=usecond(); + GridLogLevel << "Smoother took "<< t/1000.0<< "ms" < block ({2,2,2,2}); + const int nbasis= 32; + + auto clatt = GridDefaultLatt(); + for(int d=0;d seeds({1,2,3,4}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds); + GridParallelRNG CRNG(Coarse5d);CRNG.SeedFixedIntegers(seeds); + + LatticeGaugeField Umu(UGrid); + FieldMetaData header; + std::string file("./ckpoint_lat.4000"); + NerscIO::readConfiguration(Umu,header,file); + + std::cout< Subspace; + typedef CoarsenedMatrix CoarseOperator; + typedef CoarseOperator::CoarseVector CoarseVector; + typedef CoarseOperator::siteVector siteVector; + + std::cout< SubspaceOp(Dw); + + Subspace Aggregates4D(Coarse4d,UGrid,0); + Subspace Aggregates5D(Coarse5d,FGrid,0); + + assert ( (nbasis & 0x1)==0); + std::cout< Level1Op; + + + NonHermitianLinearOperator LinOpDwf(Ddwf); + + Level1Op LDOp (*Coarse5d,1); + LDOp.CoarsenOperator(FGrid,LinOpDwf,Aggregates5D); + + std::cout< Date: Tue, 23 Jun 2020 22:14:56 -0400 Subject: [PATCH 75/86] UVM used shared for CPU alloccations andd ddont migrate --- Grid/allocator/MemoryManager.cc | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git 
a/Grid/allocator/MemoryManager.cc b/Grid/allocator/MemoryManager.cc index fa60e820..c92cd18a 100644 --- a/Grid/allocator/MemoryManager.cc +++ b/Grid/allocator/MemoryManager.cc @@ -35,8 +35,6 @@ void *MemoryManager::AcceleratorAllocate(size_t bytes) if ( ptr == (void *) NULL ) { ptr = (void *) acceleratorAllocDevice(bytes); total_device+=bytes; - // std::cout <<"AcceleratorAllocate: allocated Accelerator pointer "< Date: Wed, 24 Jun 2020 08:24:38 -0400 Subject: [PATCH 76/86] Memory manager initialise earlier --- Grid/util/Init.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Grid/util/Init.cc b/Grid/util/Init.cc index e93f3046..656e29a9 100644 --- a/Grid/util/Init.cc +++ b/Grid/util/Init.cc @@ -318,6 +318,11 @@ void Grid_init(int *argc,char ***argv) Grid_debug_handler_init(); } + ////////////////////////////////////////////////////////// + // Memory manager + ////////////////////////////////////////////////////////// + MemoryManager::Init(); + ////////////////////////////////////////////////////////// // MPI initialisation ////////////////////////////////////////////////////////// @@ -357,11 +362,6 @@ void Grid_init(int *argc,char ***argv) std::cout << GridLogMessage << "================================================ "< Date: Wed, 24 Jun 2020 08:54:49 -0400 Subject: [PATCH 77/86] Force initial values --- Grid/allocator/MemoryManager.cc | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/Grid/allocator/MemoryManager.cc b/Grid/allocator/MemoryManager.cc index c92cd18a..e11ce948 100644 --- a/Grid/allocator/MemoryManager.cc +++ b/Grid/allocator/MemoryManager.cc @@ -24,7 +24,7 @@ void MemoryManager::PrintBytes(void) ////////////////////////////////////////////////////////////////////// MemoryManager::AllocationCacheEntry MemoryManager::Entries[MemoryManager::NallocType][MemoryManager::NallocCacheMax]; int MemoryManager::Victim[MemoryManager::NallocType]; -int MemoryManager::Ncache[MemoryManager::NallocType]; +int 
MemoryManager::Ncache[MemoryManager::NallocType] = { 8, 32, 8, 32, 8, 32 }; ////////////////////////////////////////////////////////////////////// // Actual allocation and deallocation utils @@ -112,12 +112,6 @@ void MemoryManager::CpuFree (void *_ptr,size_t bytes) ////////////////////////////////////////// void MemoryManager::Init(void) { - Ncache[Cpu] = 8; - Ncache[Acc] = 8; - Ncache[Shared] = 8; - Ncache[CpuSmall] = 32; - Ncache[AccSmall] = 32; - Ncache[SharedSmall] = 32; char * str; int Nc; From 22cfbdbbb386e4787290522e1a24010fd964e821 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 24 Jun 2020 12:52:31 -0400 Subject: [PATCH 78/86] Boost precision in inner products in single --- Grid/lattice/Lattice_reduction.h | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index 67709c94..07968024 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -93,7 +93,9 @@ inline typename vobj::scalar_objectD sumD_cpu(const vobj *arg, Integer osites) ssum = ssum+sumarray[i]; } - return ssum; + typedef typename vobj::scalar_object ssobj; + ssobj ret = ssum; + return ret; } @@ -154,7 +156,7 @@ inline ComplexD rankInnerProduct(const Lattice &left,const Lattice & const uint64_t sites = grid->oSites(); // Might make all code paths go this way. - typedef decltype(innerProduct(vobj(),vobj())) inner_t; + typedef decltype(Reduce(innerProductD(vobj(),vobj()))) inner_t; Vector inner_tmp(sites); auto inner_tmp_v = &inner_tmp[0]; @@ -163,16 +165,15 @@ inline ComplexD rankInnerProduct(const Lattice &left,const Lattice & autoView( right_v,right, AcceleratorRead); // GPU - SIMT lane compliance... 
- accelerator_for( ss, sites, nsimd,{ - auto x_l = left_v(ss); - auto y_l = right_v(ss); - coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l)); - }) + accelerator_for( ss, sites, 1,{ + auto x_l = left_v[ss]; + auto y_l = right_v[ss]; + inner_tmp_v[ss]=Reduce(innerProductD(x_l,y_l)); + }); } // This is in single precision and fails some tests - // Need a sumD that sums in double - nrm = TensorRemove(sumD(inner_tmp_v,sites)); + nrm = TensorRemove(sum(inner_tmp_v,sites)); return nrm; } @@ -218,16 +219,16 @@ axpby_norm_fast(Lattice &z,sobj a,sobj b,const Lattice &x,const Latt autoView( y_v, y, AcceleratorRead); autoView( z_v, z, AcceleratorWrite); - typedef decltype(innerProduct(x_v[0],y_v[0])) inner_t; + typedef decltype(Reduce(innerProductD(x_v[0],y_v[0]))) inner_t; Vector inner_tmp(sites); auto inner_tmp_v = &inner_tmp[0]; - accelerator_for( ss, sites, nsimd,{ - auto tmp = a*x_v(ss)+b*y_v(ss); - coalescedWrite(inner_tmp_v[ss],innerProduct(tmp,tmp)); - coalescedWrite(z_v[ss],tmp); + accelerator_for( ss, sites, 1,{ + auto tmp = a*x_v[ss]+b*y_v[ss]; + inner_tmp_v[ss]=Reduce(innerProductD(tmp,tmp)); + z_v[ss]=tmp; }); - nrm = real(TensorRemove(sumD(inner_tmp_v,sites))); + nrm = real(TensorRemove(sum(inner_tmp_v,sites))); grid->GlobalSum(nrm); return nrm; } @@ -243,29 +244,28 @@ innerProductNorm(ComplexD& ip, RealD &nrm, const Lattice &left,const Latti GridBase *grid = left.Grid(); - const uint64_t nsimd = grid->Nsimd(); const uint64_t sites = grid->oSites(); // GPU - typedef decltype(innerProduct(vobj(),vobj())) inner_t; - typedef decltype(innerProduct(vobj(),vobj())) norm_t; + typedef decltype(Reduce(innerProductD(vobj(),vobj()))) inner_t; + typedef decltype(Reduce(innerProductD(vobj(),vobj()))) norm_t; Vector inner_tmp(sites); - Vector norm_tmp(sites); + Vector norm_tmp(sites); auto inner_tmp_v = &inner_tmp[0]; auto norm_tmp_v = &norm_tmp[0]; { autoView(left_v,left, AcceleratorRead); autoView(right_v,right,AcceleratorRead); - accelerator_for( ss, sites, nsimd,{ - 
auto left_tmp = left_v(ss); - coalescedWrite(inner_tmp_v[ss],innerProduct(left_tmp,right_v(ss))); - coalescedWrite(norm_tmp_v[ss],innerProduct(left_tmp,left_tmp)); + accelerator_for( ss, sites, 1,{ + auto left_tmp = left_v[ss]; + inner_tmp_v[ss]=Reduce(innerProductD(left_tmp,right_v[ss])); + norm_tmp_v [ss]=Reduce(innerProductD(left_tmp,left_tmp)); }); } - tmp[0] = TensorRemove(sumD(inner_tmp_v,sites)); - tmp[1] = TensorRemove(sumD(norm_tmp_v,sites)); + tmp[0] = TensorRemove(sum(inner_tmp_v,sites)); + tmp[1] = TensorRemove(sum(norm_tmp_v,sites)); grid->GlobalSumVector(&tmp[0],2); // keep norm Complex -> can use GlobalSumVector ip = tmp[0]; From 936c5ecf69d240c783ce9fee8eb66e510eba71b7 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 24 Jun 2020 17:28:31 -0400 Subject: [PATCH 79/86] Reduction GPU no compile fix --- Grid/lattice/Lattice_reduction.h | 20 ++++++++++---------- Grid/tensors/Tensor_class.h | 31 ++++++++++++++++--------------- 2 files changed, 26 insertions(+), 25 deletions(-) diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index 07968024..c2955485 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -62,7 +62,6 @@ inline typename vobj::scalar_object sum_cpu(const vobj *arg, Integer osites) for(int i=0;i @@ -156,7 +155,7 @@ inline ComplexD rankInnerProduct(const Lattice &left,const Lattice & const uint64_t sites = grid->oSites(); // Might make all code paths go this way. 
- typedef decltype(Reduce(innerProductD(vobj(),vobj()))) inner_t; + typedef decltype(innerProductD(vobj(),vobj())) inner_t; Vector inner_tmp(sites); auto inner_tmp_v = &inner_tmp[0]; @@ -168,12 +167,13 @@ inline ComplexD rankInnerProduct(const Lattice &left,const Lattice & accelerator_for( ss, sites, 1,{ auto x_l = left_v[ss]; auto y_l = right_v[ss]; - inner_tmp_v[ss]=Reduce(innerProductD(x_l,y_l)); + inner_tmp_v[ss]=innerProductD(x_l,y_l); }); } // This is in single precision and fails some tests - nrm = TensorRemove(sum(inner_tmp_v,sites)); + auto anrm = sum(inner_tmp_v,sites); + nrm = anrm; return nrm; } @@ -219,13 +219,13 @@ axpby_norm_fast(Lattice &z,sobj a,sobj b,const Lattice &x,const Latt autoView( y_v, y, AcceleratorRead); autoView( z_v, z, AcceleratorWrite); - typedef decltype(Reduce(innerProductD(x_v[0],y_v[0]))) inner_t; + typedef decltype(innerProductD(x_v[0],y_v[0])) inner_t; Vector inner_tmp(sites); auto inner_tmp_v = &inner_tmp[0]; accelerator_for( ss, sites, 1,{ auto tmp = a*x_v[ss]+b*y_v[ss]; - inner_tmp_v[ss]=Reduce(innerProductD(tmp,tmp)); + inner_tmp_v[ss]=innerProductD(tmp,tmp); z_v[ss]=tmp; }); nrm = real(TensorRemove(sum(inner_tmp_v,sites))); @@ -248,8 +248,8 @@ innerProductNorm(ComplexD& ip, RealD &nrm, const Lattice &left,const Latti const uint64_t sites = grid->oSites(); // GPU - typedef decltype(Reduce(innerProductD(vobj(),vobj()))) inner_t; - typedef decltype(Reduce(innerProductD(vobj(),vobj()))) norm_t; + typedef decltype(innerProductD(vobj(),vobj())) inner_t; + typedef decltype(innerProductD(vobj(),vobj())) norm_t; Vector inner_tmp(sites); Vector norm_tmp(sites); auto inner_tmp_v = &inner_tmp[0]; @@ -259,8 +259,8 @@ innerProductNorm(ComplexD& ip, RealD &nrm, const Lattice &left,const Latti autoView(right_v,right,AcceleratorRead); accelerator_for( ss, sites, 1,{ auto left_tmp = left_v[ss]; - inner_tmp_v[ss]=Reduce(innerProductD(left_tmp,right_v[ss])); - norm_tmp_v [ss]=Reduce(innerProductD(left_tmp,left_tmp)); + 
inner_tmp_v[ss]=innerProductD(left_tmp,right_v[ss]); + norm_tmp_v [ss]=innerProductD(left_tmp,left_tmp); }); } diff --git a/Grid/tensors/Tensor_class.h b/Grid/tensors/Tensor_class.h index dbcbae8d..36becc49 100644 --- a/Grid/tensors/Tensor_class.h +++ b/Grid/tensors/Tensor_class.h @@ -59,6 +59,20 @@ class GridTensorBase {}; using DoublePrecision2= typename Traits::DoublePrecision2; \ static constexpr int TensorLevel = Traits::TensorLevel +/////////////////////////////////////////////////////////// +// Allows to turn scalar>>> back to double. +/////////////////////////////////////////////////////////// +template +accelerator_inline typename std::enable_if::value, T>::type +TensorRemove(T arg) { + return arg; +} +template +accelerator_inline auto TensorRemove(iScalar arg) + -> decltype(TensorRemove(arg._internal)) { + return TensorRemove(arg._internal); +} + template class iScalar { public: @@ -135,9 +149,10 @@ public: operator ComplexD() const { return (TensorRemove(_internal)); } + // instantiation of "Grid::iScalar::operator Grid::RealD() const [with vtype=Grid::Real, U=Grid::Real, V=Grid::RealD, =0, =0U]" template = 0,IfNotSimd = 0> accelerator_inline operator RealD() const { - return TensorRemove(_internal); + return (RealD) TensorRemove(_internal); } template = 0, IfNotSimd = 0> accelerator_inline operator Integer() const { @@ -169,20 +184,6 @@ public: strong_inline scalar_type * end() { return begin() + Traits::count; } }; -/////////////////////////////////////////////////////////// -// Allows to turn scalar>>> back to double. 
-/////////////////////////////////////////////////////////// -template -accelerator_inline typename std::enable_if::value, T>::type -TensorRemove(T arg) { - return arg; -} -template -accelerator_inline auto TensorRemove(iScalar arg) - -> decltype(TensorRemove(arg._internal)) { - return TensorRemove(arg._internal); -} - template class iVector { public: From 102089798c4e048b3b47bf22e224720bf23d0278 Mon Sep 17 00:00:00 2001 From: Raoul Hodgson Date: Thu, 25 Jun 2020 16:41:58 +0100 Subject: [PATCH 80/86] BaryonUtils: update to autoView --- Grid/qcd/utils/BaryonUtils.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Grid/qcd/utils/BaryonUtils.h b/Grid/qcd/utils/BaryonUtils.h index c7a72812..11744d16 100644 --- a/Grid/qcd/utils/BaryonUtils.h +++ b/Grid/qcd/utils/BaryonUtils.h @@ -712,9 +712,9 @@ void BaryonUtils::Baryon_Gamma_3pt( { GridBase *grid = q_tf.Grid(); - auto vcorr= stn_corr.View(); - auto vq_ti = q_ti.View(); - auto vq_tf = q_tf.View(); + autoView( vcorr, stn_corr, CpuWrite); + autoView( vq_ti , q_ti, CpuRead); + autoView( vq_tf , q_tf, CpuRead); if (group ==1) { accelerator_for(ss, grid->oSites(), grid->Nsimd(), { From 77af9a3ddca85a34097a5282c290601b95c1a345 Mon Sep 17 00:00:00 2001 From: Raoul Hodgson Date: Fri, 26 Jun 2020 10:08:42 +0100 Subject: [PATCH 81/86] Baryon revert sign --- Grid/qcd/utils/BaryonUtils.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Grid/qcd/utils/BaryonUtils.h b/Grid/qcd/utils/BaryonUtils.h index 11744d16..df16db45 100644 --- a/Grid/qcd/utils/BaryonUtils.h +++ b/Grid/qcd/utils/BaryonUtils.h @@ -257,7 +257,7 @@ void BaryonUtils::baryon_site(const mobj &D1, auto GAf_D1_GAi_P_rr_cc = GAf_D1_GAi_P()(rho,rho)(c_f,c_i); for (int alpha_f=0; alpha_f::baryon_site(const mobj &D1, for (int alpha_f=0; alpha_f::baryon_site(const mobj &D1, for (int alpha_f=0; alpha_f::baryon_site(const mobj &D1, auto GAf_D1_GAi_P_rr_cc = GAf_D1_GAi_P()(rho,rho)(c_f,c_i); for (int alpha_f=0; 
alpha_f::baryon_site(const mobj &D1, for (int alpha_f=0; alpha_f::baryon_site(const mobj &D1, for (int alpha_f=0; alpha_f Date: Mon, 29 Jun 2020 09:43:01 +0100 Subject: [PATCH 82/86] Update to baryon and added comments/fix whitespace --- Grid/qcd/utils/BaryonUtils.h | 827 ++++++++++++++++++----------------- 1 file changed, 428 insertions(+), 399 deletions(-) diff --git a/Grid/qcd/utils/BaryonUtils.h b/Grid/qcd/utils/BaryonUtils.h index df16db45..b268b684 100644 --- a/Grid/qcd/utils/BaryonUtils.h +++ b/Grid/qcd/utils/BaryonUtils.h @@ -62,6 +62,9 @@ public: const bool * wick_contractions, robj &result); public: + static void Wick_Contractions(std::string qi, + std::string qf, + bool* wick_contractions); static void ContractBaryons(const PropagatorField &q1_left, const PropagatorField &q2_left, const PropagatorField &q3_left, @@ -80,59 +83,59 @@ public: const Gamma GammaB_left, const Gamma GammaA_right, const Gamma GammaB_right, - const int wick_contraction, + const bool* wick_contractions, const int parity, const int nt, robj &result); - private: - template - static void Baryon_Gamma_3pt_Group1_Site( - const mobj &Dq1_ti, - const mobj2 &Dq2_spec, - const mobj2 &Dq3_spec, - const mobj &Dq4_tf, - const Gamma GammaJ, - const Gamma GammaBi, - const Gamma GammaBf, - int wick_contraction, - robj &result); + private: + template + static void Baryon_Gamma_3pt_Group1_Site( + const mobj &Dq1_ti, + const mobj2 &Dq2_spec, + const mobj2 &Dq3_spec, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result); - template - static void Baryon_Gamma_3pt_Group2_Site( - const mobj2 &Dq1_spec, - const mobj &Dq2_ti, - const mobj2 &Dq3_spec, - const mobj &Dq4_tf, - const Gamma GammaJ, - const Gamma GammaBi, - const Gamma GammaBf, - int wick_contraction, - robj &result); + template + static void Baryon_Gamma_3pt_Group2_Site( + const mobj2 &Dq1_spec, + const mobj &Dq2_ti, + const mobj2 &Dq3_spec, + const mobj &Dq4_tf, + 
const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result); - template - static void Baryon_Gamma_3pt_Group3_Site( - const mobj2 &Dq1_spec, - const mobj2 &Dq2_spec, - const mobj &Dq3_ti, - const mobj &Dq4_tf, - const Gamma GammaJ, - const Gamma GammaBi, - const Gamma GammaBf, - int wick_contraction, - robj &result); - public: - template - static void Baryon_Gamma_3pt( - const PropagatorField &q_ti, - const mobj &Dq_spec1, - const mobj &Dq_spec2, - const PropagatorField &q_tf, - int group, - int wick_contraction, - const Gamma GammaJ, - const Gamma GammaBi, - const Gamma GammaBf, - SpinMatrixField &stn_corr); + template + static void Baryon_Gamma_3pt_Group3_Site( + const mobj2 &Dq1_spec, + const mobj2 &Dq2_spec, + const mobj &Dq3_ti, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result); + public: + template + static void Baryon_Gamma_3pt( + const PropagatorField &q_ti, + const mobj &Dq_spec1, + const mobj &Dq_spec2, + const PropagatorField &q_tf, + int group, + int wick_contraction, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + SpinMatrixField &stn_corr); private: template static void Sigma_to_Nucleon_Q1_Eye_site(const mobj &Dq_loop, @@ -215,15 +218,15 @@ const Real BaryonUtils::epsilon_sgn[6] = {1.,1.,1.,-1.,-1.,-1.}; template template void BaryonUtils::baryon_site(const mobj &D1, - const mobj &D2, - const mobj &D3, - const Gamma GammaA_i, - const Gamma GammaB_i, - const Gamma GammaA_f, - const Gamma GammaB_f, - const int parity, - const bool * wick_contraction, - robj &result) + const mobj &D2, + const mobj &D3, + const Gamma GammaA_i, + const Gamma GammaB_i, + const Gamma GammaA_f, + const Gamma GammaB_f, + const int parity, + const bool * wick_contraction, + robj &result) { Gamma g4(Gamma::Algebra::GammaT); //needed for parity P_\pm = 0.5*(1 \pm \gamma_4) @@ -231,101 +234,121 @@ void BaryonUtils::baryon_site(const 
mobj &D1, auto D1_GAi = D1 * GammaA_i; auto D1_GAi_g4 = D1_GAi * g4; auto D1_GAi_P = 0.5*(D1_GAi + (Real)parity * D1_GAi_g4); - auto GAf_D1_GAi_P = GammaA_f * D1_GAi_P; - auto GBf_D1_GAi_P = GammaB_f * D1_GAi_P; + auto GAf_D1_GAi_P = GammaA_f * D1_GAi_P; + auto GBf_D1_GAi_P = GammaB_f * D1_GAi_P; - auto D2_GBi = D2 * GammaB_i; - auto GBf_D2_GBi = GammaB_f * D2_GBi; - auto GAf_D2_GBi = GammaA_f * D2_GBi; + auto D2_GBi = D2 * GammaB_i; + auto GBf_D2_GBi = GammaB_f * D2_GBi; + auto GAf_D2_GBi = GammaA_f * D2_GBi; - auto GBf_D3 = GammaB_f * D3; - auto GAf_D3 = GammaA_f * D3; + auto GBf_D3 = GammaB_f * D3; + auto GAf_D3 = GammaA_f * D3; for (int ie_f=0; ie_f < 6 ; ie_f++){ - int a_f = epsilon[ie_f][0]; //a - int b_f = epsilon[ie_f][1]; //b - int c_f = epsilon[ie_f][2]; //c + int a_f = epsilon[ie_f][0]; //a + int b_f = epsilon[ie_f][1]; //b + int c_f = epsilon[ie_f][2]; //c for (int ie_i=0; ie_i < 6 ; ie_i++){ - int a_i = epsilon[ie_i][0]; //a' - int b_i = epsilon[ie_i][1]; //b' - int c_i = epsilon[ie_i][2]; //c' + int a_i = epsilon[ie_i][0]; //a' + int b_i = epsilon[ie_i][1]; //b' + int c_i = epsilon[ie_i][2]; //c' - Real ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; + Real ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; //This is the \delta_{456}^{123} part - if (wick_contraction[0]){ - for (int rho=0; rho +void BaryonUtils::Wick_Contractions(std::string qi, std::string qf, bool* wick_contractions) { + const int epsilon[6][3] = {{0,1,2},{1,2,0},{2,0,1},{0,2,1},{2,1,0},{1,0,2}}; + for (int ie=0; ie < 6 ; ie++) { + wick_contractions[ie] = (qi.size() == 3 && qf.size() == 3 + && qi[0] == qf[epsilon[ie][0]] + && qi[1] == qf[epsilon[ie][1]] + && qi[2] == qf[epsilon[ie][2]]); + } +} + +/* The array wick_contractions must be of length 6. 
The order * + * corresponds to the to that shown in the Hadrons documentation * + * at https://aportelli.github.io/Hadrons-doc/#/mcontraction * + * This can be computed from the quark flavours using the * + * Wick_Contractions function above */ template void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, const PropagatorField &q2_left, @@ -383,6 +406,12 @@ void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, std::cout << std::setw(10) << bytes/t*1.0e6/1024/1024/1024 << " GB/s " << std::endl; } + +/* The array wick_contractions must be of length 6. The order * + * corresponds to the to that shown in the Hadrons documentation * + * at https://aportelli.github.io/Hadrons-doc/#/mcontraction * + * This can also be computed from the quark flavours using the * + * Wick_Contractions function above */ template template void BaryonUtils::ContractBaryons_Sliced(const mobj &D1, @@ -392,7 +421,7 @@ void BaryonUtils::ContractBaryons_Sliced(const mobj &D1, const Gamma GammaB_left, const Gamma GammaA_right, const Gamma GammaB_right, - const int wick_contraction, + const bool* wick_contractions, const int parity, const int nt, robj &result) @@ -408,10 +437,6 @@ void BaryonUtils::ContractBaryons_Sliced(const mobj &D1, assert(parity==1 || parity == -1 && "Parity must be +1 or -1"); - bool wick_contractions[6]; - for (int ie=0; ie < 6 ; ie++) - wick_contractions[ie] = (ie == wick_contraction); - for (int t=0; t::ContractBaryons_Sliced(const mobj &D1, template template void BaryonUtils::Baryon_Gamma_3pt_Group1_Site( - const mobj &Dq1_ti, - const mobj2 &Dq2_spec, - const mobj2 &Dq3_spec, - const mobj &Dq4_tf, - const Gamma GammaJ, - const Gamma GammaBi, - const Gamma GammaBf, - int wick_contraction, - robj &result) + const mobj &Dq1_ti, + const mobj2 &Dq2_spec, + const mobj2 &Dq3_spec, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result) { - Gamma g5(Gamma::Algebra::Gamma5); + Gamma 
g5(Gamma::Algebra::Gamma5); - auto adjD4_g_D1 = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq1_ti; - auto Gf_adjD4_g_D1 = GammaBf * adjD4_g_D1; - auto D2_Gi = Dq2_spec * GammaBi; - auto Gf_D2_Gi = GammaBf * D2_Gi; - auto Gf_D3 = GammaBf * Dq3_spec; + auto adjD4_g_D1 = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq1_ti; + auto Gf_adjD4_g_D1 = GammaBf * adjD4_g_D1; + auto D2_Gi = Dq2_spec * GammaBi; + auto Gf_D2_Gi = GammaBf * D2_Gi; + auto Gf_D3 = GammaBf * Dq3_spec; - int a_f, b_f, c_f; - int a_i, b_i, c_i; + int a_f, b_f, c_f; + int a_i, b_i, c_i; - Real ee; + Real ee; - for (int ie_f=0; ie_f < 6 ; ie_f++){ - a_f = epsilon[ie_f][0]; //a - b_f = epsilon[ie_f][1]; //b - c_f = epsilon[ie_f][2]; //c - for (int ie_i=0; ie_i < 6 ; ie_i++){ - a_i = epsilon[ie_i][0]; //a' - b_i = epsilon[ie_i][1]; //b' - c_i = epsilon[ie_i][2]; //c' + for (int ie_f=0; ie_f < 6 ; ie_f++){ + a_f = epsilon[ie_f][0]; //a + b_f = epsilon[ie_f][1]; //b + c_f = epsilon[ie_f][2]; //c + for (int ie_i=0; ie_i < 6 ; ie_i++){ + a_i = epsilon[ie_i][0]; //a' + b_i = epsilon[ie_i][1]; //b' + c_i = epsilon[ie_i][2]; //c' - ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; + ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; - for (int alpha_f=0; alpha_f::Baryon_Gamma_3pt_Group1_Site( template template void BaryonUtils::Baryon_Gamma_3pt_Group2_Site( - const mobj2 &Dq1_spec, - const mobj &Dq2_ti, - const mobj2 &Dq3_spec, - const mobj &Dq4_tf, - const Gamma GammaJ, - const Gamma GammaBi, - const Gamma GammaBf, - int wick_contraction, - robj &result) + const mobj2 &Dq1_spec, + const mobj &Dq2_ti, + const mobj2 &Dq3_spec, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result) { - Gamma g5(Gamma::Algebra::Gamma5); + Gamma g5(Gamma::Algebra::Gamma5); - auto adjD4_g_D2_Gi = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq2_ti * GammaBi; - auto Gf_adjD4_g_D2_Gi = GammaBf * adjD4_g_D2_Gi; - auto Gf_D1 = GammaBf * Dq1_spec; - auto Gf_D3 = GammaBf * Dq3_spec; + auto adjD4_g_D2_Gi = g5 * 
adj(Dq4_tf) * g5 * GammaJ * Dq2_ti * GammaBi; + auto Gf_adjD4_g_D2_Gi = GammaBf * adjD4_g_D2_Gi; + auto Gf_D1 = GammaBf * Dq1_spec; + auto Gf_D3 = GammaBf * Dq3_spec; - int a_f, b_f, c_f; - int a_i, b_i, c_i; + int a_f, b_f, c_f; + int a_i, b_i, c_i; - Real ee; + Real ee; - for (int ie_f=0; ie_f < 6 ; ie_f++){ - a_f = epsilon[ie_f][0]; //a - b_f = epsilon[ie_f][1]; //b - c_f = epsilon[ie_f][2]; //c - for (int ie_i=0; ie_i < 6 ; ie_i++){ - a_i = epsilon[ie_i][0]; //a' - b_i = epsilon[ie_i][1]; //b' - c_i = epsilon[ie_i][2]; //c' + for (int ie_f=0; ie_f < 6 ; ie_f++){ + a_f = epsilon[ie_f][0]; //a + b_f = epsilon[ie_f][1]; //b + c_f = epsilon[ie_f][2]; //c + for (int ie_i=0; ie_i < 6 ; ie_i++){ + a_i = epsilon[ie_i][0]; //a' + b_i = epsilon[ie_i][1]; //b' + c_i = epsilon[ie_i][2]; //c' - ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; + ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; - for (int alpha_f=0; alpha_f::Baryon_Gamma_3pt_Group2_Site( template template void BaryonUtils::Baryon_Gamma_3pt_Group3_Site( - const mobj2 &Dq1_spec, - const mobj2 &Dq2_spec, - const mobj &Dq3_ti, - const mobj &Dq4_tf, - const Gamma GammaJ, - const Gamma GammaBi, - const Gamma GammaBf, - int wick_contraction, - robj &result) + const mobj2 &Dq1_spec, + const mobj2 &Dq2_spec, + const mobj &Dq3_ti, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result) { - Gamma g5(Gamma::Algebra::Gamma5); + Gamma g5(Gamma::Algebra::Gamma5); - auto adjD4_g_D3 = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq3_ti; - auto Gf_adjD4_g_D3 = GammaBf * adjD4_g_D3; - auto Gf_D1 = GammaBf * Dq1_spec; - auto D2_Gi = Dq2_spec * GammaBi; - auto Gf_D2_Gi = GammaBf * D2_Gi; + auto adjD4_g_D3 = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq3_ti; + auto Gf_adjD4_g_D3 = GammaBf * adjD4_g_D3; + auto Gf_D1 = GammaBf * Dq1_spec; + auto D2_Gi = Dq2_spec * GammaBi; + auto Gf_D2_Gi = GammaBf * D2_Gi; - int a_f, b_f, c_f; - int a_i, b_i, c_i; + int a_f, b_f, c_f; + int a_i, b_i, c_i; - 
Real ee; + Real ee; - for (int ie_f=0; ie_f < 6 ; ie_f++){ - a_f = epsilon[ie_f][0]; //a - b_f = epsilon[ie_f][1]; //b - c_f = epsilon[ie_f][2]; //c - for (int ie_i=0; ie_i < 6 ; ie_i++){ - a_i = epsilon[ie_i][0]; //a' - b_i = epsilon[ie_i][1]; //b' - c_i = epsilon[ie_i][2]; //c' + for (int ie_f=0; ie_f < 6 ; ie_f++){ + a_f = epsilon[ie_f][0]; //a + b_f = epsilon[ie_f][1]; //b + c_f = epsilon[ie_f][2]; //c + for (int ie_i=0; ie_i < 6 ; ie_i++){ + a_i = epsilon[ie_i][0]; //a' + b_i = epsilon[ie_i][1]; //b' + c_i = epsilon[ie_i][2]; //c' - ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; + ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; - for (int alpha_f=0; alpha_f template void BaryonUtils::Baryon_Gamma_3pt( - const PropagatorField &q_ti, - const mobj &Dq_spec1, - const mobj &Dq_spec2, - const PropagatorField &q_tf, - int group, - int wick_contraction, - const Gamma GammaJ, - const Gamma GammaBi, - const Gamma GammaBf, - SpinMatrixField &stn_corr) + const PropagatorField &q_ti, + const mobj &Dq_spec1, + const mobj &Dq_spec2, + const PropagatorField &q_tf, + int group, + int wick_contraction, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + SpinMatrixField &stn_corr) { - GridBase *grid = q_tf.Grid(); + GridBase *grid = q_tf.Grid(); - autoView( vcorr, stn_corr, CpuWrite); - autoView( vq_ti , q_ti, CpuRead); - autoView( vq_tf , q_tf, CpuRead); + autoView( vcorr, stn_corr, CpuWrite); + autoView( vq_ti , q_ti, CpuRead); + autoView( vq_tf , q_tf, CpuRead); - if (group ==1) { - accelerator_for(ss, grid->oSites(), grid->Nsimd(), { - auto Dq_ti = vq_ti[ss]; - auto Dq_tf = vq_tf[ss]; - sobj result=Zero(); - Baryon_Gamma_3pt_Group1_Site(Dq_ti,Dq_spec1,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); - vcorr[ss] += result; - });//end loop over lattice sites - } else if (group == 2) { - accelerator_for(ss, grid->oSites(), grid->Nsimd(), { - auto Dq_ti = vq_ti[ss]; - auto Dq_tf = vq_tf[ss]; - sobj result=Zero(); - 
Baryon_Gamma_3pt_Group2_Site(Dq_spec1,Dq_ti,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); - vcorr[ss] += result; - });//end loop over lattice sites - } else if (group == 3) { - accelerator_for(ss, grid->oSites(), grid->Nsimd(), { - auto Dq_ti = vq_ti[ss]; - auto Dq_tf = vq_tf[ss]; - sobj result=Zero(); - Baryon_Gamma_3pt_Group3_Site(Dq_spec1,Dq_spec2,Dq_ti,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); - - vcorr[ss] += result; - });//end loop over lattice sites - } + if (group == 1) { + accelerator_for(ss, grid->oSites(), grid->Nsimd(), { + auto Dq_ti = vq_ti[ss]; + auto Dq_tf = vq_tf[ss]; + sobj result=Zero(); + Baryon_Gamma_3pt_Group1_Site(Dq_ti,Dq_spec1,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); + vcorr[ss] += result; + });//end loop over lattice sites + } else if (group == 2) { + accelerator_for(ss, grid->oSites(), grid->Nsimd(), { + auto Dq_ti = vq_ti[ss]; + auto Dq_tf = vq_tf[ss]; + sobj result=Zero(); + Baryon_Gamma_3pt_Group2_Site(Dq_spec1,Dq_ti,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); + vcorr[ss] += result; + });//end loop over lattice sites + } else if (group == 3) { + accelerator_for(ss, grid->oSites(), grid->Nsimd(), { + auto Dq_ti = vq_ti[ss]; + auto Dq_tf = vq_tf[ss]; + sobj result=Zero(); + Baryon_Gamma_3pt_Group3_Site(Dq_spec1,Dq_spec2,Dq_ti,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); + vcorr[ss] += result; + });//end loop over lattice sites + } } From ee9889821d28996ed52cae1868c90300e4febe26 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 29 Jun 2020 12:59:52 -0400 Subject: [PATCH 83/86] Runs through to coarse space solve --- tests/solver/Test_hw_multigrid.cc | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/tests/solver/Test_hw_multigrid.cc b/tests/solver/Test_hw_multigrid.cc index ccca9971..b728faa7 100644 --- a/tests/solver/Test_hw_multigrid.cc +++ b/tests/solver/Test_hw_multigrid.cc @@ -6,9 +6,7 @@ Copyright 
(C) 2015 -Author: Antonin Portelli Author: Peter Boyle -Author: paboyle This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -267,7 +265,7 @@ int main (int argc, char ** argv) // Construct a coarsened grid; utility for this? /////////////////////////////////////////////////// std::vector block ({2,2,2,2}); - const int nbasis= 32; + const int nbasis= 8; auto clatt = GridDefaultLatt(); for(int d=0;d Level1Op; - NonHermitianLinearOperator LinOpDwf(Ddwf); - Level1Op LDOp (*Coarse5d,1); + Level1Op LDOp (*Coarse5d,0); + + std::cout< CoarseMdagM(LDOp); + BiCGSTAB CoarseBiCGSTAB(tol,MaxIt); + ConjugateGradient CoarseCG(tol,MaxIt); + + c_res=Zero(); + CoarseCG(CoarseMdagM,c_src,c_res); + std::cout< Date: Sat, 4 Jul 2020 03:53:06 +0100 Subject: [PATCH 84/86] Fixed HMC SU(N) integrator which was causing fields to leave Lie Algebra manifold for N>2 --- Grid/qcd/action/scalar/ScalarImpl.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Grid/qcd/action/scalar/ScalarImpl.h b/Grid/qcd/action/scalar/ScalarImpl.h index 203e1824..d33cdc79 100644 --- a/Grid/qcd/action/scalar/ScalarImpl.h +++ b/Grid/qcd/action/scalar/ScalarImpl.h @@ -38,7 +38,6 @@ public: static inline void update_field(Field& P, Field& U, double ep) { U += P*ep; - std::cout << "Field updated. Epsilon = " << std::setprecision(10) << ep << std::endl; } static inline RealD FieldSquareNorm(Field& U) { @@ -175,14 +174,13 @@ public: P *= scale; } - static inline Field projectForce(Field& P) {return P;} + static inline Field projectForce(Field& P) {return Ta(P);} static inline void update_field(Field &P, Field &U, double ep) { #ifndef USE_FFT_ACCELERATION double t0=usecond(); U += P*ep; - std::cout << "Field updated. 
Epsilon = " << std::setprecision(10) << ep << std::endl; double t1=usecond(); double total_time = (t1-t0)/1e6; std::cout << GridLogIntegrator << "Total time for updating field (s) : " << total_time << std::endl; From 43334e88c36b40d565ef906f545a368dfcc80db4 Mon Sep 17 00:00:00 2001 From: "Henrique B.R" Date: Sat, 4 Jul 2020 16:11:16 +0100 Subject: [PATCH 85/86] Tiny change in a comment for clarity --- Grid/qcd/action/scalar/ScalarImpl.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Grid/qcd/action/scalar/ScalarImpl.h b/Grid/qcd/action/scalar/ScalarImpl.h index d33cdc79..14675b11 100644 --- a/Grid/qcd/action/scalar/ScalarImpl.h +++ b/Grid/qcd/action/scalar/ScalarImpl.h @@ -28,10 +28,9 @@ public: typedef Field PropagatorField; static inline void generate_momenta(Field& P, GridParallelRNG& pRNG){ - RealD scale = ::sqrt(HMC_MOMENTUM_DENOMINATOR); - // CPS and UKQCD conventions not yet implemented for U(1) scalars. + RealD scale = ::sqrt(HMC_MOMENTUM_DENOMINATOR); // CPS/UKQCD momentum rescaling gaussian(pRNG, P); - P *= scale; + P *= scale; } static inline Field projectForce(Field& P){return P;} @@ -150,7 +149,7 @@ public: static inline void generate_momenta(Field &P, GridParallelRNG &pRNG) { - RealD scale = ::sqrt(HMC_MOMENTUM_DENOMINATOR); // Being consistent with CPS and UKQCD conventions + RealD scale = ::sqrt(HMC_MOMENTUM_DENOMINATOR); // CPS/UKQCD momentum rescaling #ifndef USE_FFT_ACCELERATION Group::GaussianFundamentalLieAlgebraMatrix(pRNG, P); From 7cf7f11e1af8d0d11d2269b047ba88638037da02 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 22 Jul 2020 14:44:11 -0400 Subject: [PATCH 86/86] Doc recompile --- documentation/Grid.pdf | Bin 568974 -> 571202 bytes documentation/conf.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/Grid.pdf b/documentation/Grid.pdf index 8b9f2be13ef2079b1c009726e04d7e6dcdd6cab0..c1475d63948bcf25335ea60fbefc5a4af0b3a96f 100644 GIT binary patch delta 171536 
zcmZU)Q*b5R^935)HqKND>E0!Knt1{m@p+bz=Rfj;}^-hj^Vfhbo432qV=*8Whgx-w-Sm- z9<<7U>svS&Y%JfMv|K)TJYB%s1A~(e{H_!S1Onh{F)9V@R1n@%ZQS8Z|Ec?LwqfxV zSK+v%L)-3#*nF8xIkR{x;Y&=}nn54h=PEc<5EqsmDrt;Lug)KpF=zsSwqm$`{EezB zxV~3^nkUo`nR_54Oxw#dDwQT3fd%rR@;~{m zyjNfE8&0t)@fVX_RTP&gF-q$Xw1pqgIZH$}8<4*;U5b0wjKl&;cBk=vyIvyUN(lq% zGZNK*tj!M6(*W;;O}QRF-fx6%c1VN6cM(kOOq^YuObu=S&$2hNLSSKI36duWlP3WG zZ$Y%k6F?^5fOCPEzo9Te4@{5@KuPBUF|3iBH!NQ)RrG~F@fj_SJdQSTRZ=XD$FpL; z+9v1;QawJd7Vy#&f;&s0#xw?ocodcHCvP6Ve}VNO2{`chFF68vJf5>#!4xwTV*5N0 zs1G;?i+6B)?UudFYaFkz}E5F!PW=!-WXkfOFTHm0Fyu?UXGyw z=H5j2Mpx*LX9xQ~pc1B_Z0SVN9Q`IYcpp!8P&vyjwD`I@f zqq-H&*JlmaaZS9VRioBJScCiaNL%XI``2651r@YBX{CR3s9IV=pHrfb4b&H`{P4vL zYi2ik%UMr=eC%At(QeZm1@^_}S?=F2sXVTzgY6^i=8cNLTQXpvL)GV(=F3njBlhL} zJ1S?49dJY11P-FM49BT_$RWrjl$OdChOGS~MP&YkcFU8jxV-8TeofhNL$p^udSSA! zh#;n6ENT~lt&RbtIdM!ZvrtponlYb=;#pHTY^qd8q}R<>0r5vi0eyj_&~<4aCWmBG zjojffLyDeaB1!YDb+tIY*fMJNxPtM&h1+!@SXwNZ-z2mA7>+gm(Q_ zT$e6onK&u>9g{Rq4Lpx4e|ce7YS^fxeU*3hE5%&Zp+fnLa#%8r9gB_E9O++6h32GR z9j5m~KkH&6Udx%MxGE9wFd5;P$O>iM!|K z@$Ml`9iAI137Yp(Ct>(C9h&pS&24>&Iqm1q)({)JVz?n(0qj0Ft$4V%Cl9bN4^_%0-I~^TD|uzt58n(8xL=aM};ciUIGb~*sbD8ne4zkC`3Gr z!ODAy>0H$nm2@JE;Et|TiBeW~WbqlNgsJF9Ht!-k66v8&)I#E68?&Ra)%#^f9vnKq zf{-`w#C^xR3~aUkd+03Cwd|kh1wf~2cSP58md7}J@V&?Yw|ZMS5RoOIZaes@O$z@` zVhsKDI9PcdAupOr({o<>!8INnxr~IxOvO9>#MKOvd__bBn9gwlv#3~PFQ&b6441(ytyco?b)pt7SgeWlrjkE1mC8%%6c&>kKuF z$J~g1kU%%@def{>rz+a8l@O=a zCMS9?7e1hn^xu<*^f|3U#nnW!)+RHZbbt##+L!Kq7U?#Xf9zkk-=oj8k4*0kBAn6=(flj= zM&7P&?7vv@F>wX`jR?tg^pvXfZuOCRxqLw2GLV>f%-4HJfy2$mar=+h>dm6M#aHufW(`@iibboO8GQT2p*ru=Ch1Y9`Cl}`T6X*dl z6rkG}RYC*>99@)9Gp?fJgPiOD z6#43qXPTej(eVG!5sZd6DInOMe-xJ}s=B_)9!Dga*8NFh%}H2AoOJmQU6q)xMo_bv{lZEs{gWEC;Z7;R(R_015fC9 z?#0e(uehaQGJYX)LCTFHkn9VOV!tm^k|#NhjEb`4hF=I~=M=Y*lqL2w_?hqan!FWL zZ=b_)pG|CRvw68m$Bp7Ag$sb2NQ%V_p^uk_LK4`LeeLEwHe3f(nXTxR zAMt95vIyo%@(0I+(-LahkWC;Ln+VE?%)h~4KTjVAYrc~g-21la-6zg8kL%>fbdQphC6$gb3HS zu=c{skY;a;S9BqImUl|&ZV=%TdK~}Z;vUd$a=6&Ux33H%M9eTm$(~--e!jcm8&5n5 
zz=!5GS^M6i!~%+Ocpp#ylN48Rg)N}zuFQCZSwE+#+lIb9LAxbu_6}ko&&G=xI#u(& zx@2I@i)*HID{@w?<S)+~BjJXiLq1-Gt>dU7;**zI=C*E{r@eE_V8} z_~-OA`E&(l;Kd3i0-a-N(u1TJ)4k}4G`KPDCI3t^atCBdI(f(oa84`}PN%}gJz z*Q=+)wF!fxLOp`_I%n^bV46oYAiC@R;xlzb=EmmhX|;Zi{+6Cjj$gNL!JOqOwBaS4 zPr3(1@$_F(51JyF&(99T5HBakKe!CB7>w}!`q*~;zaB6divu9PyJnd>+PghE+BQ4? z^la;FS~QC+oV&%Y>GVM0FY^1RO(~8FY5Y>fg6d^Zg`o4m8zCW&I8fHu1#-Aw3rvO; zR+=|LB=FbSC3>;JXHpmHB^QbX+;^Y2GN!+^)!AcBZthjhgOyq?MlM0p$m_*jPE94AW*%(Xy&p`bemRL>gin>FwzUm|F?rJ`MqaI){M6yv0H?kO=e z+8SMN#=^gdx!9GPPGRl2t+%8LhX#Y`raRAecc(uP?+=8Dhc$^tJi7l6yt%bgdaSsJ zajm%vFQXER=dDy;v|%CP-za3vPNio`SAYX{+=YgU1<^cAKM&!7McIk)i6r&>k(sPu zh3LzXG5yj?EzoiRLaAa3O?34KnCdAQI?NOtgjhV1E>m8ez2k$TT~oW}7HIqSuD+Cf zi1cw6330L>;u_DAB*(D`|@9DP-Q8@ zCe;|US+Cu%x}FpQK(O94ERHvS5){h7eh~Jek89^)3EyDI+klG(b>?s- z^h8H7&fixnM6yN@Ac28ni;B_RJhhduu{5Dm<%hxvkjPawcjw%JpqoasK$z&Bgnaxq*5p) z*euH|9pcbJQT5M=jWR6dhFri7`9U$h%)w&R!l+p$10?Fb(wLBN6oo>%WXPVkr+>HQ_^kvkRKi94d>Aw*A!b)T50UKz#o`^nofoz{`;dTr8ai zw|0!I%4GhZT?rQmZWLmuEHxw^Q;-R3g>W;q_#p>X|+8{x}K6 zZ}^OLUs}!4G%H^=KAk9T{QLqYp$7K8;Vb36p%egtj=@p|%3B&5J1$Or^4LArsvmm# z)m`E=Ebcc{LOu&Wca85aKMN!{HhMflhg9$61WcqhG8D}U7It`HW^zFGj?c%(pMOxx zv20GFvU&u9E%EpzV99M!1PZ1+`A{YF8f-mBIcyLsn_9#NoG6+9#^P|f&Tg(@%_N{T z^KFS+rrBWCbPbhhZO>r9-osm&EcD@}-+E>nYuKo;BTv~t#fbbVO@dpTr=^bHRb>r~ zQ`LqzAjF5ITtZI9s@x!@X$PNl^-2ZJ@IjISfBmwkwMOm~kC-5CJnJq!6skkGglkA; z;a=`zB?u;wyo|;oT~F@GXW}#oA(2uuKd2Ta+XQ6!fa2^D=dVo=ykfjA@l(f?o8Z>$T>k285)V z>aU{6<0RNo4|4cxSe_2_M*B=(kWfyqgVprXb<#t(&dd9z&BerzC5v8a<$CS)^K|f= z(W=@KSXz@VbV)7N>o0Z%iQeD1yssg-FBWFN4|DCWTK#cEJtsR6O;5p4w52Bk{tD#8 zY)HqI#1M{Pw@r}tsiWGZW$?C)Wd?!6Ew4tV6EdA68NACyY`KDi4^8S(`Ew8B({dUZ z;$xwn9zq~huwd&8k|B98NkqImoi>6C?VqP(MYkAv*ho10PI&U(ldG|nn&6k3S@+H3 zhjl-lqTehJ_qEf&GNFt0F1`HY>ML2%d+PLBBx7=F-R#trpLdCT+bu~%Bl$gcXC6wX z`*}EHi{2Q9wk@7_qZo;kD;@g{R(pYG{(mnc7C1YI`M<^q1Siw~L0xKKm4WRj)X{N;8ckwt`cLxPBU+}3puqkw;XVQZFcF>VxuAleP8(=KsW zuXoGqk=^~BZL)WWej(uT`nrp2{F5H8i+=jEHN3N9{Jq|H<;Uv`vO$Il(k;BZ2n(qNPEU$eJp00$GHc{Oq@QT+okHt!kywullCK( 
z7BWn>p&)3~$>AOH##zPSC#o4PkL6>Q?dX#}|aJB2kdiklwawE=9EY zt7ruf65kg((o=zQFj-HdI#f6c>5=Mjr>+aW?l{D5Y4VV~87DTDQ@cE~XHKP>q!Ph~ zXL)C#>N(r*MDBGmdu?a-Ux^kUOY2(ZGMBi5!z0kdz^LVP6uW3vj;(QTK~R-2Bqgv1 zM$+N^>;RcZc-@+e|8Pz+A(@^5kqKA1zKC?DoKDu6Bo8MSDlsz?|0mf%D5fMU`#)?} zVtg{%W)8N+BTQS@bDax}yCwk607f#sd}IEE%{4$6@i%xANNn!udhl+H$@{kQm};pl z8p*;=7hyeERLC+Y3L{QMeO-j)6#$YGfdqipfE-02&48%5s^yDaZY#u#6io@(tOZvR zbzd&!4~?+8M^dfi{DfOVW{yd_as{v%&>)g2;(^77b;lA5^{l|gEJ>!S9viqBm^Jq3 z8{LI)Fb?CrTAnvx?FVnZ8XbNg9U-QKq4O}#J*k5a<%ruC{&r$1CH)~7J2OcpQbi_i>G*C4lNNJ{mKs4asg2NillT2{*=2sjRf{E= zBxr|ii!_b2wPyT<-=+0S4&Ys7_dl#Tk0>7@VZAOV5ft7ajO`fG4Kl@lJdN`t>ZQFg ztub7E(Tda`=0kbfrkmN+!3WJthwge}*tsg}@&l>AK6kem1}qC<#XlCin6OjYkDuIz zOh{N4@b2#Z90%ufrioY@{y>zM!_&P87^cas8ph2v!0a!XGWLLwxfT*=2^gbF)p8p* ze7=lL;>*@xF)2MGSWCSI3(!i2gJ3T_lkVyxEcsJu^=dNQmu3C5;zcUNN4a4l%he_{ z>j8wpBI%crKZfU^3)ZvHCRQlbCi*iZZgQa^FQb{PEuX;_mzO&?og{sBB-t#U8izkQ z9L&|F$eRbZ{I()rlQnq<71IL6W5T2TSH`FYx3f7m_AXpD$|({-2`{#?hdk}lNm^sd zaFiFhb5vjzeo>7k_R(;^2aHmPAT>zl{4P{q_SxulqOP0O1?ZP=z-04gtH@AGdHO ztXApzNeE@=3I4wd{Xgc?(jyM30tM1hgyaNg1>O2V|Homp6zz#PQT%rF^G2RT=@Rm( z!36_2M7_YPEvmY@KskIr7iJ`f&Kt%~Jp z$Lozw(*Z^=CqZ-C)YM1mFyT~0MKC$^)AdOt<8cbn(VnK<#$lWWy2_v8V~XJr>M|#5 z$R`aAgP;fm!)ywqom2o{@TcXjDVJPWnh6HmCP7l*U`Ac#2P%C(U`OyVbBTzpVd9Y3 zq?)KMl?=QSbt2cY!=;IAdM}ms+xtfuA_!z1C4rDIQ%gmGV+k-ItAJ91ERl?|#3T-E ze>$C6L|#!D=(G%zi~xcs_DYE*N$CATXf6NFV98?*+te{rN+zh6zYSX=AZ7nu^qRyo zG)PEn2bj~g^kr#R0<28BMayCVcJ3o64X|L!Xkl!E>0R)T%<=%;AxmYascI+)BW$fW zBjDf=wzqs!dt#az5d;!syhC_3H3QOLDMRo6YL&?hVJGb207A#5O8>mG@uBb~)d(@9 zpz&EI$l1s}^=OnPjPQar4JP9M*pDqu#`0zx@$)LtVS&Yuneg?e|)i?ToLCUbowhPs7_ed*QIWei)ix{e*bO<)xDu2z6kq zNSDLychkg0d|=Z}=m_rngLX&e4&@fghy7_>*k1EM!$i#p)C?=U9}Ci(VXF=@3or+n zV&vV&zSuP^jjD$jxM|Da(+RnCB0RFi2XzywC>N?ouvODC^y%xd(*EuCu6yu!hO&GD zdGXwG#uygSNyz=l7vG$nfleGA7q;*|$G{jG;YV_ALpn!DWM60n{O-g(R}?%|h$jvG zok@K9@mfFsFUYWsxtp+oe{1BT28f!26862)jTD8#_&oXLp^-%@e)+!j&yMwU--T&; zM#UaBkg#A23B+s9Y6d$3`M7;h;p^_`_`da?w?hg^q8>yR*1bC_6uax^c42Pb!5KQ4 
zL{;%v!Ov&6`8IT0ld<%6_TgFCo^Z_1)RFsi&K1!=UXlPx9IN8_4_T%kF)Q z7@B8v6|-?Z16xBUm+^F+tYZ4>skZ0ioqyHdcEa^U{br}em8G#D=O-0Y+`DfSn%a0r|q#~sw7L%5Fo$R`*yvC&6h^(w$;PZ z@z`>5qFu{v#hgB7Lat@~6ZpXKlp=69q}59k1U^-DxLaFS_~6h)z99GlI3A7A9Vp^v z6}aA$C$fZ)w$0#~0q-^mE@W|dGH}dFhw#i6e#}A-I0--U@1wyq{3A!PhxFA5!+y@h}^bq}gS6F1RV}$lJpYo@Y8%;O?2`9zbu%kS-Ex8TC zX7>x5yOSWznBEE`pF^fHn$yG37EBw111}`{v|R+DHX7!G4erMe-lftPJblD|;N+m` ziU{{1IH~N33=_XWa6~pGRNfV9RD`E+m6vcMgwpRy!b=CC=MP0|49Kwc7Szj(GWSpw zB{FrZdH_<(+t6OdSjdMzVzCs1GVl%qG?WuYlVqk&h~IPl9unHt*AaTlKXctuii1}^ePGtO}64!sGFV?~Q;t(01x zc1q>x;(UzE`%)!jdzf11^2{Z54HEFOP-^5t_rR4Q_RchGVmgFG^MtkwXs6=!S@Phe zbTwmLL$#zQU*46i;>}z3^q=RkaQ#BE>Aqs+hA+tyHGE@z0enj;xwaAPNPPjE#Ng3LE45}ue35pYiB$LhdtHb)NtViy+EItMG{27-F(_8ek*>T&DCofk9cUbV))>IV1se0J(o^%-OM<(H;q}`^>XxR- z0f_}}J~&qgV!>0&7j6+&bdV8hBupZK@WZqJ=~tb-@bEmqQeSM1cBb z7Gdy*HsnEouJEpvdsSFk{~I=(paTgMfCGi~KP?=JnT3n>|7Fzb6HfcwXj}K# zTKnZ>549ho3Ya;?0RCx_&7Bbo@uoI^vHrx_;_#+hTKW$8s+(e%@f%26HX?X?sDzr%Nt*9ii4$?e)uo)9C}FU98^@%UQtlnL^L;yfNj980!sTk%rieN zELUu8$`*c5<)?;{vvO97|AJ@<`pn~sxT?5)6!6ZC00yE^d`UdBWzBk~iTh533Kx8N zd0j@J;Y@6487MyAPkCjTLGz7e?K!;ZW+b4`+VDW6n&M174n&EHYMcF@H7{jr(90Ka zck#i(fb*>ESlOv|DZwu_#$D6L>qyh!V^#6_v$TfV_Oye&FA-TmvG1^Js|Sz%PDFlQ zRN;yR33LE^J-c)6oo(f0mV6f460;fxC%Q;-s=|dWDy429ic-Hymw{D`pYPmV_4Yo$ zM%$`3b>gLAKS-t^e5CE=c)Gnw`gdi^mDUr-kaX|Tlqg9(Fm4)7^e0^Bgq@wBW>FF~ zaeuRtE82BnBFyDy+_Z=t%20!yJ4w*)MDL4wA+TK+2B!iiL$m#F*=*r$I5kfQ0wODX*O4?yx1AVh=*xusF@na`9qP;;TtYjiVMCM=i zDA{;$?~cWCibf4FGwhdNbxV5+@kl9SY_X5(GeprmJ9yg3@nQ&}^>F!I_9A`_DCAF` z#K4M(IX>gy>9!UsuC(l{_RFNjNW3hg6uH}(xv~!AQ)Y^>o_3@0D3%ugt@PJ}58mW5 z!{noofJ!8lJT`phxCM&xLz8Fl@k8#Z0>D)BQj6|qa?24aPoc!8frz^vaaqb_Y#hAG zQu3RE;ajLx424&y5%6_&5j~~&*Myb9G~`Qc)vmCm+~3}62evO! 
zsBq%^1gRJp%zO{^;EiS!A=^SE=CloMYz5T!@^nU9hA5*fP9p{9z~8I)pH}UAXsqDJ zA?t9NbnaC#r@BLX)oC|Rmu}!=POJkC1|1g$sdmO|s;>BIaHzhXF>qA7s67|t3L5BWN zSm2x>!wN`R;FPvz+y)nh-_3ub+}pmSvtmtO4eJV4#SilIa);1>YZ-7;BK5c_0pKkgz2Sfi(>|M?u*YvBRYMeTCP6uJZI}~%RD|VpH9-+NX1Td?LuK203y;r1bU$h>M!=c0mXx`uIpg3 zBTp5&njy?+rX)4{P&|Ox#F>#<4v1v<9#;{2Q1Wonp&{3!I|}m|?|2P2`pisSqa$tD zr{ZP;82ch@3@;i#vs5;0^(-n<_BS2@ekknC*qn?rxGsJ0*I-dx8W!swtB9`=^7D-? zfA=6L-189N6k#-4;HUnt8i`}BPA87$@PNjt<2dwM$8%xom%&?RbQZ_#+3F_74^d)y z(1a~|)xnK?v!LrAgGbAK-evR0<_|8q3NlG6;GLHT%A184utol&vzBumJr~?VjBYgq zwmoU@df=Z25skhc6v>Qu_8~iuyQ6EzX5q#NSbDJlzkn@rBG0!M+|b7J6Qx_cYD+@b z>*rR6JS=tKFqv5M5K{nGLT()TKY8A|63EfmY)xFvQwCokcYP+HZ&ft@2tsikoMWpR z7&Q`~^z*yVr~zU?W02O(=RS2_=DQtfL@kRA9C)|>XoG} z<54-KT+-EM7~autFW1i;=LFMp7n70cN<{jHVRN545g{LRT<}zkL-b{Ej);X*W`h#T z**F{R5E2vNf?3a1tJ>AE)!1wuT%A)OLs@2YHLP0zbLfzAD3&LdP9@r{3bI`?G^b>v zOrR~A-=_`Q*MVVpN!eVSgEnf#kkJ@k*7w`xr_kJM1N6xKNzW+<3#0pnt)9vgbWDpU z^Jst1-8`SjE#>$)&ucT;HV-XMHm7)05iH2u4Ne;2^PF#KUS2^;ATMEPZI};8h;xP_ z*|`|%5C4m}T5Kr;Kz&UN4>{6FDBkHr7*a>Fk^F1+AGG&s>Q6z4XA`m%Lfdte9v6GL zVsBJ=>cLtdN^Puu_DsT!&0~nt9$c!QEQ$HbtJLD9m318*JI`QF&saHiU5h>Nb!od2 z*Cd-U)ufsjF@f7;m6Xjzu(=+H8kV_ed%Lc81*J})x1n&@K|7s~@gklw|FKXtO%o8V zRxba+@$I#`{L)DAwGqy_e6G7MXdhUDyGZiPAjln%FfxXIG0vsS3xBW5CeVJ5WdXOc zoEu%fh0_^MX7@7;9=F3K>5<9d{&s2ux0;3;r#D1aK!BGU4DVE0{; zL0=y~*K_$Ud_P|1>!THR=6+RY;!Z^YXj?p+osz4Zp35EVt?-3`!9>=N3^aM1>I=B~ ztr+324;YP$Fa;mssl?vxw1|P~c%Xk*5I-7ZF+Mep{#?WBObQuI~V( zlRmX|pPB1~OZn6%(9QI&=%1bc%!W&egrC1H!dkMdLlCE*ZV#F-SVE}NuR&Y%gDVPl z8m71A4DiTKP@QoAzY>nt(Ed!XRGW#5^Jd*Y_Fj&LUqUXnuPmXE3rQ9e3vs(i`5BPDc zGj0OQkz8A(SGl0y>$iH01%KILZ1o{qe=j5y!!*;QXqRyBS4Kv6F zJI+&k>n53JQQ0}>jV+%W13u|2JGcu8r`cpKBesRe!BhK3Vy6$geS^s*?n0T=;_Qyr zv|#Ij6wxbMudXU=d!ljM90{5_bCfJ)FllEc**B?g;X>u34`QVRF8q^0T!*(LxP`EL z=~(}k(EN@SvG9Z)pp7N7tVjzFV8^9Tsg_I+-xOp7X*N>d0kpyrx8zuZs6_pH-L9$! 
z?56?#f_Lnl@vab1@t9KV>!0I%P^GYY0&Ww)L|Cvi%9O{o9=fKmwIeVj0P0x$-w&2xrE}#^ zWYEU5d(0%_k+kqBI2}kzct6Nhf)-g;RbK}jWU;pp)wLf?T5Vcc6Pi(IBEGJ5?}9af zwT=aq_8DJj{3fPwj}^dCUifF9Pq&8tt%TT|7`{_;kvM=FANv+JK%sND={ znc|;O>gjY%lePY(YBTgLPHO0r+%c>`&KWXE6tis6JiW{LjhHvziM(T|JES|7SOGC} z%i+ru7P%A2Oy8!CvrKj$fEnjDDE{vmGT<|*OG%=!VRUIiR?pg5W ztL7e1HCC-nFZ-C~9ii#(I-!eh)1qid%H2`uRvF}{U?yPo_KV?8MjhW*T44vs7AlAi z`<}ALq2&esm(7~{{iYc9C#6;sWOYkYgAOo7#I=z(^#>c>hhpRId&pG>mm}d`(UgD0 z_Rg4lhV}-*YSb3NqWkN5_oA-x$~lZ>18RYLN6mYZss4>C&s&>7NPy_~iILl;bVV?0 zf<7r?9ZdSKzcq9*K?els%7w>3)J+~nh!q#OX3I@X7?RBuNaX{jrk#mXgUWh{W!Rze zg{DS%f`#sMWkWIvUU9OKq>~wMnZCSj(AN6ARCH#x84$nv@nf>l%K~CfnJpA(e_H2h zNwqzK;gIEbU^DO2ByF?BuI*&dj53(DMRwJj-+K>Fq#0!xHWmE-j1i#1vk zAROGdOoSsXc=pkvOLU%mR1s3Kh^g7%;5tWSHZ6MJ7A{Qu4SS~YkN4`2gb9g)WGP+H=F>`y1s-1b=x$YCf zA`To$Z~cF-u%4K?WK%WV277&-me8+PA2cFOP)LvT0Cm58p7!4^<5(dI3}L+#Pe%>G zW+Il&IeQGvVXpZ9;MMiIUoUD~hjs9MRA*K+xSOw97{v)P4u#_`B}&5>rrC!L9FVV$-FHugzH>$5-a9nOPUD1ukC8Mc$8E&O?g zZTt9#fTGE9R3g!dvK^1mi1G3osMz!u9RXoJImiv&jPc?@##LZXe`>ogV8hW3d2WYH z!_K@BOn()(6f*3yO((&n`u7&drh@jaXqVk{M!+P9QZ%b?3T$xP$($QQIap?OUEG0q z_;CT6Lz3~gIu?L+F?DrD4_G$dqGq85vz6X&o5k%iA#r&0e+(nNyUY}gi?ush=K@o3%aN0Z6joIcE7fur34TT%k|#d45i6olMegZtK%9J`E8a zn4SYgk*4YZrl`(!LDPZ#L;58F?iM9Hc6lxot{oE)DhPFqu$e?A!b{O=ZY=T#?3}6% zJ16v%H`t=)#jVncgMN0yd5lTJz@sH2pPv91U?5?30Fc0C;SFgKT8Qt4jakg=FMy0H zNU7`d7bilkP!7H!C*VPoESiXit?R`CzD@=wPT=IFfM`=Fo8|96T@&U%HcokKAWpzCMt|G*gd`$6V42@96`rSf@vXq7~9=6qBn; zCOnz~%mo?)yuTy-7&*Ra{|~fqg_N9EY2xH*W*W(Oa>b;s(lH4uA*U@4?v?=;byF`= z)Mgg*W}BlLMij>iWUXEkFJpoXX%sz(pg$;R6nkq7LQQ(SHv|Fi2)52YrOzwOao6vZ zeBuPWJB>KVX15N+fA$VUj5$JyF|d9;xj;{6hMKYz_bBFtOirGEOHFoy!C9M7M0CJe~?&jm4NEAwHOxhbssAs9`cZN>)|SlYb1q zKxWXQ?dry_s8@HD3FR^wH1CSrl^bjeLvOsz>N)`9A3b-43VB4!(UcUGE4sRUoO|}X zBTa1Gc9J~r5dz;3F6Hn@eZ%{%@06mq888<#Ji9#qam(UN)H9*(1fK!vC2aAY-^CP9ace*)+n zN42ZfuZBCX?~3#Q8#-f&ny-l?r>EyhNM-tJFGl4H*hPM5%^W3~O$|MYr^luRjyDge3~ zJ3HKQyMcP&zhUSI>T^UPX9&MoXdWAo=6gn1Bx=-hR)E|&I}BeIjBrH^^9V@ZDMyEc 
zRGatKBgr?DET0)>Mcu21j)pv{Pg|7~K_JxeOFq=v__e>CfrAFjH7Hr*rqHNJdP4VJ zU}OZ4>>8O9brk((FSW}i_K$tGB!L@;ORpNm6Rv3#Cao7*E$7c1a*MRHawBv=hH(>S z7$8Ii2e8;Z=Z2FqV(LTV`J)>2OtghP_Pq}|P(IKp77D=IjGoZw1X8YrTgRI*m+h=U zM|hXF8exi$iAP(qdrxU`C+9+(Hmbvi*Q-3~+1xYVi#bLK#h3FacA==EV)0OVET+f{ z2j@z>JYw}`Gobz5Q0392K2MZ0S>kAuHp@NB1M-lSk+@fEbEw71Q*oOL4h`$3)Kls> zwE1!;EYa88FGtibH`vy*=yPK^wr!nGyrvmp=O4>WfX z0=2#*lgjv+FcTW3n@^*baiYlDz$+p46^O{huv?O^HX%{YUtnf3gj#>6GEn0L4r zkH}#gx-fN{L_!JmFU0&Q+Ej2ms_1890c1SnP7p*YEx9&nDm{XK+UXN8Atv)^IyGNw z&KB}M#6fITX7d^6~MxK3^tI+GJ|}}5EOtJ`5NXVWW~G=QYSXY{fHqD z2`x2>7xn*qSP^Kel-XPlQl+XW8p>jBr7Cl_XPBa(UNDX6SzY@ATqEztM_s<6&f0Cn1S*^JEw=Ri^t;9Rg+J`j5Tg*Kej`bEw>8-> zOSTEVC^{lxC3F58+(xx;;D108kCW;DKexof#QHz!i3TJG3k3_z*3YwV2uMHbD9j_A zHM0h>pVd=tK~xUpYPbMI7_;?Kk9TgU6Pqi!bd7&oWZ;A6V~Uc*jT~$Vlf-l(qI%an z>1AH?`i?;`isTi}|H<;qgdQCky!Z6?80}nGeK&q6m)XA!e(O68Gg>#HttsH9xx39N z;lKIxXn%et0k;FL2B~2YI2r7EFWB6~@z>!Bzs`_|(Ia(UG^-su;`g#X%02mQ4X9yk z`q44+2zUBI^E{F!;~CkG)tlo3WOmVCMoUDVt0&rK6i#7D9u?6LYE-t7-a4nuX+&=# zxN0^38;+_}`-S(Sf1aIXgZLES2MF0I^59u#Hcm-Dn^ z3wUhsE4e@QQv&W7Gwp#77d!{w%AW*w!T05U?M5j9IdW3;Grm@;lE1ih9l=-x!1K0D zJwTYfc8Uc2wI<|T!W3{Yw_wZ^hS~B^;XnA#qkJ6HyER@4w0Row0_WfV_3GZxYK(LQ zn#b>!X;=X1@86VCr09ssU$d-K^8G`53#uMvX7-+&wLpZ%5(wAjKe1aa0>mGSSOz;d zVa-F+LFc@~4?&)+UCoCT1;)Z6o;KkH2OxXfgo@-tRpO$u$b{6s3<)NXe;>iU;;2oq zM-~+>=#kTyTucjzAQ~mWG*SN)G)te|{6bcvY6TqCujjFxYN)~(n(I_6vhA`_(2s*B z!M>oKQnd{43{NQo`%y*4Q^XlX(2zfA^XaU*QY@*SU#t{LZFa1zmL_TW2?x#(n8dr7 zpeB{a3rKHc5y3xF54AEiq_8pdgrLCeZBzCC($Nj%HWoHV@M>jdlQ0j5dQhUgw$(Dx zOaf9Za7^akil8yS`4a6WT&SNB(8dviiggfFsuC>LHqryKD$1KfjUKR%lzCxvo%o79 z7FTD6I_2sq8_W!^e*G#|v>FAv908z%4YrNhVet+5;vZ=<247Uajru#$`1V`!dGLx+ z=`Obw{cNE?tCjC+!f;z7VSHDA_AC=-5`j^}X-ia20j8lXSnVn#StV8q{{mgvWog4+ zIid~x!G!-)cyv5TN_r2t z7I>0oCP>x9p$t0xj#JLC19FT1E?4-I{}Ne@$-Rj)yjD_3&8CLKhDDJ{%x_~RdEow^ z`{?xcmo#Q;EHpctNj4Fv+!@es_cd%0q#o-iU)GHsQnscs7e3+^hKJJ^gYQe5(yYc0 zm)qiS5B$1om;Te^&90fV*aGTNs*p^S9czP z4uh?Q94v_A=&8$cco-F8zBh7Yi$H|hBwX3Vb>Ud4rK!#M2AsL%;~_(JGGJsmvq8Oe 
z%6cFxbX{H1v8Bp}nWnvGO|CnWhm<0o6KlLO?1s5$I*u{IB(hH^pcj>(2QrZ9gn{nG zIsjVr5-+(c%O8_S0&NqaYA+iHd+@_;;$JkY#u$@5YQ&6(02Rn#X=>$4FsD}#a2JO>iM{e_EzSEc%H$~9cS-8;mySnX~wn0&8=MCDg05RX&?#nzup@HmgoWebjy`*7r45Uqe(u8}#$NbV(c_r@t z#q`Y)e9km1(-z{Y3_eH-!K!4?zvYXWhzIic%}AAxtkagqobcu%zZW@$3U}k<5lE-m zXk&drhzbZ)n6~AVO4!IfJxMpm0F&}JWcYkqJHRP3rW!lPx7L6T02Cb?t^^|j6`w#R zv&=w-A)jEz?!b1!%XP~rn{xlP(WJBJZhB8Aa=Sq%w$*rjyNONX?Tiukc;itu!F!93 zAVbQ}f$|E2LK+> zn-kj_3(i_V7IH@9q+6bp+G<{U7lSifqpCh|u{IOSGg#Ub_0(9O4Ux6Ts*TJGTDz=U46pmq^eHc z&BNDBc9X_t@Fkt0LX^d}lKCT#!SyIC;x|ciz*OvsNGvVq7 zu-jPExMV`;EgR6vBg&*%w+pW}H)Y}_{qazJy^-FE^a&La761rvFdrD$3w(3~+u0pb&Q$UBuje+_OyfHvk?MITuI3L`Bm`-V?Kwt;Vg{kqHCe_8qQp?~ z8j9^gWf#TaeHv6_+$HE_KZ*rO<$F*skL?vTF>y8hvP4Kos^QHj7xu=Fp6Fdm*}Qwx z%o{q$gcnnI-4+@n25c4AO~j8?6Kk=qeU+TZ)vn~ zfAEALi_qjNFq4X)`HRC?Js(L-XB*PRrayTEa!eJ?%*Q%Mk1?Ko zGw7&_sc0aa{l%nwr7ulyAv#?bK;&Q`N_%`eAdB32d&q$6nY#{39n<6aX5{lYkekI88$g$)yld2!e~$LlLl*<())Tln z!2|g@gV7e%JpVR~9~ckQjX=7x`tjEC34Tex_5`5pXqY-QHD@SsM(w!h>o*LHlm3us zcr$z16NwPH@uu;<1<3)=3*WD*P7n;mnV~m-L`6Cehe|uq<=ZLyGH+{Kl^FE~Qk$ox z^$NSY0zAC?=u`E&ekr{Ff>C}TSz}b_p9qT^QWD1iGC$K+v%$hu>hC$ z0sVa>Y4EOtaqIXOEi8PR=8dfO7dR6EPM0#&DPYCs1>_x~MyV;Jx^d>f0E`@MGOpGp zv;XVToG+s&)umP@h`Vp*t2}&amr#0c&a@fPc`_SOua~T4;!hwa8Yb9y?izR!-f?w=cSn zJAi$m3Oc*J=I=#HXfu+&Q=HL_zEW-jEF#W9Y1nJt29>CTCdov?qe%IM5nG zy=iK4dmey74HNx772(|o4Vwdq?_Hf4l|h9(o?gnQ59qiQkkEu>tWdtzO+=o_DgzJ* zH*Do!UvSZPJ}zFIQ{@GR;qYDKGa+0kS;E{PcvcoGq`OvS(!f_vEM0dDHo;o3cP~}% zl~82IH!ZD?EnAEQNUbNQ$rYPX%2G)5KZDUQL0|~f> z>t;X63Ju%lwgOF(EYc;n#T&rF_oHjB#oY`n^wyl7)V~=WC;sD$ zxK&j1K+GsTM)t;Pc`S4V&%4iesp-!&ZZ)4?Yyw`RfCWJ~#UM^NNh%UW0141sLFv!M z0}pKG;8engJ8_kO9bIa*>K`n4?FC(2M(uOODcr#mh|o-)?2GnM-TyA#KOI>au73iq zEVSOS12yY6D~hi^vA8@gXs_NQBA7U(ZQ?{y#HOMj6~%DoW9j^7* z3`R(&Yhs)=Fq}m;91^qsq6Fq2^N+Ie_rE~2>6hm2@70pP`P-_ve?o!IPz*rrDp*p3 z+r4ScgVop;VpmK*1c9(P?c#HIfxwr|42;Z`HNhQ#KT0XA0^1W4Q z3_<4EN0jIj>WG;RtkX~j+s>jv$sI)X=-`ODO9?L*ILPF!0Mxak?Z>C2eB-;5@9nGCE8b{JxB_(Y|9mk7p+LvSel<-FxcP&Kr(%o 
zz)KhHfU%o83BXs%-9a`b;^`(Q0bT0ZdWEY=$!eQtkH0Y8wmyCn(k_2*mMEK?2(A9I#5DEYkh;IlTg!s$Evz?< z7KZZj%LtXhC79s0# zFgU{$-tRD;Bz3{^(!c5}7fgvLy_0Pg7AaM0w4c4hHrA;dkw0o@Td-+4g&Z*)U!M&C zr?HHcU`6PJdl_(lpM3CmMKa38q0%Of<3e0kMJYr!)9Vx*U(9 zSqY#+DCWf)h_kf-Ms)*fMSFt;5{iT&--7?%i6Qx85+Uir(`;0C1c%qk%7|iG+?hL zJ|H%<>>wc-V({NldE@59_8#I&m^4dUuHAD>Ml(GGB#&csC;+H#0^&qe}Xif=vd%cYomBl)kKrS99|I=>@Hp~3Mx z^lYu%E?}g(zVF628qHPNlrAzL*7o8v(YPF9$(&TKh6iq#%-|)DOV*GLrhbnZ^`FS# z*)#b(WyDBGPR0_S`>3EY1>BygLTDdOa1|RB$+PTsf&EH1H~+uTNe{>+hu7m>EN++# z&bwdsCcnNx)II&^|M$gY|8FqK$o{`zvRO11v>F_s682n}D{iew0i0p?(TrFl-(s%@ zbEfR7u2-2y-l-1Z-jJB;79U82j%e?vXvj>?(YXOaq}*&R_ytGOtVBvBJqI$D_a zox&YfMI&^4^cUyoE~RFRia2G}Wb(L)+zA7S@5=fTtVFo3I()SU(4WmP*1y{WWSK#= zK|IrLOt@IT{({1Y@e^Pb@OexN9QQSD%)T#WchWph&BC(90s?2?YYw?W zabW+c6`6Cyb(ow6a|yIExLUHP(peqP8=RTZDR+SRaXHPZfr7sbTD&uLEANDv&?T}P z?qbfim7~8k+LUP7z84?I&}x32CzH#g)F0;PI>dh;@Htu}L5iSxNT4y8!ZI6;lYD~- zG3#plpC$b-iVecT^dEhs-vG@`yNwoP!1GUQu})99|G)$%N zx^=@Geo)s+0T<-nR z#p`|Ku`s9CW0Y!~Fdz}Univ@;o?4l%S#Ufrs5wEumeysJr7lnRpG#{=v89#D=++$B zAInCx(LLG4x+ZHOvC3V1XU&xctVja*!YRotx6R2)q2;s zHrkB8bR`StZi}?#gJ&w6rt2dnX*9f9nv(OEEa_8S4Jx$sy#R(Sa|?`Ro%wrgahK-5 z<)}4Xv-@vP>yGlOo6iC$KzRw}0yd-%eXC4S8vGXvxHMm9=oh%*-g)Cs5RZ|Zw}+dA?6#X} z+;TBy*2;D{>;b`Fj>!YFpob30=3O)NC{rv(XWo`o6=}knwj}VSk6pBEWrEL#tAf{> z*u$@3y9h4S8ChPai$ngvkNjhaiimY;CQ=W+Gwqfu7WgWcE+uTaV#E0=a^5P-2xJ;I zRvnXd`VotArhJN_#wj$Jw_|7G3jW4{6>&H? 
z_F!**SO=KVPf?Ah*SQpd=q!orAY?(j2k4jPH*2O;dH>OA=BaRK--=m#Q?zYvTemcE z97=+#B5T&+TM1B-+7A9`Fyv=cFK=4#`VFR#?esdhr0!e0Y@?udBO><+BeV?HFqy4s zHA*iTy=$eo9vA9G^>_WvPuB%GG2bLKk2|srq<{QkwA0?$MJN1Yw^@LRnpS{$k^j^5 z-#c@gmePHhWggDMG(Pf|#(zXxU@z3E7vK)Em0q1fwba{HA0j**#|SV(#TCXYCE?E^&c2vvxhpqejp*MG;*xCr zJgmuGQP*F;Y!|nZwg5Q6}yGb51pX$09nazcpD(rdIIP0cE%GMM(iN|5UNk( zV-PvnC6F(5NMhoeEDk2<@lZRcB{%@jHjl?Qvxl2jQ4DEG?#ps@nq&%zg6jSY_HT~E%oHVBEtT*Ep7Gi;9i7}K;dlin;C*ed zx070`dvtF>E8^1QY|?qUCn=AM>H`p}-YtDR-~rZj-EN1(qHt7FFhhWF4Kn3Z{5b7^ z(R&s9@N%6>XHKSgg64kbuSI5ln^oNJzgP2Z7MyXGRGCp&_*Y>eEJuO2fPww0mg+X{ zN|uz~V`y6IBg5Db=E2TL{So$(a9ibfm)Gm=wDkuw&NpKHEjZpoSF<&Db{t?BAvv^U z?KS+@RAF9Pwt5MswekT%ApFH^vi^#KFo*=k(W}}h&>uwxPf~s~Z4-QCWV6~FlkmVm zf)PK^>KwkRp*%P%d#A8$9kU%I+BvX~?AG_F)(+8tYbphnUhwz{H5lytNJvHabcm!s z^)^yN1M(>Flhw@pZp&DhENlSu+(}yo_E%qOEt;khy2ntb@Ns5-9BTFdIiT&cbesZym4X)ArFpa@ zC$9UhB7CL>-qDxS_YLVSbtAebHr`n~%>U?N8RbH4G?R82&*^X+8pkD5(k@L zKcMS2%7q=IFm6QU%EgyNo}t%T6l1S|i@BlyGQC;v*#1-2=b>K2h$rf?ny@6WMs`!FdU;1ds#EA}-$omfX*90%# zG58}({5EJy?WXct>lu`M;j3i@eoE+<8_uj_>nPS{xT#EN&Y0+CnI&F=?lxZrUK4y5 zvu-h-4oaooN_P)dAzgUxzd-<~kcBap)OEoIjo4Z( z-a1cZoJS+bB=0(yFDNSHP?FLPUH1M4bQ$TQQJ*^SBTR7a{U-DNt2g)9@JT9*7LiPj zSzt_{@gV4HEJ>Ubs4-+nA*P4f4n!q-|!!r&mkr#=8Qhc}%nkPwp8c%px2->=mP z*z`mZM{AL-MKzjBfyCCkxl6jZ=zZ%* z+gWl^y>vqXsr@uq31x{fo2TXHiD+VzbfThLQ3*KEVwQA{?Y|tyIN7axJ3fAa%Vysl zoL;`834|gR!Ua2Lg$#PwR`% z0JG|5|2tfe^kUjk_&i6+)TpGg9cOa4!@?HYY<=;CBAk!u{PZ!6`8%x49 zDddvqy-@Wlln#HS>|YE>1d#pvgS|r;b&e%t$4E_zK5dlHnTh+_y@@c$$@DUgukQ!% zbM$m`weB0~ChmQB3*z*C)e)a*ok}j%0kqAeo^6O!s`w_C%jlZ#J+&mu1ssgBfed&j zB7-YtE;5mEb;|-oEPNRh>lRI^x@RZDT-KAWLGO1bt<04kA*qgp>?BKW3(yP%L68}R z6NIt&i|*6%SY$!^3eVUo%M~e2b+RnW0T+C5t&0c%FRj1kMAhj*=s)25m z^(tz^KHj%J}^OxaBt<*m+=-4hRe4B8Ceqs;_UKaZ6FeZRKWP=ymia zJOZ~)g6u=P+gyZkxB`Y>uGzHp07pq??{K$!Ms5b7I$HD0R*LJp=4{%k3Heytc7#a@jZ2?!!d!0FT@ zRumLB`^hmkG5g=y9_M4vgIT}W?DL?^bF1sVuVh*~1TUeXhf{ct!A@H~yKl*u`)VOC zW^|rM)118a$I9c&YD!l%04!<}(wZ)7wq018EppDq^>WFH%SI**Xef{y 
zn?!Im&Nm7bt-g!%H<2_n5w5c|-`onACz655N!wtH3L6L|NZ_tGAOP%O{?`Xl`MEcp z*FtQsW7MmFEM}S|;)-b2BXZMv)dJN5u{NrawDB20w~ z-UPk0fY6A4kJk@)0QheEAj&2!Le1)FU(Z*drU&f6^_p=|+x{wro)|q|Kicl09oCOK zzxA7P_zzLj@kn6P*B+M%Z2xrzD<)gSdVUCsS%-x^Dp7rJ`kz4vT5d>T9JVHJiBemP zmY^j@bKj4)u>%OlGP=)Y;m-fObP?EBqqNh}b9n=(zoQ4B6(Zy3CY*r)$T63Mz)xWe zFyDEmlKnuUB?>F?CY|*L(#ntSwFx@{ZYElz)E8qR=t(iR*;wjR<(uW+VHLIljU`P- zbQf!&`>?Ha46{hQ^-6@<^3~D237)(=c#oXq+cI<4 z=>}omiN8*X2u6DPnMC7aji928_zw29?=5iwuRo^leoT%0$CMt9JMOD@?#~2--OohF z&x8-@t9K9AUE}Vv>gW)%eH{}I7#J3^Tns7UAG7-oq&S73;i>$Wp2iOx;PshlP-~%X z^eZ7oJt1y_mK1jqeVj25KO4&BKD<$Of|D2r8H}E*> z0#a9N&qdwabw20x@+B29qs_nb?cDVm69q~4HX56J7QdI1xqh{!sBBiys}huHR>fIT z%tSUl^!0N1@6y-TcUMnvC}zYz8aroY(=4X5nbhiZ7}IPdy%AP&&WyRgAh4(Q^V#}N zEJu20FSb?PoJTLA{-ml01p!qyhlK$(k}0m?Xuh?S-fnM76xGZwrb{3Bu@hyuvU#se z1G=1Y8zz#A98ZwDOYl4DDip^}K-|@@MW$@0G(QXY>=GjWvp~>1~uTeomqH(#1 zK>hccW?_Bj`CW|~G9`sd^^KFZ7scCYxS?%xq!X+ebt_4&|L~371Od$VjdP*2GIJo7 zU3G`fM!!ir#eRV2<0Ll`HVuvAHgLjAo7=uI`g#}eCiPZL8aLSH_hkC2Z`ijtUPEbr zPMTluVo^#W?+lye#GfuiQWG7CdF34#%@Nc^`KmRCtc@7`L)8;9jr}F;gq?koj99EPW+U~2rlobFdHU_?K40J1|x#CTY!xc@Pe5mA)`?5?E56Zpa`>|5BZPd!G zQ`-Z`<4Vm)5da0LL4iU6j~d)3L|_oHQ3E_jMWh1i&=OGj4pYlMXA{(?^oL?N;+fsby-L>KWeeS@(G_CZ1S`^#zFnuir zBA6=Sof<#jBLX3(C6Ms&yJ*hV-nT4)8%DI~XQ?z?rvujW@Lhorf3x7vr~)HmZbKH= z`{FoJ*_{EG%Y|GrwrBH7#jv!8#Su=#u986EyE1DU(Ody{*1jNQ!L6)A=GQ+XI3&}f zOL6F5kJj)!p^v3~hUiuz>-Y3l}1%W*;YFavP%f@`?3hkjt0wH)5!a?N*RY|MA%OZ zL72amG z6s-pt$LPm9Psge0U<}Exc33cA;&9?=w~_nXn>ewdUK_u_W(+`gR~leApHQ^6kg25$ znmtlJOakc+>NX90xqN5;!Y00ouG$=Pa_eCHQ`JO8vB<;q6>$EDh_S7!_wgR!$AZgG z4Fk^s1r>Psw6|Z)t}gJ32rNny6X2y}g=3G^>w@~>eLD~Cn!cbW%#R#ZYTTAgDgaE6 z0JF{3h8kcwTe$^-EF6_G9SY5>3ms*)oQg87ta=oT7gi`<{-~;QI>(Fg z9{v0Bl3smB2GEwK*2IZi@3_VCJ=q{^Rl2BR^OdDfE3~r{AsqGt^i{9BgDkt-XP}L3 zRV>hB&P!KbkYJ8zmpA^_i9Q=g&;%|+Cdg0YfYqzkiN5qzDr%Q$Yd~)ba4i%6J@4e+ zHp-Uqkr1)!qcPshrlEKlX?)+3`&+|0DRpn)pK)uG8^EGT(X#MwP02A_KT=c2`MKy9Um3+oY3HA6@(o(MIv2&JJRie`8k zvFvGq(%8HuinLg{Qd+jElD?;=3qx#6xda1VrR>xE?3wq3kfsGz!YH%5ODUT*Xb>7@ 
zk^xuCp>;}y>c2?QN2R7ZwnJZqU!!#%`+-(1Iz=XX`#Q_gdM_0nr1d6s5-{)ThfkOJ z0Dnc<4nXosWVBQ&=hA`u(-^B;+s)Iu>^_NOvxG_^;>lgauKClN%V^mVi4WJ2DFeS> z?7CAkz#h0A*{R0GUMrHNb+Z1a61TQ=757_I(Y!ok4B-5m{}~ZnDP~Xnt;}q+6B}m7 ze=xnMrjq!BympeFu01(4w!|XC04gJZL(!cAJK1f$+vaS1b@sROr{b zA%LfiLP9#gk`~odH{h8T*ab@yc*>Fnx0J?kWr*-rg~@n_I{|F!AH`5ykT%%MTQi8; z%*$bV%PyRBM?%zbxJa57qgKMeQ$01zeK3mV>6PvLz}uI|_;X^8l{vWYj)DvjCgFvb zBaX?B!-XEjllxh~ns+cyub;T(I-SDk;mMVNsGx~!q$eKJ%&kUaqL|T3Jf3hj{-?dk z2SofDPnLg)h6BT~=VZUJ%BP>Il1D7N+C>H#37N?eLIk2;Z_1#VWZkSi3WZ>a=`2Ri zMZ=K(?{8{6wKKQZJyfvVA}@g6%aCO&{aAxb-P@q5Ts@8oS`*`G&fHmka5?+US@B47 zl_E$TQT~;Mr0Hw4%_g55>)7ILhew@&G2V^j`lqckJ&D@`92={6HXG4%2Xs+US|_vF zSaT|pqpdZl*GLDovPM9|96B8;1>5vP+zA(I#ke|&jZA~>SeaLmw+eu7;BA+;a_MO* zWL>k~_*~fd{Zf#_$AA(7#M6$`(pupkG4ok^iAtMt(JI!?>UlJE6D8@B1s>(wX$ISq zX|i&-3`*LtizD>+CD1C$$p;f{^lfE@;6G67xXYTyr;ss?zjivbHnKjQdL>62dRnXw zedtvPD?JQk;0B<0bf18x^2SgFjYLiM2F3Ed{82tXiy3*k!W_%m(^ygEEW6c1Kl%Qx z>STa|T?Xq}v7=ySpNMCBm8K(eYK6r3lxfAhSWW1*+pND&>UtXBJUPGKL#8D8S#&fSF7 zDH|?xERhCB!1<32sVvd8nSsDr^J@AJHA4+uHj+#71}}6P$Y%I@14}g0u)zH^kc%XC zZugVCv_ailVK0Cem$=p6r#b=t1z5)|r$|N7`xT!1opc3Dn$2=1hc2dvSkz!n51F*E zzHeD)7vvPo{=RK<(CM-3j64E)Rzg3ooG}N_HkAdQs1%2x3~x_h@7YY(O;x}iqE^^S zl?PwCg<}bk9Ly|?pMg;nqLe7Fy9R+68ShuGkk?5RK_Vb;afg^&OmrCQArJ>UVD+CC z+*c9e>|C@I)OCb5*LS;fQJb08Z>r}lt_(7$s`EGHX9HOrf=;yJj)WjMCqv=3*~XnQU6_tX!@gz=IMP0zx4{At-sZYF?JVK@=-ea{Y&pAgbdI~U_mrmfhy03KikFxSEQmN-Ukk{_vz zia+=qOe9vgb_GL@7F+dj@7Ae;cTKbJ!NaH8^#UMk&D(_FVSo`fymO<~=&aZy^?jv9 z-4?Gz2YsC^n}o*G@cInD7=GS8NLGSrZ`(Zb;flg9 zMq+@$!Q<@{|+*(zzNg_q~z zMTpN;JVHw@;l6cM;T!sZ6h1K{?U0JM_*lP6osvi%sy**KGQ-1xncntjaI#@@qF%sY zD&$ThEoZ2eHRjQy)q9}vFlX^Q+bU95dmZrIZpG!LTpdPQvxv00cu6OxX4b;4?=(Fq zvv1R|Q|b9<=oE>_K!_}xV1s)dSbhT8?840+=nHO_tjWUxh#q0**%|1&vc_nE$ll4- znKbW0HUXat44zK4_*{lxJQVEbh)n94n$inD-^v`Rv5Ftx452sa~0@|4# zKZ#o=Hb>L0yh{$XnB&YG?l$(<4@SAnq#^|*4vbXU5ma=qF4RzHSfwj%v?=E24yC~} znlRMob@FZHI{Z3;KaXdFgNOi%ZJo>x-dq3llnvYd(o#z=P<)?nk+N3_~TZ+sW6;8FuHwSo6Kly 
zHqZL#p7&kJ*lYSe3|N4q=gsjRlIK-mFejvSe*Jy;I|2J)=B^1wxD!NGykNP_=X$;6 z{T{nhMz3C>_oI)_Nb|dSTSNN0(Y!#^i;tb4-sQ~Jd*xcD=6k^(`~*DyZ+0eA@liOy z*=4)70~YL&FG4n)@1NUyH*mUXPoBXlT4-d)BZ^T47XBzGI(NW@2Mw?rK<^`pXkf@N zD$mV47ZbT<51-}8A#Hl!9_%bQJiK2{rD&`|E6V}DT`17z0AV=+!uRtj6%w&@-SoBm zLUJwucTVJ_)lRj`-lWnvD+6Aq9!r_l>urp)zUvZO`%Uy$U0PiLnFrl_w(4W$#G=zQ ztYdm&(ZGdq*dIV@Lqyt<(2r)HrBK~ii0mm2Ok2(?uQnND2pvx#x^B&0#|6*2)P7xp88G%_aqQh1WB-A5crC>-hk8kMQSiM5c|JW8H zeS&$sh!&8H6_}y1T6xkwl5a!kSgKnvyd|r+?tfhc((!Nudqox9BpJ|reJAg1+qN8R z41p%QyT3Q`MfZT|ITHSTd4lZFNxvs|a*OJTJ8R^*ed2&p_$_(BdRrS9zs|@4cLpQm z{CYxp5MnrRjbTK8Fr>sEV84kWL4w{6geCek;0AzQ1Xj;iLAO5bvRICuLSKUAESi*vOV_f1B>;iZ4O4@m?9u5$#bj%5^WerT5xVl6Qh`%YPF|l z%^6TO!9SEC3xAiB(b6*g7pm`FQadT>g`4Vj#;n%P5NMaqN-qjqFXQC)B#(zj9Oa)~ z#8&lXo}8A;o8orhdzfjgE2Fzd*^s=eeNXzz(~h4D96YLZ3shQIVgU$xL@%g=9Wb>x z`2A)3z9xuf!=FTmh!g&?w>|qT_@T`**BU@_S~Oad>Ks{7)o5V@4i2*l8eTBlM{aS6 zI=}IGUVD}kfe{4I`IhH<{z3p#^jJj^z23>B?wR4}+9PF0>dB7E*my4(Lt4{W&w(~x zTKQ2CQB=dXGow=>;>jfFe6q<$rQN9&TN@5(RC zApZLu4wY}xN;e)N9Ku3bCXdd}KlBS>=q~k1HN#Opi60rJht{&Kf_WrYqdVxqx~FHd zvC7f^1wA7Thw+ywxAFkKOP(Wj0tZzO9?Gw+Jjz`{4Qu+OEtFzYdS~)u2t7cn$F~2~ zr0cCP*~{v=ffRwml+Lkg43=Hd z->a93&hK?Bef-X4Sk4)HdF?&6IBLBq)*8v+>I@LbDTds2!K;Sy%klE;3EKX^A_5Z$ z1LDve0k<5dof>HrsmL>f;lP08HycCUe^Yg6nl%1>OFMCqzcATz5L5+t(E?(WD|Y}taILPkJ9YCt zXf*s4;6hf?5)!SNcZluk*v2yPzXY*%Tza%7t7@Ihd?2y1gC81%>-;EQIX)yQg#`*T zv!tJs66YKqEo)lYl*WQlXQ$2g&(KVKS>gVCU0|=)udazMYKe+RK#>rcgWTyDGC7%N z+vm!o{<1~P>pmQ+D2(RxG2e_aF6Q9fGHKUzT{3UpYQO+6XeFKhS#GEw$Feg|6W#{eiAC_59|f7Fs`0srN$^W*~I7ihN0TsHkGFYC~K zG*GXVwBVv9*)45GlMh&4pSik5Qqa~LY3+Jz2>FGxM)z+=ysnTXaHT60aEY8B6;f}|R7 zk2akYu6`?;S#lt1>|DXK>3nf5eVP*ev`hfev>qXQGzVL4qXTYZN`wNx5XuOVxbqzp zTt_vP!~CcDHt9M-kbAOcx$Kg{u>6le^WZ|f&sFgWXf7w7I(GpPYc zF1e1`)j#XA*htH#0G5x|!K(A)&J}h~^2=Up_(At$Tb{nUW(f{q4hwTcR%W%GQRz~o zg2;J~WOY=Y_Ko`5K$Xf+Q%K?YvYb6|5}4eSK~f!m!i2)^(6)!e@?TB!>yQwmYl z=tR{#RaI14)flFk3m!PJ)S#TQN{_G!QGhKy!)~8^3Y8nwB1+M#wLB+tw2&GbKVP{e zmYctPuPqp-BP!7^$_nc8gBt@~u3G2^exZ@{7O?IGgfk|J32F$4vv|Hsc*i 
zj!Gxfb~l@#-P%rW(ugJ;!`vx07)=@Q3EUWNoP)97yHt z=mwJp%Tlk^4dnct9K^Sna|q_MEb|T~rFF?Jvv_0;VbRSsDXSD&nj>tUNdKK$duG!h zZpz41y;Hjy4EV=yuW4MxvP1x2EVcmmZ$~Wb$U5I!QJ91l3ktm~O~E8jz9Lxc@v;7J z5b{ei6LJ~V`1RU>^{VYCOg*J!8Ro5WCc1Nla!V`huir<_@4cq@>?5UR3Nz4G)HJL7 zOGJw#2pA>gBY427LP|Bx+2*Kjw#B413>mthtXd5JF37o9L#pyC#JCbLC`E<@x$apA zhS}@rY=?O40Lkvkj|o{ulRlwFJ7!%z7)QNVG{|ZSAEs8}tw(~%uVmv19NqL*uEFDw z1D2)Upo136>EU~v`6ATrlq@HGw9eLyfWV;7b1gT8w}8j)6_`dy*QXD3&Q3lH8pFLa z(#yVg=Z!&w3>WP1z&8X)`0|q%E4xNjw2a|Snpl3BH6^}Uog-)3773-;<3;W?2s65@ug$zF|nWTp2(Az{sLVDcEs&-Om%$N|M?z`6(Qrzai{KJtGS;(b+V? zsxkgEPW!um)CFE}ORH=(t|_!k3L=D^;=+H@ZN&S}42S!BMojNo zwWXc1z0HZ!G7haE5`vS?N*0Q=iM;L2!Di*_dSIsN{KD2FiR#4R%I02o!c3RtiUv3aumS8wJ3WS5BO*N1ugfB3~+ngFc z8DZ&0dfNwMZhO^Z{^k{uCb4{*h{X_aQyDu~sWV5PC<_D(_6v5R`W|d7PWTS)mN?SR zWlr`Q!LY7Z+~vz2b;if?8PEVq*x=OI?bd*2$iz-S{9STFLmKFyJ-scdym+!p7p+;U zZkCk=)~+O>C%?=X>0=$WN=sjG6OQx@|1aC_Q5+*Y>vW4lvQ&@@pX*Nj`85*7$y zRK!6%RmA`J;*$D^laG}C)$|7j^PcrLV24!EiXpbdQ5H9hB?vc7e&8Gw$wB`A$}lFj z#{b#7nkALt`)4aZMz+)*u^&P0OYL>Xbyg&wWwjcQ5pgV7h#nkFE)VlW_e_cOrKJW$ z+o68>hA~y*UkR?856A5-@*U!;{~uf57@SGeFq`CwZQHhO+uk@E+jxSFZF6Hg8*5|R zwr$;g-+Qa>kMH|;YJT+0RL!Z=r%zA!UpW&UXMPk4WPN%hM9h`LAnmN5+I**<+-C&c zvl&Ok!-c7w?Q`%sBzS0(oY<`R26!>eQC~9n(NJR6jbD``9l5W@XC)%&h=Q@ettZ1c z?>ryyB4dP_LdOD7Fx$zjM@B&tJNQEfj%e2~tM71Kw#hnd{L(eA>t#aCOd}`8+@E*j zt1u%6KxNXND*92q7W`i@_|PKQe4TEF_A9*3wk0UG#Bgj3Pt9~cU3HXeBoD*OVAitI zf!d-y&Z8M%v#cDD#JZ4B2?HQN23|+Q9gmfrJdoB{TxBO|X7;fLNhmS6{jb+YU&6ui zkAyG2=fl?NWql7how@0wXTMfp&P_xCN#i+{Ak})dS92k$7`t(s8zmOkE{-(+nX?GDX&7dq1{Lz&b zs0j!B{KYHEM^dBSP*oh^>O&GErj@K^m5FLLS9QhrJ(Nc{a`xCaOH=68v9+L(NV`?o zQNUw{M$w?%593JE{wj$L3`{M{L?q=LfACtx)G+BA<*iHXY{2cv4ujoXo4g-oO!GwR zb8U5c&wf|%ag%0v*^QSv5@PLvxj!Vo9r=(ty2iUdv_9C%=ow2Y<7K5lg3+FaW_Y9V z@RIS|ZI)J|NizoPf-$}P6+ns^V2Xx@-muhe!|CbT(&o6DYNpK${I%*WP{Har3kuPG zgh>6OSR$5SzK(jf+{(BD-e*u&j%A>6VxW70%8wvO;9P3X`&e2^gs4=yKY9xFKy_U` z5oNV!p30}pn>i7{$XwxHvfGPX5c4rns|R&ziEA!2-vmmh_MLmw*g`;;G3{;vj~~0F zDKFKsU)5q#kwLBjY%v0y67PH2rKBQOb&<=tv&|D`UxD_Jpu;qc;~G3(uJ` 
z*gCPJzG*gl2#w0cLBl3Qu}kVnnof~5*)W%MJdfQ@Bup`8T@H=+!u;_)btahPJwIf! zm1G~2W!~&>dk{&bC8~JdhrTrI%B?YX(!#DLezelNLbJ;A{ht4;1dQ-wl5W^}33cgn zN(p`CZ_Nk*rd?X*m&|Kkbj**7iyJZCpE&pT5ONSaib2?vbJ_4f(`U1*lJ8 zPuBLHAb&oB6w&3|MENiab8$@{*KKUk?oVK>k&xa4*H;AxI)jNF@>I4&;#;SQ9VkX7 zCvQ8bp9UVrHFeC2=Mk&Z776U~47RMxaxj%0^4zv`%37x-fnsG^>l;44wjg=ke0t)S zI~@yXS{&$$)~={no*Uw``x?e#C}xchqASdFqk2AbJN;jytEZLYlvsl0du_ed+@x0s zTof`uL8Lc<$l+@-AZAG5vb7nx7pJgPXsKNp=O0~5r?CvMJ(hHNZTc)P6-vPKF9yhd z#0I~`ho#Ub1cwHkSInr%p$!~b!DOmxP53cq!{->0K`VD69CF+E%}9|D>fW!JQrtIB z?hTZL;c8}qkdYyOlw)JuoY?UBzKYUL2Qvp?S>p%##lJ}<5W5B2p|*9{X!+ok-cri; z=4#=02F3C^sFc-v>zX39!JocXPZ&aF_s*W`srY0U2AY-vKRE|=F8WgFHsowo3eY0a z<U zIZXExz>;;mKsv8Ff8F@D6S2h*uKoH_<$skaZDnbbtY`h_Gg_2JvZOjqi~NgSXIxX1 z+WDgRVQ2DM{ZjrVchhmf`nOWI(!mu2?`lm#$J1+5LP5((y1W!+*}O#-Qn+LKs&86c z&Fai+I&`Bs=m%sn^9gJwk|7i9JaH%R=$azG#Fq(FDD*2J{c~OYE3*SlXxrz{0>WY! z$0C<{I%MI}5L219zXW7BGc_2kT(g2Wayo|W`|rSzciODX9{K@1<&EQY1K;{x2{w5+ z9@m5=r3=(c;mL2%N%xVh|78JW;rahggtQB001SYWhx@-y*)*m}06g&DF5)jzwH{Gu z%^+;mimIv4NTg`Wp?CnjdGZ1|;Hl@DTD5^7%sz+=O=utF&{FETgxQ`jr}cOeUi*B{2n0 z=#`@J8~oQU=YiKvK`oSZmb1lTpMX@*=nmWhsO+(z)Le5#xaCthyxN~NgZ4~Eql1AA zNxR=H!=5j8s= zdYM#J&geV3vS8>#%SPAyn>r^}Qo&_Tas^ zWUyag9?mB6%M`f`H#q?m*uOb+m3w{Ca0ZychwM5Arv?JBm~Vo9%(JUfDuS0IVC(2C zG|_PsdZsIk*NyPSBHeWVV<-C3LCZSRuQuej%&c|Qg>~v)`hlO5)$Qe++EB<_sr5@=CS(= zzECo%XeCbAheG%~(SM>zcJJ)53vz|EZPBDO>1Fz|kmu(!olJ)?izDc&2(;3bP;M_K zfc{lIh|rr%dWWdEyygrMFe)h`K&=!xoG~9820x&0qbzQZ?d((PK#x+a(C@mDHD~%H zLiyOk0RKi=-gf*SLt^f+e+b$lfBL8NqHpuOxrsK!lyQ=NdzK|5>q-nT$OrLXJ6Am@ z?22jM*r`lqzGT_TlIw@-RN)-2cI(KeK;w&PyOrUrAwAqFvqi)F6%+Mbw+w0+;j~H- z9l8W^iPBGZYAJk0KQA`_N{L~@wX?J`v~6ORaNU);y|dk-96WDlMNTxh@^ACe^b-8I zs^MpCn|OXw8YSV*Rkb{0=T%F4HKFL3-uopZZhW~SyX^DVw==vgr7jw;oR)-ci(yQl zB4$3u1&Ut+&Z@@GBYgcDbP4{5$9NlFd%X#GLtN+obaftlUSZ-UTV1YZb!pUlbTd#3 zs`n7E32;YRL4sOw|J(pPiP>D@fnEp>T-l%^876V#vqPL@j%=3nT*#qnXRBKNu#*WQ z9rfab$Mac_z9)NXsUCA!oPO6P~~No6x%NpdAxckPnkm$lutu%?!c)sO1Sv~Q&% z-S_mm_J7w8tuMRBzl+UR-co$JsP$Sugu-RIT$dYX9ualF)F$GU4ry8!Pzrz^RbEjH 
zZXeuwXNGB3Wv#l$?p&J_zDklCHOn3=(#zwpP*x2g)iP z-ptMKM>S0QH*@$FS_yOY-ESxq30oxg{=V&9!P;7aV0zEYNG6aluyT`d5GT47Co34x zyJ8nQtAz~EsEn&!Uq;q&_8q_$VgB&cZUlx`p^BMZk?B<|g-Z*Xr!t?~k$ugWLKUG1bVp3nlSLnbGJHY5I8vKUnmDGUdUwu=vV&f&ckqU+y{Jw%KGHgdVayV5$SA&lAuX$VOU3li(0&$aZK}LH=<3MJ!lgEkh*NwV!CX}*nBJ;~9f4boFkwkhBXlDR?JXR~VeNOpW zJI}Yan}u;^uu5meJyx{gvIpO9mM^9_B;a{g?jeWz%l2t=V>qF-|ve# zGFbXc-;Qwp>CgF}pvVBiL|E+PCN@6lwDV%YQjaO$w-@-X`(nB;w}wIrL&GlM`!vx} zkFPi1_u<*L5F+*dVzCJxSQuIk0JvvV%`GaKcLLfZ?PXRE!r=?OoumQBgi0HZ?Mctl zoCGM(akL&_W6~|7TV^|K%c`mCMYi0z;C6FcT40DH_g-2=kSn_*Wt;eWLH!G*F{)QA zT*q?gfB|L{1Z?%u>f{k1Ky{F1SW_~Ln`y zPiue9{S!+uQm$+af6T6#Xe7TKR0{?~UWUvP=@tYrqx$#s?b(zV6D2qEbqJK`%7Azp z<9=?BW6Z@dV)GGtx%F{NLQ?r44~mwYl?9=I(=?U5a6p)P!oocEHwr^pNG4{pQuwbD zhEkxH6RY}N^f8)_a!{gM0}qbkN(6lQ(_Ew?&=>&aPGD=1!MEBBB4rE)_&Vw0wmQ&cZuXH^1R z$rQ!~g}Thw#&&C5`@N9w5Jrpt{*{+0cU53ID{8v73$TcKN)baYZGm$j=QbT~_7(e=`6v)WRA6!q5rbNFJ*avZndGYf>*{1J#CUgaGi_yVflCU# zDY#AevuNQKL!IFIWa)UP35z{`dVg0b9}B_zCZ(*Z3W; zQ%iR1^&n8AAgk;ZTIhLK7E5sW;^++Qm&%k8c|`(V62bmbdNCnxYrO>;)?~b%Lr#n7 zN3x5>TL5wmZ^ISf40$+URRR=~)V~U-#UUc%QSuac_TRm4t6HvO>GmrrcPR*n+dkM^ zZTKvFIMlf%fsn0U7(rMXzUb=09EE9pF;vr-vaf|oka%z1dG?iIdGZxfasb;i0Gc9f za+-QzK%-vVB?zfU*H7!6kv(fMv&nrnJo_9PqHshE)mr2a%%>rM%ra8dO2f|RZ|T>Z zGmdVO1DmhiNXPkrLIvPB3n-WXvGLyohDTiJMrPMRAo)&SRALD?d=unI4x~TQz{h-V zorjwpk}gj+F|V-vG?cLz#|BvKSNYsj36&>I8XQlqQAiTgQe72v-!PY5X$3&VGsaxb*=lc0W&pHkH_tI2D zD|MbtL}-bPy)uhxG9#D>vM1ZtZHaint>cbO$H!!rUX=GY80ps#oBL;6DGs#?|890F zUI2pE=+l>BN8)DIxgy#rV*EO}7r7FYjwDV%K?v6=BZ0DC#EfW6m$k$cOd;C!*GMSH*A_(NO#%MtTmy8UTfurY|$%f&V% z(Km0zF$+bJ`hXMsY^Cb^wA#}au|~zRLW$bp7Zj`AW$gdt0sl$n*kuNwfwOXObN}b0 zloq&|wBCXO?CA+os%G!{4HJ+j^RB#JO@*^Ab9sKy_vlHCnux4cB(K`0|Mhld0vdTB z(^A|)p0n^Yfs)q&cVTk79s=fY2(i@|HN1=!_eG=1bl;dhy!AK-CvipvdY{PrI3Vx2 z9P(`OR5d&J5lMA=VU=^uS#f4odT#yo^afmy0_y+p{bMA|aASlS7D0k6-RbLcdt|qR zG4oZ8^+GDwg~w*SAL0bNJ?Gk=K+W?JEn-kc&5&AP8um@(uROCqd7<;;K|^l^ zD05#d2T}~79~Ta28SwYeVbGFEk__xL_zFRxwe@`+4MF<#ApgQQxs}e^X!Eu~}HqJO5}E 
zfkji)Q0CT2E7gd3)lD`62ij_a7?>!0iF!B$u!yl*_i{`uSrS^Z&SjG#lv zSOWS3_6q;L*tjItEVn}-XR`oTT-&Hq%5QX9H5W3UwTB-U7V4GNw?||hJSgC)%l0Kb zZ1`zPJgN^;GtG6DctCCE0my?*9_NyyL2Do??jQrk6NewN;J%b}a_UoF%V25)kn@n4NQP6nGEq^3507= zx~nqPIiz+-mq_Rf*%FO;trgaGVqF)j4|B8XZ?*^1-i_?*cB{edmPwq{7;hMHf7!ZxV43#N`tZzH2OoW%b27+qzHopa{R2<}+XYStG|!BLiiZ>!U7}6^f$E2bw}=I%ETiZArrN z#b-+>XJhG-cd#7h3_LF9TYuT+Tg3jRHrf;h?^}u_hyDozl`+h1F2x%!tPg zt*gnd${EA1E9L^R&vGQ2oaNoBJ5Oy;t?`>r=P-4%rF_=fe-0RuB!1Lpoukg7ur3^> zUoIB8HP%AW0fvA@Tg$cHzWB6ZgqJ~uelwBMLfxTwJQkau#DRbIk}($kZ)b_O;DX5d zd#B>jryNxXt-lRbp9l*bo3o>HfGrjw=B;i%Mu&c(r6Hkiv!dRu(dgx@VulA&qok!K zVLOIkPr6mlkpw7-r!2Bw!Z1t%%UO_695~_Q9b0Zo57BBcHkIV1*6KbJb=||`S!4FT zMNQ?3i)dNf<8_NJ2lljxtNxRL$~6>Y?Xm$6^ifr#(jLK*A9m#;%nNtoqSwF%E>W;f z$~}jSw!N7U9mgZ4N*DT(bIwa1RClA9VA5K{3PC7)vkK_e-7=|{QCS&04sL0tFxFPQ zqyvynIK|lBV0SJe=aGpgE6?F7p!B3@ z;yPD%odTpKOk)KK;g~)K4@zuSZ)Qes=Z`g%bNR5wnOEXl^HL44hsr0Lp|ZR!nq`{p z!5GDza80m8u-|$a)h2d=OR={V2SJlT$=8uTLE{ zmD*BUv@EoANR;2l6)TDF4XfleI&nJMcb`O>_XFdS9J7`FJX>rxUkP259vvR!rlCgxkv2s^=ml@@&~#`)PSxh(zRltcQQ{gS zbeSQ5bhF#-bbf|6c*++x%7b5VAu$sArN#jg=%Li@bGx^R1TEU_^Y~Go!-meYF9_hv zSOFf1wV5J{=#n>-^C_n18NnX9dQ6a|IR-+H;-{*}UB~{8;KR(vkhJlAQ2wSoewi=K z!1Ig`fF(tnCs_4APfBU?N0}Gt=@^(J1NwgNX$EE)MUR^-TU=4 zt$MuW36OICXlby&T#4}@!o6yNZ6l(5Zog56MG;)_8zxgWj(;kLBXogY6TbB<9<+5X z7CDY#lv`htM;9+;NVIkR+fCQgFXSGPY@* z!`0{MK#TmOd#15iTya@5G#tK`kStF#7)DqZN1> zXDK8<{pAxXl6fvn2Zn$HeUn2i;PahzK9T<+H7vmnpFLcmdB)5XV7Vth_2qWd!m-;_ z9FZDgaG<(Pee2_2IW?TTJ6p@>5#mqrn|%7H#8E{WA~Iu-k7mrAWHRkz3cdUD6;{{8 zOt#i`QQE9zXG!x%cwzK!bJECzQFUN~$MLV=iL&U&@d2JAGCiIt`4p&65pve33#f#M z1DU}yFn@2`2T!IL-h4g7bXwK%SFxffan0rzY6ye;3mH7jD%K(FknF5oF{O!SKP=d) z+L*pkX?15F0tPeCLNy+?LobV-tgy&NFB2O{ucylM#Mg$h5)}A7V+ZywC;)H`wx6@c z7HnF5!c32zpj@3if?nP@x%8AZicIAwM1!scwKucf#6&m3w#CX;!H12pe^aGK$~w%4 zX>bCw)Cse+K9NlzW`)4uc${^r=&c>>w;oAVI^WUZ8wau4cNpq@Lwymzm`R*56bdfk zm<7SqV9GaARLC(rE-0Qa#1(i2(s^EJ&HbE*(J62S4&o4Ry>sbBC=G{4)S(hZ&Zvdy z+$VZjJ8t{vXS)1Z?=g2dosr=_h_FK*8BCs`0%q$4su1z_QZ*^Pf&N)X9i5{3vY 
zVp=P~!~n9ak6%!HXzJ&_N;GvdNv$&-m|_<-JnDYN6TRq4*~DZTWHYd(!ksolT{^_N zwZS);l`YSrK88dB<9GWcUFocXt5MR=nu5#3ngEVxE`}yC9V;E3@Uo^K!q+|pMXaDm zAbRoHkSL!OM_h1;Y7L#?x35oPy$~_gO81%)oi^XGTl@o^#8y-^1KWBW{pvnrV z=LGWBXzJro_WieMaEgK9Z&02TRgA4s^ zuW&$9I%Vxk!4G(^hjY*SJKeLM3c*W9kb>7f9FohUw2O;A)grEw)%T>fA6+YWkK17f zOH@k#vcvMmFFD;#v&9&(Kz`!VN%o67Z4Z$gl zZVh8ZL#}r7;_C49OgLZp8b(F~ZU%3H{(Hfk2Hl@g7j`MGC`3l+@=tYR>k^%uB9|vc zmE9n3Od9kJpFYFf!swk2(S1;tW!+~LV1Orq(#Ov`#D$Q{>nI%Hk+>0>A>QZ&9h z#1Cvmz(i|#M8f%^+!>Yuo^7ljF;}l!l;wfaP(FIg1qn z;b#{ z{fz1hNlzssbcf65Q%UAD%<&BqjjizrwM%9JPYZ~-w&~PSBvN){JGrWkRE~>=P%f6~ z1L&(mD+)1gMbo41fk{XS+(dhhKrP=|xLlG9r^TUJ6<&-2QpUtF9!nu^z%Ss{_;M|qyyLjoM8AaTmeZSwYzI_VI;e+{G8#ROz1WJc=dR;S)%C@+wE zDwGiRMAnR8`zu3eDWdM8JXV*JCWX919lQb(oy%;~vtX>pD8d_L2ILpy!~5k9y}g5= zKq&W*%kj6ywm3`y7ZW6Jz2fcf`UvQaY!HZ*M--%Gqnqn@?)F-=((;LmX0lHo#!FZ& z>6PW>)rH0OG@+3_GQIv-B~~Ix*q^{YMhWnXbvWJpoL#kLG-e*SGtVV~_cGzRm_ZhJVWAb7h5BC|kMp(Pw)KcMPqY$=%w`lI@ zBA^V^FtKg^5b$E#9E(u8gcnKJ$;t<4Tl$=H%b9z&5}3|%n5-@y4NXPj6OR$3TcXHs za&F&4w3gwXK0mY#dRXB|u>-pzyAc1P%yZGg)ITe-BUx;Sk?0WgCtRseQa=DS89pk~ zu3iAt0Iq)=_jAyJvP3Ty}6+_#;(igpilt{paW8y%v3rnc$HHiZE3BmL(%LK&Cm$@z(+6{bZ9DPBLjx6{Rx5Dp+x33nbFwrG#QL5} znf6x@ih8{KQC8OW_X8DN-8cKKKx%OkRE7;(gePp?;lbym16TLm-!!X9Yf+?Y!z}bu z(+SFmv;xz)VR+vx^1$m`Vq&cC8&ja6mVwG8_#5u~yt$ggiPd`eI&-c+oJL)nK>PN1=1STj;aS?yT+SdiyJL z?fFq{MW1o>)!RYiFI;VC+LnTkzfq0irUF2Jx=N-f*#iUtJo2|l$Ed6{S^(n`ij4MdutkIW9_;KV^|{kqkQ3X@z9-Vam#RjTv&aN%a+rihnh*w@Tda&bQlr@#Er`xqkPqd~Ra&;C}2piS7awjggmOV<%st$66n1t;mJ8ms=z(BIc7w0(ByTYe?GR@V@S7RnmKp? 
z8`nWf3f5=xmMA8E!$M|-;kdl%9vNs&Xy9C6t1v=dMwR=$c9!emz$hlPL4<2cq$o+y zIF{HKxbe#;AJfNb{BRBcSQ6R;K*_J@oA)dI_I3Kc4w18V-gz!ywsrR%f#t^hyr;X2%W`uP=t~;mQx2Ag3x{_$Ub>eH{~EVU ziqHz%^}1bhuFmYueH|mKvuQmDj!Qp+1m-gx9j0-O1ObOiG03kQ(nP~7smjdTc0b9C zqG%`lO`1}<#L~=H8|$6}hl>Y@K&D}(h_hxw1|^WWgBCiUW@1>I=?h0OOit!TzMD|styr6~7P4;bKv@S?p)Y!B_^K&mMuUA!;N0Fo9$!7+d#lHn25y#o3 zjjx8l&)k(z4*vRb2$i9h4jK77`O3k1b!=huG&C#i82jg=!90_c8T;2qwHw8{Vpj;h~zsG!#u|3t~d3 z>rOZZt)E8Tm(1=^wx(J~fPxFlBu&TZ;TzNiP0StLpiPfTwqeWHD-kZzmbl%H$TJ za>+GgCs0vHZ*ri@Th;a7y?0hBj<82Caj`Kk$jjd874A}No#W#E!YdJjFwFkYyzUW^ zkv&1Twt)=WT4z;?IusY;9_oDYc`fQEVcbhDyOd{B7B#z_I#`1!9tEMvs;Ml9P-@tP z1*lU$$3NR1+~oY|b84}7tBYBkY?X@A7q}1s`witDl^np;_tgT8<}$1Jcc@%tNE~fx zyT7XJ70OyZrqyRUu)(?1iDWvtG%`Q1#$!uvavdWnx)54=b=}K`|u90(e)- zu{zpFn#MjfEg@+$tRxwVqVU#om1t9nK6iIHd}3vahay)Z%0NeM7fZ-*KN^o0fVE`pooyM5@U?oSFb7 z&m4D#Qw(52EnVwg^os4@2Yw`5M2t??5 zm}-GFr%`;dBxeNPkBd;_#^~Pum=93n)f@}mxkDbtxTa&@QX@hg5p75;-yc6eOY%$=aZWXK%Z#(X zVRv%RoYvkM^k(tjc94>5MzXuJrU-@8(>@2DE;`8wMePB-7GtV-py+=1wKvd~B{zrX zMKnn64=@b-9jsjU(;FG$LQ?r-g-Yy8-z3h!HQ4rl^zMKBMPjw~5Y-q-r|2eVMAy{8 zuZRqMxri45v<4I3_0zk9;s5Hziyi>MX-Uv{zD7B~9>P`HBpygUle$I58RY?k6Uzp& za3LUqG7d2WAiNhco*TG8<4Xn}hfN?jCYSA+fX=JG_R&{Co=n8?k6eQ11g1-!8VdE$ zE;w+DZ9wZ1h}(Pf1Zg%p_M{P2W=^efhMlm8wZV0R`AOSS$2z#`i(DYPTjnFBwE(F@ z#V?rQ1UQnu?m3}@tiG@MshbUiZLEQMD@&9{$;*mH7`4-Z8lCQ-m0o_3RTj-?+Jn|` zPs0^SQi^o5{F7E#E1C0{Etzbw?fIvd{SXTO_P;-JfzRyaOvCu`xKRL!8dA6Z!-^${ zay9L2sSi|LJ`k>^mRTj#b%;93C!OBRHN*gf=4nvQ19#A>Q9Hms5(`rt@;VT-hNLB? 
zvg{MQlcX8oDQ1oL#a+!LNic^SN%cXic|Wo*=+SQ-0NZebs`(GG>K|gm8bqDN<3Gg3 ze~6X;5Uc(n>evDFgh~j>TK{3#;~6Hr|G^5~N9<(AZ|&lqx{Xo^!w=h=9mT4POX5c~ zMnd5w@xu3^>G&z}PkPyi-;flNamMP+Jg@|{N#q0JBySC@-d}M_>;PH%H=HJK`v>Zb zw-t|%ghvfMtbNssFmQ*$Wq90?^3ql&NLcVO$Oun^H29frAmQbH=Xh^BCI#P4-u>$i zu@~WRT57q|Be2HH;dkCa7rP&b@g9>1ZgYF$vgogl$yr=Jw1?dNWBCbaRHA2?$Gd<-8b^J#+dL@N;j{iO4hEUI{ zE(m@;9w;~7W?2B7Yc&|>TL$e8$W~bZg2U&BG^&xremWxe&G1QYxKOQlBhA@aI<)>p zpE7tpH!!MZee-<+ZZ)D>YNV5%yBt*Hj6%jc0#ZNDj=UFAhz1nVu^B9T`)4)Ca`1od zumK$aaGV6?FAX1!yLZW%1w@Dfh{fobq7U!5eXwup;!09`I1tFDEBKVn z(;es-Sea}Z;FIgjExbI&A}plMoxQ=weIsr&kWSUZ`N;WWb8m4>C1Ot0u^taV(mN;P zIg0f1yOtEf>EKp4V%;Muq_ctMdf>)3@)|PjOz@%w<-Rb5E1WoFOVBzp`-+PE4Z%km z$m+0?^jUDSlq}3NV%pq6fjMmZgLKe4)*3xBO6|F0p!-;WiK>A5u|( zstjs~7*X&ray^->qXFmm#tNNzE-N$(v;)s*R@GUCg|1aEzko$;4`uw9TIOb1$gz65 z3o6R%jZ}G=U)JvGJL$)#Q{3$URk^0_P%cu@**h82(YpRIe`UkC(T6`Zx8T1YJIvfd zcy1sy&8>JiiB!Xc(%-Kr`Cy(nj&7^UCCe1qvh;`$x zF)NTL;tnVHfVCQ_`Q?{t$!>OP0vm@x9Y(7@U(arsho9x~u&LrT3QyG5ruEi_{Lrx7 zTOoUM>T#z88X!?G4GW5Bnp!Tz4PxbbIJ~iPq~lPB=^%@kVawf}rn>x)H&^du5bOIx6=Qo+MI{RStGB7`Nl?-;gYmnE3 z(_wc?N*R1;od*)deYtr*45NVB=K>s5JQD!Hy)u|mJs(FrG&4X~{xpdKmtt!M{Hrj> z$((1YZ^66vOz5Rqa4(eUNA3I+;RueOD3LPyG6vdXXH`|0tQ-&hm~%)@VAf|H&kp|H z<#Jy7(gS2Pm9k6YbKP^vo{v;otkWFDspEel@y7Y%vVfFw(L200W`6L8;YyisIzO#b zgC^)B5Q0$`qFNUqlSF(hfBi{7Za3bPgmzHHe8Z&^$TwODOVl9B!z^j1-}7R0d+nr0 zcyyNiD^97PW8SRVXGZ~L|5v`ppDD>xV_-a}+~P!(uy$PI_qa}V?IG)zDMdkFH`Js) z{kW`#kuDIaibQ3v*{_!h<7One-P1!e1AaC!K=oQZNcFBT!` zPsN^_-MD0rxkh1?T%9|w+4K*A-NV(7zpg8zdVZf><$HyKNsH>?5k8>JBQ6|40vz;} zE>c)u_qd^KU!W3LkThLR*HSf7JN^93jKlSDu7p5lE?ChpNPBFU(!?p8=$EXj3;dPt z3Z7jys$V?2E^Ji3>FIytT-CG&i?0rp?wWk*{_&AHte>pxBAZ3+UW`!YYrvH$K?rsb z%_U695Edp-k;=xX4Tn~pg~N{lM))rtQ77!7t%EGPQD&347&E2CqnkM9$s#FG${DuK zc{bpJWsQ;}e_5=I(Hxh+m!80AnOyLQCYP8iQ-^^+ONTjxVhOOU4VqA;_h%RmjG9Qg z@fzZs)(#}%fUARx(qgcw->Xd=LAuN34vuTgdUxBkhZq-m)47Mk0DkUcKd!s2HuRJ} zYN*fcKTqZjY&@Nd2XC6E(pWQLEnQ2=lOI(;Q%I0t!*YT;1;?RqgC!WS9mWrs{T zAB2a#t(#h_&5Jdyq@vs4uYz*|#&PDK=tkN3akhKf-rSWLcz<>^2A$12Z 
z6;Px>QbTElu}DeHkbV_uhYAg_GLiOq9Mjo@h3@#|DDzagfe%{1U-kef=hdIe;B`Pr zEJ4h}p`lqsUr>0g{H(O}{!m#423`g(EG;44xR>Aiz-Mi)lzA>Ld`5;1 zZGxdT2=9;eFK8BMt5o zi8i!ro)Z!S=0jgH>(wY7v`<#%-$&iwpVbZxyI;?n#I}9bnG9#2!H$+^v55WeH_V{w z0prNNJ;c%0%D0Xp+5uy~S_HS>3=TKh@N5Dq_)=XBpF-$FE%?rKmrssth|@g_{Jv}h z0I9l=4hj%RkizJ7Gl&rQqx`@<0h(XWe*@lfc#)Y`0+@eVEbtaQ#<%Fd081D?N!PY0 zqaTW@p^8NwHZ(R$N%m)Mm@Kk=R*&wb-`R;LJ;anQXsc&2yj|QSc|5#?;dOtvAMw!$ zDO!@V>M)_+jy8O7Q-pV-dnMhgrqEi^x}j;Fp-|%8TB`#8=}tAod5OS?rq&qaj#0Aq zbSH9Xyz?gr(#Ay)XAY2zVH;j2s6)XZ`_5=nv@$b2b&3-Uf?8No9f>_eU^ z_AXU3c_OU?IKJu-c&@sT>n+ap%P@<0xyyw<_7*J1vk%k(GY+z2;bl`Deu z6E}%}Epq~f{EL9@{WB+V3x1yINb-!LMiS3&(ueHMV#>z_f~q_LVZA|NoGe}d6Sw^Q zp;X?%Ki2f96Cmh}C^(o3-B*_A=Gpqbg5iB&0_r{Ne~pmY!6XH#{(44@7S*C#BBQp; zzbK*q^wnU=0}rIb21cg?kHhi#lEHQx3;;k+=#Mxy;$dLuPHYw-LD2KA@j$7RLH&7`a^0>Mxcfr7=-!QL-ppHEL(o?gwS_#Y%*Zs#0pcmxDhtkHRcWlFj&>|Kx@i#k z{|?fy*!F>!8j2giX~%O1zc8ItM$QTn0FT6G2|w|8kR4sXye+|9UtF}f;0dV1ZtWz6 z$($m*#(&3Zg9Y>K*DvV+I|l8K%L=tx>~#t{Y#tRD++#yA{LO_CKRp7n+HuGPZcj|u@ z2co0wa}NaX`&!2~3T1}Q5S3^@UAFs_1YbQx=1ZMpd^_MTkU6w&tlP)g(MeQ2Ii6Vb z9Q;(~U2A5Q-2{pIjjw;+*NkX77i=T&XNl@ew+CH)%jPpbsj5rh*+H$fm4WzT@Nda? 
zhNHmLv#zMH0JL8n1~_WJ9k%=Ojm8v_B~V3^%ln=}IngtN!^wUs)aL)Nbxy&7L<_f$ zZQHgcwylY68xyCaiEZ1qlL;o8*tRCt#QJmodvD#BQ`Obg5BsUBd+*h~*ZNjwT2 zyD&+XWR4)%9^YWa(z2VmR@L`Ax_8I)i8C|xj4;;?!VcaxS*zOP>0E7|hS?)BPP_Z9 zIiz(L^MiGC-46L_MvtBnG}-on{5H~JF(0kkOsxb#r2<`#ZGdd2%G(R7(%< z0vT%%%p8yw->z?Y{$5BVrIqxanmQ}b*sGgDT;ko~w4Dc~D4v4??qEoNuPR-ne=@ zB9XtPv21^FD!91Q`lY~WzYyVH(I!rm&&#i96BK^Awkic>B|etHpz`rm^`nHBZz?Frn+I zRH7oVpXr{Y=;0Czd~)57^O|%D7Op}V^VmZsU8!D}n$`)^g1J2zz?&QAvyW&1-op+P zv4fc~$v>>KvDdg2Iit6jFw~fk$1%N@2Q88vuDdM-|2(4+nY$<$mY@V;Wj{?}en@oa zU`LN`^dPb+zsYWcT%!t%(tzoZdz~8yanyBJVA9OP+t`j7I%d zsnT!`TcXl%DAux3Nw-|;LSM10MIa{tC(p=!?koAgK29NY2XvqqwOo591FI^Pr?Zvs z^R~$EpkGD#-=WD{Z-x?59iUJLLOl5qlu5#ulmwQpT zh6Ey*$lD^f=q%t>eEVjU3t}EXTvZMlr@WI~ka&i8USn59XA&CwYDv>3xn)BLuD+RI zj^QR0)P=Zs1@Ip9PFaP?#agW>>zX-h|Dbhg$CO7dn!#1J{jH+}vgE3r{rTxFpU+6Aqa6a6v*uFbH_zn9duFVuvchE%gZQucOlOHY~B#-~G9~Z@W?iu)g z3_t)mW7CQk^ZaS2TIYjqU+I8ALI`s4zx~4 zI&MdvHctJ9ki346Vp;GV+GU|&4gr7_|LVlSFMy*6{}rcPH=gelqiK)ULf$V@5rGnH z1@aX#1#D4lHlrQn4@5`rMYkwip*cA4&n-ARIffNsc&x}{9?(R26-A$zYKkX68%0oQlm5Bz;E(I*Jrf#j1&R3Veei{FYBP(0bU zSKq)O_Z=nqA&(RFDq-oyX^?Z<>HlP>&e>nNtoV|T$H86D^qH<8(PMvlZ_vrT(QH#2 z%Vw>Yq7*vD$nBlCfGvDw9COxRCq}dk`j?c$=K9Q(!YC~a#LA-b_dilljv@$W)C&*} zd;nVbvlBAg*7;U)(wD!*;P^rK`IYIkCXky0UWyJit)5 zFJQG8(})7;TX#AvRoQ8N`6f2Km)Vuaw!!Nng4-ZP^G`e&(GEL1Kcj7Vnd|tzuDPi; zwp1jnAfeXWp!D8+k#6e3z4W0pELGQuwe(QYJ%2SD-bBeZ{eH<>eNo=+Z^>co8t@?k z?~vz$UggP%5203Wqpk-M(a?`nKq@RY3&}^}=HumPH4-O!u#9uB7Ko!4IOFJj|2;j7 z?LddTtwvZ6%i6^ULd9m?*TegMfgPqdylO=l+VPU8ss-CTW#ea^rNGnaMsW!#wtB$} zyql9iTj$y+f$wh2N#Ih3B@m6wjCs{PuLd3mT5R_3Qhx z(jETAH1%*#2SOpz_p(9BviN#zw1=oU04L1ki&s=}jnlLCL0c2X;;tKFmGQc1yO2zZ z1$i;)7r_7StnqZLh%!Pi(2}&ZDKMT^>=7m11b<8Z6OZf*$oC`i+h>+m5rB+&UYQp6 zt#ifo3fW}YeCt;)5$T*(3-^>q9=M+gN3GJiuH0r7yuKlHO_lgE=67h+3i<*ql7Y}1 z;{{)3)J@lKDWrdbIx}Ti!~|_CEBd)x)bv@Lt_)2vEi>H0a7sP`&hMZ=Ojfha?1D0T z$bLz?KBqKRAvdtPKwq3wTLB?p261;38W9&mbTt~buvE6Mt-p23=M;{QcsUuP?xW>P zE+|s%)3;6Wy|uk-P2A1-#_fB{h056BJMORSFE8k2qzz+^rJ2 
zxk(g=*H~WpgN+%t1?d8yUQdXn{wJ5@qh(o$QYcBjz*Fp`pClRYJ*;-xCji2P)zy*~ zhy{TG%AIDY0#1#@`3;U)-rT{`&5DGTizV&G|Eo+xTfq^Z3#I3(j$}s82PTbJ%tfo+ z`VO>6zGX6ikKGjY~4Qc^-0)1i|$7u_Czg^jjscVWmJUF74)(56i9>e%rQ&QiF`tNQw?tT;hnOLdE>zm%#0cB z$j-j+;)d#EV1mS(T1^_L2>5YEj>=Nfz?slr7fF*v?>1;4^~mV`QO+g{73DGCk&|4S z$;SoyOiIh7`Dv3~?y$bW>-*O)a@jXiKfq_gi~;+YAK_ zzC5`GDN3=iHt&Z?yDt1$sbz=6)XWE0uj7ZeMgpFNGFHc=rFE4Gm*S#8_6 zRY}?NcFhSWxWQ9-i558?EyPF6$H4_e@}Z9RM!d7wO^~`W|B#2;C8=h^owHP{dqJ5qH2R`8xN5>X$Xh!y zCR8hB>J_GjVG@077;3nJ7DQP(dSR%x5&J{4DfGFARlx0lU!+z4kpe!*Vm`MGAXY;O)`3`P&^xlPO95p5ogb*%0%Bh%fwZus z6n5JmfXB2M#p0J;Peu(VQ?$=7&LrEr+54lSvr(yK3Z3hMyOJY#T~F7K7NN5>A?$=J zzicm${AAmG*wz#3j9_BRUP2JR5fEv6Jzgl`ZG-?)2*#?f_l&p9`5&PR7iYeGbzhHs z4RhmQ`pu1Qf z!mzFZIt$ltzp5xlUyUQZZVh3*3kT&69n1OF3g;9xbFA z;*!zeXEyVK3X;+d^$i$cHH zB)Iuz<3Dw}BMs@d-JPv=@V zx27$z{`&T#OpRrQA+wKJY=ij1Fd7RFln2*khW#AHkxkm=QmP=s+By*<<7gkhk|+jH zA{<*if#Ma6qDI{bV+Ip zGUwx(bpEK`QP)`9FujyTp+x7p$$$A>j-7tOu*X;M5)`_}MMQ!$is9pw6|<&1V2-s4 z$%_}}#XjS>WGx$+X^+mfDuJSI&de4t?W>7VUdO4ajwQ|P2+&_ZqKlY~V1$kmPWfJh z0t5`fMcjbDi`4J(x^c3pniC{jM#d*=m;{Q7E6ONYfW}G$m_BP3sL5aN&4%m_U-nZ7 zceLO(`Uo>QAP)_v|7JSNCH(&9=e3FswfDZ8>w=5MMh0m;Vg;`Ai(mi;)59eId3Zs$ zS!xs0kN2|2FGrHF&`=(UX3mjGrwpnXGzThG6fQj@gdE{-vaq}Q)6B{W8hy7GKOU@5 z3$z)+gj?_kUrzb?r&?GL{Z6*mE^>TPd~V|}#j}_Jy{~9PJw$fk_;Q^>iV+B0J}FHW zEM%PV=AVj6ZtZkNjG!L~1-h5O0;Iv9vggiK{@+ICX4Ed-$e)egY0l{r{vgo#A_JaO zNbzU1gx>vurWhtB`#Be{s24svr_I~uFFAk2G8uTNcCV0?Oj)(lK^e4QjJ>$g;YTmU z>JJU-^U+k8*0tQlkr`=-R``?^JI>gGZ!eOJG8m>n7=(kv+^85rf+}Kww?0vbL{_dy z>q;~B&@kv;q&TK^S86|zc&v!AAOsRn4w%%tcq7&cc_$%h;Z&MfVT^l&p>XW}>;ARC z*ilhQ;4iZa=_{=pJS6b3!ynYi--KgKN%T&+mKb7&9mI?)ecY)F)v8Wpqfw09YC)d5ZxI*!tnzfH@BPy{GT$mDP76d*!&A>MUBHfG58QE zpJAAYR(SO~^?Y>4D5G`KkL|L?Oc49PI0n+ELnOY)NL5+WVUX=WMX^3T%{rwAdA+C6 znbC<$4@eXm6<_Izk`cRhE2v!nR2Dp9l2PaT0b?|}{ywKH-FEa9E^;&>w|?)snseyqx=30>2E!T%XFS4?iv`&rO-NSs3uxGIco#Ca!gFC!<~WYm&K1jP`grOUlq>vWSEAVdJ_)c6Dr0)~h;$*{?SFiU@*gvg4p z0doTK#7mDkv>o6JQxkBFl25ecEWHG z`bl_g6V7HkpLCAe 
zb6S7GCNL-|11jQ_^6jw;m4zyb9RzBo?`A>=DS<`ka-<|Fom9l0btw0tGddi33M=9J zupb~^O<@jNq|N3ZV9}|3+)jgsnwOCqPXo4pUgj%CK{1FbBp`ejhHA!t{bSTk`%`(# z-2UIXfJ_l`_9-%7+f?7TBg^G(VL2S@B!aiM<(y_BcI^)wqSmSUHd|ONVw9v zTwq9pt0(vmb*8Zz_Tr6ookK=7cc7i32TOV%18PQAf*OL4#n!iw~PGGvL;AU$n5R`MuI3Rz!&8 z#3M^AU24#hD=V+9OX^gfjVy#n_lHr9a%X#MkFF}Ty;tth(PeYqSab{LGA6&uMnbkFPLH9@ulEg@o#Y+`4VTk0cTa zpTCE`R)NhhW>KwPDU^8>S8_ANz#w+d+LkwCNt8Vje6@Kb!Bq69S1#_K*p^0uv-=^g zrl@Iy**-Xxzo^rhjEreeqaO8#U7#*vHj^f2Iq$sKH)gVLeZ7CMx)7Dd%Q3Whp8@xW z&7twtA%K}h+LZxE;!B)@`Pw#gcs^E{4$XYz9FpKcEybL+H0Uu0O#gma9ui_ED2gb$ zJfxR1?Do^&R^er{d8SE@q$L!anY?WX=8tZP{KY+?ZDvh7NBBix7%@0nl{B<>6(pVL z#agXpVVsQ|M5&fv$-~#ZMb`N@^tE9fyK4|mpgd$Mc>5-d$Gone7c>`m7)sN(@Ta2% zBI!|LQBfEPL97=3(+se>Xzl)Xd96nuAO%IdgXGkS?M;(Ej7hm|msEZ*+$!5#1(5;B zx|KnS2kWgRo~a(pph(hJ7(V=dZzvV`hFj>*XrCZ1F`G7OL_>Z(JtAq+1wc~hT$SeT zudKca1=|NN#@O2To`$nG`4Xp;XyGvDvJB?N+ z61)Mw-b)_V)g)B5Gr{UZ0EUIm+w}`%6&^Ea)jkL;|4w}6tL`>_+1h{R$#%;L)|cl~ zKtL05eg)p$qnPc<>0uT2=ZF8pN7&a#Iy3Omu=+2oz76?9-v}9`QwDz}t^uzs$VrfNQ&<1iHhM3!n?U z#5m&nN092V!7zl@#4AgHxAfuOqMX#WLo0zR<@A65lq^QAra&+@Xj-%zzbl`(g|+xG zyYAXYgTHjvmy3*oC@-E@(@_xr1$9j1-3oUr$vSdL>cBVv>H zZ;*SeZ(Ay&_Wp>UuQivsATkmH!ibHg@ek7P{kZ$YB)2Pc8#y}WHM!91**e)D+1Vkd)3ke8{nlG)NmVMY ztX6`fv=~aTlgSIWs?xsmgUq>dLi^O(&MP- zdsWHnoflJ;6CD=CCmK+xB6E@8zf;*CP~FjFg-~*R0~`-oVD}%3?k9B^mR?kGjH4J* zO75(3aSFJu&*g8E)HUy;O|*~rTx&GLZPaSN$C1)jcEyK5KFH<}M^7Ttgg{Sko5Em# zVYq&ZhYI@l#9i-O{>O!d1~w)7T3Bzgo5gXnmHd2#hyK}X&g zo@^56g`7+h?2LgPEpH_Om3LdIw?A*Ext3ZVqPUJ%LWK4+t#sEWT88YLxPHs#dj6fj z4~P}#Fqk9+5+p3Onf4x9BlvEa5l3EQ+o~+WNUSO_Hkyx3GD;_ks*xW{1fv=`{ zZUq*6d}tP^&n_Qs6qM(wVO*B$5uZB}1fxD+Z$IhL^3DK@KfeG%gLcG>ODCmdopDll zEl$=j8$Y3R7$)Z&%5p_Osd2~=$UM9h8TYYpbA32vr2T%BphaVNlI$pA8*bOe!9KRR zo+m%BOrPNLtmr`)bBJW~Yt9n|_=jQFZ0ZT9O-1Re#@>Qy>aVVuISaIT%TMaAA5+L# zT4?>TMb@YEsr#ATC~D_xWaI1Q^K8uR*19+Eyz*5XjpKrZVl0dtCYT-r4O+G^;5_03 zO=eaf-ldMbBt^a?;A*^Rv)sn^&cmh8LsAi{RT_V#v6uUi*^(*5F^Q)IWGFUkA$w?Z z&6?bUrj6J|l>Tl4bX>^`^rHAVUHdB4&m;kR%I~l26hYWIuJm_`;r!S|=~lnX(%0_n 
z(uP%-x~lLShslM=U3D+e7FENM^$cYsTKTbY4^lhvVQ7}|kzSYxP0eow&iLv65!(2_kUBCprfWi3eJ`JrT&zP%*Hv$#tCP_vY=?e} zO4L`=kaEZ>pBSn+Np$imS6{Gn>M7>=Q}vtkINk{R9@z>Bd#qNRh_N%Jf$4RvZvdSh zW~UsT;YDc3xL&g-35aO=)FtsEQ9v0k=84ah7Z5$C*MGa?J8r!uRuKATZCp?j+{%l* zC@_agAv@4JAXz3P{->YGyQ z7*zk*@hENqLJlf|YOPf=0!3#2a)3O5#uuXlP^wk~@H5fzQ`_vWhUSAez1NUll_ZENP z@%}Wb3s?+GfnL#r|M6VU`UO!-A=bRw>e2wcu47z=ZCv0zQJ12R{_dH&l(pDK@9{?l zL!MiuoqyRm7$W0I(_;s%JzTHLaW!|J#gfs{S?lWh*<;C_?6~es2dmJ8um(fj-~qA+ z54JrfN=zA-Z5I2q3Q_HmHR^uBB}xx|uC>HN@7#W- ztGHByKPSOg)W(9~bPVpH9lo(?t=?JCgZV1p$mdHtq~a!7=Mdrz$*oD}YenIxn+bmr z3lNKJ5*F+(8mO(6)+;nOAN29E7rML8gU0+2-8X19p_iQ%*n?}z*e$|b>^*2t27UG` zVUTw1qhGEgX-}NF%VvmF7FA))E=>jVT!0H{YrQmg*l1vCXWx;|W`KcWlu=lxZUyVA zyy=+~xq?|+L^C$m=YlwmDfIOj??ONR2e_P%D{@d?5I}%XZ#-#ReTpeKA`k&V)b)=h z7g7h47eTPwETH2Q(8a%1rf$J!=cbC938Ws(jhAVE#-OproJ-PqXemQ-C(4Sw_L&7b zm$pEBm#@hR#-q~KzD3(kwhFP(BeiY0Fu>>r5eP9T$Jm}bmq~8MCd}#k8P9+92aq}t zKZD7)vcqW}V5n~U4E-s7=Z5mUo7hIez5{=7T21HX3u^Wnt`r)Vlge~u!!s=qoL5jg z3`zN0(xdMN+2H_h1Ol2u^V}@Y!_`h+R?-a~%HVe88jI5hoAD8?YKj^#MT<(o2+U?6*<@6&QkLaHd_=; zmP&@kiGAeWZ^!UXU(-lP@0)_U!4XBEB~~CxBa~)wVtm4@#0gs(%_vfu)fv5nh-By= zm=()mPA|0+AL(3XdT_ihT5$ipVr!0M(2*`hQcrsBQ0!8_+q1msXMIc}y?U*^Q}7Xb2$mm91AVU8q4BNrls{@>Pcc`&uZ z`{9KhFGYa`$vtY@Yt2Y;7+M3YOfZCRw$~w{W(@1id%5%o@KsLFU0ugK+A4i6 zx~x}G&G|InY9veVxkN$`;oxY+6_)XinZ{?y*LQy#ul(vOFQ5+t`Qkm~ zsp!Rqlj9>(HC{f3;VwHaE3^Fk(KpX*fqijcZS#Wt_v);%`rg1vhf;|%Lv0fz)@@}V zNzYyOZu;@hx$qrx^LxM(6)7pK442dov=twfFp&7(L--GQsxW}`Y_c&gInE}{lVY3M zN;l?d6dltrsFd!9nz2{^JM{7QJyAm(kYO(0xke>?PN*R6i@xPxO#`eat&8i8!FklLDud+k3vtiz1L%hDU@3kD&hTi|NUHPg?+GC) zLM34D;0*$D?;t=>a8&&2yEuYE5j1+l+3()6OBxJ*c`$0Z1z3Lf0hW4-wjWiF%sag7 zGyiVE&J(~sou$*T1L0Tn*w`c%(}L0<;lMYm8cW#B(+i9(QJQTKG*Z~VgRw2XT$RwM zVSkqOZ|ghNpC2v|`K#MZ*2!LYzL|T&?M`=bZ;b~_S2>zZb$>w5@HIiN$kjxIIP zu;j)dF1Qkc9w~b~Ir3RQ-~h=f#C}mkDc*M`X`ga7a`C%8u?$KbbZ++?OW1+lMoz5; zVemcW8~aBha%}q+Sr9wx*V5jKbLEGpT{ZaZsYwyW9DMpiD{trD>{ZpF0AJmLxw>)e z`Ll?zn^X7Yp=e>7?4O%qITXAXUfwRwss$CBZDi<=q`yemF*bojY+M=ipv~yyN?95N 
zQPYwG-!aCS>r`M2u4jo2riI?Y;GD#ze;1L93Ou|Y`*LpdRZz>@3u(K&2+hFwZ2m`Y zV5IqTL!g7QrTvlurw1-{WLy^{F?wG$|0X8}mz^ju(uNQ;?!cuGCW#?PjN!%9i0d}5 zP^o?R7%=Nt4r{TCkSct-RWu9-!hJ0R z|Ng#Fwx+UtE)0Y}=eied$uctWw^J=O73IZUE2D)WQY19PNTzq_gul>%;f9++8{|J^S;;VsB{EK` zgNJ7BiMgzz#Rb$KK^)Q8L!$kt7aap$Y;}Z zWt|toQh9h+^42pcp+EfB&R0DgyZ$QR-DtXYAl#83IbV8iWCiSW?vM zuvgit8-)6%DQqy)E{-)@1L1SXAJ>#2Anl1#o2f=CXaL~Q_DR>aXLy)e>Ud0jV%!lg zhc+>TKLsMNVlTLI5fLr|KH@WKxksQzGJTcZmY@a5tv z8tt>u#33{AFqo=J3*%vHl0ijIW#pWNyHpBAL1vYz{7H~REJ&D?Y!f{Da%|HjtYNAr z`t=h#oW1q(Vi&=+E6bSZ+#UxmLJME=37>JFgeDF;Zp7jQfsaMbnY(F%^3a&V*CUn} z+x5`5ezPt{#g{vLf^hxdO7P9D(xudWsXg>IUg~ds=-QIzPHo{_kA%|TE&}TGdK?T4 z83{j}BFe*14q`04K>V4T!W14NEwuX+0Ulu6LPDm7MSu~+RE;6-xbAkF);l&Y!SU^o z44VMeb&c?VOmJ=GS+5V1!0BgQWtFG8((OR`Lh2Aa0seHzvgM4%iH$*xntn*-1w3wh z!MGh?(OkDi@-u6*=JHLWJ$PoNZix?DS&~?crUI6%EHv|3Ll}WdiSODaPUj#x2M2t+&nvN$e|s;maCpjnc8>hUQ^P%tCIU7!8Hq^Y@YbdZ^T>N@MR8RH_eLgc*uGHvGo9v%KIOh(m zt%ut&!jyN$$MX5yU6EBHqE%E8Cr5Nj6W2ON4K!*ynccrUHw%F4z72-`N&us=7RU4js;L?&scy@&=xPz; zp)))|4tN^h&I>^A;94O<_yL+AH99oGGzqpx#NsEg?*)bgqPs+78!b?E0#H*#En`q` zD0Fxe&I$mckhODdfmM1~ z(VMBG6Yi+yyXtt!fEw~IyRbTky<#v!L^0e7B>gHcBx%_t*>9I*=Kv90;Lq?N9In(a z1S`L~I>OS8{?PMmbnM*}&3L{bmymDyYy@%86YrduJ(WNzF;?NtV>%?sN1 zsiR896(X~OneO4CekKa2n&eiqt7OXLHPbVP{SgLp9Q9BXn)HQ`?Rv#-v_VMO&+#HJ z`DNK!;#788{*{6w9UvI`-~$D*hEX2314Q@bw;SLd6@PUSHa$IW0j0e6H1q218j#p~ zTKngk!hVhI(jZHQQ+ue7awe27mP-)S@ki}B^!1?wF?8W#fgrMYl?SMxPnVTCF@kUH zya#Lr@RY#pj~vLnMSn>fC#n9Xxp1NV_A!{>mKtlQpEtt14Rjc-XL~bwv^ea0M9h;Y zfLou=Hz$a6*z#S4jlLql(u`V&A8EWubyC;}K^xOl!e;f&K(!jiB?4`PbAd@ogbYC1 zu&3ZQ2HXUo8|(63Kuhus_g(W%F_jTYy%V$KfnS%w;LHT7?OP$WyqJAANRM@s)18ul<#wsZbT~V z3e}X^M_RcycFO>N)qZsZLVh-WK2sEhhPSQoE?IlD_qO(WC~fq&_nX>5RLsYZi_4d~ zu*DH+ahQu=^ud72n*X7@0N>c)mta#b0>)qmcb5&wLj^8Z7w~ylq+gIf7(V#_OTdLY zNdyiZoR#a_{|&h4>3#)VFuu^LGxJ8^tRr2c4m2~Rx-*5Fom4p9e)0CJD)e%-6lI%( zAFr|Ebg}1pHZoObS}-VM$zE=k$NNLSrzZNRWUg+sua37fTsYZ+k{pZiac)yZ);+(sVN&Bg{J8Xzq3Nfw?~VSi&G=tlYzChLky@dKLi=x8l9 
zaz~XM0jQSNBrh;$^4*%!hmxlc>y)8rklsTyK%_)ayrtBPjYg$dh_OD6lR5Y$c@%wm`+@kQg& zxqnd4`9u_-!i8z&La%yWtD~TY4D2QV71CB%3DBl-Yr0va3xSb}mdnIPEEIN$C`TD) z0;4^%bT{1V#)pf3RVtcLg2H$VYZ?!qKx39I0}pz=F1L!n_E-IFspDmfDqbZ#G8L~{ z{Edr07cu=WcJUhpY+(j$QbL~J&HJctI>bDGiLaL4SsVG#iNBQfOAPnRoErr zQ5BESolL}L4N?5YLMN|0+Coo-)&Df_e0jJuc*kq%?KmAr6%OB4OdyQ1ISeHdR4vcW z>+MiD)cfYJ>Ci4k>SJB(NPg{P8>Qp;Wc@yQY``thShuw%2Mak8AWfG7T5+MV39z!f z{;7G?b)GlJIg&n1>->YSiU=t^m_nkUmKOWKLs;V9nj7=5-E(#fIT`QvEOPXdzo7P3 zRDTA5q~AnXVRY$){l`_HLiR&=8fO?nLHQR7h-F22@XI*q5gZ|SRZxH0IO_FkN1hJE z8RdgTMk@IvLo3U2Tf1im%kuoqGr-2$79oTiD~i0+2s;14oy)u@hh|M+fDjHsl0&q* zYisB8;`OSDjMf?#*+4Ap0aj#e;pNkS(3DSricz%rR3yX%^*4)Ti#_cTXTBVRrjc+Y zUR9RX)x{p%6+}pyt`ljZ14`3u8#VpTgqsnm1#L^`(B^pW0Y^|U*46OS8^E7u>W(dU zFKuHQ6P8#U*qS27mYx@5b{ z+EQiXbGULaMQw1j7)vsmh_}&Tvc*TLknm@tCzwNAA08^WJOeIFgK+(%9u9uqt}*5b z*(Utg60N599nKm(_qxH305F9v=}O`YnYDpn!l>E-3-fg^gH}u)vbewoc<}WamI9o5 zHwGX&Q0=DnF_#z;oFFZzK8cIX;Mf9R!G&ePcnceYlL?FYm8D^|`+l+U7+}AjJJ6L4 zHZ1RGSz2N0-^7cBWwe8*L&LD(?~c<8mvHvli5C41MUXDHK(A3w20+bP!k9BCq@AOf zmC~XlvdwJ2=byN!E#Tgr@cgc`%=6rxAf_}}Z%<_TS-!S1BF0U&_8!}w=sbgX;&dpu z`v(uJ!ekmt0}oRo^Ln`b@}g*&d3z&R;>#rHc-9paTCF=ZUXimqUs>nph>557RQ@rs z;m&6?-h26P8IT4XHGF>9mdB&~#3`Nv--eyXT-?N3EK@n4{$fZp+Bu>+v<>aH z(;)rSYk4pGnUYm2=yqSA;)j9;1r$~x>-(*k`#q5>9vR>if4tKm_3!;9qBxR_7{XgU zz-(I&?3w1Pk3td9pLzr|ihnZ7iPMVT8$0wdX(nWvLtTm9hPq#bgb+}i}WZkF=RA3b*v^=`zl-9HAHvF+rAxf$3hXX zHM~!b-3wuL9AR&lDH-z90zL0a-4k=Ing*mrG9{(+RZ#jWON;x&O+<#xZ0#&( z-WQmrw4^v*fO~;cmkC143Op$X^Bcwb*`!-n@S(`Ul-uAXML_jhJ6vs})kTvYUOdg2 zroZIm5Bym!D-dkG6Dp|=HAQMP0+r{qyJTcy#mGdTycLP1bS^S+EtU= zIi0dt;zv%^^3>t3F;;W10W0DK1tejWq}b;Z6Nii-4{rf0?cT4~*pHnWa)U#cP|@Qf zIC*=264)FmNI9+Aatv-dSZwnhGaeb7KGJW7kB;u^w#r%TnH?epk%Z-~5eP9U(zLeO% zTH8!wL@+?Ip*V}AJQIcgbzIMs@}dB_jnV!)i~aX&Meh^@8Zn#V z=3!w1!Zp;rz#m2&=Vn9^mhgeWiJ{&Q#=lu%VoPT6FV{i?>>O6dWC>)iUO8XQ91=i~ zK*KmY2s-OgWO3!I5t#nb-|Qh4KZZGDpf7elWl0fkXdC}*Srr#kS-JyEpkzXGv&=F% z$}sUnceHGJZ>N9|k8_^xyBAY^s(t9aXEN@cjlNQ-+fvL=sTW_Wqk;{SqQjwyJMqGN 
z0{l^7IF}ZqTZYnnF!X-@5VQw|(g3zT;T+dStZ89Pd7~wVx4ZJaXo;jLL4i0- zsm(pRwrjTfExWF3UNs;-Q7iO<5eo&u?oN(G7#4DBHbl6*sYsobA?cxO7eun?r(mrF z_P&)nW6jSyY&Y9V>XghMNu8HFXdr5C*$|U$H7tvVjD8baaHfmv`aTO zyMU6z2Ghrzg93&RSJ2V_qU>6ncFo&6eqyumAQx&MRK>jAWLOg&{Cay}M@l@inqHe% z#|$}RgFJit$@ye1U7(TDP--wepSnwRSsQ3LX@Q^GjmLzZj_-9-t96)E9Id2`Dz6Gu zM0*Uh`_W!%7|6j6?jfL$$+n98Kj(t+rDC(E#D=4mfYJ&AD zzF4(oE5*KJiMefVsA5Hvi9(AR8x$_Tg{$K2@n3fUr<&aq{$z{dr`;H*bIc?5Yw95?0^9iPwtuysz+nMF&ujcwa@(y^0{ZQDl2Nhg17 z+qP}nwr!*1G1rhjUep)yjgo&H%cE5Ve zWea40b$js}c6%{*q|=)%wAJM-WbJv9^=jpm(7^&Khfhp#@1l@XhMMIb3OzwmGCMRMS6IJI>})Fr zSsCmicb7KO#Q!zbiQ%dylba&AT~qpZsF^2fcNDm&mhSn}@nh9^QNwPAg%ZIx~=JOJpLd zy6R}^E9Z(oRn;x_p~ts|Y>fsVR6WZ;%<|1l2><3(CdhGEJEg%z)vnvIi)vCb&%@c6 zg~c$5Y>JC0yp(|OkZ-P{>Iv)^EX8q6#M`7Ls%5Stue1PM zM`voxQwUEMF4kU`E*VecYl;aOtEW#MVrq(OvgK+{)I=L;rtt29|2o(uQJ_w4F)B7& zc4n!$;uCLSolPf`DQ_48`tneqafH3LuHBZA&r4480fNYvrGyfbW4VP5&EZI|wI3)H zNO>AnHB!@LmE%fvT!N8`B{G?~8ft)Nm#ero@+L&j+!`Ymw#aLV+#0HfQappm z+SvtPKcVD03jeJ&C~eqF6|k?uENc9)Gzt)lxGh8%^L42Ak`6aAPn?0wjk++>aMPU| zL_$;h(Ap`fIg1UO6l)Fc4&~N>>ip@AQn%u8fe88Y)^njQ70+}0-wZxx2(kd#a&GnlPv0KM&bLrGqSgRi@P9?94&SrthQY4k z;R63uHF`EbA-Rgnm(DHVCkiH0A7Nj2ZqH z$!#oXkT^Wl+++Pni6kTv%ts2iCbSP+T5%kxr&()AeA?4weL-Yge`RcqBf|$Y|2b^&h%O`WUh2aZ<%H^%hsGi?ATJ>P?N=D$%rq98 z*hz1Eb*A9@5^{BVVez-7p-N5+dJThENx<7eyhquEF{whT+)4)3rY5wEg}RRBxYSs= zAk0YhRZ-SiNhhtNJiY)BZR;q3nc>_ZFPWXFXuFh_pL+erIxoEt-=uE(!j@X{LRpk% ziE7li!n_*7AMJB@^2kA=R-R-{#l}f4%nruUf&DHV86;JGTrKum(($ z8Ob9EHcR~bE=wjCgq@KE1~oMXs&1$y<$^d*#mj!w!n`Nv;+Dt;fKYpIFLQ)4sg%kVMMSm zlU2Xo00aR5lsxo@ff;6hk_`$EI&D%(;FgUTKtZrbyGRozE!i?pe*pDeP6c{#AUb)~ z?u@%i`Ey?H;@3zH!_L=jfMxMLHvLATWYXbVQ2U7EZa0<6@(b4-qyN%6Jh4f)brh5P zEZ_v!AKJAK=Ta+{qV1PDggCHWR~~Ckf|nBA#^x^odx|t|GiVY;khhUQz^HA-DBgOe zqHZF)`Qr1zX6D~!t^)3xqe!X5!}B8Dr>3gcz~6hAYRc7r^P}qZW&Z}vw47HpafN9A z-Z5t-6uVs3e$$_G*e&5qmB#xT4nbX=hjPRc!rIV{o##L70Xn8&L;ruvhZKD`Fz}Ql z127Db|E)^W0M7n1GW-t{=QN}b!YaA&x``^*+ApH;>=F)t8N-imE}9#^TI6K)NB`r9owD$ES`M;ff# 
z0oRB)wLk}_WoyMVb+AY{LbpeNxKQ|Xb}xHySbqm767t1aHb4&wfF+nUp@|O#Lj5zR3ovtRb_fM>3PSThNcin}nzWzjd_^!e)Y+^OBkOS#+p6<(&x7bhn# zPa8Kk&*@Y3wEQ%pe0x{S>fq3iT4DLYFJA_7{?>!<2c?@V2 z%Pp{3PiVA8)|!)d8>~`89fG?Hj}W<(EA(5M}AnY39_W; zt$H#D_%hApyJVV6TGILK87nhQYej*h6}sPUj*V02<}LwpRq6{~tkX3?!Z#g&8A9EX z7P=EUs`ukGihp;D9q8H0Z%JSJb`j2GB|h}aF#jUfH$4kPaA-xd}n zkM<@8Tb_n;PTPqHBEcfa;u;?c{_29su7x+*I0nm4Eg(Le?PI{5O@#!8LLcF58sw_h zF4wO?bS*)G2hI)_MaM8W1yjri#3JL)Sqa)sf;E$&&VQ^PWDk0X=^cwAM;x}qU@2oz zLzOxy(w@?4M6O2U?zzG*CC7*tupZRT4^AU;LyiZH^{F(lSD(Oe;hbu=oY$vbd8HVdo`94liZa5 zzHE>;O{Cf#t3yjbUp>A6)eFT%g0AeCQvvGJ0AHhYCvRo+!Ma4rVkmUbqy1-(1eO)dfDdls<7a zh*3k_4nVC=5_C3EgH1ieDAM?lABC>^)4L+iFVplGcz@i(je6G6W!d8RZC71|BkbqD zPE@g?%SgR22IA0{P+@pqw!xxD#$ru7AQaG9JiDxcny44CxbZaE9wFt70LGC44iad+wgkW;CcrQG}Z-`LZ1iX!<&& z;`%Z#qH%Za^AN@$u#66lJLOZ-O?=V{V7AzAgx6^B2EDWa2qCf)ciy+rPhrUnHx8|p z2gD#2HP9uj^^LErE4`zhjF&XPrCtv(G#UO}ddD>}*8dY-?jH4AB(dJ0O@mHu-e(Xc zww_=xaV5&DRw{)Z=UJ9pVI~T?j<>ZlK9nwLHzQT=e4xQIKn7d4Y!0|YVL=~3xs>OX z8FuOAG6Yc<=sqBi<}+DXcbkTGd8zw4uCyews`}i&{HW;I*9s+3>L#X%HD4{K-a6D0 z|0M(4mY7UjikpN57WWOlxjQNU-<}tX`TyVZa{Oo0i5|d?^p&ljCm#T}SSfMdgbP7d zm(ebp&fuzUV{15za%GW3t{%VO`u@PvJ=$zSS%&W>`7_iK#UJZ989l(iv6)E4N_PC? 
zz{=_Vk@zO&3zE`0Y+$pxD4rbzKl3jRU%?m&+kPs6H(JYf|J95V_Cz^5xiYe$@yX}* zBbSHs#U0@5mtopV2ZCyAu#3I-xO?R_5$gN3S&Ib!_aFg}jm>5y#N6l+(TA&+6-D~r z5$bO?2008ql!D*zh~OZpgIGZ90|OX;nAMPWG^5eYEU`rM8V%7qAjgTlu=(pjDk=#!;brG04THqnJ-S zr>*N?LOE+SqRIJnpv&aiW-s=*gAu)s*NylLmUL&tfR{I(+;ae)LEQv|a5e+eV6|{K zvK3ca$sT>0g<3MGdf6YVR>mMx9)0LlZrM>^yM|?u#hzmQ0wadS>&ooPQ7jc=XXe=r zjOBo|sHFy=-$!1vyV@UY2Aaa%wgZqh1%avsKy<4vUZyLwvE3llDp3Ov%Cq9r$&+LY zIHIR-Y7C325R`vi7@!Jh`*m=+qB)zCmK8JWKcsJTrUaY8$^{A&aYUt`vWV!!}vw|zBWdRq{IKXe0g$^K^&`(h93A2c-W z_-T&*dj8~rbN=)cr`(H}Nmt_`YPNcbjn3_TT(DEu8=zPzxa4*rc8gKfMpD8rxydfaNC@`#~+0cPnSrF@}DzcxSq+8b(T}1UVV_)JT zuL3SCsx+n|uis1716sUniGp8bhipv5WiP>f$P9X(j4wc~I{k(&f-IeP`ktCLLVSOU zLtyBrD4ipy56G~;0Nwz)cr>6m*a2EI77L-p7v7;P3d%)|HQ`SUm{{@!3KRfpQBb9( z)gknxRCE^ynKI5ZYof4`!C^2MPmE{F*xo;#0+AM7~g%Su~V-(nj?HCERI{Wd1N#gi`|^2JyN zPKKAR2;`xg;~}KuO)LSFk)^U zJTx?1PeglOeXVy{v_-#WxECE?_#jqe)t@NvMF(Pc@q6cK!pMctM;bzF1IZzJuJbIh zK+crwy}n)6_5cqw3`~c^SrG_a5cHh(@GFZ5E>>UVSJ1y()@%qx5%~?Zi43A6hqP-9 zg;6L5!<^%eQ-pSdAOR5f1Ve}rkwwlcTQGnz86dAIy z`erOksQ+an*_gF8ItV9PLd_?7N0-z|unlH#%j653!~&o$$n;d1gMD(*%X2J~k*qzk z>qVTDq4M;zY=lt1l*)h@r78^sKqHPV%SID0@YY^U{7BFfC;(30U_U(#0>4Po#}Hv zpfWuPRx|6L$&9KcqaecFO87iUvEXD^7gd*8Obew=xrFUX#Nr?&5`Cjz5HxML83|Q` zA_!qN`#p|n8bC<`Rv_T$oVCg`L>f2vJpz@^-})CJG~1QM!Sc>W$)7TMH88<^bCzkMFhvbi@g|9kFY9?Gzdu zYfVN;R!qkf(%SY}0S>lBTEd}SIM~~2Pf(Mz&2P-hUOsjF=2)+-<;Z#uoId(L`cI+w z(?w7^1NkN%uf5YHTBcF=EaM^S4quUhz@4Z@0=&5ZRV`F6#`S^2%;Fa`(^RDJZJ@I8=t*pa0qp% zU(ho9n4$@|`vapMQ!sciZBxot;g8g^QgziEx&W|i@$gkkDZqRbE>AhWuC>ILHG|&uc|#qR>d__gq2+EB-iTSdbXdKPWhW~W$+Xe z+z6}i#AYF?O)2NFj8Dy#cD)gA6mzxsnBajbKEd2<5n!VLP_`T};QzbrP0@7&g9K*& z&(QqGL(K7C4>1iu-+qG=1u#=G+l6OCWF{fCn6teN(lNz8nt$b*KzD7}0c8@_?2<&D z5+6SW{Jdo!TxcTgm>zNE>eM3YtAV3bfc@u~V*p#x{957U^7Or;AVJ;kRD|rl?dN*J zn}L@UOha`iz9U&_lgiQ;H>Z&vK_v(*SYWYCt}i{{ri}pH*{cf>d^)&1-h~2locAi@ z5j^?|?UGFt=mQGmU>*tar8cRY?Vc)&4qk~rj;D|2X2chzgr8E=d5-t@D{<#oU{;_d z0{tzqNn@v=vB>$2#Ri{MW((=gD=o;zX-+MQ$BWvusJV`foD-5{O{wp6`blLf9KN?E 
z`vh`RBdP$2+50{sRH6G$F@Z#6R58ws4nn81&|G(<;*#bNI6Ehc3yW@9NUj6V3d#r( zV}X#D-YD&AOXYG=tAhb4&ff}O{NNe`kC7B-0R1Al!^)B+y7J^L}!7UDsLIgD=~uI${kk`lof;6;u5;xUN)_Wj}%`F z8;U^@jMVcV54k#!w7m&_&ag|Q-;;~ZiY7_UzSIs}*M4#4*8??N_f{Wf)5xdGt=U0v z+NAPDe9djU>J2oB?GeA$C}&dn1i7or7Cd0UWA&yIoA0pW{kArnGT*m#abveRWgalsMqg4k~bl#CvIqJU8!reY!18P(sI~G!|xvsKY2{k`)R9?1miRpn` zn2@VJ7hmF*>@;!paack?qPF!?AO8wp%^W@(4z1a0Ee~GfP^n_}^_R|I?<9)~oZ+=o zkvpUm0ii(!X$SV(J$1lSPHU{v=#6NSlGk@(L6=sa!9d`98IYBcG(}@)nA7sGs zN|FDst)8Dj!dfW@^C4qT&X=P`)G^?rP0+Ik%zEJ3Rs4+4y%B1H3%Z|A5ohqWb0Dt~ zQ~1yTqv)IY-)&;*v#%u(Zw0vEux=2OEtJJ{1@G7DX#27ObR#b<|LTcKW-5TR!>o*EmA^x)JMsHHS(CUOrhY#mis5D9(&&jqCM02ul=>^Sh=;8laq zJ#8;9cZFKrYmt;smv$ii^}1UrKH%WX;Kf!=AzqIdvE zfukR26Wi86r!R?N7^voF%yV_3dH^+YC@0k=k1Q7ZPM}AUc4V8;t8eq6Gd z@A?9!wY)xM4ao?pBGg8}2At!%8)S2_W@e}Vl)k))!=a_yF{8Hp`oI*K(C9kl7a3$UAqU7dKb4{OyyZsv#%gE`dD%7vj>yT?XHDdaj`z#uDsFs8$VeGQ zj&CNLxib{%=4Y4(?de(w&I*&Q@WA7?B8`{ht=B-escOLQq!7xhc7%w*ZNu=v6iL@` z)yJFLoe3eAmyf5jwOZeg!~5C^;Vyb(b{3w1z{=hopk~sBL@A2l%h*aXEui=byL{6m znR^>*c;=a6DSJKynWAC%!U>Y>x31~33FU7n7A=2Zw2oLRCBB62gAT8xW3icK==CL28GWE`g?)w0QssZ}mbq@Sa< ztfySP<)uqelB%q{=Gvkp}2f4e%I?M{<3!B=u1jFrC3u7vkiu8 zr}Id&$rVG{h$zFK4rdL3wsl)=?xGKX>@`*1hAsibHUk6w+dbkt;jf>^xO+~*>8O2b zW$Lk4G^>ZtHK@m(X=%=ykx?Fmi3G|oSHq5?b$HU!&Oh^^ERx!QKs{>1Yo1bqH*#r5HT;j zh35wP?RVTaXsY68)_*1#XuxbKu~R>+`v2LSXaGN?dNGursl5O5kpbc575}Jrqo|*> zQU`VBF@x(IqNO3zqLyI{33|NMKE~&cy{lw6qIA6|OL77f1ew)T7XI|~$8i&iYcYcB zG(xrx!C#m0!h+cSM4ptD=m|2ERL=gSQPs(|OwJm_nR$ml&cZ>wf0Mx?ewZ-=hzT#J zEr1C^#O@wmcQ-*_99IL3pfCm`BsnG?_fK3`3Z}1u0A%HP5Br;RE{2p_0xnF*Z@yk~ z%GEE%EbgJ9ieI^wUj~J(8eB>>lj!|{oYnoKM}w7i!@7O3DD!!XzXuQG%CoFB9@+EK zqo#TK1tHrQI+4tNL8H3j{z!Z9fdGp^U)@3jQQ97&`-VxW3ZWHH5P~2~A*uE$ zdMN(~WfP4tLWaT~muNQk7x=p0k561ckUX z2qCmbIF_*!!k;k`t03*#eGz-%$A_f?^l0C3Y_~X$7Q03C(Ux;!mIIsBlD*YtyQPuBm9C{Z;;^+Bu zx7RZ@v0yZrubGaInF9nrZ&D9+8JyvFK?;%>zy$N4V1A?#55T}w#=lscN#Qu*cnAlt z_;vK@qjQhkxzzVvKUAp&3pHi)-ZS6EANAh+hSuc;(=ohswcSMvWWNUb?^+kJ8D{h7 z^sO)C$2^7>b8Q@xMe7lCjF*R)j$VTfU5pQnkf*L$56sqkZRQb&Lz7 ?Fm 
zBbg8(o{Cvv7LcItG+k2sb+ugwFGsvAB6Y8)QfCbQ1ufbvLGtcWI#LQk#nq7XZR( zA@o-E>h?4k(2&{E+^Ag{ya8(pzr*Z|&*Q#S*lIYo0AN}-`zG@4HWvNuU~mLRYr;~D zDi()kjORti>Q;EIclI`NdA)?uX6se6&}PH-+ML2`NaV5Sttna&A;x!bB2&^N3Rk9u z?vGbzMUJd$ECjXmh@x6LxZ8mwKrC{DTTA-@{GaDXjZ6=lGk z5hlJZm3?%wxMFRt4#hn86CltgRZ>3A>gcMf}Jw|;R z$x|9<`QotT9wn0xX7qP$l$T_CPqrlGOkDGixcbBEYeR#n@t@<9insJHWbjh%{OvqK zv0*o7?gUg{ADM%2XB2UcEWMMop48+9e=>GAdCW3fJu_oBMsiRGjICpGVCP*8euIa2 zD8MSgag=Y{Ou?9Q=355I=+mGa;>v`p=- z9%j}AK>FZ{VGy!$+wLu|q)X=aD{EN1=v)8H!tDT4j-Gv=xa;|WD5$^;Mi){XX$#&Gau0~8QC9=?HgZ>Z@`Vt)rZaM>zRCqKrZ{7W^C}o{4qD=B3*O% zh>9vpv}0!ayz!sN3tB1ffm3cWEkYo5;Q4kcPpBvsAa-)J{9y38`du?YIQm`zLnC}1 zNZ&t(B1H0@6O4R@p$ZR7dMc3r;#=KgY(qcuB4ba!oL{dd(Klv?vaf3%+!^vewgm;I zo!(FUCeJuwm=Wn$F+xdZ(WDS<8@9egW;f+Ospb{$#s)WIAutHDdm59kJbzGuye!LDGrRsXBNPILnyM}Q>bkrA(;V^Q;lae4 z`k*{J2Du6j6mctTgnKX}6mdj{UwAeo!F9rSI}~gItkn4?Q{dH$cvB6|W!Gxhgs9w} zxJ4H_F)y$P(zRrH@3uCf04zlgZE+kx;4j29Uzi5$Ked#hmx%{pp`K|f(KTBaT%va z_L$^7h5gcyTIIb5Cs;hwa2Ux%uobK`6po=(-$v}s)X{=$kazZ|^w8#5A|kdK+PKkH zLVmdfk46pi`N2S~oOe-VaIIvo*tIw`IFS=e2($MuN1T$2%QAhFxeg&9Ym`;Q{+X_! 
zB2(|%{+?z>pEe!&;rk`Y!Gl`>HRA`R&)##c5v-05H=+)A%mM05ZQO}}IaE_^FX6gu zQ7RPF2xaQxv7t%568zVE#v%SX`1a@!`69&Jud`1wzL>!kGJjzYXfcA+PMtf&?)CH9 zb%KU7c-=z~M>hLbTC~(6dzCo@Q2%v|{%Rg5h}=HkHl!$|cQUXIU~DcxBa9S25(wAcUTzTaQ0jPhp|!g@?8`TLeOg&BAV5p{32UcL@3{khhjb&Ic!;J@ol4l zv4JQu=Z*^a8+NnYB3mQkWN`L8iEeTq5W`RI2pIs{7y*Ng-jsA z(<~A!?~a~RReg)6C-ozDXc7Rk#3z*X&c!;_PI*^<=e3`z`F1=QN~YX_I^VeOfkc4c z%jYkb>m)MoWIiWAXEGlR-0#*=VACla(ltUSrAe9h(pSL1JqEAFMfFu@%Q7G!H`ZNo zWzA$K<`WJo>O%yQR=H3_gN!T|szRoWKi@yebX+0j)|CkDFKV}8viQxbh{(O><&Gb` zd~$`v4$L-RIb1bHCm46nJGMJ4fj%$KRREGJ@PDN9VG4j8lYG>5@FW;x?D;{CS=i}; zgj0OPnO-Xl-iaag`+W7~`vKrFaqBpVWOs$awY}G6;I&dwYBIDWb>I=6I+8-)>O7wV zO*ZZ0`Sl0@@(8Lg>5caGhx=#>Pd=vNKCdqCk;gUT3(|_)EQ#=h7}!Ep*l?)Uqwcq8 zD$CDZA~%M zIbU43DmrfHA$B!HTgo6(MZQKu#ykXeJw2V1SzZUU*{WsjF`Bg1%Doy;rRXMX-P^5s zz+wFXs2hfiaoCAGr)4Sc9Hk-Ti80cvU?z~dsunxtY>k6wSnC14lyPbuuK=<1dh>zE zK?dxJur7iXHB9v`zcxKn={jnXT3qkWFvRNC$%PSfki9KV+7^elKf#ud=Hz4TdA4;b z%XU2^m_HYzezX1Ea4QHCtU80=Fn{A^$ywLOA=(nu8txivUJjk>(CWBk30=^6Yq1*Z z>(Ff)zP-*Wub1t#?yyEe>e3V~gOo7}UN=zDwTnPyN_p zZ(R|j8gxu!eH`byl5fnh_V{sTNn9 zMz!+dcL^g%{?^q2chy?~jM*u-Exe=VBwupfAUYo!drAb`mey7A~&?( z8{**t`2KV9wvq81J_JWi4tX(JRQq_ur4_@3yar^CgJwa^#_$dg3J}K?R8lc9B%Z*!v6P2-|!- zO;I?!sp=i=9(-d`Zxu3S%lC?4Lur9QAt`GB80+RA?D=c<6?>we+PXL9+rLUsg zK^+YikK1!u5@*J+&qD>icu3Z2w{*R1u>+OC*tH^doGDBMWARXXw`BL7})XzVh4BS&AK zBKhgklBqOacTO*cB0J?^UIeAGlp}T(aFaK}ypbfA{{}aJf2A;ggQ0>kb1?spO54&C z29^T>H|#Rjv>OWm63}&F`0s})e`CQgQw|pXqw&+Zu-|M&{!e7_Q>k(;^;j9D2C=s~ z;}JnNfE(hCxDD~X8jp4>$vK|AyRr5AGnk~Th~Pc82&k*0*Oz>5anix(*8njW%Rhei zRWBD;Z#UQ1_@Hc&`I2t8=UqpPd>~ZDUE=+C4>#JIt$o$F%4tRmBnG?sBUjWCRTXkg zfallemYX@CgXeuO5{T`xPa93|P>|?0IitaC$JieHYR3bM8QW9dr6y|R6RFzz=4!2H zx|TM~Aet;3qYtV<5Z2Mfr;kaWj_}|JFb%CYIOL6iPP>(!cus>+@xnLE8YSzbl3vk$ zs2Hb`leC(`xFv+z7dwaoR>sve0&*b-2;}U-f&sOA|5z@q-+-U0GJNF~@H~B>qz><6!+w#(LU|Y;+mhgY3_Sl9ArlY*Yqx%7janB4 z*{s*rbGifd2T`!#i(a`qCU{&*$VM^EHNqH9{4@A_-Rv)FinIbZas(M60^+9n@=!K$ zKM)kZ0o9NQQlKC)rLoIg{SgEhfEc-iLZgd6_Z!=t{msoo@96dISD>XIW=1JZJRu?w 
zlU6V=kZ&*`br>4R0Rj~I7}ysQ>bK53P-Q+MBvY!NZGhiIbBvHTVHLlbq!vDqNuwaB zFAzLVFenV@a7-+X zYeKxh9ZwXA0RZC51rFN=u)YKyB5j`0-9kgJwZ*baPt>~$_&D7*y)?aWzYLwI0GY+s z44s)aYqj|VsmMiRwm7!wssp;%dxv1|3-xr%PP~4fZm{l@%X#hL38eF93PP&k+mK`u zA6AG=gPK!Wm9l2~9usq#?{4&--SZJUbK7Z|n$0g@MpS6Aj9e@MfIq|{1bhi+kxcgU zC0|gA;h7PAfP;LTLv~syLa)RfA+F)W8QdU6aV3+~F-$kJAG*l~3r5(1m}iN)1(1z^ z&sfvUB`N;u+?ZXGTuUW@6?6sS-Skz+EuK=<4ixcOe*6rb$#e1O0 z=Ybt;Lg$3krTMu40-`3~sMrVMy86r-bwF2v;Q86+(BN3CCQtf;q0yxF4UCNr3`5Og zbNxn>3BSa2wEtIf)5p=Z)W;iLpCOVlV{ysLiX2KT4MiT5Nn z>rqaX!q>=F-(5Y@Jh-QcTogvSK3ZQ2`93Gsb8vkeiv>Hgu^KJOVm~jP8~>QYK3Qpx z&UYUH!{3H$$u{xzno*dJ-EzrlV3c+=ys=#MdA28iY|qcTD`6DkP<7u%ctI$FkMT}% z58BVgJpiAe1oA2(s`PJWwMFRt#1krONZ|UIF2V)r9=)+}!hHCDj?wF$f$o~+%Q4f_ zk%IgXIxF^37VgTH{#YYWdtV1J3Adh}P7y5tj8{vG!hxSk!+G|SRstt$(w!@c5Gs+t z-JH38r?Q2F0nTH27%atQG?*da*{*K!!$I`GZH+Weu*2;l5GMJ-j#I*P#bmIud|iZR zv4-`FqoaKzzvv3A#b2zAm!|(N;6ND0R*nWgd^_*l-3P!bqWQC8GND+yS>0dpxu2>4 zR0l3)+-3m_#NyU$r71x*V@+DDTyrR@HTz4dC=H!$oxC+o&uWBy=OLa9iX0O%L;H=< zyyu#t)GkqvMcHEfr62)eYQ7ui?}=zj7#O3r<>&6YU3$tDWzAZy$+y%O5{q-RV8mk} z6)POCQR`eaJB>@AGuXd3u86s)D}D92*4?esj_!T7QF;8^ma|9VS)S5#L z;IahV9+b_d;@39kY}dL%J3v0>Y6)!5MGOubic|Y>UhVdk<)62Z?&zLVtm+<2r!c>& z^YIn8+F1Xz(YH{hy3Z+IgXihPciZ1G!o~e8r&6*s#c!(ut)U+9jWgw8ZHNj1?rgek zr6_BQc&bgmV4rSWQRbcOQ{d|)Y3n4dX^GL%ctyboO#EWy9{+)I+Fw!U3f6BY`x`Lr z$1akQC4@*Yk@XuGkV-JQtIppkMM^^cvo4&DDyqITa(-`&NVdx^b-Y zNYF2;0GJIGR~%Ba6G90xeKe78w>UO-r^!&0J0a(jcHL?cwC=nQ^MFHxkYRm`x({Vu z_d16y+IzyJF1+;m$`ILW&daZ}gt#7cjDTFxf7deTt=hg4ayGC*0(Bh$en?dlH1cZnnk}ZRSQ39e!3{@{Pe^(5c3h{<>8HvO6{Dw==67hwN zGnhYst=-5eC{03v_r;j3d>mb&vPwFi=}$I7$NRw8KmC;!hMisM$@hy{bS7K^ z5Gi8GRLV5IA!6@0x31d&WN*L?AiCj!7Ad&xb+4@fWyXIBUpN=Y^#LR)f?*g-akT%VW-xWXTYv|TyiE6# z%IC`BM@49~KUXS@Eend5Aw1NQ$Gnvpz6aQ8_Ei%|5skP!=|DADRQHbM2jT;fmdAKC z`j=XQ#y+otgd{Z04R!?2>%aYD(@9FKvk1{I4Gf?#(h><6Y9Cu~`!v#(`M32B{pNij#4O6&{#)o` zvHpLf%s&q6|4AGENBEif58=m{d7@*`m3EfII>bn`j=h&)Jzs&2eC}K$+oXj${OcKC z&$%h9J$~J!%$O+NgNyg@72y>>et5erq^krnMjqjs&(NVJlq9)OVk#`U(kP4pkxhe`N!Niy3|Ay={|u4cpn 
z)%5~?Xee=qOrbJSl~C{582*zjrY~|WC>{QJ-kmyungoO)6Li=Kb4jv)qriPo%v078 z;3Z>)be=p+^y4Rn=$6f#{-j{md23E#YVyjSDCS>bckIuuJRBr@&dM(k^R3Rip8l@y z^i=}k@wi3FE;c3+yLl2wNt{e^bS?HmmpcHw+jTyQBeuTw5~0cih=#gtWGsPXX#%0| z0%2kmqyl71oN|bdAb~&*f0g2UnFH0iC|X-kDf$V)?d$yZ)0ALUf?1Wk1~)Kw*I1Oy zi*D+GF19`q4>9#2Z_pI`S|yD?$Bj|fLVa$j?Cg}}gQKjn@0 z(mDqE&YKJ?1QU+GD3~eo--h*Z2|^ZyOkOXhXwA*@72D%dN>Jm_xpT5i4JgV3a;utR z_gd44;q_me5W6Z5B(gZo|qRT_?w~t;r8C2K`u@Iz>qzX0q z&xW`TOIZs>sVQ(uLEyY6aC6*_zM*?#XgPA>-be)Y8*+c>luxCF&2=>et z$?uB3MfEYP^h)Sc2-18KYiF$bfA=I@+QW)J{u@@#3?p=N<<99fqLS&gU26+MwrN*W zYc^i=9e7~0yXLe^kR)Z0V@oxN zeT7H{W7!RIRWpL;cEC>uz%RUj$!z-b5yP%|)LXX#AEpG7q32EenNPdKcU9%>U9{OV z&jd84)=xBr1scE5eAxoKmWkTO^IK88>F8k!ffZXH*lrh#qaJ#kUQ_86X6B5gv_DG} zZ&%J^vhN#I>V(+fx<**w8vpWjwBYjwWtA8hsA9jv9qSCSR<)=@J|O+iqTwv*A{K zUQw>SE!;~aV5jl22I(|HD~m4q zgywg{D82k5ha&}Lc};QRpjiDRD)(*+H($e3zvb==8{^{*t8U4Y*Ly7$c6O#746zLO zv`KsbzL-vXeJc=#F_RV!HvWa5|FTXPq(Y$Z_DS_W2z#rm3%6+9vTeJ|wr$(CZDrZEZQHiGx@_C- zLYHpsy-(aY4<{lYRzyZ-M!w87*Z99N#tRvveFhu64(Wg~IoQXVo187uDDq3Mb|$F? z#3~th#6ww0KL>GTdTK(se-GSvCKM&rPNZ{b24gBFg;s>?@RrZ^=;t&m7Ll zWui#@#6)e@PplEj!t&QcV7D(>z%wI$~Ev*Ll~_JDE=1(7M~nVlxgEUs)TUe@{jn zh3s=lYarOo(Jo*#;DqM*W5XQS8yuqxize;FJ4%ejp5v>4C#h6l_Lb<>k=vIkhWhye zW3l+I=Q@L&U`ymuB4~wPsP}Ra$%yBUYg9Tw6YWB)cKKdkiASWb%SP$lXnVR_CW}G@ zTKd{t$if#UgG$mS`F&+y#bKV`fHzA5P_}-Yfb}=9I2phv&LI*F_bcLgv!3L+HiM)+5sw;O&GN(2_25qHMQp zONUVAQ*FQJp=YPXNl71PQil)MYb74tq?F`lzjEDoy<@G$Dg7V@JF*i(4LpJU6lQs? zYx2%z?gW@>yk=}kNAgIL0#D!y94Q6-IPHkFO$Nl2u#l&3quWAn(`?5B~z#8c{g{IjqMz^Z2;Q5KbS#bu*h zKx7#Q)pe~G>m#t^82ob{O2z6g8iZQ|EvAh{ODl1R?R~aecerji(QtC=L)u+8W&x2L{5COYaSj2m4*{iBFQD6Z|FRLzk!s!H;>?_u>-vsh=qmBDh7K8Wr;F?BRVnXGgu28N@{1~bc~8op@3SO`m7;tUyr4`L(R z=`Ip_&yaX8_F<{dd79Fr=qKO^bsse6Y)cTVP1)>gQ+&D{Flvfhim)jXjYbKODU_iD zg74TGM#n?7-@ifodkWf-PRGZnVqE88xj1Ut%q5!QSZ#V&W)*kV4n*ROMJ@c#Bl(*9 ztKIb_@LbxEN{(p_QERnXmOWR~7gZu!Yx8eFMUl6~pUt*QrV?$FNpM{xD1ya7*4y&@ zyw%lvQ~z`_fYQ?!m9|3LWY={MRYQ!Tmh6YzW~2)Eb+@Fi_m>N@f&w_|s^+BLw`<(! 
zQXP4AjyYSIn<$Cp9Y`_3&?ace(rvyTX-zS>RuZp);^n$e5Ba$tVFVd}rgdh*&fE6B z=ubJEWpZT?wA^t#T*Q%Qe*Z_xA9!K8RExAvS9vfb0IMmQjx~Ld7xIh?_@X;i#;wM@ zLw_5F^pxRuAve18yduT#_JU+9VPtgfOuA7qLWX~y^W*jABG4sxdQbPQPfAoZf1&;G z{9OJ?1}$K5XA)vSNTWlVH5ASWylAN5Pt?IA{PZhgp_nG%EFyrxDIq0iNi$03(gG}j zqp3W80TM{zG70N(43YW^Kq2G&gd~Dph-PCaZMZllEvtW=tK209$>%bF1U_Me@k-@X{B)jOwAJ1$z1r}fk>tb>(I z@y&@AT&rh}XOOD8XMVAc=A35Q=pGfDv1*N0d*%5Pc&u9|fAdEZLdtg8V$`-BleJPc zE!{W6TBB8ziwir`*7C7Ij?tNu96>^o;71e;AiNE!gUGuopJ1JTu-%_1&<*3Pz&a91 z0p@0)fnPO8_g`sK#1giyn@gLHtZP7)zP{Cr8g!NzD7^-)6Cj^v2g{H2VfI? zO&{Hs&Qp^;(%25m7|FT^HP5e9RD}UaZuKCu=1=q3Jo+BCp(Iuh{-20|R{9T7~rvrOZDOQcNn zu;w|T9*SnM^>^XrD}&h$-Z&%pzGNv02x z08JBB8t*>bc0rW7((_v5;TgF3w+S3b^Bi%i@`nNon!NY?VDEwS*?jJ6Y*>=&d^<}Y zHhvnYF7oATFFhFZF!)wpYmcggEOp28#z{;OKomiNE(Qkc!23uKIbNTaAFn_kMmX*u z>oJ6Um~`3rrH3px&FS*HJ+XJT$T({p^#yGM zAk(ESTeP)f;qpS=IrY%dqd%gunjle_+%r>5A}#Zdv>X1@fx$RxYb)PNhq$G;Z;rk4Z{kfLjyNW%TZIu>JdP5bKV!if^nT!9F0*7Pw? z2Ua+!rqqjnXz<(nvPzpZw$nY5^}{V&+-){44^&VX7eR6|{Y9rPh5hFM{LDDlLfb9h z-S;TDgVn{Ff|GXI^J41`*aYWSa~nGra)d!lh3VZNvU_LG#(7VtwnU<0YH=(vKvXvt zuq=_?n@;>LOVyxQr56`MGG?6et*L^G*H z=$92@2BD@L?`JTEkBfs$wW?$&;}Qx);;>r5bg2C)e+rgB08A*zP!I3Ji6R-hn4PQw zpB&Z*(d^$XMjnY;jMtSEyvbgm9z@JL{uH_lUb4_?JF^x|a-5M3rLkBdz>9@wySA*a z5LwN!F!8YL;HvlqBEFNsN=xyN&#%5zF1g_h1SZAV5CZMX7c;*W12;#RR2R;1X_dPW z={{IG-@>mJ7x{`5m=)NbNNAa0hS%U!K|v>w(F)?omhZxbM)Pc1dc&DbDnPYmzL1Uk zk(k{^q2EOd&?Q}NW9c6Rki%tyf{&H-%nGsCj;h4}47U%7T*M-;!gkec%L_r&+$-9$ zZYi#%us;OK2q+B0Yz*NfU+1gf6c%OKE$iA7&j{`0~1^0}HlZ(HtZKrC`FWNYN zDEIPUFmy-mjvWKuqq8e=O73B?teW7dYgm7HX@vr|lqd-Jn+R+T0J;qKEH}jQk_CI= zdhk=KCuKJaz)`KVGriLP@kGu1K#y{TQKZsYTR}JBI+psS*jfAPVTl5{+ITjC+aBoU zjQK%P_UMp#Rff6${-`-ogTXL<_D2C)sduVuduysO5IOwl;K+u)R%^E}b~U>_=ul!! 
z0a$x_7Xqqe|0Wy(KnkUz%Fi??+VJAX^kPGM3b2-JUyPap>X&=ziTbLg%OvhurDB=& ztf=ceUr9hKyI&Ar%o&V>?c)Z~wKc)Didzw`yv6eJzt3ley)we1hfq>VPpH#M_t*uj z{4BUq-wsrg@YxCL4@EH7gf~Q4y{tgGmz@>-hos}El+1JiTkC!oKdHSbmmgHq7E1n> zsg`HOt$yA`;8;?}!TSYO&F`DD6N1@6Mvc7}x;j|icrW`seXhS#D?$hQJ3jC|kZ8Fw|%?K(ZjhET9H_S~8Hk-c* zBSF+o6?(@#;?e061{@AqVIHH5dZ41uY?`9^Y4jV&bkr?d*@TL>B=8Q(Nea7e_^0L! z3p>n@HnaTt!JRImCj3Xts+}F6x@Nbwkz2xJU_K22rDLB3gh4?#Pfl!Y!iwlbHCyX* zYk0WK#1QRHNoPD(Z3DNq#avr1c3y2WCN4*%yR0u)f17tKSMj$ojpyW5-WR}nl5^a| zDR0wmWKdde~%%e5%85X%%vHH-AoF&OOfzKT$Werwhs?T@_y&BH|ag9qeoz!gw}o zZ$NmA6{a$cSEm-)L)=M$YjF z|A0jhre*lwEs~ury#fEn_rQ_9FZIJF>Rh^Pvi~2$Ls(X$VUJW-=j_NNS0{Vt%zEB= zcnP5;blO_7WNOKk49}0@;eJo&L1f{0V`)AHm>wf~)c7U*SD>N8iP7!v`Ss=bNiNu4 zD6u5z+l#S7T2Wwf{Q;2?(i1(o-uSO<-psf$R;bS(%Bfe?3OijYEzZ9O@bBjARzpI7 z5IG@??@X+a6cRX)LyN%g1KCHV0B~wFQZ(1iqAL?WjWiYN;q_Ecdc|5PBPxjeGt0Zu z$@CH6K@Y@=d59@df~=4K-hq+MfK+oVK^u$z-X6c_j{_h33e~PM;4YG%1-H>c@G(Il z!RTrPV1=$}7_tR^dS=xEq;=>{@5uK_2rD|Mmi%SA_ z=#QW>NDAKp#x%H8_#O$!LtrojMDii-&~m51B1%B86h^w|$5Y(^UDa0eCcDnBp+g0M6?x-5<^MxrP(E%-a{NHSR|mj5T(73LXpN;^F*mQQ(G2xwc>h)Wao5BeFIDDW^F|*X*Or= z56!W)b*J_~!1q!h&z%o4aT78ML?}px57UyzWZ2s{zg)Gm|>Rqa_*%>y<^`@ zsuedmxI}8A#!2hO6Bar4uEa9?(ql8uVTbAbO18bwu2=IaPdV%W3bbgmJ_M(>N{izN z-5<@wA0rXU8?n+l+cWRTL_BUKwOT1@C{=>mIJg8kFjm&Sh@DDjFLOgxj5H9CUyjW~ z2b9Y*7(zyux&3x_)0@>A#2&R$1h2U+@Uf4WG%h8V>XC$jWab8Cc7i;nkX{c*|M`MC1C;*7o z8!D)>=t~RF(iDGC*Y=)sb0d(ThotizUUKmVoQM#ayY~?mMD}NYBCYNYwBkRikNvv7 zkQ~uM3w~pnqeMD1N}v%avg6-Thk05(u1VzFH<1~f_QL>do7CDekzB4x1?0dvxP zT-ex`dUYDrv<%n5>(;JjNmiEf@JDAB@Cscqc1G@*nf^Ari?EKXzU@-_b;RLU(&!8B z91Af^2&MNSn>FQ1f9e8+k$buisY|_cN9_Ic8wR?)g*mP$60%cYsOI&y)Ghgk6t;N3 zHxdz*lb$@6;-C0N%q~&jg!8CR0S=!dv1;~{RWVg^R@?`DAiEuf`By*3wq%(VH9A~x z)7-&x3afq*Yg8YkfkS72%f7j~`8Z2=0h1Gqk$pLqtj2X@c&M09` z&Ri@JbloWE&&GI7T-38cEuPlk7M3KVeS$`X8#EGLfnH~b)H!bEI;@uO3; zqf1$#8IQQ1i}n3)T7++a`tdYkb^7^JzEWmuql6kdZm0G&z67Q zu+G+kpV1&3Bxt_^sJ>VtII?M(=qr;uaWfJ#X-nhSG3~2^zqjExC%~sHwUp()(>)xU z_T3OqK#+fix+}08+mbs-oM|+^wTG1kpFi 
z{bG3M42yqv<&A0WE^^xO!fWfq*tPETw$_xlRl3v9-u!%|onM-c8wyCSa^#8rG4q~& z+LK+SGmG6`toD&*LDc+rpBXx$Kx;OySQXkeXW@6+px2(}MR=eKG6={G{cGT!Qbgl z2r21)&1mVK za#dYH!UZKodW!B+6f-vhWhtlpvz?uKRkHr9yofd#oqV!i^I{M)WI9b@jAKEa_=k#b zQ?<1jz!@zF8id%&Aw{?`>xvRWuOtQsp&_s%A{ko+YSelyk#UIftS-k8t#nk6=MU}=H=mGt zC_;GVHVx|8?H&HOy$f=Z_g*!|KardP-U6{Q1=@e|#pX64WnCHmtWZEs$MU#tPoKWm z`SgCYI2qLRf4v-{z>nXiMc-ia(rIM790t1?xYnngUPd=3AA9w)KdA{Y?B)>w1oQyT z;bp-}11L!Gdnd1OeuI_Mb*srWt4U<(zut>D0+k>Q1HyKooGcj04h|t0yFP83BgZn> zhRH}X_Ue(zM_6FX0twc#sw`y7srYD5_*(|NaN8jS+etF4VhruX7{k4wPWEetub#ne zW>N?+aQ|p(mp^rgIz&ID5Li6{kOOvd_tx3tq}px+_Bvz2C@y>UsaSFHcwIan5;}gp z1>|~%Uk(H;tof<@+Y)w{{AUYS=rrNClS|E>hy@Fq*k zY^G*mriM9K>qY0naioX=FVelMOXHa>uBpC|?Opn@f^jZb3va&6wITNnpzEg10PXwW zcX}45z~na5xbgFvZLGg40Dab@W9nYh`4uB+{*v7Gbp6Z{ZYt87E!9>V>=WJo{(CsV z!tuf;OnF?fOiNL^f|SyXJcT*Ogk;B_L&cjD&KrS+0eKbKcLFzbE+Ym_8@VLYw1jwG zvk0-#C>3XBrhew6`bUyrku`%$-hnQ=nY3;Vs~rBS957D&Hra6AzNG6azie)wZr(BH zyzsx$cl~o082ywe=G4>}jqT;bJm9u!p}XM7^s>BTtXMxa$J6229QgaDd~rE{JGr-a zN95(m)TK+`4RV{IUVm-Z%WywH{(x0?|0XVxV}>WuM@M%JUDhdrGfbKU!% z&ZpaZ>K*RAfzAYg;a`t?2TZ!$sPy>sYHE!Q>^;PK{Zfdxo0Iy54&Hg$ULDyiShWe| z_nE&!)h8BbSY)!C0(KS>aeQTyt*IpPH7WjPOl+#W(;L9l_iMsK88Uo(eY`Gko1%<~ zsAq-O@IR$G47_?FDveAq$IaP5*!FP1wu6vA<~4+S*H+)jZhJJm_wPr>aJWqVc1+E5 zOg;Z)Br|@}^_j*|DRDQs$e)>1{`4D=k#x^MU-a8G_bocwGHVR*u7N$1v)??MGg?zLrLlctPsgqtSCeD$DhBvRmsuJS)~QbyOl-hQYn5c zncgW~Zi%_sD=%KTGsz3CY4blyl6?rp6zDNC@OQ=h&@r0fI&s;=ho+Ux&uM+oqd0)&>+gzqmOutEe7E=h z%7nR*UR`o+>j2DIl;2;?U$UJP5s()A$1x!@Bv*An884bgl8}*;b4_dboxbsXQEJNK zZb}G@3Q9n}UnwV2s(o!N2BK=tA^O`Eehc*rWZC#4`(6r1kc6k{pS40s#>+sDpW&O3 zWk(@lbd11;YJLEy%pla8_AGw&D4;+L+7tQr5PFj! 
zMn^pPS+b(0!xwXO)y4VZq$(q0?WA++;DH5GjC_U;HfHtyH{J%b>wWqZE24wNiRMuC zU%9j~^9lfyz4t}68-dBw-OQDfeOb$*(c@~h)pR?5>)0Dn?9Kx*uz+Vtfu>|eq!Yhp zNkNGM7?^QBX{Z8~G8DfA%zv(UmCrQGB_v=>EHxSGRe{bdib=r1;}}Z@$79gQOl^TN zb0vlH$dx@4(eFc4UbfE!2LXZw0o}d0&b#5pfDqJfxk=C=s>^PjrVaW-L1PoVJR5<3*Hj(rY6BO6HRB6}cS8xr+9fw})-|)@ zyu5oHVQK<*A0B;c935kQZ^;(i47UfLG@UXZfEudy@6M@?)M`PS3U7$Rk5L_1`kh|o z07vgkxnCxJ%BL5%FBB_CSC3LZ{{uksYl~(-{y}O2qV$7_->PAP%1ROk)j!o%)*ai7 zJd#S?^MaizdTNAKT}-W%LrYk;%dNNJkYkcWxZOD&<8lG6)SV7Y+D*$;i@90lmhSf! zZ?bRil7I2UWP-{ohQC0o0#Y6c0p8j;;2C;o{B8#(D$3Kd%EWd~%&%Ws_VxL;#3Y-3 zw%z-KF+a@+h7Lkr?-(L&p0q2s&4)nmxIW zNGfJ9FAs0(wN1E+Kcy4VP>G@n2C85znEob=8k8p@=5)>9_g4T7RODs|&2PjqfKCFX z!%3f?-b%Ew|5sr&I$QuIb7#9|qm$ut$NHX~vM$Q;dx6D(VMDMGReK%Cy-1@^c7Q05 zJ&e?A3_b~QL=fK@5OfdphbR5Bk(GvZY~5TU8Qk0ja7V$UFi@ieSi2d8AC7{4;kW%M zlkn!;b+i!r%ibQcfMLF+mSxT)V0-K3^JHP?tC!pZEOty+7-v?LbN0v>&N(BtSe70U zI5n2z(~g7~!b-YHuJCecCB1mn@WVB5y@6z{U#H-&T;6M^ zI_DmyZ~)}MqsaoI9C0vo0n@3XIK~;FWy#L(jetwsI8+XAdV8{2B(oI z01HhtWGxY51^)}dN%5A}^;bRWl*tM!c%sv_`i*Pv4=Sg4B3`YQM&5?%LG5gAy0gSs z5Z;)Cy^LGK%u_CV8oj}asl?AxOP%XhF?QJmM!nvvN7yU#*jd)f|(34_FvxSm>jr5s9;H6hqCU zO)$7wV!e&h(?+`>SXN4hw~p#tYi)-5DOs`()Kxkxb5H5yOebj1 zW2-0V-cC;*rHzGCg5%L2$GgvKSRilf~1~h|A3ZA+I_y+q_mxKM`)S zQ?r+Yeez*eyP*7J08PyJqo2R6h4%%V^&baZGmgM@<%?9H`u)I=%pKv+F&btl<*7=C zQv7ji-U>z-PXdyvD4afbFA?1aqQ;SNs4KoL<*lGQIW8|mJ}%w2wC|pbP$0+b91Wcm z*&Hluh<-QH^?x3}K^wf+{<}3`ax(otihO=nh=!j)3cwEsJcazz^l4}SQyI(dvuieo z!O2y*h({)8gL@|8ZODj5*IX)=T9VuG`Ix)?>sL~d?Y%K>FHwLP{p9-kcBVrgHhpk! zYIyGpg#&}Uem?W{+eEsmH>L)G1rC)g?g>iK#%Yfo?FikRZa zx%b!OC_sUtzn}AM9Qr>fa5*LKgiyf4PGWlIGx^ha!jvS8$+?%wID)BWeLv#<#nTx@! zeDnD|nE0syM{f=y?3iYsMnkum%Z)Wy;q0PJ!EdtO!_Vb3n5DC*wlUet-M`qfy3 zOGkur)Rd(=i)7BvYghVJ_Upz;PRAN%WJz5+1I+6OpWRau+K(qG9r1>heCnem8*5(V zluo(iX2Z>c| z#1DToKzt7j4txt_<)Q!lFJ*4S4{ZM$!24a~617PlmqNabN_gmoW?J6*;6Ge#16mZ6q69zIFC)2B z6nqHvDMo83#$^I#N4^_a_2_q`X0j2*0K)DC0b6z{ykn#%@H}vv7u@<)r4t0e;${(d zPh|g}$P6(VkFH9=@m!oUXmuARmis?#s#qZ>u;O=DBKukO!Aw(c9HP~;z!nIO{FG87Y=Wo`wwAlorJ%H~vohx^xD`Nq0NmeR!do0dp$! 
z?;r|2ojXSG-w;>7F>Lx73~4U>K3+zRvsJyb($QKR!AK&4R_qEue+(4>OpN_JI)DX| zqE$#OKNhYVfbb_0G@GVh{AM-s4qplvv5nk>9DahrR*Oi*QkohMwSp19Yry#&Vl9)@ zaQervs)Ug#NNqmq=9Qpn;WTb_&fLiZUzJ_2>=V4jmpXo_rcAJE+~H^4>k2uIR^j!+ zR~y$vxhpz=a;iFWL(3-#xH(mxAv%MlwZK%XUp6^LQa0gbd;%xl<1|Bp4~X94u{UgW zQ#GA+qTzDrMCiExDzz{{qq$|+QEg+JcmkKSIE1D!&;nQh2h>X(zU&QY zVpEH?aLm1uHEz&U>&B8J@7g*8f!XqyL6+nq{R_k>gJgYjKkgPX&0ydGmp#8Oy>T+2 zxQV*!*YnL2?bMHR)-IvtsvGzS;*eEVIkWoPGKx>d1oI^@EaAMs#*i-MUGO3B2mD$V z9*>iM9ndAGrCph}DHgY=0oYvUy7Py`z`(g`@#?TO;#;F7bx)}N!d_JC%=f zG!2QMhBAyrBh)xx)^=pZ_h84ev$I=m!B2KuI% zq>WNpCPgA3h=LaGl!%4a>!D?WaU4pusT8rKX8gva8U8K%Xo4d3Hzbm*nEy!PX_Q53 z3=E9;`#bXpAVl8j7WmF$^J@60x5AGGWkb$`1vNeapcIM$h7T6CL_@-1i!e@B;({Ow z=kgM#mH=i<$#c}$5u}zN&K(gYC~!u^m>4EvdIO)0*7rUC4b->fJyC%ZsCpy$2yrSaVyD8_Ft(wb0-+cugwmlsLwUp%5w?B?v`U-Y0Koqq20_#a1&x z%;7qblb`1W!db=i?R<7qA{uE2PLE~Z)T?lYs)4s$)eDa+BiYx->%cUpmfy;8x%_^qrl6!=V;8Lw10tM z&R|eV^#qzLyb{ML z>FFMjW#puw%&O33K>T0!S_DEB^7VaEKUtWP41w7AOof(;8m@IOMN;@4nH|R;~kcL_dPgv5hi6mr;Lw!;RsVwqAdHiOYtbJ33hQJ6W&( z&xiq59T`_UHF2H4+2xaV`_HGt%TH6SU49>qj&9jxH_HC`nDenVbm;rd0J$`JoP03V z)^|@WKL^XG2g9%EdMTtYzZYoZRxu9!di_)bCh}_5V6BcxgYi&okL$ybRd+Cp`G?xX zpgK9OlizJ7p}yig`t|On7v`r-d!4-hz6I9439+usaYdnO)d){uq3^GPRo?5i%;8jg zEST+?ruBk9<%bDu!>woiV8twSTiw1mQgy6~|EF4EWLnv{Jub#rG8U-Jlh&U%oSS!- zVr(tjXN`xDuNNmb4}2k7k*wmb&YS?242J1mm>fA8mZZVVMi`df=hk4{a0 z+)exFuw8>e0-y_s-HC@PFhUep!kPdgdMcZq|EqKL%MfsGgeJQ1Zh52aEh}lsa_juG z3dN()@K%@O^L%iC$M}~}^J=s=&D|WCShfHePYUmBy)`!!I7;}u5lRZhv-H87@l-Eo zC0ak4ae!!SPb41FX4QFrMUhcq2uB#yra}Xl1Bx zh-s5B-ZnUZSK8R4hFIZ}5i=RL`Gm<#!fNNg$2TUCIP4}G5&E0Sg!d5{18z6g%!C5J zvohw5vT4WS0tOl zSYx>cBqqz>A|JgI)m^%Bmqj)H&NSahRvUg0>bcGUD&*L;!lg>JJRuD)7?{C^If!PF zuAV`>{ysG`xWK&HhjZ;Y!0)my6|8}VM!tT8{mC7lb-Pr;cNGDRW9HgL&VEm8V;W-4 zhvq)mn=`aYeA&+m%%hPv;1VP1Oe$N5CX-H`JrJoqZd+;Htkv93?>7nPgOb{3j(5>- z8wPK{A?j*7Se>&wtvk|<#18)(^f0iovM({PHVHrZz)Kbo<~W#V2=tv{WsA@vVw$uF zBTgo2z|{C~Ns>FL#D}4BBNsyn~4w_S@W%bh~(>+~93+`V)W){ZuW|8!7eO6KnZ`GQ=TH0&_PFgh& 
zM_}AZZ%%odHruc5jo9QiYJwY}+`S@WA@5^Og1a%GT~=nBnm&RNgcdFtJ^Hk@>l6Nb z9LN5x%)l4f+XYsM-_2R^o%II4W6bo&q^ce^;*h^;93NHp#Iyd`jdR!6=F@n#Hm{BX zX4)=3r*a0mxT40GI6wrMm!VU-Ki72JTKcF&mG;o?9JJW;6yrX{c+1w>X-w$GsQi>V zn5}#o)T4zN`fpOI^iw+{MLZm$yp8H__?_)S3pB`6f}`zZ4Lv6N*2qCf(0A`BGfcX{ z`{UD}G@SIx^25R?unv4Lb^h(3t$4`*z7AEvhTav zJB1M`JWW0}joFs^zR$r68z?p&VLvarX<7nEHclq{E>8RllTIiOzwT#&I*iq_j|oP; z?T*)Gx9j3;kH3Mj;5|?^y=KGi1v-cY>D!)VYEws>pb1RIUb1kGD%f^S~NPzG#xMwD~+(S;sxK~rXv zWqf0eVM~6%#zoQ&%x;!lMzTcG!~ENGNK%wvyK(jz0Tg6sFESiFl}G!f4~CGvr`%F% zTp1)htS(4VNAroK-vB(REEF+77K)oUh8LXJzhz|d9L56V&*3{i&t>$*XL_Q?C%?CRkwcd(nv$XJB)>t|?FH5UJGeo(nEwN-qopH@gHxwF?EY9p091^I zRwrFlUFqZ(4Ruryqw35*jt6P^&8O8{O64}wY58RK%>W1p+H=<)o*h2k9VIY5k|1$D zl)77Qv^p@Q66}Q(!%1|UKi>7bNUXsBSVS9IVf-pO%zui{SE;zKd(Zgdd>|G6SF0Yy z@bessE0x#;r$VB{D^y=x3@8up9~(~o%6`=vl?Oi&W*Qf0$dF<+#M6=Hd%1hNYv2D9 z*U1sHc|uj$cyiEo!uG@@!-$MQHPv|cFOTq{NSibU&pdK>4AWe#+pKDZdk{2Md^oHufEdv%2sdQCM^J z`OgnFJ2lNbbe7X4KUzg;fU>payazV2OgDPw-rVp~H81R%ACn?l3EpgUtJ=_}2!kRB zwT;1Yld3}K2cfm$C_?w3s8|X9!f<2c_pUoh0-3-K#NpkA1W3u1HrQ4Q%Nhv>U6(Ks zPY%zGvnd6^n+KOMu1+obudxSH*BEibeMrmI8VjPEET^hmsM&Xjz}P|)Eyqu^9V38* z$7gK()U_sRwEK_^IhS|&-)`@vD9&y)RD?#KXtpzp@x8_*(8CguG0!;$?aRg&dbgBb!yE%4-414wAF9oypoXUXvkaPmZA$&Lgon3U2 zg0IZDn&}36p{4{t(QUvww^y3mU1+#aHxF1GL@<@xC z!scnO2>`mvb7mNy+NcGeo8fDLp0CsG$gKD%KrrtL_Nq7C7~OhCMI+k?L!-Q<0oM*r zE$apDPX)sM7P#u&yx-kf5yXD7<`B2yXzuFtHo?F(v2V@hXg>p9)pY8`&FN;h zgx)MKJ87Ua@9*^n&)(qOP!zhA|g9< z(oQ=cQQ5VSB6>e{);fBeK5wOO5MnWx4BDUm zY|ylVrx*3yoK{LzD#>T_V@k955S?s$@l;=aqtyi~oMZta?{@!+vg&QUhJ1(fA_y6k ze2PG^cbf$K0e7O;k&vb>ANLo7sI3$P3G)x*gy^l+s+%?Fn{EX>>_$H)vTyfs0G;N4 zo)YS6h2H_~7{rAtg>yE3k#LM6N^up)86tmrCBo+Ba&0>Q{jdm>;S8bgwL=}#`})m*-v=v`r>kC@%q-s6(|n%#Fk3aG)& zrEhb`cmDJu!P;WGe*=>i_k90vv*$_|(g8&S;b2N1l=yG{nvTn& zINFc)tZ2aP+{jU#>fTB+AZta;!fUF{NGCPH7*#t0BIEpv2QB04C9ij)?MQ}pTo&qz z(Ih(cdCUEe2R|W((dE?8iqfT3z1!W^v3dZAc*b&f3vW6pAy3+^(-C;xGOnh@vl0Q~ zhTeh8`%8_0T#+k^`~j_9{tH0ACGW5HcMD!Z7+r>#VG$&VZ*_jl>}(@jz&$30xcxwF 
z{6po)16KHL;@_&j7O*xl4L#`wS{Db$^H*^#iCt;&Kk8Bo6VN8@?qCF09P>S0Sx0_p zGH0&%kb2bhG_TCkGH3io$i}J@JKd9{C;A|oL{~%2X5$DUmJ%k71T`S88OVq+6dBQp z5hVxnFz+^Ke(M0NabSw{5`|YkP4zyd6C-@tFz*iSU=k#(qq>(DCYel?ofMIEhCk$f z894zKmB1R(Ex{ub475M4t|=FgnxipiNf+e97h>=K)XfCmR zgdBwc139%8UdI);3*X2`!@dl!?COYEdTEJ(z?VB{($$-Q)&}(9A;sL1NWTx0%?O}D zDojs%5FRQemR9Uyg5B=tVG}@ujI5&!lk&exJCu3;y-iJt526qE$4#l9W^9X3s_f$A zaWIU`x$hv?MDR~60E;+kCdADs>386UC~obiAV+I9;3G#X?T?yu)7TpYgqtNq1cOMh z^fGM7gZzS1Dg*>Dmo_njPy_p%gFI8iYf5o(DrR`#jjbkjsiunQjTfP)=@6H0vx(Nz z84>8+fQt%9c)3^nGU6HyMy$9~RyGs$x~OfCl8{whhbQ2KD*-bz6y0C;CXP}LT%!1^ zE}r&R`RGAeK1ZINDipnRR+dV&03!R=$?j1q_^7KpE(@TFt(5HO_lVD~@~?vb+jCBF zC|J+=L`bi;fUj!ET`wpgQ?=P7Ddwf=TR;ETB7|!INI7uPQmC34H>!VSRH~23{qgV7RcVrmD;X+QrPLU;9pD`JIm_3m;(zcHDr&BirOvY8KZE-m@(C2+nA? z{$B^8$H^M>0!D*N_4dj(ILtF_$C_Ymp!S!a;58t)9=7#MocY)xve^`Ub}C<&RnsTB z7WlOHYiqr;!Y*4gf_+s&?+7w>jdtY!W9uEGGYy)y-Pm?Av2Al=+qP}zj%{ON+qP|6 z6I&A}U!Ldv-o5r(d;jWM-CceCyH<5|)p-a)`iul)7CbV;i1${SN)8mfyjcXM-oD+c zUEPKBoWAX}Tqo9C#O;cOUfU^`jb!m$aC6jW6Y+_7He+cg(OLC#Vz$| zM@zEepy9pH9J-|=EVZ3hR8~rsi0Cvyx0K#VAR-=C=C7a~DL5rD(wQEwyP%XR<{zUk z_V_U_iOGe|)4m2|C{}EJc3v53bI6L!eg)m~P5>4ynr?b{FJYuSy6J9yU+OM&(e=U` zD^`2gZgfCW9I2`0{US5g$S7m#S}KDmg3M{v$6p>|J64N~wX*!2hqF6IF-?o^3#&yg zVrCxQycjRNtL|QOD7*4hLS0QCP;J|D#Xw|F3GZkojy~;y9Mo!`*Lm5YGPTRfNuSz)^N}rQp&k8p?82_=h%<0C>YMOS0j2>}dj_tOg9m6}*D)KI_MP+ltIqzh_+AgMs zBd8igRnIDf_m30fkIT=JaM1SuvrY8BcBMC%S|>jHAA)~c067JMPc8SATMXg}=RmmB zz6X2P6{IQ$4sFLp#letP;sCzAZ17)W6fUpT5^vMuM~XsSDxYC*<7zj;753F{(z#_} zP@8Yr@{{)2AjtTF2(;C#n~w28Kod~)`VkKa2PXOA*f3C39E{aDVUR&zkg;2grD+M5 zKldHRpGyxga~jf5vMTN95)2t2k}#}bFqjJD3)L}%*HP?yYqCC%q+LT=x(4Vi#4SX@%;G&pa-QYnO_@$dz^Pt6?F{%)pMt}JOAb$sybakpNI%;c44xr+O zDo2QM-WgY=?6(k3E)FG^P8>FYpuefy01IHI6iW%jbQh4=Oq}G#)j|f9g@A;D1k=@) zjY-3~{^{3(Pzn8IJAXww+ld28i38;U?OE-p4uUiOq`BQEC`Q%F2bjSkt? 
zvdTLHfmxJ>$??jhrAL6b+##vwQrSDMK7Mbi`#cZu4bg@^l(6%X?wE=i1)&B<-<)}J z*|(PT41(SKcIDxPq9HK+X~08L!|y-uNv_O#Xnx=V#m-urKEanV(@fe{ zeZ2&zi2_L+jQlAAoth&I^~tn)w~)dUTETF}4b6z1C0k{Kx)}1B2Rn!hihG7bfCV-HOlw3ot(}_r@vz zd&q}|skqi>?SGCyoR@O2;_@RuC95V^Y|`61yv+88QS_cXRx zJmr3n6PD;e)r0dN#UcrZ6AgWYVex+NKSSt$CTcPNA^$AT68$UoCcXTC z`XYJ%wImSnJx2~rL1o#(89zrtv{7o4w0Ef>-zp7)$eIGbGrBnqT_9PC-0qN!vSBE7 zo>`!4xZX_)CaqZBg4t}?vC^GtPe>3YT`07M>fC*2NU&8v_PcH~2`f@x5JQSgiQ zv@BDLDARElnrQ8b!~K>#t9AYktQs1IZxN{H3-LFYKIwmcb*NZOjQ`(C5>6I2!vAQ> zQMPWrgFS%Ap@*~=;S5j?`+cUa@xbU5fi$h^i;!H2%@F6PS5igizsh|aBT*YzB+ljt zww4+sVGSlXxawgcfExn{L)+pxO>ACkqP)2LkL^RRW2^uBU0|jpL6{!^1q(~sd@UFi z;7H5%KUk};YyJLrr#v9*?pjigutB0HF1g62v_{u7#P))`p_FDiGHRcCL$vN)zea{4 z2`N`<9?J#%pE&XI`tov$rBQh1V2`}7{|Nfu>N5pc4JVqPPO3Z#Z^Ge`#mM7*Sy32r|FI5(!Jo=h3k&ExS%!Xw0AO}V~* zXF~aQ9axfL+b-=nx!w9E=5{gWvsXuzO-dF|Qw8AKP{|wto$Vc9r|AR=ThoFQK*k9o zSbbwa_`zECj4t?~Vuz%8=|nId)T(u09#;G3CCWH>6Q@AW*@52J#&HTo9ii_eH3~xpHepoD$>iv$`iqs-mnb ziq&Uz34Ro9LWTKc5Cw??4cjcw8cdS$_M?Mu+FjryD|vn~IYXfAQ1Prb>eNcyz$7iyph8)NdH1JI_%purU?+3+AnEm%E$s7#* z!J~H!7I8pRTr^2&hwKA+^|B+8Ffp;Q2YiHov5(GJe7G{eO4EUH%UB44&6Ac-FH#1fk=j5<{s-|(*cJBbqP*T8b&nS^Z zY+M;6&5+3BF}csR2`!i1pmbM^V3*(MNfX9DyZEJ{uGuE`dWZSh(CS`j9t;wdgOD_)A*f z+It_+VaW4!@|xhq{%ebb$NdEQ~XV--Q3bvCH`>DyI zBdsFav{dx0qP=9hRO`^oLY^QCJHDrWFH^90`Jn?x}LnGgj{`a&wDozHNzo;!YBk=|yH?K61wc+0VmOaPbm&w@cDK_B= z(w>ujx7M^JSm}yk2YWagAviddi8~q~-yGw1c!k#1fw4)nUa!+_?)(F?&*TWFiC_sF zYsF96(Z%09Y=_E%$mhC)wYmPO`^0W?DYb@gcSq8Hs(C5II-q8wU~9YbY0Hz&*`77U zDhr`d+_j{{5ZDb7Q}!(9SkyL-0SN#RIMNpKoSc z!v&s=fbGUKh*UX(mzF1{CVj_Q44RZ4nj^pQb=@Um?;nA3c1$jSW!jNM068$)yt4*M zta$(1pY=eLxoc5PaIRTp8N^i>py&io(t2+cMs_C-;Ep30R*I1l++``yZPz$qAm>%59y!+Z7lxPt`Sjz~}; z1-28g#Y$+*&IH)?uX*qA?r~?pA!GSR0>&?dN*p6OKLP&dh2i4-?#?lb(O9(_WdSo< zLRNSq`#^W)cwHf0WO}zYg9A~=mivEle60^w#c59pLV$zQulPG{;oB7?pC;_^mBkmi zw+X)4D6^#Eqi$~y67!9MgQXa2Uu}aJm|D_1kv zbGWtgBsxAae=9x7#rA2JM8O@CbN9R@zS;vPO%U^V;~gMuGWSO;gZIrvBp}napB4fY z9Lb^D!SC@p=qK^?m1K%~?Y*MQXJrRH8Z^7~c62Hr?v{l*uKkCQ&I&cFGcsa#WB{M+ 
z3}uDStcOSH!6!g|hH|PZH#EtLUuzoFCGY4xw4JVV}s9RJh5MQrITkhmAfyU8Q0h#rmQ$Y>M5BL*TsmtA8LL z8z1w5YyyGZPloEQScNBgGCn%4yh*$gaes{y(`>-K8Hu-vGsGUgRQkymnKsR_C?`qA zcVz5_pw0!OC{tsH1gC^Cjy1Y{=tS!~tr1Le(+H3}g=yKv3}kUwD4Ih~l-R3oY{gwM zDI?6bgxl|&SK4hJO~UnpMo22wp2`%fD2yWo(gr8>kS|d<)+d>jNe}@l71Game_+Mq5^VSY1 zNDKg0c#@a)Fhv(#OmdNF_$U$W@z<{>3@(HYS>nc>r%&UH2D5Eb8@M0Py9B_qdpRdr zOJx)#82VhklO~5U)&W#70Zh}g;Sbqj!*M>>0eikfgY5urEJA<28Ze%94CcuM1r;5m z3wRyuG0OS76=pKBlttRV8EcK3*kWC0gkyk96ky|zd-yXh>ckSs)&m-cwYU06@!1kY zD#PzWvAnrbmss)AN+}CPbG$ei{@g573N5c=bMQvn>WU7?47JZC?*%gR{@;2I)0HsGlkiWz=@K4eBRq7Iy zP^g+CQkZb_^P;^WXsv|ahCCbt*o!Kyutw9vk>SWX5H#mr_N5k%I|xiuEf4V+h*$Dm+_#T z&s>f=2#D;sK(+|>K`2hp-S?ibdn>#$2}GVx%i)Y zxV{+nU%Go!cQFCrz2DqW0{E9NPj2o`A3GeQ_PD+8Q?a}6$1fK-vLS-KjAs~l_;-}n zn0Sc&DlPPaXbQ~*g#E3;J1zhyXoCS@I_Xg&5aL<(swwm#SJR-h>iOUFoVy`IUu=y5 zu-e63XmTJhr4WW?*3RrQwl#_oOd38hbnSY-GCjOP{34o}PvFdIQ_lZ9B-AW9iX^C; zCK}eq;CW)Hg`D@HUl>53I3$|)sJqb+Ek9@REF0yxS-B!Kvhi zUv-z4RgFwxH+g}2EGtEGtN><4Q;umMv`~YHV%6(eFo8d{!oeBQo?`d^Wz_Er zR;@+8E-TM?;jzg}Z|(_TWknd!l_I2lc0@WxnN8~iC4%|8ryCDC1BNV<3%fK&FZa>O z_s7FtDJIReVpZ&{y6M6bo8@d-@UFOcDl8~S?MfK<7zu})+hLbt${8_|aSMF;#XBTD zrj=WV`5>Th*Xty|7WcLeoB$cje(nbQrTpC#1!s$ogV60g5ONmq3W9JA@?nYnt4VA3 z^|3}+=B7c5c46p`qqDq!a$k3qIjyXv}cr$Cs5MJ71(pD|7p$6~riR}>NzopnBVPA&PJo?A;c5E5u{8Qr58%sD%ipyC zVePqhq|@4YmNsM!(PfsY?yIDSQfvFWo<`#wX(Q-QPR1&S;-3^r&)p+z@G7}VC#b7k z9LhjG2Vh4)_lfqmw?g9XL^8hIob1k{LMi|WIjMjJGJBXvlfQu|KOT#EvXq}>U|#h@ zQWdSbRcwXkTohUS&U@2LGf2T@hxL5*y3BPd5?zLgX>GVD_QSqIxmPCj@=b)o?Zp8L3Uf$#re`uxDRKk=hJI%!I7|i zJ@OdEDFLlG>)uHEXSnr+9L^7184j_#X3*U}kM`y*6SMG7qhX3Ody~`50x#uvuc`xg z6^vY%TEC^<87490yx>XbC-~`}{`?JBGPeGJD)8B7H+n1H0o1qUPQ^4dn`4aKlsYM@ zh)^IvCPNz6n@$m_Aj$eS_rU5RI^q6$6Z!s~EgEM+PAAd4zXNmz#AbGcZe(#7xRSX1 zF3Qkzp9Yi1014@VhBK_7b}hHe%xz#Dws^#L*YQJZc4cc$DBgkBwB;|-C4;xqE?$mR zek1cBg8Ih%xHU$mFxXtq_{Cle+dl|3F^lUo6x{7IT~ zXe0aDdwjZifB2a$VQN;)NHnme{sUYo18)o!MEa!4i%`a}bMDHi<*@e4>Bo?WcFCRy z>or?v$>mzxw{K!naKo9TZ()m1K=N%is2HVfuM?2l;MysNv%r&Su=W8a?+t_XX! 
zyQfB{{fwyf&S}(Y64)*%$Kv#rgdjV;U|%#N8J3s(E0g_PH%iw2F1N>{4V zU|G9`f^wS(4GD|DCnUG`k({?66lXwvYZ1$BJPCmc^dMG90Dc#BsUncnDE^e?RGGTe z4NhnmjTH;@SeRM3(n}ao;ap<*F)b?y4;c%e!3qu|G^j0&+mN}rGMyi}XlQ)u`2{DbxXAHx@>sXWn|3BhC znA57{Ja{)z2A@b9@Nq~$OqPJKQQ@_{P6Nw5Lz=*TuRTtDQeigmU%(zhM7^l_#6b1o zwhC6_!3!XH_e{(!5+^d)pnnG{{FenMk+MY3F=tvRW2j&(By^?_v50wDqe7sGb;Uhh z7z||LQnlBg(R@sa;SavkDa&fTazH$~j^KLSrJxS};OI6yo?kjM3!nlzj@FGP$S2Ha zb{L{M#^x6YZ*gx|DcYO-br_1FD*TBGEAa`^$`MxJTI)Z%PRRlFAJ z5h)g>&F^cs#5m=1`GqQKCwSJ8_v?p@3XE5sw z)VlteKyc8nHOXo+AVMI+*^uk775+zIhtYb_ftxU9q|m^aIza$^4Y1$o9+o)g#_7YL zfmcXB3a5rpm_9tt@o3K{A|5jYNk6BHu^*qy6yrj;ZP`A~i(=-MC>dxt_U79a6P_?Z zoI+crKXGrn#yHQq{V2Fr=rd!CzJS-b8&0`h3balU(07nzgW<3^=mGl%9t6v;cLb0F zAZ&yw$~NiXFkk>Nd$2L>@1j4%G8y$rlcrkDilaK64Hc5s`8#BAUg*cIH@%-+G(;%m`7_C zk^yW(dpRWavO&Run9$1=T!dQ@d66oW)3MGEa_(f-clLnc`mlCUYYp*-AtL4~$;I>m z7XN>2jk72mD&_4Sq z-ZWC!kBlfI1RPV|JR>C-n@?Y3eIDZ;5D*woD5d*as&RxdsA3*twvXxpy8qmo?Erqb ztnc+xg=-QVh2f?m{&YV?cpW<`5O@^UFB5kuSg8>f9s`4+{%_Ak?~69Y8-#+cZJWIS zzKZJBU(In8&fO=UR=Y_m#}O${#b_7JwA&YAXIKCY&9VxPGo9ETTUpMbn`~WcEX%5c z4}Gy}kUX{X!+zLiFKR1Mp>Uo#kGfd%HG#P`{oRF%=`^&(is^6hvOz-naKjbRAfh-v=g@J@uka2~s2j1M*+G(xmPW z{JsIJ*U$TIWcLD#@`Y~%NDZg$KK5^^xbaA6!wd*7vRr$wS#c&RAUK8od~`2-?z=p0 zwTAC+o*spLJ<<3Z)>woCSEvdpy%=yC)`U#rxN&J1BE)bhw!fLVc%SZbu7Xw&G?usj zl$mV5|6Tk}RUWf$4#@98BqLljn(zXlkS7FJrZiDQ4_Br1&dlMLQcIjMw?>gaXZ{)wv6MH^qQu@IHB0jB*QKA++kg=RWR< zUq?#kU6pI(p{Jxt;h;2QGG|ggnH|bNTjZ9*?5-lqEUh)hzJ+qbV^?Rtb>-XpL!8L7 z<_T!13n*FZ^0k$R$x4P&tA8tAphg0G>tr56s*{Utr)wg`$OgAI>d{_#`L>6M;2g(F zN?dzSEzXWn6{vB2E2I=SF@yI3GH0C_#BqBn0&n~>9ghi$p>LzWdt3CV%it>_<{BNj|X5bHo4#LFs zzeRl8_J zN{MmArjzXx5F%ERGcz+^$HwlQ1O2Rh{qK#f?&kJ(8dw%ZWGVgIqoP};#y|>(xv)cugeL$qUHnE=UFfT_YC(Vl_ z#9?0Sv^5#OT7P|;91G)xhCs?>&wP}A6N+yO93{U(8Ai`lX zh-KAjz4v~g1*m`kY;z-_wtE^ z^iJa)%tm?q&1#6?5xZuLS>%iZm|aggAs{y-oGyBYsBDx7*CEIayarcpc_3$ zv#N+1C@?DUVS^a8TU@}NSo;S2w2bSz;4G37ku{UHc}0Q<6RV`mEK?%E$TxguteHc& 
z!wy-=;T1<}r%Q<<^TS9LD$eJO0u%bGYSrq;tr*+%gA>g47Oz`4vUw57fV7wnoiH>$+4Sb{U?vA^snD?yl46@N_^95u+!N5&0=5$70n59gf~vQSI% z{zAmdo(nIl`+GtlEPislk5u@+j~v2E^BcO2P_|sO6}=Atg{s|ni6ubz!Fs= zYWb5E8y=mtFuaE1E2}>oPzn~&a?MZfSr5YS6kJA|S^3Hyu&LFC2Z5emH|qvFapnD~ zT_}KfyWzSx&Y)qi3ylTNL(U_zDOqUhT&7Z>pq}VGm3XHN%IcJVG(AUm#?KDf)!SM! z!fZB;{Sa>3v<5lrs0=|c+xxVAZHr$Eh3OEthCjDeWyTMM%{NpCkazUU6>X2xz(C5! zx@p3Vq~k?WZFOYrq<5X8WM@L1YQE{(X~1|#q=OEm7qW)u@J1ANA{7~4f&x?KWeY6{ zT#2@JKmBNq2_$_=*f$(Atu&GsA;6mE!(~~dR5_pw#}Qv-FZ&CvtpBTIU_1T%z@|%w zeM<^6uh~pH;DV$A@W&Rkw?*++a5s5+56Y$PifX4oF)J5Z{*+Zvx5c5vZ|o4Pj3zZq zIiPl%e&%XvhrZyMrSSq#Y%6t4jo*@?LZpx?GgJ$*hjq^qt29kNe`)zE)!}} zEm_$d`hQGQpEof+qI%7<1@oo(nux0_&L_CJ?9=jD$EC6lc7@(2?BKg*iu|#o@^<;p zSnTsS+-8uQfI+)x?@4-Z9u!51NEq-<$W=oH-DO-{JPU68vNLbk_WZNmX5ZT$U}e}p z_~f6cq}nngRD8^|d``dOc5xVZSY?UX7OYn~izHVBj~rL+`J6}%!^_MzNwUK%*w@Dr zqOBzpF((pmJ+H9LHGMX^U*6mq2GpZ|1MNWvlzujn07!0y-#kuf_n?Dnp@W{T3f0fU zYIBJa=AxhF2ze2pQiK9Ox#0GpgDH@DAnbiU;cAcm7Ws-Li0 zVVi^t30(|xbV>mnR3jS|Bm1yHN7Q5MmQ>ixSJG4sv8W2K8mJ*2=)F``eb+(SDyVm=Dhl$}Gb_H6 zbVeYtcK1|5H78DkikbVs18l*wc5g)onb{Hk0?4``DVQ>$zY9s3sO6W*^cSaJ5JSll z7Jku`^7b+uux7Rht#gc#fJJ+M2OE;L?Cei&j9=nal5i3_4$sNy^!UoRzK2&tsRoGnl z@`p+q#2&bQw!vyhar7Vr(NC6+sLCA5VXQl4Y|S3nE>*NyuxiI3;sYaBAQfkY9N}PR z1KOd+(j80wZiL4jj$@+cq{M#EWY9Ib0Q5l{6N>$O(y-?NJydBTl`gpJ5$W3_QMh~i zCqw#xwBe)(Pr#3=%Hqo9KQAPXZkA|V7Kh(XpCi+H#hYl!!j$Q-a}CpELZu6~5RGu; zyYWzty&JX=NJ}mN`kYp`ET{lZ#ZeFYbF);@;l?gDR};QY{)QY~0?k=YQKd*hfENVm zfCAz|qi?&186Rj%8~?<6#OnIQ%igifzo>RAyQhe4{1X%RVI5(M7mxAFIwf+5VVzKT znb&Kntx7VN5oV?buPI2NRD6aTilVKNxcR# zw#!Mc_vs?ldzAA}LI>O`VKfYw49;smpM@WM(Y5qeem!rl1U$su*f`Um))_b?Tt z6ff9~DvMo*313@L@PuuxYv9I7rN`1eR~_r3)4F2p+dfuoBQmtpP|YadFH%9qmfzS9 zxN5a;`--U;k{yxK(tGFvu2%Xl%_^)ht5o4J*cKNm@9sTd;wo-m8MV^{b`SQWpXxXb!*+f|a}8NEHF3duM>_?hKp%30%fWz%f96U@7|~;A{Xi!u`mb zzfWF$I~|fY{`ea!LgU`==qo>X##tsCLY@_)-yt z+S4AW2o$7k&8V0Y-yNG#IwM~taX3qGIV&9$`uf=51Bn1{txkX*R%1{5fJr5RScJvp zS83|HcU9qUrG2rDGM(+|hU}EB(QW$M%{(=UfDY|EevJ7De)@E;+1uyoi! 
zYLvI}HW;(CM1osHh5-w~+!Z7B5j(UU#t>*8kf2d!^Sf5^hKV+f>6?ZL-8ALrjG6N) z>ILpjMnGj7WfWlYjGVPiPF<6YBccCGvAJM)Au52;jv=*jxS;CXCUAy?Q zV4wkcmVY*vZIs60fDY z7_-Ph6kl;nyx3@n(woTq@QrzZI)h#CRt9}6{D=vEb<>ZD)xt8@P`$Ln0B0cQV?hgQ zn$ziFnCdVMy_Em00_G(7eXo&&k+JPZ*-JydKKcyEJ$|UFxxbSpzRs{PK)EC{+z0nz z-hzae2~;LsiUjihCm1x|iEg&t%$W-Adg~EOgd47h4_Hgn(!0{Jv_Be5grnWVPP82s zP`AelEh_@sF)x)=rGO`b;V~NsBor}J?o&Yu?yzD43Jw;jN+o7Dv3 zW0M0z3K>MiafFCVOJ&7NVaOoPe1u$}lB1zO{vrA+nbSe|CmIS1>O)Hv=I65-@Bu1! zJi0J;RS4qHAA^Efk380Ze`ykt`4&Q6kskua6ZsF#LO#s)-kcKDYAon1eAF`&eNDfO z?%HU{+ZkrEI$JpH=~sq5mC;s7<)R{P6x$NchTvI40E=I+e|^um>d{X~>D5!LXZq`B znBoLF6g}&wqE*}%SB^xL^};qUsC&~S8ZvI)(o1Wi&b;%0{I;j&Q+s3`w0rmPI~D-4 z#bW94+fp2hV+Qw==pEP)(QG_DGcpJ8|AgAt=ha6reW8jdJfRSv00{>K|O?3SLJ3?2g^dT4uQHNlc z;nX6+_J+gbrNZ`#8!`4h@+YwOr1t@)tgLzvGPNxd*B)knsctmf(1{BrqoKm{cq!Wp zP~gN*e;SY3hIOx6TLnG~yC$zZSqp65oMMduE5JBvlT*6UbX&f}BH_`MXeb#I+f@WI zN~YQ@-mxGXE8@7}Tn9h_kq%i%G0x#|dL7f_GLY^?AX+P$S=OzRNf&24?BW1Uod?(x zN$8e;fGgez<_BugKWca99(}tT=I=~FT@4?Wxuv=9ImlhPli^B?2}RvIEJ_-LP{GL{ znXa|0k;=FDvK`h;Ky?1VDk#ZfAw9~toJ1R5A+>UqNKct{SWiLVsg(P?P#A=P5_n)y zHCk{$QA4;cyx=Nu8UJiJD*-nIkTM0fN6 zfMpNwaWn>L*Yg}kA}H66R0nHxhi4nAx~}oVsT~fv4yua_aRHaZ78NjX0(xL)qP|C2 zA-KGn{{fO!*39UA>#Cj?!Wp2C;vKL_m*wAWnD*s-SogeY?b-vKomDtPM1JB?O69@d z-^oc7zW0Rr3s>?f#$Ma+L!^Jx^Qry?gLrV193%kpu7k$VT(}Kd%M5`8DEs`u087o^O?N1G#9ihG6X15<<^IowFAtn4(KBm&;AODrpdTD zcxss{&b6{O<=Gg`ous=r)l@$^NZO|V+hc}eRo^t0W+>);yb_p6*e4N8bhyy9DoASu zQWZ}K!vAhdSH`-#oV?u(wX0=tHZjWZw(?$=#%gz2XIC=-Kp{=UyK^w@Z!7HIa`1+clhBnes|~>R17Hmv#>5`~!dQZSMpk!n&Ub zEWKZ+t}X`4v{g^|$6+W#o=rp@e?+5(SZ5CYnZJ6 zf9i&;tZ8?C;8cJs?Ti1Ur@pUy13$@|Po8?(H;aH1Uwl^cW|~{Fe8$MY$rRZYBUB{L z=)(%Wz0C=dUm9#O;bwlrgJN)PWslC-o4c7y;%-53dN|)tPmiyID+h>aDPeEXg3h49 zFhL8|Ag$82Y5qKg@BAG%@{$r}lfd&!gDC!b4ULvFFyqHmaX$m-G#V5IG_HCSXQhf6xpCO zb6v{QMnZ`uC6XI{*4Xn*wM%lr{G|(?L;GNJRm1MdTWoYsH0~BsD$>g2LbMXxVuO+{ zkk=L^Ck2>xC`X1+B}Ju79&r;sCIZbyr;HSW$UZcH#%SeQrcI$$*n=OBDV8D{V0B27 zV6s2+Y>7>L9JOff2!C)rgNWt~-}+`5S)w9@fXW8A%7T1%8rNPRalL(glM{+H=()ya~X*>yQ~-`3qi|IX>-w+ 
z88J1sQF*K<8iwk3naypb*#r#6s;u3_8e=tx*9iy|bT&iAqZ z6#pdDzob-$rqAmde0Rk1kMP(VYl_jql?0$AR#xfq1bT>`XiLJTp+MpfC4IJ-jtskF z&(wW0$xP#uR~or&qQL!H>IiJ)b$g>8^NSBho$ZfRlk^uB0jpz-?`xS3lW0v7m~kif?8}WV3z@?#h9}c!>Q@kFQ|aqk{k1cLE&)+ZlQ5p2!T9q z4|(wefCQ1?2YZASVYBdo1A}-pu(@vBKe14xSJh4FIim6JUZIS-X9{#1Yj!I*I;uB6&roh$CF)c{R)^j`Nzptpyokw=SlF8>R!41;~n@#{KWIP?{D=fu^hbI9w3 z4%;Fbb>O7+LNDQW>E%1PV-iWFuZ8g?bc-Wg;pwDL9wX7arfV9U?gLHA_Z#VxSz!8z z20!SkYQUYGHx8%$)Iopy{elk=ta^6VJ)Oth*0u=C4G2rNNzZLxkTBp2%xyP=vhH>5 zeB1o=!xt@jc9uOY6nep0$nLamP|sL5@(bqM>E^#-XuonE48l8^e|+s}ih;6-Lk1uF zjVP%jTPE4XoqR|X+ZUVqMhuQw@^+z&kr%c~E&f@Y-hn-m)Qfp%m!K&%sv2dOqieh; z>zG7b-M%>R{PekX>~Z~k@Q?g?b4%2wCDc+gk)d^|cLvkEW^lb^;(Urvtb{W_7C0}w za_bWxXM_EyEylZHgFQ`<2OJF$43nV1aq*QLR+nmyi21eIQv~usCex3!LcNl}CKZ8B zMI8nXLG6#5?>oQ~G|$?6VNc8wc})Fk~{h%}hu6WrFc$&`DRO#|{5|;5P-Jn%_yCS>S^V`9inWGP}KQ?lPDX_MG%hN{5HBqeO#U82z=K3BGLXmFO^Qub`H9S%1 z2Hnm-nb6tZPV@fzV5j=9AR%s|RI($S1HR<{&g(J+#em)sIfm z{QcHZ|4=1&@IK{i!Brn&S&0zix_up8E~)SdlU9bv$17HKV)-k16W0iVO^z`g)Z4WO z$Lfz@8P;frb)x}*OFs3QlRCnz@@1QRhUB-l@tHPDIHkr5ieANSh``?~K7IM}(CVa` zUNQx_l#2~!Q|GjghHyI?>(tNf;*7KRG6eY1fi@iMLm0<`k)bv(JtXx*L7f87E(mP* z2c}L9QGy2DYj%sTqk!6z&42%DVjb5MrPs>gMmSj@&)a^~QUM-V7Xp`OBn4k&4S<$0 z1Sn*YxafBpXOvCUk7SV9_*fvn8RS8$M<{?s<6z6DB~5oMdZzBjw`DwcNzPpUY-2_Y z1J|kZao(zbtb3H`%{nCzbm5d(Op(%eYxybo$1Ko?{Dim=(S(FA1n<-Itd!;3S7Tg* zg70o;ZcsYlMV*6d%9{Q(Z&g4Ni?~|M`#lLN2JJba-;0_J+}>HiVDjLfaKJywFg^g* zI~36p5XI9H1Ed(wZ$e!B3o*QtH~mSiwm*&Eae7NI{5&5 z;qx8}*o5G`)oV*(*$^w^n|06bRz%>qOWox%VO4DacxC)ab=giM2lBWCsk6aIoJ^rG2@;6W23%=jra9-6AIw$as%U>$qC3UW*08aBGn3 z%|cs;XwdIX4jSe_tinT+Vc{m<`)4FjjpTKi^tEybQ<1sAc|q*@d(G@%q_F@H!h(WN z&FwFKQP=0Jkz@FOyDf2Vc{Le%nfmZBv1LAw;z3ul7}M(2`iW{GqLB6Fz zi+q`+HG+su?*y7A`dWB5Ps{0GI#ox~P91LBe)mqa!IY;O&-?c(I56qC1pZAd;?2xx@F_~jso}|q zAq5s=2ztQSLA95-TU zb}JspmJnDYfKujfAE8j8=E>_M64`G+u+UVK(9KmD_JLBrcO*TxJWqZaX_N0TnN!R| zm~32%A|sliEm$M7RXn^X=};mP#X*!pa3OCc`8qLMLu4Hf{e6b@bX1eL=<_zIesT_q zehFiU20HsH`Pc{WFk6d~R-|-{Fp?-z09C%2r|=0f05l(+IMWEnQl-cOj@$m;s>;6{ 
z$T(t-w+Ovlmh1UnJP6eq%Xkc5-?@@>ReGx^(&HO5T<8B`>ztx9iMDnf+wR!v*d5!p zZQIEg+jhscZFOwhwt2GmKhC*1f8AA$s*9?z)~dPYeCP9G){Io&CVE9zlh-7?%$-Cf z4H}^3@d8(`$-Hm5`YDivtB%S=S^Q;1)d(t**CL03G_Xl_^EJa%I#C@7>(1S ze%D%Sp^cuM$tVJ?dv%~&?OViS4Cpi z(y!AZX#wQ-CPKnG4D~r#wriWH0XTwfk+zdg6}5E)d$n%U0TGalaM?hbA0f&FLGaGy zBZZYI@`wUyM*AuREQu)8HII9x+DeK_FH}}f86}mT0=BNdk$!Em2&YRC%Y9fw`7)qH z)9v%9jp&t|c8{7~^7$9eUqguKyUNOlp;D&DKl6!CVJBZ2Mc{t=Z+R5pK{>o6XfNcC zmPYmxtCTv&tdJ~A*5tmM)NVeDDw>`a_v}3-1!GN_hXPF2`#}y&afHwc=dF|iK;^9l z2FC9@mO0m6VCv|Aw3ed1l6(H^Eet&Yxep)*e^YKvFz@5yEjBD!nbHuAA> z2WcEuh3Yy9I$k{#9j}f%3aeDUwp?|m?|0i5-t9fx=GI@R?cBSSTTH1=fGxnQ?I0o{ zP1lPp+|}jfU0>ds^-jlQkshD!#MBX+i%=5GiriYvtzKjfMQwrZbQ4Xns!i>vYQQMG z%EJ-1^qj?AO$Bv}xn(#7<~OFywH}R~k*k@D+2!%nWW66cq>B&G^VL~9=?)S;ublF|IF&3w~9Zto(~yMug}O$oFdhV)Ab&rH4GJ+#Ejc0NWU`byJ0#5 zSoT&NqWdyOqbqpBSrgwwn{=2Ss@MTE0knp^4?Z1kONfc&tLE+EbDsc!XuFX6@&*E= zotOlYmL1xJO(9+f4;(&LZe?=t?P6C!*2W=*H}SnIj^kz@k6$pKleC0%6ZgpmlhRwD zlWU-pR*zQdO@I-#TX*302R%@tqJnhqAcks?A;O=s!p8Ht(mIK7pD`;CG)#TsHM9#d z3mbatO}^0$#fSem*rg_4fFT02{KwE9H5E}5oDOgqyU&99-3<^Kgj~q4V0Xo(SPe{! 
zSx*JM>{p0*u@9BcXtyDxACg6U@)-P{dLdgSU4N?13<#Pz#rE2ZG8=xf^G&Zjuf$z$ z`{Q)^*uo5_w%jMnp9R!0`ka<{U}gt!%myFran(i>nRwNS9m5+FY&D&FHrvkj`NPH4 z^8^s25JV=KNT>H%hzaM)(9!WJz~}79aG5mr^#h5;w=%WT-G+~Y#Kw9;Vr}T|_A;S)=t>bn zJ$v;$5T5+?gOp^&^yVh&=8DJ+X)~_F+6UM-*3TJcPPZ{!I$BhFM}5u`ICTGOWl6B9 zD}_|iR=xAirZ^tMzA*>@Me`_3Ux6E!(nwd$u#;363cjS<8`C8IT_;m+WAr!4Bw-j` zFYW8vqKU1_t@>Ja7qS8gi%grGID7d><+0L1vB<<=Jy<0PBl*dkY%jEfN#j1NAO?V_ zJ005Gtn8Xk+2X-&ijLXlZMv4JIOd$Q`TR*q=-lL{l2NfGdc`O(dE7Qz+AZkp&O5!Z zkFUzBQPpa-WSf^{3!03$$FmmI5qoMuD4;Kn{?09z*xa zMB8d{Zj_6S*wH7PpR=OE>_CtWm%?b2xwf3 z87zI&w6qALRDCIF^5tES1nL#lNa&L|{k8js97XY4b8(nxe=pH?j%I+NGt2@s!A zpIsjy*iDKa#?1axDX$ty@(@KA6LV4!%724DQ;Ie+@$R7~676ygydQGnv9dZne93d~ zaHh*mgzK4t>BE&{)kjx*QQ(3Z4s69}LE@09=Ap21L)|iLB2%cFwyB$ zNVpVl8ppr4bjkz`I@hQGWl!W7zmm(Y;Q@hWjd*+Iywl$-C^Tg>eM>o#_&mT0$KCv*@f-qM&lN@y&8~U({e>J_;Il5i6%LDgzi&Q=Q}?$)8T>>v)zQ8 z-cmM|>UD~{2f3cT{aR+?xw*JDH7SsDt264@Kd(K@bqjbqCR|&R9DS!sm6?7wUq`}Y zJ?uv_;EZafVg}s&6VE(0JPEn1M~x9ZY@Qop1DU9xVUJ(}{zO^Nm!HWG-O$N+PYwgj zYSACg#dPc^Q?i>BL*v6O4&nO5MjKa-rCj5%77Ihb17B33i6W3B)Vm+I8#l3nO<9(+ zR=f9s7gnag?6HC8yAFl^nm&5Yj+o*S?4)uZ0B*1vgVQ}F5e zc>Q_ezXd!h)6z*MDBar$f}klb9dD-z9IWt-_Jl(oXUFY3nh)gJPS63=Gl+J!whZJ# z-6NOu=VFPstRdt{_84=o3!j?EMfk+&ypkXcF+kCovo~4|IKesQ8L!Ju^P2RYggQ2P z-sc98@G#b(izs1kb02ai^g|~(s0EPNLGWe0wgF@d6lrf;^l zCjs)uzM;|!TrvF88@aHfj|v$`7$Cd~{HxL_Lu!8PN@Q?nr^yF0Wm~pDu~&Zwx(KmU zjJN~5{lswm%4P+dln2T?XcpCg;5{8uGBN}?I;*TM}Agt zY(V`welFQGj&t+zWd3PJ;AldT4sb@3{U(br?B@B{IJHk{Ia=3a2eO5dlt%v_t3`!{ z<188-wxN_F%KJAj>`Gf23?KA3lhyLm%`bo35#Gv?v{w@6cRpvUXGO&$j|QIl#+|eK znKCC(RlVONj^!Kr_S@%sysGHkZ^3;u7}yz(8n#5)QV))GtszGy85K}v=FI}i$7n}Q zsiQLBWO8zGa{U7O6%)PjZcMd9_8~eUUS=E`_Xo=|(86;wM)$j%&5d{6Ud2}2y3X53 z?*M`wB`^k$nVUes5Kz1jvVw?SP#rj)=$q0P_-1nB&VRRtz&|^~Z!pyVh)a>zL1|jz zWx-n@Vdtc(L215$QJ955z;-m4g3X!(e0jU&u5}iMMdnso|R7 zD1a`3%w~oeL913Ygz_r#{ATnI!hs$$6DsMsq+c+nLz-2p9oxQ|Rnjvc$j)p;1|Ph6 z3@E}1{qlm)7~1*yIUC+&rC^_0!uBZ=jrdiTXya(ZFgkL)vbx8KsU_SZzotkNDSLz* zL>;Ip6MhXOK_#W0kaO|^9*O?y$@CoAi2cw;7e@)d 
zdi(CO$sE1XNmHR@-Mo}BwCvTh451N9jX7Kt;)^Ndpqgc}pberSlsvdg}MIs33H0KZ&;aD4cD-=v>j_+k)umU#gINZuj0j zezGkfSd;yx{iSU<+yEd6DYMrqA#Ec$F>L9LigO>B(v>K2 zfa2PIt8JjWW>`94Il2OE*<@Koow8M`_8cR*`7*TrSJ!7L!*MxN=dqZLWI8of8yp#c zs1za}2|PH)@&?RASOZIbD87K$BB$nBw0^}%w}en&b1l;V)%0tuZF}Ke9X-076BmZH zvFG*;b%HdMJ~Z<%!c7i3_@mYZ2xQ70gE5SmjokJz`Jcx zcEch{rnKsl3mj${M%hkl87n7m@Swq1;X*HEr2nET-CoP&?&jYCz@6#&dl(E|%3&BQ zV%IDx#Fxx%8d~rR?a1-lNhLMn*A?tA@mfa8ee4ie8I=*>4j2uvOW&QY7N;rI`A;~fAMN!=o z3r@T>+&BC?Jw>nPixbV zwtx{{@VVvw9|be#Ctiwnwsu-arrs1epeLHN^4r_8nM_>_)`+<6h*8u+X#CJ9U3S-H zY|}&|D$M?RzN~yg+M3Rzp;e{m7bdfl4CZ)#<$fnyOQhkNak(d?+k{&MdAb%%ilU0~ zv$%Hh@42?a@;R<#H+&Lk?6ybl=E8l&^giycOtV-H4BNWOd6Bo4dSTOGbb2Zbz_>I~ z*>@fU|9+SWDa}!c#7`FwBKvVFfv6FNXfI)U3F`z~(H1b|n5;FRh%I@|aasD?>sfzyD9&JdCSUxi5yrrxTXH&>4n=u+ME#xUzF)|dagL^_8xW}Y zP27=Nh+@iCio+{MLVwd7PvN6Cz})K=<%DjWrCPy+*F`C=DT`>E1K6`vOK_8P>Id#% zcUjLNV9>~9;HruN%WQ|1uG*9W{+Db>`6npfyP{B8TP8^IfO1VtI=y7DSh##eiI1Q8Uo)Y(VyrRBMY=64_E3Btok=i(1Hu6JXDGaaIjPs2@-ot zay#1b{sp`3Mqe`>{a)muS>llLg||$#B9i(NLJ>fNW*qKGM+u#=iLzPFMzdU1kkvSp@T}z@pk%s)f}mUH>)8-Z!OhXv#{uSO0xJZ@L=;Qv0S-&g+l(UB zLB&5v-&oMEVU=w79m^#Xh6Gu*=}gqXUhp>*WQ%3-bKrh?jFeHgduT>TG((>nTDn7q zgB{MCb-^R)u3bRIaRSHq#r)aIUNiv<)+aCt5rVJ&FdT%L=c=$MAP12uH1ITnvOg-| zp4(3vEeHX=lL1AxEbG@nyp5vPl64K|zTa(#eqcfYyx`Iv+#K<_zAHlJkGkjU43>}X z8f7L0j3jio3-76KvA3H!^O+-T&DWykbQ0^Y0QUEZN2scc|5dE@zgSv~H~|r3A)LXI zdeJh7m+P@FJsieE04u$1r!keyKc29E#R?C=h+T0rbUT|QV39xGxw|zx>?L8W(0%D* z{r2?Ogb-OgxKm{mBwx7D`1bTr!d@Ax#lVIa!n>D@?mML5A(Mi83dqgzIr!vr>jfx5 zZ}*>qT?oHiit1OXy{*B1nP8?vMHjf^IU?X$Vg>m_HFqFe0>U1MIh+Z|7Z@-+lSHh@ zya&4SxvqCQ)(r-POMLm5{)kZLfigf=@<2ao(qDGIa+3AnaBt*L7h zu**e089Z_J!c{_y(6n=lZ53;PF`W7sg) zKRyYBQ;0WedTty-6|yF>jx0{FiHYC&Yb&IeYit-lI$8HAc`r$tBMjAEmi5ool+L;E z`$m(G1c~TExo&V-<3gY~#g%Wt1ncV}bs@k|TASZ=0rJ#cppoFYSN9)N$E}!8&iA-L zoy^XvKC{6CaJ}#Zks1tywtwZtUUvWHFuX{<;EaDb6*k#FQ~GC1fL$ScI3S@k)|G%4 zA@CQ|sKi*zW9K!`2Ag-HPRh{JLBI?T-ay6*(czDwzgH?8%O&BMWFuq;(%lgoBT7IM z7|!689)P^q>VUr;`e3u&IsrR2n+9s5odL1jdDgfcY^mYiwQFj}D6#WaA+z)LwjAC# 
ze5j?kqW}k0ercS+QO@#xqO;C#U?*QgU9;_!Drx`<1@77nBct3Owbm9Sp(>=P=S$1p}29Q#!%CNh`qhH1YpSdGm z#8Rc(TJ4p5w1u&IR2+5B_nVu0zgsvLhm8YKn%ZRB?%DQAMhFwaLm{(IQ+RxBiARy! zA3zBjE)oTR85)`daxVdri)cH8W$Y_{te9P9iPOmqih zZO_?9eXS)!ax%kOq%Xoa^ZfLpj?4t-0^rj|X%eU6JdMEwyK%SpPAHniskGAxwl(0=I=kk0uH6cgl{=fBVrG!(A9A z7aUwgT&7^0F4n(UTv680NWb#C-;{NZF;}*6H3dlZS(Df#m`I0sx5w zj7^&0B?w}>>r`KX?>V(5NZd+MX0xOG`0(QXIX=lZ-}&{t=Ezrh`3RDa4^iSm%Llgt z3}yp-4%3b>m+v%)Eox)-3ql@LTu}B|7#=|#r5aKo+6b@TP56>r=fu`2W8K}E)yM7I z_mgYu_EyCtm9UoH-^=f_amV%92;fV6h+`%(`s$O1nNb*$ZtA!@Cer4Q= z^`PNf9$6kqv=@vRfr#x}YW4S}z>g*=R+29If1)V%|Amjq#Ln@5sY~mQ>!Zj%%W6AR zzzusp|MAGK^YRs4n}d2aU9B?i6*>$sL9}{Oe$ZU>;ecj#6*?8@Uj3PQ zr_*_}_RyH7THN#H#FA7Bz8r^b81WsZ<@U~Iu;=h&`EgWUWZ)LM<0Y>5W$tvj;)(mq zV~s|QA&Xxr5-GC00PSv$*TcJmeQ0ot7;rtt7kNLR=?GPdYhI2e+%Y|Mgk68T&2r7( zQmwq&sY~mL+j6)C50e7mJw>V!X=L!;l)DjT6C3>YEyZ* z|6b}SbH;sNJa^&&Qb;*OJzBXZwHeMudEZYzsH$_Wq%tCf0tlO-`B-ktmKudxXq=2N z4g79k7pa6zIFn&BnCq5#6l&Md8S+S=bH2RL#I#!6AxE6&n{YGWTuu%l3l@o|QOpv% zztdc%vx@>5ucb*G<q3!7fC9Mm#0pT@I=~y0KDN1i6}sulZ%(ZR~*9eRp_0M zyEH3UIdID5S^{Uor3UU{cz*2ANy!AppP^7=^tj=s498AL4)d`EV_TM$2!^Vz z^{(Y_1(@^O2MaBrPAU5LqCZ+=Omw{lcnu~4cUnlRE|LqfntqiXago7LUx?{6{y89_ zEXCYp*0x<7^Q^^I7L08RdXy-{If?nGT8G#n1~R+H#vG*yFnvC`SOC}lXj|TWr}*dn zvd*X1eewKW$5crL&*8uW%#N_Kyu4i75~3|o2zaPKj~{Mvqa}a{l>CkwKkg#Q#zu#6 z&$n!Zq^(5+*13NQm(y79bZhe{VRF4Z#X(lI7xV!n!O0Hg^oSyXf9zD{IZ1gqS81A}0NL5pXY2Jb;I)Xv z03^KFb~U5eGi0@mx$QT7bmCiz$?t*UIZ}UQ@_9Z`#pnxsBo!Ve`m1Uqn#)&VaabW_ zSj5IodJEzs)3MiC_@bxGMmuh2d93|4s3RtGpLZvARvy1`YNmS3T(kHkF~k{z@*r}% zvEw%#wo`d-7EPW*{(Jos{Nt!`?u*Ch3SgNH_kdEoacHYTAH?{4IV8$VjFwaAe#nLG zA54PRV{_PW&q5V?1ea>2gIb30;)Iztcqo%~R?HIGKm3sEks*D`by$3>`H0cd-naigQK~s_1S-B8$^uwZly8vZl??!uXK%As;Pt_1lML`(Pt*@+!eOOC) zws;MYDj%|LF&vl;R*5b^Q-H){VkfMkVM2b zG0Pk%qmmk?85=lyj!OY>48j4hG&Qhw(xn2fuobMHb?yvsZCHatuzg- z`a$r|A%vlC#MTDZfgnSnaP0N*uU|YY-QwUbemjwj@MiU8xa-gcn5D1&!A6u8TlKce za9giPf6!s$6BRY7spY%h062JwD(~l1o$IpE`=vDWvO0I-bxuUKhG)hM_`h3w0U@{w zj->Vh6G5RrsB12 
z!oUdW+42_=XR3ekMSgO+WU#MFd72-!MpXl@+s50iW@e=tPS1AZ5|i(=b)D5`=D`^M zaQ>^ZOvK;g4|tx}yn5k*ELS-aDdwc}&tbsZB^PVPHfr|&`yqZ{(*qX?0p zR3sWUFr#se2Zp}v-G>3DiTp}`>gOv46UGqoViw3lEynfE#s&*&y9tIiFOk9mLj1cOh)wW{RZa6*Up1^w;h>K34Y2CUf|W$#%aKUgXGo=|&#S5LD?O`- zvpEa6g8LL#-w>pm^K-eJG2v2#)gJr&m{DEy$-i356r0fRmdbWWv(Hyf*^bJ&eL8IG z+RnhN^6tq27+&$B4k~{bYqfQbHL(BU<0a#%+i<_+Z$v9W3tc|#u(1zXz}h?qchSb_ z_^{QB&DAWe;7a!lkCyVoA_)T#M}zRn?}ybPV92k;Wny{ar)F}~a+={{-zjVbJuAXH zIUB8uGEz*nZfsRQPqGkptBq(%@G-ph3JHBWNhqBGgm|#JsLW~!Gv+1sI7$+I0gs*! zr=*45+U?}QknF6FL!lWodjrMtt3Ac_WYlFzzLcvgesRY1eFJBlyPLK{Vf)vd4^G7xKS{Bp!6>R#+1BM z|64x`ki%BoZQ>LGC#3E*$a>amPSAXtROwuxsCYo|>8&F&W`$mH(awfG;bMmQ>Or8O zML{bE1rm`9wG~~&z4V8XVS*o7fX*Gq*+1H?Dt@#tK#&60O++^4=PBKP>cofh!1D|; zLE1hA?(&0D?dUxnS@dk@XG1ot*yU!s=Ol*!psQE(CoxUp6fb~sGlwDp825R_!8XVR z9IZznvJ=;j(Gu8G&iUKJIfelg?SP!htgfmN)V7muO3`rP zxkS@5CO+waUm#@`cxQt!Tq?{nr?KIo;HpAb}{z!ig}O z^P}C{2Ju83Yn18vyv23Am}i>AKZscY#ID9)zz{~q8wBtzUaRCMc z%>G}L`+qWvyAnVC4>&qb>uh(MJppoV4-1CCabV$3Yw1>N2Xhik7JF^Hg?NZEvrP18 zWpPQ_8#O&TG`;jnF=T(bQl(7|?V;&hF(W^N(i$Ao?k^5X?i20)zSR|QlLb3Z)nxU? 
zc7-`mz!=iYcRO`EZ?#RId3&@YDO`MM7p=RfHcMox9iIL@2K3~7IQri027$62^{RpJ z!T|{!6rLx#0hr97Z}jkvUa4JeA1j+sN3p&zrhY*D;ZrI98d|>U*7$!ux`so z2w$L3@TNp0fZ?J$tT)xynr)Fq=ssAo(TMjO0+NW~ zICjw=(zW3V(+lVxP@WW7*9v?7==f5th`K>LUm9I2rX#rB!-32bIo0;OH$6~($M?e- z2~_1(rF!ZPvuYW0vUpVMqdc&({>d<1hiqMN0C_fjSJqA< z%=k463Qr(Ro{j7w>cFQtHtdT{(JKz&vqx$P(5s8*V) zT$>jFTq^|mU*B@@!a(}M+(8KKOSsfjh4Vao?HeNv}mYWwe>1dlUn-G zSMf^w*?D@6gH9tB)|MWVe|(2iGj4I$tU-3+}|1_gdL(!^{Yv(NFPO29~4@DpBT-;)Z-B8D6=Ur6b9H&k4$>QYahOFngdA|fB}Eto$giwS&eyEXUTnb-2_ zskCUL*!&^XjI>mr{kI+5tW08!q#@A)vCs+K9j(Of4tTl- zx<`DH5!I#~+b|B#qJE2<(UIyT!dV~yDI`8SyRb~Oi}+a9m|2R=>D(sX=Xtc6H>uyq z^6a$jA&H(~=ad5Quj1XP20y^!&R^-q z*YMw?xIt;^c8-EkH~_)@igHQ-=XNigDlW^vwWyg&AQE6CH{U8BOdv07t8>6Dk<7%q z)ctKe^(0i4d5d+n&&pBLtF+&w_1a#d0C4pS zu=Zx%#T4Gy_KxVig*n|~E)Ci8?{XE8Mw(rH|2FHi;4n|g3qJ@_0yiB%kh0Rx|KBL- ztNq6Es9O^*ON!0>9A#H^`7l3xwSUU6t;;KcJV}FgNFIC}jkk$7lO)i#T`#-M8SVyx zf#zz_PZLm@8Y(wC6up@acLFlR)nYTH{Q)ALOK!vT&T%>k$df?rRS@B_9HGEi1~-)% zf)thz$8xs$7+a$k*{?o8`64>4`O`m01(VZvrIr24?#cvBNcU&tLGXj;9Rmoou74?f z=n@gL{;Z+*@O6>=c4C}iOElFiAlnae7+q>;ULwSR@Wr}g{c#o^;>A^rj+(g zHva7TDCaQSD8E&QIU6IQqGU2=8~<5?Q+Tztj-9do^5%Y00Uig+h82u&MmuNB6OhP-Q-%cI1(C8e7<4?z_2bS+n>hAouSnuI z`#J^P@ojZ4?&Q$4a-9con$6wGv58#_)k)r`r2YGs={cRajN5Aik%J8^TMD#;PsUSmT!`gg)~^& zPQIV?eIExtP~F1Ke?(Ze!%}!;hXEYlI+7ZSDwYc6RbYn-gI6YY0o4FkNHrUvJ3>yr-N6{KSi_y{u_(|A&m$ zg4V*rD1qefX~Zxk@TPLx6}BIn(z3t7NUH@Cb9dL&*smqD3%iGz3uVE*@u6Dk&@?A; z^Mv{#fMu{oL9chmIKz*)bVwW=WhPd(p<#@^e6d^OdF44T)XIh9mqw67w7+T}&Tf^UvG z>UUtHJbmlg14y4TdQn~3AW>r@>1ux_j2FnTC&O0)2891a!~pr#7!0ad@$bU?X=iT^ zz_UPIf3*UulQ?m#K_gqK_V2Ra$W3(U)EjLw)$E`t%-YnywXmWp06+|rvT2)sA`+GX z83B7yO?&<$@8-4X73l5`3yl6;D3nSFq7!X-iD9u24`Y8k|2`%&-f(bdC=Wx3jJ#7b z0guw2NLw?i;oJ+Gl|A@Q)*>vWx*WMF0cK~t6#Q4V_H?HS@tV*GIxLAnoN;$CFI&Tb zYDMym+RWULSg-J|Me-ooikytq8{6rZ?!m2PQ|Vc9GvAML+yqrG@^&IIMF9v?B@ATi zvAGicPH>pPyL%F5sxcVSKHfo`Gvk3BX+I-%wx}1TWI8rNK0b>7pID@PFj5XZVA3aU zN-B859e2M>5N9xo{!&AZ)$+istD?##uzXFQW-hsw$gf;$eoAouX~i|nC>bB&0<%d7 
z4(#~%`s?%-ph-yYa>ndphMyj>S!iUZmJKYQvpS|DaYW^0h{>IWYp`H6(t0_xeb!i| zy&4nDWFCP7yag6@d5cw%8Mx{J&?O)A3?7L@B=M(|R@RQXNJYnNvY7gPT{`gu*9@?h z7V{AhsL&rtQ84_`LrIYu-}alxg)?k=f6p(fT?-Wjbu2_5`tvlvOQP>{TW(;6RjHIf z?VN)E-$jz7qY)UB4zjiI{q8lA0&HbgKdBzpEWFzCp(}jtkkB&p7PYqtQ1$II_Q$P* z^Oi+y6>n`n`Lx^0Vs=6&*+_RmukrMu#7*UrRl@t~YL(Jhj&OU+Xpw%I0l0V05$$ znvX~n;lq7{h>GaFg8p_Lo%^iyF8gvr%>y^f9=b5ulWrk+Q_$%HU_j^w0Kj0*OPWmJ zV(%XL!M~+^p1Z`J_aT*V^k9btb9+7p4O4U-Ik6@NiW1C7A_kJC9jL#mzvC(BK)wB^{3r07s%J66=%|RirZ6z z33s5yN<>pbNZAvO1iQbMgh~G_C(4}G5(@wN0~t}*Tk1~V{VrtK2{!muuHH`!8KWMs5dDdQ9%9GX&lA&GXUGeDL~OW(Xvh)QSX$2K|wIS7dpH^B;c z5w_mor;WE$A&xo=2_{t^DGW-2$`v*+UnLA6@(8Pw?xpiq2xHb}EMr(G$P%QMg|jWu z{KSci-hCo25)! z&nR=4y#s;bw6)Bd3k{J(D;$7|Z!m=B@VvyVz`Nu^8Ep*QdE^&3fbx--8`Ip23lz* z#`Ar-dADu&M1Eh@+1@4++#6tDZ|UgBq;DYrxVgQT*M6UsY?koxe%Rl$YKwU9wt)1% z;*Wc`4ym7v={|FG75Ke>=jX<;%7L%)3C*5x3mJnXlBFwVIhIFqCS^;=Pff-4!oAJg zSHgA4!*<08($^urNke8>CLX^h$d*f^hov}MbCMm=os}qkf}IrbnOh}s<*S8urM?Uz0$za@AS z@B5_I7YdYRlK$00=e&p2EnzDG-G+=dZ75rS?thqzV3%$<>5OO2NKNHpf~%Lg-ldOOb5M0NA?p;jZ){z>K9u zr^R$R&;%`*CA+#O_EeDQ6f1Z}vTOO--9|q_3ym|%uoIr=eY9JWnAEbXreLdN%9MFB zb5i-kCXA=SEiJonx_c<(waLaj!NAv|2A97F~ zisJj-R1F-5v7CszXq|IUtfw-d1bwgyM{ z{nN?$>ue@7J{PovAO`2X6~zu?xc8Z`fOJ3**ZoGD{kEzI@4Y%&M~O+Z&Oet#Hj%a* zov9uG*PE&D!#y%M>vkV?NH8#zuxvBu5B@M4cYpg2@2k%B!1ak!!MeVQLTMLsVy6)VlxJf^Q|b*5oga)jD$$FfQrQU=tMLb8*aDDDr$7?k^|+KKUTz@4PT^tRYEvR(xSx=@HquxHQL- zgeg{GUvW!9%h7DCBPdWZ%l`1(AJ#ePk?O()MLLxO`uFE{rS_FDF)-H2mr^~vSZ#&V zZgBVY*Wy`k?GA$^m=pI28*=#SO`*0;%AM||g4(J`{I*`g1<1Nif@JadetI$UmvSXw zE_m-)N@V*MMAkcp1OJ4Ag)NBv5Yj`}^z_bWR-6_@xZ$9lYrT|{I57h!)7Cr%E5gjD zfjdN^hc&_fFeV6SpCv|57}PolD?}@f!DEh@m_v?~^V*$X&;eQ4YZmK$c4~RIiby*g zGM;gy1;saiv!E?rayO0O@#r2&|9}usY=(J`t9#h-e6P{JaaTxF@hf(t)0WYK>cLIf z7NrHOy5(Ft6uHa9fQOltZ@Y(!kFiRneHHCI_%vym3qtJPQhzi-F-u{0OK!9m$Qs*~ z>EIbvdk)AHV+{LXy&4|UpoUB2G5sW$j9V%2m z!y=3R}y2=-oTl zjx=aYX|G7?Nmwvf<~F)iw}|FSOQ!o)f_AfVSlILFNf`qR>*mGc%A#L%*vzd}TC$4F zgt9WrmJc*(gqVE)y)r~Y)M1=Ml zhjCgv-umQeXVJN!u15|R4^C2dCaC{ 
z^_-y%9u35Pg-ja+lI1+)KB~JZB|<<6doU32d2#7NlnvZu@#ea_;@(p28u?`5GC#7Z zc?o|s|B5~DuS9dSe)e{sI0e%MH#d#LNzpMx9lHRzSXfaMWlqtVa4c_*+ zLe9fQT7Dx8G}RbqjnrZ?+3?o~TOO2Q4)Jow?rxL;t z+Ld1*_nvh@!6xfAm0F89>F8tV&O;$`KZS{f(9J}`H|bH+AW;E;oIf0v8cj7O#2PhW zk|`z*f#qu27wO=B*pKQ0Z2H_HNK8k%#RJM&;iFd}9;?e>MSt!5kg0~&%YtkjJu_cL zppMPj)j(Q|-39_fM3~gt9Ks-Rwy9E8hpq`nCM=kV_?D}#zC&VKgwF7m5YqtS{9Ss2 z(3448di#QqdCCSr>E%lIma10W_NOH1&0E`v`7dfn!K7K_)$R2FF|H~`R3gKc$xdRU zpfUB_<9)>?l&mnxNmSSj#u5PkN9l|bclpz&U>*G(;JF3Yt+wkDbJ9*RG^MF37E06K zOZ`bWjZ`8VnQX|f5_a~itXo1UHHF-?mOq<=u^0EEz`_j(DMjZS{Wv$t@ZsS9P8tr1 zw5JEnQc?R>BB#6}m7bxlV;%DF`6#fN?_*lyq{`;j*)^6lN7=Iy zE%_Y>h4lf;7)?CPEdWBX%aOtHi)vZ;VB_zPpK`BInTxpi&;Iw!xr7)bgR(sc(UZt7 zF($^F!}bbrq_F3v@hK<7?0WRjv>DeC)=n&cgd3&r6;zOm86$`?qywdc3h5BOH$ zLs9KdBkakwDkaU8@mIG5CT!Nr9djaQoP@8N{94sCOmY?X*h8BhrL^Ysc`z7oEm1Wh z9n#qm;!R;kH5z~}yhilBTv2XSHB*xwJ4!l@n@|ui*pcYg&_YNeK7-WZ^23Z!x|Q7Q zcu|#{OqXBaV10lj5gpU7EcNAnb|}Oja&p1w_SWI8wj!VGEE{X}}U9~>X1nA@uDl(MP-HoT_%@(f<UNJA9Jn$KVZ5P3 zWC!x(6@7WV10k&43ed$RkNYq{41%<#@DWx@EhxV>(|_sh&L!{@lN^qjk6Mc-@5v3UxedTq@dT`oLY`8QwT7h8bnd z$-fK#_B_kyh!x|`npy6GIQvx<;`!X$ie%5DB)K)Zd3&OX-Y4+uz4G1H zaI`l?u-1&M5AZRBr=cX~Oc)Vj zuWPCnH%TupzEeCh0r$v{QyKZ=ki&oifg1OKe0aW_g(`=Fw45{{z-g- zGcvKUu_x|=QUI#lO_Xuh5`MCC*w|24*etE6Sv#_z(H&CuP**4$oButskuLVP*zL9| zpC@b5UuUfz%gP(9)Gj#}tysa!0+<5h>Y|C{H-^{Z{e$DvAqgStnVcQv*=kcvkecgQ zAiS~uAW>2ZU-(1B5aL)VfQk_!^23|MA%SB9t7`y&X*M=CpwNJjakSXI;>yxV{4`Z0 zqC;Y0zVuRh1c5?qzaP9eoPp9cK`HsVIY4P`tgHN%VhE7F_an#n343SZpcsHPK!Azw zV6YV(QGhI=|ETzZ&0w3`=|jugGc|(!0yY*9<+p+)gzp_3#@Bz11??RjUc-7&vp|R< zP!|Ls0D<}i(#}9VIQ+O;fi-@Piy+p5uW9+?;_`Nb@*@B#d4XDu#Nu5@W@U za7apeB;VGbf$w)+Sb#LRfN=e*V*rQ$2W3E-zcK^@e~dv6ww4BNAGtrkZ0$jR({OSC zS)2Zg0X@JTXlh_@YzcI5_<;Gq{;NCxsuS>EsyDE;wRHRYw$0yB|H%R5=m4}dVMJhI z{m5zL_>tQbWR1Y|*PuvQo7ezYnEzoncC!7a&lza{cY~<^ni-l85(dUL)|PGnW1tBF zlbns?e@99H_5Vy|#{WB!{|_YoKN0xiA}3{GW%vFocC|Tmc>oEMM3F z3~bC_J{}u08-Sag+v~r{_csvBzs}zb9PL4_0IiRrnOXiW{XahcI_dlyjF`2NjqzV| zf1>PYU~T*{TK{SI%WdRjZ~vj_->?0njyxXVYH 
zALPzCKd?SO*rIqFCDlY>x1rU6VN~UwoKnUeJw8zN4G#|VcV0tx?-9~vM8Q+=8sK^- zoRG#ly6M~B)f&CJFjVlQ#|4g)8ubjpFg{g&M(p|wwJ$^{NNjeYlg!Vy`O~Ojf5m%F zTE{5{vmu~;n7(cpu&dqRqAZMGI+4fzTx7`9=J$m(^)X(c&eXsvsE&e= z14g2cFXB-+)YyDu64KPuKf)g2eTJPPBi`G*k2Qsj{OARd$Qj;LRlPz25&xydLp%HO zx7FLXj4KWs9knR|Le!V-mRu2*j${NxbM$w`Fms9&OJ8Q~a0&e@r>sOXTMG zR3RG#D3NP?EYcfRRtQl&%{xiEi-iO$9{wEy_FafM!o`c5JP00>Ag0UE5u>1@J&$}{ zo6uDaq#?Ao8V@t_p8zGNLp5$Q5Z@cgPyPBzfnnepbZePbyA|W3tUMMJd1m$3l>oK; z8biqu?Y!CFiB-kCpkgR3e}pxPI{rtnV?;kxYG%)Yhj*sDEc2gMr{OKKRnwd+uo?oT z-9s(tQp-fbOyUH>854qy!%(=a;HJx&f8wK`N_){r$Td1125zwX)wP4!Mqr9Z-S`ds zB%ExG-i7*KwsYW5G%Ba?$i zy(4+HI#$@nri6cT^$Ex0PPFY`%A7tv$eoV_gQlQnL>SJ=1RGf;39Jgm!e^`{nENWm zyI~`DpDQhAoa!iBe@7UQ+J#R&&=85#uSB>>`eQRkJV(7qLmDOqT320RGdLIQXu91m zGYgptmDSesalHAESLI_(ZZAPS6Tb{%T4IPu1_f2(6HA;co2=#6yKYHhHT1%|+GooQ zcyWZS?CFuI{Aff&6YSd;6K5o=%Ug82fBGPm!dqj?a@M?ze@)e($%)ipaC5FcE^OJF z;1rr#74k6WTE3C7Jm+t}ff8t$5V{q``X}$ui|V~!6&IYd}G(al5KuYF$IYF$1%%V3G{-Z!q|~<*4JMvh!7Tn@Fd4 zBJ|LThQPZke+GcB^g`=yAE(UpfP5MWI&Sp3;{t~g3`{{=QW$Wo;i&C3YPEv$#BkRN z5?9qYMUNJin^kqOR}2#JKF7k9=TAZ+V?5UjFB*B~P=_o0oxfm7s=>z0_J(28*^c1q z^jg@iZ))#x$*47s2Dvn)Am;$Mhj?W|rnj0<;DI?0f6k|Kl9VPX^UTRIOeuyp0n$z) zB;q;NXQeOTFt7jzluX4&eZNaN`aM!(Ap*6`bDXb~@EH85CoiM}DuoFt1y)!>EwtgbZXItG31j7#&( zSf$nu21P{Hz)Y$yr9vM{V? zytZVT2P36DuJ*dvzDpk62f95&fu>TtVe>UD2R^&rfN6k7h%#Bmv!V(d(zb{1_){nXP zrtz~Hn*+rrU!5naIANXy4sy}BsP=x8xqNaWQ4g=44fP8I&2$b%Jas|5f{G?dTe_CbX zRIT>gO>h`%?b@DB2K>WKS!y+tF6$z*C+san%#6t$+TkW~bDi8sIi}byEb6joMJeXu zV__tz)`&8WnZn}$3#7D&=Gb0*BNzR0z1*Y5?D$-nb^#4}>O1ft4{? 
zy({C<+ZJmFSGGVDgP+T$-%zXY@s3)8Pf1H^`r zUr?Z3mF8smNm;~D6uF-+iL;Hhf$G@z-?cE8wJ_6IFCe4=YUJ9Nh{@Gmf2{!vQlx8# zp*?4l>YQ@^^gSJq+)J?F6d2B#YMYn20dVZt4QA9jwGU7GLka+$7EhC!6^I{?c6hwZ zO!qVeF&dDh1yxq>fpIyJ{)Geja1~-T$>~{8suavD=M2UE$JgaHEtP?gS`ye}*c4+C zJ_LLyeW+;}Bhi`)S$EaEk3nB=AiBz*D0e%E{9)_u=;K$edCh_8s0Mts z_(!g^2vokc1sGJq^7s2Hr@3^h8(I}2k^)aSyY3ii!9Hk@LtSSi7pU8a0z;U~&IkLl zkwq0HxJgvAYF()<@gjW^eMQ4%#ri!mvp*g909>wKD; zVv~eFMjaP8RpNb7e+RT>Bu@s=;!kv@cW2`*Q+7ql8Vw;T7%ggu1IMStfbxzv!39_& zY>mOF?;(Y7I0&;p_!~v2$(7px@Zp9So8>}2XjZXGPB0@#sUgPQrv@orZ>z1>Q%cLe zCmI5@meSFt=4Wv#1sVaI(;ME{dr5Jkr%C29j&_qnx=LraeW_*9ot!E*rrwr}hN$euvq}66?y_X3vlqYv+r>8iE5jA6T zzT(e!S($!LO@Zvc$W$iKOE01=uYr%`#}2jRv-i@!(Al~mx-8r);Zf!+XCesD!D(gO z4_BBug4X?te@;dAxrWaylhHE^ni#F(n;2G8y#IMbQct04e7-TVSX)r_gVpxy=c*s< zhX|s4*S#0GH;04U#QC3HiQJvCAJX?V%cYq!s9t^^S5FXmDLu&#$!gr%0eAIK?FrKP zwRtDX1DCL&;JUfIHQ6IbP`_C4^~{KfQzKmCZw56me~Is`-`Q}jN}uF?FPjvgfn#zw z?!SUu^h&wNoyWETh#c2Dys?9%(8#TCvnL?mYoG;f*iE*m8#r#y6devF>D~18+o~=r z44ET2KlhnzYDraZ&Wb9rW%0d}sCNb-(20}j;07F%0MF(rpoa7{D`|B`z9ZK+O?WpM z1Rrdge|jdl*G<3mU4Lek8A`nHGD*!z2=zHb48b53k+0y9A>bjwQhUjDY%9hKJQNeT zi1`(Q?rTh6{m?7_8~0D7FJ6f*>2Ul~3C8y8{ldE`I=Fu z1>Wx^kmljH=SGlHh~QB#w0$|u!lehwoZV_Xe;)ct+ALFS$HF)h!fBJwzJE`sbSt=@ z35Uim>sYABS4{W_aTf!RS(Ga7kh&_tyhNgo*tYnwn8e>oKHkFTwcRZZ^zcyc7^r!L z4Fj=GED<}(MCiT32CPH?MU}}Z1=jV{uZc~H3Ylh#4lme{m8@|?`}*HNLj+>1{^F1K ze|(7dGqf8+z-z2@bl1)J1=3sM(XSBr+t{HhBMT7YbdGcJ!-)dZLw6~cCGcivMtXCQ z;*6quzx^O6P$~Gei^(Cg*wP^Z@CF#JZ_rVUzrRFg_1ksM6RqgFOYuxxZ|*5S3zP_7 zicLAgnpnD`gT)m@8SpON|E!)Wy(xUQe~pLUw;Yy_$dpbB>-f^NIO!AOp-84AHsuG| zBFE{3309gJPaGvHil|kVjP!2Ok)G2|Xaa^dX5&7ti##T_WW9NupKur@Tr1ug*lyTDUhS6vTE`EJBgi4+Yxl&;)>hQ29!aF)~7Raz|(bm}Uk;?oLf z`23)BNIhf?I=#d94iOk9J+rsTN4L0PV!UglK@{{9LT=v)Mh|O5rx>@02MCM_T?ZI{ zCW1EGv%IjG0Un}LOo)UmVt3!0e+AZ>*Kr&V`SMjM9S>?3n`?ZdTH(X^GC;hYe_vxW zlxXRX@v!^Qhl$F7RP|M=E^SED*?r2iu`Dy8TDX#>A%f@bN2gR@f88fJ^YSbihfzkk 
zwny-Q{_W-A-AyZm#%u0o*~V?80xq`F&zcBBW6&%eRYOpL5LccTSTH8nUag{NDzDktmbE6uv=RJ%_)x)Sf+#01HD}F-R$jO{={>A%0zBe+6hc9Csx?(rtWdE?XGdt;gUjmssciX3o*;D%=;l2$_`1 zC(S^H$3U@zK;5S$Xnm+_tu)D*O%CI=A+g@N#)U;q;K9a_|K)i>_Yb<-xZg)#75UGT z)15|@77`JpCB@S%;T-E1$eZzy3eU=3v);PW5mR@kuNL1)EZ&Uxe^UbEUIcLf&DNbK)%R7KcD05HNSjCP{mEOLrvNEUtz z?MEpiiE+5F#+$_Gf9BJ2x_>3%rX}H6eH_KJL-Ti(e(E-xzruA!u8T4x6_J9U?ewh5 zKK41C{m!h{rk2V!dBxPc{MaRnV11}VCBDZJ@x6nLqKL5~nu@}QG-%S!(oB77*YFZb zLCtb6e9JZ+A8X!RQMl$zz9nG4b9-&Rh00ht$&gk&4_xd~e_NHQE}{BpMCm@!_M*FW z7s#dwxA!EoevWLInO*v?Ibg9NkO*Howl2!%&dD4u^rY()3e+3dQLLa+x{J^ z3&B9M%lP5o2j5ma+B}7+kS^0X!KM+AQ7>c@vl~5-tRk$rR&kOP!MwlTVS)INM!>5R zbOZnCS|I)Bf5e0_uI&J#@5Rxt+ z9fS*Fv=7*f#}4ZR7!_qJ^HMf=D+d%k&1oobkqdgJbM*UIk(7=Z#=D27tbNq?Ebkk; zC(TYxU=WaecQMbm2D^F&y`g6@h~kTkgAfp*X>R!Fe-3f zY!!0Bb9*^@$raxqrTFZAT zY?%9+1A=KuSwLK=TF+=G8cWHpy2bufvJ{ewU5d|8CH07qGN<=UyW$zexhh{<^@PD5 zIII{xr_i*>GMv+uI73p2 z!P8he?!hcpoWP(kI%)8MXB{f~Sv}IlzIsvmg2;%Zca;gXbb8=g*;&1Af3?PEK8S9i zay&(KbxFhgR=eH~2?v86p$H2=ZK=)rf85J9g?7etvUd8t0l+GKR>}eiw|VgC+5e>7 z=|Wv%2h2J%x&PqlPp5qW$WWOOO0)e(vC#RN9{-!a-RG7(G5@<>b?UgR5cZ@wjGd`U zorb$!^YkD@pDTPx6i zVrX^CUf-wVA0?2B^ta3tS&e>YE;3pKa$(ZWQp8hWLDwvk5JtYnaCE?!5t{z*>_ zsl^E29CRw&w3DxV^HqeB8q;Lzh;vr5Cc9> zwgM@%G7Bmvrixp-uO6y+f6bPf<3O%Y@>a2SIKduOgT)UdD5$emI)*d2aNiXD)ANn~ z&y!(u5`$D*dT}Al#Daq05Gs-}NXDV_UO?iQz5|BoUYgLG#jWy}7k;wYC%FwD(CbBY zgnSl%jo-T_TkQVvk{F%a`y|3EkPG3+%su>Z3@#3qJPnfjC497Kf6+xQ^-Ee#+6}Z4 zdF^=yW&Zlr6b4q#^0w^V+a>XB$N6ejgDb3s*ft$HqZg(1H9y{)xb13p(QYd_$uDZb zEnt_CbL4Ei3(}Rd6VNGm^yhaCizyCI(=iZh`QYX;{)4ejuU1_c%9o!wB2WL5H26%ySfqa!);L{9Wx0~H$<+T z^_@4h#UE*?hfn6R@BjJr!HdBz2~5Ao_QG>!=65dPHpotDf3am^Qq*_NV4~E~IcEzl zG|eY^IGqG4m7k$Xm54f?OoJf1_4vvd|FRJ0Nb9sTh3hGmJpQJhEvnv~>*|+eKvKE^ zrr)x~Hin${qH++&2~&W#C=4YNR0c@Ix_-$^XlP7f2T5yr@khsca&}rvDxIm86=a3V z@5{i@qd^M*e?FUB@z>kRsS8xT8uqtICit-hY~fB&DVU=m$hk>kFg-fkds5?>f3(91G*A&9>}KE;dUZj zTcrmp8mRe8I9NroN&lglN)a_Vpg=5?(=`jHZKm#z*(75@bD7wEMLL7BXjtP*Z?9pn zD9UZTe<<_L4Z0);0oAtF800S^^E5zx0(2y5)hVB$l18t^Q#s?awsD>ca%zqR)9Jj~ 
zXq+`Eh2{1{Z-*Rg*s<3)olenhOG+n<{TD2jw#JK%#JW&~&e|}y%lI{D4@`*!Kj+GP`OJ7!%r~5qf zuZ;0pPuh!I8n6}1C@}Bv@Y93b*d>1Cgc9*7eNOU-Hxq$Gf6J*#5;0{$8rRjEKG z#M75rOm~r^)z~!;@vuG-pKISCWOIVdk=7AVt(jJsecoZwf{lJDa8*Xr!ztXv**yIa ze@9u=*1RpD84`GA?#3;9C-+;JxO{RH!kg2ktQAtFXE z&);h}v=I!G_^;2M| zg*jfCIwn0|!f!h(ApOEGc&PLffBVdqIR_o=<1L>rO!wU=+_=%vr_SARa?j|rgp`sT zfTMJ6=)(DSuY^-X%h#dC%ib+2;LKlU-oCrNf%AvH{Xd_wVpxF7VhVPN49NPsApVzW z#)^vh$e6nMnpomZc14fCRWRw4vU%WZjSeRhwXfG9K9R1zg8|_-SA}IQe;=`({X{%3 zlRs4C`x2U=v!Z&|FK8~oF0?dO-Jt7bXgD_eP`nnJ@C*HvV3b-%s6YFSLE?pEVbq2x z{%NhwFUx}DCmr5FQ+AkddJzh0v2usRD3-omHP}ov%o=b4y08Z_QX#SN;%DyctO@)} z^d8J$9%>Hn{k^$3soieyfBCIuT1%389500rU_+O|m8{7fcO9OF$9^yPq8;i3^2!&G za^0rl7Q!@AGQ`th%=N3(Pz+WA$Gb66Cg-JLGJ#8x4e-)9%p`Och~#=q=Nhu|6PnAn(U!l1f6?Ol-_?WZE%Ne1ujqY2AS$B{s@xJ3)$vO%hg z+^P9azqaMs7#s|Of7l3TsBDCrG@4I&297gr?7gv2Aj#p%#mx3wHTGrn3l8C!KRI4C zGuP|i(G=@`vxFk93&{faT+n`9KJPV2CtRGWmS1lOw><8qp7sIK4| zWU>(RgX?%*)XUl&3^4Z1hPnt1`ol9f-0$?$6YrmY?FK}je-!aN%Ng(HPxL!f5h;wx z5e~|oLc4~{+L)x$9-Gjy>N*nsU9MglA;LwqOg_(OHUzzR<2q#*|NW}qTVNXn z^AG8?y|~o%0ZC3wwUo$*9B|h11TbN@NB%XpMz%Oq6B+IcBeL4djhu;A8sc3ME}0qg zv`mDsrahvGI{hBk20>?yss1ArXf+RJ&KZ_KQb~aqfAJexD1bJT$AUxs5;hi>6??g^1YAx-$k z#|o!m9zw&@>Xj}tCA&oxZ(RijcF7{`M<`h6V{RLEG1z|m2yHZ@|EhWz-UQTzwkZcP zI-Ok0f0ag_E@AmLD;@^gkT2$>6*`!go!~u_Eyi{8_Qv zu19B+I3kcm75VJaG^8d&e`HS0_X$pWQ<9Z(f4rS3ONtAMDqD)T7tYtX_Pr9?rZpK2 zMb&Kv0Ju?H^%QvRqKb2Ozy8+U{r;&TOsk?|$uc6JxqiXALR3 zAX`tqdbDwNBw-Jc`#t(`?zT%~VpP4NWdo7kOtI4}{=NV;m1o<+B7p*h*`cN+)^7x`LEBAj zR1_y7u26E5)WnlN;~*C9zd8p@f8?5n3evz4>G&fAbuIlcNIjuz{E8u-3Yg%B>f>WU zRh#jFc5`BTLsTfwB(R~BU(y<`koz^CS?T($s*jqhx&aP7cgm+$aTOtAG}v-IpHFSrwF!R z2PfbL9_)SZ#nY%em@E`aXWXf1WAR|OqgJ!waIh@yC5@&L!ihpmasQmuTrRf`MdtWA zv59e~rL3n-u?VDL=b6)su|Au`SI}|S79xmp6N4Vvm{Mk@HCf40f9tS3U6`HuK^gT)Iuk7<5t40qeJ8Iz$N{RedzdD&ZH>PM7`-%-Q|w> zhL)S}QBasI$12CaUt=jyqrvtbGf~qt?acR)8W1dKkI@lTko+d3{lPo!SFq1hw-2`<7>nuefKPsI4=qBoy_p+cceYv3;YlNK z@?MX@R^vrLiHOAE>gXM?Sy?Zh2|7;Pw_x6q9T&0$1LK^hm&g^Ny?`~Yf9RxEXbf1qSsvE=JwM>ePDbJ8~m 
zo*p-*53VA(@v~uQ)5}oLIK$fIO9Ra(U-J2ENovzmCVnY1+(!rP=o+89g#>znq5NbL z*hE4l(p!t4LH1CkV1kF9%)y{G=jCh|gH!m*3CO^WW0(ITHgke9AR}ycF9IKeO_!zH zy`v7hRrA%Pf6k5Xt9ycHyH|YYTamnZlCk*d;)b@Rru6U*(Uuj@2r3^rcqHU`6;xG} zU(r;C>_ibyO~9Y+w`K_w^&GV8&S26Xeu6m)SJYsJ)BP3Qj&LKzh%;a+%Q+FDGZiei zg+f_rOLyHdaiYc=pPU6T4j8F;W`vA9bXFL0-n?npe;HWT;|Fh;mY+U*;R(z*6Q`Rf zA`k@qh7H?@xHMdNSy5r%tjOvK9`(P5W7SB zW#{hMe}K|c!pM1t3NHFK|Dovhn3S#i)t4D}frBg??>PVUW4-QtdmjxTZ(af=gemuo zaq%npujXjLi*l2Xm$-L0F;%f@=b~_9kh_R%w-MT==H@VqN?vPYCuUa|t51|-uRMz# zFF|N+lGB_%nGK*2A?iUa> z=BaDbtAUX2+S+eJq^>H2)7~7DjIvxP3fl*U@=UU0CNKGaNJIy}N`{?#=+F)AQhSB+ zZ3d5w)TW|Qa((0Cjk7womgEWLQBb<9z8kx;yEn9(4PT<+m#$p%Vyu7qY)C$Hk96Iwisgh>gr8_DYl-g2H(7Z>;mG+e6bEkjGGXFlO1T)eM2=Kx(Sl(*?YufM0nX3 zDz17X-py*J8<-9xiHW&GeM-^dc}p~Bf5~VKdU7t!LIC#~wAt2$MC-7ulL$g*SVXV! zbbwD?zy#7ojR1V_gy2+Xek*gUjhf3!U&Z{$@9xS&_x1yo>9o-Ci*ZnW9?6<(UFj~8 zE>m}14uju1sz%`#{#i?@>10p5ghpA&hrfY3e4R2hFC(r$%6fm)5xEm^PP0PKe`*X9 z4(;h@Z(rwsaU~!L5Q6G;!9vuD*rU6xGhhw|t%8YbJ+)U8BC7rAoT6xne`P#!ZkK52 znoM2zrufc8w*XO(LIsvqL|lca$yyS3@1AT!yH28z=2?Iw4D&$SRjT2y6(GT{o*+<~ zVx4L;TNn=sq;iehhd-GzOy3$-e@@}f*RgBw>;m^+@nd>Ml@Pk9eTFyYXYY%k!QXv6 zbj^4y6mfGUvr^y?;_l=A?hOoww9I?uavMT(bt;YR`NKR+nP5#f zNISu1noN?9td8yiBOjF>hH2|H#ho>#C&*Wo=>s*;+Fvqhp9i*tlbp~0f0Eo3n-JD{ zuDDPUMfyV~4Q4)I3S+LZ&1Jcvb4$=X;x}jAHRQBz%eDH{1d$}pClWEGdi|JVvt5=6 zxsZ~aFTJ21Ew^8x$O8zVz6Z^xF8c2)47&0h7X|b$x~&qf!c88IhrfL+sPo zl59=n-9Ktt#axqdxvg%te>PHR&%+E0`B0BaG-y_(O2y4CGkHU~6qFsny*qwOYD{(H zOVyP^1;!eEmS2nHlr#)V2h6L;IgY2PW7(e&%Dd`;l;;B!S&f1gCg`QZ?zvb#`GLC# z`q7^ci;bZW{#H=8XNnTXYkyC6-2+l1ILD9Mx77T+Fp+_b?+wv;=7 z|C@mr8Y9TaZzMr6b#vmUGWPQChMTAWPS;kq``+yMo790)ZI{;#x$1r5=rWRX(7c1A zb9U`H)SJ!XC7@W>>%9`nwx2>cdW_a`@dp~&Y|~o-vX1zj0Pcyx4Wd3MxYj8*VOL+? zvK35NN7S+_Z+hlyf8V70Ij1u1y2~rSEU+>-T*l2DIYZ!po(s{8f#MNK0Wsf+bh|q! 
z9j3FJl%>$VCe`HTHEr7gHhO7^Frt#I#k^RtS6AHz&Ax~jHlvW?lCry|k#w`_myTu-sJ+Y3rfKK<%(A3FVSMw_e_;ZI=&*mV)@#2+mrjM< z!OkxkdekOP)P42#YTNL|xHXpfYpssXaqXouj^s#LOgPMhgL?MQtfwAK2~b^{ck;~g z){`szTPkXuzC@}f3R)5&uzHc>2$tQf%gj1yrtc#p3Q>bBiPC^QPOFCcWPA8*%+c7O zu2+umtpW?Q%!RV#dVTIt1E*&y5PWP>@Q{s)M z1I39|EMiG)FP#kZwtwXxWHbmQf=1{NVE*iqc(0u+Glh(Vi?E9J4M)ceu@pvlKvMlW z#!7t9e;wn#VeS1`&>CMRb!g~$U4uz+q+N-y!+pXKuWoCm4FUwSv;q?rFck(M_A6@E zsr2LNoHY2l9_cJCczG_f*Dk*t_A4S`1{dOkJrm=x%PnjXZ@AzvMt7ak{RCw5s7MYS zg(QA?xUMm2;lVSJ3f=A(U@%l5ee$69Kx@4^p0hPyHp5K~L`9dhtZO(}@3o%bcDxFK73aMwndyNM(2kk% zlAoseGHPhXC)!Kyb6Ek+Nc*L$RBm*vZ0eWW{rk#JrTmgF(CTZFrB$%VW5l34hlq$< zQ~Oe$VMmPlmTFe>{yZv8#e*~vwI*M=f16~o%6M~jM>@Iz;-UFa{=B9i13cHj%T**e z&*nBKK7SFTWC2@o!p#qC&6V(2wJ)zzhRFJCg;s6;J+yoW5&&zFid`_d1{I18@UK^C zB3Ve6;dq4?-@v7=D>T{GApA_j;Iu#vwRG3lmx$p4*RXF_=LI&-mcJTtqby zd20PKrF)TG<}dePZJCaQU_4m(DpA%Hev8ZZzkfS?2}3RZdiNzXZ9(v>;^EH??*fLO zj)yxE`jp5~FP&jS`!~#mpHNkv2g7k*#0iWR!^dy_AKrd59hczK1{Akplm}t412{A_ zmjPi06a_IhH!wGo;R7dsthi%zrCqls9NTtwY}>YNXUDc}Clx0Z+qR9W*s0h?1r>Ka z@7t$OkMn)~r~CZ4?{Tkrt$EEk*EQGNV{8f%Wi0E6=>-$<7Q$9_=|DCQBa7v08QMi9Ua6?+j;?O5W-iuFZh*g0mBl6h zou`|XiQ7N1U9JDZ07r|zJIx)<-2W-*U)W!ozo?suwSy~vzzyi-_D?KRAi&(()ydAp z`)};OWKJ&D|4PH%)!M=GKQo{QxBx9pT+Ho&uC9M+{&N3wI{#TG;D4*$#L3Cd`(NIU z|JwDxa&Q8(9{%-^Pd(Hs<4jK~&z~5N|kO%xTZ0t;20RIdeYqS54tBJj}o%jFA z`M>Vg0{%Pe|H=7Z;eUq^aj^WW5CaPbBgenZ)~=F&)?Pq!WotJxD}aTG-QUCemtDib z9OzjHKeSV@EnH(P*?Z1N@JjAEVON6E1GrJ10qX^EP&QXg2$VeD&ae za)3PAkB0XYxG%oVGW}0F8>`mqc57_+PrSn!t8T1_FyF@Kn92;2!GG! 
z@(Y%1AWJZdxW7d?9O8RKW+q76rjymk?)A_PdTd@xNA#0qqGrp!l<~SZDbO;}X18kR zliZ+I-6YI`S9=n5S7|#9#KTA_{J!dc_G7GYu0Ni*zDaJdxzs^|D*5y+1*&Lr2g~zR zy39{uT>U2u8>(Fx1)5KC{_7d()1`~3YMq(|li+#dW)X8)wat3mz0Z4-6_<3tj4DPS zdLjCPzu|~X4XEGv9n*6=OIux$xasfy6S3`HoXWfn&bjbfPO$55dHl4(Z)qxj4@YMZ zS_$#Db!B_7D9Zr-WM7Gfxd-{GEWxsOM(lI34nfoEW^C-{+11bm^#~aGDgtv_g$&#z zt7sL(0ON@+cI=H+`R(DWypo%{)B&IvE zuWe}%>-QpV#l%w}Pwp>T;(=j|doIxL3FG0dWFmyEC$he?1^{=OzKySTYmi&_)yiRb z*W$Y$sn*M5F>rzTZ)BPo65qBAcja!1oDHR%fr{G;7s5hKAqbi0dt0@9EsUuF;hR{I zG+uGPCCGER-$xYaqM*)yJ{*q!R_o~s*5ZbSJ)2`$9Mx!jeXzgE&X*ZnqwE?O=nx4x zBcXa>(Lv`@GSnF_-R$^%04EH@qU7CF0>sq10pv|LV-nd1)DD= z4t4Z#Q$0{5IOQN_D{!yqKUT}-|K-|@aR>q`48|3MlICX1UXN*ilCqs&F@+eys0szb;|JI}ZnVeh>38nSlIA^_{1V zg1YA8=p!s?oJ?YW&y494cTW%z2{QQEvSVUM-zF0@g4vukB$j2{8b18-W2<;ho@u

bH1f-5SUbiXL>i z6gSIadO!{L{OxpJiQ7-+wfIW_(k3sxm&K%~WtRzUPB+KNmRA8UrZ_I zX-Ahw1qoZ&Su(G|9k~?~*qT&|78%cktg-GB1qk}`J}%nP)C#8jYeze_Bej3fsVkOK zz)}}HRNtthw2g1maWv^SLrm)w(p`z5Gu>%+$m|@#KO?|QN=!Bnzc6yZBstQ1DZK=) z=2tm3CJ!xt%z@R{H-RNLKZ+4Jo}eEG>8n*>@~H8qTS$~~^{YN#mX|zihwv}bDt_+B zO}hu&e7$vrGi>(E3an^ij;JV)O}znMCJ^17X~&n~TJrpdduJpKk2QhfJeZO#& zcasi(ha!89ep4mZLN&5do1b+&7LNp9Kuh6yx=j@J;VbKgvrY-k7k8hCFUBzMCzsD4 z59>=mRpUh0l8r2OBHZG`;j7#s5rvQ&KgVo`08R}Lx5IXcgWd9MI{}=Pdg~QZmw*Pg zmg7v~r-`$%2$DvQt-lG|w)zi|{iv#H3tQ=bnLh7t1JcN3`D`8OoVF9rO!XDo^ezOUn53Lrrdoe_%wN$%HMg>N;0+* zph3zSHowaY3cZEKs+2!{_Z%iNhsZ1&($4XUF~*uf&c=Xv2SYV07s4PUo4n?2?G;de z)sBFoXUg$hp-L}y+MWf;?uwL&#U}TJ1-x^Fuh8tsnE4A#x6k*wsk;$%8S}-HESu7G z>ph~+uy9oWwwTj0d$j*HnNR%%UyWG>=5hMVKgNXYcXQ#7h&@W)bD+;RS6DW(OxQ=7 zgSMP##%oOKO~utY3ETMQ3r7@cYT-eDa*8;ec!fC|3no;KJ2LM^Q~~s)LU;Mn6)8O8 zcv@l#*qnT@C-@5gEFJ&IVS~$(iaimI81_rLCW@Uev+pc=&qtIdLg}5r?nE~bl})B2 z{FFVDB51@NWy0hED5JwseQe55(Veb6dDc{HTYCYxS$ecej3wfc0d_Gi$aDFBN8y!? zSvJrIS@TSCi@qzZhlC)BZO{=wnGyw!A2S6BREY>C%KNf7zVFvKUhs!9XAUfix z`7hr3v%1JinsSJ{oga{1!z85X-Oq0)8VAhVH->mIy^CP4x?Ln62E}LZV6pOfz?SN4 zZij9U?M3`{>mn#uWE$=x_7P`)uM?{u6&!RbKBi?|l*Dzk<9s(GgJ-!LMMDVEwI8E;V zS^-y<1I#E09uZs^a$seXqsKN^TBF$uO&a=xnS=&Y3yOxE3%N${o`^nwtQu0`;8BY9itu%!G16XWws*U2kNsp z9nOFrwQ&^qt5a+C_)`N>EX9hZNuAfLBQLW_JxApF_&AEvjPb*`_zLrs&#jPa(l0v5 zO}{^I2tx5joeX_PXynGtH_rmxVj4ehM6LHe*mEP{t{`MmVD)x?-VZDQbNNErZcRXz z8q&7t-ftHPx&>4yu=cd~T~1b-7#>(UUjk8hpe~0zuxmhvW)a7FLG_+)JcvO5b#so6 z2HnEYU>Ve>6_~k2g3t(y1#I74@3-oOA-9=>Hr9*QqY+B+nw6i1@_enHaSYS{r3AgD z?sI*mUd&1!e27qgx^KTnqjlWRgl5f+@&+sQ<+0bJVSHctLRlJaAP)LQZ2HqKxAM|I zmD>!2X8CnSxcAxn3G8Y3_m8_BM|Mv6GKw!*_s90{6@l3OkO&y;7Z?j*Iw-=CeEOk(yZdXXj|=(FU-H*o^VSjg zbG?3sA)FbJW4>%}!)2W6C?l=2Wo=@jeJOTg>UcqtL>sLxoIep}#yP6juIT8VMN8V)S- z=fPuBOW=LxQ%Vv%65jx2q~TW!A2U!+O&=NsAgFM1ku3Q|y)CE4Bu9j(ZJ8P+8O{}+ zXL0y;9Ai6Oi%nH@bkqUE_F3n%?>204S<92BGT{_|BF1b!D}X3qVoe0VRer*FDW@j+ zd@k>>5#`_ktq2WSy8@)ogdXtG6OpfG5dAnc69dQ!4*=dk-!krDqqeZ5o>5jOx9>7Q 
z^O#9}-jR#?`@9f<_jEc2nst@f%py@D{+h478e;%xK$pJ>IE|!aN+N!Ud9J=2a~obb zB1;RRA!#{VfB&ZhdqMI(?m9`Ma8eX55LHU^NU7@8hdDOw_yeXPe({;Zj)gF6Y0i02 zQa^21Z~mzbyoR5aR`^EL(k;LD(TKAFGy9kCQ}8*`58|o%7T3KUgKy=_TZ!6|R5hK; zeG?Q89>O1mm!nv2cIP9`!l+X~WD1m}>JAReV9iUwf8qeP`H3yN+9&V)GffH#v%Pyl z++$!C(55K__OA7ccTPF{rHD~6{E)Aj=n9<`Y>XdiZ?6BJ@_jf9k%k`yU~$b?<7E!-BkI#5)!Rd~L(3 zciOTMEKilph!6dt7=H4h8?hmm&h@~zMomdj@PwmI5Fj&Yek}!+6-kOioEcH738Ko} zqZfy4wVqt@J7V^(K-u^CCOBv+ieuA*H7~e*Oc82jZU{!Z>qBMtFLHG>1A|rHNS4+dARI z(;kbz6CYRFy;alIk3-d`mZp$K(8%EwuRgq!r3}@!-G!A`k8jeV(n^#vBG)VM)riw9 z*`y!yrB8@(tayx5RVZS&U5%m6qT65xf776d*&7QWIh^e&y$V%A_HK_Qx-dFlQW56! zN!Is2HIU+UJ zAjNPCp|F@;VyWt~l9&&rxLG}nq=U3`n)kPKnjK29L5+vWurLcKDbXC?-NJ$AX>bvR zcGoWv`tEvse@ZfY!B|%AyCWQIc_^6ROyMBf+&Bw*Fg;&n3$vuuNv*_~vIIHoHTnA( z9`37a(4HtU*KaEjZ*Dz8j^P0hf4C+#h>QZ}CSqa)EK%eynQ)$2i!GRe^RvzBC}vmo z+se7)tk${>Mx@3H+{lYs_QlxSQ6IYzEUVcK_=#-z{(x7ccg@%5-S$?E5E z)IF*@PgEG)jVL8(L`18v*N?QL!=^rDL>o;r;0%{)F!rlV8m# z)4|A?M+5@0UJ`4qInUz$l_L5IN+}ah)xTOmi8;2lWOD%uP&2Esm%ruS9|*z5FH;JL zPbUOSmijvp<-@N-!~CRz(Een6qS;5_Fk!C7UPB8 z`0qR_M~%!!jd6V+7O!T(e=JK9?wwa^Ew>BJ^K}%QRUihlSA-ZUm6K88IVWI@UGCw` z`}=v@xN-Y|A}3s2O=@VzV>Z_JKC(zGn^{Pe`%_hs^Ttj}0TPawJP#%l*JT88o1fhL9-DJ`j{nvci! 
zSWg7;u)J$4f9OL>8#HekR17hFA{{O5?Yk&>9{7>n+p&)zH6BW)cBx4XGmV(}9@+72 zp$%fy$2dos`T0}{Qk}0}ABp0vOK(j)nkxlo;N0}%w$)3TDMwX!1r%x!Yt|E$a_ z8_LICE+IsX$>i{xDz6-m(_`fO&s4YHr&2o>Dh!|s`T+~+!{P*Ks3|tFWtYxj$Y;wQ)S!Tl0y2efmM`k0T6Mt407 zp9(nNys&w$#Xi*+%a_*U|1qhf1U8Dpua4+Npyi{(%Z3wvq^s-ejP`< zE2GW}HXkd<=A|Yt;Y#)Q`>C00w?aC`%KuY6GKr^98(E*bEo!waf4_W)*lcqpTY*UEy#noE*yY94dYJ3pr69l}`b2qn z&Z)vYdkDclY=nIM&UkwBWzlf)b0I(np&^)kKyiAili*Rhre zH(BX9VNkj~sg_)}LhEYAKdw$+tGx6O?4~<*#P7R6JD>NVIH<9vd_ow}f3&}pHH`f3 zT~cq*E*<}z#$=3Ds#@aq)%5J8gpuBqaFO7IEVKm*yA@Wi~KmjQIYVOj#lT= z)9)kN2j}4EX8CnS6NyWzf2At18Yh%%UiEhLn^jXaeW?!-mqZpP*Ti$}5ARCQ!BkM2 zR1(<+cq;l!0+U1|^H%yidP`jOD6*#Nd|-Z z3T94YOk8{xZ6|x4;jd$L45FBsPhPQ{u37yrjTqL{UG3kEZ+gL{f7Fq~0w8pm*3*NY zz5ZFd)rA)i3O@6Pwp)b<3){Ksnc@}!)K6cUl=zM<7QQdmFC!mX4bjA1jE{bW(J3fj zO%smM^T!$Qrx+cV;^G>tx&`9gJs3+9-p+vyOjJ2E6&>h^zE`|bJki@fOIJ)dU9j!# z1(oS^)udRIE!A9gf4eQ0HUX@5Ty;!h(jc(uL(JFEd4~rbqIbrWC>IR1fsywcv6CA^ zv#>r-e3wjNAL)@%aqNb=9b_sRMP~_Flsk%wJPAhhD8jGSOJ64Tl!FCA^_Ra#(~4y_ zor~Ve6{8wJGoaoH-2!4HuWY*zVUl1+GTofoq>Z-7HG(ETfA);bc%#>9Hpi6)uz(Ln7Z@P)92QugQeo#aV}W#o6gH5g_Ouq*~BmqEt?Srth8tNx}XcIa^y zGsCpmg5HcPF*n*y;n-(N0r4xCY0s}o7`adM$9OnBe_Fq=f@m-3Y68Y*g%@wk1;sUN zX0)~kA80pn^YIP-I2QZ&K`r`?GCCTsv*;t((83tFrX;;g{jUQ+#rHh0{e=y3Dy>R~ zZ<#rRz8NM_Uisl&vTiCl7?j3ImeC^&CViV-*f5k3j6j1YcecccUy;{CishV(IFr*h z-TdlVe?w7cs5|lAk@sWZiL{jA61))has}77%Zw51fdZk2-Te$9P>N=AkoVxSxSp9> z5ju&H`ng_=&FG*)U>*4HNr4`CB}qKE7j~Nb$yc;|7eDQ4mA=}tL37%5ldIO(cez3D zpnO*M`gkVy#oXwJ^<_U@RWC{gOd05qA_|tKejMvFL-*NfWemH?`*S1RNd0e6HTnAyki{W0uNOmO-nFY4Tsqi0 zFyWcPAFFYzeO}VybSdL~9qe^aaSL@=avECmn}c;;)tTqb);&0Bz}4&R+m^;fCJ&4u ze`cRL^}k?~$4UKLUuO)bKi{uhM|oc!rn2l5Tvj`IbIklI!$`>^tkiaDxgEL%lOEE# zdkig&z}%1fec{ZlLixC)C%$3F`Ky_Vjad9fzfG{=2Ek=!uDl1K`x%2l5N!hLRJ4As zqaB>Dzgt17)F&mijuC0{9$%HbWR3nsf4%|Baqi9Y)^zNT6D=$3ZJbo$*5_v9q>wVO zqEhFIM#QVZ4?H5s)(6+27H*0?ix*ahUHos{JSw;}i*4SrhuBS|WDF@yq4e{xz zi|ZHp`9mX=Llo>JpXFL%A?iBxa?kYY8?^2cVo}8g92`PIOK`zVBB9pD$gdT6f6$&G 
z=&xAyIN&5R7Z7d(-F#k^6RJq^{5q&iry$Dd_A~!ilc%w}Lc&Zj`!%JDrVFCoe_`{7q4j!Wff81TBR*x&tDS$0fzdsE&o>I>V6zC< zz0MScS&FmsXQ|VM7xsDW!MTG|VtS}9cpWScl;WlJcHg|M`DNq%DP9{Xs%TPXz;2Kg z98WL=JMV;yGZ!LccKptC%TM{)1XpbP7+y*Sr))z8I5l?fUWoL0$$X?>e|~d^8$uDb zLM>n|g+C;`rm?u6lojeu1;@%O2b@YIYmq-eFGZ1csj+iPV41joxcM`<37VKzlWdcX z2<5UslH(R;Z3z}IQM9Ux@O@F%Bl zc+l)5*SW_*_-q3uKv1c%A?r_+_OCAa4%3dU+n-UoK$qTF0#kOQ89pX#Z$`fYNJ5l4 zX<-xrO#zpyf@yUz?a*Q>_dDt9%Kkh)u>l;UIk=lHU0UCqOFf0R@wIHMtNxCn+&&Au zWm+?5Bqe|RweW!9e>OA&y0SjpC;e?0b96lbgFJj2mktNi@u0q~@*OX9twU+DS24EZ zON2}DpQx9luBCZ6z>meB#3OFp@WYpa(VGL*2;1Udm%&%|T;^+Z?zQ!}f`&QFNZ00a zc|I__oxQGN?JknmP2&A-4iOT@W$ASgjF^%#9=c;r(vOb`f0YE;hh6x2Fz~u^VHBs} z!C1dmAWG1_$co-j{UnI#K53g6seJJslhOPtiV|9qKf;ooX(pk5&XxGF5qD_pg?YHW zW?+kNfITk89Ta3?3hT5CFIq7IP$HKpL@bL4?@Syhy7y|X1BAfFYp~FdBsL#^n5n-> zx0Vi=tGs_)f8lKjGG#LM0b(onBsnLYBy9f3S|e)~WQSce&Md3wbKVEU(>3^WqyKRp z2yijzq#7wEh7Dm;^gfq;8*lTNmY?#~J$3G5VTl43A(NsShzS-ZFb!~pLn5fltf-cB z`;5gFyivZ`$n_r1;Orx753$M;H-cpe^R`wMu3Wk)257=s3W=5fdPW9 zLIe<2{6_@+&>vDbh48N#P~5gwrNH2@-YU6nyDf$M_K1#+HqRcs!|pk;uB=&}k5apE z2RLd{T2Z9QJDCp~jk{93XhUl4@C*|4F((G%ov7Cgj!E$Wx#S$*wlc(}t#=XbuAPFh z{qfiRe>N#sz{V}{FwFwPMu$!MuWSDPLNfAwnr8AwQ_nCEh*nMHwFF7T2M_G$KOWgr76TkNkPJ&#A&R>d*IhQ_^D z@QNE11nHg`&)G#<;2nQ%gmA}(wMoElS!JVu#QT=9!+~c!v(;n6PEJmFB=Qm`h}^;= zf5!Y^g)3nW95{~beUCKWc1_GKphXmcf*1>IKh{A}jjle!79pK6jDXIVtM-ia)Njp7K9vCF=+Q&<=e8OF=ZNl&`|&)i7%nqxN1e!Wi|^+>|!LtjK`PH{BSas zWX?R-p>3^De_p&l_%S$vcJ?~lIpm=>f8u2x!68^e(t>EC>S$;xE91O0OI)d5JNko< z+Fz4GQo0R)(;u*@^tf1}S(M|pHIATa{qc#HR+6VQ!=qbh^hNxx)PPsNCu7LIm^1B9$TqtEqubj$y1pw0JW z!yP7JJ;p+mYK^8%hi#FgAhS>ws9Y{aQTv{L%?kwoo@QFyw)}YCYtV98^V?4Xt2Ym- zN@k_BfF6D{7u=_6TtLceA^7V4ZQOkRot%=Vk_()9LHk|W1u>!x0 z8kMYL4_7Lmi2;JTL^gq++3;Xlkll}ux?xv= zU^4IPZ(8H9{B+Xhc@~iaZK$>_x|&9~e<`ZNb1+^peZbfQ(wrG3?w7Pb-mQVV(XZul$t&d4s)PGajfrW* zTd(VIHDG%bQ^(pvDoF`$N7m#7B?yToR;=FR$hw$1h4%YgO|my&satrJe{!&49_{1lV#z8-i|dwe z#!iwwWcchABh|eDdgdOOWcW$+N5UmzD%t)9;;^;O^1Bc7A#x^AyNmj`>%x^W4dEfX 
zVX-&h@LBRxg0k5LLc*}(p|;V71@(4f{<0wK%iJU93}szxA3JzLlxsMiNNnr(&^8%W z*Yl2=R%$kyf9xn?6WU^Os%&i&A$|QfS?vbf8mQ#EIE&!?-=$@ZM%2r%Nvkw<7vKTp zqxk6T;6JX)Zkw!NB9(UnyiIDr7!Z6+&(XQz#Z1S5kN#VtKatVift=#&N6*pikv`}> zpFPmiHZ!xm?<0hKCWf=1z`&3a#^hI9v-{%$929D_e{iw_qXDqEK0$zlx-Z-x8cG&} zz+Gdr*&@%J$ioK=l;I~x{l_AeT^t1IS`dB*S=#A%xQyT1O8kgkrWO2sss`O)TQm{) zsE=3jT)<4dC5B5c0rFu(X_Lt&u>uPVtUl)%;YRTeur@iVX^}Sg@JVESqcBihx$6z} zc>==xe_aCap?P*0^wqV{3kBNJiO7BxKAf-__w?<|VAX=Uz)w%1O0FX5mCn^cE=bQl z*Y9{`XAEj{M(eqmkbY3p1Dq5hi)lkmhGrehk(cG;FL=>T>bK17O1F!z9@_DkHImOc z|NL8opvch!Iq9Wv`wBbgd(|!s`KIBsrE5B*e`zn!hvSw38`|cy6T1}?K#B+nkQ7|f zP=<&M!6IZ!ZpZMvTpCX__J&E_QBhhmI`Gu-3_8>Cbb{-j2S&A$SBJyRErmBaffl&t6_vE zf7Y+w7hemDIs!JLS6gG=>9J0IkK!Z|DwiFDSB!@phr)N7D9z=?fr&VL5HQZS&+aE= zfJkI*z$gaA(X`3=E?l4_H)q`6%1(^M9M6(#?H@_aGpQ$!S%$_Eq*J8k2V<@IFcx6FTd!o_Ma?NtYXO&z{?p^gig+!PjrT=+%IM`tI?@D^8<^Klr0|7B3D7=!4KC z_m~DWoW@rH$p=I8GUw63g$t+hf9XAb;5q_lPQ%lwg?s%WC1w6yb)lMoAH~wGQQrRV z_N(&eU*5p3dsGUcN1-Wv%vhnOoy_Za$((W5NOnHVxrZAM)psuWa~2I@FJtkAWtgtIe@|76w+x&v zmx@9SdYU}N(`&skUVTKHOut(8+YLrhqA7*@*EcnF;~Z3dpWhycDju!NxCadIe76v} zs+MLVs1=u{vt*%Tdh|3nvTK?=R=egdP2wdfn_87>k%O zKf1@RJUx`d+gwo~e!5m2y~AQD58?oB{$kuuZ-=m$2FFJiYCY7}f5}vYIfkZ8iqO;p zrz|>F7{p(xn!#}96U9%VgH>s^>*^yb0z8?9|EYxMS1OXS4|MIQ#J7Y-f}9yZ;v+|H zP!U61iop^YpwoW!M5qsM>EGc;3(QtE*kj$aZ#rd(G6k$&0>A$WNA zt0}2irs{Z)gB$?Kt7qLM3Yx(M4x8h({@a_#<8VpRe57TwHP4&TFf8)s51fFH*A9$# zact`b*q+@NFT=>sg^~WG@4=fI5rU{i7(WBKhKOIZd*?^pf9G{!ln19BSeUp;R2^8fCL44FXRCcCaHk&yPQ7967KuebFaZ2tWjExcuoz!^2w8A?U^-O7zt?CH<7AGt}6M5!MW#fA=vrP_vrEknrS!$luX z#E;m>2hoUW#>>yG!V<=4yVV2tPrn>7ZaOQkB!y`DwM}Fc${r+ z6_6L#Pn;c8ke+Tmb3f>F=H~X*Y%4)*)j#jper_6+ww$(M zv?8BvLwm__c!*cRMkFDHk+_x0%vYL*I(!uX5p0OO0bKjH^VuD)p)6ywW?Sbib ze`6=?w`PO!xTOpHI~UBt^~Yuo#c|2A8sZb zk3H$5WROuRn$3Qw>0a9a`tj?}>;n`6rXS0`Sz86(8HP$lR;>m?pc+ocKp z8AZT3<>%0eRDL$FeanNsiJN}M0Wtjgf2EgyORb9z?F^+p6#rdIm7;nq#O#c&Z8x2+ zq5h3$M@@+o_Id?~1MdURyXJ~*qztb9pxh#UY@e%5b?F~ykos22n&I!!cQ7X?Pw*R! 
z?KN*hs17cmw2Ld?htIK`f_UD$I`Vw02#mlN0u({!vSM7EHB_+DsN$xR6{*7gfBS}( zVT!GVcP=^0ZqgMp7R!)F^E;Mb(e4-j4D)I`i@9WDgTKK#SViz;Ym6p_iv|?+o#7ee z%;}|33o)S~(#axa_EqJ{0VpHdb&E1+)77FYt(C&dP`Qkn(P&o{?d%y{a(^D^9k)D> z%n41n`iH|O@YhIZzelEgy`?4Qe|i(=mEReviBn^Bj|+K(tt0413J`B0%1w%nKf4#M|Md2X% zDM=<#-X=Q<&iE}-MyA;)$kg9FbIYq%Yd=1BFHH4fYw@7FO|~!k7?jXgd}G3hC9!`( zeHFU=QEEEj^KZ5W3AIxWXYE=~n?z0ohFjyXU6PCNZ~IJr0bd5w(%f^^!W5odoGh>H z^9;5y>cz^o73Et2e4sOHe|F5|8V*Ne^WO?8tGIjFAZD%ma-Rt5tGuGPV{{0&bESQYldG?p-~Yf`;~o~UCtZ>_&2 z6~3B=q6BDGBPN( z^mC%TS+O?w>;2(Se{$9^`*A?LcSc0U@HOKqo;G^Iace!?h`6{{G3v(AavJ`v7kd6y zHnjP(lD~28E}HwD%Q-kS7hzzOwxzp*)4AvyDT?kRPflQxktcvGBPByx`c9RN*)|_1 zRZ33SFb?V{A;4@pV>+3P!R7JmAuy@wqDrrw`7-T0u0b2rj{(kIq@%wOaWm54pQ<1L2%`H5xf-A|aklROn z56_>EP=BK^%T%>rva5}3x!gwnD>a0VCHaZq9hH{XKOITo4m*Fz40OCQ068Zv+)|D zFU$+laXF^9tQ2~!KSGKuSmRWzkrn`Pj=DNY~VB&bF%UduzfD2E<3)mO0suFwM zh=n<2sJB>6rCT=BW<2rzQ7^(dEp3MlGRKGLe=Ed%2&*5<6?+8>V+)Ht*3c!eKt?^y zNGd>Qmxe!nUm#8pKoEXlUNsF$u_qVVY!y zi~GCx6F=|BcEfMX^{svBY`vY^lgp4jv+2I?jb1*y?_lbX*><^187GV{!A`R&gy?pc z_lo6FQ)@>3Ef1Q;UmVCqh(NNoe=!PXoLYW?8*aPbd2zCyRe7UsgqF4`_`H$oRVBRHC3Wwg)6qaY1=H{1^EnZQnwpe`Vx;(4sNaBF0j;w z!-^YUAaw&T2E$Y|9kh9|D_x^?QO}5pU@}aY!*XqwJ_Zy~nRDXbDLyNOf8qJ7N;Jnw ztFqpduSd>>qNjUMi=@{~=-u~nYbZ({4HeVw(ySV`k`MckJAXZcq|0Enq=*lqh>bS_ z`UsBpEt~an2q%8PHH$X<@a6k%G0rfRZfGHWTi()Ix)K z<7yJXTG8|*^bV4Ig=fO#f4xvC+L6oHoW4pPx$LX}#*PF2&@XAOpRzX8)J4#JtMv$XUh_;ZTIz<}6!Q*Lp~TYIk$h=W6$;RXLP^q= zDw7fa)9nslHMA-px$8MeQ3&S+)+jT&;N=~j>t{Fq*~2O6kwk^tf4!sbY|*fEKtMs_ z@eSgW?Uba6cspvN+MePEDiDIN!LKj=b37htwW?Tb z<$@Qa%K1ycE5Re6>8P>W+jH{_?=FmL%H&SY{?h?Va?fxC7Tv-0>ye9DXpv`aWDeF9 zxDJOFfkPNx>07uYGxVky`g zSjtiV>n*5$Qgk6Ni&V~ls>?U#Mzww!QKbQpU0pOM)o-I3{bRSoT;=jpJ$Jt|ooiQi zY9W+EMC680e`VXZ=vzr=P542`lQOXaexQCccKCHV2+fI@D(!ySlgBkGBaq#ty1P9y zT+wYKB=lZQv4?NAhPbJ19nWH1@+KlX03Qv>XIjaeze96Yr!cd@-xo8*5HDFPRUN6L z-PgF45Yk?E481dxjlI-B+RpWp3>5Y&~LhwgUJV0Mv`sDIq=srjKEMrb~;V&^^nZs2f 
zMOhm$b=!Fu=LE74<9aFrW3W0SD#H*X8Df59osq>7@pPRqmT(JwVYHUhI4YMI7ve^e7KW&0ow)ZJDK?SQBOBXQp$;RCgu4XJNhgG%R#%5fy7NtfwfB~3(VIX{r> zgf~m<2N0u2L)oqDOH8xbNQ)bqM=4TSji|{zC*Mral~C8p*}Sxp%lXP7quZ*2R5O+f zX#R%PR-_|<*>X@7ZQ4o$OdrEBH*-0eu^z0ofAvmQVh|Vk60yv;4{ZJTXuRX?l}7rt zDCPt&vP}ivX(&dxlma3{+UQPPw;D30wG~=UQhULYC-YK=z8PUCnVPXLodBB?*#+4Ie_x|>;Ltn;V)hl@*U{6{#-x@7UEN*395gc%jHVpj zzpQr|5=zU`<>koO7$my3qHR}C4A!`(3L@=!Dy63?tD&=CH$0uAK@J~FIvPo9K9Y*p z5*JEn+QAM%T2Kh5aL`5y!&myg!O@dOOdioUxU;be<=0Y#r3;nH{*M4XL;%A7kcO`E z

hWno$};y3RR?5^sq{{JP__vem_6&&H|OWhjRMJkkO~2d5eAX|GD_czpxU?l*Tx z@GOl59obZ8(nHa3^huLLT8Tv>11ND_Yc zui)b@7B>4nfWZQGV&`@|u_tj7TN_z&bj)P-*KbvIH`Sy_*>Z#hb}=wWHk;jb ze06lOC0R&Wu)!D$PWMMpqhUL%@g|Oa1LlzRcprHzBBtFuG^a410Qz*&xj`6lo z&fs8IXeEHM_l3677M8&T4l^tR>dJU%z)T!y84~3Z8qmvuvgL3{(8Xe}c5cUVnCZBJ z9$Ppj@R2W^kkBJ|;WP+lg)BS=?iOV7sEQS!AOoeL<)Nh%3!49z%N1t@E7 z{Ek2gv;TZ!>~oJ1{xOzY{ZQa ze#d}}+#nC5aUAlnsFBgs!f{42XyIH$+Y&w0T+m#nB(y*R09zMA;9L)K2-E|-!tMwT zbHVm74onU$KvJOeATnsqUmbi*bY8afFlLYFyzA^ec{D;xs;0IKv3O z^UE(g<(zv6^cwYSSOHm#HaOeay;|MsR zrNfO0&J%BcB>7JqX2}iWjUYe48z5euOedG8)x7vzlqXM*it?=bF)wPqXMf&R@L+IV z?UYYIhH5;YK~C{FeW!d^%_fWKWi_KTLtSsGtKr~a@}v07Fez~eBrm=Io52+3l2_ok zNS@Spnfl-l+U@~7k5K<18ka|&;xx3mpb%Vv4{N0SBaSQ&Z#CptN6u|T( zVE5TsUEpDi*|YbYopXEsLFYI=(vHU$;O51ihpRSzPZvFBcy29#3mF^Fp{*HPX+7l3 z84TC)c=)sj5T#RhZ{oM?$p$a)4d;72o;!yM04|K|u>$(8)_Tvxa~MIY3b7~BITJE) zgQXCE&^Fp2p29%eTE3^CO(VHODqbNUrO+;PHi3@8rPjj;E*Qfhf*}EK_GmQ)F&rmB zmBmSn!~+5#p3o$MfU|djmE41s)HpapX1mt~bI2fHfyk@pguWz*!kJtN91E6xl*Xb2 zntPN6@*gZ1#6b`k&?f~tu`_$Ll%t%Q)@p=*MdBF&@P*eb&V{88fz}8j=Mq_VUSLJr zlku#b-pVIkcS-Kq5akkyC+x zNU#ADXlIUohi-~h3)l`aGN*I!D5!5hL`a>Mtjn-p8jh5q;6Q~zJ(vb?AWXmqS4yUsq#(q0WMSsYy3WdO8Y!HGmH4Tqp~}Xl;QHRHH#R?1Pbtp>uf(RtQ-D zj~p8+C!%cVX~4oEK8d9q zxs8&T^&Cp>B)R|wEM`q|d4-_^Zz9m+Ed>gbNGIr|ur?r{)-;YwLKAU^0vu(3AY?sR z0ucdJi55OJjWiU_OH>pkN)2`Ty~n}FI1Pq|0$?RN9n`<| zp)s5})((~qORp#*+~doSb#yO(Y_Jpd<0L0#7>g3)yg06&7u$enO&j}>PCD@23u$4w zjaNF*7h;=urZ&oezj^$H z`d=vh4_Toz*~g^2O!iRz6(Lt)Vtz=n#~mM`LtBYuDMPt5qo48^a!BLxnsV?%jzkA( zqyziC2n!S)G2f^Al>14(n731|&&Q@~%GK$f#$kWT$GGK+dcjr2Je+jH19_xtu3No6 z#Qs(ekMD85rk`k_jCjR=gLo1xO;@dWW{D&@F0(me^@}>*;L??wuvSZ_r6lzvM@b7@ zt12+5Dj>lgQWentcXSly?sXL8@K{Hws{){r4wTc28% zV>@>+M7Az8pg&A{jbj z(SdsBHmsen_LQFuV|pV?2Zpg^HM*T8TMWDKPUPxRtS;HI(vGZ?(FuD^8PVK4+VN%_ z!(=i~yypG&oY<~wwyefWu`07?nR#$cjM?d)Alta~7Sb)9&ZfRK_>WM7Z&!N^2K~G8eZ;ykFQ4+K0KXQx9`F9JLP$t zp(FT_!)^aj^>uzRU$(0s%l7;(w5K>b4WgSgif-n2e-9cQ4S};|EdLbY zK19xr4^9rA10ru;^dTYzAfj>iK;fn&TLNOd$`LV6W|(k%m}9cbRYJvzUR0izugf=O 
zRbCHnZwKXnu)Hnj<&W~u^50D*#U@HVd-C%9`8!flcj_o@R8cCwB^5pERdFz#PX4%= zOunI!n`)^|)Oz~q@Zjkosb#xO$D>-DK0MlnT8{Us^#QDQyC^5!1~Z#zcKGsTh|_m{ zPN`UOO7>G0s{_^eUd5{Ki@~UzjH_j6X5m-fjTWo=G z3RHZYld3hbH=9?};q2S(V183x45sDnaJ-l`HQXq-?4O^%Iyof`yK@T^y1Sk}=En5t z`4;ql(`J2cA-`%)3d_im9arw49Dc_Z+Y%(RFUu>)uV2gS@&;DLf0f_L5v+}4 zNVj+8_i|c73M%I}(~2UtT$GFPRW+SmPNr4)d-+F4M&_F(r;i`c&tByazN@YmJ|o8#tbqAIndG=HT`Yuo;Z6ARSzP z4u|vM=&GtSLphiO+!uq(Z?n;07LvrpbZ}Ww-ltZ```G+e&FgMxhXAM ztMY0x8Uc~2Bo*RvQvJ9b4Q|1plxe?C7E>_o^>hF?*VAeMvVfs){=B=X#)M z!q2MTf%9zmqYNk zTAz~*f>T`%rPpmPp8k$4^MJ~tyvr_zF<50k`XVwmYKnL(_p-5Fxg`;r!#xeB3;`H zZH1ON}&Q^G$BBwS;EDLM$=2xNpFMy7>0O6&tw~tM<9iiu*zmT4;nqv$ zV0fFoF|H&mU2k7x2}>z|{umbv4mN1iX7`o#Jzli3mf!C?>wP6TNpgUE@#(kiie)=i z^271vx1PUKYA+DD2&{@Sd`nmgi&lS7+~lh@=0aQ#;eTim=dCq z&&VfsVUjCunBEYByCtvv)o7?;~Wv2t*(iD8Q zyq_KWDOT-O{xSs3aUx}X%{4u!cs{jD_ubI-1YAi&+n%z_YWOau;FBH;vBzi<-@lt8 zGtPJwoRt6|j_WF8y!NBjr$`=x%O2NJob@L2^F#T={(b23He6ls zlZR#o-QS&=aRGdpW&L|J11(S%S=Kl5?{pR=9C4__)g33F5_bVe@v8h5`}MZ^#p$Io zugs$rF526|Oq^*k2}VOMue5v9SCbzV6$|^g7Oko=KqV2{I+L2_H7*ej1N7Q1Y$B70 zv_+|(#mm6;F*=ivYa~`AXOv_Ml&Nn&y{_mvrEy4KSqee-r3!6Xe7T!#**S6j?$gXO z#a!G>!)pHxm4;Pa$eQ%kHtEeraJ34oiDEHC=*l}w$5iH+(J znbdEG_M)OK^T4eSA$x0v!S2BhOsSrD$clb<1PiJsuFYL=zm(H@fZ^|qlaguxw2mh&6@2>2iX zY&oI#)!ZH7)$}WiM?nwWWV_95NU-?L+n6QYXVar%kh?Z6HeKt2!DhmDl+R9lr^>hD2gc`$wzVUMqQ=kk@-B zv$WScC0Ln?$90qXvKyJ4<~;?z_|`xS)yT|<lDcOxu&h(u!_DBBkqRg(!#xW48 zhrw{eoc9KNm?Hm4gDpb8$;{mOrp@%$6}3wC{RukMQ&(>vEXv9KU*zHZm`*`K2?PcG zi|*c~gJ}5rpgpi+wzAER*)$ep`1V{H4l)@iema*1BqIZbD#*x4%gI6IpmGL)&J>M4*IWW#*JUgqv0e~z=vB? 
zh~Y923(2>dZ{RvRHIwH^1~wB)D1g)y(qwFg`9+ulGPNgp(cWP@vcISAoqxwqq6w)n zvO0Xv=SolZR+HIE=Njl;tGxJuhkd6{Zn3C!2hd#jh$uoozn~TH@BTO(2;&a>WDMO% zDfn#S>7Q-jY|1FYe?4>|p7ZffX6e9Q+9-}q?ZvG6g=plhi3X{9dq<^udx}=_Y4YvF zCG~YhsG5yZJ?}j?fo3C@_Xa78zp7NumSQOEcI-J4%-2I;_dvu_Jfb=_sDdjL;zRYINic{ZFZ zV@peeZO!_>$5p^ppE2gJWjU>ki|F9Kq<%y-`y6jgEnKSc)$&OV2d=1|_b*&BU9e;( zjqDJeSHK z>SoIP^7i7yw!%<)p=gW8RpX?%%1Nfek|$b^*(OMPXCD~Q^-hZE|s@=$|?DEUdiNU}DYGH9SThCbBUP!#+@WWs0^hs%RhTRB$N69E6JxfMR zL|;e=`4F$LSae)9v-GWZn_rPytlT~P#5URxrQ-3$=15;5(NiA813x}$R=Z?iy~ux3ybBt$7enW)4-KV z-)`rQ)1jmNy&(UU5FGQJa1M>BfaCSkgQNY6YV@fbUBa4Bo#^Lkq1{X`mC)e#Ms-0` z&9Z$>eJ}0uLS&_4{cchL%4~r56m!WRz1L{N!0^5$@7*3s=`An|1L3dkeu z3K1QPpKHT5g)^1DMRFRo|CAaGPUUFr-CI96PcVlBe5HRE;Dl3}uzI#pW#J%XFBHkx z&otz#rPc40yGZr5p1GIwNGiJ$tp(9|!LX5CzTpgU%2~-W0Sv^v12kebyM!!~^VPmk z^p5K8)>CRGe;E2rt77(-D{grHqOtE?Ggc_&vHtKG3#fhbfsyfkG|zs!_AF=oc65H> zJF@{tUu)0DdN}1O&<}=umObgI(??N+p_H-S|H*g8aF>)gzw+cGN5=aYgHil3V~5R2<%4Ra};I=7F2fq6Irq=~GQWz=@Y zKB11>{U%0XZ2|NRcJ6&143|hZ!ghGCc9Uh8}4u+35Jl8b-b)Yo> zJmeAA_}dJ9H17ry5X}qpq*a-9(JiDtzY(X&bR<{o`u=^hdl-DOtdZPc10KTa9Tld+ ziGzPGQgsDrI1qGOUK|dn)C(zwd~>-IYwbq9wYLBJ?BsCsE9%Egxy4C)TAIv=Ow02V z8!kTX5^hjNy3ozPNqhj2KvAFEiF&gBBd2W%&wRut;aux5+!l4Lq}VtzE% z^jU++ZwU;O53p4yoe6}05bWciOeG2`rSqo7bEOnS=Y9JY6MgM^%s+uKUyQcZ!)31g zddUgr_W32}F}i1xcb+@QTZhRU1bDT5Z2)l+{54+R>@vc}2-d98ns+Q{QR;mHx?pEqB6OF@=Og>vr zu~3gpbi}xt)0{BBd)|Sw*?XB$7Nz$p7}atog*hFi-ssD8zW;5~H#2^}jTTUa zY%8XCkbmiwAUZY1r(t)qoP}xEduUylRQ&5!GQB+WpR@6|? 
z(7dM#`u|Jp@EK(^3OFR%&W5x9*#FPsdR_@8M-J}op5r#5pQAq^H)1sYEvIax6k`m5 zi(+y`n5qw;i}qUy)@&~{%2y&I1Z9=ovP`KsnfoWSFM6y7GZi38?comv99BbWTU$0otL@pe1~7tV z2Qbh70r+F=X>lVfK%K>%dI#i0Sw2i@Iat`p@|)BG&5yXfY;TeXC~Rc@O)ig?M#wsW zXOpKRrnRl9x5nnjxM;b%NC{eKY8v^msOiKq-NRBax_@kow(%8=5GohaZePaZPUEgt z2rD9Y2O8q^Cq(=TtBal%?S;b&B79V#wH`q$kI^WNzCMVw@o)>rK`yHJnWKoYCR%(q9fD(?=YvJpo>1)AQ`VaBA#@13nWmN`NwS;zX~M3= zyOFYo!u*4Uc1I;0ALS9g6xW&g zXEWnZ1h}ZzW^X6=1Pjme5cH$oO;{Dynf0p%ALXeWNN- z{8O9r=y(v_Qf_APNMiybKmX;CaU@)Gd=8hk{_^gzIhC4PwXy^@sLX3q_m zb~2lvM|7(oezyDkwKD=<7dGmxC!Ff3Crav%PrSpTcGpQUOAwM_BAz7kc>WPjm=R7# zx2$23fBW#Sp5v}#P<&L`0YUrh6kn2qVDFOP>sf4^)bXh7+7Brlc~N=!5K z(`Uvp^xyMR%bF*#{ox&F-rM zGx^*iwq)zzi=~c-9NmX@TfaL&#AJr{cuC)R%I{CMp*bWS)L#^@w%y_m%H0XGt2fg*Bwq2`;ULK$%|S~A$=edRmx#3ukM7BrF-wPp{!S@LO)aQk z#v(-nDPl_lhcF4ep&B_g*x-!MHis+k3IBv)#t#4?NPB$_n+y@DNZ|zg){1ASu-tKr zF5h3D;@P}Qtt7F;Z6!PZz-V93k)ncNdsaosUaifTdmiE}6SJ=XtqTSLW0hi}>a>@?6 z$QPquMR))q`{af`5rVJ!R~)So5Kwk%F~I6>kkK_R&s@KpYyb{*^jbK`>AgQ72CRGM zZp1ZpM~U1B`7Ov`9ynxzgpu}vd4rp9v{_VXfaGd;E}6Gu-^yGlhOJ!N0qU{0f-k`o zKqBrX%&4Z6!HiYDrS@)T6Hr%5KlLU9suAx`J47aDDhd(JC!gQQ9Xa`TgA<6I!iiUd zUN~9dI{@4s-?(4(30hx)^)(R-z}fY0ct-Uw7(MKQB!zx*B8{cI1@LUm_Iy^n>i0P4 zZCDSW2C#8B)&IuJf>OzGuNZb&tdsElLc3Tb;d)m#F$WxKR&1KER*5S$tz4rfVkRG+ zX7DF6BCf6k6X(I#jZ}$@Hdm;f@a?x3IETg2@(H?r2zn9~y{=Yi+RSc0KULmOM|}Uv zw3a>ez&nuwBOAOjd0ES!pW93HL~-^FZ@iJI!aMapF}=Ce z`n>FJPtqJimoKN$@Xzqj#T0|T08o8WjExq%I&56JzR9dJ_J55XZyzVDPXHG6kWL<^ PsHCJoCoFu=ScmREF?ja9 delta 169005 zcmZs?Q;;r9ur0XT#%|lTZQH%uwrzZE+qP}nwr$()`=2{A4-;`7Dl+S#BC0YXS7xqQ zHE=@|vrSYd2%TCW3yIXI3>^*(#M-zGs|W(j%$DjO0fO8pj7SO!%*_7(YpSAgdt%?EJ)!nBo!NadgS0i z8|{nPdb#+#T*Oy@4087PSA_?JMfdbUF=AvUVOwMyb_kLP_5LuOGQHuHVkxA?&HQJ2qOK6X_F1 zzowwex;k*Rt_oMg#o;X>098Y-#TyZsiiABM9t_sW;| zY+;>70+J#msS7_6q)1fNa0`Ezl>Lva^yZU@+ave8YNhcvApl&@r*A#i-de5qbw$wO z_!JTymc0VC^uoK}C)p}3;*Gg&K!TlKzsz^!UnAR%bL?418M-l-1^AhdvW z$4xerklSaJHWD)`aPILPkI2iVW?ZdBHWw!#<+vzD^k8Y+ULV0keGH$HI~P7U^JH?bBV zWR6SP=17A{B|bv5!uTI=bquP498MVkWV%)t2Sy_%SZUUuZn2(V)U4`ei87E7RuiC2 
z{(^BiQVl&=p=#V7L>D)XzX<(S{It6^qE4+CglV`Y)` z<;$%2`M?s;2H8SyFC(B8p&C$FK~c?`bT%b64;QEzJ?OBW?E4n~vN!vZn7%DLLW#`S zP}xY+_+1OL@RvY~k9u6m6L#ia_c?`J71c#Y&Q)RYks%lGlb844Z&>nt_xUgje zXdjKki0V#yZud_dN`Ak67XffyTOHEE&gI468?-9}$5fHj+(dY)vDDaB3Tg+=KQ4(N znN|yGsRd40Y0da8CT82el*&}(QYU39iF9Lpa7W`FYvp`4;yvR_KCVdTg@T!EkRQkX zA#YM;l%V<0sMssRiB_XoR6ex@!7J=St7r;ucc;EUS4>5UE@zQiECVP`@t=9_TS-n2oeEn?%GB@LbZK^_InR-!K6p9VyDya^{p+RO70)#tS({lJxj$SS z7SYHvzJ5%N;W4{~*MR)yTaGuXz=O=eu!M3vzRI3Z4|o#W#g>kCQDx}Yc%>@I^6+cR z?^xzUML~)N>p&w>?=&2@9Csbs-ujWgA6w!e8cg$G1&wB|2LSJk!!FbKsNY965b1P; zNP^`d(Zfp`$r!HpWhRj2*QG&432oBWv+r7R*l!|z2(E|Wiu-|4{3%r#u4>#+yTW}4 znCR5FUn6(SwyM(4kXR8-^y8{1Rb!O{czcx8sJ3wGy*9C4?Be4*Tj7R8u#4ER`WcML zj$<0AX{y6NV}R)-sYcR^M4$y6$4Iw<0&imGCMLU`hgREG8uqNqccr*2)qyqHV(C~s zEx{^x1K+A}+d4SfJxe&1w!>c?OlK!dwMZlz=QMlU_*Bm|rq~n^`^!Hu2cq$-=SoMi zUSz=yXO zIH0pii3U8AsY=x&_1T#rsbvew)eJ|4wNOX*Xh#}x=MHxP>GWIpjCqZc`I;Cy&uG)E zOvy7_ih#_QbbLM(znt_J544%u<-w>~=;u!&cs3c$)n}t-0bCg|JuwM(LV>S6tfu@%+u{CoxC*WjeNxfqQp#eB9GN6RK zdO*`=2S?wuq$=bkYlzkWdeu+ba zTK(zYhwU))GXT{W5US9#7!s`JKXdNk&*IgX!s9i=7!sUfgos3iWQ6<+O90hx2qni0 zdde3#=ap4Gsaet3l@8GDvIYnPO%z1-&%u^Xd4Fv(&+U&V-|-cb_%QR8-LQPVy)=(72j8UN9qh`KRCfe&yY(15S2+kqvNKM1I|E&i6kN>;2t}M5=*i z7$1A4|2__FjdpZWGC#K1%7`n`*c@gIyIqJ=cn&6+9OFjOgZA{8a!ELlguhC4Z^6R5DLLJN@ za$+&}BO`wd-}gtqXC?AaopJ?k?&=@3D^pn>@S<*l1lc)(@&e+MRq3Ejl6tk9hs!O- zgu*?!+awxtE>MWC9QgEIE{397mdmHE&&4uGWfy7V2SX{W+>UHH@um998ZjYEa_pef z$wmyd<8*A$4HI;ozz3jm>b805t;paLG{eQnR?hJ+5*pAqZX}P$A?wGkGHKowZB@j1 ze&UrwHyN=r69CU(h{MSLnfw z$EFoZ+-*pu-!qXrKE$rAaAQ025)%!26NhO&*is)3oMCg0h!re+a@A$%-fC&<_6N~2 zCqk}dS*>AftZeL>BnDZxTd`lK>;{NIzFb;UJ+uGG>CIa~MxD(^#G?1BCuUIT?QchE z@|eDN-r+nPg&Gfk*sQ6Y^jf9(Gbt`(kyJ(;pa_tlx_>?jn%`AzHV*+6(GTG7pKAtsB{ySMSq zM??^@yS7g$_x zSpOXd-6O3mwDZ2ZKa74Hy;tg=7wKYs);IPz4J3I^h5@)7F1(UNX1Lt=+AG;lS1%(o zhf|-mqUD6?A&vC*7l()$U1q-eq$yd?@OShmmJ%bMRoXbDx=0`aYHc94`MK_la5!(b z!6EC<)5D#Um6NZh?5XIj8MO0F9o0r`?6rd6&jcu4v z-cZomrUBX9h>wJnmzdQ8NBHXOlDrw9awtmlQc8t=9=nen=(67fYwXZuH+Lx(f0kG* 
zhcEn-BboV$ZeGEnwRa@j%>OLW^SL=NemR-*q5!VqjU*7ikK&UW*KK zDc-=QEuW1TU%ax&RAFmurVtr*JK%1Ow>zG=>9W~?)&J@1a}(WuyooXSl2D)f^WYcd zknip5>r5bivYZ>@{1V-U1O+S0g(| z=>kBRPH;~6SvbZCB7kL3-KOF$dC}4*X)HXRL?V3F55Vf_z!3ZuKu?O% zRNzu{FR*3H#Y>$5nLUr^G(rcT@F~wRDvZXiz;hmXx*~b7N z$n(HBC@By4)j*Rn$GNJ|@RC2ASSragjO$DOk}Y6&o8(&tz>6Hu2P_RxrImeyO;7nY zb8|#cNhf&%=J_;D>#kK5c|q6$yDZ1HMs@Ka~`c_fca9}#BjTBLTM!elJ8m?x~A zKUzok-ajvJZ+3Qk-ERex1FoIx57EQyNuplQ35uqH&KfyqOM%9%S}0%FLv;L`UtY}B zXGq8v=YdP%Gq^JkIFSZsO0h-Z5~B~A8#z=bKP33#9ypxDwihsDHq~I2-F%P1zU5c>_;ybfkrqE)-1jD@RkJS zB3Y%yCBSM1U!73f}U~$>_#kF;ifgM(3>|iZu@7n9) zKN6}mz^XJh>)%BA=zZRYy++RTwm8&TP5{XS{k7-q@shneZcgN_&0&Fz!l|L#t%hHC zLVWJ0Cne-@On9$_73}ans^O6m-RP!RI!r&VO0qav>|KlApyy?&UUZ*BaL@(- zuGcDSN{AE@lDfP5rlgI5=b4d^^1dfntO5r|7PVq{iK@>6WYzNiYf=H zu5%gfR9z}=UA&6i))ul0T&717g9h~~a6EUZKpbG1d&4+l>;$D<%E2b}$^|bnYVD4Y zY2j($u9^)Nk}JXtg`=PezBk)bGQhH7xvt~e3bnX4e-o z`aSX%j};pE;vfeXgcsrH`MRDtZw%81R-A7jYI`JEek^UOnaN%v3AFG4(JLA7wo%M% z+Y?@5slCD=WM=#jlJ=kb-*7y_8&(ATNAr^LNi=jYrtNF7X3NPXoUsZp3c!+bGf_x! z27}>6=~!(Q16<5BMqRWQph#uP1xMx^MkYeHlz#9z1$4n zjnlf_|2$?Za`QD@U>4ko`kH`sL|-i9RfZlnSrF5BA{v|& z@n3|LUeP5>Qet-#BVbDFnKi>7keJz{wHbb4&Z;U_$fS~B$5X!QlK5L%C<`U_IaZ-Rey@<8BR_Lphi4c3~Q%|&x6Ym60a9-W`#(k z>7?RPODsmvG8rhkjn$+7@BE-X`T^}9wyFCp3xUMG$8WSUqQy1s-8zivqf;y) zU0)HL`DbusikfR<1Bu9DtDbod(B_r*U=(XXcNUKrIe!v65$A05AHs`MvK`lWI}eLu z$x~o4Ejz?oin#^y&`1LW;4D6r?CwU+;VU)y(jDwiGrn2NgvUmWGXYglFSk&c%Zi6@LGEE^tW;JGF<j;g zFcyiXO{7uva34a@DGneC3gIu(JE1{+B!$0$vCVFatw$E^n)^x7=WmQ8n-hnva0L?q zFz#a@1(rQI%U^Hr!jSiLn~5D?b>wnkbR%7S)B^3qvt*0`k($05Kh~)&J!SJu>~K+X zUpu_dMPRW@&ZJ7Fby? 
zObNDU83?-dXy5m~Sy?;s-MI+!YqY7`q%h9;b^onuW;Q}a(|Fnp!w)Br%xJRZ z)*+TyrGkm1kaff;FcqvIT-MUHa6Ai@J|XJlCX#J9M||Y3@ID z;gF+rhijU&)$^v1Dco0`RoC|e1i+L0^gY35&9&!>>Wl^p7wX{>dyxbUTehMpuzY(r z>XvHw^P_zE0v@)@6#w>F(*h@{qD@bYHVTU>n{f7-lByjhF!LoC2g)7S zMS^^N(z{4Z?Y__{H##>fjcZR+ClWeZ(>s!%TemHc^U6dxbsF-oKq$7!t6iMQ9K9&; zq`R6iBKAO6`-oa@n4HxCEbFsPGRjlztV!7S0(IJ=&?eCvbVNPI59#GvZP(J@Ew}Bj zTL^DJoXJtskM%!8sf)Ryu2t79 z9w@{;HBYd_OzX|!Me*#a>mWRrn_!20E%xRq~rK86y`-D$a~w zK_-^CrVELtTb6ru_%}8lfM6&W5F}(yi;Dg+P5wbHtH|#=%DSk~h}1!_w3%J~!V)OX zrF;TyeyA@YCb7{1{9FEm^G01<*$vSO5>9#_{Td1Rl|1xT9z~yq7~K)F=^_hhnZVSS zcP(1(XUQ?w>|O4pfpYmp0L+S=Hsp(_vPc_{2Bkp%Bqo%hV};aMxweb?xSey&=H9R+ z!-%u`!j)CBsow0O8qys4*^&i(%i#Xp_qJAQP0#STiV1HEP+P@R3Lf7w*JXi3uYv6O zWvA+cYy&5h;<=K#rA4hdHAJYPIAX>1k0J4jjnHIUo=NBlZotjeg#R~wgWtr>VB)D! zk}-&DRx&#S6nI=wR(7S|{EjT*!0M2RZmC*Zfl!+}05;-|UB-Sp642?#B1)$nUG6f^ zyLFGCGgTy#iYNmj3B;J{ko6zC)2umbhszG%)7vXF`cgoifJ6x-=+7qF0n}nXcQmI% z4l(Ls(Ve;G56HXzdH!^$in`|W8gVg&w|f#JR?L-%Afj|ghyx~vA)>I%(*#D|SGzwX z2v$rXK;4xTW~A4eEAr@-ER7CVa_2i$EAl5sC+&z(7d6!-5wTOzyuigfr5B zFNUJfayTiJl@%=a*DubS;b$Af_a3DQl)Bf&a?VqBQn2a+X*Wlf!Q^QOa%j_Jkn7mj z43vBsfqQW7XN6SPLHlENQe}fvRRF;b#rjUuHz#$GD^<4jl^Bu%7;cB>?PFn!@~U*l7%=gr%qyTnZw!?8&jqBzdo4A zg!BQe2mqv&2{=%47J??`ktnQPk*Xh7gEcz5=cm5$m+4c%LSCsPTQGa8Kzg6mrp3j* zvXq6@#VY4-4e-eUgymuV_%BH2kjDhj9)|eG)Gew^;2?v_cbk`+_uDq$9Dl!WlY*v&)wdBSocT#~)gVrm z9N@I3_w#Ib<=yihoH&VqzvFHD2s;Q4@hP*_y_e70V*_|UzXlLY?{7ZF-i$2WZ$>sQ z7N&Ma+W)!R<%ii+>*S~FLM3}(i7L9y$!_jmH=zFIZrt?sa&&}Nf(YC^O53vPplVKC zncM(Ox~iruNnen@DtWG-C(G43dloNJXX~CnH7zd6$#e5w|Nos?F5b(hs>MdR|D4W( zSM+}f-v2#Wx2GFp=hwsgVR!nabFtB_v(dl!eEJ1{^K`Qmf~BhskzgOBFl|q|k2VsF zxfu3P(0!H_?JvtG+ZLd+6}Hm*FG}+exJSQq6qt9QFy4MR>U>)B4wPXNn^hk{IP_`) zviKWU@P1L;RNvmQ0Z9aOb%U3E zQapi19;nk(ighAo>QD9%`=V7QG5FMDq|Iv^h_>zfV^KuY^~Ch>!gWfC)Rh0i^cyG3 z3v|uwla3u!qQ?VQp(}6xnkv_Yn%bW|aL|Z9{L!Gjh|{KI9LM3eb~q%g)9%?-wqP`P zk1%Ak&t}1+HWvrT&38kXp6Lp)=+t?fKlLP}vf$;m3$mIH7P!#4i$!5ajWr4C35wUV zF?De)|Dxl+SAxcwwp#FxKbFDcR=}PS1~iB 
zuovBN8Qo76?QJV6PVtOJAE!cRU6$pz0;^?|iZ=HnXDtF^j+0RYMW0w|3^vW=%dmL* zc|eRjxF>*TE8Pb=L}Q5&AN!EAj6!K+K{mhR;$z5!9kFZHaM&*nULr$fcX*;>x2Jic z@vF~^z88iS;|%QV7;sR%cwf{jZb;>CDJP|a_Q1WcR&3g>ATe?3)76LnF%;rqH{n&* zaR)(<&kg~oT7=RB<|H4~!N5T~c@Nfc7N1uBrJ8|*$bdnr%RjSp^p&hyP;0ItLZgAJ zVith9_B8a({X9>zJ7o;zhPU-E$K_o9#Xp3_9jS@Pu^c}w2hB~QNc zXK$s;_o9{aX1Q7y&+0|1Y~9X+Q*sS8(&q#(%I>Ldn(igt4d{0&X|%*#qi}I9I4PiL z^Tn0ag#&6C?5UhwSHd|)S9Jz2rbH?!Y8sbnoJ^BhH1U8U)SE+}&UrY3NqX1dUn$=q zX&%5D-f)FH8*M7;T6}-Vk@$^(U<`B$y{wkLCUJ7v0bb#)Fcc&HTD(=(XuY?4zBYJ$ zTk8R)-LuQ0sXtAwj7n@RPimOzpUA?Xu`an!+a@8)W&_(EX5tGc;o_Y0Zb<* z*!tFwlQ4UEKkD{C{!k;Z5Lh6zuTksy+AaVAIB1|b{MZAK`8^`Fj)scFs9mTCWh2gb zB(_R3r~L zghP)?mVWcUnC;oD#feTD3l!JSN73pKk6)Z39JwuhtwjyhqK~k(+OCc@Kyxq_fn^bo z`$HYEPX5){HwHb~qE}+WcEv1L$O14;#($;utL#g0TFTDtqv_LRixn>6V!3^?8 z8NA+|p;6;c*ZI&MiCYo!E5K;rrMiAR$RMwNpI-!)P&e@Fc;EgwY6WP4>_MawjDn(ou(PwL-m`*G0G741>^8;Fd|zwl3ZL@U+nv-|fEs4NEHbMy z+syq)1S}#FpdJfI%Q@&C4qvB&Q7GpdB+3y!_2Y0HPO><6N1f6!563M7$nEHUe0=U} zS7l_qkEE%{S0|y&pgq!Lcxn(|*N;3I-+6qLha%UrJS{(%5ZEn}R0UB_*RL+00rcp# zHS|ATg8D2x{<eW0^pa#?Uv;Z*29?}|W9R{v$Ib2me@%0iGyl_-%{z9bji0vnxqmQK1aO3BVcMFa z$g?{f;0^M81~BT_KwF_Nrqw?C0M`HZTG5n)D&cXis9*Plz5R%I!g?I#)z>q}VGpQJ z57G|t2B(o5P#`c~{rR2kz}G9-BNdz>N_~Tcs^|z{H>rzwIfgzV4jvu5&cN<~DHzGr zLdpZf=CV1fx3eAJXXQSB_tp`aeEV#4QP{>~_~1AXc*g7|<(rwHl1Rwt007Gi;!dC5 zzF4HAzle1LIseJ(|8VK|zY%%3-?PUzxXJ~?5sbexiJtM%-kwoA|Dg09p`44A*9s(= zy>-oZcQdgwgwSxoUeDy;>HTgBBr=CJ>}tUv`x|--sRhJ;8P3b^ceR)95+AHyAZPEG zSB3IG)8}}1ljZ6syE=J52{0bDn2WX63dS?u)#?7RM{3F)=(_uT^JwxU z!d#wk+7J@lfxtnwiGWO!4Dx<1$Xh;azh1tWI||y5dz(sw z&-Y4O;zh?s-BVE$y*cwt^gx;~Zu)Csl-h!-sYE+wq7p?$WWxwkC!kbxS4e57?+dog z1$hN&f#Ue#^$9)zHvBofB+sg=AJjB%h1hZ?5d|I&7|3WRws@x_Zg4%`<`V7P9T<%D zWdeb}mm@(Yw&gl}yR#V|UotWh6}y<6|8l%^pQ=Yq+nZ3i^OMwVLTS582O4WTm!-87 z9D?Fvs|G6R%gc6c?|SBOl+5FLmQ1r2VGcaz3j~}_9PJfgkdE9%tE)+kdoVa;l?~;+ zB%-kl^ha@b^=q+uhSWdoUyHm-CcvhpU8U*XT=FaSuU*-C=TzWM(ER-p2rWbLXqDx3$8I$tAaGHjIAGF)WC;7aDC}0kedi3>;Tcir` zBfSf8M+%pC0yE%PH$4MNw<&=%jV-8vDlgfDTp^cF?KuY&0yt9Jvo~YJ6oQcs+;}ew 
zSum~r;booeB8J^FB*N?HX9SRIAQPSkevw%KP>(FMxy6@n;wfgKYYrW*tlV4|=dg7U zy8b+|O#>Dge7Az=_Kz8L>q)s5DjCI2($#r1vOo?r^O`S2d=-<5tc1r3j!|_ddN4*YxL_;uff1WVNAK`N!G? zn0{p1xEI9V>E+Bp-kYhj%!*D|%dI7ppm3p~43TS*hORM**0VZ7QJ<09Jh`4JVS=j$+WiFfBTUp81HDBP+)+C+s`FlR) z1g2oa3@Y(j5y$}duK@-*ULtxII~WJpLnQ}{6jn{>RYCJ;7vGt&VQdgYAzLsOyrPB3 zrVK0gW1-@RuVrQ*a?$uAVx`fw>v|Amt_Z!+>x)^*BV1d@`~7_JkOOYj8K0aNoVyFy5mZi}i)aQUv0$tc9jg!NQD6BO zq7RF_9lf&UXkLHa-Au>^fUAu3z1;Z()u?|grXkd!oty6Ag>jmnc;%1jsH_)9$nbj9 z>esX<;m|t$rAhwo2^RWBS4zTHU$RKMJ+|&qDTfkJC?Re&+nN`eJQ@d>iowpk?Dic z{pz!wOG8~o@7CX%&wn+R>wz)lv`ckvyl0Wki!QNyx;`p%+b+(Ned7jzjcX} zre<{X%+N7yTX|$&yvPqnKvg$-qZW=`rslrqOHpX_R972%H%wgXuwpvxvxcrOHPX56 zE9k@3z|%zl+^l5Xl{1cAl5D1AUFFhnUb7YTEN;g9vV)ay?#>0~6iK{)4Sg)Ei&z&b zGegJCs;0(%w}J)fQpM`>@BHp!G^T`0bQ2UoF=R?hmG;^lD45Z>q17nK8^h18h&rWl zrcR#{xw(Ad3l~4M|1YgEFe_Us;V39FFe~H#=+meHHg=n1e}8WD1oQMrp4v9ldO*8d zBRa z-xi+%wPy4+vp*cf3|Ovt=)o~K;CI>X4z7<-+{)&^%l%=&{!X<$njBj&dl{T+X16nS z^Fw%n`XcK_Mg%fvo>|vFVL|za!FPfJpCf*?P|(2)!W2e+PtlBDe;9Y2`tZgcg=`{w zRnHASUboAUC*lx)^xNH~>GBC5Fk8WX6I2`koUY**#gn#|FlYsSbwE_=E3ZQMU_jh6 zAN~S-a0t!7Yj&d_x`;$#LH9%GcAhf)3KW+N-J~ulN0R9yb)cYlw2=)lwv8hEKA0c` zlG^CSzA=#&3xQMpSQ}#kZu7g}4#wW#rl{nCBd>Yk=GZzq-Fv)qut^(GveH#-JmM;V zUGE`aSj4l?&885ug}|}?i1jc5H{cQu=)bW9GUJkn_Z$!Qd9shrGIbc=r@{zSAf#>u z3rN#RXRJgj?w>@CbxLeRcw#`sqlCcYvwX+`ZaUhP(n-M0iSGQO%Qno3EcM<;X6WM@DX;Q6{ds0-9E865HPT}&&p5T4urrV z#%y!}PlU`0?&K`A=UQ_TxL7=%bsXxL*s$)y_YN)*klrGL;zZ@nm=6?aX$5|ObcKhL z;oco@D7+AZyfrzLmg5~B9{`l7lti1-exf$E6inG!7z7fBHL~DSXV56`5H`2E@<+m)-1nZZ_zxATh;%}iv zf{}~|OZe$pErYNUTw0RVoyrAZ=2G-OSahLivN^aiPeL=j_;^?^Zs|hdCz!*lNrQuQetj2&vUf9d#PN?Gjq2r{?n{3x zlN(h@+hF|UdKo$+=Pj>LPaAl6-vroEP4H-vBzr_TG6Np>S!nZrf)oKuixpBO@?7ku zpoJ7evB*YKo;Xp4b>V;rSyVwz!zliSa$ukx5D($wjZ7*~Xb@AF9Vcb5McOwl9JBs>vHICOcgrh1tHFp!@ky&?`E-;j?QV*68Tf zO!V3ZXSQ1E#|M`r`9*-YtF~kK#4JU!Wm#4OP2@RMO{FBU`#>NAEV+*4AjM&fj=H^A z>I}(49y6}>zsXc%yNQ8AZ%N&CDD80Jorf<|_7l}ydN`!@W2uy>kyLLnakX3%iSG_e>?|k;5+uD*h{mVb1=`EZxrY+p<7@c8-bU^gpsuT@nu_k5!}BK-D^0 
zgzey4e8Z0sQU)RwE>f`KdfynVXJ8KrJ(eHzKFq)hr|qGGyRzBKQ=hKHb> z?-h`-!}jm96gj(rfZblcm?jm5u2o(KEiaDE;!;HNa-C{=3IVsGNSF5cdOOV!HUCIB zk$ulqCvhya1XcmT67@nCM*6>!+-F27Gsym^?scLQZxRe!^Lm-w(69FUG@z+y0vvA; zf3%og+!%8by<^`w?^WqtUoeVX*z`p%TLSp;PY*NJX%O9YZc+algE9Eq7b!p>x3m&y zeafexz;Dk}R%UQxa+ zCds`QSbw{a9>w_?d*HbzU^_YaigepDe5vX>2J0H)mxWxa+L&K|O2OaSgzlI7na={JE>_} zoU(G8fl>HrHzzmZ(VfeJTcth)F#ktka+<;_41&}aZ3P;Zydnlp211Ko<6#I_2!6H( zZy2eUxA+tY^5F0LHAzesQ-v*4!kNY!plrNWJg*1o)bM$IKTV*ZwHwf#o&r{TG2xZb zB10voOqS*j)e&IkiY4tP%T!9KGMRg;2lk5Nf11^sD>A&sts);DGZ0xtsF}=| zOpKv(L{VlJc`TkHVUu&vrnOa{&15?=N0j&K1&%Yz>W|xPK4$)!Hof2UBxQmYSLOIx zwrL@;76_b_Q)boD?VL${n}??gjiU_01C{y!m_y9Q)WLXm!JHIy)aL&-|ER5z0EQZ43RNYVi=&UB;U+z zid3bMp|RF1ki8%N5S$o_|D_XTWBs51N2$RPAP69w%>VsXVEs>rsbwR-#ew)UQ#;nW zncOjQD=F74Mce&io`3A88}F%#%BCAOHmR!{^Kfg2!`-=Fr?5sNF8voNo#VybI@T=b z-o4)GK>zf4Byuo%T5Zbdf2h&y!SKQKP;D8URIjklDNfjI`M^~V6nOEaXK?yPhvc#* z*a-^h&|HLF>;EJpZTGX_K(=fC|4^_T_4l|AjSm;iu7VVPM*73vbDVTnrEutY-A88n zM{=>-Xv6hsC0XeIqd;r^8hQZ}LGo}&mV`hq%ps0DCu|mIV;*dO5{?E6KOz;MXe7;% zjM-FL;6$k!ob}fCWva&80Sn3~dQ?C!dk@oa)T<`$4IqvJUaTrq$_Df<$NQckACX59 zKWhz4pYW3~3@ye7P^9n;fDuxOR;x=DQ=L)?cb{=Ch-T5{YO3r|H4kaP#IvLeGfqpP zW$33kmYlSn%S7;={UvoS%XNp$S~N3pjUd-pmkSI5rU7vG``a(^tQ+mXRx-UA?2A^i zJv-Z%?!nvLYM;2Zh{b!r2kOBRlY3DlXOW}&iS1)%E34hIg^7y~sriT?wBXO++A#W+BMv7)aLVbxKOd}60KWSr7%aRF1kGLGnij?5L zy(z~~aWa2SJ%OK24^T>&S)2>#dI*$wDlblKw1q`dRhsHPk&Ohf@10jO{Vnbak_L8Cj`PfrskPj(T`NrW~Mhfx=Y!{d+Ca&2`Rv3G=RZ~miL1VYh*9pw_ zn6_Hzot7=l%T^YJ_s%?3vS&9tFDvr^MLpi~_wxf)Mu$lgooKUakWOgi*ZuiDjVVMk zTY8zQ;k*SMFyvX)SOuGp@}-tsNkgNo)CFrxi>dxTAY{DhLQ>|tKhbixe0p1>U-?-Avt zQ+i4O9{Y@M{D1>nN#B-$1qCQ*4g*ghh*MOvPoJ5mY#>B%CmbBB-ygKUdhSG^bScel zTbvQcDJUIKiWn`-APVXcWX_KuatN$=uTQw~*c(Hk&+-vAkH@$X>o@OtP;m=i)SR_4 zK~Rl=08`h7vM20izykkg{Ll)(-C~?0K=;DMVDfq`$GvJ~Ug&|ArXtHm(dbcz&OM93 zB_30&i|gA|Mmn2NX)syCNey~Y9U?|0uWH@79v<^Z(2F6(D8MXggDd}|t-&zTXNBx= z`=8Z0iWntX^C9d1`p@u^_G@<9ACsu#JC&nuv`3d)vm<5OgJ~fBT`{O zUKA4FhFY=@W_zA_)URL`-vi2J>5&7IRr^0|ePfU&(Gu*~wr$(CwPV}1zp-uGp4qXv 
zJGO1xp11Gb_x|08j{eylUFSq~RaR!6*^|y(^irfDRR8i5Ftn6pl{Zg?+y7nxGtd%p z0vXsHAE!P5n~C9VepdPee`1e`?|MAvmpvn5gis(%pCU}TA~au2ErVHkX0WY1a7;*L zo@IT_&fs)4zdez0;l$V(wq~W$x1xG)v@o_+P^g}krsg=!7D*4_*j2__9kO>f4+o!N z+k?SdP9_)_5%#$+Fg07BrWcEKH#j2TqPVB0-+!0^N+Y_-bK=M22uLntb4|3=@F`6i zmUVn6s~i7Bu*Z-9-J{}R#u1LH`b{DW}@;!$wwUvdu%HTs?QbJka%$j9}Z<&9=Z%>Kd2sU`cb zJ09*9(uvKwt7TEgDnkL>c-PaD(+nq>*z1d3j}PzHBnV;Gk^LN0Gj|G?yV2I_&~Pv0XhU!a;V#$ zHRqdjgGx-$AIW$WWQgq+{FcWOgx${D?dva}cpljbKXqMisctWDmMKKXgw^pi%Vpw1 zI0l&{A(`T>+fqiUQ^>rsVaScB;q2h)4QCE@*ZO5aZBO7=N!bI!-y$^96>ObhNEhHT zWT!U%nQj1A?gB6g!d*j3e5~Jm+2!dMBwhh2e)4CQeWYi3=Pw`U$#k8Xd}1$l$kim7 zrDlFO*VIS({1*e+DNMk971@h>{bZ{UYwHll|74RPXYQE$zFRdz&{tR4yCEM&{NJRg z_utRgtd&q2Fi4qb9t= zlp`AO_)~{Jq!WtKcfEwo%L|-Or(Yk11hN*@nPv{KjbA`J!U*^Y0e9}7y|Db2_*2M2 z93&}dA>ac<*oI+Y6$h-~$ESi|%!H298t@5QNk)^iD*R#+knSm#11uR;p%hWvVtM-H zK%;=DI0b+2*kMT-ao0#Tf^-yz8Yl4J2+v8pOHxh5Jd zE<0tn&_zX)Itl4Sa@g`?H*aOJktxk;WGjVqMUg$aJg%$s2{pwxXLd%sYDXGH0D&w| z*1mk!UQ3OkjB2}9wK!XK`fDPMcLPH-C=o!@#IMKxF$T9Y#y+9lQSmofWb!NFHIpC( z^jV(Cq^N3t#O@bc$Ujxg($GS}M|YymFLZPWF<{3lCN#ZrN3bw35*Hwd5}22)$FEvC za_@{K!AG#miQPFVBww}bHiwO$!XK}VpZL)9DH|_NmiF{V23KsNA_xc^l(9u3M`nOJ z)3iGTgNzFb1Osw=uS^3`pp(B1-U`{SJi=Gc+5+KbLn{|?#CFNC)mm_}M|tuKLkK4H zu=zLmjZ!taL2k|y)BfS*t2`p$Jk2G+KLH8gr=_yYj|dd zXeJb+q&PgZj#zmVI2P;+0*Bl_)&6LKp zR?YLY*IiBUao4P=?96LBrV2ZRe~1||Ec|7vW_)S!KgeEcsP z43-lMr+3lQpjA_Cknb7L|j;D zRf@qV8Nw_KDlC}?FK3u1nE>C0L!pmXp`E^+br~)}84@OfUFBma2p*6zFpN+PU+UIY zUsq(W_N$gj=skN>{`+ss8h7D^-zBr1Hq~x|EJ?|+1Z(%JQe_C98|=Qvk$|Rv=>`nZ z)f8 zui}~G^w{eD%o7{Q5i2cVb5#YP3KtL*D&{#P%;7-H=0&6eQNSYyXJX1*@i3c#vh;G~ z(sgl3a*?B-BlGbFICf{OlA{9>Ol4JFe=dOI0iYv!*ZUA(^#uS`hn_Rs!B5J~i51u0 zZ+Xt=XzY#pW82){RnZD;G;ie<$)IxDW(Q=Zx~kroa{ZKG{5!c{E@p@*D@~n|FDAi& zqPil2hI1Skfls2J`Q6B*Y?_k^1U>}V-a;qY6t>h5$hca0?m1@87MXxRmp2o4cc+OZ zdOe5+OhdRd*a0A_g?@~kWvCgug=Bk`3ri zT7*)@T|EjUO8;|s6(}Sc{?)87C~!ir95d%V%Z4hULHXAH)Hs#Wsv5V;1~(Ubl{!e$^off0kVA#$v;OW9Ua zK%^xNTm<}e*q3820{Kb@pjjLzI6prsf7X9e5eWF95uX11{d2r*^LiS;D{9)_KmM6; 
zYN%w(p$>=;x?$je1@1`sG#3c!LYU^Y)rX1$C&R`6g1u>wI&gM<+wvHFqm!0uuiPt; z>X90USyO7r45DiKvS&5IFnc{5WWJAvu~yJg0`btZv5mSG=aJ2A&=Abe$tBF~;T7Z) z$jebEBXl$jmNJj2Txfk{*Uc~4GXGPAY^|_ImKWT(UO0{i z!fEX3gQ(~mAU&Rray=4wJDFH|6VDnc={|q1t?%!MKAr)dZ_-)q%+195S66=NY*#M4hRll1) z&$7MPglQty3DQ6G#Q;;w=wy?qQx`h9&_irwdCY~8Pfm#LqH|n z-uu!=u=#xgx#2?@W2c?(*vxj)b*?v9J8O;hjCsdfbAt@{A9W8W>;IJ|p#ii0*Ii*m z^XFH!1kw*b0Nk&IRhm7W7XjgPQDMmf&me!(2y4eQ4`+`W%`(MeYs{sK`?{|7K?VP% zpwV{u{4t!Z7|iAIz!b@A07D+!vJNg8lc0czyUI}+HQ7xO0^a^6|_uo{JV@X zQc-CRg?pXoKKEWMo=Vw#p2$Q)AT1{WeT2btK)z6k8!Q+!?~e9Icu#`6Z-vv#tjQh; zGUtp%0&hVq;EYc0#Hrjy<@Z*sg$j}#u0u(>oCW6))3Y?k0C7nh;qbQUc^ROM^V>M& zpw3MX2qA7ZvU~LlS1@|hc0Tv76^b~QMAd^dPP@N4bHuw-V~@%h81>I#m7zHM%#q>D z6U6|NO!o#8!BAmFvt7bIjk#+&ZEM8P(wko%m{ov@%|tScAT&$LeIyPmNPTjgeJiHn zK4nYKz06-6m=j6I!+Q{T7({@zbt5sJ2zKxgCfG9Q9`RDxNTkUcj*B5Iy`(It9hh<> zLJJP%ljmL*yd;8A@Er+}XQrd$-40+S82ly7k>n`vyrEn!^cP>arOc<{OO|t>F#0FI ztnxY!FLy8Z4(}H{A)r4yw#`_?O9o9r8XHVDFuf!~bAX1vaxLZ{l9}QfX@dngIHPn% za35EOu25}Jj?T_KQ#x|je5;vFR^uw|d-s52JAE$(l!5OUd{S{9?=Rk$zq7lG@z8pjYS0X|{Xp*m|#4(PzXf{CKCyO|l1YN6Fin6aRW5Ln9>d zNTd913z?5n;d>6whWVQ*h|;_jFtOr<;=#|&n`Pm}?MQ__uk@_?Asp>J8CtbNX-^k; z(=ho>end1?kW53vWJ5{n%euWSAwG8^6^q$%r_y!G)RCg&+XCgHHg1;P`kRG0hBu6j z9Fwq69FI9PjYuW0WPr|@e@U_7c_5v!qOXiXk0dt-e+X8oQ4M#DZBm&Wpc4W4o<`&z zt~{H&EkVw3be%yLuCAR5drP9?2ZbR^t>)h4=|15Ok@)qI>v`c0?i`}&+hc&d+vMrh zB@zkDGyxDNTVXqiZ)V$UhW$6;I$uUD1wLLcIjSspPDN_MoY4v(1^_s@L&wvPGN$)$IB;q83+n-ZkXLgXo%5QZY4Zrq{tH3M;pZ@^TSQOgL`pnWW z9($$Z;w0YopQzUz=Vz+bhqxxTsAvZF!T$g%_qwxCuZQ-YavFdE5}C6~A8wXD93*qBOkq0(=0`A+R3B1rlZ!PfIv5^U-Dj#TRQv(I2>$5SZ;GCeZd>p!Ua{EF`OpuTCXq^q<)}Q z@WvD=9dpQpGaQbw!+pXQHZ-_H{|epu-}UT z2=1!}qin=^ue<$qhcH$wFgJd>0u)F=H-WveZq0a~zbZlZSA8mS6(?I(B7|_Uiv_)A zgii+DHs?N;^LprBrsrTe<9~u?!5fZv!*O9NHBv3P$N31?r`hM+(m|Z7v8t-Avb!KC{x7d}-+JBUt#L;VipQTbLq5M7q z=sP9;91=QHr}a?XCs7$rVwnF)ll+D@n9-+8Z%7A4`wxp5goPtLQ4*XQaHX~7u*rb} zcrhsat4CBVQ$i-wQxw4HG>;~o5Z|*cih`yc+3GbXqa$Nu;ne=~WzI!LUfag8epo03 z1B$_p2Y-@%=uo^%6A@%#k?8mBjxodvvoF*jG_0I)Pgl`uaU&=p4F8xPOhp$(*UqlH 
zK{V#<$uZFNo73V)H{b6Cka8l7jDbB7^kkNR=9E$Hlz>hbQ_QBhGVn2SkUwou&$!JX zSO?&^bV9i&GZ`hcAeMbF9P!i~W^NSp` zAoop`h}Nr|C`*Lz`PZO8;#9S&r(WXg%@?T4rZL#7(%byyaAm`>7cLL=;l4B_#9{0Z zDq=)s@<5Mn*o{GZ>(}Ht@$QouXgawfa?{~p)gzeG>8r1+)mHDR+PKE)h*=mpk&K&n z^@*)>A-qzxk!OklXlU~sbjo_`gH&F(y10>4Gxizm_-i1HSAG!7kP@jnvscuHw0W9` zIl)Hcb`C`u7zF2YPhfW1)+-54hs+^bAyMK`-QeZqt(m+wqpQg7Zd75Igyv*}&Ot(7 z(?CkAP6p6B7XiFTFwAsNcgkCbEO%PfuZ0K;+SE)|gkXmNM>O)U_(yXxt_FA*m9=aK z=f39*%5p699+?n}@3Uz-YDTZljk6zV$4>ml?ZR8*<~My}El$Ql{0aB%i@}#_YbZU0 zkG(k0>(f=wO$$1+#lMTx6n)iJ5XimkDB5a-CbknDRIMiD<;Jf@w=J}nD5H`_jA&pC zDaviXNvj6{rJ=dmrJ=~b_nr1U=Vq2Qq}VYIfYes>7Al^jG);Lc_JooRuTyf3s1515 z^b=7hLDC=T=)*zkE2mJxK7^Mk>5gZsNdDw9GmaYtSmrThNzQt?{I%eX)vB7;bCCCj zmXC3z`%t7H?$B)3gBOk~OoB_x!*UrphrXPAvx~L=69jY`t{pG*TT!3w>s&fv?4w84$ z`n7Lq+xNPBrKAxdu6To9R+nj+4I0Za?@z^ER!?mwN?>2wyvYFZSUHoic?hWn?3pYO z+B`c8cz~ll0*8Bdwvl8Aj7RlZ$)sqOTL^urzC22#q;0-Zo$o(Jr)V%)Hw9{uqqooc z6`9N;g*U_dKKeA565cR2)$@Hro>EZ~d#BD`wg6QTV7-H?+ycXk)QN5&FBIwq+!V*3 z)~**Fgqe)q&!QPEe%+b&LjYndBf;K6EpEC4VAFdVi#|PaU&!EN-H#lBM2M_|xB6J$ zmGtwZ!NKHg2bZ#IyH<{oo>YeGEy5BM9h+U_rUz`$UcLBwE8Q?lJFtuq;FjIxUxzDW zi7pl;n9ZVydi6{;z(40ckV1s{a_j5@ zsLq6NA=V>2o0yCDE=sXR_N)f;OFo~SHx+ehG*_-XuG<6MEF?Z!!=i$@)INHnkC^! zc7(z30x}w3Ae&`r4>7saQ}!@EHo*)8WH3oH8Rmd+7WcOuE9H;_zX}9Hih-6;mUbIK<&OOq!x^DY9mNS4Lvi$ z^evKJY_X9}#S}?=#@q)CBDH^{4YPI%CUiH^3S8dRt3+bY_QjbjF*XPL{wjU_Ui#Zt zeRpI139Eh}%X8{yv*g)Hq=-)gh*xZRK&8!*HsRoJR3O&WKO_i>-F!<`T2&T<5yw1v z!yE%*XK~~S-_Xj5{tzQ0few}8$MKbfKy@7Iil*419OB-@E3#O@<`QG2n&LD57BIQ4 z%USb*IYHG#8he%z{^fcnR0X^c(9~$#H7{aJ6ij>~?kLhBM5P7MlDy;S+(q=tYnk4@pxmV15PGaymM)F5dE zU*ojR_D$eUkIYcBNgoSzg0K7P9b|hf)$xfWUh{OJZJ4tQd&aQ2r2hg|i35@=uk5jXv3s=t5Ts%iX?@Yn*RUo_dEKfZUdsGYqXizF=bH?YdO7(R( z7JK{{b998k(8U|E0W^`2Sr!JhNrw+ew|64n@c0vIh1aQSU#1G&ukDIH$t@p@3l2i~+T6T}0#=p?36ZJg-tm%Mm2-RMn1RhAmlL&fd$fRiCR8Nk&|HDDRe6um~&J!aR(? 
z%DJp9vYC9|_yizj+=fl02Ad5TtXLM+R4MQ#%sopJ>EI;=7Rrm3nt1Iuf)6&s1Ix<+ zhyuA2Xa~qOR-whMiJ43{vP&dG1yCtd1OOmENx!z4ztFTz^W%3eI*DhMXWR#W9U*G?TC20wYa|n_@0|JKc<e%=zCj2NzpU@$C5NIYlH`zt$ql);aLc!+ac z&f8JCI^p2Rx}4M1(fN$sbrb?umV;(Sh+b?dGMhCIOXMSY8^`h!Hx1bo^4p`sZcQY4 zK7E6|)lEI068bxJ(XR9lZDRn9grbRNZDVaaj}eL)(>l7ogN~%HvQ)tvz&Rg1j=s^| z>-s}acfeqaZ*STN&66j1p9$HG!?HLjfFA8QcRDSItBjo?x?3)lMXke&a)Qk4k38IS z$1v7NFC%=T{I18o&$S&xz1B3loVGsjw{FV4cbmgY%Q$Gem2z()qE%|;Vb0Lc+_77x zt*MO8SoOUZCApr4TJbnEVBs#?9J1Bx%8M*@#-Zb^N*moSO3=mh9H}UyDxy)_k9K)=wOgwY!oK|nc zJKs5&8or9x3p^rt38%yl0lb42Q9`b@Oa8ldH-r+T$Nyk7JDo8AK=g^IayVru6E7_s z!xw{}uOh~s?nwhn9W^vMz$Qj#tdPs=AyoKFI!l#O<1i3!80&LLm13GaaNznBi*F3~dx&)Qb42i`406jU@NovK*I77QY94FN3ZtEf}jLeyFuRU+(;zEdM!1*HmZ3!EB)tC{kJNT>TC48hd*6q zgC>S-SJ9`%{PGB)%OEv^fF;|M`YBy8G)e*QVC?;`2V~=WKymo;Z|h%I7jFaTxqx40 z#6seZXRlI%wH&f!{-5p_jldA=`92qJ62Q-;oF;~Q`+zP1g__l6B$N#%A=E$qM1 zDR2a`*D!KS9^BQg95~--q2l>`Y!CP96JLJfx{q0*_`l6_@d*jN#T+SsKitF&jl`G) z+XVuV14Dtn04b7S7OXfKgt~;DY=*^0JjJ{3wpk6QPKnQcO&va)xGZD_I|(j>Heces zByPU^t?b1EE6bOtaBYQe-3M>O-?~O2BytEYp@>smKIX?t(>;V_uQ4Qe;<91^D8Nd@ zP{g}?Cyi%63586LQz!wb*t)Qnd)zD%>Bz4c1H~kJ09bB*+vCBhLr0sk;*CS~nZxJH zm`X%jN`z=pu?oR5LNlyT*NQk@s3Q>)LN8?h_{O8=z7*nf4DZ}dBcZzfG3!mqLlO|OVTO)4&FI=riV-PmcE@$WfEsy~<$ zu@f0W%oxP+t40~|%DbVc)aJk%%(gH!g{#h*Lk-gHS;iQS=tG?ixg9k|QS{;La2NXu zfRYSw*Qo}9cOz%#7!#Mhe-9EtL+(y6c7*SDQRk8{5%c9qH2L>gW1a1NHB5i5_oBy{ z7U%+StK=(N3bWu1M_L%JeLBp6R}yMr`4Vk8cG)*NZ$WL8|44D_!~!o5q=N5>EDPhQ z$XzwWfl);rMArNUuF>QKUqPA%sUy|}V0z|UXSs3AqF+!19Z z{d0OaLNv!B-LVyi7=>iHZI6LI{I!6rJ{pdFzqXi|4R0vkOWhvki{S|W*4e)>Y5}U3 z=)lrWoDc3O+RN}RcE1*|a(FRFI29#Nx@&6&0R_b}Xo&qo#Oa#u$NTvJHOQO^_#;T$ z9|id3$o^xyi}URkNO=Bab*=@hzL*_Utz`ka(ohG>7we|J%YNYRL|efvfqdp&R4kf=ppQ0wZ{^Bn4yCRIs)R(nH7E`x6!NQ z6E2pa^YZf%dQEvZ`j`-6*?3Jd$)eh6qQ}~+xJf%0SKgLg4?S5K_e)m*A%l&s zDCpz*gj-Jor%5kDL315u05DFF*f(Q>EP*$*xlZHGj=G$2ScdsYGNtpAxPxw&YkcjB zLEw;Rq{9v22KwekYPaLxzh~CyYcVqhU4rdgzCs;;>orx-1r>etbhP9I3pSrDtQ)Ek z%^IqYAV_EX_fB=;+<%~C1S}gq7z2`C;&%^SZS(+j$DQv)$DP`(0P}_gRL}PMx_59y 
zdK_I6(c0JGX_6SvPw{Gq4xGj4np&LU0u}_dlS3;%9ihHjhSxPdvgg8^teeAJY@lH| zI3S!%I>oa<+&RwB>dQOfnG9xw{5nwa=Y1&BJmSQ3y`_j?N3p&{%IIy{&6E#2eNljZ z>59)BEjV(~(6|c+Kt8p~pN=nTyAOlml zck{As<1nEM7jH<5+3CVbKBTVjI+m0wm&k!4P4u*}R65fD@Mph$AS(nN$%b>9ctil&@hH^6 zd@cM?m6;ayTpjF0K z9g8ne#qTHss2`p=MZIqhPg6&l={wN+kDA};iY?+M4|Y53IzJ4||ar&+sJzg3J-Th>N*yD`b6 zBnT*TV?n3g%B25fMD(Y?R0l64(SuQtq6Xf<+EZyiYig>O5hMxqymgP7T4(djh<~w+&j;kO zXEFoeIJH*cwI+vi>W77X4#(O}E&FFwYXXU@y@;4VbUE0hWL6P&#JlQ}+ zpo`Jw?U_Qh$}iED08^DbI`&>AhbxtC<(UC6r7jb{JXF2?+b<*y$h%xNWk}PPUmx}C zdT@38dbO4J$KW)-Zu9YOL)7{gGKrif5tg`S6SKc0+(^<2+Rb=D2t-f-0~tYbAbx!= zoTz)Gj$Z)^j+he>l_(x>8Xw##z7v`<3%ohpO`$O4!-}X}7`fb_6%)2m#6H2X184?7 zm58C(ifP~v%C-m74x(Wg%GfD)252`3wzKXZC;fqTms>2#=bi%(YkgtE9?vDj_UeJYbaAMSGiA!^F) zKAFoAh%#Ax+c3}PaSo5pkgK-bKcvoW(rP7$r}t5Nme1>MV^znZe!R!#OhN&On=a`> z?GalM+@tuq_K6g2UTHaa3LESADkFvlc5c7kc*{Xtf0UTWgq+BHmy!?l;l*#!LbFN} zNdCC8iN+dkt8kol1mqHtF6Oxd_+YsRf(7Uf7YhC$EvT%E z5-bmE{hwhXvsENl;ENI#7jEH8v(;@{113_}kYrG8guZ?YbK z&XX`EF-#bw%-sk7A{eLf_g@5aW4Qa1p&y_5&QAINB(px&d4aG{@y7!wP|_yu8otli z5>a$1@uA1?<^QcB(+8NhkB76|O%}z;>D82fSj_rm(tv05!zBY$(t**-!W9D%{O#G` z2XdEU;MvyDzuiDdX@aoo2hwBn;wa}Z^Z#DJWkh*v$^?dxDMoDXaLXd&YdKM|5xis; z*<)H&<7!*Ea|DQB2DM1J+(nOjx^7B`cB?yty0 za&b5Mui0*3)Y6D*n?omt$EH~M$=;{IE=iW9bWj_!o6Z6H)CNn4jsES5)h#{0iQAUx z)_9QD{JWGC46vdCzr7ZeO`>JcGnH5PYUfL7N)FHixO7zuY~3pclsJRt4m}q zr31ZmDbsIv5EAGErx_RKv3AC3Qh53y)(~{cxl|{|eJ`x7T+T1Pc-XUl)|(P2#6^cFDUNPfY522#8Ei?>SqEJlmI6O#a`E9GL2@r{9AvFt<4dF9Arw1J?h=^nEkmGPAk`3xjtL6VhU6P9L ziut~oeTWVN=x2gdjhw;c_^aT3%Pa~8?HRlg9Ll{>|`w=UgKv~2r07S*Fd7!WA zaAN>W@{$Go^BvL{{3T?nZ9n2^=LcPF(*A|P?i%I5v|*=m@VBcISd4 zASB!5%P=S$4jGDKIuY0!>Xh*S#G8*sBpmS9*|C0QY}T5IS}}3X3F2!v3`VL@qz&t{ zj&puKmKS{+y~F+6<4nfwrA<}ky2pb<7D;#IX*zH1kS0BJojlWJnuBG|!=0vNX(i9W z;U?4XQ33Bs`f*qh{ym?yf6|o4#Bgc_&NZtZ+uu6P3fnDiX3r_x^vu5R?&SGzpCzDT zBmYxvNGG-OJvz|r8s5B^+OC5<1kqplpKCI!P)9J77~rY_?-QgSfTaYf;BD24tUeT; zt7356^*KR4zzV5LbIRxWTABVgf*9}a`&1q zx4@6U%Cne|*H(`y;@$gdI}F;0o88oYKF+sw`qQTJ^|LiJ^G}6emxNX9#llcr-;+sr 
zN&6EScWyTRQ@)VGJX1%cR^GmeH^On0G!z=i8oZvt=0DPrmlG?aec6?H9z2YTdX$Bc zzrajOC>M**ORjE*yWJb+(>`EbUw7lM?&?svhvc6*pe(%lIpr^VG@Hml9mwK(~6f{l1?$*>JsV&;mWF|7~gciCF0oZtAMnI+$a z5zlb%-83O6k(P%Xv}`tp($({}H-PfpKI(DXvg>C}_pw^*E1buvSw(rj4?)g7;-dYr zJ`V2Bo@fEe;s~Vm%SNCpYW~-Ozbb@5mL7ZlLdYPfCfYvaDDDwZ*58D3Zh?RQS7G;G zkQ9`Ki8Fn#yg8rdvaMGwW96@&OI-j%o;K0zOEujf~6#hgXiVj^dT`kVwx z{YzF#1XM=34i_yh#;ucD8g)$JoOT0_x7H-L{HLl)je15vANR-k{bf2U#Jn%0IXDwD z7;s~$E~ zBh7Ktr^mySFC>4YMl^bFAIh%PdQvN@Z?~#jD^Kkf#?^>6b(U;-7n1a3GX|TXyKGn? z0R^G@&#jXHxOkNrNVOuWJCeHCzt2p^1+M!6im1puwXSvjhkDAvsNF&V^&#$Zv)w59;&TRj_Yy`WJRBY4kmS``Yf*Bg)4) z-8t4`$<(J>Z_ZqWyr!u43T3Q8#y`??TCh|w+F_^wdcPBpXIgY>>a_{$U*!~Mf zpN=QD{jh)B9#}3q|DBnH)$go788d1H8{w*dKGk`3!%69*RzeVsT0L`8?)?-b4u>Q( zDZ1MV09N(N2N0Jn3{YcD8N@rqocI}^gUYdezpdCxleQ%&Y0-4cefNcGUdhHAn0Dn=5^kAz~lvr9f+U# zxX8nuQDKPot|13%3(fP2{o!K;Pri&)qBCp)5GEG)d8Y$Uo?$I<+v1H0-yydNe9d}| zBJv4F#!ZV(oPZ+ztAyJ}W${onjqm5x76kaj?tBS-n1F_pi>Jj~_|fFQdjceCQ>K?U z`yXq}a!whIP9w*mM$uU~9f1T$}U zOuj*I(S^hhnw~gtACt04ubx=PE?5}QFUG{9V~nb$<8YdcCyg7NuQ8k75-LOS2H&@* z$UXK=pY*B_Y(V3YYEU88BO$alMNMeL^H`Wp5FPP<@H2em--(RMVrF_$$r zwyKR$-Jbg|qH+N(8M=xXCeuBxD2%C(KUBF(5+!>_SPG*R*kVao2@0)l>MDW3IrwJX z#fqjfWsh1r*(j&xO+Bwz&`3i z#5)XCVePzq{KuKC^t=OZ`CMf|P zx@Jl?6?@QgrAoh-(hi)E(+(U23xMNW(JW3KIr9reEIVB!wFsVQ`DyF0=o2;=Uon4m zD;O=YDdZ2ph|}*~_2j^8vr_ZfbqB;@dI`u>a%gF|?ca@>1rD$XX0=OhTP6N*S?I>UR0aGCO-^Xa;bIc(6o)$X1%b%Gf5I!o8l^5Ny}pOlTtcH<&G{c zdrR?XV0}A>jFGU~>2PyWjhF%4-1)oQPuWs9Q9^tUeYVBNiMR;D7ZJYLG}gT;rHKJ{F+Ug;g{^lRn@E4-_<5tr@vq|n2WYgK4Ok6RCZ z#xH0~BwS*v-a?3+A6QiFaYxkQn~G2n#^Z$WP9N*}KuCaM!OqgHM6g^HtWgJ!cqTUY zHp=O+Z8B2Km3@|-)@(pa=5h^n`23D3r49A~-g$KmkX@+b9KAGtVpI~9G57q!gPwUI&^x86ij5S+D-=K%vc^FAb`u3`gDxBi-LR+WM3Z=N z9Z8`rSr3S46xU1AC{X2X#@~}7MEybCcDN7*1-QyZPSgh;x3toP@DQRA&r9c#P*|>I zyDWvXpMwma3WwjmQpO7xGK2Xe^u^HQR@)#Qx!O7;9pY{NJPQtQf}|v1vV@eEH8EDkB0e#M!z9a)YX(Gs8R!pDB`H`hr0HD$;j_>q9QZUqX zl2%YO5N3|_03A@ebO&K@C_tfNF#ICk4z;^ZVC_{Cx?LN`_P_D2YPR&^dJ^TB)%}eJ z3AK*OMQAS0r~ 
z?64HScc*U2qAiM<)%I?sdGzSzRK&z{D2hFjErQFF2Mj<#EV%-zsQ|j8z(U{)BfC?) zc$0>oS^>xlMz>>mEl}-ndoBby2>S31ZD1QaO%MI}-2%Xq_lOdk1Ivk1lPaWgyP}S0 z&id2(aV{eX>PAG&Duz1dL97`M6w`mAmMI5;f`O1o2Wz~h5?8jvlOTy-kp%j~OEH-w zwWUsa2kFutDwviHZ~>O~fuDk~TUfxB{buaC(@5qxb*&XUZM%jXHc2hBgX|J`x?6IA z8QR4o(sTVb^77Cq%{(t19c&Mf*N^;oi&<&PVw?{OC(3zk*I?1}o*4+#`@|QdK-k^b zvN7T5BTr3j8le(mp{e}w>#ca+J|-m4VqF2CaKuPFtCngdZGd-;BBlCV71O!J&#cMG zrP1vZ9Xdx1581d7&7EyF`FkBg*N3x3yzX$5uU~Ivfd<>Y?g$=2o!bK$N?~Zp16V~$ z$+rTPz0W1{_{lV5=~n(V!!YE7GG8_*Haf~S>5CGzX~ZzlwHlLpPdFM+3#pgzApwWP zMys%(lL@;9;sCG=QioJ{n3PZyn=?nyVm47{Cn$(ISmFU|5h%G-ITGP+h%DWV%>XDA z&-RGAtpOjwhViLMd~uE|ps3{nrjS~ZEU%Js4P>SCJIdt~QQs~vKD_n6OMzpi8tP_|8hTw07(tUB$f-gr=p-aNXeOv1}TXI z|Go3!0PrRSSIp(L>r7Aq7%sGri@?ev;}~095Mhfi-aNdX(8}rv)`-t<4<^$d9_!$p zF2^)i&c!h!Fw86^KQ-g4em!D|%R&|+3ree%W^$lLzv4nxJMHYZ`Hn$s|8~Wu+^8?h z?^$2k@yFnb8IQtPh->ROsC!Lw0dICM#so)Y2SltLATeOVH8PHPe-Geix>(z?u)vkhyZgrxt=p+t4{th^7o%Q{L|6`J0-r; zqc;m0&VL0G&p>x+ zJn+*P#Jcj-=e`Wwszr;WG2=W}`Icg5;5%SPqeQViyInlSvnUv712$l8*K3m6BbvB8 zvZF2yy%bvJlQY?98Rj3@B9FgI08>k9TFlaq{`&MNJ$(Z1(!?@EV`I7LXhqa8shy0} zFXTu0jMRM=XOOJ){;~>9t}T}>8X#yr>Muv7K3PNoRd02vhnS2M`4~B|J0(tdp;->0sJglqm!%V z+b*h~WyQOSnx4$clE;BoevXcgJ8q__2*JCWnjpnem1ZqawjG8-+~NJhb7}-|yZXT; zRj+%XKaV+FKkiYH_r;5az+(R-&F`6#$u_3zI8_Ypxr*I2EMonsiV1*?WYfE;B5Ohh zx`q0I*ejAs|Nq@}dL$_rZ2C+uC^{%N`#;Ii6zIRMI-3jWALDx9sW8~WSPNuWGW%jk zFI$1<>M6_fnqXZaEGWl$WEW(bya({DPKaf~TM69ev_FihLuc9YZsk#`2d2rp((v5$ zvQfdw$D%hSnrFAVZds#M7L6yz_SZS_D07`^ar=VtH`OH~+~1}A@&Vhyq(R0awle@; z&v_33>#6q{%5ey1k)@yr91f2@<8d1>tPS<3fg@3NV*L|UO_62Rb&G3L-s8bvLzekN z0up3sx=K-RhDsldN;{k^<1UgQSve4TGKCt`a3W%GXpG-(i?QDk;IWl4oujgn-@r& zq9#x+u_y#e{b&?ZUq+Ws`HDWvO&*j_y}UqobQn!#N_>te2Xr2RIxnZ59Pv6!uC@or z`Ddy-3WT=p^Y0Y9mvXx3Ppb?@Ek=pwTGrh){>CQ7CM z1Gqp(zes!mz9^?-#ZK1b-C7_}JxgarlqlL~KbM71j3u&ypMUAY$TKC30GL-}zM_n? 
zbRtV(lz%zkMOR_~wOQW)gg6jPNi0Dq*g6zKmVXCXG%cd47F43Rlp0E)TA+jZ6Qx=F z!~)pbifPSF z%%EUK6!R}z+EW%Yb&KCX3^5282Ei*E?GJ3w<#8`d3|Aga{mh@thp3Arx7dAfaI$drdm5*+>@@IWpL;L38s&sfBw#L;)hvlO*Tp(pOMfs?UxICoBH5Y?Y%LS!QV(hiu7^wPs4c}2 z(g}7o%BqwUna-RWhps*~)|xg5f;nAX`l zFyMLAB9jai4+`o{T(=t&5X1&>Y-&OU^TtevVag^VWMY)l6O} zU$6mp;PIrfNGp|qATuH=W)Gs!4^CK+X5k1t_o+D7#oG}N^cKwL8gJVI$A7v;zOg%_20UXNq_d)eq<>h^)4d;TxWi_nkxy=|n>plauowhRO z$7i|7z?@at<(wCvwzc|!g0*HjjRFLf%KW1NhyzRJJJ+lS_-(Pv6MsMX(rHkaGpFqb zSS@!KvaT&KR)EzkwGGB9q22&!vKHuCBK`sBRiM?4f^7lanBCq0t!{v93+Tq|?hMfF z-Sa6dw|B}pbAj%WBJ=L-o@2g;An%@2-d&Jedy3t2$U6ygdw2Xg%~(==+bNH%WV%QG zLiK3({PMkpdiT8Zy?=#z_k8pZgnGTty~W!G@Xh?mJZlH#r{%`Xx`g7g27|;I9<~wm za9!F^B-w0pA#GSvs@5L0sHnd@IevcDrsAVzJmhgKE?34kd7S!A0dX5lZdk0lc|ntjd*aRw;eQ@#`?~3^i`uvBWIN@X zFE(52&OqryebVZrLFJy;ebb=DX?7nqXbV1QAq%iS>a(2sxI4NdGLJjqdKNMp`=vN{ z)9GwUakk)b>d%erjq0r41-m=Sqi#pe+etP2@A~g*=-2 z=d60RXJr_c_pUF$Jb_xsus42p!s^=_fCUE|26z7c11m5s!_Y^48|!MfB7Z$W&^NHo zx05rmpe@bRU5gc1u(iha&=rMh%ZUbA%Jh*m1NOh~_#s)66UUOAk2Y!mSw4&8k9Wu8 z-SsGJomkj2odHUKGmWMn}xiw!|jxCUjiRC&1Ii79Jm)3{rn>1X`j;Jf9uaZTv z4&vO;!z7-4oWGs?JfHkCA4j1ql zS??#mPq0CP)o453pG-!H5&0A^wFKUL3NtGnW6Y60W=|M3fp57Oa1{x-&{#)Oq_cN0~C%a62g{~mvn!}wE?uS z2zC(1!b$^c1#{l+t1(w@TLR?49XVIvDEBxLwwY&sc@|{hzkvph92lm$$Qd*#(6#m9 zh+{l`VP-{gpjNneYSwx6j9eJpO@k>kh_nJ z>}p&&{n%dzyGE9pOR5&<$>)E+J1}edti`AvjU;~-e&na0BwVRxKlGjZ5pfuzj9EvN zAP;7x<`nMaJmJt+lk-XiKK+zLNs<2Q=V|ywlhhJWcir_0#z4a501+|n>A}A5&O9aE zS{E-`tgJ&n-X9&akqaj0dT?6nc@|fv1h_ap9N(g@i8^P-e-c1gU zjWCG&F2yiFojHN6;4td^{w$2Qm6?c#ugwzqaYqSv(`7Zn08Pfg4N_{dxO+`@2TQ=c zQ2>fP*Qn%wns-<2)NNm(=?4hl8?E~R*EvDB@(Q#gwCRHZABYgcIsqUY1PGoO0;SmH zjJe>ixvX-?wz`loxNv{E^s#a3O2HBEE-|8@FVK<`ExR`H-Nqp$APdH*@qn0wLa7!F zLm=IzUUVJ?^nigL{1V4OsvTQDHXd$Lcjpq*cp3o0YLUVclDOC6vo1#-` z+@9!%izpA*eqMEz26>UjVSJ*$YS%;Ghq35K562(DB2St)>ZCivQQy$NY?BU-<90Np^N!oXl?Usy2$p!~ z?XMjQ=^fSV$j=AIfXLp2bc@K(vUZP(-xMycF}@ATCLE1+hK{gjv1*(rhlm>oflY^s zp*f+KL&b1JC>tIB`w9ix8>3~q*7aXX2#eE;M?SYOTi z1>Dcu0e94;^~*jn{&!b-lF@0_TR#Pd&G9~YmfjD~($j2{dg~LMFadi!F%G*sTFi%T 
zG4}$?xJIQws=gbP8(160JJ$|qiUg^jJszU=J&ZMjbxznfW2}u^ILKH(IsW?0waUWZ zC)d*Z;@W>;gAL)k=aFWHWi;5M!5$6vT^MW=KaU3cuDLcE?9pJ4If;H&bGXrTf!CWd zfc=)g7*i71y-%KfwJ8aT1RGNlqlfz#J>2`Y)ni6t%t&-~aBWpOdV3D8IHbK6qq!c< z^*3X#$9%+N&PVhusjHP3U0)4X793qyQa7j*QxAW~Y=*E$2G!I_9GrN4YU;}45S#Kd z7ns96UdW4oS)POl(p8eK%R>`o#n>__K1V@yd}0x1 z^1z2aSCN?Nvb-FqmF|?Jb6N7{r=cI`NYz=&3Kti7wteU#IbE<<70DgCw2i=K!vyVp zq~m{JrES!PEy#YTm2FB2_gvW~oc#;g>gIGTARBWJW3slRN4dS=$7JopPu7n0V`Z|I zj{mzgM*oi_`hQ5adQ8?n=45T(=)V?)^*>NIsFN~scPJRSVJGS&4tw{(zS}-gN3e}- zN;W6z_+d`ey)NQ~d^JR74%4ml=X}OZxW0eSIXd3TW2}-WN@RB6qB`VO#C~d$1gqMR zGRKhy&E&z-9Gkm14Hu{SHaQDYx!0>NK@`Y0TIesIFRvh$i4WhTJ@iEMAbxJBS)PJ* zEz+R*E`avyF_;5<+xzVp+wbRqEyp>BLouyGBB5cumltVHJ5Rm1Sx;TS#59IHW2>qU!hNf4aBS@ihAj= z4_%WKE!v`MyE=Xd%{fJ~NHoj@UtL}@>eE5GK{E8lddE>x`0jU|F_K(P{in2b~pUd`O zKmU;e{v!>Qygt}3?wZ33Ob_dSFAsa5lOF}I4!W_EFXgF4<4F+8&>Q<_=*L5eZh1wg-b6GrMbR%y!Mq?J7 zpCGu`9@-z5)?a&L`v@H_ESXPY%8q{vZ7_o#HsevD4KZav*INZj#Y#Q0R!0x1-xH~=e=_{?S60r;?I(;`KH;qP)LN>e=%DlEhmBLM` zPn}Cq9Fr`{?$t|>()dKVgi1r_^NY1l1p3zOhxo8Mi6Zy|XO0`|OJGUR8*P8+1^Y9K zG{RQMy6voMj3rxU(2#^l=7vvnw%Mg4y^#H*m+nbmY47tm_Wj{6y#=tckH$d=9VTr@ zdJwOg89y^^oBQVFJ=D<0uK3&5iduMnarxrUx07%0UQE7S{QUvZ;wZ4#2!R7d%ml`)c#7~a{3p68Pr^A1W2-Q0NF~iJG_hfczGaH^o*#fLgN1;DlbS38U4bH&X zNd{C*$<~fE^1PGEYN(4c7q%0Vqa=ci#t|HPqhu~b(HbQ=Fd@CZ89H5>=a%@&>E-6} zRx0TG_ob}lLZNx%RY3}%AoV~Iql&D$4ZG0Y=b{s8Ue??}o&&VDoR@zUKY6jISxBNV z{iy&20RmSRyZ`c2&S4)WX*YIz3|IwzLRc36?`Vbcqw!+nM4pTn$LMgm?q`I7yNG0!rq}hehoN#250=& z2pI?hOWCe(>g! 
zZiAiT`I_{6dj0O)vYZv1x9|nn z#&+dIx7=WzzU=pG!P~*9=TDnz?yb6NTpq1K+W^ycY{_%7UYVxi^|kGmS4PBD+0EvnD|7QM8=*LyR%?dwx-jyM+n<)h3!ai*puuCu7OXiZ z26ezTG-hdr7t#!#0Y=aSg6Ies8_tK?f9A1Qwyg;*Hdf&es>DO(`Spq3gx41FO z5m4r|SG)rk+evcz4`f73db^BQXX^aM61}jo5MDSw(qX-rPmyEV{#N8$tOGmjC8afQ z+OsUNW`hpi0KaGya&psy(f({B>g;&PKkf5qcsmoyj$MBnkZLlZZBzNI>|j?5we#=_ z(V#&E*aQJW0}~)R$(=fE&-v0crnd9@0&hbWinwJUY5^xbw<8uUoXV+5NRWht`ahS1 z1W5|_?a27YX1v3T2UY!H9~qxC2|v{VktahAQ`wV~YtmMq(*P zKI07tMvM)~R>VJ)CT;UDu$n?L2AShw^fWx|rIcy^G^pN5tNb3xLA9g-F0EZ$2PdoJ80+4({}+Tsx2FV5T*(@6qkwc31@4eDF^@_kUc% zJ2m7{5U%(x(t|MEO<28%L0*TUy9Wi?wsHRfu;oyUmw_V$69O_cmm$Xl69Y6kFqctM z1S@}&)Hn|QKEEQLuDa`(5IgU~-c=p2N9`7{doWk~fu$%DY4EIFNI zdSKpIw)PgPCXQ^&vLwrTl=hsp<2lcc9-kgP{HLEdshdVJa!yxH;3sbE2`3H`H%NWw zG2+q*vYm>Z{B)N z4)h-#&-K&9IX9!WP7o#VES=X!|2p!ggOKT91G#5VMse|H>xq>g<_%b z;VoLd)un2PrmL=KRwx(=3jJM;j6j3&NP_~I#u2b5wq1U5^0COey7>-R{GDfvbsoj8 zkg=WcfR;O3N^jA3v`|)q@pWaTD@%VO=c-dgI@h4dils7J2V#BpA6S?9kv*V$qo$ku z)$=6TCMWvd($HHL)wx@HzH0culT@4fs?=<;Xwy3 zf>SS|ly(WGZadzoOeV8xQmB80iZZ z1^M7;*|A^Up57&U-e}G5=F?y1H)HVEnb5bGM79y+hY$f|oKF4Fc+-Cy{q_cO885tz zLd`^R#UTNq;2I$pOoq$kCh9u5jk-?e;$(H--QLQ#bdCAJzP=sqW%+NT>7vTZM>aPZ z+ubO_h4e)Kw-`m1Yn@sDkozB8Y!00SnX_;X>0>H$?9?O`&q4 zA$kDx2xAO3lNOsH1OSln5&zUWXG!2tG`?QQbap@efobK#`$8g>Z=TSw)6_q zYHB_E`QOyY>byYo;fEtM1N6Kucv8z#6!(yRph1Xn2OyIP2K>=7gLE{N9(&M&3s`un zO5I&7qj2Hk#sg~>7*YbX<3%m0Q0>JbU}*M*AZ(+mbzwnM=#~2HVc+TU;XZS9Hv{Gw zX95!QJsDwa_9K5Asb@r!214E^0@n{B$@!-bsh%V;LU)=?-<&^$un@x2q62qQKd=*7=J96VO4eQ%5t zSPKV>*FU^?iY*`X$_^E<=LFWV0#+9*0^X@c0Y^Oa#NwA_VoFEYVM<440eXOrc%Kt4 zy5#_h3wMpGT7Y_6Wdo)UR_xo3X0=7hjvp+}H#&dYFd3^?#|O6wv4~yDMx}PBlYr~! 
z-B38O1su`>wl2wtaZGqz-GTwn$^9l(BI5$a8ymYapHMFFbZ~lWYHs)jz-4G`y;C3h zK_Kv_YVYb7Z~;9ybSZ3fKKM14^o9vcov3x{=^Ws!<;D`z8-|`I`yeqnY1sFw$RC#~ zdq000!}h`ES>0{yo=n`JmsL}5_iP@kwz%&ilkW*v%1cEi8M{du{)ROsh=Fdv)*4En zW40jz2duBee1S|T`viDui<{=9^O{Kzx-tys6~625b+%E0%*<`qpOrfMT=fL&i{VhR zu`Y1ia?!C!eK(4OX*uD}%D(K}e^{m87MFjF4%yX^f{)x)u;ea07*Y5#1=xKzRDxo7 z^8QeXOeBar&L#N6#tuLUN5NUxst^y0<4UoZ#x>DBY62TJ}afHLrtuT=7vpxro)=g~L@DppW#9*t#a>n(5Y{}-IVz~Eci9H zJ_|UrH76I?sF-%+xEF_T+K10}m%rQ0;~v$Ty(_+UT)P7Gmw8=hpI1J(_0=^ggeTv6 zM-ZO!ccXOk?psQ~^_BpgA{eckcWZy}zx`*a+g^F2NOtvg#c}PbEAK8m__~r1L1P); z>B>VfLYQ!0SDr|GNOhy3cmcr{U>leoZ?#CwZZ+sU*XDydb~Oc|F`z1XWE))$6Sv#G zLNsk>GJde51(>-ofL_;qTV7BXVB->?7&F1}gD5TtZ8ZvKmn8x7mIj%U6N-P`pXF2` z&^e2RDELtE!ul7C!tx5Nf;9=r2^fXtor@%uH5D(Hz?CNCJymi^d?! zc3z3u_GPD09rv9}B&i!vvNwNDB_c?M!3Tf4R6_Wv8~WmRpGu(dka~!b2Jj%|0HW>!4jIRBw$;-dn1Qr1RRI!fg=PH0Wy<83KIk}GBq%l@ihY|mpbPI3V-0f>`FT}On~66oxU`w zGwCGRZsg8Bq+Jh1LJ}HYRDyEq{q=he07@h!Uvycor%7f^=m0qU&J_m$U1#pN&YQFE zug@;8yvT`39I(K-o;lo$Na#{0^z964X~sKVhv1kc==oc;66wMbmu zSI-^O`1oesqcQrP6OtH#X^b&ob3*EpfKmhT7uR>ws%&Jr5{T7zN3*&rOlw(-G4n^a zvRWxiN#+8kP%XmtyQzc!fnpjvDLui1n79BX2xH>M0e=dQbRK#DI&xS@qS$w8;mknf zZB}a~5ErHq&fyRBG@?G`qg6-1od`?@4^Ri?A@OJk7y=S`Rv(}X)mZ5abC~6IW_gUT zFoqsI{8d%enJkk=m^p;&*V;cCNRI+QoKF{l>yG}~dkL^w)>SH$s_L6LxXroc_qbpK zsV*$Q3xBY3?8gU5g?{@gsT-kWQXVf@QiJEW0{MYKt_9J2dX#N}D2<4RV+=M~_62zg zvVpLS5}&pFw&1E|E}NS=DL*Xe{H!OVa5gJ8D z@c$&IT*%an zlz(MrI>`8f2QI2Y>0AN<9C#!O187i)WZyuASTdQotQy-8B5PCk^v*m}{eEm|9n8iQ zkfgGl8=@8tp9?!GOPZuj@O6^sA~(PCmev!=`V>+#TI&Hl;-2^+0Vh%c`+ROwVuC)PMX2 zQ&)!6RRh(~mID5icA{EMcgYHjFuXX`MFILdXQRAAr$HhV14^?%@(g5Op(d~gb&JIJ zKfT42NeuO^9jZk{!qg52n?eY!@}$o1%n&idGGhh-FLk}4g*Ss`X@=ZlN0N&&wH*mW z>z$Gc^zQ|H^kpFo9KeRA!Xd^WNq+&F(4%~a<`QW;O)Wc$uN7niREE!t!9Cv_Eg_Jq zxQhz{^Gi9zN&S-2MP&jU_;vsNbF>-6L=O$Cc13BO=KXk>xLGV zv>Y}8i?cA@Jm|(oE{tuHQ;oO_kz|%!$Wj(ki44sMY2#zHg5pr9Z%$nD`F}isOu`Q? 
z`36r@-gQ@bSY-Tw6B@*uqUOegPphF$g`|5|<+}giCBGxr!At%zJnVYyhdy!Hace%p z2tEH~YrfBbuj_ReG6?F<>W*oXag*zyXXCYYCO`7XX z*Fl3*FtG_TN7r^)pTSo6n|~R4{WjdnrcQL3U~w-JC6q~crodnwaR*D~4<-%ms6;r_ zzMGN-0Ycf`s7qDl7R=@KICMuqG~TPX2P94L&8H4ZEHzm)j7iQI;WXGkC=c70fv4~o zs>6uiw|RNWF2e3chTDZflJGqe#!;*NcWRH#&gi=VCjD|Bhj`RX>VHwpq-eO^_}0FX z`R!PcM9=O~%r3_7ZuBhn3)s%YG%^xg?(uH`|8W4roCz$njxl#C)6Ar5YB^o$3J?iN zT35<;&Q^JDT1ApBU{N_4f~8D9o7QS+frwcGRgn3CnLr0J{>bsg23tK>CxCFE#s{9b zYdwOcxWSCDk5_K(XMaJ;^!!*3x9hzy0_}QS=LE#47dA*YO8#fB0}&d*%%4cnr$~SR6Pvy^k=qCIR7t zvM5j^+giEDK*drXpcKlUpq2Maxn-cPuN-);)SducP>4Zm>wmxZp@6`!Q||Qttd9~- z;)0W9l8S3th&@@J$jU|XSzJ}C`n4<&04n&NpwLn7Fb1g#*3hkVvhBLPSJAeLplua# zc@;aH#1SlG`d|^u!C-K*cy3A>D10{~DRI}wb${vWV0Z2Vz7cI_yr6h83iUid3RTg4 z3hj9P#`8g2F@Fi79kZS(0IvRA0U&QhCjj25^Pg?I)i<8~Q5tOP`=iLd&y)1?GwHpa z?yY$C@-t7X#MS^WRr^=_t3P=8Za>uimir&He^}e>&<(^9JBX-Xzfa7mrroerruCsM z-pwg4w(+D9pY-2+UCeAs{LN}ss#g~Vy+`CZhspipYE%fyr z^vR1g76il{R)JqZvBvy}FmULe-0R@kV;_tT&-7{7E-wYEMO)jA&U(l~PP~x*-?l%K zKYRH%d4I3~iB4XCgb(Z8c2B;J>TmKwtG~%tQ~pg}LizVUJW^Jhf>7VsYZ^FXYTI~v zWzKt*{;C0+?|*vRmXN9uN!BkJ-(LRXdCS7%kAJNabcWEIu3~krZs70nA0}-sdhWkq zY_EOpyQMsaM|;EQGD5j`)GZ}`nDj7=$1W*5Ur$H|kWBzj=#h~5+h0hwSLbE>>5cxv zpn1}_k;AfT_yMTT2@9V5{pa=FGncV71S)^sbKAHPfA?R(zR5EYLV(}b_8yYETyxXp zdWmQHaJ}(B5+t#qNQI>A_`mNicEK;nPHef;d+2z^1Q+09u^&Gy7V^D~=X*b0yt=x0 z`6fucOk}Z&y{oks2B}DV=_O$*!YuHvR^D&3pXz*dIaf(K`=MGMcBX7~n^)!K?^l06 zU;KD=@%cgmH{XM=6nq#0gT!0zE`Iyn_f|mv+4Dt^rQWR_we!L_g=gV?y!iFPXGwh9 zS8qIOeE6yBu`))1mxv4nvodBNo0mvm#8Q%o|MDY$S(R;G9t_GF`LlIh?Wnb{&83QF zH+gkvn4&iZK9P0_$8$>$|A0b`lazna!sAT%5+A;Zq6im{^e_oP=+u+3$b#6bjkks+ zAF~=QH4eNK%n^sUR8VM~#Uc%4Z;9(>U*zrOTq@tU>u&1hi{={szFc zxdN~BRq0%KamQrcYHHn}BkST|_=CKpXX~bTUF~*`q5KFF<1xe<_mwH1RPU7xabHc(Gi3~#WCqQFI!$Eah(KHCPf$}&|?Q* zzwfNkP|)~7ML4fdlUpH1ln<6=G-Qdf*|B{2@^T(SQ2wpDrN&N|cho{V;ZqmILn6*0 zfP~6IX!s(FGUD5`D;6+BN!&jx;D`bO!tn1#Gd$@dAl!h{fD1ih-oJlL=KZ=!C-#g( z0FKmYb85|Rkz%4wKp7cij^X!{C0c`AKZ6%YfU77wa)WCJkDcxQG;z5{`$8;|R61_GZ;@RKU7ALm8zzViNj!##$3b5w*E0iOk(Wl-)E?}0 
z*3otO>Ho<0$3(vCWp#!t+=G4(_ySJ1%1hmv^F!g3*Bk(9H<*7!J(fFbrl`uzBciG7 z(B10Ln{)q2CY!mx`^VdBOwb@P3#}gdTH*<&k8YXa)v2)I7A^>XxYOmPrHkom!() zWp2wW^FV*e*(a3Pm_5#;)J!)-hLZ)Z=Z>hQ$v2DYP)`N{rzT(6bph3JBw>KiXzPeT z!sUVDA~;);TOcuKZmX)~VGEwFHsDvLCfyG?I@RIVC6Tc2#7dWt3b*yaZnWTz9EI@I z^cWvwgCyZ&EDtWxVX#G0W24$vMYXx3x7vJ$BXWO>{*13%s}X1{j47#=18M_zvw{!} z{cLuV^US&%0B3vx33x+)cN6t9pI8`}cUxi51j+1|eVgxeF!P=Oagtmg}^F_W}`B2zKty#mC%D8^@&J;OfQIdOUDl+?lid(Gu1Sp$#>T2kpxe0 zr!Rl`vf9*o4_}^w7~SB$ieZMfWuSs*XLi<(N(Rt_5EycRz<@45RN?}}wwUB(!uGLD zmOPds?WE{E>EPh}E;IV17-CekpvjbQo#Dsr;gl7Croi}D$)a>V48C1wv?(NXVZg4=bfkPcQITN=$yr`DeC_X}HO;~?y zCt1@|(_@tb(|m#Z+rcr`b?qkaMmwaobAbQb!W_2m_UK%YzRup@5J!hyXI<5R@XM1f zS{vQiLc*x-*D;QWF|}YzcRu9mhv-|&t~mI?IQMfDK&w4R9)CHVYJDhI+TH>bU3v1b z6P>JB`Q$?Bk2GIxqahgV^-*XsI|` z$yZc54Y@EH%<`5e816W*C1X^B;og~DMbd>_gblDpSJ|zBUt8+#81U7{)Gh!VhGG5f znh=(Ow)fNy*&(+890NGAv-CqhnANL76PQD~0NSuUJ5ciI|ZHl8zhhS0x@s9;mtCeYEVpxUpN@BPw*?pDS>Xh`6uN z9qhsL0dWH*RKLLdBOHJK-)YDezC1!`TZ{VZsnT8k5ui|?QKCed9xcxa9?u}hnWnTg zeXErB%{dP7%?3G|-W5(C?NW0Y$0A84#YegS5GJyW(P63$WD>nbaooW2@l@C#E^NW8s_@enz5iRn+_ zBK&E}H~$ju__rWw}~C6V8K+H;s5WFQaV+$ zu5NaaI{>&N_W=EsHn|<#4gqYreHgG~cK~3=?t_3myF&n5b`OAGRGmUIzg5i+b_YOr z>>i-o__0$7PxSywII0hn_|M}fJ1BlM>X72lUGDC_m*NQre##&ePhu#d$51=~xTkm@ zeSa^-LjXI99|Y_u9st-={19MI@esg{;(g$6RbD>bLGA#!p4@$0OYRTAds+v8_q6WA z@22(9s6$#qcM60$ht@GJI}JkXSdchMkD+w{a8K(#`d(Ux0Cu!K2-wj&0I;X^A;6y2 zA%GpN`@mn+i)E?9=KU^q2Y~nN?!#Ml?|;_9lRSVHp5%Qkc9Z;Q)FH{^h@_02MRJkC zmLd&8a>)pj;uw+#0QV&Cqwgho2w+F@gMb~$0|0xH_kq94yQa9`LGlp5j^usd14$kL z-jlo!?@5kl4`vc}u{?kpp5-HIJkM9R>Sa;A*+KcERTxqp&-r$D-%EKO!`4Csp?^FK zDm#Yo0iZqM55;zz4}tACzsBCn_yDk;@oU(a?d6Mm85&gPbEm?N02`NwkLldd>{Ek;5zajK4?q+0N|ed z2ZKBEhX8lv?}Hyk{s8En{C#xGwtwg4&9(!$LVZB)m8FsHvD+&I(9?T`4}a)6s?~mX zakORwcY(cyJnKY2CX#T$1Go#BMI=p+IS>ed?cBuxe6PC*fNR~w0QakW+%`v3cke#V zSH`)I4P|z38A9M%&oRI~&~u!R?mWi;eJgF)yJHBTiSrF(ntapL%l!6IPV;dWGBs~6 z8H}$f@~Uf1ecmWjHCNw95v$bPl)o#wJ5$l!*%s~U2D@Xkd(-H9#`rV%OOVSe9;ef< zp>1QTN_)RY_pGmf0teHYmswt!CfDQ0D^$M^4P|ZJbYq0AZvdFHDSuuunJajaa{8A` 
z_^ZWoI&JT2RQX+Hn&sCun)_S)OPQj=N~jbFm=lyB$38Bb##SuA72vtb2UA>E?rGKH zD0I`xHodbzH+8eH%`GG>%^bvZ%Cy8ufc7Xm#A!~^?R(As| zA{5LdA}r=;SdgPOP=B#i`0aCZWz9?{L%RY1&cW|80|2gy(4pv~Atv8l$taqBz07}J zDRcF0R##nKR4}I$*Qqam!_Gk;FfAV20lzkeD8aTK>Q!f+(N2rXJa?|a8UkKpy=x$RJ!UsdJTrR}@O?~BE1VP5Lx5-e|372Sg=(941k#(#z|Y8I`&#P`UUY+hN} z1~di{cIeorJu8r3dq z*F`UH8c*M;vNR>FLe)AaeC?#LuIN;YtC$I#-pZ5~bHGHhn^!%S4YfxG)*pn7rrQ0& zZ?5%#2dln}y!u#M+2-CH;rX_%ZRwW95s#dZjN3%_bAL6z=KC%f>innC~3N2Y(z9#Y7HE%)@BOAl%0p1Yt;nVhNdoPTa9vBoNq>bixvnLSEdc$qj_)ZBbhu zvwu+-9ApYkE*3VJ^OO@x?fJ#3dTSQ>y?R~0RQF#u^CviEI0bEIE;K?q1LV`x-WIbBlGjywjJP{H=a?~xapgRem8|(7VG=n9lI>!g*UfZ zQh$IyP1CbvfEqD)1)ib5-+f&x5hO|E$ia=10cym%)!39V2p5}sKMtE%7Z|n4LVDm6 zPs|foH1)->16q}~MsK*Vsd-)G-R#cnwV-gB7mX>$98-5Shcmj&<|bJ$3$cy;)($Fl zPbf5rIEu&@$`nCfs~WRI#qzFM%^+z|xPNhUnJQLbie8k;3V&BX!ZeZ&l&Uj15o9&y z;Z0TBEE(VI<`r`)n`PBzn7t&+45YcPTbpU=-9fJNZ+tCo%rB0`B+j~fV_R2sH=2Lh zkpPEH_EgO3+a_P$6*HR(Vsd03ErQ9LHD(O-Pr1d3$|iMhL8ruviqi^riC)m+_UQ2e5<8=UY{(+?_R zm_T`A`hn>4qQcXK!>3@}pO-U`<+3Ol8-2C2GXu4w{>fgZ6#|EhcFsk45WDg8^r+m=HuHbeODtqtu(rE1KCe zFPe$zvbKk%x_Eu!r5iQMHh=0LSLrG|YQ$fA@MoIo?=Y8is@!X5#jZ~g37$7n%og5%Vdmv6 zZ1lUkg-b0F%%q`Zy<8}lFuV3W1+He$(H@@a412D2CCvgrOFJ0rMSq1mVTJWf`U}oU z!jpAwRF;qv7jMnTLCh|5rU{UMp9$^FGJ_opf5z4lP5F6JBAB{*`B zfr3P4)9!xBNOlQ6C-Z6a5xiDyZ_(maFGlyDjUP7{q^ zRm`VJLL?WzM1RcRX_2Rz65gCVHt?r^1pZ&fAWsg#Yd3v*2tGrG3!fahk)9WVK@BEE zun-Kp4NBR-5$fxm(%u1$r*)-FL8q$LT=|UkZB|pd$RAA6wQHs?JlU&dSW#4X^n_Jq z55Qg&U=JcaNUWndmS7;#NdXq>a}ixyuTu-#rJ5D^WpOrkv;>KQ!S8>{ z%s0PruI~oo+aj+j)eJwPZT?S)QVqU!hNfjm`{7 zk)qfQExKk6TI?lDgMDfHL9rM|jVyVRoTmSN#~0a_5@)t$Bx`LHh$vEic)B}2ygRDc z8++J0>HTuvJ9^`XooCF+W5C6Hl-ZG3k5fvG<{WQY7(UNJZE`&PTI}%BrYJ z@@(+w{9W&V*Yn;#JpyRhgI`hjAp-#6jV8ShpRgB0|2q#OUxwZn^VGy+K?v8>d*A!3 zhn*zYj1^@dhCdA_*TqV)7>wYGDBt_^E8XyiYSSarjCcO)^ z5TkWP7;q3{@Q3(DzGlyvQ)0lp5R?cPP@+}?W=QgX!0NA3lW7`N$|!6B%?yzHW1t&7 z01deQ5-#%*_9gt5w1}a=8Z}y${0x29&yP z>ts_k4@nRpAs93;xMFApie^>*pq&F0%DMQbAcrybN6hiI_HHV8lxO9Gp4N1a-=eBY zMsDS-Vca|G0y-96vL3*Tja|yQ;}7kb)2p&xCL6C#e}X{~L~O7=$zm~sy~t178f-{? 
z@Ek&+L69Wjfy53ZrUm^ys*2?HDM?HoNNkTtti!BG1Y6Ddxg&7~a0B7zfjIY+IGf&d zU-UQ4uT@vm4T%tl`z=qm0YL)LjUXHZ3LdOk3kG>8pVzJLVDJ!LO43a*@NyNN`%6`L0m>1^ zGDg+sS8AY(GGHLRijuNYU@6lxil#{wrAcWkfAXfUzE<{Uyq~J7Qibh2bx5VX#xn;E z8MV1aHONIdniX#FVnA_!Hm-%RsN-kP>Ukn3%~j-Jv%G4;<})|{b(Rk(?|(^EJg(7| za+7|a7i=0&AqdspL9OakOqz=IB~?ZXHn^p4-L|2XHcwx%sxQ&?n>C0k<;B<<&M{#*;v z@(Ujvip-bm>*MDjHR2Ey_fv62GQO%h9{WgGTI9eSkFi^tX^jBoY|@1+^apNaueZuh#haxN zb#>jv1+1%TH(fAX*@aP#0xmk0ZnPKy*`{){tAvnB;9nLl5<-7B3E}&sQa1lf^HH4{ z7F{w!ULRCcs>lqYsh`BKe;CR-Uw4I>i+Lgi^QrhpdskYK^!V3Bj~kqoHiuhI>o4;( zwI!qZSY15Wb=pNSkSz<^X;%Qu2S$3lw2J~S5`Nf7cC(Re_1V#VThC^n9hnvPhfe71 zc6CVWwhPa$%1-CTgo11M!67*NZr0S>rEIbL(rSI7wy$6IZK=0lfA2YI+xU4Y)>s|; zy2xWy{M-e@#l94_7EHRu&lFrCWa23UXab^$kS_vgwwnO@&zU_qg*bUy=+i{YI_n>y zylO|ibs(-!upGEP)~e~ugwrmPj+H(>$yB`SI9Hc@2)}c@Km~9uf5bSKXiZ7BCwp?5 zKE#hrvA3pW5I5Y3f3!uh6GMbMj30c7Lg0`N@q<}$I}05ID0|ZiQnBB0}W2mu5^>D=W^3;^z0MG2%I`tt?*?F!@Y9(4W%3@>d49$>_O0hrKh43~i; z1QP-^Gndi60~7)|GncV71S)^)lhZa3|DM0XpE}MA5!S;m+A;%o(19r}<>GRQ(>F9smhyt2p!6*g; z1pxTgd^LLg*0ExsKersh!@#=Hy;hd+0{Bg>tI=;G$8gg85(^ia%j2i9UQb-dv9Ho(E>Q;gS{8c!We@uPQ|YPg>R*YiX+CDIeIpBlk-+wpN!_hl zMj7_2mIh(5NmDGPQB?s~WIS;LYEPF2F<&KBRWmWbOUf1v7Oz%`^2b%Kqas?#N-@J+ zV6v!|`87a1Fp^S#O3?Zbgi9^;2NW#qrSufc3yDLi1)531kSc#AQ*_}&)dyAZ&0b1t z0lKw|wW9`&Iyitsgit!bQ5SlYTHicj1+c~NgX-qFoOmI_)mL)0PSsBJG>!-jWd3RB z9_5u%9niQIS8!0_5Jqd%9|mxr=5)ZOOSs>l&C{qXhlnl=g%jiu-<*RU1rLD-hv0oa z`|H%jfx!H*VdsD00M7Mr6?#z`u1DEj+=M!eFzTxU=wXPk?{hF%{lnQ~f_WrpnJCrS zyxOeZIIe@E4i~aAmo#Xk42QeAjR6i!x0~`a9z-+ur$xG>6?)0_-rNn~;lUeel;FGJ z%$HFW-P6`!VwTn0i?WIj;p@Q>d<_OEX~=n%7sm_q2n~Pm1aaL)1FG#>W?&0%Qv$6s zjS?KXf#(raIgiq)_!H&^iG3dZDP*}xt9xehBP;-AVEqRcAjxh$e~fDLhJ7111qOhA zf2;+VgkV%4@|Xa22&UkhOZd`V!m})s#mgd(H}mQbZBVp_(Chvptr`$6a!^S5;qtQ_ zbxrIa_k({`mvR9Y6VGgJaEQMRz7BtzF3Y2(H!*_f9gY8$^omw`hg+6?(z}DFX`TF; zTn!Dtd9@k=*+0jPP4GBHYCWhZ^;@&&osWx$6 z=4O~X>gR<55>VF73wz zut#>CWb#vKS5P$Y8=7aG)MS)HydGt7RK!?|ilkbuBpg|+V~RB|O4FR_e!?#E)tK56 zD=~kv4ecGap_V4IA}Y36S!}Yo$?f7qX8uHl-70e~4GU#bnWovcPZ2uFtC5XW(G`_Z 
zC`prb^-0K0dj8i=LRqC5?wTvn( zEu%4W3R1E7ER3$}@MwuuV?R>N&vpvjaBHj@eB_;sIa ze1rhVbq@9g*5SG~*@HddaDcdQ`$Bs1AyQkgXBO@guZ?%kovP+EBpw{uL8{t=*ai;P z!I^5zLi2tWn!{WH?Rlc7vRHve)XvyZ!4y>oQiR4Qb}6lIcr^44ouufUywZ+yQTu<^ zSaz+tor~Lv+&=z~xDC(3?Zv;(ZTCL#=m2QV-;evQ>yHmbne0QkIL)tj4$;S5vN=*8 zbL*#I7>~>Y2i)OFPF-;!Aof4Psq3TPvs~)vI~2UDdGEa1%$MT{4{866F=XbA5gN)! zSy4sO&x=5r?X%C6g93+s=RISgJP3bWQli;jBkKBuv&M*O2dco$yv#g50;O>^J+`hl z;8MY;%K>wJr=utah@sEWq~?vXUThZQa|8{1xJG*YH_BOFDIp2Ze(~}d7LZ*iTs?bn ziKSSAgIwp}lg-jBP3t};l7iAou4V=w zly)(|(v0j`XD>p*5DN-fUNwKA4Mb=%-Wx1k)PhZwq}{NikWf(%@G^}{tm!~X6=tT{ z1Whs4ZjuVNbcvN{y)N=~k>E=`5b$LM%Q|+$Vv^x+KiE|w^0;sIUv=xoi;7TeQn`22 z7kO&F0jLA6yfhMt(n3bD2?1mC=|&Txe$=vw;8Yq5N?Sia|VJ=Xnyz4Hm0>~S}7;lHIj6^t|_&=HyPdo-2O)% zJuPiM^62MZhM&>E#fy1&{)%SzJ05j;V}#Q7?xr-1vC}VKARiiXo4G^&{{cLdT51Yq zZe(+Ga%Ev{3T19&Z(;>%3Nte_mtT(vCJ#6^HwrIIWo~D5Xdp2+FqeT_11W#iT1{^o zHxRw+R}ko>J%mdRhhK%8HXkrrGzEe<1=<=uDALAOEK9B=2g$$hn^o+1cdcl(Qh@Xz zi(Jl<^Bx~LL&}ttQH~VjZ%|3fL0Q5#sw7Wc!pE;a5g&$emTy)*qn*79&I}w!6+%Eg zsc59#s)#n{y-J=?H_93-4>ErSASqa7*+mSDra%}6b~f3JCsvWMcHq2IM8=^hDsmD1 z2ZCQVh{wcUx?vQ=qkuxy4cwqePQgWr0pCecl1@;j#BSgMo@5X2BR&RhiiajiC-BFL zcM(qr%(%z^PjQXDYL8anNI`rys0ndChk+Ld!CVkJrH5FVqmZPM;X{8ycPS+WZ(WAB zaK&Y*BnwPN3IJ!V^qLft2?Gbmm>7M7a4?qTO`sFc&?S2-1x7(2rHq2iOh!M^$y?|r zIdquOl*D_`#TooEocG4b$WD1nLaRX`NFjigi0(~NUKnGOm6sZ0em#R-ws_0SfS6bx zl03m%k$Av1q7dW>DW89g6!1C*1r!T{&^-tW2C$+KCKxT~5b_z57YO!DqAtJ#!2vTp zV8S~UR?J9%e<)-K504q%LxCs&Pe>$Gp_>fX;8@7e3OjU@1w3epmc;~eCFck1G73?e zyowGlF=P?F_zP*r0DoDu9asu54l++5JVv1nY{VGw7L*i|fPsH5V@_spbTlZQt5cDX zQLj|-*WdpLav!a>7<6_sot_OIKi*^qk!grvtDOx|i^yws$Me}r9UZCS7&r+_w|e7P zR#kGh6)dQ9+kzG1K+>m*DBs>zMEz?sbY8q%%tt5XN}a0Wbi)ZMtRKVFw; z7@n7d;u(A>XRCiDUaveJ6tBwV{AMvKm({LPJ^G^@PliwDAJnM~4iSJmI>Ru-1>S{S zv~m`od?|g3q=u{972LfnPq;>m0_M^}HRRm>7!BxJ3>*J_Y}wXV~r z`lsi94W!{>)!(3aI$w;-MK!iLD}F70E1sc(7c4n0T21D&;-vWV)o=3o{$jPd{;?<~v$yjRfRzh9TwLkV z{HnMpr`LaYJ}y7#cgyRaSM%%1=%)fLmBn~I(&KVw4DqlSU6{BuPG799rau5qKSY+2 zj#&0S1?`N)`>c(`oSXp7|1U8i}m 
zBU{tSOG3li+jPf%1&3%i_m>^~N7@&uIJx{d9bT5-{jw|<@5{vj=&!=RQ`QGkl zbBN!(|47Sqlzlp2uQHNsdz~dCjab>1c9zd-F&ydPbo5paXH$JXe=iW;l>%WpeSh`n z&G~=7_neh5S+-->s^^-U@Q zccZpuM{-Qi21IPLvskN==kJ`N)-#3;Ia+T9bDP1~o59qP!BkfrcC(c2?%8WSM?G*Z z7264G?3l2|&}1fdOju(*%fxv~lkKLN&18Sv**S8IT!*;JlC84*;ou$6qS=Wubnw{LYC+#_IwX@{3{I0NHLVnfw@eeVr63+rq0*AAr}fAe~X zzmB2Q)kCSb;4g*Uf2rxX=Wn1475VY>wk4r_*I*Ri4T*@!e7I96bL%G{`E@k5C*|Dj zwy|y}|001CX2@IZfK_vJTkR|&dbE3{i8Y-cI%o8)vp;*=mvz78?EfF`phv}*fg=PI zmqHi>6aq3imw{UYDu0~qkDEplz`x&LLBB+HTW|M$m%d5ub7`6?=}UT*D&?snVv{@a zU~m98jnqHBgW0vg;Ef4;PnGs`(&57}GoBwa%na+p!F7Ox2Nz#oU3~Hl(?N(rNo8>L zBH&CTg-M{eMm%J})hc*8dr%~+`GP7vyUdr{b(~dEmE_s{w|}eeF797lyt*JD4F_-* zg%ddlP{DG2@$@$wtYH6l0Y)s;!Ry^o>wrrQ+cfz3;-?F2EIG&^N+Zd_KoN{6a|)sY zbWyq&))k`RgUL`Mtf@-~98ORugpsjxiNY;?h$-QRgOae1 zG>(W?FCE_Zpw)W6t@PWtSSM8yWssi)o-On2<@W6?Mt>VKzyf3tB`30w1Yt`2u#g-{ ztQ)tu{ji7PLA{02)xO@OyRi6U6qeEE3}BHMX-X$zft#O%o0JzUf+En^IHoG1rHh9L zy_QE?ZJB`EshGV^%D4e?!jL_Q(S{6=s4ED!j5A0u37MArA;DnZonPDSuP;G)9{;Km%kDA|}$H3_hD57*rv_RpXVbxrR#0!D6~zi=wTzivCxZaP5#t zL`CI5HP+GrQXoP}GLaN0k?tz|d{jgrVq1Y9i~ME0tbWKI6!~`J zK<2>!%dD-xh__i%Ma4dTpzbr<>L$O=vnWlRv41q$t^q>uKF1R~6A^}xpqU>+n1amE zdMHXybMc|pa%pQlilrO#VrR@5aWmemK9*Vr7JMQWEs+lWbSWalh-=w6C|O6>4lEwj z8Y~^HC$QAUBw19vIg8C`+XmQxjHv;mIHYbrcOL;V38ThG`4e}@cu;KFwDqpybza4` zKz}}}n5(=MK&R1+Hf;b6$Y6Y8BO@e*98LYekjH zW|q#)5eLB0<_`b_8TED=KnDKQenvNfL$j?& z3*h4<+d3(F&~7QVmG>wvZinJ%^9Lw`45}w~QG!A}%lr&|1l33p)6YcDO9wpzn`W_5a`*Zoxi*qs;~BBJb$RQ zWZGJLWXf0DsId34Ja?6RqfHt>0y3c9nTP~KF-!f-e+1^RHBHi6ka$pQk+ih-h!pWM zff8eB>xvPCMq4;Q2xL$?v0>s`BTs_{9zt&}q@9#G9;5mzB%nAthL<9~+YL#u_*R@#FAR=6>YHe`SU$Pi>=hsxp8FzIKc z!=*$aH^Y_49Sk1SS`clmJrH07ukYy4?!?8JAu`&w0XDLhHL>^M0xC}IXRgCRn+Q!0 z61juKgIdd`t@S)MZ;hPM)(y}R80o1AjDalSr?26hBFfE(CCwc(Hy>Ipn}4>_9-G<< zD1wWmx>-A5j5cQg2FT!I;w*<#C?c_+Sq`VrI%_jbX$B>ZhXy`0TOu9B$3%=Jc?Nrv z$i0AOyio&OKnB4R+Y?S0BxgV48%`7wjCU8%eBdDCLANE-R{bcyPWIJrdnHYcqky%i8o4-{+1V#)npmtgZA` zk&U)_02#<&;iwr+|AQ+UFxAX@nkUru$;pd6P4ngnes*mupyOxy(PlS~GG}bFW$ha? 
zw|WhC0dv%9mDF#vo{&ZBxPsfadB2E1-MI?y%r35j=Eai-@R{o$*niW!7~Iowunghq zfonr?MAa@2)%&@E2!R$o1j=xc7jf_cywMkB7ispkP(6TaIfq|_?JVr>d198BN*-wN zZ9SQ2d<)kowU4Oc^(M9Jf)0}TPLBDtlib+@l0htMiRBDoz>gcLpC5^PyP-!`3dGc2 z+A@vG@>rsPQw3Z^(0_awiS0cgF?hO=7{cNC%gLV&ZtG-k&6gcPzH&RQFs9f(~fKJqU|3QZGY>0br)?DqU|A+D7*5+ zd%Wqpc>4#&+ba3}F5d3q?JnMKJ>C|fgyvZV3xc8Dvm@z0HjkrfN(3^6qzM1L0oX)d ztH_Mf%}r!>ccSzA*o^8%DE(sSTDrXd%~!uXx_b8btE*@CfBo?g^h1s2ai`nuVl~o) zw!7HVQv%b|9)C{>1OztGdXfw+-~>8p4DB?l z&y*&69PfN`__Aj+KcS`-ryTE}PYWaw?Ot%BwE%Ph(7&#im(C%0XK$Ksj_$vuN5rf= z3c1ec1w%sTAnlCF%k$P6RxAH7JZWC6;(F?o#Vg;l6n`Ql;!+*YQtGv0buRk9%~D7x z>KR@4tB_vrejiP}`z?&TPAAyH(eI(h>Kr$-5OcrxULU zqEpFxiVT~1F^Ev%r0iXrLEYbQJC(f$C(eY`tHb5Mn7yCPOwuzGcT<==JRZ1LZ#Hr1 zJMk&FK!1e(`|}D9g5ob8pc^4@J}!K^$?x!oL(o0u$hs9 z2sJ6Wf)PsSDV1^O-;xL@_vP9Dm8AcFQB=JU0P+nzRLamMQ98Sfs;H@O>*sfA8mG-) zW{E)&mu2%{6!z_h6U`CXb-g+Vs$CMY8I>LUfClz2L6#dkmw_V$C<8S#FqiQ)11Nvx z8f$aow(+}v1wVDDiN%lv!AH`jljd@n%k-J^G#~Er0U{v@2}LR-WuNcYcY$5-CD~_L zZZmDiGZwi3b{C6%V(B{@$9KNF`qRzT=dW4nWL_4Bv2(L_B9?lIA2>;rdQrxlo5FeL ze%GjCu?&;ceO2d&ovb?1sk&PHbn}1x)wefSzg`7^&39l`2n!=XkU077>fKMiQvm#X z$M;y4I(KH)&WYj_eoN==)jzI$;-vre0${}V!kFn*ao{vCa0L8e;5`VujPp=4ugM3# z=d%R)=3eYE!{zhIIPaF+_uZRC?7On5M6*~1zI%`2sQ0oFUES!xYXp_`M&*B^9FK#j zT6`B3$?&cSza#%m%Wn|0mnXzqA#28rl! 
zL^Oo1P;1S_8unUsN_4WosVjfk-APGXX;mj%d{l($&>GojA>iqv5cZ8MK(>g6!*S@{ zeK1?h51JFNUzKNFlbD(F`5Cb%#Gsk~Wwsy2x1gX0* zCin%ZkN(rjYyLoC14sV>1RqX9CPo`)o*%%EpLiVf4GdSqql9@`k~-iIQXV;tbk<<5 z$Ec1htG6&8(0Ea#&5EIUO(52Ua8HFEWVepFur= z{GiPVjCjBsG4Ki*tTumGe?LR_X-;Qsa*riFw!9Q=JIBajDL5tHOUSiMAm~?O@GH0V z>gSmsm@v+nmt;MuYn=L9GOXqdKjSo}oBC>oj>otQvnhja0y*9sxO}ZJgzmP6VD*SZ z!`Mp);>Hob_qqRE$+FN3N1_FH*gZylI-Oh!(f+g?k8~FR!-0Q14Z&AupB|n3@8*=X zL%r@@t`Cknd%11h{$|@kcl$!@z=3Jr zfP#Z74$|}@9Kc2l#u!I2{HBklnh9j13GdU*V;sN+i{o=}_~YbxlC(ke;%VlEEV8MG zO$Z=}-Om#ZO5K0-=#aXZ6hJ@{X%hmQg!Ie4`Rp@-m~`}>rcoyP(pdqILrA5)cg<6u#edy!n8V<^dC><5hhMSycmLqg&YSDU1SAm zA3Pol96HFwcIFogL%+Dpj7MktunE#hKV-(2;2;y^Dad~*zRZj@8wq6mFJ{J2#l}zu z8)?6($i|w&=KQ82(usZ}hl1jPLPJ%PG9s@5kE;6NLhfZ%KtfODhspTct*)UAe+2Ue!^nSIEE&X!udUA)8#%rc6X3VxG;D0t za3N@JZej1?^ulo*2fig7IB_f_mU!{t%w9SUumt2ErSmudrxIt+2?L+D&68mug81(P z-KW}Tkc|xV(hSxx$MrO3@1@6(VIiA)r7xfBk(RWHNws^{zE@?P|MDN(d318;r?)Y> zx7B|(OZPlF76w`heC>zxzR;p?@kr0$FK-q>8(2VNGW6SC8wXTrwVhxef1Jiup$@Wz1O1H*X5Ms<{0drUNp(;e9r<(zN&7*$>4I4W#kXC(^_@o)x%SM0I zP+j6QK+tV>68O>l!6u?CA>r!<6X9zB4lrPIZMtZNVnq-77HYq3M2jQL)h#R>%IiYf z4d7Oo;l^BHo$A!lg_f0?&Pz=80slmK5S{J1?L^n84>YBey06vtZMa$4EE-$dHWEtg zwr@k9z9)O78j(*g$ENw$4iryqz=eOGEQJ4Nb#Ibhc=3ghqh^+yiCjd ztunGOJ@LLHzCIZBnl;Bi(Fh0Tl5l)K;YaRDZ8oy)@TF8c)hmO~QLF$*1JHlU($ZKo z6!kcx+CU(0ahY*idJ49(%@fV5_Z`q^M>K9#t5&L1-965*iv|9)s@fQz2UDR!SDR{w zjz^CHuSVBT+0IdUP&W#A6m-wi)`w=e%qiqUw>YK;?}x+|bE_KP+Z^PMAT%+6m36%* z(7_5S9Chj8#TW}ZahTR`0CazF8D^3D7d^bRc)}4LHmN+?855yp0;ZX(G~y^1`IcxI z<M)}Ua3qyR?p8DfnwgPOsslz~V;LYb{qZnT-=Diy{a+oNqVL}PEVT)Pq?5$#g0+K3r%^~k*-%2!%xF7l( z-Thv+7@JO<+*@#xhIr<7i@@E|^G@vdU_JPPh=HBK?7ZweU(PA>uM6W3tj4p`k&c?@c-ySkrwGa0C$d4C1lRRW*4 zU^kwJU&HeQ3n&Zk~u^k>d_7C7jX9UWe~D> zG71!E(PRh@eT_t8My69un17fDHA5Ib`MbR1F9Y?hNazK@kS2i7i^K4oIKO0m8l`TS z%sKrRHguRb3T2mrBLoqb^(+Jw0x>t2@zDh;f9)FCZW}l7-Cwb9m1>J6xp&(XXx+w0 zkTz-TOCN<>T!yV47uyti&>+I;4i=*$J z+JP0K(4(Gpv9ugJKt3jx?*zyRZR;YoE+=OtkEb*02a{j(MU%3uj%uD~)Atw8kAAy2 
zfBJMpfEu;{ivpqp1bl0e9$mi2Rt)gx7Djd$SZlc}wHz;iF|p2%{yf4%NnCp%iJ(O= z!nUu3cpcR%R!$u}IRRN*Jc-m$vL!27wqWXOog1r3RM)&+%jbWwMXly(Q|Z}UufD^W zmMt)!*AdTHjD(UP;f(kww5by1V#Rjyf991M7kM!yc(RG+$!@96UcG6R$YK+B8>f|z zXEB=+cXA{CF^vXZkR#*LI*P<%!4g(RbzZje#7y&RLGz6oS9zHzb2u%6@v&#n|)Fw>+~k;JoMZ)eU*t-a`+e<#>W z=jj!$Ws20W7UqsNCPfX-%a0l<*GQ+Y-n3Gr1y5uwpLCiN6uPd`d_JXk(rEiEB@dod zrs2;dF=@X6F$q~~$E_kkM;SpuVz%IM>%!L0B};R$XSlI$BQ2Wqgx4DrqmPI?g?uaW zX)p3EF8{e29kHrOS|Up|{&CG?e;$1^b=^r9^;uPyaBAwR4eliABeJSkWs1K;(3Wr6 z9k5RgFUOW@PedBUot?_Oxmp=gznywuA{l_|A&=XGJP5SGVY1AVB$v1lv_hY7uvl*j zJ(I^4Mjfg{o8ytyJ6RAyNYhFq6q0$&2O~pp?ogN<(f1;*Q({B4H zVpN92M#p}v(Q-vkN1~96>!mD8#j)HG{$211b%ll?0XCMA_xh9F*O+v zWA!&>Y`t+Pq3t8j4q?CC_q|p|e1r+)2h=8(_y9yrc0!D_?u7^wV%Y&k4wb20z#ZR4 zp$|S1R|UtmN@gt~qi@sNa?mj6gFgb0-*a@%cS(RKQ3~io^^~(De^$C6Ooc0jC!J9F z)qp6cdf?R3@FWm>hh2OH7a17I4_&xph_7&eZZ~)57VOWh_)m39(=YQA^sqp51PNQy z8$#rX1~2!{9*HQQV&>^HVuN}p2)o!WKU{0Rj^v{MNxlZ8*W<}Z3l%2lEoWL zS~R|ceBbTbG+QKu7?cM0q4Ds0dfTQNR0D8Rv>gv_D_&m?*oP?x2JMr(*96|ECa4&c z!|^^P!8=Orf3YY4b3WgZQprNs*P@zGWptB`+7^A^rzmq#ocFz=l68BNjjG^e>Gy0o zl8|MhSrxY>`(njwD6gAxq|sZUW#94Qnv8BL8f~*olu}nsSMaZSz1rzi3+xbL->i$k zI-ZhgR`E1`(lb`Mbq9*l1-5~uO;Zc7H%Vnv3D&`ae+nWEC=k?F)&7C+QY`=!U_O@B zNR|9AHCC2qv(A}PJ3(S#GTAbrv=zwPib935=CY*%>!4u)?%7ecD^Z0@%q~u(!7mGz z{r+mJ?%drLfgrK5BV8oMB8jq0$Zum;HWdW0YKkH+_0F_OYA*YK;@dJWQ;jMbM*?eT zX-z!Sf14X&i4zfR0RR_C6U&C7SZjqAa~I)`B%C6M89R?+jVBwbQdE$ROsXmT@i|yZ zGC$v(zS!C6i>j`pIwXp|-fBFs5e?m~Vw|{;8vO$m8LjL_)~JP&n*UMS&PN^d3?~weQQ} zA|hVbi#PWkSQ=_R-|t`Ci{OM?b9SFmHJIjwIEp;aG?%e81S)@BZ`(E$efO`>(_pVLLsBF~t!p3Jt-*?+ zz}j>V%Ys0*`LooDBr z(-*FEeB=u%ob$26T!}nP9FIxFeAhW2ITzitG#d5#)RWzxlHp<&#(9uON!+_Uf7yTe z`MmR2hk!Ngz*`hvWMIH^hO^GaC3Z&8{?ft7^`pPD&LNcfyOZ#utqu=ND%d+D`f zE?%Fl?r0ym>v+glj%go#@XhfEMuHII;-8q~VG`$2ya<)A9CycQGBa26H0)8{T}H_w zvn|1O2(J)5g!R6OqyC`9Tx@Kq4P1Zt2!ks^NF-c6c-7847o3(3#8h(Tq@gp0A+MXN z4OO^~gy|4W4OLR)39v4ZbnQq#TrDrMOXE0V(9PqgRQ^c#shk|n-yJ};LJrt z%7bjER|zbB>_Log1r-l)$KxwtSu>%$0Ca0(qXZ}Wi3p%1Mika4yeWv8vu5M#5?Bd= 
z+2B+yWxz(^GUP(y>&fBBL#%%`t?j#kMBtDJ&$CH1&TFR zdPw^GZiHPAHP&m5srb4k&b5|eE&+~9REDYK5`rta31=S8=FyPu|0otVFIM%rH#~DHpI>5g$+a$-x@Uv97|SF8$}>hGr*v|d)GND z_v*aJ7;+H(mT0$~IW6{WW8VWz6wHY+l8hh3zRC;E$v*5`Yye6&Q~1jnV#MOa`6S9J zbe1(}>1L8l?d4LjSUAj+)O;WOZHX>cH|&>Dl;uG@tTLpzjN*TWLyB)s^XF5?X@_Zg>%31wF6!m8_)tV+XdG0l-RK5zRymd6_-~x(KLfx9S#*x9b12?Q7RE-bWN%OBKSUOuhrIf z$AAM&3dB7dkB=7Tsa7%A;Je28T8qJXUSkhKEPYT>X16k%A0-dp zUd$FPswCV;1eU+5l6bckN$lNHb7$%BWVR?o+)h(THpRqNN(2KXBJ278ac^C(k)viS zSQyILn%6*=vDKuvb|phL0Os9zmpI3*2&wF(XHlzjbt@Zc8iEL5X>V9f{Cd|=H7*1Q+2`RM<~ zn)(j+TfMY*3A$3oxut+?i#pqi&<7c@C1mD%h`IEjGPLpoGxrN00yE??Q5*t8IFL_S zaR@Ara|HZLoQJ8ZA@{FWKi0|9AkM};g1diVYFaAiyLTKGgOA{=sjl~2-qohUG+iC~ z4#NB<4CA8gJyr!~cp!{zt$a8Mhwn|_gCs~twkU6^+LdBw7R>C`^)wj-Q*&+W&c`lE zO10S54vz&a85r!BJ3Iz-osXZf zHH7jP7Dh}f>ni=##NxJs>)1N!{L{h9nh3U}Ng|72gqh5gIK6Nq7qk$#>xB8uEOLk0 zT^wZB$c^)_rrGsma4`$B+t3~M&&JN+YxZ&I4uZ)vn2U&l@nw&Qt{X-NS;;8KYr0AK zcV~Yx9lI!LHK^Pt5@|*=-CoVcvDZJ(%mlxmhQSwiklHByeCEbiuA6^wvdbrosb4h6 z&npfBCoEc-hux#WLs-m^Bh6!a`yvYxdiMObCVi(Rm*6T5j+ z3&}i{lmAO4mr6h~h-FJ+IfKEXp>`_uuZp<;SG1y~Kupf14aQCsRV50jDk%JuB$v2c z0urr{eH-KMWpMVTRJWqIU`XouqYkp4?o;8z!yp;z-?u3(;c-M&l@@lUU8-#Qh*E#a zTBYYgJU`y{L`sk#^9K57PUPh+yvA688jmXG*jd*;#o0XRgJ04@XNp_~FzdIeZ6mel z-Lr8p`1&Su51+8WFP%8{2JSBHD39#KZvTQXa8)KUiY`Cc*{fqxqFeiP6pH(5>nFW+&d0#9@FhBY&pftYZ&V`ckGQ3Y;okrY4(+ zBQA(a#vKA8z^OJ>-{0^m2)()TWmvG74&lK?E~mmx&e>fU+reDVRi9+1cDUnN6ob z7+2OmquE&sGufw?Fnyi;8)tuf?w*BC)2xya1}uVCStSJ`Otid7T1mh-Hfed~E#j3m zdZf%f&3PS3qDaDxdEJM{@7}y`YMt=LeE(X&|4@IpXhftkg8U}TR~~COZNiKfKCZto zSK^WTv*LaS;k(v;M)rF@=V5hBNQgoVTxTBwZ%x|8nif>|0;QT`hxuNUlS=p20S4Y|1*!dC6;;8B5D;3Gn z7PgXnmEg9-4dz}ccxz&}nEZ=k@?|3dMwZ)a4v=jdrlS!c10H`nW0;(mZW!O{4R!mY z=APfolEeO50DjdN377on77)w4>LwsGk&%NKRW`!A?M7df0cuFbn*&0ok&W}ylP6;s zDA5}5TWoR(TuPetVH&jS=$LI22|3R9DO9COJ;43@eAu_P_O`;{yNJ!Z( zZv7&QTeYy~*pGhzb| z5hX*g_LG@Jw$Pg%TwF=vg2`GG6%-&tNQ+{aBCA0d=gSaCU@EWKxXVCG1rZIF2y?wn z1jL(4t^q*7z@0-Fxw96V9Qt}23~0sTcLZU( zF!FC}O!*XRv^tJAndiT4i6@LxA9`G#W6& 
zo@+t{VGwaCi`W#Lqpk%NgabrS5W!$7v06|;AP$2_XJ;)MfC}&^iZTqHC2n>*7(fICqliq|&<$r|#N=bx9( zhL3YUl?C2EO5aVAQ|8lg{^h7g;F~nB!f`i$JJOXGU*t#7-Ovk$-k^BR(iA3e`i7{c zLv0O2Lx{aEOcSgT(WE6jx0&#~&4;ObS2mK|ftY4D+0tPQ3JWJud)3uIMB&hBI-g81 z0hEx!t^*0hwe(^>!4b8u{a(J+Hv@tQP7xvbqOCv*_4+oY-D5v?!yd)m;-w@~bs?7U zqZBpLcaK0x03hKM3$a>J-etHn1fVPb1DmjI6_Xc63%!R=F=qk_G$*xRHIY+!Uu+cODx$wI3Rf_4Wn=&hg%!s#FLipRzqR z>XbS<74-+@TG{AMc@F7|JHQ&xFYrgmDYgv^?UT|}J^{_fL$$LZ7kE75e|Ku*@#mL& z;*sTPo_{f96wA(oqth_m>@64*F-8g@yaVGwHxrqlxKi7ReyD40Y7Z@i6%FUNGc~Rs%!7;nm`wi7bur&&0ZkL231P}o^mr*wb6t^Bq1pfyK&^gQh z1L!sYHKTZf7u}3^bJw-4QOnUR|(eJT3z`F1*RfyOXEWlkcCgzzb0*sekmQR~`=u3Iy?FND;xpo5tP; z|Ln!-+v(_X@ay!Y=}tIeTvFYCh!GFCH>3=OgxKzr(a)pTy7S3s+BQ!Lg%sn5nzw<+ z1VfTYPZEZ>g&V(q`_ptVq$=>={4`ykKN|$tKOJ!FpN%iZ)A49x?H6ogxz z+qToOZQDu5Hg?jnZL@=pZQHhuj@3ygxjE;I_r|^dtH#))e$?7E=UVfFEo9Z9PYfYo z{rHH%NE@n9xG(kwOA~p~et2g8w9V2`H{r$7)VMkS<0;kBbSr_kkV;{W$j|~#e$kD1rdW2u+cJ)SJ7%mXiroxK(D=sW$dqsv4a`(Y@_ptV5@l+$NfF;-wMHN9q!~~h$;aUv4VwzJWub;Nt> zmCV3ZyapH5i1X20b7@Xl87hUX=nEUJqm#4);9fCl?&#&gpJG)%$MECPGTvu8VDG3hDH=yCW+RD-k8Y#JZHKRin<9g zFoyb#?!8Uyy^qP8)WP2-S}u5;u!G^}ukRlXmvSgq^)j66sg-MB?$b}G_5~MZQD2?* zvO{urhOuG6BAvSi5a$EKK0c%WP~dGP0PqM{=1Pubp6fC1JliIUzqCFk-=w@d)ZA<> z-H?E1&qvT8AxCg7j8+DMfVY)uINl~vwSTMXd;gs81rO6JmOF~r=;K&zaLdr<=ND_2 z%WR~Z*ETY~@kw8j)=*+Pv|jA?2w?mCSZYEDk5YfGp3@gXe`=~V9sDy8hT=HW&tXx$#+of=tQPifcuCKklhyq> zE`pzIVn27CWO|Wdd2!%(adIF!TpfoJz_5MO5bdBf?vM#$uH~DT#;Hj*3dQo{JJy$c z^q=P2Dool3*@>?r*GZEwFLYuBI4LrzmqUoQhRBCf?3P0C{q$Bs<*LS9fw$~KX`Cg6 zoT`Wnk`=@X>2+butj=xq#ApHiqWPS{qk8&OkR58)CI-LxujV|XQMKK?rzJ1@`FaEK zYAMGIF(;Mwap5Z&g6oM!``}U?_mUixsUC{EV;K7g4&+Ap)G@xLqeL%NIqenodEaiW zA$g1v8lKg!q>`qkGEOfVKp7@%lPpk7?UI~Oc^ug&f=>E-9(+LFFGDh+5ZTK15Gs(qV19EpdDJbXeQ-*%1&6uzsN+&YK3% z0e|CD(8d}cdKhC^lSdGEZ{voi=PtpBMtKpNfgP6|^Av4z149d{NaH(-k{HZ6&(<~j5e zdsY5l$F9$TKg5mOE>|B=xk=aZ(Mp7{f#x~e2mUzPiw5UG=bqeqfgaeZcPZ{m@dWkp zwP*Kh4><6jJ-WLmGlQ|c|Jxz#esK!b&Gic&45;mvK6Cx;8j9L{k9|)8s_LI&L360p z9Om_@u(hGYFdN4su^(Cz)u=GF;axOCxRcZKe#N^XZ^D)J~1%v2Y#BoVu=Tp1Q2E 
zdegBBwC8uxD{t|q+G)&3@w0mI1;Uf3!>9?&M=vVf#X|Cxnu`htzdtQ_xIK=U&VSHM zli&3A@qD}IuPguXu>U8kDQ^@ewrsP_>GC5$@-z;T`jPu4zV&%7+Jod1yoQu6@xR4C zZQJOJdByslp&2?rJMIh6?f+bZ{HnCto4H4#tbzuyS_u!`A#LrDP$*p+PvNauPS2Cr zDH-+gj3_?dd`Kq1Y55I2RW4z`A(ZWK($e@~y?0pq;I#hsQ1cj@(V6FH_^6jTBgj?` zVZ$~4jP-z(d7E=)p7}>p`5$&gRDciN>FUTBwPo9|`$sllz$w7_rKdne7c$R6fHvk_9|rK}k%yMRydg%rduuuZ*5@_}lDilVfHHlbWMMGdEy_3nwCW@rgQXoXS|X zn-t009vz`VTRzYoa{lw52l)fk$Lcg0`o3>i5xbw;M zlDAsr6HIM7{wAcE>nud*!9xVVveZYU@@{}R$F$hRI=Q^m{%F+>m^B=_k%32mmb1l7 z3c;?sa5prZ$c)^@ixQr7r2EGw&6V-2tsksnhlVn9LE5Nq-$OEB=7l@F zf~y5we#2l#wsblw4Ozk5r)9X`)?J{CzE_`?HM(b(-C0`Lap3pRji>~HNpv#&lPU`y zPHNr3fsC{fG<$~*Cn^1hO1cpa{Akqa0{eAbaQ$`fnMgoaD=BNVK6{=S9@8c2y}f;R zqJKeF^;)Jh0f@ZFFvObH7W?c)g0v*aIw+tE38`Kyl_JfJ;8mM!FN^+(?! z&c|hQB;q&vRg0oVv-_E3I3!W+PD4Hnl$6-m0xL1`u}#=skK%+_Bs~ql$ zu2n2WtP(w1aRc(damEjn-&_)CX&l(YKI* zB0GssHLlY|Z;md>gG&k8hf<%LH8*dXdOZm95PsD`f2hYs@XfK5h1Zmeo=wa#DYy6J z8jJ-dT9=Cu_oi_7c+pc^l&)yO-lRkwCm2B%6dB+ z5sS&Ocy4O*tkHBTP?(ev?xQ<=OT87~;&J#Yd6IA51_3@C3R zq_Y41Q7&n)gu%IZLil2h2evATRdv~YMe2-Z8Fm|n@UoH3bSW2MOd@+&0;YXvMtkgh zvH0&}JM3_wro#w-Qk602a-tlnnniiADM+@G5m78RwBUN*ixz78*n5@bwzLzVfN2pa zMsH5mP2`L^y6d4}h@tI%e4DA@L2&sw=RS$e?_x;Dbp}aMhti&KyygHFd(7}Gw>+0i z)jKNxdmZIAqr;$UqWp=}(1?@PbkdIJdTZunP)E1_@c;?7M2YrQ|G!fFBHnBdMujDP z^e0uj$dCZTiO0Y;9gNUX}Er8aT8nm#<%|A~00%zc)M0F^P?5_^&P zgJ`oQ>!V3B<0O90V|12}`>3|vjEa^(sdkAvmF4WHGKAClC@bd9^#mf& zJ@lKU;@I>kKV-H99nyhQ4nuSZIppuT~g~*QPdWOE-!Yy_=b)-&6iiO_3@X0sGecXi`yxyS_)ol zF`@JA*7K0xY#|-H)JbjQdc^==W#uOR1bO5%?psuQc*d$xY7~7O?t8DNp`gq_{v7#a zRZG&G@9@Fq%mBIlza>EZ;sLP9v&L@CfSM#_o&$rL#GbOg{$z~-IjuoJQ=Z|dJ}S#F z=3(R`E7E+cvalvEy_BU2wbsLD@FthY!bR``OCLN1ynO_Y_9jb5 z4DD&5q00eI5HBRK2Do|qe1=3wgNg}fFv^1KKvylKW_dct4VRDq^5v!X#_%bwh8RE9 z*`2t=u(ecZYJYq$)$ZB#&)4au%qDo>{9yt%d0eE26#W<-4j@WKSkL^E zT^da?N%k)-{}ks)s`IvRJh~&iu5%wB=&g?2(6b$0Q?PNGP;+p-nLwM4r&#+R6b;e6F??IYPeo|?2;YGcDV72^RP4LYP*ui z)89clc&!dW@$=g`Ax`3d>$co^zI^LWxZY6HEYsi|SBJ-HOaHCGU{AZ29)%l9yFu@@ ziYdP8Ml=Euz@7iYf;BwNfq{Spp2dP`T+)Uf0^xk#4)9cim!B}fCWDfLSShukeJ_^- 
zHAdIP2{8_~!O(>?!02#uDt??*4;=(^Y{jOvufdtwqvpsQguKQRk0?{skJ8ke5zZ{8 znw3$%53)*>N*a;=et&PP*xB}Y!JJp&#fFW5Ov$$jl4=x*&CU@_V;ov z;d3|C#&`r9{6QVv&v%HUvssSdm5k{_wyP9C`B!!S2h5zLIOdJ)R$$&+X7Z~O=dl&7 zOxRCkMy--DUsm;l$6aOCEDEPNM0PeHnKC065_l`aoggSoE%`~u!WmJ-L_zOfhg9#= zzvr%xu66WX(-k$?v;Yu-_XcS)wn{-BKqXKvYeubFR~1x^;huw_k=KbWcY<{;8wvCF zG+E&2~e`Mnl;aq>&*>$ERQWt&weweUw5UkWXBzH9&6nTKj|yEJ{k_+DY${i#uXCb8O;_XbS6#I+1;c0P-DS*>c*L#gJ(lwsjyN^f(78!${&>kBAs) z4Vwqth7}aOQK)pQhCjvcxit@^!DBCd>xEFF0O_PQ zfuGJ2bx>|@!bOjzQEHk1u0@tRb~SA4PV2{ZFBzji)+v`)Qom|lki>JbU$G7-*8IUf zUZ_@*K}^*`!~9^wwD`3UM--iz+xi3nuwz76AEEo+8QAyi9lUjLKb*To|3nL10f1a`K7-P zZ3h@cN7C&vn}*Gd>T0gqjM8c;FY#ox6{m)kbc>ltBCQB)t@@8pqpWRj;FrB?*jdpdQrAR_S$?rqqLE?i9mIComYr`7bm@5m2M4WJ^0AW}|e zNR?bTj#`4Ixg$_DIgOq6^DUTT(?@O124`|xua;J1Bdfw7Zm)n&WAGIT_>+z=tCGXSKV*Z&7VLp~f>kD!RYErzcewaZaJ}NAqyN_Q z|8%YYN5=-^VgCP>Y8c?ku)q(;jMzduoPU=eX;F3k!uo1&Ki36?`f9W>gyg|M8Br*Dc!kFAtJ@t#%+guJLl zxYCDOaS%oP6QZq!I-UdVtPX%|7gfeJ8Vw1t{-6Pb7$U&T1kWc;f)uQ0`4oOZ*`TNH z)pi=d#LTa8`I0gW`X+}PMW6B~(?EUn$Ue!(L+b@@+S=(%7`118 zpeSw2TN_*b(~uT|R3h3)42}h{ca!)s9J5S*?^c{B#d|%CRPUZ=`o1 zNsKFvO&Obj)p7|5#7Xm@eq6jjw|d`snXj5C!eq4!0~s?4oW^J(wF0^AEG^Wn@veG1y4s+j z=&_z_xuxaeKh_CxosV~V*?L+4Lj?7HWv=DG+Uf(^FtWI1qF&?D-ZXVvBFq@jS^5uQ}FtNfaJ$WtFx|7^V(H$xlmm}EMo(y2EhL>K@A)fL|2+pHbPK~4&P8~dx zTwp>PX&)>;tAd~-c#JUBG@sIjysQ!n{yq%zmv$2VRAV~U)Q(%f;7$w-=X zu{|9JkXcl$VPo7XM9e^&xTMngfEq zH%2hTbrhT%6ceN)DA&xDPW>)Q7-ObA$g$Kbq|ly!G)I{!va=ik3+^fncs1Kt3w-tp zhwRiGRCb$g3Ax^p9|EGxV^W!9#MvF@$M?<#2yX`U26PeW{SBh)>6SJ; za-#8%Kff7->^_Fem_fa## z%QTxhitxnau^?Ol4vkJvuVTyYX4tNWSR^N;aRU&=x@o*{C1*mket7**tSG$79n9^B zI^SBi{r0?{-8}KN7~y<9`R_O;M8N;7W~w-O(#z6L1!41a`m+gEZCQC3Qr`LMJ--N= zvEQv;@IFAb?Y#gsJVB_<`2GE>lF7F<@pIQ1RSr7x`sK}=%CJjKX&V*g8>h@EN^tf8 zG;0-^w8pKTQEW`})ukrAV!kf3&Tu}sb^m0PmcElQf2C=KQ0S%zgCSga9-oF*r$Y18 zp(T}b{xUPFYBxXPhPfir{CYdl7#$3Jr}_|6Q>+bJHfPu4h8=n^iKbFV5F zN41ZQT%qL9CX63+seM|YX@h5@7yc%ZzSmQw_kDbF70Mi_cK2$1YoZ7;KgoNqJDQr| zHz&y^jvlN3fj}r4?pJ)W_xGFCX~$9KKn$hfggHKB@M5Q27vmuSdc2v;`u|1>rU^zu 
zK!UNdrjd+5pn1J_OrwbS;dTX&<9+y;yX;#$cRLo|BY`!jT9-ryfBeI>6 zYBwkIwP-RVw220udOr(&`rlkhSlW&0(`6%6GxLrOsM09t%9(ON7w~d>_!gD)Wj!km zVDgFY3FE81yV+P+WNxSC;tlWx7FsAo^P&^pnPc;y;c}$BZ(?7OK{q&;8+$u+ata9T) zFRgS#hd%1`y~E_yszFjF6}MnBjEr_+7`e(%JUDEpURIo*1~~r`?U`^w)+yXQRCo$f zc%qttN&i$~)hW~eNcG%PFp3}^BUs)qb~H_=bYLl+khg%k zV~$!5X~Pu8G>{dZ@X)lPr1_cK8JbHCSp$1ne}LG-775!Q9Ei++$ac`}K;uS1V~%22 z(?A2hB-{7TW5lG`_;t6(4i3A)NZUrrU+f!axR{x+E%tPwh?!SgI_Z)oQwqg+rChL; zg$5N*xa?o(lE~$ZQBb47{Rr*ae@PN0AHDJ66rV<7hQKAhRbb*pkW~0kYmpL6fOPBz zp=kFAgABoJu8nzrI~d=3X%i#?Mz9fm%w}q9U*lGbgiN%vT~v_gp15f8!CUxSGo~Rn zv=<(20Ng4nnApH5Tk9h3T>c>#wKP&ny&uERkkRjIwnI^Wf0Z>#7|0%g=2vQy1^*P= zoR7_OZ3Eg}nsQ3OP*)b0v%K&_2;^+#XZQ}k`+I#ZP=gM>?k%Y?JbXgg;%h3F9&7M5 zN;G?=qd=3T@i+#8s{6|S(}x>%ho@VFgjD}sY?Htaa&5%ZSPn{gG*soBAMU;M$IF_w?UdZ_gjP=(kKWU{cb~;y1oEd>Nc{kLG7Gpw-7|y|M+p%r+OPcUfD9qC z!43?B5f5tWB?XCuXk<TU?q&%YXq>G#tQy zE0|zKW&3Q>&t?Jhz47y>xZ~4`T<8WXL3n~$HM_A_-li?h%YDjAr5?RzJ9`eItb#q7 z`y4cP0v11bS|b*WoDk!_O|W2x@z3*9KF5)?X)fKFx&xC}acl1b7SAbU%iSv;S z-CXlV4Zc&nPSJb&wH{1$W}U7ZhAt-83o~hNIJ~#Z(K^2S1t`XhMDEEY^$j*|A`;k( z6UetsKIet=Rv%*g6>!zNiUs1fQ`e#UCYY3@R7vph<=5r50^a zJ$`0#K4NsUzD++}H>ADh7T~@rhFk7HO+;AH|A?kH^EJ$p%TRI*#9r6oFu3ErTXBPEz8m&ElyU?M|)Yi9o;y-AHs( z#MzBDneWB%(2~7zi2x?!#D(`l!g<0iY?%QiXjIo)4DH6-%m{uNhEGxL<{4( zb0#II&QmFi&t(5xm?4s?xRVed{Y^(WHEf zH$3Rrm@=R1JZ_%Vj|50!V^uMjDxeg^gk6TJg1X#a%80;B7(Ys@C#f^ABi#_svqWyg zUGH$Ag165z?WhN2kFR&|f93&ek}w<^1S{(oyx_lBAv_zGEQy=1+@z_n*hLx3D^D&v z%k^USEvLj~C(0ZrHe3X%3>Gkqcmk@V&+RT~7-SWQQMW87R@2>9-KVE6A3F66nhIy% zoBO|gdkGeFh|Y2*v?r?KE4F6kT6L~?XR)Q(Z{tI|+_3-l(zTv;y)GF}*vB{N!;AJh z?spLV0Up7ZHxO`k^+0t9%yW=lmCnC?L%iy@#{am)#WPLmT6~xF=H&6J3+`Tb2}R0{}lCHOFkInX5FNfKjs8-gP3hYTMb1@g~FS>%KTnOql=RVyJ@KtP}Z1u;_{ zC&)lF+px1LzH`IAuWgWz{!j_Zci}rELM&hfLk3}}3Qi45u-B^1Bzx2+D44NtY?eg` z&*W(wx?@G?gd1I7;egKw&6xrIKz3h@`Q7^YqepEVunZ3hW8;md8yyCCV!Zs*0*Bt8@aW?j>Qk4<XBnQL~8_@-xyfdKK#39R&D zpn*F&Z491E_UCgKNUByXUNDjFy)0|TJyjqa&d#@i!u0Fk53lWq!7fD!n=`XLw_+-Wb+SbABGUypJUsuv^=bd1;c%1`f)7a 
z!PYXwh`Z~*-6z~!e0{uq1NGSqZm;WWgxFYr2HWuY6Z@M%Z-r-IsaZwtgnFXbf&`~+ zb#jHB4EhqO6WZ85Txfv}%!a!lF%S?Ad)QP93X`GBrUAcdh9kb*4i&Ul@H58_RFvUl z*Jx0Khr2iqiW_nx_3Rlo-KKrKePqz6`@2h!qiDYA4~icU#V~ibEY0yn(%~nX=)LJ? z$TAn8Nb9>y>J-Ef`|_wv4a9}zo8PJxtD7Q$XtJ+E%-w!&b33YXQN#tr_7% zN{@@(Mq$h-Wx~r95Pigm?^>ZNCaadT)B^DlC|sy3p5MubjWXxyB_kkN<+Ri-?1b|W z>ejG&@}gmjGC8?=ZcMh)XC*P9G)>!pydgyqQhFQ?7YUjx zDNJ3bH<+#b*GGXh-BC6K)kf<;p-LoIoD>`DMJ*iD5>Zcc!!U*gCBm1-ox;$2BDf9d zR>>HiZ^uBHroP45d-{PjJmkxDu8uoAo>*tLN8L5t?vSIw{ZSHxMG1sVm=UrTS)~%@ zc7Eq0(m4kqqy!aYHzNIgAF&#e_vlc3i>zhw@JVjOD z1O1&ZlL^eLV;p6%0vs?s{dYxa#PGxO>8|yg$SPDY=1?K6h#goY$Mdv14fC;mj30Pc z>7pRqw(2KDqlH9Lvfq*Ld@bI&xzjxV@pksY=xDS&M0G2RNhJZ%Lz012gI4cqo8y9Jh58eHZXM=8Cy4 zl=z8|Rq?MzdfW(LnE9 zKGeLogjbLeh!g_zzj7~wIJdB0iSJ~|1Sc6P#>!M1%Hm`mk7zM0dEvja|N0GfsZ>{H z#ikI=Vtll}a57>t2`im&C0{M9xnW9kQ4qKOQWg`(cr`cVHz%Vmz?BU}!eEZ` zOyQ0N?s}?!kBU{Z3*ug}Zg?mjxrpDJ=I!vJKvfwy{I>*nrZ!N@rI0$`onicO$E&$Z1fpbF$-Js;F+RNAKz@pG9-!5b=5Hi8z#{DWrcAyhA%Wi3Y+8>YTVsqBfd% z3PcXNZjrVT4@qm>!BxH4RQO4(smDJtyfk16CV~U_9tCUbG<}D^*60h^xQeLt4NBox zg0gl+je!lv+b~A#ccl^PcGm@BhBo*$9JM{f9H#mXuHFK+^PvbP&)l207)i6wZlP)} z{T_ZBvFktDDBab`KaM`yIX+T~Vy&`mFMJZka}pD-*Bf-~X>FEjy|*Oi-6I!Q`X7F+ zbWH;)!THVLcXn89ugx^Mv~IK_xiZLg*&G@BII+kg{_Ld5C2{f-be=w()^sebT$S!l z@Ak=uZ%m}hQN*^3uYCe`v$Kz_jsArXCw#wY-+T(@%rzIwKC_i=8R>Dhe4d6;)@6+Z z&uJwQTTQ0^#k~z@I~^Tpt?k)xhRdDMm!RwRyI)UUK$6ce6Gzn9b!n#d-zc>l)X~U} z)uimS;Cv@No)&b_{R||y*aD>(sdpO@`Rc1cWkt3eyp(i@U*jdq)zJ64alDIk(dUvo zj2G!_v~B^tc_0Eg)l>e@56@S2_6Ia7#D5_WU-AuIOK2oSE)+cFcr|3$KX8F%H_HF5 z5=(%<{Qo8|w;VQGQGjQ|f^Kvhc8eE=^Hf)rv+mfO8 zYc6yehR3=)T9tqf10(XpAJ!IM<#Z208pr3F|FVm(qn+R9<0lQ1P+;+VN7pmA>gdSe zeVYiM(e|HVpAPSgk*s_8^U5W+_U-G!;xGS29+>X=>bJafXt5N`M#&H5_l)X~!iRa; z_V##s^0~CjfB(WQVp3n%oj2XLsyS|XJ4}DLh3ZQ~xRB2^!bqh5l?b1>hY$U0{f=Dq z>7sB$JAOUSypz7%I;&Z?E_TgaDs7fE&~GqB0e|26uXxD+qo05s1p$&9M3`3Ly0*6z z4Ioiq3QeB55DkR<~N+>?SnAqYcF@#FC+>-c2Dju>}0KnbU zcfJ(k#YfYmVyOYH5pn zHPdx(h+{R0W@vUB-eJ~~to)@KetStJL1~XR=1L(`MhONG&IX7%5c-k=yU3eoq&E>D 
z^)4uj+B%sMBe%Lg9byclYJv=16(EhiC}7Eaid=XrHSyD9x960!PaAhNnwYIpqJ)}D zDgNCJ!QJxpU01H?(`Q9z1F)|7=SFOMYVkFRMmXFlW%;a*UyL$G#@?2|g(Dx4Q`gN# zxj)60j2zYNX0gp#DE}yduWuomMK-gTBR2zC2-SiT%o7px?!DH<8;mV#1$l-NPwNOW zA&_p;JQiQfz3(O;Od8ZxDCHC0lEMp{J_zaU2L(Ux;vUpN_gG?yjfz;dDO*S)|Orinh6AV zo1jQSnv=oygE%I44|AS!2yrG7bA^^HL7w?xPFzU zX@`>C#F4yj4N6bkUD;ExWS117KWnYVdtYI?_>qikpO6=lv3&)LC$mbgJiij=V+Ri?dc}(Azl9ZhOpB^-nL>id{9E zMQ{AZ&{DCgE3UaNLO4qe!h4l@?;Q4`=1%vFzLtf`(Ly(T`vX$G*_v8t&~4>$1Iru1 z7FP1C<#4<3*{wz=Lwf=CuAIf0+WA~W0S915WK>kg)4}m3IUv|`EGl#3>iN(=`U3#|*sF&F^c zMH#?W2UyR8H4RYeKh=wM8PttJm_73+aQc6*$^BCe+gwO=9}JG1b11`!yH-NKp#_O8 zm~KGd!bHoU5Ie?l(wCUT&I$P5=RkkJ{ z->(TI0kH%yD@G%7;VzY=Ki9qDClvs$3c7c|M;0QFPPuRo!dxTiiZ#ojn5>%W(1nfA z?>Rxh-@xiRX6+{j(vn%B!*V&|ur!AELj$7fD{iJNM_lI$G=hZ=sD0(NX?B_qb^M0f z<5RnTSD}WoT@yb2p6Pl*S&?SA2AvwH_!E9L4NjM~;q3A=;)LMH;H#Zy_YLZE64eX0 z>PoDX?(3S$_a1o(XSJmsb_3Mo0nQ!H613HM@D8qL8C4V$UdXqNHuh@p%E!&@?^nUL~?BMUZrqF3=pIe)kVj5uy^i32ZY=Ax0_nzMT&6rUH zAZFB1HQtM6lxbhoiv+8gYh`ZamxMW@X$2R4mPcc*HWeD=~{FC8NcSQf6 zM_UzM8c2(pZ$)C4|A)54Oh?Ds7ukwnZvbaRp@!t*^WM9X$j%PD{ zJv8&X_G>DN;HM)$M}gET{m<(ZN#-fEl4BH5VRGs~;WMecmqFNbq8%)7x6BgZN+hR2 zblj%YoR}Y79rU}nN`q@Jdx#;%-KwI{Faa=*B(s4Ef?FFOgyLATi_kgwoLLfhk1k=L;J_k*c8`T-Cqp@Cl}giv5V{+i(+JisO0bWHpTLZKZWg*mOd`{)e}JS!FuWxfqq=&mMVOTxw|HGa_3uDGxNe8Mi+YyeAV>V z6*p$(`-hqQAi!+m)X$lt5)YOIqXnx{FKl_-CGFuW#~#beVaFtbt~^q6w`OY~$+XqQRv zg*;X+iUXqJa5fojttBFqHUR>g`N+fD+{qk%0(OZfq^kd9h^*u@T)ue{k@FXoUu@%d zj}=es-@`q`fLQS+5_Zr>f9D#cP-n5DF+ir^a|AyC;#YY5z^xpFIAzJf2^~KpIks%W z$jC<6cVR0*@N$w0o&PF-X-fZ@gk|ALyMcwE2mY@NFjJNZCS{Wbj2{W+qIJKuP>qo6 zl&Mn_f4JEyCT<{zf4(t_%V3)GIFXl(sc;D~s4nhNT<;yV6-Vd5L-Y0^8ss4660q(h zNWsx&FSvxU5qYI^6F#G)P4axpX&Ep~;>M3DKS4q|PC8O&pZH0iJ0o$eXb2$r^AP-F zBMt~9@Qmya!Uy5Abms7UBjhDX?r~=}nfDMlX{yF&^!*9}qY4s)Ue!|ZCj|>N5`$D# zgxx21G_AVvH0LFeYG+g*t~`d7NcUQ|CqfZ(rrY2;=p^XCMvCh<61%zCKYjMZgdEEf zPlj0ItLx1pX#dTcyo#fW$P83-9zU=u7$axkTf_MAGX>l^Xx}I|p=Lw|esF6;NaGSf zYNN)D-soU;4=@TuC(<23!^7Gm-@ni=V%;v8;L^0keaV%Pq;gRbr?B49Z|BUBnJunRX#HcJyXTU+RoLPBY@njTNS{%_g)a& 
zY`;v9dJZdw0&^}{EFF0y1;J^buD;y)iiMn?NGgC2N>Or}N6D7JI_4V%9dj@7GzLa` zX|TG=tb*eRwBSIPMUErSe)r-*qB!%WFy`8zC?7QI=h3XjSlvDnvO~xO4zqUu0?)Z3 zN9zVdRmK_VamtS*D`LBqBJ$vNTA&@Z_hGi!jS=yM*6_YhS{}tGKC8yER?X zy8*pGbdlBS5>d6-Al)CYxg{dgeXmh-d`4zD+O;s##lbETp{7%IW7NO7x(}~jD+Z#5 z{L1+Zsg?_#&S(kgVZplf%UA1_#2hZRQTA0+#MEWACcME|96+$|Y)_;4bNY)wVZB5^ zD2vq|rv!@rl>3HFEj`L1s#VKIFG16w+R3X%P;%TD(qjQsy03owfRUgB?j;f~3|=Rb z;q!fp)o&|vH<-R2=e3GtV9F^AGvu|xtcF|Q4z{YAJL~Wp<1$f|qXsggnCp!f)fcY; zBr7;zb<`nv7a)YG^F6ljRkBdZ?;~J-)v#e4RLg5f`Ld15nvy17_@ep|3+1&668HNlfd7|}$Qh$bNBnta#E#oe?5k!nu0e#7`62!Xai@a{! zQF!Cef6@)<+@{E{dnBRA|9qjyPgF-vAaBZPUt@Y7uk1RYPA81Mdn>oT7kF6C9sX2e z512^BO-Xk^U-73JQ-^)XI!c6n%OcmriWXs{xo+=FI^0Lbr}%|L)5Z;cYdDHW(WWJs zc)?Wy*w%BWyKNDZxt28Twioj5>Z;qIfO$(}K`)TQ^#hM-^QiAll!N($m?kNl(Z~~l zqJ9&oCgTAUWy;P@T0~kjZMbR}axg?`#*%N!U~F7PR}Et!3#3J zdbWE~KTA}H;)1c-&dM)y!t;5P)7@I0bA~cbo;*_6wxO4ZTHj5ESP6QTB;WPnu*%5!qn3fllG@XG}Qbn!q_f}kB5_6 ztdzY>7`s??A#{Z3Q~JkDok9^&$e;$WwK%VW)UmJY92i_`;*$(G46V(W-~eN_kTW99 z+LCAijU>Y!mB|5Dh1QJs6>E!m&{Q1;iy%GhNYS}?muPxvu$wPKQ>!JskDW`0GC4V6?O6&Kg%v0v$#rJ;^SXz`$(*mo66*ItgQvY6=Y?#ry$Hpg zr06TBV@d2gGMd-TpBnUrGx*=lBrKLL8Q4&j7(E<_(p!+I6A!a`{APz2OP?_WhNPPj zO1at;#W_@_)5kwv=1f>|+2)ji<)1fw#J;&kA}*5Ic^9NY487GA!;vX%I)!V|7cti@ zmi5T_-@KUqF>`KaN?`c7MU4qND(9tp3A7e1KZ{k_C3-LX5}0?7cS%fvn4?!yu3ElT z(B>7*^|jW1GjL07k<1+O1D$%b5VrAeKQ}w9yH~ZF>(=hf4gn^UDg}6e^FRqI-dN86 z!_zs1R~lyPHnwfswr$(C?UjmcTNT^3E4FPrm86oKboaB*)w)>s|NJK2F;t_Uz{Pl7 zn$L+&5?z`VT{hN-P<4i1Hyus0+a^5AFck+fyI3R3juj6`Poh5Zb0F`1CdoV=T^G-b zIzP@VD;{hb(;ydfHF2Nxbqz64(J;%HIT6A(*owusnG|$5Z9R5d9o^NrV;03@hi5Wm zFf&wnVXj}I&Xg{+OdpvooW~o^>6&4~xW`K7eElAEOWX27gRsRs-j(B?UG36IXF$%lqJblS!b+rq+K^E5Rc~rg~lXXZK(O?dhP$g?0$Wkf^9T;?B%PQ#lQcshE z5G|GeCczh+J{)1up*T0WGk$7){Zsy`84m!#5q821#svqmV#|BI5kO-5b>9|Yq)JnMEL zZ8<7&6{V>h%qZhR?$!+=8hVV7LOKamrY{P$lktyDo42N%rbULHIYtfsXgpL7%klR?mR{h89dIQ9CbX(U-0E1v(?FDvb@)MuX z6THSas(p=#z5G&g6MrzI`E4cSg}%5n?S~HM*Q_%cBWn4UHN=1mWs+gmyyu#p;?JH5w=fZE8TkiiUHg zkso?wydt6HgqJ3hb8M>Kv?q^7Wv-;RbQAy51`#sc5{Ri_2&XO7%#C4`@Lh3eHQ4$# 
z@6yWA_ETinEq`K(LTQhflV?C%EmS`c4!>Ax(~oK0i%8|b|LtA10yiYf#^waTLjeMX z!xBZ$UKZjllQBjiw`o4;*9M3He6+a#j5S{fD;LG65vGz)bjOox3nJ*f7rG8o2o>M% zQwYB~ab<{(r>O;dH3HONY{cQTsWK z@`%aIgl*(#;gCdy#cV|Ce=UC3QY2vP$#mF&*&CsmAnk1dWASE7Cjo%RysS_D6hW%u zab%Ev`eASHj%C_T^sidyOeeb~emMRKUo%vrCu!GnL>^G;F2w8He>@B@g1f zvMcpNQga&QpRb_Wo%*dU_O1!c;E+TPVj;_VG8(5oneFmI~tEfya&_2 z*Twwe1di@RFc-d2gd9bysT9kOS%~4#R7dbK>73LLLA96KNwzwY#gs?{&P!Te$~15C zn~aQ_uKKBIVM(z{@IC}jW38Oy@&l!+*UEXMYN`9w#2IMJXdJ#p`a?K00nBaz9!Bnf zo$4358WF+^4JDw5GUTs!Cx^T-1n-v$FOHrZRdg78fqhU_>E1bl4;~&Yu#eDa>tJ&U zWTBSs4m^Lv=ATd;&rNCPt#^vy$c+ay6n@hID~OZ1Q~OJIBB!X@+!9F%1%8S!oJW%? zAYIy@e#$NijDrb|8H%kJlYJPL?vV{O+~~)4dKO}F0S?GI@6l?dcx6b#$I}B&0}cxr z(P@C=U>-5(fK76Sr5rV(n)_OxY&={+OssDBH$cbBlG7D!(WS_Si#c6cRaE#wgA?nt zvk0M^ly*U9z3#IB<9f9>w^5=6Jg~t6H?uxm?m%S$rIEEtg(2V&Ug1$s1f1R4uZ68n zYZoIhTq(4yU45^R4YpCwel~#b;FVUB+vK zX({kW3%xLibLPP40RRl*-E;ZBk2*Wse?RI!3p=n4aF}@|_OnC5_RRbL{R_|*k>%skG@!`Pr_nh)a+Y@3AwE$pBfuVgQk)SilMT#|X`v?YnJ>0o@ zxN*F3`RXf{fw&fCooUtua-x8anYUZdIuePNVr>6C@ zaxjgZd6ivaE}1xRW6K9Y4w!!vAaJqv`WeY}Hti^q6CVy8H;xNZMk{@|GiAESjLy~_ zJZ_@6v2yperdeoQYV$M8>1z+u93s+wGyvM=AEE7kr+NF-07k{v%V(MlHPcbK5vo@9 zo$}7cuZr%dI%1ky(mp9$aGG&_XCdL+t*1g!B$r2C85EntM)+V!>5k%e8TH{YN9azs0C+an*)k4%=&ER zXQXls6QFH7=Ek8Ajoj&SBiirDBr0zqocrLKI!T{k5$p>hyor-{WivhvChl}56SqhJ>~Gv1fPiP@Sd5|d+_xf2O&wM zwe5dj8(qe3YJq{S_WuJS4HhL0=o-a=J8GWVLfYV}h z`*CRiR=&5PGyI1==GP#*&Gp%Ck~|6i(tA_FbKrjUVh{sgQ&r=WfC8p~R^reTyjc~Dl?h9YwnkX>llrw_DX1WoYj6RGpvec~uG+3*$;st}WsC>s4 zojP9$5FpuqztiJM*#Lzv0ct~8xk_zfzuWHWmIrfNImWwAj-2?HL=BA({I^H(|KZ}k z&fpi-Jbw&qYkc*X{G(FFd*x$LRV9!KB%k3>T@1nKx%NkGq z;T&sgqtnWZyddhRJ_?6j(;UMdPCJz<`&98zG_tIr@eWoD7-mg7x_Kw?GW05W;u$$dY_u<9%)s6c`ZXII6erX<&JKaPn-jqi$W z|LiiL{?Vo!5!pOBHTOtKH>4NMY6m1T=sOncZZ_4@?eo~CD`2w;)s^f1!bg?=Q)GSV zZeur%j*xUZ99SU=N)9I^1dRx0@)KZ#$JZTh^2F%X;E`PF*AhvfavBE*VkQ}FB;*Tw zebZ9D$Ujg0nw44tYhdC?ze~tI6+|Sxu;m>w)6l4~*I}(t$ZHYGV~e#nSOsWrUJUN& z?W|qy!eul(^C(^bcol5Dt!f%Uj1s%DUDB}6uyeR79(PubD}&70*#-3$8xfN|TQW=>;6I|u(ydg_3_PZ+(!D8@coA9XLv6{joH`~7D(&VJi$WH7Sj 
z4M*2U{g}z9AuxpsoD~>@m!-f~bL}af{&^C9UG8~oUnf)FX5?-aOb_2*N`B z4rZv}xt?3E-O+7*y;DIIfe6(=1W*X8Z&~qMxI@WOaHBQSOA?TF?vwdjRyGg#6++Y@ z&|AOO&Q^~gmm>D-Jc*t1%DQ`*#vZ>>G|YYFgYr%l3^Md5)VG9z&gP;{q@+J1kwt$U z=wU$ae)ulPGejzywDQ|L?sJ*PkyR-BdVnPP?My`f9-3~Iy*;efxdIIH%bk-1!!%dq z=B+uWV@Gz|z!q@LH6yfFVXp)DC83Nl))naMVH4hv2sGJRWIxvdzC9KGhd^rFhQ_%a zC$xaEBP5DJT30sr(#PB%rz>#s-=SboMnv= z#*=2S)+Ht>435&XBdB$VE#?Oe;Spuic1rzS#Bd7)xCp&+%l9AA1L1t(xQwA()$$>7UPS0 zVDkcc!cv-ig}U>wUNi13`yD~Q#-ZO{Y~)d~qa&33MJA$nWA{ZGO!Lg2i^uR~JICt% zSZ!24Wj=lNLhL$T|Jr8K>a5Q=nnneemCprpa!Ez$61JA$`LB@})3x@+LT5M+3xQdI zBRGdc39F&q{H^}I2hJY%Hq_!J#qHE3E@uzG^GzB?ro&GgodZ*Hh9WKW1+W77->2OJ zIYzbOsCNc6RIe#lY2W%rPyUsZtJ8-`&DY;kL0p^zWd4f5tK(2D`2 zK%RLO!Q(;p==VT@Isyem7QX3N2z{Z9ACJG|9zh)l_gY1vKE|COp+evyp#B~e zNNhtKC`%SLfZuwydX96+Vi1<;T8sk#AQ_m=A1C%+ZG9tsZYrKva0)FjOKMd@4F*+4 zt_jc+r+@5rF4h%}6VOvk?^bQYXXXBKJ^!FyF5#Csnl@Mr`@VhH>Q%poEnYDuuR0Tc1cv|(}vAhcg6tZIzDMrw&lN=KudO$}D zbqIHKS5P;XgJq+5mj-uyD@NMzfT(jbCZq*ecf7OdFgjP7;H%rg`;2gBuht#4M+Cg& z6gO$4y_{}`S~ti1g+q=)m<2Gne*KC#jpw-mdvzZ7$Ch*d64Wj7`mcBzHT|+5jN@m_ z`d?SWw6?q>9w%JSZQWc0zZhv02|2n%pkKdLAO_R{n3e6{1g_0ttIq5VTeiQ%_viM=_PaD1ky%JcjiV}nYfBQY<$?fzs@bMM`TcW z7i=No3s-F5n#HQB#Utjb#aAqBWij4YzHzkxi@{`k@J0y{&}51lE47a#V`2K}lzVcG zHKia@3lH`C;c+nelyRRTQf7Z}8?@FEkPp{-2s4?G(7SjWI4dX|7+qT@W-%^cTT*dW za6_n83OURQ4Ys7WcnM~rMt@~YY#r3Ccn-zNgCa2yB&^y<43q*UPReGz29 z;GAlR&r*>w5iu`9edgRWST^{e+(d$W7!M8_-Wbd@g=rJ))(M(?`_Dv0oSto^UTTnu z*v*_Z1`IxAuKEb%G$ctDm~^SzoUPxE^J*+^)O79O@grFx`q79B7dL z?CgM`Fg4h&?19ku#dD2RZ5{7XkrBVy@i?)ao zdJy}bM2kurmQ9nATnjTmok;}5%gPa242}cFkUeYqP9ED441E#;q}et|kpu~22+~cn z8DsctAgSXwX_TzYU7q8husIk-U^~TKk`%9jM8j~@U^0;5mv4LG9!t{ZIHM;?0^F-FwFkPDS1<~3nc7{UXmi$1mViT&Q5r_>G5$W>|+fxwrNodB2Uoo zRx=ZMg6h1KA9RgL><;|*T~J7*S_AA+=4isIcY@`57+<(|0^(7El+8T=uk=`JSy#N9 zJqL6S!pQ)*6MKL~gvpjQ$eSX_J1|BDB#4bV{NM(*4e0@5u2XcgsbrSlEcBrX78;gU@zHRt;l%n|6 zw$;`&jh6&}TuZs$3Woq`uz>GuS)XVtq0*MvT&zu-#DXe~&A=QdpeVBsbUq#Ny)1%O z#2hd!SCT1LQe%(SK)HQcVbyNrxswVL`;eNurE9#9$#kAz!R!lQc*C^=eMver!4t+9 
zsv){0sMfG8=ec1NpTg)uZ8%+7>e(vtZ5IIa@ZGwHCEtwQNWVfc9-R$Wb8W9xB>Tx+ zH4lH`F;M@FdMHfwkAmwc{-cR|b``**`#I0g#dT(RMv&t$vr=_a?t6a~`Pu8+wch<* zc*(%{#pCaPYw`*BIC0Fl+0C)vG3b4;yu5kZ>1q;8-+pCknqH6-AWF1GFhCMKh_Yey z=-E$B>tb8Vz_cb1g>+d>Nzp#PzP$M7!K)EzKBZu(TzQ+q36|Y<9$+z zzwHGzTY`N1OCmT1KK_T@Bl3K|xF)Oj7Pj>N_uVkafl)VFUxqiI4vAMu!>aYE9_X}UZ zYvPI0o}6Z;_4NOF!T#(1AZrBZ%t7^N^pnK`C9lC`-0>wL9_cLJU0<%>v^kO-+AwUu zY34y|=FJE~>#|5MpI(1|>yeJNyjxdjCB-*Np0o|tnYLcYwc4zQ8Co`~hubaV`Ije~ zWFTc6OLv4=qZ&@^e1kB`NjChi`{n!r#r^xob?RtYuOCTe2~|`QePRX35)u(Z`qp~pdZr@+w5}?wu4V9P zdT_Gpi4MVoosDifCx#`13sVE$4ap7jrHhF_R-XC3nVTHG=U*0V#uH(^zv3Cg#fs^U zkaYkCNJ9v)a${HxWv%gFfu^yi!9Q9T_Fr#p7!p&iHFXy?9V7ObwTbS`UV{B$$rn*j zOhL#knR@1^w84LCBn5rrBuC(MKv#fnjZ5B9Mj*A!!tp&J*^q)#2P@x z-s=(=K`U}1n`n^9oqh@g*Q>hSX!xHhDi!82`lIs z+g^nuW62szEV*_N8WFkd;8a_4e{Wd$(_c!HTdu0=ikP~Phz6P5Rgote25ytQ#myN% z^e~LG&TbaZ47SeiTc3uXYeE6@z8x?)7!K89@ry~BH6&bFi~oe9&kkg$aF-L!zL1i$7&`lkH4tu!ch_A1Ggf=hCQ3L} zM7TWG8%zOwIpduZ0a}yU0`Gg!)3|#s#k&ZV2ly_&KYL>%M59Sqy{tT(^*i81lJ3rX z?$OOH+V3PRaxI+N-d}w*2&UzxFwZbD@@`;95mGr<%i2-D@_>G@v-?5x_*fH6ny!$wwA|oQ*qk+EAHZwa z!=UP0r64H+p>v<3QIO%bUf!TmywfOwjY*AURtgz^qEYqH7m-X^?gQitX;VPa=%PYl zCk9&=jwjACkoC-870uJQNrv>owamX}ji&gM0hRkN*H64*_0q>nYS0cB%Pg9{m9Jb2 zb!v4(C$|fSQI765=64lkJ{I2DfxCRFduRyxNsd|6PU?)2V{-dllV~|VKg_CsXIMjV zc&>A)-3pD1_%I2|0s3x}Xk@q<3!)Yf3ld1UPlkOu9_D{db6cf7q1)MyS z2N0$Nyj+u&fu0&UKi9VSgj}0;?k~yorz=0MoM3RTT6Pgm8_nMUcFqj^fLjDhNEC`R zrcq+#Cf*U!3GfDKM|gz`U^W%S{3r2xu&b$fH}K?=hdFt5z=`EAf)eSx@KxnNw;wnL z>*MOcRJ!mnha5&`9?Z1Pcv2KyWRa1HK@TgNsXzR|h@N;XnS?#@K04nmFNqRx=fm)T zC`b$RVBGnnj5uHS%si5t#@6p&GuvMT%LV6GR>us!jyHxm)8^EEU+*Hj-;4BzRp-u* zke4DuUXboY09TW3AJHWEhU|2QS0*Zxj_E2Eo^TjvIH!#^YScTkDDTCT3eZs!9~)>T zB|%Gr?YoV45L*iFyvh47f1saxI6?RMPo;n}a%1x`^`(XDdw_ z@Kg?`wL2y7TeA~h!Y|V7I&9)o+FqcW!If=jsrC#i0Lo?V3J-V~Sa+Ztv)MCFfmv$%=UnAp81ve=^MvOH%0);! zyXS69+UOf6u>&R!xz3>JMMo}@X5%mBTX^Tl=z#}$rccvhVsPrs2J_@9&%0FNA>z+t zKg``CX72#;V~l0b^OUgf#dQ?{R^B@mc@DUHSZaR4@8% z^INwD*jN-VBK_`L!Kl|D%=&>G1^7XCBv<`sOB|32QufS4NvR+l$-?jWcDW<-X}4+? 
zHqRE9&YTIH7-N>FMK%~vSJqnh!>6fcJEQ(wfLzK`^#)Pw6t6Z{F1iob<6$Jh@qUSf zd-pg?0m^pf^Vca;o=sNtR)VwxoMf0#!n^j!7~{Hr$~eo@3ZMLEgST5Tv-NcuH-Il# zM@@;uR$2QuOQo!$cPy$uzNK_+DOonGQm9GvB#v7V@B)NFb9G0B7vdI6|6Cd-J1+Vjy7CNpI+H7OUv8a0El%2t(s zD5)dTne#;2xbnR5pTRJ=4tpVo>AIPfI(WxJ{ac&i9fXbGU&Fr3Z{D;%UX`B&`*Rm? z4;6mz{J~b%ms_4#TLEZ&qMl+osakL)B0T*qe>g(N;?bKS&KZi;f2(x5Vi-8aj}OK< z=jVjgwQBr7<7pyBYHD{r-dAh|@`}luNX6XglM{D!0)tj+Ck?rl7pM%$vGC2O!8lK#+uty}ccOFQU$g#0pDT+B;ym7Nv|)sA`cBwVj%v z1ZD*=&P@WOQ}g`C54U;N)s$0DvfmIncMSmlUQP~fF~Lgqfo_VNNFjk~B33c;0sR9Y z?|tx0CWQ!J#&tT7dNy1Z1Fx8X-hO#`Fk34VvmdIvszPM8x(*L{(KUxGD&T?q&mo~c z>kKDglm65MLp}0R8)KdX+F)UHzP+X|-HCkwNn-NWJ;~k#13(A4n?JQyc)S z(g+CvJehWCcfQmE1(t&CKaJvga=JH5zKQheyk`C z!?w=~YnW(eT86pJq$bbyb~yW5Opp^S8|{cU#auLE6q`Q%~HN zW1@6cp(v!ARu#ZH@@sP`K05&1#Dvyo)%A&O&yNBpT)BE!pJTc09**db*j@f7)SghF$TqRw#uE z^Z_GhLgayjIUB+wToNkLl=;UNu27M&rq&Ep2)t07l|>;0Hfg9k%6%#vgO_FW!q`iw z!4LN@?OPO2c|2UF?lsbnMMeS9g+r$ z6DreiQ%eRAEw^yYL;9dhvSI*y#l$1=%mOd`8z>5kn@GTXm^>y0Zr^U&<|aAZ(e>c| zBVilG{zUYngdiYvUUP`~-J4T;8nA=^fpL=L=y+@3WKmqCxH`E(;Xxf4iYU|x{g@L58B(Hy(c0y(6m@;|Xta~wF+~T3pu14YcKJL3>6eX@Rwl3SB+}x%f$0Z#aLSMx*Nyh{CmVke0$0;Ma{pVw@zvq}D z9^1#192y?mS)NnMV6*1et6f3?UK?GUYjftVQVNLHPw8=IPqB^)FO5nCq9c0}LGId-vvWIT^J%N5^LmrfPZ6Wg_c zvbaY%0LB3h%R!NwLy~PrkyLza|Hq9jfE|!y!4w#qs9cQ5oXvUjmZlt>BS}!IKtes| zG-Xa2skTU#m`?;LbJ#oy7xk`L8&y&*w;m^wQ;YiNwn+ZpY`5Q~SdxkTVsV=S8TEeu z((yOy#CMRG%Pk1v>dM`~$w#$66B2=(2NzOYf^skxx)c&K0+cX(5v8dMSqT6BJ31f} z4;d(DEPgw@FO@T40@=Ls;0i3+w^@KdGE00q%vB1S2~zD_LZOD#uw3HUvO*Wmsk}2+_wj3^*|C$&P%qk4+SgIF0dTVT!O{ z>=rUxwkzg?e75QI&=)vBBPU(%+eQfA$OF^deBm*{U2kO-sy=wv29IX^I<&E130r3xFKO8Op4I{ zgjW14lux05B&pG`{SYlbi@8jL?oa!|?WBu{Nq3(8?=h}&*iZ82|A0HX_Kt_6$p2B$ z1yEBhZESibS>qC=U+SYgAuYYM$tuk)=$McuB1o@Ozn}7ch*(Q2H&Rs!J6mGakI%b! 
zc)VujT3S%!(ShLkwfp#bKA-SM2ZNC(P43+g@6kyhQwR^64x?ghO$RIPhqGiWma;_+ zd5+jMX?W+ZXeTNL^a4KiFC0BJ`wWoh!a)C8Z~l3> zjNWy?IMjVPhvt%RDvI?Z#gv=9Q%eV-^sb@qT-i~Ak&7tVci-e0g=m{yhuKD%n4Y|S zg5{Tj9Nu|w5Oe7{$nYY&veT}_i>KOC7R_&EA)&|)7f~eu8ibt1;-+EDp^NnFEK6&| ztjLi?k-2$V*5tz%jZdfscixjiilm(8(22vg-@pEr%kXzLGfaMpn{x^0L`J*KMhGR% zdA#1Lu?Jg|lS~nOIFD2azUg%#ErAxc%is*^N8z~ z^K%&b6#c;!ILcX!Uk22(--4v$!7h^bH_ue`?MJKjG$C36n= zKo11+?>;E&UsX4vEm4R`4j2mj&jJm_f^s793DH&8c!VF-NZ-L$uLMpkgXfnIErse18r%Y<$wIqhvYn0ib8foU zr3cpn^TXnJDO=b=WN54_jya~Uuy)gDnZcD%!$SowDop1*8F5TG_o+iX?Z zindfCV9}aZb~=Mczhf?QMPK3u?Cwfl`r+IFNK;|`c7!Aeol#&8*(o2{l{6OGI<(1N zO?TW?>F-hD&noJZFtCtWlL5(T3If4+QHOWdAk?Xotf{kGn6u{ZP_ykXh~m~G5K$T{ zM=kPw>J1wiIa7YRd|K3NMvMd;RjHqq@1E*aEvY=BUVpVPRQcbS?kcjySv$+@9C(lb zSWKi$djOw48OuJcc~fYKBiCdd%eR9KnZ@NIDr1b5d0Oj2DG+2JF_<+nRGSxU`qKm> zihid+isAahR2r{9@?Fgq&7`$73Z`;^aL%O9#brd*wbCaD)R3fh-= zEyZHeT2Ra|mtTIxvftI2SddinF{)uaer{aa14*$hun#V|L-cYoj$O%14;C|^kQ$gs zovaS(Z40T99=nI+2%Q)b*<_NU4$516oH=CciKryL+ihJn8~gkL;yH8P*yq|yCEWnF zA1v}vnWrBP+JsnmWWIRhCaX+v@=e0u>p+Sa)s7kafu!dGjGJu$h~iyQk{=0RqK$%O^jy8mzzW zJxSQ54=9^tH>0u+cwC98S>hhcZ{rSclO%v%%6|^gY^WQD3KBmcY@>;G$SF%^4?Ys}$lZxCBj?sW)SfI6$eN7<0 z3ZQmx@P13Ez^2S3%+gs48B;f|tRYjS0SRVudvNlJ;|vsJsIi;aJYIb-Gm?cWI|0H) z7X2!n)R$HtwWg~d?RaJNo_FSS+D+o`tj{7tSLI{>#57m>*dZSgkt#M1NW&UV->GE8 zlVdFT*~UucoU{>&d_%yH{L~OzzOME5+Uc8!Be)uG1hu}+kZ_kELi;q$N2U!d)H=Fg z&00dhHYd7q3p40a?6znV$@i)SGjgHARE(K&>7(3ni@(IiWZhR02y3ICv19eU&V^0K zyH2{_bJ@GQslmI8KU%Eu4YX3DO3g1Ghs|8~C7M5-sC|xd=-Plir%VMa)%I2tU7N7z z(lz^AquR8Qt<*4Cb;rrA7}4Dw%u)>m%@5hy-}3Sw2YT7g(B2T&F7_ZQ6o4@iCe#)1 zz8JJmQ=Rm6N7HahZj=L~I03Gp7MG_ol5E7f_O*HRuYtW^Ku~wGYbze|)IUhxjML{_ zLDADOPtt#7%gX?9^PQYSE6Lqr1SO8XrX~@(47Y! 
z04t_XR2C3flyPMPnvQU?O7DqX_-7a*w(&dW0Ahy&z*XBN|Bb|nRa_i-Nd`O>rMxAS zEeysVj?0!`Lx>9e_Jhs96*;hI0W)a3S5SKY4y z;@{v;FTG2N4DY@0$v+1GMl5}#`u}9(beQ060|=B{osVpyboBKFPz!{jM|d5e3mH(FwZmaU+Vk2xmJg z_pQUGd%{CQ`bQ(Ymg!^>alF;U?l zP1J8Je!_1L2kN@*$EvfZD>fc{Lp6#O2eOPHNGOY>A;EOCtcj9aw1qH*~6V*G}1W2;ZJPvJRF_^g;zJ?pfmGJr} zU!7X-RUa0e?VVYBa?ZxEXm@U9?#Hco=Tk5k=E}*`XrYZ!wz#7|Dz4MM+3a;d1V6s* zl}-~XN-14LdABNqx0%f+^{FdJ6|z*Y|Q!Y_iDjWoWt{#;@YmZ^@BfZNiUje?U@W6{v({;hDF#wiv7 z4_wfRY^`}S?%IrDN7_M~sM+RACeo~`-skC1FGFRyDU)+o&%FX+p@kSAE0qskPlD9# z3%8dsWRf`jbML26*Qq?twAg9CD%vs$LjqspAiuGZU6J)m0nMelV^!{h$#o$(Gnrrt z?{y8X6TDwj4VGxQr+A>M_c|Jan9SyG3mLwf5l2jst$e5mc$n&@NSK|kc6#R-fn6ip zYK^XWr)6eo2y+EvZ$2`yRG>59W+ITCRUdcNXa|BEKqL~{#nfS6xs%a}yj@nrtiVD? zG)Z$dA#fH+a8>~+G+T>o7rFN&TKM|gu@x_)7x`(aFw#pCK(YFeV=LWurc8FMrgAuxb!5hrtJLBOCy^mcoe@B zm?0rBz(x@a3mjYuALycArsQfy!K8E;ICZ=Sbgy3~#MD}j5-6)9oP&5H9kvZM6?bV! z&XegjRqi8WGa%?wDf3Qn$3J#dWnLdh$#>Q8iCk#(_`3k*x)$K63u-=Bx!oc2YH=g$ zF=bTqEZpVo8cpCFO0H>;e>I`R%V7n)Syt)^xLmHGDe6&piMyQ;L<}~pnhv2zHZ?KA z{ADM3!GWn$Imn@|1G%{NqNW3$o`g(nsd4u}6${Jmnc66tdq6+#?Oc$XZu3nWMu*FY*D6X<(UvV8#^|j!c!?_1pAFA5uk(rvZ z3UBSxBiFsuCYGR{2ZXd{x!Jxda(ezj4EIVJAf3)Ra!#k6X6=3Xdpde*+rKeJ`-?zq zp7QIe|LaOx^qDswj;c|xpgr3_=CWS`fFnmlOdSlq_%bz`BsxM%g&koM5yK~}8Jy0a z#8XD}rIULO{K^yq|7l<&-x_NLpVcRcE#C`8D(R2i`I4 zk-A^*8jHJRYNtM<13)BXG+Vj_*OS0k&qD1H5e64DB+z^O3~P+hAT~KJTx~f2^~Z5FDG594_Ar}F7GP-)Bu6UFjaY4doxJ( z2Gj2zj>GCzan{dV9r6)0cMYGu999d;7rCJ*9MIY4177o5@}6|QTbZO>>9E9&3LruJ zs`8s>X7Fu+K4GV&x7?M&udsciLW+{c z2c4UKXb8p{l^NkgaDUh4iCnZMbNe0VpJ^Fs6Z=>$U;eW%SrmB<(L{zDSO_`pXdwR* zj<>140#snlfE)$Vz;#9VieWLn^Y4QH9_V2(DR3gtPB(gJERU*~YT=@TdR_kS?Vh>=WmY9XtCDCsY zFAbnR9{;^7Uhk24xHdMS#y&HHYegc-R}P()n@>&yO!hW~v~{OGR^niPYdv;U_0TZZ zUev64-*E%e{ntf}C(;Pb2%jgvB@c`R3s7VNGP#tJN|1q4kj@}fmo=cQf`EvAk{fdG zUXn#qSMEXgpwvRAcA*dYw1ILme{3N1q?%$Wsgpz+?sW$YZ6H(mL?zpk?T*6j5euI2 zE2Y3KMykul;I1e~Xom8%=80}rHt~-vw=<76Cv`Q|NRu=%DEpcpqD{6U4cY4U2*5_^ zwm>@+uyE;BAh5&`PqX<$J*^lpb()Q8F23QE}5dK z*1>b3zv6DQ0QxmCrhC7mfyF=XNRW~YeufSEQsx?|i(Mfd4`p#SgNCWMuYpz`TwauL 
zfb}iAn|@SoM>F=lOpYYhqrU;mqy8DJV+skafw{QXtv4J>^@jcSGm3CaIDqm`fx?%) z)`B!n5uHXD>tj(&c!z&c>eTb;>a*3nf_HYgD;)6jV%f8j`bUtlf9d`T^ZJd3Vs=0A z!AQn0HL?!Wb?j+b9h`;Zv))hH(CKFhPS?B28Ka>evfGtboz%{J$F!A)F^k^m zZy0Z7o0@rHD3jX>E8;lgM2qGN&ieCXHjR0dK8s3gb&};G+pSQ?@saa=BleuDH*lROho?{C&Tv%c(3K6GbE)Jdr?PnX@=YQnA3c(} zSEjEK%qpDLE22wA{(HwBsOEFx8>W*V&h8w`trrSsV0mvuA8i@|$!dsp zC`b%t7>f=UeTDZ8PYlpbnx)T7b5l7rTZ398bjSt<)F=v~AuX<_MVQ|EmfX&G$fjY~ zS~pNQk<;&Ff6(-_hrsI%j{ZNU-Z46pV2u`zZQD*J_QbYr+sVYq8{4*ROl;e>ZEKR; zoOAAX*SGpdJzdq+y;lFI+Ew-Jz4sP+?-hFcAQWpHiNCNbO;=;yRZ*0d{>jnZN2@)j zR!pdGjKs}2a_t|cq&zCooq-tJ=*?&;N^=(l_KyG5 zRCWNV@q()HV`w=3r_EQPt^lG6u95kt`CJZd74<)ra2nWk?Xcjk|1U=UUwnL-O7*q3Tz62k&cLpJI`5+@tl6N+U< zLsGIc)Z*=hf%!xXQnRv7jlcSt(HQ@i{Kd@vKUMV9qbo3E06B)RKfE@`Bk?JU+iA*k zU!tkFuuWS+meKcT%zNE0uCYsQ-Qg#sUM0ExyK9Lr_Y%{@crL@k&X$a`b8YW zQFpNA#!fSG5s*dVKFQVrM~OlRZ(u#7NHAea_n-KvU>D4Ti%=}y9oj;i zZ$4~52vsBtq6h>H0u8XM%Fj>5x%qL$v7!l#Za#HQIMI#9iLe4@0Ip@bmJ%RsBfuGc zcN@~LOzc`nxhJx@^w?4?l|2$WQ!TF{Uwetc9ak;9q7S}nsH{_N@mp@l)Htkc(R@F_ zdarPv{&RS^!bvkoP=BQ(kJ+7)#F4fOPa$TWLS?nJ3PBoz64C+O3=b@$wlSWjG;$gh znf;r~w1nxo!*%rLWoDM>P&Pc$+-g4Edj1z{ti38`YVY5Hz`A>6H5|j>x_c#}TPXXdH!r-!cXoky=GzwOr7dH8}400EgLe@ptM zsidDE;l?diKkM7s0-o4G>d9HE%SCwq^SpAhd}d5POQ~l(>AlJlr5jj4tIz!(yk+oT zAuZzS&b*}?3ECp``JZ7Nyn6U2T;dR^M6x_$s%HJE3vNT7ACFai2LDg|)4`*DJm$as z;?#Vr8c3_-0BS#7k_js>|G$@v(EOf12Z;W>;t8o5tTg4oJV0;W3Ptn+oNsy?y3Al{ z#p?^-0AH>MY0~INUS-5>8Ke3uaR+QL36{9Iw{#QP2#P5v?Hg$zbmxEJ3etf2KC;mc z_n?F9q;uF_;XM@7>lWjx^LX)K}1i(sFh0**KY>A};G@TU+CBOdJ3TNVE) z9DXW}pZfWJnvb~#yIo>~vI+YDCV;?q$~(e!vOO~LDGW#yPco5@BwLg${PQzc$=!8@ zwOyaitt_Gp7o2!lacV=nlA?0zb@Y@iX`wo_;W}C9Jx7C_3=LIS8v+tB?43Vi<FTzIAPp% zYRMYHO}XuJgem|FCwf1_wex~!4q}xgRX_AQP+@(5G7Sa~5*tsr>E7b#=Dp#@>BKbu z=S!%-OsPa8KVM>D`A^mkHQ-3=!v26A7}xIF8ZH`oQ)Ovg?YxI9Sap zsh&tzn8Bdt=~Rk1A)sFm=Fy%vDhR4ed%^k)+z@VMvV;5mWR4@T5RkY+_Y8(Y4Bs`j z2@593M5#wbMUv)~xcG6J+r{O>&((wX$@;eJLJ4dnDDmf;NhXZzVK|9cFzkx|&Ed;V z1nL>UxndkMP3c1|OpqT29xAn#&Q63Tx(N#{?4SLxbtYi|VD|8G<-=K^dX^Ulex9MQ 
z|GxW7M#n4KtZ|+M0aS5CHBr?c+!>-7>-RelyqD9M|Iu8BY-(6x)@Pq;kWw+bsSGBq zIO0x}ZmUR;I6VlV$EbzVs`D3c zyjshgfc1;{0v_7FEPiCffC&PQk{JM1j%JZXB6YH3VLrusS4Gx=_W7{hr$b-6K0!a?JgMD(dGT01rEAdu`N*6kEvN2smp9Bf2pPm9B5<|K$GHDK=R{-U;+$ zbdbIMcQ*ydA4Wv@f;Yli^mJSfjc53b zc&$GN?nx}JCB13FXID7s3j_`1OXc^N!J~OJ7Eef3EF5V_r(^?n`Jo}PFcArfJ1jIZ zCE(xBO`f0900|5M8dKS{(b=95kMm$Jve7?skPwewa}=_!iH9fQSPU6AMYr>=4@tl! zgiPIjHYaBqCF*Cn>^9(NhJv}wHcZGw2(^W=P}&)yGH1s7%Y8~b_M1jbp@WgW(Ebxb zY8`G$<+yM2NhkmsPUqW>(qcU1)0DCFzrhlDMDM}+P!uaVB`XQ7o5cJ4lTk0tl z^jULkC_H|(ew}xMn277)YQ-ZspN5pk>z`*AU#5rr(!f~l!413eM49jVs zJuf?MZ%toT^-0Nem|GK2r$2qKwgk9|dz@>4dNOQiPr{Y8P(@5a)$3R1&1nhHlLXF654v_+di6ZYD zz{Z58fR=Cti5EJ9p6Eq06%e4~0rp0dMfErAH47d6!oO>%LC+!8Ar=wiuF+yLgxUko zq)MI@+u|v77df(N);lNKt4hmh4y(o^qmA3%L3wJ#H&<$Pd?*fl$c1;SifXh_@Swh% z=n;>>qfqm|%~*aV<;aSBxxG~+q07Hxj?R zU~tN!ry`%)p7-5i*ygOaGmvh`&y*u*h*dSLh!l1Js(;jnX3^OZKJtknF{tc~PJ|24~qoA=%b4gUwGx1WL-3rQB~ zijd5&=`{Aaz7hX2tZ3$K7n|r}ch`w9OY2Dyta916e>oD32t0U{05ErqCp1F08yT&c zv72Yv=-u}BDsSnG!EbDyorcRoQY0qKw;DFM;EGU+b=n-C#lY#Yt;mWB z<9MxImS8;obGcgky=~4h_YX1R%naPZv}u9-?~Id!sLvvq5#WnPKiXVVkKLH~`qaRD z1eXJ{0KHSeLzjDu-FhwFFxM(8-19_E$pmz^2pTs#*>RJt-~q`w(e!%QNU8mSjshXNxzU70WgiC$XFCPA#GJNl8{U zMny7>t$b3;7;sS#lJ&3Rpv^eFl}Q*lzZ}!cxHCJz26Z2~rG5Ycl`^6!xzTOEH9HxW zt@a(I{Xi_!XV@MNnKc9oA`{r2=hv!~{`!ffUhRC*>MA9pQw1DN%vI}fOWT9(E}YTf znd3mSlCgAs^)UL62B1=u#HDtXFdT(vQ@hZmC4s#7o`W3|Jr*@-jR{FhcTVT46eqA;gANjvPL=%i{O~;apO#&}|d@4v28pt*z|5;>$ z!;r?y#1HIEIuP)W*Ek=vPQT4fOcW-v)MWXA5jkLnC%z2~_ z9~d83&Q+3Bm`myft7;t{yZ};vkdn8@<6RgC+ls#;90Ux}?(ezF(<`r)@cd8A5po`d zY25|uV<`DPHfP`8?=$fmM2aT4U zB4Y~i7GoN5$yh%QnuScu;*Dd?*Lt}M8V@GE;~Jd4!T773q(<;@1PospYUrXxTA7Nl zsMvBB@rlS=9Xwdly^b6xJMOIhmXw9yFFr)fNV4S@%bHr~`wvW+?9g;#HMlS_>?qP- z#(dDNbKgg-qVqLa#XoALDRl5a>#r7iCYC{mH-Bc#Ucz^j%D`2@>wN{)}!9Y zFXc%y*HF>D{&gFs`j5W2(^im6OI7jNl~J|5TMLW~zLgR~rPK*q<;hByl!6V7M(8Tk z^hH%&iXH5|UC81MZ-IivKE%JLnN7dCEcU<$ix-jk+}{9xMuWYsd(J4Yl{7 zVEuzM#pqKl7`q_oBw!WU)=w{5^fGmKL4^XP4s(5CFpRWfPZ5=1%?cXipr9D=#1FN^ 
zTh^8`m{B&8>JCr_z)^N(UyMXAoTZ@bGT1G`PSt8`V-t&$9RT`A3-(VE;>SyHqJ&K22qiMM)vADn)yr& zEaMKyGo`YJe)NT+c>urjS?f2YB?CiKq%qi-UF|Y!b}%eu4egh7kW^`5H4mkpf~R7F z2(-^5*Z&hAPKkD^2kRLX%Vo~!S*&!D6)Te;!#iG3GmmF&>#ZQ7sCy2n0erxQXg+2{ zAl5ulE18brh||ThG=p0PW-k}{Z>C)^y5bOUL_OE_J1fZ)CH-xsVW{1PEf2_br~)AC z(+!Mp3990w;sE}s9qIsom-`#Gr=PdK_V~>fXFcXUVs_ga^eWxbMypu!j3gwMDAr6O z-2{SAn2C)D*2qkib0%R3R(RQ{ra9lVbHFCDg&2(oZ1!9v)XOYyIZHh56tjHh+4a} zs=b|8JU(7EQ0buVz7!r1fkfa6%yX%Q3bau;zddsScJ zNorKLhb@15IxV&`X|P>D7S$+!ywxJbD$0!1xPuK3uxk(ebrA$$ewE@1w!9zaO-9jFH$!vJwE6b2Lnp zF6luAR9Z}%ydS|@r<*|{7Vgz??LuNT$;nSIUnxjWf@+ zt(EJz^L|q#(TLp_kgea07T90UbiX>+<(i@}V1Z z@f39XR3^LWn-?+@H8-pu@zGWCvGR~a?`hw>H_Tf?ZB&;0MZ;w$TZ5L;%mkU-b{qYK zc38nn$y}Ygc$2YKm16`dtX>vt{`Vii>4{{bpsikSsAhwF_{I#c8gz=4%D|hOY+z^ZXLUT|?*6XL-&kgq5Lthwbmi==HPlyC{lT(CJ5qC5~*O zwl^|FPXM{tN6zwIC5J-F%fWM_;`Ewfva_qjQI@Hw2EH12AB%Ed8}1=E_f1l?tm)r*;W_28}EMjuH|;c1ay;Obj=h%oeWh(C;$&EuPm8W zFifuk_y}EpO3g38iCW1$4Tw_d&AI_@jB(09yRlBT_^&38>?s_cAP9I$L_De;F|2xA z!)tabp4{}UMctKX_(u2cLu@r{i@y`EMCf{SGm5F}lwhjm=<($o8F?o2EU(1srlQn7 zQ@#2Zv`(17WumXqd!96-q6?AxByk>ipJO|iav_LSXlHY84e$6kJf6+Wm|X&dibgI2 z0!z!@4Pg<{q}Tlnp&G!?AF?7!VVoPXqV5SB_tQ{*$J-R)Z(SYod|+p`=0JkpOqE~9 zH|R&aQ^x;*7c)!hfAhDQ|9`lP9qB*&SA@2ChJ&dDTa0)P^G?5l-oB=dSoF?KPZBmxU8TZhwc zx3d}#P$(dLPH_Q1TnCSWtMQD--;C8y7#%C`asi3O%jefS)EEwSB(XdD=~#qd1p?A& zLfXk$Sa1oPLE^Cf_1g4idF6+KSeX^H>qfSfCrTA^fi<>5ZUzxqR370ZDR>kXWfc{&k7}?leBV9M9IzZ&hJtaF55|N`7xv3K;q=mk2`RSX z&tJ-r^e`nr(Bng?DGI|-G}R+*Rq36>o5b0J|MLCXAWIWnPPy%!hF0Uckh>E*b3I*9 zxGnLeX2o;c3t#7{P$&?1+TXkZ;?+ZT0U^@TU*HJ)y>;{OL_d>h9x>2n9Om}Vk%FHa z-_C9eK1u@g^`5bX4U<9PC{{1kD3}X^mZwH!nLa0AV_$s|$&`S}Aq*7HD9S?7FhF8O zCgvL4B|NaM*fq9KNleIiPrV+j&vZr(sN8PuFYGpC(kMq~(b&?^h`TRxc?xDE6QN{e zn|sS-C6BOlwCstWjBWxnsA^^LEMkvDR3y5U>PHON}SRJ#@)Xc&UOh?wFZvYE^Is+;TopzubNWD69!IzJv?j1y=0l@?C9s(GC&H`McKsp_dm;As3;{LCY zTzd#(TDTfYFuhtMI%MX_JK4~+^AGeQ;Y(xEWjI6FNlsj5n(Nx+4^>U8?lPAwr~ zS|a^?*t$xN8R>tB^;mXqajl=3$&>! 
zj;QKsxTS!(Cgh2y^p>~={XKy)j3kffaIn&w&!4uqhv;Z^sT>0QhYa~uV%T_FuJS;bhy zjx2&@@w_hB3H3F8e%AgtILj+y`$*WV69pe}{l&e1j$!SaQn}IABbd0?()cctL4hOM z@?q~`82{(ntt+~wKL%O(4Cyxspb?kTPXMmy8gB&lRfOR5_{`A&&jT*_=a{{t+|s3d z0AKxivZvv{7EKVr0hbRjGy+EfPeekD;E-U~+0nB?0*-`AmxQ19;90uQffAxcQgb~( zEiUsM;ro2Z($Sf~3gID*@GmMSSW@kds341(e{7a^%C5a&rCGm={fn_oYF{@>lzSOO zMMZ^Vo(3O0duG1R;jDfOKE|| z)84~Y`FpB#{(LO}B$1#%N%kL-gJ6ZU`dp}^GbRyho0h*MQT)@R^+LU8Jcse%^K(@N z<&^i6`8YPoG297ND{+)&$PuQzDfOgnk8nHS7{ zMOeUIlQ^gMlXdQizPKbAM_Vs80ClN{v$)vR>$GB zN8cv75jr>`2!key59%Wk8IYpJtR22cOiek|_jZ%r6-Pp?nJ>X5g-h-ySu?t}mTgbp zF+lumM)0hc+ttbI6bn347E`Y`}l1K7=cd5QVqJnvG&AifI_-3~`Ky7d^^gIw{uV=-ZS-c0Gj&R`Pb zoosJ3=FT`|qfT38S!jWgPJ&E`I1-}w29XsyX2>kq#BWrR{2B2omcKn+>caW+0 z{Elg6Uc82*fEV+Ygi6TT$S-&97bi@y(xI9H9VzVSxpXW+NfveXLY^|W#K3$pJ9NOO zd@dV$*n|Sj#GW?G;cfGPPni48AIh<@wG2~`2!g(&3X2c!#wfIV0Sz%h^6a)V>rSP% zhne3fQXL04AB0gsVmVZOLk$=;9t|4`I52?0LG2g{fD!j^8t$$o{&Q9~$0k2QOK^h8 z)s>zRXC{+r6p1H884MNlo_G3_56#m-@z)hsP^0(Y_@x#H-z7bKnH35xsHmtp`i!0x ze#YOSDwMqbLE8WBG}^yx4%~-Jae*^DF}=&vo^=MWX@)p%u#{7(>=kvH4^q`V8oj;t z5Ioln`!ZnVeM2yT8>?Swqo|cKLOu^fH)2rU8v>0m`zX3``}(zh+j~U?RA?qEpBRL+ z7`NL;4+D3{SQe!o4U!M#VgjIej^8~%HDWD|neXLbMCTI+%X0=x6FVsQHTtPYBX8BC zeZ>S|KMmlY?a5!WsAv7enX+BbE*o+pK(7$p;7Hme6z-TavTVe$&Gm0dAs^U5Fij@K zP+1&)pqkl0n|f4yMMV;?-!7PYoYqR#n4%OGA0a0Z)VP26oSoC;!ZUJtIz2W`;uu=R z0n3L2uR4DLJZ}>)7VwE`%u~drRCajq5O4$LqDa~?{yp@Cm4e+hU-J!W)`M^~8CSq% z(0W{8S@X8xLmZs}t-HXEUwM9(%;kc*U%73K)utJ*Kx2WkP_T$BNEVtpm#O3_sQsLw z4kzk@E1*u4Q@ zFeR&w>0z)hp8UqI4SwwkrH}_vt^Ym}t`CMRI2RLrs8oV%GD7`jpx-`y@$16!yPhpD zFLG9EU}}2+3F-MZg1Yf*jV+`kU^&X#?bLfqOfai;_LlK*UbUXG05R&AASPWB4dNeF zScXU=d)XkgvOZEtJ0~s&1Fl^$?5q}G9-;RsJ};>`M1P|UzF!Yj<{RZwcTu&2znF{< zEq~D>@NI4|-j5lEiP_SYX%(8W1wHb}!&GA*?31q%7|}!VDq_hYWAIZ|ws73l2{@Z? 
zTa>#B7O3VLyuH<7c`rB)VaTEa|?E_LkWImx~sCo1S|-y_9<_OY*7#(X07kJ4)&sgQ!9DHT710vTnD?Lv2+>>}^J^`eD< zD}hl&rQwPz;lqM&YcyWQN-{w{iInSpU9q^F`${j%l_4fn0B9H5UmpknGBAVVZ=Vn< zYz{1s^)HK2V~6iu=JR$5l?)~0ZW}si1lSOXX!;Ag<`t|T6^b9y-unrx_9)B1eU~m) z*RZ9ta^l{mGS*W4l(o`c2n|K6w%emB3`V_)6SoP9Pq=rHk)$`wPZEidQ_@x48JrIW zq`R1^e{R-0Zp-_x zUpZV?Qaq%zsUIxJ229101~$UPszzMuEkV|x1;xMEguyZ%)l_eQEXM&qR*DFec~#Z@ zza|yQzZTT;E$eJMsZf_V@o<}TH_GiL)UO6QY|wK(B_0GjQ%metsv4|0wG^R^+Tn8ai!k@LoPU37Wp!s3%G?uDOJb_b)y0<^D@iY~_lQuKBxd|XdKep( z8hz5;jo1PK!F8M=3A)9!SALmBm)H)~KIbvn1Q&*c@U0NDRDt9OFDy)SyCCdPKOTjx zLLhn0xlEqgB8}Pe>Hyq6#!@=#&pFIJ3-;_rjoKu1WE9bA7zLp06XyB}nlX zINP&Dv#80K!pYRbtTDFa3UkF)OxbyRij<;8M;Whm6wIHm+9HhGRK9p<9q2m>Xl+cKF#Y#wL*X|c#``5-~KeLPoH52ATlhUV!;-xi{M&lD>zf7UY~L1* z=`F^=E8|bZN{LEvG=G2nmKp6FElU zK8+7#8>{ml=kj_K=!_az&<04fjX%!8(ZtPBeFn(E3DIoZT@URukwtdA&*T%0fJXMz zEDICw&jbSv;y=u^tiGS(dGSDfpY@%onTM~V56iNbaWW+FS z8gm$e=uIS?sx#pRdUHO=D27N|nvu~DpWSN++Jj>7_<~6ayQLHsz2tL>Rav>39eP-e z-5n$@m4t$vc9&nF_^d8^;!r6O0M_~8p>w;O>#9TFqRr@q>ZWMo*?@L!!&e2AURi~0 zZ7cyCMH&f+GUUKEGUPnDe>U_W9n#b}4oD0GWyC}%E^+=XjgP-c7C2(r&two94{53(WH8kNw{2g`J}e=9#Oz+`7d_{Xq~P#Wul_fbmFbWjDp@ zl!=0>1#0dd6S&wfy~QuNPawl3{bKo1S&FB3PbC?j)GFp2kcUn(FpzQw)C{!9;Ij!v z!lmFhlS0;Xo>mMN4zCm7#~=?T67dBz2$et3QYSBVeyRhH+DmIZl|8M?)qP(i!B{Da zHBpy^>Q}7r{qvo!h{8gv;!_43<;R#3FolcIIvS%l5sFXNV?jMw}qV#g`&na*^EiZu~Ivb+fISuLUfYM)@`O-fKp6QQXLvv8pl<# zykHQ>vb~4}jn|Q~)js)E%~1RUH6wW6q4wc|;VaFo%37VO4-O-4&#%d0dsvoyL(wEC z|Mc12KN_LERgD-U!RZQMi`?kzIDRZJmN{h$QsynW5jXdi?{5dGLnPG`Hcm2wT}bdc z0bOKtopBPzOOWt7030+%B;YcWzlb<+4{AYfEBi=@=eu`0#AB8Wv4HhdExjupOZ%e% z3yAPN>;%HnI-#NC zwmdo!_Exc>=Wl{S8Ad$lVbI7bvmBmKeUP-L&h&jtpJKc3PU%TCCisnWx`r^%#h=6C zi6psZ-K_T4fc5##HjUI{SxKD*#RPeu*v)8}61zO=kId&>|3~tZ#pTSqGL7UKF3=Hf zN~6cM=<$0RHl;0D?c~_ZeeO-YmDetjjA@^h5jsa*nH|dUos}s(+r5pT=BIeS{EN+F zQBqaUmgy-r<}~jvW)4b__t_keLEKA;$;(~bgGjzJK%x0LkOI>xzeB&mVSv8y=L4`7el3{4~;w zp<5#n3u2+$dCf?NPDXGMCXCvU)Vo=C>^MDJ^0tA=(b7)gOBxX|;V3FbdzpaIWJS=6 zzE*S=z@M$-xaC*??R7&k*NQ;0DZ`1$AtJZHK!w!S&|tyV1Ci8GnT+2^(zV&A0cWzU 
z>=iMLg?6re-my36ATfm<*&_4v5xr(NvT6^r7c_%+wqFinmS%SO@#< z7l*lIRG$-*t5$I3z@D#l?9~X|eZ&38C*9{ffFo-6pHKj{vB-ONq;VY|5_mKi(QTI= zBeMg6>~C5o0V%&9-*5#%&PUz-UXxQ7{jDBys^L!oLC%6Ci2VL6)qO`KnszHDAi6q% zRxYWp<~ppn2ODNJ`*BpF0z=wk{F)NMs1Xa|MT4kNniJXUB#-*r5m@|ONxH^*cvLS7 zfD>DJ7ZLq+x*;aDNgxrob%y!x!5RdU8y%Ofw1wZFfPFY9*b>h;0|JFhodgBoC;`pt#Bjk zCE-f{rT>bqKgJH4uf#sMU5ics6Gh7-AUD%bw@0t?LH2Z0XCKeViT;AMc-p*yz7I{Y z;VT*K$C_? zhM6zZjW9^ZERFQNL7(~y(KK_EgpfNh>)+N&k5yUjPuCooFP2T=?{fkx`;7UGRo$<4 zphbQ6vusc(J8L`M3$8~NpdKA(J5=QF_@uD{n4GPK${>Dz(HssKZ1sTeq83y7PZiQ`S*&36j`)n3S`MFM%WBHpv>x&@O# zJ2(Vz&RGj;?-`_g6B}@_ieT7>{6J+zISnO5i@7_jiH6XRPG@~#C!VIF8w<1 zH&a5%)#T@G>2nlv{SJY!`Ly`A&!1oaEzUFPj~)SaccKa<3MW`m?DF@>4H?rwLkf2U z%~QSf8-%iktNFh~|J0N#Fa!`5_7-9WaAYXhHJ&cSv@h@w83UUC?J%)2{x6w`_5X*Nw(=InyTDFtt>R~7s^+hJjV*K^C!WQ6v~RY zsq1~9+oG_<#?=MSp?k2os$uuwDK@(28yoLeF6h8&N3s@DXNR67RM-?{C^iJN!XZPb zlA%gS8g~#o4+)sVN01EVIpfjw?bDZr6eob=bP~7Kt5}O@Ky8xGPilLnR~M3f5^Gi0 z?0f6B0h1z-xP3=4$-<2KPv?3CW(ypw%O+87#PX9>l1TSx+N-GxulmdxxlJ<;qtC+=DGGfi?O*>qpb6a$0`vNh%2ubx;~!xp+kf3bb&x@1o! 
zG`NSPKy|ki<(o_(=(ZfB_gC@#;f~<;@-!Cz?aFqLAeJ$!rQ=COaX=&0R@Zq##3?Lt zp{(JDk{|#y0yB1NV5AWDJ86|X6als{D9!{b))i;1lv_eAfoiZ^{A~%46egJ>KZBj0 z$eFQ|Ny{F7OtI4Chg(6p@v*8H>13%i{yXZdwr1qF)EfG~gKJlU;+i8m*C3wls)9C2 zxsZ3_we6b^UA50ulT0or_6?!3I~vY;5?XUw212EE9%{IttHKwt7fuc^R^F3*h^iC4 zTivW-uAr(|WonA;7?TUYD%p@-j=&{KfAk;)ZNy;s{=DVjkO}I^1fBlxeID)F3D)$D zV1Ydx_sm_(L#W6>gn8y^!?E3*#itMqdU3Ci%=v2CUJ&Gpv1)v4i=>nb&8!ZywfVWc zH#*6edD9KFGnp|`2hHn*FPiaEkpcHsZR1pO%Ax1rjx1T~oFr|4o3YqVbH@vQfu>o= zTJ%%5eZqDV4#pAO=*B#K0$y#w5?Q9E+6TI1%5YgnUV%Ym2@evCSuRGXA7UaJlb{y` zA~~AjI0!LzLKI%k00Cr7Xf`X%3I#klj^b_+68Sj%-GGzvsdU7n}#N}Q6n(+YO#*(iDC0x;rc_2*X?Ecs< zRvbyAt(D#hnL!pSdVJ~GuTmcKn%AfEbo@YA^o0{ogYi#64|M1z>4*a-ohMrfpGyzl z?j4he8htIS51DH$(em#O>ZDOnJp#JFBU8PgiOQc*{^>==uh_7I9;&8XNrfXwns06N zkMXy>0U9@elakp$F3-AI0)9|L>NQK>V{Q?UP}0}*P`JHW`z^b#_qtqhJqJ~xlu zuV40`DPA@d7i?CR- z${k-c%!L*;`!9~?H4Q`kx}Du$>&pL7weuMlRLU&cpV`+y=RPk^^dQekTcI^ z)yQf*7yM3;Oa4ww<-J1D=?FfpTfSnx7|VsbHUo4<(s@BDaFEWmfg~+khdbm1h}*Yw zUr&g)Im-UL7(YMs%!~nWn&V)ci&}JOp@cI|{irXNUH|TNBG<>kXtwBCV-RP7L?XrC zB0+`(sqtcu*JKzxBdQsN4G<6@!P-bLQY{8)>I{%;^cyt1aeu+Glwhg{Eo=2PW0Pci zeE~xIUM*$G!;>h`TVN;ynJkf@9Xmi}ysQlKlT;NZdC)9hkb6%%J%v1x!#~*^xNaV; ziF`umDqyjL_L|#*#mr6#*FIk%XEbbW}FV~>~1Rg;+iJ@==@Dk zd}5Wvyek=cB)nn`x;n$f>4B>@(=TT727AIe-%y;#}@xt4y9SIRi`= zBv>>@?AYw{fc$M*?t%{tD`D*LDDF4^3Ua&k<}D?@%$sHZv3s|%s^h)60>?(1XYr5TNg554+WSie#%io{N-P*hn#UyWM%O>{fJ>uA{)}YS{(@a1x2JxdH=e3=kCT6k|!-PTiA!V;LouK z6U><;Q*Z_cUE?4?1ryXya%ho}hdCfVM1mcynHNIo(WzOiiLM&SgWHwCN5GSXunkY; zKnj2oW5S~H6ZG3(+8v?|K*{451A9=&M=V)^fiB;53#W&effytvKGt>&w>kKy_%LMf z$OYq2=8qjkY#{IGOt;@EgddpcKHp zMR-~c>^YC$#WK5)?f1TSqJnc>sbt>1>|kNr|Cim+9)-kTShS5jo=F`9wgjAn^AW$t zo)ytvNVAf1NCX7(;WKg?uLH|`hM61zt-5V#Un!g#G1d%JcPSwH1|BCmbIFbx(sgb- zYLpGb?BFvk0F=49bemt0~PrfYB?|sY@L+)g{ew0Z*xDRBjP}_ zaJ`L3Wc6;EUp&`B2Y^b7aVl>QgQ2kD(wQ~F{1&S0V7275*xfMc6?N3D%mFa0gUgAC zfIfXyU@YO5Qk-b|$hU)b!Y#@eOZP)LS&&VTpEm-p-5HU+SD2YijD3UIC$B{IvHrNe z@g3&LOG92=%@TBn^W3XP#ap50)YJ5QFq_iV_F8z-P$MK*J14d*JWo@r{n8byjj?*X 
zA7%f(^~uO4AG5926Z%~wUI*|*H=rkKGe!$l*&+{ilibr)a!*Cu{%^>ih1LvuXj^D< zYK-n0c-R~u)l;9ouHdwrzIDwmRw9Mkl#B=iG6h?pr<9s8MVDwQ7B9&NCJ&5Q*S7aIhVrf0Nk& z5@(bb#qA3`c*KwwQ(~eIxJ^w=7Len5KLR#w)$Ex;R%e~xWCj~*!0Z4C04H8E3qjXt zv%#Iau7svx>J#UJkfdkyeN9cYqW{qz|#Y76aUsRyjfP zzyGw%+99s!Gy^)ktG5EGk8Dooj+}t92fy2LPC;?GT*UZsWfJ}R(_PfTK*~Zb3mg4g zIA@W`;cB9d9`#sqfq%m&r2&i79SebB8Ra|7am13$|Vz-$-}=IJR}*5_S>n18WS~a@>GBuZA(iA{}>s zx>>y%+9#nSY@34W-_-lPn*e#Wif7+Bz&v=v4)lXBde@%=(R{qV00qVk^OV-$2}a2u zXW0x37I*D2bnP;>k<{2<*s}fo9bIwjdCR>!F2^1Kkx9{Qf`Pp5E?QWg&i9u1;|9s&^IR8!jPe=vpcpK z6a$a?m2P2BuN_E4yX|7Lo2%H>-6grc^(W2dh|0gZd`)AU2yg`x)m>Io4|bd=MZjO@ zVo*ZSLcF?1g74Z1)Ze6(-1T`JrzH4uzHyY{TmHcUv`!kGR7}1C#_IL^>%8au_XvcT17HrPc19m(Rlv3xVTiVYX*5;jgJ{h|iO@m~X0sS`+l#g9jry_-6vu_9`&Nco#Zz61#*|?sDIOj;N7iEVdp~XY zsIII%+#ln7PA+U$I8%Iu?r8Z}rdE1>o|D7j@sveo0gQHg*|2F$wm^P+L&254Vs6WN9TkzyTyXFPVyd>#AwF;%je?^))g4p{N=edH^a{wc7N$ z33|c_>x2!^pjt4bcsm$s&f<+~svWlF=}F2O+9YVP`E=-w*(Jd)-2#K2{H*Qde=A{3 zI;_M2D9yEm*e?}T(|-x{UcV=apV(ekQKThhAK89#^Z`vUe4bi3!_RI2;*aT3#-)2e?gbk)rxL!Uv5=W92aCU9LMe4Z z)~mH%ie1VV%W%ZL_pYfh`;HBNe5LtqdXv z^#2xhg^6t`L5zzTl=e#=FwaiVUTzyT*U|&Uj%(vL6;=_MFf=bt2uc8aF zizl?>ft}x{WC8^VyNH&g30A&0sZCFO6r52+R?N4jn;BF5+-d4kN&awaJv`k=^H>z$ zN?IY&GgObF`bc3*OFztulXxLpYfP9Oee~29jdeW-x`;mrTwGWkJpFv-acBOCf+RQ@ z(?>YlYK*P(G~X3764;vAlGHI>ZH>dx^Sjn%U8NctlL%N0^X-WsG=@T93uMN=of9qz zenoigTUF^9|K{e7^_cxwGFkdnC=RCS3KA^WScx) zW*FH(oC{<@HUX2L?e?DN@gPu;$ougvV!u55%Fn)~)^Zuoo%Z-ib-RbK^DFP;0kT9b zr-X6oj4zom9W7R1tNBMx#xFYTnBufeMz98Fnlon~@r}7E`y8!bREnW~j>8`&D_g}= z=+%t}64N>L`)ORiA$pwGG}BDU6(@UW(#q7M6)Av-0##S~BlB%79C|^<5uO=Yk>>e- za|1ngokqkMkRg$ibKaekr_%`~aCDDfoep4p{K0N8S7y830INvDNqWzN&D?40Xg36h z0{KFGJ72nG&_6%6YwRCRnLO-0{Rp)PACy$S2qZRjVZDX_Hl2DCJKz@;m+5DcPKD2N zSwI1WaBYg2!X4+B`1hSP?CA~c$p?C(O^a#J=a@UE-5?o#CbmmEjeebA_dcSjZn$_B zC6AWLa1MqPc4Zb{->TwPHvN5u6$kUBFG(7X?3mc?WCA!K1~q5f#?dFi!_rol zes9+YRT$jvk2Sygmlxf~qwqmGg@$6WvrPj9Q8v`owzMa=G<#Y`xodd$LR3tWbB+ZkdQn_8yV5Ivp*q!`rgjJKFswxZBeVDjE 
z@7AUwxirR62NFP6k=S*5+n#!2e(;UO2~>6ZeO3h6K{PiaFEqV*=Zy**`TO#g6}$j_ zs<5y}Z!5oLM?!Eo*Y{3Sq^|#n4|hX{yr~a62sWH6@S7)z(=A~-+CA46Nq34kGarg$ zxpF|4B2|zc@|OBmN=_owQU&F}Sz`jDvy?2iSn>#RPcxsF9_BWf-wU^|e)^x9L?Xdo zhO%Nnd8zoQSl$d<5~Ptt;)5Yu^SkEd20`>u&DIrEcbk1e=v?0LTE$w^BXZC`_yCA%B6x5Jdb#{wuUr^1lxUt_)RfNOW)>R<4XaJxG4wmYgg+LQ5&9 zz8rE@E!B4-D1c)TIQv^IvxJr%V~A3>R;znAV8bAy0FvzFhuF}YnV1P>WKn2gC>leD zgrs0+A87@|hp&Wlx@-$ktu4kR#tNK)qL8BDaY_cYfF$k=MG8&7xQmnv9ZfRsa4Jk{ z#t9!^fbWl_S>|%?*Y|l~?pWq{GVePs^_q7sYdpJL@5is#<4Ia_RiXU+PWcq?K6v$L z2x@@GSt#beG>-***R6qx(S~Wo zu+798tZl1pEnWIfh4wCCTGwqvJF=nwMz-r}j=^IY7x`=kwjm@cu$NZ6Vid&G0`CI` zH*pm*#g*c0XL~Wt;Iz#*I;M5ldh@yr5W0czcgyL_zb0BrlMpEkZ)|t%6N3HMeulVJ z=bvqLF;Zb7!oa|C9y1~kf;ofnS){-^tKh_w-%F+W>|yu;X~EK0pUbV_-OKcW>4Z?1 zjkR{rZq^EgTNtMTz$bpq%-;`3&T|JwbFV=K^e5hdJLfYMW5Y-my*UFN&yom3+G3e? zLuG1Auh7~>b=$~4ZT$+egGoV7Lz{x7&o&2V|Anlk@gV%UueJGOXqGGLa4D=_;G81z z*yirGFPxhBj~RgRK7FT=*{e+TqgsIo$olHkm-!AHgK%?;MlHnojG1StKXeR7xugD} ztCR7+BIDxxzjn}X1nRsrI4uzH|F-Y*k=Ru1UePFD4JTS<&wDvm?fg!_E{jymH?Xvj z>xq6@e7)t6+5p-#srzuC7;I%`5stZ;7e)YlBi=yKi~DDIIORVo)W%98QO9nl6tAPJ zMp5k0Z?j0ZMKXK|KAKlBxI^r2S4^M&w+ZW`%BuoD$e5EAfPBqi$5vwKj^VT-)}}sd@y%DvK82d?GOj9Smjh`g%~f?Dli7@{-~b=kKt>QOX$JCLQ^!yWMZm`o?vEEo zjw2HYuw=~*kk;A)&Co@`=m@N=EA|c}~I(SL|AEVLbMVYs{jA_|p#!a3O2>fB)y}4cOYm=NGcb46w9Z0&@iO;f_rcDR zT>808$)S$|y7zl&3#vhbG+-nT;5af?bm@Ky%QU)kfvW_uIuK2_xMSMLerhVwaR) zy@#x8NV19{k+i@kauv0QE>0;;hn;NA(m$O|Zo2V?CDEM*g^ZCQa+GwE^{hyJc~B$Z zXdS0Rb#^ggWSYxQfY6!iuB7i6&_nE*@!_^yH>V`Wwp`R&^ToA&ny1J>S~}v{e^_knlaTKIBHrjLf}*DF-_R37NC0 zxZfnA_%P7VwuR3j2VGJ02fZ)h>EUtCM)o|e=G8DuTH1XY7zAr&S z=VJFL$ps0)NXpMmz=v4JtYdJ!ZV!CKy-?Jp!|KQfrwrc=B6$uj5A?C-JVR7$_@6ec z7iMOP5&<`&kQ~m?iit-TggtdcKp$&;_Ask#RjNT>%cH-}4duLlAwLJ0^Aes20waI* zsb7E*`H))p54;mXBn;Jskl?>4qBmOX!(Yv__+cWefa@YlJA=h3{D=*M3B`s8Se+uc z1K|w4QQp7(vf2;Qg455DzjC4K)}Nh8Id`as_!Z#e+GNFliA5DxrnPRGBJ#8l+Z3Y^ zKc?EF$ak#7l^aR~TKb%fm}UrJ0DSgNMwAZJY2h65+@gnifSvcv%S4|*8()5Lfw|taJuVp$gNZZFZ#-eYB24wr|83RV zAf^RMU{->7huenxwlSzb!jR0eMeD&#vhP{gcV}PKIae=-^?~!2n6C;+057C 
z&d(XNb<*MC*Oe(7Sg|rRf#UDx7-~J60KY^Fu84^NAZ>yQTg=UHyUo!cCnP?PhQyCp zagxD1d{icgn>r94W8j`y2$}IgSPUmvV*TC|2-`_gzEea$tt{>1t=ftltGH}p5esV> zrX})@94z%%8iS+=M9h>zfCD26PD-S1_g&yHEor$f&rPiTG1JKeYN|U&N2LrknLV}m z+tIP&pcc!!PDt-oSm(SEZJ;cnCkP*LteqoZEXMdXM*Gd-NFfEY3`XVRJq4d?oq+cE zY94K8tl?s+h=Vozg9GDDsr+*j{d&#h0P@gEA7-?4gHbw%)(s*<;B>dcpw zf}oiIoEAB0geV9gX486`#V8f9!hc0(qet4B>#gUYaaM$(1RBG&>D<8IfDLo{r_#)V z8Py}xQI7Fvs%Ggmx(9r{r`NDJYe*<656c5&fHMQ5D{)>fVu_YHny6v;)`jZOPubhC zWuH|^&{JP9WC1#VV1(3Ciix(Sv>D4_ndE7WX9qEnY{Jm(JwcUm`?zF7wOnbo*6<%O z4WdYF(hdlWsl`!ZF)?vx`XM7&**)K&NTt?cr8)ORh@|n(X*75Ce1*0QC7^7~k&|A8 zU-{IdOb#bEN;HntCX+c%13S$I=!!Omx@h*=O%*9Ze1S(35DB~j6nPg&X}xwJ{pl@| zqB1PzL1yh~Q-}Q9#-w@b^569E<|X2xxvSnLW#qZr+D{WlQOc4sahXSkBMa(-y5r`2iuI6CepH4ccO3 z`zOx-)^1%VbRh0^jK2y_ir`VR6>faXt}Bdd@YrjNf|qeIE*j40*FFRn?2@J!HySrQ zo4CL`K#@_U%J;ZlzVLX6Qlz`H4}kTrUW1@w12m}{CFCJ_;#_Nc-wem7ssA4LI9$9W zEF?~*wr~OhaLfu8j#fXcN!VH0xc)=lp#d)I0a7+QPy_Of3d=)(>+1i3>Xp{Kh~xIS z9E#{mg^-HS@$WOm(An7J@n<&Qb`;UQkAFv{d4W8$ zF}#R9n%VAdbCDl!VaC0k2$t|wA3&6rv)Ll5o$Xu*!|(UkzsB`nsi6T?8=1q-p&MOK z_mq(Nqh%I`jfa`u@oL*f6;v46Z=tU1B6|ybMfvF8Y`wpn3AZQhHoLmq;cAMj`#qqMa_w*E$nYf{Lj|ki!~^1A24O_xfdvfsc3T##K;DE+i@Q~dL$8qO#W=9W-5US8=#8V9qw9TC^0-* zGN=^io)%f=C^Zo-w%rzqHtP@H0b^{mhP&?hKw=|`=Y8SN$3^7>7ob^< zo#?lQeY6lIFzMqK4;cTHcxXx7O_)UKiR?fOmm>Fx-9Gy+0>Ma^dLuDqudKU=0~UT% z>#u0N+w9fYEO26D`5>ouW zo&Mzyk1Ap%KST^#>l>%XhtqP3aKu}OY|UNe2Qo~RVlCE#fiN@BwQ1@@tobe_{pXyD zXs0}R?U{M;R#G~h69H7dD)%4xt7-m=e2mnUTU0L5eKg&`*v3yqa+kU!=sgx|; z2ax|POXfcXfu*j^iqulFpr)Dj854QY!+2E#$`(54!yTl9lg6D1tQ?o!*Nblul3X1{-0g zRPaY?pcS@!<}8H0ja$YMb0xT5N(EXZ8>ms%>t^iVKZGa;BY@I5mwes}FsJia($U$B z&W=x{G#d|@egodp@Buup4I{ybQDG=U*n3=f!*2&cJ9u}b9|8Pl+A`;lvFIGch_*y- z;UJ{Y$mSeL5@Jq_75>6_MYnsKhl=q;^v>j*8?dPp+0FX>WAUj8)YWd@($0ZlP;!`6 zq?DbGz@va{I0WgH#jibCZhME>ROK7wjW$7ZyIAi9t7+E;`TeQn(sKLX2P@B+Kl}N{ z5c;z>09g8!59008Ep8Vbp1Y;QN@lfncQ{BFjPbY!Ye>D-E6e(Y89}IdQpEgzJG1`? 
zbg#`4s3V<6lx1MWkEoNRmx5tjh3;?)z};ZXfa3fo3Jw>N9D|itk-~h_yA)||ZE3z1 zf0Vr&J(3w(SsXhixQpA08wOc$ATYyKayK>I3cL&Px@|>fQD8PuU4qB=-Px1|I2)d* zGGB0MHuLVHNV-WyKdKNKYB;_gg}jJgla$r z19~S97Guff&F=B4(EZ9C6uyS0iD9!TuAmMcFbfw9g7kGcm<$9VX@&XKsbZYm;5M@l zQX8K@o%d@23M-r{qRfgBQC|57e8tx^w~V`S9DfIv4X-+AIJxMIV-lQGM`qT_8{-;y z;o%k4PA0|;>b<<%82mBl-3ez-;+t8zOrjU`U%|)>og0TLD7B|I7 zcUk=#0$gF*G6&mhsV!l3#Z=Soz3aTz>iu_Kt=@-wHSnuXjBBvWgJb(cA}N)>Vt&1+ zH@@jFn^ShD42aIFSiT}Z1wa5w%{^5qHkX3Iaua7JwlOg%t*roH}Ep_ z2`N~yx!Sc`iN3{L{r>GZzn*)KAX@s>vA z6V@W|NHx&GD|NAfuWt>;zd{D2TW>5 z55KJ`h#OUNO`Ro}CFf^}0Xb&WbWsp5jo+}SR-@ZG<^Cyj%FTP&;g&7@?EKD3%DuNJ zRCSNs$a$SKb=fPx!nxZFK+2uNFdtu`PTzOY2~$r;@oeM*-eAXNV>A{Dv+ua(SFp5H zG1t|~QCpg|gGxcK=0Mx*ac={m>8Uk*h}m;C&F!4tkr~~t@3QtnfNbto`hO+E>>A`I zt7I~GjyATfWSOI5dKjdOy3Fn&;>OS^M3@jepS53L-?B7r0tL;YhNSY)XD^lmk>YpI zbJC#uNNUR&cO1@;)qOPsX@3X#B4;Zwvlk3dWUP zTFmK$de*p_PaRfT1=ilj{8^i58&8Ies4a;k^4TV=*I7Dmrlv*(;f%2`nleKL5tlOm z!W&SW{}tN9qmummxYl30ph$n9v%50AKVSC^_n)Chy&;qMR+*5iy=Lb#7fE};s1OUu zoesxiKCPH#oVjwE-zD=LB(^~Fs%#pTZ%U)1C%&Um!bk~Q1Mx?8meBLi30kjG8Io}p z>65Hoce<}4X4+(3^00Fzl9f+dWs2Bl5&GmEZwcM$E;ohxa&9G-tiWv|`+KD8m%N~Ynz<%8?~ z@L4VzU1(Q*Oq$w3w4HNf^<0M(?%ZmA&NI5S(*ads0BWmqQ3<~nBuO&326z8V2 zebe^~%Y1-dgQ{M?SA2W&)jWA2cY{ME%68bjcSfFaphHr2Dpu-`#UG0EO$7!f=jznX zSqh5f{GSG^=dFUsPds4U5$d1AKeCY#p?>*=2<#;_!XOI7DG|y!%rLq|@a)u}hg8b_ zN$`46inbuatiD4HgW*&7^^Vu8s5QHy8f$)ZAifc{fIQ?Z);&=bd>#m8R!=mBJNnVt z?IIf|03z>^sO2tD3+5icVKG3gOyf=TLqgmOb4{RXOJ+7K=Fg5}Hdc zIog(PD{nt6{z+2@56hl3e;t`W*BdGv63>`#17i2ZRNroh13=OsGq!O%mdqHx;NvJL z%l_MMU~#kmfBFrU3{gr*dZ3=3=YN`w{}3Q_tL#WS?iRK$#gpB$1(lR?yx#vj{wkE! 
zv1CZ1ljWUxn)P(v*G#nsxVBy1{)&PW+cSn@&wRB6F|j%_KAxVRY%a#8X84}c`z*Q5 z)De8!i_HGK2#5%n&j9}9b~{CLWGPeDomhv zif+iD!TQjd<7b!~QbfLHk)#+9a+!UWhV~6X^4*RP=7r;(Gtfo(b7}rVfAAX~QTCiS z<;}2F!}-NWo|Pbg1%@kl#hlGpb4+I$Z{z0$E>H!WGs7zu6g25UF##i9lLn=fBUzsH z4;Ld&k0nirP$C89(W98FjtSr1oBL+{!=Y_@HI}3 z@9;<;?|~p}XREt2&F}`|OwkLxq_ez)T*~(ndBKhx5jG0Ohr-y_mI7LvNkZHyh^AID z2aTY~lmuh9y)`uRe?M?{hLotI4de*NC#k>xY3+&Dx<3PA(0oNhhJPLUa;{`S;dmEb zD-1@8qYBYKSElKe#T+gQyk*x{87!R(igXhPwJcn{AHF|qp%*nt*)pr&yvde3##6#WZS5rq9*X*gMduhwus1C zPUYZt;9>TQ`qmTr41%FzjGxT$zmkqe)gSywxH*i zxFR_#;hXzaYpvDzcPp4#Tf&sTsnCMFh;^&jpg#eJIi1;V>M2Qi#3^8&ia_nX3M2;V zC+>%8QI?=n&tjm$>*iJLy*Afz7EZ(gD-lg(FsR5E@w_<@YtxuTWmjanRZwmz710W- z87*B(?2GXi$KCz?oN;8o9pe#k>g!6qHI*Wrujv5h^INge_Y%5q)lF=*7*G;vmov-s z?dZTE13)H`e4H3T7^8i5hU-tOW{#Y7($1{y?4&x= zVvYO`|Df$@NvCY}k5mg6;AgffTJq)B*eA&#J<>Fbex;i98*t$yF{OK}X0;jcF6Y=_ z@h-m`GNNN^v7F#Ds9#SDG_&U6m~;}{vpXB2DfFvw`h{WZJp-A zkpQDY?&TQQF11YQER@;E<%1qS@zn{RD{I~@jjw6~yj7CyP)v`Lf@okar5Bg(;` zxQnIBq41)6-?b=+G^YcrUX8_3) z^4yNKFEp;MkEa#t!5XBB<-!pG5#3Im_J>ntWz!7PDMUFe{!QXn# znISa6BWCQJLI);bl3!l}TIa-#EAnKoOChsyANHq1V$h0a60>f-r@&2USgs0T3Z%s3 zg2$0OX3OFh-)a+BpaWs`oqdFdIDsj=#r|R!sX|g9!axTpt<%r#xn}&XaYT3E#ofH4 zH8EZ*GATmi<CJQC98_$Q}C(P5cc z?w5sog>Z=3@N{1CIYKV^AksvjSy~x08?i;)<-U9?3@{sJe}mKr0}bNX-+}0!o_(q= zF`MUn?rzJ|N!chXo+tm9YMNC^Uwil=6K@)??{4QE3xvMT)(`1YTf|QDpPLwrOHR`U zi`xdwpUTzI8y7`O37fzW|D6)rxQ~`3xFu~J$U2*rh&8k6+pd?*K|c^6Q|JD@N2uu9 zS5G4yJ{!5+Y?MZ7GhcTk(|HY+rF6R1vy5zMNTh1NliD)@gCOpq6@q!OT_ab)1piVe z8Z{22dmhs?nzv`V-MC?uN)DCh_~=;E&sLFYslRJxUTfv?WtDo$?wZ)qA+n%zup4B+$X6!NmvK>$6$Dt$P}F<1`thF7h@?B;?z!&i-f*Iw@dWaR zZL{C8yKzhQQMY@RqP|+C_AvfnlGgdcD}2yp%ufOXxLIpNn>6ZlF)sx6Utps?Od1mv zQi8+v3z8h?8V8`V#w?SDl3t8ST4cJex3vxgAYkrWqn0nGx#Ltoh!*H5yq=1Zhzo69 zP5psBehd&(4MIu?K`-NwB^kg>$Wk$5SCF&@==qgBnwqxn80@{l0ihi`Q4xXjPMmnB zu7yiZ?lLBGObG)^Al6=jMTj>?!HLwohkndBhZWpxETpF?l1dH2uWgu&&=55ImNu-8 z*YkOh9m#*gBDK%Jg%uyF#Zz~6EDrqeYTevbq>G93>ct9|lmPSAPW( zoXRW{K`G}tb*8H^xNciIn5MN8kNgd%sfHlS%A3jL)Uq@!owH`oHFPr)1fL}YZDod* 
z)k@OGTs0Zo@*LgtdO~M30d8nnNVgjT8<@=$o6#I?VSYG3UF=}q_Mvu!LY!@L#+ZC zAzhWB9FrmR^o#(-I==JBtEQmVb;j9@@lCQ`if!mP6*hV(5ivQW%N1gBK)_L5?1+^+ z+N`rC+i-EM?>!8isBXv~WzAlpK$Ykv%I}ntKfb{cFK%44c>AKHGn@uxoZKHY$b8xuDht9f# z=J&%md!S-9G6*14{0Rxy2~3zQi+t(HISq{SMBYmkpO|J?5l(Gln=PgydYQnU=KW>E z>$ndLI9PNb0PDg>14x}K)U{klhSlsgLG(Gwcl>%zg0V3|9D*vQ_hkxD z{tZJzHc~Q;EA7cNLWcKXI?sX$k$EtDv_pyR*F+y40OQQ5L*E6ab1Udtppj@AB!K7G zCwd1`<6u{#F0%Tw^x;_O1}YU~zVH5n`LY%mFIaEpuW|K=&~qM-f&9bU6m@$7{YJfh zF|3y%=(_XsnDpWuxu5PHqzwiZ1${>ej)#`%t6n2SKSfEP;i@B}+fFqiIdSlXt}7%3PpJr4GJ`@}lCccLr;I5Djrbm~=dmd; z1zIOV_dRrOp9X>COO**U8>CBSfTWPzeQy4z^73Ym3&cKE)mLHtm{cA=cE~^@WE{Nk zRvKL-#TQ{lRcMoJ_dOqzXkcTI*>xK7PbXWUZN=##bsj$z(fDr%V4HrkzqRZmU{$%^ zuCco5mvd>nr#HS6N7^{gi0b*9C8sM89FNG5!YSoZ&}DRZMYq4BQmc<0T38?*JOBg< zN!@gH(YC@&k(g2Y?_;?7)<{!BW3o7G=l=aGa+tzM@BbAdP#t9-BHt-cK@+p_q{Wbr6@=F53A_er&~ zbYIv=H*N4XkSAT`^Q4K1eVm!BZxe5ZQ34EA#!Db1 z5*QE1|D8)@%{mfrBaB_&WB3FElE`RbSzN8bci|vxkQ@++zs>O&Fc(7}fe)AOE_U5S zlOd{y?ny+k%hul^^;>kxxMiM^f-Eq~F(DxcFHQ)RFHW#{CPs5@ zLv<<*snm=F4DW7mSv~$pfDeGUD6y_<1&+KtED+-td~onU(U?e0G?*D(H1%M$#(@Nw znTeBuDogvk$k-2@9JYPUFc&T6fifutXrM+lq#6=IN@6frL) z^MKUA2o011^meiOlM90-i!2Qa-&AHE>KiRAI9{y$6PPVsq1DS6<`oWA22GI_9gRkP zC6LrbEC@8Ec{R|Ltlq>}%4!TPxym<63)++25Fj?|%#ZuWj3l`!#A=Pl!0bVtwSW>1 zEofQ|P7Ee@kh|~?B%&)0{Ljb%dHLYrv0r;p70brKN0v%U!RB*K8Y<2i^BJjsxK7Hk z>P<3&u~8(;E!0Aq!1}Afd8p9mBB}>fs)50vilk&c;-Fj+qJmyWf_5(corq)xJyvdM zLHXXsWRwrF8x=NWFtDumnZ8qN@JDm>VwVKP*ts=cH`T*p@RFpBo+tNDw(R-Q=U}lK z=B9V2L*P)(_XSz|%Z0STU}?pHe~m|1;U@Ilc{}7Wg>bbJG&8WkGK$ZkJ#ZIum4HfR z+o#Ka9e8+QyB80G?>S$HkPE;>w*`$IRYpI=;p%3)p;ssU@gd@_j6|};zBs(^EjN0* z``%veKe=+lw!7QQO`c7@9=URrz5cqr+^pvN_H?}X3tt9|_&sH`bCUp1JU4oGclG@{ zKRfz)8Q#Q~mEV|K1V}|C~J|(_pmJKZ}J>FqYd*~NB=~?B!c2y5KQnjR|fR*=m+lq zdin$fycIIvEq&z|*Dn>-7oXT&i1#<>$+9ZjJL6?H$_F%u^M`<;oObdEAk7uCAS^nD z8AgN|@`vuUP7zN|B3p;$Z*F2G3)I0ej-|_={xMC$XKGasb1!5;%f=lqz2;Kg|D19p#j>(H^w)$#)rFkmny^cKJ7lKq;Xs$E$CK%jVi6= zwPl+X(K?l*2k~l*X}|-FC?_ z({m{{L~5AG)^<^_vi{{gkNJ%0f*jHHab|48v$TL~dkXWoASG*=rr4~(cBSYsHEXVO 
z8|B(fi;V6h?5IOTwe#vSEC+o{{iV~uW+iln`HA{l)u*$U!x90nG=rNd4hZh{t=sxv z#F6_yyAj~r9PAl4oZq3XQq4`rMG+MLXYGq(@JEkp!|DNW46>?V3H;k+sR^E(i9Pu< z3J26byII@rOiE(Q7?Q3l?EF?U`U2u}7^q9gJ4nHc$oj<))X}sYnIwap@vxGOb2=0R zh-20`PdR>DF*-HF_k4QkWx6FXJ#{|CgOWD^C|gh%b1-1^P~l)e5YyDOL1&3MoI~tL z$1pz9cT5N@x_NUV^NexJX8aSqp8M{mE~UI6u-`F zleFc{I_1!mvb!G20cJi5;hx^Tx$Vh6YzIQSP@eqN&4DDh5h2M+dvt>k6*NpPHTD-; z45moAoG7EL&}Jb(DwJEH1wQD^KwK;^qEvn{gI}6Ch?I%p(SyY1;_T;YlyG)CRAy@2 z4#q?{3@_=BnjqSijB_Cwd9h zoCTP^5D6*=)BbD(V2EyJnOux`Iq4h`;oxT21|taH;j4kc4BWl(rp=D|FmbVoPyo*U z)a!7ZYz%sS8~8?#LOE9hJ8|Y-rihvQ-WLz%DV)`#vKzq4#wCwA79SMl^VecXF~u00 zDhOQOj%gq8B>YU*Pu`(-X>yb`Ea9j63wL+e?3I1^1_75Vy+NuA4oj6{+|KXSL6c_w zi>?p_GsX24lJ`{<>gvX$;}Dp8Q(Sf&jBQfDl^IUjvL!ci=vZ@2pWxnq3I=)Rf& zpEk=x>28ZJ>Pb&Hu-`ypIN~tQ^4KVrs~N+ShW>u!owba@c}R>1q6Elxq68?k~5 z)(jVVXiRW6PM(aQPiPuo?GH_jyzOMh!EJ+~v298bl8$a@D{y>IuI#<5y1|g{Zh2?K ztM4o3FJRaE15bN?*X%I&SbLT2bhc)*mTD4hB^zwc8g3B9h5ev{{Zx$~2qZi{xfQi$ zK`lW_WL_)m-u6V8j3nh53|KJZh5cxtF$%E=x;YpMI4=0wIw;V-r>7@U14NbI*Hapv z-Hjzs%Rn+EJ}BrmKfPZJMC$76)Vu%G+{6;TAmGad6tKCz8E8Lvjq-IKQ$>OTVvhut zsjCwLQWPzxy8xFAqALsO9!N2PbqLb}MZ=2^(8B>gWQY}QQzGFDtjr@Hcq4`mY|N{T z8Z|2AW$ibE8S=Hjch?Cg!kCd@^H`4-g$%o9!Pyd?GhEgJtr9ISgJ4f^32^ z*vHl8DSndOIC8hX=Op$PZ~KN$cV81K{F;HiFu$-iL5AwF(nL#nfmUwdMfs=gNFC$J zJuveixG*xa_FA9(`P4hy<2xt{PX)$Pt&mWcduDoG^mpCQ@_~NU`V{o`w!ZJSUw+2FF(Y9aQzy34vXkMz z`zo2;_&Tz~>q(^dLrHF|EFleyy&-o;HGcr#^hM%<+>uZ9S%tpTpd5|OtsdW%uYd}` zxEQ#m1%kGG@uy{d`ICL&Q+&3={Bufr{-V0-w|&?py8U_-{Gx9OZEkG^n>+Fc`D`=> zxyB!ZC4qji8~+Dv6v2nLd;TJQGXm!n{W5+L4w4B8x^_TmEiiPb_W?V>~gK?B9ej2AUrHdBe`n?hI>8yEIZfKKj^%Y~p;? 
zKDxg9H=QFXudgYp7`t!Oe!8nFcU={-ymh%7^&w4o=Rx)5{sJ#9F*1XA6Bl)f4jmX3 z^0@eJX7(54#`VtU6Z!QM%JlshKm_B_;tYP$gFXHG$@BEvzUJH()4g(FY4pqao_ z8LsQX@P;sQFDF1I z57Ld#x3h%T-@&GNV=)39pM?92FvKRTp+@GeDs~Nr4x5yje_9Ij%^Ak!X7HYl1YV&N z{ccD|(KzQFH|2zAL1tY+1Z{K2*VmMUksH6~$&ZfJ+4v`vRJu(Kvyuhs5PVh<*wn`_b$yx)~D#EWIT)RKF5a>uUuc>KI zP_L|w%+ynJHm^ggf)u6k=nE!nIpVK6)G7V!8-y!rPnI710pyi)6U%NO zBx^aL1m#-CwR9txC)4H}?S&|()Y%YR z3nuNAVHhG4rAg*xTH=9-8~+@%XFX>nJt}O$&Px@mKDb)NRQl51HjX!0`-AEUT99AT z+{1G?c`p)o{KbHe^NZSMA7LLA{2hl8OX9>);?lP`El zB%S{uq=y>B81KpP8F8&*Pa2x%>?R0dKZ~q5@|%!l(Sl#9l(bUM1wa!(tX*BK7A)OU z^uooL&Ld4NIpp;wfp#;t=`m}OaD9!T6cNHmRah>WrSvNdZ@m=80dADM%z+Z{PB+%I zU(}XQS6iNUUZEF}1x(zOjPEZb|K2!2`Ad13Fp2jm^hXwJf#tK>eI1c0#djvCZu3B( zIH51OToGClK$hzuiX$Ph!*rVsm~>!)nx2B~gf ztWwsJ-|7`ebl`xtWu1F>f}Y1&hAAH{l;ISeSG+tVR$BohF9FAf^V}0!ufC|id5Z&h zSh-`etqAh?WS!71JrbmQj@{p}=giEJ+=2UlxOrrD4a120V@4Geh#g$Lot8^KnkZWY zTG9G0b5?wH*uF6J2L$DXwtxxcC6$T`=>T*Kodt#ZVDr3@ z2@3WLWx9N!fg#XbmF84@#E%(Fg7W%!dI1upF0Png$19faOmGgfkPa>c^S~p(?*>yE zF;??MMi7A(ifkG{c((U)%F<7`01Q2|Dz|PvpX+{4Z0MNZ6 z*jB`~>Qr&T?sMXA6L-7UsmkX*{XfWjEuEN5c|tbnm2wBN6MXU!2zD3NPiPr9 ziHc*8u;vKrTa~VPnm?(|w3`o@ppj&+Axa%LJ(HlH!X~k`fj!eLFcXy?*w7VqH<5}a z#lSy}OSH|{+kZD_djY(zrF1e=jmJbk=)||g1Kw&j%k?KB{$2@NSE0fr=?FQ2ig^W~ z$1SHT+?tfNjjtMF@q5skdJVT&(am!rln|*q5JUYR075{$zjI~}%lOLsDeMfZ`u{qP zuoe#?51pV_iC1iNQRLD%I&Pt**oE-+zx(;S;26!jf9-QDJIcN~Si4RfYwZ{`w?K^I z?@QH&?K6|mGn#=l>h;sWv{Bqq?)o+=N8~&lBAVy1&yhq=*czVsgsWpCd>_B+B znWC-``TOWgcOS&=?|I@oq1+F=-UyolG><2G&_o$AVrG5)PQRY;0r|Rwk*M8wP2X*%uK>mOikb!&S%nF#jRzABrGLw?>w6O&1;pSRtoJHpjK=EgGyKN9tVdPqv)*jYn^RcE5eIj)>iK z_iWOOImQpcgA-ehwJzLNZBtmz>dTn{V71Wbe|>p&p~|ERLgbR3f)4qhxg5YhU@)eQ z?St3^7F-&YGq^Arxo)v_a^na@1%@q}$)i;f;2*REACr~T^d-EnXc|475A$JZ$?=UssNK!xd&rMZ6oGXS0wr@@?7ulDX?cTfqS*Ye%8 zW(m^g-U0tJE6XiiL97-OSwWS}TVQ-{lz-tbL--2unv{%eXmu)9wo|5J|HI32yOzp8 zC>=?hQ5>q#NN+-Z)IPNI%;6YqrRge3^X8rEx~~sJtB>7Z2=~Yh~n+8+VM{Y^);OeQ7OR(+-(oc^k8rF`@yag(lfM8 zB*8(fMVGx@xu~Lw61-%Z-y8-(rVPLFsnOclq=RxFykpbHG!!5Rpw5Pnu}V8re|)jW 
zIFQG@verbl@QbBnHMEiE54(LlmQeQ!A8`ABMw%>bdY5b;NRjXT`g~0$RUz=!g&qfk zAWbA9PxDM*U%Ff7p2}U)XDyR5A!k~P&+N4BDEd*gl$2Wn)!On0Q;iqTX;CsQSBnW5 zlK3igq1$Y_rgD>{KV}`z2b!eYf1(Z;>nOfVkkw;MmRD!vHA_xK>IxkZ8U#IBh!fYx zq=52{SD`sLV;rr4XpN9UcwEHaJ_3!Rv=pjs0E7r5%=L0%Z*-eD6=&FCDUeZzXjRa?|T_wBE`F7%5UX zwKGy3!it%5xLgS2yDm*VrKLjkpJk~M8e|mFm)9Uf3E+fU^E-MPp6P9z5uX?Cl<=u? zm$MKC=;5|9??xz1AHe8;f5D(3r>x;O&tm?b4MT!nAuo>Il;D3FncP$8mXL3PBHk8M zeP^@zLRsa*xsNEuf7yG6ceOvTNs>?LM(p97bChiuTEhzh_!hf)??TU_GdTe??+z^~R28S>`BD zqijNu4xYv7u>S&T-ZS;==PZsLK=iQQ>6Oz$8lA%SI%gc}tp-NOj?;95wt?&VMA>Oy ziqYNBu&wI6!iY7Bo3hVzT}Qfl{kND3M>hW(>DSI6Lf9!j*N8QwG-z6ox>|oNFr)gSlVyO2CQV1rQsA2`5EFm8$w&wFs za9c5c;J&!%S?rGx3||w*>bqXWKX{K(zW62jWJ3uHC77Epw{vf1)T;t;dSftl&^D_< zp(0%K%g-39RQP`uEVTFKpBgPxLWB-_VI0e0=gz;O&NwV5f8b*rrT=D$>zEs3K|F5q z-u3SZm1+IpXUe6u%|03``UMLiQol$*)7tKq z21W$vH%zp=!iHaQ&TNres>B$*B0zRxfU?@eq!RmT+LxpzWu+{0Wv6Exs7m(u!Cgan zi$Owhc7KWce_MW}+iCi>LC__328P>u!W`K($;cN-f=!%Iwc$C)F$VBV!cdan)Zk6( zc?p8~iLt>9lmxTb&L2NWDl{qq-C_!;Y>o^l00I!x?G+}P`Ol}Q?0$#NS>h#q4{5&f z%k>@AC&3btbMZ+RI8$pk42bxGXyE7h+py}%(yPKJfBOWOUF#vm$Sj%U@QzPS^Ap}7 z-;~K!#3%irS`@gQu^>vb5=f%u#E^8#Qjp(FJ2Gi*V&nuFJ0^?I>FP7P64vWlkv`4mEZ zG!OYxe~mH?g4nXj*~YOZ?lXJe8>9CZW^?YK{YNKGLO@Je`_9CYgxy2IOxk?rE)l+TWqN%PqW02 z`RNzQX8vuB-C&ZnKjz)`T^|-26LQrT>ALhmZ5NM8v&OQl#A=aBwuVT)8=p?;zW%z8 z3YO*BvQ8t+3T^iv0{S->hql*k5F0Ofo8=lekqdY@N-4Dw2S;JpI;xQ6!hiBiO7^7H ze?44uG~d4bgsM8EQnwi*a)7+}eufQedg<9JhOYLKgJWH5T1+1);70%*Y%YX)^jve& zob+kUk;L)0J14!j)P<)wJ94;4$?J&)&E5%KryVQ(SCggg7d~OiEovbMGL=!bF;&x- z+#sW->fNfjOjq&Zu7@2j%FX-bCkh{{|NqKuS8hBZ96evYCUAGz?P>?QlPD+4KQd+K8TjnwMZe^?+j zF#cIEc&+Nrm?oTxX+I>p*|ediM_Z-j%cNH+A$QHdU@;iwAQ^a=nA^gfih6s=DO!w! 
zCPR?~$(+JHmrAC9YET|eSe}>HHAx@LhkA!bT&9_%9;%lZnJKxu!&MdO5DCDHpfcWy zd9}(dmL;A0BfJ}}iY(6M%AQ~vf2*HQ&+YMrl$V~AYx#Zz-vQkpEc4K9K6`=Zf>IZ4 zL?$YY@VoPSRnDRJ@ox=QgEq}Hj)@DF=EeIiIYiriJsOD}wn&W*a;hTciWnLyZ?d2X z2W#`MliNn;&`O%tI}sc983fp~mdYYECyFfryPcaWvn@0xs>w$55_uoQfA4kGS?Ut2 z4~A84lkCsBTem?R+VDFMvb$FbTUg#Ox0HF7$(l!5@e4|FeIE4PTxI6nJ4o$t#@gSz z%`Ru8vb-GMu)7d}+Fd4hdp`Ud3Fxy_X2SX`r-bXqAZCM*b*yfTK=O+4=33KTbh-&!(ZB?>S<0#g^!vuFq>iA#g3u|XPRv9 zAG7z--m<-|Z67r|H$gx`@!!NgT?4lbfW4t7u}Bj0%ma{+q3P}fe;7{uM7=bN5kD^? ztoI@-==3)4Y)>-wp~)d-955EC`yyv|n)d|C=bKRCU14G184*kTLe|c2ad^(+Vo>UG z6kT{$t(vfHcy8u={y;&FXGI>Icz~3(;5?yJ@Q*zP2(~W>Z%A|G(MvB<+nE{S3)94$ zKs2fya^Kl8G$vH`f6jIrBR1~B^+cSB=9dcagLrh%hdVBTK_tUZmDr>4WeRqegHVeE>%QtK#RJ+=DU=ipiAnJpk$A4S+*rI zi+`$pX*CdmxZ|>6qD-Z0kz+b#C~=VyUTK_!3M$uR`_&yo&&N-|HNIDmbLygxmqpVg zqs)3x;j*ntf5$_Q+oTN@gR!7uu2f{0+CR0i%K^>8<5@Z>@jSH~6clJP`eS_?mPMgW z4Cbh*lh_q+w+CNq;jjm*Sa}?i%J`_k8-abWD6D$8i*xy`^cjg6+2A4zdf|A_t+KOv z)$wA5-*N!MO6_ox=Hi@=^|f}j9SR;6CsG*>fYwr*e@)rTF^PV{a zf?wbJ_~d`o?tJ!Datp#XE2V$$;jz=Pz`{tC2wJ<{r&#!O#X#WI-+{6vPu%~e_bY9D zb_i$k4CdBkrC!5LFUs=6DKktY24SFKG~&o`&+@7o@u~OuDL!FLNnDdY*p`t)Xj16~ zv}Efzf0=;`l@watveWnR(5D1yp7EM>JUglZ)pb*s2D@gZy^W_EN;x7J_o0rV*zPTT zc}$}6Y~w;6?zyc@z@@_4?)KjK3Fh=eNYfzWP@Y4+l0wSr&9c8PGJL}P!Re5?g_iw5 zPRkuo0+y2BNP`v4*{?+=*_LfZ@HB~PwKaAQf1`P-T)4TFpB^@5*~l}U(oP0-y^=|~ zxp)=vGK`THN{1PtIp|oV$)zf^F77Ery`D+voT7eYa27Hmx)x{2ILk3Hr+5-d)ej5Zan1=XSin2P}7M5yLph zU$ILUjQuc6z62$_^xLAGgeHFBwtBGQe3lOVp97F$LVr*gBzTc_$C7evnRFfr2zh`g#B`N z(RM2Z=?_|=4N#Y{OVsZKSL6#9XOMI7NSKC})%3_EePHd6;*xD)I^cyd+p+^rV7--3 zp3}J+911u&l%+$Er<#CB-W~qYe_0?kNc*`c`pjDdwukzjWEo{X`QX`e;bei~ieiR1 z-jqt5H&g1IN9`-ERZCZ|+Bj+#k$-?DKg2d`ya*@a%glO`{VtP}NckxL#g(WZUW+o> zsA<4gBb3_TzO!cb1j7yW2q`>{{b673JeeGlAq;!$&%RGj|M^L@Y2hF}f8R1bA?CXR z94`gCWspLr2{2Wu6VoP;YZ2zO9$uIboEPFA=p2`(@;szbBwW>VMAv)p zT>Ov^RD!BVq8XIhBb)-QMp4~{BrA?qy8`*f_P=J(0>u>?<2>(>Jc57WFhqm9+f zf%V5?y_F*H-6>STxe*)KhzxH?E>#nuV zvme#LXpoA-~&jhoOR! 
zXJOasbUmJ-tJZ@Z1JeE>60D}&WO&z1ql}gkP#_-4?Us$(HeL5znTuFtw z@M&3fs_%RLg$aJ^QG1bV1CDYT71j+tK}L`}r=(AAC^5eZW%9QK6ZJhXH6a4~Nfq;_ z4=lcdO$GKOH-S?T%2m(mMjGwbKi8PCKkxa4^mVFUsh8 zxJ8<{o2TyLf2oVwnl~l2Ljq4M-FfA16#j^iluwL6dU4y8wL+=&3?+_VH(?YMs&nOJ6CAi#fw8euZzxOL7gt&ZUUAs zfl)ZlrV7%$*%b1_^y%bDa4{bzF$(Qxa$KZZko6HJaL-ey?rI>;p(nq~Y=&3wwu~T* z9|8yhwcF9!^LHBd?S#T5|D3lLgs?%aFdsKgi7SmB8$YRh{T<7zjV+4jq6Y(bwdd|8 z-&xx{e+}z@Ni{dg={iLcxk zl$2o}U?TBarJ2^=5NaLvA-CI?TFb?oIqUPGPL}%DO7Q;gi>7)CqE=-u?HB-lN z?B`~1iy)VNZ!!FWMUcd7d$Eih@X=r{1;TacuBhE3Got87*3N9Y?o+4gn9ag@l=iZ* zTFjV=mTs|1i-A66Oov8`{rmt^PK(1)e|WZG+1$udBm|B~L84a{5k7mX7lpVw+edLg?vYo?;TJk6crZ9Xb7^a`OYj2gbTiwE4|l5V6MoeTe@N2{ z{5t=kIIA~aqP~#G{YR&7kAB9&-0%G5kAEzw_cGe3VDr@pzBy25IBV-U_R{EKGZ6zn z3NvS~&<=tZE_eRkyqEC3{CqtTalOf9Y7}PXb?q1SP~RQCawN@hwJI$NV7zSDE2%Tg-OE zFNKFE+X;srZe>u~hAsKtutsHHes13(KjHDw2P{-%BbvZu-729=h?DHJQD)~TVe;^B zi(#1Fbe<(nMC=i9I8Dme|8m$>MYYD19&-L0YKrk|Cu_{%oT}F9VYBm9ij>9$53DyVEmC;20{>&T6 z_sWC9nJ}TjDp1ojgN`WCGdkFur;bPe6$yAkUeRDbk?ZWjjyx7<=tn$M=RV4UoHx}J z$q7CImS*Zl3hfW1D7CMiW$L(t&_DRM+XiuLzwzkJ9D7biBG(O4f8RJd*)jZp6Cv2h z?O&TQ5^!CGV1e zC=9ISZB(h6U-(H=Bv$yaCuZYG5qFEozI~$})%yWEX1kzT<&3;gvHP*Pxqg`|`*=y7 zDw)OPCsEJ=Th20ke~-wB)hmL+dZchUJpvs45A9ZbUu#Z0nOQ2&R5r!@Z277n&vJNJ zwrB8oWTU272^_;rW~e9c)b9j%+ot)@?*cyVt}zkCVQ<*1(TNU3w&~~nh&D}#K*KDe z7z2b#6(p*t?;g~Ciik)CR5Yw_Jy6UQ=;6Xry)h%55Rnv_eesQpqbfjJ(I6;64u5 z)Ypn(zxTqr}>EqHVE>zQdXIr zxDS84(ByOte@=Zj*Tu8hScZe8wru#$>|#l(Ho-)fec@Tw1r}%PWz2}z&-0t&hWBap z!j*Gc1L5~6m0pCKVJn5SV>b0Gdg$>we6A$vN~Bg1#(k@Bn+`a|Di@!kvXdjfY_>uM zbKsCYe$rkOq(HSD%xa6*`?fICxvHw>xN)8QExnbsf4#XW*;3JQGs(xBeDJzyjbU)Vv0Yi-^%AgMQ+)S>uP*X4O7v&aRqlyWzNLuTQASnWl7fV4VYQgj1;a za!DB^GH}$HW@!Lm7K_Y@w-K(a4J)jQkYsy!eMUS_0qhv(XvADF~ZCxRImIV5KE6t3Ni&9RvroO4Oexk1wrfv}$T?TA zb|#E!JowUG%=l>Q5wXk_ipFOf@&=it8Z-@OdQ7njaaytcoLSN$5Jl1YDZgM_xu8MA ze^S{;Q0URnqYQ$8s|C`@9Q)ZB;*3jqGWeUieg)lBU4gzYD5>BD|9OD?)#aIDDrS}a zl`K1RG`>Vv0uIV1TADk{>c{2u(^L_j-o)qQ^RT3Y>gu5^)T;`|ZkPe10%SSBt%T&0 z3!U?(oE;1AAJDB$rrJ-{BsUQotsY9|e_2EivAnuasarBeN^*io>k{4EW-ixdey`5( 
z>-A%w&X^wJq3c~18I{DYk7*}Aw(S;v@rr->f*Ioan250Hvw4~PD_#(WbOheaz;ak( zxFe{ZJm;M4`2-&74lLt+>-0Hl*A%jScxTTE{`-kwi@DNbyWYL|Rm4J$ZaeGsf3*h7 zjhOCfj8DN+X8RA!KWc3Kku!x23xcUfxX(h_8~UU)0M6$!?vtL5FpdQOnlQ=90Uv%f z8L;~~K+|AqlP;Kn%`Z#)4bjU6W)lr~5p2QSz~(j@Q^})a=x7(+f?RfCz}x;e^1mV(jE|r zGTL2n*Y6vx&GU`0=$bNQHLJ|L>oQ8mk#|mR@?3I zL&;=5e08HEzq}AjH&woVe{&CJ?E?~VxOmdV(wWIAM0n|R{>)teEy|)&L2LnKY_p&0 zzIqd~8;Bf9I7uDk8NvVKFlJ8-2DbY9+1uE}5J48G7iB;GrXb2Fcp^9)oyvr{j1EEu z`*}n=*C6+yzL|hByPK1J5FR#D>W1DgQlPCZARv~m!4gpqXKpyD|r*`o_LxhY~Hi?ot%u zLji1_hs+AJ)890ef7w5qiH6!XR1D;DXp`|y4R?4t4=WbGVJu5^{b08NW+R_4bjv3>>K|!@F(J;0Xh}NIdtp$?C%J^+PLML!l!~}W95auT*zfL$C z!KvD7h%C-kCanKYZM1uebv+IBG1n5o(|bnB8@l^e@}k(rHyOdMa&<6HT`*Q z>cnk*e7vEEVB<+vM$!AYe?*cZ?CWH4Ff?C1Df0;r$4l?|*J~)&(MOL>_Kw1>;4zlf zvv1BOelsWxB9jHqyM^fX{6m+>D%txxycwI-%JxM#*9M{D`nK$ItH6u5#wo_U^k9vE z!hEDmNdy%$f1kR!Y9oHaiyIF8**^kaFI_l`jY2OgZ-wVJY}zoFheK1XX6Y>*(wuib zOzB8z-CuAc2aPHR9Iui+LbV>T9l=FAgAPqpe_`RuO<4^sx{)9&3qNs3xzo*| zeg~`Vn3#*RMRq_5iDf`EbNbL5yRRj04C(sP{Pl9fE`{+VXP84?2VXU8txjt$Q+18V zH#2jss*6p>IJvOO1JA~`6MG=ZAat#V(pDLZuI$@T9#|T&x#88u+WUm@wN3JAOHnLc zniq{$e;+ma0Nn(^LybS_Pg&=dR(UgHQb!4@=SSh3BRIjGN|KeN;4cEY{T)>7%~MXY z7Xb~IWjM$Qi?3(da=PGam5)J2kPu=U#TED*ccP$2>xTqA8VG=w80@p*i_FQ}%dJz5 z=X`P`=DLQzAdhwvA^$-~Mb4d?@BOASPf)g%e-|9((hd&WXTQ_?I@*H!gRt1Rbc9D1f}4#zI9zPZ<1@}5jM->^o6mXLxTzBq{*)i!b-vxNL82)pS)h)X}5l+@K6 zJFZavlog$~ba(3btg`@|8SZ^QuCPjF?Hp6=pMs`8Chtl`8A(+<*{}T5%~BwCC<5%+ ze@3s!MbJygu_z}8$ufU^=O#7B#mz_W($6su_{wR-*;Oua#w~PQJ^9wE@#rKWKg90% zoMtTRV@9~rG?(@GrN@d^$GBChQMQ@eS;ndoR{pIhLX1*E7PjySc?sxH-qVU-sD@|i zR)X<}PTtzOH-di*JDUg(aj175?`=g?e__xUjpUfFKWy!dUb&z@^xb#H6uHt%B@0hl zU(P>pLLbKoTkC2@YgaqL)o?u~f!}{@+Hi9B7|5eixtVz63LQE;1V4AVwS&vu`Kf|g ziX%iI-rA*hYXr+VBj6~k9s?V7Msy!j zcG-`4uw7PHEa}Z+6yI$rZAyOduH-k)(%qjlq*8npx4BIb0xJE!VyE8;V+S*3w#=F` zZFK(B$XxqLofGty$Y8sJq(I5X+NzrHyw|CNwT<19nE>wS?0b^9<{bR5fApJogpw#X z5i81w5!&;|dV)14#|t z%RXb>$<6O1U&G z*iX9%=K@1%0S$@+v#!_eTI{Q-Zm(#j)DCQ9TaWu9`{KYW(ltZZe-%*x*%C8r6eE@) 
z0L4;Vjz_97+=U`!Ydx*KYrmp^F^~#c|zi><}&IN-$&HB|*57XH8s)76i?q$)Y>KC=ySe$55jN zsVP*AI(1f@FO**De@aI@v{`&>?y149vtf(!DCZDc`VCUPJX_t);}4HHhpQr7UD zJ{*F$^yW|eKpEuf&tD@2=JQ=nJM)6$46#|0xKKB{4l|EzSpG1;Z$L$^9O(6^xNdvx za6~gdK#d_B6uw$(4<%fLJeZvOSlNjY{5icO9v`{7X8H|0f2p8|kM3tgJTh?(K}OQw z5eE~%XxRE!8C7O@k#h{Y$dM%y5+EcRN5#y!BE5jQnz7AL6^7^&KVi)1%=>cqCnA=Q zR@Rkl%s<3r`=#jhmx*e&Jy@&7j5>PY+`AWid~N8CsQqj_G(BT5RHCW{69K7GuCQ~s zFX6Mp^>T1Ze^k!Ooe-4!g!l2xUUE#)W;*@#tK6Re+_HG=A6C&ww#@~vxG-nQWF(XZ z&Vj&{gmJ(kji|OMKQrL>=x*L#NzskuGA<7p0C~aP-+1EoQ7O;>jrxlu{TMv$;2W1v zR-Q;z*BK2jYP zxaLs7glk(P$3@}Pf_!SSd@Y$#uNpqr#U4I^Q4g5e<)3lKULq7GDdhV!8J^cRxF^ai zv8LsS;)rltl}<>9H7BwWshb?K_!GYsCxPQ>TWfM6k(6%0)V08B5I6Hst2N&5@^(a5 z=1U(X4#V4Uy(>tJ^#2Dvv@KeLu>&?ZGM8~H2NnZ0G%=U4Y6mEPv}IVFU9%;O zySvl4ySux)I}J4M?(S{@f(C*^aQC3W-JRer!6(oA&Y3gU`M&uxbAEJR_pVj7s%o#= zMMb8n!6atsWDb;ea&%{6V`k+CD1#i#J>1lt9F(2-6q(e4)*f>1X7+%;7#9K+m4qwM z%pK(9C~4*ngEdi2$KnnmnJAjRkj}L(gAmQZf;|j92aR<<9sq4@&Ffjh7rI|ML1@HGtgRfc93*2yEmzM z+@wL?KucABkh_Hqz{<@2@8$i=uH|S6bhQUL0{@EqulxNQ#LD_#Fijhfg`ML+YHW>!d=tH83_2FnRJw#EdSTxA1ZNiCvSis z6B{=lfQg-l_3vZ+`yBb$IRpNG(*BEx?LW^-X6~+kAa8&HD>ExA8{qH%fBgRQWb|KX zq#P}rEdQA!4RwEHZcI=>&t(;trZ z;n>rE^kxDti--a!==zJ%H>o>$7!E^OjK3p)In#;vyIN4(hSKL|)jr()rKWCN_69>d z)yd%sclAUUOyOA^j79nAj1p*()sTeUh1LXyQD1U;MxA#2^hh%_HZuIP_XfKEfRG_S z37$&83^y?2lq}ud$JF({(c%O8#goSa(F@ss5A(~r2Stb@dA5_p58t_G_F&r1?{{HS zhT>UnjjJ|ItiD&n_cjtj6o4qAh_@Ub{}AbVibRXZ+iSFg0f7fpR-%k826?TVZcp9d zhsKpmWPfQET8^A^IqzGuLLD<*PMcN%=`~vQb>dt^jYn}0wU*-`0<6@cugmVghKpu@ zdJ{7!S!+gZq!%uspT0wbT|%n&0*Qme}gX ztH@vDo{6a8hPX`27o-<`%}~2PJb}`GNldt|E!%}hTLc)U_(|2z+$&ao5iWaU#yyp2 z6E?4E#Kmo#UJjenjD%CHB(kJe%EC{!iBUriG#%|glx<&tAfLAdC-^ps6;~RBW6+9+ z&AE-B=Qm0GoU|nQF3b;KOjwWCYY}LUBBy*7E~sO$;R0@swfhxW2of25PBS{*D2yK@jwKuWm_1NUN;SP(5LSYM(#9!x;jF1cwymB zX4vL`YBj&yJ6z-x$n~#Kcl7nOiG`k!(Y&ozg$1#1Rnbuzdz^%K8T4m)mE0wL*wnaj z4vS*F{l>Hu*$SQlQFOZRhIefpW>MIr=ca*Rbx>XJsuHgyzbZf*q00S3= z;E6@caJS>E!!~heiT9m<1a~(V3Cdv<+m!v29yF>;akb(Is* 
zMlugVlALTyXM#I&AL_j~W^fPL-gKgH1Q+tIUO%j6^$6vW1Df(mke*)~l9Hz~!kw=d z=PSa-IebL>K_B1Upa6>**spZIS=wlrO98GP;%`63k~yxq~>&SSI? z$xzYPRX2uK$y{*daNBOE<_{)q`C=+W`kbfwD{Zu{>2)TaHsgAzd96~W8wqT-2i-P> zy<@~jB!t;Fv-N{dtXv3buFP&~Z=s9XWv;cc11n2l)#X)x(6_5!B}iP4un+x=Rcdhg zv;-4PWUBavl^@TG3!ZiZgl8G$liLat9)VY%FP<%;Z_sn^xa%qc*;L=fNGxNr-)eqC z6+t1K(79SaV)7>5F{O?T9KD1bmXE&ytvGV0&l#y47`tHgJ8um2=weVUsvP&9Vs2ty z&7Jt&Pub{y`&%;=zYLw=;={2L<_To0G?X*+YV!S}ZYI{O!`kaz{t+)w5uE zn)}m`*(Y1gRaR`1*GR?r<&E_0$dwFgg>wP(_}4kHLa_*X<^v!Q4u^T=)m~3ImoQX_ z2)vc}J(>7x7({Yy>CyLXkN2QIWvrZ>uPaU>QNQCg6t!41Z0FlI!aFy zE#mO^pI@%HYgI7(@b^drVvX}Bd3^i%*q?i8evEc3*vjK3AHw;a!t1h z;Z^hTI&OV)v|pTVB|@;#Y`#G05Yoccahgi{Fmo{#L($5$3ozr@()=a17hPFpWhXn; zr$4DYHUN5jZ~$*@bRbF zE1o9tbI63noVVvUAzgm9>Q{b@Z&{m(uwZ5N>t7XxMP9?=)V@D{^%^9xgvu@((9QLZ zHN}}i&B20tgFv_VE`mi&F?Pw{+%2T88wtaI$dc=|M3Y(KyfqD$(-9>XhfC=N4|wB> zSfbmOvj`BHXr1kL*K{Z8FcnB3TQq0rG;EtirB@@I2lK zh&7|QYb^Q|xl7G|3iM5LgXf^ghJT>jZ^?~ezQm?oS6-fxvP)<@b3&t~744^_iq}hj zP@1u|VnOG+q44>EE`*s}|6NLB%j`sC2xCoI@_&ziobhmAAb( z`|u4;yTy;w86kZp=9xxU5`(PD5+MvSBkcQ)mQrz@CoiiXcEiBheT(_TsS$Dm~o92APfhQG2;LT zv#=0aVe!yY5w}SG-{SYn#x!_;1hl2SbiPh|-fY~iaC0P7VE```QJ1aAGu6i2MXtT~ z?Lg_%jNeZv9~K*FmOoa1iS<%9()oG(7Lv5{pu$W^wEg7Qu8o%UmelF}DbqUsxtu*u z8#Uwo`zdKN`ay4VW{93TMW};Lpu4!>bW5&06yIYFa%+7!Jg9eGlr?C7TOHJ-RH=(w z8_}Z3kB>HA6{{Ho#koHWW3i;z1MWLN5y6BRy>=M1(XKOn^r3|;k!nNNpvmvumY?0A znJadAbQDc(!TfGoa)Es;;9kTt<{uO6ZrB?%0Hu7TNrAa7GIZtcmv04WH9?p^q|tpF z;Tzak`03vE|?>%W5n+XOBWRCC<@rXVj%iU=&7{RUOE zuc?6Aw`0VBVHMAQM)Q_w+KZynsD6O_G4hV%m-2iUr^+H2DdM`=3) zYibZ3eyhS;9^ka)4C2=deqEr*qWHf&t=1dxe+z z^xW;zGQF*MrY?;zk_3MxHJ`N4t2hrx=d}Q%TYQ-k?SArkgm@gh`*pMJ#L2B#M)fK0 z@zDCUJP5ZJ8VQS&l4P9&L@TLt8MTGL0&^)VaDNqJmHMnr-x9g4Ff_O{gu*<2hBXJF zhb9^&U>LTuw}SqEFjw&Osd(8j3yLJ1>Gn4c6&I!g`}$qE93l5w@GJBthdp$MKe1hJ z6$a9i>PPucAvO6*Mdh0Zk?r{gJJZryhr!BFLC8gfTE<64?ZSY}6iQbYME-fxavj#F z3=_Np<6*OR8o`}6#?aF8#ScW$63hi+(d3wSCJ{`7_=d}W;H+)wADUhZxOR=svgF(dsd;}aCiTqCjD#^l!l5611bb=ZY!$zvH}d+f70H*0qI?B7$y 
zvk{achiyN9%7JJQ5)DMaWkKQu8RrJY0v@06A=QvRok%Tt`$CkD#4gC6zr{Wq!3^Uy z%#5JRJplxLJ&X7UKXgT<4NShY^ZG3Uw2xRcXPtOxzs`yP_>U)IVcC~SEv%BH5-tT= ztFRV;6DTU?WReHir>;f4<>=&Pm_KUnXl6jfo=!q049=s#L!C zvc_c`y~EWf%s+A2vk`|c%((PR8)obn%s#e2RtwV8i(ZLayBBmnm~hu)=WO^rhMc1O zA|0=5a@*ZDO8dUJnWQUCQ{BGUGfL&?Df(V?{u9UD{&dJi6nz|sN`fn(W{}A*AXxk79f75)yKcgD)T+Crm6=EF+K-!|pkd>pW zGLK@aD+!hRIarSTbAPIOLGCJIDG{f+7GX5%AY2;4*Tek~>N%l(ktv*2dCp5{yOXPa z11%S#9EQ50dO5;zN(l)1W2S34o<3<%?EapAWx;5V3TB<@OTf9_IWfL7JK(1wzQkYy zAL~!v9WESQ<2l0oHKURtlgCydj3+lPcA|RW7f{MT7eS#%5{|D_f!8o!%4Nu^j!Y3-EUyE2{c(UeiUQr+WVjUq~mfVSDRdiv0xvg9*r_chJmf&^7pWqmCprbe(LBV~;d(z1^ zQM{Q$69NNax&u9)(j=UiExicR3D5bvq=%(eAN5SlqcF|!g>jT23`zv$i+7(C8Dott z4^h?SqpOVQjBhGgQLE*I8l)N49I_7uvcF02Z1_ym)u`gO+)QCkVp`z)Ghj%6Ie!#F zbGbNBdl#vN?p_~BbzrqWrz6c4kge{0XrU(L1&!BUHtdd-wny8DmI2?aMsycvLq!Rr`Naf&u2cB(18R}0 z0jsiW*Gy0K5?>A*qS$~83Z#lq){`Rtc|>Vg8xo7!yc`~ zsM^zPP?QavoMcAeW^Ui>IHZ_Lw_`($v9pdafSSTVIF6n7<`5585e6Me#P4kN4sdyenG5Eo}+v)xAr^DI)M5-^><&nOVrvOhST) zEt>K<8^P<#d=qxi>~y0hn#F~~mTKMzJ4nCYgxpk#7j<5TQucc_X517DH3pARu8CW5 zxbBU$DLL%^HLeD6LLItoisorNZI}A?BMnyP543L>BqYl(mk;zm2hDvcNY)yrAeqiJ zMi8AiSmW2zT_mLls}FO3$37cVCqhuM4~c~2y`@%~bDtywD#Q$xR8mJDt2UazNx8Ok zG#;r=qg7=Jb)fzcrqV~p@W6dG21 zOERt;iDE%l$|@%;}~AZ)_vG7$4V z|GTy8@O|bL6SKaTluytLK!JdPyKLeF2)ywM1@WLVQ)EzG>7 z2AI~vt_eVY-*&{>pL{8Ef}^~3DAhN7y35qI>y0ra(ySTo{kXE1QG!MwWxG(V?ItDW zIe?|M`Y_30?3#B5s<*bzX1qZ#3x3h7iZHHTLP9N(6&?8QK@Zp=5yx<*F^seH!$h z)7`d*Br_65p>eKF3patB{T9`hHrE2R>}#5<%KCJy0<9@fXNW@e+F`IF8N-u`*LQ0E ze%zDLM3}WyZXYaLf+Knf2eLcZMVVe zPz;EFs*{d6-6V?+Zh{Z8)ps4eknCjeNd!osq={w>@9EWHC;a5R4xpPON+8{4SAhQ> z_T6{^K4%B#EWM(<;KY4wFW{uiI|s(sK_M|zgT?G1O`TtX&-o$hZ8F{c?pS8qN{tCz z$uMv(b5N2f13lF?uI${UXIC(BTcU!kYRq+i)%2{d2UR5EpxOSnoHQQBlRy33?~wH|OSr+4nJ2SV(+>Ch>h#9O>=@+oB-vpD>3i(%w!ua~;dq zFbQMBhLHY=HGR5yC_Su4CO>yPjUNlSU%hepE+sxR=f5wm9E{It40V)v=fkl@qB|3R z(!hRN^OETUPh_{^h^LePKy4hwc&MV!3O61p$>*o1Ea1!Z_WEmB>NZ0=#VP(`;d~+= zwsmA!oj3X}O%s-VXwnjO?Xa)eF$&}wcGXQREOK?48)Yqcep!55A;=#XKMN79@TQ$d 
zTI%QkcR+~0QIK^9=qj9NS$K3lxS*b0O|FRke?m|?Z`&l(krGekRE*`8#m9ecG06Ypl<~UR=#iHb0kA_Slutf*{^& zf2W5Df)_nJR!mbRr}FJ3kqKr3gzv-4gP-=oArxd$BnaQwYn(ovHL>j&oF0gn^T?Ol zUg*c90%GcCed|K{7sK4rMX0OeAF8W5lAH8pEjbh>L@_cKPRt%7Zbc80YIWuObB&y@ z@i2COWFa>aZimbK&TcPgNTZ^@p-NsUf97M`UcBoeAKEJ-jgu6&1BSo?m$s;hWt~Z# z>?=e|PYp|^$cH>zt!V;IRNs1(^b?~s9}{4-<tfXtSxv4r za2w-lI$2cE;n3O@G%kyJ|Ba_6Ids)<}7AYeAQ1>b(y?} z>zjm9XB1UPjo&R*oyl)`qz7y6EpcX9OqxsBxj$m#6TZ;5bLJaw9BE>a#7=$iOXPM; z8-8lVvZwFp-u-wr2q~qFe;O15W5@rZz3bG+kajRX_!q^bqJ1`uI>pQ$WxX9^wVg{!YO(7VO7eDLEsXlO z1l6<99*zI|0StMk^ z;4=qUFJbc!_S?j7OsUb%m}-KeZr9?*)&{2GeIEtRS;F5lqoU(EjrH3o)U=9E62DMy zD=YIQnlPe?zJL}!%^axv3q|VA?|x>K$gR5+zkXMat_RP8c_VfYjFrBy>p+G}h9Amy zcW#k2*`(A89(&(4f3e_?S*dK2&!5{RxWXp2sfdI4=_#bJi_0u{k^2Goij_r&=;~9z zr-U*F8<;CQ(&%2=y5ZBF&-5b>vGtzT9o=h~B_Wkx1u#2NjKfXGY~?7a(cCY47rT%@ z#H0PaKIZP4#`@2G+*+@_yc9T##}yUg&>bu*VHad!YO@sAe^%&?S|Kf4q@{r$rGqER z@p%m1Aq(QwJpZ;R!$Rp*q}x>7GA08kfZEc67omYJh5Nxs<2~q^xQSYRa_dI=cbj$8 zSA!KeR!@j8OfatfPJQxf&<+>94GWyGBWxDN8PkQ`Sr-!S^zEW?Pt-z^7jP3^pJQ+e zAD9mb2nO{2f8m8O-mq0f%uhm9go)KJVoqel;pq{xk^%K_!@-1B&26Skd#nqLN@e^1c26MUj>har>bsUxKLp&S$n zFRvGwBRPYF!VWrnnL=TdEoPu^A?5MCvUMW$lA;XryqOy@!9^h22;Y)}JPE!f^WmS_ zYYV1a&_tMAgwDR44~u=%tzvR*<7~r5WQlmF!msju&WP8iPVjSd&_Bj6(qqf5 zZ!TyI(R z>JIZd(UunspOv-Z7K~vs7K76o3~aFY05OT<+fEZtJX++P9=u>xMzXuO$dPB+4;_)T0*^j(f9ceGI-H zG8eVHY6@LM64TY%V{5FVPJjLmyh$DqFwm4VEcOq8Mfwg^xSev6XM=;R@7T>d)op0h zyhDsb6BoFD01Yd}12=|(UKgvlQs_y4e}ZJVWYgt{m%>^|yajakeNk0*s}}PAs)1q2 z-qN^<`~FcVq>PllL`OWe;h|R8-mQWigTVSl5tZ%K?+>X;AAS;@P7&k;3o@xzB?dKZ z1iRO=vA)+dE*Lv!%*ghcg6}i@P3%`lc=AcElu8yaQxB!YsS?ghYFBMnWc!20e|KZh z>W@MdoKPo1>fjgqfLJ4wTgI+5D%22*NVnbgRHbRElhY@e67b{bN!_m<~ie z98a{8h1FKS{LR@#)4g$iTN#=da#p}junhuV2oxv(sGJKAGIUPD_C(X9;&h@Lu0t$8 zHIs9WF%yCYr%yLj=B#u9N{FDPf8!Og7)Ox~u!brC8d2L+(qF~~eY>1%>4ghHEsDK3 zfT)|Q7*uNNk{VPd84zLl1ZjpLq0=DWU@JzwD3t89iCyyz2bd&YSxNjhH1-6 zB0p=^-@og72N3=^gDVs)eRKbwcX5q6>EO%C^jKAa?+TXRF}h zNvb5?fm++1-O%?r4m2Ig`RWQzF7{BlnjAN=_5f1+amcg1o6AFjz>M`P5MccG13ATY 
z&km*Xcd-?zsCP3fncvzN%!fnNR|uy%h77^kSc3Y7@kvqqitaB(%J4zoz)a&u@Q{Mx zUtXNpl=2>);zBnDGdc6kpM$hbYd74}{Ur-mat-2PK7POO3%00U)8M=fGXO1WkxIoh34rQNH`PY#a&k4nGKmB8u z9yYdUU@cF|HR&MT9Mf8xZRW_JE+}GyA^4w5GI(!ApgGVt@uf7 zV6LXze~RbtMn|D(BBe3tMu|>WF`zpo3fs?&7}&J*?EZ~z-9LSa=~S~k9(2=aaEpcB ziXpy@{>@Aj09C8K6_lruNW02-rns>9w{m_-lfq#A6Vn;{C@X@akF`+VxbPM!#7&zV zG_VA}GEM}@tS62-T==oEanB@v(nPUqc;wh$f9wd~ScCeGqPpLrOt;*Uati5@#bBU@ zgIbUD(9~n9PH@G@r;HTV~(>@}X=O-5J?Nk^v;-Yh4 zlEBsX=_!VZhq6ioS6k?caA>=$Wdl{n1waXz6h=^Vk{`#DMUq)CCJ=J-NUM&GN%v+O zGq6(Q!$|@h6j}XAG`y&T?@94$px*O4f7)lx%auykA78a3dAD_*xE##DK&hB@$z17M z6n%mUGj4G4;k(gI3#&*}T_w1UmzoeEuQRbwMC7wEV61If{&j*yz&hDvxie)s@9LiC zYzMH+YcgANbLt!C6;e9mxw_UKkD&~X-mgP-zBPHxoVoA1&)d-QGBVaqRAg|;dT`c1?kw648D-s9Z>C{EXf!Uc{}vy2&}$mp?-5MR7GuVB*8-dg-45FYThaIGF!J%(ABM!NL}JI z06dRvc=H&ajSfRkglF+fJ&@#RU{Cz+DWX6}Ea=N~Ui571UQ{No8H%jo!I`tF;k@0�ArG_W@j>F` z*N{@TQFP1%?IRaBIBoR~=mJ>{JbVPe;mEVe}M@p zY)#E9xx{*E>;9N^3MzI<@-%LgCNeR<%Zc{h@y%Sr-LdsXHJ@wF?bY_w~ z#7M})D(1a=kx`Fr z(9j0a>^rAmE)*1O6}-#!pQT&D)$gNUlKe~MW~$@Kx$i(FxZ=r>{# zNI_MLBp;b6yP&Y?+Tw(#!m@bD$?zjCF~!R2MZhg2d*|2X&Q$tb^(QOk99@^iA77M) z7N|Z_e%GBuZN)9PJPWnPJRD%?+5KMea}Y3(r22$yuq{A!o{O`jK*nbIX z+camr_sN&r+Sv|4q3u8_=tRQ|+`cb=C2QRns&`!imX;>NH>!gzVK{0#FO+kSw}uxG zz#tu#HRG*J`LvjRC*HvI6`bnv)sPF|=l}lklX$k+f0-PQ1miGJb|w=L*cT-eBY;^O zlH`&;14uA(V|KcuG$9Ov*@1A;e@|(sdtRZxa2%e^rH_~E3yU;LC@(NQwoPt}^t`Fd zjgx>3`rZn11bK^4+is-2Tmp8IK*4SN1sn66?h+0n5qeXGa)0cFs8lfP=y_zfgl|i% z_ey8rf5OOq_ACWUS}dSSO@HV%>prfFI$+AE3Hps$s17Yh$`y_XQI5*Zx+&fptIxcA zzk($JZ^XF|s~4prhH%M%WiOL;i}$$;&}&8{aQGHgEGH=kSn32<3$P68?Co@6*bd;* z`>5z}m1f6^$NiR_*732a*F(w~WYr+G;QfIce_iL9l-I32YNRAk-~9z%Rzkx_Uz+V; zeM=khS;J{_kn+38cUUPp>H9p+NR zytf2ZinnXj6e0*>vIvhn`j?W-*FLZ*kiA9Po(Pd~(B(idOLW79m-;_)s5>63cTuuy z9bLRsj|}_#l*hQySZ$fMixUk6e{O=|@_qm89=@i&`RpS{WldnNhIag{{o}AM`J5` z7JdqrSF}s?AxL$lv6NwYeer#F@r*J^@JC?b%+z|_Z zU_=M4#ka`!<`Oj}No}W7UH%ucZY|R{7q0*-=JK%do$j3-Gv-uof7m~!&I+hP>u*q& zLLYG4Lkx`Yh8g}iKPvt1KMWU*g-M#*nATaTM@tlcjNbq3NOX1&FmWUlN&1u7Vt7~S 
zQqqkCzS(P+5gr>u8?zj-3{<>oeZF!1PO;ejM}Oi#)Idt`qO2D?5(^4hk?*2GEzS0u zP>#$&QWu(3R0tntf8*v+1O1$-nl||-OSD9Oh0`hi%Gbjpr9iq|>s@LBtsPLwZyD@d zH$|cVY)PNIW57Xrpu^DqW|+oNe}8){`si(NrlouNLql{b-2CeUkB$PnH<}X>hpDng z+(u^5(7A%7E8q)-q>lyeQKcRn|4k$rHh!h#gain*d)xd#ekl-t$p5WcOW?VfsPLmS0{VlJk14-qt ziEp>T@YT{rS+#^*PAGkWnxCjEYH#0WPdKQ(&sw(BS~b*D$)7V};&?W&>mDr1GS7n` z4k#EhYXl}1f8{|X*J+>!q-9z>7%ayAIw2D+k4a8?^82=>1qKn*N^bb2yM*Ot>Oi%p zp?b(mK#*oEE1PF?Ga>Z1%ms|5{do&NjJlJ1=&6v|x+YRBS~<#al*t>)QKZA|VY|+G zSKNxzOj9kBX1+V>QHn?Re>F{2i1%?1?Fr#mr(VXiMbO!C zc)-$HqQ0dOzB6KZV!oeq$@v*by@O01?<47bp(dU)(C|=wRO$)=)8jlq6b;!!{@Ca{ z)Vs%~6e}HeWpNzzk}R|vR|k_g?gbe=I}Xk(Jfw#9^~urT9cgydt5(57Q`%Jx70#QQeY|m8B_)fgDmbq`9YIbKp3c zIke^{zi4X)CgLzpetZ(7Ol&ur33<;iW?GZgT>G(L^z2{XYY?hsh|$`1?6gy?vKrQv zvR)&5SP9V9^{$$QXcK!2^Am0v?*veu3YP94e@5qnN8@EJ?<&Gd+gF!lU)x$;!=4$0 z)4q8%Ux?bt!2f92xd7z2|ISzN`&|rgdhq-RKa^b0Zd>M4aOhsMuVCjqLIbIaj3-jVa63aw%>WOonj8^_>oL@OXJDdHHuA82=?f7vLMR6QcD!Sk?53yhL@j?Z z!U-^B=Lse(i%47H`#t#uzb=t-#~Dz}f7At%mByTRY27DRy^vt**Z-7vo%(FJJNWgh zXzwG2fJp~DmAp8<@i2U6K7(LXSKl;c^Ha!7!Ug{gPU03=VHO$-0O3-9Jh5z`8!2A6N0NQe?t{8 zm&_1`@9~KdLI+S^;9jfj?&wC4ZM;BvQw~+a`F52V#9nfO(Fr}@26R`OE$~H-J=4 zPJ|XD&fEXRUt8R7LGbo@Zc0A_e;5^g5>!%V=Ph7!!!{IgVGm#zndlgPmOs~&8!I>L zI`M0Qj%wCkQL!O>-yr$55xoD|h78MG5-f!B#fa#Jl~ba=Lw%rQ$^QrEyn`sJO~I1$ z5HE>NzPs&Oey#3&FNNl3lGHpTJae-y@9z?(?jreitWTbKesIq~f8?Io4l|Qcp_fRO zjym7#*taUwDb3)C8v8icJbnxTl69rsRZ2G59?@3CE@8J!HUzviHg)MA*GAhCn?Q^ma^R0-nW3L< z8Kb^Tm(kLfClVa=e^8K^pW)N;^IS@%vVDaDXTfc*{`x2#g#CsySYq+JmkWB0YV!#) z5n-!&*gqiTJtSZSUTXHp1mn_=t z`lbv}s726F%+RY$QPHon*1`d`A8a$22CaNA`2En92jrC3GEvxmp!>e&aCm_Ya5MxH z54nsJVj{#uxVqvC@1k_qZI>nc%#QOgKmdM1x^uj8f2~;QrI29^twk2p+Iq{>Lhj@` zCw5G|Ccg_~FhhgJ#=CRQT0w?4)DYop4Nj6=>lP?yNX|%*8s8<1b0>3L@L^D0q+@80 zI==N#4V>xsrJ3-wlCePOIrJcRtmgclZz*O7tWtM|w0MZEPH8gPMGzBrx{sqdyizKK zwV+xRfANOA+swzLNDkHH zQ+Du1VK7Qex~(&q(!aqqfsVAfek`&TYpvzXwE%^#v&+f)RiE`7Nv+BF(c4thC0nul zf2I_jM~}cAAc26vmo+4lPE$*irYotLLCvGOKz--@ugICoNA z9h`luD8AjBa;}`9_Q)jyRCW?q4aJW< 
z^T-mUHb;A*=L?w?QVQi}Sbnr3%5CE0oNHlZjrBiq#~C_J?HJIrc2zlE zRG>uH^S)NFxRaR8Ci6LEt6j+~36f2blS z34wu*B2$fk3Z*&4h>>i?P)iCJE}bSmyy7Q*kE+#b1{I(t$0UmhuX zu?Qqct}m&%{w2J>phDc@515bvre@wnT)kIl9qBJfR+&$#g&rBO|s3AX{AY7)xFeKsu}sc3iAAyPelgPWWBLA z6-F@vk>(=qcy0P1wW0Vv+~82Lj#fjbKd_eU67Nb}6wAb1WH z*1cM;I&wJj^`n|3%KQkWf26+0i4@WW=GH8t%4&MwEuH2Z-$-JUF3ID$);(p@Nkb}2 zYd1j(IpL!4N8|?nb3m>c-k-0z=e3Pe0o&Hlk?~d@lIqLRAOvSmUc{pxE;ziH8zsq> zI421%XdotvGL2@;;+WT-Wm1D#ZneihXC9>R+nO0V??Jd_OfK@*e^0#}@+C+XG&dS+ zOwXNUeTZzUk>#i`v1jSu+tqj8pz`mZH<~_e+_$CC+>*d;eU>3O9Y>w*4|vuv_Z0jI z3W(e>oe-c(r`mI;`fT9Yqndyq*n*gNa>hiw#?j{^4(K#0l}yW3;^P!wqTjRC)Cyz| zM=QT0#l|1kJwj`2e}fcFkol3P++kgdjxAO4gVD1L1-~QQ{JJx*K=U?H?q;8-?=gm? zz0;jE^^?MYNT?6dUqhPr^mZ>y>nDD7CysIxF#P#am;ojNR=}5M{>x0RV!1YOb@B{G zL|KtM13NR8(Ij{1X9F_}w*}T#M1ymwne_c&ZCxO+bTz-=f5-YINPjbj>?}Sz6xNjr z!z5^@v3E)LN|IOU8>Uqhg~o%`P?-O)5v3JwE-sBok5YGIQTdjtb)WF`Dx$=^4e8ej zpYn4CW3MK4s!g}XWL=US0`ge6zJNA@{$KYt)CSjQ#7I6!@Ng5^2L=M;RD5IHicg{> zfu;C)?8pGzf0-e`P@pz~i~Xf&9fptr|AZHgJ+VJSzYEUm*YlRwwJ*=}`c>McD$|aU zo+c?EN*QqEO8VaxtY)wK;l?YDGOcsOY~kI1CMDWQgn6VWEQ(%e&@Rr;mzMk_Us(_w zuylfQ0e($Al?4?>#tHk4XvbT!mU0n@WW~#ilkrkRf1KI5gj+Sf6j)w;V~FNq;cG5t zUtcJ*C)gP9Fszvjf(NB1Cf_u!T#7$EdK^HrX8wHVx@(c)+KuyKGE|B8E4tSFAeUYu zy_P5*!-jkFYeD*?75mt43_jnkNxfg2q1igNkbFGN=c^tH%qL zssN~tf2IAmIHATkmFK1Fc;#mwczfBZV@>n^DAn?xuQIk;1R}LxMV_5`(1bhM=Lr#L z>pxwpcjITR`uyvo5_AvjydE=gR-?zTb!PEnoXUmnZxkh{StvM>%tkgb@o5W!NPq}o z@*!N{xI=85kR=sO0R`lnZ_@b(Hb|)psV6=Wf6TT0OJpVK7{a*wg!^8VB&U6xkj=hy z+G#FEhs)0plLv}Ih<6VCgF;eX5b@jzeHXPyMv=ki1nkbtD; ze`y8gUDC(Rr~$Ffj#&Yf2xwt#^Z9CmLzaI3r@>}GeBYI0ncM$4lLfpek-iPoIW zbu1T>lQTE88|5|hB?FoWovhQ}*@$Nl>As@GGOJ}j>sRO{C0;>8?}ZyB&}$vh$^9P6 zppqT~@h~6_xEwp;Au~lVvk(FUEI`AB7lg6WAktZZ0-C zDDyM!S^cDAm_Wd$ALGMH1<20}-(`x6@c)4+eoGN+OF@UtLMJVx7o}%sQ6|ZE&qJsp;Nkvc$C%nHF@FnUQo=xR$u; z%vicNnGjZ>VKd>Z-LoYql^AZz5~ND%OEfE&*-C$8(iE7bl9_Z$qMKq6j`akn3F22M1juIWR=C8lMt{dj7Fr9 zMazE*JW~p-%3@MQ&={w|c##QNXqy?xE2II~TGKNJE(l&BOH8V@%!Dcgo@vli7@Qf5 
z5=xfz3}clB8(Pp?k+0a1dHp4fv_0Fq9=t`4TAf+iPQXQTdX)4S?Ed_t&kj%npv~;1-GSpJkveW`~@Q#)#XauR# z@Y{-7kW|=$R18>QJJcd1EzMzptFSbOh53PZLr7vaYXr5xm9;QLr-%xv$tfD^V5p8T zh0B{hocK`QXlQMfnNS zjAj_iWI+Ze z`8DiFm74)TFUb7Z1M#th3(#S$J#crN%d$ME?sX3R1MP5p!fcg1sBks5IxyM58Sk9~ zQi!~C_H9!XYpwg75daGwvfZ$I5am<<(>UMiNz!?BZ-qDz;oRF*Qhef`16m+6KukNZ z;p}^`s8Sxtcutt~v$?7S+SY$q!c*Dz*}UifT19au0jC3+xL z6x2l)M4+-HOhs%UGZCZO(V&9a0|Brn4YMF|u1Y_ZIzUFWbIAEk?8Dm6(v)NDqb6OrNkt zX~d>ilJ2YoiS*n?PJ4eMUI&FFJqxlsGE=HPpgV+D!Dnw#CFOjK(T=rS&~s%4Q7!}t zf<%bd25=BeG>MUl#8w57XO(It$Z(yZC+s67OhluI<5=Vf3Dg&CT&cmC#EI#Qk^~dW zCW77fkO_?>9#RYyjttKmKAemS8*(^Zkw0rooGHVyIhM0zR|9`^s8>oJ zMrSClq0A&ZgiTT*#%7}xMZJpIV?p@HhE31gZzyrm19RK6w&cAX-f2L(}w#A z#b^@lSZxpwjBJ0!njl1gD$}C3wiSl_d4-DRK#6q<=|p;aV~QM7wIwIiC2^{q3wl%> zi&5uNjgnoWP#h%5S~c{C=j;^Z0)j)J2|cmAU@H-9Jq0_UIS{Zh z!JX0209KLF(Q+ythTK_D?cn&JMFJgsPm35=@;w!+hO>W|<3XiTzAtf1<3hh?TxbK) zhBiItqBk6{Z>B}XGF|x~TL^8QGq#Bb>PNPOE6v}G2?s41@Sa4B$s4n((33_{n~uUi z1pu7EeX3E4XDkN(@q(mUNuZ2rW=6ne3DY@LS@8_qH_%Qcy6b5A|36pPYMd{5(y&3< zo}~Eu_)mWxf2004O8-S#=neKE=&pi2%D>FVDhkZ^3HG#KBaLrsxhiEimL~LL9HSXx zf4asPe4iuIAsX?Z=auvcijNTQ<9&?%%wLGxG1jMj<2AKaX%LTerGw{WT84kGMvFufk#kD3DPqk+oo*;~r7CR1 zGBK6JC(%%1g6mWTHdY19*nO&kvRD~PQQm1NG{Zv;rKt+6ZVuwJlB*=1DnBYb8VUj3 z7(i&dkkI3JYNrN9$To!ro<~erURyO9NDU-jQv+&v4{g^YcjPg}BsC~D7{Z&{WuKu? 
zkJf(<&dO3k(Lr*jI&6%vv6LTmV_uQPgWOmZ4Z6K0TW~wSCMug!tSQ-&(vG#0(D8ds z3DHzNrW9>I#$Z-YxTepWF`?bmY)Oq5eN`pz67^6{4AJQhC)>L8rajTd=$vkq-m&7{ zRq5TRF)6)?*9y@oBE7Nl?nK##RNieZ>Q;Y+9UVJfBhG2X-e{4QCNYDAgYikmF~2E3 zq{-F(I&E)1spl87@$F(d^V{98N7s1s=H07DM{oY|_|?&wDDYx3x|(O&_c-Kj_I;_d zeMx`r5}N*skiKnqGgN zEb8muq58f2ER4`ffWVj0jP}R>C%>gV{7L+h@f;?}okG&*+To7jtQ7PtTF zC3rf<%&uttN#HI-PG27$A3g&jubvMfq9hQZzqb^cHYJM{AhyyeBDPcsCW2n3nCuH3 zQE_q*l_&Yj{8e7(SEK9eQ9jPE^F@FDJ^w5JcUwrgjgUV-esT8fEemOSWmGPRsC2a> z5vyWQ#L;Xv{qxIo`jwR27E5m<){_rMhfj`JEZ46(5yTSwm!MsURf<8e-a%{E%Y53e zFuRRpM=xG@KYcr7l)8vUS^SX38b`I&pkVd4H|%}OeW_%I{Y8+>oRsAuE(*Xz;ZOMX6@<=5kz<-9H7RtC76m7x+6R?r=@M;OsZzm=k*Q$P4)~ZT zS`o8K<$9DK=8uu_j`FAZ&-t_bIp@CD`J4PWf197=r}=OB?})3T{5*fZ$S)CJKj&BZ z7i7hME^3bZ-12dsnqTWh^B8Ua z;v&D8PNp|FQwejKUrr|z5UFz(A*7S~`^98*4TW-~{XAXHpxUe12sc-=dIVme=r4cW zeyML7eNXE7oRxkvMr6KOUY|pZ@f8^XXQzSuRL=-ux{jxpK3#u(%fBt_`C<$%zb&VW z`tp3jHZ-rV$Da9l{Rd{AkH6<0hP$n6J^#tAa>b+5M=yT=wQFK;&L`u3Gqb{*nZxSp z2h0rS+8CPJ!qA3{Y~+pVVm!N8UiTW2^rVXKt- z{^;cO>62gxC!>Fj;hKINwyz5Q)gkSvRY@k2GBpFfZOlN5p=cx0aATC*Yz21*oAW+Z zGVbL2E%#qjW#jAj(cibAs2%w2R@K6Xlb_Fyj#G7QNDYpb*CqeNNMw~!iMm8vU81cn zHK4B2B--K>-Drz6MS5x{)owzC4iaaI_$O{e8J2y6;SGPFwxC>3u5zpTYb7(akw68p+>5XZ{~o^eE_U5&}DN{Q#MpT0PW)!5tV_3LT%a-V+a zI3TQruu6xYt6`;(E_>RwW|i*A$f6qb@N|7(#damxT^^mf^SIB4+g{rC&&DacE~&Ok zrO%IlJv)E-tpo7}ue5AW5n`zmh+Wx&*oc-r*?V}U<`uz@mTYNG^#$5&xB@$0PA2tY zP)+sCC3*2$Ar`bZ*&@YmS%_`Qv)n!2_z`@!dhQ?YzD`qHrRT$!AKt$`>M^bj0n1jj zs-o>0(KRsIcu5S5Zj<%mTEH^R=0y^)v=$FBF>ilhqgG{hS5ZG;MQgeIZrj;xD{Lf< z`qR`itl2fa_T=%Sn~Uk?_~r^Ik3WB|F(Lin{G(+4Cz&ezA|z8PdPY+RCUzc{G;wLM zT2Li++ACKBH4Eq+GXNCa!;49>#QmpW(F`m@%+}6!?=`_*7~LT{2IK6N6LrzJ93sd= z!cc$9I94TmY9?u%N>sGc2Ehs z6WFE&TyN~z#8CH-7F~u^mm#Wq8ioWd1R8&i+&V{QdyYDPVdyes))3MOLv+*dQ|*MG z&YuRl{J1VZ1jztEW+x1FcJ#XpRcinhsR@aMANv69w071zbBlHJt&+KoO6FGUSm^E9 z>28X58Co;n6bn(|uBLS+ic1x|2!)DtE$9XaBj&kK2S#k#Hk-!p(o+ z?s1o|H6u2~kXKbfaWNay+T{UuJ+vv?;Y_ZXxN38jm{etU!P&;Wye@ZZ_OA3GcWM{h z4Q?=XIa{;*!7y1>yI^c^E2YbryFX)cC;Hs9L(ye!%@1tL+NgwYxD&hJZqtrJhdCuS 
zreYzJR8y5ZVQ#}(yvv;0s8*B2w#t9)g15Vta$Ww+{rR)^;g7zAozthVOIiswpS8LW zrt@FMm$WwVxP+fvy+!b<{MBbl`bgeieI)txkU5lhDP%M2ui>j$`f7r&{3}bkmJgr9 z1)5Qg@DZ{q!<-%>T5<(R$6(m9O(sZ?fseNG?ug{z;#_`FG9s6Kp-fRzbY86D=( z-Uf_O;rat}*g(UDxq^L}GRPIMGCsFIY!08`5XyD9`Z=uswFItSKI{L~^PoAb3^W2} z*2f!M!`xbh>%+0KCeSuvE?a+uYmmzZx!j#{h-8kkHHW*CKb10AwSrv`QiY(WTrl_m zyjJ1r%)=)!MX;Da%tWb+piLPNH+mZsT62W|n_$yer();e{ZU&H(azvE%f)1TL*qFE zWg#6=2I8+D_%5(znxW&xgew1#_#a`d8?+VpZblx@>Oc7Z#7Og$A02-SE*h;jjt=G4 zYx-{wWM|ht&{8W}#U@(e7aD<9FJE!Z0b1cFF>AEKXxTT~tVYY$Xs3&W6H%s#f5n&w zJ_s3r-`CwqR=^i+;2W}D+@epwVg-EM))^QrQgKBe@Zm?Jpe16t-b+iw;>y$N<4eTa zkZ75Wv{Z|h=%b}qw8VcVT5)rTX+8OElx_&QmIWtZ4`OH*+ za!F^Gq0t2s0yr?Y1)>OP0tg^*VPk7yXJwaeqzD%RF*%oqqzE7~F*qWJFdTj?o;3g#Sy#$Y>zP`2g%=W9X8eCrMiQCFw`p6<`ul=Nzob=D;~vizHda8OFvRDJjx6_@Y7+&1 z2+C;>Nn|=QbeInp^nrX*{E4NkTntuG(8_@X3JfiOa}vZ-Z;a*#W)BF6sq@}ICvZ9u zI#GGD&>`hhOa}@KO?xOa*VeIe)GBzJ_`#?UIW1yeA!)W^wnL`O4;}@9ge&6!K1LOz zh80tr933d~7Y!IpCUJ&b5d=0YA$Ce2f%r>G<(r}mKg5;PfJ!JO$04p|vc-bS?vgza zC6zpXK_~M=PKQ^o4iEpnTtA$d^~8PlufxN4{1}$Uf1aMtpRT3(%J(S zowdJPo6Z`ouzY_$|6V${^;IMN?ehGBK(u`Q;q-D3^zi-D7xwdw9Yg9hq@}%(hT4!d znckO;cVDhg{$^d_OUYZiTAM6eFR^V9tj(Q&coX*W(4q5{JL)R1N?i@KMV+}<%kW4lWe^}G!oVNPPKNs>6*?Lb?H`MHQgF$D_y-G#TIID=xvx}g zS<6D&d1XACZOhbdj!c4^mHI?GZrVg7G-<6tayxF?0wkdm(#k94Z8y|zfh4d>E334B z!z#5yC<)t;R*Gqd;@DD50>!ja9A%0ZOM%t6vKsdqJ9H!A*5ylI$F{9&1Kheg4bfKe zLr;DXk_s!uVQ<9PHpSS@=Ge}c*iU9N1X~Lc`a;;Y5MF4guyXHd$FfBkghKeW5Z>*$ zY1>;u<5z}V-ge{Ka(bb??+5E}uJ&$!+V+-?*!v0Vc3AC>S0{AB{nWHF#fzoD>ivwg zd+lvoT0=LrZQI?UpV>47yC120BfG^c9UIXWcrHBp>xY zA`Ft1XgT#I-EMGn$6-G=+|Bu$)5r6R`sJ(FPw&3@`FeW39{Flryr{4Kem#GG{MHXEpq_5iGXAGT?3!>>k%5 zwGNUNspZ&yIqN1|_IOv?fW_?i(3Zf0o!B7-J2AErutP1&7VJ=q9hF>v(wkw0J@vbG~@&@KD3eWTe-y-!)PYCS>^q?oSJ?KGC=zt;PC>Ih@ zSFH_TpodyAJm~4>fCoLx+enpQf6sQ=1klqJJ0RmM7ZO2NEg4r}dZKRIJUB?&HV;fs zG%eDA!E4&S3=xtJDhH3t44;67bw{$+2CxM4&LPtiyD7jioo`V@4V9L`XmCWd`9);x z;?s|Fp^mRD{Ozec;8XTQzl;L(_ zM42)p}X^WHFg{2XfOW1VU_n3P9 z?F4LzO?C=``x}jre1VkGeuJoZQ%Ph3c{23eib#L#AV|ya_pgf05)z_smF|(6Q4z 
zq2tLnHZqmXpwlSDUNCkGbm)46_BLlK0qnqOTE^ThmoLAzqY4&bt-`Q|tIh3-I-G28NmXJpF;u6xxSzIc^Fz;kovCejT2|@!C)kHBU=BgoKsf<7GvbcozFrUOF zypVa>3JC_ilVHU<+wCV<#*zY$?KuLI@(2<-egIsNi-^PZ~pLMzo*a$5e>QT{W* z^6CEK*OT@gf3o_nKjl#Ef2z$+^>v8q%Nd2d{T?XaS8ITJBvU~-r=-DealZ}oMJ$o!uNfn4-@k)!%!1=Sr!b>b7ny<%^>e|>&oZ5sYi z_~+-ddiefyJ$-n3kDuq?^FE;4ABleYa{X}m1j+h)NqyNUbo)Ce^nZN%?fmecm-9`z z2!A;Jdj9b8>ea)WFCSli(fsi9!|TUKQy!ilAKgvzmVrF{^!fbhy8>F-<&qA3{{uZ6 z21S>l(FGF%I5C%~t_UK3GF=KUQ)zl-AU8NN3NKe6TQMLrATeDEFH&!BbRai3IUq0~ zQVK6gZf0*FH#jgLFd$M2FG)loTRcKAGBPwaG&eazK}JM0K|?SyLNhfrH918&LNrE2 zMm9bmJVG!sGBh?cH#tK=Mnp70LohNzGc`3eIYl`_G)6^6Ha=aKNv{Yue`{!6Ss8}+ zm%a8mX`4%vv}uwyX_}--Z%LD;X|vn3Nz*22()3KzYuYAFny4d!3LJKylQ; zamE2*7DWaXFQ7PzilFGik*7B(WQh zElC=anj}dROVU?p#alH=e@$@!%|H#>ZM1H}6gaedX!{8>P(r(p)&PBGiEPp&drk&ucPWXoG0Ygj$Ghv>`MNe@%Wgj_@crhPDGu%Q*;!&|1-0@$@(tM$>$? z6(isTn&!rCrK8{^+7UEu;}|%Frn#}x>1i;Ib_q=u$iW%3%V@YHJqsq#G&kAj95|1r zx#6&M5=^1VSF+v(e=v=B6S7;;q1{DezLA}dB{W9ruMy&sSc}F+dDbf8u(%zK1He}Z zac8VUW2BySjyOAF`PgX5_Q~owRQGxnI|r-S^<)(ti#nE?ENT0RT08mf7{FYSl#JwR zB#9&09m(#BtEZ$>hkH6UpDmDHX}u$hj8D)z|~b(AWpa%h(Ud#&`&jgYgI; z^Wrf;zQrLxmc?N}Zp9OTOo^mWY_@dx4NZY8izk`FI3RK2I8f^h(A*{f=@QQak|a(6 zQX@_S5+Ys#q(Phom_J?ySUxTQj2;((mRD*QkdBDjf`#iLCu|=8^b`+cdn*M~j)~_TyTmsJ0`;!W@ zB(c@Uv{VRGA0JPo+*U(D1y z0FHyBmVWi8(m1#)oWPY~OTY0~X(K>G8@2RX3(`1{D`PNv+(q;ZN?CN2HdJL=$|tz4k@D4;G>f9AkNOXHh*gfq4>17^V@xNPZbzfFeMQxM}G>x~1uSnWS|eRE_j@qK>;jrwX;lQ(XNqpjCSa zR?L;_^zbT>&DOv=B_XA?VXomXdSnxgR;V%ei!Z5F2F_gPcNAs->xFtyYp(xa>evSG ze^tPG0V9P5b0b>VfN25_3r&nqb!!i3F*p8{9>H~CH^4rj&D_+II@$pa3wzDY{Z|?n zjDUSYm$~_^G2^g+>jGDZ!2KZfnOk~Wy;v_C1q0@m-z$w3;% zA1hRWOo7T`22{|>=6?SLJ)%0u*_5mLaV^U!S$Vy=KYmz`G=N6HIg~e`Aun@O}Vag)vcI$N1CQO59TbZ=zSsQhUfL1 z?r;m1-F}xkxLxPEb6vIU-eqaJ(&;XB$FkKRO;@moVAZmBZ_S9S)z%Vke^~aR_p7yO zwV(X~Ezj#gRy*^OTDm4{d)PA1wcE2Pe$i@Q`i)-pxN#Tww4i7f*sHcfn=5z^nxqi~>&<__R0$aAJWwiyUCW0%H~vC;4jwP?S}|v}FU%)a`{=FY|9QqrpqCl92p$VIH#Rv6B_%~qMhdA1akl^f diff --git a/documentation/conf.py b/documentation/conf.py index dc055c4f..c45d5e75 100644 --- 
a/documentation/conf.py +++ b/documentation/conf.py @@ -20,7 +20,7 @@ # -- Project information ----------------------------------------------------- project = 'Grid' -copyright = '2018, Peter Boyle, Guido Cossu, Antonin Portelli, Azusa Yamaguchi' +copyright = '2019, Peter Boyle, Guido Cossu, Antonin Portelli, Azusa Yamaguchi' author = 'Peter Boyle, Guido Cossu, Antonin Portelli, Azusa Yamaguchi' # The short X.Y version