From 197612bc7a9f69ada8199324e6f4921c622d34c4 Mon Sep 17 00:00:00 2001 From: Christoph Lehner Date: Thu, 30 Jul 2020 07:08:54 -0400 Subject: [PATCH] fast cpu basisRotate and other small cleanups --- Grid/allocator/MemoryManager.cc | 29 ++++++++++++++----------- Grid/communicator/Communicator_mpi3.cc | 14 +----------- Grid/lattice/Lattice_basis.h | 30 +++++++++++++++++++++----- 3 files changed, 43 insertions(+), 30 deletions(-) diff --git a/Grid/allocator/MemoryManager.cc b/Grid/allocator/MemoryManager.cc index e11ce948..a2866507 100644 --- a/Grid/allocator/MemoryManager.cc +++ b/Grid/allocator/MemoryManager.cc @@ -136,34 +136,39 @@ void MemoryManager::Init(void) Ncache[SharedSmall]=Nc; } } - std::cout << GridLogMessage<< "MemoryManager::Init() setting up"< basis_v; basis_v.reserve(basis.size()); + typedef typename std::remove_reference::type vobj; + typedef typename std::remove_reference::type Coeff_t; GridBase* grid = basis[0].Grid(); for(int k=0;k Bt(Nm * max_threads); + thread_region + { + vobj* B = &Bt[Nm * thread_num()]; + thread_for_in_region(ss, grid->oSites(),{ + for(int j=j0; joSites(); uint64_t siteBlock=(grid->oSites()+nrot-1)/nrot; // Maximum 1 additional vector overhead - typedef typename std::remove_reference::type vobj; - Vector Bt(siteBlock * nrot); auto Bp=&Bt[0]; // GPU readable copy of matrix - Vector Qt_jv(Nm*Nm); - double *Qt_p = & Qt_jv[0]; + Vector Qt_jv(Nm*Nm); + Coeff_t *Qt_p = & Qt_jv[0]; thread_for(i,Nm*Nm,{ int j = i/Nm; int k = i%Nm; @@ -118,6 +137,7 @@ void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm) coalescedWrite(basis_v[jj][sss],coalescedRead(Bp[ss*nrot+j])); }); } +#endif for(int k=0;k