From 92a83a9eb33c54467b29ff557bad4aed90c4f65e Mon Sep 17 00:00:00 2001
From: Peter Boyle
Date: Wed, 16 Mar 2022 17:14:36 +0000
Subject: [PATCH] Performance improve for Tesseract

---
Review notes (text between the "---" marker and the diffstat is not part
of the commit message):
* Line structure was restored from a whitespace-collapsed copy. Blank
  context lines and intra-line spacing are a best-effort reconstruction;
  applying may need `git apply --ignore-whitespace`.
* thread_bcopy: the GRID_OMP variant now copies the trailing bytes%8
  remainder instead of asserting bytes%8==0, so both variants accept the
  same sizes. The Threads.h blob ids on the index line predate this change.

 Grid/threads/Accelerator.h |  6 +++---
 Grid/threads/Threads.h     | 28 ++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h
index 12483185..389f2cc4 100644
--- a/Grid/threads/Accelerator.h
+++ b/Grid/threads/Accelerator.h
@@ -482,9 +482,9 @@ inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream);
 
 accelerator_inline int acceleratorSIMTlane(int Nsimd) { return 0; } // CUDA specific
 
-inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { GridThread::bcopy(from,to,bytes);}
-inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ GridThread::bcopy(from,to,bytes);}
-inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { GridThread::bcopy(from,to,bytes);}
+inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { thread_bcopy(from,to,bytes); }
+inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ thread_bcopy(from,to,bytes);}
+inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { thread_bcopy(from,to,bytes);}
 inline void acceleratorCopySynchronise(void) {};
 
 inline int acceleratorIsCommunicable(void *ptr){ return 1; }
diff --git a/Grid/threads/Threads.h b/Grid/threads/Threads.h
index a9fa13ea..6887134d 100644
--- a/Grid/threads/Threads.h
+++ b/Grid/threads/Threads.h
@@ -72,3 +72,31 @@ Author: paboyle
 #define thread_region DO_PRAGMA(omp parallel)
 #define thread_critical DO_PRAGMA(omp critical)
 
+#ifdef GRID_OMP
+// Threaded memory copy: moves `bytes` bytes from `from` to `to`.
+// The bulk is copied as 64-bit words inside thread_for; the trailing
+// bytes%8 remainder is copied with bcopy, so every size is accepted,
+// matching the non-GRID_OMP variant below.
+// NOTE(review): the word loop presumes both pointers are aligned for
+// uint64_t access and that the two ranges do not overlap - confirm callers.
+inline void thread_bcopy(void *from, void *to,size_t bytes)
+{
+  uint64_t *ufrom = (uint64_t *)from;
+  uint64_t *uto   = (uint64_t *)to;
+  uint64_t words=bytes/8;
+  thread_for(w,words,{
+      uto[w] = ufrom[w];
+  });
+  // Finish with the bytes that do not fill a whole 64-bit word.
+  size_t done = words*8;
+  if ( done < bytes ) {
+    bcopy((char *)from+done,(char *)to+done,bytes-done);
+  }
+}
+#else
+// Without GRID_OMP the copy is a single bcopy call.
+inline void thread_bcopy(void *from, void *to,size_t bytes)
+{
+  bcopy(from,to,bytes);
+}
+#endif