1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-09 23:45:36 +00:00

Performance improve for Tesseract

This commit is contained in:
Peter Boyle 2022-03-16 17:14:36 +00:00
parent e16fc5b2e4
commit 92a83a9eb3
2 changed files with 20 additions and 3 deletions

View File

@ -482,9 +482,9 @@ inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream);
accelerator_inline int acceleratorSIMTlane(int Nsimd) { return 0; } // CUDA specific
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { GridThread::bcopy(from,to,bytes);}
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ GridThread::bcopy(from,to,bytes);}
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { GridThread::bcopy(from,to,bytes);}
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { thread_bcopy(from,to,bytes); }
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ thread_bcopy(from,to,bytes);}
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { thread_bcopy(from,to,bytes);}
inline void acceleratorCopySynchronise(void) {};
inline int acceleratorIsCommunicable(void *ptr){ return 1; }

View File

@ -72,3 +72,20 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define thread_region DO_PRAGMA(omp parallel)
#define thread_critical DO_PRAGMA(omp critical)
#ifdef GRID_OMP
inline void thread_bcopy(void *from, void *to,size_t bytes)
{
uint64_t *ufrom = (uint64_t *)from;
uint64_t *uto = (uint64_t *)to;
assert(bytes%8==0);
uint64_t words=bytes/8;
thread_for(w,words,{
uto[w] = ufrom[w];
});
}
#else
inline void thread_bcopy(void *from, void *to,size_t bytes)
{
bcopy(from,to,bytes);
}
#endif