1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 01:05:38 +01:00

Merge branch 'develop' of https://www.github.com/paboyle/Grid into develop

This commit is contained in:
Peter Boyle 2021-10-15 20:46:51 +01:00
commit 7e0057d2c4
3 changed files with 6 additions and 2 deletions

View File

@ -389,7 +389,6 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
void *shm = (void *) this->ShmBufferTranslate(dest,recv);
assert(shm!=NULL);
acceleratorCopyDeviceToDeviceAsynch(xmit,shm,bytes);
acceleratorCopySynchronise(); // MPI prob slower
}
if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
@ -405,6 +404,7 @@ void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsReque
if (nreq==0) return;
std::vector<MPI_Status> status(nreq);
acceleratorCopySynchronise();
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
assert(ierr==0);
list.resize(0);

View File

@ -42,7 +42,6 @@ void getNumBlocksAndThreads(const Iterator n, const size_t sizeofsobj, Iterator
std::cout << GridLogDebug << "\twarpSize = " << warpSize << std::endl;
std::cout << GridLogDebug << "\tsharedMemPerBlock = " << sharedMemPerBlock << std::endl;
std::cout << GridLogDebug << "\tmaxThreadsPerBlock = " << maxThreadsPerBlock << std::endl;
std::cout << GridLogDebug << "\tmaxThreadsPerBlock = " << warpSize << std::endl;
std::cout << GridLogDebug << "\tmultiProcessorCount = " << multiProcessorCount << std::endl;
if (warpSize != WARP_SIZE) {
@ -52,6 +51,10 @@ void getNumBlocksAndThreads(const Iterator n, const size_t sizeofsobj, Iterator
// let the number of threads in a block be a multiple of 2, starting from warpSize
threads = warpSize;
if ( threads*sizeofsobj > sharedMemPerBlock ) {
std::cout << GridLogError << "The object is too large for the shared memory." << std::endl;
exit(EXIT_FAILURE);
}
while( 2*threads*sizeofsobj < sharedMemPerBlock && 2*threads <= maxThreadsPerBlock ) threads *= 2;
// keep all the streaming multiprocessors busy
blocks = nextPow2(multiProcessorCount);

View File

@ -95,6 +95,7 @@ void acceleratorInit(void);
//////////////////////////////////////////////
#ifdef GRID_CUDA
#include <cuda.h>
#ifdef __CUDA_ARCH__