mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-25 18:19:34 +01:00 
			
		
		
		
	cleaned up
This commit is contained in:
		| @@ -223,7 +223,7 @@ void MemoryManager::InitMessage(void) { | ||||
|   std::cout << GridLogMessage<< "MemoryManager::Init() Using SYCL malloc_shared"<<std::endl; | ||||
| #endif | ||||
| #ifdef GRID_OMPTARGET | ||||
|   std::cout << GridLogMessage<< "MemoryManager::Init() Using OMPTARGET omp_alloc_device"<<std::endl; | ||||
|   std::cout << GridLogMessage<< "MemoryManager::Init() Using OMPTARGET managed memory"<<std::endl; | ||||
| #endif | ||||
| #else | ||||
|   std::cout << GridLogMessage<< "MemoryManager::Init() Non unified: Caching accelerator data in dedicated memory"<<std::endl; | ||||
| @@ -237,7 +237,7 @@ void MemoryManager::InitMessage(void) { | ||||
|   std::cout << GridLogMessage<< "MemoryManager::Init() Using SYCL malloc_device"<<std::endl; | ||||
| #endif | ||||
| #ifdef GRID_OMPTARGET | ||||
|   std::cout << GridLogMessage<< "MemoryManager::Init() Using OMPTARGET managed memory"<<std::endl; | ||||
|   std::cout << GridLogMessage<< "MemoryManager::Init() Using OMPTARGET omp_alloc_device"<<std::endl; | ||||
| #endif | ||||
| #endif | ||||
|  | ||||
|   | ||||
| @@ -529,27 +529,22 @@ extern "C" void *llvm_omp_target_alloc_shared(size_t Size, int DeviceNum); | ||||
| accelerator_inline int acceleratorSIMTlane(int Nsimd) { return 0; } // CUDA specific | ||||
| inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) | ||||
| { | ||||
|   std::cout << "H->D copy to device start "<<std::endl; | ||||
|   int devc = omp_get_default_device(); | ||||
|   int host = omp_get_initial_device(); | ||||
|   if( omp_target_memcpy( to, from, bytes, 0, 0, devc, host ) ) { | ||||
|     printf(" omp_target_memcpy host to device failed for %ld in device %d \n",bytes,devc); | ||||
|   } | ||||
|   std::cout << "H->D copy to device end "<<std::endl; | ||||
| }; | ||||
| inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes) | ||||
| { | ||||
|   std::cout << "D->H copy from device start "<<std::endl; | ||||
|   int devc = omp_get_default_device(); | ||||
|   int host = omp_get_initial_device(); | ||||
|   if( omp_target_memcpy( to, from, bytes, 0, 0, host, devc ) ) { | ||||
|     printf(" omp_target_memcpy device to host failed for %ld in device %d \n",bytes,devc); | ||||
|   } | ||||
|   std::cout << "D->H copy from device end "<<std::endl; | ||||
| }; | ||||
| inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes)  | ||||
| {  | ||||
|   printf("TODO acceleratorCopyDeviceToDeviceAsynch");//memcpy(to,from,bytes); | ||||
| #ifdef __CUDA_ARCH__ | ||||
|   extern cudaStream_t copyStream; | ||||
|   cudaMemcpyAsync(to,from,bytes, cudaMemcpyDeviceToDevice,copyStream); | ||||
| @@ -562,7 +557,6 @@ inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes | ||||
| }; | ||||
| inline void acceleratorCopySynchronise(void)  | ||||
| { | ||||
|   printf("TODO acceleratorCopySynchronize"); | ||||
|   //#pragma omp barrier | ||||
| #ifdef __CUDA_ARCH__ | ||||
|   extern cudaStream_t copyStream; | ||||
| @@ -578,7 +572,6 @@ inline void acceleratorCopySynchronise(void) | ||||
| inline int  acceleratorIsCommunicable(void *ptr){ return 1; } | ||||
| inline void acceleratorMemSet(void *base,int value,size_t bytes) | ||||
| { | ||||
|   std::cout << " l-l-l-l-l-l-l-l-l-l-l-l-l OMPTARGET calling memset on host and copying to dev l-l-l-l-l-l-l-l-l-l-l-l "<<std::endl; | ||||
|   void *base_host = memalign(GRID_ALLOC_ALIGN,bytes); | ||||
|   memset(base_host,value,bytes); | ||||
|   int devc = omp_get_default_device(); | ||||
| @@ -590,7 +583,6 @@ inline void acceleratorMemSet(void *base,int value,size_t bytes) | ||||
| inline void *acceleratorAllocShared(size_t bytes) | ||||
| { | ||||
| #ifdef __CUDA_ARCH__ | ||||
|   std::cout << " l-l-l-l-l-l-l-l-l-l-l-l-l  Allocating shared from OMPTARGET MANAGED from CUDA l-l-l-l-l-l-l-l-l-l-l-l "<<std::endl; | ||||
|   void *ptr=NULL; | ||||
|   auto err = cudaMallocManaged((void **)&ptr,bytes); | ||||
|   if( err != cudaSuccess ) { | ||||
| @@ -599,7 +591,6 @@ inline void *acceleratorAllocShared(size_t bytes) | ||||
|   } | ||||
|   return ptr; | ||||
| #elif defined __HIP_DEVICE_COMPILE__ | ||||
|   std::cout << " l-l-l-l-l-l-l-l-l-l-l-l-l  Allocating shared from OMPTARGET MANAGED from HIP l-l-l-l-l-l-l-l-l-l-l-l "<<std::endl; | ||||
|   void *ptr=NULL; | ||||
|   auto err = hipMallocManaged((void **)&ptr,bytes); | ||||
|   if( err != hipSuccess ) { | ||||
| @@ -608,12 +599,10 @@ inline void *acceleratorAllocShared(size_t bytes) | ||||
|   } | ||||
|   return ptr; | ||||
| #elif defined __SYCL_DEVICE_ONLY__ | ||||
|   std::cout << " l-l-l-l-l-l-l-l-l-l-l-l-l  Allocating shared from OMPTARGET MANAGED from SYCL l-l-l-l-l-l-l-l-l-l-l-l "<<std::endl; | ||||
|   queue q; | ||||
|   //void *ptr = malloc_shared<void *>(bytes, q); | ||||
|   return ptr; | ||||
| #else | ||||
|   std::cout << " l-l-l-l-l-l-l-l-l-l-l-l-l  Allocating shared mem from OMPTARGET from LLVM l-l-l-l-l-l-l-l-l-l-l-l "<<std::endl; | ||||
|   int devc = omp_get_default_device(); | ||||
|   void *ptr=NULL; | ||||
|   ptr = (void *) llvm_omp_target_alloc_shared(bytes, devc); | ||||
| @@ -625,7 +614,6 @@ inline void *acceleratorAllocShared(size_t bytes) | ||||
| }; | ||||
| inline void *acceleratorAllocDevice(size_t bytes) | ||||
| { | ||||
|   std::cout << " l-l-l-l-l-l-l-l-l-l-l-l-l  Allocating device mem " << bytes << " Bytes from OMPTARGET l-l-l-l-l-l-l-l-l-l-l-l "<<std::endl; | ||||
|   int devc = omp_get_default_device(); | ||||
|   void *ptr=NULL; | ||||
|   ptr = (void *) omp_target_alloc(bytes, devc); | ||||
|   | ||||
| @@ -65,10 +65,9 @@ int main (int argc, char ** argv) | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|       GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||
|  | ||||
|       std::cout << __FILE__ << " " << __LINE__ << std::endl; | ||||
|       LatticeColourMatrix z(&Grid); std::cout << "z lattice color mat " << std::endl; random(pRNG,z); | ||||
|       LatticeColourMatrix x(&Grid); std::cout << "x lattice color mat " << std::endl; random(pRNG,x); | ||||
|       LatticeColourMatrix y(&Grid); std::cout << "y lattice color mat " << std::endl; random(pRNG,y); | ||||
|       LatticeColourMatrix z(&Grid); random(pRNG,z); | ||||
|       LatticeColourMatrix x(&Grid); random(pRNG,x); | ||||
|       LatticeColourMatrix y(&Grid); random(pRNG,y); | ||||
|  | ||||
|       for(int64_t i=0;i<Nwarm;i++){ | ||||
| 	x=x*y; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user