mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-13 20:57:06 +01:00
Merge branch 'develop' into feature/dirichlet
This commit is contained in:
@ -262,7 +262,7 @@ public:
|
||||
autoView( Tnp_v , (*Tnp), AcceleratorWrite);
|
||||
autoView( Tnm_v , (*Tnm), AcceleratorWrite);
|
||||
const int Nsimd = CComplex::Nsimd();
|
||||
accelerator_forNB(ss, FineGrid->oSites(), Nsimd, {
|
||||
accelerator_for(ss, FineGrid->oSites(), Nsimd, {
|
||||
coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss));
|
||||
coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss));
|
||||
});
|
||||
|
@ -264,7 +264,7 @@ public:
|
||||
auto Tnp_v = Tnp->View();
|
||||
auto Tnm_v = Tnm->View();
|
||||
constexpr int Nsimd = vector_type::Nsimd();
|
||||
accelerator_forNB(ss, in.Grid()->oSites(), Nsimd, {
|
||||
accelerator_for(ss, in.Grid()->oSites(), Nsimd, {
|
||||
coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss));
|
||||
coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss));
|
||||
});
|
||||
|
@ -395,12 +395,11 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
|
||||
acceleratorCopyDeviceToDeviceAsynch(xmit,shm,bytes);
|
||||
}
|
||||
}
|
||||
|
||||
/* if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
|
||||
* this->StencilSendToRecvFromComplete(list,dir);
|
||||
* list.resize(0);
|
||||
* }
|
||||
*/
|
||||
|
||||
// if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
|
||||
// this->StencilSendToRecvFromComplete(list,dir);
|
||||
// }
|
||||
|
||||
return off_node_bytes;
|
||||
}
|
||||
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &list,int dir)
|
||||
|
@ -490,6 +490,7 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
|
||||
#ifndef GRID_CUDA
|
||||
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDag); return;}
|
||||
#endif
|
||||
acceleratorFenceComputeStream();
|
||||
} else if( interior ) {
|
||||
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDagInt); return;}
|
||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagInt); return;}
|
||||
@ -497,11 +498,13 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
|
||||
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagInt); return;}
|
||||
#endif
|
||||
} else if( exterior ) {
|
||||
acceleratorFenceComputeStream();
|
||||
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDagExt); return;}
|
||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagExt); return;}
|
||||
#ifndef GRID_CUDA
|
||||
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagExt); return;}
|
||||
#endif
|
||||
acceleratorFenceComputeStream();
|
||||
}
|
||||
assert(0 && " Kernel optimisation case not covered ");
|
||||
}
|
||||
|
@ -49,7 +49,7 @@ NAMESPACE_BEGIN(Grid);
|
||||
|
||||
typedef Lattice<SiteLink> LinkField;
|
||||
typedef Lattice<SiteField> Field;
|
||||
typedef Field ComplexField;
|
||||
typedef LinkField ComplexField;
|
||||
};
|
||||
|
||||
typedef QedGImpl<vComplex> QedGImplR;
|
||||
|
@ -201,12 +201,15 @@ void acceleratorInit(void)
|
||||
#ifdef GRID_SYCL
|
||||
|
||||
cl::sycl::queue *theGridAccelerator;
|
||||
cl::sycl::queue *theCopyAccelerator;
|
||||
void acceleratorInit(void)
|
||||
{
|
||||
int nDevices = 1;
|
||||
cl::sycl::gpu_selector selector;
|
||||
cl::sycl::device selectedDevice { selector };
|
||||
theGridAccelerator = new sycl::queue (selectedDevice);
|
||||
// theCopyAccelerator = new sycl::queue (selectedDevice);
|
||||
theCopyAccelerator = theGridAccelerator; // Should proceed concurrenlty anyway.
|
||||
|
||||
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
|
||||
zeInit(0);
|
||||
|
@ -248,7 +248,6 @@ inline int acceleratorIsCommunicable(void *ptr)
|
||||
//////////////////////////////////////////////
|
||||
// SyCL acceleration
|
||||
//////////////////////////////////////////////
|
||||
|
||||
#ifdef GRID_SYCL
|
||||
NAMESPACE_END(Grid);
|
||||
#include <CL/sycl.hpp>
|
||||
@ -263,6 +262,7 @@ NAMESPACE_END(Grid);
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
extern cl::sycl::queue *theGridAccelerator;
|
||||
extern cl::sycl::queue *theCopyAccelerator;
|
||||
|
||||
#ifdef __SYCL_DEVICE_ONLY__
|
||||
#define GRID_SIMT
|
||||
@ -290,7 +290,7 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) {
|
||||
cgh.parallel_for( \
|
||||
cl::sycl::nd_range<3>(global,local), \
|
||||
[=] (cl::sycl::nd_item<3> item) /*mutable*/ \
|
||||
[[intel::reqd_sub_group_size(8)]] \
|
||||
[[intel::reqd_sub_group_size(16)]] \
|
||||
{ \
|
||||
auto iter1 = item.get_global_id(0); \
|
||||
auto iter2 = item.get_global_id(1); \
|
||||
@ -299,19 +299,19 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) {
|
||||
}); \
|
||||
});
|
||||
|
||||
#define accelerator_barrier(dummy) theGridAccelerator->wait();
|
||||
#define accelerator_barrier(dummy) { printf(" theGridAccelerator::wait()\n"); theGridAccelerator->wait(); }
|
||||
|
||||
inline void *acceleratorAllocShared(size_t bytes){ return malloc_shared(bytes,*theGridAccelerator);};
|
||||
inline void *acceleratorAllocDevice(size_t bytes){ return malloc_device(bytes,*theGridAccelerator);};
|
||||
inline void acceleratorFreeShared(void *ptr){free(ptr,*theGridAccelerator);};
|
||||
inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);};
|
||||
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) {
|
||||
theGridAccelerator->memcpy(to,from,bytes);
|
||||
}
|
||||
inline void acceleratorCopySynchronise(void) { theGridAccelerator->wait(); std::cout<<"acceleratorCopySynchronise() wait "<<std::endl; }
|
||||
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();}
|
||||
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();}
|
||||
inline void acceleratorMemSet(void *base,int value,size_t bytes) { theGridAccelerator->memset(base,value,bytes); theGridAccelerator->wait();}
|
||||
|
||||
inline void acceleratorCopySynchronise(void) { printf(" theCopyAccelerator::wait()\n"); theCopyAccelerator->wait(); }
|
||||
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { theCopyAccelerator->memcpy(to,from,bytes);}
|
||||
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { theCopyAccelerator->memcpy(to,from,bytes); theCopyAccelerator->wait();}
|
||||
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ theCopyAccelerator->memcpy(to,from,bytes); theCopyAccelerator->wait();}
|
||||
inline void acceleratorMemSet(void *base,int value,size_t bytes) { theCopyAccelerator->memset(base,value,bytes); theCopyAccelerator->wait();}
|
||||
|
||||
inline int acceleratorIsCommunicable(void *ptr)
|
||||
{
|
||||
#if 0
|
||||
@ -514,7 +514,16 @@ inline void *acceleratorAllocCpu(size_t bytes){return memalign(GRID_ALLOC_ALIGN,
|
||||
inline void acceleratorFreeCpu (void *ptr){free(ptr);};
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// Fencing needed ONLY for SYCL
|
||||
//////////////////////////////////////////////
|
||||
|
||||
#ifdef GRID_SYCL
|
||||
inline void acceleratorFenceComputeStream(void){ accelerator_barrier();};
|
||||
#else
|
||||
// Ordering within a stream guaranteed on Nvidia & AMD
|
||||
inline void acceleratorFenceComputeStream(void){ };
|
||||
#endif
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// Synchronise across local threads for divergence resynch
|
||||
|
@ -27,6 +27,7 @@
|
||||
/* END LEGAL */
|
||||
extern "C" {
|
||||
#include <openssl/sha.h>
|
||||
#include <openssl/evp.h>
|
||||
}
|
||||
#ifdef USE_IPP
|
||||
#include "ipp.h"
|
||||
@ -70,10 +71,8 @@ public:
|
||||
static inline std::vector<unsigned char> sha256(const void *data,size_t bytes)
|
||||
{
|
||||
std::vector<unsigned char> hash(SHA256_DIGEST_LENGTH);
|
||||
SHA256_CTX sha256;
|
||||
SHA256_Init (&sha256);
|
||||
SHA256_Update(&sha256, data,bytes);
|
||||
SHA256_Final (&hash[0], &sha256);
|
||||
auto digest = EVP_get_digestbyname("SHA256");
|
||||
EVP_Digest(data, bytes, &hash[0], NULL, digest, NULL);
|
||||
return hash;
|
||||
}
|
||||
static inline std::vector<int> sha256_seeds(const std::string &s)
|
||||
|
Reference in New Issue
Block a user