mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-12 16:55:37 +00:00
Make GPT test suite work with SYCL
This commit is contained in:
parent
66391f84f2
commit
22b43b86cb
@ -62,7 +62,7 @@ void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm)
|
|||||||
basis_v.push_back(basis[k].View(AcceleratorWrite));
|
basis_v.push_back(basis[k].View(AcceleratorWrite));
|
||||||
}
|
}
|
||||||
|
|
||||||
#if ( !(defined(GRID_CUDA) || defined(GRID_HIP)) )
|
#if ( !(defined(GRID_CUDA) || defined(GRID_HIP) || defined(GRID_SYCL)) )
|
||||||
int max_threads = thread_max();
|
int max_threads = thread_max();
|
||||||
Vector < vobj > Bt(Nm * max_threads);
|
Vector < vobj > Bt(Nm * max_threads);
|
||||||
thread_region
|
thread_region
|
||||||
|
@ -469,15 +469,13 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
|
|||||||
Coordinate fine_rdimensions = fine->_rdimensions;
|
Coordinate fine_rdimensions = fine->_rdimensions;
|
||||||
Coordinate coarse_rdimensions = coarse->_rdimensions;
|
Coordinate coarse_rdimensions = coarse->_rdimensions;
|
||||||
|
|
||||||
vobj zz = Zero();
|
|
||||||
|
|
||||||
accelerator_for(sc,coarse->oSites(),1,{
|
accelerator_for(sc,coarse->oSites(),1,{
|
||||||
|
|
||||||
// One thread per sub block
|
// One thread per sub block
|
||||||
Coordinate coor_c(_ndimension);
|
Coordinate coor_c(_ndimension);
|
||||||
Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions); // Block coordinate
|
Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions); // Block coordinate
|
||||||
|
|
||||||
vobj cd = zz;
|
vobj cd = Zero();
|
||||||
|
|
||||||
for(int sb=0;sb<blockVol;sb++){
|
for(int sb=0;sb<blockVol;sb++){
|
||||||
|
|
||||||
|
@ -287,23 +287,37 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) {
|
|||||||
|
|
||||||
#define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... ) \
|
#define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... ) \
|
||||||
theGridAccelerator->submit([&](cl::sycl::handler &cgh) { \
|
theGridAccelerator->submit([&](cl::sycl::handler &cgh) { \
|
||||||
unsigned long nt=acceleratorThreads(); \
|
unsigned long nt=acceleratorThreads(); \
|
||||||
unsigned long unum1 = num1; \
|
if(nt < 8)nt=8; \
|
||||||
unsigned long unum2 = num2; \
|
unsigned long unum1 = num1; \
|
||||||
if(nt < 8)nt=8; \
|
unsigned long unum2 = num2; \
|
||||||
cl::sycl::range<3> local {nt,1,nsimd}; \
|
unsigned long unum1_divisible_by_nt = ((unum1 + nt - 1) / nt) * nt; \
|
||||||
cl::sycl::range<3> global{unum1,unum2,nsimd}; \
|
cl::sycl::range<3> local {nt,1,nsimd}; \
|
||||||
cgh.parallel_for( \
|
cl::sycl::range<3> global{unum1_divisible_by_nt,unum2,nsimd}; \
|
||||||
cl::sycl::nd_range<3>(global,local), \
|
if (unum1_divisible_by_nt != unum1) { \
|
||||||
[=] (cl::sycl::nd_item<3> item) /*mutable*/ \
|
cgh.parallel_for( \
|
||||||
[[intel::reqd_sub_group_size(16)]] \
|
cl::sycl::nd_range<3>(global,local), \
|
||||||
{ \
|
[=] (cl::sycl::nd_item<3> item) /*mutable*/ \
|
||||||
auto iter1 = item.get_global_id(0); \
|
[[intel::reqd_sub_group_size(16)]] \
|
||||||
auto iter2 = item.get_global_id(1); \
|
{ \
|
||||||
auto lane = item.get_global_id(2); \
|
auto iter1 = item.get_global_id(0); \
|
||||||
{ __VA_ARGS__ }; \
|
auto iter2 = item.get_global_id(1); \
|
||||||
}); \
|
auto lane = item.get_global_id(2); \
|
||||||
});
|
{ if (iter1 < unum1){ __VA_ARGS__ } }; \
|
||||||
|
}); \
|
||||||
|
} else { \
|
||||||
|
cgh.parallel_for( \
|
||||||
|
cl::sycl::nd_range<3>(global,local), \
|
||||||
|
[=] (cl::sycl::nd_item<3> item) /*mutable*/ \
|
||||||
|
[[intel::reqd_sub_group_size(16)]] \
|
||||||
|
{ \
|
||||||
|
auto iter1 = item.get_global_id(0); \
|
||||||
|
auto iter2 = item.get_global_id(1); \
|
||||||
|
auto lane = item.get_global_id(2); \
|
||||||
|
{ __VA_ARGS__ }; \
|
||||||
|
}); \
|
||||||
|
} \
|
||||||
|
});
|
||||||
|
|
||||||
#define accelerator_barrier(dummy) { theGridAccelerator->wait(); }
|
#define accelerator_barrier(dummy) { theGridAccelerator->wait(); }
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user