1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-09 23:45:36 +00:00

Compare commits

..

13 Commits

Author SHA1 Message Date
meifeng
249c7aee39
Merge 37d1d87c3c into 2e570f5300 2024-02-28 13:59:24 -05:00
Peter Boyle
2e570f5300
Merge pull request #457 from lehner/feature/gpt
Import GPT-related updates
2024-02-28 13:59:04 -05:00
Christoph Lehner
9f89486df5 remove unnecessary code path 2024-02-28 19:56:23 +01:00
Christoph Lehner
22b43b86cb Make GPT test suite work with SYCL 2024-02-28 12:57:17 +01:00
Christoph Lehner
66391f84f2 Merge branch 'feature/gpt' of ../Grid into develop 2024-02-21 19:05:00 +01:00
Christoph Lehner
f2648e94b9 getHostPointer added to Lattice 2023-10-23 13:47:41 +02:00
Christoph Lehner
e6ed516052 merged 2023-10-08 09:00:37 +02:00
Christoph Lehner
e2a3dae1f2 Option for multiple simultaneous CartesianStencils 2023-10-08 08:58:44 +02:00
Christoph Lehner
452bf2e907 Accelerator basisRotate also on HIP 2023-06-20 20:36:24 +03:00
Christoph Lehner
e8c29e2fe5
Merge pull request #31 from paboyle/develop
Sync
2023-05-28 16:13:12 +02:00
Christoph Lehner
da9cbfc7cc
Suppress BuildSurfaceList verbosity in Stencil.h 2023-05-19 20:22:20 +02:00
Christoph Lehner
6b9f07c1ed
Merge pull request #30 from paboyle/develop
Merge upstream
2023-05-19 20:20:58 +02:00
Christoph Lehner
5f75735dab Add M and Mdag to WilsonTMFermion 2023-04-06 18:25:05 +02:00
7 changed files with 50 additions and 25 deletions

View File

@ -62,7 +62,7 @@ void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm)
basis_v.push_back(basis[k].View(AcceleratorWrite));
}
#if ( (!defined(GRID_CUDA)) )
#if ( !(defined(GRID_CUDA) || defined(GRID_HIP) || defined(GRID_SYCL)) )
int max_threads = thread_max();
Vector < vobj > Bt(Nm * max_threads);
thread_region

View File

@ -469,15 +469,13 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
Coordinate fine_rdimensions = fine->_rdimensions;
Coordinate coarse_rdimensions = coarse->_rdimensions;
vobj zz = Zero();
accelerator_for(sc,coarse->oSites(),1,{
// One thread per sub block
Coordinate coor_c(_ndimension);
Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions); // Block coordinate
vobj cd = zz;
vobj cd = Zero();
for(int sb=0;sb<blockVol;sb++){

View File

@ -45,6 +45,7 @@ public:
};
// Host only
// Accessor: hands back the stored _grid pointer; const, so it does not modify this object.
GridBase * getGrid(void) const { return _grid; };
// Accessor: hands back the stored _odata pointer as a vobj*; const, so it does not modify this object.
// NOTE(review): the name suggests the pointer is meant to be dereferenced on the host only — confirm
// where _odata is allocated before using it from device code.
vobj* getHostPointer(void) const { return _odata; };
};
/////////////////////////////////////////////////////////////////////////////////////////

View File

@ -63,7 +63,9 @@ public:
virtual void MooeeDag(const FermionField &in, FermionField &out) ;
virtual void MooeeInv(const FermionField &in, FermionField &out) ;
virtual void MooeeInvDag(const FermionField &in, FermionField &out) ;
virtual void M(const FermionField &in, FermionField &out) ;
virtual void Mdag(const FermionField &in, FermionField &out) ;
private:
RealD mu; // TwistedMass parameter

View File

@ -93,5 +93,25 @@ void WilsonTMFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &ou
RealD b = tm /sq;
axpibg5x(out,in,a,b);
}
// Applies the operator to `in`, leaving the result in `out`:
//   out = Dhop(in) [DaggerNo] + axpibg5x-term(in; 4 + mass, +mu)
// NOTE(review): axpibg5x(z,x,a,b) presumably forms z = a*x + i*b*gamma5*x,
// going by its name — confirm against its definition.
template<class Impl>
void WilsonTMFermion<Impl>::M(const FermionField &in, FermionField &out) {
  // Result lives on the same checkerboard as the input.
  out.Checkerboard() = in.Checkerboard();

  // Hopping contribution is written straight into out.
  this->Dhop(in, out, DaggerNo);

  // Site-local contribution is assembled in a scratch field on out's grid.
  FermionField siteTerm(out.Grid());
  RealD diagCoeff  = this->mass + 4.0;
  RealD twistCoeff = this->mu;
  axpibg5x(siteTerm, in, diagCoeff, twistCoeff);

  // Accumulate: out <- 1.0 * siteTerm + out.
  axpy(out, 1.0, siteTerm, out);
}
// Daggered counterpart of M: applies Dhop with DaggerYes and flips the sign
// of the mu coefficient handed to axpibg5x:
//   out = Dhop(in) [DaggerYes] + axpibg5x-term(in; 4 + mass, -mu)
// NOTE(review): axpibg5x(z,x,a,b) presumably forms z = a*x + i*b*gamma5*x,
// going by its name — confirm against its definition.
template<class Impl>
void WilsonTMFermion<Impl>::Mdag(const FermionField &in, FermionField &out) {
  // Result lives on the same checkerboard as the input.
  out.Checkerboard() = in.Checkerboard();

  // Daggered hopping contribution is written straight into out.
  this->Dhop(in, out, DaggerYes);

  // Site-local contribution is assembled in a scratch field on out's grid.
  FermionField siteTerm(out.Grid());
  RealD diagCoeff  = this->mass + 4.0;
  RealD twistCoeff = -this->mu;
  axpibg5x(siteTerm, in, diagCoeff, twistCoeff);

  // Accumulate: out <- 1.0 * siteTerm + out.
  axpy(out, 1.0, siteTerm, out);
}
NAMESPACE_END(Grid);

View File

@ -706,7 +706,7 @@ public:
}
}
}
std::cout << GridLogDebug << "BuildSurfaceList size is "<<surface_list.size()<<std::endl;
//std::cout << "BuildSurfaceList size is "<<surface_list.size()<<std::endl;
}
/// Introduce a block structure and switch off comms on boundaries
void DirichletBlock(const Coordinate &dirichlet_block)
@ -761,7 +761,8 @@ public:
int checkerboard,
const std::vector<int> &directions,
const std::vector<int> &distances,
Parameters p=Parameters())
Parameters p=Parameters(),
bool preserve_shm=false)
{
face_table_computed=0;
_grid = grid;
@ -855,7 +856,9 @@ public:
/////////////////////////////////////////////////////////////////////////////////
const int Nsimd = grid->Nsimd();
_grid->ShmBufferFreeAll();
// Allow for multiple stencils to exist simultaneously
if (!preserve_shm)
_grid->ShmBufferFreeAll();
int maxl=2;
u_simd_send_buf.resize(maxl);

View File

@ -290,23 +290,24 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) {
#define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... ) \
theGridAccelerator->submit([&](cl::sycl::handler &cgh) { \
unsigned long nt=acceleratorThreads(); \
unsigned long unum1 = num1; \
unsigned long unum2 = num2; \
if(nt < 8)nt=8; \
cl::sycl::range<3> local {nt,1,nsimd}; \
cl::sycl::range<3> global{unum1,unum2,nsimd}; \
cgh.parallel_for( \
cl::sycl::nd_range<3>(global,local), \
[=] (cl::sycl::nd_item<3> item) /*mutable*/ \
[[intel::reqd_sub_group_size(16)]] \
{ \
auto iter1 = item.get_global_id(0); \
auto iter2 = item.get_global_id(1); \
auto lane = item.get_global_id(2); \
{ __VA_ARGS__ }; \
}); \
});
unsigned long nt=acceleratorThreads(); \
if(nt < 8)nt=8; \
unsigned long unum1 = num1; \
unsigned long unum2 = num2; \
unsigned long unum1_divisible_by_nt = ((unum1 + nt - 1) / nt) * nt; \
cl::sycl::range<3> local {nt,1,nsimd}; \
cl::sycl::range<3> global{unum1_divisible_by_nt,unum2,nsimd}; \
cgh.parallel_for( \
cl::sycl::nd_range<3>(global,local), \
[=] (cl::sycl::nd_item<3> item) /*mutable*/ \
[[intel::reqd_sub_group_size(16)]] \
{ \
auto iter1 = item.get_global_id(0); \
auto iter2 = item.get_global_id(1); \
auto lane = item.get_global_id(2); \
{ if (iter1 < unum1){ __VA_ARGS__ } }; \
}); \
});
#define accelerator_barrier(dummy) { theGridAccelerator->wait(); }