1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-09 23:45:36 +00:00

Merge pull request #457 from lehner/feature/gpt

Import GPT-related updates
This commit is contained in:
Peter Boyle 2024-02-28 13:59:04 -05:00 committed by GitHub
commit 2e570f5300
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 50 additions and 25 deletions

View File

@ -62,7 +62,7 @@ void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm)
basis_v.push_back(basis[k].View(AcceleratorWrite)); basis_v.push_back(basis[k].View(AcceleratorWrite));
} }
#if ( (!defined(GRID_CUDA)) ) #if ( !(defined(GRID_CUDA) || defined(GRID_HIP) || defined(GRID_SYCL)) )
int max_threads = thread_max(); int max_threads = thread_max();
Vector < vobj > Bt(Nm * max_threads); Vector < vobj > Bt(Nm * max_threads);
thread_region thread_region

View File

@ -469,15 +469,13 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
Coordinate fine_rdimensions = fine->_rdimensions; Coordinate fine_rdimensions = fine->_rdimensions;
Coordinate coarse_rdimensions = coarse->_rdimensions; Coordinate coarse_rdimensions = coarse->_rdimensions;
vobj zz = Zero();
accelerator_for(sc,coarse->oSites(),1,{ accelerator_for(sc,coarse->oSites(),1,{
// One thread per sub block // One thread per sub block
Coordinate coor_c(_ndimension); Coordinate coor_c(_ndimension);
Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions); // Block coordinate Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions); // Block coordinate
vobj cd = zz; vobj cd = Zero();
for(int sb=0;sb<blockVol;sb++){ for(int sb=0;sb<blockVol;sb++){

View File

@ -45,6 +45,7 @@ public:
}; };
// Host only // Host only
GridBase * getGrid(void) const { return _grid; }; GridBase * getGrid(void) const { return _grid; };
vobj* getHostPointer(void) const { return _odata; };
}; };
///////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////

View File

@ -63,7 +63,9 @@ public:
virtual void MooeeDag(const FermionField &in, FermionField &out) ; virtual void MooeeDag(const FermionField &in, FermionField &out) ;
virtual void MooeeInv(const FermionField &in, FermionField &out) ; virtual void MooeeInv(const FermionField &in, FermionField &out) ;
virtual void MooeeInvDag(const FermionField &in, FermionField &out) ; virtual void MooeeInvDag(const FermionField &in, FermionField &out) ;
virtual void M(const FermionField &in, FermionField &out) ;
virtual void Mdag(const FermionField &in, FermionField &out) ;
private: private:
RealD mu; // TwistedMass parameter RealD mu; // TwistedMass parameter

View File

@ -93,5 +93,25 @@ void WilsonTMFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &ou
RealD b = tm /sq; RealD b = tm /sq;
axpibg5x(out,in,a,b); axpibg5x(out,in,a,b);
} }
// Unpreconditioned operator: out = Dhop(in) + axpibg5x(in; 4+mass, mu).
// The second term is presumably (a + i*b*gamma5)*in, going by the helper's
// name -- confirm against the definition of axpibg5x.
template<class Impl>
void WilsonTMFermion<Impl>::M(const FermionField &in, FermionField &out) {
  // The result carries the same checkerboard tag as the input.
  out.Checkerboard() = in.Checkerboard();

  // Hopping term, written straight into the output field.
  this->Dhop(in, out, DaggerNo);

  // Coefficients handed to axpibg5x for the site-local term.
  const RealD diag  = 4.0 + this->mass;
  const RealD twist = this->mu;

  // Site-local term is built in a scratch field on the output's grid,
  // then accumulated onto the hopping term.
  FermionField local(out.Grid());
  axpibg5x(local, in, diag, twist);
  axpy(out, 1.0, local, out);
}
// Daggered counterpart of M: out = Dhop^dag(in) + axpibg5x(in; 4+mass, -mu).
// Differs from M only in the DaggerYes flag and the sign of the mu coefficient.
template<class Impl>
void WilsonTMFermion<Impl>::Mdag(const FermionField &in, FermionField &out) {
  // The result carries the same checkerboard tag as the input.
  out.Checkerboard() = in.Checkerboard();

  // Daggered hopping term, written straight into the output field.
  this->Dhop(in, out, DaggerYes);

  // Coefficients handed to axpibg5x; note the negated mu.
  const RealD diag  = 4.0 + this->mass;
  const RealD twist = -this->mu;

  // Site-local term is built in a scratch field on the output's grid,
  // then accumulated onto the hopping term.
  FermionField local(out.Grid());
  axpibg5x(local, in, diag, twist);
  axpy(out, 1.0, local, out);
}
NAMESPACE_END(Grid); NAMESPACE_END(Grid);

View File

@ -706,7 +706,7 @@ public:
} }
} }
} }
std::cout << GridLogDebug << "BuildSurfaceList size is "<<surface_list.size()<<std::endl; //std::cout << "BuildSurfaceList size is "<<surface_list.size()<<std::endl;
} }
/// Introduce a block structure and switch off comms on boundaries /// Introduce a block structure and switch off comms on boundaries
void DirichletBlock(const Coordinate &dirichlet_block) void DirichletBlock(const Coordinate &dirichlet_block)
@ -761,7 +761,8 @@ public:
int checkerboard, int checkerboard,
const std::vector<int> &directions, const std::vector<int> &directions,
const std::vector<int> &distances, const std::vector<int> &distances,
Parameters p=Parameters()) Parameters p=Parameters(),
bool preserve_shm=false)
{ {
face_table_computed=0; face_table_computed=0;
_grid = grid; _grid = grid;
@ -855,7 +856,9 @@ public:
///////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////
const int Nsimd = grid->Nsimd(); const int Nsimd = grid->Nsimd();
_grid->ShmBufferFreeAll(); // Allow for multiple stencils to exist simultaneously
if (!preserve_shm)
_grid->ShmBufferFreeAll();
int maxl=2; int maxl=2;
u_simd_send_buf.resize(maxl); u_simd_send_buf.resize(maxl);

View File

@ -287,23 +287,24 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) {
#define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... ) \ #define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... ) \
theGridAccelerator->submit([&](cl::sycl::handler &cgh) { \ theGridAccelerator->submit([&](cl::sycl::handler &cgh) { \
unsigned long nt=acceleratorThreads(); \ unsigned long nt=acceleratorThreads(); \
unsigned long unum1 = num1; \ if(nt < 8)nt=8; \
unsigned long unum2 = num2; \ unsigned long unum1 = num1; \
if(nt < 8)nt=8; \ unsigned long unum2 = num2; \
cl::sycl::range<3> local {nt,1,nsimd}; \ unsigned long unum1_divisible_by_nt = ((unum1 + nt - 1) / nt) * nt; \
cl::sycl::range<3> global{unum1,unum2,nsimd}; \ cl::sycl::range<3> local {nt,1,nsimd}; \
cgh.parallel_for( \ cl::sycl::range<3> global{unum1_divisible_by_nt,unum2,nsimd}; \
cl::sycl::nd_range<3>(global,local), \ cgh.parallel_for( \
[=] (cl::sycl::nd_item<3> item) /*mutable*/ \ cl::sycl::nd_range<3>(global,local), \
[[intel::reqd_sub_group_size(16)]] \ [=] (cl::sycl::nd_item<3> item) /*mutable*/ \
{ \ [[intel::reqd_sub_group_size(16)]] \
auto iter1 = item.get_global_id(0); \ { \
auto iter2 = item.get_global_id(1); \ auto iter1 = item.get_global_id(0); \
auto lane = item.get_global_id(2); \ auto iter2 = item.get_global_id(1); \
{ __VA_ARGS__ }; \ auto lane = item.get_global_id(2); \
}); \ { if (iter1 < unum1){ __VA_ARGS__ } }; \
}); }); \
});
#define accelerator_barrier(dummy) { theGridAccelerator->wait(); } #define accelerator_barrier(dummy) { theGridAccelerator->wait(); }