mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
Merge pull request #457 from lehner/feature/gpt
Import GPT-related updates
This commit is contained in:
commit
2e570f5300
@ -62,7 +62,7 @@ void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm)
|
|||||||
basis_v.push_back(basis[k].View(AcceleratorWrite));
|
basis_v.push_back(basis[k].View(AcceleratorWrite));
|
||||||
}
|
}
|
||||||
|
|
||||||
#if ( (!defined(GRID_CUDA)) )
|
#if ( !(defined(GRID_CUDA) || defined(GRID_HIP) || defined(GRID_SYCL)) )
|
||||||
int max_threads = thread_max();
|
int max_threads = thread_max();
|
||||||
Vector < vobj > Bt(Nm * max_threads);
|
Vector < vobj > Bt(Nm * max_threads);
|
||||||
thread_region
|
thread_region
|
||||||
|
@ -469,15 +469,13 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
|
|||||||
Coordinate fine_rdimensions = fine->_rdimensions;
|
Coordinate fine_rdimensions = fine->_rdimensions;
|
||||||
Coordinate coarse_rdimensions = coarse->_rdimensions;
|
Coordinate coarse_rdimensions = coarse->_rdimensions;
|
||||||
|
|
||||||
vobj zz = Zero();
|
|
||||||
|
|
||||||
accelerator_for(sc,coarse->oSites(),1,{
|
accelerator_for(sc,coarse->oSites(),1,{
|
||||||
|
|
||||||
// One thread per sub block
|
// One thread per sub block
|
||||||
Coordinate coor_c(_ndimension);
|
Coordinate coor_c(_ndimension);
|
||||||
Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions); // Block coordinate
|
Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions); // Block coordinate
|
||||||
|
|
||||||
vobj cd = zz;
|
vobj cd = Zero();
|
||||||
|
|
||||||
for(int sb=0;sb<blockVol;sb++){
|
for(int sb=0;sb<blockVol;sb++){
|
||||||
|
|
||||||
|
@ -45,6 +45,7 @@ public:
|
|||||||
};
|
};
|
||||||
// Host only
|
// Host only
|
||||||
GridBase * getGrid(void) const { return _grid; };
|
GridBase * getGrid(void) const { return _grid; };
|
||||||
|
// Accessor in the "Host only" group: hands back the stored _odata pointer unchanged.
// NOTE(review): whether _odata is host-dereferenceable depends on how this object was opened — confirm at call sites.
vobj* getHostPointer(void) const { return _odata; };
|
||||||
};
|
};
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -63,7 +63,9 @@ public:
|
|||||||
virtual void MooeeDag(const FermionField &in, FermionField &out) ;
|
virtual void MooeeDag(const FermionField &in, FermionField &out) ;
|
||||||
virtual void MooeeInv(const FermionField &in, FermionField &out) ;
|
virtual void MooeeInv(const FermionField &in, FermionField &out) ;
|
||||||
virtual void MooeeInvDag(const FermionField &in, FermionField &out) ;
|
virtual void MooeeInvDag(const FermionField &in, FermionField &out) ;
|
||||||
|
virtual void M(const FermionField &in, FermionField &out) ;
|
||||||
|
virtual void Mdag(const FermionField &in, FermionField &out) ;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
RealD mu; // TwistedMass parameter
|
RealD mu; // TwistedMass parameter
|
||||||
|
|
||||||
|
@ -93,5 +93,25 @@ void WilsonTMFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &ou
|
|||||||
RealD b = tm /sq;
|
RealD b = tm /sq;
|
||||||
axpibg5x(out,in,a,b);
|
axpibg5x(out,in,a,b);
|
||||||
}
|
}
|
||||||
|
// Applies the non-daggered operator: runs Dhop with DaggerNo into `out`, then adds
// a term built from `in` with coefficients a = 4.0 + mass and b = mu.
//   in  : source field (read only)
//   out : result field; its checkerboard tag is overwritten with that of `in`
// NOTE(review): presumably axpibg5x(z,x,a,b) forms z = a*x + i*b*gamma5*x and
// axpy(r,s,x,y) forms r = s*x + y — neither helper is visible here, confirm.
template<class Impl>
|
||||||
|
void WilsonTMFermion<Impl>::M(const FermionField &in, FermionField &out) {
|
||||||
|
// Result carries the same checkerboard label as the input.
out.Checkerboard() = in.Checkerboard();
|
||||||
|
// Hopping term, written directly into `out`.
this->Dhop(in, out, DaggerNo);
|
||||||
|
// Scratch field allocated on the same grid as `out`.
FermionField tmp(out.Grid());
|
||||||
|
RealD a = 4.0+this->mass;
|
||||||
|
// Unlike Mdag, the twisted-mass parameter enters with a positive sign here.
RealD b = this->mu;
|
||||||
|
// Site-local term built from `in` using coefficients a and b.
axpibg5x(tmp,in,a,b);
|
||||||
|
// Accumulate: `out` is both an input and the destination of this call.
axpy(out, 1.0, tmp, out);
|
||||||
|
}
|
||||||
|
// Daggered counterpart of M: runs Dhop with DaggerYes into `out`, then adds
// a term built from `in` with coefficients a = 4.0 + mass and b = -mu.
//   in  : source field (read only)
//   out : result field; its checkerboard tag is overwritten with that of `in`
// NOTE(review): presumably axpibg5x(z,x,a,b) forms z = a*x + i*b*gamma5*x and
// axpy(r,s,x,y) forms r = s*x + y — neither helper is visible here, confirm.
template<class Impl>
|
||||||
|
void WilsonTMFermion<Impl>::Mdag(const FermionField &in, FermionField &out) {
|
||||||
|
// Result carries the same checkerboard label as the input.
out.Checkerboard() = in.Checkerboard();
|
||||||
|
// Hopping term with the dagger flag set, written directly into `out`.
this->Dhop(in, out, DaggerYes);
|
||||||
|
// Scratch field allocated on the same grid as `out`.
FermionField tmp(out.Grid());
|
||||||
|
RealD a = 4.0+this->mass;
|
||||||
|
// Sign of mu is flipped relative to M.
RealD b = -this->mu;
|
||||||
|
// Site-local term built from `in` using coefficients a and b.
axpibg5x(tmp,in,a,b);
|
||||||
|
// Accumulate: `out` is both an input and the destination of this call.
axpy(out, 1.0, tmp, out);
|
||||||
|
}
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
@ -706,7 +706,7 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << GridLogDebug << "BuildSurfaceList size is "<<surface_list.size()<<std::endl;
|
//std::cout << "BuildSurfaceList size is "<<surface_list.size()<<std::endl;
|
||||||
}
|
}
|
||||||
/// Introduce a block structure and switch off comms on boundaries
|
/// Introduce a block structure and switch off comms on boundaries
|
||||||
void DirichletBlock(const Coordinate &dirichlet_block)
|
void DirichletBlock(const Coordinate &dirichlet_block)
|
||||||
@ -761,7 +761,8 @@ public:
|
|||||||
int checkerboard,
|
int checkerboard,
|
||||||
const std::vector<int> &directions,
|
const std::vector<int> &directions,
|
||||||
const std::vector<int> &distances,
|
const std::vector<int> &distances,
|
||||||
Parameters p=Parameters())
|
Parameters p=Parameters(),
|
||||||
|
bool preserve_shm=false)
|
||||||
{
|
{
|
||||||
face_table_computed=0;
|
face_table_computed=0;
|
||||||
_grid = grid;
|
_grid = grid;
|
||||||
@ -855,7 +856,9 @@ public:
|
|||||||
/////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////
|
||||||
const int Nsimd = grid->Nsimd();
|
const int Nsimd = grid->Nsimd();
|
||||||
|
|
||||||
_grid->ShmBufferFreeAll();
|
// Allow for multiple stencils to exist simultaneously
|
||||||
|
if (!preserve_shm)
|
||||||
|
_grid->ShmBufferFreeAll();
|
||||||
|
|
||||||
int maxl=2;
|
int maxl=2;
|
||||||
u_simd_send_buf.resize(maxl);
|
u_simd_send_buf.resize(maxl);
|
||||||
|
@ -287,23 +287,24 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) {
|
|||||||
|
|
||||||
#define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... ) \
|
#define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... ) \
|
||||||
theGridAccelerator->submit([&](cl::sycl::handler &cgh) { \
|
theGridAccelerator->submit([&](cl::sycl::handler &cgh) { \
|
||||||
unsigned long nt=acceleratorThreads(); \
|
unsigned long nt=acceleratorThreads(); \
|
||||||
unsigned long unum1 = num1; \
|
if(nt < 8)nt=8; \
|
||||||
unsigned long unum2 = num2; \
|
unsigned long unum1 = num1; \
|
||||||
if(nt < 8)nt=8; \
|
unsigned long unum2 = num2; \
|
||||||
cl::sycl::range<3> local {nt,1,nsimd}; \
|
unsigned long unum1_divisible_by_nt = ((unum1 + nt - 1) / nt) * nt; \
|
||||||
cl::sycl::range<3> global{unum1,unum2,nsimd}; \
|
cl::sycl::range<3> local {nt,1,nsimd}; \
|
||||||
cgh.parallel_for( \
|
cl::sycl::range<3> global{unum1_divisible_by_nt,unum2,nsimd}; \
|
||||||
cl::sycl::nd_range<3>(global,local), \
|
cgh.parallel_for( \
|
||||||
[=] (cl::sycl::nd_item<3> item) /*mutable*/ \
|
cl::sycl::nd_range<3>(global,local), \
|
||||||
[[intel::reqd_sub_group_size(16)]] \
|
[=] (cl::sycl::nd_item<3> item) /*mutable*/ \
|
||||||
{ \
|
[[intel::reqd_sub_group_size(16)]] \
|
||||||
auto iter1 = item.get_global_id(0); \
|
{ \
|
||||||
auto iter2 = item.get_global_id(1); \
|
auto iter1 = item.get_global_id(0); \
|
||||||
auto lane = item.get_global_id(2); \
|
auto iter2 = item.get_global_id(1); \
|
||||||
{ __VA_ARGS__ }; \
|
auto lane = item.get_global_id(2); \
|
||||||
}); \
|
{ if (iter1 < unum1){ __VA_ARGS__ } }; \
|
||||||
});
|
}); \
|
||||||
|
});
|
||||||
|
|
||||||
#define accelerator_barrier(dummy) { theGridAccelerator->wait(); }
|
#define accelerator_barrier(dummy) { theGridAccelerator->wait(); }
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user