mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-14 01:35:36 +00:00
Merge branch 'develop' of https://github.com/paboyle/Grid into develop
This commit is contained in:
commit
8eb1232683
1
.gitignore
vendored
1
.gitignore
vendored
@ -88,6 +88,7 @@ Thumbs.db
|
|||||||
# build directory #
|
# build directory #
|
||||||
###################
|
###################
|
||||||
build*/*
|
build*/*
|
||||||
|
Documentation/_build
|
||||||
|
|
||||||
# IDE related files #
|
# IDE related files #
|
||||||
#####################
|
#####################
|
||||||
|
@ -442,6 +442,8 @@ public:
|
|||||||
for(int p=0; p<geom.npoint; p++)
|
for(int p=0; p<geom.npoint; p++)
|
||||||
points[p] = geom.points_dagger[p];
|
points[p] = geom.points_dagger[p];
|
||||||
|
|
||||||
|
auto points_p = &points[0];
|
||||||
|
|
||||||
RealD* dag_factor_p = &dag_factor[0];
|
RealD* dag_factor_p = &dag_factor[0];
|
||||||
|
|
||||||
accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
|
accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
|
||||||
@ -453,7 +455,7 @@ public:
|
|||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
|
|
||||||
for(int p=0;p<geom_v.npoint;p++){
|
for(int p=0;p<geom_v.npoint;p++){
|
||||||
int point = points[p];
|
int point = points_p[p];
|
||||||
|
|
||||||
SE=Stencil_v.GetEntry(ptype,point,ss);
|
SE=Stencil_v.GetEntry(ptype,point,ss);
|
||||||
|
|
||||||
@ -708,6 +710,8 @@ public:
|
|||||||
for(int p=0; p<npoint; p++)
|
for(int p=0; p<npoint; p++)
|
||||||
points[p] = (dag && !hermitian) ? geom.points_dagger[p] : p;
|
points[p] = (dag && !hermitian) ? geom.points_dagger[p] : p;
|
||||||
|
|
||||||
|
auto points_p = &points[0];
|
||||||
|
|
||||||
Vector<Aview> AcceleratorViewContainer;
|
Vector<Aview> AcceleratorViewContainer;
|
||||||
for(int p=0;p<npoint;p++) AcceleratorViewContainer.push_back(a[p].View(AcceleratorRead));
|
for(int p=0;p<npoint;p++) AcceleratorViewContainer.push_back(a[p].View(AcceleratorRead));
|
||||||
Aview *Aview_p = & AcceleratorViewContainer[0];
|
Aview *Aview_p = & AcceleratorViewContainer[0];
|
||||||
@ -728,7 +732,7 @@ public:
|
|||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
|
|
||||||
for(int p=0;p<npoint;p++){
|
for(int p=0;p<npoint;p++){
|
||||||
int point = points[p];
|
int point = points_p[p];
|
||||||
SE=st_v.GetEntry(ptype,point,ss);
|
SE=st_v.GetEntry(ptype,point,ss);
|
||||||
|
|
||||||
if(SE->_is_local) {
|
if(SE->_is_local) {
|
||||||
@ -754,7 +758,7 @@ public:
|
|||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
|
|
||||||
for(int p=0;p<npoint;p++){
|
for(int p=0;p<npoint;p++){
|
||||||
int point = points[p];
|
int point = points_p[p];
|
||||||
SE=st_v.GetEntry(ptype,point,ss);
|
SE=st_v.GetEntry(ptype,point,ss);
|
||||||
|
|
||||||
if(SE->_is_local) {
|
if(SE->_is_local) {
|
||||||
|
@ -136,7 +136,7 @@ public:
|
|||||||
flops=0;
|
flops=0;
|
||||||
usec =0;
|
usec =0;
|
||||||
Coordinate layout(Nd,1);
|
Coordinate layout(Nd,1);
|
||||||
sgrid = new GridCartesian(dimensions,layout,processors);
|
sgrid = new GridCartesian(dimensions,layout,processors,*grid);
|
||||||
};
|
};
|
||||||
|
|
||||||
~FFT ( void) {
|
~FFT ( void) {
|
||||||
@ -182,7 +182,7 @@ public:
|
|||||||
pencil_gd[dim] = G*processors[dim];
|
pencil_gd[dim] = G*processors[dim];
|
||||||
|
|
||||||
// Pencil global vol LxLxGxLxL per node
|
// Pencil global vol LxLxGxLxL per node
|
||||||
GridCartesian pencil_g(pencil_gd,layout,processors);
|
GridCartesian pencil_g(pencil_gd,layout,processors,*vgrid);
|
||||||
|
|
||||||
// Construct pencils
|
// Construct pencils
|
||||||
typedef typename vobj::scalar_object sobj;
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
@ -132,6 +132,31 @@ namespace Grid {
|
|||||||
(*this)(_Matrix,in,out,guess);
|
(*this)(_Matrix,in,out,guess);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void RedBlackSource(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &src_o)
|
||||||
|
{
|
||||||
|
GridBase *grid = _Matrix.RedBlackGrid();
|
||||||
|
Field tmp(grid);
|
||||||
|
int nblock = in.size();
|
||||||
|
for(int b=0;b<nblock;b++){
|
||||||
|
RedBlackSource(_Matrix,in[b],tmp,src_o[b]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// James can write his own deflated guesser
|
||||||
|
// with optimised code for the inner products
|
||||||
|
// RedBlackSolveSplitGrid();
|
||||||
|
// RedBlackSolve(_Matrix,src_o,sol_o);
|
||||||
|
|
||||||
|
void RedBlackSolution(Matrix &_Matrix, const std::vector<Field> &in, const std::vector<Field> &sol_o, std::vector<Field> &out)
|
||||||
|
{
|
||||||
|
GridBase *grid = _Matrix.RedBlackGrid();
|
||||||
|
Field tmp(grid);
|
||||||
|
int nblock = in.size();
|
||||||
|
for(int b=0;b<nblock;b++) {
|
||||||
|
pickCheckerboard(Even,tmp,in[b]);
|
||||||
|
RedBlackSolution(_Matrix,sol_o[b],tmp,out[b]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template<class Guesser>
|
template<class Guesser>
|
||||||
void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out,Guesser &guess)
|
void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out,Guesser &guess)
|
||||||
{
|
{
|
||||||
@ -150,9 +175,11 @@ namespace Grid {
|
|||||||
////////////////////////////////////////////////
|
////////////////////////////////////////////////
|
||||||
// Prepare RedBlack source
|
// Prepare RedBlack source
|
||||||
////////////////////////////////////////////////
|
////////////////////////////////////////////////
|
||||||
for(int b=0;b<nblock;b++){
|
RedBlackSource(_Matrix,in,src_o);
|
||||||
RedBlackSource(_Matrix,in[b],tmp,src_o[b]);
|
// for(int b=0;b<nblock;b++){
|
||||||
}
|
// RedBlackSource(_Matrix,in[b],tmp,src_o[b]);
|
||||||
|
// }
|
||||||
|
|
||||||
////////////////////////////////////////////////
|
////////////////////////////////////////////////
|
||||||
// Make the guesses
|
// Make the guesses
|
||||||
////////////////////////////////////////////////
|
////////////////////////////////////////////////
|
||||||
|
@ -33,6 +33,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
|
bool Stencil_force_mpi = true;
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
// Info that is setup once and indept of cartesian layout
|
// Info that is setup once and indept of cartesian layout
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
|
@ -35,6 +35,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
|
extern bool Stencil_force_mpi ;
|
||||||
|
|
||||||
class CartesianCommunicator : public SharedMemory {
|
class CartesianCommunicator : public SharedMemory {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
@ -370,7 +370,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
|
|||||||
double off_node_bytes=0.0;
|
double off_node_bytes=0.0;
|
||||||
int tag;
|
int tag;
|
||||||
|
|
||||||
if ( gfrom ==MPI_UNDEFINED) {
|
if ( (gfrom ==MPI_UNDEFINED) || Stencil_force_mpi ) {
|
||||||
tag= dir+from*32;
|
tag= dir+from*32;
|
||||||
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,tag,communicator_halo[commdir],&rrq);
|
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,tag,communicator_halo[commdir],&rrq);
|
||||||
assert(ierr==0);
|
assert(ierr==0);
|
||||||
@ -378,12 +378,18 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
|
|||||||
off_node_bytes+=bytes;
|
off_node_bytes+=bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( gdest == MPI_UNDEFINED ) {
|
if ( (gdest == MPI_UNDEFINED) || Stencil_force_mpi ) {
|
||||||
tag= dir+_processor*32;
|
tag= dir+_processor*32;
|
||||||
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,tag,communicator_halo[commdir],&xrq);
|
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,tag,communicator_halo[commdir],&xrq);
|
||||||
assert(ierr==0);
|
assert(ierr==0);
|
||||||
list.push_back(xrq);
|
list.push_back(xrq);
|
||||||
off_node_bytes+=bytes;
|
off_node_bytes+=bytes;
|
||||||
|
} else {
|
||||||
|
// TODO : make a OMP loop on CPU, call threaded bcopy
|
||||||
|
void *shm = (void *) this->ShmBufferTranslate(dest,recv);
|
||||||
|
assert(shm!=NULL);
|
||||||
|
acceleratorCopyDeviceToDeviceAsynch(xmit,shm,bytes);
|
||||||
|
acceleratorCopySynchronize(); // MPI prob slower
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
|
if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
|
||||||
|
@ -546,6 +546,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
|||||||
//////////////////////////////////////////////////
|
//////////////////////////////////////////////////
|
||||||
// If it is me, pass around the IPC access key
|
// If it is me, pass around the IPC access key
|
||||||
//////////////////////////////////////////////////
|
//////////////////////////////////////////////////
|
||||||
|
void * thisBuf = ShmCommBuf;
|
||||||
|
if(!Stencil_force_mpi) {
|
||||||
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
|
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
|
||||||
ze_ipc_mem_handle_t handle;
|
ze_ipc_mem_handle_t handle;
|
||||||
if ( r==WorldShmRank ) {
|
if ( r==WorldShmRank ) {
|
||||||
@ -583,6 +585,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
//////////////////////////////////////////////////
|
//////////////////////////////////////////////////
|
||||||
// Share this IPC handle across the Shm Comm
|
// Share this IPC handle across the Shm Comm
|
||||||
//////////////////////////////////////////////////
|
//////////////////////////////////////////////////
|
||||||
@ -598,7 +601,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
|||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
// If I am not the source, overwrite thisBuf with remote buffer
|
// If I am not the source, overwrite thisBuf with remote buffer
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
void * thisBuf = ShmCommBuf;
|
|
||||||
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
|
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
|
||||||
if ( r!=WorldShmRank ) {
|
if ( r!=WorldShmRank ) {
|
||||||
thisBuf = nullptr;
|
thisBuf = nullptr;
|
||||||
@ -639,7 +642,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
|||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
// Save a copy of the device buffers
|
// Save a copy of the device buffers
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
WorldShmCommBufs[r] = thisBuf;
|
}
|
||||||
|
WorldShmCommBufs[r] = thisBuf;
|
||||||
#else
|
#else
|
||||||
WorldShmCommBufs[r] = ShmCommBuf;
|
WorldShmCommBufs[r] = ShmCommBuf;
|
||||||
#endif
|
#endif
|
||||||
|
@ -225,7 +225,7 @@ void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &
|
|||||||
autoView( x_v , x, AcceleratorRead);
|
autoView( x_v , x, AcceleratorRead);
|
||||||
autoView( y_v , y, AcceleratorRead);
|
autoView( y_v , y, AcceleratorRead);
|
||||||
accelerator_for(ss,x_v.size(),vobj::Nsimd(),{
|
accelerator_for(ss,x_v.size(),vobj::Nsimd(),{
|
||||||
auto tmp = a*x_v(ss)+y_v(ss);
|
auto tmp = a*coalescedRead(x_v[ss])+coalescedRead(y_v[ss]);
|
||||||
coalescedWrite(ret_v[ss],tmp);
|
coalescedWrite(ret_v[ss],tmp);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -125,7 +125,7 @@ void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm)
|
|||||||
|
|
||||||
for(int k=k0; k<k1; ++k){
|
for(int k=k0; k<k1; ++k){
|
||||||
auto tmp = coalescedRead(Bp[ss*nrot+j]);
|
auto tmp = coalescedRead(Bp[ss*nrot+j]);
|
||||||
coalescedWrite(Bp[ss*nrot+j],tmp+ Qt_p[jj*Nm+k] * coalescedRead(basis_v[k][sss]));
|
coalescedWrite(Bp[ss*nrot+j],tmp+ Qt_p[jj*Nm+k] * coalescedRead(basis_vp[k][sss]));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -134,7 +134,7 @@ void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm)
|
|||||||
int jj =j0+j;
|
int jj =j0+j;
|
||||||
int ss =sj/nrot;
|
int ss =sj/nrot;
|
||||||
int sss=ss+s;
|
int sss=ss+s;
|
||||||
coalescedWrite(basis_v[jj][sss],coalescedRead(Bp[ss*nrot+j]));
|
coalescedWrite(basis_vp[jj][sss],coalescedRead(Bp[ss*nrot+j]));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -361,6 +361,7 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
|
|||||||
// But easily avoided by using double precision fields
|
// But easily avoided by using double precision fields
|
||||||
///////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////
|
||||||
typedef typename vobj::scalar_object sobj;
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
typedef typename vobj::scalar_object::scalar_type scalar_type;
|
||||||
GridBase *grid = Data.Grid();
|
GridBase *grid = Data.Grid();
|
||||||
assert(grid!=NULL);
|
assert(grid!=NULL);
|
||||||
|
|
||||||
@ -419,20 +420,19 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
|
|||||||
}
|
}
|
||||||
|
|
||||||
// sum over nodes.
|
// sum over nodes.
|
||||||
sobj gsum;
|
|
||||||
for(int t=0;t<fd;t++){
|
for(int t=0;t<fd;t++){
|
||||||
int pt = t/ld; // processor plane
|
int pt = t/ld; // processor plane
|
||||||
int lt = t%ld;
|
int lt = t%ld;
|
||||||
if ( pt == grid->_processor_coor[orthogdim] ) {
|
if ( pt == grid->_processor_coor[orthogdim] ) {
|
||||||
gsum=lsSum[lt];
|
result[t]=lsSum[lt];
|
||||||
} else {
|
} else {
|
||||||
gsum=Zero();
|
result[t]=Zero();
|
||||||
}
|
}
|
||||||
|
|
||||||
grid->GlobalSum(gsum);
|
|
||||||
|
|
||||||
result[t]=gsum;
|
|
||||||
}
|
}
|
||||||
|
scalar_type * ptr = (scalar_type *) &result[0];
|
||||||
|
int words = fd*sizeof(sobj)/sizeof(scalar_type);
|
||||||
|
grid->GlobalSumVector(ptr, words);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
|
@ -364,15 +364,21 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
|
|||||||
autoView( coarseData_ , coarseData, AcceleratorWrite);
|
autoView( coarseData_ , coarseData, AcceleratorWrite);
|
||||||
autoView( fineData_ , fineData, AcceleratorRead);
|
autoView( fineData_ , fineData, AcceleratorRead);
|
||||||
|
|
||||||
|
auto coarseData_p = &coarseData_[0];
|
||||||
|
auto fineData_p = &fineData_[0];
|
||||||
|
|
||||||
Coordinate fine_rdimensions = fine->_rdimensions;
|
Coordinate fine_rdimensions = fine->_rdimensions;
|
||||||
Coordinate coarse_rdimensions = coarse->_rdimensions;
|
Coordinate coarse_rdimensions = coarse->_rdimensions;
|
||||||
|
|
||||||
|
vobj zz = Zero();
|
||||||
|
|
||||||
accelerator_for(sc,coarse->oSites(),1,{
|
accelerator_for(sc,coarse->oSites(),1,{
|
||||||
|
|
||||||
// One thread per sub block
|
// One thread per sub block
|
||||||
Coordinate coor_c(_ndimension);
|
Coordinate coor_c(_ndimension);
|
||||||
Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions); // Block coordinate
|
Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions); // Block coordinate
|
||||||
coarseData_[sc]=Zero();
|
|
||||||
|
vobj cd = zz;
|
||||||
|
|
||||||
for(int sb=0;sb<blockVol;sb++){
|
for(int sb=0;sb<blockVol;sb++){
|
||||||
|
|
||||||
@ -383,9 +389,11 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
|
|||||||
for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d];
|
for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d];
|
||||||
Lexicographic::IndexFromCoor(coor_f,sf,fine_rdimensions);
|
Lexicographic::IndexFromCoor(coor_f,sf,fine_rdimensions);
|
||||||
|
|
||||||
coarseData_[sc]=coarseData_[sc]+fineData_[sf];
|
cd=cd+fineData_p[sf];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
coarseData_p[sc] = cd;
|
||||||
|
|
||||||
});
|
});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -115,9 +115,9 @@ typedef WilsonFermion<WilsonImplR> WilsonFermionR;
|
|||||||
typedef WilsonFermion<WilsonImplF> WilsonFermionF;
|
typedef WilsonFermion<WilsonImplF> WilsonFermionF;
|
||||||
typedef WilsonFermion<WilsonImplD> WilsonFermionD;
|
typedef WilsonFermion<WilsonImplD> WilsonFermionD;
|
||||||
|
|
||||||
typedef WilsonFermion<WilsonImplRL> WilsonFermionRL;
|
//typedef WilsonFermion<WilsonImplRL> WilsonFermionRL;
|
||||||
typedef WilsonFermion<WilsonImplFH> WilsonFermionFH;
|
//typedef WilsonFermion<WilsonImplFH> WilsonFermionFH;
|
||||||
typedef WilsonFermion<WilsonImplDF> WilsonFermionDF;
|
//typedef WilsonFermion<WilsonImplDF> WilsonFermionDF;
|
||||||
|
|
||||||
typedef WilsonFermion<WilsonAdjImplR> WilsonAdjFermionR;
|
typedef WilsonFermion<WilsonAdjImplR> WilsonAdjFermionR;
|
||||||
typedef WilsonFermion<WilsonAdjImplF> WilsonAdjFermionF;
|
typedef WilsonFermion<WilsonAdjImplF> WilsonAdjFermionF;
|
||||||
@ -158,41 +158,41 @@ typedef DomainWallFermion<WilsonImplR> DomainWallFermionR;
|
|||||||
typedef DomainWallFermion<WilsonImplF> DomainWallFermionF;
|
typedef DomainWallFermion<WilsonImplF> DomainWallFermionF;
|
||||||
typedef DomainWallFermion<WilsonImplD> DomainWallFermionD;
|
typedef DomainWallFermion<WilsonImplD> DomainWallFermionD;
|
||||||
|
|
||||||
typedef DomainWallFermion<WilsonImplRL> DomainWallFermionRL;
|
//typedef DomainWallFermion<WilsonImplRL> DomainWallFermionRL;
|
||||||
typedef DomainWallFermion<WilsonImplFH> DomainWallFermionFH;
|
//typedef DomainWallFermion<WilsonImplFH> DomainWallFermionFH;
|
||||||
typedef DomainWallFermion<WilsonImplDF> DomainWallFermionDF;
|
//typedef DomainWallFermion<WilsonImplDF> DomainWallFermionDF;
|
||||||
|
|
||||||
typedef DomainWallEOFAFermion<WilsonImplR> DomainWallEOFAFermionR;
|
typedef DomainWallEOFAFermion<WilsonImplR> DomainWallEOFAFermionR;
|
||||||
typedef DomainWallEOFAFermion<WilsonImplF> DomainWallEOFAFermionF;
|
typedef DomainWallEOFAFermion<WilsonImplF> DomainWallEOFAFermionF;
|
||||||
typedef DomainWallEOFAFermion<WilsonImplD> DomainWallEOFAFermionD;
|
typedef DomainWallEOFAFermion<WilsonImplD> DomainWallEOFAFermionD;
|
||||||
|
|
||||||
typedef DomainWallEOFAFermion<WilsonImplRL> DomainWallEOFAFermionRL;
|
//typedef DomainWallEOFAFermion<WilsonImplRL> DomainWallEOFAFermionRL;
|
||||||
typedef DomainWallEOFAFermion<WilsonImplFH> DomainWallEOFAFermionFH;
|
//typedef DomainWallEOFAFermion<WilsonImplFH> DomainWallEOFAFermionFH;
|
||||||
typedef DomainWallEOFAFermion<WilsonImplDF> DomainWallEOFAFermionDF;
|
//typedef DomainWallEOFAFermion<WilsonImplDF> DomainWallEOFAFermionDF;
|
||||||
|
|
||||||
typedef MobiusFermion<WilsonImplR> MobiusFermionR;
|
typedef MobiusFermion<WilsonImplR> MobiusFermionR;
|
||||||
typedef MobiusFermion<WilsonImplF> MobiusFermionF;
|
typedef MobiusFermion<WilsonImplF> MobiusFermionF;
|
||||||
typedef MobiusFermion<WilsonImplD> MobiusFermionD;
|
typedef MobiusFermion<WilsonImplD> MobiusFermionD;
|
||||||
|
|
||||||
typedef MobiusFermion<WilsonImplRL> MobiusFermionRL;
|
//typedef MobiusFermion<WilsonImplRL> MobiusFermionRL;
|
||||||
typedef MobiusFermion<WilsonImplFH> MobiusFermionFH;
|
//typedef MobiusFermion<WilsonImplFH> MobiusFermionFH;
|
||||||
typedef MobiusFermion<WilsonImplDF> MobiusFermionDF;
|
//typedef MobiusFermion<WilsonImplDF> MobiusFermionDF;
|
||||||
|
|
||||||
typedef MobiusEOFAFermion<WilsonImplR> MobiusEOFAFermionR;
|
typedef MobiusEOFAFermion<WilsonImplR> MobiusEOFAFermionR;
|
||||||
typedef MobiusEOFAFermion<WilsonImplF> MobiusEOFAFermionF;
|
typedef MobiusEOFAFermion<WilsonImplF> MobiusEOFAFermionF;
|
||||||
typedef MobiusEOFAFermion<WilsonImplD> MobiusEOFAFermionD;
|
typedef MobiusEOFAFermion<WilsonImplD> MobiusEOFAFermionD;
|
||||||
|
|
||||||
typedef MobiusEOFAFermion<WilsonImplRL> MobiusEOFAFermionRL;
|
//typedef MobiusEOFAFermion<WilsonImplRL> MobiusEOFAFermionRL;
|
||||||
typedef MobiusEOFAFermion<WilsonImplFH> MobiusEOFAFermionFH;
|
//typedef MobiusEOFAFermion<WilsonImplFH> MobiusEOFAFermionFH;
|
||||||
typedef MobiusEOFAFermion<WilsonImplDF> MobiusEOFAFermionDF;
|
//typedef MobiusEOFAFermion<WilsonImplDF> MobiusEOFAFermionDF;
|
||||||
|
|
||||||
typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR;
|
typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR;
|
||||||
typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF;
|
typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF;
|
||||||
typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD;
|
typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD;
|
||||||
|
|
||||||
typedef ZMobiusFermion<ZWilsonImplRL> ZMobiusFermionRL;
|
//typedef ZMobiusFermion<ZWilsonImplRL> ZMobiusFermionRL;
|
||||||
typedef ZMobiusFermion<ZWilsonImplFH> ZMobiusFermionFH;
|
//typedef ZMobiusFermion<ZWilsonImplFH> ZMobiusFermionFH;
|
||||||
typedef ZMobiusFermion<ZWilsonImplDF> ZMobiusFermionDF;
|
//typedef ZMobiusFermion<ZWilsonImplDF> ZMobiusFermionDF;
|
||||||
|
|
||||||
// Ls vectorised
|
// Ls vectorised
|
||||||
typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR;
|
typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR;
|
||||||
@ -235,49 +235,49 @@ typedef WilsonFermion<GparityWilsonImplR> GparityWilsonFermionR;
|
|||||||
typedef WilsonFermion<GparityWilsonImplF> GparityWilsonFermionF;
|
typedef WilsonFermion<GparityWilsonImplF> GparityWilsonFermionF;
|
||||||
typedef WilsonFermion<GparityWilsonImplD> GparityWilsonFermionD;
|
typedef WilsonFermion<GparityWilsonImplD> GparityWilsonFermionD;
|
||||||
|
|
||||||
typedef WilsonFermion<GparityWilsonImplRL> GparityWilsonFermionRL;
|
//typedef WilsonFermion<GparityWilsonImplRL> GparityWilsonFermionRL;
|
||||||
typedef WilsonFermion<GparityWilsonImplFH> GparityWilsonFermionFH;
|
//typedef WilsonFermion<GparityWilsonImplFH> GparityWilsonFermionFH;
|
||||||
typedef WilsonFermion<GparityWilsonImplDF> GparityWilsonFermionDF;
|
//typedef WilsonFermion<GparityWilsonImplDF> GparityWilsonFermionDF;
|
||||||
|
|
||||||
typedef DomainWallFermion<GparityWilsonImplR> GparityDomainWallFermionR;
|
typedef DomainWallFermion<GparityWilsonImplR> GparityDomainWallFermionR;
|
||||||
typedef DomainWallFermion<GparityWilsonImplF> GparityDomainWallFermionF;
|
typedef DomainWallFermion<GparityWilsonImplF> GparityDomainWallFermionF;
|
||||||
typedef DomainWallFermion<GparityWilsonImplD> GparityDomainWallFermionD;
|
typedef DomainWallFermion<GparityWilsonImplD> GparityDomainWallFermionD;
|
||||||
|
|
||||||
typedef DomainWallFermion<GparityWilsonImplRL> GparityDomainWallFermionRL;
|
//typedef DomainWallFermion<GparityWilsonImplRL> GparityDomainWallFermionRL;
|
||||||
typedef DomainWallFermion<GparityWilsonImplFH> GparityDomainWallFermionFH;
|
//typedef DomainWallFermion<GparityWilsonImplFH> GparityDomainWallFermionFH;
|
||||||
typedef DomainWallFermion<GparityWilsonImplDF> GparityDomainWallFermionDF;
|
//typedef DomainWallFermion<GparityWilsonImplDF> GparityDomainWallFermionDF;
|
||||||
|
|
||||||
typedef DomainWallEOFAFermion<GparityWilsonImplR> GparityDomainWallEOFAFermionR;
|
typedef DomainWallEOFAFermion<GparityWilsonImplR> GparityDomainWallEOFAFermionR;
|
||||||
typedef DomainWallEOFAFermion<GparityWilsonImplF> GparityDomainWallEOFAFermionF;
|
typedef DomainWallEOFAFermion<GparityWilsonImplF> GparityDomainWallEOFAFermionF;
|
||||||
typedef DomainWallEOFAFermion<GparityWilsonImplD> GparityDomainWallEOFAFermionD;
|
typedef DomainWallEOFAFermion<GparityWilsonImplD> GparityDomainWallEOFAFermionD;
|
||||||
|
|
||||||
typedef DomainWallEOFAFermion<GparityWilsonImplRL> GparityDomainWallEOFAFermionRL;
|
//typedef DomainWallEOFAFermion<GparityWilsonImplRL> GparityDomainWallEOFAFermionRL;
|
||||||
typedef DomainWallEOFAFermion<GparityWilsonImplFH> GparityDomainWallEOFAFermionFH;
|
//typedef DomainWallEOFAFermion<GparityWilsonImplFH> GparityDomainWallEOFAFermionFH;
|
||||||
typedef DomainWallEOFAFermion<GparityWilsonImplDF> GparityDomainWallEOFAFermionDF;
|
//typedef DomainWallEOFAFermion<GparityWilsonImplDF> GparityDomainWallEOFAFermionDF;
|
||||||
|
|
||||||
typedef WilsonTMFermion<GparityWilsonImplR> GparityWilsonTMFermionR;
|
typedef WilsonTMFermion<GparityWilsonImplR> GparityWilsonTMFermionR;
|
||||||
typedef WilsonTMFermion<GparityWilsonImplF> GparityWilsonTMFermionF;
|
typedef WilsonTMFermion<GparityWilsonImplF> GparityWilsonTMFermionF;
|
||||||
typedef WilsonTMFermion<GparityWilsonImplD> GparityWilsonTMFermionD;
|
typedef WilsonTMFermion<GparityWilsonImplD> GparityWilsonTMFermionD;
|
||||||
|
|
||||||
typedef WilsonTMFermion<GparityWilsonImplRL> GparityWilsonTMFermionRL;
|
//typedef WilsonTMFermion<GparityWilsonImplRL> GparityWilsonTMFermionRL;
|
||||||
typedef WilsonTMFermion<GparityWilsonImplFH> GparityWilsonTMFermionFH;
|
//typedef WilsonTMFermion<GparityWilsonImplFH> GparityWilsonTMFermionFH;
|
||||||
typedef WilsonTMFermion<GparityWilsonImplDF> GparityWilsonTMFermionDF;
|
//typedef WilsonTMFermion<GparityWilsonImplDF> GparityWilsonTMFermionDF;
|
||||||
|
|
||||||
typedef MobiusFermion<GparityWilsonImplR> GparityMobiusFermionR;
|
typedef MobiusFermion<GparityWilsonImplR> GparityMobiusFermionR;
|
||||||
typedef MobiusFermion<GparityWilsonImplF> GparityMobiusFermionF;
|
typedef MobiusFermion<GparityWilsonImplF> GparityMobiusFermionF;
|
||||||
typedef MobiusFermion<GparityWilsonImplD> GparityMobiusFermionD;
|
typedef MobiusFermion<GparityWilsonImplD> GparityMobiusFermionD;
|
||||||
|
|
||||||
typedef MobiusFermion<GparityWilsonImplRL> GparityMobiusFermionRL;
|
//typedef MobiusFermion<GparityWilsonImplRL> GparityMobiusFermionRL;
|
||||||
typedef MobiusFermion<GparityWilsonImplFH> GparityMobiusFermionFH;
|
//typedef MobiusFermion<GparityWilsonImplFH> GparityMobiusFermionFH;
|
||||||
typedef MobiusFermion<GparityWilsonImplDF> GparityMobiusFermionDF;
|
//typedef MobiusFermion<GparityWilsonImplDF> GparityMobiusFermionDF;
|
||||||
|
|
||||||
typedef MobiusEOFAFermion<GparityWilsonImplR> GparityMobiusEOFAFermionR;
|
typedef MobiusEOFAFermion<GparityWilsonImplR> GparityMobiusEOFAFermionR;
|
||||||
typedef MobiusEOFAFermion<GparityWilsonImplF> GparityMobiusEOFAFermionF;
|
typedef MobiusEOFAFermion<GparityWilsonImplF> GparityMobiusEOFAFermionF;
|
||||||
typedef MobiusEOFAFermion<GparityWilsonImplD> GparityMobiusEOFAFermionD;
|
typedef MobiusEOFAFermion<GparityWilsonImplD> GparityMobiusEOFAFermionD;
|
||||||
|
|
||||||
typedef MobiusEOFAFermion<GparityWilsonImplRL> GparityMobiusEOFAFermionRL;
|
//typedef MobiusEOFAFermion<GparityWilsonImplRL> GparityMobiusEOFAFermionRL;
|
||||||
typedef MobiusEOFAFermion<GparityWilsonImplFH> GparityMobiusEOFAFermionFH;
|
//typedef MobiusEOFAFermion<GparityWilsonImplFH> GparityMobiusEOFAFermionFH;
|
||||||
typedef MobiusEOFAFermion<GparityWilsonImplDF> GparityMobiusEOFAFermionDF;
|
//typedef MobiusEOFAFermion<GparityWilsonImplDF> GparityMobiusEOFAFermionDF;
|
||||||
|
|
||||||
typedef ImprovedStaggeredFermion<StaggeredImplR> ImprovedStaggeredFermionR;
|
typedef ImprovedStaggeredFermion<StaggeredImplR> ImprovedStaggeredFermionR;
|
||||||
typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF;
|
typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF;
|
||||||
|
@ -327,8 +327,8 @@ typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffReal> Gparit
|
|||||||
typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffReal> GparityWilsonImplF; // Float
|
typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffReal> GparityWilsonImplF; // Float
|
||||||
typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffReal> GparityWilsonImplD; // Double
|
typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffReal> GparityWilsonImplD; // Double
|
||||||
|
|
||||||
typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplRL; // Real.. whichever prec
|
//typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplRL; // Real.. whichever prec
|
||||||
typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplFH; // Float
|
//typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplFH; // Float
|
||||||
typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplDF; // Double
|
//typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplDF; // Double
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
@ -68,11 +68,12 @@ public:
|
|||||||
/*****************************************************/
|
/*****************************************************/
|
||||||
/* Compress includes precision change if mpi data is not same */
|
/* Compress includes precision change if mpi data is not same */
|
||||||
/*****************************************************/
|
/*****************************************************/
|
||||||
template<class _SiteHalfSpinor, class _SiteSpinor>
|
accelerator_inline void Compress(SiteHalfSpinor &buf,const SiteSpinor &in) const {
|
||||||
accelerator_inline void Compress(_SiteHalfSpinor *buf,Integer o,const _SiteSpinor &in) const {
|
typedef decltype(coalescedRead(buf)) sobj;
|
||||||
_SiteHalfSpinor tmp;
|
sobj sp;
|
||||||
projector::Proj(tmp,in,mu,dag);
|
auto sin = coalescedRead(in);
|
||||||
vstream(buf[o],tmp);
|
projector::Proj(sp,sin,mu,dag);
|
||||||
|
coalescedWrite(buf,sp);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*****************************************************/
|
/*****************************************************/
|
||||||
@ -82,13 +83,18 @@ public:
|
|||||||
const SiteHalfSpinor * __restrict__ vp0,
|
const SiteHalfSpinor * __restrict__ vp0,
|
||||||
const SiteHalfSpinor * __restrict__ vp1,
|
const SiteHalfSpinor * __restrict__ vp1,
|
||||||
Integer type,Integer o) const {
|
Integer type,Integer o) const {
|
||||||
|
#ifdef GRID_SIMT
|
||||||
|
exchangeSIMT(mp[2*o],mp[2*o+1],vp0[o],vp1[o],type);
|
||||||
|
#else
|
||||||
SiteHalfSpinor tmp1;
|
SiteHalfSpinor tmp1;
|
||||||
SiteHalfSpinor tmp2;
|
SiteHalfSpinor tmp2;
|
||||||
exchange(tmp1,tmp2,vp0[o],vp1[o],type);
|
exchange(tmp1,tmp2,vp0[o],vp1[o],type);
|
||||||
vstream(mp[2*o ],tmp1);
|
vstream(mp[2*o ],tmp1);
|
||||||
vstream(mp[2*o+1],tmp2);
|
vstream(mp[2*o+1],tmp2);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*****************************************************/
|
/*****************************************************/
|
||||||
/* Have a decompression step if mpi data is not same */
|
/* Have a decompression step if mpi data is not same */
|
||||||
/*****************************************************/
|
/*****************************************************/
|
||||||
@ -105,6 +111,28 @@ public:
|
|||||||
const SiteSpinor * __restrict__ in,
|
const SiteSpinor * __restrict__ in,
|
||||||
Integer j,Integer k, Integer m,Integer type) const
|
Integer j,Integer k, Integer m,Integer type) const
|
||||||
{
|
{
|
||||||
|
#ifdef GRID_SIMT
|
||||||
|
typedef SiteSpinor vobj;
|
||||||
|
typedef SiteHalfSpinor hvobj;
|
||||||
|
typedef decltype(coalescedRead(*in)) sobj;
|
||||||
|
typedef decltype(coalescedRead(*out0)) hsobj;
|
||||||
|
|
||||||
|
unsigned int Nsimd = vobj::Nsimd();
|
||||||
|
unsigned int mask = Nsimd >> (type + 1);
|
||||||
|
int lane = acceleratorSIMTlane(Nsimd);
|
||||||
|
int j0 = lane &(~mask); // inner coor zero
|
||||||
|
int j1 = lane |(mask) ; // inner coor one
|
||||||
|
const vobj *vp0 = &in[k];
|
||||||
|
const vobj *vp1 = &in[m];
|
||||||
|
const vobj *vp = (lane&mask) ? vp1:vp0;
|
||||||
|
auto sa = coalescedRead(*vp,j0);
|
||||||
|
auto sb = coalescedRead(*vp,j1);
|
||||||
|
hsobj psa, psb;
|
||||||
|
projector::Proj(psa,sa,mu,dag);
|
||||||
|
projector::Proj(psb,sb,mu,dag);
|
||||||
|
coalescedWrite(out0[j],psa);
|
||||||
|
coalescedWrite(out1[j],psb);
|
||||||
|
#else
|
||||||
SiteHalfSpinor temp1, temp2;
|
SiteHalfSpinor temp1, temp2;
|
||||||
SiteHalfSpinor temp3, temp4;
|
SiteHalfSpinor temp3, temp4;
|
||||||
projector::Proj(temp1,in[k],mu,dag);
|
projector::Proj(temp1,in[k],mu,dag);
|
||||||
@ -112,6 +140,7 @@ public:
|
|||||||
exchange(temp3,temp4,temp1,temp2,type);
|
exchange(temp3,temp4,temp1,temp2,type);
|
||||||
vstream(out0[j],temp3);
|
vstream(out0[j],temp3);
|
||||||
vstream(out1[j],temp4);
|
vstream(out1[j],temp4);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/*****************************************************/
|
/*****************************************************/
|
||||||
@ -121,6 +150,7 @@ public:
|
|||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#if 0
|
||||||
template<class _HCspinor,class _Hspinor,class _Spinor, class projector>
|
template<class _HCspinor,class _Hspinor,class _Spinor, class projector>
|
||||||
class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
|
class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
|
||||||
typename std::enable_if<!std::is_same<_HCspinor,_Hspinor>::value>::type >
|
typename std::enable_if<!std::is_same<_HCspinor,_Hspinor>::value>::type >
|
||||||
@ -149,13 +179,23 @@ public:
|
|||||||
/*****************************************************/
|
/*****************************************************/
|
||||||
/* Compress includes precision change if mpi data is not same */
|
/* Compress includes precision change if mpi data is not same */
|
||||||
/*****************************************************/
|
/*****************************************************/
|
||||||
template<class _SiteHalfSpinor, class _SiteSpinor>
|
accelerator_inline void Compress(SiteHalfSpinor &buf,const SiteSpinor &in) const {
|
||||||
accelerator_inline void Compress(_SiteHalfSpinor *buf,Integer o,const _SiteSpinor &in) const {
|
SiteHalfSpinor hsp;
|
||||||
_SiteHalfSpinor hsp;
|
|
||||||
SiteHalfCommSpinor *hbuf = (SiteHalfCommSpinor *)buf;
|
SiteHalfCommSpinor *hbuf = (SiteHalfCommSpinor *)buf;
|
||||||
projector::Proj(hsp,in,mu,dag);
|
projector::Proj(hsp,in,mu,dag);
|
||||||
precisionChange((vComplexLow *)&hbuf[o],(vComplexHigh *)&hsp,Nw);
|
precisionChange((vComplexLow *)&hbuf[o],(vComplexHigh *)&hsp,Nw);
|
||||||
}
|
}
|
||||||
|
accelerator_inline void Compress(SiteHalfSpinor &buf,const SiteSpinor &in) const {
|
||||||
|
#ifdef GRID_SIMT
|
||||||
|
typedef decltype(coalescedRead(buf)) sobj;
|
||||||
|
sobj sp;
|
||||||
|
auto sin = coalescedRead(in);
|
||||||
|
projector::Proj(sp,sin,mu,dag);
|
||||||
|
coalescedWrite(buf,sp);
|
||||||
|
#else
|
||||||
|
projector::Proj(buf,in,mu,dag);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
/*****************************************************/
|
/*****************************************************/
|
||||||
/* Exchange includes precision change if mpi data is not same */
|
/* Exchange includes precision change if mpi data is not same */
|
||||||
@ -203,6 +243,7 @@ public:
|
|||||||
accelerator_inline bool DecompressionStep(void) const { return true; }
|
accelerator_inline bool DecompressionStep(void) const { return true; }
|
||||||
|
|
||||||
};
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
#define DECLARE_PROJ(Projector,Compressor,spProj) \
|
#define DECLARE_PROJ(Projector,Compressor,spProj) \
|
||||||
class Projector { \
|
class Projector { \
|
||||||
@ -253,33 +294,8 @@ public:
|
|||||||
typedef typename Base::View_type View_type;
|
typedef typename Base::View_type View_type;
|
||||||
typedef typename Base::StencilVector StencilVector;
|
typedef typename Base::StencilVector StencilVector;
|
||||||
|
|
||||||
double timer0;
|
void ZeroCountersi(void) { }
|
||||||
double timer1;
|
void Reporti(int calls) { }
|
||||||
double timer2;
|
|
||||||
double timer3;
|
|
||||||
double timer4;
|
|
||||||
double timer5;
|
|
||||||
double timer6;
|
|
||||||
uint64_t callsi;
|
|
||||||
void ZeroCountersi(void)
|
|
||||||
{
|
|
||||||
timer0=0;
|
|
||||||
timer1=0;
|
|
||||||
timer2=0;
|
|
||||||
timer3=0;
|
|
||||||
timer4=0;
|
|
||||||
timer5=0;
|
|
||||||
timer6=0;
|
|
||||||
callsi=0;
|
|
||||||
}
|
|
||||||
void Reporti(int calls)
|
|
||||||
{
|
|
||||||
if ( timer0 ) std::cout << GridLogMessage << " timer0 (HaloGatherOpt) " <<timer0/calls <<std::endl;
|
|
||||||
if ( timer1 ) std::cout << GridLogMessage << " timer1 (Communicate) " <<timer1/calls <<std::endl;
|
|
||||||
if ( timer2 ) std::cout << GridLogMessage << " timer2 (CommsMerge ) " <<timer2/calls <<std::endl;
|
|
||||||
if ( timer3 ) std::cout << GridLogMessage << " timer3 (commsMergeShm) " <<timer3/calls <<std::endl;
|
|
||||||
if ( timer4 ) std::cout << GridLogMessage << " timer4 " <<timer4 <<std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<int> surface_list;
|
std::vector<int> surface_list;
|
||||||
|
|
||||||
@ -321,26 +337,18 @@ public:
|
|||||||
{
|
{
|
||||||
std::vector<std::vector<CommsRequest_t> > reqs;
|
std::vector<std::vector<CommsRequest_t> > reqs;
|
||||||
this->HaloExchangeOptGather(source,compress);
|
this->HaloExchangeOptGather(source,compress);
|
||||||
double t1=usecond();
|
|
||||||
// Asynchronous MPI calls multidirectional, Isend etc...
|
// Asynchronous MPI calls multidirectional, Isend etc...
|
||||||
// Non-overlapped directions within a thread. Asynchronous calls except MPI3, threaded up to comm threads ways.
|
// Non-overlapped directions within a thread. Asynchronous calls except MPI3, threaded up to comm threads ways.
|
||||||
this->Communicate();
|
this->Communicate();
|
||||||
double t2=usecond(); timer1 += t2-t1;
|
|
||||||
this->CommsMerge(compress);
|
this->CommsMerge(compress);
|
||||||
double t3=usecond(); timer2 += t3-t2;
|
|
||||||
this->CommsMergeSHM(compress);
|
this->CommsMergeSHM(compress);
|
||||||
double t4=usecond(); timer3 += t4-t3;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class compressor>
|
template <class compressor>
|
||||||
void HaloExchangeOptGather(const Lattice<vobj> &source,compressor &compress)
|
void HaloExchangeOptGather(const Lattice<vobj> &source,compressor &compress)
|
||||||
{
|
{
|
||||||
this->Prepare();
|
this->Prepare();
|
||||||
double t0=usecond();
|
|
||||||
this->HaloGatherOpt(source,compress);
|
this->HaloGatherOpt(source,compress);
|
||||||
double t1=usecond();
|
|
||||||
timer0 += t1-t0;
|
|
||||||
callsi++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class compressor>
|
template <class compressor>
|
||||||
@ -352,12 +360,9 @@ public:
|
|||||||
typedef typename compressor::SiteHalfSpinor SiteHalfSpinor;
|
typedef typename compressor::SiteHalfSpinor SiteHalfSpinor;
|
||||||
typedef typename compressor::SiteHalfCommSpinor SiteHalfCommSpinor;
|
typedef typename compressor::SiteHalfCommSpinor SiteHalfCommSpinor;
|
||||||
|
|
||||||
this->mpi3synctime_g-=usecond();
|
|
||||||
this->_grid->StencilBarrier();
|
this->_grid->StencilBarrier();
|
||||||
this->mpi3synctime_g+=usecond();
|
|
||||||
|
|
||||||
assert(source.Grid()==this->_grid);
|
assert(source.Grid()==this->_grid);
|
||||||
this->halogtime-=usecond();
|
|
||||||
|
|
||||||
this->u_comm_offset=0;
|
this->u_comm_offset=0;
|
||||||
|
|
||||||
@ -393,7 +398,6 @@ public:
|
|||||||
}
|
}
|
||||||
this->face_table_computed=1;
|
this->face_table_computed=1;
|
||||||
assert(this->u_comm_offset==this->_unified_buffer_size);
|
assert(this->u_comm_offset==this->_unified_buffer_size);
|
||||||
this->halogtime+=usecond();
|
|
||||||
accelerator_barrier();
|
accelerator_barrier();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -243,17 +243,17 @@ typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffReal > WilsonImplR
|
|||||||
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffReal > WilsonImplF; // Float
|
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffReal > WilsonImplF; // Float
|
||||||
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffReal > WilsonImplD; // Double
|
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffReal > WilsonImplD; // Double
|
||||||
|
|
||||||
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplRL; // Real.. whichever prec
|
//typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplRL; // Real.. whichever prec
|
||||||
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplFH; // Float
|
//typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplFH; // Float
|
||||||
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplDF; // Double
|
//typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplDF; // Double
|
||||||
|
|
||||||
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffComplex > ZWilsonImplR; // Real.. whichever prec
|
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffComplex > ZWilsonImplR; // Real.. whichever prec
|
||||||
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplex > ZWilsonImplF; // Float
|
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplex > ZWilsonImplF; // Float
|
||||||
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplex > ZWilsonImplD; // Double
|
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplex > ZWilsonImplD; // Double
|
||||||
|
|
||||||
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplRL; // Real.. whichever prec
|
//typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplRL; // Real.. whichever prec
|
||||||
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplFH; // Float
|
//typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplFH; // Float
|
||||||
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplDF; // Double
|
//typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplDF; // Double
|
||||||
|
|
||||||
typedef WilsonImpl<vComplex, AdjointRepresentation, CoeffReal > WilsonAdjImplR; // Real.. whichever prec
|
typedef WilsonImpl<vComplex, AdjointRepresentation, CoeffReal > WilsonAdjImplR; // Real.. whichever prec
|
||||||
typedef WilsonImpl<vComplexF, AdjointRepresentation, CoeffReal > WilsonAdjImplF; // Float
|
typedef WilsonImpl<vComplexF, AdjointRepresentation, CoeffReal > WilsonAdjImplF; // Float
|
||||||
|
@ -880,7 +880,7 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::vector<RealD> G_s(Ls,1.0);
|
std::vector<RealD> G_s(Ls,1.0);
|
||||||
Integer sign = 1; // sign flip for vector/tadpole
|
RealD sign = 1; // sign flip for vector/tadpole
|
||||||
if ( curr_type == Current::Axial ) {
|
if ( curr_type == Current::Axial ) {
|
||||||
for(int s=0;s<Ls/2;s++){
|
for(int s=0;s<Ls/2;s++){
|
||||||
G_s[s] = -1.0;
|
G_s[s] = -1.0;
|
||||||
@ -901,8 +901,8 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
|||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
|
|
||||||
int sp = (s+1)%Ls;
|
int sp = (s+1)%Ls;
|
||||||
int sr = Ls-1-s;
|
// int sr = Ls-1-s;
|
||||||
int srp= (sr+1)%Ls;
|
// int srp= (sr+1)%Ls;
|
||||||
|
|
||||||
// Mobius parameters
|
// Mobius parameters
|
||||||
auto b=this->bs[s];
|
auto b=this->bs[s];
|
||||||
|
@ -73,17 +73,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
@ -102,17 +102,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
@ -131,17 +131,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -165,17 +165,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
@ -194,17 +194,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
@ -223,17 +223,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -280,17 +280,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
// template<> void
|
||||||
WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
// WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
// #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
// template<> void
|
||||||
WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
// WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
// #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
@ -309,17 +309,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
// template<> void
|
||||||
WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
// WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
// #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
// template<> void
|
||||||
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
// WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
// #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
@ -338,17 +338,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
// template<> void
|
||||||
WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
// WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
// #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
// template<> void
|
||||||
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
// WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
// #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////
|
||||||
@ -371,17 +371,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
// template<> void
|
||||||
WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
// WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
// #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
// template<> void
|
||||||
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
// WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
// #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
@ -400,17 +400,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
// template<> void
|
||||||
WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
// WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
// #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
// template<> void
|
||||||
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
// WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
// #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
@ -429,17 +429,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
// template<> void
|
||||||
WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
// WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
// #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
// template<> void
|
||||||
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
// WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
// #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -74,15 +74,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
//
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
#define INTERIOR
|
#define INTERIOR
|
||||||
@ -97,15 +97,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
//
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
@ -121,15 +121,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
//
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////
|
||||||
// XYZT vectorised, dag Kernel, single
|
// XYZT vectorised, dag Kernel, single
|
||||||
@ -148,15 +148,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
//
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
#define INTERIOR
|
#define INTERIOR
|
||||||
@ -171,15 +171,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
//
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
#undef INTERIOR
|
#undef INTERIOR
|
||||||
@ -194,15 +194,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
//
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
#undef MAYBEPERM
|
#undef MAYBEPERM
|
||||||
#undef MULT_2SPIN
|
#undef MULT_2SPIN
|
||||||
@ -228,14 +228,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSite(StencilView &st, DoubledGaugeF
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
#define INTERIOR
|
#define INTERIOR
|
||||||
@ -249,14 +249,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteInt(StencilView &st, DoubledGau
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
#undef INTERIOR
|
#undef INTERIOR
|
||||||
@ -273,15 +273,15 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteExt(StencilView &st, DoubledGau
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
//
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////
|
||||||
// Ls vectorised, dag Kernel, single
|
// Ls vectorised, dag Kernel, single
|
||||||
@ -299,14 +299,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDag(StencilView &st, DoubledGau
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
//WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
//WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
#define INTERIOR
|
#define INTERIOR
|
||||||
@ -320,14 +320,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDagInt(StencilView &st, Doubled
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
//WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
//WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
#undef INTERIOR
|
#undef INTERIOR
|
||||||
@ -341,14 +341,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDagExt(StencilView &st, Doubled
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
//WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
//WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
#endif // VEC 5D
|
#endif // VEC 5D
|
||||||
|
|
||||||
@ -392,14 +392,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
#define INTERIOR
|
#define INTERIOR
|
||||||
@ -413,14 +413,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
#undef INTERIOR
|
#undef INTERIOR
|
||||||
@ -434,14 +434,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////
|
||||||
// XYZT vectorised, dag Kernel, single
|
// XYZT vectorised, dag Kernel, single
|
||||||
@ -459,14 +459,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
#define INTERIOR
|
#define INTERIOR
|
||||||
@ -480,14 +480,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
#undef INTERIOR
|
#undef INTERIOR
|
||||||
@ -501,14 +501,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
#undef MAYBEPERM
|
#undef MAYBEPERM
|
||||||
#undef MULT_2SPIN
|
#undef MULT_2SPIN
|
||||||
@ -533,14 +533,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSite(StencilView &st, DoubledGaugeF
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
#define INTERIOR
|
#define INTERIOR
|
||||||
@ -554,14 +554,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteInt(StencilView &st, DoubledGau
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
#undef INTERIOR
|
#undef INTERIOR
|
||||||
@ -577,14 +577,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteExt(StencilView &st, DoubledGau
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
//WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////
|
||||||
// Ls vectorised, dag Kernel, single
|
// Ls vectorised, dag Kernel, single
|
||||||
@ -602,14 +602,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDag(StencilView &st, DoubledGau
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
//WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
//WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
#define INTERIOR
|
#define INTERIOR
|
||||||
@ -623,14 +623,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDagInt(StencilView &st, Doubled
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
//WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
//WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
#undef INTERIOR
|
#undef INTERIOR
|
||||||
@ -645,14 +645,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDagExt(StencilView &st, Doubled
|
|||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
//WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
template<> void
|
//template<> void
|
||||||
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
//WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
#endif // VEC 5D
|
#endif // VEC 5D
|
||||||
|
|
||||||
|
@ -1 +0,0 @@
|
|||||||
../CayleyFermion5DInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../ContinuedFractionFermion5DInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../DomainWallEOFAFermionInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../MobiusEOFAFermionInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../PartialFractionFermion5DInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../WilsonCloverFermionInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../WilsonFermion5DInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../WilsonFermionInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../WilsonKernelsInstantiationGparity.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../WilsonTMFermionInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
#define IMPLEMENTATION GparityWilsonImplDF
|
|
@ -1 +0,0 @@
|
|||||||
../CayleyFermion5DInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../ContinuedFractionFermion5DInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../DomainWallEOFAFermionInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../MobiusEOFAFermionInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../PartialFractionFermion5DInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../WilsonCloverFermionInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../WilsonFermion5DInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../WilsonFermionInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../WilsonKernelsInstantiationGparity.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../WilsonTMFermionInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
#define IMPLEMENTATION GparityWilsonImplFH
|
|
@ -1 +0,0 @@
|
|||||||
../CayleyFermion5DInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../ContinuedFractionFermion5DInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../DomainWallEOFAFermionInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../MobiusEOFAFermionInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../PartialFractionFermion5DInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../WilsonCloverFermionInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../WilsonFermion5DInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../WilsonFermionInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../WilsonTMFermionInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
#define IMPLEMENTATION WilsonImplDF
|
|
@ -1 +0,0 @@
|
|||||||
../CayleyFermion5DInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../ContinuedFractionFermion5DInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../DomainWallEOFAFermionInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../MobiusEOFAFermionInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../PartialFractionFermion5DInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../WilsonCloverFermionInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../WilsonFermion5DInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../WilsonFermionInstantiation.cc.master
|
|
@ -1,51 +0,0 @@
|
|||||||
/*************************************************************************************
|
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
|
||||||
|
|
||||||
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
|
|
||||||
|
|
||||||
Copyright (C) 2015, 2020
|
|
||||||
|
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|
||||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
|
||||||
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
|
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution
|
|
||||||
directory
|
|
||||||
*************************************************************************************/
|
|
||||||
/* END LEGAL */
|
|
||||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
|
||||||
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
|
|
||||||
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
|
|
||||||
|
|
||||||
#ifndef AVX512
|
|
||||||
#ifndef QPX
|
|
||||||
#ifndef A64FX
|
|
||||||
#ifndef A64FXFIXEDSIZE
|
|
||||||
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
|
||||||
|
|
||||||
#include "impl.h"
|
|
||||||
template class WilsonKernels<IMPLEMENTATION>;
|
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
|
@ -1 +0,0 @@
|
|||||||
../WilsonTMFermionInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
#define IMPLEMENTATION WilsonImplFH
|
|
@ -1 +0,0 @@
|
|||||||
../CayleyFermion5DInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../ContinuedFractionFermion5DInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../DomainWallEOFAFermionInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../MobiusEOFAFermionInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../PartialFractionFermion5DInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../WilsonFermion5DInstantiation.cc.master
|
|
@ -1,51 +0,0 @@
|
|||||||
/*************************************************************************************
|
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
|
||||||
|
|
||||||
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
|
|
||||||
|
|
||||||
Copyright (C) 2015, 2020
|
|
||||||
|
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|
||||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
|
||||||
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
|
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution
|
|
||||||
directory
|
|
||||||
*************************************************************************************/
|
|
||||||
/* END LEGAL */
|
|
||||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
|
||||||
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
|
|
||||||
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
|
|
||||||
|
|
||||||
#ifndef AVX512
|
|
||||||
#ifndef QPX
|
|
||||||
#ifndef A64FX
|
|
||||||
#ifndef A64FXFIXEDSIZE
|
|
||||||
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
|
||||||
|
|
||||||
#include "impl.h"
|
|
||||||
template class WilsonKernels<IMPLEMENTATION>;
|
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
|
@ -1 +0,0 @@
|
|||||||
#define IMPLEMENTATION ZWilsonImplDF
|
|
@ -1 +0,0 @@
|
|||||||
../CayleyFermion5DInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../ContinuedFractionFermion5DInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../DomainWallEOFAFermionInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../MobiusEOFAFermionInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../PartialFractionFermion5DInstantiation.cc.master
|
|
@ -1 +0,0 @@
|
|||||||
../WilsonFermion5DInstantiation.cc.master
|
|
@ -1,51 +0,0 @@
|
|||||||
/*************************************************************************************
|
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
|
||||||
|
|
||||||
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
|
|
||||||
|
|
||||||
Copyright (C) 2015, 2020
|
|
||||||
|
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|
||||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
|
||||||
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
|
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution
|
|
||||||
directory
|
|
||||||
*************************************************************************************/
|
|
||||||
/* END LEGAL */
|
|
||||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
|
||||||
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
|
|
||||||
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
|
|
||||||
|
|
||||||
#ifndef AVX512
|
|
||||||
#ifndef QPX
|
|
||||||
#ifndef A64FX
|
|
||||||
#ifndef A64FXFIXEDSIZE
|
|
||||||
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
|
||||||
|
|
||||||
#include "impl.h"
|
|
||||||
template class WilsonKernels<IMPLEMENTATION>;
|
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
|
@ -1 +0,0 @@
|
|||||||
#define IMPLEMENTATION ZWilsonImplFH
|
|
@ -9,8 +9,6 @@ STAG5_IMPL_LIST=""
|
|||||||
WILSON_IMPL_LIST=" \
|
WILSON_IMPL_LIST=" \
|
||||||
WilsonImplF \
|
WilsonImplF \
|
||||||
WilsonImplD \
|
WilsonImplD \
|
||||||
WilsonImplFH \
|
|
||||||
WilsonImplDF \
|
|
||||||
WilsonAdjImplF \
|
WilsonAdjImplF \
|
||||||
WilsonAdjImplD \
|
WilsonAdjImplD \
|
||||||
WilsonTwoIndexSymmetricImplF \
|
WilsonTwoIndexSymmetricImplF \
|
||||||
@ -18,26 +16,17 @@ WILSON_IMPL_LIST=" \
|
|||||||
WilsonTwoIndexAntiSymmetricImplF \
|
WilsonTwoIndexAntiSymmetricImplF \
|
||||||
WilsonTwoIndexAntiSymmetricImplD \
|
WilsonTwoIndexAntiSymmetricImplD \
|
||||||
GparityWilsonImplF \
|
GparityWilsonImplF \
|
||||||
GparityWilsonImplD \
|
GparityWilsonImplD "
|
||||||
GparityWilsonImplFH \
|
|
||||||
GparityWilsonImplDF"
|
|
||||||
|
|
||||||
DWF_IMPL_LIST=" \
|
DWF_IMPL_LIST=" \
|
||||||
WilsonImplF \
|
WilsonImplF \
|
||||||
WilsonImplD \
|
WilsonImplD \
|
||||||
WilsonImplFH \
|
|
||||||
WilsonImplDF \
|
|
||||||
ZWilsonImplF \
|
ZWilsonImplF \
|
||||||
ZWilsonImplD \
|
ZWilsonImplD "
|
||||||
ZWilsonImplFH \
|
|
||||||
ZWilsonImplDF "
|
|
||||||
|
|
||||||
GDWF_IMPL_LIST=" \
|
GDWF_IMPL_LIST=" \
|
||||||
GparityWilsonImplF \
|
GparityWilsonImplF \
|
||||||
GparityWilsonImplD \
|
GparityWilsonImplD "
|
||||||
GparityWilsonImplFH \
|
|
||||||
GparityWilsonImplDF"
|
|
||||||
|
|
||||||
|
|
||||||
IMPL_LIST="$STAG_IMPL_LIST $WILSON_IMPL_LIST $DWF_IMPL_LIST $GDWF_IMPL_LIST"
|
IMPL_LIST="$STAG_IMPL_LIST $WILSON_IMPL_LIST $DWF_IMPL_LIST $GDWF_IMPL_LIST"
|
||||||
|
|
||||||
|
@ -40,7 +40,7 @@ See the full license in the file "LICENSE" in the top level distribution directo
|
|||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
// Dirac algebra adjoint operator (not in to overload other adj)
|
// Dirac algebra adjoint operator (not in to overload other adj)
|
||||||
accelerator_inline Gamma adj(const Gamma &g)
|
inline Gamma adj(const Gamma &g)
|
||||||
{
|
{
|
||||||
return Gamma (Gamma::adj[g.g]);
|
return Gamma (Gamma::adj[g.g]);
|
||||||
}
|
}
|
||||||
@ -48,7 +48,7 @@ accelerator_inline Gamma adj(const Gamma &g)
|
|||||||
|
|
||||||
|
|
||||||
// Dirac algebra mutliplication operator
|
// Dirac algebra mutliplication operator
|
||||||
accelerator_inline Gamma operator*(const Gamma &g1, const Gamma &g2)
|
inline Gamma operator*(const Gamma &g1, const Gamma &g2)
|
||||||
{
|
{
|
||||||
return Gamma (Gamma::mul[g1.g][g2.g]);
|
return Gamma (Gamma::mul[g1.g][g2.g]);
|
||||||
}
|
}
|
||||||
|
@ -2,14 +2,11 @@
|
|||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
|
Source file: ./lib/serialisation/BaseIO.h
|
||||||
|
|
||||||
Copyright (C) 2015, 2020
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Michael Marshall <michael.marshall@ed.ac.uk>
|
||||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
|
||||||
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
|
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
@ -25,27 +22,14 @@ You should have received a copy of the GNU General Public License along
|
|||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
directory
|
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
|
||||||
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
|
|
||||||
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
|
|
||||||
|
|
||||||
#ifndef AVX512
|
#include <Grid/GridCore.h>
|
||||||
#ifndef QPX
|
|
||||||
#ifndef A64FX
|
|
||||||
#ifndef A64FXFIXEDSIZE
|
|
||||||
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid)
|
||||||
|
|
||||||
#include "impl.h"
|
std::uint64_t EigenIO::EigenResizeCounter(0);
|
||||||
template class WilsonKernels<IMPLEMENTATION>;
|
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid)
|
@ -9,6 +9,7 @@
|
|||||||
Author: Antonin Portelli <antonin.portelli@me.com>
|
Author: Antonin Portelli <antonin.portelli@me.com>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||||
|
Author: Michael Marshall <michael.marshall@ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
@ -30,6 +31,7 @@ Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
|||||||
#ifndef GRID_SERIALISATION_ABSTRACT_READER_H
|
#ifndef GRID_SERIALISATION_ABSTRACT_READER_H
|
||||||
#define GRID_SERIALISATION_ABSTRACT_READER_H
|
#define GRID_SERIALISATION_ABSTRACT_READER_H
|
||||||
|
|
||||||
|
#include <atomic>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
#include <Grid/tensors/Tensors.h>
|
#include <Grid/tensors/Tensors.h>
|
||||||
#include <Grid/serialisation/VectorUtils.h>
|
#include <Grid/serialisation/VectorUtils.h>
|
||||||
@ -110,6 +112,10 @@ namespace Grid {
|
|||||||
template <typename ET>
|
template <typename ET>
|
||||||
inline typename std::enable_if<is_tensor_of_container<ET>::value, typename Traits<ET>::scalar_type *>::type
|
inline typename std::enable_if<is_tensor_of_container<ET>::value, typename Traits<ET>::scalar_type *>::type
|
||||||
getFirstScalar(ET &eigenTensor) { return eigenTensor.data()->begin(); }
|
getFirstScalar(ET &eigenTensor) { return eigenTensor.data()->begin(); }
|
||||||
|
|
||||||
|
// Counter for resized EigenTensors (poor man's substitute for allocator)
|
||||||
|
// Defined in BinaryIO.cc
|
||||||
|
extern std::uint64_t EigenResizeCounter;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Abstract writer/reader classes ////////////////////////////////////////////
|
// Abstract writer/reader classes ////////////////////////////////////////////
|
||||||
@ -497,8 +503,14 @@ namespace Grid {
|
|||||||
typename std::enable_if<EigenIO::is_tensor_variable<ETensor>::value, void>::type
|
typename std::enable_if<EigenIO::is_tensor_variable<ETensor>::value, void>::type
|
||||||
Reader<T>::Reshape(ETensor &t, const std::array<typename ETensor::Index, ETensor::NumDimensions> &dims )
|
Reader<T>::Reshape(ETensor &t, const std::array<typename ETensor::Index, ETensor::NumDimensions> &dims )
|
||||||
{
|
{
|
||||||
|
#ifdef GRID_OMP
|
||||||
|
// The memory counter is the reason this must be done from the primary thread
|
||||||
|
assert(omp_in_parallel()==0 && "Deserialisation which resizes Eigen tensor must happen from primary thread");
|
||||||
|
#endif
|
||||||
|
EigenIO::EigenResizeCounter -= static_cast<uint64_t>(t.size()) * sizeof(typename ETensor::Scalar);
|
||||||
//t.reshape( dims );
|
//t.reshape( dims );
|
||||||
t.resize( dims );
|
t.resize( dims );
|
||||||
|
EigenIO::EigenResizeCounter += static_cast<uint64_t>(t.size()) * sizeof(typename ETensor::Scalar);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
|
@ -1,3 +1,34 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./Grid/serialisation/VectorUtils.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Antonin Portelli <antonin.portelli@me.com>
|
||||||
|
Author: Peter Boyle <paboyle@ed.ac.uk>
|
||||||
|
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||||
|
Author: Michael Marshall <michael.marshall@ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
|
||||||
#include <Grid/Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
|
@ -1,3 +1,34 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./Grid/serialisation/VectorUtils.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ed.ac.uk>
|
||||||
|
Author: Antonin Portelli <antonin.portelli@me.com>
|
||||||
|
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||||
|
Author: Michael Marshall <michael.marshall@ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
|
||||||
#ifndef GRID_SERIALISATION_HDF5_H
|
#ifndef GRID_SERIALISATION_HDF5_H
|
||||||
#define GRID_SERIALISATION_HDF5_H
|
#define GRID_SERIALISATION_HDF5_H
|
||||||
|
|
||||||
@ -34,11 +65,13 @@ namespace Grid
|
|||||||
template <typename U>
|
template <typename U>
|
||||||
void writeDefault(const std::string &s, const U &x);
|
void writeDefault(const std::string &s, const U &x);
|
||||||
template <typename U>
|
template <typename U>
|
||||||
typename std::enable_if<element<std::vector<U>>::is_number, void>::type
|
void writeRagged(const std::string &s, const std::vector<U> &x);
|
||||||
|
template <typename U>
|
||||||
|
typename std::enable_if<is_flattenable<std::vector<U>>::value>::type
|
||||||
writeDefault(const std::string &s, const std::vector<U> &x);
|
writeDefault(const std::string &s, const std::vector<U> &x);
|
||||||
template <typename U>
|
template <typename U>
|
||||||
typename std::enable_if<!element<std::vector<U>>::is_number, void>::type
|
typename std::enable_if<!is_flattenable<std::vector<U>>::value>::type
|
||||||
writeDefault(const std::string &s, const std::vector<U> &x);
|
writeDefault(const std::string &s, const std::vector<U> &x) { writeRagged(s, x); }
|
||||||
template <typename U>
|
template <typename U>
|
||||||
void writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements);
|
void writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements);
|
||||||
H5NS::Group & getGroup(void);
|
H5NS::Group & getGroup(void);
|
||||||
@ -64,11 +97,13 @@ namespace Grid
|
|||||||
template <typename U>
|
template <typename U>
|
||||||
void readDefault(const std::string &s, U &output);
|
void readDefault(const std::string &s, U &output);
|
||||||
template <typename U>
|
template <typename U>
|
||||||
typename std::enable_if<element<std::vector<U>>::is_number, void>::type
|
void readRagged(const std::string &s, std::vector<U> &x);
|
||||||
|
template <typename U>
|
||||||
|
typename std::enable_if<is_flattenable<std::vector<U>>::value>::type
|
||||||
readDefault(const std::string &s, std::vector<U> &x);
|
readDefault(const std::string &s, std::vector<U> &x);
|
||||||
template <typename U>
|
template <typename U>
|
||||||
typename std::enable_if<!element<std::vector<U>>::is_number, void>::type
|
typename std::enable_if<!is_flattenable<std::vector<U>>::value>::type
|
||||||
readDefault(const std::string &s, std::vector<U> &x);
|
readDefault(const std::string &s, std::vector<U> &x) { readRagged(s, x); }
|
||||||
template <typename U>
|
template <typename U>
|
||||||
void readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim);
|
void readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim);
|
||||||
H5NS::Group & getGroup(void);
|
H5NS::Group & getGroup(void);
|
||||||
@ -176,24 +211,30 @@ namespace Grid
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename U>
|
template <typename U>
|
||||||
typename std::enable_if<element<std::vector<U>>::is_number, void>::type
|
typename std::enable_if<is_flattenable<std::vector<U>>::value>::type
|
||||||
Hdf5Writer::writeDefault(const std::string &s, const std::vector<U> &x)
|
Hdf5Writer::writeDefault(const std::string &s, const std::vector<U> &x)
|
||||||
{
|
{
|
||||||
// alias to element type
|
if (isRegularShape(x))
|
||||||
typedef typename element<std::vector<U>>::type Element;
|
{
|
||||||
|
// alias to element type
|
||||||
|
using Scalar = typename is_flattenable<std::vector<U>>::type;
|
||||||
|
|
||||||
// flatten the vector and getting dimensions
|
// flatten the vector and getting dimensions
|
||||||
Flatten<std::vector<U>> flat(x);
|
Flatten<std::vector<U>> flat(x);
|
||||||
std::vector<size_t> dim;
|
std::vector<size_t> dim;
|
||||||
const auto &flatx = flat.getFlatVector();
|
const auto &flatx = flat.getFlatVector();
|
||||||
for (auto &d: flat.getDim())
|
for (auto &d: flat.getDim())
|
||||||
dim.push_back(d);
|
dim.push_back(d);
|
||||||
writeMultiDim<Element>(s, dim, &flatx[0], flatx.size());
|
writeMultiDim<Scalar>(s, dim, &flatx[0], flatx.size());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
writeRagged(s, x);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename U>
|
template <typename U>
|
||||||
typename std::enable_if<!element<std::vector<U>>::is_number, void>::type
|
void Hdf5Writer::writeRagged(const std::string &s, const std::vector<U> &x)
|
||||||
Hdf5Writer::writeDefault(const std::string &s, const std::vector<U> &x)
|
|
||||||
{
|
{
|
||||||
push(s);
|
push(s);
|
||||||
writeSingleAttribute(x.size(), HDF5_GRID_GUARD "vector_size",
|
writeSingleAttribute(x.size(), HDF5_GRID_GUARD "vector_size",
|
||||||
@ -229,7 +270,7 @@ namespace Grid
|
|||||||
void Hdf5Reader::readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim)
|
void Hdf5Reader::readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim)
|
||||||
{
|
{
|
||||||
// alias to element type
|
// alias to element type
|
||||||
typedef typename element<std::vector<U>>::type Element;
|
using Scalar = typename is_flattenable<std::vector<U>>::type;
|
||||||
|
|
||||||
// read the dimensions
|
// read the dimensions
|
||||||
H5NS::DataSpace dataSpace;
|
H5NS::DataSpace dataSpace;
|
||||||
@ -260,37 +301,44 @@ namespace Grid
|
|||||||
H5NS::DataSet dataSet;
|
H5NS::DataSet dataSet;
|
||||||
|
|
||||||
dataSet = group_.openDataSet(s);
|
dataSet = group_.openDataSet(s);
|
||||||
dataSet.read(buf.data(), Hdf5Type<Element>::type());
|
dataSet.read(buf.data(), Hdf5Type<Scalar>::type());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
H5NS::Attribute attribute;
|
H5NS::Attribute attribute;
|
||||||
|
|
||||||
attribute = group_.openAttribute(s);
|
attribute = group_.openAttribute(s);
|
||||||
attribute.read(Hdf5Type<Element>::type(), buf.data());
|
attribute.read(Hdf5Type<Scalar>::type(), buf.data());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename U>
|
template <typename U>
|
||||||
typename std::enable_if<element<std::vector<U>>::is_number, void>::type
|
typename std::enable_if<is_flattenable<std::vector<U>>::value>::type
|
||||||
Hdf5Reader::readDefault(const std::string &s, std::vector<U> &x)
|
Hdf5Reader::readDefault(const std::string &s, std::vector<U> &x)
|
||||||
{
|
{
|
||||||
// alias to element type
|
if (H5Lexists (group_.getId(), s.c_str(), H5P_DEFAULT) > 0
|
||||||
typedef typename element<std::vector<U>>::type Element;
|
&& H5Aexists_by_name(group_.getId(), s.c_str(), HDF5_GRID_GUARD "vector_size", H5P_DEFAULT ) > 0)
|
||||||
|
{
|
||||||
|
readRagged(s, x);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// alias to element type
|
||||||
|
using Scalar = typename is_flattenable<std::vector<U>>::type;
|
||||||
|
|
||||||
std::vector<size_t> dim;
|
std::vector<size_t> dim;
|
||||||
std::vector<Element> buf;
|
std::vector<Scalar> buf;
|
||||||
readMultiDim( s, buf, dim );
|
readMultiDim( s, buf, dim );
|
||||||
|
|
||||||
// reconstruct the multidimensional vector
|
// reconstruct the multidimensional vector
|
||||||
Reconstruct<std::vector<U>> r(buf, dim);
|
Reconstruct<std::vector<U>> r(buf, dim);
|
||||||
|
|
||||||
x = r.getVector();
|
x = r.getVector();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename U>
|
template <typename U>
|
||||||
typename std::enable_if<!element<std::vector<U>>::is_number, void>::type
|
void Hdf5Reader::readRagged(const std::string &s, std::vector<U> &x)
|
||||||
Hdf5Reader::readDefault(const std::string &s, std::vector<U> &x)
|
|
||||||
{
|
{
|
||||||
uint64_t size;
|
uint64_t size;
|
||||||
|
|
||||||
|
@ -118,13 +118,13 @@ static inline std::string SerialisableClassName(void) {return std::string(#cname
|
|||||||
static constexpr bool isEnum = false; \
|
static constexpr bool isEnum = false; \
|
||||||
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_MEMBER,__VA_ARGS__))\
|
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_MEMBER,__VA_ARGS__))\
|
||||||
template <typename T>\
|
template <typename T>\
|
||||||
static inline void write(Writer<T> &WR,const std::string &s, const cname &obj){ \
|
static inline void write(::Grid::Writer<T> &WR,const std::string &s, const cname &obj){ \
|
||||||
push(WR,s);\
|
push(WR,s);\
|
||||||
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_WRITE_MEMBER,__VA_ARGS__)) \
|
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_WRITE_MEMBER,__VA_ARGS__)) \
|
||||||
pop(WR);\
|
pop(WR);\
|
||||||
}\
|
}\
|
||||||
template <typename T>\
|
template <typename T>\
|
||||||
static inline void read(Reader<T> &RD,const std::string &s, cname &obj){ \
|
static inline void read(::Grid::Reader<T> &RD,const std::string &s, cname &obj){ \
|
||||||
if (!push(RD,s))\
|
if (!push(RD,s))\
|
||||||
{\
|
{\
|
||||||
std::cout << ::Grid::GridLogWarning << "IO: Cannot open node '" << s << "'" << std::endl; \
|
std::cout << ::Grid::GridLogWarning << "IO: Cannot open node '" << s << "'" << std::endl; \
|
||||||
|
@ -9,6 +9,7 @@
|
|||||||
Author: Antonin Portelli <antonin.portelli@me.com>
|
Author: Antonin Portelli <antonin.portelli@me.com>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Michael Marshall <michael.marshall@ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
@ -236,21 +237,36 @@ namespace Grid {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Vector element trait //////////////////////////////////////////////////////
|
// is_flattenable<T>::value is true if T is a std::vector<> which can be flattened //////////////////////
|
||||||
template <typename T>
|
template <typename T, typename V = void>
|
||||||
struct element
|
struct is_flattenable : std::false_type
|
||||||
{
|
{
|
||||||
typedef T type;
|
using type = T;
|
||||||
static constexpr bool is_number = false;
|
using grid_type = T;
|
||||||
|
static constexpr int vecRank = 0;
|
||||||
|
static constexpr bool isGridTensor = false;
|
||||||
|
static constexpr bool children_flattenable = std::is_arithmetic<T>::value or is_complex<T>::value;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
struct element<std::vector<T>>
|
struct is_flattenable<T, typename std::enable_if<isGridTensor<T>::value>::type> : std::false_type
|
||||||
{
|
{
|
||||||
typedef typename element<T>::type type;
|
using type = typename GridTypeMapper<T>::scalar_type;
|
||||||
static constexpr bool is_number = std::is_arithmetic<T>::value
|
using grid_type = T;
|
||||||
or is_complex<T>::value
|
static constexpr int vecRank = 0;
|
||||||
or element<T>::is_number;
|
static constexpr bool isGridTensor = true;
|
||||||
|
static constexpr bool children_flattenable = true;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
struct is_flattenable<std::vector<T>, typename std::enable_if<is_flattenable<T>::children_flattenable>::type>
|
||||||
|
: std::true_type
|
||||||
|
{
|
||||||
|
using type = typename is_flattenable<T>::type;
|
||||||
|
using grid_type = typename is_flattenable<T>::grid_type;
|
||||||
|
static constexpr bool isGridTensor = is_flattenable<T>::isGridTensor;
|
||||||
|
static constexpr int vecRank = is_flattenable<T>::vecRank + 1;
|
||||||
|
static constexpr bool children_flattenable = true;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Vector flattening utility class ////////////////////////////////////////////
|
// Vector flattening utility class ////////////////////////////////////////////
|
||||||
@ -259,23 +275,30 @@ namespace Grid {
|
|||||||
class Flatten
|
class Flatten
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
typedef typename element<V>::type Element;
|
using Scalar = typename is_flattenable<V>::type;
|
||||||
|
static constexpr bool isGridTensor = is_flattenable<V>::isGridTensor;
|
||||||
public:
|
public:
|
||||||
explicit Flatten(const V &vector);
|
explicit Flatten(const V &vector);
|
||||||
const V & getVector(void);
|
const V & getVector(void) const { return vector_; }
|
||||||
const std::vector<Element> & getFlatVector(void);
|
const std::vector<Scalar> & getFlatVector(void) const { return flatVector_; }
|
||||||
const std::vector<size_t> & getDim(void);
|
const std::vector<size_t> & getDim(void) const { return dim_; }
|
||||||
private:
|
private:
|
||||||
void accumulate(const Element &e);
|
template <typename W> typename std::enable_if<!is_flattenable<W>::value && !is_flattenable<W>::isGridTensor>::type
|
||||||
template <typename W>
|
accumulate(const W &e);
|
||||||
void accumulate(const W &v);
|
template <typename W> typename std::enable_if<!is_flattenable<W>::value && is_flattenable<W>::isGridTensor>::type
|
||||||
void accumulateDim(const Element &e);
|
accumulate(const W &e);
|
||||||
template <typename W>
|
template <typename W> typename std::enable_if< is_flattenable<W>::value>::type
|
||||||
void accumulateDim(const W &v);
|
accumulate(const W &v);
|
||||||
|
template <typename W> typename std::enable_if<!is_flattenable<W>::value && !is_flattenable<W>::isGridTensor>::type
|
||||||
|
accumulateDim(const W &e) {} // Innermost is a scalar - do nothing
|
||||||
|
template <typename W> typename std::enable_if<!is_flattenable<W>::value && is_flattenable<W>::isGridTensor>::type
|
||||||
|
accumulateDim(const W &e);
|
||||||
|
template <typename W> typename std::enable_if< is_flattenable<W>::value>::type
|
||||||
|
accumulateDim(const W &v);
|
||||||
private:
|
private:
|
||||||
const V &vector_;
|
const V &vector_;
|
||||||
std::vector<Element> flatVector_;
|
std::vector<Scalar> flatVector_;
|
||||||
std::vector<size_t> dim_;
|
std::vector<size_t> dim_;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Class to reconstruct a multidimensional std::vector
|
// Class to reconstruct a multidimensional std::vector
|
||||||
@ -283,38 +306,57 @@ namespace Grid {
|
|||||||
class Reconstruct
|
class Reconstruct
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
typedef typename element<V>::type Element;
|
using Scalar = typename is_flattenable<V>::type;
|
||||||
|
static constexpr bool isGridTensor = is_flattenable<V>::isGridTensor;
|
||||||
public:
|
public:
|
||||||
Reconstruct(const std::vector<Element> &flatVector,
|
Reconstruct(const std::vector<Scalar> &flatVector,
|
||||||
const std::vector<size_t> &dim);
|
const std::vector<size_t> &dim);
|
||||||
const V & getVector(void);
|
const V & getVector(void) const { return vector_; }
|
||||||
const std::vector<Element> & getFlatVector(void);
|
const std::vector<Scalar> & getFlatVector(void) const { return flatVector_; }
|
||||||
const std::vector<size_t> & getDim(void);
|
const std::vector<size_t> & getDim(void) const { return dim_; }
|
||||||
private:
|
private:
|
||||||
void fill(std::vector<Element> &v);
|
template <typename W> typename std::enable_if<!is_flattenable<W>::value && !is_flattenable<W>::isGridTensor>::type
|
||||||
template <typename W>
|
fill(W &v);
|
||||||
void fill(W &v);
|
template <typename W> typename std::enable_if<!is_flattenable<W>::value && is_flattenable<W>::isGridTensor>::type
|
||||||
void resize(std::vector<Element> &v, const unsigned int dim);
|
fill(W &v);
|
||||||
template <typename W>
|
template <typename W> typename std::enable_if< is_flattenable<W>::value>::type
|
||||||
void resize(W &v, const unsigned int dim);
|
fill(W &v);
|
||||||
|
template <typename W> typename std::enable_if< is_flattenable<W>::value && is_flattenable<W>::vecRank==1>::type
|
||||||
|
resize(W &v, const unsigned int dim);
|
||||||
|
template <typename W> typename std::enable_if< is_flattenable<W>::value && (is_flattenable<W>::vecRank>1)>::type
|
||||||
|
resize(W &v, const unsigned int dim);
|
||||||
|
template <typename W> typename std::enable_if<!is_flattenable<W>::isGridTensor>::type
|
||||||
|
checkInnermost(const W &e) {} // Innermost is a scalar - do nothing
|
||||||
|
template <typename W> typename std::enable_if< is_flattenable<W>::isGridTensor>::type
|
||||||
|
checkInnermost(const W &e);
|
||||||
private:
|
private:
|
||||||
V vector_;
|
V vector_;
|
||||||
const std::vector<Element> &flatVector_;
|
const std::vector<Scalar> &flatVector_;
|
||||||
std::vector<size_t> dim_;
|
std::vector<size_t> dim_;
|
||||||
size_t ind_{0};
|
size_t ind_{0};
|
||||||
unsigned int dimInd_{0};
|
unsigned int dimInd_{0};
|
||||||
};
|
};
|
||||||
|
|
||||||
// Flatten class template implementation
|
// Flatten class template implementation
|
||||||
template <typename V>
|
template <typename V>
|
||||||
void Flatten<V>::accumulate(const Element &e)
|
template <typename W> typename std::enable_if<!is_flattenable<W>::value && !is_flattenable<W>::isGridTensor>::type
|
||||||
|
Flatten<V>::accumulate(const W &e)
|
||||||
{
|
{
|
||||||
flatVector_.push_back(e);
|
flatVector_.push_back(e);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename V>
|
template <typename V>
|
||||||
template <typename W>
|
template <typename W> typename std::enable_if<!is_flattenable<W>::value && is_flattenable<W>::isGridTensor>::type
|
||||||
void Flatten<V>::accumulate(const W &v)
|
Flatten<V>::accumulate(const W &e)
|
||||||
|
{
|
||||||
|
for (const Scalar &x: e) {
|
||||||
|
flatVector_.push_back(x);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename V>
|
||||||
|
template <typename W> typename std::enable_if<is_flattenable<W>::value>::type
|
||||||
|
Flatten<V>::accumulate(const W &v)
|
||||||
{
|
{
|
||||||
for (auto &e: v)
|
for (auto &e: v)
|
||||||
{
|
{
|
||||||
@ -323,11 +365,17 @@ namespace Grid {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename V>
|
template <typename V>
|
||||||
void Flatten<V>::accumulateDim(const Element &e) {};
|
template <typename W> typename std::enable_if<!is_flattenable<W>::value && is_flattenable<W>::isGridTensor>::type
|
||||||
|
Flatten<V>::accumulateDim(const W &e)
|
||||||
|
{
|
||||||
|
using Traits = GridTypeMapper<typename is_flattenable<W>::grid_type>;
|
||||||
|
for (int rank=0; rank < Traits::Rank; ++rank)
|
||||||
|
dim_.push_back(Traits::Dimension(rank));
|
||||||
|
}
|
||||||
|
|
||||||
template <typename V>
|
template <typename V>
|
||||||
template <typename W>
|
template <typename W> typename std::enable_if<is_flattenable<W>::value>::type
|
||||||
void Flatten<V>::accumulateDim(const W &v)
|
Flatten<V>::accumulateDim(const W &v)
|
||||||
{
|
{
|
||||||
dim_.push_back(v.size());
|
dim_.push_back(v.size());
|
||||||
accumulateDim(v[0]);
|
accumulateDim(v[0]);
|
||||||
@ -337,32 +385,26 @@ namespace Grid {
|
|||||||
Flatten<V>::Flatten(const V &vector)
|
Flatten<V>::Flatten(const V &vector)
|
||||||
: vector_(vector)
|
: vector_(vector)
|
||||||
{
|
{
|
||||||
accumulate(vector_);
|
|
||||||
accumulateDim(vector_);
|
accumulateDim(vector_);
|
||||||
}
|
std::size_t TotalSize{ dim_[0] };
|
||||||
|
for (int i = 1; i < dim_.size(); ++i) {
|
||||||
template <typename V>
|
TotalSize *= dim_[i];
|
||||||
const V & Flatten<V>::getVector(void)
|
}
|
||||||
{
|
flatVector_.reserve(TotalSize);
|
||||||
return vector_;
|
accumulate(vector_);
|
||||||
}
|
|
||||||
|
|
||||||
template <typename V>
|
|
||||||
const std::vector<typename Flatten<V>::Element> &
|
|
||||||
Flatten<V>::getFlatVector(void)
|
|
||||||
{
|
|
||||||
return flatVector_;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename V>
|
|
||||||
const std::vector<size_t> & Flatten<V>::getDim(void)
|
|
||||||
{
|
|
||||||
return dim_;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reconstruct class template implementation
|
// Reconstruct class template implementation
|
||||||
template <typename V>
|
template <typename V>
|
||||||
void Reconstruct<V>::fill(std::vector<Element> &v)
|
template <typename W> typename std::enable_if<!is_flattenable<W>::value && !is_flattenable<W>::isGridTensor>::type
|
||||||
|
Reconstruct<V>::fill(W &v)
|
||||||
|
{
|
||||||
|
v = flatVector_[ind_++];
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename V>
|
||||||
|
template <typename W> typename std::enable_if<!is_flattenable<W>::value && is_flattenable<W>::isGridTensor>::type
|
||||||
|
Reconstruct<V>::fill(W &v)
|
||||||
{
|
{
|
||||||
for (auto &e: v)
|
for (auto &e: v)
|
||||||
{
|
{
|
||||||
@ -371,8 +413,8 @@ namespace Grid {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename V>
|
template <typename V>
|
||||||
template <typename W>
|
template <typename W> typename std::enable_if<is_flattenable<W>::value>::type
|
||||||
void Reconstruct<V>::fill(W &v)
|
Reconstruct<V>::fill(W &v)
|
||||||
{
|
{
|
||||||
for (auto &e: v)
|
for (auto &e: v)
|
||||||
{
|
{
|
||||||
@ -381,14 +423,15 @@ namespace Grid {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename V>
|
template <typename V>
|
||||||
void Reconstruct<V>::resize(std::vector<Element> &v, const unsigned int dim)
|
template <typename W> typename std::enable_if<is_flattenable<W>::value && is_flattenable<W>::vecRank==1>::type
|
||||||
|
Reconstruct<V>::resize(W &v, const unsigned int dim)
|
||||||
{
|
{
|
||||||
v.resize(dim_[dim]);
|
v.resize(dim_[dim]);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename V>
|
template <typename V>
|
||||||
template <typename W>
|
template <typename W> typename std::enable_if<is_flattenable<W>::value && (is_flattenable<W>::vecRank>1)>::type
|
||||||
void Reconstruct<V>::resize(W &v, const unsigned int dim)
|
Reconstruct<V>::resize(W &v, const unsigned int dim)
|
||||||
{
|
{
|
||||||
v.resize(dim_[dim]);
|
v.resize(dim_[dim]);
|
||||||
for (auto &e: v)
|
for (auto &e: v)
|
||||||
@ -398,34 +441,31 @@ namespace Grid {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename V>
|
template <typename V>
|
||||||
Reconstruct<V>::Reconstruct(const std::vector<Element> &flatVector,
|
template <typename W> typename std::enable_if<is_flattenable<W>::isGridTensor>::type
|
||||||
|
Reconstruct<V>::checkInnermost(const W &)
|
||||||
|
{
|
||||||
|
using Traits = GridTypeMapper<typename is_flattenable<W>::grid_type>;
|
||||||
|
const int gridRank{Traits::Rank};
|
||||||
|
const int dimRank{static_cast<int>(dim_.size())};
|
||||||
|
assert(dimRank >= gridRank && "Tensor rank too low for Grid tensor");
|
||||||
|
for (int i=0; i<gridRank; ++i) {
|
||||||
|
assert(dim_[dimRank - gridRank + i] == Traits::Dimension(i) && "Tensor dimension doesn't match Grid tensor");
|
||||||
|
}
|
||||||
|
dim_.resize(dimRank - gridRank);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename V>
|
||||||
|
Reconstruct<V>::Reconstruct(const std::vector<Scalar> &flatVector,
|
||||||
const std::vector<size_t> &dim)
|
const std::vector<size_t> &dim)
|
||||||
: flatVector_(flatVector)
|
: flatVector_(flatVector)
|
||||||
, dim_(dim)
|
, dim_(dim)
|
||||||
{
|
{
|
||||||
|
checkInnermost(vector_);
|
||||||
|
assert(dim_.size() == is_flattenable<V>::vecRank && "Tensor rank doesn't match nested std::vector rank");
|
||||||
resize(vector_, 0);
|
resize(vector_, 0);
|
||||||
fill(vector_);
|
fill(vector_);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename V>
|
|
||||||
const V & Reconstruct<V>::getVector(void)
|
|
||||||
{
|
|
||||||
return vector_;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename V>
|
|
||||||
const std::vector<typename Reconstruct<V>::Element> &
|
|
||||||
Reconstruct<V>::getFlatVector(void)
|
|
||||||
{
|
|
||||||
return flatVector_;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename V>
|
|
||||||
const std::vector<size_t> & Reconstruct<V>::getDim(void)
|
|
||||||
{
|
|
||||||
return dim_;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Vector IO utilities ///////////////////////////////////////////////////////
|
// Vector IO utilities ///////////////////////////////////////////////////////
|
||||||
// helper function to read space-separated values
|
// helper function to read space-separated values
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@ -459,6 +499,64 @@ namespace Grid {
|
|||||||
|
|
||||||
return os;
|
return os;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// In general, scalar types are considered "flattenable" (regularly shaped)
|
||||||
|
template <typename T>
|
||||||
|
bool isRegularShapeHelper(const std::vector<T> &, std::vector<std::size_t> &, int, bool)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
bool isRegularShapeHelper(const std::vector<std::vector<T>> &v, std::vector<std::size_t> &Dims, int Depth, bool bFirst)
|
||||||
|
{
|
||||||
|
if( bFirst)
|
||||||
|
{
|
||||||
|
assert( Dims.size() == Depth && "Bug: Delete this message after testing" );
|
||||||
|
Dims.push_back(v[0].size());
|
||||||
|
if (!Dims[Depth])
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
assert( Dims.size() >= Depth + 1 && "Bug: Delete this message after testing" );
|
||||||
|
}
|
||||||
|
for (std::size_t i = 0; i < v.size(); ++i)
|
||||||
|
{
|
||||||
|
if (v[i].size() != Dims[Depth] || !isRegularShapeHelper(v[i], Dims, Depth + 1, bFirst && i==0))
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
bool isRegularShape(const T &t) { return true; }
|
||||||
|
|
||||||
|
// std::vector overload: reports true exactly when the vector is non-empty.
// The elements themselves are not inspected here.
template <typename T>
|
||||||
|
bool isRegularShape(const std::vector<T> &v) { return !v.empty(); }
|
||||||
|
|
||||||
|
// Returns true when every nesting level of this std::vector<std::vector<T>> is regularly shaped, false otherwise
|
||||||
|
template <typename T>
|
||||||
|
bool isRegularShape(const std::vector<std::vector<T>> &v)
|
||||||
|
{
|
||||||
|
// An empty outer vector or an empty first row is never regular
if (v.empty() || v[0].empty())
|
||||||
|
return false;
|
||||||
|
// Make sure all of my rows are the same size
|
||||||
|
// Dims[k] holds the extent recorded for nesting level k
std::vector<std::size_t> Dims;
|
||||||
|
// NOTE(review): capacity hint only; it is taken from T's vecRank although two extents are pushed below before any recursion - confirm the intended count
Dims.reserve(is_flattenable<T>::vecRank);
|
||||||
|
Dims.push_back(v.size());
|
||||||
|
Dims.push_back(v[0].size());
|
||||||
|
for (std::size_t i = 0; i < Dims[0]; ++i)
|
||||||
|
{
|
||||||
|
// Every row must match the first row's length; deeper levels are checked by the helper from depth 2, and only row 0 is passed bFirst=true
if (v[i].size() != Dims[1] || !isRegularShapeHelper(v[i], Dims, 2, i==0))
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// helper function to read space-separated values
|
// helper function to read space-separated values
|
||||||
|
@ -3,20 +3,48 @@
|
|||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
|
template<class vobj>
|
||||||
|
accelerator_inline void exchangeSIMT(vobj &mp0,vobj &mp1,const vobj &vp0,const vobj &vp1,Integer type)
|
||||||
|
{
|
||||||
|
typedef decltype(coalescedRead(mp0)) sobj;
|
||||||
|
unsigned int Nsimd = vobj::Nsimd();
|
||||||
|
unsigned int mask = Nsimd >> (type + 1);
|
||||||
|
int lane = acceleratorSIMTlane(Nsimd);
|
||||||
|
int j0 = lane &(~mask); // inner coor zero
|
||||||
|
int j1 = lane |(mask) ; // inner coor one
|
||||||
|
const vobj *vpa = &vp0;
|
||||||
|
const vobj *vpb = &vp1;
|
||||||
|
const vobj *vp = (lane&mask) ? (vpb) : (vpa);
|
||||||
|
auto sa = coalescedRead(vp[0],j0);
|
||||||
|
auto sb = coalescedRead(vp[0],j1);
|
||||||
|
coalescedWrite(mp0,sa);
|
||||||
|
coalescedWrite(mp1,sb);
|
||||||
|
}
|
||||||
|
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
class SimpleCompressor {
|
class SimpleCompressor {
|
||||||
public:
|
public:
|
||||||
void Point(int) {};
|
void Point(int) {};
|
||||||
accelerator_inline int CommDatumSize(void) const { return sizeof(vobj); }
|
accelerator_inline int CommDatumSize(void) const { return sizeof(vobj); }
|
||||||
accelerator_inline bool DecompressionStep(void) const { return false; }
|
accelerator_inline bool DecompressionStep(void) const { return false; }
|
||||||
template<class cobj> accelerator_inline void Compress(cobj *buf,int o,const cobj &in) const { buf[o]=in; }
|
accelerator_inline void Compress(vobj &buf,const vobj &in) const {
|
||||||
|
coalescedWrite(buf,coalescedRead(in));
|
||||||
|
}
|
||||||
accelerator_inline void Exchange(vobj *mp,vobj *vp0,vobj *vp1,Integer type,Integer o) const {
|
accelerator_inline void Exchange(vobj *mp,vobj *vp0,vobj *vp1,Integer type,Integer o) const {
|
||||||
|
#ifdef GRID_SIMT
|
||||||
|
exchangeSIMT(mp[2*o],mp[2*o+1],vp0[o],vp1[o],type);
|
||||||
|
#else
|
||||||
exchange(mp[2*o],mp[2*o+1],vp0[o],vp1[o],type);
|
exchange(mp[2*o],mp[2*o+1],vp0[o],vp1[o],type);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
accelerator_inline void Decompress(vobj *out,vobj *in, int o) const { assert(0); }
|
accelerator_inline void Decompress(vobj *out,vobj *in, int o) const { assert(0); }
|
||||||
accelerator_inline void CompressExchange(vobj *out0,vobj *out1,const vobj *in,
|
accelerator_inline void CompressExchange(vobj *out0,vobj *out1,const vobj *in,
|
||||||
int j,int k, int m,int type) const {
|
int j,int k, int m,int type) const {
|
||||||
|
#ifdef GRID_SIMT
|
||||||
|
exchangeSIMT(out0[j],out1[j],in[k],in[m],type);
|
||||||
|
#else
|
||||||
exchange(out0[j],out1[j],in[k],in[m],type);
|
exchange(out0[j],out1[j],in[k],in[m],type);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
// For cshift. Cshift should drop compressor coupling altogether
|
// For cshift. Cshift should drop compressor coupling altogether
|
||||||
// because I had to decouple the code from the Stencil anyway
|
// because I had to decouple the code from the Stencil anyway
|
||||||
|
@ -30,7 +30,7 @@
|
|||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
void Gather_plane_table_compute (GridBase *grid,int dimension,int plane,int cbmask,
|
void Gather_plane_table_compute (GridBase *grid,int dimension,int plane,int cbmask,
|
||||||
int off,Vector<std::pair<int,int> > & table)
|
int off,std::vector<std::pair<int,int> > & table)
|
||||||
{
|
{
|
||||||
table.resize(0);
|
table.resize(0);
|
||||||
|
|
||||||
|
@ -57,27 +57,22 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
///////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
void Gather_plane_table_compute (GridBase *grid,int dimension,int plane,int cbmask,
|
void Gather_plane_table_compute (GridBase *grid,int dimension,int plane,int cbmask,
|
||||||
int off,Vector<std::pair<int,int> > & table);
|
int off,std::vector<std::pair<int,int> > & table);
|
||||||
|
|
||||||
template<class vobj,class cobj,class compressor>
|
template<class vobj,class cobj,class compressor>
|
||||||
void Gather_plane_simple_table (Vector<std::pair<int,int> >& table,const Lattice<vobj> &rhs,cobj *buffer,compressor &compress, int off,int so) __attribute__((noinline));
|
void Gather_plane_simple_table (commVector<std::pair<int,int> >& table,const Lattice<vobj> &rhs,cobj *buffer,compressor &compress, int off,int so) __attribute__((noinline));
|
||||||
|
|
||||||
template<class vobj,class cobj,class compressor>
|
template<class vobj,class cobj,class compressor>
|
||||||
void Gather_plane_simple_table (Vector<std::pair<int,int> >& table,const Lattice<vobj> &rhs,cobj *buffer,compressor &compress, int off,int so)
|
void Gather_plane_simple_table (commVector<std::pair<int,int> >& table,const Lattice<vobj> &rhs,cobj *buffer,compressor &compress, int off,int so)
|
||||||
{
|
{
|
||||||
int num=table.size();
|
int num=table.size();
|
||||||
std::pair<int,int> *table_v = & table[0];
|
std::pair<int,int> *table_v = & table[0];
|
||||||
|
|
||||||
auto rhs_v = rhs.View(AcceleratorRead);
|
auto rhs_v = rhs.View(AcceleratorRead);
|
||||||
accelerator_forNB( i,num, vobj::Nsimd(), {
|
accelerator_forNB( i,num, vobj::Nsimd(), {
|
||||||
typedef decltype(coalescedRead(buffer[0])) compressed_t;
|
compress.Compress(buffer[off+table_v[i].first],rhs_v[so+table_v[i].second]);
|
||||||
compressed_t tmp_c;
|
|
||||||
uint64_t o = table_v[i].first;
|
|
||||||
compress.Compress(&tmp_c,0,rhs_v(so+table_v[i].second));
|
|
||||||
coalescedWrite(buffer[off+o],tmp_c);
|
|
||||||
});
|
});
|
||||||
rhs_v.ViewClose();
|
rhs_v.ViewClose();
|
||||||
// Further optimisation: i) software prefetch the first element of the next table entry, prefetch the table
|
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////
|
||||||
@ -85,10 +80,10 @@ void Gather_plane_simple_table (Vector<std::pair<int,int> >& table,const Lattice
|
|||||||
///////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////
|
||||||
template<class cobj,class vobj,class compressor>
|
template<class cobj,class vobj,class compressor>
|
||||||
void Gather_plane_exchange_table(const Lattice<vobj> &rhs,
|
void Gather_plane_exchange_table(const Lattice<vobj> &rhs,
|
||||||
Vector<cobj *> pointers,int dimension,int plane,int cbmask,compressor &compress,int type) __attribute__((noinline));
|
commVector<cobj *> pointers,int dimension,int plane,int cbmask,compressor &compress,int type) __attribute__((noinline));
|
||||||
|
|
||||||
template<class cobj,class vobj,class compressor>
|
template<class cobj,class vobj,class compressor>
|
||||||
void Gather_plane_exchange_table(Vector<std::pair<int,int> >& table,const Lattice<vobj> &rhs,
|
void Gather_plane_exchange_table(commVector<std::pair<int,int> >& table,const Lattice<vobj> &rhs,
|
||||||
Vector<cobj *> pointers,int dimension,int plane,int cbmask,
|
Vector<cobj *> pointers,int dimension,int plane,int cbmask,
|
||||||
compressor &compress,int type)
|
compressor &compress,int type)
|
||||||
{
|
{
|
||||||
@ -100,7 +95,7 @@ void Gather_plane_exchange_table(Vector<std::pair<int,int> >& table,const Lattic
|
|||||||
auto p0=&pointers[0][0];
|
auto p0=&pointers[0][0];
|
||||||
auto p1=&pointers[1][0];
|
auto p1=&pointers[1][0];
|
||||||
auto tp=&table[0];
|
auto tp=&table[0];
|
||||||
accelerator_forNB(j, num, 1, {
|
accelerator_forNB(j, num, vobj::Nsimd(), {
|
||||||
compress.CompressExchange(p0,p1, &rhs_v[0], j,
|
compress.CompressExchange(p0,p1, &rhs_v[0], j,
|
||||||
so+tp[2*j ].second,
|
so+tp[2*j ].second,
|
||||||
so+tp[2*j+1].second,
|
so+tp[2*j+1].second,
|
||||||
@ -266,10 +261,11 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
int face_table_computed;
|
int face_table_computed;
|
||||||
std::vector<Vector<std::pair<int,int> > > face_table ;
|
std::vector<commVector<std::pair<int,int> > > face_table ;
|
||||||
Vector<int> surface_list;
|
Vector<int> surface_list;
|
||||||
|
|
||||||
stencilVector<StencilEntry> _entries; // Resident in managed memory
|
stencilVector<StencilEntry> _entries; // Resident in managed memory
|
||||||
|
commVector<StencilEntry> _entries_device; // Resident in managed memory
|
||||||
std::vector<Packet> Packets;
|
std::vector<Packet> Packets;
|
||||||
std::vector<Merge> Mergers;
|
std::vector<Merge> Mergers;
|
||||||
std::vector<Merge> MergersSHM;
|
std::vector<Merge> MergersSHM;
|
||||||
@ -330,21 +326,8 @@ public:
|
|||||||
int xmit_to_rank;
|
int xmit_to_rank;
|
||||||
|
|
||||||
if ( ! comm_dim ) return 1;
|
if ( ! comm_dim ) return 1;
|
||||||
|
if ( displacement == 0 ) return 1;
|
||||||
int nbr_proc;
|
return 0;
|
||||||
if (displacement>0) nbr_proc = 1;
|
|
||||||
else nbr_proc = pd-1;
|
|
||||||
|
|
||||||
// FIXME this logic needs to be sorted for three link term
|
|
||||||
// assert( (displacement==1) || (displacement==-1));
|
|
||||||
// Present hack only works for >= 4^4 subvol per node
|
|
||||||
_grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
|
|
||||||
|
|
||||||
void *shm = (void *) _grid->ShmBufferTranslate(recv_from_rank,this->u_recv_buf_p);
|
|
||||||
|
|
||||||
if ( shm==NULL ) return 0;
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////
|
//////////////////////////////////////////
|
||||||
@ -609,13 +592,14 @@ public:
|
|||||||
template<class decompressor>
|
template<class decompressor>
|
||||||
void CommsMerge(decompressor decompress,std::vector<Merge> &mm,std::vector<Decompress> &dd) {
|
void CommsMerge(decompressor decompress,std::vector<Merge> &mm,std::vector<Decompress> &dd) {
|
||||||
|
|
||||||
|
|
||||||
mergetime-=usecond();
|
mergetime-=usecond();
|
||||||
for(int i=0;i<mm.size();i++){
|
for(int i=0;i<mm.size();i++){
|
||||||
auto mp = &mm[i].mpointer[0];
|
auto mp = &mm[i].mpointer[0];
|
||||||
auto vp0= &mm[i].vpointers[0][0];
|
auto vp0= &mm[i].vpointers[0][0];
|
||||||
auto vp1= &mm[i].vpointers[1][0];
|
auto vp1= &mm[i].vpointers[1][0];
|
||||||
auto type= mm[i].type;
|
auto type= mm[i].type;
|
||||||
accelerator_forNB(o,mm[i].buffer_size/2,1,{
|
accelerator_forNB(o,mm[i].buffer_size/2,vobj::Nsimd(),{
|
||||||
decompress.Exchange(mp,vp0,vp1,type,o);
|
decompress.Exchange(mp,vp0,vp1,type,o);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -1023,7 +1007,6 @@ public:
|
|||||||
int cb= (cbmask==0x2)? Odd : Even;
|
int cb= (cbmask==0x2)? Odd : Even;
|
||||||
int sshift= _grid->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb);
|
int sshift= _grid->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb);
|
||||||
|
|
||||||
int shm_receive_only = 1;
|
|
||||||
for(int x=0;x<rd;x++){
|
for(int x=0;x<rd;x++){
|
||||||
|
|
||||||
int sx = (x+sshift)%rd;
|
int sx = (x+sshift)%rd;
|
||||||
@ -1039,7 +1022,12 @@ public:
|
|||||||
int so = sx*rhs.Grid()->_ostride[dimension]; // base offset for start of plane
|
int so = sx*rhs.Grid()->_ostride[dimension]; // base offset for start of plane
|
||||||
if ( !face_table_computed ) {
|
if ( !face_table_computed ) {
|
||||||
face_table.resize(face_idx+1);
|
face_table.resize(face_idx+1);
|
||||||
Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,u_comm_offset,face_table[face_idx]);
|
std::vector<std::pair<int,int> > face_table_host ;
|
||||||
|
Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,u_comm_offset,face_table_host);
|
||||||
|
face_table[face_idx].resize(face_table_host.size());
|
||||||
|
acceleratorCopyToDevice(&face_table_host[0],
|
||||||
|
&face_table[face_idx][0],
|
||||||
|
face_table[face_idx].size()*sizeof(face_table_host[0]));
|
||||||
}
|
}
|
||||||
|
|
||||||
// int rank = _grid->_processor;
|
// int rank = _grid->_processor;
|
||||||
@ -1050,10 +1038,6 @@ public:
|
|||||||
assert (xmit_to_rank != _grid->ThisRank());
|
assert (xmit_to_rank != _grid->ThisRank());
|
||||||
assert (recv_from_rank != _grid->ThisRank());
|
assert (recv_from_rank != _grid->ThisRank());
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////
|
|
||||||
// try the direct copy if possible
|
|
||||||
/////////////////////////////////////////////////////////
|
|
||||||
cobj *send_buf;
|
|
||||||
cobj *recv_buf;
|
cobj *recv_buf;
|
||||||
if ( compress.DecompressionStep() ) {
|
if ( compress.DecompressionStep() ) {
|
||||||
recv_buf=u_simd_recv_buf[0];
|
recv_buf=u_simd_recv_buf[0];
|
||||||
@ -1061,52 +1045,36 @@ public:
|
|||||||
recv_buf=this->u_recv_buf_p;
|
recv_buf=this->u_recv_buf_p;
|
||||||
}
|
}
|
||||||
|
|
||||||
send_buf = (cobj *)_grid->ShmBufferTranslate(xmit_to_rank,recv_buf);
|
cobj *send_buf;
|
||||||
if ( send_buf==NULL ) {
|
send_buf = this->u_send_buf_p; // Gather locally, must send
|
||||||
send_buf = this->u_send_buf_p;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find out if we get the direct copy.
|
|
||||||
void *success = (void *) _grid->ShmBufferTranslate(recv_from_rank,this->u_send_buf_p);
|
|
||||||
if (success==NULL) {
|
|
||||||
// we found a packet that comes from MPI and contributes to this leg of stencil
|
|
||||||
shm_receive_only = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////
|
||||||
|
// Gather locally
|
||||||
|
////////////////////////////////////////////////////////
|
||||||
gathertime-=usecond();
|
gathertime-=usecond();
|
||||||
assert(send_buf!=NULL);
|
assert(send_buf!=NULL);
|
||||||
Gather_plane_simple_table(face_table[face_idx],rhs,send_buf,compress,u_comm_offset,so); face_idx++;
|
Gather_plane_simple_table(face_table[face_idx],rhs,send_buf,compress,u_comm_offset,so); face_idx++;
|
||||||
gathertime+=usecond();
|
gathertime+=usecond();
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////
|
||||||
|
// Build a list of things to do after we synchronise GPUs
|
||||||
|
// Start comms now???
|
||||||
|
///////////////////////////////////////////////////////////
|
||||||
|
AddPacket((void *)&send_buf[u_comm_offset],
|
||||||
|
(void *)&recv_buf[u_comm_offset],
|
||||||
|
xmit_to_rank,
|
||||||
|
recv_from_rank,
|
||||||
|
bytes);
|
||||||
|
|
||||||
if ( compress.DecompressionStep() ) {
|
if ( compress.DecompressionStep() ) {
|
||||||
|
AddDecompress(&this->u_recv_buf_p[u_comm_offset],
|
||||||
if ( shm_receive_only ) { // Early decompress before MPI is finished is possible
|
&recv_buf[u_comm_offset],
|
||||||
AddDecompress(&this->u_recv_buf_p[u_comm_offset],
|
words,Decompressions);
|
||||||
&recv_buf[u_comm_offset],
|
|
||||||
words,DecompressionsSHM);
|
|
||||||
} else { // Decompress after MPI is finished
|
|
||||||
AddDecompress(&this->u_recv_buf_p[u_comm_offset],
|
|
||||||
&recv_buf[u_comm_offset],
|
|
||||||
words,Decompressions);
|
|
||||||
}
|
|
||||||
|
|
||||||
AddPacket((void *)&send_buf[u_comm_offset],
|
|
||||||
(void *)&recv_buf[u_comm_offset],
|
|
||||||
xmit_to_rank,
|
|
||||||
recv_from_rank,
|
|
||||||
bytes);
|
|
||||||
|
|
||||||
} else {
|
|
||||||
AddPacket((void *)&send_buf[u_comm_offset],
|
|
||||||
(void *)&this->u_recv_buf_p[u_comm_offset],
|
|
||||||
xmit_to_rank,
|
|
||||||
recv_from_rank,
|
|
||||||
bytes);
|
|
||||||
}
|
}
|
||||||
u_comm_offset+=words;
|
u_comm_offset+=words;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return shm_receive_only;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class compressor>
|
template<class compressor>
|
||||||
@ -1157,7 +1125,6 @@ public:
|
|||||||
int sshift= _grid->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb);
|
int sshift= _grid->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb);
|
||||||
|
|
||||||
// loop over outer coord planes orthog to dim
|
// loop over outer coord planes orthog to dim
|
||||||
int shm_receive_only = 1;
|
|
||||||
for(int x=0;x<rd;x++){
|
for(int x=0;x<rd;x++){
|
||||||
|
|
||||||
int any_offnode = ( ((x+sshift)%fd) >= rd );
|
int any_offnode = ( ((x+sshift)%fd) >= rd );
|
||||||
@ -1172,11 +1139,18 @@ public:
|
|||||||
|
|
||||||
if ( !face_table_computed ) {
|
if ( !face_table_computed ) {
|
||||||
face_table.resize(face_idx+1);
|
face_table.resize(face_idx+1);
|
||||||
Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,u_comm_offset,face_table[face_idx]);
|
std::vector<std::pair<int,int> > face_table_host ;
|
||||||
|
|
||||||
|
Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,u_comm_offset,face_table_host);
|
||||||
|
face_table[face_idx].resize(face_table_host.size());
|
||||||
|
acceleratorCopyToDevice(&face_table_host[0],
|
||||||
|
&face_table[face_idx][0],
|
||||||
|
face_table[face_idx].size()*sizeof(face_table_host[0]));
|
||||||
}
|
}
|
||||||
gathermtime-=usecond();
|
gathermtime-=usecond();
|
||||||
|
|
||||||
Gather_plane_exchange_table(face_table[face_idx],rhs,spointers,dimension,sx,cbmask,compress,permute_type); face_idx++;
|
Gather_plane_exchange_table(face_table[face_idx],rhs,spointers,dimension,sx,cbmask,compress,permute_type);
|
||||||
|
face_idx++;
|
||||||
|
|
||||||
gathermtime+=usecond();
|
gathermtime+=usecond();
|
||||||
//spointers[0] -- low
|
//spointers[0] -- low
|
||||||
@ -1205,20 +1179,7 @@ public:
|
|||||||
|
|
||||||
_grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
|
_grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
|
||||||
|
|
||||||
// shm == receive pointer if offnode
|
rpointers[i] = rp;
|
||||||
// shm == Translate[send pointer] if on node -- my view of his send pointer
|
|
||||||
cobj *shm = (cobj *) _grid->ShmBufferTranslate(recv_from_rank,sp);
|
|
||||||
if (shm==NULL) {
|
|
||||||
shm = rp;
|
|
||||||
// we found a packet that comes from MPI and contributes to this shift.
|
|
||||||
// is_same_node is only used in the WilsonStencil, and gets set for this point in the stencil.
|
|
||||||
// Kernel will add the exterior_terms except if is_same_node.
|
|
||||||
shm_receive_only = 0;
|
|
||||||
// leg of stencil
|
|
||||||
}
|
|
||||||
// if Direct, StencilSendToRecvFrom will suppress copy to a peer on node
|
|
||||||
// assuming above pointer flip
|
|
||||||
rpointers[i] = shm;
|
|
||||||
|
|
||||||
AddPacket((void *)sp,(void *)rp,xmit_to_rank,recv_from_rank,bytes);
|
AddPacket((void *)sp,(void *)rp,xmit_to_rank,recv_from_rank,bytes);
|
||||||
|
|
||||||
@ -1230,102 +1191,17 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( shm_receive_only ) {
|
AddMerge(&this->u_recv_buf_p[u_comm_offset],rpointers,reduced_buffer_size,permute_type,Mergers);
|
||||||
AddMerge(&this->u_recv_buf_p[u_comm_offset],rpointers,reduced_buffer_size,permute_type,MergersSHM);
|
|
||||||
} else {
|
|
||||||
AddMerge(&this->u_recv_buf_p[u_comm_offset],rpointers,reduced_buffer_size,permute_type,Mergers);
|
|
||||||
}
|
|
||||||
|
|
||||||
u_comm_offset +=buffer_size;
|
u_comm_offset +=buffer_size;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return shm_receive_only;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ZeroCounters(void) {
|
void ZeroCounters(void) { };
|
||||||
gathertime = 0.;
|
|
||||||
commtime = 0.;
|
|
||||||
mpi3synctime=0.;
|
|
||||||
mpi3synctime_g=0.;
|
|
||||||
shmmergetime=0.;
|
|
||||||
for(int i=0;i<this->_npoints;i++){
|
|
||||||
comm_time_thr[i]=0;
|
|
||||||
comm_bytes_thr[i]=0;
|
|
||||||
comm_enter_thr[i]=0;
|
|
||||||
comm_leave_thr[i]=0;
|
|
||||||
shm_bytes_thr[i]=0;
|
|
||||||
}
|
|
||||||
halogtime = 0.;
|
|
||||||
mergetime = 0.;
|
|
||||||
decompresstime = 0.;
|
|
||||||
gathermtime = 0.;
|
|
||||||
splicetime = 0.;
|
|
||||||
nosplicetime = 0.;
|
|
||||||
comms_bytes = 0.;
|
|
||||||
shm_bytes = 0.;
|
|
||||||
calls = 0.;
|
|
||||||
};
|
|
||||||
|
|
||||||
void Report(void) {
|
void Report(void) { };
|
||||||
#define AVERAGE(A)
|
|
||||||
#define PRINTIT(A) AVERAGE(A); std::cout << GridLogMessage << " Stencil " << #A << " "<< A/calls<<std::endl;
|
|
||||||
RealD NP = _grid->_Nprocessors;
|
|
||||||
RealD NN = _grid->NodeCount();
|
|
||||||
double t = 0;
|
|
||||||
// if comm_time_thr is set they were all done in parallel so take the max
|
|
||||||
// but add up the bytes
|
|
||||||
int threaded = 0 ;
|
|
||||||
for (int i = 0; i < 8; ++i) {
|
|
||||||
if ( comm_time_thr[i]>0.0 ) {
|
|
||||||
threaded = 1;
|
|
||||||
comms_bytes += comm_bytes_thr[i];
|
|
||||||
shm_bytes += shm_bytes_thr[i];
|
|
||||||
if (t < comm_time_thr[i]) t = comm_time_thr[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (threaded) commtime += t;
|
|
||||||
|
|
||||||
_grid->GlobalSum(commtime); commtime/=NP;
|
|
||||||
if ( calls > 0. ) {
|
|
||||||
std::cout << GridLogMessage << " Stencil calls "<<calls<<std::endl;
|
|
||||||
PRINTIT(halogtime);
|
|
||||||
PRINTIT(gathertime);
|
|
||||||
PRINTIT(gathermtime);
|
|
||||||
PRINTIT(mergetime);
|
|
||||||
PRINTIT(decompresstime);
|
|
||||||
if(comms_bytes>1.0){
|
|
||||||
PRINTIT(comms_bytes);
|
|
||||||
PRINTIT(commtime);
|
|
||||||
std::cout << GridLogMessage << " Stencil " << comms_bytes/commtime/1000. << " GB/s per rank"<<std::endl;
|
|
||||||
std::cout << GridLogMessage << " Stencil " << comms_bytes/commtime/1000.*NP/NN << " GB/s per node"<<std::endl;
|
|
||||||
}
|
|
||||||
if(shm_bytes>1.0){
|
|
||||||
PRINTIT(shm_bytes); // X bytes + R bytes
|
|
||||||
// Double this to include spin projection overhead with 2:1 ratio in wilson
|
|
||||||
auto gatheralltime = gathertime+gathermtime;
|
|
||||||
std::cout << GridLogMessage << " Stencil SHM " << (shm_bytes)/gatheralltime/1000. << " GB/s per rank"<<std::endl;
|
|
||||||
std::cout << GridLogMessage << " Stencil SHM " << (shm_bytes)/gatheralltime/1000.*NP/NN << " GB/s per node"<<std::endl;
|
|
||||||
|
|
||||||
auto all_bytes = comms_bytes+shm_bytes;
|
|
||||||
std::cout << GridLogMessage << " Stencil SHM all " << (all_bytes)/gatheralltime/1000. << " GB/s per rank"<<std::endl;
|
|
||||||
std::cout << GridLogMessage << " Stencil SHM all " << (all_bytes)/gatheralltime/1000.*NP/NN << " GB/s per node"<<std::endl;
|
|
||||||
|
|
||||||
auto membytes = (shm_bytes + comms_bytes/2) // read/write
|
|
||||||
+ (shm_bytes+comms_bytes)/2 * sizeof(vobj)/sizeof(cobj);
|
|
||||||
std::cout << GridLogMessage << " Stencil SHM mem " << (membytes)/gatheralltime/1000. << " GB/s per rank"<<std::endl;
|
|
||||||
std::cout << GridLogMessage << " Stencil SHM mem " << (membytes)/gatheralltime/1000.*NP/NN << " GB/s per node"<<std::endl;
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
PRINTIT(mpi3synctime);
|
|
||||||
PRINTIT(mpi3synctime_g);
|
|
||||||
PRINTIT(shmmergetime);
|
|
||||||
PRINTIT(splicetime);
|
|
||||||
PRINTIT(nosplicetime);
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
#undef PRINTIT
|
|
||||||
#undef AVERAGE
|
|
||||||
};
|
|
||||||
|
|
||||||
};
|
};
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
@ -417,7 +417,7 @@ public:
|
|||||||
stream << "{";
|
stream << "{";
|
||||||
for (int j = 0; j < N; j++) {
|
for (int j = 0; j < N; j++) {
|
||||||
stream << o._internal[i][j];
|
stream << o._internal[i][j];
|
||||||
if (i < N - 1) stream << ",";
|
if (j < N - 1) stream << ",";
|
||||||
}
|
}
|
||||||
stream << "}";
|
stream << "}";
|
||||||
if (i != N - 1) stream << "\n\t\t";
|
if (i != N - 1) stream << "\n\t\t";
|
||||||
|
@ -8,6 +8,7 @@ void acceleratorThreads(uint32_t t) {accelerator_threads = t;};
|
|||||||
|
|
||||||
#ifdef GRID_CUDA
|
#ifdef GRID_CUDA
|
||||||
cudaDeviceProp *gpu_props;
|
cudaDeviceProp *gpu_props;
|
||||||
|
cudaStream_t copyStream;
|
||||||
void acceleratorInit(void)
|
void acceleratorInit(void)
|
||||||
{
|
{
|
||||||
int nDevices = 1;
|
int nDevices = 1;
|
||||||
@ -83,11 +84,11 @@ void acceleratorInit(void)
|
|||||||
printf("AcceleratorCudaInit: using default device \n");
|
printf("AcceleratorCudaInit: using default device \n");
|
||||||
printf("AcceleratorCudaInit: assume user either uses a) IBM jsrun, or \n");
|
printf("AcceleratorCudaInit: assume user either uses a) IBM jsrun, or \n");
|
||||||
printf("AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding \n");
|
printf("AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding \n");
|
||||||
printf("AcceleratorCudaInit: Configure options --enable-summit, --enable-select-gpu=no \n");
|
printf("AcceleratorCudaInit: Configure options --enable-setdevice=no \n");
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
printf("AcceleratorCudaInit: rank %d setting device to node rank %d\n",world_rank,rank);
|
printf("AcceleratorCudaInit: rank %d setting device to node rank %d\n",world_rank,rank);
|
||||||
printf("AcceleratorCudaInit: Configure options --enable-select-gpu=yes \n");
|
printf("AcceleratorCudaInit: Configure options --enable-setdevice=yes \n");
|
||||||
cudaSetDevice(rank);
|
cudaSetDevice(rank);
|
||||||
#endif
|
#endif
|
||||||
if ( world_rank == 0 ) printf("AcceleratorCudaInit: ================================================\n");
|
if ( world_rank == 0 ) printf("AcceleratorCudaInit: ================================================\n");
|
||||||
|
@ -39,6 +39,10 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifdef HAVE_MM_MALLOC_H
|
#ifdef HAVE_MM_MALLOC_H
|
||||||
#include <mm_malloc.h>
|
#include <mm_malloc.h>
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef __APPLE__
|
||||||
|
// no memalign
|
||||||
|
// Stand-in for memalign() on platforms that lack it. The request is forwarded
// to posix_memalign so the alignment is actually honoured; plain malloc() only
// guarantees alignment suitable for fundamental types.
//   align - requested alignment in bytes; rounded up to a power of two that is
//           at least sizeof(void *), as posix_memalign requires
//   bytes - allocation size
// Returns the aligned block (release it with free()), or nullptr on failure.
inline void *memalign(size_t align, size_t bytes)
{
  size_t alignment = sizeof(void *);
  while (alignment < align) {
    alignment <<= 1;
    if (alignment == 0) return nullptr; // requested alignment is not representable
  }
  void *ptr = nullptr;
  if (posix_memalign(&ptr, alignment, bytes) != 0) return nullptr;
  return ptr;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
@ -101,6 +105,7 @@ void acceleratorInit(void);
|
|||||||
#define accelerator_inline __host__ __device__ inline
|
#define accelerator_inline __host__ __device__ inline
|
||||||
|
|
||||||
extern int acceleratorAbortOnGpuError;
|
extern int acceleratorAbortOnGpuError;
|
||||||
|
extern cudaStream_t copyStream;
|
||||||
|
|
||||||
accelerator_inline int acceleratorSIMTlane(int Nsimd) {
|
accelerator_inline int acceleratorSIMTlane(int Nsimd) {
|
||||||
#ifdef GRID_SIMT
|
#ifdef GRID_SIMT
|
||||||
@ -209,9 +214,13 @@ inline void *acceleratorAllocDevice(size_t bytes)
|
|||||||
inline void acceleratorFreeShared(void *ptr){ cudaFree(ptr);};
|
inline void acceleratorFreeShared(void *ptr){ cudaFree(ptr);};
|
||||||
inline void acceleratorFreeDevice(void *ptr){ cudaFree(ptr);};
|
inline void acceleratorFreeDevice(void *ptr){ cudaFree(ptr);};
|
||||||
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { cudaMemcpy(to,from,bytes, cudaMemcpyHostToDevice);}
|
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { cudaMemcpy(to,from,bytes, cudaMemcpyHostToDevice);}
|
||||||
inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes) { cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToDevice);}
|
|
||||||
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);}
|
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);}
|
||||||
inline void acceleratorMemSet(void *base,int value,size_t bytes) { cudaMemset(base,value,bytes);}
|
inline void acceleratorMemSet(void *base,int value,size_t bytes) { cudaMemset(base,value,bytes);}
|
||||||
|
// Queues a device-to-device copy of `bytes` bytes on copyStream via cudaMemcpyAsync.
// Argument order is (from, to), i.e. the reverse of memcpy.
// Callers wait for completion with acceleratorCopySynchronise().
// NOTE(review): the creation of copyStream is not visible here - confirm it is set up before first use.
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
|
||||||
|
{
|
||||||
|
cudaMemcpyAsync(to,from,bytes, cudaMemcpyDeviceToDevice,copyStream);
|
||||||
|
}
|
||||||
|
// Waits on copyStream (cudaStreamSynchronize), the stream used by acceleratorCopyDeviceToDeviceAsynch.
inline void acceleratorCopySynchronise(void) { cudaStreamSynchronize(copyStream); };
|
||||||
inline int acceleratorIsCommunicable(void *ptr)
|
inline int acceleratorIsCommunicable(void *ptr)
|
||||||
{
|
{
|
||||||
// int uvm=0;
|
// int uvm=0;
|
||||||
@ -285,7 +294,10 @@ inline void *acceleratorAllocShared(size_t bytes){ return malloc_shared(bytes,*t
|
|||||||
inline void *acceleratorAllocDevice(size_t bytes){ return malloc_device(bytes,*theGridAccelerator);};
|
inline void *acceleratorAllocDevice(size_t bytes){ return malloc_device(bytes,*theGridAccelerator);};
|
||||||
inline void acceleratorFreeShared(void *ptr){free(ptr,*theGridAccelerator);};
|
inline void acceleratorFreeShared(void *ptr){free(ptr,*theGridAccelerator);};
|
||||||
inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);};
|
inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);};
|
||||||
inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes) { theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();}
|
// Submits a copy of `bytes` bytes to theGridAccelerator without calling wait();
// the neighbouring blocking copy helpers call wait() right after memcpy.
// Argument order is (from, to), i.e. the reverse of memcpy.
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) {
|
||||||
|
theGridAccelerator->memcpy(to,from,bytes);
|
||||||
|
}
|
||||||
|
// Calls wait() on theGridAccelerator, the queue used by acceleratorCopyDeviceToDeviceAsynch.
inline void acceleratorCopySynchronise(void) { theGridAccelerator->wait(); }
|
||||||
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();}
|
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();}
|
||||||
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();}
|
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();}
|
||||||
inline void acceleratorMemSet(void *base,int value,size_t bytes) { theGridAccelerator->memset(base,value,bytes); theGridAccelerator->wait();}
|
inline void acceleratorMemSet(void *base,int value,size_t bytes) { theGridAccelerator->memset(base,value,bytes); theGridAccelerator->wait();}
|
||||||
@ -390,7 +402,8 @@ inline void acceleratorFreeShared(void *ptr){ hipFree(ptr);};
|
|||||||
inline void acceleratorFreeDevice(void *ptr){ hipFree(ptr);};
|
inline void acceleratorFreeDevice(void *ptr){ hipFree(ptr);};
|
||||||
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { hipMemcpy(to,from,bytes, hipMemcpyHostToDevice);}
|
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { hipMemcpy(to,from,bytes, hipMemcpyHostToDevice);}
|
||||||
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ hipMemcpy(to,from,bytes, hipMemcpyDeviceToHost);}
|
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ hipMemcpy(to,from,bytes, hipMemcpyDeviceToHost);}
|
||||||
inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes) { hipMemcpy(to,from,bytes, hipMemcpyDeviceToDevice);}
|
// Device-to-device copy; despite the name this variant issues a plain hipMemcpy, not an asynchronous call.
// Argument order is (from, to), i.e. the reverse of memcpy.
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { hipMemcpy(to,from,bytes, hipMemcpyDeviceToDevice);}
|
||||||
|
// Intentionally empty: this variant's acceleratorCopyDeviceToDeviceAsynch uses plain hipMemcpy, so no stream is waited on here.
inline void acceleratorCopySynchronise(void) { }
|
||||||
inline void acceleratorMemSet(void *base,int value,size_t bytes) { hipMemset(base,value,bytes);}
|
inline void acceleratorMemSet(void *base,int value,size_t bytes) { hipMemset(base,value,bytes);}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
@ -419,6 +432,8 @@ inline void acceleratorMemSet(void *base,int value,size_t bytes) { hipMemset(bas
|
|||||||
|
|
||||||
#undef GRID_SIMT
|
#undef GRID_SIMT
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#define accelerator
|
#define accelerator
|
||||||
#define accelerator_inline strong_inline
|
#define accelerator_inline strong_inline
|
||||||
#define accelerator_for(iterator,num,nsimd, ... ) thread_for(iterator, num, { __VA_ARGS__ });
|
#define accelerator_for(iterator,num,nsimd, ... ) thread_for(iterator, num, { __VA_ARGS__ });
|
||||||
@ -429,7 +444,8 @@ inline void acceleratorMemSet(void *base,int value,size_t bytes) { hipMemset(bas
|
|||||||
accelerator_inline int acceleratorSIMTlane(int Nsimd) { return 0; } // CUDA specific
|
accelerator_inline int acceleratorSIMTlane(int Nsimd) { return 0; } // CUDA specific
|
||||||
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { memcpy(to,from,bytes);}
|
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { memcpy(to,from,bytes);}
|
||||||
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ memcpy(to,from,bytes);}
|
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ memcpy(to,from,bytes);}
|
||||||
inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes) { memcpy(to,from,bytes);}
|
// memcpy-based variant: copies `bytes` bytes from `from` to `to`; the copy has completed when this returns.
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { memcpy(to,from,bytes);}
|
||||||
|
// No-op: the memcpy-based acceleratorCopyDeviceToDeviceAsynch has already finished when it returns,
// so there is nothing to wait for.
// NOTE(review): the hip* variant in this file spells this acceleratorCopySynchronise ("s");
// confirm which spelling callers rely on before unifying the two.
inline void acceleratorCopySynchronize(void) {}
|
||||||
|
|
||||||
inline int acceleratorIsCommunicable(void *ptr){ return 1; }
|
inline int acceleratorIsCommunicable(void *ptr){ return 1; }
|
||||||
inline void acceleratorMemSet(void *base,int value,size_t bytes) { memset(base,value,bytes);}
|
inline void acceleratorMemSet(void *base,int value,size_t bytes) { memset(base,value,bytes);}
|
||||||
|
@ -56,6 +56,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
static int
|
static int
|
||||||
feenableexcept (unsigned int excepts)
|
feenableexcept (unsigned int excepts)
|
||||||
{
|
{
|
||||||
|
#if 0
|
||||||
|
// Fails on Apple M1
|
||||||
static fenv_t fenv;
|
static fenv_t fenv;
|
||||||
unsigned int new_excepts = excepts & FE_ALL_EXCEPT;
|
unsigned int new_excepts = excepts & FE_ALL_EXCEPT;
|
||||||
unsigned int old_excepts; // previous masks
|
unsigned int old_excepts; // previous masks
|
||||||
@ -70,6 +72,8 @@ feenableexcept (unsigned int excepts)
|
|||||||
|
|
||||||
iold_excepts = (int) old_excepts;
|
iold_excepts = (int) old_excepts;
|
||||||
return ( fesetenv (&fenv) ? -1 : iold_excepts );
|
return ( fesetenv (&fenv) ? -1 : iold_excepts );
|
||||||
|
#endif
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -297,6 +301,13 @@ void Grid_init(int *argc,char ***argv)
|
|||||||
GlobalSharedMemory::MAX_MPI_SHM_BYTES = MB64*1024LL*1024LL;
|
GlobalSharedMemory::MAX_MPI_SHM_BYTES = MB64*1024LL*1024LL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--shm-mpi") ){
|
||||||
|
int forcempi;
|
||||||
|
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--shm-mpi");
|
||||||
|
GridCmdOptionInt(arg,forcempi);
|
||||||
|
Stencil_force_mpi = (bool)forcempi;
|
||||||
|
}
|
||||||
|
|
||||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--device-mem") ){
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--device-mem") ){
|
||||||
int MB;
|
int MB;
|
||||||
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--device-mem");
|
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--device-mem");
|
||||||
@ -415,7 +426,9 @@ void Grid_init(int *argc,char ***argv)
|
|||||||
std::cout<<GridLogMessage<<" --threads n : default number of OMP threads"<<std::endl;
|
std::cout<<GridLogMessage<<" --threads n : default number of OMP threads"<<std::endl;
|
||||||
std::cout<<GridLogMessage<<" --grid n.n.n.n : default Grid size"<<std::endl;
|
std::cout<<GridLogMessage<<" --grid n.n.n.n : default Grid size"<<std::endl;
|
||||||
std::cout<<GridLogMessage<<" --shm M : allocate M megabytes of shared memory for comms"<<std::endl;
|
std::cout<<GridLogMessage<<" --shm M : allocate M megabytes of shared memory for comms"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<" --shm-mpi 0|1 : Force MPI usage under multi-rank per node "<<std::endl;
|
||||||
std::cout<<GridLogMessage<<" --shm-hugepages : use explicit huge pages in mmap call "<<std::endl;
|
std::cout<<GridLogMessage<<" --shm-hugepages : use explicit huge pages in mmap call "<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<" --device-mem M : Size of device software cache for lattice fields (MB) "<<std::endl;
|
||||||
std::cout<<GridLogMessage<<std::endl;
|
std::cout<<GridLogMessage<<std::endl;
|
||||||
std::cout<<GridLogMessage<<"Verbose and debug:"<<std::endl;
|
std::cout<<GridLogMessage<<"Verbose and debug:"<<std::endl;
|
||||||
std::cout<<GridLogMessage<<std::endl;
|
std::cout<<GridLogMessage<<std::endl;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
# additional include paths necessary to compile the C++ library
|
# additional include paths necessary to compile the C++ library
|
||||||
SUBDIRS = Grid HMC benchmarks tests
|
SUBDIRS = Grid HMC benchmarks tests examples
|
||||||
|
|
||||||
include $(top_srcdir)/doxygen.inc
|
include $(top_srcdir)/doxygen.inc
|
||||||
|
|
||||||
|
@ -236,34 +236,6 @@ int main (int argc, char ** argv)
|
|||||||
Dw.Report();
|
Dw.Report();
|
||||||
}
|
}
|
||||||
|
|
||||||
DomainWallFermionRL DwH(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
|
||||||
if (0) {
|
|
||||||
FGrid->Barrier();
|
|
||||||
DwH.ZeroCounters();
|
|
||||||
DwH.Dhop(src,result,0);
|
|
||||||
double t0=usecond();
|
|
||||||
for(int i=0;i<ncall;i++){
|
|
||||||
__SSC_START;
|
|
||||||
DwH.Dhop(src,result,0);
|
|
||||||
__SSC_STOP;
|
|
||||||
}
|
|
||||||
double t1=usecond();
|
|
||||||
FGrid->Barrier();
|
|
||||||
|
|
||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
|
||||||
double flops=single_site_flops*volume*ncall;
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
|
|
||||||
err = ref-result;
|
|
||||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
|
||||||
|
|
||||||
assert (norm2(err)< 1.0e-3 );
|
|
||||||
DwH.Report();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (1)
|
if (1)
|
||||||
{ // Naive wilson dag implementation
|
{ // Naive wilson dag implementation
|
||||||
ref = Zero();
|
ref = Zero();
|
||||||
|
@ -182,7 +182,7 @@ int main (int argc, char ** argv)
|
|||||||
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
|
||||||
DomainWallFermionF Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
DomainWallFermionF Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||||
int ncall =1000;
|
int ncall =3000;
|
||||||
|
|
||||||
if (1) {
|
if (1) {
|
||||||
FGrid->Barrier();
|
FGrid->Barrier();
|
||||||
|
@ -2,7 +2,6 @@
|
|||||||
#include <sstream>
|
#include <sstream>
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
;
|
|
||||||
|
|
||||||
template<class d>
|
template<class d>
|
||||||
struct scal {
|
struct scal {
|
||||||
@ -118,30 +117,6 @@ int main (int argc, char ** argv)
|
|||||||
Dw.Report();
|
Dw.Report();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout << GridLogMessage<< "* SINGLE/HALF"<<std::endl;
|
|
||||||
GparityDomainWallFermionFH DwH(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
|
||||||
if (1) {
|
|
||||||
FGrid->Barrier();
|
|
||||||
DwH.ZeroCounters();
|
|
||||||
DwH.Dhop(src,result,0);
|
|
||||||
double t0=usecond();
|
|
||||||
for(int i=0;i<ncall;i++){
|
|
||||||
__SSC_START;
|
|
||||||
DwH.Dhop(src,result,0);
|
|
||||||
__SSC_STOP;
|
|
||||||
}
|
|
||||||
double t1=usecond();
|
|
||||||
FGrid->Barrier();
|
|
||||||
|
|
||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
|
||||||
double flops=2*1320*volume*ncall;
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
|
|
||||||
DwH.Report();
|
|
||||||
}
|
|
||||||
|
|
||||||
GridCartesian * UGrid_d = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
|
GridCartesian * UGrid_d = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
|
||||||
GridRedBlackCartesian * UrbGrid_d = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_d);
|
GridRedBlackCartesian * UrbGrid_d = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_d);
|
||||||
|
@ -390,6 +390,7 @@ case ${CXXTEST} in
|
|||||||
CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr"
|
CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr"
|
||||||
if test $ac_openmp = yes; then
|
if test $ac_openmp = yes; then
|
||||||
CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp"
|
CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp"
|
||||||
|
LDFLAGS="$LDFLAGS -Xcompiler -fopenmp"
|
||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
hipcc)
|
hipcc)
|
||||||
@ -815,6 +816,7 @@ AC_CONFIG_FILES(tests/smearing/Makefile)
|
|||||||
AC_CONFIG_FILES(tests/qdpxx/Makefile)
|
AC_CONFIG_FILES(tests/qdpxx/Makefile)
|
||||||
AC_CONFIG_FILES(tests/testu01/Makefile)
|
AC_CONFIG_FILES(tests/testu01/Makefile)
|
||||||
AC_CONFIG_FILES(benchmarks/Makefile)
|
AC_CONFIG_FILES(benchmarks/Makefile)
|
||||||
|
AC_CONFIG_FILES(examples/Makefile)
|
||||||
AC_OUTPUT
|
AC_OUTPUT
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
|
Binary file not shown.
@ -1787,7 +1787,7 @@ Hdf5Writer Hdf5Reader HDF5
|
|||||||
|
|
||||||
Write interfaces, similar to the XML facilities in QDP++ are presented. However,
|
Write interfaces, similar to the XML facilities in QDP++ are presented. However,
|
||||||
the serialisation routines are automatically generated by the macro, and a virtual
|
the serialisation routines are automatically generated by the macro, and a virtual
|
||||||
reader adn writer interface enables writing to any of a number of formats.
|
reader and writer interface enables writing to any of a number of formats.
|
||||||
|
|
||||||
**Example**::
|
**Example**::
|
||||||
|
|
||||||
@ -1814,6 +1814,91 @@ reader adn writer interface enables writing to any of a number of formats.
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Eigen tensor support -- added 2019H1
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
The Serialisation library was expanded in 2019 to support de/serialisation of
|
||||||
|
Eigen tensors. De/serialisation of existing types was not changed. Data files
|
||||||
|
without Eigen tensors remain compatible with earlier versions of Grid and other readers.
|
||||||
|
Conversely, data files that contain serialised Eigen tensors are a breaking change.
|
||||||
|
|
||||||
|
Eigen tensor serialisation support was added to BaseIO, which was modified to provide a Traits class
|
||||||
|
to recognise Eigen tensors with elements that are either: primitive scalars (arithmetic and complex types);
|
||||||
|
or Grid tensors.
|
||||||
|
|
||||||
|
**Traits determining de/serialisable scalars**::
|
||||||
|
|
||||||
|
// Is this an Eigen tensor
|
||||||
|
template<typename T> struct is_tensor : std::integral_constant<bool,
|
||||||
|
std::is_base_of<Eigen::TensorBase<T, Eigen::ReadOnlyAccessors>, T>::value> {};
|
||||||
|
// Is this an Eigen tensor of a supported scalar
|
||||||
|
template<typename T, typename V = void> struct is_tensor_of_scalar : public std::false_type {};
|
||||||
|
template<typename T> struct is_tensor_of_scalar<T, typename std::enable_if<is_tensor<T>::value && is_scalar<typename T::Scalar>::value>::type> : public std::true_type {};
|
||||||
|
// Is this an Eigen tensor of a supported container
|
||||||
|
template<typename T, typename V = void> struct is_tensor_of_container : public std::false_type {};
|
||||||
|
template<typename T> struct is_tensor_of_container<T, typename std::enable_if<is_tensor<T>::value && isGridTensor<typename T::Scalar>::value>::type> : public std::true_type {};
|
||||||
|
|
||||||
|
|
||||||
|
Eigen tensors are regular, multidimensional objects, and each Reader/Writer
|
||||||
|
was extended to support this new datatype. Where the Eigen tensor contains
|
||||||
|
a Grid tensor, the dimensions of the data written are the dimensions of the
|
||||||
|
Eigen tensor plus the dimensions of the underlying Grid scalar. Dimensions
|
||||||
|
of size 1 are preserved.
|
||||||
|
|
||||||
|
**New Reader/Writer methods for multi-dimensional data**::
|
||||||
|
|
||||||
|
template <typename U>
|
||||||
|
void readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim);
|
||||||
|
template <typename U>
|
||||||
|
void writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements);
|
||||||
|
|
||||||
|
|
||||||
|
On readback, the Eigen tensor rank must match the data being read, but the tensor
|
||||||
|
dimensions will be resized if necessary. Resizing is not possible for Eigen::TensorMap<T>
|
||||||
|
because these tensors use a buffer provided at construction, and this buffer cannot be changed.
|
||||||
|
Deserialisation failures cause Grid to assert.
|
||||||
|
|
||||||
|
|
||||||
|
HDF5 Optimisations -- added June 2021
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
Grid serialisation is intended to be light, deterministic and provide a layer of abstraction over
|
||||||
|
multiple file formats. HDF5 excels at handling multi-dimensional data, and the Grid Hdf5Reader/Hdf5Writer exploit this.
|
||||||
|
When serialising nested ``std::vector<T>``, where ``T`` is an arithmetic or complex type,
|
||||||
|
the Hdf5Writer writes the data as an Hdf5 DataSet object.
|
||||||
|
|
||||||
|
However, nested ``std::vector<std::vector<...T>>`` might be "ragged", i.e. not necessarily regular. E.g. a 3d nested
|
||||||
|
``std::vector`` might contain 2 rows, the first being a 2x2 block and the second being a 1x2 block.
|
||||||
|
A bug existed whereby this was not checked on write, so nested, ragged vectors
|
||||||
|
were written as a regular dataset, with a buffer under/overrun and jumbled contents.
|
||||||
|
|
||||||
|
Clearly this was not used in production, as the bug went undetected until now. Fixing this bug
|
||||||
|
is an opportunity to further optimise the HDF5 file format.
|
||||||
|
|
||||||
|
The goals of this change are to:
|
||||||
|
|
||||||
|
* Make changes to the Hdf5 file format only -- i.e. do not impact other file formats
|
||||||
|
|
||||||
|
* Implement file format changes in such a way that they are transparent to the Grid reader
|
||||||
|
|
||||||
|
* Correct the bug for ragged vectors of numeric / complex types
|
||||||
|
|
||||||
|
* Extend the support of nested std::vector<T> to arbitrarily nested Grid tensors
|
||||||
|
|
||||||
|
|
||||||
|
The trait class ``element`` has been redefined to ``is_flattenable``, which is a trait class for
|
||||||
|
potentially "flattenable" objects. These are (possibly nested) ``std::vector<T>`` where ``T`` is
|
||||||
|
an arithmetic, complex or Grid tensor type. Flattenable objects are tested on write
|
||||||
|
(with the function ``isRegularShape``) to see whether they actually are regular.
|
||||||
|
|
||||||
|
Flattenable, regular objects are written to a multidimensional HDF5 DataSet.
|
||||||
|
Otherwise, an Hdf5 sub group is created with the object "name", and each element of the outer dimension is
|
||||||
|
recursively written as object "name_n", where n is a 0-based index.
|
||||||
|
|
||||||
|
On readback (by Grid), the presence of a subgroup containing the attribute ``Grid_vector_size`` triggers a
|
||||||
|
"ragged read", otherwise a read from a DataSet is attempted.
|
||||||
|
|
||||||
|
|
||||||
Data parallel field IO
|
Data parallel field IO
|
||||||
-----------------------
|
-----------------------
|
||||||
|
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user