mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
commit
5b117865b2
@ -230,15 +230,18 @@ public:
|
|||||||
result = source;
|
result = source;
|
||||||
int pc = processor_coor[dim];
|
int pc = processor_coor[dim];
|
||||||
for(int p=0;p<processors[dim];p++) {
|
for(int p=0;p<processors[dim];p++) {
|
||||||
thread_for(idx, sgrid->lSites(),{
|
{
|
||||||
|
autoView(r_v,result,CpuRead);
|
||||||
|
autoView(p_v,pgbuf,CpuWrite);
|
||||||
|
thread_for(idx, sgrid->lSites(),{
|
||||||
Coordinate cbuf(Nd);
|
Coordinate cbuf(Nd);
|
||||||
sobj s;
|
sobj s;
|
||||||
sgrid->LocalIndexToLocalCoor(idx,cbuf);
|
sgrid->LocalIndexToLocalCoor(idx,cbuf);
|
||||||
peekLocalSite(s,result,cbuf);
|
peekLocalSite(s,r_v,cbuf);
|
||||||
cbuf[dim]+=((pc+p) % processors[dim])*L;
|
cbuf[dim]+=((pc+p) % processors[dim])*L;
|
||||||
// cbuf[dim]+=p*L;
|
pokeLocalSite(s,p_v,cbuf);
|
||||||
pokeLocalSite(s,pgbuf,cbuf);
|
});
|
||||||
});
|
}
|
||||||
if (p != processors[dim] - 1) {
|
if (p != processors[dim] - 1) {
|
||||||
result = Cshift(result,dim,L);
|
result = Cshift(result,dim,L);
|
||||||
}
|
}
|
||||||
@ -267,15 +270,19 @@ public:
|
|||||||
flops+= flops_call*NN;
|
flops+= flops_call*NN;
|
||||||
|
|
||||||
// writing out result
|
// writing out result
|
||||||
thread_for(idx,sgrid->lSites(),{
|
{
|
||||||
|
autoView(pgbuf_v,pgbuf,CpuRead);
|
||||||
|
autoView(result_v,result,CpuWrite);
|
||||||
|
thread_for(idx,sgrid->lSites(),{
|
||||||
Coordinate clbuf(Nd), cgbuf(Nd);
|
Coordinate clbuf(Nd), cgbuf(Nd);
|
||||||
sobj s;
|
sobj s;
|
||||||
sgrid->LocalIndexToLocalCoor(idx,clbuf);
|
sgrid->LocalIndexToLocalCoor(idx,clbuf);
|
||||||
cgbuf = clbuf;
|
cgbuf = clbuf;
|
||||||
cgbuf[dim] = clbuf[dim]+L*pc;
|
cgbuf[dim] = clbuf[dim]+L*pc;
|
||||||
peekLocalSite(s,pgbuf,cgbuf);
|
peekLocalSite(s,pgbuf_v,cgbuf);
|
||||||
pokeLocalSite(s,result,clbuf);
|
pokeLocalSite(s,result_v,clbuf);
|
||||||
});
|
});
|
||||||
|
}
|
||||||
result = result*div;
|
result = result*div;
|
||||||
|
|
||||||
// destroying plan
|
// destroying plan
|
||||||
|
@ -86,23 +86,33 @@ void MemoryManager::Init(void)
|
|||||||
Ncache[AccSmall]=Nc;
|
Ncache[AccSmall]=Nc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << "MemoryManager::Init() setting up"<<std::endl;
|
std::cout << GridLogMessage<< "MemoryManager::Init() setting up"<<std::endl;
|
||||||
#ifdef ALLOCATION_CACHE
|
#ifdef ALLOCATION_CACHE
|
||||||
std::cout << "MemoryManager::Init() cache pool for recent allocations: SMALL "<<Ncache[CpuSmall]<<" LARGE "<<Ncache[Cpu]<<std::endl;
|
std::cout << GridLogMessage<< "MemoryManager::Init() cache pool for recent allocations: SMALL "<<Ncache[CpuSmall]<<" LARGE "<<Ncache[Cpu]<<std::endl;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef GRID_UVM
|
#ifdef GRID_UVM
|
||||||
std::cout << "MemoryManager::Init() Unified memory space"<<std::endl;
|
std::cout << GridLogMessage<< "MemoryManager::Init() Unified memory space"<<std::endl;
|
||||||
#ifdef GRID_CUDA
|
#ifdef GRID_CUDA
|
||||||
std::cout << "MemoryManager::Init() Using cudaMallocManaged"<<std::endl;
|
std::cout << GridLogMessage<< "MemoryManager::Init() Using cudaMallocManaged"<<std::endl;
|
||||||
#endif
|
#endif
|
||||||
#ifdef GRID_HIP
|
#ifdef GRID_HIP
|
||||||
std::cout << "MemoryManager::Init() Using hipMallocManaged"<<std::endl;
|
std::cout << GridLogMessage<< "MemoryManager::Init() Using hipMallocManaged"<<std::endl;
|
||||||
#endif
|
#endif
|
||||||
#ifdef GRID_SYCL
|
#ifdef GRID_SYCL
|
||||||
std::cout << "MemoryManager::Init() Using SYCL malloc_shared"<<std::endl;
|
std::cout << GridLogMessage<< "MemoryManager::Init() Using SYCL malloc_shared"<<std::endl;
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
std::cout << "MemoryManager::Init() Non unified: Caching accelerator data in dedicated memory"<<std::endl;
|
std::cout << GridLogMessage<< "MemoryManager::Init() Non unified: Caching accelerator data in dedicated memory"<<std::endl;
|
||||||
|
#ifdef GRID_CUDA
|
||||||
|
std::cout << GridLogMessage<< "MemoryManager::Init() Using cudaMalloc"<<std::endl;
|
||||||
|
#endif
|
||||||
|
#ifdef GRID_HIP
|
||||||
|
std::cout << GridLogMessage<< "MemoryManager::Init() Using hipMalloc"<<std::endl;
|
||||||
|
#endif
|
||||||
|
#ifdef GRID_SYCL
|
||||||
|
std::cout << GridLogMessage<< "MemoryManager::Init() Using SYCL malloc_device"<<std::endl;
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -34,7 +34,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
|
|
||||||
// Move control to configure.ac and Config.h?
|
// Move control to configure.ac and Config.h?
|
||||||
|
|
||||||
#undef ALLOCATION_CACHE
|
#define ALLOCATION_CACHE
|
||||||
#define GRID_ALLOC_ALIGN (2*1024*1024)
|
#define GRID_ALLOC_ALIGN (2*1024*1024)
|
||||||
#define GRID_ALLOC_SMALL_LIMIT (4096)
|
#define GRID_ALLOC_SMALL_LIMIT (4096)
|
||||||
|
|
||||||
|
@ -154,17 +154,18 @@ void peekSite(sobj &s,const Lattice<vobj> &l,const Coordinate &site){
|
|||||||
//////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////
|
||||||
// Peek a scalar object from the SIMD array
|
// Peek a scalar object from the SIMD array
|
||||||
//////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////
|
||||||
|
// Must be CPU read view
|
||||||
template<class vobj,class sobj>
|
template<class vobj,class sobj>
|
||||||
inline void peekLocalSite(sobj &s,const Lattice<vobj> &l,Coordinate &site)
|
inline void peekLocalSite(sobj &s,const LatticeView<vobj> &l,Coordinate &site)
|
||||||
{
|
{
|
||||||
GridBase *grid = l.Grid();
|
GridBase *grid = l.getGrid();
|
||||||
|
assert(l.mode==CpuRead);
|
||||||
typedef typename vobj::scalar_type scalar_type;
|
typedef typename vobj::scalar_type scalar_type;
|
||||||
typedef typename vobj::vector_type vector_type;
|
typedef typename vobj::vector_type vector_type;
|
||||||
|
|
||||||
int Nsimd = grid->Nsimd();
|
int Nsimd = grid->Nsimd();
|
||||||
|
|
||||||
assert( l.Checkerboard()== l.Grid()->CheckerBoard(site));
|
assert( l.Checkerboard()== grid->CheckerBoard(site));
|
||||||
assert( sizeof(sobj)*Nsimd == sizeof(vobj));
|
assert( sizeof(sobj)*Nsimd == sizeof(vobj));
|
||||||
|
|
||||||
static const int words=sizeof(vobj)/sizeof(vector_type);
|
static const int words=sizeof(vobj)/sizeof(vector_type);
|
||||||
@ -172,8 +173,7 @@ inline void peekLocalSite(sobj &s,const Lattice<vobj> &l,Coordinate &site)
|
|||||||
idx= grid->iIndex(site);
|
idx= grid->iIndex(site);
|
||||||
odx= grid->oIndex(site);
|
odx= grid->oIndex(site);
|
||||||
|
|
||||||
autoView( l_v , l, CpuRead);
|
scalar_type * vp = (scalar_type *)&l[odx];
|
||||||
scalar_type * vp = (scalar_type *)&l_v[odx];
|
|
||||||
scalar_type * pt = (scalar_type *)&s;
|
scalar_type * pt = (scalar_type *)&s;
|
||||||
|
|
||||||
for(int w=0;w<words;w++){
|
for(int w=0;w<words;w++){
|
||||||
@ -182,18 +182,19 @@ inline void peekLocalSite(sobj &s,const Lattice<vobj> &l,Coordinate &site)
|
|||||||
|
|
||||||
return;
|
return;
|
||||||
};
|
};
|
||||||
|
// Must be CPU write view
|
||||||
template<class vobj,class sobj>
|
template<class vobj,class sobj>
|
||||||
inline void pokeLocalSite(const sobj &s,Lattice<vobj> &l,Coordinate &site)
|
inline void pokeLocalSite(const sobj &s,LatticeView<vobj> &l,Coordinate &site)
|
||||||
{
|
{
|
||||||
GridBase *grid=l.Grid();
|
GridBase *grid=l.getGrid();
|
||||||
|
assert(l.mode==CpuWrite);
|
||||||
|
|
||||||
typedef typename vobj::scalar_type scalar_type;
|
typedef typename vobj::scalar_type scalar_type;
|
||||||
typedef typename vobj::vector_type vector_type;
|
typedef typename vobj::vector_type vector_type;
|
||||||
|
|
||||||
int Nsimd = grid->Nsimd();
|
int Nsimd = grid->Nsimd();
|
||||||
|
|
||||||
assert( l.Checkerboard()== l.Grid()->CheckerBoard(site));
|
assert( l.Checkerboard()== grid->CheckerBoard(site));
|
||||||
assert( sizeof(sobj)*Nsimd == sizeof(vobj));
|
assert( sizeof(sobj)*Nsimd == sizeof(vobj));
|
||||||
|
|
||||||
static const int words=sizeof(vobj)/sizeof(vector_type);
|
static const int words=sizeof(vobj)/sizeof(vector_type);
|
||||||
@ -201,8 +202,7 @@ inline void pokeLocalSite(const sobj &s,Lattice<vobj> &l,Coordinate &site)
|
|||||||
idx= grid->iIndex(site);
|
idx= grid->iIndex(site);
|
||||||
odx= grid->oIndex(site);
|
odx= grid->oIndex(site);
|
||||||
|
|
||||||
autoView( l_v , l, CpuWrite);
|
scalar_type * vp = (scalar_type *)&l[odx];
|
||||||
scalar_type * vp = (scalar_type *)&l_v[odx];
|
|
||||||
scalar_type * pt = (scalar_type *)&s;
|
scalar_type * pt = (scalar_type *)&s;
|
||||||
for(int w=0;w<words;w++){
|
for(int w=0;w<words;w++){
|
||||||
vp[idx+w*Nsimd] = pt[w];
|
vp[idx+w*Nsimd] = pt[w];
|
||||||
|
@ -520,15 +520,17 @@ void localConvert(const Lattice<vobj> &in,Lattice<vvobj> &out)
|
|||||||
assert(ig->lSites() == og->lSites());
|
assert(ig->lSites() == og->lSites());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
autoView(in_v,in,CpuRead);
|
||||||
|
autoView(out_v,out,CpuWrite);
|
||||||
thread_for(idx, ig->lSites(),{
|
thread_for(idx, ig->lSites(),{
|
||||||
sobj s;
|
sobj s;
|
||||||
ssobj ss;
|
ssobj ss;
|
||||||
|
|
||||||
Coordinate lcoor(ni);
|
Coordinate lcoor(ni);
|
||||||
ig->LocalIndexToLocalCoor(idx,lcoor);
|
ig->LocalIndexToLocalCoor(idx,lcoor);
|
||||||
peekLocalSite(s,in,lcoor);
|
peekLocalSite(s,in_v,lcoor);
|
||||||
ss=s;
|
ss=s;
|
||||||
pokeLocalSite(ss,out,lcoor);
|
pokeLocalSite(ss,out_v,lcoor);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -588,8 +590,6 @@ void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate Fro
|
|||||||
for(int w=0;w<words;w++){
|
for(int w=0;w<words;w++){
|
||||||
tp[idx_t+w*Nsimd] = fp[idx_f+w*Nsimd]; // FIXME IF RRII layout, type pun no worke
|
tp[idx_t+w*Nsimd] = fp[idx_f+w*Nsimd]; // FIXME IF RRII layout, type pun no worke
|
||||||
}
|
}
|
||||||
// peekLocalSite(s,From,Fcoor);
|
|
||||||
// pokeLocalSite(s,To ,Tcoor);
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -620,6 +620,8 @@ void InsertSlice(const Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice
|
|||||||
}
|
}
|
||||||
|
|
||||||
// the above should guarantee that the operations are local
|
// the above should guarantee that the operations are local
|
||||||
|
autoView(lowDimv,lowDim,CpuRead);
|
||||||
|
autoView(higherDimv,higherDim,CpuWrite);
|
||||||
thread_for(idx,lg->lSites(),{
|
thread_for(idx,lg->lSites(),{
|
||||||
sobj s;
|
sobj s;
|
||||||
Coordinate lcoor(nl);
|
Coordinate lcoor(nl);
|
||||||
@ -632,8 +634,8 @@ void InsertSlice(const Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice
|
|||||||
hcoor[d]=lcoor[ddl++];
|
hcoor[d]=lcoor[ddl++];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
peekLocalSite(s,lowDim,lcoor);
|
peekLocalSite(s,lowDimv,lcoor);
|
||||||
pokeLocalSite(s,higherDim,hcoor);
|
pokeLocalSite(s,higherDimv,hcoor);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -661,6 +663,8 @@ void ExtractSlice(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int slic
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// the above should guarantee that the operations are local
|
// the above should guarantee that the operations are local
|
||||||
|
autoView(lowDimv,lowDim,CpuWrite);
|
||||||
|
autoView(higherDimv,higherDim,CpuRead);
|
||||||
thread_for(idx,lg->lSites(),{
|
thread_for(idx,lg->lSites(),{
|
||||||
sobj s;
|
sobj s;
|
||||||
Coordinate lcoor(nl);
|
Coordinate lcoor(nl);
|
||||||
@ -673,8 +677,8 @@ void ExtractSlice(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int slic
|
|||||||
hcoor[d]=lcoor[ddl++];
|
hcoor[d]=lcoor[ddl++];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
peekLocalSite(s,higherDim,hcoor);
|
peekLocalSite(s,higherDimv,hcoor);
|
||||||
pokeLocalSite(s,lowDim,lcoor);
|
pokeLocalSite(s,lowDimv,lcoor);
|
||||||
});
|
});
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -702,6 +706,8 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
|
|||||||
}
|
}
|
||||||
|
|
||||||
// the above should guarantee that the operations are local
|
// the above should guarantee that the operations are local
|
||||||
|
autoView(lowDimv,lowDim,CpuRead);
|
||||||
|
autoView(higherDimv,higherDim,CpuWrite);
|
||||||
thread_for(idx,lg->lSites(),{
|
thread_for(idx,lg->lSites(),{
|
||||||
sobj s;
|
sobj s;
|
||||||
Coordinate lcoor(nl);
|
Coordinate lcoor(nl);
|
||||||
@ -710,8 +716,8 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
|
|||||||
if( lcoor[orthog] == slice_lo ) {
|
if( lcoor[orthog] == slice_lo ) {
|
||||||
hcoor=lcoor;
|
hcoor=lcoor;
|
||||||
hcoor[orthog] = slice_hi;
|
hcoor[orthog] = slice_hi;
|
||||||
peekLocalSite(s,lowDim,lcoor);
|
peekLocalSite(s,lowDimv,lcoor);
|
||||||
pokeLocalSite(s,higherDim,hcoor);
|
pokeLocalSite(s,higherDimv,hcoor);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -739,6 +745,8 @@ void ExtractSliceLocal(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int
|
|||||||
}
|
}
|
||||||
|
|
||||||
// the above should guarantee that the operations are local
|
// the above should guarantee that the operations are local
|
||||||
|
autoView(lowDimv,lowDim,CpuWrite);
|
||||||
|
autoView(higherDimv,higherDim,CpuRead);
|
||||||
thread_for(idx,lg->lSites(),{
|
thread_for(idx,lg->lSites(),{
|
||||||
sobj s;
|
sobj s;
|
||||||
Coordinate lcoor(nl);
|
Coordinate lcoor(nl);
|
||||||
@ -747,8 +755,8 @@ void ExtractSliceLocal(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int
|
|||||||
if( lcoor[orthog] == slice_lo ) {
|
if( lcoor[orthog] == slice_lo ) {
|
||||||
hcoor=lcoor;
|
hcoor=lcoor;
|
||||||
hcoor[orthog] = slice_hi;
|
hcoor[orthog] = slice_hi;
|
||||||
peekLocalSite(s,higherDim,hcoor);
|
peekLocalSite(s,higherDimv,hcoor);
|
||||||
pokeLocalSite(s,lowDim,lcoor);
|
pokeLocalSite(s,lowDimv,lcoor);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -43,6 +43,8 @@ public:
|
|||||||
if (grid) conformable(grid, _grid);
|
if (grid) conformable(grid, _grid);
|
||||||
else grid = _grid;
|
else grid = _grid;
|
||||||
};
|
};
|
||||||
|
// Host only
|
||||||
|
GridBase * getGrid(void) const { return _grid; };
|
||||||
};
|
};
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -114,19 +114,22 @@ public:
|
|||||||
U = adj(Cshift(U, mu, -1));
|
U = adj(Cshift(U, mu, -1));
|
||||||
PokeIndex<LorentzIndex>(Uadj, U, mu);
|
PokeIndex<LorentzIndex>(Uadj, U, mu);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) {
|
autoView(Umu_v,Umu,CpuRead);
|
||||||
|
autoView(Uadj_v,Uadj,CpuRead);
|
||||||
|
autoView(Uds_v,Uds,CpuWrite);
|
||||||
|
thread_for( lidx, GaugeGrid->lSites(), {
|
||||||
Coordinate lcoor;
|
Coordinate lcoor;
|
||||||
GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
|
GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
|
||||||
|
|
||||||
peekLocalSite(ScalarUmu, Umu, lcoor);
|
peekLocalSite(ScalarUmu, Umu_v, lcoor);
|
||||||
for (int mu = 0; mu < 4; mu++) ScalarUds(mu) = ScalarUmu(mu);
|
for (int mu = 0; mu < 4; mu++) ScalarUds(mu) = ScalarUmu(mu);
|
||||||
|
|
||||||
peekLocalSite(ScalarUmu, Uadj, lcoor);
|
peekLocalSite(ScalarUmu, Uadj_v, lcoor);
|
||||||
for (int mu = 0; mu < 4; mu++) ScalarUds(mu + 4) = ScalarUmu(mu);
|
for (int mu = 0; mu < 4; mu++) ScalarUds(mu + 4) = ScalarUmu(mu);
|
||||||
|
|
||||||
pokeLocalSite(ScalarUds, Uds, lcoor);
|
pokeLocalSite(ScalarUds, Uds_v, lcoor);
|
||||||
}
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,FermionField &A, int mu)
|
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,FermionField &A, int mu)
|
||||||
|
@ -113,20 +113,7 @@ public:
|
|||||||
|
|
||||||
inline void InsertGaugeField(DoubledGaugeField &U_ds,const GaugeLinkField &U,int mu)
|
inline void InsertGaugeField(DoubledGaugeField &U_ds,const GaugeLinkField &U,int mu)
|
||||||
{
|
{
|
||||||
GridBase *GaugeGrid = U_ds.Grid();
|
assert(0);
|
||||||
thread_for(lidx, GaugeGrid->lSites(),{
|
|
||||||
|
|
||||||
SiteScalarGaugeLink ScalarU;
|
|
||||||
SiteDoubledGaugeField ScalarUds;
|
|
||||||
|
|
||||||
Coordinate lcoor;
|
|
||||||
GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
|
|
||||||
peekLocalSite(ScalarUds, U_ds, lcoor);
|
|
||||||
|
|
||||||
peekLocalSite(ScalarU, U, lcoor);
|
|
||||||
ScalarUds(mu) = ScalarU();
|
|
||||||
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
inline void DoubleStore(GridBase *GaugeGrid,
|
inline void DoubleStore(GridBase *GaugeGrid,
|
||||||
DoubledGaugeField &UUUds, // for Naik term
|
DoubledGaugeField &UUUds, // for Naik term
|
||||||
|
@ -98,32 +98,35 @@ void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
|
|||||||
Coordinate lcoor;
|
Coordinate lcoor;
|
||||||
typename SiteCloverType::scalar_object Qx = Zero(), Qxinv = Zero();
|
typename SiteCloverType::scalar_object Qx = Zero(), Qxinv = Zero();
|
||||||
|
|
||||||
for (int site = 0; site < lvol; site++)
|
|
||||||
{
|
{
|
||||||
grid->LocalIndexToLocalCoor(site, lcoor);
|
autoView(CTv,CloverTerm,CpuRead);
|
||||||
EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
|
autoView(CTIv,CloverTermInv,CpuWrite);
|
||||||
peekLocalSite(Qx, CloverTerm, lcoor);
|
for (int site = 0; site < lvol; site++) {
|
||||||
Qxinv = Zero();
|
grid->LocalIndexToLocalCoor(site, lcoor);
|
||||||
//if (csw!=0){
|
EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
|
||||||
for (int j = 0; j < Ns; j++)
|
peekLocalSite(Qx, CTv, lcoor);
|
||||||
for (int k = 0; k < Ns; k++)
|
Qxinv = Zero();
|
||||||
for (int a = 0; a < DimRep; a++)
|
//if (csw!=0){
|
||||||
for (int b = 0; b < DimRep; b++){
|
for (int j = 0; j < Ns; j++)
|
||||||
auto zz = Qx()(j, k)(a, b);
|
for (int k = 0; k < Ns; k++)
|
||||||
EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex<double>(zz);
|
for (int a = 0; a < DimRep; a++)
|
||||||
}
|
for (int b = 0; b < DimRep; b++){
|
||||||
// if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl;
|
auto zz = Qx()(j, k)(a, b);
|
||||||
|
EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex<double>(zz);
|
||||||
EigenInvCloverOp = EigenCloverOp.inverse();
|
}
|
||||||
//std::cout << EigenInvCloverOp << std::endl;
|
// if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl;
|
||||||
for (int j = 0; j < Ns; j++)
|
|
||||||
for (int k = 0; k < Ns; k++)
|
EigenInvCloverOp = EigenCloverOp.inverse();
|
||||||
for (int a = 0; a < DimRep; a++)
|
//std::cout << EigenInvCloverOp << std::endl;
|
||||||
for (int b = 0; b < DimRep; b++)
|
for (int j = 0; j < Ns; j++)
|
||||||
Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep);
|
for (int k = 0; k < Ns; k++)
|
||||||
// if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl;
|
for (int a = 0; a < DimRep; a++)
|
||||||
// }
|
for (int b = 0; b < DimRep; b++)
|
||||||
pokeLocalSite(Qxinv, CloverTermInv, lcoor);
|
Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep);
|
||||||
|
// if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl;
|
||||||
|
// }
|
||||||
|
pokeLocalSite(Qxinv, CTIv, lcoor);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Separate the even and odd parts
|
// Separate the even and odd parts
|
||||||
|
@ -580,16 +580,21 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
|
|||||||
cosha = (one + W*W + sk) / (abs(W)*2.0);
|
cosha = (one + W*W + sk) / (abs(W)*2.0);
|
||||||
|
|
||||||
// FIXME Need a Lattice acosh
|
// FIXME Need a Lattice acosh
|
||||||
for(int idx=0;idx<_grid->lSites();idx++){
|
|
||||||
Coordinate lcoor(Nd);
|
{
|
||||||
Tcomplex cc;
|
autoView(cosha_v,cosha,CpuRead);
|
||||||
// RealD sgn;
|
autoView(a_v,a,CpuWrite);
|
||||||
_grid->LocalIndexToLocalCoor(idx,lcoor);
|
for(int idx=0;idx<_grid->lSites();idx++){
|
||||||
peekLocalSite(cc,cosha,lcoor);
|
Coordinate lcoor(Nd);
|
||||||
assert((double)real(cc)>=1.0);
|
Tcomplex cc;
|
||||||
assert(fabs((double)imag(cc))<=1.0e-15);
|
// RealD sgn;
|
||||||
cc = ScalComplex(::acosh(real(cc)),0.0);
|
_grid->LocalIndexToLocalCoor(idx,lcoor);
|
||||||
pokeLocalSite(cc,a,lcoor);
|
peekLocalSite(cc,cosha_v,lcoor);
|
||||||
|
assert((double)real(cc)>=1.0);
|
||||||
|
assert(fabs((double)imag(cc))<=1.0e-15);
|
||||||
|
cc = ScalComplex(::acosh(real(cc)),0.0);
|
||||||
|
pokeLocalSite(cc,a_v,lcoor);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Wea = ( exp( a) * abs(W) );
|
Wea = ( exp( a) * abs(W) );
|
||||||
@ -775,17 +780,20 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt(FermionField &out,const Fe
|
|||||||
cosha = (one + W*W + sk) / (abs(W)*2.0);
|
cosha = (one + W*W + sk) / (abs(W)*2.0);
|
||||||
|
|
||||||
// FIXME Need a Lattice acosh
|
// FIXME Need a Lattice acosh
|
||||||
|
{
|
||||||
|
autoView(cosha_v,cosha,CpuRead);
|
||||||
|
autoView(a_v,a,CpuWrite);
|
||||||
for(int idx=0;idx<_grid->lSites();idx++){
|
for(int idx=0;idx<_grid->lSites();idx++){
|
||||||
Coordinate lcoor(Nd);
|
Coordinate lcoor(Nd);
|
||||||
Tcomplex cc;
|
Tcomplex cc;
|
||||||
// RealD sgn;
|
// RealD sgn;
|
||||||
_grid->LocalIndexToLocalCoor(idx,lcoor);
|
_grid->LocalIndexToLocalCoor(idx,lcoor);
|
||||||
peekLocalSite(cc,cosha,lcoor);
|
peekLocalSite(cc,cosha_v,lcoor);
|
||||||
assert((double)real(cc)>=1.0);
|
assert((double)real(cc)>=1.0);
|
||||||
assert(fabs((double)imag(cc))<=1.0e-15);
|
assert(fabs((double)imag(cc))<=1.0e-15);
|
||||||
cc = ScalComplex(::acosh(real(cc)),0.0);
|
cc = ScalComplex(::acosh(real(cc)),0.0);
|
||||||
pokeLocalSite(cc,a,lcoor);
|
pokeLocalSite(cc,a_v,lcoor);
|
||||||
}
|
}}
|
||||||
|
|
||||||
Wea = ( exp( a) * abs(W) );
|
Wea = ( exp( a) * abs(W) );
|
||||||
Wema= ( exp(-a) * abs(W) );
|
Wema= ( exp(-a) * abs(W) );
|
||||||
|
@ -67,7 +67,12 @@ WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
|
|||||||
diag_mass = 4.0 + mass;
|
diag_mass = 4.0 + mass;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int vol4;
|
||||||
|
vol4=Fgrid.oSites();
|
||||||
|
Stencil.BuildSurfaceList(1,vol4);
|
||||||
|
vol4=Hgrid.oSites();
|
||||||
|
StencilEven.BuildSurfaceList(1,vol4);
|
||||||
|
StencilOdd.BuildSurfaceList(1,vol4);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
#include <Grid/GridCore.h>
|
#include <Grid/GridCore.h>
|
||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
uint32_t accelerator_threads=8;
|
uint32_t accelerator_threads=2;
|
||||||
uint32_t acceleratorThreads(void) {return accelerator_threads;};
|
uint32_t acceleratorThreads(void) {return accelerator_threads;};
|
||||||
void acceleratorThreads(uint32_t t) {accelerator_threads = t;};
|
void acceleratorThreads(uint32_t t) {accelerator_threads = t;};
|
||||||
|
|
||||||
@ -37,9 +37,10 @@ void acceleratorInit(void)
|
|||||||
#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("AcceleratorCudaInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory);
|
#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("AcceleratorCudaInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory);
|
||||||
#define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d");
|
#define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d");
|
||||||
cudaGetDeviceProperties(&gpu_props[i], i);
|
cudaGetDeviceProperties(&gpu_props[i], i);
|
||||||
|
cudaDeviceProp prop;
|
||||||
|
prop = gpu_props[i];
|
||||||
|
totalDeviceMem = prop.totalGlobalMem;
|
||||||
if ( world_rank == 0) {
|
if ( world_rank == 0) {
|
||||||
cudaDeviceProp prop;
|
|
||||||
prop = gpu_props[i];
|
|
||||||
printf("AcceleratorCudaInit: ========================\n");
|
printf("AcceleratorCudaInit: ========================\n");
|
||||||
printf("AcceleratorCudaInit: Device Number : %d\n", i);
|
printf("AcceleratorCudaInit: Device Number : %d\n", i);
|
||||||
printf("AcceleratorCudaInit: ========================\n");
|
printf("AcceleratorCudaInit: ========================\n");
|
||||||
@ -49,7 +50,6 @@ void acceleratorInit(void)
|
|||||||
GPU_PROP(managedMemory);
|
GPU_PROP(managedMemory);
|
||||||
GPU_PROP(isMultiGpuBoard);
|
GPU_PROP(isMultiGpuBoard);
|
||||||
GPU_PROP(warpSize);
|
GPU_PROP(warpSize);
|
||||||
totalDeviceMem = prop.totalGlobalMem;
|
|
||||||
// GPU_PROP(unifiedAddressing);
|
// GPU_PROP(unifiedAddressing);
|
||||||
// GPU_PROP(l2CacheSize);
|
// GPU_PROP(l2CacheSize);
|
||||||
// GPU_PROP(singleToDoublePrecisionPerfRatio);
|
// GPU_PROP(singleToDoublePrecisionPerfRatio);
|
||||||
|
@ -286,8 +286,6 @@ void Grid_init(int *argc,char ***argv)
|
|||||||
//////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////
|
||||||
acceleratorInit(); // Must come first to set device prior to MPI init due to Omnipath Driver
|
acceleratorInit(); // Must come first to set device prior to MPI init due to Omnipath Driver
|
||||||
|
|
||||||
MemoryManager::Init();
|
|
||||||
|
|
||||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--shm") ){
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--shm") ){
|
||||||
int MB;
|
int MB;
|
||||||
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--shm");
|
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--shm");
|
||||||
@ -358,6 +356,15 @@ void Grid_init(int *argc,char ***argv)
|
|||||||
std::cout << GridLogMessage << "MPI is initialised and logging filters activated "<<std::endl;
|
std::cout << GridLogMessage << "MPI is initialised and logging filters activated "<<std::endl;
|
||||||
std::cout << GridLogMessage << "================================================ "<<std::endl;
|
std::cout << GridLogMessage << "================================================ "<<std::endl;
|
||||||
|
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////
|
||||||
|
// Memory manager
|
||||||
|
//////////////////////////////////////////////////////////
|
||||||
|
MemoryManager::Init();
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////
|
||||||
|
// Reporting
|
||||||
|
/////////////////////////////////////////////////////////
|
||||||
std::cout << GridLogMessage << "Requested "<< GlobalSharedMemory::MAX_MPI_SHM_BYTES <<" byte stencil comms buffers "<<std::endl;
|
std::cout << GridLogMessage << "Requested "<< GlobalSharedMemory::MAX_MPI_SHM_BYTES <<" byte stencil comms buffers "<<std::endl;
|
||||||
if ( GlobalSharedMemory::Hugepages) {
|
if ( GlobalSharedMemory::Hugepages) {
|
||||||
std::cout << GridLogMessage << "Mapped stencil comms buffers as MAP_HUGETLB "<<std::endl;
|
std::cout << GridLogMessage << "Mapped stencil comms buffers as MAP_HUGETLB "<<std::endl;
|
||||||
|
@ -470,6 +470,14 @@ case ${ac_SHM} in
|
|||||||
|
|
||||||
shmopen)
|
shmopen)
|
||||||
AC_DEFINE([GRID_MPI3_SHMOPEN],[1],[GRID_MPI3_SHMOPEN] )
|
AC_DEFINE([GRID_MPI3_SHMOPEN],[1],[GRID_MPI3_SHMOPEN] )
|
||||||
|
CXXFLAGS_CPY=$CXXFLAGS
|
||||||
|
LDFLAGS_CPY=$LDFLAGS
|
||||||
|
CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
|
||||||
|
LDFLAGS="$AM_LDFLAGS $LDFLAGS"
|
||||||
|
AC_SEARCH_LIBS([shm_unlink], [rt], [],
|
||||||
|
[AC_MSG_ERROR("no library found for shm_unlink")])
|
||||||
|
CXXFLAGS=$CXXFLAGS_CPY
|
||||||
|
LDFLAGS=$LDFLAGS_CPY
|
||||||
;;
|
;;
|
||||||
|
|
||||||
shmget)
|
shmget)
|
||||||
|
@ -104,7 +104,7 @@ int main (int argc, char ** argv)
|
|||||||
GridDefaultMpi());
|
GridDefaultMpi());
|
||||||
|
|
||||||
double lo=0.001;
|
double lo=0.001;
|
||||||
double hi=1.0;
|
double hi=20.0;
|
||||||
int precision=64;
|
int precision=64;
|
||||||
int degree=10;
|
int degree=10;
|
||||||
AlgRemez remez(lo,hi,precision);
|
AlgRemez remez(lo,hi,precision);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user