1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-12 20:27:06 +01:00

Shaken out stencil to the point where I think wilson dslash is correct.

Need to audit code carefully, consolidate between stencil and cshift,
and then benchmark and optimise.
This commit is contained in:
Peter Boyle
2015-04-28 08:11:59 +01:00
parent f159495a9d
commit 25d523c0f4
24 changed files with 599 additions and 605 deletions

View File

@ -5,21 +5,23 @@ namespace Grid {
template<class iobj> inline void LatticeCoordinate(Lattice<iobj> &l,int mu)
{
typedef typename iobj::scalar_object scalar_object;
typedef typename iobj::scalar_type scalar_type;
typedef typename iobj::vector_type vector_type;
GridBase *grid = l._grid;
int Nsimd = grid->iSites();
std::vector<int> gcoor;
std::vector<scalar_type> mergebuf(Nsimd);
std::vector<scalar_type *> mergeptr(Nsimd);
vector_type vI;
for(int o=0;o<grid->oSites();o++){
for(int i=0;i<grid->iSites();i++){
grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor);
mergebuf[i]=gcoor[mu];
mergeptr[i]=&mergebuf[i];
mergebuf[i]=(Integer)gcoor[mu];
}
merge(vI,mergeptr);
AmergeA<vector_type,scalar_type>(vI,mergebuf);
l._odata[o]=vI;
}
};

View File

@ -94,15 +94,12 @@ namespace Grid {
grid->Broadcast(grid->BossRank(),s);
std::vector<sobj> buf(Nsimd);
std::vector<scalar_type *> pointers(Nsimd);
// extract-modify-merge cycle is easiest way and this is not perf critical
if ( rank == grid->ThisRank() ) {
for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
extract(l._odata[odx],pointers);
extract(l._odata[odx],buf);
buf[idx] = s;
for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
merge(l._odata[odx],pointers);
merge(l._odata[odx],buf);
}
return;
@ -127,13 +124,12 @@ namespace Grid {
int rank,odx,idx;
grid->GlobalCoorToRankIndex(rank,odx,idx,site);
std::vector<sobj> buf(Nsimd);
std::vector<scalar_type *> pointers(Nsimd);
for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
extract(l._odata[odx],pointers);
std::vector<sobj> buf(Nsimd);
extract(l._odata[odx],buf);
s = buf[idx];
grid->Broadcast(rank,s);
return;
@ -160,10 +156,8 @@ namespace Grid {
odx= grid->oIndex(site);
std::vector<sobj> buf(Nsimd);
std::vector<scalar_type *> pointers(Nsimd);
for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
extract(l._odata[odx],pointers);
extract(l._odata[odx],buf);
s = buf[idx];
@ -188,16 +182,13 @@ namespace Grid {
odx= grid->oIndex(site);
std::vector<sobj> buf(Nsimd);
std::vector<scalar_type *> pointers(Nsimd);
for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
// extract-modify-merge cycle is easiest way and this is not perf critical
extract(l._odata[odx],pointers);
extract(l._odata[odx],buf);
buf[idx] = s;
for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
merge(l._odata[odx],pointers);
merge(l._odata[odx],buf);
return;
};

View File

@ -66,9 +66,7 @@ namespace Grid {
}
std::vector<sobj> buf(Nsimd);
std::vector<scalar_type *> pointers(Nsimd);
for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
extract(vsum,pointers);
extract(vsum,buf);
for(int i=0;i<Nsimd;i++) ssum = ssum + buf[i];

View File

@ -26,8 +26,21 @@ namespace Grid {
}
};
// real scalars are one component
template<class scalar,class distribution,class generator> void fillScalar(scalar &s,distribution &dist,generator & gen)
{
s=dist(gen);
}
template<class distribution,class generator> void fillScalar(ComplexF &s,distribution &dist, generator &gen)
{
s=ComplexF(dist(gen),dist(gen));
}
template<class distribution,class generator> void fillScalar(ComplexD &s,distribution &dist,generator &gen)
{
s=ComplexD(dist(gen),dist(gen));
}
class GridRNGbase {
public:
@ -64,20 +77,6 @@ namespace Grid {
}
// real scalars are one component
template<class scalar,class distribution> void fillScalar(scalar &s,distribution &dist)
{
s=dist(_generators[0]);
}
template<class distribution> void fillScalar(ComplexF &s,distribution &dist)
{
s=ComplexF(dist(_generators[0]),dist(_generators[0]));
}
template<class distribution> void fillScalar(ComplexD &s,distribution &dist)
{
s=ComplexD(dist(_generators[0]),dist(_generators[0]));
}
template <class sobj,class distribution> inline void fill(sobj &l,distribution &dist){
@ -88,7 +87,7 @@ namespace Grid {
scalar_type *buf = (scalar_type *) & l;
for(int idx=0;idx<words;idx++){
fillScalar(buf[idx],dist);
fillScalar(buf[idx],dist,_generators[0]);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
@ -96,47 +95,47 @@ namespace Grid {
};
template <class distribution> inline void fill(ComplexF &l,distribution &dist){
fillScalar(l,dist);
fillScalar(l,dist,_generators[0]);
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
template <class distribution> inline void fill(ComplexD &l,distribution &dist){
fillScalar(l,dist);
fillScalar(l,dist,_generators[0]);
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
template <class distribution> inline void fill(RealF &l,distribution &dist){
fillScalar(l,dist);
fillScalar(l,dist,_generators[0]);
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
template <class distribution> inline void fill(RealD &l,distribution &dist){
fillScalar(l,dist);
fillScalar(l,dist,_generators[0]);
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
// vector fill
template <class distribution> inline void fill(vComplexF &l,distribution &dist){
RealF *pointer=(RealF *)&l;
for(int i=0;i<2*vComplexF::Nsimd();i++){
fillScalar(pointer[i],dist);
fillScalar(pointer[i],dist,_generators[0]);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
template <class distribution> inline void fill(vComplexD &l,distribution &dist){
RealD *pointer=(RealD *)&l;
for(int i=0;i<2*vComplexD::Nsimd();i++){
fillScalar(pointer[i],dist);
fillScalar(pointer[i],dist,_generators[0]);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
template <class distribution> inline void fill(vRealF &l,distribution &dist){
RealF *pointer=(RealF *)&l;
for(int i=0;i<vRealF::Nsimd();i++){
fillScalar(pointer[i],dist);
fillScalar(pointer[i],dist,_generators[0]);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
template <class distribution> inline void fill(vRealD &l,distribution &dist){
RealD *pointer=(RealD *)&l;
for(int i=0;i<vRealD::Nsimd();i++){
fillScalar(pointer[i],dist);
fillScalar(pointer[i],dist,_generators[0]);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
@ -187,18 +186,31 @@ namespace Grid {
{
std::vector<int> gcoor;
for(int gidx=0;gidx<_grid->_gsites;gidx++){
int gsites = _grid->_gsites;
typename source::result_type init = src();
std::ranlux48 pseeder(init);
std::uniform_int_distribution<uint64_t> ui;
for(int gidx=0;gidx<gsites;gidx++){
int rank,o_idx,i_idx;
_grid->GlobalIndexToGlobalCoor(gidx,gcoor);
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
int l_idx=generator_idx(o_idx,i_idx);
std::vector<int> site_seeds(4);
for(int i=0;i<4;i++){
site_seeds[i]= ui(pseeder);
}
typename source::result_type init = src();
_grid->Broadcast(0,(void *)&site_seeds[0],sizeof(int)*site_seeds.size());
_grid->Broadcast(0,(void *)&init,sizeof(init));
if( rank == _grid->ThisRank() ){
_generators[l_idx] = std::ranlux48(init);
fixedSeed ssrc(site_seeds);
typename source::result_type sinit = ssrc();
_generators[l_idx] = std::ranlux48(sinit);
}
}
_seeded=1;
@ -210,6 +222,7 @@ namespace Grid {
template <class vobj,class distribution> inline void fill(Lattice<vobj> &l,distribution &dist){
typedef typename vobj::scalar_object scalar_object;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
@ -217,25 +230,22 @@ namespace Grid {
int Nsimd =_grid->Nsimd();
int osites=_grid->oSites();
int words=sizeof(scalar_object)/sizeof(scalar_type);
int words = sizeof(vobj)/sizeof(vector_type);
std::vector<std::vector<scalar_type> > buf(Nsimd,std::vector<scalar_type>(words));
std::vector<scalar_type *> pointers(Nsimd);
std::vector<scalar_object> buf(Nsimd);
for(int ss=0;ss<osites;ss++){
for(int si=0;si<Nsimd;si++){
int gdx = generator_idx(ss,si); // index of generator state
pointers[si] = (scalar_type *)&buf[si][0];
scalar_type *pointer = (scalar_type *)&buf[si];
for(int idx=0;idx<words;idx++){
pointers[si][idx] = dist(_generators[gdx]);
fillScalar(pointer[idx],dist,_generators[gdx]);
}
}
// merge into SIMD lanes
merge(l._odata[ss],pointers);
merge(l._odata[ss],buf);
}
};