mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-12 20:27:06 +01:00
Shaken out stencil to the point where I think Wilson dslash is correct.
Need to audit code carefully, consolidate between stencil and cshift, and then benchmark and optimise.
This commit is contained in:
@ -5,21 +5,23 @@ namespace Grid {
|
||||
|
||||
template<class iobj> inline void LatticeCoordinate(Lattice<iobj> &l,int mu)
|
||||
{
|
||||
typedef typename iobj::scalar_object scalar_object;
|
||||
typedef typename iobj::scalar_type scalar_type;
|
||||
typedef typename iobj::vector_type vector_type;
|
||||
|
||||
GridBase *grid = l._grid;
|
||||
int Nsimd = grid->iSites();
|
||||
|
||||
std::vector<int> gcoor;
|
||||
std::vector<scalar_type> mergebuf(Nsimd);
|
||||
std::vector<scalar_type *> mergeptr(Nsimd);
|
||||
|
||||
vector_type vI;
|
||||
for(int o=0;o<grid->oSites();o++){
|
||||
for(int i=0;i<grid->iSites();i++){
|
||||
grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor);
|
||||
mergebuf[i]=gcoor[mu];
|
||||
mergeptr[i]=&mergebuf[i];
|
||||
mergebuf[i]=(Integer)gcoor[mu];
|
||||
}
|
||||
merge(vI,mergeptr);
|
||||
AmergeA<vector_type,scalar_type>(vI,mergebuf);
|
||||
l._odata[o]=vI;
|
||||
}
|
||||
};
|
||||
|
@ -94,15 +94,12 @@ namespace Grid {
|
||||
grid->Broadcast(grid->BossRank(),s);
|
||||
|
||||
std::vector<sobj> buf(Nsimd);
|
||||
std::vector<scalar_type *> pointers(Nsimd);
|
||||
|
||||
// extract-modify-merge cycle is easiest way and this is not perf critical
|
||||
if ( rank == grid->ThisRank() ) {
|
||||
for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
|
||||
extract(l._odata[odx],pointers);
|
||||
extract(l._odata[odx],buf);
|
||||
buf[idx] = s;
|
||||
for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
|
||||
merge(l._odata[odx],pointers);
|
||||
merge(l._odata[odx],buf);
|
||||
}
|
||||
|
||||
return;
|
||||
@ -127,13 +124,12 @@ namespace Grid {
|
||||
|
||||
int rank,odx,idx;
|
||||
grid->GlobalCoorToRankIndex(rank,odx,idx,site);
|
||||
std::vector<sobj> buf(Nsimd);
|
||||
std::vector<scalar_type *> pointers(Nsimd);
|
||||
for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
|
||||
|
||||
extract(l._odata[odx],pointers);
|
||||
|
||||
std::vector<sobj> buf(Nsimd);
|
||||
extract(l._odata[odx],buf);
|
||||
|
||||
s = buf[idx];
|
||||
|
||||
grid->Broadcast(rank,s);
|
||||
|
||||
return;
|
||||
@ -160,10 +156,8 @@ namespace Grid {
|
||||
odx= grid->oIndex(site);
|
||||
|
||||
std::vector<sobj> buf(Nsimd);
|
||||
std::vector<scalar_type *> pointers(Nsimd);
|
||||
for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
|
||||
|
||||
extract(l._odata[odx],pointers);
|
||||
extract(l._odata[odx],buf);
|
||||
|
||||
s = buf[idx];
|
||||
|
||||
@ -188,16 +182,13 @@ namespace Grid {
|
||||
odx= grid->oIndex(site);
|
||||
|
||||
std::vector<sobj> buf(Nsimd);
|
||||
std::vector<scalar_type *> pointers(Nsimd);
|
||||
for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
|
||||
|
||||
// extract-modify-merge cycle is easiest way and this is not perf critical
|
||||
extract(l._odata[odx],pointers);
|
||||
extract(l._odata[odx],buf);
|
||||
|
||||
buf[idx] = s;
|
||||
|
||||
for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
|
||||
merge(l._odata[odx],pointers);
|
||||
merge(l._odata[odx],buf);
|
||||
|
||||
return;
|
||||
};
|
||||
|
@ -66,9 +66,7 @@ namespace Grid {
|
||||
}
|
||||
|
||||
std::vector<sobj> buf(Nsimd);
|
||||
std::vector<scalar_type *> pointers(Nsimd);
|
||||
for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
|
||||
extract(vsum,pointers);
|
||||
extract(vsum,buf);
|
||||
|
||||
for(int i=0;i<Nsimd;i++) ssum = ssum + buf[i];
|
||||
|
||||
|
@ -26,8 +26,21 @@ namespace Grid {
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
// real scalars are one component
|
||||
// Generic overload: overwrites s with the result of a single dist(gen) call.
//   s    - destination scalar (assigned, previous value discarded).
//   dist - callable invoked as dist(gen).
//   gen  - generator object handed to dist; taken by non-const reference.
template<class scalar,class distribution,class generator> void fillScalar(scalar &s,distribution &dist,generator & gen)
|
||||
{
|
||||
s=dist(gen);
|
||||
}
|
||||
// ComplexF overload: overwrites s with a ComplexF built from two dist(gen) calls.
//   s    - destination, assigned the newly constructed ComplexF.
//   dist - callable invoked twice as dist(gen).
//   gen  - generator object handed to dist on both calls.
// NOTE(review): both draws are constructor arguments, so C++ does not fix which
// dist(gen) call is evaluated first; the mapping of draws to components may
// differ between compilers. Presumably the arguments are (real, imag) - confirm
// against the ComplexF definition.
template<class distribution,class generator> void fillScalar(ComplexF &s,distribution &dist, generator &gen)
|
||||
{
|
||||
s=ComplexF(dist(gen),dist(gen));
|
||||
}
|
||||
// ComplexD overload: overwrites s with a ComplexD built from two dist(gen) calls.
//   s    - destination, assigned the newly constructed ComplexD.
//   dist - callable invoked twice as dist(gen).
//   gen  - generator object handed to dist on both calls.
// NOTE(review): both draws are constructor arguments, so C++ does not fix which
// dist(gen) call is evaluated first; the mapping of draws to components may
// differ between compilers. Presumably the arguments are (real, imag) - confirm
// against the ComplexD definition.
template<class distribution,class generator> void fillScalar(ComplexD &s,distribution &dist,generator &gen)
|
||||
{
|
||||
s=ComplexD(dist(gen),dist(gen));
|
||||
}
|
||||
|
||||
class GridRNGbase {
|
||||
|
||||
public:
|
||||
@ -64,20 +77,6 @@ namespace Grid {
|
||||
}
|
||||
|
||||
|
||||
// real scalars are one component
|
||||
// Generic overload without a generator parameter: overwrites s with one draw,
// dist(_generators[0]), i.e. always using element 0 of _generators.
//   s    - destination scalar (assigned, previous value discarded).
//   dist - callable invoked as dist(_generators[0]).
template<class scalar,class distribution> void fillScalar(scalar &s,distribution &dist)
|
||||
{
|
||||
s=dist(_generators[0]);
|
||||
}
|
||||
// ComplexF overload without a generator parameter: overwrites s with a ComplexF
// built from two dist(_generators[0]) calls (element 0 of _generators both times).
//   s    - destination, assigned the newly constructed ComplexF.
//   dist - callable invoked twice as dist(_generators[0]).
// NOTE(review): the two draws are constructor arguments, so their relative
// evaluation order is not fixed by C++.
template<class distribution> void fillScalar(ComplexF &s,distribution &dist)
|
||||
{
|
||||
s=ComplexF(dist(_generators[0]),dist(_generators[0]));
|
||||
}
|
||||
// ComplexD overload without a generator parameter: overwrites s with a ComplexD
// built from two dist(_generators[0]) calls (element 0 of _generators both times).
//   s    - destination, assigned the newly constructed ComplexD.
//   dist - callable invoked twice as dist(_generators[0]).
// NOTE(review): the two draws are constructor arguments, so their relative
// evaluation order is not fixed by C++.
template<class distribution> void fillScalar(ComplexD &s,distribution &dist)
|
||||
{
|
||||
s=ComplexD(dist(_generators[0]),dist(_generators[0]));
|
||||
}
|
||||
|
||||
|
||||
template <class sobj,class distribution> inline void fill(sobj &l,distribution &dist){
|
||||
|
||||
@ -88,7 +87,7 @@ namespace Grid {
|
||||
scalar_type *buf = (scalar_type *) & l;
|
||||
|
||||
for(int idx=0;idx<words;idx++){
|
||||
fillScalar(buf[idx],dist);
|
||||
fillScalar(buf[idx],dist,_generators[0]);
|
||||
}
|
||||
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
@ -96,47 +95,47 @@ namespace Grid {
|
||||
};
|
||||
|
||||
template <class distribution> inline void fill(ComplexF &l,distribution &dist){
|
||||
fillScalar(l,dist);
|
||||
fillScalar(l,dist,_generators[0]);
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(ComplexD &l,distribution &dist){
|
||||
fillScalar(l,dist);
|
||||
fillScalar(l,dist,_generators[0]);
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(RealF &l,distribution &dist){
|
||||
fillScalar(l,dist);
|
||||
fillScalar(l,dist,_generators[0]);
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(RealD &l,distribution &dist){
|
||||
fillScalar(l,dist);
|
||||
fillScalar(l,dist,_generators[0]);
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
// vector fill
|
||||
template <class distribution> inline void fill(vComplexF &l,distribution &dist){
|
||||
RealF *pointer=(RealF *)&l;
|
||||
for(int i=0;i<2*vComplexF::Nsimd();i++){
|
||||
fillScalar(pointer[i],dist);
|
||||
fillScalar(pointer[i],dist,_generators[0]);
|
||||
}
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(vComplexD &l,distribution &dist){
|
||||
RealD *pointer=(RealD *)&l;
|
||||
for(int i=0;i<2*vComplexD::Nsimd();i++){
|
||||
fillScalar(pointer[i],dist);
|
||||
fillScalar(pointer[i],dist,_generators[0]);
|
||||
}
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(vRealF &l,distribution &dist){
|
||||
RealF *pointer=(RealF *)&l;
|
||||
for(int i=0;i<vRealF::Nsimd();i++){
|
||||
fillScalar(pointer[i],dist);
|
||||
fillScalar(pointer[i],dist,_generators[0]);
|
||||
}
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(vRealD &l,distribution &dist){
|
||||
RealD *pointer=(RealD *)&l;
|
||||
for(int i=0;i<vRealD::Nsimd();i++){
|
||||
fillScalar(pointer[i],dist);
|
||||
fillScalar(pointer[i],dist,_generators[0]);
|
||||
}
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
@ -187,18 +186,31 @@ namespace Grid {
|
||||
{
|
||||
std::vector<int> gcoor;
|
||||
|
||||
for(int gidx=0;gidx<_grid->_gsites;gidx++){
|
||||
int gsites = _grid->_gsites;
|
||||
|
||||
typename source::result_type init = src();
|
||||
std::ranlux48 pseeder(init);
|
||||
std::uniform_int_distribution<uint64_t> ui;
|
||||
|
||||
for(int gidx=0;gidx<gsites;gidx++){
|
||||
|
||||
int rank,o_idx,i_idx;
|
||||
_grid->GlobalIndexToGlobalCoor(gidx,gcoor);
|
||||
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
|
||||
|
||||
int l_idx=generator_idx(o_idx,i_idx);
|
||||
|
||||
std::vector<int> site_seeds(4);
|
||||
for(int i=0;i<4;i++){
|
||||
site_seeds[i]= ui(pseeder);
|
||||
}
|
||||
|
||||
typename source::result_type init = src();
|
||||
_grid->Broadcast(0,(void *)&site_seeds[0],sizeof(int)*site_seeds.size());
|
||||
|
||||
_grid->Broadcast(0,(void *)&init,sizeof(init));
|
||||
if( rank == _grid->ThisRank() ){
|
||||
_generators[l_idx] = std::ranlux48(init);
|
||||
fixedSeed ssrc(site_seeds);
|
||||
typename source::result_type sinit = ssrc();
|
||||
_generators[l_idx] = std::ranlux48(sinit);
|
||||
}
|
||||
}
|
||||
_seeded=1;
|
||||
@ -210,6 +222,7 @@ namespace Grid {
|
||||
|
||||
template <class vobj,class distribution> inline void fill(Lattice<vobj> &l,distribution &dist){
|
||||
|
||||
typedef typename vobj::scalar_object scalar_object;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
@ -217,25 +230,22 @@ namespace Grid {
|
||||
|
||||
int Nsimd =_grid->Nsimd();
|
||||
int osites=_grid->oSites();
|
||||
int words=sizeof(scalar_object)/sizeof(scalar_type);
|
||||
|
||||
int words = sizeof(vobj)/sizeof(vector_type);
|
||||
std::vector<std::vector<scalar_type> > buf(Nsimd,std::vector<scalar_type>(words));
|
||||
std::vector<scalar_type *> pointers(Nsimd);
|
||||
std::vector<scalar_object> buf(Nsimd);
|
||||
|
||||
for(int ss=0;ss<osites;ss++){
|
||||
|
||||
for(int si=0;si<Nsimd;si++){
|
||||
|
||||
int gdx = generator_idx(ss,si); // index of generator state
|
||||
|
||||
pointers[si] = (scalar_type *)&buf[si][0];
|
||||
scalar_type *pointer = (scalar_type *)&buf[si];
|
||||
for(int idx=0;idx<words;idx++){
|
||||
pointers[si][idx] = dist(_generators[gdx]);
|
||||
fillScalar(pointer[idx],dist,_generators[gdx]);
|
||||
}
|
||||
|
||||
}
|
||||
// merge into SIMD lanes
|
||||
merge(l._odata[ss],pointers);
|
||||
merge(l._odata[ss],buf);
|
||||
}
|
||||
};
|
||||
|
||||
|
Reference in New Issue
Block a user