mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Configure option between host and device
This commit is contained in:
parent
f16c2665f5
commit
50b808ab33
@ -35,7 +35,7 @@ extern Vector<std::pair<int,int> > Cshift_table;
|
|||||||
// Gather for when there is no need to SIMD split
|
// Gather for when there is no need to SIMD split
|
||||||
///////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////
|
||||||
template<class vobj> void
|
template<class vobj> void
|
||||||
Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer,int dimension,int plane,int cbmask, int off=0)
|
Gather_plane_simple (const Lattice<vobj> &rhs,cshiftVector<vobj> &buffer,int dimension,int plane,int cbmask, int off=0)
|
||||||
{
|
{
|
||||||
int rd = rhs.Grid()->_rdimensions[dimension];
|
int rd = rhs.Grid()->_rdimensions[dimension];
|
||||||
|
|
||||||
@ -73,12 +73,19 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer,int dimen
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
autoView(rhs_v , rhs, AcceleratorRead);
|
|
||||||
auto buffer_p = & buffer[0];
|
auto buffer_p = & buffer[0];
|
||||||
auto table = &Cshift_table[0];
|
auto table = &Cshift_table[0];
|
||||||
|
#ifdef ACCELERATOR_CSHIFT
|
||||||
|
autoView(rhs_v , rhs, AcceleratorRead);
|
||||||
accelerator_for(i,ent,vobj::Nsimd(),{
|
accelerator_for(i,ent,vobj::Nsimd(),{
|
||||||
coalescedWrite(buffer_p[table[i].first],coalescedRead(rhs_v[table[i].second]));
|
coalescedWrite(buffer_p[table[i].first],coalescedRead(rhs_v[table[i].second]));
|
||||||
});
|
});
|
||||||
|
#else
|
||||||
|
autoView(rhs_v , rhs, CpuRead);
|
||||||
|
thread_for(i,ent,{
|
||||||
|
buffer_p[table[i].first]=rhs_v[table[i].second];
|
||||||
|
});
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -103,6 +110,7 @@ Gather_plane_extract(const Lattice<vobj> &rhs,
|
|||||||
int n1=rhs.Grid()->_slice_stride[dimension];
|
int n1=rhs.Grid()->_slice_stride[dimension];
|
||||||
|
|
||||||
if ( cbmask ==0x3){
|
if ( cbmask ==0x3){
|
||||||
|
#ifdef ACCELERATOR_CSHIFT
|
||||||
autoView(rhs_v , rhs, AcceleratorRead);
|
autoView(rhs_v , rhs, AcceleratorRead);
|
||||||
accelerator_for2d(n,e1,b,e2,1,{
|
accelerator_for2d(n,e1,b,e2,1,{
|
||||||
int o = n*n1;
|
int o = n*n1;
|
||||||
@ -111,12 +119,22 @@ Gather_plane_extract(const Lattice<vobj> &rhs,
|
|||||||
vobj temp =rhs_v[so+o+b];
|
vobj temp =rhs_v[so+o+b];
|
||||||
extract<vobj>(temp,pointers,offset);
|
extract<vobj>(temp,pointers,offset);
|
||||||
});
|
});
|
||||||
} else {
|
#else
|
||||||
autoView(rhs_v , rhs, AcceleratorRead);
|
autoView(rhs_v , rhs, CpuRead);
|
||||||
|
thread_for2d(n,e1,b,e2,{
|
||||||
|
int o = n*n1;
|
||||||
|
int offset = b+n*e2;
|
||||||
|
|
||||||
|
vobj temp =rhs_v[so+o+b];
|
||||||
|
extract<vobj>(temp,pointers,offset);
|
||||||
|
});
|
||||||
|
#endif
|
||||||
|
} else {
|
||||||
Coordinate rdim=rhs.Grid()->_rdimensions;
|
Coordinate rdim=rhs.Grid()->_rdimensions;
|
||||||
Coordinate cdm =rhs.Grid()->_checker_dim_mask;
|
Coordinate cdm =rhs.Grid()->_checker_dim_mask;
|
||||||
std::cout << " Dense packed buffer WARNING " <<std::endl; // Does this get called twice once for each cb?
|
std::cout << " Dense packed buffer WARNING " <<std::endl; // Does this get called twice once for each cb?
|
||||||
|
#ifdef ACCELERATOR_CSHIFT
|
||||||
|
autoView(rhs_v , rhs, AcceleratorRead);
|
||||||
accelerator_for2d(n,e1,b,e2,1,{
|
accelerator_for2d(n,e1,b,e2,1,{
|
||||||
|
|
||||||
Coordinate coor;
|
Coordinate coor;
|
||||||
@ -134,13 +152,33 @@ Gather_plane_extract(const Lattice<vobj> &rhs,
|
|||||||
extract<vobj>(temp,pointers,offset);
|
extract<vobj>(temp,pointers,offset);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
#else
|
||||||
|
autoView(rhs_v , rhs, CpuRead);
|
||||||
|
thread_for2d(n,e1,b,e2,{
|
||||||
|
|
||||||
|
Coordinate coor;
|
||||||
|
|
||||||
|
int o=n*n1;
|
||||||
|
int oindex = o+b;
|
||||||
|
|
||||||
|
int cb = RedBlackCheckerBoardFromOindex(oindex, rdim, cdm);
|
||||||
|
|
||||||
|
int ocb=1<<cb;
|
||||||
|
int offset = b+n*e2;
|
||||||
|
|
||||||
|
if ( ocb & cbmask ) {
|
||||||
|
vobj temp =rhs_v[so+o+b];
|
||||||
|
extract<vobj>(temp,pointers,offset);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
// Scatter for when there is no need to SIMD split
|
// Scatter for when there is no need to SIMD split
|
||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vobj> &buffer, int dimension,int plane,int cbmask)
|
template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,cshiftVector<vobj> &buffer, int dimension,int plane,int cbmask)
|
||||||
{
|
{
|
||||||
int rd = rhs.Grid()->_rdimensions[dimension];
|
int rd = rhs.Grid()->_rdimensions[dimension];
|
||||||
|
|
||||||
@ -182,12 +220,19 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vo
|
|||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
autoView( rhs_v, rhs, AcceleratorWrite);
|
|
||||||
auto buffer_p = & buffer[0];
|
auto buffer_p = & buffer[0];
|
||||||
auto table = &Cshift_table[0];
|
auto table = &Cshift_table[0];
|
||||||
|
#ifdef ACCELERATOR_CSHIFT
|
||||||
|
autoView( rhs_v, rhs, AcceleratorWrite);
|
||||||
accelerator_for(i,ent,vobj::Nsimd(),{
|
accelerator_for(i,ent,vobj::Nsimd(),{
|
||||||
coalescedWrite(rhs_v[table[i].first],coalescedRead(buffer_p[table[i].second]));
|
coalescedWrite(rhs_v[table[i].first],coalescedRead(buffer_p[table[i].second]));
|
||||||
});
|
});
|
||||||
|
#else
|
||||||
|
autoView( rhs_v, rhs, CpuWrite);
|
||||||
|
thread_for(i,ent,{
|
||||||
|
rhs_v[table[i].first]=buffer_p[table[i].second];
|
||||||
|
});
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -208,14 +253,23 @@ template<class vobj> void Scatter_plane_merge(Lattice<vobj> &rhs,ExtractPointerA
|
|||||||
int e2=rhs.Grid()->_slice_block[dimension];
|
int e2=rhs.Grid()->_slice_block[dimension];
|
||||||
|
|
||||||
if(cbmask ==0x3 ) {
|
if(cbmask ==0x3 ) {
|
||||||
autoView( rhs_v , rhs, AcceleratorWrite);
|
|
||||||
int _slice_stride = rhs.Grid()->_slice_stride[dimension];
|
int _slice_stride = rhs.Grid()->_slice_stride[dimension];
|
||||||
int _slice_block = rhs.Grid()->_slice_block[dimension];
|
int _slice_block = rhs.Grid()->_slice_block[dimension];
|
||||||
|
#ifdef ACCELERATOR_CSHIFT
|
||||||
|
autoView( rhs_v , rhs, AcceleratorWrite);
|
||||||
accelerator_for2d(n,e1,b,e2,1,{
|
accelerator_for2d(n,e1,b,e2,1,{
|
||||||
int o = n*_slice_stride;
|
int o = n*_slice_stride;
|
||||||
int offset = b+n*_slice_block;
|
int offset = b+n*_slice_block;
|
||||||
merge(rhs_v[so+o+b],pointers,offset);
|
merge(rhs_v[so+o+b],pointers,offset);
|
||||||
});
|
});
|
||||||
|
#else
|
||||||
|
autoView( rhs_v , rhs, CpuWrite);
|
||||||
|
thread_for2d(n,e1,b,e2,{
|
||||||
|
int o = n*_slice_stride;
|
||||||
|
int offset = b+n*_slice_block;
|
||||||
|
merge(rhs_v[so+o+b],pointers,offset);
|
||||||
|
});
|
||||||
|
#endif
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
// Case of SIMD split AND checker dim cannot currently be hit, except in
|
// Case of SIMD split AND checker dim cannot currently be hit, except in
|
||||||
@ -280,12 +334,20 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
|
|||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
|
auto table = &Cshift_table[0];
|
||||||
|
#ifdef ACCELERATOR_CSHIFT
|
||||||
autoView(rhs_v , rhs, AcceleratorRead);
|
autoView(rhs_v , rhs, AcceleratorRead);
|
||||||
autoView(lhs_v , lhs, AcceleratorWrite);
|
autoView(lhs_v , lhs, AcceleratorWrite);
|
||||||
auto table = &Cshift_table[0];
|
|
||||||
accelerator_for(i,ent,vobj::Nsimd(),{
|
accelerator_for(i,ent,vobj::Nsimd(),{
|
||||||
coalescedWrite(lhs_v[table[i].first],coalescedRead(rhs_v[table[i].second]));
|
coalescedWrite(lhs_v[table[i].first],coalescedRead(rhs_v[table[i].second]));
|
||||||
});
|
});
|
||||||
|
#else
|
||||||
|
autoView(rhs_v , rhs, CpuRead);
|
||||||
|
autoView(lhs_v , lhs, CpuWrite);
|
||||||
|
thread_for(i,ent,{
|
||||||
|
lhs_v[table[i].first]=rhs_v[table[i].second];
|
||||||
|
});
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -324,12 +386,20 @@ template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vo
|
|||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
|
auto table = &Cshift_table[0];
|
||||||
|
#ifdef ACCELERATOR_CSHIFT
|
||||||
autoView( rhs_v, rhs, AcceleratorRead);
|
autoView( rhs_v, rhs, AcceleratorRead);
|
||||||
autoView( lhs_v, lhs, AcceleratorWrite);
|
autoView( lhs_v, lhs, AcceleratorWrite);
|
||||||
auto table = &Cshift_table[0];
|
|
||||||
accelerator_for(i,ent,1,{
|
accelerator_for(i,ent,1,{
|
||||||
permute(lhs_v[table[i].first],rhs_v[table[i].second],permute_type);
|
permute(lhs_v[table[i].first],rhs_v[table[i].second],permute_type);
|
||||||
});
|
});
|
||||||
|
#else
|
||||||
|
autoView( rhs_v, rhs, CpuRead);
|
||||||
|
autoView( lhs_v, lhs, CpuWrite);
|
||||||
|
thread_for(i,ent,{
|
||||||
|
permute(lhs_v[table[i].first],rhs_v[table[i].second],permute_type);
|
||||||
|
});
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user