mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-11 14:40:46 +01:00
Speed up Cshift further with coalesced reads and writes
This commit is contained in:
parent
c273fb051c
commit
5791021dcd
@ -52,23 +52,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
template<typename Op, typename T1>
|
template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr>
|
||||||
auto Cshift(const LatticeUnaryExpression<Op,T1> &expr,int dim,int shift)
|
auto Cshift(const Expression &expr,int dim,int shift) -> decltype(closure(expr))
|
||||||
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1)))>
|
|
||||||
{
|
|
||||||
return Cshift(closure(expr),dim,shift);
|
|
||||||
}
|
|
||||||
template <class Op, class T1, class T2>
|
|
||||||
auto Cshift(const LatticeBinaryExpression<Op,T1,T2> &expr,int dim,int shift)
|
|
||||||
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1),eval(0, expr.arg2)))>
|
|
||||||
{
|
|
||||||
return Cshift(closure(expr),dim,shift);
|
|
||||||
}
|
|
||||||
template <class Op, class T1, class T2, class T3>
|
|
||||||
auto Cshift(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr,int dim,int shift)
|
|
||||||
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1),
|
|
||||||
eval(0, expr.arg2),
|
|
||||||
eval(0, expr.arg3)))>
|
|
||||||
{
|
{
|
||||||
return Cshift(closure(expr),dim,shift);
|
return Cshift(closure(expr),dim,shift);
|
||||||
}
|
}
|
||||||
|
@ -76,8 +76,8 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer,int dimen
|
|||||||
autoView(rhs_v , rhs, AcceleratorRead);
|
autoView(rhs_v , rhs, AcceleratorRead);
|
||||||
auto buffer_p = & buffer[0];
|
auto buffer_p = & buffer[0];
|
||||||
auto table = &Cshift_table[0];
|
auto table = &Cshift_table[0];
|
||||||
accelerator_for(i,ent,1,{
|
accelerator_for(i,ent,vobj::Nsimd(),{
|
||||||
buffer_p[table[i].first]=rhs_v[table[i].second];
|
coalescedWrite(buffer_p[table[i].first],coalescedRead(rhs_v[table[i].second]));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -185,8 +185,8 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vo
|
|||||||
autoView( rhs_v, rhs, AcceleratorWrite);
|
autoView( rhs_v, rhs, AcceleratorWrite);
|
||||||
auto buffer_p = & buffer[0];
|
auto buffer_p = & buffer[0];
|
||||||
auto table = &Cshift_table[0];
|
auto table = &Cshift_table[0];
|
||||||
accelerator_for(i,ent,1,{
|
accelerator_for(i,ent,vobj::Nsimd(),{
|
||||||
rhs_v[table[i].first]=buffer_p[table[i].second];
|
coalescedWrite(rhs_v[table[i].first],coalescedRead(buffer_p[table[i].second]));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -282,8 +282,8 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
|
|||||||
autoView(rhs_v , rhs, AcceleratorRead);
|
autoView(rhs_v , rhs, AcceleratorRead);
|
||||||
autoView(lhs_v , lhs, AcceleratorWrite);
|
autoView(lhs_v , lhs, AcceleratorWrite);
|
||||||
auto table = &Cshift_table[0];
|
auto table = &Cshift_table[0];
|
||||||
accelerator_for(i,ent,1,{
|
accelerator_for(i,ent,vobj::Nsimd(),{
|
||||||
lhs_v[table[i].first]=rhs_v[table[i].second];
|
coalescedWrite(lhs_v[table[i].first],coalescedRead(rhs_v[table[i].second]));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user