mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Accelerator loop changes
This commit is contained in:
parent
18e5de426d
commit
0ea7f5279d
@ -45,8 +45,8 @@ inline auto localNorm2 (const Lattice<vobj> &rhs)-> Lattice<typename vobj::tenso
|
|||||||
Lattice<typename vobj::tensor_reduced> ret(rhs.Grid());
|
Lattice<typename vobj::tensor_reduced> ret(rhs.Grid());
|
||||||
auto rhs_v = rhs.View();
|
auto rhs_v = rhs.View();
|
||||||
auto ret_v = ret.View();
|
auto ret_v = ret.View();
|
||||||
accelerator_loop(ss,rhs_v,{
|
accelerator_for(ss,rhs_v.size(),vobj::Nsimd(),{
|
||||||
ret_v[ss]=innerProduct(rhs_v[ss],rhs_v[ss]);
|
coalescedWrite(ret_v[ss],innerProduct(rhs_v(ss),rhs_v(ss)));
|
||||||
});
|
});
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@ -59,8 +59,8 @@ inline auto localInnerProduct (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs
|
|||||||
auto lhs_v = lhs.View();
|
auto lhs_v = lhs.View();
|
||||||
auto rhs_v = rhs.View();
|
auto rhs_v = rhs.View();
|
||||||
auto ret_v = ret.View();
|
auto ret_v = ret.View();
|
||||||
accelerator_loop(ss,rhs_v,{
|
accelerator_for(ss,rhs_v.size(),vobj::Nsimd(),{
|
||||||
ret_v[ss]=innerProduct(lhs_v[ss],rhs_v[ss]);
|
coalescedWrite(ret_v[ss],innerProduct(lhs_v(ss),rhs_v(ss)));
|
||||||
});
|
});
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@ -70,11 +70,15 @@ inline auto localInnerProduct (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs
|
|||||||
template<class ll,class rr>
|
template<class ll,class rr>
|
||||||
inline auto outerProduct (const Lattice<ll> &lhs,const Lattice<rr> &rhs) -> Lattice<decltype(outerProduct(ll(),rr()))>
|
inline auto outerProduct (const Lattice<ll> &lhs,const Lattice<rr> &rhs) -> Lattice<decltype(outerProduct(ll(),rr()))>
|
||||||
{
|
{
|
||||||
|
typedef decltype(coalescedRead(ll())) sll;
|
||||||
|
typedef decltype(coalescedRead(rr())) srr;
|
||||||
Lattice<decltype(outerProduct(ll(),rr()))> ret(rhs.Grid());
|
Lattice<decltype(outerProduct(ll(),rr()))> ret(rhs.Grid());
|
||||||
auto lhs_v = lhs.View();
|
auto lhs_v = lhs.View();
|
||||||
auto rhs_v = rhs.View();
|
auto rhs_v = rhs.View();
|
||||||
auto ret_v = ret.View();
|
auto ret_v = ret.View();
|
||||||
accelerator_loop(ss,rhs_v,{
|
accelerator_for(ss,rhs_v.size(),1,{
|
||||||
|
// FIXME had issues with scalar version of outer
|
||||||
|
// Use vector [] operator and don't read coalesce this loop
|
||||||
ret_v[ss]=outerProduct(lhs_v[ss],rhs_v[ss]);
|
ret_v[ss]=outerProduct(lhs_v[ss],rhs_v[ss]);
|
||||||
});
|
});
|
||||||
return ret;
|
return ret;
|
||||||
|
Loading…
Reference in New Issue
Block a user