1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-12-23 22:24:30 +00:00

Introduce accelerator-friendly expression template rewrite.

Lattice sites must now be obtained and indexed through a view object, which is
safe to copy-construct when copying to the GPU (without copying the lattice itself).
This commit is contained in:
paboyle
2018-03-04 16:03:19 +00:00
parent dad7862f91
commit 0e6197fbed
16 changed files with 470 additions and 513 deletions

View File

@@ -43,8 +43,10 @@ template<class vobj>
inline auto localNorm2 (const Lattice<vobj> &rhs)-> Lattice<typename vobj::tensor_reduced>
{
  // Site-local squared norm: each visited index ss of the result receives
  // innerProduct(rhs[ss], rhs[ss]). The result lattice is built on rhs.Grid().
  Lattice<typename vobj::tensor_reduced> ret(rhs.Grid());

  // Index through view objects rather than the lattices themselves, so that
  // the loop body only refers to the views (which are safe to copy to the
  // accelerator without copying the lattice).
  auto rhs_v = rhs.View();
  auto ret_v = ret.View();
  accelerator_loop(ss,rhs_v,{
    ret_v[ss]=innerProduct(rhs_v[ss],rhs_v[ss]);
  });
  return ret;
}
@@ -54,8 +56,11 @@ template<class vobj>
inline auto localInnerProduct (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs) -> Lattice<typename vobj::tensor_reduced>
{
  // Site-local inner product: each visited index ss of the result receives
  // innerProduct(lhs[ss], rhs[ss]). The result lattice is built on rhs.Grid().
  // NOTE(review): presumably lhs and rhs live on the same grid; nothing here
  // checks that -- confirm against callers.
  Lattice<typename vobj::tensor_reduced> ret(rhs.Grid());

  // Index through view objects rather than the lattices themselves, so that
  // the loop body only refers to the views (which are safe to copy to the
  // accelerator without copying the lattice).
  auto lhs_v = lhs.View();
  auto rhs_v = rhs.View();
  auto ret_v = ret.View();
  accelerator_loop(ss,rhs_v,{
    ret_v[ss]=innerProduct(lhs_v[ss],rhs_v[ss]);
  });
  return ret;
}
@@ -63,11 +68,14 @@ inline auto localInnerProduct (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs
// outerProduct Scalar x Scalar -> Scalar
// Vector x Vector -> Matrix
template<class ll,class rr>
inline auto outerProduct (const Lattice<ll> &lhs,const Lattice<rr> &rhs) -> Lattice<decltype(outerProduct(ll(),rr()))>
{
  // Site-local outer product: each visited index ss of the result receives
  // outerProduct(lhs[ss], rhs[ss]). The site type is deduced from the
  // site-level outerProduct applied to value-constructed ll and rr objects,
  // so no lattice has to be indexed just to name the return type.
  // NOTE(review): presumably lhs and rhs live on the same grid; nothing here
  // checks that -- confirm against callers.
  Lattice<decltype(outerProduct(ll(),rr()))> ret(rhs.Grid());

  // Index through view objects rather than the lattices themselves, so that
  // the loop body only refers to the views (which are safe to copy to the
  // accelerator without copying the lattice).
  auto lhs_v = lhs.View();
  auto rhs_v = rhs.View();
  auto ret_v = ret.View();
  accelerator_loop(ss,rhs_v,{
    ret_v[ss]=outerProduct(lhs_v[ss],rhs_v[ss]);
  });
  return ret;
}