Mirror of https://github.com/paboyle/Grid.git (synced 2025-04-04 19:25:56 +01:00)

Introduce accelerator-friendly expression template rewrite.

Lattice indexing must now be obtained and accessed through a view object that is safe to copy-construct when copying to the GPU (i.e. without copying the lattice itself).

This commit is contained in:
parent dad7862f91
commit 0e6197fbed
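
The diff below leans on one idiom throughout: a lightweight, trivially copyable view that carries only what a device kernel needs (a raw pointer to the site data plus a site count), so the kernel can capture it by value without dragging the host-side Lattice object along. The following is a minimal sketch of that idea only; the member names (_odata, _osites) and exact layout are assumptions for illustration, while LatticeView, View(), accelerator_inline and accelerator_loop are names that actually appear in the diff.

// Minimal sketch: a copyable window onto lattice storage (assumed members).
template<class vobj>
class LatticeView {
public:
  vobj  *_odata;   // bare pointer to site data, dereferenceable in device code
  size_t _osites;  // number of outer sites covered by the view

  accelerator_inline vobj       & operator[](size_t ss)       { return _odata[ss]; }
  accelerator_inline const vobj & operator[](size_t ss) const { return _odata[ss]; }
  accelerator_inline size_t size(void) const { return _osites; }
};

// Kernels then capture views by value rather than the Lattice itself, e.g.
//   auto ret_v = ret.View();
//   auto lhs_v = lhs.View();
//   accelerator_loop(ss, lhs_v, { ret_v[ss] = adj(lhs_v[ss]); });
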
@@ -68,149 +68,139 @@ accelerator_inline vobj predicatedWhere(const iobj &predicate, const vobj &iftru
  return ret;
}

////////////////////////////////////////////
// recursive evaluation of expressions; Could
// switch to generic approach with variadics, a la
// Antonin's Lat Sim but the repack to variadic with popped
// from tuple is hideous; C++14 introduces std::make_index_sequence for this
////////////////////////////////////////////

// leaf eval of lattice ; should enable if protect using traits

template <typename T>
using is_lattice = std::is_base_of<LatticeBase, T>;

template <typename T>
using is_lattice_expr = std::is_base_of<LatticeExpressionBase, T>;

template <typename T> using is_lattice_expr = std::is_base_of<LatticeExpressionBase,T >;

/////////////////////////////////////////////////////
//Specialization of getVectorType for lattices
/////////////////////////////////////////////////////
template<typename T>
struct getVectorType<Lattice<T> >{
  typedef typename Lattice<T>::vector_object type;
};

template<class sobj>
inline sobj eval(const unsigned int ss, const sobj &arg)

////////////////////////////////////////////
//-- recursive evaluation of expressions; --
// handle leaves of syntax tree
///////////////////////////////////////////////////
template<class sobj> accelerator_inline
sobj eval(const unsigned int ss, const sobj &arg)
{
  return arg;
}
template <class lobj>
inline const lobj &eval(const unsigned int ss, const Lattice<lobj> &arg) {

template <class lobj> accelerator_inline
const lobj & eval(const unsigned int ss, const LatticeView<lobj> &arg)
{
  return arg[ss];
}

// handle nodes in syntax tree
template <typename Op, typename T1>
auto inline eval(
    const unsigned int ss,
    const LatticeUnaryExpression<Op, T1> &expr) // eval one operand
    -> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)))) {
  return expr.first.func(eval(ss, std::get<0>(expr.second)));
template <class lobj> accelerator_inline
const lobj & eval(const unsigned int ss, const Lattice<lobj> &arg)
{
  auto view = arg.View();
  return view[ss];
}

template <typename Op, typename T1, typename T2>
auto inline eval(
    const unsigned int ss,
    const LatticeBinaryExpression<Op, T1, T2> &expr) // eval two operands
    -> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)),
                                eval(ss, std::get<1>(expr.second)))) {
  return expr.first.func(eval(ss, std::get<0>(expr.second)),
                         eval(ss, std::get<1>(expr.second)));
///////////////////////////////////////////////////
// handle nodes in syntax tree- eval one operand
///////////////////////////////////////////////////
template <typename Op, typename T1> accelerator_inline
auto eval(const unsigned int ss, const LatticeUnaryExpression<Op, T1> &expr)
  -> decltype(expr.op.func( eval(ss, expr.arg1)))
{
  return expr.op.func( eval(ss, expr.arg1) );
}

template <typename Op, typename T1, typename T2, typename T3>
auto inline eval(const unsigned int ss,
                 const LatticeTrinaryExpression<Op, T1, T2, T3>
                     &expr) // eval three operands
    -> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)),
                                eval(ss, std::get<1>(expr.second)),
                                eval(ss, std::get<2>(expr.second)))) {
  return expr.first.func(eval(ss, std::get<0>(expr.second)),
                         eval(ss, std::get<1>(expr.second)),
                         eval(ss, std::get<2>(expr.second)));
///////////////////////
// eval two operands
///////////////////////
template <typename Op, typename T1, typename T2> accelerator_inline
auto eval(const unsigned int ss, const LatticeBinaryExpression<Op, T1, T2> &expr)
  -> decltype(expr.op.func( eval(ss,expr.arg1),eval(ss,expr.arg2)))
{
  return expr.op.func( eval(ss,expr.arg1), eval(ss,expr.arg2) );
}
///////////////////////
// eval three operands
///////////////////////
template <typename Op, typename T1, typename T2, typename T3> accelerator_inline
auto eval(const unsigned int ss, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
  -> decltype(expr.op.func(eval(ss, expr.arg1), eval(ss, expr.arg2), eval(ss, expr.arg3)))
{
  return expr.op.func(eval(ss, expr.arg1), eval(ss, expr.arg2), eval(ss, expr.arg3));
}

//////////////////////////////////////////////////////////////////////////
// Obtain the grid from an expression, ensuring conformable. This must follow a
// tree recursion
// tree recursion; must retain grid pointer in the LatticeView class which sucks
// Use a different method, and make it void *.
// Perhaps a conformable method.
//////////////////////////////////////////////////////////////////////////
template <class T1,
          typename std::enable_if<is_lattice<T1>::value, T1>::type * = nullptr>
template <class T1,typename std::enable_if<is_lattice<T1>::value, T1>::type * = nullptr>
inline void GridFromExpression(GridBase *&grid, const T1 &lat) // Lattice leaf
{
  if (grid) {
    conformable(grid, lat.Grid());
  }
  grid = lat.Grid();
  lat.Conformable(grid);
}
template <class T1,
          typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr>
inline void GridFromExpression(GridBase *&grid,
                               const T1 &notlat) // non-lattice leaf

template <class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr>
accelerator_inline
void GridFromExpression(GridBase *&grid,const T1 &notlat) // non-lattice leaf
{}

template <typename Op, typename T1>
inline void GridFromExpression(GridBase *&grid,
                               const LatticeUnaryExpression<Op, T1> &expr) {
  GridFromExpression(grid, std::get<0>(expr.second)); // recurse
accelerator_inline
void GridFromExpression(GridBase *&grid,const LatticeUnaryExpression<Op, T1> &expr)
{
  GridFromExpression(grid, expr.arg1); // recurse
}

template <typename Op, typename T1, typename T2>
inline void GridFromExpression(
    GridBase *&grid, const LatticeBinaryExpression<Op, T1, T2> &expr) {
  GridFromExpression(grid, std::get<0>(expr.second)); // recurse
  GridFromExpression(grid, std::get<1>(expr.second));
accelerator_inline
void GridFromExpression(GridBase *&grid, const LatticeBinaryExpression<Op, T1, T2> &expr)
{
  GridFromExpression(grid, expr.arg1); // recurse
  GridFromExpression(grid, expr.arg2);
}
template <typename Op, typename T1, typename T2, typename T3>
inline void GridFromExpression(
    GridBase *&grid, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr) {
  GridFromExpression(grid, std::get<0>(expr.second)); // recurse
  GridFromExpression(grid, std::get<1>(expr.second));
  GridFromExpression(grid, std::get<2>(expr.second));
accelerator_inline
void GridFromExpression(GridBase *&grid, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
{
  GridFromExpression(grid, expr.arg1); // recurse
  GridFromExpression(grid, expr.arg2); // recurse
  GridFromExpression(grid, expr.arg3); // recurse
}

//////////////////////////////////////////////////////////////////////////
// Obtain the CB from an expression, ensuring conformable. This must follow a
// tree recursion
//////////////////////////////////////////////////////////////////////////
template <class T1,
          typename std::enable_if<is_lattice<T1>::value, T1>::type * = nullptr>
template <class T1,typename std::enable_if<is_lattice<T1>::value, T1>::type * = nullptr>
inline void CBFromExpression(int &cb, const T1 &lat) // Lattice leaf
{
  if ((cb == Odd) || (cb == Even)) {
    assert(cb == lat.Checkerboard());
  }
  cb = lat.Checkerboard();
  // std::cout<<GridLogMessage<<"Lattice leaf cb "<<cb<<std::endl;
}
template <class T1,
          typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr>
template <class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr>
inline void CBFromExpression(int &cb, const T1 &notlat) // non-lattice leaf
{
  // std::cout<<GridLogMessage<<"Non lattice leaf cb"<<cb<<std::endl;
}
template <typename Op, typename T1>
inline void CBFromExpression(int &cb,
                             const LatticeUnaryExpression<Op, T1> &expr) {
  CBFromExpression(cb, std::get<0>(expr.second)); // recurse
  // std::cout<<GridLogMessage<<"Unary node cb "<<cb<<std::endl;
}

template <typename Op, typename T1, typename T2>
inline void CBFromExpression(int &cb,
                             const LatticeBinaryExpression<Op, T1, T2> &expr) {
  CBFromExpression(cb, std::get<0>(expr.second)); // recurse
  CBFromExpression(cb, std::get<1>(expr.second));
  // std::cout<<GridLogMessage<<"Binary node cb "<<cb<<std::endl;
template <typename Op, typename T1> inline
void CBFromExpression(int &cb,const LatticeUnaryExpression<Op, T1> &expr)
{
  CBFromExpression(cb, expr.arg1); // recurse AST
}

template <typename Op, typename T1, typename T2> inline
void CBFromExpression(int &cb,const LatticeBinaryExpression<Op, T1, T2> &expr)
{
  CBFromExpression(cb, expr.arg1); // recurse AST
  CBFromExpression(cb, expr.arg2); // recurse AST
}
template <typename Op, typename T1, typename T2, typename T3>
inline void CBFromExpression(
    int &cb, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr) {
  CBFromExpression(cb, std::get<0>(expr.second)); // recurse
  CBFromExpression(cb, std::get<1>(expr.second));
  CBFromExpression(cb, std::get<2>(expr.second));
  // std::cout<<GridLogMessage<<"Trinary node cb "<<cb<<std::endl;
inline void CBFromExpression(int &cb, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
{
  CBFromExpression(cb, expr.arg1); // recurse AST
  CBFromExpression(cb, expr.arg2); // recurse AST
  CBFromExpression(cb, expr.arg3); // recurse AST
}

////////////////////////////////////////////
@@ -253,15 +243,16 @@ GridUnopClass(UnaryExp, exp(a));
  template <class left, class right>                          \
  struct name {                                               \
    static auto inline func(const left &lhs, const right &rhs) \
      -> decltype(combination) const {                        \
      -> decltype(combination) const                          \
    {                                                         \
      return combination;                                     \
    }                                                         \
  }
  };

GridBinOpClass(BinaryAdd, lhs + rhs);
GridBinOpClass(BinarySub, lhs - rhs);
GridBinOpClass(BinaryMul, lhs *rhs);
GridBinOpClass(BinaryDiv, lhs /rhs);

GridBinOpClass(BinaryAnd, lhs &rhs);
GridBinOpClass(BinaryOr, lhs | rhs);
GridBinOpClass(BinaryAndAnd, lhs &&rhs);
@@ -273,68 +264,50 @@ GridBinOpClass(BinaryOrOr, lhs || rhs);
#define GridTrinOpClass(name, combination) \
  template <class predicate, class left, class right> \
  struct name { \
    static auto inline func(const predicate &pred, const left &lhs, \
                            const right &rhs) -> decltype(combination) const { \
    static auto inline func(const predicate &pred, const left &lhs, const right &rhs) \
      -> decltype(combination) const \
    { \
      return combination; \
    } \
  }
  };

GridTrinOpClass(
    TrinaryWhere,
    (predicatedWhere<predicate, typename std::remove_reference<left>::type,
                     typename std::remove_reference<right>::type>(pred, lhs,
                                                                  rhs)));
GridTrinOpClass(TrinaryWhere,
                (predicatedWhere<predicate,
                                 typename std::remove_reference<left>::type,
                                 typename std::remove_reference<right>::type>(pred, lhs,rhs)));

////////////////////////////////////////////
// Operator syntactical glue
////////////////////////////////////////////

#define GRID_UNOP(name) name<decltype(eval(0, arg))>
#define GRID_BINOP(name) name<decltype(eval(0, lhs)), decltype(eval(0, rhs))>
#define GRID_TRINOP(name) \
  name<decltype(eval(0, pred)), decltype(eval(0, lhs)), decltype(eval(0, rhs))>
#define GRID_UNOP(name) name<decltype(eval(0, arg))>
#define GRID_BINOP(name) name<decltype(eval(0, lhs)), decltype(eval(0, rhs))>
#define GRID_TRINOP(name) name<decltype(eval(0, pred)), decltype(eval(0, lhs)), decltype(eval(0, rhs))>

#define GRID_DEF_UNOP(op, name) \
  template <typename T1, \
            typename std::enable_if<is_lattice<T1>::value || \
                                    is_lattice_expr<T1>::value, \
                                    T1>::type * = nullptr> \
  inline auto op(const T1 &arg) \
    ->decltype(LatticeUnaryExpression<GRID_UNOP(name), const T1 &>( \
        std::make_pair(GRID_UNOP(name)(), std::forward_as_tuple(arg)))) { \
    return LatticeUnaryExpression<GRID_UNOP(name), const T1 &>( \
        std::make_pair(GRID_UNOP(name)(), std::forward_as_tuple(arg))); \
  template <typename T1, typename std::enable_if<is_lattice<T1>::value||is_lattice_expr<T1>::value,T1>::type * = nullptr> \
  inline auto op(const T1 &arg) ->decltype(LatticeUnaryExpression<GRID_UNOP(name),T1>(GRID_UNOP(name)(), arg)) \
  { \
    return LatticeUnaryExpression<GRID_UNOP(name),T1>(GRID_UNOP(name)(), arg); \
  }

#define GRID_BINOP_LEFT(op, name) \
  template <typename T1, typename T2, \
            typename std::enable_if<is_lattice<T1>::value || \
                                    is_lattice_expr<T1>::value, \
                                    T1>::type * = nullptr> \
            typename std::enable_if<is_lattice<T1>::value||is_lattice_expr<T1>::value,T1>::type * = nullptr> \
  inline auto op(const T1 &lhs, const T2 &rhs) \
    ->decltype( \
        LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
            std::make_pair(GRID_BINOP(name)(), \
                           std::forward_as_tuple(lhs, rhs)))) { \
    return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
        std::make_pair(GRID_BINOP(name)(), std::forward_as_tuple(lhs, rhs))); \
    ->decltype(LatticeBinaryExpression<GRID_BINOP(name),T1,T2>(GRID_BINOP(name)(),lhs,rhs)) \
  { \
    return LatticeBinaryExpression<GRID_BINOP(name),T1,T2>(GRID_BINOP(name)(),lhs,rhs);\
  }

#define GRID_BINOP_RIGHT(op, name) \
  template <typename T1, typename T2, \
            typename std::enable_if<!is_lattice<T1>::value && \
                                    !is_lattice_expr<T1>::value, \
                                    T1>::type * = nullptr, \
            typename std::enable_if<is_lattice<T2>::value || \
                                    is_lattice_expr<T2>::value, \
                                    T2>::type * = nullptr> \
            typename std::enable_if<!is_lattice<T1>::value&&!is_lattice_expr<T1>::value,T1>::type * = nullptr, \
            typename std::enable_if< is_lattice<T2>::value|| is_lattice_expr<T2>::value,T2>::type * = nullptr> \
  inline auto op(const T1 &lhs, const T2 &rhs) \
    ->decltype( \
        LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
            std::make_pair(GRID_BINOP(name)(), \
                           std::forward_as_tuple(lhs, rhs)))) { \
    return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
        std::make_pair(GRID_BINOP(name)(), std::forward_as_tuple(lhs, rhs))); \
    ->decltype(LatticeBinaryExpression<GRID_BINOP(name),T1,T2>(GRID_BINOP(name)(),lhs, rhs)) \
  { \
    return LatticeBinaryExpression<GRID_BINOP(name),T1,T2>(GRID_BINOP(name)(),lhs, rhs); \
  }

#define GRID_DEF_BINOP(op, name) \
@@ -344,18 +317,14 @@ GridTrinOpClass(
#define GRID_DEF_TRINOP(op, name) \
  template <typename T1, typename T2, typename T3> \
  inline auto op(const T1 &pred, const T2 &lhs, const T3 &rhs) \
    ->decltype( \
        LatticeTrinaryExpression<GRID_TRINOP(name), const T1 &, const T2 &, \
                                 const T3 &>(std::make_pair( \
            GRID_TRINOP(name)(), std::forward_as_tuple(pred, lhs, rhs)))) { \
    return LatticeTrinaryExpression<GRID_TRINOP(name), const T1 &, const T2 &, \
                                    const T3 &>(std::make_pair( \
        GRID_TRINOP(name)(), std::forward_as_tuple(pred, lhs, rhs))); \
    ->decltype(LatticeTrinaryExpression<GRID_TRINOP(name),T1,T2,T3>(GRID_TRINOP(name)(),pred, lhs, rhs)) \
  { \
    return LatticeTrinaryExpression<GRID_TRINOP(name),T1,T2,T3>(GRID_TRINOP(name)(),pred, lhs, rhs); \
  }

////////////////////////
// Operator definitions
////////////////////////

GRID_DEF_UNOP(operator-, UnarySub);
GRID_DEF_UNOP(Not, UnaryNot);
GRID_DEF_UNOP(operator!, UnaryNot);
@@ -399,29 +368,27 @@ GRID_DEF_TRINOP(where, TrinaryWhere);
/////////////////////////////////////////////////////////////
template <class Op, class T1>
auto closure(const LatticeUnaryExpression<Op, T1> &expr)
  -> Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second))))> {
  Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second))))> ret(
      expr);
  -> Lattice<decltype(expr.op.func(eval(0, expr.arg1)))>
{
  Lattice<decltype(expr.op.func(eval(0, expr.arg1)))> ret(expr);
  return ret;
}
template <class Op, class T1, class T2>
auto closure(const LatticeBinaryExpression<Op, T1, T2> &expr)
  -> Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
                                      eval(0, std::get<1>(expr.second))))> {
  Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
                                   eval(0, std::get<1>(expr.second))))>
      ret(expr);
  -> Lattice<decltype(expr.op.func(eval(0, expr.arg1),eval(0, expr.arg2)))>
{
  Lattice<decltype(expr.op.func(eval(0, expr.arg1),eval(0, expr.arg2)))> ret(expr);
  return ret;
}
template <class Op, class T1, class T2, class T3>
auto closure(const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
  -> Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
                                      eval(0, std::get<1>(expr.second)),
                                      eval(0, std::get<2>(expr.second))))> {
  Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
                                   eval(0, std::get<1>(expr.second)),
                                   eval(0, std::get<2>(expr.second))))>
      ret(expr);
  -> Lattice<decltype(expr.op.func(eval(0, expr.arg1),
                                   eval(0, expr.arg2),
                                   eval(0, expr.arg3)))>
{
  Lattice<decltype(expr.op.func(eval(0, expr.arg1),
                                eval(0, expr.arg2),
                                eval(0, expr.arg3)))> ret(expr);
  return ret;
}

@@ -432,34 +399,7 @@ auto closure(const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
#undef GRID_DEF_UNOP
#undef GRID_DEF_BINOP
#undef GRID_DEF_TRINOP

NAMESPACE_END(Grid);

#if 0
using namespace Grid;

int main(int argc,char **argv){

  Lattice<double> v1(16);
  Lattice<double> v2(16);
  Lattice<double> v3(16);

  BinaryAdd<double,double> tmp;
  LatticeBinaryExpression<BinaryAdd<double,double>,Lattice<double> &,Lattice<double> &>
    expr(std::make_pair(tmp,
                        std::forward_as_tuple(v1,v2)));
  tmp.func(eval(0,v1),eval(0,v2));

  auto var = v1+v2;
  std::cout<<GridLogMessage<<typeid(var).name()<<std::endl;

  v3=v1+v2;
  v3=v1+v2+v1*v2;
};

void testit(Lattice<double> &v1,Lattice<double> &v2,Lattice<double> &v3)
{
  v3=v1+v2+v1*v2;
}
#endif

#endif
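For orientation, here is a hedged sketch of how the pieces above are driven when an expression is finally assigned to a Lattice; the real operator= lives in Lattice.h and is not part of this hunk, and the helper name assignExpression is purely illustrative. The grid and checkerboard are recovered by recursing the expression tree, then the destination view is captured by the accelerator loop and each site is filled by eval.

// Illustrative only: how GridFromExpression / CBFromExpression / eval fit together.
template<class vobj, class Op, class T1, class T2>
void assignExpression(Lattice<vobj> &dest, const LatticeBinaryExpression<Op,T1,T2> &expr)
{
  GridBase *grid = nullptr;
  GridFromExpression(grid, expr);   // walk the AST; check all leaves are conformable
  int cb = -1;                      // start from an "undefined" checkerboard
  CBFromExpression(cb, expr);       // walk the AST; check leaf checkerboards agree
  dest.Checkerboard() = cb;

  auto dest_v = dest.View();        // view is safe to copy into the device kernel
  accelerator_loop(ss, dest_v, {
    dest_v[ss] = eval(ss, expr);    // per-site evaluation defined in this file
  });
}
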
@ -36,17 +36,20 @@ NAMESPACE_BEGIN(Grid);
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
auto rhs_v = rhs.View();
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
#ifdef STREAMING_STORES
|
||||
accelerator_loop(ss,lhs,{
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
obj1 tmp;
|
||||
mult(&tmp,&lhs[ss],&rhs[ss]);
|
||||
vstream(ret[ss],tmp);
|
||||
mult(&tmp,&lhs_v[ss],&rhs_v[ss]);
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
#else
|
||||
accelerator_loop(ss,lhs,{
|
||||
mult(&ret[ss],&lhs[ss],&rhs[ss]);
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
mult(&ret_v[ss],&lhs_v[ss],&rhs_v[ss]);
|
||||
});
|
||||
#endif
|
||||
}
|
||||
@ -56,15 +59,18 @@ void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
auto rhs_v = rhs.View();
|
||||
#ifdef STREAMING_STORES
|
||||
accelerator_loop(ss,lhs,{
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
obj1 tmp;
|
||||
mac(&tmp,&lhs[ss],&rhs[ss]);
|
||||
vstream(ret[ss],tmp);
|
||||
mac(&tmp,&lhs_v[ss],&rhs_v[ss]);
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
#else
|
||||
accelerator_loop(ss,lhs,{
|
||||
mac(&ret[ss],&lhs[ss],&rhs[ss]);
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
mac(&ret_v[ss],&lhs_v[ss],&rhs_v[ss]);
|
||||
});
|
||||
#endif
|
||||
}
|
||||
@ -74,15 +80,18 @@ void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
auto rhs_v = rhs.View();
|
||||
#ifdef STREAMING_STORES
|
||||
accelerator_loop(ss,lhs,{
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
obj1 tmp;
|
||||
sub(&tmp,&lhs[ss],&rhs[ss]);
|
||||
vstream(ret[ss],tmp);
|
||||
sub(&tmp,&lhs_v[ss],&rhs_v[ss]);
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
#else
|
||||
accelerator_loop(ss,lhs,{
|
||||
sub(&ret[ss],&lhs[ss],&rhs[ss]);
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
sub(&ret_v[ss],&lhs_v[ss],&rhs_v[ss]);
|
||||
});
|
||||
#endif
|
||||
}
|
||||
@ -91,15 +100,18 @@ void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
auto rhs_v = rhs.View();
|
||||
#ifdef STREAMING_STORES
|
||||
accelerator_loop(ss,lhs,{
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
obj1 tmp;
|
||||
add(&tmp,&lhs[ss],&rhs[ss]);
|
||||
vstream(ret[ss],tmp);
|
||||
add(&tmp,&lhs_v[ss],&rhs_v[ss]);
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
#else
|
||||
accelerator_loop(ss,lhs,{
|
||||
add(&ret[ss],&lhs[ss],&rhs[ss]);
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
add(&ret_v[ss],&lhs_v[ss],&rhs_v[ss]);
|
||||
});
|
||||
#endif
|
||||
}
|
||||
@ -111,10 +123,12 @@ template<class obj1,class obj2,class obj3> inline
|
||||
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(lhs,ret);
|
||||
accelerator_loop(ss,lhs,{
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
obj1 tmp;
|
||||
mult(&tmp,&lhs[ss],&rhs);
|
||||
vstream(ret[ss],tmp);
|
||||
mult(&tmp,&lhs_v[ss],&rhs);
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
}
|
||||
|
||||
@ -122,10 +136,12 @@ template<class obj1,class obj2,class obj3> inline
|
||||
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,lhs);
|
||||
accelerator_loop(ss,lhs,{
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
obj1 tmp;
|
||||
mac(&tmp,&lhs[ss],&rhs);
|
||||
vstream(ret[ss],tmp);
|
||||
mac(&tmp,&lhs_v[ss],&rhs);
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
}
|
||||
|
||||
@ -133,15 +149,17 @@ template<class obj1,class obj2,class obj3> inline
|
||||
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,lhs);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
#ifdef STREAMING_STORES
|
||||
accelerator_loop(ss,lhs,{
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
obj1 tmp;
|
||||
sub(&tmp,&lhs[ss],&rhs);
|
||||
vstream(ret[ss],tmp);
|
||||
sub(&tmp,&lhs_v[ss],&rhs);
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
#else
|
||||
accelerator_loop(ss,lhs,{
|
||||
sub(&ret[ss],&lhs[ss],&rhs);
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
sub(&ret_v[ss],&lhs_v[ss],&rhs);
|
||||
});
|
||||
#endif
|
||||
}
|
||||
@ -149,15 +167,17 @@ template<class obj1,class obj2,class obj3> inline
|
||||
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(lhs,ret);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
#ifdef STREAMING_STORES
|
||||
accelerator_loop(ss,lhs,{
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
obj1 tmp;
|
||||
add(&tmp,&lhs[ss],&rhs);
|
||||
vstream(ret[ss],tmp);
|
||||
add(&tmp,&lhs_v[ss],&rhs);
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
#else
|
||||
accelerator_loop(ss,lhs,{
|
||||
add(&ret[ss],&lhs[ss],&rhs);
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
add(&ret_v[ss],&lhs_v[ss],&rhs);
|
||||
});
|
||||
#endif
|
||||
}
|
||||
@ -169,15 +189,17 @@ template<class obj1,class obj2,class obj3> inline
|
||||
void mult(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = rhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto rhs_v = rhs.View();
|
||||
#ifdef STREAMING_STORES
|
||||
accelerator_loop(ss,rhs,{
|
||||
accelerator_loop(ss,rhs_v,{
|
||||
obj1 tmp;
|
||||
mult(&tmp,&lhs,&rhs[ss]);
|
||||
vstream(ret[ss],tmp);
|
||||
mult(&tmp,&lhs,&rhs_v[ss]);
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
#else
|
||||
accelerator_loop(ss,rhs,{
|
||||
mult(&ret[ss],&lhs,&rhs[ss]);
|
||||
accelerator_loop(ss,rhs_v,{
|
||||
mult(&ret_v[ss],&lhs,&rhs_v[ss]);
|
||||
});
|
||||
#endif
|
||||
}
|
||||
@ -186,15 +208,17 @@ template<class obj1,class obj2,class obj3> inline
|
||||
void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = rhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto rhs_v = rhs.View();
|
||||
#ifdef STREAMING_STORES
|
||||
accelerator_loop(ss,rhs,{
|
||||
accelerator_loop(ss,rhs_v,{
|
||||
obj1 tmp;
|
||||
mac(&tmp,&lhs,&rhs[ss]);
|
||||
vstream(ret[ss],tmp);
|
||||
mac(&tmp,&lhs,&rhs_v[ss]);
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
#else
|
||||
accelerator_loop(ss,rhs,{
|
||||
mac(&ret[ss],&lhs,&rhs[ss]);
|
||||
accelerator_loop(ss,rhs_v,{
|
||||
mac(&ret_v[ss],&lhs,&rhs_v[ss]);
|
||||
});
|
||||
#endif
|
||||
}
|
||||
@ -203,15 +227,17 @@ template<class obj1,class obj2,class obj3> inline
|
||||
void sub(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = rhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto rhs_v = rhs.View();
|
||||
#ifdef STREAMING_STORES
|
||||
accelerator_loop(ss,rhs,{
|
||||
accelerator_loop(ss,rhs_v,{
|
||||
obj1 tmp;
|
||||
sub(&tmp,&lhs,&rhs[ss]);
|
||||
vstream(ret[ss],tmp);
|
||||
sub(&tmp,&lhs,&rhs_v[ss]);
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
#else
|
||||
accelerator_loop(ss,rhs,{
|
||||
sub(&ret[ss],&lhs,&rhs[ss]);
|
||||
accelerator_loop(ss,rhs_v,{
|
||||
sub(&ret_v[ss],&lhs,&rhs_v[ss]);
|
||||
});
|
||||
#endif
|
||||
}
|
||||
@ -219,15 +245,17 @@ template<class obj1,class obj2,class obj3> inline
|
||||
void add(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = rhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto rhs_v = rhs.View();
|
||||
#ifdef STREAMING_STORES
|
||||
accelerator_loop(ss,rhs,{
|
||||
accelerator_loop(ss,rhs_v,{
|
||||
obj1 tmp;
|
||||
add(&tmp,&lhs,&rhs[ss]);
|
||||
vstream(ret[ss],tmp);
|
||||
add(&tmp,&lhs,&rhs_v[ss]);
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
#else
|
||||
accelerator_loop(ss,rhs,{
|
||||
add(&ret[ss],&lhs,&rhs[ss]);
|
||||
accelerator_loop(ss,rhs_v,{
|
||||
add(&ret_v[ss],&lhs,&rhs_v[ss]);
|
||||
});
|
||||
#endif
|
||||
}
|
||||
@ -237,14 +265,17 @@ void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &
|
||||
ret.Checkerboard() = x.Checkerboard();
|
||||
conformable(ret,x);
|
||||
conformable(x,y);
|
||||
auto ret_v = ret.View();
|
||||
auto x_v = x.View();
|
||||
auto y_v = y.View();
|
||||
#ifdef STREAMING_STORES
|
||||
accelerator_loop(ss,x,{
|
||||
vobj tmp = a*x[ss]+y[ss];
|
||||
vstream(ret[ss],tmp);
|
||||
accelerator_loop(ss,x_v,{
|
||||
vobj tmp = a*x_v[ss]+y_v[ss];
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
#else
|
||||
accelerator_loop(ss,x,{
|
||||
ret[ss]=a*x[ss]+y[ss];
|
||||
accelerator_loop(ss,x_v,{
|
||||
ret_v[ss]=a*x_v[ss]+y_v[ss];
|
||||
});
|
||||
#endif
|
||||
}
|
||||
@ -253,14 +284,17 @@ void axpby(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice
|
||||
ret.Checkerboard() = x.Checkerboard();
|
||||
conformable(ret,x);
|
||||
conformable(x,y);
|
||||
auto ret_v = ret.View();
|
||||
auto x_v = x.View();
|
||||
auto y_v = y.View();
|
||||
#ifdef STREAMING_STORES
|
||||
accelerator_loop(ss,x,{
|
||||
vobj tmp = a*x[ss]+b*y[ss];
|
||||
vstream(ret[ss],tmp);
|
||||
accelerator_loop(ss,x_v,{
|
||||
vobj tmp = a*x_v[ss]+b*y_v[ss];
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
#else
|
||||
accelerator_loop(ss,x,{
|
||||
ret[ss]=a*x[ss]+b*y[ss];
|
||||
accelerator_loop(ss,x_v,{
|
||||
ret_v[ss]=a*x_v[ss]+b*y_v[ss];
|
||||
});
|
||||
#endif
|
||||
}
|
||||
|
@ -47,8 +47,11 @@ template<class vfunctor,class lobj,class robj>
|
||||
inline Lattice<vInteger> LLComparison(vfunctor op,const Lattice<lobj> &lhs,const Lattice<robj> &rhs)
|
||||
{
|
||||
Lattice<vInteger> ret(rhs.Grid());
|
||||
accelerator_loop( ss, rhs, {
|
||||
ret[ss]=op(lhs[ss],rhs[ss]);
|
||||
auto lhs_v = lhs.View();
|
||||
auto rhs_v = rhs.View();
|
||||
auto ret_v = ret.View();
|
||||
accelerator_loop( ss, rhs_v, {
|
||||
ret_v[ss]=op(lhs_v[ss],rhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
}
|
||||
@ -59,8 +62,10 @@ template<class vfunctor,class lobj,class robj>
|
||||
inline Lattice<vInteger> LSComparison(vfunctor op,const Lattice<lobj> &lhs,const robj &rhs)
|
||||
{
|
||||
Lattice<vInteger> ret(lhs.Grid());
|
||||
accelerator_loop( ss, lhs, {
|
||||
ret[ss]=op(lhs[ss],rhs);
|
||||
auto lhs_v = lhs.View();
|
||||
auto ret_v = ret.View();
|
||||
accelerator_loop( ss, lhs_v, {
|
||||
ret_v[ss]=op(lhs_v[ss],rhs);
|
||||
});
|
||||
return ret;
|
||||
}
|
||||
@ -71,8 +76,10 @@ template<class vfunctor,class lobj,class robj>
|
||||
inline Lattice<vInteger> SLComparison(vfunctor op,const lobj &lhs,const Lattice<robj> &rhs)
|
||||
{
|
||||
Lattice<vInteger> ret(rhs.Grid());
|
||||
accelerator_loop( ss, rhs, {
|
||||
ret[ss]=op(lhs[ss],rhs);
|
||||
auto rhs_v = rhs.View();
|
||||
auto ret_v = ret.View();
|
||||
accelerator_loop( ss, rhs_v, {
|
||||
ret_v[ss]=op(lhs,rhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
}
|
||||
|
@ -44,42 +44,42 @@ NAMESPACE_BEGIN(Grid);
|
||||
//
|
||||
template<class lobj,class robj> class veq {
|
||||
public:
|
||||
vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
accelerator vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) == (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vne {
|
||||
public:
|
||||
vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
accelerator vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) != (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vlt {
|
||||
public:
|
||||
vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
accelerator vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) < (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vle {
|
||||
public:
|
||||
vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
accelerator vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) <= (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vgt {
|
||||
public:
|
||||
vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
accelerator vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) > (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vge {
|
||||
public:
|
||||
vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
accelerator vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) >= (rhs);
|
||||
}
|
||||
@ -88,42 +88,42 @@ public:
|
||||
// Generic list of functors
|
||||
template<class lobj,class robj> class seq {
|
||||
public:
|
||||
Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
accelerator Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) == (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class sne {
|
||||
public:
|
||||
Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
accelerator Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) != (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class slt {
|
||||
public:
|
||||
Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
accelerator Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) < (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class sle {
|
||||
public:
|
||||
Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
accelerator Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) <= (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class sgt {
|
||||
public:
|
||||
Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
accelerator Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) > (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class sge {
|
||||
public:
|
||||
Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
accelerator Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) >= (rhs);
|
||||
}
|
||||
@ -133,7 +133,7 @@ public:
|
||||
// Integer and real get extra relational functions.
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class sfunctor, class vsimd,IfNotComplex<vsimd> = 0>
|
||||
inline vInteger Comparison(sfunctor sop,const vsimd & lhs, const vsimd & rhs)
|
||||
accelerator_inline vInteger Comparison(sfunctor sop,const vsimd & lhs, const vsimd & rhs)
|
||||
{
|
||||
typedef typename vsimd::scalar_type scalar;
|
||||
ExtractBuffer<scalar> vlhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation
|
||||
@ -150,7 +150,7 @@ inline vInteger Comparison(sfunctor sop,const vsimd & lhs, const vsimd & rhs)
|
||||
}
|
||||
|
||||
template<class sfunctor, class vsimd,IfNotComplex<vsimd> = 0>
|
||||
inline vInteger Comparison(sfunctor sop,const vsimd & lhs, const typename vsimd::scalar_type & rhs)
|
||||
accelerator_inline vInteger Comparison(sfunctor sop,const vsimd & lhs, const typename vsimd::scalar_type & rhs)
|
||||
{
|
||||
typedef typename vsimd::scalar_type scalar;
|
||||
ExtractBuffer<scalar> vlhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation
|
||||
@ -165,7 +165,7 @@ inline vInteger Comparison(sfunctor sop,const vsimd & lhs, const typename vsimd:
|
||||
}
|
||||
|
||||
template<class sfunctor, class vsimd,IfNotComplex<vsimd> = 0>
|
||||
inline vInteger Comparison(sfunctor sop,const typename vsimd::scalar_type & lhs, const vsimd & rhs)
|
||||
accelerator_inline vInteger Comparison(sfunctor sop,const typename vsimd::scalar_type & lhs, const vsimd & rhs)
|
||||
{
|
||||
typedef typename vsimd::scalar_type scalar;
|
||||
ExtractBuffer<scalar> vrhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation
|
||||
@ -181,35 +181,35 @@ inline vInteger Comparison(sfunctor sop,const typename vsimd::scalar_type & lhs,
|
||||
|
||||
#define DECLARE_RELATIONAL(op,functor) \
|
||||
template<class vsimd,IfSimd<vsimd> = 0> \
|
||||
inline vInteger operator op (const vsimd & lhs, const vsimd & rhs) \
|
||||
accelerator_inline vInteger operator op (const vsimd & lhs, const vsimd & rhs) \
|
||||
{ \
|
||||
typedef typename vsimd::scalar_type scalar; \
|
||||
return Comparison(functor<scalar,scalar>(),lhs,rhs); \
|
||||
} \
|
||||
template<class vsimd,IfSimd<vsimd> = 0> \
|
||||
inline vInteger operator op (const vsimd & lhs, const typename vsimd::scalar_type & rhs) \
|
||||
accelerator_inline vInteger operator op (const vsimd & lhs, const typename vsimd::scalar_type & rhs) \
|
||||
{ \
|
||||
typedef typename vsimd::scalar_type scalar; \
|
||||
return Comparison(functor<scalar,scalar>(),lhs,rhs); \
|
||||
} \
|
||||
template<class vsimd,IfSimd<vsimd> = 0> \
|
||||
inline vInteger operator op (const typename vsimd::scalar_type & lhs, const vsimd & rhs) \
|
||||
accelerator_inline vInteger operator op (const typename vsimd::scalar_type & lhs, const vsimd & rhs) \
|
||||
{ \
|
||||
typedef typename vsimd::scalar_type scalar; \
|
||||
return Comparison(functor<scalar,scalar>(),lhs,rhs); \
|
||||
} \
|
||||
template<class vsimd> \
|
||||
inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs) \
|
||||
accelerator_inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs) \
|
||||
{ \
|
||||
return lhs._internal op rhs._internal; \
|
||||
} \
|
||||
template<class vsimd> \
|
||||
inline vInteger operator op(const iScalar<vsimd> &lhs,const typename vsimd::scalar_type &rhs) \
|
||||
accelerator_inline vInteger operator op(const iScalar<vsimd> &lhs,const typename vsimd::scalar_type &rhs) \
|
||||
{ \
|
||||
return lhs._internal op rhs; \
|
||||
} \
|
||||
template<class vsimd> \
|
||||
inline vInteger operator op(const typename vsimd::scalar_type &lhs,const iScalar<vsimd> &rhs) \
|
||||
accelerator_inline vInteger operator op(const typename vsimd::scalar_type &lhs,const iScalar<vsimd> &rhs) \
|
||||
{ \
|
||||
return lhs op rhs._internal; \
|
||||
}
|
||||
|
@ -42,20 +42,22 @@ template<class iobj> inline void LatticeCoordinate(Lattice<iobj> &l,int mu)
|
||||
ExtractBuffer<scalar_type> mergebuf(Nsimd);
|
||||
|
||||
vector_type vI;
|
||||
auto l_v = l.View();
|
||||
for(int o=0;o<grid->oSites();o++){
|
||||
for(int i=0;i<grid->iSites();i++){
|
||||
grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor);
|
||||
mergebuf[i]=(Integer)gcoor[mu];
|
||||
}
|
||||
merge<vector_type,scalar_type>(vI,mergebuf);
|
||||
l[o]=vI;
|
||||
l_v[o]=vI;
|
||||
}
|
||||
};
|
||||
|
||||
// LatticeCoordinate();
|
||||
// FIXME for debug; deprecate this; made obsolete by
|
||||
template<class vobj> void lex_sites(Lattice<vobj> &l){
|
||||
Real *v_ptr = (Real *)&l[0];
|
||||
auto l_v = l.View();
|
||||
Real *v_ptr = (Real *)&l_v[0];
|
||||
size_t o_len = l.Grid()->oSites();
|
||||
size_t v_len = sizeof(vobj)/sizeof(vRealF);
|
||||
size_t vec_len = vRealF::Nsimd();
|
||||
|
@ -43,8 +43,10 @@ template<class vobj>
|
||||
inline auto localNorm2 (const Lattice<vobj> &rhs)-> Lattice<typename vobj::tensor_reduced>
|
||||
{
|
||||
Lattice<typename vobj::tensor_reduced> ret(rhs.Grid());
|
||||
accelerator_loop(ss,rhs,{
|
||||
ret[ss]=innerProduct(rhs[ss],rhs[ss]);
|
||||
auto rhs_v = rhs.View();
|
||||
auto ret_v = ret.View();
|
||||
accelerator_loop(ss,rhs_v,{
|
||||
ret_v[ss]=innerProduct(rhs_v[ss],rhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
}
|
||||
@ -54,8 +56,11 @@ template<class vobj>
|
||||
inline auto localInnerProduct (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs) -> Lattice<typename vobj::tensor_reduced>
|
||||
{
|
||||
Lattice<typename vobj::tensor_reduced> ret(rhs.Grid());
|
||||
accelerator_loop(ss,rhs,{
|
||||
ret[ss]=innerProduct(lhs[ss],rhs[ss]);
|
||||
auto lhs_v = lhs.View();
|
||||
auto rhs_v = rhs.View();
|
||||
auto ret_v = ret.View();
|
||||
accelerator_loop(ss,rhs_v,{
|
||||
ret_v[ss]=innerProduct(lhs_v[ss],rhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
}
|
||||
@ -63,11 +68,14 @@ inline auto localInnerProduct (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs
|
||||
// outerProduct Scalar x Scalar -> Scalar
|
||||
// Vector x Vector -> Matrix
|
||||
template<class ll,class rr>
|
||||
inline auto outerProduct (const Lattice<ll> &lhs,const Lattice<rr> &rhs) -> Lattice<decltype(outerProduct(lhs[0],rhs[0]))>
|
||||
inline auto outerProduct (const Lattice<ll> &lhs,const Lattice<rr> &rhs) -> Lattice<decltype(outerProduct(ll(),rr()))>
|
||||
{
|
||||
Lattice<decltype(outerProduct(lhs[0],rhs[0]))> ret(rhs.Grid());
|
||||
accelerator_loop(ss,rhs,{
|
||||
ret[ss]=outerProduct(lhs[ss],rhs[ss]);
|
||||
Lattice<decltype(outerProduct(ll(),rr()))> ret(rhs.Grid());
|
||||
auto lhs_v = lhs.View();
|
||||
auto rhs_v = rhs.View();
|
||||
auto ret_v = ret.View();
|
||||
accelerator_loop(ss,rhs_v,{
|
||||
ret_v[ss]=outerProduct(lhs_v[ss],rhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
}
|
||||
|
@ -51,6 +51,9 @@ static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice
|
||||
int block =FullGrid->_slice_block [Orthog];
|
||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
||||
int ostride=FullGrid->_ostride[Orthog];
|
||||
auto X_v = X.View();
|
||||
auto Y_v = Y.View();
|
||||
auto R_v = R.View();
|
||||
thread_region
|
||||
{
|
||||
std::vector<vobj> s_x(Nblock);
|
||||
@ -60,16 +63,16 @@ static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice
|
||||
int o = n*stride + b;
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
s_x[i] = X[o+i*ostride];
|
||||
s_x[i] = X_v[o+i*ostride];
|
||||
}
|
||||
|
||||
vobj dot;
|
||||
for(int i=0;i<Nblock;i++){
|
||||
dot = Y[o+i*ostride];
|
||||
dot = Y_v[o+i*ostride];
|
||||
for(int j=0;j<Nblock;j++){
|
||||
dot = dot + s_x[j]*(scale*aa(j,i));
|
||||
}
|
||||
R[o+i*ostride]=dot;
|
||||
R_v[o+i*ostride]=dot;
|
||||
}
|
||||
}});
|
||||
}
|
||||
@ -85,14 +88,7 @@ static void sliceMulMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<
|
||||
int Nblock = X.Grid()->GlobalDimensions()[Orthog];
|
||||
|
||||
GridBase *FullGrid = X.Grid();
|
||||
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
// Lattice<vobj> Xslice(SliceGrid);
|
||||
// Lattice<vobj> Rslice(SliceGrid);
|
||||
|
||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
||||
// int nh = FullGrid->_ndimension;
|
||||
// int nl = SliceGrid->_ndimension;
|
||||
// int nl=1;
|
||||
|
||||
//FIXME package in a convenient iterator
|
||||
//Should loop over a plane orthogonal to direction "Orthog"
|
||||
@ -100,16 +96,20 @@ static void sliceMulMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<
|
||||
int block =FullGrid->_slice_block [Orthog];
|
||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
||||
int ostride=FullGrid->_ostride[Orthog];
|
||||
|
||||
auto X_v = X.View();
|
||||
auto R_v = R.View();
|
||||
|
||||
thread_region
|
||||
{
|
||||
std::vector<vobj> s_x(Nblock);
|
||||
|
||||
|
||||
thread_loop_collapse2( (int n=0;n<nblock;n++),{
|
||||
for(int b=0;b<block;b++){
|
||||
int o = n*stride + b;
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
s_x[i] = X[o+i*ostride];
|
||||
s_x[i] = X_v[o+i*ostride];
|
||||
}
|
||||
|
||||
vobj dot;
|
||||
@ -118,7 +118,7 @@ static void sliceMulMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<
|
||||
for(int j=1;j<Nblock;j++){
|
||||
dot = dot + s_x[j]*(scale*aa(j,i));
|
||||
}
|
||||
R[o+i*ostride]=dot;
|
||||
R_v[o+i*ostride]=dot;
|
||||
}
|
||||
}});
|
||||
}
|
||||
@ -156,7 +156,8 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice<vobj>
|
||||
int ostride=FullGrid->_ostride[Orthog];
|
||||
|
||||
typedef typename vobj::vector_typeD vector_typeD;
|
||||
|
||||
auto lhs_v = lhs.View();
|
||||
auto rhs_v = rhs.View();
|
||||
thread_region {
|
||||
std::vector<vobj> Left(Nblock);
|
||||
std::vector<vobj> Right(Nblock);
|
||||
@ -168,8 +169,8 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice<vobj>
|
||||
int o = n*stride + b;
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
Left [i] = lhs[o+i*ostride];
|
||||
Right[i] = rhs[o+i*ostride];
|
||||
Left [i] = lhs_v[o+i*ostride];
|
||||
Right[i] = rhs_v[o+i*ostride];
|
||||
}
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
|
@ -42,22 +42,26 @@ NAMESPACE_BEGIN(Grid);
|
||||
// Peek internal indices of a Lattice object
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<int Index,class vobj>
|
||||
auto PeekIndex(const Lattice<vobj> &lhs,int i) -> Lattice<decltype(peekIndex<Index>(lhs[0],i))>
|
||||
auto PeekIndex(const Lattice<vobj> &lhs,int i) -> Lattice<decltype(peekIndex<Index>(vobj(),i))>
|
||||
{
|
||||
Lattice<decltype(peekIndex<Index>(lhs[0],i))> ret(lhs.Grid());
|
||||
Lattice<decltype(peekIndex<Index>(vobj(),i))> ret(lhs.Grid());
|
||||
ret.Checkerboard()=lhs.Checkerboard();
|
||||
cpu_loop( ss, lhs, {
|
||||
ret[ss] = peekIndex<Index>(lhs[ss],i);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
cpu_loop( ss, lhs_v, {
|
||||
ret_v[ss] = peekIndex<Index>(lhs_v[ss],i);
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
template<int Index,class vobj>
|
||||
auto PeekIndex(const Lattice<vobj> &lhs,int i,int j) -> Lattice<decltype(peekIndex<Index>(lhs[0],i,j))>
|
||||
auto PeekIndex(const Lattice<vobj> &lhs,int i,int j) -> Lattice<decltype(peekIndex<Index>(vobj(),i,j))>
|
||||
{
|
||||
Lattice<decltype(peekIndex<Index>(lhs[0],i,j))> ret(lhs.Grid());
|
||||
Lattice<decltype(peekIndex<Index>(vobj(),i,j))> ret(lhs.Grid());
|
||||
ret.Checkerboard()=lhs.Checkerboard();
|
||||
cpu_loop( ss, lhs, {
|
||||
ret[ss] = peekIndex<Index>(lhs[ss],i,j);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
cpu_loop( ss, lhs_v, {
|
||||
ret_v[ss] = peekIndex<Index>(lhs_v[ss],i,j);
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
@ -66,17 +70,21 @@ auto PeekIndex(const Lattice<vobj> &lhs,int i,int j) -> Lattice<decltype(peekInd
|
||||
// Poke internal indices of a Lattice object
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<int Index,class vobj>
|
||||
void PokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs[0],0))> & rhs,int i)
|
||||
void PokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(vobj(),0))> & rhs,int i)
|
||||
{
|
||||
cpu_loop( ss, lhs, {
|
||||
pokeIndex<Index>(lhs[ss],rhs[ss],i);
|
||||
auto rhs_v = rhs.View();
|
||||
auto lhs_v = lhs.View();
|
||||
cpu_loop( ss, lhs_v, {
|
||||
pokeIndex<Index>(lhs_v[ss],rhs_v[ss],i);
|
||||
});
|
||||
}
|
||||
template<int Index,class vobj>
|
||||
void PokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs[0],0,0))> & rhs,int i,int j)
|
||||
void PokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(vobj(),0,0))> & rhs,int i,int j)
|
||||
{
|
||||
cpu_loop( ss, lhs, {
|
||||
pokeIndex<Index>(lhs[ss],rhs[ss],i,j);
|
||||
auto rhs_v = rhs.View();
|
||||
auto lhs_v = lhs.View();
|
||||
cpu_loop( ss, lhs_v, {
|
||||
pokeIndex<Index>(lhs_v[ss],rhs_v[ss],i,j);
|
||||
});
|
||||
}
|
||||
|
||||
@ -103,10 +111,11 @@ void pokeSite(const sobj &s,Lattice<vobj> &l,const Coordinate &site){
|
||||
|
||||
// extract-modify-merge cycle is easiest way and this is not perf critical
|
||||
ExtractBuffer<sobj> buf(Nsimd);
|
||||
auto l_v = l.View();
|
||||
if ( rank == grid->ThisRank() ) {
|
||||
extract(l[odx],buf);
|
||||
extract(l_v[odx],buf);
|
||||
buf[idx] = s;
|
||||
merge(l[odx],buf);
|
||||
merge(l_v[odx],buf);
|
||||
}
|
||||
|
||||
return;
|
||||
@ -132,7 +141,8 @@ void peekSite(sobj &s,const Lattice<vobj> &l,const Coordinate &site){
|
||||
grid->GlobalCoorToRankIndex(rank,odx,idx,site);
|
||||
|
||||
ExtractBuffer<sobj> buf(Nsimd);
|
||||
extract(l[odx],buf);
|
||||
auto l_v = l.View();
|
||||
extract(l_v[odx],buf);
|
||||
|
||||
s = buf[idx];
|
||||
|
||||
@ -162,8 +172,9 @@ void peekLocalSite(sobj &s,const Lattice<vobj> &l,Coordinate &site){
|
||||
int odx,idx;
|
||||
idx= grid->iIndex(site);
|
||||
odx= grid->oIndex(site);
|
||||
|
||||
scalar_type * vp = (scalar_type *)&l[odx];
|
||||
|
||||
auto l_v = l.View();
|
||||
scalar_type * vp = (scalar_type *)&l_v[odx];
|
||||
scalar_type * pt = (scalar_type *)&s;
|
||||
|
||||
for(int w=0;w<words;w++){
|
||||
@ -191,9 +202,9 @@ void pokeLocalSite(const sobj &s,Lattice<vobj> &l,Coordinate &site){
|
||||
idx= grid->iIndex(site);
|
||||
odx= grid->oIndex(site);
|
||||
|
||||
scalar_type * vp = (scalar_type *)&l[odx];
|
||||
auto l_v = l.View();
|
||||
scalar_type * vp = (scalar_type *)&l_v[odx];
|
||||
scalar_type * pt = (scalar_type *)&s;
|
||||
|
||||
for(int w=0;w<words;w++){
|
||||
vp[idx+w*Nsimd] = pt[w];
|
||||
}
|
||||
|
@ -40,16 +40,20 @@ NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class vobj> inline Lattice<vobj> adj(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs.Grid());
|
||||
accelerator_loop( ss, lhs, {
|
||||
ret[ss] = adj(lhs[ss]);
|
||||
auto lhs_v = lhs.View();
|
||||
auto ret_v = ret.View();
|
||||
accelerator_loop( ss, lhs_v, {
|
||||
ret_v[ss] = adj(lhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
|
||||
template<class vobj> inline Lattice<vobj> conjugate(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs.Grid());
|
||||
accelerator_loop( ss, lhs, {
|
||||
ret[ss] = conjugate(lhs[ss]);
|
||||
auto lhs_v = lhs.View();
|
||||
auto ret_v = ret.View();
|
||||
accelerator_loop( ss, lhs_v, {
|
||||
ret_v[ss] = conjugate(lhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
|
@ -47,14 +47,17 @@ inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &righ
|
||||
GridBase *grid = left.Grid();
|
||||
|
||||
std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize());
|
||||
|
||||
|
||||
auto left_v = left.View();
|
||||
auto right_v=right.View();
|
||||
|
||||
thread_loop( (int thr=0;thr<grid->SumArraySize();thr++),{
|
||||
int mywork, myoff;
|
||||
GridThread::GetWork(left.Grid()->oSites(),thr,mywork,myoff);
|
||||
|
||||
decltype(innerProductD(left[0],right[0])) vnrm=Zero(); // private to thread; sub summation
|
||||
decltype(innerProductD(left_v[0],right_v[0])) vnrm=Zero(); // private to thread; sub summation
|
||||
for(int ss=myoff;ss<mywork+myoff; ss++){
|
||||
vnrm = vnrm + innerProductD(left[ss],right[ss]);
|
||||
vnrm = vnrm + innerProductD(left_v[ss],right_v[ss]);
|
||||
}
|
||||
sumarray[thr]=TensorRemove(vnrm) ;
|
||||
});
|
||||
@ -70,14 +73,14 @@ inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &righ
|
||||
|
||||
template<class Op,class T1>
|
||||
inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
|
||||
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object
|
||||
->typename decltype(expr.op.func(eval(0,expr.arg1)))::scalar_object
|
||||
{
|
||||
return sum(closure(expr));
|
||||
}
|
||||
|
||||
template<class Op,class T1,class T2>
|
||||
inline auto sum(const LatticeBinaryExpression<Op,T1,T2> & expr)
|
||||
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),eval(0,std::get<1>(expr.second))))::scalar_object
|
||||
->typename decltype(expr.op.func(eval(0,expr.arg1),eval(0,expr.arg2)))::scalar_object
|
||||
{
|
||||
return sum(closure(expr));
|
||||
}
|
||||
@ -85,10 +88,10 @@ inline auto sum(const LatticeBinaryExpression<Op,T1,T2> & expr)
|
||||
|
||||
template<class Op,class T1,class T2,class T3>
|
||||
inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr)
|
||||
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
|
||||
eval(0,std::get<1>(expr.second)),
|
||||
eval(0,std::get<2>(expr.second))
|
||||
))::scalar_object
|
||||
->typename decltype(expr.op.func(eval(0,expr.arg1),
|
||||
eval(0,expr.arg2),
|
||||
eval(0,expr.arg3)
|
||||
))::scalar_object
|
||||
{
|
||||
return sum(closure(expr));
|
||||
}
|
||||
@ -103,14 +106,14 @@ inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
sumarray[i]=Zero();
|
||||
}
|
||||
|
||||
auto arg_v = arg.View();
|
||||
thread_loop( (int thr=0;thr<grid->SumArraySize();thr++),{
|
||||
int mywork, myoff;
|
||||
GridThread::GetWork(grid->oSites(),thr,mywork,myoff);
|
||||
|
||||
vobj vvsum=Zero();
|
||||
for(int ss=myoff;ss<mywork+myoff; ss++){
|
||||
vvsum = vvsum + arg[ss];
|
||||
vvsum = vvsum + arg_v[ss];
|
||||
}
|
||||
sumarray[thr]=vvsum;
|
||||
});
|
||||
@ -172,6 +175,7 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
|
||||
int stride=grid->_slice_stride[orthogdim];
|
||||
|
||||
// sum over reduced dimension planes, breaking out orthog dir
|
||||
auto Data_v = Data.View();
|
||||
thread_loop( (int r=0;r<rd;r++),{
|
||||
|
||||
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
|
||||
@ -179,7 +183,7 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int ss= so+n*stride+b;
|
||||
lvSum[r]=lvSum[r]+Data[ss];
|
||||
lvSum[r]=lvSum[r]+Data_v[ss];
|
||||
}
|
||||
}
|
||||
});
|
||||
@ -251,6 +255,8 @@ static void sliceInnerProductVector( std::vector<ComplexD> & result, const Latti
|
||||
int e2= grid->_slice_block [orthogdim];
|
||||
int stride=grid->_slice_stride[orthogdim];
|
||||
|
||||
auto lhs_v = lhs.View();
|
||||
auto rhs_v = rhs.View();
|
||||
thread_loop( (int r=0;r<rd;r++),{
|
||||
|
||||
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
|
||||
@ -258,7 +264,7 @@ static void sliceInnerProductVector( std::vector<ComplexD> & result, const Latti
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int ss= so+n*stride+b;
|
||||
vector_type vv = TensorRemove(innerProduct(lhs[ss],rhs[ss]));
|
||||
vector_type vv = TensorRemove(innerProduct(lhs_v[ss],rhs_v[ss]));
|
||||
lvSum[r]=lvSum[r]+vv;
|
||||
}
|
||||
}
|
||||
@ -358,10 +364,13 @@ static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice
|
||||
|
||||
tensor_reduced at; at=av;
|
||||
|
||||
auto X_v = X.View();
|
||||
auto Y_v = Y.View();
|
||||
auto R_v = R.View();
|
||||
thread_loop_collapse2( (int n=0;n<e1;n++),{
|
||||
for(int b=0;b<e2;b++){
|
||||
int ss= so+n*stride+b;
|
||||
R[ss] = at*X[ss]+Y[ss];
|
||||
R_v[ss] = at*X_v[ss]+Y_v[ss];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
@ -346,7 +346,9 @@ public:
|
||||
int osites = _grid->oSites(); // guaranteed to be <= l.Grid()->oSites() by a factor multiplicity
|
||||
int words = sizeof(scalar_object) / sizeof(scalar_type);
|
||||
|
||||
thread_loop( (int ss=0;ss<osites;ss++), {
|
||||
auto l_v = l.View();
|
||||
// thread_loop( (int ss=0;ss<osites;ss++), {
|
||||
for (int ss=0;ss<osites;ss++) {
|
||||
ExtractBuffer<scalar_object> buf(Nsimd);
|
||||
for (int m = 0; m < multiplicity; m++) { // Draw from same generator multiplicity times
|
||||
|
||||
@ -361,9 +363,10 @@ public:
|
||||
fillScalar(pointer[idx], dist[gdx], _generators[gdx]);
|
||||
}
|
||||
// merge into SIMD lanes, FIXME suboptimal implementation
|
||||
merge(l[sm], buf);
|
||||
merge(l_v[sm], buf);
|
||||
}
|
||||
});
|
||||
}
|
||||
// });
|
||||
|
||||
_time_counter += usecond()- inner_time_counter;
|
||||
}
|
||||
|
@ -38,12 +38,13 @@ NAMESPACE_BEGIN(Grid);
// Trace
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj>
inline auto trace(const Lattice<vobj> &lhs)
  -> Lattice<decltype(trace(lhs[0]))>
inline auto trace(const Lattice<vobj> &lhs) -> Lattice<decltype(trace(vobj()))>
{
  Lattice<decltype(trace(lhs[0]))> ret(lhs.Grid());
  accelerator_loop( ss, lhs, {
    ret[ss] = trace(lhs[ss]);
  Lattice<decltype(trace(vobj()))> ret(lhs.Grid());
  auto ret_v = ret.View();
  auto lhs_v = lhs.View();
  accelerator_loop( ss, lhs_v, {
    ret_v[ss] = trace(lhs_v[ss]);
  });
  return ret;
};
@ -52,11 +53,13 @@ inline auto trace(const Lattice<vobj> &lhs)
// Trace Index level dependent operation
////////////////////////////////////////////////////////////////////////////////////////////////////
template<int Index,class vobj>
inline auto TraceIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<Index>(lhs[0]))>
inline auto TraceIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<Index>(vobj()))>
{
  Lattice<decltype(traceIndex<Index>(lhs[0]))> ret(lhs.Grid());
  accelerator_loop( ss, lhs, {
    ret[ss] = traceIndex<Index>(lhs[ss]);
  Lattice<decltype(traceIndex<Index>(vobj()))> ret(lhs.Grid());
  auto ret_v = ret.View();
  auto lhs_v = lhs.View();
  accelerator_loop( ss, lhs_v, {
    ret_v[ss] = traceIndex<Index>(lhs_v[ss]);
  });
  return ret;
};

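The return types here switch from decltype(trace(lhs[0])) to decltype(trace(vobj())): site indexing now lives on the view rather than on Lattice, so the result type is deduced from a value-constructed site object instead, which assumes vobj is default-constructible. A toy illustration of that deduction, using invented site types rather than Grid's tensor classes:

#include <type_traits>

// Toy site objects standing in for Grid's tensor classes (assumption, for illustration only).
struct iMatrix { double m[2][2]; };
struct iScalar { double s; };
inline iScalar trace(const iMatrix &a) { return { a.m[0][0] + a.m[1][1] }; }

// New-style deduction: name the result type from a value-constructed site object,
// so no element access on a Lattice (lhs[0]) is needed in the signature.
template<class vobj>
using trace_result_t = decltype(trace(vobj()));

static_assert(std::is_same<trace_result_t<iMatrix>, iScalar>::value,
              "result type deduced without indexing a Lattice");

int main() { return 0; }
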
@ -51,20 +51,24 @@ inline void subdivides(GridBase *coarse,GridBase *fine)
template<class vobj> inline void pickCheckerboard(int cb,Lattice<vobj> &half,const Lattice<vobj> &full){
  half.Checkerboard() = cb;

  auto half_v = half.View();
  auto full_v = full.View();
  thread_loop( (int ss=0;ss<full.Grid()->oSites();ss++),{
    int cbos;
    Coordinate coor;
    full.Grid()->oCoorFromOindex(coor,ss);
    cbos=half.Grid()->CheckerBoard(coor);

    if (cbos==cb) {
      int ssh=half.Grid()->oIndex(coor);
      half[ssh] = full[ss];
      half_v[ssh] = full_v[ss];
    }
  });
}
template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Lattice<vobj> &half){
  int cb = half.Checkerboard();
  auto half_v = half.View();
  auto full_v = full.View();
  thread_loop( (int ss=0;ss<full.Grid()->oSites();ss++), {
    Coordinate coor;
    int cbos;
@ -74,7 +78,7 @@ template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Latti

    if (cbos==cb) {
      int ssh=half.Grid()->oIndex(coor);
      full[ss]=half[ssh];
      full_v[ss]=half_v[ssh];
    }
  });
}
@ -105,6 +109,8 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,

  coarseData=Zero();

  auto fineData_   = fineData.View();
  auto coarseData_ = coarseData.View();
  // Loop over coarse sites in parallel, and then loop over the fine sites associated with each coarse site.
  thread_loop( (int sf=0;sf<fine->oSites();sf++),{

@ -117,9 +123,8 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,

    thread_critical {
      for(int i=0;i<nbasis;i++) {
        coarseData[sc](i)=coarseData[sc](i)
          + innerProduct(Basis[i][sf],fineData[sf]);

        auto Basis_ = Basis[i].View();
        coarseData_[sc](i)=coarseData_[sc](i) + innerProduct(Basis_[sf],fineData_[sf]);
      }
    }
  });
@ -151,6 +156,11 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
    assert(block_r[d]*coarse->_rdimensions[d]==fine->_rdimensions[d]);
  }

  auto fineZ_  = fineZ.View();
  auto fineX_  = fineX.View();
  auto fineY_  = fineY.View();
  auto coarseA_= coarseA.View();

  thread_loop( (int sf=0;sf<fine->oSites();sf++),{

    int sc;
@ -162,7 +172,7 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
    Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);

    // z = A x + y
    fineZ[sf]=coarseA[sc]*fineX[sf]+fineY[sf];
    fineZ_[sf]=coarseA_[sc]*fineX_[sf]+fineY_[sf];

  });

@ -173,7 +183,7 @@ inline void blockInnerProduct(Lattice<CComplex> &CoarseInner,
                              const Lattice<vobj> &fineX,
                              const Lattice<vobj> &fineY)
{
  typedef decltype(innerProduct(fineX[0],fineY[0])) dotp;
  typedef decltype(innerProduct(vobj(),vobj())) dotp;

  GridBase *coarse(CoarseInner.Grid());
  GridBase *fine  (fineX.Grid());
@ -182,10 +192,13 @@ inline void blockInnerProduct(Lattice<CComplex> &CoarseInner,
  Lattice<dotp> coarse_inner(coarse);

  // Precision promotion?
  auto CoarseInner_  = CoarseInner.View();
  auto coarse_inner_ = coarse_inner.View();

  fine_inner = localInnerProduct(fineX,fineY);
  blockSum(coarse_inner,fine_inner);
  thread_loop( (int ss=0;ss<coarse->oSites();ss++),{
    CoarseInner[ss] = coarse_inner[ss];
    CoarseInner_[ss] = coarse_inner_[ss];
  });
}
template<class vobj,class CComplex>
@ -218,24 +231,23 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
  // Turn this around to loop threaded over sc and interior loop
  // over sf would thread better
  coarseData=Zero();
  thread_region {
  auto coarseData_ = coarseData.View();
  auto fineData_   = fineData.View();

  thread_loop( (int sf=0;sf<fine->oSites();sf++),{
    int sc;
    Coordinate coor_c(_ndimension);
    Coordinate coor_f(_ndimension);

    thread_loop_in_region( (int sf=0;sf<fine->oSites();sf++),{

      Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
      for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
      Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);

      thread_critical {
        coarseData[sc]=coarseData[sc]+fineData[sf];
      }
    Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
    for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
    Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);

    thread_critical {
      coarseData_[sc]=coarseData_[sc]+fineData_[sf];
    }

    });
  }
  });
  return;
}

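
The accumulation into the coarse lattice still needs a critical section, because several fine sites map to the same coarse site; only the indexing has moved onto views. A rough, self-contained analogue of that shape, with plain arrays playing the role of views and OpenMP pragmas standing in for Grid's thread macros (both are assumptions for illustration):

#include <cstdio>
#include <vector>

int main() {
  const int nFine = 16, nCoarse = 4, block = nFine / nCoarse;   // invented sizes
  std::vector<double> fine(nFine, 1.0), coarse(nCoarse, 0.0);

  double *fine_v   = fine.data();    // raw handles play the role of views here
  double *coarse_v = coarse.data();

#pragma omp parallel for
  for (int sf = 0; sf < nFine; sf++) {
    int sc = sf / block;             // coarse site that owns this fine site
#pragma omp critical                 // several sf map to the same sc: guard the update
    coarse_v[sc] = coarse_v[sc] + fine_v[sf];
  }

  for (int sc = 0; sc < nCoarse; sc++) std::printf("coarse[%d] = %g\n", sc, coarse_v[sc]);
  return 0;
}
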
@ -306,25 +318,25 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
  for(int d=0 ; d<_ndimension;d++){
    block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
  }
  auto fineData_   = fineData.View();
  auto coarseData_ = coarseData.View();

  // Loop with a cache friendly loop ordering
  thread_region {
  thread_loop( (int sf=0;sf<fine->oSites();sf++),{
    int sc;
    Coordinate coor_c(_ndimension);
    Coordinate coor_f(_ndimension);

    thread_loop_in_region( (int sf=0;sf<fine->oSites();sf++),{
      Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
      for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
      Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);

    Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
    for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
    Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);

      for(int i=0;i<nbasis;i++) {
        if(i==0) fineData[sf]=coarseData[sc](i) * Basis[i][sf];
        else     fineData[sf]=fineData[sf]+coarseData[sc](i)*Basis[i][sf];
      }
    });
  }
    for(int i=0;i<nbasis;i++) {
      auto basis_ = Basis[i].View();
      if(i==0) fineData_[sf]=coarseData_[sc](i) *basis_[sf];
      else     fineData_[sf]=fineData_[sf]+coarseData_[sc](i)*basis_[sf];
    }
  });
  return;

}
@ -577,6 +589,7 @@ unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in)
  }

  //loop over outer index
  auto in_v = in.View();
  thread_loop( (int in_oidx = 0; in_oidx < in_grid->oSites(); in_oidx++),{
    //Assemble vector of pointers to output elements
    ExtractPointerArray<sobj> out_ptrs(in_nsimd);
@ -587,16 +600,19 @@ unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in)
    Coordinate lcoor(in_grid->Nd());

    for(int lane=0; lane < in_nsimd; lane++){
      for(int mu=0;mu<ndim;mu++)
      for(int mu=0;mu<ndim;mu++){
        lcoor[mu] = in_ocoor[mu] + in_grid->_rdimensions[mu]*in_icoor[lane][mu];
      }

      int lex;
      Lexicographic::IndexFromCoor(lcoor, lex, in_grid->_ldimensions);
      assert(lex < out.size());
      out_ptrs[lane] = &out[lex];
    }

    //Unpack into those ptrs
    const vobj & in_vobj = in[in_oidx];
    const vobj & in_vobj = in_v[in_oidx];
    extract(in_vobj, out_ptrs, 0);
  });
}
@ -621,7 +637,7 @@ vectorizeFromLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
    icoor[lane].resize(ndim);
    grid->iCoorFromIindex(icoor[lane],lane);
  }

  auto out_v = out.View();
  thread_loop( (uint64_t oidx = 0; oidx < grid->oSites(); oidx++),{
    //Assemble vector of pointers to output elements
    ExtractPointerArray<sobj> ptrs(nsimd);
@ -644,7 +660,7 @@ vectorizeFromLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
    //pack from those ptrs
    vobj vecobj;
    merge(vecobj, ptrs, 0);
    out[oidx] = vecobj;
    out_v[oidx] = vecobj;
  });
}

@ -673,6 +689,7 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
  std::vector<SobjOut> in_slex_conv(in_grid->lSites());
  unvectorizeToLexOrdArray(in_slex_conv, in);

  auto out_v = out.View();
  thread_loop( (uint64_t out_oidx=0;out_oidx<out_grid->oSites();out_oidx++),{
    Coordinate out_ocoor(ndim);
    out_grid->oCoorFromOindex(out_ocoor, out_oidx);
@ -688,7 +705,7 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
      int llex; Lexicographic::IndexFromCoor(lcoor, llex, out_grid->_ldimensions);
      ptrs[lane] = &in_slex_conv[llex];
    }
    merge(out[out_oidx], ptrs, 0);
    merge(out_v[out_oidx], ptrs, 0);
  });
}

@ -41,8 +41,10 @@ NAMESPACE_BEGIN(Grid);
template<class vobj>
inline Lattice<vobj> transpose(const Lattice<vobj> &lhs){
  Lattice<vobj> ret(lhs.Grid());
  accelerator_loop(ss,lhs,{
    ret[ss] = transpose(lhs[ss]);
  auto ret_v = ret.View();
  auto lhs_v = lhs.View();
  accelerator_loop(ss,lhs_v,{
    ret_v[ss] = transpose(lhs_v[ss]);
  });
  return ret;
};
@ -51,11 +53,13 @@ inline Lattice<vobj> transpose(const Lattice<vobj> &lhs){
// Index level dependent transpose
////////////////////////////////////////////////////////////////////////////////////////////////////
template<int Index,class vobj>
inline auto TransposeIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(transposeIndex<Index>(lhs[0]))>
inline auto TransposeIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(transposeIndex<Index>(vobj()))>
{
  Lattice<decltype(transposeIndex<Index>(lhs[0]))> ret(lhs.Grid());
  accelerator_loop(ss,lhs,{
    ret[ss] = transposeIndex<Index>(lhs[ss]);
  Lattice<decltype(transposeIndex<Index>(vobj()))> ret(lhs.Grid());
  auto ret_v = ret.View();
  auto lhs_v = lhs.View();
  accelerator_loop(ss,lhs_v,{
    ret_v[ss] = transposeIndex<Index>(lhs_v[ss]);
  });
  return ret;
};

@ -33,43 +33,47 @@ Author: paboyle <paboyle@ph.ed.ac.uk>

NAMESPACE_BEGIN(Grid);

template<class obj> Lattice<obj> pow(const Lattice<obj> &rhs,RealD y){
  Lattice<obj> ret(rhs.Grid());
template<class obj> Lattice<obj> pow(const Lattice<obj> &rhs_i,RealD y){
  Lattice<obj> ret_i(rhs_i.Grid());
  auto rhs = rhs_i.View();
  auto ret = ret_i.View();
  ret.Checkerboard() = rhs.Checkerboard();
  conformable(ret,rhs);
  accelerator_loop(ss,rhs,{
    ret[ss]=pow(rhs[ss],y);
  });
  return ret;
  return ret_i;
}
template<class obj> Lattice<obj> mod(const Lattice<obj> &rhs,Integer y){
  Lattice<obj> ret(rhs.Grid());
template<class obj> Lattice<obj> mod(const Lattice<obj> &rhs_i,Integer y){
  Lattice<obj> ret_i(rhs_i.Grid());
  auto rhs = rhs_i.View();
  auto ret = ret_i.View();
  ret.Checkerboard() = rhs.Checkerboard();
  conformable(ret,rhs);
  accelerator_loop(ss,rhs,{
    ret[ss]=mod(rhs[ss],y);
  });
  return ret;
  return ret_i;
}

template<class obj> Lattice<obj> div(const Lattice<obj> &rhs,Integer y){
  Lattice<obj> ret(rhs.Grid());
  ret.Checkerboard() = rhs.Checkerboard();
  conformable(ret,rhs);
template<class obj> Lattice<obj> div(const Lattice<obj> &rhs_i,Integer y){
  Lattice<obj> ret_i(rhs_i.Grid());
  auto ret = ret_i.View();
  auto rhs = rhs_i.View();
  ret.Checkerboard() = rhs_i.Checkerboard();
  accelerator_loop(ss,rhs,{
    ret[ss]=div(rhs[ss],y);
  });
  return ret;
  return ret_i;
}

template<class obj> Lattice<obj> expMat(const Lattice<obj> &rhs, RealD alpha, Integer Nexp = DEFAULT_MAT_EXP){
  Lattice<obj> ret(rhs.Grid());
template<class obj> Lattice<obj> expMat(const Lattice<obj> &rhs_i, RealD alpha, Integer Nexp = DEFAULT_MAT_EXP){
  Lattice<obj> ret_i(rhs_i.Grid());
  auto rhs = rhs_i.View();
  auto ret = ret_i.View();
  ret.Checkerboard() = rhs.Checkerboard();
  conformable(ret,rhs);
  accelerator_loop(ss,rhs,{
    ret[ss]=Exponentiate(rhs[ss],alpha, Nexp);
  });
  return ret;
  return ret_i;
}

NAMESPACE_END(Grid);

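The unary wrappers all use the same renaming trick: the lattice argument becomes rhs_i, the view takes the old name rhs, and the site loop body is left textually unchanged while the function still returns the owning lattice. A minimal sketch of that convention, with std::vector and raw pointers standing in for Lattice and LatticeView (an assumption, and pow_sites is an invented name):

#include <cmath>
#include <cstddef>
#include <vector>

// The owning argument becomes *_i, the raw handle ("view") inherits the old name,
// so the loop body reads exactly as it did before the View rewrite.
std::vector<double> pow_sites(const std::vector<double> &rhs_i, double y) {
  std::vector<double> ret_i(rhs_i.size());
  const double *rhs = rhs_i.data();   // "view" of the input
  double *ret = ret_i.data();         // "view" of the output
  for (std::size_t ss = 0; ss < rhs_i.size(); ss++)
    ret[ss] = std::pow(rhs[ss], y);   // same body as before the rewrite
  return ret_i;                       // return the owning object, not the view
}

int main() {
  auto r = pow_sites({1.0, 2.0, 3.0}, 2.0);
  return std::fabs(r[2] - 9.0) < 1e-12 ? 0 : 1;
}
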
@ -1,90 +0,0 @@
/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./lib/lattice/Lattice_where.h

    Copyright (C) 2015

Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.

    See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/*  END LEGAL */
#ifndef GRID_LATTICE_WHERE_H
#define GRID_LATTICE_WHERE_H

NAMESPACE_BEGIN(Grid);

// Must implement the predicate gating the
// Must be able to reduce the predicate down to a single vInteger per site.
// Must be able to require the type be iScalar x iScalar x ....
// give a GetVtype method in iScalar
// and blow away the tensor structures.
//
template<class vobj,class iobj>
inline void whereWolf(Lattice<vobj> &ret,const Lattice<iobj> &predicate,Lattice<vobj> &iftrue,Lattice<vobj> &iffalse)
{
  conformable(iftrue,iffalse);
  conformable(iftrue,predicate);
  conformable(iftrue,ret);

  GridBase *grid=iftrue.Grid();

  typedef typename vobj::scalar_object scalar_object;
  typedef typename vobj::scalar_type scalar_type;
  typedef typename vobj::vector_type vector_type;
  typedef typename iobj::vector_type mask_type;

  const int Nsimd = grid->Nsimd();

  std::vector<Integer> mask(Nsimd);
  std::vector<scalar_object> truevals (Nsimd);
  std::vector<scalar_object> falsevals(Nsimd);

  thread_loop( (int ss=iftrue.begin(); ss<iftrue.end();ss++) , {

    extract(iftrue[ss]  ,truevals);
    extract(iffalse[ss] ,falsevals);
    extract<vInteger,Integer>(TensorRemove(predicate[ss]),mask);

    for(int s=0;s<Nsimd;s++){
      if (mask[s]) falsevals[s]=truevals[s];
    }

    merge(ret[ss],falsevals);
  }
  );
}

template<class vobj,class iobj>
inline Lattice<vobj> whereWolf(const Lattice<iobj> &predicate,Lattice<vobj> &iftrue,Lattice<vobj> &iffalse)
{
  conformable(iftrue,iffalse);
  conformable(iftrue,predicate);

  Lattice<vobj> ret(iftrue.Grid());

  where(ret,predicate,iftrue,iffalse);

  return ret;
}

NAMESPACE_END(Grid);
#endif