mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-19 00:07:05 +01:00
Merge branch 'develop' into feature/gpu-port
This commit is contained in:
46
Grid/lattice/Lattice.h
Normal file
46
Grid/lattice/Lattice.h
Normal file
@ -0,0 +1,46 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/Lattice.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
#include <Grid/lattice/Lattice_base.h>
|
||||
#include <Grid/lattice/Lattice_conformable.h>
|
||||
#include <Grid/lattice/Lattice_ET.h>
|
||||
#include <Grid/lattice/Lattice_arith.h>
|
||||
#include <Grid/lattice/Lattice_trace.h>
|
||||
#include <Grid/lattice/Lattice_transpose.h>
|
||||
#include <Grid/lattice/Lattice_local.h>
|
||||
#include <Grid/lattice/Lattice_reduction.h>
|
||||
#include <Grid/lattice/Lattice_peekpoke.h>
|
||||
#include <Grid/lattice/Lattice_reality.h>
|
||||
#include <Grid/lattice/Lattice_comparison_utils.h>
|
||||
#include <Grid/lattice/Lattice_comparison.h>
|
||||
#include <Grid/lattice/Lattice_coordinate.h>
|
||||
//#include <Grid/lattice/Lattice_where.h>
|
||||
#include <Grid/lattice/Lattice_rng.h>
|
||||
#include <Grid/lattice/Lattice_unary.h>
|
||||
#include <Grid/lattice/Lattice_transfer.h>
|
||||
|
407
Grid/lattice/Lattice_ET.h
Normal file
407
Grid/lattice/Lattice_ET.h
Normal file
@ -0,0 +1,407 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_ET.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_ET_H
|
||||
#define GRID_LATTICE_ET_H
|
||||
|
||||
#include <iostream>
|
||||
#include <tuple>
|
||||
#include <typeinfo>
|
||||
#include <vector>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
// Predicated where support
|
||||
////////////////////////////////////////////////////
|
||||
template <class iobj, class vobj, class robj>
|
||||
accelerator_inline vobj predicatedWhere(const iobj &predicate, const vobj &iftrue,
|
||||
const robj &iffalse) {
|
||||
typename std::remove_const<vobj>::type ret;
|
||||
|
||||
typedef typename vobj::scalar_object scalar_object;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
const int Nsimd = vobj::vector_type::Nsimd();
|
||||
|
||||
ExtractBuffer<Integer> mask(Nsimd);
|
||||
ExtractBuffer<scalar_object> truevals(Nsimd);
|
||||
ExtractBuffer<scalar_object> falsevals(Nsimd);
|
||||
|
||||
extract(iftrue, truevals);
|
||||
extract(iffalse, falsevals);
|
||||
extract<vInteger, Integer>(TensorRemove(predicate), mask);
|
||||
|
||||
for (int s = 0; s < Nsimd; s++) {
|
||||
if (mask[s]) falsevals[s] = truevals[s];
|
||||
}
|
||||
|
||||
merge(ret, falsevals);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
//Specialization of getVectorType for lattices
|
||||
/////////////////////////////////////////////////////
|
||||
template<typename T>
|
||||
struct getVectorType<Lattice<T> >{
|
||||
typedef typename Lattice<T>::vector_object type;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////
|
||||
//-- recursive evaluation of expressions; --
|
||||
// handle leaves of syntax tree
|
||||
///////////////////////////////////////////////////
|
||||
// Expression-tree leaf: any plain (non-lattice) object.
// The site index `ss` is ignored and the argument is returned by value.
template <class sobj>
accelerator_inline sobj eval(const uint64_t ss, const sobj &arg)
{
  return arg;
}
|
||||
|
||||
// Expression-tree leaf: a lattice view.
// Returns a const reference to element `ss` of the view.
template <class lobj>
accelerator_inline const lobj &eval(const uint64_t ss, const LatticeView<lobj> &arg)
{
  return arg[ss];
}
|
||||
// Expression-tree leaf: a full lattice.
// Opens a view with arg.View() and returns a const reference to element `ss`
// obtained through that view.
// NOTE(review): the reference outlives the local view object; this presumes the
// view does not own the underlying storage - confirm against LatticeView.
template <class lobj>
accelerator_inline const lobj &eval(const uint64_t ss, const Lattice<lobj> &arg)
{
  auto site_view = arg.View();
  return site_view[ss];
}
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// handle nodes in syntax tree- eval one operand
|
||||
///////////////////////////////////////////////////
|
||||
// Expression-tree node with one operand:
// evaluate the operand at site `ss`, then apply the node's operator to it.
template <typename Op, typename T1>
accelerator_inline auto eval(const uint64_t ss, const LatticeUnaryExpression<Op, T1> &expr)
  -> decltype(expr.op.func(eval(ss, expr.arg1)))
{
  return expr.op.func(
    eval(ss, expr.arg1)
  );
}
|
||||
///////////////////////
|
||||
// eval two operands
|
||||
///////////////////////
|
||||
// Expression-tree node with two operands:
// evaluate both operands at site `ss`, then combine them with the node's operator.
template <typename Op, typename T1, typename T2>
accelerator_inline auto eval(const uint64_t ss, const LatticeBinaryExpression<Op, T1, T2> &expr)
  -> decltype(expr.op.func(eval(ss, expr.arg1), eval(ss, expr.arg2)))
{
  return expr.op.func(
    eval(ss, expr.arg1),
    eval(ss, expr.arg2)
  );
}
|
||||
///////////////////////
|
||||
// eval three operands
|
||||
///////////////////////
|
||||
// Expression-tree node with three operands:
// evaluate all three operands at site `ss`, then apply the node's operator.
template <typename Op, typename T1, typename T2, typename T3>
accelerator_inline auto eval(const uint64_t ss, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
  -> decltype(expr.op.func(eval(ss, expr.arg1), eval(ss, expr.arg2), eval(ss, expr.arg3)))
{
  return expr.op.func(
    eval(ss, expr.arg1),
    eval(ss, expr.arg2),
    eval(ss, expr.arg3)
  );
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Obtain the grid from an expression, ensuring conformable. This must follow a
|
||||
// tree recursion; must retain grid pointer in the LatticeView class which sucks
|
||||
// Use a different method, and make it void *.
|
||||
// Perhaps a conformable method.
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Lattice leaf: hand the running grid pointer to the lattice's own
// Conformable() method, which receives it by reference.
template <class T1, typename std::enable_if<is_lattice<T1>::value, T1>::type * = nullptr>
accelerator_inline void GridFromExpression(GridBase *&grid, const T1 &lat)
{
  lat.Conformable(grid);
}
|
||||
|
||||
// Non-lattice leaf (e.g. a scalar operand): carries no grid, so `grid` is left
// untouched. The parameter is intentionally unused.
template <class T1, typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr>
accelerator_inline
void GridFromExpression(GridBase *&grid, const T1 &notlat)
{}
|
||||
|
||||
template <typename Op, typename T1>
|
||||
accelerator_inline
|
||||
void GridFromExpression(GridBase *&grid,const LatticeUnaryExpression<Op, T1> &expr)
|
||||
{
|
||||
GridFromExpression(grid, expr.arg1); // recurse
|
||||
}
|
||||
|
||||
template <typename Op, typename T1, typename T2>
|
||||
accelerator_inline
|
||||
void GridFromExpression(GridBase *&grid, const LatticeBinaryExpression<Op, T1, T2> &expr)
|
||||
{
|
||||
GridFromExpression(grid, expr.arg1); // recurse
|
||||
GridFromExpression(grid, expr.arg2);
|
||||
}
|
||||
template <typename Op, typename T1, typename T2, typename T3>
|
||||
accelerator_inline
|
||||
void GridFromExpression(GridBase *&grid, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
|
||||
{
|
||||
GridFromExpression(grid, expr.arg1); // recurse
|
||||
GridFromExpression(grid, expr.arg2); // recurse
|
||||
GridFromExpression(grid, expr.arg3); // recurse
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Obtain the CB from an expression, ensuring conformable. This must follow a
|
||||
// tree recursion
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Lattice leaf: if `cb` already holds Odd or Even, assert that this lattice has
// the same checkerboard; in every case `cb` is then set to the lattice's value.
template <class T1, typename std::enable_if<is_lattice<T1>::value, T1>::type * = nullptr>
inline void CBFromExpression(int &cb, const T1 &lat)
{
  const bool cb_already_set = (cb == Odd) || (cb == Even);
  if (cb_already_set) {
    assert(cb == lat.Checkerboard());
  }
  cb = lat.Checkerboard();
}
|
||||
// Non-lattice leaf (e.g. a scalar operand): has no checkerboard, so `cb` is
// left untouched. The parameter is intentionally unused.
template <class T1, typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr>
inline void CBFromExpression(int &cb, const T1 &notlat)
{
}
|
||||
|
||||
template <typename Op, typename T1> inline
|
||||
void CBFromExpression(int &cb,const LatticeUnaryExpression<Op, T1> &expr)
|
||||
{
|
||||
CBFromExpression(cb, expr.arg1); // recurse AST
|
||||
}
|
||||
|
||||
template <typename Op, typename T1, typename T2> inline
|
||||
void CBFromExpression(int &cb,const LatticeBinaryExpression<Op, T1, T2> &expr)
|
||||
{
|
||||
CBFromExpression(cb, expr.arg1); // recurse AST
|
||||
CBFromExpression(cb, expr.arg2); // recurse AST
|
||||
}
|
||||
template <typename Op, typename T1, typename T2, typename T3>
|
||||
inline void CBFromExpression(int &cb, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
|
||||
{
|
||||
CBFromExpression(cb, expr.arg1); // recurse AST
|
||||
CBFromExpression(cb, expr.arg2); // recurse AST
|
||||
CBFromExpression(cb, expr.arg3); // recurse AST
|
||||
}
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Unary operators and funcs
|
||||
////////////////////////////////////////////
|
||||
#define GridUnopClass(name, ret) \
|
||||
template <class arg> \
|
||||
struct name { \
|
||||
static auto accelerator_inline func(const arg a) -> decltype(ret) { return ret; } \
|
||||
};
|
||||
|
||||
GridUnopClass(UnarySub, -a);
|
||||
GridUnopClass(UnaryNot, Not(a));
|
||||
GridUnopClass(UnaryAdj, adj(a));
|
||||
GridUnopClass(UnaryConj, conjugate(a));
|
||||
GridUnopClass(UnaryTrace, trace(a));
|
||||
GridUnopClass(UnaryTranspose, transpose(a));
|
||||
GridUnopClass(UnaryTa, Ta(a));
|
||||
GridUnopClass(UnaryProjectOnGroup, ProjectOnGroup(a));
|
||||
GridUnopClass(UnaryReal, real(a));
|
||||
GridUnopClass(UnaryImag, imag(a));
|
||||
GridUnopClass(UnaryToReal, toReal(a));
|
||||
GridUnopClass(UnaryToComplex, toComplex(a));
|
||||
GridUnopClass(UnaryTimesI, timesI(a));
|
||||
GridUnopClass(UnaryTimesMinusI, timesMinusI(a));
|
||||
GridUnopClass(UnaryAbs, abs(a));
|
||||
GridUnopClass(UnarySqrt, sqrt(a));
|
||||
GridUnopClass(UnaryRsqrt, rsqrt(a));
|
||||
GridUnopClass(UnarySin, sin(a));
|
||||
GridUnopClass(UnaryCos, cos(a));
|
||||
GridUnopClass(UnaryAsin, asin(a));
|
||||
GridUnopClass(UnaryAcos, acos(a));
|
||||
GridUnopClass(UnaryLog, log(a));
|
||||
GridUnopClass(UnaryExp, exp(a));
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Binary operators
|
||||
////////////////////////////////////////////
|
||||
#define GridBinOpClass(name, combination) \
|
||||
template <class left, class right> \
|
||||
struct name { \
|
||||
static auto accelerator_inline \
|
||||
func(const left &lhs, const right &rhs) \
|
||||
-> decltype(combination) const \
|
||||
{ \
|
||||
return combination; \
|
||||
} \
|
||||
};
|
||||
|
||||
GridBinOpClass(BinaryAdd, lhs + rhs);
|
||||
GridBinOpClass(BinarySub, lhs - rhs);
|
||||
GridBinOpClass(BinaryMul, lhs *rhs);
|
||||
GridBinOpClass(BinaryDiv, lhs /rhs);
|
||||
GridBinOpClass(BinaryAnd, lhs &rhs);
|
||||
GridBinOpClass(BinaryOr, lhs | rhs);
|
||||
GridBinOpClass(BinaryAndAnd, lhs &&rhs);
|
||||
GridBinOpClass(BinaryOrOr, lhs || rhs);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
// Trinary conditional op
|
||||
////////////////////////////////////////////////////
|
||||
#define GridTrinOpClass(name, combination) \
|
||||
template <class predicate, class left, class right> \
|
||||
struct name { \
|
||||
static auto accelerator_inline \
|
||||
func(const predicate &pred, const left &lhs, const right &rhs) \
|
||||
-> decltype(combination) const \
|
||||
{ \
|
||||
return combination; \
|
||||
} \
|
||||
};
|
||||
|
||||
GridTrinOpClass(TrinaryWhere,
|
||||
(predicatedWhere<predicate,
|
||||
typename std::remove_reference<left>::type,
|
||||
typename std::remove_reference<right>::type>(pred, lhs,rhs)));
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Operator syntactical glue
|
||||
////////////////////////////////////////////
|
||||
|
||||
#define GRID_UNOP(name) name<decltype(eval(0, arg))>
|
||||
#define GRID_BINOP(name) name<decltype(eval(0, lhs)), decltype(eval(0, rhs))>
|
||||
#define GRID_TRINOP(name) name<decltype(eval(0, pred)), decltype(eval(0, lhs)), decltype(eval(0, rhs))>
|
||||
|
||||
#define GRID_DEF_UNOP(op, name) \
|
||||
template <typename T1, typename std::enable_if<is_lattice<T1>::value||is_lattice_expr<T1>::value,T1>::type * = nullptr> \
|
||||
inline auto op(const T1 &arg) ->decltype(LatticeUnaryExpression<GRID_UNOP(name),T1>(GRID_UNOP(name)(), arg)) \
|
||||
{ \
|
||||
return LatticeUnaryExpression<GRID_UNOP(name),T1>(GRID_UNOP(name)(), arg); \
|
||||
}
|
||||
|
||||
#define GRID_BINOP_LEFT(op, name) \
|
||||
template <typename T1, typename T2, \
|
||||
typename std::enable_if<is_lattice<T1>::value||is_lattice_expr<T1>::value,T1>::type * = nullptr> \
|
||||
inline auto op(const T1 &lhs, const T2 &rhs) \
|
||||
->decltype(LatticeBinaryExpression<GRID_BINOP(name),T1,T2>(GRID_BINOP(name)(),lhs,rhs)) \
|
||||
{ \
|
||||
return LatticeBinaryExpression<GRID_BINOP(name),T1,T2>(GRID_BINOP(name)(),lhs,rhs);\
|
||||
}
|
||||
|
||||
#define GRID_BINOP_RIGHT(op, name) \
|
||||
template <typename T1, typename T2, \
|
||||
typename std::enable_if<!is_lattice<T1>::value&&!is_lattice_expr<T1>::value,T1>::type * = nullptr, \
|
||||
typename std::enable_if< is_lattice<T2>::value|| is_lattice_expr<T2>::value,T2>::type * = nullptr> \
|
||||
inline auto op(const T1 &lhs, const T2 &rhs) \
|
||||
->decltype(LatticeBinaryExpression<GRID_BINOP(name),T1,T2>(GRID_BINOP(name)(),lhs, rhs)) \
|
||||
{ \
|
||||
return LatticeBinaryExpression<GRID_BINOP(name),T1,T2>(GRID_BINOP(name)(),lhs, rhs); \
|
||||
}
|
||||
|
||||
#define GRID_DEF_BINOP(op, name) \
|
||||
GRID_BINOP_LEFT(op, name); \
|
||||
GRID_BINOP_RIGHT(op, name);
|
||||
|
||||
#define GRID_DEF_TRINOP(op, name) \
|
||||
template <typename T1, typename T2, typename T3> \
|
||||
inline auto op(const T1 &pred, const T2 &lhs, const T3 &rhs) \
|
||||
->decltype(LatticeTrinaryExpression<GRID_TRINOP(name),T1,T2,T3>(GRID_TRINOP(name)(),pred, lhs, rhs)) \
|
||||
{ \
|
||||
return LatticeTrinaryExpression<GRID_TRINOP(name),T1,T2,T3>(GRID_TRINOP(name)(),pred, lhs, rhs); \
|
||||
}
|
||||
|
||||
////////////////////////
|
||||
// Operator definitions
|
||||
////////////////////////
|
||||
GRID_DEF_UNOP(operator-, UnarySub);
|
||||
GRID_DEF_UNOP(Not, UnaryNot);
|
||||
GRID_DEF_UNOP(operator!, UnaryNot);
|
||||
GRID_DEF_UNOP(adj, UnaryAdj);
|
||||
GRID_DEF_UNOP(conjugate, UnaryConj);
|
||||
GRID_DEF_UNOP(trace, UnaryTrace);
|
||||
GRID_DEF_UNOP(transpose, UnaryTranspose);
|
||||
GRID_DEF_UNOP(Ta, UnaryTa);
|
||||
GRID_DEF_UNOP(ProjectOnGroup, UnaryProjectOnGroup);
|
||||
GRID_DEF_UNOP(real, UnaryReal);
|
||||
GRID_DEF_UNOP(imag, UnaryImag);
|
||||
GRID_DEF_UNOP(toReal, UnaryToReal);
|
||||
GRID_DEF_UNOP(toComplex, UnaryToComplex);
|
||||
GRID_DEF_UNOP(timesI, UnaryTimesI);
|
||||
GRID_DEF_UNOP(timesMinusI, UnaryTimesMinusI);
|
||||
GRID_DEF_UNOP(abs, UnaryAbs); // abs overloaded in cmath C++98; DON'T do the
|
||||
// abs-fabs-dabs-labs thing
|
||||
GRID_DEF_UNOP(sqrt, UnarySqrt);
|
||||
GRID_DEF_UNOP(rsqrt, UnaryRsqrt);
|
||||
GRID_DEF_UNOP(sin, UnarySin);
|
||||
GRID_DEF_UNOP(cos, UnaryCos);
|
||||
GRID_DEF_UNOP(asin, UnaryAsin);
|
||||
GRID_DEF_UNOP(acos, UnaryAcos);
|
||||
GRID_DEF_UNOP(log, UnaryLog);
|
||||
GRID_DEF_UNOP(exp, UnaryExp);
|
||||
|
||||
GRID_DEF_BINOP(operator+, BinaryAdd);
|
||||
GRID_DEF_BINOP(operator-, BinarySub);
|
||||
GRID_DEF_BINOP(operator*, BinaryMul);
|
||||
GRID_DEF_BINOP(operator/, BinaryDiv);
|
||||
|
||||
GRID_DEF_BINOP(operator&, BinaryAnd);
|
||||
GRID_DEF_BINOP(operator|, BinaryOr);
|
||||
GRID_DEF_BINOP(operator&&, BinaryAndAnd);
|
||||
GRID_DEF_BINOP(operator||, BinaryOrOr);
|
||||
|
||||
GRID_DEF_TRINOP(where, TrinaryWhere);
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Closure convenience to force expression to evaluate
|
||||
/////////////////////////////////////////////////////////////
|
||||
template <class Op, class T1>
|
||||
auto closure(const LatticeUnaryExpression<Op, T1> &expr)
|
||||
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1)))>
|
||||
{
|
||||
Lattice<decltype(expr.op.func(eval(0, expr.arg1)))> ret(expr);
|
||||
return ret;
|
||||
}
|
||||
template <class Op, class T1, class T2>
|
||||
auto closure(const LatticeBinaryExpression<Op, T1, T2> &expr)
|
||||
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1),eval(0, expr.arg2)))>
|
||||
{
|
||||
Lattice<decltype(expr.op.func(eval(0, expr.arg1),eval(0, expr.arg2)))> ret(expr);
|
||||
return ret;
|
||||
}
|
||||
template <class Op, class T1, class T2, class T3>
|
||||
auto closure(const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
|
||||
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1),
|
||||
eval(0, expr.arg2),
|
||||
eval(0, expr.arg3)))>
|
||||
{
|
||||
Lattice<decltype(expr.op.func(eval(0, expr.arg1),
|
||||
eval(0, expr.arg2),
|
||||
eval(0, expr.arg3)))> ret(expr);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Retire the operator-glue helper macros so they do not leak out of this header.
// GRID_BINOP_LEFT / GRID_BINOP_RIGHT expand to GRID_BINOP, which is undefined
// here, so they are unusable past this point and are removed as well.
#undef GRID_UNOP
#undef GRID_BINOP
#undef GRID_TRINOP

#undef GRID_DEF_UNOP
#undef GRID_BINOP_LEFT
#undef GRID_BINOP_RIGHT
#undef GRID_DEF_BINOP
#undef GRID_DEF_TRINOP
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
314
Grid/lattice/Lattice_arith.h
Normal file
314
Grid/lattice/Lattice_arith.h
Normal file
@ -0,0 +1,314 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_arith.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_ARITH_H
|
||||
#define GRID_LATTICE_ARITH_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// avoid copy back routines for mult, mac, sub, add
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
auto rhs_v = rhs.View();
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
#ifdef STREAMING_STORES
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
obj1 tmp;
|
||||
mult(&tmp,&lhs_v[ss],&rhs_v[ss]);
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
#else
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
mult(&ret_v[ss],&lhs_v[ss],&rhs_v[ss]);
|
||||
});
|
||||
#endif
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
auto rhs_v = rhs.View();
|
||||
#ifdef STREAMING_STORES
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
obj1 tmp;
|
||||
mac(&tmp,&lhs_v[ss],&rhs_v[ss]);
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
#else
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
mac(&ret_v[ss],&lhs_v[ss],&rhs_v[ss]);
|
||||
});
|
||||
#endif
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
auto rhs_v = rhs.View();
|
||||
#ifdef STREAMING_STORES
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
obj1 tmp;
|
||||
sub(&tmp,&lhs_v[ss],&rhs_v[ss]);
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
#else
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
sub(&ret[ss],&lhs_v[ss],&rhs_v[ss]);
|
||||
});
|
||||
#endif
|
||||
}
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
auto rhs_v = rhs.View();
|
||||
#ifdef STREAMING_STORES
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
obj1 tmp;
|
||||
add(&tmp,&lhs_v[ss],&rhs_v[ss]);
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
#else
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
add(&ret_v[ss],&lhs_v[ss],&rhs_v[ss]);
|
||||
});
|
||||
#endif
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// avoid copy back routines for mult, mac, sub, add : Lattice (op) single object (rhs is not a Lattice)
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(lhs,ret);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
obj1 tmp;
|
||||
mult(&tmp,&lhs_v[ss],&rhs);
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,lhs);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
obj1 tmp;
|
||||
mac(&tmp,&lhs_v[ss],&rhs);
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,lhs);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
#ifdef STREAMING_STORES
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
obj1 tmp;
|
||||
sub(&tmp,&lhs_v[ss],&rhs);
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
#else
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
sub(&ret_v[ss],&lhs_v[ss],&rhs);
|
||||
});
|
||||
#endif
|
||||
}
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(lhs,ret);
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
#ifdef STREAMING_STORES
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
obj1 tmp;
|
||||
add(&tmp,&lhs_v[ss],&rhs);
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
#else
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
add(&ret_v[ss],&lhs_v[ss],&rhs);
|
||||
});
|
||||
#endif
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// avoid copy back routines for mult, mac, sub, add : single object (op) Lattice (lhs is not a Lattice)
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void mult(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = rhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto rhs_v = lhs.View();
|
||||
#ifdef STREAMING_STORES
|
||||
accelerator_loop(ss,rhs_v,{
|
||||
obj1 tmp;
|
||||
mult(&tmp,&lhs,&rhs_v[ss]);
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
#else
|
||||
accelerator_loop(ss,rhs_v,{
|
||||
mult(&ret_v[ss],&lhs,&rhs_v[ss]);
|
||||
});
|
||||
#endif
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = rhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto rhs_v = lhs.View();
|
||||
#ifdef STREAMING_STORES
|
||||
accelerator_loop(ss,rhs_v,{
|
||||
obj1 tmp;
|
||||
mac(&tmp,&lhs,&rhs_v[ss]);
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
#else
|
||||
accelerator_loop(ss,rhs_v,{
|
||||
mac(&ret_v[ss],&lhs,&rhs_v[ss]);
|
||||
});
|
||||
#endif
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void sub(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = rhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto rhs_v = lhs.View();
|
||||
#ifdef STREAMING_STORES
|
||||
accelerator_loop(ss,rhs_v,{
|
||||
obj1 tmp;
|
||||
sub(&tmp,&lhs,&rhs_v[ss]);
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
#else
|
||||
accelerator_loop(ss,rhs_v,{
|
||||
sub(&ret_v[ss],&lhs,&rhs_v[ss]);
|
||||
});
|
||||
#endif
|
||||
}
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void add(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.Checkerboard() = rhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
auto ret_v = ret.View();
|
||||
auto rhs_v = lhs.View();
|
||||
#ifdef STREAMING_STORES
|
||||
accelerator_loop(ss,rhs_v,{
|
||||
obj1 tmp;
|
||||
add(&tmp,&lhs,&rhs_v[ss]);
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
#else
|
||||
accelerator_loop(ss,rhs_v,{
|
||||
add(&ret_v[ss],&lhs,&rhs_v[ss]);
|
||||
});
|
||||
#endif
|
||||
}
|
||||
|
||||
template<class sobj,class vobj> inline
|
||||
void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
|
||||
ret.Checkerboard() = x.Checkerboard();
|
||||
conformable(ret,x);
|
||||
conformable(x,y);
|
||||
auto ret_v = ret.View();
|
||||
auto x_v = x.View();
|
||||
auto y_v = y.View();
|
||||
#ifdef STREAMING_STORES
|
||||
accelerator_loop(ss,x_v,{
|
||||
vobj tmp = a*x_v[ss]+y_v[ss];
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
#else
|
||||
accelerator_loop(ss,x_v,{
|
||||
ret_v[ss]=a*x_v[ss]+y_v[ss];
|
||||
});
|
||||
#endif
|
||||
}
|
||||
template<class sobj,class vobj> inline
|
||||
void axpby(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
|
||||
ret.Checkerboard() = x.Checkerboard();
|
||||
conformable(ret,x);
|
||||
conformable(x,y);
|
||||
auto ret_v = ret.View();
|
||||
auto x_v = x.View();
|
||||
auto y_v = y.View();
|
||||
#ifdef STREAMING_STORES
|
||||
accelerator_loop(ss,x_v,{
|
||||
vobj tmp = a*x_v[ss]+b*y_v[ss];
|
||||
vstream(ret_v[ss],tmp);
|
||||
});
|
||||
#else
|
||||
accelerator_loop(ss,x_v,{
|
||||
ret_v[ss]=a*x_v[ss]+b*y_v[ss];
|
||||
});
|
||||
#endif
|
||||
}
|
||||
|
||||
template<class sobj,class vobj> inline
|
||||
RealD axpy_norm(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y)
|
||||
{
|
||||
return axpy_norm_fast(ret,a,x,y);
|
||||
}
|
||||
template<class sobj,class vobj> inline
|
||||
RealD axpby_norm(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y)
|
||||
{
|
||||
return axpby_norm_fast(ret,a,b,x,y);
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
488
Grid/lattice/Lattice_base.h
Normal file
488
Grid/lattice/Lattice_base.h
Normal file
@ -0,0 +1,488 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_base.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
#define STREAMING_STORES
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
extern int GridCshiftPermuteMap[4][16];
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Base class which can be used by traits to pick up behaviour
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Empty tag type: traits test std::is_base_of<LatticeBase, T> to recognise lattice containers.
class LatticeBase {};
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Conformable checks; same instance of Grid required
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
void accelerator_inline conformable(GridBase *lhs,GridBase *rhs)
|
||||
{
|
||||
assert(lhs == rhs);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Minimal base class containing only data valid to access from accelerator
|
||||
// _odata will be a managed pointer in CUDA
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Force access to lattice through a view object.
|
||||
// prevents writing of code that will not offload to GPU, but perhaps annoyingly
|
||||
// strict, since host code could in principle access the data directly through the lattice object
|
||||
// Need to decide programming model.
|
||||
#define LATTICE_VIEW_STRICT
|
||||
template<class vobj> class LatticeAccelerator : public LatticeBase
|
||||
{
|
||||
protected:
|
||||
GridBase *_grid;
|
||||
int checkerboard;
|
||||
vobj *_odata; // A managed pointer
|
||||
uint64_t _odata_size;
|
||||
public:
|
||||
accelerator_inline LatticeAccelerator() : checkerboard(0), _odata(nullptr), _odata_size(0), _grid(nullptr) { };
|
||||
accelerator_inline uint64_t oSites(void) const { return _odata_size; };
|
||||
accelerator_inline int Checkerboard(void) const { return checkerboard; };
|
||||
accelerator_inline int &Checkerboard(void) { return this->checkerboard; }; // can assign checkerboard on a container, not a view
|
||||
accelerator_inline void Conformable(GridBase * &grid) const
|
||||
{
|
||||
if (grid) conformable(grid, _grid);
|
||||
else grid = _grid;
|
||||
};
|
||||
#ifndef LATTICE_VIEW_STRICT
|
||||
accelerator_inline vobj & operator[](size_t i) { return this->_odata[i]; };
|
||||
accelerator_inline const vobj & operator[](size_t i) const { return this->_odata[i]; };
|
||||
#endif
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// A View class which provides accessor to the data.
|
||||
// This will be safe to call from accelerator_loops and is trivially copy constructible
|
||||
// The copy constructor for this will need to be used by device lambda functions
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
class LatticeView : public LatticeAccelerator<vobj>
|
||||
{
|
||||
public:
|
||||
#ifdef LATTICE_VIEW_STRICT
|
||||
accelerator_inline vobj & operator[](size_t i) { return this->_odata[i]; };
|
||||
accelerator_inline const vobj & operator[](size_t i) const { return this->_odata[i]; };
|
||||
#endif
|
||||
accelerator_inline uint64_t begin(void) const { return 0;};
|
||||
accelerator_inline uint64_t end(void) const { return this->_odata_size; };
|
||||
accelerator_inline uint64_t size(void) const { return this->_odata_size; };
|
||||
|
||||
LatticeView(const LatticeAccelerator<vobj> &refer_to_me) : LatticeAccelerator<vobj> (refer_to_me)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Lattice expression types used by ET to assemble the AST
|
||||
//
|
||||
// Need to be able to select code paths according to whether an operand is a lattice object or not,
|
||||
// so introduce some trait type things
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Empty tag type inherited by every lattice expression node so that traits can detect them.
class LatticeExpressionBase {};
|
||||
|
||||
// is_lattice<T>::value is true when T is LatticeBase or derives from it.
template <typename T> using is_lattice = std::is_base_of<LatticeBase, T>;
|
||||
// is_lattice_expr<T>::value is true when T is LatticeExpressionBase or derives from it.
template <typename T> using is_lattice_expr = std::is_base_of<LatticeExpressionBase,T >;
|
||||
|
||||
template<class T, bool isLattice> struct ViewMapBase { typedef T Type; };
|
||||
template<class T> struct ViewMapBase<T,true> { typedef LatticeView<typename T::vector_object> Type; };
|
||||
template<class T> using ViewMap = ViewMapBase<T,std::is_base_of<LatticeBase, T>::value >;
|
||||
|
||||
template <typename Op, typename _T1>
|
||||
class LatticeUnaryExpression : public LatticeExpressionBase
|
||||
{
|
||||
public:
|
||||
typedef typename ViewMap<_T1>::Type T1;
|
||||
Op op;
|
||||
T1 arg1;
|
||||
LatticeUnaryExpression(Op _op,const _T1 &_arg1) : op(_op), arg1(_arg1) {};
|
||||
};
|
||||
|
||||
template <typename Op, typename _T1, typename _T2>
|
||||
class LatticeBinaryExpression : public LatticeExpressionBase
|
||||
{
|
||||
public:
|
||||
typedef typename ViewMap<_T1>::Type T1;
|
||||
typedef typename ViewMap<_T2>::Type T2;
|
||||
Op op;
|
||||
T1 arg1;
|
||||
T2 arg2;
|
||||
LatticeBinaryExpression(Op _op,const _T1 &_arg1,const _T2 &_arg2) : op(_op), arg1(_arg1), arg2(_arg2) {};
|
||||
};
|
||||
|
||||
template <typename Op, typename _T1, typename _T2, typename _T3>
|
||||
class LatticeTrinaryExpression : public LatticeExpressionBase
|
||||
{
|
||||
public:
|
||||
typedef typename ViewMap<_T1>::Type T1;
|
||||
typedef typename ViewMap<_T2>::Type T2;
|
||||
typedef typename ViewMap<_T3>::Type T3;
|
||||
Op op;
|
||||
T1 arg1;
|
||||
T2 arg2;
|
||||
T3 arg3;
|
||||
LatticeTrinaryExpression(Op _op,const _T1 &_arg1,const _T2 &_arg2,const _T3 &_arg3) : op(_op), arg1(_arg1), arg2(_arg2), arg3(_arg3) {};
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// The real lattice class, with normal copy and assignment semantics.
|
||||
// This contains extra (host resident) grid pointer data that may be accessed by host code
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
class Lattice : public LatticeAccelerator<vobj>
|
||||
{
|
||||
public:
|
||||
GridBase *Grid(void) const { return this->_grid; }
|
||||
///////////////////////////////////////////////////
|
||||
// Member types
|
||||
///////////////////////////////////////////////////
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef vobj vector_object;
|
||||
|
||||
private:
|
||||
void dealloc(void)
|
||||
{
|
||||
alignedAllocator<vobj> alloc;
|
||||
if( this->_odata_size ) {
|
||||
alloc.deallocate(this->_odata,this->_odata_size);
|
||||
this->_odata=nullptr;
|
||||
this->_odata_size=0;
|
||||
}
|
||||
}
|
||||
void resize(uint64_t size)
|
||||
{
|
||||
alignedAllocator<vobj> alloc;
|
||||
if ( this->_odata_size != size ) {
|
||||
dealloc();
|
||||
}
|
||||
this->_odata_size = size;
|
||||
if ( size )
|
||||
this->_odata = alloc.allocate(this->_odata_size);
|
||||
else
|
||||
this->_odata = nullptr;
|
||||
}
|
||||
#if 0
|
||||
void copy_vec(vobj *ptr,uint64_t count)
|
||||
{
|
||||
dealloc();
|
||||
this->_odata = ptr;
|
||||
assert(this->_odata_size == count);
|
||||
}
|
||||
#endif
|
||||
public:
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
// Return a view object that may be dereferenced in site loops.
|
||||
// The view is trivially copy constructible and may be copied to an accelerator device
|
||||
// in device lambdas
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
// Build a LatticeView from this lattice's base-class state (grid, checkerboard,
// data pointer, size). No element data is copied.
LatticeView<vobj> View (void) const
{
  return LatticeView<vobj>(static_cast<const LatticeAccelerator<vobj> &>(*this));
}
|
||||
|
||||
// Free the owned storage; dealloc() itself does nothing when the lattice is empty.
~Lattice() {
  dealloc();
}
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Expression Template closure support
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Evaluate a unary expression into this lattice, site by site.
// The grid found in the expression must be the grid of this lattice, and the
// expression's checkerboard (Even or Odd) becomes ours.
template <typename Op, typename T1> inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr)
{
  GridBase *exprGrid = nullptr;
  GridFromExpression(exprGrid,expr);
  assert(exprGrid!=nullptr);
  conformable(this->_grid,exprGrid);

  int exprCb = -1;
  CBFromExpression(exprCb,expr);
  assert( (exprCb==Even) || (exprCb==Odd) );
  this->checkerboard = exprCb;

  auto me = View();
#ifdef STREAMING_STORES
  // Evaluate into a temporary, then write it out through vstream.
  accelerator_loop(ss,me,{
    vobj tmp = eval(ss,expr);
    vstream(me[ss] ,tmp);
  });
#else
  accelerator_loop(ss,me,{
    me[ss]=eval(ss,expr);
  });
#endif
  return *this;
}
|
||||
// Evaluate a binary expression into this lattice, site by site.
// The grid found in the expression must be the grid of this lattice, and the
// expression's checkerboard (Even or Odd) becomes ours.
template <typename Op, typename T1,typename T2> inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr)
{
  GridBase *exprGrid = nullptr;
  GridFromExpression(exprGrid,expr);
  assert(exprGrid!=nullptr);
  conformable(this->_grid,exprGrid);

  int exprCb = -1;
  CBFromExpression(exprCb,expr);
  assert( (exprCb==Even) || (exprCb==Odd) );
  this->checkerboard = exprCb;

  auto me = View();
#ifdef STREAMING_STORES
  // Evaluate into a temporary, then write it out through vstream.
  accelerator_loop(ss,me,{
    vobj tmp = eval(ss,expr);
    vstream(me[ss] ,tmp);
  });
#else
  accelerator_loop(ss,me,{
    me[ss]=eval(ss,expr);
  });
#endif
  return *this;
}
|
||||
// Evaluate a trinary expression into this lattice, site by site.
// The grid found in the expression must be the grid of this lattice, and the
// expression's checkerboard (Even or Odd) becomes ours.
template <typename Op, typename T1,typename T2,typename T3> inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr)
{
  GridBase *exprGrid = nullptr;
  GridFromExpression(exprGrid,expr);
  assert(exprGrid!=nullptr);
  conformable(this->_grid,exprGrid);

  int exprCb = -1;
  CBFromExpression(exprCb,expr);
  assert( (exprCb==Even) || (exprCb==Odd) );
  this->checkerboard = exprCb;

  auto me = View();
#ifdef STREAMING_STORES
  // Evaluate into a temporary, then write it out through vstream.
  accelerator_loop(ss,me,{
    vobj tmp = eval(ss,expr);
    vstream(me[ss] ,tmp);
  });
#else
  accelerator_loop(ss,me,{
    me[ss] = eval(ss,expr);
  });
#endif
  return *this;
}
|
||||
//GridFromExpression is tricky to do
|
||||
template<class Op,class T1>
|
||||
Lattice(const LatticeUnaryExpression<Op,T1> & expr) {
|
||||
this->_grid = nullptr;
|
||||
GridFromExpression(this->_grid,expr);
|
||||
assert(this->_grid!=nullptr);
|
||||
|
||||
int cb=-1;
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
this->checkerboard=cb;
|
||||
|
||||
resize(this->_grid->oSites());
|
||||
|
||||
*this = expr;
|
||||
}
|
||||
template<class Op,class T1, class T2>
|
||||
Lattice(const LatticeBinaryExpression<Op,T1,T2> & expr) {
|
||||
this->_grid = nullptr;
|
||||
GridFromExpression(this->_grid,expr);
|
||||
assert(this->_grid!=nullptr);
|
||||
|
||||
int cb=-1;
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
this->checkerboard=cb;
|
||||
|
||||
resize(this->_grid->oSites());
|
||||
|
||||
*this = expr;
|
||||
}
|
||||
template<class Op,class T1, class T2, class T3>
|
||||
Lattice(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr) {
|
||||
this->_grid = nullptr;
|
||||
GridFromExpression(this->_grid,expr);
|
||||
assert(this->_grid!=nullptr);
|
||||
|
||||
int cb=-1;
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
this->checkerboard=cb;
|
||||
|
||||
resize(this->_grid->oSites());
|
||||
|
||||
*this = expr;
|
||||
}
|
||||
|
||||
// Broadcast assignment: every stored element is assigned the same value r.
// Grid and checkerboard are left untouched.
template<class sobj> inline Lattice<vobj> & operator = (const sobj & r){
  auto me = View();
  accelerator_loop(ss,me,{ me[ss] = r; });
  return *this;
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// Follow rule of five, with Constructor requires "grid" passed
|
||||
// to user defined constructor
|
||||
///////////////////////////////////////////
|
||||
// user defined constructor
|
||||
///////////////////////////////////////////
|
||||
// Construct a lattice on 'grid': checkerboard 0 and storage for grid->oSites()
// elements, which are not initialised here.
Lattice(GridBase *grid) {
  this->_grid        = grid;
  this->checkerboard = 0;
  resize(this->_grid->oSites());
  // The low four address bits must be clear, i.e. the buffer is 16-byte aligned.
  assert((((uint64_t)&this->_odata[0])&0xF) ==0);
}
|
||||
|
||||
// virtual ~Lattice(void) = default;
|
||||
|
||||
// Re-target this lattice onto another grid. Nothing happens when 'grid' is already
// the current grid; otherwise the storage is resized to grid->oSites() and the
// checkerboard is reset to 0. Existing element values must not be relied upon afterwards.
void reset(GridBase* grid) {
  if (this->_grid != grid) {
    this->_grid = grid;
    // _odata is a raw pointer and has no resize() member; storage is managed
    // through the private resize() helper of this class.
    this->resize(grid->oSites());
    this->checkerboard = 0;
  }
}
|
||||
///////////////////////////////////////////
|
||||
// copy constructor
|
||||
///////////////////////////////////////////
|
||||
// Deep copy: adopt r's grid, allocate matching storage, then let the copy
// assignment operator transfer the checkerboard and every element.
Lattice(const Lattice& r){
  this->_grid = r.Grid();
  resize(this->_grid->oSites());
  *this = r;
}
|
||||
///////////////////////////////////////////
|
||||
// move constructor
|
||||
///////////////////////////////////////////
|
||||
// Steal r's storage: the buffer pointer, size, grid and checkerboard are taken
// over and r is left empty so that its destructor frees nothing.
Lattice(Lattice && r){
  this->_grid        = r.Grid();
  this->checkerboard = r.Checkerboard();
  this->_odata       = r._odata;
  this->_odata_size  = r._odata_size;

  r._odata      = nullptr;
  r._odata_size = 0;
}
|
||||
///////////////////////////////////////////
|
||||
// assignment template
|
||||
///////////////////////////////////////////
|
||||
// Converting assignment from a lattice with a different element type.
// The two lattices must be conformable; the checkerboard is taken from r and
// each element is assigned from the corresponding element of r.
template<class robj> inline Lattice<vobj> & operator = (const Lattice<robj> & r){
  // Compile-time guard: instantiating this with robj == vobj is an error.
  typename std::enable_if<!std::is_same<robj,vobj>::value,int>::type i=0;
  conformable(*this,r);
  this->checkerboard = r.Checkerboard();

  auto him = r.View();
  auto me  = View();
  accelerator_loop(ss,me,{ me[ss] = him[ss]; });
  return *this;
}
|
||||
|
||||
///////////////////////////////////////////
|
||||
// Copy assignment
|
||||
///////////////////////////////////////////
|
||||
// Copy assignment between lattices of the same element type: r must be
// conformable with *this; the checkerboard and all elements are copied.
// The storage of *this is reused, never reallocated.
inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
  conformable(*this,r);
  this->checkerboard = r.Checkerboard();

  auto him = r.View();
  auto me  = View();
  accelerator_loop(ss,me,{ me[ss] = him[ss]; });
  return *this;
}
|
||||
///////////////////////////////////////////
|
||||
// Move assignment possible if same type
|
||||
///////////////////////////////////////////
|
||||
// Move assignment: release our own storage, then take over r's buffer, size,
// grid and checkerboard, leaving r empty.
inline Lattice<vobj> & operator = (Lattice<vobj> && r){
  // Self-move must keep the data: without this guard our buffer would be
  // freed first and the lattice would end up empty.
  if ( this == &r ) return *this;

  dealloc(); // deletes if appropriate

  this->_grid        = r.Grid();
  this->_odata       = r._odata;
  this->_odata_size  = r._odata_size;
  this->checkerboard = r.Checkerboard();

  r._odata      = nullptr;
  r._odata_size = 0;

  return *this;
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// *=, +=, -= operators inherit behaviour from the corresponding *, +, - operation
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// In-place multiply: defined as assigning (*this)*r back into *this.
template<class T> inline Lattice<vobj> &operator *=(const T &r) {
  return *this = (*this)*r;
}
|
||||
|
||||
// In-place subtract: defined as assigning (*this)-r back into *this.
template<class T> inline Lattice<vobj> &operator -=(const T &r) {
  return *this = (*this)-r;
}
|
||||
// In-place add: defined as assigning (*this)+r back into *this.
template<class T> inline Lattice<vobj> &operator +=(const T &r) {
  return *this = (*this)+r;
}
|
||||
|
||||
// Exchange two conformable lattices by swapping their base-class state
// (grid, checkerboard, data pointer, size). No element is copied.
friend inline void swap(Lattice &l, Lattice &r) {
  conformable(l,r);
  LatticeAccelerator<vobj> &lbase = l;
  LatticeAccelerator<vobj> &rbase = r;
  LatticeAccelerator<vobj> saved  = lbase;
  lbase = rbase;
  rbase = saved;
}
|
||||
|
||||
}; // class Lattice
|
||||
|
||||
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
for(int g=0;g<o.Grid()->_gsites;g++){
|
||||
|
||||
Coordinate gcoor;
|
||||
o.Grid()->GlobalIndexToGlobalCoor(g,gcoor);
|
||||
|
||||
sobj ss;
|
||||
peekSite(ss,o,gcoor);
|
||||
stream<<"[";
|
||||
for(int d=0;d<gcoor.size();d++){
|
||||
stream<<gcoor[d];
|
||||
if(d!=gcoor.size()-1) stream<<",";
|
||||
}
|
||||
stream<<"]\t";
|
||||
stream<<ss<<std::endl;
|
||||
}
|
||||
return stream;
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
176
Grid/lattice/Lattice_comparison.h
Normal file
176
Grid/lattice/Lattice_comparison.h
Normal file
@ -0,0 +1,176 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_comparison.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_COMPARISON_H
|
||||
#define GRID_LATTICE_COMPARISON_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// relational operators
|
||||
//
|
||||
// Support <,>,<=,>=,==,!=
|
||||
//
|
||||
//Query supporting bitwise &, |, ^, !
|
||||
//Query supporting logical &&, ||,
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// compare lattice to lattice
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template<class vfunctor,class lobj,class robj>
|
||||
inline Lattice<vInteger> LLComparison(vfunctor op,const Lattice<lobj> &lhs,const Lattice<robj> &rhs)
|
||||
{
|
||||
Lattice<vInteger> ret(rhs.Grid());
|
||||
auto lhs_v = lhs.View();
|
||||
auto rhs_v = rhs.View();
|
||||
auto ret_v = ret.View();
|
||||
accelerator_loop( ss, rhs_v, {
|
||||
ret_v[ss]=op(lhs_v[ss],rhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// compare lattice to scalar
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template<class vfunctor,class lobj,class robj>
|
||||
inline Lattice<vInteger> LSComparison(vfunctor op,const Lattice<lobj> &lhs,const robj &rhs)
|
||||
{
|
||||
Lattice<vInteger> ret(lhs.Grid());
|
||||
auto lhs_v = lhs.View();
|
||||
auto ret_v = ret.View();
|
||||
accelerator_loop( ss, lhs_v, {
|
||||
ret_v[ss]=op(lhs_v[ss],rhs);
|
||||
});
|
||||
return ret;
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// compare scalar to lattice
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template<class vfunctor,class lobj,class robj>
|
||||
inline Lattice<vInteger> SLComparison(vfunctor op,const lobj &lhs,const Lattice<robj> &rhs)
|
||||
{
|
||||
Lattice<vInteger> ret(rhs.Grid());
|
||||
auto rhs_v = rhs.View();
|
||||
auto ret_v = ret.View();
|
||||
accelerator_loop( ss, rhs_v, {
|
||||
ret_v[ss]=op(lhs,rhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Map to functors
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Less than
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator < (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vlt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator < (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(vlt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator < (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(vlt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
// Less than equal
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator <= (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vle<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator <= (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(vle<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator <= (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(vle<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
// Greater than
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator > (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vgt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator > (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(vgt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator > (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(vgt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
|
||||
// Greater than equal
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator >= (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vge<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator >= (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(vge<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator >= (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(vge<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
// equal
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator == (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(veq<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator == (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(veq<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator == (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(veq<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
|
||||
// not equal
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator != (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vne<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator != (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(vne<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator != (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(vne<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
231
Grid/lattice/Lattice_comparison_utils.h
Normal file
231
Grid/lattice/Lattice_comparison_utils.h
Normal file
@ -0,0 +1,231 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_comparison_utils.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_COMPARISON_H
|
||||
#define GRID_COMPARISON_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
/////////////////////////////////////////
|
||||
// This implementation is a bit poor.
|
||||
//
|
||||
// Only support relational logical operations (<, > etc)
|
||||
// on scalar objects. Therefore can strip any tensor structures.
|
||||
//
|
||||
// Should guard this with isGridTensor<> enable if?
|
||||
/////////////////////////////////////////
|
||||
//
|
||||
// Generic list of functors
|
||||
//
|
||||
template<class lobj,class robj> class veq {
|
||||
public:
|
||||
accelerator vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) == (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vne {
|
||||
public:
|
||||
accelerator vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) != (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vlt {
|
||||
public:
|
||||
accelerator vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) < (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vle {
|
||||
public:
|
||||
accelerator vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) <= (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vgt {
|
||||
public:
|
||||
accelerator vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) > (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vge {
|
||||
public:
|
||||
accelerator vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) >= (rhs);
|
||||
}
|
||||
};
|
||||
|
||||
// Generic list of functors
|
||||
template<class lobj,class robj> class seq {
|
||||
public:
|
||||
accelerator Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) == (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class sne {
|
||||
public:
|
||||
accelerator Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) != (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class slt {
|
||||
public:
|
||||
accelerator Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) < (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class sle {
|
||||
public:
|
||||
accelerator Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) <= (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class sgt {
|
||||
public:
|
||||
accelerator Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) > (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class sge {
|
||||
public:
|
||||
accelerator Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) >= (rhs);
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Integer and real get extra relational functions.
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class sfunctor, class vsimd,IfNotComplex<vsimd> = 0>
|
||||
accelerator_inline vInteger Comparison(sfunctor sop,const vsimd & lhs, const vsimd & rhs)
|
||||
{
|
||||
typedef typename vsimd::scalar_type scalar;
|
||||
ExtractBuffer<scalar> vlhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation
|
||||
ExtractBuffer<scalar> vrhs(vsimd::Nsimd());
|
||||
ExtractBuffer<Integer> vpred(vsimd::Nsimd());
|
||||
vInteger ret;
|
||||
extract<vsimd,scalar>(lhs,vlhs);
|
||||
extract<vsimd,scalar>(rhs,vrhs);
|
||||
for(int s=0;s<vsimd::Nsimd();s++){
|
||||
vpred[s] = sop(vlhs[s],vrhs[s]);
|
||||
}
|
||||
merge<vInteger,Integer>(ret,vpred);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class sfunctor, class vsimd,IfNotComplex<vsimd> = 0>
|
||||
accelerator_inline vInteger Comparison(sfunctor sop,const vsimd & lhs, const typename vsimd::scalar_type & rhs)
|
||||
{
|
||||
typedef typename vsimd::scalar_type scalar;
|
||||
ExtractBuffer<scalar> vlhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation
|
||||
ExtractBuffer<Integer> vpred(vsimd::Nsimd());
|
||||
vInteger ret;
|
||||
extract<vsimd,scalar>(lhs,vlhs);
|
||||
for(int s=0;s<vsimd::Nsimd();s++){
|
||||
vpred[s] = sop(vlhs[s],rhs);
|
||||
}
|
||||
merge<vInteger,Integer>(ret,vpred);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class sfunctor, class vsimd,IfNotComplex<vsimd> = 0>
|
||||
accelerator_inline vInteger Comparison(sfunctor sop,const typename vsimd::scalar_type & lhs, const vsimd & rhs)
|
||||
{
|
||||
typedef typename vsimd::scalar_type scalar;
|
||||
ExtractBuffer<scalar> vrhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation
|
||||
ExtractBuffer<Integer> vpred(vsimd::Nsimd());
|
||||
vInteger ret;
|
||||
extract<vsimd,scalar>(rhs,vrhs);
|
||||
for(int s=0;s<vsimd::Nsimd();s++){
|
||||
vpred[s] = sop(lhs,vrhs[s]);
|
||||
}
|
||||
merge<vInteger,Integer>(ret,vpred);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* DECLARE_RELATIONAL_EQ(op,functor) defines 'operator op' returning vInteger for:
 *   - simd/simd, simd/scalar and scalar/simd operands (IfSimd types), implemented
 *     through Comparison() with functor<scalar,scalar>;
 *   - iScalar/iScalar, iScalar/scalar and scalar/iScalar operands, implemented by
 *     applying 'op' to the wrapped _internal member.
 * Keep comments out of the macro body: a line comment would swallow the continuation. */
#define DECLARE_RELATIONAL_EQ(op,functor)                                                                     \
  template<class vsimd,IfSimd<vsimd> = 0>                                                                     \
  accelerator_inline vInteger operator op (const vsimd & lhs, const vsimd & rhs)                              \
  {                                                                                                           \
    using scalar = typename vsimd::scalar_type;                                                               \
    return Comparison(functor<scalar,scalar>(),lhs,rhs);                                                      \
  }                                                                                                           \
  template<class vsimd,IfSimd<vsimd> = 0>                                                                     \
  accelerator_inline vInteger operator op (const vsimd & lhs, const typename vsimd::scalar_type & rhs)        \
  {                                                                                                           \
    using scalar = typename vsimd::scalar_type;                                                               \
    return Comparison(functor<scalar,scalar>(),lhs,rhs);                                                      \
  }                                                                                                           \
  template<class vsimd,IfSimd<vsimd> = 0>                                                                     \
  accelerator_inline vInteger operator op (const typename vsimd::scalar_type & lhs, const vsimd & rhs)        \
  {                                                                                                           \
    using scalar = typename vsimd::scalar_type;                                                               \
    return Comparison(functor<scalar,scalar>(),lhs,rhs);                                                      \
  }                                                                                                           \
  template<class vsimd>                                                                                       \
  accelerator_inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs)                \
  {                                                                                                           \
    return lhs._internal op rhs._internal;                                                                    \
  }                                                                                                           \
  template<class vsimd>                                                                                       \
  accelerator_inline vInteger operator op(const iScalar<vsimd> &lhs,const typename vsimd::scalar_type &rhs)   \
  {                                                                                                           \
    return lhs._internal op rhs;                                                                              \
  }                                                                                                           \
  template<class vsimd>                                                                                       \
  accelerator_inline vInteger operator op(const typename vsimd::scalar_type &lhs,const iScalar<vsimd> &rhs)   \
  {                                                                                                           \
    return lhs op rhs._internal;                                                                              \
  }
|
||||
|
||||
#define DECLARE_RELATIONAL(op,functor) DECLARE_RELATIONAL_EQ(op,functor)
|
||||
|
||||
DECLARE_RELATIONAL(<,slt);
|
||||
DECLARE_RELATIONAL(<=,sle);
|
||||
DECLARE_RELATIONAL(>,sgt);
|
||||
DECLARE_RELATIONAL(>=,sge);
|
||||
DECLARE_RELATIONAL_EQ(==,seq);
|
||||
DECLARE_RELATIONAL(!=,sne);
|
||||
|
||||
#undef DECLARE_RELATIONAL
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
||||
#endif
|
40
Grid/lattice/Lattice_conformable.h
Normal file
40
Grid/lattice/Lattice_conformable.h
Normal file
@ -0,0 +1,40 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_conformable.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_CONFORMABLE_H
|
||||
#define GRID_LATTICE_CONFORMABLE_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class obj1,class obj2> void conformable(const Lattice<obj1> &lhs,const Lattice<obj2> &rhs)
|
||||
{
|
||||
assert(lhs.Grid() == rhs.Grid());
|
||||
assert(lhs.Checkerboard() == rhs.Checkerboard());
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
74
Grid/lattice/Lattice_coordinate.h
Normal file
74
Grid/lattice/Lattice_coordinate.h
Normal file
@ -0,0 +1,74 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_coordinate.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
// Fill lattice `l` so that every site holds that site's global coordinate in
// direction `mu`.
//   l  : lattice to overwrite (all outer sites are written)
//   mu : index into the global coordinate returned by RankIndexToGlobalCoor
template<class iobj> inline void LatticeCoordinate(Lattice<iobj> &l,int mu)
{
  using scalar_type = typename iobj::scalar_type;
  using vector_type = typename iobj::vector_type;

  GridBase *grid = l.Grid();

  // One scalar slot per inner (SIMD) site.
  int lanes = grid->iSites();
  ExtractBuffer<scalar_type> lane_values(lanes);

  Coordinate global;
  vector_type packed;

  auto l_v = l.View();
  for(int outer=0;outer<grid->oSites();outer++){
    // Gather coordinate mu for every inner site that shares this outer site.
    for(int inner=0;inner<grid->iSites();inner++){
      grid->RankIndexToGlobalCoor(grid->ThisRank(),outer,inner,global);
      lane_values[inner]=Integer(global[mu]);
    }
    // Pack the per-lane values into one SIMD word and store it.
    merge<vector_type,scalar_type>(packed,lane_values);
    l_v[outer]=packed;
  }
}
|
||||
|
||||
// FIXME: debug-only helper; deprecate this, it is made obsolete by LatticeCoordinate().
|
||||
// Debug helper: overwrite the storage of `l`, viewed as a flat array of Real,
// with the pattern  value = outer_index + lane*500, where `lane` is the even
// lane index and is written to both members of each (even,odd) lane pair.
// NOTE(review): the pointer is typed Real while the extents are computed from
// vRealF; presumably these are expected to agree in width -- confirm.
template<class vobj> void lex_sites(Lattice<vobj> &l){
  auto l_v = l.View();
  Real *flat = (Real *)&l_v[0];

  size_t n_outer = l.Grid()->oSites();            // outer sites
  size_t n_words = sizeof(vobj)/sizeof(vRealF);   // vRealF words per site object
  size_t n_lanes = vRealF::Nsimd();               // lanes per vRealF word

  for(int site=0;site<n_outer;site++){
    for(int word=0;word<n_words;word++){
      for(int lane=0;lane<n_lanes;lane+=2){
        // Same value goes into lane and lane+1.
        flat[site*n_words*n_lanes+word*n_lanes+lane  ] = site+lane*500;
        flat[site*n_words*n_lanes+word*n_lanes+lane+1] = site+lane*500;
      }
    }
  }
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
83
Grid/lattice/Lattice_local.h
Normal file
83
Grid/lattice/Lattice_local.h
Normal file
@ -0,0 +1,83 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_local.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_LOCALREDUCTION_H
|
||||
#define GRID_LATTICE_LOCALREDUCTION_H
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// localInner, localNorm, outerProduct
|
||||
///////////////////////////////////////////////
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Routines reduced locally at each site (not reduced over sites)
|
||||
/////////////////////////////////////////////////////
|
||||
|
||||
// localNorm2,
|
||||
template<class vobj>
|
||||
inline auto localNorm2 (const Lattice<vobj> &rhs)-> Lattice<typename vobj::tensor_reduced>
|
||||
{
|
||||
Lattice<typename vobj::tensor_reduced> ret(rhs.Grid());
|
||||
auto rhs_v = rhs.View();
|
||||
auto ret_v = ret.View();
|
||||
accelerator_loop(ss,rhs_v,{
|
||||
ret_v[ss]=innerProduct(rhs_v[ss],rhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
}
|
||||
|
||||
// localInnerProduct
|
||||
template<class vobj>
|
||||
inline auto localInnerProduct (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs) -> Lattice<typename vobj::tensor_reduced>
|
||||
{
|
||||
Lattice<typename vobj::tensor_reduced> ret(rhs.Grid());
|
||||
auto lhs_v = lhs.View();
|
||||
auto rhs_v = rhs.View();
|
||||
auto ret_v = ret.View();
|
||||
accelerator_loop(ss,rhs_v,{
|
||||
ret_v[ss]=innerProduct(lhs_v[ss],rhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
}
|
||||
|
||||
// outerProduct Scalar x Scalar -> Scalar
|
||||
// Vector x Vector -> Matrix
|
||||
template<class ll,class rr>
|
||||
inline auto outerProduct (const Lattice<ll> &lhs,const Lattice<rr> &rhs) -> Lattice<decltype(outerProduct(ll(),rr()))>
|
||||
{
|
||||
Lattice<decltype(outerProduct(ll(),rr()))> ret(rhs.Grid());
|
||||
auto lhs_v = lhs.View();
|
||||
auto rhs_v = rhs.View();
|
||||
auto ret_v = ret.View();
|
||||
accelerator_loop(ss,rhs_v,{
|
||||
ret_v[ss]=outerProduct(lhs_v[ss],rhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
}
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
202
Grid/lattice/Lattice_matrix_reduction.h
Normal file
202
Grid/lattice/Lattice_matrix_reduction.h
Normal file
@ -0,0 +1,202 @@
|
||||
/*************************************************************************************
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
Source file: ./lib/lattice/Lattice_matrix_reduction.h
|
||||
Copyright (C) 2015
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
#include <Grid/Grid_Eigen_Dense.h>
|
||||
|
||||
#ifdef GRID_WARN_SUBOPTIMAL
|
||||
#warning "Optimisation alert all these reduction loops are NOT threaded "
|
||||
#endif
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class vobj>
|
||||
static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,const Lattice<vobj> &Y,int Orthog,RealD scale=1.0)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nblock = X.Grid()->GlobalDimensions()[Orthog];
|
||||
|
||||
GridBase *FullGrid = X.Grid();
|
||||
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
|
||||
// Lattice<vobj> Xslice(SliceGrid);
|
||||
// Lattice<vobj> Rslice(SliceGrid);
|
||||
|
||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
||||
|
||||
//FIXME package in a convenient iterator
|
||||
//Should loop over a plane orthogonal to direction "Orthog"
|
||||
int stride=FullGrid->_slice_stride[Orthog];
|
||||
int block =FullGrid->_slice_block [Orthog];
|
||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
||||
int ostride=FullGrid->_ostride[Orthog];
|
||||
auto X_v = X.View();
|
||||
auto Y_v = Y.View();
|
||||
auto R_v = R.View();
|
||||
thread_region
|
||||
{
|
||||
std::vector<vobj> s_x(Nblock);
|
||||
|
||||
thread_loop_collapse2( (int n=0;n<nblock;n++),{
|
||||
for(int b=0;b<block;b++){
|
||||
int o = n*stride + b;
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
s_x[i] = X_v[o+i*ostride];
|
||||
}
|
||||
|
||||
vobj dot;
|
||||
for(int i=0;i<Nblock;i++){
|
||||
dot = Y_v[o+i*ostride];
|
||||
for(int j=0;j<Nblock;j++){
|
||||
dot = dot + s_x[j]*(scale*aa(j,i));
|
||||
}
|
||||
R_v[o+i*ostride]=dot;
|
||||
}
|
||||
}});
|
||||
}
|
||||
};
|
||||
|
||||
template<class vobj>
|
||||
static void sliceMulMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,int Orthog,RealD scale=1.0)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nblock = X.Grid()->GlobalDimensions()[Orthog];
|
||||
|
||||
GridBase *FullGrid = X.Grid();
|
||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
||||
|
||||
//FIXME package in a convenient iterator
|
||||
//Should loop over a plane orthogonal to direction "Orthog"
|
||||
int stride=FullGrid->_slice_stride[Orthog];
|
||||
int block =FullGrid->_slice_block [Orthog];
|
||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
||||
int ostride=FullGrid->_ostride[Orthog];
|
||||
|
||||
auto X_v = X.View();
|
||||
auto R_v = R.View();
|
||||
|
||||
thread_region
|
||||
{
|
||||
std::vector<vobj> s_x(Nblock);
|
||||
|
||||
thread_loop_collapse2( (int n=0;n<nblock;n++),{
|
||||
for(int b=0;b<block;b++){
|
||||
int o = n*stride + b;
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
s_x[i] = X_v[o+i*ostride];
|
||||
}
|
||||
|
||||
vobj dot;
|
||||
for(int i=0;i<Nblock;i++){
|
||||
dot = s_x[0]*(scale*aa(0,i));
|
||||
for(int j=1;j<Nblock;j++){
|
||||
dot = dot + s_x[j]*(scale*aa(j,i));
|
||||
}
|
||||
R_v[o+i*ostride]=dot;
|
||||
}
|
||||
}});
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
template<class vobj>
|
||||
static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
GridBase *FullGrid = lhs.Grid();
|
||||
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
|
||||
int Nblock = FullGrid->GlobalDimensions()[Orthog];
|
||||
|
||||
// Lattice<vobj> Lslice(SliceGrid);
|
||||
// Lattice<vobj> Rslice(SliceGrid);
|
||||
|
||||
mat = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
||||
// int nh = FullGrid->_ndimension;
|
||||
// int nl = SliceGrid->_ndimension;
|
||||
// int nl = nh-1;
|
||||
|
||||
//FIXME package in a convenient iterator
|
||||
//Should loop over a plane orthogonal to direction "Orthog"
|
||||
int stride=FullGrid->_slice_stride[Orthog];
|
||||
int block =FullGrid->_slice_block [Orthog];
|
||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
||||
int ostride=FullGrid->_ostride[Orthog];
|
||||
|
||||
typedef typename vobj::vector_typeD vector_typeD;
|
||||
auto lhs_v = lhs.View();
|
||||
auto rhs_v = rhs.View();
|
||||
thread_region {
|
||||
std::vector<vobj> Left(Nblock);
|
||||
std::vector<vobj> Right(Nblock);
|
||||
Eigen::MatrixXcd mat_thread = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
thread_loop_collapse2((int n=0;n<nblock;n++),{
|
||||
for(int b=0;b<block;b++){
|
||||
|
||||
int o = n*stride + b;
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
Left [i] = lhs_v[o+i*ostride];
|
||||
Right[i] = rhs_v[o+i*ostride];
|
||||
}
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
for(int j=0;j<Nblock;j++){
|
||||
auto tmp = innerProduct(Left[i],Right[j]);
|
||||
auto rtmp = TensorRemove(tmp);
|
||||
ComplexD z = Reduce(rtmp);
|
||||
mat_thread(i,j) += std::complex<double>(real(z),imag(z));
|
||||
}}
|
||||
}});
|
||||
thread_critical {
|
||||
mat += mat_thread;
|
||||
}
|
||||
}
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
for(int j=0;j<Nblock;j++){
|
||||
ComplexD sum = mat(i,j);
|
||||
FullGrid->GlobalSum(sum);
|
||||
mat(i,j)=sum;
|
||||
}}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
||||
|
138
Grid/lattice/Lattice_overload.h
Normal file
138
Grid/lattice/Lattice_overload.h
Normal file
@ -0,0 +1,138 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_overload.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_OVERLOAD_H
|
||||
#define GRID_LATTICE_OVERLOAD_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// unary negation
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
inline Lattice<vobj> operator -(const Lattice<vobj> &r)
|
||||
{
|
||||
Lattice<vobj> ret(r._grid);
|
||||
parallel_for(int ss=0;ss<r._grid->oSites();ss++){
|
||||
vstream(ret._odata[ss], -r._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
/////////////////////////////////////////////////////////////////////////////////////
|
||||
// Lattice BinOp Lattice,
|
||||
//NB mult performs conformable check. Do not reapply here for performance.
|
||||
/////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class left,class right>
|
||||
inline auto operator * (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]*rhs._odata[0])> ret(rhs._grid);
|
||||
mult(ret,lhs,rhs);
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator + (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]+rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]+rhs._odata[0])> ret(rhs._grid);
|
||||
add(ret,lhs,rhs);
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator - (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]-rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]-rhs._odata[0])> ret(rhs._grid);
|
||||
sub(ret,lhs,rhs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Scalar BinOp Lattice ;generate return type
|
||||
template<class left,class right>
|
||||
inline auto operator * (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
decltype(lhs*rhs._odata[0]) tmp=lhs*rhs._odata[ss];
|
||||
vstream(ret._odata[ss],tmp);
|
||||
// ret._odata[ss]=lhs*rhs._odata[ss];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator + (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs+rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs+rhs._odata[0])> ret(rhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
decltype(lhs+rhs._odata[0]) tmp =lhs-rhs._odata[ss];
|
||||
vstream(ret._odata[ss],tmp);
|
||||
// ret._odata[ss]=lhs+rhs._odata[ss];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator - (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs-rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs-rhs._odata[0])> ret(rhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
decltype(lhs-rhs._odata[0]) tmp=lhs-rhs._odata[ss];
|
||||
vstream(ret._odata[ss],tmp);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator * (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites(); ss++){
|
||||
decltype(lhs._odata[0]*rhs) tmp =lhs._odata[ss]*rhs;
|
||||
vstream(ret._odata[ss],tmp);
|
||||
// ret._odata[ss]=lhs._odata[ss]*rhs;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator + (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]+rhs)>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]+rhs)> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
decltype(lhs._odata[0]+rhs) tmp=lhs._odata[ss]+rhs;
|
||||
vstream(ret._odata[ss],tmp);
|
||||
// ret._odata[ss]=lhs._odata[ss]+rhs;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator - (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]-rhs)>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]-rhs)> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
decltype(lhs._odata[0]-rhs) tmp=lhs._odata[ss]-rhs;
|
||||
vstream(ret._odata[ss],tmp);
|
||||
// ret._odata[ss]=lhs._odata[ss]-rhs;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
#endif
|
217
Grid/lattice/Lattice_peekpoke.h
Normal file
217
Grid/lattice/Lattice_peekpoke.h
Normal file
@ -0,0 +1,217 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_peekpoke.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_PEEK_H
|
||||
#define GRID_LATTICE_PEEK_H
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Peeking and poking around
|
||||
///////////////////////////////////////////////
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
|
||||
// FIXME accelerator_loop and accelerator_inline these
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Peek internal indices of a Lattice object
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<int Index,class vobj>
|
||||
auto PeekIndex(const Lattice<vobj> &lhs,int i) -> Lattice<decltype(peekIndex<Index>(vobj(),i))>
|
||||
{
|
||||
Lattice<decltype(peekIndex<Index>(vobj(),i))> ret(lhs.Grid());
|
||||
ret.Checkerboard()=lhs.Checkerboard();
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
cpu_loop( ss, lhs_v, {
|
||||
ret_v[ss] = peekIndex<Index>(lhs_v[ss],i);
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
template<int Index,class vobj>
|
||||
auto PeekIndex(const Lattice<vobj> &lhs,int i,int j) -> Lattice<decltype(peekIndex<Index>(vobj(),i,j))>
|
||||
{
|
||||
Lattice<decltype(peekIndex<Index>(vobj(),i,j))> ret(lhs.Grid());
|
||||
ret.Checkerboard()=lhs.Checkerboard();
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
cpu_loop( ss, lhs_v, {
|
||||
ret_v[ss] = peekIndex<Index>(lhs_v[ss],i,j);
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Poke internal indices of a Lattice object
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<int Index,class vobj>
|
||||
void PokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(vobj(),0))> & rhs,int i)
|
||||
{
|
||||
auto rhs_v = rhs.View();
|
||||
auto lhs_v = lhs.View();
|
||||
cpu_loop( ss, lhs_v, {
|
||||
pokeIndex<Index>(lhs_v[ss],rhs_v[ss],i);
|
||||
});
|
||||
}
|
||||
template<int Index,class vobj>
|
||||
void PokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(vobj(),0,0))> & rhs,int i,int j)
|
||||
{
|
||||
auto rhs_v = rhs.View();
|
||||
auto lhs_v = lhs.View();
|
||||
cpu_loop( ss, lhs_v, {
|
||||
pokeIndex<Index>(lhs_v[ss],rhs_v[ss],i,j);
|
||||
});
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Poke a scalar object into the SIMD array
|
||||
//////////////////////////////////////////////////////
|
||||
template<class vobj,class sobj>
|
||||
void pokeSite(const sobj &s,Lattice<vobj> &l,const Coordinate &site){
|
||||
|
||||
GridBase *grid=l.Grid();
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nsimd = grid->Nsimd();
|
||||
|
||||
assert( l.Checkerboard()== l.Grid()->CheckerBoard(site));
|
||||
assert( sizeof(sobj)*Nsimd == sizeof(vobj));
|
||||
|
||||
int rank,odx,idx;
|
||||
// Optional to broadcast from node 0.
|
||||
grid->GlobalCoorToRankIndex(rank,odx,idx,site);
|
||||
grid->Broadcast(grid->BossRank(),s);
|
||||
|
||||
// extract-modify-merge cycle is easiest way and this is not perf critical
|
||||
ExtractBuffer<sobj> buf(Nsimd);
|
||||
auto l_v = l.View();
|
||||
if ( rank == grid->ThisRank() ) {
|
||||
extract(l_v[odx],buf);
|
||||
buf[idx] = s;
|
||||
merge(l_v[odx],buf);
|
||||
}
|
||||
|
||||
return;
|
||||
};
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// Peek a scalar object from the SIMD array
|
||||
//////////////////////////////////////////////////////////
|
||||
template<class vobj,class sobj>
|
||||
void peekSite(sobj &s,const Lattice<vobj> &l,const Coordinate &site){
|
||||
|
||||
GridBase *grid=l.Grid();
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nsimd = grid->Nsimd();
|
||||
|
||||
assert( l.Checkerboard() == l.Grid()->CheckerBoard(site));
|
||||
|
||||
int rank,odx,idx;
|
||||
grid->GlobalCoorToRankIndex(rank,odx,idx,site);
|
||||
|
||||
ExtractBuffer<sobj> buf(Nsimd);
|
||||
auto l_v = l.View();
|
||||
extract(l_v[odx],buf);
|
||||
|
||||
s = buf[idx];
|
||||
|
||||
grid->Broadcast(rank,s);
|
||||
|
||||
return;
|
||||
};
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// Peek / poke a scalar object in the local SIMD array (no broadcast)
|
||||
//////////////////////////////////////////////////////////
|
||||
template<class vobj,class sobj>
|
||||
void peekLocalSite(sobj &s,const Lattice<vobj> &l,Coordinate &site){
|
||||
|
||||
GridBase *grid = l.Grid();
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nsimd = grid->Nsimd();
|
||||
|
||||
assert( l.Checkerboard()== l.Grid()->CheckerBoard(site));
|
||||
assert( sizeof(sobj)*Nsimd == sizeof(vobj));
|
||||
|
||||
static const int words=sizeof(vobj)/sizeof(vector_type);
|
||||
int odx,idx;
|
||||
idx= grid->iIndex(site);
|
||||
odx= grid->oIndex(site);
|
||||
|
||||
auto l_v = l.View();
|
||||
scalar_type * vp = (scalar_type *)&l_v[odx];
|
||||
scalar_type * pt = (scalar_type *)&s;
|
||||
|
||||
for(int w=0;w<words;w++){
|
||||
pt[w] = vp[idx+w*Nsimd];
|
||||
}
|
||||
|
||||
return;
|
||||
};
|
||||
|
||||
template<class vobj,class sobj>
|
||||
void pokeLocalSite(const sobj &s,Lattice<vobj> &l,Coordinate &site){
|
||||
|
||||
GridBase *grid=l.Grid();
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nsimd = grid->Nsimd();
|
||||
|
||||
assert( l.Checkerboard()== l.Grid()->CheckerBoard(site));
|
||||
assert( sizeof(sobj)*Nsimd == sizeof(vobj));
|
||||
|
||||
static const int words=sizeof(vobj)/sizeof(vector_type);
|
||||
int odx,idx;
|
||||
idx= grid->iIndex(site);
|
||||
odx= grid->oIndex(site);
|
||||
|
||||
auto l_v = l.View();
|
||||
scalar_type * vp = (scalar_type *)&l_v[odx];
|
||||
scalar_type * pt = (scalar_type *)&s;
|
||||
for(int w=0;w<words;w++){
|
||||
vp[idx+w*Nsimd] = pt[w];
|
||||
}
|
||||
|
||||
return;
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
||||
|
63
Grid/lattice/Lattice_reality.h
Normal file
63
Grid/lattice/Lattice_reality.h
Normal file
@ -0,0 +1,63 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_reality.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_REALITY_H
|
||||
#define GRID_LATTICE_REALITY_H
|
||||
|
||||
|
||||
// FIXME .. this is the sector of the code
|
||||
// I am most worried about the directions
|
||||
// The choice of burying complex in the SIMD
|
||||
// is making the use of "real" and "imag" very cumbersome
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
// Site-wise adjoint: returns a lattice on lhs.Grid() whose every site is
// adj(lhs[site]).
// The result inherits lhs's checkerboard, as PeekIndex does for its result;
// without this the result keeps whatever checkerboard the Lattice constructor
// assigns and can fail conformable() against its own argument.
template<class vobj> inline Lattice<vobj> adj(const Lattice<vobj> &lhs){
  Lattice<vobj> ret(lhs.Grid());
  ret.Checkerboard() = lhs.Checkerboard();
  auto lhs_v = lhs.View();
  auto ret_v = ret.View();
  accelerator_loop( ss, lhs_v, {
    ret_v[ss] = adj(lhs_v[ss]);
  });
  return ret;
}
|
||||
|
||||
// Site-wise complex conjugate: returns a lattice on lhs.Grid() whose every
// site is conjugate(lhs[site]).
// The result inherits lhs's checkerboard, as PeekIndex does for its result;
// without this the result keeps whatever checkerboard the Lattice constructor
// assigns and can fail conformable() against its own argument.
template<class vobj> inline Lattice<vobj> conjugate(const Lattice<vobj> &lhs){
  Lattice<vobj> ret(lhs.Grid());
  ret.Checkerboard() = lhs.Checkerboard();
  auto lhs_v = lhs.View();
  auto ret_v = ret.View();
  accelerator_loop( ss, lhs_v, {
    ret_v[ss] = conjugate(lhs_v[ss]);
  });
  return ret;
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
738
Grid/lattice/Lattice_reduction.h
Normal file
738
Grid/lattice/Lattice_reduction.h
Normal file
@ -0,0 +1,738 @@
|
||||
/*************************************************************************************
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
Source file: ./lib/lattice/Lattice_reduction.h
|
||||
Copyright (C) 2015
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
#include <Grid/Grid_Eigen_Dense.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Deterministic Reduction operations
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Squared norm of a lattice field: the real part of innerProduct(arg,arg).
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
  return real(innerProduct(arg,arg));
}
|
||||
|
||||
// Double inner product
|
||||
template<class vobj>
|
||||
inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
|
||||
{
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_typeD vector_type;
|
||||
scalar_type nrm;
|
||||
|
||||
GridBase *grid = left.Grid();
|
||||
|
||||
Vector<vector_type> sumarray(grid->SumArraySize());
|
||||
|
||||
auto left_v = left.View();
|
||||
auto right_v=right.View();
|
||||
|
||||
thread_loop( (int thr=0;thr<grid->SumArraySize();thr++),{
|
||||
int mywork, myoff;
|
||||
GridThread::GetWork(left.Grid()->oSites(),thr,mywork,myoff);
|
||||
|
||||
decltype(innerProductD(left_v[0],right_v[0])) vnrm=Zero(); // private to thread; sub summation
|
||||
for(int ss=myoff;ss<mywork+myoff; ss++){
|
||||
vnrm = vnrm + innerProductD(left_v[ss],right_v[ss]);
|
||||
}
|
||||
sumarray[thr]=TensorRemove(vnrm) ;
|
||||
});
|
||||
|
||||
vector_type vvnrm; vvnrm=Zero(); // sum across threads
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
vvnrm = vvnrm+sumarray[i];
|
||||
}
|
||||
nrm = Reduce(vvnrm);// sum across simd
|
||||
right.Grid()->GlobalSum(nrm);
|
||||
return nrm;
|
||||
}
|
||||
|
||||
/////////////////////////
|
||||
// Fast axpby_norm
|
||||
// z = a x + b y
|
||||
// return norm z
|
||||
/////////////////////////
|
||||
// z = a x + y, returning the value computed by axpby_norm_fast with b == 1.
template<class sobj,class vobj> strong_inline RealD
axpy_norm_fast(Lattice<vobj> &z,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y)
{
  sobj unit(1.0);
  return axpby_norm_fast(z,a,unit,x,y);
}
|
||||
|
||||
// Fused z = a x + b y that also returns the globally summed norm of z.
// z takes the checkerboard of x; z, x and y must be conformable.
template<class sobj,class vobj> strong_inline RealD
axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y)
{
  // Per-thread results are stored `pad` entries apart
  // (presumably to keep different threads' slots on separate cache lines -- confirm).
  const int pad = 8;

  z.Checkerboard() = x.Checkerboard();
  conformable(z,x);
  conformable(x,y);

  GridBase *grid = x.Grid();

  Vector<RealD> partial(grid->SumArraySize()*pad);

  auto x_v=x.View();
  auto y_v=y.View();
  auto z_v=z.View();
  thread_loop( (int thr=0;thr<grid->SumArraySize();thr++),
  {
    int mywork, myoff;
    GridThread::GetWork(x.Grid()->oSites(),thr,mywork,myoff);

    // Thread-private accumulator for this slot's share of the sites.
    decltype(innerProductD(z_v[0],z_v[0])) acc=Zero();
    for(int site=myoff;site<mywork+myoff; site++){
      vobj zsite = a*x_v[site]+b*y_v[site];
      acc = acc + innerProductD(zsite,zsite);
      vstream(z_v[site],zsite);
    }
    vstream(partial[thr*pad],real(Reduce(TensorRemove(acc)))) ;
  });

  // Combine the per-thread results; linear in thread count but fast.
  RealD nrm = 0.0;
  for(int slot=0;slot<grid->SumArraySize();slot++){
    nrm = nrm+partial[slot*pad];
  }
  z.Grid()->GlobalSum(nrm);
  return nrm;
}
|
||||
|
||||
|
||||
template<class Op,class T1>
|
||||
inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
|
||||
->typename decltype(expr.op.func(eval(0,expr.arg1)))::scalar_object
|
||||
{
|
||||
return sum(closure(expr));
|
||||
}
|
||||
|
||||
template<class Op,class T1,class T2>
|
||||
inline auto sum(const LatticeBinaryExpression<Op,T1,T2> & expr)
|
||||
->typename decltype(expr.op.func(eval(0,expr.arg1),eval(0,expr.arg2)))::scalar_object
|
||||
{
|
||||
return sum(closure(expr));
|
||||
}
|
||||
|
||||
|
||||
template<class Op,class T1,class T2,class T3>
|
||||
inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr)
|
||||
->typename decltype(expr.op.func(eval(0,expr.arg1),
|
||||
eval(0,expr.arg2),
|
||||
eval(0,expr.arg3)
|
||||
))::scalar_object
|
||||
{
|
||||
return sum(closure(expr));
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
|
||||
{
|
||||
GridBase *grid=arg.Grid();
|
||||
int Nsimd = grid->Nsimd();
|
||||
|
||||
Vector<vobj> sumarray(grid->SumArraySize());
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
sumarray[i]=Zero();
|
||||
}
|
||||
|
||||
auto arg_v=arg.View();
|
||||
thread_loop( (int thr=0;thr<grid->SumArraySize();thr++),{
|
||||
int nwork, mywork, myoff;
|
||||
GridThread::GetWork(grid->oSites(),thr,mywork,myoff);
|
||||
|
||||
vobj vvsum=Zero();
|
||||
for(int ss=myoff;ss<mywork+myoff; ss++){
|
||||
vvsum = vvsum + arg_v[ss];
|
||||
}
|
||||
sumarray[thr]=vvsum;
|
||||
});
|
||||
|
||||
vobj vsum=Zero(); // sum across threads
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
vsum = vsum+sumarray[i];
|
||||
}
|
||||
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
sobj ssum=Zero();
|
||||
|
||||
ExtractBuffer<sobj> buf(Nsimd);
|
||||
extract(vsum,buf);
|
||||
|
||||
for(int i=0;i<Nsimd;i++) ssum = ssum + buf[i];
|
||||
arg.Grid()->GlobalSum(ssum);
|
||||
|
||||
return ssum;
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// sliceSum, sliceInnerProduct, sliceAxpy, sliceNorm etc...
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim)
|
||||
{
|
||||
///////////////////////////////////////////////////////
|
||||
// FIXME precision promoted summation
|
||||
// may be important for correlation functions
|
||||
// But easily avoided by using double precision fields
|
||||
///////////////////////////////////////////////////////
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
GridBase *grid = Data.Grid();
|
||||
assert(grid!=NULL);
|
||||
|
||||
const int Nd = grid->_ndimension;
|
||||
const int Nsimd = grid->Nsimd();
|
||||
|
||||
assert(orthogdim >= 0);
|
||||
assert(orthogdim < Nd);
|
||||
|
||||
int fd=grid->_fdimensions[orthogdim];
|
||||
int ld=grid->_ldimensions[orthogdim];
|
||||
int rd=grid->_rdimensions[orthogdim];
|
||||
|
||||
Vector<vobj> lvSum(rd); // will locally sum vectors first
|
||||
Vector<sobj> lsSum(ld,Zero()); // sum across these down to scalars
|
||||
ExtractBuffer<sobj> extracted(Nsimd); // splitting the SIMD
|
||||
|
||||
result.resize(fd); // And then global sum to return the same vector to every node
|
||||
for(int r=0;r<rd;r++){
|
||||
lvSum[r]=Zero();
|
||||
}
|
||||
|
||||
int e1= grid->_slice_nblock[orthogdim];
|
||||
int e2= grid->_slice_block [orthogdim];
|
||||
int stride=grid->_slice_stride[orthogdim];
|
||||
|
||||
// sum over reduced dimension planes, breaking out orthog dir
|
||||
// Parallel over orthog direction
|
||||
auto Data_v=Data.View();
|
||||
thread_loop( (int r=0;r<rd;r++), {
|
||||
|
||||
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
|
||||
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int ss= so+n*stride+b;
|
||||
lvSum[r]=lvSum[r]+Data_v[ss];
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Sum across simd lanes in the plane, breaking out orthog dir.
|
||||
Coordinate icoor(Nd);
|
||||
|
||||
for(int rt=0;rt<rd;rt++){
|
||||
|
||||
extract(lvSum[rt],extracted);
|
||||
|
||||
for(int idx=0;idx<Nsimd;idx++){
|
||||
|
||||
grid->iCoorFromIindex(icoor,idx);
|
||||
|
||||
int ldx =rt+icoor[orthogdim]*rd;
|
||||
|
||||
lsSum[ldx]=lsSum[ldx]+extracted[idx];
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// sum over nodes.
|
||||
sobj gsum;
|
||||
for(int t=0;t<fd;t++){
|
||||
int pt = t/ld; // processor plane
|
||||
int lt = t%ld;
|
||||
if ( pt == grid->_processor_coor[orthogdim] ) {
|
||||
gsum=lsSum[lt];
|
||||
} else {
|
||||
gsum=Zero();
|
||||
}
|
||||
|
||||
grid->GlobalSum(gsum);
|
||||
|
||||
result[t]=gsum;
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
static void mySliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
|
||||
{
|
||||
// std::cout << GridLogMessage << "Start mySliceInnerProductVector" << std::endl;
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
std::vector<scalar_type> lsSum;
|
||||
localSliceInnerProductVector(result, lhs, rhs, lsSum, orthogdim);
|
||||
globalSliceInnerProductVector(result, lhs, lsSum, orthogdim);
|
||||
// std::cout << GridLogMessage << "End mySliceInnerProductVector" << std::endl;
|
||||
}
|
||||
|
||||
template <class vobj>
|
||||
static void localSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, const Lattice<vobj> &rhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
|
||||
{
|
||||
// std::cout << GridLogMessage << "Start prep" << std::endl;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
GridBase *grid = lhs.Grid();
|
||||
assert(grid!=NULL);
|
||||
conformable(grid,rhs.Grid());
|
||||
|
||||
const int Nd = grid->_ndimension;
|
||||
const int Nsimd = grid->Nsimd();
|
||||
|
||||
assert(orthogdim >= 0);
|
||||
assert(orthogdim < Nd);
|
||||
|
||||
int fd=grid->_fdimensions[orthogdim];
|
||||
int ld=grid->_ldimensions[orthogdim];
|
||||
int rd=grid->_rdimensions[orthogdim];
|
||||
// std::cout << GridLogMessage << "Start alloc" << std::endl;
|
||||
|
||||
Vector<vector_type> lvSum(rd); // will locally sum vectors first
|
||||
lsSum.resize(ld,scalar_type(0.0)); // sum across these down to scalars
|
||||
ExtractBuffer<iScalar<scalar_type> > extracted(Nsimd); // splitting the SIMD
|
||||
// std::cout << GridLogMessage << "End alloc" << std::endl;
|
||||
|
||||
result.resize(fd); // And then global sum to return the same vector to every node for IO to file
|
||||
for(int r=0;r<rd;r++){
|
||||
lvSum[r]=Zero();
|
||||
}
|
||||
|
||||
int e1= grid->_slice_nblock[orthogdim];
|
||||
int e2= grid->_slice_block [orthogdim];
|
||||
int stride=grid->_slice_stride[orthogdim];
|
||||
// std::cout << GridLogMessage << "End prep" << std::endl;
|
||||
// std::cout << GridLogMessage << "Start parallel inner product, _rd = " << rd << std::endl;
|
||||
vector_type vv;
|
||||
auto l_v=lhs.View();
|
||||
auto r_v=rhs.View();
|
||||
thread_loop( (int r=0;r<rd;r++),{
|
||||
|
||||
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
|
||||
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int ss = so + n * stride + b;
|
||||
vv = TensorRemove(innerProduct(l_v[ss], r_v[ss]));
|
||||
lvSum[r] = lvSum[r] + vv;
|
||||
}
|
||||
}
|
||||
});
|
||||
// std::cout << GridLogMessage << "End parallel inner product" << std::endl;
|
||||
|
||||
// Sum across simd lanes in the plane, breaking out orthog dir.
|
||||
Coordinate icoor(Nd);
|
||||
for(int rt=0;rt<rd;rt++){
|
||||
|
||||
iScalar<vector_type> temp;
|
||||
temp._internal = lvSum[rt];
|
||||
extract(temp,extracted);
|
||||
|
||||
for(int idx=0;idx<Nsimd;idx++){
|
||||
|
||||
grid->iCoorFromIindex(icoor,idx);
|
||||
|
||||
int ldx =rt+icoor[orthogdim]*rd;
|
||||
|
||||
lsSum[ldx]=lsSum[ldx]+extracted[idx]._internal;
|
||||
|
||||
}
|
||||
}
|
||||
// std::cout << GridLogMessage << "End sum over simd lanes" << std::endl;
|
||||
}
|
||||
template <class vobj>
|
||||
static void globalSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
|
||||
{
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
GridBase *grid = lhs.Grid();
|
||||
int fd = result.size();
|
||||
int ld = lsSum.size();
|
||||
// sum over nodes.
|
||||
std::vector<scalar_type> gsum;
|
||||
gsum.resize(fd, scalar_type(0.0));
|
||||
// std::cout << GridLogMessage << "Start of gsum[t] creation:" << std::endl;
|
||||
for(int t=0;t<fd;t++){
|
||||
int pt = t/ld; // processor plane
|
||||
int lt = t%ld;
|
||||
if ( pt == grid->_processor_coor[orthogdim] ) {
|
||||
gsum[t]=lsSum[lt];
|
||||
}
|
||||
}
|
||||
// std::cout << GridLogMessage << "End of gsum[t] creation:" << std::endl;
|
||||
// std::cout << GridLogMessage << "Start of GlobalSumVector:" << std::endl;
|
||||
grid->GlobalSumVector(&gsum[0], fd);
|
||||
// std::cout << GridLogMessage << "End of GlobalSumVector:" << std::endl;
|
||||
|
||||
result = gsum;
|
||||
}
|
||||
template<class vobj>
|
||||
static void sliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
|
||||
{
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
GridBase *grid = lhs.Grid();
|
||||
assert(grid!=NULL);
|
||||
conformable(grid,rhs.Grid());
|
||||
|
||||
const int Nd = grid->_ndimension;
|
||||
const int Nsimd = grid->Nsimd();
|
||||
|
||||
assert(orthogdim >= 0);
|
||||
assert(orthogdim < Nd);
|
||||
|
||||
int fd=grid->_fdimensions[orthogdim];
|
||||
int ld=grid->_ldimensions[orthogdim];
|
||||
int rd=grid->_rdimensions[orthogdim];
|
||||
|
||||
Vector<vector_type> lvSum(rd); // will locally sum vectors first
|
||||
Vector<scalar_type > lsSum(ld,scalar_type(0.0)); // sum across these down to scalars
|
||||
ExtractBuffer<iScalar<scalar_type> > extracted(Nsimd); // splitting the SIMD
|
||||
|
||||
result.resize(fd); // And then global sum to return the same vector to every node for IO to file
|
||||
for(int r=0;r<rd;r++){
|
||||
lvSum[r]=Zero();
|
||||
}
|
||||
|
||||
int e1= grid->_slice_nblock[orthogdim];
|
||||
int e2= grid->_slice_block [orthogdim];
|
||||
int stride=grid->_slice_stride[orthogdim];
|
||||
|
||||
auto lhv=lhs.View();
|
||||
auto rhv=rhs.View();
|
||||
thread_loop( (int r=0;r<rd;r++),{
|
||||
|
||||
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
|
||||
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int ss= so+n*stride+b;
|
||||
vector_type vv = TensorRemove(innerProduct(lhv[ss],rhv[ss]));
|
||||
lvSum[r]=lvSum[r]+vv;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Sum across simd lanes in the plane, breaking out orthog dir.
|
||||
Coordinate icoor(Nd);
|
||||
for(int rt=0;rt<rd;rt++){
|
||||
|
||||
iScalar<vector_type> temp;
|
||||
temp._internal = lvSum[rt];
|
||||
extract(temp,extracted);
|
||||
|
||||
for(int idx=0;idx<Nsimd;idx++){
|
||||
|
||||
grid->iCoorFromIindex(icoor,idx);
|
||||
|
||||
int ldx =rt+icoor[orthogdim]*rd;
|
||||
|
||||
lsSum[ldx]=lsSum[ldx]+extracted[idx]._internal;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// sum over nodes.
|
||||
scalar_type gsum;
|
||||
for(int t=0;t<fd;t++){
|
||||
int pt = t/ld; // processor plane
|
||||
int lt = t%ld;
|
||||
if ( pt == grid->_processor_coor[orthogdim] ) {
|
||||
gsum=lsSum[lt];
|
||||
} else {
|
||||
gsum=scalar_type(0.0);
|
||||
}
|
||||
|
||||
grid->GlobalSum(gsum);
|
||||
|
||||
result[t]=gsum;
|
||||
}
|
||||
}
|
||||
template<class vobj>
|
||||
static void sliceNorm (std::vector<RealD> &sn,const Lattice<vobj> &rhs,int Orthog)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nblock = rhs.Grid()->GlobalDimensions()[Orthog];
|
||||
Vector<ComplexD> ip(Nblock);
|
||||
sn.resize(Nblock);
|
||||
|
||||
sliceInnerProductVector(ip,rhs,rhs,Orthog);
|
||||
for(int ss=0;ss<Nblock;ss++){
|
||||
sn[ss] = real(ip[ss]);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<class vobj>
|
||||
static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice<vobj> &X,const Lattice<vobj> &Y,
|
||||
int orthogdim,RealD scale=1.0)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::tensor_reduced tensor_reduced;
|
||||
|
||||
scalar_type zscale(scale);
|
||||
|
||||
GridBase *grid = X.Grid();
|
||||
|
||||
int Nsimd =grid->Nsimd();
|
||||
int Nblock =grid->GlobalDimensions()[orthogdim];
|
||||
|
||||
int fd =grid->_fdimensions[orthogdim];
|
||||
int ld =grid->_ldimensions[orthogdim];
|
||||
int rd =grid->_rdimensions[orthogdim];
|
||||
|
||||
int e1 =grid->_slice_nblock[orthogdim];
|
||||
int e2 =grid->_slice_block [orthogdim];
|
||||
int stride =grid->_slice_stride[orthogdim];
|
||||
|
||||
Coordinate icoor;
|
||||
for(int r=0;r<rd;r++){
|
||||
|
||||
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
|
||||
|
||||
vector_type av;
|
||||
|
||||
for(int l=0;l<Nsimd;l++){
|
||||
grid->iCoorFromIindex(icoor,l);
|
||||
int ldx =r+icoor[orthogdim]*rd;
|
||||
scalar_type *as =(scalar_type *)&av;
|
||||
as[l] = scalar_type(a[ldx])*zscale;
|
||||
}
|
||||
|
||||
tensor_reduced at; at=av;
|
||||
|
||||
auto Rv=R.View();
|
||||
auto Xv=X.View();
|
||||
auto Yv=Y.View();
|
||||
thread_loop_collapse2( (int n=0;n<e1;n++) , {
|
||||
for(int b=0;b<e2;b++){
|
||||
int ss= so+n*stride+b;
|
||||
Rv[ss] = at*Xv[ss]+Yv[ss];
|
||||
}
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
inline GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Orthog)
|
||||
{
|
||||
int NN = BlockSolverGrid->_ndimension;
|
||||
int nsimd = BlockSolverGrid->Nsimd();
|
||||
|
||||
std::vector<int> latt_phys(0);
|
||||
std::vector<int> simd_phys(0);
|
||||
std::vector<int> mpi_phys(0);
|
||||
|
||||
for(int d=0;d<NN;d++){
|
||||
if( d!=Orthog ) {
|
||||
latt_phys.push_back(BlockSolverGrid->_fdimensions[d]);
|
||||
simd_phys.push_back(BlockSolverGrid->_simd_layout[d]);
|
||||
mpi_phys.push_back(BlockSolverGrid->_processors[d]);
|
||||
}
|
||||
}
|
||||
return (GridBase *)new GridCartesian(latt_phys,simd_phys,mpi_phys);
|
||||
}
|
||||
*/
|
||||
|
||||
template<class vobj>
|
||||
static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,const Lattice<vobj> &Y,int Orthog,RealD scale=1.0)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nblock = X.Grid()->GlobalDimensions()[Orthog];
|
||||
|
||||
GridBase *FullGrid = X.Grid();
|
||||
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
|
||||
// Lattice<vobj> Xslice(SliceGrid);
|
||||
// Lattice<vobj> Rslice(SliceGrid);
|
||||
|
||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
||||
int nh = FullGrid->_ndimension;
|
||||
// int nl = SliceGrid->_ndimension;
|
||||
int nl = nh-1;
|
||||
|
||||
//FIXME package in a convenient iterator
|
||||
//Should loop over a plane orthogonal to direction "Orthog"
|
||||
int stride=FullGrid->_slice_stride[Orthog];
|
||||
int block =FullGrid->_slice_block [Orthog];
|
||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
||||
int ostride=FullGrid->_ostride[Orthog];
|
||||
thread_region
|
||||
{
|
||||
Vector<vobj> s_x(Nblock);
|
||||
|
||||
thread_loop_collapse_in_region(2 ,(int n=0;n<nblock;n++), {
|
||||
for(int b=0;b<block;b++){
|
||||
int o = n*stride + b;
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
s_x[i] = X[o+i*ostride];
|
||||
}
|
||||
|
||||
vobj dot;
|
||||
for(int i=0;i<Nblock;i++){
|
||||
dot = Y[o+i*ostride];
|
||||
for(int j=0;j<Nblock;j++){
|
||||
dot = dot + s_x[j]*(scale*aa(j,i));
|
||||
}
|
||||
R[o+i*ostride]=dot;
|
||||
}
|
||||
}});
|
||||
}
|
||||
};
|
||||
|
||||
template<class vobj>
|
||||
static void sliceMulMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,int Orthog,RealD scale=1.0)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nblock = X.Grid()->GlobalDimensions()[Orthog];
|
||||
|
||||
GridBase *FullGrid = X.Grid();
|
||||
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
// Lattice<vobj> Xslice(SliceGrid);
|
||||
// Lattice<vobj> Rslice(SliceGrid);
|
||||
|
||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
||||
int nh = FullGrid->_ndimension;
|
||||
// int nl = SliceGrid->_ndimension;
|
||||
int nl=1;
|
||||
|
||||
//FIXME package in a convenient iterator
|
||||
//Should loop over a plane orthogonal to direction "Orthog"
|
||||
int stride=FullGrid->_slice_stride[Orthog];
|
||||
int block =FullGrid->_slice_block [Orthog];
|
||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
||||
int ostride=FullGrid->_ostride[Orthog];
|
||||
thread_region
|
||||
{
|
||||
std::vector<vobj> s_x(Nblock);
|
||||
|
||||
|
||||
thread_loop_collapse_in_region( 2 , (int n=0;n<nblock;n++),{
|
||||
for(int b=0;b<block;b++){
|
||||
int o = n*stride + b;
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
s_x[i] = X[o+i*ostride];
|
||||
}
|
||||
|
||||
vobj dot;
|
||||
for(int i=0;i<Nblock;i++){
|
||||
dot = s_x[0]*(scale*aa(0,i));
|
||||
for(int j=1;j<Nblock;j++){
|
||||
dot = dot + s_x[j]*(scale*aa(j,i));
|
||||
}
|
||||
R[o+i*ostride]=dot;
|
||||
}
|
||||
}});
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<class vobj>
|
||||
static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
GridBase *FullGrid = lhs.Grid();
|
||||
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
|
||||
int Nblock = FullGrid->GlobalDimensions()[Orthog];
|
||||
|
||||
// Lattice<vobj> Lslice(SliceGrid);
|
||||
// Lattice<vobj> Rslice(SliceGrid);
|
||||
|
||||
mat = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
||||
int nh = FullGrid->_ndimension;
|
||||
// int nl = SliceGrid->_ndimension;
|
||||
int nl = nh-1;
|
||||
|
||||
//FIXME package in a convenient iterator
|
||||
//Should loop over a plane orthogonal to direction "Orthog"
|
||||
int stride=FullGrid->_slice_stride[Orthog];
|
||||
int block =FullGrid->_slice_block [Orthog];
|
||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
||||
int ostride=FullGrid->_ostride[Orthog];
|
||||
|
||||
typedef typename vobj::vector_typeD vector_typeD;
|
||||
|
||||
thread_region
|
||||
{
|
||||
std::vector<vobj> Left(Nblock);
|
||||
std::vector<vobj> Right(Nblock);
|
||||
Eigen::MatrixXcd mat_thread = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
thread_loop_collapse_in_region( 2, (int n=0;n<nblock;n++),{
|
||||
for(int b=0;b<block;b++){
|
||||
|
||||
int o = n*stride + b;
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
Left [i] = lhs[o+i*ostride];
|
||||
Right[i] = rhs[o+i*ostride];
|
||||
}
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
for(int j=0;j<Nblock;j++){
|
||||
auto tmp = innerProduct(Left[i],Right[j]);
|
||||
auto rtmp = TensorRemove(tmp);
|
||||
mat_thread(i,j) += Reduce(rtmp);
|
||||
}}
|
||||
}});
|
||||
thread_critical
|
||||
{
|
||||
mat += mat_thread;
|
||||
}
|
||||
}
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
for(int j=0;j<Nblock;j++){
|
||||
ComplexD sum = mat(i,j);
|
||||
FullGrid->GlobalSum(sum);
|
||||
mat(i,j)=sum;
|
||||
}}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
||||
|
||||
|
518
Grid/lattice/Lattice_rng.h
Normal file
518
Grid/lattice/Lattice_rng.h
Normal file
@ -0,0 +1,518 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_rng.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_RNG_H
|
||||
#define GRID_LATTICE_RNG_H
|
||||
|
||||
#include <random>
|
||||
|
||||
#ifdef RNG_SITMO
|
||||
#include <Grid/sitmo_rng/sitmo_prng_engine.hpp>
|
||||
#endif
|
||||
|
||||
#if defined(RNG_SITMO)
|
||||
#define RNG_FAST_DISCARD
|
||||
#else
|
||||
#undef RNG_FAST_DISCARD
|
||||
#endif
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// Allow the RNG state to be less dense than the fine grid
|
||||
//////////////////////////////////////////////////////////////
|
||||
// Returns how many fine-grid reduced sites correspond to one coarse (RNG)
// grid site, after asserting that the two grids are compatible.
//
// The fine grid may have extra leading dimensions; those must not be split
// over SIMD lanes or processors.  The remaining dimensions must match the
// coarse grid in processor and SIMD layout, and the fine reduced extent must
// be an exact multiple of the coarse one.
inline int RNGfillable(GridBase *coarse,GridBase *fine)
{
  const int rngdims = coarse->_ndimension;

  // trivially extended in higher dims, with locality guaranteeing RNG state is local to node
  const int lowerdims = fine->_ndimension - coarse->_ndimension;
  assert(lowerdims >= 0);

  int multiplicity=1;

  // Extra leading dimensions of the fine grid contribute their whole extent.
  for(int d=0;d<lowerdims;d++){
    assert(fine->_simd_layout[d]==1);
    assert(fine->_processors[d]==1);
    multiplicity=multiplicity*fine->_rdimensions[d];
  }

  // local and global volumes subdivide cleanly after SIMDization
  for(int d=0;d<rngdims;d++){
    const int fd = d+lowerdims;
    const int fineExtent   = fine->_rdimensions[fd];
    const int coarseExtent = coarse->_rdimensions[d];

    assert(coarse->_processors[d]  == fine->_processors[fd]);
    assert(coarse->_simd_layout[d] == fine->_simd_layout[fd]);
    assert((fineExtent % coarseExtent)==0);

    multiplicity = multiplicity * fineExtent / coarseExtent;
  }
  return multiplicity;
}
|
||||
|
||||
|
||||
// merge of April 11 2017
|
||||
// this function is necessary for the LS vectorised field
|
||||
// Returns the ratio of fine to coarse local site counts, after asserting
// that the two grids are compatible: same processor layout in the shared
// dimensions, no processor split in the extra leading fine dimensions, equal
// Nsimd(), and a fine local volume that is an exact multiple of the coarse one.
inline int RNGfillable_general(GridBase *coarse,GridBase *fine)
{
  const int rngdims = coarse->_ndimension;

  // trivially extended in higher dims, with locality guaranteeing RNG state is local to node
  const int lowerdims = fine->_ndimension - coarse->_ndimension;
  assert(lowerdims >= 0);

  // assumes that the higher dimensions are not using more processors
  // all further divisions are local
  for(int d=0;d<lowerdims;d++){
    assert(fine->_processors[d]==1);
  }
  for(int d=0;d<rngdims;d++){
    assert(coarse->_processors[d] == fine->_processors[d+lowerdims]);
  }

  // then divide the number of local sites
  // check that the total number of SIMD lanes agrees, meaning the iSites are the same
  assert(fine->Nsimd() == coarse->Nsimd());

  // check that the two grids divide cleanly
  assert( (fine->lSites() / coarse->lSites() ) * coarse->lSites() == fine->lSites() );

  return fine->lSites() / coarse->lSites();
}
|
||||
|
||||
// real scalars are one component
|
||||
// Generic overload: assign one draw from dist, driven by gen, to s.
template<class scalar,class distribution,class generator>
void fillScalar(scalar &s,distribution &dist,generator & gen)
{
  const auto draw = dist(gen);
  s = draw;
}
|
||||
template<class distribution,class generator>
|
||||
void fillScalar(ComplexF &s,distribution &dist, generator &gen)
|
||||
{
|
||||
s=ComplexF(dist(gen),dist(gen));
|
||||
}
|
||||
template<class distribution,class generator>
|
||||
void fillScalar(ComplexD &s,distribution &dist,generator &gen)
|
||||
{
|
||||
s=ComplexD(dist(gen),dist(gen));
|
||||
}
|
||||
|
||||
class GridRNGbase {
|
||||
public:
|
||||
// One generator per site.
|
||||
// Uniform and Gaussian distributions from these generators.
|
||||
#ifdef RNG_RANLUX
|
||||
typedef std::ranlux48 RngEngine;
|
||||
typedef uint64_t RngStateType;
|
||||
static const int RngStateCount = 15;
|
||||
#endif
|
||||
#ifdef RNG_MT19937
|
||||
typedef std::mt19937 RngEngine;
|
||||
typedef uint32_t RngStateType;
|
||||
static const int RngStateCount = std::mt19937::state_size;
|
||||
#endif
|
||||
#ifdef RNG_SITMO
|
||||
typedef sitmo::prng_engine RngEngine;
|
||||
typedef uint64_t RngStateType;
|
||||
static const int RngStateCount = 13;
|
||||
#endif
|
||||
|
||||
std::vector<RngEngine> _generators;
|
||||
std::vector<std::uniform_real_distribution<RealD> > _uniform;
|
||||
std::vector<std::normal_distribution<RealD> > _gaussian;
|
||||
std::vector<std::discrete_distribution<int32_t> > _bernoulli;
|
||||
std::vector<std::uniform_int_distribution<uint32_t> > _uid;
|
||||
|
||||
///////////////////////
|
||||
// support for parallel init
|
||||
///////////////////////
|
||||
#ifdef RNG_FAST_DISCARD
|
||||
static void Skip(RngEngine &eng,uint64_t site)
{
  /////////////////////////////////////////////////////////////////////////////////////
  // Advance the engine by site * 2^30 draws, so that successive lattice sites
  // start 2^30 elements apart in the stream.
  //
  // The spacing was originally 2^40 (about 10^12).  The reasoning given for it:
  // quenched updating is unlikely to exceed 1000 sweeps per second on any
  // machine, giving of order 10^9 seconds (100 years) of skip ahead; HMC is
  // unlikely to go faster than a solve per second with tens of seconds per
  // trajectory, so the margin of safety is orders of magnitude.  Sitmo could
  // be hacked to skip in the higher order words of state if necessary.
  //
  // It was replaced with 2^30 to avoid a problem on large volumes.
  //
  // The old init skipped by site+1 and then drew; compatibility with the
  // faster init was checked.
  /////////////////////////////////////////////////////////////////////////////////////
  const int shift = 30;

  const uint64_t skip = site << shift;

  assert((skip >> shift)==site); // check for overflow

  eng.discard(skip);
}
|
||||
#endif
|
||||
static RngEngine Reseed(RngEngine &eng)
|
||||
{
|
||||
std::vector<uint32_t> newseed;
|
||||
std::uniform_int_distribution<uint32_t> uid;
|
||||
return Reseed(eng,newseed,uid);
|
||||
}
|
||||
static RngEngine Reseed(RngEngine &eng,std::vector<uint32_t> & newseed,
|
||||
std::uniform_int_distribution<uint32_t> &uid)
|
||||
{
|
||||
const int reseeds=4;
|
||||
|
||||
newseed.resize(reseeds);
|
||||
for(int i=0;i<reseeds;i++){
|
||||
newseed[i] = uid(eng);
|
||||
}
|
||||
std::seed_seq sseq(newseed.begin(),newseed.end());
|
||||
return RngEngine(sseq);
|
||||
}
|
||||
|
||||
void GetState(std::vector<RngStateType> & saved,RngEngine &eng) {
|
||||
saved.resize(RngStateCount);
|
||||
std::stringstream ss;
|
||||
ss<<eng;
|
||||
ss.seekg(0,ss.beg);
|
||||
for(int i=0;i<RngStateCount;i++){
|
||||
ss>>saved[i];
|
||||
}
|
||||
}
|
||||
void GetState(std::vector<RngStateType> & saved,int gen) {
|
||||
GetState(saved,_generators[gen]);
|
||||
}
|
||||
void SetState(std::vector<RngStateType> & saved,RngEngine &eng){
|
||||
assert(saved.size()==RngStateCount);
|
||||
std::stringstream ss;
|
||||
for(int i=0;i<RngStateCount;i++){
|
||||
ss<< saved[i]<<" ";
|
||||
}
|
||||
ss.seekg(0,ss.beg);
|
||||
ss>>eng;
|
||||
}
|
||||
void SetState(std::vector<RngStateType> & saved,int gen){
|
||||
SetState(saved,_generators[gen]);
|
||||
}
|
||||
void SetEngine(RngEngine &Eng, int gen){
|
||||
_generators[gen]=Eng;
|
||||
}
|
||||
void GetEngine(RngEngine &Eng, int gen){
|
||||
Eng=_generators[gen];
|
||||
}
|
||||
template<class source> void Seed(source &src, int gen)
|
||||
{
|
||||
_generators[gen] = RngEngine(src);
|
||||
}
|
||||
};
|
||||
|
||||
class GridSerialRNG : public GridRNGbase {
|
||||
public:
|
||||
|
||||
GridSerialRNG() : GridRNGbase() {
|
||||
_generators.resize(1);
|
||||
_uniform.resize(1,std::uniform_real_distribution<RealD>{0,1});
|
||||
_gaussian.resize(1,std::normal_distribution<RealD>(0.0,1.0) );
|
||||
_bernoulli.resize(1,std::discrete_distribution<int32_t>{1,1});
|
||||
_uid.resize(1,std::uniform_int_distribution<uint32_t>() );
|
||||
}
|
||||
|
||||
template <class sobj,class distribution> inline void fill(sobj &l,std::vector<distribution> &dist){
|
||||
|
||||
typedef typename sobj::scalar_type scalar_type;
|
||||
|
||||
int words = sizeof(sobj)/sizeof(scalar_type);
|
||||
|
||||
scalar_type *buf = (scalar_type *) & l;
|
||||
|
||||
dist[0].reset();
|
||||
for(int idx=0;idx<words;idx++){
|
||||
fillScalar(buf[idx],dist[0],_generators[0]);
|
||||
}
|
||||
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
|
||||
}
|
||||
|
||||
template <class distribution> inline void fill(ComplexF &l,std::vector<distribution> &dist){
|
||||
dist[0].reset();
|
||||
fillScalar(l,dist[0],_generators[0]);
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(ComplexD &l,std::vector<distribution> &dist){
|
||||
dist[0].reset();
|
||||
fillScalar(l,dist[0],_generators[0]);
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(RealF &l,std::vector<distribution> &dist){
|
||||
dist[0].reset();
|
||||
fillScalar(l,dist[0],_generators[0]);
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(RealD &l,std::vector<distribution> &dist){
|
||||
dist[0].reset();
|
||||
fillScalar(l,dist[0],_generators[0]);
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
// vector fill
|
||||
template <class distribution> inline void fill(vComplexF &l,std::vector<distribution> &dist){
|
||||
RealF *pointer=(RealF *)&l;
|
||||
dist[0].reset();
|
||||
for(int i=0;i<2*vComplexF::Nsimd();i++){
|
||||
fillScalar(pointer[i],dist[0],_generators[0]);
|
||||
}
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(vComplexD &l,std::vector<distribution> &dist){
|
||||
RealD *pointer=(RealD *)&l;
|
||||
dist[0].reset();
|
||||
for(int i=0;i<2*vComplexD::Nsimd();i++){
|
||||
fillScalar(pointer[i],dist[0],_generators[0]);
|
||||
}
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(vRealF &l,std::vector<distribution> &dist){
|
||||
RealF *pointer=(RealF *)&l;
|
||||
dist[0].reset();
|
||||
for(int i=0;i<vRealF::Nsimd();i++){
|
||||
fillScalar(pointer[i],dist[0],_generators[0]);
|
||||
}
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(vRealD &l,std::vector<distribution> &dist){
|
||||
RealD *pointer=(RealD *)&l;
|
||||
dist[0].reset();
|
||||
for(int i=0;i<vRealD::Nsimd();i++){
|
||||
fillScalar(pointer[i],dist[0],_generators[0]);
|
||||
}
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
|
||||
void SeedFixedIntegers(const std::vector<int> &seeds){
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size());
|
||||
std::seed_seq src(seeds.begin(),seeds.end());
|
||||
Seed(src,0);
|
||||
}
|
||||
|
||||
void SeedUniqueString(const std::string &s){
|
||||
std::vector<int> seeds;
|
||||
std::stringstream sha;
|
||||
seeds = GridChecksum::sha256_seeds(s);
|
||||
for(int i=0;i<seeds.size();i++) {
|
||||
sha << std::hex << seeds[i];
|
||||
}
|
||||
std::cout << GridLogMessage << "Intialising serial RNG with unique string '"
|
||||
<< s << "'" << std::endl;
|
||||
std::cout << GridLogMessage << "Seed SHA256: " << sha.str() << std::endl;
|
||||
SeedFixedIntegers(seeds);
|
||||
}
|
||||
};
|
||||
|
||||
class GridParallelRNG : public GridRNGbase {
|
||||
private:
|
||||
double _time_counter;
|
||||
GridBase *_grid;
|
||||
unsigned int _vol;
|
||||
|
||||
public:
|
||||
GridBase *Grid(void) const { return _grid; }
|
||||
int generator_idx(int os,int is) {
|
||||
return is*_grid->oSites()+os;
|
||||
}
|
||||
|
||||
GridParallelRNG(GridBase *grid) : GridRNGbase() {
|
||||
_grid = grid;
|
||||
_vol =_grid->iSites()*_grid->oSites();
|
||||
|
||||
_generators.resize(_vol);
|
||||
_uniform.resize(_vol,std::uniform_real_distribution<RealD>{0,1});
|
||||
_gaussian.resize(_vol,std::normal_distribution<RealD>(0.0,1.0) );
|
||||
_bernoulli.resize(_vol,std::discrete_distribution<int32_t>{1,1});
|
||||
_uid.resize(_vol,std::uniform_int_distribution<uint32_t>() );
|
||||
}
|
||||
|
||||
template <class vobj,class distribution> inline void fill(Lattice<vobj> &l,std::vector<distribution> &dist){
|
||||
|
||||
typedef typename vobj::scalar_object scalar_object;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
double inner_time_counter = usecond();
|
||||
|
||||
int multiplicity = RNGfillable_general(_grid, l.Grid()); // l has finer or same grid
|
||||
int Nsimd = _grid->Nsimd(); // guaranteed to be the same for l.Grid() too
|
||||
int osites = _grid->oSites(); // guaranteed to be <= l.Grid()->oSites() by a factor multiplicity
|
||||
int words = sizeof(scalar_object) / sizeof(scalar_type);
|
||||
|
||||
auto l_v = l.View();
|
||||
thread_loop( (int ss=0;ss<osites;ss++), {
|
||||
ExtractBuffer<scalar_object> buf(Nsimd);
|
||||
for (int m = 0; m < multiplicity; m++) { // Draw from same generator multiplicity times
|
||||
|
||||
int sm = multiplicity * ss + m; // Maps the generator site to the fine site
|
||||
|
||||
for (int si = 0; si < Nsimd; si++) {
|
||||
|
||||
int gdx = generator_idx(ss, si); // index of generator state
|
||||
scalar_type *pointer = (scalar_type *)&buf[si];
|
||||
dist[gdx].reset();
|
||||
for (int idx = 0; idx < words; idx++)
|
||||
fillScalar(pointer[idx], dist[gdx], _generators[gdx]);
|
||||
}
|
||||
// merge into SIMD lanes, FIXME suboptimal implementation
|
||||
merge(l_v[sm], buf);
|
||||
}
|
||||
});
|
||||
// });
|
||||
|
||||
_time_counter += usecond()- inner_time_counter;
|
||||
}
|
||||
|
||||
void SeedUniqueString(const std::string &s){
|
||||
std::vector<int> seeds;
|
||||
seeds = GridChecksum::sha256_seeds(s);
|
||||
std::cout << GridLogMessage << "Intialising parallel RNG with unique string '"
|
||||
<< s << "'" << std::endl;
|
||||
std::cout << GridLogMessage << "Seed SHA256: " << GridChecksum::sha256_string(seeds) << std::endl;
|
||||
SeedFixedIntegers(seeds);
|
||||
}
|
||||
void SeedFixedIntegers(const std::vector<int> &seeds){
|
||||
|
||||
// Everyone generates the same seed_seq based on input seeds
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size());
|
||||
|
||||
std::seed_seq source(seeds.begin(),seeds.end());
|
||||
|
||||
RngEngine master_engine(source);
|
||||
|
||||
#ifdef RNG_FAST_DISCARD
|
||||
////////////////////////////////////////////////
|
||||
// Skip ahead through a single stream.
|
||||
// Applicable to SITMO and other has based/crypto RNGs
|
||||
// Should be applicable to Mersenne Twister, but the C++11
|
||||
// MT implementation does not implement fast discard even though
|
||||
// in principle this is possible
|
||||
////////////////////////////////////////////////
|
||||
|
||||
// Everybody loops over global volume.
|
||||
thread_loop( (int gidx=0;gidx<_grid->_gsites;gidx++) , {
|
||||
// Where is it?
|
||||
int rank;
|
||||
int o_idx;
|
||||
int i_idx;
|
||||
|
||||
Coordinate gcoor;
|
||||
_grid->GlobalIndexToGlobalCoor(gidx,gcoor);
|
||||
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
|
||||
|
||||
// If this is one of mine we take it
|
||||
if( rank == _grid->ThisRank() ){
|
||||
int l_idx=generator_idx(o_idx,i_idx);
|
||||
_generators[l_idx] = master_engine;
|
||||
Skip(_generators[l_idx],gidx); // Skip to next RNG sequence
|
||||
}
|
||||
});
|
||||
#else
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Machine and thread decomposition dependent seeding is efficient
|
||||
// and maximally parallel; but NOT reproducible from machine to machine.
|
||||
// Not ideal, but fastest way to reseed all nodes.
|
||||
////////////////////////////////////////////////////////////////
|
||||
{
|
||||
// Obtain one Reseed per processor
|
||||
int Nproc = _grid->ProcessorCount();
|
||||
std::vector<RngEngine> seeders(Nproc);
|
||||
int me= _grid->ThisRank();
|
||||
for(int p=0;p<Nproc;p++){
|
||||
seeders[p] = Reseed(master_engine);
|
||||
}
|
||||
master_engine = seeders[me];
|
||||
}
|
||||
|
||||
{
|
||||
// Obtain one reseeded generator per thread
|
||||
int Nthread = GridThread::GetThreads();
|
||||
std::vector<RngEngine> seeders(Nthread);
|
||||
for(int t=0;t<Nthread;t++){
|
||||
seeders[t] = Reseed(master_engine);
|
||||
}
|
||||
|
||||
thread_loop( (int t=0;t<Nthread;t++), {
|
||||
// set up one per local site in threaded fashion
|
||||
std::vector<uint32_t> newseeds;
|
||||
std::uniform_int_distribution<uint32_t> uid;
|
||||
for(int l=0;l<_grid->lSites();l++) {
|
||||
if ( (l%Nthread)==t ) {
|
||||
_generators[l] = Reseed(seeders[t],newseeds,uid);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void Report(){
|
||||
std::cout << GridLogMessage << "Time spent in the fill() routine by GridParallelRNG: "<< _time_counter/1e3 << " ms" << std::endl;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Support for rigorous test of RNG's
|
||||
// Return uniform random uint32_t from requested site generator
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
uint32_t GlobalU01(int gsite){
|
||||
|
||||
uint32_t the_number;
|
||||
// who
|
||||
int rank,o_idx,i_idx;
|
||||
Coordinate gcoor;
|
||||
_grid->GlobalIndexToGlobalCoor(gsite,gcoor);
|
||||
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
|
||||
|
||||
// draw
|
||||
int l_idx=generator_idx(o_idx,i_idx);
|
||||
if( rank == _grid->ThisRank() ){
|
||||
the_number = _uid[l_idx](_generators[l_idx]);
|
||||
}
|
||||
|
||||
// share & return
|
||||
_grid->Broadcast(rank,(void *)&the_number,sizeof(the_number));
|
||||
return the_number;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
// Fill the lattice l using rng's per-site uniform real distributions.
template <class vobj>
inline void random(GridParallelRNG &rng,Lattice<vobj> &l)
{
  rng.fill(l,rng._uniform);
}
|
||||
// Fill the lattice l using rng's per-site normal distributions.
template <class vobj>
inline void gaussian(GridParallelRNG &rng,Lattice<vobj> &l)
{
  rng.fill(l,rng._gaussian);
}
|
||||
// Fill the lattice l using rng's per-site discrete (bernoulli) distributions.
template <class vobj>
inline void bernoulli(GridParallelRNG &rng,Lattice<vobj> &l)
{
  rng.fill(l,rng._bernoulli);
}
|
||||
|
||||
// Fill the object l using the serial rng's uniform real distribution.
template <class sobj>
inline void random(GridSerialRNG &rng,sobj &l)
{
  rng.fill(l,rng._uniform );
}
|
||||
// Fill the object l using the serial rng's normal distribution.
template <class sobj>
inline void gaussian(GridSerialRNG &rng,sobj &l)
{
  rng.fill(l,rng._gaussian );
}
|
||||
// Fill the object l using the serial rng's discrete (bernoulli) distribution.
template <class sobj>
inline void bernoulli(GridSerialRNG &rng,sobj &l)
{
  rng.fill(l,rng._bernoulli);
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
69
Grid/lattice/Lattice_trace.h
Normal file
69
Grid/lattice/Lattice_trace.h
Normal file
@ -0,0 +1,69 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_trace.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_TRACE_H
|
||||
#define GRID_LATTICE_TRACE_H
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Tracing, transposing, peeking, poking
|
||||
///////////////////////////////////////////////
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Trace
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
inline auto trace(const Lattice<vobj> &lhs) -> Lattice<decltype(trace(vobj()))>
|
||||
{
|
||||
Lattice<decltype(trace(vobj()))> ret(lhs.Grid());
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
accelerator_loop( ss, lhs_v, {
|
||||
ret_v[ss] = trace(lhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Trace Index level dependent operation
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<int Index,class vobj>
|
||||
inline auto TraceIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<Index>(vobj()))>
|
||||
{
|
||||
Lattice<decltype(traceIndex<Index>(vobj()))> ret(lhs.Grid());
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
accelerator_loop( ss, lhs_v, {
|
||||
ret_v[ss] = traceIndex<Index>(lhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
||||
|
1108
Grid/lattice/Lattice_transfer.h
Normal file
1108
Grid/lattice/Lattice_transfer.h
Normal file
File diff suppressed because it is too large
Load Diff
68
Grid/lattice/Lattice_transpose.h
Normal file
68
Grid/lattice/Lattice_transpose.h
Normal file
@ -0,0 +1,68 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_transpose.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_TRANSPOSE_H
|
||||
#define GRID_LATTICE_TRANSPOSE_H
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Transpose
|
||||
///////////////////////////////////////////////
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Transpose
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
inline Lattice<vobj> transpose(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs.Grid());
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
ret_v[ss] = transpose(lhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Index level dependent transpose
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<int Index,class vobj>
|
||||
inline auto TransposeIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(transposeIndex<Index>(vobj()))>
|
||||
{
|
||||
Lattice<decltype(transposeIndex<Index>(vobj()))> ret(lhs.Grid());
|
||||
auto ret_v = ret.View();
|
||||
auto lhs_v = lhs.View();
|
||||
accelerator_loop(ss,lhs_v,{
|
||||
ret_v[ss] = transposeIndex<Index>(lhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
80
Grid/lattice/Lattice_unary.h
Normal file
80
Grid/lattice/Lattice_unary.h
Normal file
@ -0,0 +1,80 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_unary.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_UNARY_H
|
||||
#define GRID_LATTICE_UNARY_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
// Site-wise power: returns a lattice on the same grid (and with the same
// checkerboard) as rhs_i whose value at every site is pow(site, y).
template<class obj> Lattice<obj> pow(const Lattice<obj> &rhs_i,RealD y){
  Lattice<obj> ret_i(rhs_i.Grid());
  // Record the checkerboard on the lattice that is returned, and do so before
  // any view is taken. Assigning it on a view object only reaches ret_i if
  // the view shares state with its lattice.
  // NOTE(review): View() is believed to hand back a detached accessor - confirm.
  ret_i.Checkerboard() = rhs_i.Checkerboard();
  auto rhs = rhs_i.View();
  auto ret = ret_i.View();
  accelerator_loop(ss,rhs,{
    ret[ss]=pow(rhs[ss],y);
  });
  return ret_i;
}
|
||||
// Site-wise modulus: returns a lattice on the same grid (and with the same
// checkerboard) as rhs_i whose value at every site is mod(site, y).
template<class obj> Lattice<obj> mod(const Lattice<obj> &rhs_i,Integer y){
  Lattice<obj> ret_i(rhs_i.Grid());
  // Record the checkerboard on the lattice that is returned, and do so before
  // any view is taken. Assigning it on a view object only reaches ret_i if
  // the view shares state with its lattice.
  // NOTE(review): View() is believed to hand back a detached accessor - confirm.
  ret_i.Checkerboard() = rhs_i.Checkerboard();
  auto rhs = rhs_i.View();
  auto ret = ret_i.View();
  accelerator_loop(ss,rhs,{
    ret[ss]=mod(rhs[ss],y);
  });
  return ret_i;
}
|
||||
|
||||
// Site-wise integer division: returns a lattice on the same grid (and with
// the same checkerboard) as rhs_i whose value at every site is div(site, y).
template<class obj> Lattice<obj> div(const Lattice<obj> &rhs_i,Integer y){
  Lattice<obj> ret_i(rhs_i.Grid());
  // Record the checkerboard on the lattice that is returned, and do so before
  // any view is taken. Assigning it on a view object only reaches ret_i if
  // the view shares state with its lattice.
  // NOTE(review): View() is believed to hand back a detached accessor - confirm.
  ret_i.Checkerboard() = rhs_i.Checkerboard();
  auto ret = ret_i.View();
  auto rhs = rhs_i.View();
  accelerator_loop(ss,rhs,{
    ret[ss]=div(rhs[ss],y);
  });
  return ret_i;
}
|
||||
|
||||
// Site-wise matrix exponential: returns a lattice on the same grid (and with
// the same checkerboard) as rhs_i whose value at every site is
// Exponentiate(site, alpha, Nexp). Nexp defaults to DEFAULT_MAT_EXP.
template<class obj> Lattice<obj> expMat(const Lattice<obj> &rhs_i, RealD alpha, Integer Nexp = DEFAULT_MAT_EXP){
  Lattice<obj> ret_i(rhs_i.Grid());
  // Record the checkerboard on the lattice that is returned, and do so before
  // any view is taken. Assigning it on a view object only reaches ret_i if
  // the view shares state with its lattice.
  // NOTE(review): View() is believed to hand back a detached accessor - confirm.
  ret_i.Checkerboard() = rhs_i.Checkerboard();
  auto rhs = rhs_i.View();
  auto ret = ret_i.View();
  accelerator_loop(ss,rhs,{
    ret[ss]=Exponentiate(rhs[ss],alpha, Nexp);
  });
  return ret_i;
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
86
Grid/lattice/Lattice_where.h
Normal file
86
Grid/lattice/Lattice_where.h
Normal file
@ -0,0 +1,86 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_where.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_WHERE_H
|
||||
#define GRID_LATTICE_WHERE_H
|
||||
namespace Grid {
|
||||
// Must implement the predicate gating the per-site choice between iftrue and iffalse.
|
||||
// Must be able to reduce the predicate down to a single vInteger per site.
|
||||
// Must be able to require the type be iScalar x iScalar x ....
|
||||
// give a GetVtype method in iScalar
|
||||
// and blow away the tensor structures.
|
||||
//
|
||||
template<class vobj,class iobj>
|
||||
// Per-lane select. For every outer site and every SIMD lane, ret receives the
// lane of iftrue where the corresponding predicate lane is non-zero and the
// lane of iffalse otherwise. All four lattices must be conformable.
// NOTE(review): this routine still addresses lattices through _grid / _odata
// and parallel_for - confirm these are still available on Lattice.
inline void whereWolf(Lattice<vobj> &ret,const Lattice<iobj> &predicate,Lattice<vobj> &iftrue,Lattice<vobj> &iffalse)
{
  conformable(iftrue,iffalse);
  conformable(iftrue,predicate);
  conformable(iftrue,ret);

  GridBase *grid=iftrue._grid;

  typedef typename vobj::scalar_object scalar_object;

  const int Nsimd = grid->Nsimd();

  parallel_for(int ss=0;ss<iftrue._grid->oSites(); ss++){

    // Scratch buffers are private to each iteration. Declared outside the
    // parallel loop they would be shared by, and raced on between, all threads.
    std::vector<Integer> mask(Nsimd);
    std::vector<scalar_object> truevals (Nsimd);
    std::vector<scalar_object> falsevals(Nsimd);

    extract(iftrue._odata[ss]   ,truevals);
    extract(iffalse._odata[ss]  ,falsevals);
    extract<vInteger,Integer>(TensorRemove(predicate._odata[ss]),mask);

    // falsevals doubles as the result: overwrite the lanes the mask selects.
    for(int s=0;s<Nsimd;s++){
      if (mask[s]) falsevals[s]=truevals[s];
    }

    merge(ret._odata[ss],falsevals);
  }
}
|
||||
|
||||
template<class vobj,class iobj>
|
||||
inline Lattice<vobj> whereWolf(const Lattice<iobj> &predicate,Lattice<vobj> &iftrue,Lattice<vobj> &iffalse)
|
||||
{
|
||||
conformable(iftrue,iffalse);
|
||||
conformable(iftrue,predicate);
|
||||
|
||||
Lattice<vobj> ret(iftrue._grid);
|
||||
|
||||
where(ret,predicate,iftrue,iffalse);
|
||||
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
#endif
|
Reference in New Issue
Block a user