mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-18 15:57:05 +01:00
Hadrons: moving Hadrons to root directory, build system improvements
This commit is contained in:
33
Grid/lattice/Lattice.h
Normal file
33
Grid/lattice/Lattice.h
Normal file
@ -0,0 +1,33 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/Lattice.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_H
|
||||
#define GRID_LATTICE_H
|
||||
|
||||
#include <Grid/lattice/Lattice_base.h>
|
||||
|
||||
#endif
|
466
Grid/lattice/Lattice_ET.h
Normal file
466
Grid/lattice/Lattice_ET.h
Normal file
@ -0,0 +1,466 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_ET.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_ET_H
|
||||
#define GRID_LATTICE_ET_H
|
||||
|
||||
#include <iostream>
|
||||
#include <tuple>
|
||||
#include <typeinfo>
|
||||
#include <vector>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
// Predicated where support
|
||||
////////////////////////////////////////////////////
|
||||
template <class iobj, class vobj, class robj>
|
||||
inline vobj predicatedWhere(const iobj &predicate, const vobj &iftrue,
|
||||
const robj &iffalse) {
|
||||
typename std::remove_const<vobj>::type ret;
|
||||
|
||||
typedef typename vobj::scalar_object scalar_object;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
const int Nsimd = vobj::vector_type::Nsimd();
|
||||
const int words = sizeof(vobj) / sizeof(vector_type);
|
||||
|
||||
std::vector<Integer> mask(Nsimd);
|
||||
std::vector<scalar_object> truevals(Nsimd);
|
||||
std::vector<scalar_object> falsevals(Nsimd);
|
||||
|
||||
extract(iftrue, truevals);
|
||||
extract(iffalse, falsevals);
|
||||
extract<vInteger, Integer>(TensorRemove(predicate), mask);
|
||||
|
||||
for (int s = 0; s < Nsimd; s++) {
|
||||
if (mask[s]) falsevals[s] = truevals[s];
|
||||
}
|
||||
|
||||
merge(ret, falsevals);
|
||||
return ret;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////
|
||||
// recursive evaluation of expressions; Could
|
||||
// switch to generic approach with variadics, a la
|
||||
// Antonin's Lat Sim but the repack to variadic with popped
|
||||
// from tuple is hideous; C++14 introduces std::make_index_sequence for this
|
||||
////////////////////////////////////////////
|
||||
|
||||
// leaf eval of lattice ; should enable if protect using traits
|
||||
|
||||
template <typename T>
|
||||
using is_lattice = std::is_base_of<LatticeBase, T>;
|
||||
|
||||
template <typename T>
|
||||
using is_lattice_expr = std::is_base_of<LatticeExpressionBase, T>;
|
||||
|
||||
template <typename T> using is_lattice_expr = std::is_base_of<LatticeExpressionBase,T >;
|
||||
|
||||
//Specialization of getVectorType for lattices
|
||||
template<typename T>
|
||||
struct getVectorType<Lattice<T> >{
|
||||
typedef typename Lattice<T>::vector_object type;
|
||||
};
|
||||
|
||||
template<class sobj>
|
||||
inline sobj eval(const unsigned int ss, const sobj &arg)
|
||||
{
|
||||
return arg;
|
||||
}
|
||||
template <class lobj>
|
||||
inline const lobj &eval(const unsigned int ss, const Lattice<lobj> &arg) {
|
||||
return arg._odata[ss];
|
||||
}
|
||||
|
||||
// handle nodes in syntax tree
|
||||
template <typename Op, typename T1>
|
||||
auto inline eval(
|
||||
const unsigned int ss,
|
||||
const LatticeUnaryExpression<Op, T1> &expr) // eval one operand
|
||||
-> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)))) {
|
||||
return expr.first.func(eval(ss, std::get<0>(expr.second)));
|
||||
}
|
||||
|
||||
template <typename Op, typename T1, typename T2>
|
||||
auto inline eval(
|
||||
const unsigned int ss,
|
||||
const LatticeBinaryExpression<Op, T1, T2> &expr) // eval two operands
|
||||
-> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)),
|
||||
eval(ss, std::get<1>(expr.second)))) {
|
||||
return expr.first.func(eval(ss, std::get<0>(expr.second)),
|
||||
eval(ss, std::get<1>(expr.second)));
|
||||
}
|
||||
|
||||
template <typename Op, typename T1, typename T2, typename T3>
|
||||
auto inline eval(const unsigned int ss,
|
||||
const LatticeTrinaryExpression<Op, T1, T2, T3>
|
||||
&expr) // eval three operands
|
||||
-> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)),
|
||||
eval(ss, std::get<1>(expr.second)),
|
||||
eval(ss, std::get<2>(expr.second)))) {
|
||||
return expr.first.func(eval(ss, std::get<0>(expr.second)),
|
||||
eval(ss, std::get<1>(expr.second)),
|
||||
eval(ss, std::get<2>(expr.second)));
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Obtain the grid from an expression, ensuring conformable. This must follow a
|
||||
// tree recursion
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template <class T1,
|
||||
typename std::enable_if<is_lattice<T1>::value, T1>::type * = nullptr>
|
||||
inline void GridFromExpression(GridBase *&grid, const T1 &lat) // Lattice leaf
|
||||
{
|
||||
if (grid) {
|
||||
conformable(grid, lat._grid);
|
||||
}
|
||||
grid = lat._grid;
|
||||
}
|
||||
template <class T1,
|
||||
typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr>
|
||||
inline void GridFromExpression(GridBase *&grid,
|
||||
const T1 ¬lat) // non-lattice leaf
|
||||
{}
|
||||
template <typename Op, typename T1>
|
||||
inline void GridFromExpression(GridBase *&grid,
|
||||
const LatticeUnaryExpression<Op, T1> &expr) {
|
||||
GridFromExpression(grid, std::get<0>(expr.second)); // recurse
|
||||
}
|
||||
|
||||
template <typename Op, typename T1, typename T2>
|
||||
inline void GridFromExpression(
|
||||
GridBase *&grid, const LatticeBinaryExpression<Op, T1, T2> &expr) {
|
||||
GridFromExpression(grid, std::get<0>(expr.second)); // recurse
|
||||
GridFromExpression(grid, std::get<1>(expr.second));
|
||||
}
|
||||
template <typename Op, typename T1, typename T2, typename T3>
|
||||
inline void GridFromExpression(
|
||||
GridBase *&grid, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr) {
|
||||
GridFromExpression(grid, std::get<0>(expr.second)); // recurse
|
||||
GridFromExpression(grid, std::get<1>(expr.second));
|
||||
GridFromExpression(grid, std::get<2>(expr.second));
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Obtain the CB from an expression, ensuring conformable. This must follow a
|
||||
// tree recursion
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template <class T1,
|
||||
typename std::enable_if<is_lattice<T1>::value, T1>::type * = nullptr>
|
||||
inline void CBFromExpression(int &cb, const T1 &lat) // Lattice leaf
|
||||
{
|
||||
if ((cb == Odd) || (cb == Even)) {
|
||||
assert(cb == lat.checkerboard);
|
||||
}
|
||||
cb = lat.checkerboard;
|
||||
// std::cout<<GridLogMessage<<"Lattice leaf cb "<<cb<<std::endl;
|
||||
}
|
||||
template <class T1,
|
||||
typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr>
|
||||
inline void CBFromExpression(int &cb, const T1 ¬lat) // non-lattice leaf
|
||||
{
|
||||
// std::cout<<GridLogMessage<<"Non lattice leaf cb"<<cb<<std::endl;
|
||||
}
|
||||
template <typename Op, typename T1>
|
||||
inline void CBFromExpression(int &cb,
|
||||
const LatticeUnaryExpression<Op, T1> &expr) {
|
||||
CBFromExpression(cb, std::get<0>(expr.second)); // recurse
|
||||
// std::cout<<GridLogMessage<<"Unary node cb "<<cb<<std::endl;
|
||||
}
|
||||
|
||||
template <typename Op, typename T1, typename T2>
|
||||
inline void CBFromExpression(int &cb,
|
||||
const LatticeBinaryExpression<Op, T1, T2> &expr) {
|
||||
CBFromExpression(cb, std::get<0>(expr.second)); // recurse
|
||||
CBFromExpression(cb, std::get<1>(expr.second));
|
||||
// std::cout<<GridLogMessage<<"Binary node cb "<<cb<<std::endl;
|
||||
}
|
||||
template <typename Op, typename T1, typename T2, typename T3>
|
||||
inline void CBFromExpression(
|
||||
int &cb, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr) {
|
||||
CBFromExpression(cb, std::get<0>(expr.second)); // recurse
|
||||
CBFromExpression(cb, std::get<1>(expr.second));
|
||||
CBFromExpression(cb, std::get<2>(expr.second));
|
||||
// std::cout<<GridLogMessage<<"Trinary node cb "<<cb<<std::endl;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Unary operators and funcs
|
||||
////////////////////////////////////////////
|
||||
#define GridUnopClass(name, ret) \
|
||||
template <class arg> \
|
||||
struct name { \
|
||||
static auto inline func(const arg a) -> decltype(ret) { return ret; } \
|
||||
};
|
||||
|
||||
GridUnopClass(UnarySub, -a);
|
||||
GridUnopClass(UnaryNot, Not(a));
|
||||
GridUnopClass(UnaryAdj, adj(a));
|
||||
GridUnopClass(UnaryConj, conjugate(a));
|
||||
GridUnopClass(UnaryTrace, trace(a));
|
||||
GridUnopClass(UnaryTranspose, transpose(a));
|
||||
GridUnopClass(UnaryTa, Ta(a));
|
||||
GridUnopClass(UnaryProjectOnGroup, ProjectOnGroup(a));
|
||||
GridUnopClass(UnaryReal, real(a));
|
||||
GridUnopClass(UnaryImag, imag(a));
|
||||
GridUnopClass(UnaryToReal, toReal(a));
|
||||
GridUnopClass(UnaryToComplex, toComplex(a));
|
||||
GridUnopClass(UnaryTimesI, timesI(a));
|
||||
GridUnopClass(UnaryTimesMinusI, timesMinusI(a));
|
||||
GridUnopClass(UnaryAbs, abs(a));
|
||||
GridUnopClass(UnarySqrt, sqrt(a));
|
||||
GridUnopClass(UnaryRsqrt, rsqrt(a));
|
||||
GridUnopClass(UnarySin, sin(a));
|
||||
GridUnopClass(UnaryCos, cos(a));
|
||||
GridUnopClass(UnaryAsin, asin(a));
|
||||
GridUnopClass(UnaryAcos, acos(a));
|
||||
GridUnopClass(UnaryLog, log(a));
|
||||
GridUnopClass(UnaryExp, exp(a));
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Binary operators
|
||||
////////////////////////////////////////////
|
||||
#define GridBinOpClass(name, combination) \
|
||||
template <class left, class right> \
|
||||
struct name { \
|
||||
static auto inline func(const left &lhs, const right &rhs) \
|
||||
-> decltype(combination) const { \
|
||||
return combination; \
|
||||
} \
|
||||
}
|
||||
GridBinOpClass(BinaryAdd, lhs + rhs);
|
||||
GridBinOpClass(BinarySub, lhs - rhs);
|
||||
GridBinOpClass(BinaryMul, lhs *rhs);
|
||||
GridBinOpClass(BinaryDiv, lhs /rhs);
|
||||
|
||||
GridBinOpClass(BinaryAnd, lhs &rhs);
|
||||
GridBinOpClass(BinaryOr, lhs | rhs);
|
||||
GridBinOpClass(BinaryAndAnd, lhs &&rhs);
|
||||
GridBinOpClass(BinaryOrOr, lhs || rhs);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
// Trinary conditional op
|
||||
////////////////////////////////////////////////////
|
||||
#define GridTrinOpClass(name, combination) \
|
||||
template <class predicate, class left, class right> \
|
||||
struct name { \
|
||||
static auto inline func(const predicate &pred, const left &lhs, \
|
||||
const right &rhs) -> decltype(combination) const { \
|
||||
return combination; \
|
||||
} \
|
||||
}
|
||||
|
||||
GridTrinOpClass(
|
||||
TrinaryWhere,
|
||||
(predicatedWhere<predicate, typename std::remove_reference<left>::type,
|
||||
typename std::remove_reference<right>::type>(pred, lhs,
|
||||
rhs)));
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Operator syntactical glue
|
||||
////////////////////////////////////////////
|
||||
|
||||
#define GRID_UNOP(name) name<decltype(eval(0, arg))>
|
||||
#define GRID_BINOP(name) name<decltype(eval(0, lhs)), decltype(eval(0, rhs))>
|
||||
#define GRID_TRINOP(name) \
|
||||
name<decltype(eval(0, pred)), decltype(eval(0, lhs)), decltype(eval(0, rhs))>
|
||||
|
||||
#define GRID_DEF_UNOP(op, name) \
|
||||
template <typename T1, \
|
||||
typename std::enable_if<is_lattice<T1>::value || \
|
||||
is_lattice_expr<T1>::value, \
|
||||
T1>::type * = nullptr> \
|
||||
inline auto op(const T1 &arg) \
|
||||
->decltype(LatticeUnaryExpression<GRID_UNOP(name), const T1 &>( \
|
||||
std::make_pair(GRID_UNOP(name)(), std::forward_as_tuple(arg)))) { \
|
||||
return LatticeUnaryExpression<GRID_UNOP(name), const T1 &>( \
|
||||
std::make_pair(GRID_UNOP(name)(), std::forward_as_tuple(arg))); \
|
||||
}
|
||||
|
||||
#define GRID_BINOP_LEFT(op, name) \
|
||||
template <typename T1, typename T2, \
|
||||
typename std::enable_if<is_lattice<T1>::value || \
|
||||
is_lattice_expr<T1>::value, \
|
||||
T1>::type * = nullptr> \
|
||||
inline auto op(const T1 &lhs, const T2 &rhs) \
|
||||
->decltype( \
|
||||
LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
|
||||
std::make_pair(GRID_BINOP(name)(), \
|
||||
std::forward_as_tuple(lhs, rhs)))) { \
|
||||
return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
|
||||
std::make_pair(GRID_BINOP(name)(), std::forward_as_tuple(lhs, rhs))); \
|
||||
}
|
||||
|
||||
#define GRID_BINOP_RIGHT(op, name) \
|
||||
template <typename T1, typename T2, \
|
||||
typename std::enable_if<!is_lattice<T1>::value && \
|
||||
!is_lattice_expr<T1>::value, \
|
||||
T1>::type * = nullptr, \
|
||||
typename std::enable_if<is_lattice<T2>::value || \
|
||||
is_lattice_expr<T2>::value, \
|
||||
T2>::type * = nullptr> \
|
||||
inline auto op(const T1 &lhs, const T2 &rhs) \
|
||||
->decltype( \
|
||||
LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
|
||||
std::make_pair(GRID_BINOP(name)(), \
|
||||
std::forward_as_tuple(lhs, rhs)))) { \
|
||||
return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
|
||||
std::make_pair(GRID_BINOP(name)(), std::forward_as_tuple(lhs, rhs))); \
|
||||
}
|
||||
|
||||
#define GRID_DEF_BINOP(op, name) \
|
||||
GRID_BINOP_LEFT(op, name); \
|
||||
GRID_BINOP_RIGHT(op, name);
|
||||
|
||||
#define GRID_DEF_TRINOP(op, name) \
|
||||
template <typename T1, typename T2, typename T3> \
|
||||
inline auto op(const T1 &pred, const T2 &lhs, const T3 &rhs) \
|
||||
->decltype( \
|
||||
LatticeTrinaryExpression<GRID_TRINOP(name), const T1 &, const T2 &, \
|
||||
const T3 &>(std::make_pair( \
|
||||
GRID_TRINOP(name)(), std::forward_as_tuple(pred, lhs, rhs)))) { \
|
||||
return LatticeTrinaryExpression<GRID_TRINOP(name), const T1 &, const T2 &, \
|
||||
const T3 &>(std::make_pair( \
|
||||
GRID_TRINOP(name)(), std::forward_as_tuple(pred, lhs, rhs))); \
|
||||
}
|
||||
////////////////////////
|
||||
// Operator definitions
|
||||
////////////////////////
|
||||
|
||||
GRID_DEF_UNOP(operator-, UnarySub);
|
||||
GRID_DEF_UNOP(Not, UnaryNot);
|
||||
GRID_DEF_UNOP(operator!, UnaryNot);
|
||||
GRID_DEF_UNOP(adj, UnaryAdj);
|
||||
GRID_DEF_UNOP(conjugate, UnaryConj);
|
||||
GRID_DEF_UNOP(trace, UnaryTrace);
|
||||
GRID_DEF_UNOP(transpose, UnaryTranspose);
|
||||
GRID_DEF_UNOP(Ta, UnaryTa);
|
||||
GRID_DEF_UNOP(ProjectOnGroup, UnaryProjectOnGroup);
|
||||
GRID_DEF_UNOP(real, UnaryReal);
|
||||
GRID_DEF_UNOP(imag, UnaryImag);
|
||||
GRID_DEF_UNOP(toReal, UnaryToReal);
|
||||
GRID_DEF_UNOP(toComplex, UnaryToComplex);
|
||||
GRID_DEF_UNOP(timesI, UnaryTimesI);
|
||||
GRID_DEF_UNOP(timesMinusI, UnaryTimesMinusI);
|
||||
GRID_DEF_UNOP(abs, UnaryAbs); // abs overloaded in cmath C++98; DON'T do the
|
||||
// abs-fabs-dabs-labs thing
|
||||
GRID_DEF_UNOP(sqrt, UnarySqrt);
|
||||
GRID_DEF_UNOP(rsqrt, UnaryRsqrt);
|
||||
GRID_DEF_UNOP(sin, UnarySin);
|
||||
GRID_DEF_UNOP(cos, UnaryCos);
|
||||
GRID_DEF_UNOP(asin, UnaryAsin);
|
||||
GRID_DEF_UNOP(acos, UnaryAcos);
|
||||
GRID_DEF_UNOP(log, UnaryLog);
|
||||
GRID_DEF_UNOP(exp, UnaryExp);
|
||||
|
||||
GRID_DEF_BINOP(operator+, BinaryAdd);
|
||||
GRID_DEF_BINOP(operator-, BinarySub);
|
||||
GRID_DEF_BINOP(operator*, BinaryMul);
|
||||
GRID_DEF_BINOP(operator/, BinaryDiv);
|
||||
|
||||
GRID_DEF_BINOP(operator&, BinaryAnd);
|
||||
GRID_DEF_BINOP(operator|, BinaryOr);
|
||||
GRID_DEF_BINOP(operator&&, BinaryAndAnd);
|
||||
GRID_DEF_BINOP(operator||, BinaryOrOr);
|
||||
|
||||
GRID_DEF_TRINOP(where, TrinaryWhere);
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Closure convenience to force expression to evaluate
|
||||
/////////////////////////////////////////////////////////////
|
||||
template <class Op, class T1>
|
||||
auto closure(const LatticeUnaryExpression<Op, T1> &expr)
|
||||
-> Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second))))> {
|
||||
Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second))))> ret(
|
||||
expr);
|
||||
return ret;
|
||||
}
|
||||
template <class Op, class T1, class T2>
|
||||
auto closure(const LatticeBinaryExpression<Op, T1, T2> &expr)
|
||||
-> Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
|
||||
eval(0, std::get<1>(expr.second))))> {
|
||||
Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
|
||||
eval(0, std::get<1>(expr.second))))>
|
||||
ret(expr);
|
||||
return ret;
|
||||
}
|
||||
template <class Op, class T1, class T2, class T3>
|
||||
auto closure(const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
|
||||
-> Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
|
||||
eval(0, std::get<1>(expr.second)),
|
||||
eval(0, std::get<2>(expr.second))))> {
|
||||
Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
|
||||
eval(0, std::get<1>(expr.second)),
|
||||
eval(0, std::get<2>(expr.second))))>
|
||||
ret(expr);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#undef GRID_UNOP
|
||||
#undef GRID_BINOP
|
||||
#undef GRID_TRINOP
|
||||
|
||||
#undef GRID_DEF_UNOP
|
||||
#undef GRID_DEF_BINOP
|
||||
#undef GRID_DEF_TRINOP
|
||||
}
|
||||
|
||||
#if 0
|
||||
using namespace Grid;
|
||||
|
||||
int main(int argc,char **argv){
|
||||
|
||||
Lattice<double> v1(16);
|
||||
Lattice<double> v2(16);
|
||||
Lattice<double> v3(16);
|
||||
|
||||
BinaryAdd<double,double> tmp;
|
||||
LatticeBinaryExpression<BinaryAdd<double,double>,Lattice<double> &,Lattice<double> &>
|
||||
expr(std::make_pair(tmp,
|
||||
std::forward_as_tuple(v1,v2)));
|
||||
tmp.func(eval(0,v1),eval(0,v2));
|
||||
|
||||
auto var = v1+v2;
|
||||
std::cout<<GridLogMessage<<typeid(var).name()<<std::endl;
|
||||
|
||||
v3=v1+v2;
|
||||
v3=v1+v2+v1*v2;
|
||||
};
|
||||
|
||||
void testit(Lattice<double> &v1,Lattice<double> &v2,Lattice<double> &v3)
|
||||
{
|
||||
v3=v1+v2+v1*v2;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
255
Grid/lattice/Lattice_arith.h
Normal file
255
Grid/lattice/Lattice_arith.h
Normal file
@ -0,0 +1,255 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_arith.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_ARITH_H
|
||||
#define GRID_LATTICE_ARITH_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// avoid copy back routines for mult, mac, sub, add
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
mult(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
mult(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
mac(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
mac(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
sub(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
sub(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
add(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
add(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// avoid copy back routines for mult, mac, sub, add
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(lhs,ret);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
obj1 tmp;
|
||||
mult(&tmp,&lhs._odata[ss],&rhs);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
}
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(ret,lhs);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
obj1 tmp;
|
||||
mac(&tmp,&lhs._odata[ss],&rhs);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
}
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(ret,lhs);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
sub(&tmp,&lhs._odata[ss],&rhs);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
sub(&ret._odata[ss],&lhs._odata[ss],&rhs);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(lhs,ret);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
add(&tmp,&lhs._odata[ss],&rhs);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
add(&ret._odata[ss],&lhs._odata[ss],&rhs);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// avoid copy back routines for mult, mac, sub, add
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void mult(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
mult(&tmp,&lhs,&rhs._odata[ss]);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
mult(&ret._odata[ss],&lhs,&rhs._odata[ss]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
mac(&tmp,&lhs,&rhs._odata[ss]);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
mac(&ret._odata[ss],&lhs,&rhs._odata[ss]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void sub(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
sub(&tmp,&lhs,&rhs._odata[ss]);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
sub(&ret._odata[ss],&lhs,&rhs._odata[ss]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void add(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
add(&tmp,&lhs,&rhs._odata[ss]);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
add(&ret._odata[ss],&lhs,&rhs._odata[ss]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template<class sobj,class vobj> strong_inline
|
||||
void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
|
||||
ret.checkerboard = x.checkerboard;
|
||||
conformable(ret,x);
|
||||
conformable(x,y);
|
||||
parallel_for(int ss=0;ss<x._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
vobj tmp = a*x._odata[ss]+y._odata[ss];
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
ret._odata[ss]=a*x._odata[ss]+y._odata[ss];
|
||||
#endif
|
||||
}
|
||||
}
|
||||
template<class sobj,class vobj> strong_inline
|
||||
void axpby(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
|
||||
ret.checkerboard = x.checkerboard;
|
||||
conformable(ret,x);
|
||||
conformable(x,y);
|
||||
parallel_for(int ss=0;ss<x._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
vobj tmp = a*x._odata[ss]+b*y._odata[ss];
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
ret._odata[ss]=a*x._odata[ss]+b*y._odata[ss];
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template<class sobj,class vobj> strong_inline
|
||||
RealD axpy_norm(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
|
||||
return axpy_norm_fast(ret,a,x,y);
|
||||
}
|
||||
template<class sobj,class vobj> strong_inline
|
||||
RealD axpby_norm(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
|
||||
return axpby_norm_fast(ret,a,b,x,y);
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
375
Grid/lattice/Lattice_base.h
Normal file
375
Grid/lattice/Lattice_base.h
Normal file
@ -0,0 +1,375 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_base.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_BASE_H
|
||||
#define GRID_LATTICE_BASE_H
|
||||
|
||||
#define STREAMING_STORES
|
||||
|
||||
namespace Grid {
|
||||
|
||||
// TODO:
|
||||
// mac,real,imag
|
||||
|
||||
// Functionality:
|
||||
// -=,+=,*=,()
|
||||
// add,+,sub,-,mult,mac,*
|
||||
// adj,conjugate
|
||||
// real,imag
|
||||
// transpose,transposeIndex
|
||||
// trace,traceIndex
|
||||
// peekIndex
|
||||
// innerProduct,outerProduct,
|
||||
// localNorm2
|
||||
// localInnerProduct
|
||||
|
||||
extern int GridCshiftPermuteMap[4][16];
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Basic expressions used in Expression Template
|
||||
////////////////////////////////////////////////
|
||||
|
||||
class LatticeBase
|
||||
{
|
||||
public:
|
||||
virtual ~LatticeBase(void) = default;
|
||||
GridBase *_grid;
|
||||
};
|
||||
|
||||
class LatticeExpressionBase {};
|
||||
|
||||
template <typename Op, typename T1>
|
||||
class LatticeUnaryExpression : public std::pair<Op,std::tuple<T1> > , public LatticeExpressionBase {
|
||||
public:
|
||||
LatticeUnaryExpression(const std::pair<Op,std::tuple<T1> > &arg): std::pair<Op,std::tuple<T1> >(arg) {};
|
||||
};
|
||||
|
||||
template <typename Op, typename T1, typename T2>
|
||||
class LatticeBinaryExpression : public std::pair<Op,std::tuple<T1,T2> > , public LatticeExpressionBase {
|
||||
public:
|
||||
LatticeBinaryExpression(const std::pair<Op,std::tuple<T1,T2> > &arg): std::pair<Op,std::tuple<T1,T2> >(arg) {};
|
||||
};
|
||||
|
||||
template <typename Op, typename T1, typename T2, typename T3>
|
||||
class LatticeTrinaryExpression :public std::pair<Op,std::tuple<T1,T2,T3> >, public LatticeExpressionBase {
|
||||
public:
|
||||
LatticeTrinaryExpression(const std::pair<Op,std::tuple<T1,T2,T3> > &arg): std::pair<Op,std::tuple<T1,T2,T3> >(arg) {};
|
||||
};
|
||||
|
||||
void inline conformable(GridBase *lhs,GridBase *rhs)
|
||||
{
|
||||
assert(lhs == rhs);
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
class Lattice : public LatticeBase
|
||||
{
|
||||
public:
|
||||
int checkerboard;
|
||||
Vector<vobj> _odata;
|
||||
|
||||
// to pthread need a computable loop where loop induction is not required
|
||||
int begin(void) { return 0;};
|
||||
int end(void) { return _odata.size(); }
|
||||
vobj & operator[](int i) { return _odata[i]; };
|
||||
const vobj & operator[](int i) const { return _odata[i]; };
|
||||
|
||||
public:
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef vobj vector_object;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Expression Template closure support
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template <typename Op, typename T1> strong_inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr)
|
||||
{
|
||||
GridBase *egrid(nullptr);
|
||||
GridFromExpression(egrid,expr);
|
||||
assert(egrid!=nullptr);
|
||||
conformable(_grid,egrid);
|
||||
|
||||
int cb=-1;
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
checkerboard=cb;
|
||||
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
vobj tmp = eval(ss,expr);
|
||||
vstream(_odata[ss] ,tmp);
|
||||
#else
|
||||
_odata[ss]=eval(ss,expr);
|
||||
#endif
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
template <typename Op, typename T1,typename T2> strong_inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr)
|
||||
{
|
||||
GridBase *egrid(nullptr);
|
||||
GridFromExpression(egrid,expr);
|
||||
assert(egrid!=nullptr);
|
||||
conformable(_grid,egrid);
|
||||
|
||||
int cb=-1;
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
checkerboard=cb;
|
||||
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
vobj tmp = eval(ss,expr);
|
||||
vstream(_odata[ss] ,tmp);
|
||||
#else
|
||||
_odata[ss]=eval(ss,expr);
|
||||
#endif
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
template <typename Op, typename T1,typename T2,typename T3> strong_inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr)
|
||||
{
|
||||
GridBase *egrid(nullptr);
|
||||
GridFromExpression(egrid,expr);
|
||||
assert(egrid!=nullptr);
|
||||
conformable(_grid,egrid);
|
||||
|
||||
int cb=-1;
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
checkerboard=cb;
|
||||
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
//vobj tmp = eval(ss,expr);
|
||||
vstream(_odata[ss] ,eval(ss,expr));
|
||||
#else
|
||||
_odata[ss] = eval(ss,expr);
|
||||
#endif
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
//GridFromExpression is tricky to do
|
||||
template<class Op,class T1>
|
||||
Lattice(const LatticeUnaryExpression<Op,T1> & expr) {
|
||||
_grid = nullptr;
|
||||
GridFromExpression(_grid,expr);
|
||||
assert(_grid!=nullptr);
|
||||
|
||||
int cb=-1;
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
checkerboard=cb;
|
||||
|
||||
_odata.resize(_grid->oSites());
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
vobj tmp = eval(ss,expr);
|
||||
vstream(_odata[ss] ,tmp);
|
||||
#else
|
||||
_odata[ss]=eval(ss,expr);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
template<class Op,class T1, class T2>
|
||||
Lattice(const LatticeBinaryExpression<Op,T1,T2> & expr) {
|
||||
_grid = nullptr;
|
||||
GridFromExpression(_grid,expr);
|
||||
assert(_grid!=nullptr);
|
||||
|
||||
int cb=-1;
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
checkerboard=cb;
|
||||
|
||||
_odata.resize(_grid->oSites());
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
vobj tmp = eval(ss,expr);
|
||||
vstream(_odata[ss] ,tmp);
|
||||
#else
|
||||
_odata[ss]=eval(ss,expr);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
template<class Op,class T1, class T2, class T3>
|
||||
Lattice(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr) {
|
||||
_grid = nullptr;
|
||||
GridFromExpression(_grid,expr);
|
||||
assert(_grid!=nullptr);
|
||||
|
||||
int cb=-1;
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
checkerboard=cb;
|
||||
|
||||
_odata.resize(_grid->oSites());
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
vstream(_odata[ss] ,eval(ss,expr));
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// Constructor requires "grid" passed.
|
||||
// what about a default grid?
|
||||
//////////////////////////////////////////////////////////////////
|
||||
Lattice(GridBase *grid) : _odata(grid->oSites()) {
|
||||
_grid = grid;
|
||||
// _odata.reserve(_grid->oSites());
|
||||
// _odata.resize(_grid->oSites());
|
||||
// std::cout << "Constructing lattice object with Grid pointer "<<_grid<<std::endl;
|
||||
assert((((uint64_t)&_odata[0])&0xF) ==0);
|
||||
checkerboard=0;
|
||||
}
|
||||
|
||||
Lattice(const Lattice& r){ // copy constructor
|
||||
_grid = r._grid;
|
||||
checkerboard = r.checkerboard;
|
||||
_odata.resize(_grid->oSites());// essential
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
_odata[ss]=r._odata[ss];
|
||||
}
|
||||
}
|
||||
|
||||
Lattice(Lattice&& r){ // move constructor
|
||||
_grid = r._grid;
|
||||
checkerboard = r.checkerboard;
|
||||
_odata=std::move(r._odata);
|
||||
}
|
||||
|
||||
inline Lattice<vobj> & operator = (Lattice<vobj> && r)
|
||||
{
|
||||
_grid = r._grid;
|
||||
checkerboard = r.checkerboard;
|
||||
_odata =std::move(r._odata);
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
|
||||
_grid = r._grid;
|
||||
checkerboard = r.checkerboard;
|
||||
_odata.resize(_grid->oSites());// essential
|
||||
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
_odata[ss]=r._odata[ss];
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
|
||||
this->checkerboard = r.checkerboard;
|
||||
conformable(*this,r);
|
||||
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
this->_odata[ss]=r._odata[ss];
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
virtual ~Lattice(void) = default;
|
||||
|
||||
void reset(GridBase* grid) {
|
||||
if (_grid != grid) {
|
||||
_grid = grid;
|
||||
_odata.resize(grid->oSites());
|
||||
checkerboard = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
this->_odata[ss]=r;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
// *=,+=,-= operators inherit behvour from correspond */+/- operation
|
||||
template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) {
|
||||
*this = (*this)*r;
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<class T> strong_inline Lattice<vobj> &operator -=(const T &r) {
|
||||
*this = (*this)-r;
|
||||
return *this;
|
||||
}
|
||||
template<class T> strong_inline Lattice<vobj> &operator +=(const T &r) {
|
||||
*this = (*this)+r;
|
||||
return *this;
|
||||
}
|
||||
}; // class Lattice
|
||||
|
||||
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
|
||||
std::vector<int> gcoor;
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
sobj ss;
|
||||
for(int g=0;g<o._grid->_gsites;g++){
|
||||
o._grid->GlobalIndexToGlobalCoor(g,gcoor);
|
||||
peekSite(ss,o,gcoor);
|
||||
stream<<"[";
|
||||
for(int d=0;d<gcoor.size();d++){
|
||||
stream<<gcoor[d];
|
||||
if(d!=gcoor.size()-1) stream<<",";
|
||||
}
|
||||
stream<<"]\t";
|
||||
stream<<ss<<std::endl;
|
||||
}
|
||||
return stream;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
#include "Lattice_conformable.h"
|
||||
#define GRID_LATTICE_EXPRESSION_TEMPLATES
|
||||
#ifdef GRID_LATTICE_EXPRESSION_TEMPLATES
|
||||
#include "Lattice_ET.h"
|
||||
#else
|
||||
#include "Lattice_overload.h"
|
||||
#endif
|
||||
#include "Lattice_arith.h"
|
||||
#include "Lattice_trace.h"
|
||||
#include "Lattice_transpose.h"
|
||||
#include "Lattice_local.h"
|
||||
#include "Lattice_reduction.h"
|
||||
#include "Lattice_peekpoke.h"
|
||||
#include "Lattice_reality.h"
|
||||
#include "Lattice_comparison_utils.h"
|
||||
#include "Lattice_comparison.h"
|
||||
#include "Lattice_coordinate.h"
|
||||
#include "Lattice_where.h"
|
||||
#include "Lattice_rng.h"
|
||||
#include "Lattice_unary.h"
|
||||
#include "Lattice_transfer.h"
|
||||
|
||||
|
||||
#endif
|
169
Grid/lattice/Lattice_comparison.h
Normal file
169
Grid/lattice/Lattice_comparison.h
Normal file
@ -0,0 +1,169 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_comparison.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_COMPARISON_H
|
||||
#define GRID_LATTICE_COMPARISON_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// relational operators
|
||||
//
|
||||
// Support <,>,<=,>=,==,!=
|
||||
//
|
||||
//Query supporting bitwise &, |, ^, !
|
||||
//Query supporting logical &&, ||,
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// compare lattice to lattice
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template<class vfunctor,class lobj,class robj>
|
||||
inline Lattice<vInteger> LLComparison(vfunctor op,const Lattice<lobj> &lhs,const Lattice<robj> &rhs)
|
||||
{
|
||||
Lattice<vInteger> ret(rhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=op(lhs._odata[ss],rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// compare lattice to scalar
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template<class vfunctor,class lobj,class robj>
|
||||
inline Lattice<vInteger> LSComparison(vfunctor op,const Lattice<lobj> &lhs,const robj &rhs)
|
||||
{
|
||||
Lattice<vInteger> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=op(lhs._odata[ss],rhs);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// compare scalar to lattice
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template<class vfunctor,class lobj,class robj>
|
||||
inline Lattice<vInteger> SLComparison(vfunctor op,const lobj &lhs,const Lattice<robj> &rhs)
|
||||
{
|
||||
Lattice<vInteger> ret(rhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=op(lhs._odata[ss],rhs);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Map to functors
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Less than
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator < (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vlt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator < (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(vlt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator < (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(vlt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
// Less than equal
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator <= (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vle<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator <= (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(vle<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator <= (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(vle<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
// Greater than
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator > (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vgt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator > (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(vgt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator > (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(vgt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
|
||||
// Greater than equal
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator >= (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vge<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator >= (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(vge<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator >= (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(vge<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
// equal
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator == (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(veq<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator == (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(veq<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator == (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(veq<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
|
||||
// not equal
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator != (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vne<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator != (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(vne<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator != (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(vne<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
}
|
||||
#endif
|
232
Grid/lattice/Lattice_comparison_utils.h
Normal file
232
Grid/lattice/Lattice_comparison_utils.h
Normal file
@ -0,0 +1,232 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_comparison_utils.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_COMPARISON_H
|
||||
#define GRID_COMPARISON_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
/////////////////////////////////////////
|
||||
// This implementation is a bit poor.
|
||||
//
|
||||
// Only support relational logical operations (<, > etc)
|
||||
// on scalar objects. Therefore can strip any tensor structures.
|
||||
//
|
||||
// Should guard this with isGridTensor<> enable if?
|
||||
/////////////////////////////////////////
|
||||
//
|
||||
// Generic list of functors
|
||||
//
|
||||
template<class lobj,class robj> class veq {
|
||||
public:
|
||||
vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) == (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vne {
|
||||
public:
|
||||
vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) != (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vlt {
|
||||
public:
|
||||
vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) < (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vle {
|
||||
public:
|
||||
vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) <= (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vgt {
|
||||
public:
|
||||
vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) > (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vge {
|
||||
public:
|
||||
vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) >= (rhs);
|
||||
}
|
||||
};
|
||||
|
||||
// Generic list of functors
|
||||
template<class lobj,class robj> class seq {
|
||||
public:
|
||||
Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) == (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class sne {
|
||||
public:
|
||||
Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) != (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class slt {
|
||||
public:
|
||||
Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) < (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class sle {
|
||||
public:
|
||||
Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) <= (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class sgt {
|
||||
public:
|
||||
Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) > (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class sge {
|
||||
public:
|
||||
Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) >= (rhs);
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Integer and real get extra relational functions.
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class sfunctor, class vsimd,IfNotComplex<vsimd> = 0>
|
||||
inline vInteger Comparison(sfunctor sop,const vsimd & lhs, const vsimd & rhs)
|
||||
{
|
||||
typedef typename vsimd::scalar_type scalar;
|
||||
std::vector<scalar> vlhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation
|
||||
std::vector<scalar> vrhs(vsimd::Nsimd());
|
||||
std::vector<Integer> vpred(vsimd::Nsimd());
|
||||
vInteger ret;
|
||||
extract<vsimd,scalar>(lhs,vlhs);
|
||||
extract<vsimd,scalar>(rhs,vrhs);
|
||||
for(int s=0;s<vsimd::Nsimd();s++){
|
||||
vpred[s] = sop(vlhs[s],vrhs[s]);
|
||||
}
|
||||
merge<vInteger,Integer>(ret,vpred);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class sfunctor, class vsimd,IfNotComplex<vsimd> = 0>
|
||||
inline vInteger Comparison(sfunctor sop,const vsimd & lhs, const typename vsimd::scalar_type & rhs)
|
||||
{
|
||||
typedef typename vsimd::scalar_type scalar;
|
||||
std::vector<scalar> vlhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation
|
||||
std::vector<Integer> vpred(vsimd::Nsimd());
|
||||
vInteger ret;
|
||||
extract<vsimd,scalar>(lhs,vlhs);
|
||||
for(int s=0;s<vsimd::Nsimd();s++){
|
||||
vpred[s] = sop(vlhs[s],rhs);
|
||||
}
|
||||
merge<vInteger,Integer>(ret,vpred);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class sfunctor, class vsimd,IfNotComplex<vsimd> = 0>
|
||||
inline vInteger Comparison(sfunctor sop,const typename vsimd::scalar_type & lhs, const vsimd & rhs)
|
||||
{
|
||||
typedef typename vsimd::scalar_type scalar;
|
||||
std::vector<scalar> vrhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation
|
||||
std::vector<Integer> vpred(vsimd::Nsimd());
|
||||
vInteger ret;
|
||||
extract<vsimd,scalar>(rhs,vrhs);
|
||||
for(int s=0;s<vsimd::Nsimd();s++){
|
||||
vpred[s] = sop(lhs,vrhs[s]);
|
||||
}
|
||||
merge<vInteger,Integer>(ret,vpred);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define DECLARE_RELATIONAL_EQ(op,functor) \
|
||||
template<class vsimd,IfSimd<vsimd> = 0>\
|
||||
inline vInteger operator op (const vsimd & lhs, const vsimd & rhs)\
|
||||
{\
|
||||
typedef typename vsimd::scalar_type scalar;\
|
||||
return Comparison(functor<scalar,scalar>(),lhs,rhs);\
|
||||
}\
|
||||
template<class vsimd,IfSimd<vsimd> = 0>\
|
||||
inline vInteger operator op (const vsimd & lhs, const typename vsimd::scalar_type & rhs) \
|
||||
{\
|
||||
typedef typename vsimd::scalar_type scalar;\
|
||||
return Comparison(functor<scalar,scalar>(),lhs,rhs);\
|
||||
}\
|
||||
template<class vsimd,IfSimd<vsimd> = 0>\
|
||||
inline vInteger operator op (const typename vsimd::scalar_type & lhs, const vsimd & rhs) \
|
||||
{\
|
||||
typedef typename vsimd::scalar_type scalar;\
|
||||
return Comparison(functor<scalar,scalar>(),lhs,rhs);\
|
||||
}\
|
||||
template<class vsimd>\
|
||||
inline vInteger operator op(const iScalar<vsimd> &lhs,const typename vsimd::scalar_type &rhs) \
|
||||
{ \
|
||||
return lhs._internal op rhs; \
|
||||
} \
|
||||
template<class vsimd>\
|
||||
inline vInteger operator op(const typename vsimd::scalar_type &lhs,const iScalar<vsimd> &rhs) \
|
||||
{ \
|
||||
return lhs op rhs._internal; \
|
||||
} \
|
||||
|
||||
#define DECLARE_RELATIONAL(op,functor) \
|
||||
DECLARE_RELATIONAL_EQ(op,functor) \
|
||||
template<class vsimd>\
|
||||
inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs)\
|
||||
{ \
|
||||
return lhs._internal op rhs._internal; \
|
||||
}
|
||||
|
||||
DECLARE_RELATIONAL(<,slt);
|
||||
DECLARE_RELATIONAL(<=,sle);
|
||||
DECLARE_RELATIONAL(>,sgt);
|
||||
DECLARE_RELATIONAL(>=,sge);
|
||||
DECLARE_RELATIONAL_EQ(==,seq);
|
||||
DECLARE_RELATIONAL(!=,sne);
|
||||
|
||||
#undef DECLARE_RELATIONAL
|
||||
|
||||
}
|
||||
|
||||
|
||||
#endif
|
40
Grid/lattice/Lattice_conformable.h
Normal file
40
Grid/lattice/Lattice_conformable.h
Normal file
@ -0,0 +1,40 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_conformable.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_CONFORMABLE_H
|
||||
#define GRID_LATTICE_CONFORMABLE_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class obj1,class obj2> void conformable(const Lattice<obj1> &lhs,const Lattice<obj2> &rhs)
|
||||
{
|
||||
assert(lhs._grid == rhs._grid);
|
||||
assert(lhs.checkerboard == rhs.checkerboard);
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
56
Grid/lattice/Lattice_coordinate.h
Normal file
56
Grid/lattice/Lattice_coordinate.h
Normal file
@ -0,0 +1,56 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_coordinate.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_COORDINATE_H
|
||||
#define GRID_LATTICE_COORDINATE_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class iobj> inline void LatticeCoordinate(Lattice<iobj> &l,int mu)
|
||||
{
|
||||
typedef typename iobj::scalar_type scalar_type;
|
||||
typedef typename iobj::vector_type vector_type;
|
||||
|
||||
GridBase *grid = l._grid;
|
||||
int Nsimd = grid->iSites();
|
||||
|
||||
std::vector<int> gcoor;
|
||||
std::vector<scalar_type> mergebuf(Nsimd);
|
||||
|
||||
vector_type vI;
|
||||
for(int o=0;o<grid->oSites();o++){
|
||||
for(int i=0;i<grid->iSites();i++){
|
||||
grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor);
|
||||
mergebuf[i]=(Integer)gcoor[mu];
|
||||
}
|
||||
merge<vector_type,scalar_type>(vI,mergebuf);
|
||||
l._odata[o]=vI;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
75
Grid/lattice/Lattice_local.h
Normal file
75
Grid/lattice/Lattice_local.h
Normal file
@ -0,0 +1,75 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_local.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_LOCALREDUCTION_H
|
||||
#define GRID_LATTICE_LOCALREDUCTION_H
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// localInner, localNorm, outerProduct
|
||||
///////////////////////////////////////////////
|
||||
|
||||
namespace Grid {
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Non site, reduced locally reduced routines
|
||||
/////////////////////////////////////////////////////
|
||||
|
||||
// localNorm2,
|
||||
template<class vobj>
|
||||
inline auto localNorm2 (const Lattice<vobj> &rhs)-> Lattice<typename vobj::tensor_reduced>
|
||||
{
|
||||
Lattice<typename vobj::tensor_reduced> ret(rhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=innerProduct(rhs._odata[ss],rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// localInnerProduct
|
||||
template<class vobj>
|
||||
inline auto localInnerProduct (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs) -> Lattice<typename vobj::tensor_reduced>
|
||||
{
|
||||
Lattice<typename vobj::tensor_reduced> ret(rhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=innerProduct(lhs._odata[ss],rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// outerProduct Scalar x Scalar -> Scalar
|
||||
// Vector x Vector -> Matrix
|
||||
template<class ll,class rr>
|
||||
inline auto outerProduct (const Lattice<ll> &lhs,const Lattice<rr> &rhs) -> Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))> ret(rhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=outerProduct(lhs._odata[ss],rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
#endif
|
138
Grid/lattice/Lattice_overload.h
Normal file
138
Grid/lattice/Lattice_overload.h
Normal file
@ -0,0 +1,138 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_overload.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_OVERLOAD_H
|
||||
#define GRID_LATTICE_OVERLOAD_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// unary negation
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
inline Lattice<vobj> operator -(const Lattice<vobj> &r)
|
||||
{
|
||||
Lattice<vobj> ret(r._grid);
|
||||
parallel_for(int ss=0;ss<r._grid->oSites();ss++){
|
||||
vstream(ret._odata[ss], -r._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
/////////////////////////////////////////////////////////////////////////////////////
|
||||
// Lattice BinOp Lattice,
|
||||
//NB mult performs conformable check. Do not reapply here for performance.
|
||||
/////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class left,class right>
|
||||
inline auto operator * (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]*rhs._odata[0])> ret(rhs._grid);
|
||||
mult(ret,lhs,rhs);
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator + (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]+rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]+rhs._odata[0])> ret(rhs._grid);
|
||||
add(ret,lhs,rhs);
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator - (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]-rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]-rhs._odata[0])> ret(rhs._grid);
|
||||
sub(ret,lhs,rhs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Scalar BinOp Lattice ;generate return type
|
||||
template<class left,class right>
|
||||
inline auto operator * (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
decltype(lhs*rhs._odata[0]) tmp=lhs*rhs._odata[ss];
|
||||
vstream(ret._odata[ss],tmp);
|
||||
// ret._odata[ss]=lhs*rhs._odata[ss];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator + (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs+rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs+rhs._odata[0])> ret(rhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
decltype(lhs+rhs._odata[0]) tmp =lhs-rhs._odata[ss];
|
||||
vstream(ret._odata[ss],tmp);
|
||||
// ret._odata[ss]=lhs+rhs._odata[ss];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator - (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs-rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs-rhs._odata[0])> ret(rhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
decltype(lhs-rhs._odata[0]) tmp=lhs-rhs._odata[ss];
|
||||
vstream(ret._odata[ss],tmp);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator * (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites(); ss++){
|
||||
decltype(lhs._odata[0]*rhs) tmp =lhs._odata[ss]*rhs;
|
||||
vstream(ret._odata[ss],tmp);
|
||||
// ret._odata[ss]=lhs._odata[ss]*rhs;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator + (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]+rhs)>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]+rhs)> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
decltype(lhs._odata[0]+rhs) tmp=lhs._odata[ss]+rhs;
|
||||
vstream(ret._odata[ss],tmp);
|
||||
// ret._odata[ss]=lhs._odata[ss]+rhs;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator - (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]-rhs)>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]-rhs)> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
decltype(lhs._odata[0]-rhs) tmp=lhs._odata[ss]-rhs;
|
||||
vstream(ret._odata[ss],tmp);
|
||||
// ret._odata[ss]=lhs._odata[ss]-rhs;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
#endif
|
205
Grid/lattice/Lattice_peekpoke.h
Normal file
205
Grid/lattice/Lattice_peekpoke.h
Normal file
@ -0,0 +1,205 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_peekpoke.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_PEEK_H
|
||||
#define GRID_LATTICE_PEEK_H
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Peeking and poking around
|
||||
///////////////////////////////////////////////
|
||||
|
||||
namespace Grid {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Peek internal indices of a Lattice object
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<int Index,class vobj>
|
||||
auto PeekIndex(const Lattice<vobj> &lhs,int i) -> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))>
|
||||
{
|
||||
Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))> ret(lhs._grid);
|
||||
ret.checkerboard=lhs.checkerboard;
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
template<int Index,class vobj>
|
||||
auto PeekIndex(const Lattice<vobj> &lhs,int i,int j) -> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))>
|
||||
{
|
||||
Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))> ret(lhs._grid);
|
||||
ret.checkerboard=lhs.checkerboard;
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i,j);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Poke internal indices of a Lattice object
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<int Index,class vobj>
|
||||
void PokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0))> & rhs,int i)
|
||||
{
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss],i);
|
||||
}
|
||||
}
|
||||
template<int Index,class vobj>
|
||||
void PokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0,0))> & rhs,int i,int j)
|
||||
{
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss],i,j);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Poke a scalar object into the SIMD array
|
||||
//////////////////////////////////////////////////////
|
||||
template<class vobj,class sobj>
|
||||
void pokeSite(const sobj &s,Lattice<vobj> &l,const std::vector<int> &site){
|
||||
|
||||
GridBase *grid=l._grid;
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nsimd = grid->Nsimd();
|
||||
|
||||
assert( l.checkerboard== l._grid->CheckerBoard(site));
|
||||
assert( sizeof(sobj)*Nsimd == sizeof(vobj));
|
||||
|
||||
int rank,odx,idx;
|
||||
// Optional to broadcast from node 0.
|
||||
grid->GlobalCoorToRankIndex(rank,odx,idx,site);
|
||||
grid->Broadcast(grid->BossRank(),s);
|
||||
|
||||
std::vector<sobj> buf(Nsimd);
|
||||
|
||||
// extract-modify-merge cycle is easiest way and this is not perf critical
|
||||
if ( rank == grid->ThisRank() ) {
|
||||
extract(l._odata[odx],buf);
|
||||
buf[idx] = s;
|
||||
merge(l._odata[odx],buf);
|
||||
}
|
||||
|
||||
return;
|
||||
};
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// Peek a scalar object from the SIMD array
|
||||
//////////////////////////////////////////////////////////
|
||||
template<class vobj,class sobj>
|
||||
void peekSite(sobj &s,const Lattice<vobj> &l,const std::vector<int> &site){
|
||||
|
||||
GridBase *grid=l._grid;
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nsimd = grid->Nsimd();
|
||||
|
||||
assert( l.checkerboard == l._grid->CheckerBoard(site));
|
||||
|
||||
int rank,odx,idx;
|
||||
grid->GlobalCoorToRankIndex(rank,odx,idx,site);
|
||||
|
||||
std::vector<sobj> buf(Nsimd);
|
||||
extract(l._odata[odx],buf);
|
||||
|
||||
s = buf[idx];
|
||||
|
||||
grid->Broadcast(rank,s);
|
||||
|
||||
return;
|
||||
};
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// Peek a scalar object from the SIMD array
|
||||
//////////////////////////////////////////////////////////
|
||||
template<class vobj,class sobj>
|
||||
void peekLocalSite(sobj &s,const Lattice<vobj> &l,std::vector<int> &site){
|
||||
|
||||
GridBase *grid = l._grid;
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nsimd = grid->Nsimd();
|
||||
|
||||
assert( l.checkerboard== l._grid->CheckerBoard(site));
|
||||
assert( sizeof(sobj)*Nsimd == sizeof(vobj));
|
||||
|
||||
static const int words=sizeof(vobj)/sizeof(vector_type);
|
||||
int odx,idx;
|
||||
idx= grid->iIndex(site);
|
||||
odx= grid->oIndex(site);
|
||||
|
||||
scalar_type * vp = (scalar_type *)&l._odata[odx];
|
||||
scalar_type * pt = (scalar_type *)&s;
|
||||
|
||||
for(int w=0;w<words;w++){
|
||||
pt[w] = vp[idx+w*Nsimd];
|
||||
}
|
||||
|
||||
return;
|
||||
};
|
||||
|
||||
template<class vobj,class sobj>
|
||||
void pokeLocalSite(const sobj &s,Lattice<vobj> &l,std::vector<int> &site){
|
||||
|
||||
GridBase *grid=l._grid;
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nsimd = grid->Nsimd();
|
||||
|
||||
assert( l.checkerboard== l._grid->CheckerBoard(site));
|
||||
assert( sizeof(sobj)*Nsimd == sizeof(vobj));
|
||||
|
||||
static const int words=sizeof(vobj)/sizeof(vector_type);
|
||||
int odx,idx;
|
||||
idx= grid->iIndex(site);
|
||||
odx= grid->oIndex(site);
|
||||
|
||||
scalar_type * vp = (scalar_type *)&l._odata[odx];
|
||||
scalar_type * pt = (scalar_type *)&s;
|
||||
|
||||
for(int w=0;w<words;w++){
|
||||
vp[idx+w*Nsimd] = pt[w];
|
||||
}
|
||||
|
||||
return;
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
|
57
Grid/lattice/Lattice_reality.h
Normal file
57
Grid/lattice/Lattice_reality.h
Normal file
@ -0,0 +1,57 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_reality.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_REALITY_H
|
||||
#define GRID_LATTICE_REALITY_H
|
||||
|
||||
|
||||
// FIXME .. this is the sector of the code
|
||||
// I am most worried about the directions
|
||||
// The choice of burying complex in the SIMD
|
||||
// is making the use of "real" and "imag" very cumbersome
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class vobj> inline Lattice<vobj> adj(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = adj(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
template<class vobj> inline Lattice<vobj> conjugate(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = conjugate(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
}
|
||||
#endif
|
733
Grid/lattice/Lattice_reduction.h
Normal file
733
Grid/lattice/Lattice_reduction.h
Normal file
@ -0,0 +1,733 @@
|
||||
/*************************************************************************************
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
Source file: ./lib/lattice/Lattice_reduction.h
|
||||
Copyright (C) 2015
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_REDUCTION_H
|
||||
#define GRID_LATTICE_REDUCTION_H
|
||||
|
||||
#include <Grid/Grid_Eigen_Dense.h>
|
||||
|
||||
namespace Grid {
|
||||
#ifdef GRID_WARN_SUBOPTIMAL
|
||||
#warning "Optimisation alert all these reduction loops are NOT threaded "
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Deterministic Reduction operations
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
|
||||
auto nrm = innerProduct(arg,arg);
|
||||
return std::real(nrm);
|
||||
}
|
||||
|
||||
// Double inner product
|
||||
template<class vobj>
|
||||
inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
|
||||
{
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_typeD vector_type;
|
||||
GridBase *grid = left._grid;
|
||||
const int pad = 8;
|
||||
|
||||
ComplexD inner;
|
||||
Vector<ComplexD> sumarray(grid->SumArraySize()*pad);
|
||||
|
||||
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
|
||||
int nwork, mywork, myoff;
|
||||
GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
|
||||
|
||||
decltype(innerProductD(left._odata[0],right._odata[0])) vinner=zero; // private to thread; sub summation
|
||||
for(int ss=myoff;ss<mywork+myoff; ss++){
|
||||
vinner = vinner + innerProductD(left._odata[ss],right._odata[ss]);
|
||||
}
|
||||
// All threads sum across SIMD; reduce serial work at end
|
||||
// one write per cacheline with streaming store
|
||||
ComplexD tmp = Reduce(TensorRemove(vinner)) ;
|
||||
vstream(sumarray[thr*pad],tmp);
|
||||
}
|
||||
|
||||
inner=0.0;
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
inner = inner+sumarray[i*pad];
|
||||
}
|
||||
right._grid->GlobalSum(inner);
|
||||
return inner;
|
||||
}
|
||||
|
||||
/////////////////////////
|
||||
// Fast axpby_norm
|
||||
// z = a x + b y
|
||||
// return norm z
|
||||
/////////////////////////
|
||||
template<class sobj,class vobj> strong_inline RealD
|
||||
axpy_norm_fast(Lattice<vobj> &z,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y)
|
||||
{
|
||||
sobj one(1.0);
|
||||
return axpby_norm_fast(z,a,one,x,y);
|
||||
}
|
||||
|
||||
template<class sobj,class vobj> strong_inline RealD
|
||||
axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y)
|
||||
{
|
||||
const int pad = 8;
|
||||
z.checkerboard = x.checkerboard;
|
||||
conformable(z,x);
|
||||
conformable(x,y);
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_typeD vector_type;
|
||||
RealD nrm;
|
||||
|
||||
GridBase *grid = x._grid;
|
||||
|
||||
Vector<RealD> sumarray(grid->SumArraySize()*pad);
|
||||
|
||||
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
|
||||
int nwork, mywork, myoff;
|
||||
GridThread::GetWork(x._grid->oSites(),thr,mywork,myoff);
|
||||
|
||||
// private to thread; sub summation
|
||||
decltype(innerProductD(z._odata[0],z._odata[0])) vnrm=zero;
|
||||
for(int ss=myoff;ss<mywork+myoff; ss++){
|
||||
vobj tmp = a*x._odata[ss]+b*y._odata[ss];
|
||||
vnrm = vnrm + innerProductD(tmp,tmp);
|
||||
vstream(z._odata[ss],tmp);
|
||||
}
|
||||
vstream(sumarray[thr*pad],real(Reduce(TensorRemove(vnrm)))) ;
|
||||
}
|
||||
|
||||
nrm = 0.0; // sum across threads; linear in thread count but fast
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
nrm = nrm+sumarray[i*pad];
|
||||
}
|
||||
z._grid->GlobalSum(nrm);
|
||||
return nrm;
|
||||
}
|
||||
|
||||
|
||||
template<class Op,class T1>
|
||||
inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
|
||||
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object
|
||||
{
|
||||
return sum(closure(expr));
|
||||
}
|
||||
|
||||
template<class Op,class T1,class T2>
|
||||
inline auto sum(const LatticeBinaryExpression<Op,T1,T2> & expr)
|
||||
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),eval(0,std::get<1>(expr.second))))::scalar_object
|
||||
{
|
||||
return sum(closure(expr));
|
||||
}
|
||||
|
||||
|
||||
template<class Op,class T1,class T2,class T3>
|
||||
inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr)
|
||||
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
|
||||
eval(0,std::get<1>(expr.second)),
|
||||
eval(0,std::get<2>(expr.second))
|
||||
))::scalar_object
|
||||
{
|
||||
return sum(closure(expr));
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
|
||||
{
|
||||
GridBase *grid=arg._grid;
|
||||
int Nsimd = grid->Nsimd();
|
||||
|
||||
std::vector<vobj,alignedAllocator<vobj> > sumarray(grid->SumArraySize());
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
sumarray[i]=zero;
|
||||
}
|
||||
|
||||
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
|
||||
int nwork, mywork, myoff;
|
||||
GridThread::GetWork(grid->oSites(),thr,mywork,myoff);
|
||||
|
||||
vobj vvsum=zero;
|
||||
for(int ss=myoff;ss<mywork+myoff; ss++){
|
||||
vvsum = vvsum + arg._odata[ss];
|
||||
}
|
||||
sumarray[thr]=vvsum;
|
||||
}
|
||||
|
||||
vobj vsum=zero; // sum across threads
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
vsum = vsum+sumarray[i];
|
||||
}
|
||||
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
sobj ssum=zero;
|
||||
|
||||
std::vector<sobj> buf(Nsimd);
|
||||
extract(vsum,buf);
|
||||
|
||||
for(int i=0;i<Nsimd;i++) ssum = ssum + buf[i];
|
||||
arg._grid->GlobalSum(ssum);
|
||||
|
||||
return ssum;
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// sliceSum, sliceInnerProduct, sliceAxpy, sliceNorm etc...
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim)
|
||||
{
|
||||
///////////////////////////////////////////////////////
|
||||
// FIXME precision promoted summation
|
||||
// may be important for correlation functions
|
||||
// But easily avoided by using double precision fields
|
||||
///////////////////////////////////////////////////////
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
GridBase *grid = Data._grid;
|
||||
assert(grid!=NULL);
|
||||
|
||||
const int Nd = grid->_ndimension;
|
||||
const int Nsimd = grid->Nsimd();
|
||||
|
||||
assert(orthogdim >= 0);
|
||||
assert(orthogdim < Nd);
|
||||
|
||||
int fd=grid->_fdimensions[orthogdim];
|
||||
int ld=grid->_ldimensions[orthogdim];
|
||||
int rd=grid->_rdimensions[orthogdim];
|
||||
|
||||
std::vector<vobj,alignedAllocator<vobj> > lvSum(rd); // will locally sum vectors first
|
||||
std::vector<sobj> lsSum(ld,zero); // sum across these down to scalars
|
||||
std::vector<sobj> extracted(Nsimd); // splitting the SIMD
|
||||
|
||||
result.resize(fd); // And then global sum to return the same vector to every node
|
||||
for(int r=0;r<rd;r++){
|
||||
lvSum[r]=zero;
|
||||
}
|
||||
|
||||
int e1= grid->_slice_nblock[orthogdim];
|
||||
int e2= grid->_slice_block [orthogdim];
|
||||
int stride=grid->_slice_stride[orthogdim];
|
||||
|
||||
// sum over reduced dimension planes, breaking out orthog dir
|
||||
// Parallel over orthog direction
|
||||
parallel_for(int r=0;r<rd;r++){
|
||||
|
||||
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
|
||||
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int ss= so+n*stride+b;
|
||||
lvSum[r]=lvSum[r]+Data._odata[ss];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sum across simd lanes in the plane, breaking out orthog dir.
|
||||
std::vector<int> icoor(Nd);
|
||||
|
||||
for(int rt=0;rt<rd;rt++){
|
||||
|
||||
extract(lvSum[rt],extracted);
|
||||
|
||||
for(int idx=0;idx<Nsimd;idx++){
|
||||
|
||||
grid->iCoorFromIindex(icoor,idx);
|
||||
|
||||
int ldx =rt+icoor[orthogdim]*rd;
|
||||
|
||||
lsSum[ldx]=lsSum[ldx]+extracted[idx];
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// sum over nodes.
|
||||
sobj gsum;
|
||||
for(int t=0;t<fd;t++){
|
||||
int pt = t/ld; // processor plane
|
||||
int lt = t%ld;
|
||||
if ( pt == grid->_processor_coor[orthogdim] ) {
|
||||
gsum=lsSum[lt];
|
||||
} else {
|
||||
gsum=zero;
|
||||
}
|
||||
|
||||
grid->GlobalSum(gsum);
|
||||
|
||||
result[t]=gsum;
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
static void mySliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
|
||||
{
|
||||
// std::cout << GridLogMessage << "Start mySliceInnerProductVector" << std::endl;
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
std::vector<scalar_type> lsSum;
|
||||
localSliceInnerProductVector(result, lhs, rhs, lsSum, orthogdim);
|
||||
globalSliceInnerProductVector(result, lhs, lsSum, orthogdim);
|
||||
// std::cout << GridLogMessage << "End mySliceInnerProductVector" << std::endl;
|
||||
}
|
||||
|
||||
template <class vobj>
|
||||
static void localSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, const Lattice<vobj> &rhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
|
||||
{
|
||||
// std::cout << GridLogMessage << "Start prep" << std::endl;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
GridBase *grid = lhs._grid;
|
||||
assert(grid!=NULL);
|
||||
conformable(grid,rhs._grid);
|
||||
|
||||
const int Nd = grid->_ndimension;
|
||||
const int Nsimd = grid->Nsimd();
|
||||
|
||||
assert(orthogdim >= 0);
|
||||
assert(orthogdim < Nd);
|
||||
|
||||
int fd=grid->_fdimensions[orthogdim];
|
||||
int ld=grid->_ldimensions[orthogdim];
|
||||
int rd=grid->_rdimensions[orthogdim];
|
||||
// std::cout << GridLogMessage << "Start alloc" << std::endl;
|
||||
|
||||
std::vector<vector_type,alignedAllocator<vector_type> > lvSum(rd); // will locally sum vectors first
|
||||
lsSum.resize(ld,scalar_type(0.0)); // sum across these down to scalars
|
||||
std::vector<iScalar<scalar_type>> extracted(Nsimd); // splitting the SIMD
|
||||
// std::cout << GridLogMessage << "End alloc" << std::endl;
|
||||
|
||||
result.resize(fd); // And then global sum to return the same vector to every node for IO to file
|
||||
for(int r=0;r<rd;r++){
|
||||
lvSum[r]=zero;
|
||||
}
|
||||
|
||||
int e1= grid->_slice_nblock[orthogdim];
|
||||
int e2= grid->_slice_block [orthogdim];
|
||||
int stride=grid->_slice_stride[orthogdim];
|
||||
// std::cout << GridLogMessage << "End prep" << std::endl;
|
||||
// std::cout << GridLogMessage << "Start parallel inner product, _rd = " << rd << std::endl;
|
||||
vector_type vv;
|
||||
parallel_for(int r=0;r<rd;r++)
|
||||
{
|
||||
|
||||
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
|
||||
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int ss = so + n * stride + b;
|
||||
vv = TensorRemove(innerProduct(lhs._odata[ss], rhs._odata[ss]));
|
||||
lvSum[r] = lvSum[r] + vv;
|
||||
}
|
||||
}
|
||||
}
|
||||
// std::cout << GridLogMessage << "End parallel inner product" << std::endl;
|
||||
|
||||
// Sum across simd lanes in the plane, breaking out orthog dir.
|
||||
std::vector<int> icoor(Nd);
|
||||
for(int rt=0;rt<rd;rt++){
|
||||
|
||||
iScalar<vector_type> temp;
|
||||
temp._internal = lvSum[rt];
|
||||
extract(temp,extracted);
|
||||
|
||||
for(int idx=0;idx<Nsimd;idx++){
|
||||
|
||||
grid->iCoorFromIindex(icoor,idx);
|
||||
|
||||
int ldx =rt+icoor[orthogdim]*rd;
|
||||
|
||||
lsSum[ldx]=lsSum[ldx]+extracted[idx]._internal;
|
||||
|
||||
}
|
||||
}
|
||||
// std::cout << GridLogMessage << "End sum over simd lanes" << std::endl;
|
||||
}
|
||||
template <class vobj>
|
||||
static void globalSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
|
||||
{
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
GridBase *grid = lhs._grid;
|
||||
int fd = result.size();
|
||||
int ld = lsSum.size();
|
||||
// sum over nodes.
|
||||
std::vector<scalar_type> gsum;
|
||||
gsum.resize(fd, scalar_type(0.0));
|
||||
// std::cout << GridLogMessage << "Start of gsum[t] creation:" << std::endl;
|
||||
for(int t=0;t<fd;t++){
|
||||
int pt = t/ld; // processor plane
|
||||
int lt = t%ld;
|
||||
if ( pt == grid->_processor_coor[orthogdim] ) {
|
||||
gsum[t]=lsSum[lt];
|
||||
}
|
||||
}
|
||||
// std::cout << GridLogMessage << "End of gsum[t] creation:" << std::endl;
|
||||
// std::cout << GridLogMessage << "Start of GlobalSumVector:" << std::endl;
|
||||
grid->GlobalSumVector(&gsum[0], fd);
|
||||
// std::cout << GridLogMessage << "End of GlobalSumVector:" << std::endl;
|
||||
|
||||
result = gsum;
|
||||
}
|
||||
template<class vobj>
|
||||
static void sliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
|
||||
{
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
GridBase *grid = lhs._grid;
|
||||
assert(grid!=NULL);
|
||||
conformable(grid,rhs._grid);
|
||||
|
||||
const int Nd = grid->_ndimension;
|
||||
const int Nsimd = grid->Nsimd();
|
||||
|
||||
assert(orthogdim >= 0);
|
||||
assert(orthogdim < Nd);
|
||||
|
||||
int fd=grid->_fdimensions[orthogdim];
|
||||
int ld=grid->_ldimensions[orthogdim];
|
||||
int rd=grid->_rdimensions[orthogdim];
|
||||
|
||||
std::vector<vector_type,alignedAllocator<vector_type> > lvSum(rd); // will locally sum vectors first
|
||||
std::vector<scalar_type > lsSum(ld,scalar_type(0.0)); // sum across these down to scalars
|
||||
std::vector<iScalar<scalar_type> > extracted(Nsimd); // splitting the SIMD
|
||||
|
||||
result.resize(fd); // And then global sum to return the same vector to every node for IO to file
|
||||
for(int r=0;r<rd;r++){
|
||||
lvSum[r]=zero;
|
||||
}
|
||||
|
||||
int e1= grid->_slice_nblock[orthogdim];
|
||||
int e2= grid->_slice_block [orthogdim];
|
||||
int stride=grid->_slice_stride[orthogdim];
|
||||
|
||||
parallel_for(int r=0;r<rd;r++){
|
||||
|
||||
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
|
||||
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int ss= so+n*stride+b;
|
||||
vector_type vv = TensorRemove(innerProduct(lhs._odata[ss],rhs._odata[ss]));
|
||||
lvSum[r]=lvSum[r]+vv;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sum across simd lanes in the plane, breaking out orthog dir.
|
||||
std::vector<int> icoor(Nd);
|
||||
for(int rt=0;rt<rd;rt++){
|
||||
|
||||
iScalar<vector_type> temp;
|
||||
temp._internal = lvSum[rt];
|
||||
extract(temp,extracted);
|
||||
|
||||
for(int idx=0;idx<Nsimd;idx++){
|
||||
|
||||
grid->iCoorFromIindex(icoor,idx);
|
||||
|
||||
int ldx =rt+icoor[orthogdim]*rd;
|
||||
|
||||
lsSum[ldx]=lsSum[ldx]+extracted[idx]._internal;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// sum over nodes.
|
||||
scalar_type gsum;
|
||||
for(int t=0;t<fd;t++){
|
||||
int pt = t/ld; // processor plane
|
||||
int lt = t%ld;
|
||||
if ( pt == grid->_processor_coor[orthogdim] ) {
|
||||
gsum=lsSum[lt];
|
||||
} else {
|
||||
gsum=scalar_type(0.0);
|
||||
}
|
||||
|
||||
grid->GlobalSum(gsum);
|
||||
|
||||
result[t]=gsum;
|
||||
}
|
||||
}
|
||||
template<class vobj>
|
||||
static void sliceNorm (std::vector<RealD> &sn,const Lattice<vobj> &rhs,int Orthog)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nblock = rhs._grid->GlobalDimensions()[Orthog];
|
||||
std::vector<ComplexD> ip(Nblock);
|
||||
sn.resize(Nblock);
|
||||
|
||||
sliceInnerProductVector(ip,rhs,rhs,Orthog);
|
||||
for(int ss=0;ss<Nblock;ss++){
|
||||
sn[ss] = real(ip[ss]);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<class vobj>
|
||||
static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice<vobj> &X,const Lattice<vobj> &Y,
|
||||
int orthogdim,RealD scale=1.0)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::tensor_reduced tensor_reduced;
|
||||
|
||||
scalar_type zscale(scale);
|
||||
|
||||
GridBase *grid = X._grid;
|
||||
|
||||
int Nsimd =grid->Nsimd();
|
||||
int Nblock =grid->GlobalDimensions()[orthogdim];
|
||||
|
||||
int fd =grid->_fdimensions[orthogdim];
|
||||
int ld =grid->_ldimensions[orthogdim];
|
||||
int rd =grid->_rdimensions[orthogdim];
|
||||
|
||||
int e1 =grid->_slice_nblock[orthogdim];
|
||||
int e2 =grid->_slice_block [orthogdim];
|
||||
int stride =grid->_slice_stride[orthogdim];
|
||||
|
||||
std::vector<int> icoor;
|
||||
|
||||
for(int r=0;r<rd;r++){
|
||||
|
||||
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
|
||||
|
||||
vector_type av;
|
||||
|
||||
for(int l=0;l<Nsimd;l++){
|
||||
grid->iCoorFromIindex(icoor,l);
|
||||
int ldx =r+icoor[orthogdim]*rd;
|
||||
scalar_type *as =(scalar_type *)&av;
|
||||
as[l] = scalar_type(a[ldx])*zscale;
|
||||
}
|
||||
|
||||
tensor_reduced at; at=av;
|
||||
|
||||
parallel_for_nest2(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int ss= so+n*stride+b;
|
||||
R._odata[ss] = at*X._odata[ss]+Y._odata[ss];
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
inline GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Orthog)
|
||||
{
|
||||
int NN = BlockSolverGrid->_ndimension;
|
||||
int nsimd = BlockSolverGrid->Nsimd();
|
||||
|
||||
std::vector<int> latt_phys(0);
|
||||
std::vector<int> simd_phys(0);
|
||||
std::vector<int> mpi_phys(0);
|
||||
|
||||
for(int d=0;d<NN;d++){
|
||||
if( d!=Orthog ) {
|
||||
latt_phys.push_back(BlockSolverGrid->_fdimensions[d]);
|
||||
simd_phys.push_back(BlockSolverGrid->_simd_layout[d]);
|
||||
mpi_phys.push_back(BlockSolverGrid->_processors[d]);
|
||||
}
|
||||
}
|
||||
return (GridBase *)new GridCartesian(latt_phys,simd_phys,mpi_phys);
|
||||
}
|
||||
*/
|
||||
|
||||
template<class vobj>
|
||||
static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,const Lattice<vobj> &Y,int Orthog,RealD scale=1.0)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nblock = X._grid->GlobalDimensions()[Orthog];
|
||||
|
||||
GridBase *FullGrid = X._grid;
|
||||
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
|
||||
// Lattice<vobj> Xslice(SliceGrid);
|
||||
// Lattice<vobj> Rslice(SliceGrid);
|
||||
|
||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
||||
int nh = FullGrid->_ndimension;
|
||||
// int nl = SliceGrid->_ndimension;
|
||||
int nl = nh-1;
|
||||
|
||||
//FIXME package in a convenient iterator
|
||||
//Should loop over a plane orthogonal to direction "Orthog"
|
||||
int stride=FullGrid->_slice_stride[Orthog];
|
||||
int block =FullGrid->_slice_block [Orthog];
|
||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
||||
int ostride=FullGrid->_ostride[Orthog];
|
||||
#pragma omp parallel
|
||||
{
|
||||
std::vector<vobj> s_x(Nblock);
|
||||
|
||||
#pragma omp for collapse(2)
|
||||
for(int n=0;n<nblock;n++){
|
||||
for(int b=0;b<block;b++){
|
||||
int o = n*stride + b;
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
s_x[i] = X[o+i*ostride];
|
||||
}
|
||||
|
||||
vobj dot;
|
||||
for(int i=0;i<Nblock;i++){
|
||||
dot = Y[o+i*ostride];
|
||||
for(int j=0;j<Nblock;j++){
|
||||
dot = dot + s_x[j]*(scale*aa(j,i));
|
||||
}
|
||||
R[o+i*ostride]=dot;
|
||||
}
|
||||
}}
|
||||
}
|
||||
};
|
||||
|
||||
template<class vobj>
|
||||
static void sliceMulMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,int Orthog,RealD scale=1.0)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nblock = X._grid->GlobalDimensions()[Orthog];
|
||||
|
||||
GridBase *FullGrid = X._grid;
|
||||
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
// Lattice<vobj> Xslice(SliceGrid);
|
||||
// Lattice<vobj> Rslice(SliceGrid);
|
||||
|
||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
||||
int nh = FullGrid->_ndimension;
|
||||
// int nl = SliceGrid->_ndimension;
|
||||
int nl=1;
|
||||
|
||||
//FIXME package in a convenient iterator
|
||||
//Should loop over a plane orthogonal to direction "Orthog"
|
||||
int stride=FullGrid->_slice_stride[Orthog];
|
||||
int block =FullGrid->_slice_block [Orthog];
|
||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
||||
int ostride=FullGrid->_ostride[Orthog];
|
||||
#pragma omp parallel
|
||||
{
|
||||
std::vector<vobj> s_x(Nblock);
|
||||
|
||||
#pragma omp for collapse(2)
|
||||
for(int n=0;n<nblock;n++){
|
||||
for(int b=0;b<block;b++){
|
||||
int o = n*stride + b;
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
s_x[i] = X[o+i*ostride];
|
||||
}
|
||||
|
||||
vobj dot;
|
||||
for(int i=0;i<Nblock;i++){
|
||||
dot = s_x[0]*(scale*aa(0,i));
|
||||
for(int j=1;j<Nblock;j++){
|
||||
dot = dot + s_x[j]*(scale*aa(j,i));
|
||||
}
|
||||
R[o+i*ostride]=dot;
|
||||
}
|
||||
}}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
template<class vobj>
|
||||
static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
GridBase *FullGrid = lhs._grid;
|
||||
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
|
||||
int Nblock = FullGrid->GlobalDimensions()[Orthog];
|
||||
|
||||
// Lattice<vobj> Lslice(SliceGrid);
|
||||
// Lattice<vobj> Rslice(SliceGrid);
|
||||
|
||||
mat = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
||||
int nh = FullGrid->_ndimension;
|
||||
// int nl = SliceGrid->_ndimension;
|
||||
int nl = nh-1;
|
||||
|
||||
//FIXME package in a convenient iterator
|
||||
//Should loop over a plane orthogonal to direction "Orthog"
|
||||
int stride=FullGrid->_slice_stride[Orthog];
|
||||
int block =FullGrid->_slice_block [Orthog];
|
||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
||||
int ostride=FullGrid->_ostride[Orthog];
|
||||
|
||||
typedef typename vobj::vector_typeD vector_typeD;
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
std::vector<vobj> Left(Nblock);
|
||||
std::vector<vobj> Right(Nblock);
|
||||
Eigen::MatrixXcd mat_thread = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
#pragma omp for collapse(2)
|
||||
for(int n=0;n<nblock;n++){
|
||||
for(int b=0;b<block;b++){
|
||||
|
||||
int o = n*stride + b;
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
Left [i] = lhs[o+i*ostride];
|
||||
Right[i] = rhs[o+i*ostride];
|
||||
}
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
for(int j=0;j<Nblock;j++){
|
||||
auto tmp = innerProduct(Left[i],Right[j]);
|
||||
auto rtmp = TensorRemove(tmp);
|
||||
mat_thread(i,j) += Reduce(rtmp);
|
||||
}}
|
||||
}}
|
||||
#pragma omp critical
|
||||
{
|
||||
mat += mat_thread;
|
||||
}
|
||||
}
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
for(int j=0;j<Nblock;j++){
|
||||
ComplexD sum = mat(i,j);
|
||||
FullGrid->GlobalSum(sum);
|
||||
mat(i,j)=sum;
|
||||
}}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
} /*END NAMESPACE GRID*/
|
||||
#endif
|
||||
|
||||
|
||||
|
520
Grid/lattice/Lattice_rng.h
Normal file
520
Grid/lattice/Lattice_rng.h
Normal file
@ -0,0 +1,520 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_rng.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_RNG_H
|
||||
#define GRID_LATTICE_RNG_H
|
||||
|
||||
#include <random>
|
||||
|
||||
#ifdef RNG_SITMO
|
||||
#include <Grid/sitmo_rng/sitmo_prng_engine.hpp>
|
||||
#endif
|
||||
|
||||
#if defined(RNG_SITMO)
|
||||
#define RNG_FAST_DISCARD
|
||||
#else
|
||||
#undef RNG_FAST_DISCARD
|
||||
#endif
|
||||
|
||||
namespace Grid {
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// Allow the RNG state to be less dense than the fine grid
|
||||
//////////////////////////////////////////////////////////////
|
||||
inline int RNGfillable(GridBase *coarse,GridBase *fine)
|
||||
{
|
||||
|
||||
int rngdims = coarse->_ndimension;
|
||||
|
||||
// trivially extended in higher dims, with locality guaranteeing RNG state is local to node
|
||||
int lowerdims = fine->_ndimension - coarse->_ndimension;
|
||||
assert(lowerdims >= 0);
|
||||
for(int d=0;d<lowerdims;d++){
|
||||
assert(fine->_simd_layout[d]==1);
|
||||
assert(fine->_processors[d]==1);
|
||||
}
|
||||
|
||||
int multiplicity=1;
|
||||
for(int d=0;d<lowerdims;d++){
|
||||
multiplicity=multiplicity*fine->_rdimensions[d];
|
||||
}
|
||||
// local and global volumes subdivide cleanly after SIMDization
|
||||
for(int d=0;d<rngdims;d++){
|
||||
int fd= d+lowerdims;
|
||||
assert(coarse->_processors[d] == fine->_processors[fd]);
|
||||
assert(coarse->_simd_layout[d] == fine->_simd_layout[fd]);
|
||||
assert(((fine->_rdimensions[fd] / coarse->_rdimensions[d])* coarse->_rdimensions[d])==fine->_rdimensions[fd]);
|
||||
|
||||
multiplicity = multiplicity *fine->_rdimensions[fd] / coarse->_rdimensions[d];
|
||||
}
|
||||
return multiplicity;
|
||||
}
|
||||
|
||||
|
||||
// merge of April 11 2017
|
||||
// this function is necessary for the LS vectorised field
|
||||
inline int RNGfillable_general(GridBase *coarse,GridBase *fine)
|
||||
{
|
||||
int rngdims = coarse->_ndimension;
|
||||
|
||||
// trivially extended in higher dims, with locality guaranteeing RNG state is local to node
|
||||
int lowerdims = fine->_ndimension - coarse->_ndimension; assert(lowerdims >= 0);
|
||||
// assumes that the higher dimensions are not using more processors
|
||||
// all further divisions are local
|
||||
for(int d=0;d<lowerdims;d++) assert(fine->_processors[d]==1);
|
||||
for(int d=0;d<rngdims;d++) assert(coarse->_processors[d] == fine->_processors[d+lowerdims]);
|
||||
|
||||
// then divide the number of local sites
|
||||
// check that the total number of sims agree, meanse the iSites are the same
|
||||
assert(fine->Nsimd() == coarse->Nsimd());
|
||||
|
||||
// check that the two grids divide cleanly
|
||||
assert( (fine->lSites() / coarse->lSites() ) * coarse->lSites() == fine->lSites() );
|
||||
|
||||
return fine->lSites() / coarse->lSites();
|
||||
}
|
||||
|
||||
// real scalars are one component
|
||||
template<class scalar,class distribution,class generator>
|
||||
void fillScalar(scalar &s,distribution &dist,generator & gen)
|
||||
{
|
||||
s=dist(gen);
|
||||
}
|
||||
template<class distribution,class generator>
|
||||
void fillScalar(ComplexF &s,distribution &dist, generator &gen)
|
||||
{
|
||||
s=ComplexF(dist(gen),dist(gen));
|
||||
}
|
||||
template<class distribution,class generator>
|
||||
void fillScalar(ComplexD &s,distribution &dist,generator &gen)
|
||||
{
|
||||
s=ComplexD(dist(gen),dist(gen));
|
||||
}
|
||||
|
||||
class GridRNGbase {
|
||||
public:
|
||||
// One generator per site.
|
||||
// Uniform and Gaussian distributions from these generators.
|
||||
#ifdef RNG_RANLUX
|
||||
typedef std::ranlux48 RngEngine;
|
||||
typedef uint64_t RngStateType;
|
||||
static const int RngStateCount = 15;
|
||||
#endif
|
||||
#ifdef RNG_MT19937
|
||||
typedef std::mt19937 RngEngine;
|
||||
typedef uint32_t RngStateType;
|
||||
static const int RngStateCount = std::mt19937::state_size;
|
||||
#endif
|
||||
#ifdef RNG_SITMO
|
||||
typedef sitmo::prng_engine RngEngine;
|
||||
typedef uint64_t RngStateType;
|
||||
static const int RngStateCount = 13;
|
||||
#endif
|
||||
|
||||
std::vector<RngEngine> _generators;
|
||||
std::vector<std::uniform_real_distribution<RealD> > _uniform;
|
||||
std::vector<std::normal_distribution<RealD> > _gaussian;
|
||||
std::vector<std::discrete_distribution<int32_t> > _bernoulli;
|
||||
std::vector<std::uniform_int_distribution<uint32_t> > _uid;
|
||||
|
||||
///////////////////////
|
||||
// support for parallel init
|
||||
///////////////////////
|
||||
#ifdef RNG_FAST_DISCARD
|
||||
static void Skip(RngEngine &eng,uint64_t site)
|
||||
{
|
||||
/////////////////////////////////////////////////////////////////////////////////////
|
||||
// Skip by 2^40 elements between successive lattice sites
|
||||
// This goes by 10^12.
|
||||
// Consider quenched updating; likely never exceeding rate of 1000 sweeps
|
||||
// per second on any machine. This gives us of order 10^9 seconds, or 100 years
|
||||
// skip ahead.
|
||||
// For HMC unlikely to go at faster than a solve per second, and
|
||||
// tens of seconds per trajectory so this is clean in all reasonable cases,
|
||||
// and margin of safety is orders of magnitude.
|
||||
// We could hack Sitmo to skip in the higher order words of state if necessary
|
||||
//
|
||||
// Replace with 2^30 ; avoid problem on large volumes
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////////////
|
||||
// uint64_t skip = site+1; // Old init Skipped then drew. Checked compat with faster init
|
||||
const int shift = 30;
|
||||
|
||||
uint64_t skip = site;
|
||||
|
||||
skip = skip<<shift;
|
||||
|
||||
assert((skip >> shift)==site); // check for overflow
|
||||
|
||||
eng.discard(skip);
|
||||
// std::cout << " Engine " <<site << " state " <<eng<<std::endl;
|
||||
}
|
||||
#endif
|
||||
static RngEngine Reseed(RngEngine &eng)
|
||||
{
|
||||
std::vector<uint32_t> newseed;
|
||||
std::uniform_int_distribution<uint32_t> uid;
|
||||
return Reseed(eng,newseed,uid);
|
||||
}
|
||||
static RngEngine Reseed(RngEngine &eng,std::vector<uint32_t> & newseed,
|
||||
std::uniform_int_distribution<uint32_t> &uid)
|
||||
{
|
||||
const int reseeds=4;
|
||||
|
||||
newseed.resize(reseeds);
|
||||
for(int i=0;i<reseeds;i++){
|
||||
newseed[i] = uid(eng);
|
||||
}
|
||||
std::seed_seq sseq(newseed.begin(),newseed.end());
|
||||
return RngEngine(sseq);
|
||||
}
|
||||
|
||||
void GetState(std::vector<RngStateType> & saved,RngEngine &eng) {
|
||||
saved.resize(RngStateCount);
|
||||
std::stringstream ss;
|
||||
ss<<eng;
|
||||
ss.seekg(0,ss.beg);
|
||||
for(int i=0;i<RngStateCount;i++){
|
||||
ss>>saved[i];
|
||||
}
|
||||
}
|
||||
void GetState(std::vector<RngStateType> & saved,int gen) {
|
||||
GetState(saved,_generators[gen]);
|
||||
}
|
||||
void SetState(std::vector<RngStateType> & saved,RngEngine &eng){
|
||||
assert(saved.size()==RngStateCount);
|
||||
std::stringstream ss;
|
||||
for(int i=0;i<RngStateCount;i++){
|
||||
ss<< saved[i]<<" ";
|
||||
}
|
||||
ss.seekg(0,ss.beg);
|
||||
ss>>eng;
|
||||
}
|
||||
void SetState(std::vector<RngStateType> & saved,int gen){
|
||||
SetState(saved,_generators[gen]);
|
||||
}
|
||||
void SetEngine(RngEngine &Eng, int gen){
|
||||
_generators[gen]=Eng;
|
||||
}
|
||||
void GetEngine(RngEngine &Eng, int gen){
|
||||
Eng=_generators[gen];
|
||||
}
|
||||
template<class source> void Seed(source &src, int gen)
|
||||
{
|
||||
_generators[gen] = RngEngine(src);
|
||||
}
|
||||
};
|
||||
|
||||
class GridSerialRNG : public GridRNGbase {
|
||||
public:
|
||||
|
||||
GridSerialRNG() : GridRNGbase() {
|
||||
_generators.resize(1);
|
||||
_uniform.resize(1,std::uniform_real_distribution<RealD>{0,1});
|
||||
_gaussian.resize(1,std::normal_distribution<RealD>(0.0,1.0) );
|
||||
_bernoulli.resize(1,std::discrete_distribution<int32_t>{1,1});
|
||||
_uid.resize(1,std::uniform_int_distribution<uint32_t>() );
|
||||
}
|
||||
|
||||
template <class sobj,class distribution> inline void fill(sobj &l,std::vector<distribution> &dist){
|
||||
|
||||
typedef typename sobj::scalar_type scalar_type;
|
||||
|
||||
int words = sizeof(sobj)/sizeof(scalar_type);
|
||||
|
||||
scalar_type *buf = (scalar_type *) & l;
|
||||
|
||||
dist[0].reset();
|
||||
for(int idx=0;idx<words;idx++){
|
||||
fillScalar(buf[idx],dist[0],_generators[0]);
|
||||
}
|
||||
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
|
||||
};
|
||||
|
||||
template <class distribution> inline void fill(ComplexF &l,std::vector<distribution> &dist){
|
||||
dist[0].reset();
|
||||
fillScalar(l,dist[0],_generators[0]);
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(ComplexD &l,std::vector<distribution> &dist){
|
||||
dist[0].reset();
|
||||
fillScalar(l,dist[0],_generators[0]);
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(RealF &l,std::vector<distribution> &dist){
|
||||
dist[0].reset();
|
||||
fillScalar(l,dist[0],_generators[0]);
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(RealD &l,std::vector<distribution> &dist){
|
||||
dist[0].reset();
|
||||
fillScalar(l,dist[0],_generators[0]);
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
// vector fill
|
||||
template <class distribution> inline void fill(vComplexF &l,std::vector<distribution> &dist){
|
||||
RealF *pointer=(RealF *)&l;
|
||||
dist[0].reset();
|
||||
for(int i=0;i<2*vComplexF::Nsimd();i++){
|
||||
fillScalar(pointer[i],dist[0],_generators[0]);
|
||||
}
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(vComplexD &l,std::vector<distribution> &dist){
|
||||
RealD *pointer=(RealD *)&l;
|
||||
dist[0].reset();
|
||||
for(int i=0;i<2*vComplexD::Nsimd();i++){
|
||||
fillScalar(pointer[i],dist[0],_generators[0]);
|
||||
}
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(vRealF &l,std::vector<distribution> &dist){
|
||||
RealF *pointer=(RealF *)&l;
|
||||
dist[0].reset();
|
||||
for(int i=0;i<vRealF::Nsimd();i++){
|
||||
fillScalar(pointer[i],dist[0],_generators[0]);
|
||||
}
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(vRealD &l,std::vector<distribution> &dist){
|
||||
RealD *pointer=(RealD *)&l;
|
||||
dist[0].reset();
|
||||
for(int i=0;i<vRealD::Nsimd();i++){
|
||||
fillScalar(pointer[i],dist[0],_generators[0]);
|
||||
}
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
|
||||
void SeedFixedIntegers(const std::vector<int> &seeds){
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size());
|
||||
std::seed_seq src(seeds.begin(),seeds.end());
|
||||
Seed(src,0);
|
||||
}
|
||||
|
||||
void SeedUniqueString(const std::string &s){
|
||||
std::vector<int> seeds;
|
||||
std::stringstream sha;
|
||||
seeds = GridChecksum::sha256_seeds(s);
|
||||
for(int i=0;i<seeds.size();i++) {
|
||||
sha << std::hex << seeds[i];
|
||||
}
|
||||
std::cout << GridLogMessage << "Intialising serial RNG with unique string '"
|
||||
<< s << "'" << std::endl;
|
||||
std::cout << GridLogMessage << "Seed SHA256: " << sha.str() << std::endl;
|
||||
SeedFixedIntegers(seeds);
|
||||
}
|
||||
};
|
||||
|
||||
class GridParallelRNG : public GridRNGbase {
|
||||
|
||||
double _time_counter;
|
||||
|
||||
public:
|
||||
GridBase *_grid;
|
||||
unsigned int _vol;
|
||||
|
||||
int generator_idx(int os,int is) {
|
||||
return is*_grid->oSites()+os;
|
||||
}
|
||||
|
||||
GridParallelRNG(GridBase *grid) : GridRNGbase() {
|
||||
_grid = grid;
|
||||
_vol =_grid->iSites()*_grid->oSites();
|
||||
|
||||
_generators.resize(_vol);
|
||||
_uniform.resize(_vol,std::uniform_real_distribution<RealD>{0,1});
|
||||
_gaussian.resize(_vol,std::normal_distribution<RealD>(0.0,1.0) );
|
||||
_bernoulli.resize(_vol,std::discrete_distribution<int32_t>{1,1});
|
||||
_uid.resize(_vol,std::uniform_int_distribution<uint32_t>() );
|
||||
}
|
||||
|
||||
template <class vobj,class distribution> inline void fill(Lattice<vobj> &l,std::vector<distribution> &dist){
|
||||
|
||||
typedef typename vobj::scalar_object scalar_object;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
double inner_time_counter = usecond();
|
||||
|
||||
int multiplicity = RNGfillable_general(_grid, l._grid); // l has finer or same grid
|
||||
int Nsimd = _grid->Nsimd(); // guaranteed to be the same for l._grid too
|
||||
int osites = _grid->oSites(); // guaranteed to be <= l._grid->oSites() by a factor multiplicity
|
||||
int words = sizeof(scalar_object) / sizeof(scalar_type);
|
||||
|
||||
parallel_for(int ss=0;ss<osites;ss++){
|
||||
std::vector<scalar_object> buf(Nsimd);
|
||||
for (int m = 0; m < multiplicity; m++) { // Draw from same generator multiplicity times
|
||||
|
||||
int sm = multiplicity * ss + m; // Maps the generator site to the fine site
|
||||
|
||||
for (int si = 0; si < Nsimd; si++) {
|
||||
|
||||
int gdx = generator_idx(ss, si); // index of generator state
|
||||
scalar_type *pointer = (scalar_type *)&buf[si];
|
||||
dist[gdx].reset();
|
||||
for (int idx = 0; idx < words; idx++)
|
||||
fillScalar(pointer[idx], dist[gdx], _generators[gdx]);
|
||||
}
|
||||
// merge into SIMD lanes, FIXME suboptimal implementation
|
||||
merge(l._odata[sm], buf);
|
||||
}
|
||||
}
|
||||
|
||||
_time_counter += usecond()- inner_time_counter;
|
||||
};
|
||||
|
||||
void SeedUniqueString(const std::string &s){
|
||||
std::vector<int> seeds;
|
||||
std::stringstream sha;
|
||||
seeds = GridChecksum::sha256_seeds(s);
|
||||
for(int i=0;i<seeds.size();i++) {
|
||||
sha << std::hex << seeds[i];
|
||||
}
|
||||
std::cout << GridLogMessage << "Intialising parallel RNG with unique string '"
|
||||
<< s << "'" << std::endl;
|
||||
std::cout << GridLogMessage << "Seed SHA256: " << sha.str() << std::endl;
|
||||
SeedFixedIntegers(seeds);
|
||||
}
|
||||
void SeedFixedIntegers(const std::vector<int> &seeds){
|
||||
|
||||
// Everyone generates the same seed_seq based on input seeds
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size());
|
||||
|
||||
std::seed_seq source(seeds.begin(),seeds.end());
|
||||
|
||||
RngEngine master_engine(source);
|
||||
|
||||
#ifdef RNG_FAST_DISCARD
|
||||
////////////////////////////////////////////////
|
||||
// Skip ahead through a single stream.
|
||||
// Applicable to SITMO and other has based/crypto RNGs
|
||||
// Should be applicable to Mersenne Twister, but the C++11
|
||||
// MT implementation does not implement fast discard even though
|
||||
// in principle this is possible
|
||||
////////////////////////////////////////////////
|
||||
|
||||
// Everybody loops over global volume.
|
||||
parallel_for(int gidx=0;gidx<_grid->_gsites;gidx++){
|
||||
|
||||
// Where is it?
|
||||
int rank,o_idx,i_idx;
|
||||
std::vector<int> gcoor;
|
||||
|
||||
_grid->GlobalIndexToGlobalCoor(gidx,gcoor);
|
||||
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
|
||||
|
||||
// If this is one of mine we take it
|
||||
if( rank == _grid->ThisRank() ){
|
||||
int l_idx=generator_idx(o_idx,i_idx);
|
||||
_generators[l_idx] = master_engine;
|
||||
Skip(_generators[l_idx],gidx); // Skip to next RNG sequence
|
||||
}
|
||||
|
||||
}
|
||||
#else
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Machine and thread decomposition dependent seeding is efficient
|
||||
// and maximally parallel; but NOT reproducible from machine to machine.
|
||||
// Not ideal, but fastest way to reseed all nodes.
|
||||
////////////////////////////////////////////////////////////////
|
||||
{
|
||||
// Obtain one Reseed per processor
|
||||
int Nproc = _grid->ProcessorCount();
|
||||
std::vector<RngEngine> seeders(Nproc);
|
||||
int me= _grid->ThisRank();
|
||||
for(int p=0;p<Nproc;p++){
|
||||
seeders[p] = Reseed(master_engine);
|
||||
}
|
||||
master_engine = seeders[me];
|
||||
}
|
||||
|
||||
{
|
||||
// Obtain one reseeded generator per thread
|
||||
int Nthread = GridThread::GetThreads();
|
||||
std::vector<RngEngine> seeders(Nthread);
|
||||
for(int t=0;t<Nthread;t++){
|
||||
seeders[t] = Reseed(master_engine);
|
||||
}
|
||||
|
||||
parallel_for(int t=0;t<Nthread;t++) {
|
||||
// set up one per local site in threaded fashion
|
||||
std::vector<uint32_t> newseeds;
|
||||
std::uniform_int_distribution<uint32_t> uid;
|
||||
for(int l=0;l<_grid->lSites();l++) {
|
||||
if ( (l%Nthread)==t ) {
|
||||
_generators[l] = Reseed(seeders[t],newseeds,uid);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void Report(){
|
||||
std::cout << GridLogMessage << "Time spent in the fill() routine by GridParallelRNG: "<< _time_counter/1e3 << " ms" << std::endl;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Support for rigorous test of RNG's
|
||||
// Return uniform random uint32_t from requested site generator
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
uint32_t GlobalU01(int gsite){
|
||||
|
||||
uint32_t the_number;
|
||||
// who
|
||||
std::vector<int> gcoor;
|
||||
int rank,o_idx,i_idx;
|
||||
_grid->GlobalIndexToGlobalCoor(gsite,gcoor);
|
||||
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
|
||||
|
||||
// draw
|
||||
int l_idx=generator_idx(o_idx,i_idx);
|
||||
if( rank == _grid->ThisRank() ){
|
||||
the_number = _uid[l_idx](_generators[l_idx]);
|
||||
}
|
||||
|
||||
// share & return
|
||||
_grid->Broadcast(rank,(void *)&the_number,sizeof(the_number));
|
||||
return the_number;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template <class vobj> inline void random(GridParallelRNG &rng,Lattice<vobj> &l) { rng.fill(l,rng._uniform); }
|
||||
template <class vobj> inline void gaussian(GridParallelRNG &rng,Lattice<vobj> &l) { rng.fill(l,rng._gaussian); }
|
||||
template <class vobj> inline void bernoulli(GridParallelRNG &rng,Lattice<vobj> &l){ rng.fill(l,rng._bernoulli);}
|
||||
|
||||
template <class sobj> inline void random(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._uniform ); }
|
||||
template <class sobj> inline void gaussian(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._gaussian ); }
|
||||
template <class sobj> inline void bernoulli(GridSerialRNG &rng,sobj &l){ rng.fill(l,rng._bernoulli); }
|
||||
|
||||
}
|
||||
#endif
|
67
Grid/lattice/Lattice_trace.h
Normal file
67
Grid/lattice/Lattice_trace.h
Normal file
@ -0,0 +1,67 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_trace.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_TRACE_H
|
||||
#define GRID_LATTICE_TRACE_H
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Tracing, transposing, peeking, poking
|
||||
///////////////////////////////////////////////
|
||||
|
||||
namespace Grid {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Trace
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
inline auto trace(const Lattice<vobj> &lhs)
|
||||
-> Lattice<decltype(trace(lhs._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(trace(lhs._odata[0]))> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = trace(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Trace Index level dependent operation
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<int Index,class vobj>
|
||||
inline auto TraceIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<Index>(lhs._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(traceIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = traceIndex<Index>(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
|
1085
Grid/lattice/Lattice_transfer.h
Normal file
1085
Grid/lattice/Lattice_transfer.h
Normal file
File diff suppressed because it is too large
Load Diff
63
Grid/lattice/Lattice_transpose.h
Normal file
63
Grid/lattice/Lattice_transpose.h
Normal file
@ -0,0 +1,63 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_transpose.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_TRANSPOSE_H
|
||||
#define GRID_LATTICE_TRANSPOSE_H
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Transpose
|
||||
///////////////////////////////////////////////
|
||||
|
||||
namespace Grid {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Transpose
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
inline Lattice<vobj> transpose(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = transpose(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Index level dependent transpose
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<int Index,class vobj>
|
||||
inline auto TransposeIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = transposeIndex<Index>(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
}
|
||||
#endif
|
84
Grid/lattice/Lattice_unary.h
Normal file
84
Grid/lattice/Lattice_unary.h
Normal file
@ -0,0 +1,84 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_unary.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_UNARY_H
|
||||
#define GRID_LATTICE_UNARY_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class obj> Lattice<obj> pow(const Lattice<obj> &rhs,RealD y){
|
||||
Lattice<obj> ret(rhs._grid);
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
ret._odata[ss]=pow(rhs._odata[ss],y);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class obj> Lattice<obj> mod(const Lattice<obj> &rhs,Integer y){
|
||||
Lattice<obj> ret(rhs._grid);
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
ret._odata[ss]=mod(rhs._odata[ss],y);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class obj> Lattice<obj> div(const Lattice<obj> &rhs,Integer y){
|
||||
Lattice<obj> ret(rhs._grid);
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
ret._odata[ss]=div(rhs._odata[ss],y);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class obj> Lattice<obj> expMat(const Lattice<obj> &rhs, RealD alpha, Integer Nexp = DEFAULT_MAT_EXP){
|
||||
Lattice<obj> ret(rhs._grid);
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
ret._odata[ss]=Exponentiate(rhs._odata[ss],alpha, Nexp);
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
#endif
|
86
Grid/lattice/Lattice_where.h
Normal file
86
Grid/lattice/Lattice_where.h
Normal file
@ -0,0 +1,86 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_where.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_WHERE_H
|
||||
#define GRID_LATTICE_WHERE_H
|
||||
namespace Grid {
|
||||
// Must implement the predicate gating the
|
||||
// Must be able to reduce the predicate down to a single vInteger per site.
|
||||
// Must be able to require the type be iScalar x iScalar x ....
|
||||
// give a GetVtype method in iScalar
|
||||
// and blow away the tensor structures.
|
||||
//
|
||||
template<class vobj,class iobj>
|
||||
inline void whereWolf(Lattice<vobj> &ret,const Lattice<iobj> &predicate,Lattice<vobj> &iftrue,Lattice<vobj> &iffalse)
|
||||
{
|
||||
conformable(iftrue,iffalse);
|
||||
conformable(iftrue,predicate);
|
||||
conformable(iftrue,ret);
|
||||
|
||||
GridBase *grid=iftrue._grid;
|
||||
|
||||
typedef typename vobj::scalar_object scalar_object;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename iobj::vector_type mask_type;
|
||||
|
||||
const int Nsimd = grid->Nsimd();
|
||||
|
||||
std::vector<Integer> mask(Nsimd);
|
||||
std::vector<scalar_object> truevals (Nsimd);
|
||||
std::vector<scalar_object> falsevals(Nsimd);
|
||||
|
||||
parallel_for(int ss=0;ss<iftrue._grid->oSites(); ss++){
|
||||
|
||||
extract(iftrue._odata[ss] ,truevals);
|
||||
extract(iffalse._odata[ss] ,falsevals);
|
||||
extract<vInteger,Integer>(TensorRemove(predicate._odata[ss]),mask);
|
||||
|
||||
for(int s=0;s<Nsimd;s++){
|
||||
if (mask[s]) falsevals[s]=truevals[s];
|
||||
}
|
||||
|
||||
merge(ret._odata[ss],falsevals);
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj,class iobj>
|
||||
inline Lattice<vobj> whereWolf(const Lattice<iobj> &predicate,Lattice<vobj> &iftrue,Lattice<vobj> &iffalse)
|
||||
{
|
||||
conformable(iftrue,iffalse);
|
||||
conformable(iftrue,predicate);
|
||||
|
||||
Lattice<vobj> ret(iftrue._grid);
|
||||
|
||||
where(ret,predicate,iftrue,iffalse);
|
||||
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
#endif
|
Reference in New Issue
Block a user