1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 01:05:38 +01:00

Debugged the real() and imag() functions and added tests to Test_Simd

This commit is contained in:
Guido Cossu 2016-07-06 14:16:03 +01:00
parent 3e3b367aa9
commit e3d5319470
6 changed files with 999 additions and 867 deletions

View File

@ -1,32 +1,33 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/Simd.h
Source file: ./lib/Simd.h
Copyright (C) 2015
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_SIMD_H
#define GRID_SIMD_H
@ -118,6 +119,14 @@ namespace Grid {
inline ComplexD timesI(const ComplexD &r) { return(r*ComplexD(0.0,1.0));}
inline ComplexF timesMinusI(const ComplexF &r){ return(r*ComplexF(0.0,-1.0));}
inline ComplexD timesMinusI(const ComplexD &r){ return(r*ComplexD(0.0,-1.0));}
// define projections to real and imaginay parts
inline ComplexF projReal(const ComplexF &r){return( ComplexF(std::real(r), 0.0));}
inline ComplexD projReal(const ComplexD &r){return( ComplexD(std::real(r), 0.0));}
inline ComplexF projImag(const ComplexF &r){return (ComplexF(std::imag(r), 0.0 ));}
inline ComplexD projImag(const ComplexD &r){return (ComplexD(std::imag(r), 0.0));}
// define auxiliary functions for complex computations
inline void timesI(ComplexF &ret,const ComplexF &r) { ret = timesI(r);}
inline void timesI(ComplexD &ret,const ComplexD &r) { ret = timesI(r);}
inline void timesMinusI(ComplexF &ret,const ComplexF &r){ ret = timesMinusI(r);}

View File

@ -40,7 +40,7 @@ namespace Grid {
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
ComplexD nrm = innerProduct(arg,arg);
return real(nrm);
return std::real(nrm);
}
template<class vobj>

View File

@ -5,64 +5,62 @@
#ifndef STOUT_SMEAR_
#define STOUT_SMEAR_
namespace Grid {
namespace QCD {
namespace Grid {
namespace QCD {
/*! @brief Stout smearing of link variable. */
template <class Gimpl>
class Smear_Stout: public Smear<Gimpl> {
/*! @brief Stout smearing of link variable. */
template <class Gimpl>
class Smear_Stout : public Smear<Gimpl> {
private:
const Smear < Gimpl > * SmearBase;
const Smear<Gimpl>* SmearBase;
public:
INHERIT_GIMPL_TYPES(Gimpl)
Smear_Stout(Smear < Gimpl >* base):SmearBase(base){
static_assert(Nc==3, "Stout smearing currently implemented only for Nc==3");
Smear_Stout(Smear<Gimpl>* base) : SmearBase(base) {
static_assert(Nc == 3,
"Stout smearing currently implemented only for Nc==3");
}
/*! Default constructor */
Smear_Stout(double rho = 1.0):SmearBase(new Smear_APE < Gimpl > (rho)){
static_assert(Nc==3, "Stout smearing currently implemented only for Nc==3");
Smear_Stout(double rho = 1.0) : SmearBase(new Smear_APE<Gimpl>(rho)) {
static_assert(Nc == 3,
"Stout smearing currently implemented only for Nc==3");
}
~Smear_Stout(){} //delete SmearBase...
~Smear_Stout() {} // delete SmearBase...
void smear(GaugeField& u_smr,const GaugeField& U) const{
void smear(GaugeField& u_smr, const GaugeField& U) const {
GaugeField C(U._grid);
GaugeLinkField tmp(U._grid), iq_mu(U._grid), Umu(U._grid);
std::cout<< GridLogDebug << "Stout smearing started\n";
std::cout << GridLogDebug << "Stout smearing started\n";
//Smear the configurations
// Smear the configurations
SmearBase->smear(C, U);
for (int mu = 0; mu<Nd; mu++)
{
tmp = peekLorentz(C,mu);
Umu = peekLorentz(U,mu);
iq_mu = Ta(tmp * adj(Umu)); // iq_mu = Ta(Omega_mu) to match the signs with the paper
for (int mu = 0; mu < Nd; mu++) {
tmp = peekLorentz(C, mu);
Umu = peekLorentz(U, mu);
iq_mu = Ta(
tmp *
adj(Umu)); // iq_mu = Ta(Omega_mu) to match the signs with the paper
exponentiate_iQ(tmp, iq_mu);
pokeLorentz(u_smr, tmp*Umu, mu);// u_smr = exp(iQ_mu)*U_mu
pokeLorentz(u_smr, tmp * Umu, mu); // u_smr = exp(iQ_mu)*U_mu
}
std::cout<< GridLogDebug << "Stout smearing completed\n";
std::cout << GridLogDebug << "Stout smearing completed\n";
};
void derivative(GaugeField& SigmaTerm,
const GaugeField& iLambda,
const GaugeField& Gauge) const{
void derivative(GaugeField& SigmaTerm, const GaugeField& iLambda,
const GaugeField& Gauge) const {
SmearBase->derivative(SigmaTerm, iLambda, Gauge);
};
void BaseSmear(GaugeField& C,
const GaugeField& U) const{
void BaseSmear(GaugeField& C, const GaugeField& U) const {
SmearBase->smear(C, U);
};
void exponentiate_iQ(GaugeLinkField& e_iQ,
const GaugeLinkField& iQ) const{
void exponentiate_iQ(GaugeLinkField& e_iQ, const GaugeLinkField& iQ) const {
// Put this outside
// only valid for SU(3) matrices
@ -73,9 +71,9 @@
// the i sign is coming from outside
// input matrix is anti-hermitian NOT hermitian
GridBase *grid = iQ._grid;
GridBase* grid = iQ._grid;
GaugeLinkField unity(grid);
unity=1.0;
unity = 1.0;
GaugeLinkField iQ2(grid), iQ3(grid);
LatticeComplex u(grid), w(grid);
@ -87,37 +85,34 @@
set_uw(u, w, iQ2, iQ3);
set_fj(f0, f1, f2, u, w);
e_iQ = f0*unity + timesMinusI(f1) * iQ - f2 * iQ2;
e_iQ = f0 * unity + timesMinusI(f1) * iQ - f2 * iQ2;
};
void set_uw(LatticeComplex& u, LatticeComplex& w, GaugeLinkField& iQ2,
GaugeLinkField& iQ3) const {
Complex one_over_three = 1.0 / 3.0;
Complex one_over_two = 1.0 / 2.0;
void set_uw(LatticeComplex& u, LatticeComplex& w,
GaugeLinkField& iQ2, GaugeLinkField& iQ3) const{
Complex one_over_three = 1.0/3.0;
Complex one_over_two = 1.0/2.0;
GridBase *grid = u._grid;
GridBase* grid = u._grid;
LatticeComplex c0(grid), c1(grid), tmp(grid), c0max(grid), theta(grid);
// sign in c0 from the conventions on the Ta
c0 = - real(timesMinusI(trace(iQ3))) * one_over_three; //temporary hack
c1 = - real(trace(iQ2)) * one_over_two;
c0 = -imag(trace(iQ3)) * one_over_three;
c1 = -real(trace(iQ2)) * one_over_two;
//Cayley Hamilton checks to machine precision, tested
// Cayley Hamilton checks to machine precision, tested
tmp = c1 * one_over_three;
c0max = 2.0 * pow(tmp, 1.5);
theta = acos(c0/c0max)*one_over_three; // divide by three here, now leave as it is
u = sqrt(tmp) * cos( theta );
w = sqrt(c1) * sin( theta );
theta = acos(c0 / c0max) *
one_over_three; // divide by three here, now leave as it is
u = sqrt(tmp) * cos(theta);
w = sqrt(c1) * sin(theta);
}
void set_fj(LatticeComplex& f0, LatticeComplex& f1, LatticeComplex& f2,
const LatticeComplex& u, const LatticeComplex& w) const{
GridBase *grid = u._grid;
const LatticeComplex& u, const LatticeComplex& w) const {
GridBase* grid = u._grid;
LatticeComplex xi0(grid), u2(grid), w2(grid), cosw(grid);
LatticeComplex fden(grid);
LatticeComplex h0(grid), h1(grid), h2(grid);
@ -132,36 +127,34 @@
ixi0 = timesI(xi0);
emiu = cos(u) - timesI(sin(u));
e2iu = cos(2.0*u) + timesI(sin(2.0*u));
e2iu = cos(2.0 * u) + timesI(sin(2.0 * u));
h0 = e2iu * (u2 - w2) + emiu * ( (8.0*u2*cosw) + (2.0*u*(3.0*u2 + w2)*ixi0));
h1 = e2iu * (2.0 * u) - emiu * ( (2.0*u*cosw) - (3.0*u2-w2)*ixi0);
h2 = e2iu - emiu * ( cosw + (3.0*u)*ixi0);
h0 = e2iu * (u2 - w2) +
emiu * ((8.0 * u2 * cosw) + (2.0 * u * (3.0 * u2 + w2) * ixi0));
h1 = e2iu * (2.0 * u) - emiu * ((2.0 * u * cosw) - (3.0 * u2 - w2) * ixi0);
h2 = e2iu - emiu * (cosw + (3.0 * u) * ixi0);
fden = unity/(9.0*u2 - w2);// reals
fden = unity / (9.0 * u2 - w2); // reals
f0 = h0 * fden;
f1 = h1 * fden;
f2 = h2 * fden;
}
LatticeComplex func_xi0(const LatticeComplex& w) const{
LatticeComplex func_xi0(const LatticeComplex& w) const {
// Define a function to do the check
//if( w < 1e-4 ) std::cout << GridLogWarning<< "[Smear_stout] w too small: "<< w <<"\n";
return sin(w)/w;
// if( w < 1e-4 ) std::cout << GridLogWarning<< "[Smear_stout] w too small:
// "<< w <<"\n";
return sin(w) / w;
}
LatticeComplex func_xi1(const LatticeComplex& w) const{
LatticeComplex func_xi1(const LatticeComplex& w) const {
// Define a function to do the check
//if( w < 1e-4 ) std::cout << GridLogWarning << "[Smear_stout] w too small: "<< w <<"\n";
return cos(w)/(w*w) - sin(w)/(w*w*w);
}
};
// if( w < 1e-4 ) std::cout << GridLogWarning << "[Smear_stout] w too small:
// "<< w <<"\n";
return cos(w) / (w * w) - sin(w) / (w * w * w);
}
};
}
}
#endif

View File

@ -1,33 +1,34 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/simd/Grid_vector_types.h
Source file: ./lib/simd/Grid_vector_types.h
Copyright (C) 2015
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Guido Cossu <cossu@iroiro-pc.kek.jp>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
//---------------------------------------------------------------------------
/*! @file Grid_vector_types.h
@brief Defines templated class Grid_simd to deal with inner vector types
@ -43,7 +44,7 @@ Author: neo <cossu@post.kek.jp>
#ifdef SSE4
#include "Grid_sse4.h"
#endif
#if defined (AVX1)|| defined (AVX2) || defined (AVXFMA4)
#if defined(AVX1) || defined(AVX2) || defined(AVXFMA4)
#include "Grid_avx.h"
#endif
#if defined AVX512
@ -61,190 +62,250 @@ Author: neo <cossu@post.kek.jp>
namespace Grid {
//////////////////////////////////////
// To take the floating point type of real/complex type
//////////////////////////////////////
template <typename T> struct RealPart {
//////////////////////////////////////
// To take the floating point type of real/complex type
//////////////////////////////////////
template <typename T>
struct RealPart {
typedef T type;
};
template <typename T> struct RealPart< std::complex<T> >{
};
template <typename T>
struct RealPart<std::complex<T> > {
typedef T type;
};
};
//////////////////////////////////////
// demote a vector to real type
//////////////////////////////////////
//////////////////////////////////////
// demote a vector to real type
//////////////////////////////////////
// type alias used to simplify the syntax of std::enable_if
template <typename T> using Invoke = typename T::type;
template <typename Condition, typename ReturnType> using EnableIf = Invoke<std::enable_if<Condition::value, ReturnType> >;
template <typename Condition, typename ReturnType> using NotEnableIf= Invoke<std::enable_if<!Condition::value, ReturnType> >;
// type alias used to simplify the syntax of std::enable_if
template <typename T>
using Invoke = typename T::type;
template <typename Condition, typename ReturnType>
using EnableIf = Invoke<std::enable_if<Condition::value, ReturnType> >;
template <typename Condition, typename ReturnType>
using NotEnableIf = Invoke<std::enable_if<!Condition::value, ReturnType> >;
////////////////////////////////////////////////////////
// Check for complexity with type traits
template <typename T>
struct is_complex : public std::false_type {};
template <>
struct is_complex<std::complex<double> > : public std::true_type {};
template <>
struct is_complex<std::complex<float> > : public std::true_type {};
////////////////////////////////////////////////////////
// Check for complexity with type traits
template <typename T> struct is_complex : public std::false_type {};
template <> struct is_complex<std::complex<double> >: public std::true_type {};
template <> struct is_complex<std::complex<float> > : public std::true_type {};
template <typename T>
using IfReal = Invoke<std::enable_if<std::is_floating_point<T>::value, int> >;
template <typename T>
using IfComplex = Invoke<std::enable_if<is_complex<T>::value, int> >;
template <typename T>
using IfInteger = Invoke<std::enable_if<std::is_integral<T>::value, int> >;
template <typename T> using IfReal = Invoke<std::enable_if<std::is_floating_point<T>::value,int> > ;
template <typename T> using IfComplex = Invoke<std::enable_if<is_complex<T>::value,int> > ;
template <typename T> using IfInteger = Invoke<std::enable_if<std::is_integral<T>::value,int> > ;
template <typename T>
using IfNotReal =
Invoke<std::enable_if<!std::is_floating_point<T>::value, int> >;
template <typename T>
using IfNotComplex = Invoke<std::enable_if<!is_complex<T>::value, int> >;
template <typename T>
using IfNotInteger = Invoke<std::enable_if<!std::is_integral<T>::value, int> >;
template <typename T> using IfNotReal = Invoke<std::enable_if<!std::is_floating_point<T>::value,int> > ;
template <typename T> using IfNotComplex = Invoke<std::enable_if<!is_complex<T>::value,int> > ;
template <typename T> using IfNotInteger = Invoke<std::enable_if<!std::is_integral<T>::value,int> > ;
////////////////////////////////////////////////////////
// Define the operation templates functors
// general forms to allow for vsplat syntax
// need explicit declaration of types when used since
// clang cannot automatically determine the output type sometimes
template < class Out, class Input1, class Input2, class Operation >
Out binary(Input1 src_1, Input2 src_2, Operation op){
////////////////////////////////////////////////////////
// Define the operation templates functors
// general forms to allow for vsplat syntax
// need explicit declaration of types when used since
// clang cannot automatically determine the output type sometimes
template <class Out, class Input1, class Input2, class Operation>
Out binary(Input1 src_1, Input2 src_2, Operation op) {
return op(src_1, src_2);
}
}
template < class Out, class Input, class Operation >
Out unary(Input src, Operation op){
template <class Out, class Input, class Operation>
Out unary(Input src, Operation op) {
return op(src);
}
///////////////////////////////////////////////
}
///////////////////////////////////////////////
/*
/*
@brief Grid_simd class for the SIMD vector type operations
*/
template < class Scalar_type, class Vector_type >
class Grid_simd {
template <class Scalar_type, class Vector_type>
class Grid_simd {
public:
typedef typename RealPart < Scalar_type >::type Real;
typedef typename RealPart<Scalar_type>::type Real;
typedef Vector_type vector_type;
typedef Scalar_type scalar_type;
typedef union conv_t_union {
Vector_type v;
Scalar_type s[sizeof(Vector_type)/sizeof(Scalar_type)];
Scalar_type s[sizeof(Vector_type) / sizeof(Scalar_type)];
conv_t_union(){};
} conv_t;
Vector_type v;
static inline int Nsimd(void) { return sizeof(Vector_type)/sizeof(Scalar_type);}
static inline int Nsimd(void) {
return sizeof(Vector_type) / sizeof(Scalar_type);
}
Grid_simd& operator=(const Grid_simd&& rhs){v=rhs.v;return *this;};
Grid_simd& operator=(const Grid_simd& rhs){v=rhs.v;return *this;}; //faster than not declaring it and leaving to the compiler
Grid_simd()=default;
Grid_simd(const Grid_simd& rhs) :v(rhs.v){}; //compiles in movaps
Grid_simd(const Grid_simd&& rhs):v(rhs.v){};
Grid_simd &operator=(const Grid_simd &&rhs) {
v = rhs.v;
return *this;
};
Grid_simd &operator=(const Grid_simd &rhs) {
v = rhs.v;
return *this;
}; // faster than not declaring it and leaving to the compiler
Grid_simd() = default;
Grid_simd(const Grid_simd &rhs) : v(rhs.v){}; // compiles in movaps
Grid_simd(const Grid_simd &&rhs) : v(rhs.v){};
/////////////////////////////
// Constructors
/////////////////////////////
Grid_simd & operator = ( Zero & z){
Grid_simd &operator=(Zero &z) {
vzero(*this);
return (*this);
}
//Enable if complex type
template < typename S = Scalar_type >
Grid_simd(const typename std::enable_if< is_complex < S >::value, S>::type a){
vsplat(*this,a);
// Enable if complex type
template <typename S = Scalar_type>
Grid_simd(const typename std::enable_if<is_complex<S>::value, S>::type a) {
vsplat(*this, a);
};
Grid_simd(const Real a){
vsplat(*this,Scalar_type(a));
};
Grid_simd(const Real a) { vsplat(*this, Scalar_type(a)); };
///////////////////////////////////////////////
// mac, mult, sub, add, adj
///////////////////////////////////////////////
// FIXME -- alias this to an inline MAC struct.
friend inline void mac (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ a,const Grid_simd *__restrict__ x){ *y = (*a)*(*x)+(*y); };
friend inline void mac(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ a,
const Grid_simd *__restrict__ x) {
*y = (*a) * (*x) + (*y);
};
friend inline void mult(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ l,
const Grid_simd *__restrict__ r) {
*y = (*l) * (*r);
}
friend inline void sub(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ l,
const Grid_simd *__restrict__ r) {
*y = (*l) - (*r);
}
friend inline void add(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ l,
const Grid_simd *__restrict__ r) {
*y = (*l) + (*r);
}
friend inline void mult(Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) * (*r); }
friend inline void sub (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) - (*r); }
friend inline void add (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) + (*r); }
friend inline void mac(Grid_simd *__restrict__ y,
const Scalar_type *__restrict__ a,
const Grid_simd *__restrict__ x) {
*y = (*a) * (*x) + (*y);
};
friend inline void mult(Grid_simd *__restrict__ y,
const Scalar_type *__restrict__ l,
const Grid_simd *__restrict__ r) {
*y = (*l) * (*r);
}
friend inline void sub(Grid_simd *__restrict__ y,
const Scalar_type *__restrict__ l,
const Grid_simd *__restrict__ r) {
*y = (*l) - (*r);
}
friend inline void add(Grid_simd *__restrict__ y,
const Scalar_type *__restrict__ l,
const Grid_simd *__restrict__ r) {
*y = (*l) + (*r);
}
friend inline void mac (Grid_simd *__restrict__ y,const Scalar_type *__restrict__ a,const Grid_simd *__restrict__ x){ *y = (*a)*(*x)+(*y); };
friend inline void mult(Grid_simd *__restrict__ y,const Scalar_type *__restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) * (*r); }
friend inline void sub (Grid_simd *__restrict__ y,const Scalar_type *__restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) - (*r); }
friend inline void add (Grid_simd *__restrict__ y,const Scalar_type *__restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) + (*r); }
friend inline void mac (Grid_simd *__restrict__ y,const Grid_simd *__restrict__ a,const Scalar_type *__restrict__ x){ *y = (*a)*(*x)+(*y); };
friend inline void mult(Grid_simd *__restrict__ y,const Grid_simd *__restrict__ l,const Scalar_type *__restrict__ r){ *y = (*l) * (*r); }
friend inline void sub (Grid_simd *__restrict__ y,const Grid_simd *__restrict__ l,const Scalar_type *__restrict__ r){ *y = (*l) - (*r); }
friend inline void add (Grid_simd *__restrict__ y,const Grid_simd *__restrict__ l,const Scalar_type *__restrict__ r){ *y = (*l) + (*r); }
friend inline void mac(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ a,
const Scalar_type *__restrict__ x) {
*y = (*a) * (*x) + (*y);
};
friend inline void mult(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ l,
const Scalar_type *__restrict__ r) {
*y = (*l) * (*r);
}
friend inline void sub(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ l,
const Scalar_type *__restrict__ r) {
*y = (*l) - (*r);
}
friend inline void add(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ l,
const Scalar_type *__restrict__ r) {
*y = (*l) + (*r);
}
////////////////////////////////////////////////////////////////////////
// FIXME: gonna remove these load/store, get, set, prefetch
////////////////////////////////////////////////////////////////////////
friend inline void vset(Grid_simd &ret, Scalar_type *a){
friend inline void vset(Grid_simd &ret, Scalar_type *a) {
ret.v = unary<Vector_type>(a, VsetSIMD());
}
///////////////////////
// Vstore
///////////////////////
friend inline void vstore(const Grid_simd &ret, Scalar_type *a){
binary<void>(ret.v, (Real*)a, VstoreSIMD());
friend inline void vstore(const Grid_simd &ret, Scalar_type *a) {
binary<void>(ret.v, (Real *)a, VstoreSIMD());
}
///////////////////////
// Vprefetch
///////////////////////
friend inline void vprefetch(const Grid_simd &v)
{
prefetch_HINT_T0((const char*)&v.v);
friend inline void vprefetch(const Grid_simd &v) {
prefetch_HINT_T0((const char *)&v.v);
}
///////////////////////
// Reduce
///////////////////////
friend inline Scalar_type Reduce(const Grid_simd & in)
{
friend inline Scalar_type Reduce(const Grid_simd &in) {
return unary<Scalar_type>(in.v, ReduceSIMD<Scalar_type, Vector_type>());
}
////////////////////////////
// opreator scalar * simd
////////////////////////////
friend inline Grid_simd operator * (const Scalar_type &a, Grid_simd b){
friend inline Grid_simd operator*(const Scalar_type &a, Grid_simd b) {
Grid_simd va;
vsplat(va,a);
return va*b;
vsplat(va, a);
return va * b;
}
friend inline Grid_simd operator * (Grid_simd b,const Scalar_type &a){
return a*b;
friend inline Grid_simd operator*(Grid_simd b, const Scalar_type &a) {
return a * b;
}
///////////////////////
// Unary negation
///////////////////////
friend inline Grid_simd operator -(const Grid_simd &r) {
friend inline Grid_simd operator-(const Grid_simd &r) {
Grid_simd ret;
vzero(ret);
ret = ret - r;
return ret;
}
// *=,+=,-= operators
inline Grid_simd &operator *=(const Grid_simd &r) {
*this = (*this)*r;
inline Grid_simd &operator*=(const Grid_simd &r) {
*this = (*this) * r;
return *this;
// return (*this)*r; ?
}
inline Grid_simd &operator +=(const Grid_simd &r) {
*this = *this+r;
inline Grid_simd &operator+=(const Grid_simd &r) {
*this = *this + r;
return *this;
}
inline Grid_simd &operator -=(const Grid_simd &r) {
*this = *this-r;
inline Grid_simd &operator-=(const Grid_simd &r) {
*this = *this - r;
return *this;
}
@ -255,26 +316,30 @@ namespace Grid {
// provides support
///////////////////////////////////////
template<class functor> friend inline Grid_simd SimdApply (const functor &func,const Grid_simd &v) {
template <class functor>
friend inline Grid_simd SimdApply(const functor &func, const Grid_simd &v) {
Grid_simd ret;
Grid_simd::conv_t conv;
conv.v = v.v;
for(int i=0;i<Nsimd();i++){
conv.s[i]=func(conv.s[i]);
for (int i = 0; i < Nsimd(); i++) {
conv.s[i] = func(conv.s[i]);
}
ret.v = conv.v;
return ret;
}
template<class functor> friend inline Grid_simd SimdApplyBinop (const functor &func,const Grid_simd &x,const Grid_simd &y) {
template <class functor>
friend inline Grid_simd SimdApplyBinop(const functor &func,
const Grid_simd &x,
const Grid_simd &y) {
Grid_simd ret;
Grid_simd::conv_t cx;
Grid_simd::conv_t cy;
cx.v = x.v;
cy.v = y.v;
for(int i=0;i<Nsimd();i++){
cx.s[i]=func(cx.s[i],cy.s[i]);
for (int i = 0; i < Nsimd(); i++) {
cx.s[i] = func(cx.s[i], cy.s[i]);
}
ret.v = cx.v;
return ret;
@ -285,294 +350,347 @@ namespace Grid {
// all subtypes; may not be a good assumption, but could
// add the vector width as a template param for BG/Q for example
////////////////////////////////////////////////////////////////////
friend inline void permute0(Grid_simd &y,Grid_simd b){
friend inline void permute0(Grid_simd &y, Grid_simd b) {
y.v = Optimization::Permute::Permute0(b.v);
}
friend inline void permute1(Grid_simd &y,Grid_simd b){
friend inline void permute1(Grid_simd &y, Grid_simd b) {
y.v = Optimization::Permute::Permute1(b.v);
}
friend inline void permute2(Grid_simd &y,Grid_simd b){
friend inline void permute2(Grid_simd &y, Grid_simd b) {
y.v = Optimization::Permute::Permute2(b.v);
}
friend inline void permute3(Grid_simd &y,Grid_simd b){
friend inline void permute3(Grid_simd &y, Grid_simd b) {
y.v = Optimization::Permute::Permute3(b.v);
}
friend inline void permute(Grid_simd &y,Grid_simd b,int perm)
{
if ( perm & RotateBit ) {
int dist = perm&0xF;
y=rotate(b,dist);
friend inline void permute(Grid_simd &y, Grid_simd b, int perm) {
if (perm & RotateBit) {
int dist = perm & 0xF;
y = rotate(b, dist);
return;
}
switch(perm){
case 3: permute3(y,b); break;
case 2: permute2(y,b); break;
case 1: permute1(y,b); break;
case 0: permute0(y,b); break;
default: assert(0);
switch (perm) {
case 3:
permute3(y, b);
break;
case 2:
permute2(y, b);
break;
case 1:
permute1(y, b);
break;
case 0:
permute0(y, b);
break;
default:
assert(0);
}
}
};// end of Grid_simd class definition
}; // end of Grid_simd class definition
////////////////////////////////////////////////////////////////////
// General rotate
////////////////////////////////////////////////////////////////////
template <class S, class V, IfNotComplex<S> =0>
inline Grid_simd<S,V> rotate(Grid_simd<S,V> b,int nrot)
{
nrot = nrot % Grid_simd<S,V>::Nsimd();
Grid_simd<S,V> ret;
////////////////////////////////////////////////////////////////////
// General rotate
////////////////////////////////////////////////////////////////////
template <class S, class V, IfNotComplex<S> = 0>
inline Grid_simd<S, V> rotate(Grid_simd<S, V> b, int nrot) {
nrot = nrot % Grid_simd<S, V>::Nsimd();
Grid_simd<S, V> ret;
// std::cout << "Rotate Real by "<<nrot<<std::endl;
ret.v = Optimization::Rotate::rotate(b.v,nrot);
ret.v = Optimization::Rotate::rotate(b.v, nrot);
return ret;
}
template <class S, class V, IfComplex<S> =0>
inline Grid_simd<S,V> rotate(Grid_simd<S,V> b,int nrot)
{
nrot = nrot % Grid_simd<S,V>::Nsimd();
Grid_simd<S,V> ret;
}
template <class S, class V, IfComplex<S> = 0>
inline Grid_simd<S, V> rotate(Grid_simd<S, V> b, int nrot) {
nrot = nrot % Grid_simd<S, V>::Nsimd();
Grid_simd<S, V> ret;
// std::cout << "Rotate Complex by "<<nrot<<std::endl;
ret.v = Optimization::Rotate::rotate(b.v,2*nrot);
ret.v = Optimization::Rotate::rotate(b.v, 2 * nrot);
return ret;
}
}
///////////////////////
// Splat
///////////////////////
///////////////////////
// Splat
///////////////////////
// this is only for the complex version
template <class S, class V, IfComplex<S> =0, class ABtype>
inline void vsplat(Grid_simd<S,V> &ret,ABtype a, ABtype b){
// this is only for the complex version
template <class S, class V, IfComplex<S> = 0, class ABtype>
inline void vsplat(Grid_simd<S, V> &ret, ABtype a, ABtype b) {
ret.v = binary<V>(a, b, VsplatSIMD());
}
}
// overload if complex
template <class S,class V> inline void vsplat(Grid_simd<S,V> &ret, EnableIf<is_complex < S >, S> c) {
vsplat(ret,real(c),imag(c));
}
// overload if complex
template <class S, class V>
inline void vsplat(Grid_simd<S, V> &ret, EnableIf<is_complex<S>, S> c) {
vsplat(ret, real(c), imag(c));
}
//if real fill with a, if complex fill with a in the real part (first function above)
template <class S,class V>
inline void vsplat(Grid_simd<S,V> &ret,NotEnableIf<is_complex< S>,S> a){
// if real fill with a, if complex fill with a in the real part (first function
// above)
template <class S, class V>
inline void vsplat(Grid_simd<S, V> &ret, NotEnableIf<is_complex<S>, S> a) {
ret.v = unary<V>(a, VsplatSIMD());
}
//////////////////////////
}
//////////////////////////
///////////////////////////////////////////////
// Initialise to 1,0,i for the correct types
///////////////////////////////////////////////
// For complex types
template <class S,class V, IfComplex<S> = 0 > inline void vone(Grid_simd<S,V> &ret) { vsplat(ret,S(1.0,0.0)); }
template <class S,class V, IfComplex<S> = 0 > inline void vzero(Grid_simd<S,V> &ret) { vsplat(ret,S(0.0,0.0)); }// use xor?
template <class S,class V, IfComplex<S> = 0 > inline void vcomplex_i(Grid_simd<S,V> &ret){ vsplat(ret,S(0.0,1.0));}
///////////////////////////////////////////////
// Initialise to 1,0,i for the correct types
///////////////////////////////////////////////
// For complex types
template <class S, class V, IfComplex<S> = 0>
inline void vone(Grid_simd<S, V> &ret) {
vsplat(ret, S(1.0, 0.0));
}
template <class S, class V, IfComplex<S> = 0>
inline void vzero(Grid_simd<S, V> &ret) {
vsplat(ret, S(0.0, 0.0));
} // use xor?
template <class S, class V, IfComplex<S> = 0>
inline void vcomplex_i(Grid_simd<S, V> &ret) {
vsplat(ret, S(0.0, 1.0));
}
template <class S,class V, IfComplex<S> = 0 > inline void visign(Grid_simd<S,V> &ret){ vsplat(ret,S(1.0,-1.0));}
template <class S,class V, IfComplex<S> = 0 > inline void vrsign(Grid_simd<S,V> &ret){ vsplat(ret,S(-1.0,1.0));}
template <class S, class V, IfComplex<S> = 0>
inline void visign(Grid_simd<S, V> &ret) {
vsplat(ret, S(1.0, -1.0));
}
template <class S, class V, IfComplex<S> = 0>
inline void vrsign(Grid_simd<S, V> &ret) {
vsplat(ret, S(-1.0, 1.0));
}
// if not complex overload here
template <class S,class V, IfReal<S> = 0 > inline void vone (Grid_simd<S,V> &ret){ vsplat(ret,S(1.0)); }
template <class S,class V, IfReal<S> = 0 > inline void vzero(Grid_simd<S,V> &ret){ vsplat(ret,S(0.0)); }
// if not complex overload here
template <class S, class V, IfReal<S> = 0>
inline void vone(Grid_simd<S, V> &ret) {
vsplat(ret, S(1.0));
}
template <class S, class V, IfReal<S> = 0>
inline void vzero(Grid_simd<S, V> &ret) {
vsplat(ret, S(0.0));
}
// For integral types
template <class S,class V,IfInteger<S> = 0 > inline void vone(Grid_simd<S,V> &ret) {vsplat(ret,1); }
template <class S,class V,IfInteger<S> = 0 > inline void vzero(Grid_simd<S,V> &ret) {vsplat(ret,0); }
template <class S,class V,IfInteger<S> = 0 > inline void vtrue (Grid_simd<S,V> &ret){vsplat(ret,0xFFFFFFFF);}
template <class S,class V,IfInteger<S> = 0 > inline void vfalse(Grid_simd<S,V> &ret){vsplat(ret,0);}
// For integral types
template <class S, class V, IfInteger<S> = 0>
inline void vone(Grid_simd<S, V> &ret) {
vsplat(ret, 1);
}
template <class S, class V, IfInteger<S> = 0>
inline void vzero(Grid_simd<S, V> &ret) {
vsplat(ret, 0);
}
template <class S, class V, IfInteger<S> = 0>
inline void vtrue(Grid_simd<S, V> &ret) {
vsplat(ret, 0xFFFFFFFF);
}
template <class S, class V, IfInteger<S> = 0>
inline void vfalse(Grid_simd<S, V> &ret) {
vsplat(ret, 0);
}
template<class S,class V> inline void zeroit(Grid_simd<S,V> &z){ vzero(z);}
template <class S, class V>
inline void zeroit(Grid_simd<S, V> &z) {
vzero(z);
}
///////////////////////
// Vstream
///////////////////////
template <class S,class V, IfReal<S> = 0 >
inline void vstream(Grid_simd<S,V> &out,const Grid_simd<S,V> &in){
///////////////////////
// Vstream
///////////////////////
template <class S, class V, IfReal<S> = 0>
inline void vstream(Grid_simd<S, V> &out, const Grid_simd<S, V> &in) {
binary<void>((S *)&out.v, in.v, VstreamSIMD());
}
template <class S,class V, IfComplex<S> = 0 >
inline void vstream(Grid_simd<S,V> &out,const Grid_simd<S,V> &in){
}
template <class S, class V, IfComplex<S> = 0>
inline void vstream(Grid_simd<S, V> &out, const Grid_simd<S, V> &in) {
typedef typename S::value_type T;
binary<void>((T *)&out.v, in.v, VstreamSIMD());
}
}
template <class S,class V, IfInteger<S> = 0 >
inline void vstream(Grid_simd<S,V> &out,const Grid_simd<S,V> &in){
out=in;
}
template <class S, class V, IfInteger<S> = 0>
inline void vstream(Grid_simd<S, V> &out, const Grid_simd<S, V> &in) {
out = in;
}
////////////////////////////////////
// Arithmetic operator overloads +,-,*
////////////////////////////////////
template<class S,class V> inline Grid_simd<S,V> operator + (Grid_simd<S,V> a, Grid_simd<S,V> b) {
Grid_simd<S,V> ret;
////////////////////////////////////
// Arithmetic operator overloads +,-,*
////////////////////////////////////
template <class S, class V>
inline Grid_simd<S, V> operator+(Grid_simd<S, V> a, Grid_simd<S, V> b) {
Grid_simd<S, V> ret;
ret.v = binary<V>(a.v, b.v, SumSIMD());
return ret;
};
};
template<class S,class V> inline Grid_simd<S,V> operator - (Grid_simd<S,V> a, Grid_simd<S,V> b) {
Grid_simd<S,V> ret;
template <class S, class V>
inline Grid_simd<S, V> operator-(Grid_simd<S, V> a, Grid_simd<S, V> b) {
Grid_simd<S, V> ret;
ret.v = binary<V>(a.v, b.v, SubSIMD());
return ret;
};
};
// Distinguish between complex types and others
template<class S,class V, IfComplex<S> = 0 > inline Grid_simd<S,V> operator * (Grid_simd<S,V> a, Grid_simd<S,V> b) {
Grid_simd<S,V> ret;
ret.v = binary<V>(a.v,b.v, MultComplexSIMD());
// Distinguish between complex types and others
template <class S, class V, IfComplex<S> = 0>
inline Grid_simd<S, V> operator*(Grid_simd<S, V> a, Grid_simd<S, V> b) {
Grid_simd<S, V> ret;
ret.v = binary<V>(a.v, b.v, MultComplexSIMD());
return ret;
};
};
// Real/Integer types
template<class S,class V, IfNotComplex<S> = 0 > inline Grid_simd<S,V> operator * (Grid_simd<S,V> a, Grid_simd<S,V> b) {
Grid_simd<S,V> ret;
ret.v = binary<V>(a.v,b.v, MultSIMD());
// Real/Integer types
template <class S, class V, IfNotComplex<S> = 0>
inline Grid_simd<S, V> operator*(Grid_simd<S, V> a, Grid_simd<S, V> b) {
Grid_simd<S, V> ret;
ret.v = binary<V>(a.v, b.v, MultSIMD());
return ret;
};
};
///////////////////////
// Conjugate
///////////////////////
template <class S,class V, IfComplex<S> = 0 >
inline Grid_simd<S,V> conjugate(const Grid_simd<S,V> &in){
Grid_simd<S,V> ret ;
///////////////////////
// Conjugate
///////////////////////
template <class S, class V, IfComplex<S> = 0>
inline Grid_simd<S, V> conjugate(const Grid_simd<S, V> &in) {
Grid_simd<S, V> ret;
ret.v = unary<V>(in.v, ConjSIMD());
return ret;
}
template <class S,class V, IfNotComplex<S> = 0 > inline Grid_simd<S,V> conjugate(const Grid_simd<S,V> &in){
}
template <class S, class V, IfNotComplex<S> = 0>
inline Grid_simd<S, V> conjugate(const Grid_simd<S, V> &in) {
return in; // for real objects
}
}
//Suppress adj for integer types... // odd; why conjugate above but not adj??
template < class S, class V, IfNotInteger<S> = 0 >
inline Grid_simd<S,V> adj(const Grid_simd<S,V> &in){ return conjugate(in); }
// Suppress adj for integer types... // odd; why conjugate above but not adj??
template <class S, class V, IfNotInteger<S> = 0>
inline Grid_simd<S, V> adj(const Grid_simd<S, V> &in) {
return conjugate(in);
}
///////////////////////
// timesMinusI
///////////////////////
template<class S,class V,IfComplex<S> = 0 >
inline void timesMinusI( Grid_simd<S,V> &ret,const Grid_simd<S,V> &in){
///////////////////////
// timesMinusI
///////////////////////
template <class S, class V, IfComplex<S> = 0>
inline void timesMinusI(Grid_simd<S, V> &ret, const Grid_simd<S, V> &in) {
ret.v = binary<V>(in.v, ret.v, TimesMinusISIMD());
}
}
template<class S,class V,IfComplex<S> = 0 >
inline Grid_simd<S,V> timesMinusI(const Grid_simd<S,V> &in){
Grid_simd<S,V> ret;
timesMinusI(ret,in);
template <class S, class V, IfComplex<S> = 0>
inline Grid_simd<S, V> timesMinusI(const Grid_simd<S, V> &in) {
Grid_simd<S, V> ret;
timesMinusI(ret, in);
return ret;
}
}
template<class S,class V,IfNotComplex<S> = 0 >
inline Grid_simd<S,V> timesMinusI(const Grid_simd<S,V> &in){
template <class S, class V, IfNotComplex<S> = 0>
inline Grid_simd<S, V> timesMinusI(const Grid_simd<S, V> &in) {
return in;
}
}
///////////////////////
// timesI
///////////////////////
template<class S,class V,IfComplex<S> = 0 >
inline void timesI(Grid_simd<S,V> &ret,const Grid_simd<S,V> &in){
///////////////////////
// timesI
///////////////////////
template <class S, class V, IfComplex<S> = 0>
inline void timesI(Grid_simd<S, V> &ret, const Grid_simd<S, V> &in) {
ret.v = binary<V>(in.v, ret.v, TimesISIMD());
}
}
template<class S,class V,IfComplex<S> = 0 >
inline Grid_simd<S,V> timesI(const Grid_simd<S,V> &in){
Grid_simd<S,V> ret;
timesI(ret,in);
template <class S, class V, IfComplex<S> = 0>
inline Grid_simd<S, V> timesI(const Grid_simd<S, V> &in) {
Grid_simd<S, V> ret;
timesI(ret, in);
return ret;
}
}
template<class S,class V,IfNotComplex<S> = 0 >
inline Grid_simd<S,V> timesI(const Grid_simd<S,V> &in){
template <class S, class V, IfNotComplex<S> = 0>
inline Grid_simd<S, V> timesI(const Grid_simd<S, V> &in) {
return in;
}
}
/////////////////////
// Inner, outer
/////////////////////
/////////////////////
// Inner, outer
/////////////////////
template<class S, class V >
inline Grid_simd< S, V> innerProduct(const Grid_simd< S, V> & l, const Grid_simd< S, V> & r)
{
return conjugate(l)*r;
}
template <class S, class V>
inline Grid_simd<S, V> innerProduct(const Grid_simd<S, V> &l,
const Grid_simd<S, V> &r) {
return conjugate(l) * r;
}
template<class S, class V >
inline Grid_simd< S, V> outerProduct(const Grid_simd< S, V> &l, const Grid_simd< S, V> & r)
{
return l*conjugate(r);
}
template <class S, class V>
inline Grid_simd<S, V> outerProduct(const Grid_simd<S, V> &l,
const Grid_simd<S, V> &r) {
return l * conjugate(r);
}
template<class S, class V >
inline Grid_simd< S, V> trace(const Grid_simd< S, V> &arg){
template <class S, class V>
inline Grid_simd<S, V> trace(const Grid_simd<S, V> &arg) {
return arg;
}
}
////////////////////////////////////////////////////////////
// copy/splat complex real parts into real;
// insert real into complex and zero imag;
////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////
// copy/splat complex real parts into real;
// insert real into complex and zero imag;
////////////////////////////////////////////////////////////
//real = toReal( complex )
template<class S,class V,IfReal<S> = 0>
inline Grid_simd<S,V> toReal(const Grid_simd<std::complex<S>,V> &in)
{
typedef Grid_simd<S,V> simd;
// real = toReal( complex )
template <class S, class V, IfReal<S> = 0>
inline Grid_simd<S, V> toReal(const Grid_simd<std::complex<S>, V> &in) {
typedef Grid_simd<S, V> simd;
simd ret;
typename simd::conv_t conv;
conv.v = in.v; // copy the vector content (bytewise)
for(int i=0;i<simd::Nsimd();i+=2){
conv.s[i+1]=conv.s[i]; // duplicate (r,r);(r,r);(r,r); etc...
for (int i = 0; i < simd::Nsimd(); i += 2) {
conv.s[i + 1] = conv.s[i]; // duplicate (r,r);(r,r);(r,r); etc...
}
ret.v = conv.v;
return ret;
}
}
//complex = toComplex( real )
template<class R,class V,IfReal<R> = 0 > // must be a real arg
inline Grid_simd<std::complex<R>,V> toComplex (const Grid_simd<R,V> &in)
{
typedef Grid_simd<R,V> Rsimd;
typedef Grid_simd<std::complex<R>,V> Csimd;
typename Rsimd::conv_t conv;// address as real
// complex = toComplex( real )
template <class R, class V, IfReal<R> = 0> // must be a real arg
inline Grid_simd<std::complex<R>, V> toComplex(const Grid_simd<R, V> &in) {
typedef Grid_simd<R, V> Rsimd;
typedef Grid_simd<std::complex<R>, V> Csimd;
typename Rsimd::conv_t conv; // address as real
conv.v = in.v;
for(int i=0;i<Rsimd::Nsimd();i+=2){
assert(conv.s[i+1]==conv.s[i]); // trap any cases where real was not duplicated
// indicating the SIMD grids of real and imag assignment did not correctly match
conv.s[i+1]=0.0; // zero imaginary parts
for (int i = 0; i < Rsimd::Nsimd(); i += 2) {
assert(conv.s[i + 1] ==
conv.s[i]); // trap any cases where real was not duplicated
// indicating the SIMD grids of real and imag assignment did not correctly
// match
conv.s[i + 1] = 0.0; // zero imaginary parts
}
Csimd ret;
ret.v = conv.v;
return ret;
}
}
///////////////////////////////
// Define available types
///////////////////////////////
typedef Grid_simd<float, SIMD_Ftype> vRealF;
typedef Grid_simd<double, SIMD_Dtype> vRealD;
typedef Grid_simd<std::complex<float>, SIMD_Ftype> vComplexF;
typedef Grid_simd<std::complex<double>, SIMD_Dtype> vComplexD;
typedef Grid_simd<Integer, SIMD_Itype> vInteger;
/////////////////////////////////////////
// Some traits to recognise the types
/////////////////////////////////////////
template <typename T>
struct is_simd : public std::false_type {};
template <>
struct is_simd<vRealF> : public std::true_type {};
template <>
struct is_simd<vRealD> : public std::true_type {};
template <>
struct is_simd<vComplexF> : public std::true_type {};
template <>
struct is_simd<vComplexD> : public std::true_type {};
template <>
struct is_simd<vInteger> : public std::true_type {};
///////////////////////////////
// Define available types
///////////////////////////////
typedef Grid_simd< float , SIMD_Ftype > vRealF;
typedef Grid_simd< double , SIMD_Dtype > vRealD;
typedef Grid_simd< std::complex< float > , SIMD_Ftype > vComplexF;
typedef Grid_simd< std::complex< double >, SIMD_Dtype > vComplexD;
typedef Grid_simd< Integer , SIMD_Itype > vInteger;
/////////////////////////////////////////
// Some traits to recognise the types
/////////////////////////////////////////
template <typename T> struct is_simd : public std::false_type{};
template <> struct is_simd<vRealF> : public std::true_type {};
template <> struct is_simd<vRealD> : public std::true_type {};
template <> struct is_simd<vComplexF>: public std::true_type {};
template <> struct is_simd<vComplexD>: public std::true_type {};
template <> struct is_simd<vInteger> : public std::true_type {};
template <typename T> using IfSimd = Invoke<std::enable_if< is_simd<T>::value,int> > ;
template <typename T> using IfNotSimd = Invoke<std::enable_if<!is_simd<T>::value,unsigned> > ;
template <typename T>
using IfSimd = Invoke<std::enable_if<is_simd<T>::value, int> >;
template <typename T>
using IfNotSimd = Invoke<std::enable_if<!is_simd<T>::value, unsigned> >;
}
#endif

View File

@ -1,33 +1,34 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/simd/Grid_vector_unops.h
Source file: ./lib/simd/Grid_vector_unops.h
Copyright (C) 2015
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_VECTOR_UNOPS
#define GRID_VECTOR_UNOPS
@ -35,213 +36,199 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
namespace Grid {
template<class scalar> struct SqrtRealFunctor {
scalar operator()(const scalar &a) const {
return sqrt(real(a));
}
};
template <class scalar>
struct SqrtRealFunctor {
scalar operator()(const scalar &a) const { return sqrt(real(a)); }
};
template<class scalar> struct RSqrtRealFunctor {
template <class scalar>
struct RSqrtRealFunctor {
scalar operator()(const scalar &a) const {
return scalar(1.0/sqrt(real(a)));
return scalar(1.0 / sqrt(real(a)));
}
};
};
template<class scalar> struct CosRealFunctor {
scalar operator()(const scalar &a) const {
return cos(real(a));
}
};
template <class scalar>
struct CosRealFunctor {
scalar operator()(const scalar &a) const { return cos(real(a)); }
};
template<class scalar> struct SinRealFunctor {
scalar operator()(const scalar &a) const {
return sin(real(a));
}
};
template <class scalar>
struct SinRealFunctor {
scalar operator()(const scalar &a) const { return sin(real(a)); }
};
template<class scalar> struct AcosRealFunctor {
scalar operator()(const scalar &a) const {
return acos(real(a));
}
};
template <class scalar>
struct AcosRealFunctor {
scalar operator()(const scalar &a) const { return acos(real(a)); }
};
template<class scalar> struct AsinRealFunctor {
scalar operator()(const scalar &a) const {
return asin(real(a));
}
};
template <class scalar>
struct AsinRealFunctor {
scalar operator()(const scalar &a) const { return asin(real(a)); }
};
template<class scalar> struct LogRealFunctor {
scalar operator()(const scalar &a) const {
return log(real(a));
}
};
template <class scalar>
struct LogRealFunctor {
scalar operator()(const scalar &a) const { return log(real(a)); }
};
template<class scalar> struct ExpRealFunctor {
scalar operator()(const scalar &a) const {
return exp(real(a));
}
};
template<class scalar> struct NotFunctor {
scalar operator()(const scalar &a) const {
return (!a);
}
};
template<class scalar> struct AbsRealFunctor {
scalar operator()(const scalar &a) const {
return std::abs(real(a));
}
};
template <class scalar>
struct ExpRealFunctor {
scalar operator()(const scalar &a) const { return exp(real(a)); }
};
template <class scalar>
struct NotFunctor {
scalar operator()(const scalar &a) const { return (!a); }
};
template <class scalar>
struct AbsRealFunctor {
scalar operator()(const scalar &a) const { return std::abs(real(a)); }
};
template<class scalar> struct PowRealFunctor {
template <class scalar>
struct PowRealFunctor {
double y;
PowRealFunctor(double _y) : y(_y) {};
scalar operator()(const scalar &a) const {
return pow(real(a),y);
}
};
PowRealFunctor(double _y) : y(_y){};
scalar operator()(const scalar &a) const { return pow(real(a), y); }
};
template<class scalar> struct ModIntFunctor {
template <class scalar>
struct ModIntFunctor {
Integer y;
ModIntFunctor(Integer _y) : y(_y) {};
scalar operator()(const scalar &a) const {
return Integer(a)%y;
}
};
ModIntFunctor(Integer _y) : y(_y){};
scalar operator()(const scalar &a) const { return Integer(a) % y; }
};
template<class scalar> struct DivIntFunctor {
template <class scalar>
struct DivIntFunctor {
Integer y;
DivIntFunctor(Integer _y) : y(_y) {};
scalar operator()(const scalar &a) const {
return Integer(a)/y;
}
};
DivIntFunctor(Integer _y) : y(_y){};
scalar operator()(const scalar &a) const { return Integer(a) / y; }
};
template<class scalar> struct RealFunctor {
scalar operator()(const std::complex<scalar> &a) const {
return real(a);
}
};
template<class scalar> struct ImagFunctor {
scalar operator()(const std::complex<scalar> &a) const {
return imag(a);
}
};
template < class S, class V >
inline Grid_simd<S,V> real(const Grid_simd<S,V> &r) {
return SimdApply(RealFunctor<S>(),r);
}
template < class S, class V >
inline Grid_simd<S,V> imag(const Grid_simd<S,V> &r) {
return SimdApply(ImagFunctor<S>(),r);
}
template <class scalar>
struct RealFunctor {
scalar operator()(const scalar &a) const { return std::real(a); }
};
template <class scalar>
struct ImagFunctor {
scalar operator()(const scalar &a) const { return std::imag(a); }
};
template <class S, class V>
inline Grid_simd<S, V> real(const Grid_simd<S, V> &r) {
return SimdApply(RealFunctor<S>(), r);
}
template <class S, class V>
inline Grid_simd<S, V> imag(const Grid_simd<S, V> &r) {
return SimdApply(ImagFunctor<S>(), r);
}
template <class S, class V>
inline Grid_simd<S, V> sqrt(const Grid_simd<S, V> &r) {
return SimdApply(SqrtRealFunctor<S>(), r);
}
template <class S, class V>
inline Grid_simd<S, V> rsqrt(const Grid_simd<S, V> &r) {
return SimdApply(RSqrtRealFunctor<S>(), r);
}
template <class Scalar>
inline Scalar rsqrt(const Scalar &r) {
return (RSqrtRealFunctor<Scalar>(), r);
}
template < class S, class V >
inline Grid_simd<S,V> sqrt(const Grid_simd<S,V> &r) {
return SimdApply(SqrtRealFunctor<S>(),r);
}
template < class S, class V >
inline Grid_simd<S,V> rsqrt(const Grid_simd<S,V> &r) {
return SimdApply(RSqrtRealFunctor<S>(),r);
}
template < class Scalar >
inline Scalar rsqrt(const Scalar &r) {
return (RSqrtRealFunctor<Scalar>(),r);
}
template < class S, class V >
inline Grid_simd<S,V> cos(const Grid_simd<S,V> &r) {
return SimdApply(CosRealFunctor<S>(),r);
}
template < class S, class V >
inline Grid_simd<S,V> sin(const Grid_simd<S,V> &r) {
return SimdApply(SinRealFunctor<S>(),r);
}
template < class S, class V >
inline Grid_simd<S,V> acos(const Grid_simd<S,V> &r) {
return SimdApply(AcosRealFunctor<S>(),r);
}
template < class S, class V >
inline Grid_simd<S,V> asin(const Grid_simd<S,V> &r) {
return SimdApply(AsinRealFunctor<S>(),r);
}
template < class S, class V >
inline Grid_simd<S,V> log(const Grid_simd<S,V> &r) {
return SimdApply(LogRealFunctor<S>(),r);
}
template < class S, class V >
inline Grid_simd<S,V> abs(const Grid_simd<S,V> &r) {
return SimdApply(AbsRealFunctor<S>(),r);
}
template < class S, class V >
inline Grid_simd<S,V> exp(const Grid_simd<S,V> &r) {
return SimdApply(ExpRealFunctor<S>(),r);
}
template < class S, class V >
inline Grid_simd<S,V> Not(const Grid_simd<S,V> &r) {
return SimdApply(NotFunctor<S>(),r);
}
template < class S, class V >
inline Grid_simd<S,V> pow(const Grid_simd<S,V> &r,double y) {
return SimdApply(PowRealFunctor<S>(y),r);
}
template < class S, class V >
inline Grid_simd<S,V> mod(const Grid_simd<S,V> &r,Integer y) {
return SimdApply(ModIntFunctor<S>(y),r);
}
template < class S, class V >
inline Grid_simd<S,V> div(const Grid_simd<S,V> &r,Integer y) {
return SimdApply(DivIntFunctor<S>(y),r);
}
////////////////////////////////////////////////////////////////////////////
// Allows us to assign into **conformable** real vectors from complex
////////////////////////////////////////////////////////////////////////////
// template < class S, class V >
// inline auto ComplexRemove(const Grid_simd<S,V> &c) -> Grid_simd<Grid_simd<S,V>::Real,V> {
// Grid_simd<Grid_simd<S,V>::Real,V> ret;
// ret.v = c.v;
// return ret;
// }
template<class scalar> struct AndFunctor {
scalar operator()(const scalar &x, const scalar &y) const {
return x & y;
}
};
template<class scalar> struct OrFunctor {
scalar operator()(const scalar &x, const scalar &y) const {
return x | y;
}
};
template<class scalar> struct AndAndFunctor {
scalar operator()(const scalar &x, const scalar &y) const {
return x && y;
}
};
template<class scalar> struct OrOrFunctor {
scalar operator()(const scalar &x, const scalar &y) const {
return x || y;
}
};
////////////////////////////////
// Calls to simd binop functors
////////////////////////////////
template < class S, class V >
inline Grid_simd<S,V> operator &(const Grid_simd<S,V> &x,const Grid_simd<S,V> &y) {
return SimdApplyBinop(AndFunctor<S>(),x,y);
}
template < class S, class V >
inline Grid_simd<S,V> operator &&(const Grid_simd<S,V> &x,const Grid_simd<S,V> &y) {
return SimdApplyBinop(AndAndFunctor<S>(),x,y);
}
template < class S, class V >
inline Grid_simd<S,V> operator |(const Grid_simd<S,V> &x,const Grid_simd<S,V> &y) {
return SimdApplyBinop(OrFunctor<S>(),x,y);
}
template < class S, class V >
inline Grid_simd<S,V> operator ||(const Grid_simd<S,V> &x,const Grid_simd<S,V> &y) {
return SimdApplyBinop(OrOrFunctor<S>(),x,y);
}
template <class S, class V>
inline Grid_simd<S, V> cos(const Grid_simd<S, V> &r) {
return SimdApply(CosRealFunctor<S>(), r);
}
template <class S, class V>
inline Grid_simd<S, V> sin(const Grid_simd<S, V> &r) {
return SimdApply(SinRealFunctor<S>(), r);
}
template <class S, class V>
inline Grid_simd<S, V> acos(const Grid_simd<S, V> &r) {
return SimdApply(AcosRealFunctor<S>(), r);
}
template <class S, class V>
inline Grid_simd<S, V> asin(const Grid_simd<S, V> &r) {
return SimdApply(AsinRealFunctor<S>(), r);
}
template <class S, class V>
inline Grid_simd<S, V> log(const Grid_simd<S, V> &r) {
return SimdApply(LogRealFunctor<S>(), r);
}
template <class S, class V>
inline Grid_simd<S, V> abs(const Grid_simd<S, V> &r) {
return SimdApply(AbsRealFunctor<S>(), r);
}
template <class S, class V>
inline Grid_simd<S, V> exp(const Grid_simd<S, V> &r) {
return SimdApply(ExpRealFunctor<S>(), r);
}
template <class S, class V>
inline Grid_simd<S, V> Not(const Grid_simd<S, V> &r) {
return SimdApply(NotFunctor<S>(), r);
}
template <class S, class V>
inline Grid_simd<S, V> pow(const Grid_simd<S, V> &r, double y) {
return SimdApply(PowRealFunctor<S>(y), r);
}
template <class S, class V>
inline Grid_simd<S, V> mod(const Grid_simd<S, V> &r, Integer y) {
return SimdApply(ModIntFunctor<S>(y), r);
}
template <class S, class V>
inline Grid_simd<S, V> div(const Grid_simd<S, V> &r, Integer y) {
return SimdApply(DivIntFunctor<S>(y), r);
}
////////////////////////////////////////////////////////////////////////////
// Allows us to assign into **conformable** real vectors from complex
////////////////////////////////////////////////////////////////////////////
// template < class S, class V >
// inline auto ComplexRemove(const Grid_simd<S,V> &c) ->
// Grid_simd<Grid_simd<S,V>::Real,V> {
// Grid_simd<Grid_simd<S,V>::Real,V> ret;
// ret.v = c.v;
// return ret;
// }
template <class scalar>
struct AndFunctor {
scalar operator()(const scalar &x, const scalar &y) const { return x & y; }
};
template <class scalar>
struct OrFunctor {
scalar operator()(const scalar &x, const scalar &y) const { return x | y; }
};
template <class scalar>
struct AndAndFunctor {
scalar operator()(const scalar &x, const scalar &y) const { return x && y; }
};
template <class scalar>
struct OrOrFunctor {
scalar operator()(const scalar &x, const scalar &y) const { return x || y; }
};
////////////////////////////////
// Calls to simd binop functors
////////////////////////////////
template <class S, class V>
inline Grid_simd<S, V> operator&(const Grid_simd<S, V> &x,
const Grid_simd<S, V> &y) {
return SimdApplyBinop(AndFunctor<S>(), x, y);
}
template <class S, class V>
inline Grid_simd<S, V> operator&&(const Grid_simd<S, V> &x,
const Grid_simd<S, V> &y) {
return SimdApplyBinop(AndAndFunctor<S>(), x, y);
}
template <class S, class V>
inline Grid_simd<S, V> operator|(const Grid_simd<S, V> &x,
const Grid_simd<S, V> &y) {
return SimdApplyBinop(OrFunctor<S>(), x, y);
}
template <class S, class V>
inline Grid_simd<S, V> operator||(const Grid_simd<S, V> &x,
const Grid_simd<S, V> &y) {
return SimdApplyBinop(OrOrFunctor<S>(), x, y);
}
}
#endif

View File

@ -1,31 +1,32 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_simd.cc
Source file: ./tests/Test_simd.cc
Copyright (C) 2015
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid.h>
using namespace std;
@ -62,6 +63,18 @@ public:
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = adj(i1);}
std::string name(void) const { return std::string("Adj"); }
};
class funcImag {
public:
funcImag() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = imag(i1);}
std::string name(void) const { return std::string("imag"); }
};
class funcReal {
public:
funcReal() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = real(i1);}
std::string name(void) const { return std::string("real"); }
};
class funcTimesI {
public:
@ -141,7 +154,13 @@ void Tester(const functor &func)
}
extract<vec,scal>(v_result,result);
std::cout<<GridLogMessage << " " << func.name()<<std::endl;
std::cout << GridLogMessage << " " << func.name() << std::endl;
std::cout << GridLogDebug << v_input1 << std::endl;
std::cout << GridLogDebug << v_result << std::endl;
int ok=0;
for(int i=0;i<Nsimd;i++){
@ -389,6 +408,8 @@ int main (int argc, char ** argv)
Tester<ComplexF,vComplexF>(funcTimes());
Tester<ComplexF,vComplexF>(funcConj());
Tester<ComplexF,vComplexF>(funcAdj());
Tester<ComplexF,vComplexF>(funcReal());
Tester<ComplexF,vComplexF>(funcImag());
Tester<ComplexF,vComplexF>(funcInnerProduct());
ReductionTester<ComplexF,ComplexF,vComplexF>(funcReduce());
@ -421,17 +442,21 @@ int main (int argc, char ** argv)
Tester<ComplexD,vComplexD>(funcTimes());
Tester<ComplexD,vComplexD>(funcConj());
Tester<ComplexD,vComplexD>(funcAdj());
Tester<ComplexD,vComplexD>(funcInnerProduct());
ReductionTester<ComplexD,ComplexD,vComplexD>(funcReduce());
Tester<ComplexD, vComplexD>(funcReal());
Tester<ComplexD, vComplexD>(funcImag());
Tester<ComplexD, vComplexD>(funcInnerProduct());
ReductionTester<ComplexD, ComplexD, vComplexD>(funcReduce());
std::cout<<GridLogMessage << "==================================="<< std::endl;
std::cout<<GridLogMessage << "Testing vComplexD permutes "<<std::endl;
std::cout<<GridLogMessage << "==================================="<< std::endl;
std::cout << GridLogMessage
<< "===================================" << std::endl;
std::cout << GridLogMessage << "Testing vComplexD permutes " << std::endl;
std::cout << GridLogMessage
<< "===================================" << std::endl;
// Log2 iteration
for(int i=0;(1<<i)< vComplexD::Nsimd();i++){
PermTester<ComplexD,vComplexD>(funcPermute(i));
for (int i = 0; (1 << i) < vComplexD::Nsimd(); i++) {
PermTester<ComplexD, vComplexD>(funcPermute(i));
}