1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Debugged the real() and imag() functions and added tests to Test_Simd

This commit is contained in:
Guido Cossu 2016-07-06 14:16:03 +01:00
parent 3e3b367aa9
commit e3d5319470
6 changed files with 999 additions and 867 deletions

View File

@ -1,32 +1,33 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/Simd.h Source file: ./lib/Simd.h
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp> Author: neo <cossu@post.kek.jp>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution
*************************************************************************************/ directory
/* END LEGAL */ *************************************************************************************/
/* END LEGAL */
#ifndef GRID_SIMD_H #ifndef GRID_SIMD_H
#define GRID_SIMD_H #define GRID_SIMD_H
@ -118,6 +119,14 @@ namespace Grid {
inline ComplexD timesI(const ComplexD &r) { return(r*ComplexD(0.0,1.0));} inline ComplexD timesI(const ComplexD &r) { return(r*ComplexD(0.0,1.0));}
inline ComplexF timesMinusI(const ComplexF &r){ return(r*ComplexF(0.0,-1.0));} inline ComplexF timesMinusI(const ComplexF &r){ return(r*ComplexF(0.0,-1.0));}
inline ComplexD timesMinusI(const ComplexD &r){ return(r*ComplexD(0.0,-1.0));} inline ComplexD timesMinusI(const ComplexD &r){ return(r*ComplexD(0.0,-1.0));}
// define projections to real and imaginay parts
inline ComplexF projReal(const ComplexF &r){return( ComplexF(std::real(r), 0.0));}
inline ComplexD projReal(const ComplexD &r){return( ComplexD(std::real(r), 0.0));}
inline ComplexF projImag(const ComplexF &r){return (ComplexF(std::imag(r), 0.0 ));}
inline ComplexD projImag(const ComplexD &r){return (ComplexD(std::imag(r), 0.0));}
// define auxiliary functions for complex computations
inline void timesI(ComplexF &ret,const ComplexF &r) { ret = timesI(r);} inline void timesI(ComplexF &ret,const ComplexF &r) { ret = timesI(r);}
inline void timesI(ComplexD &ret,const ComplexD &r) { ret = timesI(r);} inline void timesI(ComplexD &ret,const ComplexD &r) { ret = timesI(r);}
inline void timesMinusI(ComplexF &ret,const ComplexF &r){ ret = timesMinusI(r);} inline void timesMinusI(ComplexF &ret,const ComplexF &r){ ret = timesMinusI(r);}

View File

@ -40,7 +40,7 @@ namespace Grid {
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){ template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
ComplexD nrm = innerProduct(arg,arg); ComplexD nrm = innerProduct(arg,arg);
return real(nrm); return std::real(nrm);
} }
template<class vobj> template<class vobj>

View File

@ -5,64 +5,62 @@
#ifndef STOUT_SMEAR_ #ifndef STOUT_SMEAR_
#define STOUT_SMEAR_ #define STOUT_SMEAR_
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
/*! @brief Stout smearing of link variable. */ /*! @brief Stout smearing of link variable. */
template <class Gimpl> template <class Gimpl>
class Smear_Stout: public Smear<Gimpl> { class Smear_Stout : public Smear<Gimpl> {
private: private:
const Smear < Gimpl > * SmearBase; const Smear<Gimpl>* SmearBase;
public: public:
INHERIT_GIMPL_TYPES(Gimpl) INHERIT_GIMPL_TYPES(Gimpl)
Smear_Stout(Smear < Gimpl >* base):SmearBase(base){ Smear_Stout(Smear<Gimpl>* base) : SmearBase(base) {
static_assert(Nc==3, "Stout smearing currently implemented only for Nc==3"); static_assert(Nc == 3,
"Stout smearing currently implemented only for Nc==3");
} }
/*! Default constructor */ /*! Default constructor */
Smear_Stout(double rho = 1.0):SmearBase(new Smear_APE < Gimpl > (rho)){ Smear_Stout(double rho = 1.0) : SmearBase(new Smear_APE<Gimpl>(rho)) {
static_assert(Nc==3, "Stout smearing currently implemented only for Nc==3"); static_assert(Nc == 3,
"Stout smearing currently implemented only for Nc==3");
} }
~Smear_Stout(){} //delete SmearBase... ~Smear_Stout() {} // delete SmearBase...
void smear(GaugeField& u_smr,const GaugeField& U) const{ void smear(GaugeField& u_smr, const GaugeField& U) const {
GaugeField C(U._grid); GaugeField C(U._grid);
GaugeLinkField tmp(U._grid), iq_mu(U._grid), Umu(U._grid); GaugeLinkField tmp(U._grid), iq_mu(U._grid), Umu(U._grid);
std::cout<< GridLogDebug << "Stout smearing started\n"; std::cout << GridLogDebug << "Stout smearing started\n";
//Smear the configurations // Smear the configurations
SmearBase->smear(C, U); SmearBase->smear(C, U);
for (int mu = 0; mu<Nd; mu++) for (int mu = 0; mu < Nd; mu++) {
{ tmp = peekLorentz(C, mu);
tmp = peekLorentz(C,mu); Umu = peekLorentz(U, mu);
Umu = peekLorentz(U,mu); iq_mu = Ta(
iq_mu = Ta(tmp * adj(Umu)); // iq_mu = Ta(Omega_mu) to match the signs with the paper tmp *
adj(Umu)); // iq_mu = Ta(Omega_mu) to match the signs with the paper
exponentiate_iQ(tmp, iq_mu); exponentiate_iQ(tmp, iq_mu);
pokeLorentz(u_smr, tmp*Umu, mu);// u_smr = exp(iQ_mu)*U_mu pokeLorentz(u_smr, tmp * Umu, mu); // u_smr = exp(iQ_mu)*U_mu
} }
std::cout<< GridLogDebug << "Stout smearing completed\n"; std::cout << GridLogDebug << "Stout smearing completed\n";
}; };
void derivative(GaugeField& SigmaTerm, const GaugeField& iLambda,
void derivative(GaugeField& SigmaTerm, const GaugeField& Gauge) const {
const GaugeField& iLambda,
const GaugeField& Gauge) const{
SmearBase->derivative(SigmaTerm, iLambda, Gauge); SmearBase->derivative(SigmaTerm, iLambda, Gauge);
}; };
void BaseSmear(GaugeField& C, const GaugeField& U) const {
void BaseSmear(GaugeField& C,
const GaugeField& U) const{
SmearBase->smear(C, U); SmearBase->smear(C, U);
}; };
void exponentiate_iQ(GaugeLinkField& e_iQ, void exponentiate_iQ(GaugeLinkField& e_iQ, const GaugeLinkField& iQ) const {
const GaugeLinkField& iQ) const{
// Put this outside // Put this outside
// only valid for SU(3) matrices // only valid for SU(3) matrices
@ -73,9 +71,9 @@
// the i sign is coming from outside // the i sign is coming from outside
// input matrix is anti-hermitian NOT hermitian // input matrix is anti-hermitian NOT hermitian
GridBase *grid = iQ._grid; GridBase* grid = iQ._grid;
GaugeLinkField unity(grid); GaugeLinkField unity(grid);
unity=1.0; unity = 1.0;
GaugeLinkField iQ2(grid), iQ3(grid); GaugeLinkField iQ2(grid), iQ3(grid);
LatticeComplex u(grid), w(grid); LatticeComplex u(grid), w(grid);
@ -87,37 +85,34 @@
set_uw(u, w, iQ2, iQ3); set_uw(u, w, iQ2, iQ3);
set_fj(f0, f1, f2, u, w); set_fj(f0, f1, f2, u, w);
e_iQ = f0*unity + timesMinusI(f1) * iQ - f2 * iQ2; e_iQ = f0 * unity + timesMinusI(f1) * iQ - f2 * iQ2;
}; };
void set_uw(LatticeComplex& u, LatticeComplex& w, GaugeLinkField& iQ2,
GaugeLinkField& iQ3) const {
Complex one_over_three = 1.0 / 3.0;
Complex one_over_two = 1.0 / 2.0;
void set_uw(LatticeComplex& u, LatticeComplex& w, GridBase* grid = u._grid;
GaugeLinkField& iQ2, GaugeLinkField& iQ3) const{
Complex one_over_three = 1.0/3.0;
Complex one_over_two = 1.0/2.0;
GridBase *grid = u._grid;
LatticeComplex c0(grid), c1(grid), tmp(grid), c0max(grid), theta(grid); LatticeComplex c0(grid), c1(grid), tmp(grid), c0max(grid), theta(grid);
// sign in c0 from the conventions on the Ta // sign in c0 from the conventions on the Ta
c0 = - real(timesMinusI(trace(iQ3))) * one_over_three; //temporary hack c0 = -imag(trace(iQ3)) * one_over_three;
c1 = - real(trace(iQ2)) * one_over_two; c1 = -real(trace(iQ2)) * one_over_two;
//Cayley Hamilton checks to machine precision, tested // Cayley Hamilton checks to machine precision, tested
tmp = c1 * one_over_three; tmp = c1 * one_over_three;
c0max = 2.0 * pow(tmp, 1.5); c0max = 2.0 * pow(tmp, 1.5);
theta = acos(c0/c0max)*one_over_three; // divide by three here, now leave as it is theta = acos(c0 / c0max) *
u = sqrt(tmp) * cos( theta ); one_over_three; // divide by three here, now leave as it is
w = sqrt(c1) * sin( theta ); u = sqrt(tmp) * cos(theta);
w = sqrt(c1) * sin(theta);
} }
void set_fj(LatticeComplex& f0, LatticeComplex& f1, LatticeComplex& f2, void set_fj(LatticeComplex& f0, LatticeComplex& f1, LatticeComplex& f2,
const LatticeComplex& u, const LatticeComplex& w) const{ const LatticeComplex& u, const LatticeComplex& w) const {
GridBase* grid = u._grid;
GridBase *grid = u._grid;
LatticeComplex xi0(grid), u2(grid), w2(grid), cosw(grid); LatticeComplex xi0(grid), u2(grid), w2(grid), cosw(grid);
LatticeComplex fden(grid); LatticeComplex fden(grid);
LatticeComplex h0(grid), h1(grid), h2(grid); LatticeComplex h0(grid), h1(grid), h2(grid);
@ -132,36 +127,34 @@
ixi0 = timesI(xi0); ixi0 = timesI(xi0);
emiu = cos(u) - timesI(sin(u)); emiu = cos(u) - timesI(sin(u));
e2iu = cos(2.0*u) + timesI(sin(2.0*u)); e2iu = cos(2.0 * u) + timesI(sin(2.0 * u));
h0 = e2iu * (u2 - w2) + emiu * ( (8.0*u2*cosw) + (2.0*u*(3.0*u2 + w2)*ixi0)); h0 = e2iu * (u2 - w2) +
h1 = e2iu * (2.0 * u) - emiu * ( (2.0*u*cosw) - (3.0*u2-w2)*ixi0); emiu * ((8.0 * u2 * cosw) + (2.0 * u * (3.0 * u2 + w2) * ixi0));
h2 = e2iu - emiu * ( cosw + (3.0*u)*ixi0); h1 = e2iu * (2.0 * u) - emiu * ((2.0 * u * cosw) - (3.0 * u2 - w2) * ixi0);
h2 = e2iu - emiu * (cosw + (3.0 * u) * ixi0);
fden = unity/(9.0*u2 - w2);// reals fden = unity / (9.0 * u2 - w2); // reals
f0 = h0 * fden; f0 = h0 * fden;
f1 = h1 * fden; f1 = h1 * fden;
f2 = h2 * fden; f2 = h2 * fden;
} }
LatticeComplex func_xi0(const LatticeComplex& w) const {
LatticeComplex func_xi0(const LatticeComplex& w) const{
// Define a function to do the check // Define a function to do the check
//if( w < 1e-4 ) std::cout << GridLogWarning<< "[Smear_stout] w too small: "<< w <<"\n"; // if( w < 1e-4 ) std::cout << GridLogWarning<< "[Smear_stout] w too small:
return sin(w)/w; // "<< w <<"\n";
return sin(w) / w;
} }
LatticeComplex func_xi1(const LatticeComplex& w) const{ LatticeComplex func_xi1(const LatticeComplex& w) const {
// Define a function to do the check // Define a function to do the check
//if( w < 1e-4 ) std::cout << GridLogWarning << "[Smear_stout] w too small: "<< w <<"\n"; // if( w < 1e-4 ) std::cout << GridLogWarning << "[Smear_stout] w too small:
return cos(w)/(w*w) - sin(w)/(w*w*w); // "<< w <<"\n";
} return cos(w) / (w * w) - sin(w) / (w * w * w);
};
} }
};
}
} }
#endif #endif

View File

@ -1,33 +1,34 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/simd/Grid_vector_types.h Source file: ./lib/simd/Grid_vector_types.h
Copyright (C) 2015 Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Guido Cossu <cossu@iroiro-pc.kek.jp> Author: Guido Cossu <cossu@iroiro-pc.kek.jp>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp> Author: neo <cossu@post.kek.jp>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution
*************************************************************************************/ directory
/* END LEGAL */ *************************************************************************************/
/* END LEGAL */
//--------------------------------------------------------------------------- //---------------------------------------------------------------------------
/*! @file Grid_vector_types.h /*! @file Grid_vector_types.h
@brief Defines templated class Grid_simd to deal with inner vector types @brief Defines templated class Grid_simd to deal with inner vector types
@ -43,7 +44,7 @@ Author: neo <cossu@post.kek.jp>
#ifdef SSE4 #ifdef SSE4
#include "Grid_sse4.h" #include "Grid_sse4.h"
#endif #endif
#if defined (AVX1)|| defined (AVX2) || defined (AVXFMA4) #if defined(AVX1) || defined(AVX2) || defined(AVXFMA4)
#include "Grid_avx.h" #include "Grid_avx.h"
#endif #endif
#if defined AVX512 #if defined AVX512
@ -61,190 +62,250 @@ Author: neo <cossu@post.kek.jp>
namespace Grid { namespace Grid {
////////////////////////////////////// //////////////////////////////////////
// To take the floating point type of real/complex type // To take the floating point type of real/complex type
////////////////////////////////////// //////////////////////////////////////
template <typename T> struct RealPart { template <typename T>
struct RealPart {
typedef T type; typedef T type;
}; };
template <typename T> struct RealPart< std::complex<T> >{ template <typename T>
struct RealPart<std::complex<T> > {
typedef T type; typedef T type;
}; };
////////////////////////////////////// //////////////////////////////////////
// demote a vector to real type // demote a vector to real type
////////////////////////////////////// //////////////////////////////////////
// type alias used to simplify the syntax of std::enable_if // type alias used to simplify the syntax of std::enable_if
template <typename T> using Invoke = typename T::type; template <typename T>
template <typename Condition, typename ReturnType> using EnableIf = Invoke<std::enable_if<Condition::value, ReturnType> >; using Invoke = typename T::type;
template <typename Condition, typename ReturnType> using NotEnableIf= Invoke<std::enable_if<!Condition::value, ReturnType> >; template <typename Condition, typename ReturnType>
using EnableIf = Invoke<std::enable_if<Condition::value, ReturnType> >;
template <typename Condition, typename ReturnType>
using NotEnableIf = Invoke<std::enable_if<!Condition::value, ReturnType> >;
////////////////////////////////////////////////////////
// Check for complexity with type traits
template <typename T>
struct is_complex : public std::false_type {};
template <>
struct is_complex<std::complex<double> > : public std::true_type {};
template <>
struct is_complex<std::complex<float> > : public std::true_type {};
//////////////////////////////////////////////////////// template <typename T>
// Check for complexity with type traits using IfReal = Invoke<std::enable_if<std::is_floating_point<T>::value, int> >;
template <typename T> struct is_complex : public std::false_type {}; template <typename T>
template <> struct is_complex<std::complex<double> >: public std::true_type {}; using IfComplex = Invoke<std::enable_if<is_complex<T>::value, int> >;
template <> struct is_complex<std::complex<float> > : public std::true_type {}; template <typename T>
using IfInteger = Invoke<std::enable_if<std::is_integral<T>::value, int> >;
template <typename T> using IfReal = Invoke<std::enable_if<std::is_floating_point<T>::value,int> > ; template <typename T>
template <typename T> using IfComplex = Invoke<std::enable_if<is_complex<T>::value,int> > ; using IfNotReal =
template <typename T> using IfInteger = Invoke<std::enable_if<std::is_integral<T>::value,int> > ; Invoke<std::enable_if<!std::is_floating_point<T>::value, int> >;
template <typename T>
using IfNotComplex = Invoke<std::enable_if<!is_complex<T>::value, int> >;
template <typename T>
using IfNotInteger = Invoke<std::enable_if<!std::is_integral<T>::value, int> >;
template <typename T> using IfNotReal = Invoke<std::enable_if<!std::is_floating_point<T>::value,int> > ; ////////////////////////////////////////////////////////
template <typename T> using IfNotComplex = Invoke<std::enable_if<!is_complex<T>::value,int> > ; // Define the operation templates functors
template <typename T> using IfNotInteger = Invoke<std::enable_if<!std::is_integral<T>::value,int> > ; // general forms to allow for vsplat syntax
// need explicit declaration of types when used since
//////////////////////////////////////////////////////// // clang cannot automatically determine the output type sometimes
// Define the operation templates functors template <class Out, class Input1, class Input2, class Operation>
// general forms to allow for vsplat syntax Out binary(Input1 src_1, Input2 src_2, Operation op) {
// need explicit declaration of types when used since
// clang cannot automatically determine the output type sometimes
template < class Out, class Input1, class Input2, class Operation >
Out binary(Input1 src_1, Input2 src_2, Operation op){
return op(src_1, src_2); return op(src_1, src_2);
} }
template < class Out, class Input, class Operation > template <class Out, class Input, class Operation>
Out unary(Input src, Operation op){ Out unary(Input src, Operation op) {
return op(src); return op(src);
} }
/////////////////////////////////////////////// ///////////////////////////////////////////////
/*
/*
@brief Grid_simd class for the SIMD vector type operations @brief Grid_simd class for the SIMD vector type operations
*/ */
template < class Scalar_type, class Vector_type > template <class Scalar_type, class Vector_type>
class Grid_simd { class Grid_simd {
public: public:
typedef typename RealPart < Scalar_type >::type Real; typedef typename RealPart<Scalar_type>::type Real;
typedef Vector_type vector_type; typedef Vector_type vector_type;
typedef Scalar_type scalar_type; typedef Scalar_type scalar_type;
typedef union conv_t_union { typedef union conv_t_union {
Vector_type v; Vector_type v;
Scalar_type s[sizeof(Vector_type)/sizeof(Scalar_type)]; Scalar_type s[sizeof(Vector_type) / sizeof(Scalar_type)];
conv_t_union(){}; conv_t_union(){};
} conv_t; } conv_t;
Vector_type v; Vector_type v;
static inline int Nsimd(void) { return sizeof(Vector_type)/sizeof(Scalar_type);} static inline int Nsimd(void) {
return sizeof(Vector_type) / sizeof(Scalar_type);
}
Grid_simd& operator=(const Grid_simd&& rhs){v=rhs.v;return *this;}; Grid_simd &operator=(const Grid_simd &&rhs) {
Grid_simd& operator=(const Grid_simd& rhs){v=rhs.v;return *this;}; //faster than not declaring it and leaving to the compiler v = rhs.v;
Grid_simd()=default; return *this;
Grid_simd(const Grid_simd& rhs) :v(rhs.v){}; //compiles in movaps };
Grid_simd(const Grid_simd&& rhs):v(rhs.v){}; Grid_simd &operator=(const Grid_simd &rhs) {
v = rhs.v;
return *this;
}; // faster than not declaring it and leaving to the compiler
Grid_simd() = default;
Grid_simd(const Grid_simd &rhs) : v(rhs.v){}; // compiles in movaps
Grid_simd(const Grid_simd &&rhs) : v(rhs.v){};
///////////////////////////// /////////////////////////////
// Constructors // Constructors
///////////////////////////// /////////////////////////////
Grid_simd & operator = ( Zero & z){ Grid_simd &operator=(Zero &z) {
vzero(*this); vzero(*this);
return (*this); return (*this);
} }
//Enable if complex type // Enable if complex type
template < typename S = Scalar_type > template <typename S = Scalar_type>
Grid_simd(const typename std::enable_if< is_complex < S >::value, S>::type a){ Grid_simd(const typename std::enable_if<is_complex<S>::value, S>::type a) {
vsplat(*this,a); vsplat(*this, a);
}; };
Grid_simd(const Real a){ Grid_simd(const Real a) { vsplat(*this, Scalar_type(a)); };
vsplat(*this,Scalar_type(a));
};
/////////////////////////////////////////////// ///////////////////////////////////////////////
// mac, mult, sub, add, adj // mac, mult, sub, add, adj
/////////////////////////////////////////////// ///////////////////////////////////////////////
// FIXME -- alias this to an inline MAC struct. // FIXME -- alias this to an inline MAC struct.
friend inline void mac (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ a,const Grid_simd *__restrict__ x){ *y = (*a)*(*x)+(*y); }; friend inline void mac(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ a,
const Grid_simd *__restrict__ x) {
*y = (*a) * (*x) + (*y);
};
friend inline void mult(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ l,
const Grid_simd *__restrict__ r) {
*y = (*l) * (*r);
}
friend inline void sub(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ l,
const Grid_simd *__restrict__ r) {
*y = (*l) - (*r);
}
friend inline void add(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ l,
const Grid_simd *__restrict__ r) {
*y = (*l) + (*r);
}
friend inline void mult(Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) * (*r); } friend inline void mac(Grid_simd *__restrict__ y,
friend inline void sub (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) - (*r); } const Scalar_type *__restrict__ a,
friend inline void add (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) + (*r); } const Grid_simd *__restrict__ x) {
*y = (*a) * (*x) + (*y);
};
friend inline void mult(Grid_simd *__restrict__ y,
const Scalar_type *__restrict__ l,
const Grid_simd *__restrict__ r) {
*y = (*l) * (*r);
}
friend inline void sub(Grid_simd *__restrict__ y,
const Scalar_type *__restrict__ l,
const Grid_simd *__restrict__ r) {
*y = (*l) - (*r);
}
friend inline void add(Grid_simd *__restrict__ y,
const Scalar_type *__restrict__ l,
const Grid_simd *__restrict__ r) {
*y = (*l) + (*r);
}
friend inline void mac (Grid_simd *__restrict__ y,const Scalar_type *__restrict__ a,const Grid_simd *__restrict__ x){ *y = (*a)*(*x)+(*y); }; friend inline void mac(Grid_simd *__restrict__ y,
friend inline void mult(Grid_simd *__restrict__ y,const Scalar_type *__restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) * (*r); } const Grid_simd *__restrict__ a,
friend inline void sub (Grid_simd *__restrict__ y,const Scalar_type *__restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) - (*r); } const Scalar_type *__restrict__ x) {
friend inline void add (Grid_simd *__restrict__ y,const Scalar_type *__restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) + (*r); } *y = (*a) * (*x) + (*y);
};
friend inline void mac (Grid_simd *__restrict__ y,const Grid_simd *__restrict__ a,const Scalar_type *__restrict__ x){ *y = (*a)*(*x)+(*y); }; friend inline void mult(Grid_simd *__restrict__ y,
friend inline void mult(Grid_simd *__restrict__ y,const Grid_simd *__restrict__ l,const Scalar_type *__restrict__ r){ *y = (*l) * (*r); } const Grid_simd *__restrict__ l,
friend inline void sub (Grid_simd *__restrict__ y,const Grid_simd *__restrict__ l,const Scalar_type *__restrict__ r){ *y = (*l) - (*r); } const Scalar_type *__restrict__ r) {
friend inline void add (Grid_simd *__restrict__ y,const Grid_simd *__restrict__ l,const Scalar_type *__restrict__ r){ *y = (*l) + (*r); } *y = (*l) * (*r);
}
friend inline void sub(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ l,
const Scalar_type *__restrict__ r) {
*y = (*l) - (*r);
}
friend inline void add(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ l,
const Scalar_type *__restrict__ r) {
*y = (*l) + (*r);
}
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// FIXME: gonna remove these load/store, get, set, prefetch // FIXME: gonna remove these load/store, get, set, prefetch
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
friend inline void vset(Grid_simd &ret, Scalar_type *a){ friend inline void vset(Grid_simd &ret, Scalar_type *a) {
ret.v = unary<Vector_type>(a, VsetSIMD()); ret.v = unary<Vector_type>(a, VsetSIMD());
} }
/////////////////////// ///////////////////////
// Vstore // Vstore
/////////////////////// ///////////////////////
friend inline void vstore(const Grid_simd &ret, Scalar_type *a){ friend inline void vstore(const Grid_simd &ret, Scalar_type *a) {
binary<void>(ret.v, (Real*)a, VstoreSIMD()); binary<void>(ret.v, (Real *)a, VstoreSIMD());
} }
/////////////////////// ///////////////////////
// Vprefetch // Vprefetch
/////////////////////// ///////////////////////
friend inline void vprefetch(const Grid_simd &v) friend inline void vprefetch(const Grid_simd &v) {
{ prefetch_HINT_T0((const char *)&v.v);
prefetch_HINT_T0((const char*)&v.v);
} }
/////////////////////// ///////////////////////
// Reduce // Reduce
/////////////////////// ///////////////////////
friend inline Scalar_type Reduce(const Grid_simd & in) friend inline Scalar_type Reduce(const Grid_simd &in) {
{
return unary<Scalar_type>(in.v, ReduceSIMD<Scalar_type, Vector_type>()); return unary<Scalar_type>(in.v, ReduceSIMD<Scalar_type, Vector_type>());
} }
//////////////////////////// ////////////////////////////
// opreator scalar * simd // opreator scalar * simd
//////////////////////////// ////////////////////////////
friend inline Grid_simd operator * (const Scalar_type &a, Grid_simd b){ friend inline Grid_simd operator*(const Scalar_type &a, Grid_simd b) {
Grid_simd va; Grid_simd va;
vsplat(va,a); vsplat(va, a);
return va*b; return va * b;
} }
friend inline Grid_simd operator * (Grid_simd b,const Scalar_type &a){ friend inline Grid_simd operator*(Grid_simd b, const Scalar_type &a) {
return a*b; return a * b;
} }
/////////////////////// ///////////////////////
// Unary negation // Unary negation
/////////////////////// ///////////////////////
friend inline Grid_simd operator -(const Grid_simd &r) { friend inline Grid_simd operator-(const Grid_simd &r) {
Grid_simd ret; Grid_simd ret;
vzero(ret); vzero(ret);
ret = ret - r; ret = ret - r;
return ret; return ret;
} }
// *=,+=,-= operators // *=,+=,-= operators
inline Grid_simd &operator *=(const Grid_simd &r) { inline Grid_simd &operator*=(const Grid_simd &r) {
*this = (*this)*r; *this = (*this) * r;
return *this; return *this;
// return (*this)*r; ? // return (*this)*r; ?
} }
inline Grid_simd &operator +=(const Grid_simd &r) { inline Grid_simd &operator+=(const Grid_simd &r) {
*this = *this+r; *this = *this + r;
return *this; return *this;
} }
inline Grid_simd &operator -=(const Grid_simd &r) { inline Grid_simd &operator-=(const Grid_simd &r) {
*this = *this-r; *this = *this - r;
return *this; return *this;
} }
@ -255,26 +316,30 @@ namespace Grid {
// provides support // provides support
/////////////////////////////////////// ///////////////////////////////////////
template<class functor> friend inline Grid_simd SimdApply (const functor &func,const Grid_simd &v) { template <class functor>
friend inline Grid_simd SimdApply(const functor &func, const Grid_simd &v) {
Grid_simd ret; Grid_simd ret;
Grid_simd::conv_t conv; Grid_simd::conv_t conv;
conv.v = v.v; conv.v = v.v;
for(int i=0;i<Nsimd();i++){ for (int i = 0; i < Nsimd(); i++) {
conv.s[i]=func(conv.s[i]); conv.s[i] = func(conv.s[i]);
} }
ret.v = conv.v; ret.v = conv.v;
return ret; return ret;
} }
template<class functor> friend inline Grid_simd SimdApplyBinop (const functor &func,const Grid_simd &x,const Grid_simd &y) { template <class functor>
friend inline Grid_simd SimdApplyBinop(const functor &func,
const Grid_simd &x,
const Grid_simd &y) {
Grid_simd ret; Grid_simd ret;
Grid_simd::conv_t cx; Grid_simd::conv_t cx;
Grid_simd::conv_t cy; Grid_simd::conv_t cy;
cx.v = x.v; cx.v = x.v;
cy.v = y.v; cy.v = y.v;
for(int i=0;i<Nsimd();i++){ for (int i = 0; i < Nsimd(); i++) {
cx.s[i]=func(cx.s[i],cy.s[i]); cx.s[i] = func(cx.s[i], cy.s[i]);
} }
ret.v = cx.v; ret.v = cx.v;
return ret; return ret;
@ -285,294 +350,347 @@ namespace Grid {
// all subtypes; may not be a good assumption, but could // all subtypes; may not be a good assumption, but could
// add the vector width as a template param for BG/Q for example // add the vector width as a template param for BG/Q for example
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
friend inline void permute0(Grid_simd &y,Grid_simd b){ friend inline void permute0(Grid_simd &y, Grid_simd b) {
y.v = Optimization::Permute::Permute0(b.v); y.v = Optimization::Permute::Permute0(b.v);
} }
friend inline void permute1(Grid_simd &y,Grid_simd b){ friend inline void permute1(Grid_simd &y, Grid_simd b) {
y.v = Optimization::Permute::Permute1(b.v); y.v = Optimization::Permute::Permute1(b.v);
} }
friend inline void permute2(Grid_simd &y,Grid_simd b){ friend inline void permute2(Grid_simd &y, Grid_simd b) {
y.v = Optimization::Permute::Permute2(b.v); y.v = Optimization::Permute::Permute2(b.v);
} }
friend inline void permute3(Grid_simd &y,Grid_simd b){ friend inline void permute3(Grid_simd &y, Grid_simd b) {
y.v = Optimization::Permute::Permute3(b.v); y.v = Optimization::Permute::Permute3(b.v);
} }
friend inline void permute(Grid_simd &y,Grid_simd b,int perm) friend inline void permute(Grid_simd &y, Grid_simd b, int perm) {
{ if (perm & RotateBit) {
if ( perm & RotateBit ) { int dist = perm & 0xF;
int dist = perm&0xF; y = rotate(b, dist);
y=rotate(b,dist);
return; return;
} }
switch(perm){ switch (perm) {
case 3: permute3(y,b); break; case 3:
case 2: permute2(y,b); break; permute3(y, b);
case 1: permute1(y,b); break; break;
case 0: permute0(y,b); break; case 2:
default: assert(0); permute2(y, b);
break;
case 1:
permute1(y, b);
break;
case 0:
permute0(y, b);
break;
default:
assert(0);
} }
} }
};// end of Grid_simd class definition }; // end of Grid_simd class definition
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
// General rotate // General rotate
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
template <class S, class V, IfNotComplex<S> =0> template <class S, class V, IfNotComplex<S> = 0>
inline Grid_simd<S,V> rotate(Grid_simd<S,V> b,int nrot) inline Grid_simd<S, V> rotate(Grid_simd<S, V> b, int nrot) {
{ nrot = nrot % Grid_simd<S, V>::Nsimd();
nrot = nrot % Grid_simd<S,V>::Nsimd(); Grid_simd<S, V> ret;
Grid_simd<S,V> ret;
// std::cout << "Rotate Real by "<<nrot<<std::endl; // std::cout << "Rotate Real by "<<nrot<<std::endl;
ret.v = Optimization::Rotate::rotate(b.v,nrot); ret.v = Optimization::Rotate::rotate(b.v, nrot);
return ret; return ret;
} }
template <class S, class V, IfComplex<S> =0> template <class S, class V, IfComplex<S> = 0>
inline Grid_simd<S,V> rotate(Grid_simd<S,V> b,int nrot) inline Grid_simd<S, V> rotate(Grid_simd<S, V> b, int nrot) {
{ nrot = nrot % Grid_simd<S, V>::Nsimd();
nrot = nrot % Grid_simd<S,V>::Nsimd(); Grid_simd<S, V> ret;
Grid_simd<S,V> ret;
// std::cout << "Rotate Complex by "<<nrot<<std::endl; // std::cout << "Rotate Complex by "<<nrot<<std::endl;
ret.v = Optimization::Rotate::rotate(b.v,2*nrot); ret.v = Optimization::Rotate::rotate(b.v, 2 * nrot);
return ret; return ret;
} }
/////////////////////// ///////////////////////
// Splat // Splat
/////////////////////// ///////////////////////
// this is only for the complex version // this is only for the complex version
template <class S, class V, IfComplex<S> =0, class ABtype> template <class S, class V, IfComplex<S> = 0, class ABtype>
inline void vsplat(Grid_simd<S,V> &ret,ABtype a, ABtype b){ inline void vsplat(Grid_simd<S, V> &ret, ABtype a, ABtype b) {
ret.v = binary<V>(a, b, VsplatSIMD()); ret.v = binary<V>(a, b, VsplatSIMD());
} }
// overload if complex // overload if complex
template <class S,class V> inline void vsplat(Grid_simd<S,V> &ret, EnableIf<is_complex < S >, S> c) { template <class S, class V>
vsplat(ret,real(c),imag(c)); inline void vsplat(Grid_simd<S, V> &ret, EnableIf<is_complex<S>, S> c) {
} vsplat(ret, real(c), imag(c));
}
//if real fill with a, if complex fill with a in the real part (first function above) // if real fill with a, if complex fill with a in the real part (first function
template <class S,class V> // above)
inline void vsplat(Grid_simd<S,V> &ret,NotEnableIf<is_complex< S>,S> a){ template <class S, class V>
inline void vsplat(Grid_simd<S, V> &ret, NotEnableIf<is_complex<S>, S> a) {
ret.v = unary<V>(a, VsplatSIMD()); ret.v = unary<V>(a, VsplatSIMD());
} }
////////////////////////// //////////////////////////
/////////////////////////////////////////////// ///////////////////////////////////////////////
// Initialise to 1,0,i for the correct types // Initialise to 1,0,i for the correct types
/////////////////////////////////////////////// ///////////////////////////////////////////////
// For complex types // For complex types
template <class S,class V, IfComplex<S> = 0 > inline void vone(Grid_simd<S,V> &ret) { vsplat(ret,S(1.0,0.0)); } template <class S, class V, IfComplex<S> = 0>
template <class S,class V, IfComplex<S> = 0 > inline void vzero(Grid_simd<S,V> &ret) { vsplat(ret,S(0.0,0.0)); }// use xor? inline void vone(Grid_simd<S, V> &ret) {
template <class S,class V, IfComplex<S> = 0 > inline void vcomplex_i(Grid_simd<S,V> &ret){ vsplat(ret,S(0.0,1.0));} vsplat(ret, S(1.0, 0.0));
}
template <class S, class V, IfComplex<S> = 0>
inline void vzero(Grid_simd<S, V> &ret) {
vsplat(ret, S(0.0, 0.0));
} // use xor?
template <class S, class V, IfComplex<S> = 0>
inline void vcomplex_i(Grid_simd<S, V> &ret) {
vsplat(ret, S(0.0, 1.0));
}
template <class S,class V, IfComplex<S> = 0 > inline void visign(Grid_simd<S,V> &ret){ vsplat(ret,S(1.0,-1.0));} template <class S, class V, IfComplex<S> = 0>
template <class S,class V, IfComplex<S> = 0 > inline void vrsign(Grid_simd<S,V> &ret){ vsplat(ret,S(-1.0,1.0));} inline void visign(Grid_simd<S, V> &ret) {
vsplat(ret, S(1.0, -1.0));
}
template <class S, class V, IfComplex<S> = 0>
inline void vrsign(Grid_simd<S, V> &ret) {
vsplat(ret, S(-1.0, 1.0));
}
// if not complex overload here // if not complex overload here
template <class S,class V, IfReal<S> = 0 > inline void vone (Grid_simd<S,V> &ret){ vsplat(ret,S(1.0)); } template <class S, class V, IfReal<S> = 0>
template <class S,class V, IfReal<S> = 0 > inline void vzero(Grid_simd<S,V> &ret){ vsplat(ret,S(0.0)); } inline void vone(Grid_simd<S, V> &ret) {
vsplat(ret, S(1.0));
}
template <class S, class V, IfReal<S> = 0>
inline void vzero(Grid_simd<S, V> &ret) {
vsplat(ret, S(0.0));
}
// For integral types // For integral types
template <class S,class V,IfInteger<S> = 0 > inline void vone(Grid_simd<S,V> &ret) {vsplat(ret,1); } template <class S, class V, IfInteger<S> = 0>
template <class S,class V,IfInteger<S> = 0 > inline void vzero(Grid_simd<S,V> &ret) {vsplat(ret,0); } inline void vone(Grid_simd<S, V> &ret) {
template <class S,class V,IfInteger<S> = 0 > inline void vtrue (Grid_simd<S,V> &ret){vsplat(ret,0xFFFFFFFF);} vsplat(ret, 1);
template <class S,class V,IfInteger<S> = 0 > inline void vfalse(Grid_simd<S,V> &ret){vsplat(ret,0);} }
template <class S, class V, IfInteger<S> = 0>
inline void vzero(Grid_simd<S, V> &ret) {
vsplat(ret, 0);
}
template <class S, class V, IfInteger<S> = 0>
inline void vtrue(Grid_simd<S, V> &ret) {
vsplat(ret, 0xFFFFFFFF);
}
template <class S, class V, IfInteger<S> = 0>
inline void vfalse(Grid_simd<S, V> &ret) {
vsplat(ret, 0);
}
template<class S,class V> inline void zeroit(Grid_simd<S,V> &z){ vzero(z);} template <class S, class V>
inline void zeroit(Grid_simd<S, V> &z) {
vzero(z);
}
/////////////////////// ///////////////////////
// Vstream // Vstream
/////////////////////// ///////////////////////
template <class S,class V, IfReal<S> = 0 > template <class S, class V, IfReal<S> = 0>
inline void vstream(Grid_simd<S,V> &out,const Grid_simd<S,V> &in){ inline void vstream(Grid_simd<S, V> &out, const Grid_simd<S, V> &in) {
binary<void>((S *)&out.v, in.v, VstreamSIMD()); binary<void>((S *)&out.v, in.v, VstreamSIMD());
} }
template <class S,class V, IfComplex<S> = 0 > template <class S, class V, IfComplex<S> = 0>
inline void vstream(Grid_simd<S,V> &out,const Grid_simd<S,V> &in){ inline void vstream(Grid_simd<S, V> &out, const Grid_simd<S, V> &in) {
typedef typename S::value_type T; typedef typename S::value_type T;
binary<void>((T *)&out.v, in.v, VstreamSIMD()); binary<void>((T *)&out.v, in.v, VstreamSIMD());
} }
template <class S,class V, IfInteger<S> = 0 > template <class S, class V, IfInteger<S> = 0>
inline void vstream(Grid_simd<S,V> &out,const Grid_simd<S,V> &in){ inline void vstream(Grid_simd<S, V> &out, const Grid_simd<S, V> &in) {
out=in; out = in;
} }
//////////////////////////////////// ////////////////////////////////////
// Arithmetic operator overloads +,-,* // Arithmetic operator overloads +,-,*
//////////////////////////////////// ////////////////////////////////////
template<class S,class V> inline Grid_simd<S,V> operator + (Grid_simd<S,V> a, Grid_simd<S,V> b) { template <class S, class V>
Grid_simd<S,V> ret; inline Grid_simd<S, V> operator+(Grid_simd<S, V> a, Grid_simd<S, V> b) {
Grid_simd<S, V> ret;
ret.v = binary<V>(a.v, b.v, SumSIMD()); ret.v = binary<V>(a.v, b.v, SumSIMD());
return ret; return ret;
}; };
template<class S,class V> inline Grid_simd<S,V> operator - (Grid_simd<S,V> a, Grid_simd<S,V> b) { template <class S, class V>
Grid_simd<S,V> ret; inline Grid_simd<S, V> operator-(Grid_simd<S, V> a, Grid_simd<S, V> b) {
Grid_simd<S, V> ret;
ret.v = binary<V>(a.v, b.v, SubSIMD()); ret.v = binary<V>(a.v, b.v, SubSIMD());
return ret; return ret;
}; };
// Distinguish between complex types and others // Distinguish between complex types and others
template<class S,class V, IfComplex<S> = 0 > inline Grid_simd<S,V> operator * (Grid_simd<S,V> a, Grid_simd<S,V> b) { template <class S, class V, IfComplex<S> = 0>
Grid_simd<S,V> ret; inline Grid_simd<S, V> operator*(Grid_simd<S, V> a, Grid_simd<S, V> b) {
ret.v = binary<V>(a.v,b.v, MultComplexSIMD()); Grid_simd<S, V> ret;
ret.v = binary<V>(a.v, b.v, MultComplexSIMD());
return ret; return ret;
}; };
// Real/Integer types // Real/Integer types
template<class S,class V, IfNotComplex<S> = 0 > inline Grid_simd<S,V> operator * (Grid_simd<S,V> a, Grid_simd<S,V> b) { template <class S, class V, IfNotComplex<S> = 0>
Grid_simd<S,V> ret; inline Grid_simd<S, V> operator*(Grid_simd<S, V> a, Grid_simd<S, V> b) {
ret.v = binary<V>(a.v,b.v, MultSIMD()); Grid_simd<S, V> ret;
ret.v = binary<V>(a.v, b.v, MultSIMD());
return ret; return ret;
}; };
///////////////////////
/////////////////////// // Conjugate
// Conjugate ///////////////////////
/////////////////////// template <class S, class V, IfComplex<S> = 0>
template <class S,class V, IfComplex<S> = 0 > inline Grid_simd<S, V> conjugate(const Grid_simd<S, V> &in) {
inline Grid_simd<S,V> conjugate(const Grid_simd<S,V> &in){ Grid_simd<S, V> ret;
Grid_simd<S,V> ret ;
ret.v = unary<V>(in.v, ConjSIMD()); ret.v = unary<V>(in.v, ConjSIMD());
return ret; return ret;
} }
template <class S,class V, IfNotComplex<S> = 0 > inline Grid_simd<S,V> conjugate(const Grid_simd<S,V> &in){ template <class S, class V, IfNotComplex<S> = 0>
inline Grid_simd<S, V> conjugate(const Grid_simd<S, V> &in) {
return in; // for real objects return in; // for real objects
} }
//Suppress adj for integer types... // odd; why conjugate above but not adj?? // Suppress adj for integer types... // odd; why conjugate above but not adj??
template < class S, class V, IfNotInteger<S> = 0 > template <class S, class V, IfNotInteger<S> = 0>
inline Grid_simd<S,V> adj(const Grid_simd<S,V> &in){ return conjugate(in); } inline Grid_simd<S, V> adj(const Grid_simd<S, V> &in) {
return conjugate(in);
}
/////////////////////// ///////////////////////
// timesMinusI // timesMinusI
/////////////////////// ///////////////////////
template<class S,class V,IfComplex<S> = 0 > template <class S, class V, IfComplex<S> = 0>
inline void timesMinusI( Grid_simd<S,V> &ret,const Grid_simd<S,V> &in){ inline void timesMinusI(Grid_simd<S, V> &ret, const Grid_simd<S, V> &in) {
ret.v = binary<V>(in.v, ret.v, TimesMinusISIMD()); ret.v = binary<V>(in.v, ret.v, TimesMinusISIMD());
} }
template<class S,class V,IfComplex<S> = 0 > template <class S, class V, IfComplex<S> = 0>
inline Grid_simd<S,V> timesMinusI(const Grid_simd<S,V> &in){ inline Grid_simd<S, V> timesMinusI(const Grid_simd<S, V> &in) {
Grid_simd<S,V> ret; Grid_simd<S, V> ret;
timesMinusI(ret,in); timesMinusI(ret, in);
return ret; return ret;
} }
template<class S,class V,IfNotComplex<S> = 0 > template <class S, class V, IfNotComplex<S> = 0>
inline Grid_simd<S,V> timesMinusI(const Grid_simd<S,V> &in){ inline Grid_simd<S, V> timesMinusI(const Grid_simd<S, V> &in) {
return in; return in;
} }
/////////////////////// ///////////////////////
// timesI // timesI
/////////////////////// ///////////////////////
template<class S,class V,IfComplex<S> = 0 > template <class S, class V, IfComplex<S> = 0>
inline void timesI(Grid_simd<S,V> &ret,const Grid_simd<S,V> &in){ inline void timesI(Grid_simd<S, V> &ret, const Grid_simd<S, V> &in) {
ret.v = binary<V>(in.v, ret.v, TimesISIMD()); ret.v = binary<V>(in.v, ret.v, TimesISIMD());
} }
template<class S,class V,IfComplex<S> = 0 > template <class S, class V, IfComplex<S> = 0>
inline Grid_simd<S,V> timesI(const Grid_simd<S,V> &in){ inline Grid_simd<S, V> timesI(const Grid_simd<S, V> &in) {
Grid_simd<S,V> ret; Grid_simd<S, V> ret;
timesI(ret,in); timesI(ret, in);
return ret; return ret;
} }
template<class S,class V,IfNotComplex<S> = 0 > template <class S, class V, IfNotComplex<S> = 0>
inline Grid_simd<S,V> timesI(const Grid_simd<S,V> &in){ inline Grid_simd<S, V> timesI(const Grid_simd<S, V> &in) {
return in; return in;
} }
///////////////////// /////////////////////
// Inner, outer // Inner, outer
///////////////////// /////////////////////
template<class S, class V > template <class S, class V>
inline Grid_simd< S, V> innerProduct(const Grid_simd< S, V> & l, const Grid_simd< S, V> & r) inline Grid_simd<S, V> innerProduct(const Grid_simd<S, V> &l,
{ const Grid_simd<S, V> &r) {
return conjugate(l)*r; return conjugate(l) * r;
} }
template<class S, class V > template <class S, class V>
inline Grid_simd< S, V> outerProduct(const Grid_simd< S, V> &l, const Grid_simd< S, V> & r) inline Grid_simd<S, V> outerProduct(const Grid_simd<S, V> &l,
{ const Grid_simd<S, V> &r) {
return l*conjugate(r); return l * conjugate(r);
} }
template<class S, class V > template <class S, class V>
inline Grid_simd< S, V> trace(const Grid_simd< S, V> &arg){ inline Grid_simd<S, V> trace(const Grid_simd<S, V> &arg) {
return arg; return arg;
} }
////////////////////////////////////////////////////////////
// copy/splat complex real parts into real;
// insert real into complex and zero imag;
////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////// // real = toReal( complex )
// copy/splat complex real parts into real; template <class S, class V, IfReal<S> = 0>
// insert real into complex and zero imag; inline Grid_simd<S, V> toReal(const Grid_simd<std::complex<S>, V> &in) {
//////////////////////////////////////////////////////////// typedef Grid_simd<S, V> simd;
//real = toReal( complex )
template<class S,class V,IfReal<S> = 0>
inline Grid_simd<S,V> toReal(const Grid_simd<std::complex<S>,V> &in)
{
typedef Grid_simd<S,V> simd;
simd ret; simd ret;
typename simd::conv_t conv; typename simd::conv_t conv;
conv.v = in.v; // copy the vector content (bytewise) conv.v = in.v; // copy the vector content (bytewise)
for(int i=0;i<simd::Nsimd();i+=2){ for (int i = 0; i < simd::Nsimd(); i += 2) {
conv.s[i+1]=conv.s[i]; // duplicate (r,r);(r,r);(r,r); etc... conv.s[i + 1] = conv.s[i]; // duplicate (r,r);(r,r);(r,r); etc...
} }
ret.v = conv.v; ret.v = conv.v;
return ret; return ret;
} }
//complex = toComplex( real ) // complex = toComplex( real )
template<class R,class V,IfReal<R> = 0 > // must be a real arg template <class R, class V, IfReal<R> = 0> // must be a real arg
inline Grid_simd<std::complex<R>,V> toComplex (const Grid_simd<R,V> &in) inline Grid_simd<std::complex<R>, V> toComplex(const Grid_simd<R, V> &in) {
{ typedef Grid_simd<R, V> Rsimd;
typedef Grid_simd<R,V> Rsimd; typedef Grid_simd<std::complex<R>, V> Csimd;
typedef Grid_simd<std::complex<R>,V> Csimd; typename Rsimd::conv_t conv; // address as real
typename Rsimd::conv_t conv;// address as real
conv.v = in.v; conv.v = in.v;
for(int i=0;i<Rsimd::Nsimd();i+=2){ for (int i = 0; i < Rsimd::Nsimd(); i += 2) {
assert(conv.s[i+1]==conv.s[i]); // trap any cases where real was not duplicated assert(conv.s[i + 1] ==
// indicating the SIMD grids of real and imag assignment did not correctly match conv.s[i]); // trap any cases where real was not duplicated
conv.s[i+1]=0.0; // zero imaginary parts // indicating the SIMD grids of real and imag assignment did not correctly
// match
conv.s[i + 1] = 0.0; // zero imaginary parts
} }
Csimd ret; Csimd ret;
ret.v = conv.v; ret.v = conv.v;
return ret; return ret;
} }
///////////////////////////////
// Define available types
///////////////////////////////
typedef Grid_simd<float, SIMD_Ftype> vRealF;
typedef Grid_simd<double, SIMD_Dtype> vRealD;
typedef Grid_simd<std::complex<float>, SIMD_Ftype> vComplexF;
typedef Grid_simd<std::complex<double>, SIMD_Dtype> vComplexD;
typedef Grid_simd<Integer, SIMD_Itype> vInteger;
/////////////////////////////////////////
// Some traits to recognise the types
/////////////////////////////////////////
template <typename T>
struct is_simd : public std::false_type {};
template <>
struct is_simd<vRealF> : public std::true_type {};
template <>
struct is_simd<vRealD> : public std::true_type {};
template <>
struct is_simd<vComplexF> : public std::true_type {};
template <>
struct is_simd<vComplexD> : public std::true_type {};
template <>
struct is_simd<vInteger> : public std::true_type {};
template <typename T>
/////////////////////////////// using IfSimd = Invoke<std::enable_if<is_simd<T>::value, int> >;
// Define available types template <typename T>
/////////////////////////////// using IfNotSimd = Invoke<std::enable_if<!is_simd<T>::value, unsigned> >;
typedef Grid_simd< float , SIMD_Ftype > vRealF;
typedef Grid_simd< double , SIMD_Dtype > vRealD;
typedef Grid_simd< std::complex< float > , SIMD_Ftype > vComplexF;
typedef Grid_simd< std::complex< double >, SIMD_Dtype > vComplexD;
typedef Grid_simd< Integer , SIMD_Itype > vInteger;
/////////////////////////////////////////
// Some traits to recognise the types
/////////////////////////////////////////
template <typename T> struct is_simd : public std::false_type{};
template <> struct is_simd<vRealF> : public std::true_type {};
template <> struct is_simd<vRealD> : public std::true_type {};
template <> struct is_simd<vComplexF>: public std::true_type {};
template <> struct is_simd<vComplexD>: public std::true_type {};
template <> struct is_simd<vInteger> : public std::true_type {};
template <typename T> using IfSimd = Invoke<std::enable_if< is_simd<T>::value,int> > ;
template <typename T> using IfNotSimd = Invoke<std::enable_if<!is_simd<T>::value,unsigned> > ;
} }
#endif #endif

View File

@ -1,33 +1,34 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/simd/Grid_vector_unops.h Source file: ./lib/simd/Grid_vector_unops.h
Copyright (C) 2015 Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp> Author: neo <cossu@post.kek.jp>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution
*************************************************************************************/ directory
/* END LEGAL */ *************************************************************************************/
/* END LEGAL */
#ifndef GRID_VECTOR_UNOPS #ifndef GRID_VECTOR_UNOPS
#define GRID_VECTOR_UNOPS #define GRID_VECTOR_UNOPS
@ -35,213 +36,199 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
namespace Grid { namespace Grid {
template<class scalar> struct SqrtRealFunctor { template <class scalar>
scalar operator()(const scalar &a) const { struct SqrtRealFunctor {
return sqrt(real(a)); scalar operator()(const scalar &a) const { return sqrt(real(a)); }
} };
};
template<class scalar> struct RSqrtRealFunctor { template <class scalar>
struct RSqrtRealFunctor {
scalar operator()(const scalar &a) const { scalar operator()(const scalar &a) const {
return scalar(1.0/sqrt(real(a))); return scalar(1.0 / sqrt(real(a)));
} }
}; };
template<class scalar> struct CosRealFunctor { template <class scalar>
scalar operator()(const scalar &a) const { struct CosRealFunctor {
return cos(real(a)); scalar operator()(const scalar &a) const { return cos(real(a)); }
} };
};
template<class scalar> struct SinRealFunctor { template <class scalar>
scalar operator()(const scalar &a) const { struct SinRealFunctor {
return sin(real(a)); scalar operator()(const scalar &a) const { return sin(real(a)); }
} };
};
template<class scalar> struct AcosRealFunctor { template <class scalar>
scalar operator()(const scalar &a) const { struct AcosRealFunctor {
return acos(real(a)); scalar operator()(const scalar &a) const { return acos(real(a)); }
} };
};
template<class scalar> struct AsinRealFunctor { template <class scalar>
scalar operator()(const scalar &a) const { struct AsinRealFunctor {
return asin(real(a)); scalar operator()(const scalar &a) const { return asin(real(a)); }
} };
};
template<class scalar> struct LogRealFunctor { template <class scalar>
scalar operator()(const scalar &a) const { struct LogRealFunctor {
return log(real(a)); scalar operator()(const scalar &a) const { return log(real(a)); }
} };
};
template<class scalar> struct ExpRealFunctor { template <class scalar>
scalar operator()(const scalar &a) const { struct ExpRealFunctor {
return exp(real(a)); scalar operator()(const scalar &a) const { return exp(real(a)); }
} };
}; template <class scalar>
template<class scalar> struct NotFunctor { struct NotFunctor {
scalar operator()(const scalar &a) const { scalar operator()(const scalar &a) const { return (!a); }
return (!a); };
} template <class scalar>
}; struct AbsRealFunctor {
template<class scalar> struct AbsRealFunctor { scalar operator()(const scalar &a) const { return std::abs(real(a)); }
scalar operator()(const scalar &a) const { };
return std::abs(real(a));
}
};
template<class scalar> struct PowRealFunctor { template <class scalar>
struct PowRealFunctor {
double y; double y;
PowRealFunctor(double _y) : y(_y) {}; PowRealFunctor(double _y) : y(_y){};
scalar operator()(const scalar &a) const { scalar operator()(const scalar &a) const { return pow(real(a), y); }
return pow(real(a),y); };
}
};
template<class scalar> struct ModIntFunctor { template <class scalar>
struct ModIntFunctor {
Integer y; Integer y;
ModIntFunctor(Integer _y) : y(_y) {}; ModIntFunctor(Integer _y) : y(_y){};
scalar operator()(const scalar &a) const { scalar operator()(const scalar &a) const { return Integer(a) % y; }
return Integer(a)%y; };
}
};
template<class scalar> struct DivIntFunctor { template <class scalar>
struct DivIntFunctor {
Integer y; Integer y;
DivIntFunctor(Integer _y) : y(_y) {}; DivIntFunctor(Integer _y) : y(_y){};
scalar operator()(const scalar &a) const { scalar operator()(const scalar &a) const { return Integer(a) / y; }
return Integer(a)/y; };
}
};
template<class scalar> struct RealFunctor { template <class scalar>
scalar operator()(const std::complex<scalar> &a) const { struct RealFunctor {
return real(a); scalar operator()(const scalar &a) const { return std::real(a); }
} };
}; template <class scalar>
template<class scalar> struct ImagFunctor { struct ImagFunctor {
scalar operator()(const std::complex<scalar> &a) const { scalar operator()(const scalar &a) const { return std::imag(a); }
return imag(a); };
} template <class S, class V>
}; inline Grid_simd<S, V> real(const Grid_simd<S, V> &r) {
template < class S, class V > return SimdApply(RealFunctor<S>(), r);
inline Grid_simd<S,V> real(const Grid_simd<S,V> &r) { }
return SimdApply(RealFunctor<S>(),r); template <class S, class V>
} inline Grid_simd<S, V> imag(const Grid_simd<S, V> &r) {
template < class S, class V > return SimdApply(ImagFunctor<S>(), r);
inline Grid_simd<S,V> imag(const Grid_simd<S,V> &r) { }
return SimdApply(ImagFunctor<S>(),r); template <class S, class V>
} inline Grid_simd<S, V> sqrt(const Grid_simd<S, V> &r) {
return SimdApply(SqrtRealFunctor<S>(), r);
}
template <class S, class V>
inline Grid_simd<S, V> rsqrt(const Grid_simd<S, V> &r) {
return SimdApply(RSqrtRealFunctor<S>(), r);
}
template <class Scalar>
inline Scalar rsqrt(const Scalar &r) {
return (RSqrtRealFunctor<Scalar>(), r);
}
template < class S, class V > template <class S, class V>
inline Grid_simd<S,V> sqrt(const Grid_simd<S,V> &r) { inline Grid_simd<S, V> cos(const Grid_simd<S, V> &r) {
return SimdApply(SqrtRealFunctor<S>(),r); return SimdApply(CosRealFunctor<S>(), r);
} }
template < class S, class V > template <class S, class V>
inline Grid_simd<S,V> rsqrt(const Grid_simd<S,V> &r) { inline Grid_simd<S, V> sin(const Grid_simd<S, V> &r) {
return SimdApply(RSqrtRealFunctor<S>(),r); return SimdApply(SinRealFunctor<S>(), r);
} }
template < class Scalar > template <class S, class V>
inline Scalar rsqrt(const Scalar &r) { inline Grid_simd<S, V> acos(const Grid_simd<S, V> &r) {
return (RSqrtRealFunctor<Scalar>(),r); return SimdApply(AcosRealFunctor<S>(), r);
} }
template <class S, class V>
template < class S, class V > inline Grid_simd<S, V> asin(const Grid_simd<S, V> &r) {
inline Grid_simd<S,V> cos(const Grid_simd<S,V> &r) { return SimdApply(AsinRealFunctor<S>(), r);
return SimdApply(CosRealFunctor<S>(),r); }
} template <class S, class V>
template < class S, class V > inline Grid_simd<S, V> log(const Grid_simd<S, V> &r) {
inline Grid_simd<S,V> sin(const Grid_simd<S,V> &r) { return SimdApply(LogRealFunctor<S>(), r);
return SimdApply(SinRealFunctor<S>(),r); }
} template <class S, class V>
template < class S, class V > inline Grid_simd<S, V> abs(const Grid_simd<S, V> &r) {
inline Grid_simd<S,V> acos(const Grid_simd<S,V> &r) { return SimdApply(AbsRealFunctor<S>(), r);
return SimdApply(AcosRealFunctor<S>(),r); }
} template <class S, class V>
template < class S, class V > inline Grid_simd<S, V> exp(const Grid_simd<S, V> &r) {
inline Grid_simd<S,V> asin(const Grid_simd<S,V> &r) { return SimdApply(ExpRealFunctor<S>(), r);
return SimdApply(AsinRealFunctor<S>(),r); }
} template <class S, class V>
template < class S, class V > inline Grid_simd<S, V> Not(const Grid_simd<S, V> &r) {
inline Grid_simd<S,V> log(const Grid_simd<S,V> &r) { return SimdApply(NotFunctor<S>(), r);
return SimdApply(LogRealFunctor<S>(),r); }
} template <class S, class V>
template < class S, class V > inline Grid_simd<S, V> pow(const Grid_simd<S, V> &r, double y) {
inline Grid_simd<S,V> abs(const Grid_simd<S,V> &r) { return SimdApply(PowRealFunctor<S>(y), r);
return SimdApply(AbsRealFunctor<S>(),r); }
} template <class S, class V>
template < class S, class V > inline Grid_simd<S, V> mod(const Grid_simd<S, V> &r, Integer y) {
inline Grid_simd<S,V> exp(const Grid_simd<S,V> &r) { return SimdApply(ModIntFunctor<S>(y), r);
return SimdApply(ExpRealFunctor<S>(),r); }
} template <class S, class V>
template < class S, class V > inline Grid_simd<S, V> div(const Grid_simd<S, V> &r, Integer y) {
inline Grid_simd<S,V> Not(const Grid_simd<S,V> &r) { return SimdApply(DivIntFunctor<S>(y), r);
return SimdApply(NotFunctor<S>(),r); }
} ////////////////////////////////////////////////////////////////////////////
template < class S, class V > // Allows us to assign into **conformable** real vectors from complex
inline Grid_simd<S,V> pow(const Grid_simd<S,V> &r,double y) { ////////////////////////////////////////////////////////////////////////////
return SimdApply(PowRealFunctor<S>(y),r); // template < class S, class V >
} // inline auto ComplexRemove(const Grid_simd<S,V> &c) ->
template < class S, class V > // Grid_simd<Grid_simd<S,V>::Real,V> {
inline Grid_simd<S,V> mod(const Grid_simd<S,V> &r,Integer y) { // Grid_simd<Grid_simd<S,V>::Real,V> ret;
return SimdApply(ModIntFunctor<S>(y),r); // ret.v = c.v;
} // return ret;
template < class S, class V > // }
inline Grid_simd<S,V> div(const Grid_simd<S,V> &r,Integer y) { template <class scalar>
return SimdApply(DivIntFunctor<S>(y),r); struct AndFunctor {
} scalar operator()(const scalar &x, const scalar &y) const { return x & y; }
//////////////////////////////////////////////////////////////////////////// };
// Allows us to assign into **conformable** real vectors from complex template <class scalar>
//////////////////////////////////////////////////////////////////////////// struct OrFunctor {
// template < class S, class V > scalar operator()(const scalar &x, const scalar &y) const { return x | y; }
// inline auto ComplexRemove(const Grid_simd<S,V> &c) -> Grid_simd<Grid_simd<S,V>::Real,V> { };
// Grid_simd<Grid_simd<S,V>::Real,V> ret; template <class scalar>
// ret.v = c.v; struct AndAndFunctor {
// return ret; scalar operator()(const scalar &x, const scalar &y) const { return x && y; }
// } };
template<class scalar> struct AndFunctor { template <class scalar>
scalar operator()(const scalar &x, const scalar &y) const { struct OrOrFunctor {
return x & y; scalar operator()(const scalar &x, const scalar &y) const { return x || y; }
} };
};
template<class scalar> struct OrFunctor {
scalar operator()(const scalar &x, const scalar &y) const {
return x | y;
}
};
template<class scalar> struct AndAndFunctor {
scalar operator()(const scalar &x, const scalar &y) const {
return x && y;
}
};
template<class scalar> struct OrOrFunctor {
scalar operator()(const scalar &x, const scalar &y) const {
return x || y;
}
};
////////////////////////////////
// Calls to simd binop functors
////////////////////////////////
template < class S, class V >
inline Grid_simd<S,V> operator &(const Grid_simd<S,V> &x,const Grid_simd<S,V> &y) {
return SimdApplyBinop(AndFunctor<S>(),x,y);
}
template < class S, class V >
inline Grid_simd<S,V> operator &&(const Grid_simd<S,V> &x,const Grid_simd<S,V> &y) {
return SimdApplyBinop(AndAndFunctor<S>(),x,y);
}
template < class S, class V >
inline Grid_simd<S,V> operator |(const Grid_simd<S,V> &x,const Grid_simd<S,V> &y) {
return SimdApplyBinop(OrFunctor<S>(),x,y);
}
template < class S, class V >
inline Grid_simd<S,V> operator ||(const Grid_simd<S,V> &x,const Grid_simd<S,V> &y) {
return SimdApplyBinop(OrOrFunctor<S>(),x,y);
}
////////////////////////////////
// Calls to simd binop functors
////////////////////////////////
template <class S, class V>
inline Grid_simd<S, V> operator&(const Grid_simd<S, V> &x,
const Grid_simd<S, V> &y) {
return SimdApplyBinop(AndFunctor<S>(), x, y);
}
template <class S, class V>
inline Grid_simd<S, V> operator&&(const Grid_simd<S, V> &x,
const Grid_simd<S, V> &y) {
return SimdApplyBinop(AndAndFunctor<S>(), x, y);
}
template <class S, class V>
inline Grid_simd<S, V> operator|(const Grid_simd<S, V> &x,
const Grid_simd<S, V> &y) {
return SimdApplyBinop(OrFunctor<S>(), x, y);
}
template <class S, class V>
inline Grid_simd<S, V> operator||(const Grid_simd<S, V> &x,
const Grid_simd<S, V> &y) {
return SimdApplyBinop(OrOrFunctor<S>(), x, y);
}
} }
#endif #endif

View File

@ -1,31 +1,32 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_simd.cc Source file: ./tests/Test_simd.cc
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp> Author: neo <cossu@post.kek.jp>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution
*************************************************************************************/ directory
/* END LEGAL */ *************************************************************************************/
/* END LEGAL */
#include <Grid.h> #include <Grid.h>
using namespace std; using namespace std;
@ -62,6 +63,18 @@ public:
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = adj(i1);} template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = adj(i1);}
std::string name(void) const { return std::string("Adj"); } std::string name(void) const { return std::string("Adj"); }
}; };
class funcImag {
public:
funcImag() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = imag(i1);}
std::string name(void) const { return std::string("imag"); }
};
class funcReal {
public:
funcReal() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = real(i1);}
std::string name(void) const { return std::string("real"); }
};
class funcTimesI { class funcTimesI {
public: public:
@ -141,7 +154,13 @@ void Tester(const functor &func)
} }
extract<vec,scal>(v_result,result); extract<vec,scal>(v_result,result);
std::cout<<GridLogMessage << " " << func.name()<<std::endl;
std::cout << GridLogMessage << " " << func.name() << std::endl;
std::cout << GridLogDebug << v_input1 << std::endl;
std::cout << GridLogDebug << v_result << std::endl;
int ok=0; int ok=0;
for(int i=0;i<Nsimd;i++){ for(int i=0;i<Nsimd;i++){
@ -389,6 +408,8 @@ int main (int argc, char ** argv)
Tester<ComplexF,vComplexF>(funcTimes()); Tester<ComplexF,vComplexF>(funcTimes());
Tester<ComplexF,vComplexF>(funcConj()); Tester<ComplexF,vComplexF>(funcConj());
Tester<ComplexF,vComplexF>(funcAdj()); Tester<ComplexF,vComplexF>(funcAdj());
Tester<ComplexF,vComplexF>(funcReal());
Tester<ComplexF,vComplexF>(funcImag());
Tester<ComplexF,vComplexF>(funcInnerProduct()); Tester<ComplexF,vComplexF>(funcInnerProduct());
ReductionTester<ComplexF,ComplexF,vComplexF>(funcReduce()); ReductionTester<ComplexF,ComplexF,vComplexF>(funcReduce());
@ -421,17 +442,21 @@ int main (int argc, char ** argv)
Tester<ComplexD,vComplexD>(funcTimes()); Tester<ComplexD,vComplexD>(funcTimes());
Tester<ComplexD,vComplexD>(funcConj()); Tester<ComplexD,vComplexD>(funcConj());
Tester<ComplexD,vComplexD>(funcAdj()); Tester<ComplexD,vComplexD>(funcAdj());
Tester<ComplexD,vComplexD>(funcInnerProduct()); Tester<ComplexD, vComplexD>(funcReal());
ReductionTester<ComplexD,ComplexD,vComplexD>(funcReduce()); Tester<ComplexD, vComplexD>(funcImag());
Tester<ComplexD, vComplexD>(funcInnerProduct());
ReductionTester<ComplexD, ComplexD, vComplexD>(funcReduce());
std::cout<<GridLogMessage << "==================================="<< std::endl; std::cout << GridLogMessage
std::cout<<GridLogMessage << "Testing vComplexD permutes "<<std::endl; << "===================================" << std::endl;
std::cout<<GridLogMessage << "==================================="<< std::endl; std::cout << GridLogMessage << "Testing vComplexD permutes " << std::endl;
std::cout << GridLogMessage
<< "===================================" << std::endl;
// Log2 iteration // Log2 iteration
for(int i=0;(1<<i)< vComplexD::Nsimd();i++){ for (int i = 0; (1 << i) < vComplexD::Nsimd(); i++) {
PermTester<ComplexD,vComplexD>(funcPermute(i)); PermTester<ComplexD, vComplexD>(funcPermute(i));
} }