mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Debugged the real() and imag() functions and added tests to Test_Simd
This commit is contained in:
parent
3e3b367aa9
commit
e3d5319470
45
lib/Simd.h
45
lib/Simd.h
@ -1,32 +1,33 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/Simd.h
|
Source file: ./lib/Simd.h
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: neo <cossu@post.kek.jp>
|
Author: neo <cossu@post.kek.jp>
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_SIMD_H
|
#ifndef GRID_SIMD_H
|
||||||
#define GRID_SIMD_H
|
#define GRID_SIMD_H
|
||||||
|
|
||||||
@ -118,6 +119,14 @@ namespace Grid {
|
|||||||
inline ComplexD timesI(const ComplexD &r) { return(r*ComplexD(0.0,1.0));}
|
inline ComplexD timesI(const ComplexD &r) { return(r*ComplexD(0.0,1.0));}
|
||||||
inline ComplexF timesMinusI(const ComplexF &r){ return(r*ComplexF(0.0,-1.0));}
|
inline ComplexF timesMinusI(const ComplexF &r){ return(r*ComplexF(0.0,-1.0));}
|
||||||
inline ComplexD timesMinusI(const ComplexD &r){ return(r*ComplexD(0.0,-1.0));}
|
inline ComplexD timesMinusI(const ComplexD &r){ return(r*ComplexD(0.0,-1.0));}
|
||||||
|
|
||||||
|
// define projections to real and imaginary parts
|
||||||
|
// Single-precision projection onto the real axis: the result carries the
// real part of r and a zero imaginary part.
inline ComplexF projReal(const ComplexF &r) {
  const auto re = std::real(r);
  return ComplexF(re, 0.0);
}
|
||||||
|
// Double-precision projection onto the real axis: the result carries the
// real part of r and a zero imaginary part.
inline ComplexD projReal(const ComplexD &r) {
  const auto re = std::real(r);
  return ComplexD(re, 0.0);
}
|
||||||
|
// Single-precision extraction of the imaginary part.
// Note: the imaginary part of r is returned in the REAL slot of the result
// (the result's own imaginary part is zero), i.e. this yields Im(r) + 0i.
inline ComplexF projImag(const ComplexF &r) {
  const auto im = std::imag(r);
  return ComplexF(im, 0.0);
}
|
||||||
|
// Double-precision extraction of the imaginary part.
// Note: the imaginary part of r is returned in the REAL slot of the result
// (the result's own imaginary part is zero), i.e. this yields Im(r) + 0i.
inline ComplexD projImag(const ComplexD &r) {
  const auto im = std::imag(r);
  return ComplexD(im, 0.0);
}
|
||||||
|
|
||||||
|
// define auxiliary functions for complex computations
|
||||||
inline void timesI(ComplexF &ret,const ComplexF &r) { ret = timesI(r);}
|
inline void timesI(ComplexF &ret,const ComplexF &r) { ret = timesI(r);}
|
||||||
inline void timesI(ComplexD &ret,const ComplexD &r) { ret = timesI(r);}
|
inline void timesI(ComplexD &ret,const ComplexD &r) { ret = timesI(r);}
|
||||||
inline void timesMinusI(ComplexF &ret,const ComplexF &r){ ret = timesMinusI(r);}
|
inline void timesMinusI(ComplexF &ret,const ComplexF &r){ ret = timesMinusI(r);}
|
||||||
|
@ -40,7 +40,7 @@ namespace Grid {
|
|||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
|
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
|
||||||
ComplexD nrm = innerProduct(arg,arg);
|
ComplexD nrm = innerProduct(arg,arg);
|
||||||
return real(nrm);
|
return std::real(nrm);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
|
@ -5,64 +5,62 @@
|
|||||||
#ifndef STOUT_SMEAR_
|
#ifndef STOUT_SMEAR_
|
||||||
#define STOUT_SMEAR_
|
#define STOUT_SMEAR_
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
/*! @brief Stout smearing of link variable. */
|
/*! @brief Stout smearing of link variable. */
|
||||||
template <class Gimpl>
|
template <class Gimpl>
|
||||||
class Smear_Stout: public Smear<Gimpl> {
|
class Smear_Stout : public Smear<Gimpl> {
|
||||||
private:
|
private:
|
||||||
const Smear < Gimpl > * SmearBase;
|
const Smear<Gimpl>* SmearBase;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
INHERIT_GIMPL_TYPES(Gimpl)
|
INHERIT_GIMPL_TYPES(Gimpl)
|
||||||
|
|
||||||
Smear_Stout(Smear < Gimpl >* base):SmearBase(base){
|
Smear_Stout(Smear<Gimpl>* base) : SmearBase(base) {
|
||||||
static_assert(Nc==3, "Stout smearing currently implemented only for Nc==3");
|
static_assert(Nc == 3,
|
||||||
|
"Stout smearing currently implemented only for Nc==3");
|
||||||
}
|
}
|
||||||
|
|
||||||
/*! Default constructor */
|
/*! Default constructor */
|
||||||
Smear_Stout(double rho = 1.0):SmearBase(new Smear_APE < Gimpl > (rho)){
|
Smear_Stout(double rho = 1.0) : SmearBase(new Smear_APE<Gimpl>(rho)) {
|
||||||
static_assert(Nc==3, "Stout smearing currently implemented only for Nc==3");
|
static_assert(Nc == 3,
|
||||||
|
"Stout smearing currently implemented only for Nc==3");
|
||||||
}
|
}
|
||||||
|
|
||||||
~Smear_Stout(){} //delete SmearBase...
|
~Smear_Stout() {} // delete SmearBase...
|
||||||
|
|
||||||
void smear(GaugeField& u_smr,const GaugeField& U) const{
|
void smear(GaugeField& u_smr, const GaugeField& U) const {
|
||||||
GaugeField C(U._grid);
|
GaugeField C(U._grid);
|
||||||
GaugeLinkField tmp(U._grid), iq_mu(U._grid), Umu(U._grid);
|
GaugeLinkField tmp(U._grid), iq_mu(U._grid), Umu(U._grid);
|
||||||
|
|
||||||
std::cout<< GridLogDebug << "Stout smearing started\n";
|
std::cout << GridLogDebug << "Stout smearing started\n";
|
||||||
|
|
||||||
//Smear the configurations
|
// Smear the configurations
|
||||||
SmearBase->smear(C, U);
|
SmearBase->smear(C, U);
|
||||||
|
|
||||||
for (int mu = 0; mu<Nd; mu++)
|
for (int mu = 0; mu < Nd; mu++) {
|
||||||
{
|
tmp = peekLorentz(C, mu);
|
||||||
tmp = peekLorentz(C,mu);
|
Umu = peekLorentz(U, mu);
|
||||||
Umu = peekLorentz(U,mu);
|
iq_mu = Ta(
|
||||||
iq_mu = Ta(tmp * adj(Umu)); // iq_mu = Ta(Omega_mu) to match the signs with the paper
|
tmp *
|
||||||
|
adj(Umu)); // iq_mu = Ta(Omega_mu) to match the signs with the paper
|
||||||
exponentiate_iQ(tmp, iq_mu);
|
exponentiate_iQ(tmp, iq_mu);
|
||||||
pokeLorentz(u_smr, tmp*Umu, mu);// u_smr = exp(iQ_mu)*U_mu
|
pokeLorentz(u_smr, tmp * Umu, mu); // u_smr = exp(iQ_mu)*U_mu
|
||||||
}
|
}
|
||||||
std::cout<< GridLogDebug << "Stout smearing completed\n";
|
std::cout << GridLogDebug << "Stout smearing completed\n";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
void derivative(GaugeField& SigmaTerm, const GaugeField& iLambda,
|
||||||
void derivative(GaugeField& SigmaTerm,
|
const GaugeField& Gauge) const {
|
||||||
const GaugeField& iLambda,
|
|
||||||
const GaugeField& Gauge) const{
|
|
||||||
SmearBase->derivative(SigmaTerm, iLambda, Gauge);
|
SmearBase->derivative(SigmaTerm, iLambda, Gauge);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
void BaseSmear(GaugeField& C, const GaugeField& U) const {
|
||||||
void BaseSmear(GaugeField& C,
|
|
||||||
const GaugeField& U) const{
|
|
||||||
SmearBase->smear(C, U);
|
SmearBase->smear(C, U);
|
||||||
};
|
};
|
||||||
|
|
||||||
void exponentiate_iQ(GaugeLinkField& e_iQ,
|
void exponentiate_iQ(GaugeLinkField& e_iQ, const GaugeLinkField& iQ) const {
|
||||||
const GaugeLinkField& iQ) const{
|
|
||||||
// Put this outside
|
// Put this outside
|
||||||
// only valid for SU(3) matrices
|
// only valid for SU(3) matrices
|
||||||
|
|
||||||
@ -73,9 +71,9 @@
|
|||||||
// the i sign is coming from outside
|
// the i sign is coming from outside
|
||||||
// input matrix is anti-hermitian NOT hermitian
|
// input matrix is anti-hermitian NOT hermitian
|
||||||
|
|
||||||
GridBase *grid = iQ._grid;
|
GridBase* grid = iQ._grid;
|
||||||
GaugeLinkField unity(grid);
|
GaugeLinkField unity(grid);
|
||||||
unity=1.0;
|
unity = 1.0;
|
||||||
|
|
||||||
GaugeLinkField iQ2(grid), iQ3(grid);
|
GaugeLinkField iQ2(grid), iQ3(grid);
|
||||||
LatticeComplex u(grid), w(grid);
|
LatticeComplex u(grid), w(grid);
|
||||||
@ -87,37 +85,34 @@
|
|||||||
set_uw(u, w, iQ2, iQ3);
|
set_uw(u, w, iQ2, iQ3);
|
||||||
set_fj(f0, f1, f2, u, w);
|
set_fj(f0, f1, f2, u, w);
|
||||||
|
|
||||||
e_iQ = f0*unity + timesMinusI(f1) * iQ - f2 * iQ2;
|
e_iQ = f0 * unity + timesMinusI(f1) * iQ - f2 * iQ2;
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
void set_uw(LatticeComplex& u, LatticeComplex& w, GaugeLinkField& iQ2,
|
||||||
|
GaugeLinkField& iQ3) const {
|
||||||
|
Complex one_over_three = 1.0 / 3.0;
|
||||||
|
Complex one_over_two = 1.0 / 2.0;
|
||||||
|
|
||||||
void set_uw(LatticeComplex& u, LatticeComplex& w,
|
GridBase* grid = u._grid;
|
||||||
GaugeLinkField& iQ2, GaugeLinkField& iQ3) const{
|
|
||||||
Complex one_over_three = 1.0/3.0;
|
|
||||||
Complex one_over_two = 1.0/2.0;
|
|
||||||
|
|
||||||
GridBase *grid = u._grid;
|
|
||||||
LatticeComplex c0(grid), c1(grid), tmp(grid), c0max(grid), theta(grid);
|
LatticeComplex c0(grid), c1(grid), tmp(grid), c0max(grid), theta(grid);
|
||||||
|
|
||||||
// sign in c0 from the conventions on the Ta
|
// sign in c0 from the conventions on the Ta
|
||||||
c0 = - real(timesMinusI(trace(iQ3))) * one_over_three; //temporary hack
|
c0 = -imag(trace(iQ3)) * one_over_three;
|
||||||
c1 = - real(trace(iQ2)) * one_over_two;
|
c1 = -real(trace(iQ2)) * one_over_two;
|
||||||
|
|
||||||
//Cayley Hamilton checks to machine precision, tested
|
// Cayley Hamilton checks to machine precision, tested
|
||||||
tmp = c1 * one_over_three;
|
tmp = c1 * one_over_three;
|
||||||
c0max = 2.0 * pow(tmp, 1.5);
|
c0max = 2.0 * pow(tmp, 1.5);
|
||||||
|
|
||||||
theta = acos(c0/c0max)*one_over_three; // divide by three here, now leave as it is
|
theta = acos(c0 / c0max) *
|
||||||
u = sqrt(tmp) * cos( theta );
|
one_over_three; // divide by three here, now leave as it is
|
||||||
w = sqrt(c1) * sin( theta );
|
u = sqrt(tmp) * cos(theta);
|
||||||
|
w = sqrt(c1) * sin(theta);
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_fj(LatticeComplex& f0, LatticeComplex& f1, LatticeComplex& f2,
|
void set_fj(LatticeComplex& f0, LatticeComplex& f1, LatticeComplex& f2,
|
||||||
const LatticeComplex& u, const LatticeComplex& w) const{
|
const LatticeComplex& u, const LatticeComplex& w) const {
|
||||||
|
GridBase* grid = u._grid;
|
||||||
GridBase *grid = u._grid;
|
|
||||||
LatticeComplex xi0(grid), u2(grid), w2(grid), cosw(grid);
|
LatticeComplex xi0(grid), u2(grid), w2(grid), cosw(grid);
|
||||||
LatticeComplex fden(grid);
|
LatticeComplex fden(grid);
|
||||||
LatticeComplex h0(grid), h1(grid), h2(grid);
|
LatticeComplex h0(grid), h1(grid), h2(grid);
|
||||||
@ -132,36 +127,34 @@
|
|||||||
|
|
||||||
ixi0 = timesI(xi0);
|
ixi0 = timesI(xi0);
|
||||||
emiu = cos(u) - timesI(sin(u));
|
emiu = cos(u) - timesI(sin(u));
|
||||||
e2iu = cos(2.0*u) + timesI(sin(2.0*u));
|
e2iu = cos(2.0 * u) + timesI(sin(2.0 * u));
|
||||||
|
|
||||||
h0 = e2iu * (u2 - w2) + emiu * ( (8.0*u2*cosw) + (2.0*u*(3.0*u2 + w2)*ixi0));
|
h0 = e2iu * (u2 - w2) +
|
||||||
h1 = e2iu * (2.0 * u) - emiu * ( (2.0*u*cosw) - (3.0*u2-w2)*ixi0);
|
emiu * ((8.0 * u2 * cosw) + (2.0 * u * (3.0 * u2 + w2) * ixi0));
|
||||||
h2 = e2iu - emiu * ( cosw + (3.0*u)*ixi0);
|
h1 = e2iu * (2.0 * u) - emiu * ((2.0 * u * cosw) - (3.0 * u2 - w2) * ixi0);
|
||||||
|
h2 = e2iu - emiu * (cosw + (3.0 * u) * ixi0);
|
||||||
|
|
||||||
fden = unity/(9.0*u2 - w2);// reals
|
fden = unity / (9.0 * u2 - w2); // reals
|
||||||
f0 = h0 * fden;
|
f0 = h0 * fden;
|
||||||
f1 = h1 * fden;
|
f1 = h1 * fden;
|
||||||
f2 = h2 * fden;
|
f2 = h2 * fden;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LatticeComplex func_xi0(const LatticeComplex& w) const {
|
||||||
|
|
||||||
|
|
||||||
LatticeComplex func_xi0(const LatticeComplex& w) const{
|
|
||||||
// Define a function to do the check
|
// Define a function to do the check
|
||||||
//if( w < 1e-4 ) std::cout << GridLogWarning<< "[Smear_stout] w too small: "<< w <<"\n";
|
// if( w < 1e-4 ) std::cout << GridLogWarning<< "[Smear_stout] w too small:
|
||||||
return sin(w)/w;
|
// "<< w <<"\n";
|
||||||
|
return sin(w) / w;
|
||||||
}
|
}
|
||||||
|
|
||||||
LatticeComplex func_xi1(const LatticeComplex& w) const{
|
LatticeComplex func_xi1(const LatticeComplex& w) const {
|
||||||
// Define a function to do the check
|
// Define a function to do the check
|
||||||
//if( w < 1e-4 ) std::cout << GridLogWarning << "[Smear_stout] w too small: "<< w <<"\n";
|
// if( w < 1e-4 ) std::cout << GridLogWarning << "[Smear_stout] w too small:
|
||||||
return cos(w)/(w*w) - sin(w)/(w*w*w);
|
// "<< w <<"\n";
|
||||||
}
|
return cos(w) / (w * w) - sin(w) / (w * w * w);
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,33 +1,34 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/simd/Grid_vector_types.h
|
Source file: ./lib/simd/Grid_vector_types.h
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
Author: Guido Cossu <cossu@iroiro-pc.kek.jp>
|
Author: Guido Cossu <cossu@iroiro-pc.kek.jp>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: neo <cossu@post.kek.jp>
|
Author: neo <cossu@post.kek.jp>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
//---------------------------------------------------------------------------
|
//---------------------------------------------------------------------------
|
||||||
/*! @file Grid_vector_types.h
|
/*! @file Grid_vector_types.h
|
||||||
@brief Defines templated class Grid_simd to deal with inner vector types
|
@brief Defines templated class Grid_simd to deal with inner vector types
|
||||||
@ -43,7 +44,7 @@ Author: neo <cossu@post.kek.jp>
|
|||||||
#ifdef SSE4
|
#ifdef SSE4
|
||||||
#include "Grid_sse4.h"
|
#include "Grid_sse4.h"
|
||||||
#endif
|
#endif
|
||||||
#if defined (AVX1)|| defined (AVX2) || defined (AVXFMA4)
|
#if defined(AVX1) || defined(AVX2) || defined(AVXFMA4)
|
||||||
#include "Grid_avx.h"
|
#include "Grid_avx.h"
|
||||||
#endif
|
#endif
|
||||||
#if defined AVX512
|
#if defined AVX512
|
||||||
@ -61,190 +62,250 @@ Author: neo <cossu@post.kek.jp>
|
|||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
//////////////////////////////////////
|
//////////////////////////////////////
|
||||||
// To take the floating point type of real/complex type
|
// To take the floating point type of real/complex type
|
||||||
//////////////////////////////////////
|
//////////////////////////////////////
|
||||||
template <typename T> struct RealPart {
|
template <typename T>
|
||||||
|
struct RealPart {
|
||||||
typedef T type;
|
typedef T type;
|
||||||
};
|
};
|
||||||
template <typename T> struct RealPart< std::complex<T> >{
|
template <typename T>
|
||||||
|
struct RealPart<std::complex<T> > {
|
||||||
typedef T type;
|
typedef T type;
|
||||||
};
|
};
|
||||||
|
|
||||||
//////////////////////////////////////
|
//////////////////////////////////////
|
||||||
// demote a vector to real type
|
// demote a vector to real type
|
||||||
//////////////////////////////////////
|
//////////////////////////////////////
|
||||||
|
|
||||||
// type alias used to simplify the syntax of std::enable_if
|
// type alias used to simplify the syntax of std::enable_if
|
||||||
template <typename T> using Invoke = typename T::type;
|
template <typename T>
|
||||||
template <typename Condition, typename ReturnType> using EnableIf = Invoke<std::enable_if<Condition::value, ReturnType> >;
|
using Invoke = typename T::type;
|
||||||
template <typename Condition, typename ReturnType> using NotEnableIf= Invoke<std::enable_if<!Condition::value, ReturnType> >;
|
template <typename Condition, typename ReturnType>
|
||||||
|
using EnableIf = Invoke<std::enable_if<Condition::value, ReturnType> >;
|
||||||
|
template <typename Condition, typename ReturnType>
|
||||||
|
using NotEnableIf = Invoke<std::enable_if<!Condition::value, ReturnType> >;
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////
|
||||||
|
// Check for complexity with type traits
|
||||||
|
template <typename T>
|
||||||
|
struct is_complex : public std::false_type {};
|
||||||
|
template <>
|
||||||
|
struct is_complex<std::complex<double> > : public std::true_type {};
|
||||||
|
template <>
|
||||||
|
struct is_complex<std::complex<float> > : public std::true_type {};
|
||||||
|
|
||||||
////////////////////////////////////////////////////////
|
template <typename T>
|
||||||
// Check for complexity with type traits
|
using IfReal = Invoke<std::enable_if<std::is_floating_point<T>::value, int> >;
|
||||||
template <typename T> struct is_complex : public std::false_type {};
|
template <typename T>
|
||||||
template <> struct is_complex<std::complex<double> >: public std::true_type {};
|
using IfComplex = Invoke<std::enable_if<is_complex<T>::value, int> >;
|
||||||
template <> struct is_complex<std::complex<float> > : public std::true_type {};
|
template <typename T>
|
||||||
|
using IfInteger = Invoke<std::enable_if<std::is_integral<T>::value, int> >;
|
||||||
|
|
||||||
template <typename T> using IfReal = Invoke<std::enable_if<std::is_floating_point<T>::value,int> > ;
|
template <typename T>
|
||||||
template <typename T> using IfComplex = Invoke<std::enable_if<is_complex<T>::value,int> > ;
|
using IfNotReal =
|
||||||
template <typename T> using IfInteger = Invoke<std::enable_if<std::is_integral<T>::value,int> > ;
|
Invoke<std::enable_if<!std::is_floating_point<T>::value, int> >;
|
||||||
|
template <typename T>
|
||||||
|
using IfNotComplex = Invoke<std::enable_if<!is_complex<T>::value, int> >;
|
||||||
|
template <typename T>
|
||||||
|
using IfNotInteger = Invoke<std::enable_if<!std::is_integral<T>::value, int> >;
|
||||||
|
|
||||||
template <typename T> using IfNotReal = Invoke<std::enable_if<!std::is_floating_point<T>::value,int> > ;
|
////////////////////////////////////////////////////////
|
||||||
template <typename T> using IfNotComplex = Invoke<std::enable_if<!is_complex<T>::value,int> > ;
|
// Define the operation templates functors
|
||||||
template <typename T> using IfNotInteger = Invoke<std::enable_if<!std::is_integral<T>::value,int> > ;
|
// general forms to allow for vsplat syntax
|
||||||
|
// need explicit declaration of types when used since
|
||||||
////////////////////////////////////////////////////////
|
// clang cannot automatically determine the output type sometimes
|
||||||
// Define the operation templates functors
|
template <class Out, class Input1, class Input2, class Operation>
|
||||||
// general forms to allow for vsplat syntax
|
Out binary(Input1 src_1, Input2 src_2, Operation op) {
|
||||||
// need explicit declaration of types when used since
|
|
||||||
// clang cannot automatically determine the output type sometimes
|
|
||||||
template < class Out, class Input1, class Input2, class Operation >
|
|
||||||
Out binary(Input1 src_1, Input2 src_2, Operation op){
|
|
||||||
return op(src_1, src_2);
|
return op(src_1, src_2);
|
||||||
}
|
}
|
||||||
|
|
||||||
template < class Out, class Input, class Operation >
|
template <class Out, class Input, class Operation>
|
||||||
Out unary(Input src, Operation op){
|
Out unary(Input src, Operation op) {
|
||||||
return op(src);
|
return op(src);
|
||||||
}
|
}
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*
|
||||||
|
|
||||||
/*
|
|
||||||
@brief Grid_simd class for the SIMD vector type operations
|
@brief Grid_simd class for the SIMD vector type operations
|
||||||
*/
|
*/
|
||||||
template < class Scalar_type, class Vector_type >
|
template <class Scalar_type, class Vector_type>
|
||||||
class Grid_simd {
|
class Grid_simd {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
typedef typename RealPart < Scalar_type >::type Real;
|
typedef typename RealPart<Scalar_type>::type Real;
|
||||||
typedef Vector_type vector_type;
|
typedef Vector_type vector_type;
|
||||||
typedef Scalar_type scalar_type;
|
typedef Scalar_type scalar_type;
|
||||||
|
|
||||||
|
|
||||||
typedef union conv_t_union {
|
typedef union conv_t_union {
|
||||||
Vector_type v;
|
Vector_type v;
|
||||||
Scalar_type s[sizeof(Vector_type)/sizeof(Scalar_type)];
|
Scalar_type s[sizeof(Vector_type) / sizeof(Scalar_type)];
|
||||||
conv_t_union(){};
|
conv_t_union(){};
|
||||||
} conv_t;
|
} conv_t;
|
||||||
|
|
||||||
|
|
||||||
Vector_type v;
|
Vector_type v;
|
||||||
|
|
||||||
static inline int Nsimd(void) { return sizeof(Vector_type)/sizeof(Scalar_type);}
|
static inline int Nsimd(void) {
|
||||||
|
return sizeof(Vector_type) / sizeof(Scalar_type);
|
||||||
|
}
|
||||||
|
|
||||||
Grid_simd& operator=(const Grid_simd&& rhs){v=rhs.v;return *this;};
|
Grid_simd &operator=(const Grid_simd &&rhs) {
|
||||||
Grid_simd& operator=(const Grid_simd& rhs){v=rhs.v;return *this;}; //faster than not declaring it and leaving to the compiler
|
v = rhs.v;
|
||||||
Grid_simd()=default;
|
return *this;
|
||||||
Grid_simd(const Grid_simd& rhs) :v(rhs.v){}; //compiles in movaps
|
};
|
||||||
Grid_simd(const Grid_simd&& rhs):v(rhs.v){};
|
Grid_simd &operator=(const Grid_simd &rhs) {
|
||||||
|
v = rhs.v;
|
||||||
|
return *this;
|
||||||
|
}; // faster than not declaring it and leaving to the compiler
|
||||||
|
Grid_simd() = default;
|
||||||
|
Grid_simd(const Grid_simd &rhs) : v(rhs.v){}; // compiles in movaps
|
||||||
|
Grid_simd(const Grid_simd &&rhs) : v(rhs.v){};
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Constructors
|
// Constructors
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
Grid_simd & operator = ( Zero & z){
|
Grid_simd &operator=(Zero &z) {
|
||||||
vzero(*this);
|
vzero(*this);
|
||||||
return (*this);
|
return (*this);
|
||||||
}
|
}
|
||||||
|
|
||||||
//Enable if complex type
|
// Enable if complex type
|
||||||
template < typename S = Scalar_type >
|
template <typename S = Scalar_type>
|
||||||
Grid_simd(const typename std::enable_if< is_complex < S >::value, S>::type a){
|
Grid_simd(const typename std::enable_if<is_complex<S>::value, S>::type a) {
|
||||||
vsplat(*this,a);
|
vsplat(*this, a);
|
||||||
};
|
};
|
||||||
|
|
||||||
Grid_simd(const Real a){
|
Grid_simd(const Real a) { vsplat(*this, Scalar_type(a)); };
|
||||||
vsplat(*this,Scalar_type(a));
|
|
||||||
};
|
|
||||||
|
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
// mac, mult, sub, add, adj
|
// mac, mult, sub, add, adj
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
|
|
||||||
// FIXME -- alias this to an inline MAC struct.
|
// FIXME -- alias this to an inline MAC struct.
|
||||||
friend inline void mac (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ a,const Grid_simd *__restrict__ x){ *y = (*a)*(*x)+(*y); };
|
friend inline void mac(Grid_simd *__restrict__ y,
|
||||||
|
const Grid_simd *__restrict__ a,
|
||||||
|
const Grid_simd *__restrict__ x) {
|
||||||
|
*y = (*a) * (*x) + (*y);
|
||||||
|
};
|
||||||
|
|
||||||
|
friend inline void mult(Grid_simd *__restrict__ y,
|
||||||
|
const Grid_simd *__restrict__ l,
|
||||||
|
const Grid_simd *__restrict__ r) {
|
||||||
|
*y = (*l) * (*r);
|
||||||
|
}
|
||||||
|
friend inline void sub(Grid_simd *__restrict__ y,
|
||||||
|
const Grid_simd *__restrict__ l,
|
||||||
|
const Grid_simd *__restrict__ r) {
|
||||||
|
*y = (*l) - (*r);
|
||||||
|
}
|
||||||
|
friend inline void add(Grid_simd *__restrict__ y,
|
||||||
|
const Grid_simd *__restrict__ l,
|
||||||
|
const Grid_simd *__restrict__ r) {
|
||||||
|
*y = (*l) + (*r);
|
||||||
|
}
|
||||||
|
|
||||||
friend inline void mult(Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) * (*r); }
|
friend inline void mac(Grid_simd *__restrict__ y,
|
||||||
friend inline void sub (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) - (*r); }
|
const Scalar_type *__restrict__ a,
|
||||||
friend inline void add (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) + (*r); }
|
const Grid_simd *__restrict__ x) {
|
||||||
|
*y = (*a) * (*x) + (*y);
|
||||||
|
};
|
||||||
|
friend inline void mult(Grid_simd *__restrict__ y,
|
||||||
|
const Scalar_type *__restrict__ l,
|
||||||
|
const Grid_simd *__restrict__ r) {
|
||||||
|
*y = (*l) * (*r);
|
||||||
|
}
|
||||||
|
friend inline void sub(Grid_simd *__restrict__ y,
|
||||||
|
const Scalar_type *__restrict__ l,
|
||||||
|
const Grid_simd *__restrict__ r) {
|
||||||
|
*y = (*l) - (*r);
|
||||||
|
}
|
||||||
|
friend inline void add(Grid_simd *__restrict__ y,
|
||||||
|
const Scalar_type *__restrict__ l,
|
||||||
|
const Grid_simd *__restrict__ r) {
|
||||||
|
*y = (*l) + (*r);
|
||||||
|
}
|
||||||
|
|
||||||
friend inline void mac (Grid_simd *__restrict__ y,const Scalar_type *__restrict__ a,const Grid_simd *__restrict__ x){ *y = (*a)*(*x)+(*y); };
|
friend inline void mac(Grid_simd *__restrict__ y,
|
||||||
friend inline void mult(Grid_simd *__restrict__ y,const Scalar_type *__restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) * (*r); }
|
const Grid_simd *__restrict__ a,
|
||||||
friend inline void sub (Grid_simd *__restrict__ y,const Scalar_type *__restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) - (*r); }
|
const Scalar_type *__restrict__ x) {
|
||||||
friend inline void add (Grid_simd *__restrict__ y,const Scalar_type *__restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) + (*r); }
|
*y = (*a) * (*x) + (*y);
|
||||||
|
};
|
||||||
friend inline void mac (Grid_simd *__restrict__ y,const Grid_simd *__restrict__ a,const Scalar_type *__restrict__ x){ *y = (*a)*(*x)+(*y); };
|
friend inline void mult(Grid_simd *__restrict__ y,
|
||||||
friend inline void mult(Grid_simd *__restrict__ y,const Grid_simd *__restrict__ l,const Scalar_type *__restrict__ r){ *y = (*l) * (*r); }
|
const Grid_simd *__restrict__ l,
|
||||||
friend inline void sub (Grid_simd *__restrict__ y,const Grid_simd *__restrict__ l,const Scalar_type *__restrict__ r){ *y = (*l) - (*r); }
|
const Scalar_type *__restrict__ r) {
|
||||||
friend inline void add (Grid_simd *__restrict__ y,const Grid_simd *__restrict__ l,const Scalar_type *__restrict__ r){ *y = (*l) + (*r); }
|
*y = (*l) * (*r);
|
||||||
|
}
|
||||||
|
friend inline void sub(Grid_simd *__restrict__ y,
|
||||||
|
const Grid_simd *__restrict__ l,
|
||||||
|
const Scalar_type *__restrict__ r) {
|
||||||
|
*y = (*l) - (*r);
|
||||||
|
}
|
||||||
|
friend inline void add(Grid_simd *__restrict__ y,
|
||||||
|
const Grid_simd *__restrict__ l,
|
||||||
|
const Scalar_type *__restrict__ r) {
|
||||||
|
*y = (*l) + (*r);
|
||||||
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// FIXME: gonna remove these load/store, get, set, prefetch
|
// FIXME: gonna remove these load/store, get, set, prefetch
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
friend inline void vset(Grid_simd &ret, Scalar_type *a){
|
friend inline void vset(Grid_simd &ret, Scalar_type *a) {
|
||||||
ret.v = unary<Vector_type>(a, VsetSIMD());
|
ret.v = unary<Vector_type>(a, VsetSIMD());
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////
|
///////////////////////
|
||||||
// Vstore
|
// Vstore
|
||||||
///////////////////////
|
///////////////////////
|
||||||
friend inline void vstore(const Grid_simd &ret, Scalar_type *a){
|
friend inline void vstore(const Grid_simd &ret, Scalar_type *a) {
|
||||||
binary<void>(ret.v, (Real*)a, VstoreSIMD());
|
binary<void>(ret.v, (Real *)a, VstoreSIMD());
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////
|
///////////////////////
|
||||||
// Vprefetch
|
// Vprefetch
|
||||||
///////////////////////
|
///////////////////////
|
||||||
friend inline void vprefetch(const Grid_simd &v)
|
friend inline void vprefetch(const Grid_simd &v) {
|
||||||
{
|
prefetch_HINT_T0((const char *)&v.v);
|
||||||
prefetch_HINT_T0((const char*)&v.v);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////
|
///////////////////////
|
||||||
// Reduce
|
// Reduce
|
||||||
///////////////////////
|
///////////////////////
|
||||||
friend inline Scalar_type Reduce(const Grid_simd & in)
|
friend inline Scalar_type Reduce(const Grid_simd &in) {
|
||||||
{
|
|
||||||
return unary<Scalar_type>(in.v, ReduceSIMD<Scalar_type, Vector_type>());
|
return unary<Scalar_type>(in.v, ReduceSIMD<Scalar_type, Vector_type>());
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////
|
////////////////////////////
|
||||||
// opreator scalar * simd
|
// opreator scalar * simd
|
||||||
////////////////////////////
|
////////////////////////////
|
||||||
friend inline Grid_simd operator * (const Scalar_type &a, Grid_simd b){
|
friend inline Grid_simd operator*(const Scalar_type &a, Grid_simd b) {
|
||||||
Grid_simd va;
|
Grid_simd va;
|
||||||
vsplat(va,a);
|
vsplat(va, a);
|
||||||
return va*b;
|
return va * b;
|
||||||
}
|
}
|
||||||
friend inline Grid_simd operator * (Grid_simd b,const Scalar_type &a){
|
friend inline Grid_simd operator*(Grid_simd b, const Scalar_type &a) {
|
||||||
return a*b;
|
return a * b;
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////
|
///////////////////////
|
||||||
// Unary negation
|
// Unary negation
|
||||||
///////////////////////
|
///////////////////////
|
||||||
friend inline Grid_simd operator -(const Grid_simd &r) {
|
friend inline Grid_simd operator-(const Grid_simd &r) {
|
||||||
Grid_simd ret;
|
Grid_simd ret;
|
||||||
vzero(ret);
|
vzero(ret);
|
||||||
ret = ret - r;
|
ret = ret - r;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
// *=,+=,-= operators
|
// *=,+=,-= operators
|
||||||
inline Grid_simd &operator *=(const Grid_simd &r) {
|
inline Grid_simd &operator*=(const Grid_simd &r) {
|
||||||
*this = (*this)*r;
|
*this = (*this) * r;
|
||||||
return *this;
|
return *this;
|
||||||
// return (*this)*r; ?
|
// return (*this)*r; ?
|
||||||
}
|
}
|
||||||
inline Grid_simd &operator +=(const Grid_simd &r) {
|
inline Grid_simd &operator+=(const Grid_simd &r) {
|
||||||
*this = *this+r;
|
*this = *this + r;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
inline Grid_simd &operator -=(const Grid_simd &r) {
|
inline Grid_simd &operator-=(const Grid_simd &r) {
|
||||||
*this = *this-r;
|
*this = *this - r;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -255,26 +316,30 @@ namespace Grid {
|
|||||||
// provides support
|
// provides support
|
||||||
///////////////////////////////////////
|
///////////////////////////////////////
|
||||||
|
|
||||||
template<class functor> friend inline Grid_simd SimdApply (const functor &func,const Grid_simd &v) {
|
template <class functor>
|
||||||
|
friend inline Grid_simd SimdApply(const functor &func, const Grid_simd &v) {
|
||||||
Grid_simd ret;
|
Grid_simd ret;
|
||||||
Grid_simd::conv_t conv;
|
Grid_simd::conv_t conv;
|
||||||
|
|
||||||
conv.v = v.v;
|
conv.v = v.v;
|
||||||
for(int i=0;i<Nsimd();i++){
|
for (int i = 0; i < Nsimd(); i++) {
|
||||||
conv.s[i]=func(conv.s[i]);
|
conv.s[i] = func(conv.s[i]);
|
||||||
}
|
}
|
||||||
ret.v = conv.v;
|
ret.v = conv.v;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
template<class functor> friend inline Grid_simd SimdApplyBinop (const functor &func,const Grid_simd &x,const Grid_simd &y) {
|
template <class functor>
|
||||||
|
friend inline Grid_simd SimdApplyBinop(const functor &func,
|
||||||
|
const Grid_simd &x,
|
||||||
|
const Grid_simd &y) {
|
||||||
Grid_simd ret;
|
Grid_simd ret;
|
||||||
Grid_simd::conv_t cx;
|
Grid_simd::conv_t cx;
|
||||||
Grid_simd::conv_t cy;
|
Grid_simd::conv_t cy;
|
||||||
|
|
||||||
cx.v = x.v;
|
cx.v = x.v;
|
||||||
cy.v = y.v;
|
cy.v = y.v;
|
||||||
for(int i=0;i<Nsimd();i++){
|
for (int i = 0; i < Nsimd(); i++) {
|
||||||
cx.s[i]=func(cx.s[i],cy.s[i]);
|
cx.s[i] = func(cx.s[i], cy.s[i]);
|
||||||
}
|
}
|
||||||
ret.v = cx.v;
|
ret.v = cx.v;
|
||||||
return ret;
|
return ret;
|
||||||
@ -285,294 +350,347 @@ namespace Grid {
|
|||||||
// all subtypes; may not be a good assumption, but could
|
// all subtypes; may not be a good assumption, but could
|
||||||
// add the vector width as a template param for BG/Q for example
|
// add the vector width as a template param for BG/Q for example
|
||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
friend inline void permute0(Grid_simd &y,Grid_simd b){
|
friend inline void permute0(Grid_simd &y, Grid_simd b) {
|
||||||
y.v = Optimization::Permute::Permute0(b.v);
|
y.v = Optimization::Permute::Permute0(b.v);
|
||||||
}
|
}
|
||||||
friend inline void permute1(Grid_simd &y,Grid_simd b){
|
friend inline void permute1(Grid_simd &y, Grid_simd b) {
|
||||||
y.v = Optimization::Permute::Permute1(b.v);
|
y.v = Optimization::Permute::Permute1(b.v);
|
||||||
}
|
}
|
||||||
friend inline void permute2(Grid_simd &y,Grid_simd b){
|
friend inline void permute2(Grid_simd &y, Grid_simd b) {
|
||||||
y.v = Optimization::Permute::Permute2(b.v);
|
y.v = Optimization::Permute::Permute2(b.v);
|
||||||
}
|
}
|
||||||
friend inline void permute3(Grid_simd &y,Grid_simd b){
|
friend inline void permute3(Grid_simd &y, Grid_simd b) {
|
||||||
y.v = Optimization::Permute::Permute3(b.v);
|
y.v = Optimization::Permute::Permute3(b.v);
|
||||||
}
|
}
|
||||||
friend inline void permute(Grid_simd &y,Grid_simd b,int perm)
|
friend inline void permute(Grid_simd &y, Grid_simd b, int perm) {
|
||||||
{
|
if (perm & RotateBit) {
|
||||||
if ( perm & RotateBit ) {
|
int dist = perm & 0xF;
|
||||||
int dist = perm&0xF;
|
y = rotate(b, dist);
|
||||||
y=rotate(b,dist);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
switch(perm){
|
switch (perm) {
|
||||||
case 3: permute3(y,b); break;
|
case 3:
|
||||||
case 2: permute2(y,b); break;
|
permute3(y, b);
|
||||||
case 1: permute1(y,b); break;
|
break;
|
||||||
case 0: permute0(y,b); break;
|
case 2:
|
||||||
default: assert(0);
|
permute2(y, b);
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
permute1(y, b);
|
||||||
|
break;
|
||||||
|
case 0:
|
||||||
|
permute0(y, b);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
};// end of Grid_simd class definition
|
}; // end of Grid_simd class definition
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
// General rotate
|
// General rotate
|
||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
template <class S, class V, IfNotComplex<S> =0>
|
template <class S, class V, IfNotComplex<S> = 0>
|
||||||
inline Grid_simd<S,V> rotate(Grid_simd<S,V> b,int nrot)
|
inline Grid_simd<S, V> rotate(Grid_simd<S, V> b, int nrot) {
|
||||||
{
|
nrot = nrot % Grid_simd<S, V>::Nsimd();
|
||||||
nrot = nrot % Grid_simd<S,V>::Nsimd();
|
Grid_simd<S, V> ret;
|
||||||
Grid_simd<S,V> ret;
|
|
||||||
// std::cout << "Rotate Real by "<<nrot<<std::endl;
|
// std::cout << "Rotate Real by "<<nrot<<std::endl;
|
||||||
ret.v = Optimization::Rotate::rotate(b.v,nrot);
|
ret.v = Optimization::Rotate::rotate(b.v, nrot);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
template <class S, class V, IfComplex<S> =0>
|
template <class S, class V, IfComplex<S> = 0>
|
||||||
inline Grid_simd<S,V> rotate(Grid_simd<S,V> b,int nrot)
|
inline Grid_simd<S, V> rotate(Grid_simd<S, V> b, int nrot) {
|
||||||
{
|
nrot = nrot % Grid_simd<S, V>::Nsimd();
|
||||||
nrot = nrot % Grid_simd<S,V>::Nsimd();
|
Grid_simd<S, V> ret;
|
||||||
Grid_simd<S,V> ret;
|
|
||||||
// std::cout << "Rotate Complex by "<<nrot<<std::endl;
|
// std::cout << "Rotate Complex by "<<nrot<<std::endl;
|
||||||
ret.v = Optimization::Rotate::rotate(b.v,2*nrot);
|
ret.v = Optimization::Rotate::rotate(b.v, 2 * nrot);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////
|
///////////////////////
|
||||||
// Splat
|
// Splat
|
||||||
///////////////////////
|
///////////////////////
|
||||||
|
|
||||||
// this is only for the complex version
|
// this is only for the complex version
|
||||||
template <class S, class V, IfComplex<S> =0, class ABtype>
|
template <class S, class V, IfComplex<S> = 0, class ABtype>
|
||||||
inline void vsplat(Grid_simd<S,V> &ret,ABtype a, ABtype b){
|
inline void vsplat(Grid_simd<S, V> &ret, ABtype a, ABtype b) {
|
||||||
ret.v = binary<V>(a, b, VsplatSIMD());
|
ret.v = binary<V>(a, b, VsplatSIMD());
|
||||||
}
|
}
|
||||||
|
|
||||||
// overload if complex
|
// overload if complex
|
||||||
template <class S,class V> inline void vsplat(Grid_simd<S,V> &ret, EnableIf<is_complex < S >, S> c) {
|
template <class S, class V>
|
||||||
vsplat(ret,real(c),imag(c));
|
inline void vsplat(Grid_simd<S, V> &ret, EnableIf<is_complex<S>, S> c) {
|
||||||
}
|
vsplat(ret, real(c), imag(c));
|
||||||
|
}
|
||||||
|
|
||||||
//if real fill with a, if complex fill with a in the real part (first function above)
|
// if real fill with a, if complex fill with a in the real part (first function
|
||||||
template <class S,class V>
|
// above)
|
||||||
inline void vsplat(Grid_simd<S,V> &ret,NotEnableIf<is_complex< S>,S> a){
|
template <class S, class V>
|
||||||
|
inline void vsplat(Grid_simd<S, V> &ret, NotEnableIf<is_complex<S>, S> a) {
|
||||||
ret.v = unary<V>(a, VsplatSIMD());
|
ret.v = unary<V>(a, VsplatSIMD());
|
||||||
}
|
}
|
||||||
//////////////////////////
|
//////////////////////////
|
||||||
|
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
// Initialise to 1,0,i for the correct types
|
// Initialise to 1,0,i for the correct types
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
// For complex types
|
// For complex types
|
||||||
template <class S,class V, IfComplex<S> = 0 > inline void vone(Grid_simd<S,V> &ret) { vsplat(ret,S(1.0,0.0)); }
|
template <class S, class V, IfComplex<S> = 0>
|
||||||
template <class S,class V, IfComplex<S> = 0 > inline void vzero(Grid_simd<S,V> &ret) { vsplat(ret,S(0.0,0.0)); }// use xor?
|
inline void vone(Grid_simd<S, V> &ret) {
|
||||||
template <class S,class V, IfComplex<S> = 0 > inline void vcomplex_i(Grid_simd<S,V> &ret){ vsplat(ret,S(0.0,1.0));}
|
vsplat(ret, S(1.0, 0.0));
|
||||||
|
}
|
||||||
|
template <class S, class V, IfComplex<S> = 0>
|
||||||
|
inline void vzero(Grid_simd<S, V> &ret) {
|
||||||
|
vsplat(ret, S(0.0, 0.0));
|
||||||
|
} // use xor?
|
||||||
|
template <class S, class V, IfComplex<S> = 0>
|
||||||
|
inline void vcomplex_i(Grid_simd<S, V> &ret) {
|
||||||
|
vsplat(ret, S(0.0, 1.0));
|
||||||
|
}
|
||||||
|
|
||||||
template <class S,class V, IfComplex<S> = 0 > inline void visign(Grid_simd<S,V> &ret){ vsplat(ret,S(1.0,-1.0));}
|
template <class S, class V, IfComplex<S> = 0>
|
||||||
template <class S,class V, IfComplex<S> = 0 > inline void vrsign(Grid_simd<S,V> &ret){ vsplat(ret,S(-1.0,1.0));}
|
inline void visign(Grid_simd<S, V> &ret) {
|
||||||
|
vsplat(ret, S(1.0, -1.0));
|
||||||
|
}
|
||||||
|
template <class S, class V, IfComplex<S> = 0>
|
||||||
|
inline void vrsign(Grid_simd<S, V> &ret) {
|
||||||
|
vsplat(ret, S(-1.0, 1.0));
|
||||||
|
}
|
||||||
|
|
||||||
// if not complex overload here
|
// if not complex overload here
|
||||||
template <class S,class V, IfReal<S> = 0 > inline void vone (Grid_simd<S,V> &ret){ vsplat(ret,S(1.0)); }
|
template <class S, class V, IfReal<S> = 0>
|
||||||
template <class S,class V, IfReal<S> = 0 > inline void vzero(Grid_simd<S,V> &ret){ vsplat(ret,S(0.0)); }
|
inline void vone(Grid_simd<S, V> &ret) {
|
||||||
|
vsplat(ret, S(1.0));
|
||||||
|
}
|
||||||
|
template <class S, class V, IfReal<S> = 0>
|
||||||
|
inline void vzero(Grid_simd<S, V> &ret) {
|
||||||
|
vsplat(ret, S(0.0));
|
||||||
|
}
|
||||||
|
|
||||||
// For integral types
|
// For integral types
|
||||||
template <class S,class V,IfInteger<S> = 0 > inline void vone(Grid_simd<S,V> &ret) {vsplat(ret,1); }
|
template <class S, class V, IfInteger<S> = 0>
|
||||||
template <class S,class V,IfInteger<S> = 0 > inline void vzero(Grid_simd<S,V> &ret) {vsplat(ret,0); }
|
inline void vone(Grid_simd<S, V> &ret) {
|
||||||
template <class S,class V,IfInteger<S> = 0 > inline void vtrue (Grid_simd<S,V> &ret){vsplat(ret,0xFFFFFFFF);}
|
vsplat(ret, 1);
|
||||||
template <class S,class V,IfInteger<S> = 0 > inline void vfalse(Grid_simd<S,V> &ret){vsplat(ret,0);}
|
}
|
||||||
|
template <class S, class V, IfInteger<S> = 0>
|
||||||
|
inline void vzero(Grid_simd<S, V> &ret) {
|
||||||
|
vsplat(ret, 0);
|
||||||
|
}
|
||||||
|
template <class S, class V, IfInteger<S> = 0>
|
||||||
|
inline void vtrue(Grid_simd<S, V> &ret) {
|
||||||
|
vsplat(ret, 0xFFFFFFFF);
|
||||||
|
}
|
||||||
|
template <class S, class V, IfInteger<S> = 0>
|
||||||
|
inline void vfalse(Grid_simd<S, V> &ret) {
|
||||||
|
vsplat(ret, 0);
|
||||||
|
}
|
||||||
|
|
||||||
template<class S,class V> inline void zeroit(Grid_simd<S,V> &z){ vzero(z);}
|
template <class S, class V>
|
||||||
|
inline void zeroit(Grid_simd<S, V> &z) {
|
||||||
|
vzero(z);
|
||||||
|
}
|
||||||
|
|
||||||
///////////////////////
|
///////////////////////
|
||||||
// Vstream
|
// Vstream
|
||||||
///////////////////////
|
///////////////////////
|
||||||
template <class S,class V, IfReal<S> = 0 >
|
template <class S, class V, IfReal<S> = 0>
|
||||||
inline void vstream(Grid_simd<S,V> &out,const Grid_simd<S,V> &in){
|
inline void vstream(Grid_simd<S, V> &out, const Grid_simd<S, V> &in) {
|
||||||
binary<void>((S *)&out.v, in.v, VstreamSIMD());
|
binary<void>((S *)&out.v, in.v, VstreamSIMD());
|
||||||
}
|
}
|
||||||
template <class S,class V, IfComplex<S> = 0 >
|
template <class S, class V, IfComplex<S> = 0>
|
||||||
inline void vstream(Grid_simd<S,V> &out,const Grid_simd<S,V> &in){
|
inline void vstream(Grid_simd<S, V> &out, const Grid_simd<S, V> &in) {
|
||||||
typedef typename S::value_type T;
|
typedef typename S::value_type T;
|
||||||
binary<void>((T *)&out.v, in.v, VstreamSIMD());
|
binary<void>((T *)&out.v, in.v, VstreamSIMD());
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class S,class V, IfInteger<S> = 0 >
|
template <class S, class V, IfInteger<S> = 0>
|
||||||
inline void vstream(Grid_simd<S,V> &out,const Grid_simd<S,V> &in){
|
inline void vstream(Grid_simd<S, V> &out, const Grid_simd<S, V> &in) {
|
||||||
out=in;
|
out = in;
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////
|
////////////////////////////////////
|
||||||
// Arithmetic operator overloads +,-,*
|
// Arithmetic operator overloads +,-,*
|
||||||
////////////////////////////////////
|
////////////////////////////////////
|
||||||
template<class S,class V> inline Grid_simd<S,V> operator + (Grid_simd<S,V> a, Grid_simd<S,V> b) {
|
template <class S, class V>
|
||||||
Grid_simd<S,V> ret;
|
inline Grid_simd<S, V> operator+(Grid_simd<S, V> a, Grid_simd<S, V> b) {
|
||||||
|
Grid_simd<S, V> ret;
|
||||||
ret.v = binary<V>(a.v, b.v, SumSIMD());
|
ret.v = binary<V>(a.v, b.v, SumSIMD());
|
||||||
return ret;
|
return ret;
|
||||||
};
|
};
|
||||||
|
|
||||||
template<class S,class V> inline Grid_simd<S,V> operator - (Grid_simd<S,V> a, Grid_simd<S,V> b) {
|
template <class S, class V>
|
||||||
Grid_simd<S,V> ret;
|
inline Grid_simd<S, V> operator-(Grid_simd<S, V> a, Grid_simd<S, V> b) {
|
||||||
|
Grid_simd<S, V> ret;
|
||||||
ret.v = binary<V>(a.v, b.v, SubSIMD());
|
ret.v = binary<V>(a.v, b.v, SubSIMD());
|
||||||
return ret;
|
return ret;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Distinguish between complex types and others
|
// Distinguish between complex types and others
|
||||||
template<class S,class V, IfComplex<S> = 0 > inline Grid_simd<S,V> operator * (Grid_simd<S,V> a, Grid_simd<S,V> b) {
|
template <class S, class V, IfComplex<S> = 0>
|
||||||
Grid_simd<S,V> ret;
|
inline Grid_simd<S, V> operator*(Grid_simd<S, V> a, Grid_simd<S, V> b) {
|
||||||
ret.v = binary<V>(a.v,b.v, MultComplexSIMD());
|
Grid_simd<S, V> ret;
|
||||||
|
ret.v = binary<V>(a.v, b.v, MultComplexSIMD());
|
||||||
return ret;
|
return ret;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Real/Integer types
|
// Real/Integer types
|
||||||
template<class S,class V, IfNotComplex<S> = 0 > inline Grid_simd<S,V> operator * (Grid_simd<S,V> a, Grid_simd<S,V> b) {
|
template <class S, class V, IfNotComplex<S> = 0>
|
||||||
Grid_simd<S,V> ret;
|
inline Grid_simd<S, V> operator*(Grid_simd<S, V> a, Grid_simd<S, V> b) {
|
||||||
ret.v = binary<V>(a.v,b.v, MultSIMD());
|
Grid_simd<S, V> ret;
|
||||||
|
ret.v = binary<V>(a.v, b.v, MultSIMD());
|
||||||
return ret;
|
return ret;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
///////////////////////
|
||||||
///////////////////////
|
// Conjugate
|
||||||
// Conjugate
|
///////////////////////
|
||||||
///////////////////////
|
template <class S, class V, IfComplex<S> = 0>
|
||||||
template <class S,class V, IfComplex<S> = 0 >
|
inline Grid_simd<S, V> conjugate(const Grid_simd<S, V> &in) {
|
||||||
inline Grid_simd<S,V> conjugate(const Grid_simd<S,V> &in){
|
Grid_simd<S, V> ret;
|
||||||
Grid_simd<S,V> ret ;
|
|
||||||
ret.v = unary<V>(in.v, ConjSIMD());
|
ret.v = unary<V>(in.v, ConjSIMD());
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
template <class S,class V, IfNotComplex<S> = 0 > inline Grid_simd<S,V> conjugate(const Grid_simd<S,V> &in){
|
template <class S, class V, IfNotComplex<S> = 0>
|
||||||
|
inline Grid_simd<S, V> conjugate(const Grid_simd<S, V> &in) {
|
||||||
return in; // for real objects
|
return in; // for real objects
|
||||||
}
|
}
|
||||||
|
|
||||||
//Suppress adj for integer types... // odd; why conjugate above but not adj??
|
// Suppress adj for integer types... // odd; why conjugate above but not adj??
|
||||||
template < class S, class V, IfNotInteger<S> = 0 >
|
template <class S, class V, IfNotInteger<S> = 0>
|
||||||
inline Grid_simd<S,V> adj(const Grid_simd<S,V> &in){ return conjugate(in); }
|
inline Grid_simd<S, V> adj(const Grid_simd<S, V> &in) {
|
||||||
|
return conjugate(in);
|
||||||
|
}
|
||||||
|
|
||||||
///////////////////////
|
///////////////////////
|
||||||
// timesMinusI
|
// timesMinusI
|
||||||
///////////////////////
|
///////////////////////
|
||||||
template<class S,class V,IfComplex<S> = 0 >
|
template <class S, class V, IfComplex<S> = 0>
|
||||||
inline void timesMinusI( Grid_simd<S,V> &ret,const Grid_simd<S,V> &in){
|
inline void timesMinusI(Grid_simd<S, V> &ret, const Grid_simd<S, V> &in) {
|
||||||
ret.v = binary<V>(in.v, ret.v, TimesMinusISIMD());
|
ret.v = binary<V>(in.v, ret.v, TimesMinusISIMD());
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class S,class V,IfComplex<S> = 0 >
|
template <class S, class V, IfComplex<S> = 0>
|
||||||
inline Grid_simd<S,V> timesMinusI(const Grid_simd<S,V> &in){
|
inline Grid_simd<S, V> timesMinusI(const Grid_simd<S, V> &in) {
|
||||||
Grid_simd<S,V> ret;
|
Grid_simd<S, V> ret;
|
||||||
timesMinusI(ret,in);
|
timesMinusI(ret, in);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class S,class V,IfNotComplex<S> = 0 >
|
template <class S, class V, IfNotComplex<S> = 0>
|
||||||
inline Grid_simd<S,V> timesMinusI(const Grid_simd<S,V> &in){
|
inline Grid_simd<S, V> timesMinusI(const Grid_simd<S, V> &in) {
|
||||||
return in;
|
return in;
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////
|
///////////////////////
|
||||||
// timesI
|
// timesI
|
||||||
///////////////////////
|
///////////////////////
|
||||||
template<class S,class V,IfComplex<S> = 0 >
|
template <class S, class V, IfComplex<S> = 0>
|
||||||
inline void timesI(Grid_simd<S,V> &ret,const Grid_simd<S,V> &in){
|
inline void timesI(Grid_simd<S, V> &ret, const Grid_simd<S, V> &in) {
|
||||||
ret.v = binary<V>(in.v, ret.v, TimesISIMD());
|
ret.v = binary<V>(in.v, ret.v, TimesISIMD());
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class S,class V,IfComplex<S> = 0 >
|
template <class S, class V, IfComplex<S> = 0>
|
||||||
inline Grid_simd<S,V> timesI(const Grid_simd<S,V> &in){
|
inline Grid_simd<S, V> timesI(const Grid_simd<S, V> &in) {
|
||||||
Grid_simd<S,V> ret;
|
Grid_simd<S, V> ret;
|
||||||
timesI(ret,in);
|
timesI(ret, in);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class S,class V,IfNotComplex<S> = 0 >
|
template <class S, class V, IfNotComplex<S> = 0>
|
||||||
inline Grid_simd<S,V> timesI(const Grid_simd<S,V> &in){
|
inline Grid_simd<S, V> timesI(const Grid_simd<S, V> &in) {
|
||||||
return in;
|
return in;
|
||||||
}
|
}
|
||||||
|
|
||||||
/////////////////////
|
/////////////////////
|
||||||
// Inner, outer
|
// Inner, outer
|
||||||
/////////////////////
|
/////////////////////
|
||||||
|
|
||||||
template<class S, class V >
|
template <class S, class V>
|
||||||
inline Grid_simd< S, V> innerProduct(const Grid_simd< S, V> & l, const Grid_simd< S, V> & r)
|
inline Grid_simd<S, V> innerProduct(const Grid_simd<S, V> &l,
|
||||||
{
|
const Grid_simd<S, V> &r) {
|
||||||
return conjugate(l)*r;
|
return conjugate(l) * r;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class S, class V >
|
template <class S, class V>
|
||||||
inline Grid_simd< S, V> outerProduct(const Grid_simd< S, V> &l, const Grid_simd< S, V> & r)
|
inline Grid_simd<S, V> outerProduct(const Grid_simd<S, V> &l,
|
||||||
{
|
const Grid_simd<S, V> &r) {
|
||||||
return l*conjugate(r);
|
return l * conjugate(r);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class S, class V >
|
template <class S, class V>
|
||||||
inline Grid_simd< S, V> trace(const Grid_simd< S, V> &arg){
|
inline Grid_simd<S, V> trace(const Grid_simd<S, V> &arg) {
|
||||||
return arg;
|
return arg;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
// copy/splat complex real parts into real;
|
||||||
|
// insert real into complex and zero imag;
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
// real = toReal( complex )
|
||||||
// copy/splat complex real parts into real;
|
template <class S, class V, IfReal<S> = 0>
|
||||||
// insert real into complex and zero imag;
|
inline Grid_simd<S, V> toReal(const Grid_simd<std::complex<S>, V> &in) {
|
||||||
////////////////////////////////////////////////////////////
|
typedef Grid_simd<S, V> simd;
|
||||||
|
|
||||||
//real = toReal( complex )
|
|
||||||
template<class S,class V,IfReal<S> = 0>
|
|
||||||
inline Grid_simd<S,V> toReal(const Grid_simd<std::complex<S>,V> &in)
|
|
||||||
{
|
|
||||||
typedef Grid_simd<S,V> simd;
|
|
||||||
simd ret;
|
simd ret;
|
||||||
typename simd::conv_t conv;
|
typename simd::conv_t conv;
|
||||||
conv.v = in.v; // copy the vector content (bytewise)
|
conv.v = in.v; // copy the vector content (bytewise)
|
||||||
for(int i=0;i<simd::Nsimd();i+=2){
|
for (int i = 0; i < simd::Nsimd(); i += 2) {
|
||||||
conv.s[i+1]=conv.s[i]; // duplicate (r,r);(r,r);(r,r); etc...
|
conv.s[i + 1] = conv.s[i]; // duplicate (r,r);(r,r);(r,r); etc...
|
||||||
}
|
}
|
||||||
ret.v = conv.v;
|
ret.v = conv.v;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
//complex = toComplex( real )
|
// complex = toComplex( real )
|
||||||
template<class R,class V,IfReal<R> = 0 > // must be a real arg
|
template <class R, class V, IfReal<R> = 0> // must be a real arg
|
||||||
inline Grid_simd<std::complex<R>,V> toComplex (const Grid_simd<R,V> &in)
|
inline Grid_simd<std::complex<R>, V> toComplex(const Grid_simd<R, V> &in) {
|
||||||
{
|
typedef Grid_simd<R, V> Rsimd;
|
||||||
typedef Grid_simd<R,V> Rsimd;
|
typedef Grid_simd<std::complex<R>, V> Csimd;
|
||||||
typedef Grid_simd<std::complex<R>,V> Csimd;
|
typename Rsimd::conv_t conv; // address as real
|
||||||
typename Rsimd::conv_t conv;// address as real
|
|
||||||
|
|
||||||
conv.v = in.v;
|
conv.v = in.v;
|
||||||
for(int i=0;i<Rsimd::Nsimd();i+=2){
|
for (int i = 0; i < Rsimd::Nsimd(); i += 2) {
|
||||||
assert(conv.s[i+1]==conv.s[i]); // trap any cases where real was not duplicated
|
assert(conv.s[i + 1] ==
|
||||||
// indicating the SIMD grids of real and imag assignment did not correctly match
|
conv.s[i]); // trap any cases where real was not duplicated
|
||||||
conv.s[i+1]=0.0; // zero imaginary parts
|
// indicating the SIMD grids of real and imag assignment did not correctly
|
||||||
|
// match
|
||||||
|
conv.s[i + 1] = 0.0; // zero imaginary parts
|
||||||
}
|
}
|
||||||
Csimd ret;
|
Csimd ret;
|
||||||
ret.v = conv.v;
|
ret.v = conv.v;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///////////////////////////////
|
||||||
|
// Define available types
|
||||||
|
///////////////////////////////
|
||||||
|
typedef Grid_simd<float, SIMD_Ftype> vRealF;
|
||||||
|
typedef Grid_simd<double, SIMD_Dtype> vRealD;
|
||||||
|
typedef Grid_simd<std::complex<float>, SIMD_Ftype> vComplexF;
|
||||||
|
typedef Grid_simd<std::complex<double>, SIMD_Dtype> vComplexD;
|
||||||
|
typedef Grid_simd<Integer, SIMD_Itype> vInteger;
|
||||||
|
|
||||||
|
/////////////////////////////////////////
|
||||||
|
// Some traits to recognise the types
|
||||||
|
/////////////////////////////////////////
|
||||||
|
template <typename T>
|
||||||
|
struct is_simd : public std::false_type {};
|
||||||
|
template <>
|
||||||
|
struct is_simd<vRealF> : public std::true_type {};
|
||||||
|
template <>
|
||||||
|
struct is_simd<vRealD> : public std::true_type {};
|
||||||
|
template <>
|
||||||
|
struct is_simd<vComplexF> : public std::true_type {};
|
||||||
|
template <>
|
||||||
|
struct is_simd<vComplexD> : public std::true_type {};
|
||||||
|
template <>
|
||||||
|
struct is_simd<vInteger> : public std::true_type {};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
///////////////////////////////
|
using IfSimd = Invoke<std::enable_if<is_simd<T>::value, int> >;
|
||||||
// Define available types
|
template <typename T>
|
||||||
///////////////////////////////
|
using IfNotSimd = Invoke<std::enable_if<!is_simd<T>::value, unsigned> >;
|
||||||
typedef Grid_simd< float , SIMD_Ftype > vRealF;
|
|
||||||
typedef Grid_simd< double , SIMD_Dtype > vRealD;
|
|
||||||
typedef Grid_simd< std::complex< float > , SIMD_Ftype > vComplexF;
|
|
||||||
typedef Grid_simd< std::complex< double >, SIMD_Dtype > vComplexD;
|
|
||||||
typedef Grid_simd< Integer , SIMD_Itype > vInteger;
|
|
||||||
|
|
||||||
/////////////////////////////////////////
|
|
||||||
// Some traits to recognise the types
|
|
||||||
/////////////////////////////////////////
|
|
||||||
template <typename T> struct is_simd : public std::false_type{};
|
|
||||||
template <> struct is_simd<vRealF> : public std::true_type {};
|
|
||||||
template <> struct is_simd<vRealD> : public std::true_type {};
|
|
||||||
template <> struct is_simd<vComplexF>: public std::true_type {};
|
|
||||||
template <> struct is_simd<vComplexD>: public std::true_type {};
|
|
||||||
template <> struct is_simd<vInteger> : public std::true_type {};
|
|
||||||
|
|
||||||
template <typename T> using IfSimd = Invoke<std::enable_if< is_simd<T>::value,int> > ;
|
|
||||||
template <typename T> using IfNotSimd = Invoke<std::enable_if<!is_simd<T>::value,unsigned> > ;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,33 +1,34 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/simd/Grid_vector_unops.h
|
Source file: ./lib/simd/Grid_vector_unops.h
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: neo <cossu@post.kek.jp>
|
Author: neo <cossu@post.kek.jp>
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_VECTOR_UNOPS
|
#ifndef GRID_VECTOR_UNOPS
|
||||||
#define GRID_VECTOR_UNOPS
|
#define GRID_VECTOR_UNOPS
|
||||||
|
|
||||||
@ -35,213 +36,199 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
template<class scalar> struct SqrtRealFunctor {
|
template <class scalar>
|
||||||
scalar operator()(const scalar &a) const {
|
struct SqrtRealFunctor {
|
||||||
return sqrt(real(a));
|
scalar operator()(const scalar &a) const { return sqrt(real(a)); }
|
||||||
}
|
};
|
||||||
};
|
|
||||||
|
|
||||||
template<class scalar> struct RSqrtRealFunctor {
|
template <class scalar>
|
||||||
|
struct RSqrtRealFunctor {
|
||||||
scalar operator()(const scalar &a) const {
|
scalar operator()(const scalar &a) const {
|
||||||
return scalar(1.0/sqrt(real(a)));
|
return scalar(1.0 / sqrt(real(a)));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<class scalar> struct CosRealFunctor {
|
template <class scalar>
|
||||||
scalar operator()(const scalar &a) const {
|
struct CosRealFunctor {
|
||||||
return cos(real(a));
|
scalar operator()(const scalar &a) const { return cos(real(a)); }
|
||||||
}
|
};
|
||||||
};
|
|
||||||
|
|
||||||
template<class scalar> struct SinRealFunctor {
|
template <class scalar>
|
||||||
scalar operator()(const scalar &a) const {
|
struct SinRealFunctor {
|
||||||
return sin(real(a));
|
scalar operator()(const scalar &a) const { return sin(real(a)); }
|
||||||
}
|
};
|
||||||
};
|
|
||||||
|
|
||||||
template<class scalar> struct AcosRealFunctor {
|
template <class scalar>
|
||||||
scalar operator()(const scalar &a) const {
|
struct AcosRealFunctor {
|
||||||
return acos(real(a));
|
scalar operator()(const scalar &a) const { return acos(real(a)); }
|
||||||
}
|
};
|
||||||
};
|
|
||||||
|
|
||||||
template<class scalar> struct AsinRealFunctor {
|
template <class scalar>
|
||||||
scalar operator()(const scalar &a) const {
|
struct AsinRealFunctor {
|
||||||
return asin(real(a));
|
scalar operator()(const scalar &a) const { return asin(real(a)); }
|
||||||
}
|
};
|
||||||
};
|
|
||||||
|
|
||||||
template<class scalar> struct LogRealFunctor {
|
template <class scalar>
|
||||||
scalar operator()(const scalar &a) const {
|
struct LogRealFunctor {
|
||||||
return log(real(a));
|
scalar operator()(const scalar &a) const { return log(real(a)); }
|
||||||
}
|
};
|
||||||
};
|
|
||||||
|
|
||||||
template<class scalar> struct ExpRealFunctor {
|
template <class scalar>
|
||||||
scalar operator()(const scalar &a) const {
|
struct ExpRealFunctor {
|
||||||
return exp(real(a));
|
scalar operator()(const scalar &a) const { return exp(real(a)); }
|
||||||
}
|
};
|
||||||
};
|
template <class scalar>
|
||||||
template<class scalar> struct NotFunctor {
|
struct NotFunctor {
|
||||||
scalar operator()(const scalar &a) const {
|
scalar operator()(const scalar &a) const { return (!a); }
|
||||||
return (!a);
|
};
|
||||||
}
|
template <class scalar>
|
||||||
};
|
struct AbsRealFunctor {
|
||||||
template<class scalar> struct AbsRealFunctor {
|
scalar operator()(const scalar &a) const { return std::abs(real(a)); }
|
||||||
scalar operator()(const scalar &a) const {
|
};
|
||||||
return std::abs(real(a));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class scalar> struct PowRealFunctor {
|
template <class scalar>
|
||||||
|
struct PowRealFunctor {
|
||||||
double y;
|
double y;
|
||||||
PowRealFunctor(double _y) : y(_y) {};
|
PowRealFunctor(double _y) : y(_y){};
|
||||||
scalar operator()(const scalar &a) const {
|
scalar operator()(const scalar &a) const { return pow(real(a), y); }
|
||||||
return pow(real(a),y);
|
};
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class scalar> struct ModIntFunctor {
|
template <class scalar>
|
||||||
|
struct ModIntFunctor {
|
||||||
Integer y;
|
Integer y;
|
||||||
ModIntFunctor(Integer _y) : y(_y) {};
|
ModIntFunctor(Integer _y) : y(_y){};
|
||||||
scalar operator()(const scalar &a) const {
|
scalar operator()(const scalar &a) const { return Integer(a) % y; }
|
||||||
return Integer(a)%y;
|
};
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class scalar> struct DivIntFunctor {
|
template <class scalar>
|
||||||
|
struct DivIntFunctor {
|
||||||
Integer y;
|
Integer y;
|
||||||
DivIntFunctor(Integer _y) : y(_y) {};
|
DivIntFunctor(Integer _y) : y(_y){};
|
||||||
scalar operator()(const scalar &a) const {
|
scalar operator()(const scalar &a) const { return Integer(a) / y; }
|
||||||
return Integer(a)/y;
|
};
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class scalar> struct RealFunctor {
|
template <class scalar>
|
||||||
scalar operator()(const std::complex<scalar> &a) const {
|
struct RealFunctor {
|
||||||
return real(a);
|
scalar operator()(const scalar &a) const { return std::real(a); }
|
||||||
}
|
};
|
||||||
};
|
template <class scalar>
|
||||||
template<class scalar> struct ImagFunctor {
|
struct ImagFunctor {
|
||||||
scalar operator()(const std::complex<scalar> &a) const {
|
scalar operator()(const scalar &a) const { return std::imag(a); }
|
||||||
return imag(a);
|
};
|
||||||
}
|
template <class S, class V>
|
||||||
};
|
inline Grid_simd<S, V> real(const Grid_simd<S, V> &r) {
|
||||||
template < class S, class V >
|
return SimdApply(RealFunctor<S>(), r);
|
||||||
inline Grid_simd<S,V> real(const Grid_simd<S,V> &r) {
|
}
|
||||||
return SimdApply(RealFunctor<S>(),r);
|
template <class S, class V>
|
||||||
}
|
inline Grid_simd<S, V> imag(const Grid_simd<S, V> &r) {
|
||||||
template < class S, class V >
|
return SimdApply(ImagFunctor<S>(), r);
|
||||||
inline Grid_simd<S,V> imag(const Grid_simd<S,V> &r) {
|
}
|
||||||
return SimdApply(ImagFunctor<S>(),r);
|
template <class S, class V>
|
||||||
}
|
inline Grid_simd<S, V> sqrt(const Grid_simd<S, V> &r) {
|
||||||
|
return SimdApply(SqrtRealFunctor<S>(), r);
|
||||||
|
}
|
||||||
|
template <class S, class V>
|
||||||
|
inline Grid_simd<S, V> rsqrt(const Grid_simd<S, V> &r) {
|
||||||
|
return SimdApply(RSqrtRealFunctor<S>(), r);
|
||||||
|
}
|
||||||
|
template <class Scalar>
|
||||||
|
inline Scalar rsqrt(const Scalar &r) {
|
||||||
|
return (RSqrtRealFunctor<Scalar>(), r);
|
||||||
|
}
|
||||||
|
|
||||||
template < class S, class V >
|
template <class S, class V>
|
||||||
inline Grid_simd<S,V> sqrt(const Grid_simd<S,V> &r) {
|
inline Grid_simd<S, V> cos(const Grid_simd<S, V> &r) {
|
||||||
return SimdApply(SqrtRealFunctor<S>(),r);
|
return SimdApply(CosRealFunctor<S>(), r);
|
||||||
}
|
}
|
||||||
template < class S, class V >
|
template <class S, class V>
|
||||||
inline Grid_simd<S,V> rsqrt(const Grid_simd<S,V> &r) {
|
inline Grid_simd<S, V> sin(const Grid_simd<S, V> &r) {
|
||||||
return SimdApply(RSqrtRealFunctor<S>(),r);
|
return SimdApply(SinRealFunctor<S>(), r);
|
||||||
}
|
}
|
||||||
template < class Scalar >
|
template <class S, class V>
|
||||||
inline Scalar rsqrt(const Scalar &r) {
|
inline Grid_simd<S, V> acos(const Grid_simd<S, V> &r) {
|
||||||
return (RSqrtRealFunctor<Scalar>(),r);
|
return SimdApply(AcosRealFunctor<S>(), r);
|
||||||
}
|
}
|
||||||
|
template <class S, class V>
|
||||||
template < class S, class V >
|
inline Grid_simd<S, V> asin(const Grid_simd<S, V> &r) {
|
||||||
inline Grid_simd<S,V> cos(const Grid_simd<S,V> &r) {
|
return SimdApply(AsinRealFunctor<S>(), r);
|
||||||
return SimdApply(CosRealFunctor<S>(),r);
|
}
|
||||||
}
|
template <class S, class V>
|
||||||
template < class S, class V >
|
inline Grid_simd<S, V> log(const Grid_simd<S, V> &r) {
|
||||||
inline Grid_simd<S,V> sin(const Grid_simd<S,V> &r) {
|
return SimdApply(LogRealFunctor<S>(), r);
|
||||||
return SimdApply(SinRealFunctor<S>(),r);
|
}
|
||||||
}
|
template <class S, class V>
|
||||||
template < class S, class V >
|
inline Grid_simd<S, V> abs(const Grid_simd<S, V> &r) {
|
||||||
inline Grid_simd<S,V> acos(const Grid_simd<S,V> &r) {
|
return SimdApply(AbsRealFunctor<S>(), r);
|
||||||
return SimdApply(AcosRealFunctor<S>(),r);
|
}
|
||||||
}
|
template <class S, class V>
|
||||||
template < class S, class V >
|
inline Grid_simd<S, V> exp(const Grid_simd<S, V> &r) {
|
||||||
inline Grid_simd<S,V> asin(const Grid_simd<S,V> &r) {
|
return SimdApply(ExpRealFunctor<S>(), r);
|
||||||
return SimdApply(AsinRealFunctor<S>(),r);
|
}
|
||||||
}
|
template <class S, class V>
|
||||||
template < class S, class V >
|
inline Grid_simd<S, V> Not(const Grid_simd<S, V> &r) {
|
||||||
inline Grid_simd<S,V> log(const Grid_simd<S,V> &r) {
|
return SimdApply(NotFunctor<S>(), r);
|
||||||
return SimdApply(LogRealFunctor<S>(),r);
|
}
|
||||||
}
|
template <class S, class V>
|
||||||
template < class S, class V >
|
inline Grid_simd<S, V> pow(const Grid_simd<S, V> &r, double y) {
|
||||||
inline Grid_simd<S,V> abs(const Grid_simd<S,V> &r) {
|
return SimdApply(PowRealFunctor<S>(y), r);
|
||||||
return SimdApply(AbsRealFunctor<S>(),r);
|
}
|
||||||
}
|
template <class S, class V>
|
||||||
template < class S, class V >
|
inline Grid_simd<S, V> mod(const Grid_simd<S, V> &r, Integer y) {
|
||||||
inline Grid_simd<S,V> exp(const Grid_simd<S,V> &r) {
|
return SimdApply(ModIntFunctor<S>(y), r);
|
||||||
return SimdApply(ExpRealFunctor<S>(),r);
|
}
|
||||||
}
|
template <class S, class V>
|
||||||
template < class S, class V >
|
inline Grid_simd<S, V> div(const Grid_simd<S, V> &r, Integer y) {
|
||||||
inline Grid_simd<S,V> Not(const Grid_simd<S,V> &r) {
|
return SimdApply(DivIntFunctor<S>(y), r);
|
||||||
return SimdApply(NotFunctor<S>(),r);
|
}
|
||||||
}
|
////////////////////////////////////////////////////////////////////////////
|
||||||
template < class S, class V >
|
// Allows us to assign into **conformable** real vectors from complex
|
||||||
inline Grid_simd<S,V> pow(const Grid_simd<S,V> &r,double y) {
|
////////////////////////////////////////////////////////////////////////////
|
||||||
return SimdApply(PowRealFunctor<S>(y),r);
|
// template < class S, class V >
|
||||||
}
|
// inline auto ComplexRemove(const Grid_simd<S,V> &c) ->
|
||||||
template < class S, class V >
|
// Grid_simd<Grid_simd<S,V>::Real,V> {
|
||||||
inline Grid_simd<S,V> mod(const Grid_simd<S,V> &r,Integer y) {
|
// Grid_simd<Grid_simd<S,V>::Real,V> ret;
|
||||||
return SimdApply(ModIntFunctor<S>(y),r);
|
// ret.v = c.v;
|
||||||
}
|
// return ret;
|
||||||
template < class S, class V >
|
// }
|
||||||
inline Grid_simd<S,V> div(const Grid_simd<S,V> &r,Integer y) {
|
template <class scalar>
|
||||||
return SimdApply(DivIntFunctor<S>(y),r);
|
struct AndFunctor {
|
||||||
}
|
scalar operator()(const scalar &x, const scalar &y) const { return x & y; }
|
||||||
////////////////////////////////////////////////////////////////////////////
|
};
|
||||||
// Allows us to assign into **conformable** real vectors from complex
|
template <class scalar>
|
||||||
////////////////////////////////////////////////////////////////////////////
|
struct OrFunctor {
|
||||||
// template < class S, class V >
|
scalar operator()(const scalar &x, const scalar &y) const { return x | y; }
|
||||||
// inline auto ComplexRemove(const Grid_simd<S,V> &c) -> Grid_simd<Grid_simd<S,V>::Real,V> {
|
};
|
||||||
// Grid_simd<Grid_simd<S,V>::Real,V> ret;
|
template <class scalar>
|
||||||
// ret.v = c.v;
|
struct AndAndFunctor {
|
||||||
// return ret;
|
scalar operator()(const scalar &x, const scalar &y) const { return x && y; }
|
||||||
// }
|
};
|
||||||
template<class scalar> struct AndFunctor {
|
template <class scalar>
|
||||||
scalar operator()(const scalar &x, const scalar &y) const {
|
struct OrOrFunctor {
|
||||||
return x & y;
|
scalar operator()(const scalar &x, const scalar &y) const { return x || y; }
|
||||||
}
|
};
|
||||||
};
|
|
||||||
template<class scalar> struct OrFunctor {
|
|
||||||
scalar operator()(const scalar &x, const scalar &y) const {
|
|
||||||
return x | y;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
template<class scalar> struct AndAndFunctor {
|
|
||||||
scalar operator()(const scalar &x, const scalar &y) const {
|
|
||||||
return x && y;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
template<class scalar> struct OrOrFunctor {
|
|
||||||
scalar operator()(const scalar &x, const scalar &y) const {
|
|
||||||
return x || y;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
////////////////////////////////
|
|
||||||
// Calls to simd binop functors
|
|
||||||
////////////////////////////////
|
|
||||||
template < class S, class V >
|
|
||||||
inline Grid_simd<S,V> operator &(const Grid_simd<S,V> &x,const Grid_simd<S,V> &y) {
|
|
||||||
return SimdApplyBinop(AndFunctor<S>(),x,y);
|
|
||||||
}
|
|
||||||
template < class S, class V >
|
|
||||||
inline Grid_simd<S,V> operator &&(const Grid_simd<S,V> &x,const Grid_simd<S,V> &y) {
|
|
||||||
return SimdApplyBinop(AndAndFunctor<S>(),x,y);
|
|
||||||
}
|
|
||||||
template < class S, class V >
|
|
||||||
inline Grid_simd<S,V> operator |(const Grid_simd<S,V> &x,const Grid_simd<S,V> &y) {
|
|
||||||
return SimdApplyBinop(OrFunctor<S>(),x,y);
|
|
||||||
}
|
|
||||||
template < class S, class V >
|
|
||||||
inline Grid_simd<S,V> operator ||(const Grid_simd<S,V> &x,const Grid_simd<S,V> &y) {
|
|
||||||
return SimdApplyBinop(OrOrFunctor<S>(),x,y);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
////////////////////////////////
|
||||||
|
// Calls to simd binop functors
|
||||||
|
////////////////////////////////
|
||||||
|
template <class S, class V>
|
||||||
|
inline Grid_simd<S, V> operator&(const Grid_simd<S, V> &x,
|
||||||
|
const Grid_simd<S, V> &y) {
|
||||||
|
return SimdApplyBinop(AndFunctor<S>(), x, y);
|
||||||
|
}
|
||||||
|
template <class S, class V>
|
||||||
|
inline Grid_simd<S, V> operator&&(const Grid_simd<S, V> &x,
|
||||||
|
const Grid_simd<S, V> &y) {
|
||||||
|
return SimdApplyBinop(AndAndFunctor<S>(), x, y);
|
||||||
|
}
|
||||||
|
template <class S, class V>
|
||||||
|
inline Grid_simd<S, V> operator|(const Grid_simd<S, V> &x,
|
||||||
|
const Grid_simd<S, V> &y) {
|
||||||
|
return SimdApplyBinop(OrFunctor<S>(), x, y);
|
||||||
|
}
|
||||||
|
template <class S, class V>
|
||||||
|
inline Grid_simd<S, V> operator||(const Grid_simd<S, V> &x,
|
||||||
|
const Grid_simd<S, V> &y) {
|
||||||
|
return SimdApplyBinop(OrOrFunctor<S>(), x, y);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,31 +1,32 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./tests/Test_simd.cc
|
Source file: ./tests/Test_simd.cc
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: neo <cossu@post.kek.jp>
|
Author: neo <cossu@post.kek.jp>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid.h>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
@ -62,6 +63,18 @@ public:
|
|||||||
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = adj(i1);}
|
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = adj(i1);}
|
||||||
std::string name(void) const { return std::string("Adj"); }
|
std::string name(void) const { return std::string("Adj"); }
|
||||||
};
|
};
|
||||||
|
class funcImag {
|
||||||
|
public:
|
||||||
|
funcImag() {};
|
||||||
|
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = imag(i1);}
|
||||||
|
std::string name(void) const { return std::string("imag"); }
|
||||||
|
};
|
||||||
|
class funcReal {
|
||||||
|
public:
|
||||||
|
funcReal() {};
|
||||||
|
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = real(i1);}
|
||||||
|
std::string name(void) const { return std::string("real"); }
|
||||||
|
};
|
||||||
|
|
||||||
class funcTimesI {
|
class funcTimesI {
|
||||||
public:
|
public:
|
||||||
@ -141,7 +154,13 @@ void Tester(const functor &func)
|
|||||||
}
|
}
|
||||||
|
|
||||||
extract<vec,scal>(v_result,result);
|
extract<vec,scal>(v_result,result);
|
||||||
std::cout<<GridLogMessage << " " << func.name()<<std::endl;
|
|
||||||
|
std::cout << GridLogMessage << " " << func.name() << std::endl;
|
||||||
|
|
||||||
|
std::cout << GridLogDebug << v_input1 << std::endl;
|
||||||
|
std::cout << GridLogDebug << v_result << std::endl;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int ok=0;
|
int ok=0;
|
||||||
for(int i=0;i<Nsimd;i++){
|
for(int i=0;i<Nsimd;i++){
|
||||||
@ -389,6 +408,8 @@ int main (int argc, char ** argv)
|
|||||||
Tester<ComplexF,vComplexF>(funcTimes());
|
Tester<ComplexF,vComplexF>(funcTimes());
|
||||||
Tester<ComplexF,vComplexF>(funcConj());
|
Tester<ComplexF,vComplexF>(funcConj());
|
||||||
Tester<ComplexF,vComplexF>(funcAdj());
|
Tester<ComplexF,vComplexF>(funcAdj());
|
||||||
|
Tester<ComplexF,vComplexF>(funcReal());
|
||||||
|
Tester<ComplexF,vComplexF>(funcImag());
|
||||||
Tester<ComplexF,vComplexF>(funcInnerProduct());
|
Tester<ComplexF,vComplexF>(funcInnerProduct());
|
||||||
ReductionTester<ComplexF,ComplexF,vComplexF>(funcReduce());
|
ReductionTester<ComplexF,ComplexF,vComplexF>(funcReduce());
|
||||||
|
|
||||||
@ -421,17 +442,21 @@ int main (int argc, char ** argv)
|
|||||||
Tester<ComplexD,vComplexD>(funcTimes());
|
Tester<ComplexD,vComplexD>(funcTimes());
|
||||||
Tester<ComplexD,vComplexD>(funcConj());
|
Tester<ComplexD,vComplexD>(funcConj());
|
||||||
Tester<ComplexD,vComplexD>(funcAdj());
|
Tester<ComplexD,vComplexD>(funcAdj());
|
||||||
Tester<ComplexD,vComplexD>(funcInnerProduct());
|
Tester<ComplexD, vComplexD>(funcReal());
|
||||||
ReductionTester<ComplexD,ComplexD,vComplexD>(funcReduce());
|
Tester<ComplexD, vComplexD>(funcImag());
|
||||||
|
|
||||||
|
Tester<ComplexD, vComplexD>(funcInnerProduct());
|
||||||
|
ReductionTester<ComplexD, ComplexD, vComplexD>(funcReduce());
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "==================================="<< std::endl;
|
std::cout << GridLogMessage
|
||||||
std::cout<<GridLogMessage << "Testing vComplexD permutes "<<std::endl;
|
<< "===================================" << std::endl;
|
||||||
std::cout<<GridLogMessage << "==================================="<< std::endl;
|
std::cout << GridLogMessage << "Testing vComplexD permutes " << std::endl;
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< "===================================" << std::endl;
|
||||||
|
|
||||||
// Log2 iteration
|
// Log2 iteration
|
||||||
for(int i=0;(1<<i)< vComplexD::Nsimd();i++){
|
for (int i = 0; (1 << i) < vComplexD::Nsimd(); i++) {
|
||||||
PermTester<ComplexD,vComplexD>(funcPermute(i));
|
PermTester<ComplexD, vComplexD>(funcPermute(i));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user