1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-14 13:57:07 +01:00

Corrected bug in integer multiplications for SSE4 and AVX2

Merge remote-tracking branch 'upstream/master'

Conflicts:
	tests/Make.inc
This commit is contained in:
neo
2015-06-16 23:34:45 +09:00
37 changed files with 1341 additions and 515 deletions

View File

@ -102,10 +102,10 @@ namespace Grid {
}
template<class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr>
inline auto Determinant(const iMatrix<vtype,N> &arg)-> iScalar<decltype(Determinant(arg._internal[0][0]))>
inline iScalar<vtype> Determinant(const iMatrix<vtype,N> &arg)
{
iMatrix<vtype,N> ret(arg);
iScalar<decltype(Determinant(arg._internal[0][0]))> det = 1.0;
iScalar<vtype> det = vtype(1.0);
/* Conversion of matrix to upper triangular */
for(int i = 0; i < N; i++){
for(int j = 0; j < N; j++){

View File

@ -9,12 +9,12 @@ namespace Grid {
//////////////////////////////////////////////////////////////////////////////////////////
// multiplication by fundamental scalar type
// iScalar * fundamental scalar (scalar on the right).
// NOTE(review): two signature lines appear back to back here; they look like the
// before/after forms of a single declaration. In the `int N` form, N is not used in
// any function parameter, so it cannot be deduced and `a*b` could never select it.
template<class l,int N> strong_inline iScalar<l> operator * (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
template<class l> strong_inline iScalar<l> operator * (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
{
// Promote the fundamental scalar to the tensor_reduced type, then multiply tensor by tensor.
typename iScalar<l>::tensor_reduced srhs; srhs=rhs;
return lhs*srhs;
}
// Fundamental scalar * iScalar (scalar on the left): forwards to the overload above
// with the operands swapped.
template<class l,int N> strong_inline iScalar<l> operator * (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs) { return rhs*lhs; }
template<class l> strong_inline iScalar<l> operator * (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs) { return rhs*lhs; }
template<class l,int N> strong_inline iVector<l,N> operator * (const iVector<l,N>& lhs,const typename iScalar<l>::scalar_type rhs)
{
@ -118,12 +118,12 @@ template<class l,int N> strong_inline iMatrix<l,N> operator * (Integer lhs,const
///////////////////////////////////////////////////////////////////////////////////////////////
// addition by fundamental scalar type applies to matrix(down diag) and scalar
///////////////////////////////////////////////////////////////////////////////////////////////
// iScalar + fundamental scalar (scalar on the right).
// NOTE(review): two signature lines appear back to back here; they look like the
// before/after forms of a single declaration. In the `int N` form, N is not used in
// any function parameter, so it cannot be deduced and `a+b` could never select it.
template<class l,int N> strong_inline iScalar<l> operator + (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
template<class l> strong_inline iScalar<l> operator + (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
{
// Promote the fundamental scalar to the tensor_reduced type, then add tensor to tensor.
typename iScalar<l>::tensor_reduced srhs; srhs=rhs;
return lhs+srhs;
}
// Fundamental scalar + iScalar (scalar on the left): forwards to the overload above
// with the operands swapped.
template<class l,int N> strong_inline iScalar<l> operator + (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs) { return rhs+lhs; }
template<class l> strong_inline iScalar<l> operator + (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs) { return rhs+lhs; }
template<class l,int N> strong_inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type rhs)
{
@ -176,12 +176,12 @@ template<class l,int N> strong_inline iMatrix<l,N> operator + (Integer lhs,const
///////////////////////////////////////////////////////////////////////////////////////////////
// subtraction of fundamental scalar type applies to matrix(down diag) and scalar
///////////////////////////////////////////////////////////////////////////////////////////////
// iScalar - fundamental scalar (scalar on the right).
// NOTE(review): two signature lines appear back to back here; they look like the
// before/after forms of a single declaration. In the `int N` form, N is not used in
// any function parameter, so it cannot be deduced and `a-b` could never select it.
template<class l,int N> strong_inline iScalar<l> operator - (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
template<class l> strong_inline iScalar<l> operator - (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
{
// Promote the fundamental scalar to the tensor_reduced type, then subtract tensor from tensor.
typename iScalar<l>::tensor_reduced srhs; srhs=rhs;
return lhs-srhs;
}
template<class l,int N> strong_inline iScalar<l> operator - (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs)
template<class l> strong_inline iScalar<l> operator - (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs)
{
typename iScalar<l>::tensor_reduced slhs;slhs=lhs;
return slhs-rhs;

View File

@ -23,13 +23,17 @@ template<class vtype> class iScalar
public:
vtype _internal;
typedef typename GridTypeMapper<vtype>::scalar_type scalar_type;
typedef typename GridTypeMapper<vtype>::scalar_type scalar_type;
typedef typename GridTypeMapper<vtype>::vector_type vector_type;
typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v;
typedef iScalar<tensor_reduced_v> tensor_reduced;
typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object;
typedef iScalar<recurse_scalar_object> scalar_object;
// substitutes a real or complex version with same tensor structure
typedef iScalar<typename GridTypeMapper<vtype>::Complexified > Complexified;
typedef iScalar<typename GridTypeMapper<vtype>::Realified > Realified;
enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1};
// Scalar no action
@ -86,9 +90,19 @@ public:
strong_inline const vtype & operator ()(void) const {
return _internal;
}
// NOTE(review): the two unconstrained conversions directly below overlap with the
// templated ComplexD/RealD conversions further down; they look like the pre-change
// lines of this listing — confirm which set is current.
operator ComplexD () const { return(TensorRemove(_internal)); };
operator RealD () const { return(real(TensorRemove(_internal))); }
// Type casts meta programmed
// Each conversion below is a member template with defaulted parameters U=vtype and
// V=scalar_type plus two defaulted non-type parameters. Presumably IfComplex/IfReal/
// IfInteger and IfNotSimd remove the conversion from overload resolution when the
// named property does not hold — confirm against their definitions.
// To ComplexF: returns TensorRemove(_internal).
template<class U=vtype,class V=scalar_type,IfComplex<V> = 0,IfNotSimd<U> = 0>
operator ComplexF () const { return(TensorRemove(_internal)); };
// To ComplexD: returns TensorRemove(_internal).
template<class U=vtype,class V=scalar_type,IfComplex<V> = 0,IfNotSimd<U> = 0>
operator ComplexD () const { return(TensorRemove(_internal)); };
// To RealD (IfComplex variant): returns the real part of TensorRemove(_internal).
template<class U=vtype,class V=scalar_type,IfComplex<V> = 0,IfNotSimd<U> = 0>
operator RealD () const { return(real(TensorRemove(_internal))); }
// To RealD (IfReal variant): returns TensorRemove(_internal) unchanged.
template<class U=vtype,class V=scalar_type,IfReal<V> = 0,IfNotSimd<U> = 0>
operator RealD () const { return TensorRemove(_internal); }
// To Integer (IfInteger variant): explicit Integer(...) construction from TensorRemove(_internal).
template<class U=vtype,class V=scalar_type,IfInteger<V> = 0,IfNotSimd<U> = 0>
operator Integer () const { return Integer(TensorRemove(_internal)); }
// convert from a something to a scalar via constructor of something arg
template<class T,typename std::enable_if<!isGridTensor<T>::value, T>::type* = nullptr > strong_inline iScalar<vtype> operator = (T arg)
@ -123,6 +137,10 @@ public:
typedef iScalar<tensor_reduced_v> tensor_reduced;
typedef iVector<recurse_scalar_object,N> scalar_object;
// substitutes a real or complex version with same tensor structure
typedef iVector<typename GridTypeMapper<vtype>::Complexified,N > Complexified;
typedef iVector<typename GridTypeMapper<vtype>::Realified,N > Realified;
template<class T,typename std::enable_if<!isGridTensor<T>::value, T>::type* = nullptr > strong_inline auto operator = (T arg) -> iVector<vtype,N>
{
zeroit(*this);
@ -211,6 +229,12 @@ public:
typedef typename GridTypeMapper<vtype>::vector_type vector_type;
typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v;
typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object;
// substitutes a real or complex version with same tensor structure
typedef iMatrix<typename GridTypeMapper<vtype>::Complexified,N > Complexified;
typedef iMatrix<typename GridTypeMapper<vtype>::Realified,N > Realified;
// Tensor removal
typedef iScalar<tensor_reduced_v> tensor_reduced;
typedef iMatrix<recurse_scalar_object,N> scalar_object;

View File

@ -31,18 +31,17 @@ inline void merge(typename std::enable_if<!isGridTensor<vsimd>::value, vsimd >::
std::vector<scalar *> &extracted,int offset){
int Nextr=extracted.size();
int Nsimd=vsimd::Nsimd();
int s=Nsimd/Nextr;
int s=Nsimd/Nextr; // can have sparse occupation of simd vector if simd_layout does not fill it
// replicate n-fold. Use to allow Integer masks to
// predicate floating point of various width assignments and maintain conformable.
scalar *buf =(scalar *) y;
for(int i=0;i<Nextr;i++){
for(int ii=0;ii<s;ii++){
buf[i*s+ii]=extracted[i][offset];
}
}
};
////////////////////////////////////////////////////////////////////////////////////////////////
// Extract a fundamental vector type to scalar array
////////////////////////////////////////////////////////////////////////////////////////////////
@ -55,8 +54,17 @@ inline void extract(typename std::enable_if<!isGridTensor<vsimd>::value, const v
scalar *buf = (scalar *)&y;
for(int i=0;i<Nextr;i++){
for(int ii=0;ii<s;ii++){
extracted[i]=buf[i*s+ii];
extracted[i]=buf[i*s];
for(int ii=1;ii<s;ii++){
if ( buf[i*s]!=buf[i*s+ii] ){
std::cout << " SIMD extract failure splat = "<<s<<" ii "<<ii<<" " <<Nextr<<" "<< Nsimd<<" "<<std::endl;
for(int vv=0;vv<Nsimd;vv++) {
std::cout<< buf[vv]<<" ";
}
std::cout<<std::endl;
assert(0);
}
assert(buf[i*s]==buf[i*s+ii]);
}
}
@ -74,21 +82,7 @@ inline void merge(typename std::enable_if<!isGridTensor<vsimd>::value, vsimd >::
for(int i=0;i<Nextr;i++){
for(int ii=0;ii<s;ii++){
buf[i*s+ii]=extracted[i];
}
}
};
// Fills the non-tensor SIMD object y from a vector of scalars.
// With Nextr = extracted.size() and s = Nsimd/Nextr, element i of `extracted` is written
// to the s consecutive scalar slots buf[i*s] .. buf[i*s+s-1] of y.
// Nextr must be non-zero (it is used as a divisor).
template<class vsimd,class scalar>
inline void AmergeA(typename std::enable_if<!isGridTensor<vsimd>::value, vsimd >::type &y,std::vector<scalar> &extracted){
int Nextr=extracted.size();
int Nsimd=vsimd::Nsimd();
int s=Nsimd/Nextr;
// View the storage of y as a flat array of scalars.
scalar *buf = (scalar *)&y;
for(int i=0;i<Nextr;i++){
for(int ii=0;ii<s;ii++){
// NOTE(review): the same assignment appears twice in a row; this looks like the
// before/after lines of a diff rather than intentional code — confirm.
buf[i*s+ii]=extracted[i];
buf[i*s+ii]=extracted[i]; // replicates value
}
}
};
@ -102,12 +96,12 @@ template<class vobj> inline void extract(const vobj &vec,std::vector<typename vo
typedef typename vobj::vector_type vector_type ;
const int Nsimd=vobj::vector_type::Nsimd();
int Nextr=extracted.size();
const int words=sizeof(vobj)/sizeof(vector_type);
int s=Nsimd/Nextr;
extracted.resize(Nsimd);
std::vector<scalar_type *> pointers(Nsimd);
for(int i=0;i<Nsimd;i++)
std::vector<scalar_type *> pointers(Nextr);
for(int i=0;i<Nextr;i++)
pointers[i] =(scalar_type *)& extracted[i];
vector_type *vp = (vector_type *)&vec;
@ -127,11 +121,11 @@ void extract(const vobj &vec,std::vector<typename vobj::scalar_object *> &extrac
const int words=sizeof(vobj)/sizeof(vector_type);
const int Nsimd=vobj::vector_type::Nsimd();
assert(extracted.size()==Nsimd);
int Nextr=extracted.size();
int s = Nsimd/Nextr;
std::vector<scalar_type *> pointers(Nsimd);
for(int i=0;i<Nsimd;i++) {
for(int i=0;i<Nextr;i++) {
pointers[i] =(scalar_type *)& extracted[i][offset];
}
@ -153,10 +147,11 @@ void merge(vobj &vec,std::vector<typename vobj::scalar_object> &extracted)
const int Nsimd=vobj::vector_type::Nsimd();
const int words=sizeof(vobj)/sizeof(vector_type);
assert(extracted.size()==Nsimd);
int Nextr = extracted.size();
int splat=Nsimd/Nextr;
std::vector<scalar_type *> pointers(Nsimd);
for(int i=0;i<Nsimd;i++)
std::vector<scalar_type *> pointers(Nextr);
for(int i=0;i<Nextr;i++)
pointers[i] =(scalar_type *)& extracted[i];
vector_type *vp = (vector_type *)&vec;
@ -177,14 +172,14 @@ void merge(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int
const int Nsimd=vobj::vector_type::Nsimd();
const int words=sizeof(vobj)/sizeof(vector_type);
assert(extracted.size()==Nsimd);
int Nextr=extracted.size();
std::vector<scalar_type *> pointers(Nsimd);
for(int i=0;i<Nsimd;i++)
std::vector<scalar_type *> pointers(Nextr);
for(int i=0;i<Nextr;i++)
pointers[i] =(scalar_type *)& extracted[i][offset];
vector_type *vp = (vector_type *)&vec;
assert((void *)vp!=NULL);
for(int w=0;w<words;w++){
merge<vector_type,scalar_type>(&vp[w],pointers,w);
}

View File

@ -10,7 +10,8 @@ namespace Grid {
typedef typename sobj::scalar_type scalar;
decltype(innerProduct(arg,arg)) nrm;
nrm = innerProduct(arg,arg);
return real(nrm);
RealD ret = real(nrm);
return ret;
}
template<class l,class r,int N> inline

View File

@ -0,0 +1,32 @@
#ifndef GRID_TENSOR_LOGICAL_H
#define GRID_TENSOR_LOGICAL_H
namespace Grid {
// LOGICAL_BINOP(Op) emits three overloads of the binary operator Op for iScalar:
//   1. iScalar Op iScalar : applies Op to the two wrapped _internal members.
//   2. iScalar Op Integer : converts the Integer to scalar_type, then to tensor_reduced,
//                           and applies Op between lhs and that tensor.
//   3. Integer Op iScalar : same conversion applied to the left operand.
// Comments are kept outside the macro body because every body line is backslash-continued.
#define LOGICAL_BINOP(Op)\
template<class vtype> strong_inline iScalar<vtype> operator Op (const iScalar<vtype>& lhs,const iScalar<vtype>& rhs) \
{\
  iScalar<vtype> result;\
  result._internal = lhs._internal Op rhs._internal ;\
  return result;\
}\
template<class vtype> strong_inline iScalar<vtype> operator Op (const iScalar<vtype>& lhs,Integer rhs) \
{\
  typename iScalar<vtype>::scalar_type    rhs_scalar;\
  typename iScalar<vtype>::tensor_reduced rhs_tensor;\
  rhs_scalar = rhs;\
  rhs_tensor = rhs_scalar;\
  return lhs Op rhs_tensor;\
}\
template<class vtype> strong_inline iScalar<vtype> operator Op (Integer lhs,const iScalar<vtype>& rhs) \
{\
  typename iScalar<vtype>::scalar_type    lhs_scalar;\
  typename iScalar<vtype>::tensor_reduced lhs_tensor;\
  lhs_scalar = lhs;\
  lhs_tensor = lhs_scalar;\
  return lhs_tensor Op rhs;\
}
// Generate the iScalar overloads for bitwise OR/AND and logical OR/AND.
// Note: overloaded || and && always evaluate both operands; the built-in
// short-circuit behaviour does not apply to user-defined operators.
LOGICAL_BINOP(|);
LOGICAL_BINOP(&);
LOGICAL_BINOP(||);
LOGICAL_BINOP(&&);
}
#endif

View File

@ -26,6 +26,8 @@ namespace Grid {
typedef typename T::vector_type vector_type;
typedef typename T::tensor_reduced tensor_reduced;
typedef typename T::scalar_object scalar_object;
typedef typename T::Complexified Complexified;
typedef typename T::Realified Realified;
enum { TensorLevel = T::TensorLevel };
};
@ -38,6 +40,8 @@ namespace Grid {
typedef RealF vector_type;
typedef RealF tensor_reduced ;
typedef RealF scalar_object;
typedef ComplexF Complexified;
typedef RealF Realified;
enum { TensorLevel = 0 };
};
template<> class GridTypeMapper<RealD> {
@ -46,6 +50,8 @@ namespace Grid {
typedef RealD vector_type;
typedef RealD tensor_reduced;
typedef RealD scalar_object;
typedef ComplexD Complexified;
typedef RealD Realified;
enum { TensorLevel = 0 };
};
template<> class GridTypeMapper<ComplexF> {
@ -54,6 +60,8 @@ namespace Grid {
typedef ComplexF vector_type;
typedef ComplexF tensor_reduced;
typedef ComplexF scalar_object;
typedef ComplexF Complexified;
typedef RealF Realified;
enum { TensorLevel = 0 };
};
template<> class GridTypeMapper<ComplexD> {
@ -62,6 +70,8 @@ namespace Grid {
typedef ComplexD vector_type;
typedef ComplexD tensor_reduced;
typedef ComplexD scalar_object;
typedef ComplexD Complexified;
typedef RealD Realified;
enum { TensorLevel = 0 };
};
template<> class GridTypeMapper<Integer> {
@ -70,6 +80,8 @@ namespace Grid {
typedef Integer vector_type;
typedef Integer tensor_reduced;
typedef Integer scalar_object;
typedef void Complexified;
typedef void Realified;
enum { TensorLevel = 0 };
};
@ -79,6 +91,8 @@ namespace Grid {
typedef vRealF vector_type;
typedef vRealF tensor_reduced;
typedef RealF scalar_object;
typedef vComplexF Complexified;
typedef vRealF Realified;
enum { TensorLevel = 0 };
};
template<> class GridTypeMapper<vRealD> {
@ -87,6 +101,8 @@ namespace Grid {
typedef vRealD vector_type;
typedef vRealD tensor_reduced;
typedef RealD scalar_object;
typedef vComplexD Complexified;
typedef vRealD Realified;
enum { TensorLevel = 0 };
};
template<> class GridTypeMapper<vComplexF> {
@ -95,6 +111,8 @@ namespace Grid {
typedef vComplexF vector_type;
typedef vComplexF tensor_reduced;
typedef ComplexF scalar_object;
typedef vComplexF Complexified;
typedef vRealF Realified;
enum { TensorLevel = 0 };
};
template<> class GridTypeMapper<vComplexD> {
@ -103,6 +121,8 @@ namespace Grid {
typedef vComplexD vector_type;
typedef vComplexD tensor_reduced;
typedef ComplexD scalar_object;
typedef vComplexD Complexified;
typedef vRealD Realified;
enum { TensorLevel = 0 };
};
template<> class GridTypeMapper<vInteger> {
@ -111,6 +131,8 @@ namespace Grid {
typedef vInteger vector_type;
typedef vInteger tensor_reduced;
typedef Integer scalar_object;
typedef void Complexified;
typedef void Realified;
enum { TensorLevel = 0 };
};

View File

@ -2,7 +2,7 @@
#define GRID_TENSOR_UNARY_H
namespace Grid {
#define UNARY_REAL(func)\
#define UNARY(func)\
template<class obj> inline auto func(const iScalar<obj> &z) -> iScalar<obj>\
{\
iScalar<obj> ret;\
@ -53,14 +53,71 @@ template<class obj> inline iScalar<obj> func(const iScalar<obj> &z,scal y) \
return ret;\
}
UNARY_REAL(sqrt);
UNARY_REAL(rsqrt);
UNARY_REAL(sin);
UNARY_REAL(cos);
UNARY(sqrt);
UNARY(rsqrt);
UNARY(sin);
UNARY(cos);
UNARY(log);
UNARY(exp);
UNARY(abs);
UNARY(Not);
// toReal for iScalar: applies toReal to the wrapped element and returns the result
// wrapped in the Realified counterpart of iScalar<obj>.
template<class obj> inline auto toReal(const iScalar<obj> &z) -> typename iScalar<obj>::Realified
{
  typename iScalar<obj>::Realified out;
  out._internal = toReal(z._internal);
  return out;
}
// toReal for iVector: applies toReal to each of the N components in index order and
// returns them in the Realified counterpart of iVector<obj,N>.
template<class obj,int N> inline auto toReal(const iVector<obj,N> &z) -> typename iVector<obj,N>::Realified
{
  typename iVector<obj,N>::Realified out;
  for(int i=0;i<N;++i)
  {
    out._internal[i] = toReal(z._internal[i]);
  }
  return out;
}
// toReal for iMatrix: applies toReal to every one of the N x N entries (first index
// outermost) and returns them in the Realified counterpart of iMatrix<obj,N>.
template<class obj,int N> inline auto toReal(const iMatrix<obj,N> &z) -> typename iMatrix<obj,N>::Realified
{
  typename iMatrix<obj,N>::Realified out;
  for(int row=0;row<N;++row)
  {
    for(int col=0;col<N;++col)
    {
      out._internal[row][col] = toReal(z._internal[row][col]);
    }
  }
  return out;
}
// toComplex for iScalar: applies toComplex to the wrapped element and returns the result
// wrapped in the Complexified counterpart of iScalar<obj>.
template<class obj> inline auto toComplex(const iScalar<obj> &z) -> typename iScalar<obj>::Complexified
{
  typename iScalar<obj>::Complexified out;
  out._internal = toComplex(z._internal);
  return out;
}
// toComplex for iVector: applies toComplex to each of the N components in index order
// and returns them in the Complexified counterpart of iVector<obj,N>.
template<class obj,int N> inline auto toComplex(const iVector<obj,N> &z) -> typename iVector<obj,N>::Complexified
{
  typename iVector<obj,N>::Complexified out;
  for(int i=0;i<N;++i)
  {
    out._internal[i] = toComplex(z._internal[i]);
  }
  return out;
}
// toComplex for iMatrix: applies toComplex to every one of the N x N entries (first index
// outermost) and returns them in the Complexified counterpart of iMatrix<obj,N>.
template<class obj,int N> inline auto toComplex(const iMatrix<obj,N> &z) -> typename iMatrix<obj,N>::Complexified
{
  typename iMatrix<obj,N>::Complexified out;
  for(int row=0;row<N;++row)
  {
    for(int col=0;col<N;++col)
    {
      out._internal[row][col] = toComplex(z._internal[row][col]);
    }
  }
  return out;
}
BINARY_RSCALAR(mod,Integer);
BINARY_RSCALAR(pow,RealD);
#undef UNARY
#undef BINARY_RSCALAR
}
#endif