mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
Better organisation
This commit is contained in:
parent
ad1b9b6ccf
commit
1a1474b323
@ -1,5 +1,7 @@
|
||||
#ifndef GRID_LATTICE_H
|
||||
#define GRID_LATTICE_H
|
||||
|
||||
#include "Grid.h"
|
||||
#include "Grid_vComplexD.h"
|
||||
|
||||
namespace dpo {
|
||||
|
||||
@ -9,7 +11,7 @@ class Lattice
|
||||
public:
|
||||
Grid *_grid;
|
||||
int checkerboard;
|
||||
std::vector<vobj,myallocator<vobj> > _odata;
|
||||
std::vector<vobj,alignedAllocator<vobj> > _odata;
|
||||
|
||||
public:
|
||||
|
||||
@ -554,95 +556,5 @@ public:
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
namespace QCD {
|
||||
|
||||
static const int Nc=3;
|
||||
static const int Ns=4;
|
||||
|
||||
static const int CbRed =0;
|
||||
static const int CbBlack=1;
|
||||
|
||||
// QCD iMatrix types
|
||||
template<typename vtype> using iSinglet = iScalar<iScalar<vtype> > ;
|
||||
template<typename vtype> using iSpinMatrix = iMatrix<iScalar<vtype>, Ns>;
|
||||
template<typename vtype> using iSpinColourMatrix = iMatrix<iMatrix<vtype, Nc>, Ns>;
|
||||
|
||||
template<typename vtype> using iColourMatrix = iScalar<iMatrix<vtype, Nc>> ;
|
||||
|
||||
template<typename vtype> using iSpinVector = iVector<iScalar<vtype>, Ns>;
|
||||
template<typename vtype> using iColourVector = iScalar<iVector<vtype, Nc> >;
|
||||
template<typename vtype> using iSpinColourVector = iVector<iVector<vtype, Nc>, Ns>;
|
||||
|
||||
typedef iSinglet<Complex > TComplex; // This is painful. Tensor singlet complex type.
|
||||
typedef iSinglet<vComplex > vTComplex;
|
||||
typedef iSinglet<Real > TReal; // This is painful. Tensor singlet complex type.
|
||||
|
||||
typedef iSpinMatrix<Complex > SpinMatrix;
|
||||
typedef iColourMatrix<Complex > ColourMatrix;
|
||||
typedef iSpinColourMatrix<Complex > SpinColourMatrix;
|
||||
|
||||
typedef iSpinVector<Complex > SpinVector;
|
||||
typedef iColourVector<Complex > ColourVector;
|
||||
typedef iSpinColourVector<Complex > SpinColourVector;
|
||||
|
||||
|
||||
typedef iSpinMatrix<vComplex > vSpinMatrix;
|
||||
typedef iColourMatrix<vComplex > vColourMatrix;
|
||||
typedef iSpinColourMatrix<vComplex > vSpinColourMatrix;
|
||||
|
||||
typedef iSpinVector<vComplex > vSpinVector;
|
||||
typedef iColourVector<vComplex > vColourVector;
|
||||
typedef iSpinColourVector<vComplex > vSpinColourVector;
|
||||
|
||||
|
||||
typedef Lattice<vTComplex> LatticeComplex;
|
||||
|
||||
typedef Lattice<vColourMatrix> LatticeColourMatrix;
|
||||
typedef Lattice<vSpinMatrix> LatticeSpinMatrix;
|
||||
typedef Lattice<vSpinColourMatrix> LatticePropagator;
|
||||
typedef LatticePropagator LatticeSpinColourMatrix;
|
||||
|
||||
typedef Lattice<vSpinColourVector> LatticeFermion;
|
||||
typedef Lattice<vSpinColourVector> LatticeSpinColourVector;
|
||||
typedef Lattice<vSpinVector> LatticeSpinVector;
|
||||
typedef Lattice<vColourVector> LatticeColourVector;
|
||||
|
||||
// localNorm2,
|
||||
template<class tt>
|
||||
inline LatticeComplex localNorm2 (const Lattice<tt> &rhs)
|
||||
{
|
||||
LatticeComplex ret(rhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=trace(adj(rhs)*rhs);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
// localInnerProduct
|
||||
template<class tt>
|
||||
inline LatticeComplex localInnerProduct (const Lattice<tt> &lhs,const Lattice<tt> &rhs)
|
||||
{
|
||||
LatticeComplex ret(rhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=localInnerProduct(lhs._odata[ss],rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// outerProduct Scalar x Scalar -> Scalar
|
||||
// Vector x Vector -> Matrix
|
||||
template<class ll,class rr>
|
||||
inline auto outerProduct (const Lattice<ll> &lhs,const Lattice<rr> &rhs) -> Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))> ret(rhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=outerProduct(lhs._odata[ss],rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
} //namespace QCD
|
||||
|
||||
}
|
||||
#endif
|
||||
|
94
Grid_QCD.h
Normal file
94
Grid_QCD.h
Normal file
@ -0,0 +1,94 @@
|
||||
#ifndef GRID_QCD_H
|
||||
#define GRID_QCD_H
|
||||
namespace dpo{
|
||||
namespace QCD {
|
||||
|
||||
static const int Nc=3;
|
||||
static const int Ns=4;
|
||||
|
||||
static const int CbRed =0;
|
||||
static const int CbBlack=1;
|
||||
|
||||
// QCD iMatrix types
|
||||
template<typename vtype> using iSinglet = iScalar<iScalar<vtype> > ;
|
||||
template<typename vtype> using iSpinMatrix = iMatrix<iScalar<vtype>, Ns>;
|
||||
template<typename vtype> using iSpinColourMatrix = iMatrix<iMatrix<vtype, Nc>, Ns>;
|
||||
|
||||
template<typename vtype> using iColourMatrix = iScalar<iMatrix<vtype, Nc>> ;
|
||||
|
||||
template<typename vtype> using iSpinVector = iVector<iScalar<vtype>, Ns>;
|
||||
template<typename vtype> using iColourVector = iScalar<iVector<vtype, Nc> >;
|
||||
template<typename vtype> using iSpinColourVector = iVector<iVector<vtype, Nc>, Ns>;
|
||||
|
||||
typedef iSinglet<Complex > TComplex; // This is painful. Tensor singlet complex type.
|
||||
typedef iSinglet<vComplex > vTComplex;
|
||||
typedef iSinglet<Real > TReal; // This is painful. Tensor singlet complex type.
|
||||
|
||||
typedef iSpinMatrix<Complex > SpinMatrix;
|
||||
typedef iColourMatrix<Complex > ColourMatrix;
|
||||
typedef iSpinColourMatrix<Complex > SpinColourMatrix;
|
||||
|
||||
typedef iSpinVector<Complex > SpinVector;
|
||||
typedef iColourVector<Complex > ColourVector;
|
||||
typedef iSpinColourVector<Complex > SpinColourVector;
|
||||
|
||||
|
||||
typedef iSpinMatrix<vComplex > vSpinMatrix;
|
||||
typedef iColourMatrix<vComplex > vColourMatrix;
|
||||
typedef iSpinColourMatrix<vComplex > vSpinColourMatrix;
|
||||
|
||||
typedef iSpinVector<vComplex > vSpinVector;
|
||||
typedef iColourVector<vComplex > vColourVector;
|
||||
typedef iSpinColourVector<vComplex > vSpinColourVector;
|
||||
|
||||
|
||||
typedef Lattice<vTComplex> LatticeComplex;
|
||||
|
||||
typedef Lattice<vColourMatrix> LatticeColourMatrix;
|
||||
typedef Lattice<vSpinMatrix> LatticeSpinMatrix;
|
||||
typedef Lattice<vSpinColourMatrix> LatticePropagator;
|
||||
typedef LatticePropagator LatticeSpinColourMatrix;
|
||||
|
||||
typedef Lattice<vSpinColourVector> LatticeFermion;
|
||||
typedef Lattice<vSpinColourVector> LatticeSpinColourVector;
|
||||
typedef Lattice<vSpinVector> LatticeSpinVector;
|
||||
typedef Lattice<vColourVector> LatticeColourVector;
|
||||
|
||||
// localNorm2,
|
||||
template<class tt>
|
||||
inline LatticeComplex localNorm2 (const Lattice<tt> &rhs)
|
||||
{
|
||||
LatticeComplex ret(rhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=trace(adj(rhs)*rhs);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
// localInnerProduct
|
||||
template<class tt>
|
||||
inline LatticeComplex localInnerProduct (const Lattice<tt> &lhs,const Lattice<tt> &rhs)
|
||||
{
|
||||
LatticeComplex ret(rhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=localInnerProduct(lhs._odata[ss],rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// outerProduct Scalar x Scalar -> Scalar
|
||||
// Vector x Vector -> Matrix
|
||||
template<class ll,class rr>
|
||||
inline auto outerProduct (const Lattice<ll> &lhs,const Lattice<rr> &rhs) -> Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))> ret(rhs._grid);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=outerProduct(lhs._odata[ss],rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
} //namespace QCD
|
||||
} // dpo
|
||||
#endif
|
56
Grid_aligned_allocator.h
Normal file
56
Grid_aligned_allocator.h
Normal file
@ -0,0 +1,56 @@
|
||||
#ifndef GRID_ALIGNED_ALLOCATOR_H
|
||||
#define GRID_ALIGNED_ALLOCATOR_H
|
||||
namespace dpo {
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// A lattice of something, but assume the something is SIMDized.
|
||||
////////////////////////////////////////////////////////////////////
|
||||
template<typename _Tp>
|
||||
class alignedAllocator {
|
||||
public:
|
||||
typedef std::size_t size_type;
|
||||
typedef std::ptrdiff_t difference_type;
|
||||
typedef _Tp* pointer;
|
||||
typedef const _Tp* const_pointer;
|
||||
typedef _Tp& reference;
|
||||
typedef const _Tp& const_reference;
|
||||
typedef _Tp value_type;
|
||||
|
||||
template<typename _Tp1> struct rebind { typedef alignedAllocator<_Tp1> other; };
|
||||
alignedAllocator() throw() { }
|
||||
alignedAllocator(const alignedAllocator&) throw() { }
|
||||
template<typename _Tp1> alignedAllocator(const alignedAllocator<_Tp1>&) throw() { }
|
||||
~alignedAllocator() throw() { }
|
||||
pointer address(reference __x) const { return &__x; }
|
||||
const_pointer address(const_reference __x) const { return &__x; }
|
||||
size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
|
||||
// Should override allocate and deallocate
|
||||
pointer allocate(size_type __n, const void* = 0)
|
||||
{
|
||||
//_Tp * ptr = (_Tp *) memalign(sizeof(_Tp),__n*sizeof(_Tp));
|
||||
// _Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp));
|
||||
#ifdef AVX512
|
||||
_Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp));
|
||||
#else
|
||||
_Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128);
|
||||
#endif
|
||||
|
||||
return ptr;
|
||||
}
|
||||
void deallocate(pointer __p, size_type) {
|
||||
free(__p);
|
||||
}
|
||||
void construct(pointer __p, const _Tp& __val) { };
|
||||
void construct(pointer __p) { };
|
||||
void destroy(pointer __p) { };
|
||||
};
|
||||
|
||||
template<typename _Tp> inline bool
|
||||
operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
|
||||
|
||||
template<typename _Tp> inline bool
|
||||
operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
|
||||
|
||||
|
||||
}; // namespace dpo
|
||||
#endif
|
101
Grid_config.h
Normal file
101
Grid_config.h
Normal file
@ -0,0 +1,101 @@
|
||||
/* Grid_config.h. Generated from Grid_config.h.in by configure. */
|
||||
/* Grid_config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
/* AVX */
|
||||
#define AVX1 1
|
||||
|
||||
/* AVX2 */
|
||||
/* #undef AVX2 */
|
||||
|
||||
/* AVX512 */
|
||||
/* #undef AVX512 */
|
||||
|
||||
/* Define to 1 if you have the `gettimeofday' function. */
|
||||
#define HAVE_GETTIMEOFDAY 1
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#define HAVE_INTTYPES_H 1
|
||||
|
||||
/* Define to 1 if you have the <malloc.h> header file. */
|
||||
/* #undef HAVE_MALLOC_H */
|
||||
|
||||
/* Define to 1 if you have the <malloc/malloc.h> header file. */
|
||||
#define HAVE_MALLOC_MALLOC_H 1
|
||||
|
||||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
#define HAVE_MEMORY_H 1
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#define HAVE_STDINT_H 1
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#define HAVE_STDLIB_H 1
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#define HAVE_STRINGS_H 1
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
#define HAVE_STRING_H 1
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#define HAVE_SYS_STAT_H 1
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#define HAVE_SYS_TYPES_H 1
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#define HAVE_UNISTD_H 1
|
||||
|
||||
/* Define to 1 if the system has the `aligned' variable attribute */
|
||||
#define HAVE_VAR_ATTRIBUTE_ALIGNED 1
|
||||
|
||||
/* Name of package */
|
||||
#define PACKAGE "grid"
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#define PACKAGE_BUGREPORT "paboyle@ph.ed.ac.uk"
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#define PACKAGE_NAME "Grid"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "Grid 1.0"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "grid"
|
||||
|
||||
/* Define to the home page for this package. */
|
||||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "1.0"
|
||||
|
||||
/* SSE2 */
|
||||
/* #undef SSE2 */
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
#define STDC_HEADERS 1
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "1.0"
|
||||
|
||||
/* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/synch.h>,
|
||||
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
|
||||
#define below would cause a syntax error. */
|
||||
/* #undef _UINT32_T */
|
||||
|
||||
/* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/synch.h>,
|
||||
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
|
||||
#define below would cause a syntax error. */
|
||||
/* #undef _UINT64_T */
|
||||
|
||||
/* Define to `unsigned int' if <sys/types.h> does not define. */
|
||||
/* #undef size_t */
|
||||
|
||||
/* Define to the type of an unsigned integer type of width exactly 32 bits if
|
||||
such a type exists and the standard includes do not define it. */
|
||||
/* #undef uint32_t */
|
||||
|
||||
/* Define to the type of an unsigned integer type of width exactly 64 bits if
|
||||
such a type exists and the standard includes do not define it. */
|
||||
/* #undef uint64_t */
|
819
Grid_math_types.h
Normal file
819
Grid_math_types.h
Normal file
@ -0,0 +1,819 @@
|
||||
#ifndef GRID_MATH_TYPES_H
|
||||
#define GRID_MATH_TYPES_H
|
||||
namespace dpo {
|
||||
///////////////////////////////////////////////////
|
||||
// Scalar, Vector, Matrix objects.
|
||||
// These can be composed to form tensor products of internal indices.
|
||||
///////////////////////////////////////////////////
|
||||
|
||||
template<class vtype> class iScalar
|
||||
{
|
||||
public:
|
||||
SIMDalign vtype _internal;
|
||||
iScalar(){};
|
||||
iScalar(Zero &z){ *this = zero; };
|
||||
iScalar<vtype> & operator= (const Zero &hero){
|
||||
zeroit(*this);
|
||||
return *this;
|
||||
}
|
||||
friend void zeroit(iScalar<vtype> &that){
|
||||
zeroit(that._internal);
|
||||
}
|
||||
// Unary negation
|
||||
friend inline iScalar<vtype> operator -(const iScalar<vtype> &r) {
|
||||
iScalar<vtype> ret;
|
||||
ret._internal= -r._internal;
|
||||
return ret;
|
||||
}
|
||||
// *=,+=,-= operators
|
||||
inline iScalar<vtype> &operator *=(const iScalar<vtype> &r) {
|
||||
*this = (*this)*r;
|
||||
return *this;
|
||||
}
|
||||
inline iScalar<vtype> &operator -=(const iScalar<vtype> &r) {
|
||||
*this = (*this)-r;
|
||||
return *this;
|
||||
}
|
||||
inline iScalar<vtype> &operator +=(const iScalar<vtype> &r) {
|
||||
*this = (*this)+r;
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
template<class vtype,int N> class iVector
|
||||
{
|
||||
public:
|
||||
SIMDalign vtype _internal[N];
|
||||
iVector(Zero &z){ *this = zero; };
|
||||
iVector() {};
|
||||
iVector<vtype,N> & operator= (Zero &hero){
|
||||
zeroit(*this);
|
||||
return *this;
|
||||
}
|
||||
friend void zeroit(iVector<vtype,N> &that){
|
||||
for(int i=0;i<N;i++){
|
||||
zeroit(that._internal[i]);
|
||||
}
|
||||
}
|
||||
// Unary negation
|
||||
friend inline iVector<vtype,N> operator -(const iVector<vtype,N> &r) {
|
||||
iVector<vtype,N> ret;
|
||||
for(int i=0;i<N;i++) ret._internal[i]= -r._internal[i];
|
||||
return ret;
|
||||
}
|
||||
// *=,+=,-= operators
|
||||
inline iVector<vtype,N> &operator *=(const iScalar<vtype> &r) {
|
||||
*this = (*this)*r;
|
||||
return *this;
|
||||
}
|
||||
inline iVector<vtype,N> &operator -=(const iVector<vtype,N> &r) {
|
||||
*this = (*this)-r;
|
||||
return *this;
|
||||
}
|
||||
inline iVector<vtype,N> &operator +=(const iVector<vtype,N> &r) {
|
||||
*this = (*this)+r;
|
||||
return *this;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
template<class vtype,int N> class iMatrix
|
||||
{
|
||||
public:
|
||||
SIMDalign vtype _internal[N][N];
|
||||
iMatrix(Zero &z){ *this = zero; };
|
||||
iMatrix() {};
|
||||
iMatrix<vtype,N> & operator= (Zero &hero){
|
||||
zeroit(*this);
|
||||
return *this;
|
||||
}
|
||||
friend void zeroit(iMatrix<vtype,N> &that){
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
zeroit(that._internal[i][j]);
|
||||
}}
|
||||
}
|
||||
// Unary negation
|
||||
friend inline iMatrix<vtype,N> operator -(const iMatrix<vtype,N> &r) {
|
||||
iMatrix<vtype,N> ret;
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
ret._internal[i][j]= -r._internal[i][j];
|
||||
}}
|
||||
return ret;
|
||||
}
|
||||
// *=,+=,-= operators
|
||||
template<class T>
|
||||
inline iMatrix<vtype,N> &operator *=(const T &r) {
|
||||
*this = (*this)*r;
|
||||
return *this;
|
||||
}
|
||||
template<class T>
|
||||
inline iMatrix<vtype,N> &operator -=(const T &r) {
|
||||
*this = (*this)-r;
|
||||
return *this;
|
||||
}
|
||||
template<class T>
|
||||
inline iMatrix<vtype,N> &operator +=(const T &r) {
|
||||
*this = (*this)+r;
|
||||
return *this;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////////////// ADD ///////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
// ADD is simple for now; cannot mix types and straightforward template
|
||||
// Scalar +/- Scalar
|
||||
// Vector +/- Vector
|
||||
// Matrix +/- Matrix
|
||||
template<class vtype,class ltype,class rtype> inline void add(iScalar<vtype> * __restrict__ ret,
|
||||
const iScalar<ltype> * __restrict__ lhs,
|
||||
const iScalar<rtype> * __restrict__ rhs)
|
||||
{
|
||||
add(&ret->_internal,&lhs->_internal,&rhs->_internal);
|
||||
}
|
||||
template<class vtype,class ltype,class rtype,int N> inline void add(iVector<vtype,N> * __restrict__ ret,
|
||||
const iVector<ltype,N> * __restrict__ lhs,
|
||||
const iVector<rtype,N> * __restrict__ rhs)
|
||||
{
|
||||
for(int c=0;c<N;c++){
|
||||
ret->_internal[c]=lhs->_internal[c]+rhs->_internal[c];
|
||||
}
|
||||
return;
|
||||
}
|
||||
template<class vtype,class ltype,class rtype, int N> inline void add(iMatrix<vtype,N> * __restrict__ ret,
|
||||
const iMatrix<ltype,N> * __restrict__ lhs,
|
||||
const iMatrix<rtype,N> * __restrict__ rhs)
|
||||
{
|
||||
for(int c2=0;c2<N;c2++){
|
||||
for(int c1=0;c1<N;c1++){
|
||||
add(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal[c1][c2]);
|
||||
}}
|
||||
return;
|
||||
}
|
||||
template<class vtype,class ltype,class rtype, int N> inline void add(iMatrix<vtype,N> * __restrict__ ret,
|
||||
const iScalar<ltype> * __restrict__ lhs,
|
||||
const iMatrix<rtype,N> * __restrict__ rhs)
|
||||
{
|
||||
for(int c2=0;c2<N;c2++){
|
||||
for(int c1=0;c1<N;c1++){
|
||||
add(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]);
|
||||
}}
|
||||
return;
|
||||
}
|
||||
template<class vtype,class ltype,class rtype, int N> inline void add(iMatrix<vtype,N> * __restrict__ ret,
|
||||
const iMatrix<ltype,N> * __restrict__ lhs,
|
||||
const iScalar<rtype> * __restrict__ rhs)
|
||||
{
|
||||
for(int c2=0;c2<N;c2++){
|
||||
for(int c1=0;c1<N;c1++){
|
||||
if ( c1==c2)
|
||||
add(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal);
|
||||
else
|
||||
ret->_internal[c1][c2]=lhs->_internal[c1][c2];
|
||||
}}
|
||||
return;
|
||||
}
|
||||
// Need to figure multi-precision.
|
||||
template<class Mytype> Mytype timesI(Mytype &r)
|
||||
{
|
||||
iScalar<Complex> i;
|
||||
i._internal = Complex(0,1);
|
||||
return r*i;
|
||||
}
|
||||
|
||||
// + operator for scalar, vector, matrix
|
||||
template<class ltype,class rtype>
|
||||
//inline auto operator + (iScalar<ltype>& lhs,iScalar<rtype>&& rhs) -> iScalar<decltype(lhs._internal + rhs._internal)>
|
||||
inline auto operator + (const iScalar<ltype>& lhs,const iScalar<rtype>& rhs) -> iScalar<decltype(lhs._internal + rhs._internal)>
|
||||
{
|
||||
typedef iScalar<decltype(lhs._internal+rhs._internal)> ret_t;
|
||||
ret_t ret;
|
||||
add(&ret,&lhs,&rhs);
|
||||
return ret;
|
||||
}
|
||||
template<class ltype,class rtype,int N>
|
||||
inline auto operator + (const iVector<ltype,N>& lhs,const iVector<rtype,N>& rhs) ->iVector<decltype(lhs._internal[0]+rhs._internal[0]),N>
|
||||
{
|
||||
typedef iVector<decltype(lhs._internal[0]+rhs._internal[0]),N> ret_t;
|
||||
ret_t ret;
|
||||
add(&ret,&lhs,&rhs);
|
||||
return ret;
|
||||
}
|
||||
template<class ltype,class rtype,int N>
|
||||
inline auto operator + (const iMatrix<ltype,N>& lhs,const iMatrix<rtype,N>& rhs) ->iMatrix<decltype(lhs._internal[0][0]+rhs._internal[0][0]),N>
|
||||
{
|
||||
typedef iMatrix<decltype(lhs._internal[0][0]+rhs._internal[0][0]),N> ret_t;
|
||||
ret_t ret;
|
||||
add(&ret,&lhs,&rhs);
|
||||
return ret;
|
||||
}
|
||||
template<class ltype,class rtype,int N>
|
||||
inline auto operator + (const iScalar<ltype>& lhs,const iMatrix<rtype,N>& rhs)->iMatrix<decltype(lhs._internal+rhs._internal[0][0]),N>
|
||||
{
|
||||
typedef iMatrix<decltype(lhs._internal+rhs._internal[0][0]),N> ret_t;
|
||||
ret_t ret;
|
||||
add(&ret,&lhs,&rhs);
|
||||
return ret;
|
||||
}
|
||||
template<class ltype,class rtype,int N>
|
||||
inline auto operator + (const iMatrix<ltype,N>& lhs,const iScalar<rtype>& rhs)->iMatrix<decltype(lhs._internal[0][0]+rhs._internal),N>
|
||||
{
|
||||
typedef iMatrix<decltype(lhs._internal[0][0]+rhs._internal),N> ret_t;
|
||||
ret_t ret;
|
||||
add(&ret,&lhs,&rhs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////////////// SUB ///////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
// SUB is simple for now; cannot mix types and straightforward template
|
||||
// Scalar +/- Scalar
|
||||
// Vector +/- Vector
|
||||
// Matrix +/- Matrix
|
||||
// Matrix /- scalar
|
||||
template<class vtype,class ltype,class rtype> inline void sub(iScalar<vtype> * __restrict__ ret,
|
||||
const iScalar<ltype> * __restrict__ lhs,
|
||||
const iScalar<rtype> * __restrict__ rhs)
|
||||
{
|
||||
sub(&ret->_internal,&lhs->_internal,&rhs->_internal);
|
||||
}
|
||||
|
||||
template<class vtype,class ltype,class rtype,int N> inline void sub(iVector<vtype,N> * __restrict__ ret,
|
||||
const iVector<ltype,N> * __restrict__ lhs,
|
||||
const iVector<rtype,N> * __restrict__ rhs)
|
||||
{
|
||||
for(int c=0;c<N;c++){
|
||||
ret->_internal[c]=lhs->_internal[c]-rhs->_internal[c];
|
||||
}
|
||||
return;
|
||||
}
|
||||
template<class vtype,class ltype,class rtype, int N> inline void sub(iMatrix<vtype,N> * __restrict__ ret,
|
||||
const iMatrix<ltype,N> * __restrict__ lhs,
|
||||
const iMatrix<rtype,N> * __restrict__ rhs){
|
||||
for(int c2=0;c2<N;c2++){
|
||||
for(int c1=0;c1<N;c1++){
|
||||
sub(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal[c1][c2]);
|
||||
}}
|
||||
return;
|
||||
}
|
||||
template<class vtype,class ltype,class rtype, int N> inline void sub(iMatrix<vtype,N> * __restrict__ ret,
|
||||
const iScalar<ltype> * __restrict__ lhs,
|
||||
const iMatrix<rtype,N> * __restrict__ rhs){
|
||||
for(int c2=0;c2<N;c2++){
|
||||
for(int c1=0;c1<N;c1++){
|
||||
if ( c1!=c2) {
|
||||
sub(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]);
|
||||
} else {
|
||||
// Fails -- need unary minus. Catalogue other unops?
|
||||
ret->_internal[c1][c2]=zero;
|
||||
ret->_internal[c1][c2]=ret->_internal[c1][c2]-rhs->_internal[c1][c2];
|
||||
|
||||
}
|
||||
}}
|
||||
return;
|
||||
}
|
||||
template<class vtype,class ltype,class rtype, int N> inline void sub(iMatrix<vtype,N> * __restrict__ ret,
|
||||
const iMatrix<ltype,N> * __restrict__ lhs,
|
||||
const iScalar<rtype> * __restrict__ rhs){
|
||||
for(int c2=0;c2<N;c2++){
|
||||
for(int c1=0;c1<N;c1++){
|
||||
if ( c1!=c2)
|
||||
sub(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal);
|
||||
else
|
||||
ret->_internal[c1][c2]=lhs->_internal[c1][c2];
|
||||
}}
|
||||
return;
|
||||
}
|
||||
|
||||
template<class v> void vprefetch(const iScalar<v> &vv)
|
||||
{
|
||||
vprefetch(vv._internal);
|
||||
}
|
||||
template<class v,int N> void vprefetch(const iVector<v,N> &vv)
|
||||
{
|
||||
for(int i=0;i<N;i++){
|
||||
vprefetch(vv._internal[i]);
|
||||
}
|
||||
}
|
||||
template<class v,int N> void vprefetch(const iMatrix<v,N> &vv)
|
||||
{
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
vprefetch(vv._internal[i][j]);
|
||||
}}
|
||||
}
|
||||
|
||||
// - operator for scalar, vector, matrix
|
||||
template<class ltype,class rtype> inline auto
|
||||
operator - (const iScalar<ltype>& lhs, const iScalar<rtype>& rhs) -> iScalar<decltype(lhs._internal - rhs._internal)>
|
||||
{
|
||||
typedef iScalar<decltype(lhs._internal-rhs._internal)> ret_t;
|
||||
ret_t ret;
|
||||
sub(&ret,&lhs,&rhs);
|
||||
return ret;
|
||||
}
|
||||
template<class ltype,class rtype,int N>
|
||||
inline auto operator - (const iVector<ltype,N>& lhs,const iVector<rtype,N>& rhs) ->iVector<decltype(lhs._internal[0]-rhs._internal[0]),N>
|
||||
{
|
||||
typedef iVector<decltype(lhs._internal[0]-rhs._internal[0]),N> ret_t;
|
||||
ret_t ret;
|
||||
sub(&ret,&lhs,&rhs);
|
||||
return ret;
|
||||
}
|
||||
template<class ltype,class rtype,int N>
|
||||
inline auto operator - (const iMatrix<ltype,N>& lhs,const iMatrix<rtype,N>& rhs) ->iMatrix<decltype(lhs._internal[0][0]-rhs._internal[0][0]),N>
|
||||
{
|
||||
typedef iMatrix<decltype(lhs._internal[0][0]-rhs._internal[0][0]),N> ret_t;
|
||||
ret_t ret;
|
||||
sub(&ret,&lhs,&rhs);
|
||||
return ret;
|
||||
}
|
||||
template<class ltype,class rtype,int N>
|
||||
inline auto operator - (const iScalar<ltype>& lhs,const iMatrix<rtype,N>& rhs)->iMatrix<decltype(lhs._internal-rhs._internal[0][0]),N>
|
||||
{
|
||||
typedef iMatrix<decltype(lhs._internal-rhs._internal[0][0]),N> ret_t;
|
||||
ret_t ret;
|
||||
sub(&ret,&lhs,&rhs);
|
||||
return ret;
|
||||
}
|
||||
template<class ltype,class rtype,int N>
|
||||
inline auto operator - (const iMatrix<ltype,N>& lhs,const iScalar<rtype>& rhs)->iMatrix<decltype(lhs._internal[0][0]-rhs._internal),N>
|
||||
{
|
||||
typedef iMatrix<decltype(lhs._internal[0][0]-rhs._internal),N> ret_t;
|
||||
ret_t ret;
|
||||
sub(&ret,&lhs,&rhs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////////////// MAC ///////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
///////////////////////////
|
||||
// Legal multiplication table
|
||||
///////////////////////////
|
||||
// scal x scal = scal
|
||||
// mat x mat = mat
|
||||
// mat x scal = mat
|
||||
// scal x mat = mat
|
||||
// mat x vec = vec
|
||||
// vec x scal = vec
|
||||
// scal x vec = vec
|
||||
///////////////////////////
|
||||
template<class rtype,class vtype,class mtype>
|
||||
inline void mac(iScalar<rtype> * __restrict__ ret,const iScalar<vtype> * __restrict__ lhs,const iScalar<mtype> * __restrict__ rhs)
|
||||
{
|
||||
mac(&ret->_internal,&lhs->_internal,&rhs->_internal);
|
||||
}
|
||||
template<class rrtype,class ltype,class rtype,int N>
|
||||
inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
|
||||
for(int c2=0;c2<N;c2++){
|
||||
for(int c1=0;c1<N;c1++){
|
||||
for(int c3=0;c3<N;c3++){
|
||||
mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
|
||||
}}}
|
||||
return;
|
||||
}
|
||||
template<class rrtype,class ltype,class rtype,int N>
|
||||
inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs){
|
||||
for(int c1=0;c1<N;c1++){
|
||||
for(int c2=0;c2<N;c2++){
|
||||
mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal);
|
||||
}}
|
||||
return;
|
||||
}
|
||||
template<class rrtype,class ltype,class rtype,int N>
|
||||
inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iScalar<ltype> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
|
||||
for(int c1=0;c1<N;c1++){
|
||||
for(int c2=0;c2<N;c2++){
|
||||
mac(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]);
|
||||
}}
|
||||
return;
|
||||
}
|
||||
template<class rrtype,class ltype,class rtype,int N>
|
||||
inline void mac(iVector<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iVector<rtype,N> * __restrict__ rhs)
|
||||
{
|
||||
for(int c1=0;c1<N;c1++){
|
||||
for(int c2=0;c2<N;c2++){
|
||||
mac(&ret->_internal[c1],&lhs->_internal[c1][c2],&rhs->_internal[c2]);
|
||||
}}
|
||||
return;
|
||||
}
|
||||
template<class rrtype,class ltype,class rtype,int N>
|
||||
inline void mac(iVector<rrtype,N> * __restrict__ ret,const iScalar<ltype> * __restrict__ lhs,const iVector<rtype,N> * __restrict__ rhs)
|
||||
{
|
||||
for(int c1=0;c1<N;c1++){
|
||||
mac(&ret->_internal[c1],&lhs->_internal,&rhs->_internal[c1]);
|
||||
}
|
||||
return;
|
||||
}
|
||||
template<class rrtype,class ltype,class rtype,int N>
|
||||
inline void mac(iVector<rrtype,N> * __restrict__ ret,const iVector<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs)
|
||||
{
|
||||
for(int c1=0;c1<N;c1++){
|
||||
mac(&ret->_internal[c1],&lhs->_internal[c1],&rhs->_internal);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////////////// MUL ///////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
template<class rtype,class vtype,class mtype>
|
||||
inline void mult(iScalar<rtype> * __restrict__ ret,const iScalar<mtype> * __restrict__ lhs,const iScalar<vtype> * __restrict__ rhs){
|
||||
mult(&ret->_internal,&lhs->_internal,&rhs->_internal);
|
||||
}
|
||||
|
||||
template<class rrtype,class ltype,class rtype,int N>
|
||||
inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
|
||||
for(int c2=0;c2<N;c2++){
|
||||
for(int c1=0;c1<N;c1++){
|
||||
mult(&ret->_internal[c1][c2],&lhs->_internal[c1][0],&rhs->_internal[0][c2]);
|
||||
for(int c3=1;c3<N;c3++){
|
||||
mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
|
||||
}
|
||||
}}
|
||||
return;
|
||||
}
|
||||
template<class rrtype,class ltype,class rtype,int N>
|
||||
inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs){
|
||||
for(int c2=0;c2<N;c2++){
|
||||
for(int c1=0;c1<N;c1++){
|
||||
mult(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal);
|
||||
}}
|
||||
return;
|
||||
}
|
||||
|
||||
template<class rrtype,class ltype,class rtype, int N>
|
||||
inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iScalar<ltype> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
|
||||
for(int c2=0;c2<N;c2++){
|
||||
for(int c1=0;c1<N;c1++){
|
||||
mult(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]);
|
||||
}}
|
||||
return;
|
||||
}
|
||||
// Matrix left multiplies vector
|
||||
template<class rtype,class vtype,class mtype,int N>
|
||||
inline void mult(iVector<rtype,N> * __restrict__ ret,const iMatrix<mtype,N> * __restrict__ lhs,const iVector<vtype,N> * __restrict__ rhs)
|
||||
{
|
||||
for(int c1=0;c1<N;c1++){
|
||||
mult(&ret->_internal[c1],&lhs->_internal[c1][0],&rhs->_internal[0]);
|
||||
for(int c2=1;c2<N;c2++){
|
||||
mac(&ret->_internal[c1],&lhs->_internal[c1][c2],&rhs->_internal[c2]);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
template<class rtype,class vtype,class mtype,int N>
|
||||
inline void mult(iVector<rtype,N> * __restrict__ ret,
|
||||
const iScalar<mtype> * __restrict__ lhs,
|
||||
const iVector<vtype,N> * __restrict__ rhs){
|
||||
for(int c1=0;c1<N;c1++){
|
||||
mult(&ret->_internal[c1],&lhs->_internal,&rhs->_internal[c1]);
|
||||
}
|
||||
}
|
||||
template<class rtype,class vtype,class mtype,int N>
|
||||
inline void mult(iVector<rtype,N> * __restrict__ ret,
|
||||
const iVector<vtype,N> * __restrict__ rhs,
|
||||
const iScalar<mtype> * __restrict__ lhs){
|
||||
mult(ret,lhs,rhs);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<class rtype,class vtype,class mtype,int N> inline
|
||||
iVector<rtype,N> operator * (const iMatrix<mtype,N>& lhs,const iVector<vtype,N>& rhs)
|
||||
{
|
||||
iVector<rtype,N> ret;
|
||||
mult(&ret,&lhs,&rhs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class rtype,class vtype,class mtype,int N> inline
|
||||
iVector<rtype,N> operator * (const iScalar<mtype>& lhs,const iVector<vtype,N>& rhs)
|
||||
{
|
||||
iVector<rtype,N> ret;
|
||||
mult(&ret,&lhs,&rhs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class rtype,class vtype,class mtype,int N> inline
|
||||
iVector<rtype,N> operator * (const iVector<mtype,N>& lhs,const iScalar<vtype>& rhs)
|
||||
{
|
||||
iVector<rtype,N> ret;
|
||||
mult(&ret,&lhs,&rhs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// Glue operators to mult routines. Must resolve return type cleverly from typeof(internal)
|
||||
// since nesting matrix<scalar> x matrix<matrix>-> matrix<matrix>
|
||||
// while matrix<scalar> x matrix<scalar>-> matrix<scalar>
|
||||
// so return type depends on argument types in nasty way.
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// scal x scal = scal
|
||||
// mat x mat = mat
|
||||
// mat x scal = mat
|
||||
// scal x mat = mat
|
||||
// mat x vec = vec
|
||||
// vec x scal = vec
|
||||
// scal x vec = vec
|
||||
|
||||
template<class l,class r>
|
||||
inline auto operator * (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(lhs._internal * rhs._internal)>
|
||||
{
|
||||
typedef iScalar<decltype(lhs._internal*rhs._internal)> ret_t;
|
||||
ret_t ret;
|
||||
mult(&ret,&lhs,&rhs);
|
||||
return ret;
|
||||
}
|
||||
template<class l,class r,int N> inline
|
||||
auto operator * (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iMatrix<decltype(lhs._internal[0][0]*rhs._internal[0][0]),N>
|
||||
{
|
||||
typedef decltype(lhs._internal[0][0]*rhs._internal[0][0]) ret_t;
|
||||
iMatrix<ret_t,N> ret;
|
||||
mult(&ret,&lhs,&rhs);
|
||||
return ret;
|
||||
}
|
||||
template<class l,class r, int N> inline
|
||||
auto operator * (const iMatrix<r,N>& lhs,const iScalar<l>& rhs) -> iMatrix<decltype(lhs._internal[0][0]*rhs._internal),N>
|
||||
{
|
||||
typedef decltype(lhs._internal[0][0]*rhs._internal) ret_t;
|
||||
|
||||
iMatrix<ret_t,N> ret;
|
||||
for(int c1=0;c1<N;c1++){
|
||||
for(int c2=0;c2<N;c2++){
|
||||
mult(&ret._internal[c1][c2],&lhs._internal[c1][c2],&rhs._internal);
|
||||
}}
|
||||
return ret;
|
||||
}
|
||||
template<class l,class r,int N> inline
|
||||
auto operator * (const iScalar<l>& lhs,const iMatrix<r,N>& rhs) -> iMatrix<decltype(lhs._internal*rhs._internal[0][0]),N>
|
||||
{
|
||||
typedef decltype(lhs._internal*rhs._internal[0][0]) ret_t;
|
||||
iMatrix<ret_t,N> ret;
|
||||
for(int c1=0;c1<N;c1++){
|
||||
for(int c2=0;c2<N;c2++){
|
||||
mult(&ret._internal[c1][c2],&lhs._internal,&rhs._internal[c1][c2]);
|
||||
}}
|
||||
return ret;
|
||||
}
|
||||
template<class l,class r,int N> inline
|
||||
auto operator * (const iMatrix<l,N>& lhs,const iVector<r,N>& rhs) -> iVector<decltype(lhs._internal[0][0]*rhs._internal[0]),N>
|
||||
{
|
||||
typedef decltype(lhs._internal[0][0]*rhs._internal[0]) ret_t;
|
||||
iVector<ret_t,N> ret;
|
||||
for(int c1=0;c1<N;c1++){
|
||||
mult(&ret._internal[c1],&lhs._internal[c1][0],&rhs._internal[0]);
|
||||
for(int c2=1;c2<N;c2++){
|
||||
mac(&ret._internal[c1],&lhs._internal[c1][c2],&rhs._internal[c2]);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class l,class r,int N> inline
|
||||
auto operator * (const iScalar<l>& lhs,const iVector<r,N>& rhs) -> iVector<decltype(lhs._internal*rhs._internal[0]),N>
|
||||
{
|
||||
typedef decltype(lhs._internal*rhs._internal[0]) ret_t;
|
||||
iVector<ret_t,N> ret;
|
||||
for(int c1=0;c1<N;c1++){
|
||||
mult(&ret._internal[c1],&lhs._internal,&rhs._internal[c1]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class l,class r,int N> inline
|
||||
auto operator * (const iVector<l,N>& lhs,const iScalar<r>& rhs) -> iVector<decltype(lhs._internal[0]*rhs._internal),N>
|
||||
{
|
||||
typedef decltype(lhs._internal[0]*rhs._internal) ret_t;
|
||||
iVector<ret_t,N> ret;
|
||||
for(int c1=0;c1<N;c1++){
|
||||
mult(&ret._internal[c1],&lhs._internal[c1],&rhs._internal);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
// localInnerProduct Scalar x Scalar -> Scalar
|
||||
// localInnerProduct Vector x Vector -> Scalar
|
||||
// localInnerProduct Matrix x Matrix -> Scalar
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class l,class r,int N> inline
|
||||
auto localInnerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(localInnerProduct(lhs._internal[0],rhs._internal[0]))>
|
||||
{
|
||||
typedef decltype(localInnerProduct(lhs._internal[0],rhs._internal[0])) ret_t;
|
||||
iScalar<ret_t> ret=zero;
|
||||
for(int c1=0;c1<N;c1++){
|
||||
ret._internal += localInnerProduct(lhs._internal[c1],rhs._internal[c1]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class l,class r,int N> inline
|
||||
auto localInnerProduct (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(localInnerProduct(lhs._internal[0][0],rhs._internal[0][0]))>
|
||||
{
|
||||
typedef decltype(localInnerProduct(lhs._internal[0][0],rhs._internal[0][0])) ret_t;
|
||||
iScalar<ret_t> ret=zero;
|
||||
for(int c1=0;c1<N;c1++){
|
||||
for(int c2=0;c2<N;c2++){
|
||||
ret._internal += localInnerProduct(lhs._internal[c1][c2],rhs._internal[c1][c2]);
|
||||
}}
|
||||
return ret;
|
||||
}
|
||||
template<class l,class r> inline
|
||||
auto localInnerProduct (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(localInnerProduct(lhs._internal,rhs._internal))>
|
||||
{
|
||||
typedef decltype(localInnerProduct(lhs._internal,rhs._internal)) ret_t;
|
||||
iScalar<ret_t> ret;
|
||||
ret._internal = localInnerProduct(lhs._internal,rhs._internal);
|
||||
return ret;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
// outerProduct Scalar x Scalar -> Scalar
|
||||
// Vector x Vector -> Matrix
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<class l,class r,int N> inline
|
||||
auto outerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iMatrix<decltype(outerProduct(lhs._internal[0],rhs._internal[0])),N>
|
||||
{
|
||||
typedef decltype(outerProduct(lhs._internal[0],rhs._internal[0])) ret_t;
|
||||
iMatrix<ret_t,N> ret;
|
||||
for(int c1=0;c1<N;c1++){
|
||||
for(int c2=0;c2<N;c2++){
|
||||
ret._internal[c1][c2] = outerProduct(lhs._internal[c1],rhs._internal[c2]);
|
||||
}}
|
||||
return ret;
|
||||
}
|
||||
template<class l,class r> inline
|
||||
auto outerProduct (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(outerProduct(lhs._internal,rhs._internal))>
|
||||
{
|
||||
typedef decltype(outerProduct(lhs._internal,rhs._internal)) ret_t;
|
||||
iScalar<ret_t> ret;
|
||||
ret._internal = outerProduct(lhs._internal,rhs._internal);
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline ComplexF outerProduct(const ComplexF &l, const ComplexF& r)
|
||||
{
|
||||
return l*r;
|
||||
}
|
||||
inline ComplexD outerProduct(const ComplexD &l, const ComplexD& r)
|
||||
{
|
||||
return l*r;
|
||||
}
|
||||
inline RealF outerProduct(const RealF &l, const RealF& r)
|
||||
{
|
||||
return l*r;
|
||||
}
|
||||
inline RealD outerProduct(const RealD &l, const RealD& r)
|
||||
{
|
||||
return l*r;
|
||||
}
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////////////// CONJ ///////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Conj function for scalar, vector, matrix
|
||||
template<class vtype> inline iScalar<vtype> conj(const iScalar<vtype>&r)
|
||||
{
|
||||
iScalar<vtype> ret;
|
||||
ret._internal = conj(r._internal);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Adj function for scalar, vector, matrix
|
||||
template<class vtype> inline iScalar<vtype> adj(const iScalar<vtype>&r)
|
||||
{
|
||||
iScalar<vtype> ret;
|
||||
ret._internal = adj(r._internal);
|
||||
return ret;
|
||||
}
|
||||
template<class vtype,int N> inline iVector<vtype,N> adj(const iVector<vtype,N>&r)
|
||||
{
|
||||
iVector<vtype,N> ret;
|
||||
for(int i=0;i<N;i++){
|
||||
ret._internal[i] = adj(r._internal[i]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class vtype,int N> inline iMatrix<vtype,N> adj(const iMatrix<vtype,N> &arg)
|
||||
{
|
||||
iMatrix<vtype,N> ret;
|
||||
for(int c1=0;c1<N;c1++){
|
||||
for(int c2=0;c2<N;c2++){
|
||||
ret._internal[c1][c2]=adj(arg._internal[c2][c1]);
|
||||
}}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Can only take the real/imag part of scalar objects, since
|
||||
// lattice objects of different complexity are non-conformable.
|
||||
/////////////////////////////////////////////////////////////////
|
||||
template<class itype> inline auto real(const iScalar<itype> &z) -> iScalar<decltype(real(z._internal))>
|
||||
{
|
||||
iScalar<decltype(real(z._internal))> ret;
|
||||
ret._internal = real(z._internal);
|
||||
return ret;
|
||||
}
|
||||
template<class itype,int N> inline auto real(const iMatrix<itype,N> &z) -> iMatrix<decltype(real(z._internal[0][0])),N>
|
||||
{
|
||||
iMatrix<decltype(real(z._internal[0][0])),N> ret;
|
||||
for(int c1=0;c1<N;c1++){
|
||||
for(int c2=0;c2<N;c2++){
|
||||
ret._internal[c1][c2] = real(z._internal[c1][c2]);
|
||||
}}
|
||||
return ret;
|
||||
}
|
||||
template<class itype,int N> inline auto real(const iVector<itype,N> &z) -> iVector<decltype(real(z._internal[0])),N>
|
||||
{
|
||||
iVector<decltype(real(z._internal[0])),N> ret;
|
||||
for(int c1=0;c1<N;c1++){
|
||||
ret._internal[c1] = real(z._internal[c1]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class itype> inline auto imag(const iScalar<itype> &z) -> iScalar<decltype(imag(z._internal))>
|
||||
{
|
||||
iScalar<decltype(imag(z._internal))> ret;
|
||||
ret._internal = imag(z._internal);
|
||||
return ret;
|
||||
}
|
||||
template<class itype,int N> inline auto imag(const iMatrix<itype,N> &z) -> iMatrix<decltype(imag(z._internal[0][0])),N>
|
||||
{
|
||||
iMatrix<decltype(imag(z._internal[0][0])),N> ret;
|
||||
for(int c1=0;c1<N;c1++){
|
||||
for(int c2=0;c2<N;c2++){
|
||||
ret._internal[c1][c2] = imag(z._internal[c1][c2]);
|
||||
}}
|
||||
return ret;
|
||||
}
|
||||
template<class itype,int N> inline auto imag(const iVector<itype,N> &z) -> iVector<decltype(imag(z._internal[0])),N>
|
||||
{
|
||||
iVector<decltype(imag(z._internal[0])),N> ret;
|
||||
for(int c1=0;c1<N;c1++){
|
||||
ret._internal[c1] = imag(z._internal[c1]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/////////////////////////////////
|
||||
// Trace of scalar and matrix
|
||||
/////////////////////////////////
|
||||
|
||||
inline Complex trace( const Complex &arg){
|
||||
return arg;
|
||||
}
|
||||
//inline vComplex trace(const vComplex &arg){
|
||||
// return arg;
|
||||
//}
|
||||
template<class vtype,int N>
|
||||
inline auto trace(const iMatrix<vtype,N> &arg) -> iScalar<decltype(trace(arg._internal[0][0]))>
|
||||
{
|
||||
iScalar<decltype( trace(arg._internal[0][0] )) > ret;
|
||||
ZeroIt(ret._internal);
|
||||
for(int i=0;i<N;i++){
|
||||
ret._internal=ret._internal+trace(arg._internal[i][i]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class vtype>
|
||||
inline auto trace(const iScalar<vtype> &arg) -> iScalar<decltype(trace(arg._internal))>
|
||||
{
|
||||
iScalar<decltype(trace(arg._internal))> ret;
|
||||
ret._internal=trace(arg._internal);
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// Generic routine to promote object<complex> -> object<vcomplex>
|
||||
// Supports the array reordering transformation that gives me SIMD utilisation
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
/*
|
||||
template<template<class> class object>
|
||||
inline object<vComplex> splat(object<Complex >s){
|
||||
object<vComplex> ret;
|
||||
vComplex * v_ptr = (vComplex *)& ret;
|
||||
Complex * s_ptr = (Complex *) &s;
|
||||
for(int i=0;i<sizeof(ret);i+=sizeof(vComplex)){
|
||||
vsplat(*(v_ptr++),*(s_ptr++));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
*/
|
||||
|
||||
#endif
|
@ -16,13 +16,15 @@
|
||||
#undef __X86_64
|
||||
namespace dpo {
|
||||
|
||||
void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr);
|
||||
void Grid_debug_handler_init(void);
|
||||
|
||||
void Grid_init(void)
|
||||
{
|
||||
Grid_debug_handler_init();
|
||||
}
|
||||
void Grid_init(void)
|
||||
{
|
||||
Grid_debug_handler_init();
|
||||
}
|
||||
double usecond(void) {
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv,NULL);
|
||||
return 1.0*tv.tv_usec + 1.0e6*tv.tv_sec;
|
||||
}
|
||||
|
||||
void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
|
||||
{
|
||||
|
68
Grid_simd.h
68
Grid_simd.h
@ -10,6 +10,33 @@
|
||||
//
|
||||
// Vector types are arch dependent
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// TODO
|
||||
//
|
||||
// Base class to share common code between vRealF, VComplexF etc...
|
||||
//
|
||||
// lattice Broad cast assignment
|
||||
//
|
||||
// where() support
|
||||
// implement with masks, and/or? Type of the mask & boolean support?
|
||||
//
|
||||
// Unary functions
|
||||
// cos,sin, tan, acos, asin, cosh, acosh, tanh, sinh, // Scalar<vReal> only arg
|
||||
// exp, log, sqrt, fabs
|
||||
//
|
||||
// transposeColor, transposeSpin,
|
||||
// adjColor, adjSpin,
|
||||
// traceColor, traceSpin.
|
||||
// peekColor, peekSpin + pokeColor PokeSpin
|
||||
//
|
||||
// copyMask.
|
||||
//
|
||||
// localMaxAbs
|
||||
//
|
||||
// norm2,
|
||||
// sumMulti equivalent.
|
||||
// Fourier transform equivalent.
|
||||
//
|
||||
|
||||
|
||||
namespace dpo {
|
||||
|
||||
@ -21,7 +48,46 @@ namespace dpo {
|
||||
typedef std::complex<RealD> ComplexD;
|
||||
typedef std::complex<Real> Complex;
|
||||
|
||||
|
||||
|
||||
inline RealF adj(const RealF & r){ return r; }
|
||||
inline RealF conj(const RealF & r){ return r; }
|
||||
inline ComplexD localInnerProduct(const ComplexD & l, const ComplexD & r) { return conj(l)*r; }
|
||||
inline ComplexF localInnerProduct(const ComplexF & l, const ComplexF & r) { return conj(l)*r; }
|
||||
inline RealD localInnerProduct(const RealD & l, const RealD & r) { return l*r; }
|
||||
inline RealF localInnerProduct(const RealF & l, const RealF & r) { return l*r; }
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//Provide support functions for basic real and complex data types required by dpo
|
||||
//Single and double precision versions. Should be able to template this once only.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
inline void mac (ComplexD * __restrict__ y,const ComplexD * __restrict__ a,const ComplexD *__restrict__ x){ *y = (*a) * (*x)+(*y); };
|
||||
inline void mult(ComplexD * __restrict__ y,const ComplexD * __restrict__ l,const ComplexD *__restrict__ r){ *y = (*l) * (*r);}
|
||||
inline void sub (ComplexD * __restrict__ y,const ComplexD * __restrict__ l,const ComplexD *__restrict__ r){ *y = (*l) - (*r);}
|
||||
inline void add (ComplexD * __restrict__ y,const ComplexD * __restrict__ l,const ComplexD *__restrict__ r){ *y = (*l) + (*r);}
|
||||
inline ComplexD adj(const ComplexD& r){ return(conj(r)); }
|
||||
// conj already supported for complex
|
||||
|
||||
inline void mac (ComplexF * __restrict__ y,const ComplexF * __restrict__ a,const ComplexF *__restrict__ x){ *y = (*a) * (*x)+(*y); }
|
||||
inline void mult(ComplexF * __restrict__ y,const ComplexF * __restrict__ l,const ComplexF *__restrict__ r){ *y = (*l) * (*r); }
|
||||
inline void sub (ComplexF * __restrict__ y,const ComplexF * __restrict__ l,const ComplexF *__restrict__ r){ *y = (*l) - (*r); }
|
||||
inline void add (ComplexF * __restrict__ y,const ComplexF * __restrict__ l,const ComplexF *__restrict__ r){ *y = (*l) + (*r); }
|
||||
inline Complex adj(const Complex& r ){ return(conj(r)); }
|
||||
//conj already supported for complex
|
||||
|
||||
inline void mac (RealD * __restrict__ y,const RealD * __restrict__ a,const RealD *__restrict__ x){ *y = (*a) * (*x)+(*y);}
|
||||
inline void mult(RealD * __restrict__ y,const RealD * __restrict__ l,const RealD *__restrict__ r){ *y = (*l) * (*r);}
|
||||
inline void sub (RealD * __restrict__ y,const RealD * __restrict__ l,const RealD *__restrict__ r){ *y = (*l) - (*r);}
|
||||
inline void add (RealD * __restrict__ y,const RealD * __restrict__ l,const RealD *__restrict__ r){ *y = (*l) + (*r);}
|
||||
inline RealD adj(const RealD & r){ return r; } // No-op for real
|
||||
inline RealD conj(const RealD & r){ return r; }
|
||||
|
||||
inline void mac (RealF * __restrict__ y,const RealF * __restrict__ a,const RealF *__restrict__ x){ *y = (*a) * (*x)+(*y); }
|
||||
inline void mult(RealF * __restrict__ y,const RealF * __restrict__ l,const RealF *__restrict__ r){ *y = (*l) * (*r); }
|
||||
inline void sub (RealF * __restrict__ y,const RealF * __restrict__ l,const RealF *__restrict__ r){ *y = (*l) - (*r); }
|
||||
inline void add (RealF * __restrict__ y,const RealF * __restrict__ l,const RealF *__restrict__ r){ *y = (*l) + (*r); }
|
||||
|
||||
|
||||
|
||||
class Zero{};
|
||||
static Zero zero;
|
||||
template<class itype> inline void ZeroIt(itype &arg){ arg=zero;};
|
||||
|
Loading…
Reference in New Issue
Block a user