1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-09 23:45:36 +00:00

Better organisation

This commit is contained in:
Peter Boyle 2015-03-04 05:31:44 +00:00
parent ad1b9b6ccf
commit 1a1474b323
8 changed files with 1161 additions and 1213 deletions

1122
Grid.h

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,7 @@
#ifndef GRID_LATTICE_H
#define GRID_LATTICE_H
#include "Grid.h"
#include "Grid_vComplexD.h"
namespace dpo {
@ -9,7 +11,7 @@ class Lattice
public:
Grid *_grid;
int checkerboard;
std::vector<vobj,myallocator<vobj> > _odata;
std::vector<vobj,alignedAllocator<vobj> > _odata;
public:
@ -554,95 +556,5 @@ public:
return ret;
}
namespace QCD {
static const int Nc=3;
static const int Ns=4;
static const int CbRed =0;
static const int CbBlack=1;
// QCD iMatrix types
template<typename vtype> using iSinglet = iScalar<iScalar<vtype> > ;
template<typename vtype> using iSpinMatrix = iMatrix<iScalar<vtype>, Ns>;
template<typename vtype> using iSpinColourMatrix = iMatrix<iMatrix<vtype, Nc>, Ns>;
template<typename vtype> using iColourMatrix = iScalar<iMatrix<vtype, Nc>> ;
template<typename vtype> using iSpinVector = iVector<iScalar<vtype>, Ns>;
template<typename vtype> using iColourVector = iScalar<iVector<vtype, Nc> >;
template<typename vtype> using iSpinColourVector = iVector<iVector<vtype, Nc>, Ns>;
typedef iSinglet<Complex > TComplex; // This is painful. Tensor singlet complex type.
typedef iSinglet<vComplex > vTComplex;
typedef iSinglet<Real > TReal; // This is painful. Tensor singlet complex type.
typedef iSpinMatrix<Complex > SpinMatrix;
typedef iColourMatrix<Complex > ColourMatrix;
typedef iSpinColourMatrix<Complex > SpinColourMatrix;
typedef iSpinVector<Complex > SpinVector;
typedef iColourVector<Complex > ColourVector;
typedef iSpinColourVector<Complex > SpinColourVector;
typedef iSpinMatrix<vComplex > vSpinMatrix;
typedef iColourMatrix<vComplex > vColourMatrix;
typedef iSpinColourMatrix<vComplex > vSpinColourMatrix;
typedef iSpinVector<vComplex > vSpinVector;
typedef iColourVector<vComplex > vColourVector;
typedef iSpinColourVector<vComplex > vSpinColourVector;
typedef Lattice<vTComplex> LatticeComplex;
typedef Lattice<vColourMatrix> LatticeColourMatrix;
typedef Lattice<vSpinMatrix> LatticeSpinMatrix;
typedef Lattice<vSpinColourMatrix> LatticePropagator;
typedef LatticePropagator LatticeSpinColourMatrix;
typedef Lattice<vSpinColourVector> LatticeFermion;
typedef Lattice<vSpinColourVector> LatticeSpinColourVector;
typedef Lattice<vSpinVector> LatticeSpinVector;
typedef Lattice<vColourVector> LatticeColourVector;
// localNorm2,
template<class tt>
inline LatticeComplex localNorm2 (const Lattice<tt> &rhs)
{
LatticeComplex ret(rhs._grid);
#pragma omp parallel for
for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=trace(adj(rhs)*rhs);
}
return ret;
}
// localInnerProduct
template<class tt>
inline LatticeComplex localInnerProduct (const Lattice<tt> &lhs,const Lattice<tt> &rhs)
{
LatticeComplex ret(rhs._grid);
#pragma omp parallel for
for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=localInnerProduct(lhs._odata[ss],rhs._odata[ss]);
}
return ret;
}
// outerProduct Scalar x Scalar -> Scalar
// Vector x Vector -> Matrix
template<class ll,class rr>
inline auto outerProduct (const Lattice<ll> &lhs,const Lattice<rr> &rhs) -> Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))>
{
Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))> ret(rhs._grid);
#pragma omp parallel for
for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=outerProduct(lhs._odata[ss],rhs._odata[ss]);
}
return ret;
}
} //namespace QCD
}
#endif

94
Grid_QCD.h Normal file
View File

@ -0,0 +1,94 @@
#ifndef GRID_QCD_H
#define GRID_QCD_H
namespace dpo{
namespace QCD {
static const int Nc=3;
static const int Ns=4;
static const int CbRed =0;
static const int CbBlack=1;
// QCD iMatrix types
template<typename vtype> using iSinglet = iScalar<iScalar<vtype> > ;
template<typename vtype> using iSpinMatrix = iMatrix<iScalar<vtype>, Ns>;
template<typename vtype> using iSpinColourMatrix = iMatrix<iMatrix<vtype, Nc>, Ns>;
template<typename vtype> using iColourMatrix = iScalar<iMatrix<vtype, Nc>> ;
template<typename vtype> using iSpinVector = iVector<iScalar<vtype>, Ns>;
template<typename vtype> using iColourVector = iScalar<iVector<vtype, Nc> >;
template<typename vtype> using iSpinColourVector = iVector<iVector<vtype, Nc>, Ns>;
typedef iSinglet<Complex > TComplex; // This is painful. Tensor singlet complex type.
typedef iSinglet<vComplex > vTComplex;
typedef iSinglet<Real > TReal; // This is painful. Tensor singlet complex type.
typedef iSpinMatrix<Complex > SpinMatrix;
typedef iColourMatrix<Complex > ColourMatrix;
typedef iSpinColourMatrix<Complex > SpinColourMatrix;
typedef iSpinVector<Complex > SpinVector;
typedef iColourVector<Complex > ColourVector;
typedef iSpinColourVector<Complex > SpinColourVector;
typedef iSpinMatrix<vComplex > vSpinMatrix;
typedef iColourMatrix<vComplex > vColourMatrix;
typedef iSpinColourMatrix<vComplex > vSpinColourMatrix;
typedef iSpinVector<vComplex > vSpinVector;
typedef iColourVector<vComplex > vColourVector;
typedef iSpinColourVector<vComplex > vSpinColourVector;
typedef Lattice<vTComplex> LatticeComplex;
typedef Lattice<vColourMatrix> LatticeColourMatrix;
typedef Lattice<vSpinMatrix> LatticeSpinMatrix;
typedef Lattice<vSpinColourMatrix> LatticePropagator;
typedef LatticePropagator LatticeSpinColourMatrix;
typedef Lattice<vSpinColourVector> LatticeFermion;
typedef Lattice<vSpinColourVector> LatticeSpinColourVector;
typedef Lattice<vSpinVector> LatticeSpinVector;
typedef Lattice<vColourVector> LatticeColourVector;
// localNorm2,
template<class tt>
inline LatticeComplex localNorm2 (const Lattice<tt> &rhs)
{
LatticeComplex ret(rhs._grid);
#pragma omp parallel for
for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=trace(adj(rhs)*rhs);
}
return ret;
}
// localInnerProduct
template<class tt>
inline LatticeComplex localInnerProduct (const Lattice<tt> &lhs,const Lattice<tt> &rhs)
{
LatticeComplex ret(rhs._grid);
#pragma omp parallel for
for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=localInnerProduct(lhs._odata[ss],rhs._odata[ss]);
}
return ret;
}
// outerProduct Scalar x Scalar -> Scalar
// Vector x Vector -> Matrix
template<class ll,class rr>
inline auto outerProduct (const Lattice<ll> &lhs,const Lattice<rr> &rhs) -> Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))>
{
Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))> ret(rhs._grid);
#pragma omp parallel for
for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=outerProduct(lhs._odata[ss],rhs._odata[ss]);
}
return ret;
}
} //namespace QCD
} // dpo
#endif

56
Grid_aligned_allocator.h Normal file
View File

@ -0,0 +1,56 @@
#ifndef GRID_ALIGNED_ALLOCATOR_H
#define GRID_ALIGNED_ALLOCATOR_H
namespace dpo {
////////////////////////////////////////////////////////////////////
// A lattice of something, but assume the something is SIMDized.
////////////////////////////////////////////////////////////////////
template<typename _Tp>
class alignedAllocator {
public:
typedef std::size_t size_type;
typedef std::ptrdiff_t difference_type;
typedef _Tp* pointer;
typedef const _Tp* const_pointer;
typedef _Tp& reference;
typedef const _Tp& const_reference;
typedef _Tp value_type;
template<typename _Tp1> struct rebind { typedef alignedAllocator<_Tp1> other; };
alignedAllocator() throw() { }
alignedAllocator(const alignedAllocator&) throw() { }
template<typename _Tp1> alignedAllocator(const alignedAllocator<_Tp1>&) throw() { }
~alignedAllocator() throw() { }
pointer address(reference __x) const { return &__x; }
const_pointer address(const_reference __x) const { return &__x; }
size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
// Should override allocate and deallocate
pointer allocate(size_type __n, const void* = 0)
{
//_Tp * ptr = (_Tp *) memalign(sizeof(_Tp),__n*sizeof(_Tp));
// _Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp));
#ifdef AVX512
_Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp));
#else
_Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128);
#endif
return ptr;
}
void deallocate(pointer __p, size_type) {
free(__p);
}
void construct(pointer __p, const _Tp& __val) { };
void construct(pointer __p) { };
void destroy(pointer __p) { };
};
template<typename _Tp> inline bool
operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
template<typename _Tp> inline bool
operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
}; // namespace dpo
#endif

101
Grid_config.h Normal file
View File

@ -0,0 +1,101 @@
/* Grid_config.h. Generated from Grid_config.h.in by configure. */
/* Grid_config.h.in. Generated from configure.ac by autoheader. */
/* AVX */
#define AVX1 1
/* AVX2 */
/* #undef AVX2 */
/* AVX512 */
/* #undef AVX512 */
/* Define to 1 if you have the `gettimeofday' function. */
#define HAVE_GETTIMEOFDAY 1
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1
/* Define to 1 if you have the <malloc.h> header file. */
/* #undef HAVE_MALLOC_H */
/* Define to 1 if you have the <malloc/malloc.h> header file. */
#define HAVE_MALLOC_MALLOC_H 1
/* Define to 1 if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1
/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1
/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H 1
/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H 1
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1
/* Define to 1 if the system has the `aligned' variable attribute */
#define HAVE_VAR_ATTRIBUTE_ALIGNED 1
/* Name of package */
#define PACKAGE "grid"
/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT "paboyle@ph.ed.ac.uk"
/* Define to the full name of this package. */
#define PACKAGE_NAME "Grid"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "Grid 1.0"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "grid"
/* Define to the home page for this package. */
#define PACKAGE_URL ""
/* Define to the version of this package. */
#define PACKAGE_VERSION "1.0"
/* SSE2 */
/* #undef SSE2 */
/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1
/* Version number of package */
#define VERSION "1.0"
/* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/synch.h>,
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
#define below would cause a syntax error. */
/* #undef _UINT32_T */
/* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/synch.h>,
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
#define below would cause a syntax error. */
/* #undef _UINT64_T */
/* Define to `unsigned int' if <sys/types.h> does not define. */
/* #undef size_t */
/* Define to the type of an unsigned integer type of width exactly 32 bits if
such a type exists and the standard includes do not define it. */
/* #undef uint32_t */
/* Define to the type of an unsigned integer type of width exactly 64 bits if
such a type exists and the standard includes do not define it. */
/* #undef uint64_t */

819
Grid_math_types.h Normal file
View File

@ -0,0 +1,819 @@
#ifndef GRID_MATH_TYPES_H
#define GRID_MATH_TYPES_H
namespace dpo {
///////////////////////////////////////////////////
// Scalar, Vector, Matrix objects.
// These can be composed to form tensor products of internal indices.
///////////////////////////////////////////////////
template<class vtype> class iScalar
{
public:
SIMDalign vtype _internal;
iScalar(){};
iScalar(Zero &z){ *this = zero; };
iScalar<vtype> & operator= (const Zero &hero){
zeroit(*this);
return *this;
}
friend void zeroit(iScalar<vtype> &that){
zeroit(that._internal);
}
// Unary negation
friend inline iScalar<vtype> operator -(const iScalar<vtype> &r) {
iScalar<vtype> ret;
ret._internal= -r._internal;
return ret;
}
// *=,+=,-= operators
inline iScalar<vtype> &operator *=(const iScalar<vtype> &r) {
*this = (*this)*r;
return *this;
}
inline iScalar<vtype> &operator -=(const iScalar<vtype> &r) {
*this = (*this)-r;
return *this;
}
inline iScalar<vtype> &operator +=(const iScalar<vtype> &r) {
*this = (*this)+r;
return *this;
}
};
template<class vtype,int N> class iVector
{
public:
SIMDalign vtype _internal[N];
iVector(Zero &z){ *this = zero; };
iVector() {};
iVector<vtype,N> & operator= (Zero &hero){
zeroit(*this);
return *this;
}
friend void zeroit(iVector<vtype,N> &that){
for(int i=0;i<N;i++){
zeroit(that._internal[i]);
}
}
// Unary negation
friend inline iVector<vtype,N> operator -(const iVector<vtype,N> &r) {
iVector<vtype,N> ret;
for(int i=0;i<N;i++) ret._internal[i]= -r._internal[i];
return ret;
}
// *=,+=,-= operators
inline iVector<vtype,N> &operator *=(const iScalar<vtype> &r) {
*this = (*this)*r;
return *this;
}
inline iVector<vtype,N> &operator -=(const iVector<vtype,N> &r) {
*this = (*this)-r;
return *this;
}
inline iVector<vtype,N> &operator +=(const iVector<vtype,N> &r) {
*this = (*this)+r;
return *this;
}
};
template<class vtype,int N> class iMatrix
{
public:
SIMDalign vtype _internal[N][N];
iMatrix(Zero &z){ *this = zero; };
iMatrix() {};
iMatrix<vtype,N> & operator= (Zero &hero){
zeroit(*this);
return *this;
}
friend void zeroit(iMatrix<vtype,N> &that){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
zeroit(that._internal[i][j]);
}}
}
// Unary negation
friend inline iMatrix<vtype,N> operator -(const iMatrix<vtype,N> &r) {
iMatrix<vtype,N> ret;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
ret._internal[i][j]= -r._internal[i][j];
}}
return ret;
}
// *=,+=,-= operators
template<class T>
inline iMatrix<vtype,N> &operator *=(const T &r) {
*this = (*this)*r;
return *this;
}
template<class T>
inline iMatrix<vtype,N> &operator -=(const T &r) {
*this = (*this)-r;
return *this;
}
template<class T>
inline iMatrix<vtype,N> &operator +=(const T &r) {
*this = (*this)+r;
return *this;
}
};
///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////// ADD ///////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////
// ADD is simple for now; cannot mix types and straightforward template
// Scalar +/- Scalar
// Vector +/- Vector
// Matrix +/- Matrix
template<class vtype,class ltype,class rtype> inline void add(iScalar<vtype> * __restrict__ ret,
const iScalar<ltype> * __restrict__ lhs,
const iScalar<rtype> * __restrict__ rhs)
{
add(&ret->_internal,&lhs->_internal,&rhs->_internal);
}
template<class vtype,class ltype,class rtype,int N> inline void add(iVector<vtype,N> * __restrict__ ret,
const iVector<ltype,N> * __restrict__ lhs,
const iVector<rtype,N> * __restrict__ rhs)
{
for(int c=0;c<N;c++){
ret->_internal[c]=lhs->_internal[c]+rhs->_internal[c];
}
return;
}
template<class vtype,class ltype,class rtype, int N> inline void add(iMatrix<vtype,N> * __restrict__ ret,
const iMatrix<ltype,N> * __restrict__ lhs,
const iMatrix<rtype,N> * __restrict__ rhs)
{
for(int c2=0;c2<N;c2++){
for(int c1=0;c1<N;c1++){
add(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal[c1][c2]);
}}
return;
}
template<class vtype,class ltype,class rtype, int N> inline void add(iMatrix<vtype,N> * __restrict__ ret,
const iScalar<ltype> * __restrict__ lhs,
const iMatrix<rtype,N> * __restrict__ rhs)
{
for(int c2=0;c2<N;c2++){
for(int c1=0;c1<N;c1++){
add(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]);
}}
return;
}
template<class vtype,class ltype,class rtype, int N> inline void add(iMatrix<vtype,N> * __restrict__ ret,
const iMatrix<ltype,N> * __restrict__ lhs,
const iScalar<rtype> * __restrict__ rhs)
{
for(int c2=0;c2<N;c2++){
for(int c1=0;c1<N;c1++){
if ( c1==c2)
add(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal);
else
ret->_internal[c1][c2]=lhs->_internal[c1][c2];
}}
return;
}
// Need to figure multi-precision.
template<class Mytype> Mytype timesI(Mytype &r)
{
iScalar<Complex> i;
i._internal = Complex(0,1);
return r*i;
}
// + operator for scalar, vector, matrix
template<class ltype,class rtype>
//inline auto operator + (iScalar<ltype>& lhs,iScalar<rtype>&& rhs) -> iScalar<decltype(lhs._internal + rhs._internal)>
inline auto operator + (const iScalar<ltype>& lhs,const iScalar<rtype>& rhs) -> iScalar<decltype(lhs._internal + rhs._internal)>
{
typedef iScalar<decltype(lhs._internal+rhs._internal)> ret_t;
ret_t ret;
add(&ret,&lhs,&rhs);
return ret;
}
template<class ltype,class rtype,int N>
inline auto operator + (const iVector<ltype,N>& lhs,const iVector<rtype,N>& rhs) ->iVector<decltype(lhs._internal[0]+rhs._internal[0]),N>
{
typedef iVector<decltype(lhs._internal[0]+rhs._internal[0]),N> ret_t;
ret_t ret;
add(&ret,&lhs,&rhs);
return ret;
}
template<class ltype,class rtype,int N>
inline auto operator + (const iMatrix<ltype,N>& lhs,const iMatrix<rtype,N>& rhs) ->iMatrix<decltype(lhs._internal[0][0]+rhs._internal[0][0]),N>
{
typedef iMatrix<decltype(lhs._internal[0][0]+rhs._internal[0][0]),N> ret_t;
ret_t ret;
add(&ret,&lhs,&rhs);
return ret;
}
template<class ltype,class rtype,int N>
inline auto operator + (const iScalar<ltype>& lhs,const iMatrix<rtype,N>& rhs)->iMatrix<decltype(lhs._internal+rhs._internal[0][0]),N>
{
typedef iMatrix<decltype(lhs._internal+rhs._internal[0][0]),N> ret_t;
ret_t ret;
add(&ret,&lhs,&rhs);
return ret;
}
template<class ltype,class rtype,int N>
inline auto operator + (const iMatrix<ltype,N>& lhs,const iScalar<rtype>& rhs)->iMatrix<decltype(lhs._internal[0][0]+rhs._internal),N>
{
typedef iMatrix<decltype(lhs._internal[0][0]+rhs._internal),N> ret_t;
ret_t ret;
add(&ret,&lhs,&rhs);
return ret;
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////// SUB ///////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////
// SUB is simple for now; cannot mix types and straightforward template
// Scalar +/- Scalar
// Vector +/- Vector
// Matrix +/- Matrix
// Matrix /- scalar
template<class vtype,class ltype,class rtype> inline void sub(iScalar<vtype> * __restrict__ ret,
const iScalar<ltype> * __restrict__ lhs,
const iScalar<rtype> * __restrict__ rhs)
{
sub(&ret->_internal,&lhs->_internal,&rhs->_internal);
}
template<class vtype,class ltype,class rtype,int N> inline void sub(iVector<vtype,N> * __restrict__ ret,
const iVector<ltype,N> * __restrict__ lhs,
const iVector<rtype,N> * __restrict__ rhs)
{
for(int c=0;c<N;c++){
ret->_internal[c]=lhs->_internal[c]-rhs->_internal[c];
}
return;
}
template<class vtype,class ltype,class rtype, int N> inline void sub(iMatrix<vtype,N> * __restrict__ ret,
const iMatrix<ltype,N> * __restrict__ lhs,
const iMatrix<rtype,N> * __restrict__ rhs){
for(int c2=0;c2<N;c2++){
for(int c1=0;c1<N;c1++){
sub(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal[c1][c2]);
}}
return;
}
template<class vtype,class ltype,class rtype, int N> inline void sub(iMatrix<vtype,N> * __restrict__ ret,
const iScalar<ltype> * __restrict__ lhs,
const iMatrix<rtype,N> * __restrict__ rhs){
for(int c2=0;c2<N;c2++){
for(int c1=0;c1<N;c1++){
if ( c1!=c2) {
sub(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]);
} else {
// Fails -- need unary minus. Catalogue other unops?
ret->_internal[c1][c2]=zero;
ret->_internal[c1][c2]=ret->_internal[c1][c2]-rhs->_internal[c1][c2];
}
}}
return;
}
template<class vtype,class ltype,class rtype, int N> inline void sub(iMatrix<vtype,N> * __restrict__ ret,
const iMatrix<ltype,N> * __restrict__ lhs,
const iScalar<rtype> * __restrict__ rhs){
for(int c2=0;c2<N;c2++){
for(int c1=0;c1<N;c1++){
if ( c1!=c2)
sub(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal);
else
ret->_internal[c1][c2]=lhs->_internal[c1][c2];
}}
return;
}
template<class v> void vprefetch(const iScalar<v> &vv)
{
vprefetch(vv._internal);
}
template<class v,int N> void vprefetch(const iVector<v,N> &vv)
{
for(int i=0;i<N;i++){
vprefetch(vv._internal[i]);
}
}
template<class v,int N> void vprefetch(const iMatrix<v,N> &vv)
{
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
vprefetch(vv._internal[i][j]);
}}
}
// - operator for scalar, vector, matrix
template<class ltype,class rtype> inline auto
operator - (const iScalar<ltype>& lhs, const iScalar<rtype>& rhs) -> iScalar<decltype(lhs._internal - rhs._internal)>
{
typedef iScalar<decltype(lhs._internal-rhs._internal)> ret_t;
ret_t ret;
sub(&ret,&lhs,&rhs);
return ret;
}
template<class ltype,class rtype,int N>
inline auto operator - (const iVector<ltype,N>& lhs,const iVector<rtype,N>& rhs) ->iVector<decltype(lhs._internal[0]-rhs._internal[0]),N>
{
typedef iVector<decltype(lhs._internal[0]-rhs._internal[0]),N> ret_t;
ret_t ret;
sub(&ret,&lhs,&rhs);
return ret;
}
template<class ltype,class rtype,int N>
inline auto operator - (const iMatrix<ltype,N>& lhs,const iMatrix<rtype,N>& rhs) ->iMatrix<decltype(lhs._internal[0][0]-rhs._internal[0][0]),N>
{
typedef iMatrix<decltype(lhs._internal[0][0]-rhs._internal[0][0]),N> ret_t;
ret_t ret;
sub(&ret,&lhs,&rhs);
return ret;
}
template<class ltype,class rtype,int N>
inline auto operator - (const iScalar<ltype>& lhs,const iMatrix<rtype,N>& rhs)->iMatrix<decltype(lhs._internal-rhs._internal[0][0]),N>
{
typedef iMatrix<decltype(lhs._internal-rhs._internal[0][0]),N> ret_t;
ret_t ret;
sub(&ret,&lhs,&rhs);
return ret;
}
template<class ltype,class rtype,int N>
inline auto operator - (const iMatrix<ltype,N>& lhs,const iScalar<rtype>& rhs)->iMatrix<decltype(lhs._internal[0][0]-rhs._internal),N>
{
typedef iMatrix<decltype(lhs._internal[0][0]-rhs._internal),N> ret_t;
ret_t ret;
sub(&ret,&lhs,&rhs);
return ret;
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////// MAC ///////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////
// Legal multiplication table
///////////////////////////
// scal x scal = scal
// mat x mat = mat
// mat x scal = mat
// scal x mat = mat
// mat x vec = vec
// vec x scal = vec
// scal x vec = vec
///////////////////////////
template<class rtype,class vtype,class mtype>
inline void mac(iScalar<rtype> * __restrict__ ret,const iScalar<vtype> * __restrict__ lhs,const iScalar<mtype> * __restrict__ rhs)
{
mac(&ret->_internal,&lhs->_internal,&rhs->_internal);
}
template<class rrtype,class ltype,class rtype,int N>
inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
for(int c2=0;c2<N;c2++){
for(int c1=0;c1<N;c1++){
for(int c3=0;c3<N;c3++){
mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
}}}
return;
}
template<class rrtype,class ltype,class rtype,int N>
inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs){
for(int c1=0;c1<N;c1++){
for(int c2=0;c2<N;c2++){
mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal);
}}
return;
}
template<class rrtype,class ltype,class rtype,int N>
inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iScalar<ltype> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
for(int c1=0;c1<N;c1++){
for(int c2=0;c2<N;c2++){
mac(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]);
}}
return;
}
template<class rrtype,class ltype,class rtype,int N>
inline void mac(iVector<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iVector<rtype,N> * __restrict__ rhs)
{
for(int c1=0;c1<N;c1++){
for(int c2=0;c2<N;c2++){
mac(&ret->_internal[c1],&lhs->_internal[c1][c2],&rhs->_internal[c2]);
}}
return;
}
template<class rrtype,class ltype,class rtype,int N>
inline void mac(iVector<rrtype,N> * __restrict__ ret,const iScalar<ltype> * __restrict__ lhs,const iVector<rtype,N> * __restrict__ rhs)
{
for(int c1=0;c1<N;c1++){
mac(&ret->_internal[c1],&lhs->_internal,&rhs->_internal[c1]);
}
return;
}
template<class rrtype,class ltype,class rtype,int N>
inline void mac(iVector<rrtype,N> * __restrict__ ret,const iVector<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs)
{
for(int c1=0;c1<N;c1++){
mac(&ret->_internal[c1],&lhs->_internal[c1],&rhs->_internal);
}
return;
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////// MUL ///////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////
template<class rtype,class vtype,class mtype>
inline void mult(iScalar<rtype> * __restrict__ ret,const iScalar<mtype> * __restrict__ lhs,const iScalar<vtype> * __restrict__ rhs){
mult(&ret->_internal,&lhs->_internal,&rhs->_internal);
}
template<class rrtype,class ltype,class rtype,int N>
inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
for(int c2=0;c2<N;c2++){
for(int c1=0;c1<N;c1++){
mult(&ret->_internal[c1][c2],&lhs->_internal[c1][0],&rhs->_internal[0][c2]);
for(int c3=1;c3<N;c3++){
mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
}
}}
return;
}
template<class rrtype,class ltype,class rtype,int N>
inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs){
for(int c2=0;c2<N;c2++){
for(int c1=0;c1<N;c1++){
mult(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal);
}}
return;
}
template<class rrtype,class ltype,class rtype, int N>
inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iScalar<ltype> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
for(int c2=0;c2<N;c2++){
for(int c1=0;c1<N;c1++){
mult(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]);
}}
return;
}
// Matrix left multiplies vector
template<class rtype,class vtype,class mtype,int N>
inline void mult(iVector<rtype,N> * __restrict__ ret,const iMatrix<mtype,N> * __restrict__ lhs,const iVector<vtype,N> * __restrict__ rhs)
{
for(int c1=0;c1<N;c1++){
mult(&ret->_internal[c1],&lhs->_internal[c1][0],&rhs->_internal[0]);
for(int c2=1;c2<N;c2++){
mac(&ret->_internal[c1],&lhs->_internal[c1][c2],&rhs->_internal[c2]);
}
}
return;
}
template<class rtype,class vtype,class mtype,int N>
inline void mult(iVector<rtype,N> * __restrict__ ret,
const iScalar<mtype> * __restrict__ lhs,
const iVector<vtype,N> * __restrict__ rhs){
for(int c1=0;c1<N;c1++){
mult(&ret->_internal[c1],&lhs->_internal,&rhs->_internal[c1]);
}
}
template<class rtype,class vtype,class mtype,int N>
inline void mult(iVector<rtype,N> * __restrict__ ret,
const iVector<vtype,N> * __restrict__ rhs,
const iScalar<mtype> * __restrict__ lhs){
mult(ret,lhs,rhs);
}
template<class rtype,class vtype,class mtype,int N> inline
iVector<rtype,N> operator * (const iMatrix<mtype,N>& lhs,const iVector<vtype,N>& rhs)
{
iVector<rtype,N> ret;
mult(&ret,&lhs,&rhs);
return ret;
}
template<class rtype,class vtype,class mtype,int N> inline
iVector<rtype,N> operator * (const iScalar<mtype>& lhs,const iVector<vtype,N>& rhs)
{
iVector<rtype,N> ret;
mult(&ret,&lhs,&rhs);
return ret;
}
template<class rtype,class vtype,class mtype,int N> inline
iVector<rtype,N> operator * (const iVector<mtype,N>& lhs,const iScalar<vtype>& rhs)
{
iVector<rtype,N> ret;
mult(&ret,&lhs,&rhs);
return ret;
}
//////////////////////////////////////////////////////////////////
// Glue operators to mult routines. Must resolve return type cleverly from typeof(internal)
// since nesting matrix<scalar> x matrix<matrix>-> matrix<matrix>
// while matrix<scalar> x matrix<scalar>-> matrix<scalar>
// so return type depends on argument types in nasty way.
//////////////////////////////////////////////////////////////////
// scal x scal = scal
// mat x mat = mat
// mat x scal = mat
// scal x mat = mat
// mat x vec = vec
// vec x scal = vec
// scal x vec = vec
template<class l,class r>
inline auto operator * (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(lhs._internal * rhs._internal)>
{
typedef iScalar<decltype(lhs._internal*rhs._internal)> ret_t;
ret_t ret;
mult(&ret,&lhs,&rhs);
return ret;
}
template<class l,class r,int N> inline
auto operator * (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iMatrix<decltype(lhs._internal[0][0]*rhs._internal[0][0]),N>
{
typedef decltype(lhs._internal[0][0]*rhs._internal[0][0]) ret_t;
iMatrix<ret_t,N> ret;
mult(&ret,&lhs,&rhs);
return ret;
}
template<class l,class r, int N> inline
auto operator * (const iMatrix<r,N>& lhs,const iScalar<l>& rhs) -> iMatrix<decltype(lhs._internal[0][0]*rhs._internal),N>
{
typedef decltype(lhs._internal[0][0]*rhs._internal) ret_t;
iMatrix<ret_t,N> ret;
for(int c1=0;c1<N;c1++){
for(int c2=0;c2<N;c2++){
mult(&ret._internal[c1][c2],&lhs._internal[c1][c2],&rhs._internal);
}}
return ret;
}
template<class l,class r,int N> inline
auto operator * (const iScalar<l>& lhs,const iMatrix<r,N>& rhs) -> iMatrix<decltype(lhs._internal*rhs._internal[0][0]),N>
{
typedef decltype(lhs._internal*rhs._internal[0][0]) ret_t;
iMatrix<ret_t,N> ret;
for(int c1=0;c1<N;c1++){
for(int c2=0;c2<N;c2++){
mult(&ret._internal[c1][c2],&lhs._internal,&rhs._internal[c1][c2]);
}}
return ret;
}
template<class l,class r,int N> inline
auto operator * (const iMatrix<l,N>& lhs,const iVector<r,N>& rhs) -> iVector<decltype(lhs._internal[0][0]*rhs._internal[0]),N>
{
typedef decltype(lhs._internal[0][0]*rhs._internal[0]) ret_t;
iVector<ret_t,N> ret;
for(int c1=0;c1<N;c1++){
mult(&ret._internal[c1],&lhs._internal[c1][0],&rhs._internal[0]);
for(int c2=1;c2<N;c2++){
mac(&ret._internal[c1],&lhs._internal[c1][c2],&rhs._internal[c2]);
}
}
return ret;
}
template<class l,class r,int N> inline
auto operator * (const iScalar<l>& lhs,const iVector<r,N>& rhs) -> iVector<decltype(lhs._internal*rhs._internal[0]),N>
{
typedef decltype(lhs._internal*rhs._internal[0]) ret_t;
iVector<ret_t,N> ret;
for(int c1=0;c1<N;c1++){
mult(&ret._internal[c1],&lhs._internal,&rhs._internal[c1]);
}
return ret;
}
template<class l,class r,int N> inline
auto operator * (const iVector<l,N>& lhs,const iScalar<r>& rhs) -> iVector<decltype(lhs._internal[0]*rhs._internal),N>
{
typedef decltype(lhs._internal[0]*rhs._internal) ret_t;
iVector<ret_t,N> ret;
for(int c1=0;c1<N;c1++){
mult(&ret._internal[c1],&lhs._internal[c1],&rhs._internal);
}
return ret;
}
///////////////////////////////////////////////////////////////////////////////////////
// localInnerProduct Scalar x Scalar -> Scalar
// localInnerProduct Vector x Vector -> Scalar
// localInnerProduct Matrix x Matrix -> Scalar
///////////////////////////////////////////////////////////////////////////////////////
template<class l,class r,int N> inline
auto localInnerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(localInnerProduct(lhs._internal[0],rhs._internal[0]))>
{
typedef decltype(localInnerProduct(lhs._internal[0],rhs._internal[0])) ret_t;
iScalar<ret_t> ret=zero;
for(int c1=0;c1<N;c1++){
ret._internal += localInnerProduct(lhs._internal[c1],rhs._internal[c1]);
}
return ret;
}
template<class l,class r,int N> inline
auto localInnerProduct (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(localInnerProduct(lhs._internal[0][0],rhs._internal[0][0]))>
{
typedef decltype(localInnerProduct(lhs._internal[0][0],rhs._internal[0][0])) ret_t;
iScalar<ret_t> ret=zero;
for(int c1=0;c1<N;c1++){
for(int c2=0;c2<N;c2++){
ret._internal += localInnerProduct(lhs._internal[c1][c2],rhs._internal[c1][c2]);
}}
return ret;
}
template<class l,class r> inline
auto localInnerProduct (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(localInnerProduct(lhs._internal,rhs._internal))>
{
typedef decltype(localInnerProduct(lhs._internal,rhs._internal)) ret_t;
iScalar<ret_t> ret;
ret._internal = localInnerProduct(lhs._internal,rhs._internal);
return ret;
}
///////////////////////////////////////////////////////////////////////////////////////
// outerProduct Scalar x Scalar -> Scalar
// Vector x Vector -> Matrix
///////////////////////////////////////////////////////////////////////////////////////
template<class l,class r,int N> inline
auto outerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iMatrix<decltype(outerProduct(lhs._internal[0],rhs._internal[0])),N>
{
typedef decltype(outerProduct(lhs._internal[0],rhs._internal[0])) ret_t;
iMatrix<ret_t,N> ret;
for(int c1=0;c1<N;c1++){
for(int c2=0;c2<N;c2++){
ret._internal[c1][c2] = outerProduct(lhs._internal[c1],rhs._internal[c2]);
}}
return ret;
}
template<class l,class r> inline
auto outerProduct (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(outerProduct(lhs._internal,rhs._internal))>
{
typedef decltype(outerProduct(lhs._internal,rhs._internal)) ret_t;
iScalar<ret_t> ret;
ret._internal = outerProduct(lhs._internal,rhs._internal);
return ret;
}
inline ComplexF outerProduct(const ComplexF &l, const ComplexF& r)
{
return l*r;
}
inline ComplexD outerProduct(const ComplexD &l, const ComplexD& r)
{
return l*r;
}
inline RealF outerProduct(const RealF &l, const RealF& r)
{
return l*r;
}
inline RealD outerProduct(const RealD &l, const RealD& r)
{
return l*r;
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////// CONJ ///////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////
// Conj function for scalar, vector, matrix
template<class vtype> inline iScalar<vtype> conj(const iScalar<vtype>&r)
{
iScalar<vtype> ret;
ret._internal = conj(r._internal);
return ret;
}
// Adj function for scalar, vector, matrix
template<class vtype> inline iScalar<vtype> adj(const iScalar<vtype>&r)
{
iScalar<vtype> ret;
ret._internal = adj(r._internal);
return ret;
}
template<class vtype,int N> inline iVector<vtype,N> adj(const iVector<vtype,N>&r)
{
iVector<vtype,N> ret;
for(int i=0;i<N;i++){
ret._internal[i] = adj(r._internal[i]);
}
return ret;
}
template<class vtype,int N> inline iMatrix<vtype,N> adj(const iMatrix<vtype,N> &arg)
{
iMatrix<vtype,N> ret;
for(int c1=0;c1<N;c1++){
for(int c2=0;c2<N;c2++){
ret._internal[c1][c2]=adj(arg._internal[c2][c1]);
}}
return ret;
}
/////////////////////////////////////////////////////////////////
// Can only take the real/imag part of scalar objects, since
// lattice objects of different complexity are non-conformable.
/////////////////////////////////////////////////////////////////
template<class itype> inline auto real(const iScalar<itype> &z) -> iScalar<decltype(real(z._internal))>
{
iScalar<decltype(real(z._internal))> ret;
ret._internal = real(z._internal);
return ret;
}
template<class itype,int N> inline auto real(const iMatrix<itype,N> &z) -> iMatrix<decltype(real(z._internal[0][0])),N>
{
iMatrix<decltype(real(z._internal[0][0])),N> ret;
for(int c1=0;c1<N;c1++){
for(int c2=0;c2<N;c2++){
ret._internal[c1][c2] = real(z._internal[c1][c2]);
}}
return ret;
}
template<class itype,int N> inline auto real(const iVector<itype,N> &z) -> iVector<decltype(real(z._internal[0])),N>
{
iVector<decltype(real(z._internal[0])),N> ret;
for(int c1=0;c1<N;c1++){
ret._internal[c1] = real(z._internal[c1]);
}
return ret;
}
template<class itype> inline auto imag(const iScalar<itype> &z) -> iScalar<decltype(imag(z._internal))>
{
iScalar<decltype(imag(z._internal))> ret;
ret._internal = imag(z._internal);
return ret;
}
template<class itype,int N> inline auto imag(const iMatrix<itype,N> &z) -> iMatrix<decltype(imag(z._internal[0][0])),N>
{
iMatrix<decltype(imag(z._internal[0][0])),N> ret;
for(int c1=0;c1<N;c1++){
for(int c2=0;c2<N;c2++){
ret._internal[c1][c2] = imag(z._internal[c1][c2]);
}}
return ret;
}
template<class itype,int N> inline auto imag(const iVector<itype,N> &z) -> iVector<decltype(imag(z._internal[0])),N>
{
iVector<decltype(imag(z._internal[0])),N> ret;
for(int c1=0;c1<N;c1++){
ret._internal[c1] = imag(z._internal[c1]);
}
return ret;
}
/////////////////////////////////
// Trace of scalar and matrix
/////////////////////////////////
inline Complex trace( const Complex &arg){
return arg;
}
//inline vComplex trace(const vComplex &arg){
// return arg;
//}
template<class vtype,int N>
inline auto trace(const iMatrix<vtype,N> &arg) -> iScalar<decltype(trace(arg._internal[0][0]))>
{
iScalar<decltype( trace(arg._internal[0][0] )) > ret;
ZeroIt(ret._internal);
for(int i=0;i<N;i++){
ret._internal=ret._internal+trace(arg._internal[i][i]);
}
return ret;
}
template<class vtype>
inline auto trace(const iScalar<vtype> &arg) -> iScalar<decltype(trace(arg._internal))>
{
iScalar<decltype(trace(arg._internal))> ret;
ret._internal=trace(arg._internal);
return ret;
}
};
/////////////////////////////////////////////////////////////////////////
// Generic routine to promote object<complex> -> object<vcomplex>
// Supports the array reordering transformation that gives me SIMD utilisation
/////////////////////////////////////////////////////////////////////////
/*
template<template<class> class object>
inline object<vComplex> splat(object<Complex >s){
object<vComplex> ret;
vComplex * v_ptr = (vComplex *)& ret;
Complex * s_ptr = (Complex *) &s;
for(int i=0;i<sizeof(ret);i+=sizeof(vComplex)){
vsplat(*(v_ptr++),*(s_ptr++));
}
return ret;
}
*/
#endif

View File

@ -16,13 +16,15 @@
#undef __X86_64
namespace dpo {
void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr);
void Grid_debug_handler_init(void);
void Grid_init(void)
{
Grid_debug_handler_init();
}
void Grid_init(void)
{
Grid_debug_handler_init();
}
double usecond(void) {
struct timeval tv;
gettimeofday(&tv,NULL);
return 1.0*tv.tv_usec + 1.0e6*tv.tv_sec;
}
void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
{

View File

@ -10,6 +10,33 @@
//
// Vector types are arch dependent
////////////////////////////////////////////////////////////////////////
// TODO
//
// Base class to share common code between vRealF, VComplexF etc...
//
// lattice Broad cast assignment
//
// where() support
// implement with masks, and/or? Type of the mask & boolean support?
//
// Unary functions
// cos,sin, tan, acos, asin, cosh, acosh, tanh, sinh, // Scalar<vReal> only arg
// exp, log, sqrt, fabs
//
// transposeColor, transposeSpin,
// adjColor, adjSpin,
// traceColor, traceSpin.
// peekColor, peekSpin + pokeColor PokeSpin
//
// copyMask.
//
// localMaxAbs
//
// norm2,
// sumMulti equivalent.
// Fourier transform equivalent.
//
namespace dpo {
@ -21,7 +48,46 @@ namespace dpo {
typedef std::complex<RealD> ComplexD;
typedef std::complex<Real> Complex;
inline RealF adj(const RealF & r){ return r; }
inline RealF conj(const RealF & r){ return r; }
inline ComplexD localInnerProduct(const ComplexD & l, const ComplexD & r) { return conj(l)*r; }
inline ComplexF localInnerProduct(const ComplexF & l, const ComplexF & r) { return conj(l)*r; }
inline RealD localInnerProduct(const RealD & l, const RealD & r) { return l*r; }
inline RealF localInnerProduct(const RealF & l, const RealF & r) { return l*r; }
////////////////////////////////////////////////////////////////////////////////
//Provide support functions for basic real and complex data types required by dpo
//Single and double precision versions. Should be able to template this once only.
////////////////////////////////////////////////////////////////////////////////
inline void mac (ComplexD * __restrict__ y,const ComplexD * __restrict__ a,const ComplexD *__restrict__ x){ *y = (*a) * (*x)+(*y); };
inline void mult(ComplexD * __restrict__ y,const ComplexD * __restrict__ l,const ComplexD *__restrict__ r){ *y = (*l) * (*r);}
inline void sub (ComplexD * __restrict__ y,const ComplexD * __restrict__ l,const ComplexD *__restrict__ r){ *y = (*l) - (*r);}
inline void add (ComplexD * __restrict__ y,const ComplexD * __restrict__ l,const ComplexD *__restrict__ r){ *y = (*l) + (*r);}
inline ComplexD adj(const ComplexD& r){ return(conj(r)); }
// conj already supported for complex
inline void mac (ComplexF * __restrict__ y,const ComplexF * __restrict__ a,const ComplexF *__restrict__ x){ *y = (*a) * (*x)+(*y); }
inline void mult(ComplexF * __restrict__ y,const ComplexF * __restrict__ l,const ComplexF *__restrict__ r){ *y = (*l) * (*r); }
inline void sub (ComplexF * __restrict__ y,const ComplexF * __restrict__ l,const ComplexF *__restrict__ r){ *y = (*l) - (*r); }
inline void add (ComplexF * __restrict__ y,const ComplexF * __restrict__ l,const ComplexF *__restrict__ r){ *y = (*l) + (*r); }
inline Complex adj(const Complex& r ){ return(conj(r)); }
//conj already supported for complex
inline void mac (RealD * __restrict__ y,const RealD * __restrict__ a,const RealD *__restrict__ x){ *y = (*a) * (*x)+(*y);}
inline void mult(RealD * __restrict__ y,const RealD * __restrict__ l,const RealD *__restrict__ r){ *y = (*l) * (*r);}
inline void sub (RealD * __restrict__ y,const RealD * __restrict__ l,const RealD *__restrict__ r){ *y = (*l) - (*r);}
inline void add (RealD * __restrict__ y,const RealD * __restrict__ l,const RealD *__restrict__ r){ *y = (*l) + (*r);}
inline RealD adj(const RealD & r){ return r; } // No-op for real
inline RealD conj(const RealD & r){ return r; }
inline void mac (RealF * __restrict__ y,const RealF * __restrict__ a,const RealF *__restrict__ x){ *y = (*a) * (*x)+(*y); }
inline void mult(RealF * __restrict__ y,const RealF * __restrict__ l,const RealF *__restrict__ r){ *y = (*l) * (*r); }
inline void sub (RealF * __restrict__ y,const RealF * __restrict__ l,const RealF *__restrict__ r){ *y = (*l) - (*r); }
inline void add (RealF * __restrict__ y,const RealF * __restrict__ l,const RealF *__restrict__ r){ *y = (*l) + (*r); }
class Zero{};
static Zero zero;
template<class itype> inline void ZeroIt(itype &arg){ arg=zero;};