mirror of https://github.com/paboyle/Grid.git synced 2024-11-09 23:45:36 +00:00

First implementation of Dirac matrices as a Gamma class.

This commit is contained in:
Peter Boyle 2015-04-24 18:20:03 +01:00
parent b9939e3974
commit d707c4e0a3
11 changed files with 683 additions and 24 deletions

View File

@@ -61,7 +61,12 @@ namespace Grid {
inline void add (ComplexF * __restrict__ y,const ComplexF * __restrict__ l,const ComplexF *__restrict__ r){ *y = (*l) + (*r); }
inline ComplexF adj(const ComplexF& r ){ return(conj(r)); }
//conj already supported for complex
inline ComplexF timesI(const ComplexF r) { return(r*ComplexF(0.0,1.0));}
inline ComplexF timesMinusI(const ComplexF r){ return(r*ComplexF(0.0,-1.0));}
inline ComplexD timesI(const ComplexD r) { return(r*ComplexD(0.0,1.0));}
inline ComplexD timesMinusI(const ComplexD r){ return(r*ComplexD(0.0,-1.0));}
inline void mac (RealD * __restrict__ y,const RealD * __restrict__ a,const RealD *__restrict__ x){ *y = (*a) * (*x)+(*y);}
inline void mult(RealD * __restrict__ y,const RealD * __restrict__ l,const RealD *__restrict__ r){ *y = (*l) * (*r);}
inline void sub (RealD * __restrict__ y,const RealD * __restrict__ l,const RealD *__restrict__ r){ *y = (*l) - (*r);}
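A minimal standalone sketch of the algebra the new timesI/timesMinusI helpers implement, written against std::complex rather than Grid's ComplexF/ComplexD typedefs (illustrative only):

#include <cassert>
#include <complex>
int main(void){
  std::complex<double> z(3.0,4.0);
  // timesI(z) returns z*(0+1i); timesMinusI(z) returns z*(0-1i)
  std::complex<double> iz  = z*std::complex<double>(0.0, 1.0);
  std::complex<double> miz = z*std::complex<double>(0.0,-1.0);
  assert(iz  == std::complex<double>(-4.0, 3.0));  //  i*(a+bi) = -b + ai
  assert(miz == std::complex<double>( 4.0,-3.0));  // -i*(a+bi) =  b - ai
  return 0;
}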

View File

@@ -63,13 +63,6 @@ namespace Grid {
return;
}
// Need to figure multi-precision.
template<class Mytype> Mytype timesI(Mytype &r)
{
iScalar<Complex> i;
i._internal = Complex(0,1);
return r*i;
}
// + operator for scalar, vector, matrix
template<class ltype,class rtype>

View File

@@ -5,8 +5,63 @@ namespace Grid {
///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////// CONJ ///////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////
#ifdef GRID_WARN_SUBOPTIMAL
#warning "Optimisation alert switch over to two argument form to avoid copy back in perf critical timesI "
#endif
///////////////////////////////////////////////
// multiply by I; make recursive.
///////////////////////////////////////////////
template<class vtype> inline iScalar<vtype> timesI(const iScalar<vtype>&r)
{
iScalar<vtype> ret;
ret._internal = timesI(r._internal);
return ret;
}
template<class vtype,int N> inline iVector<vtype,N> timesI(const iVector<vtype,N>&r)
{
iVector<vtype,N> ret;
for(int i=0;i<N;i++){
ret._internal[i] = timesI(r._internal[i]);
}
return ret;
}
template<class vtype,int N> inline iMatrix<vtype,N> timesI(const iMatrix<vtype,N>&r)
{
iMatrix<vtype,N> ret;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
ret._internal[i][j] = timesI(r._internal[i][j]);
}}
return ret;
}
template<class vtype> inline iScalar<vtype> timesMinusI(const iScalar<vtype>&r)
{
iScalar<vtype> ret;
ret._internal = timesMinusI(r._internal);
return ret;
}
template<class vtype,int N> inline iVector<vtype,N> timesMinusI(const iVector<vtype,N>&r)
{
iVector<vtype,N> ret;
for(int i=0;i<N;i++){
ret._internal[i] = timesMinusI(r._internal[i]);
}
return ret;
}
template<class vtype,int N> inline iMatrix<vtype,N> timesMinusI(const iMatrix<vtype,N>&r)
{
iMatrix<vtype,N> ret;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
ret._internal[i][j] = timesMinusI(r._internal[i][j]);
}}
return ret;
}
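A sketch of how the recursion bottoms out on the scalar overloads, assuming the headers from this commit are pulled in through Grid.h as the tests do (the function name is illustrative):

#include <Grid.h>
using namespace Grid;
void timesI_recursion_example(void){
  iVector<iScalar<ComplexD>,4> v;
  for(int i=0;i<4;i++) v._internal[i]._internal = ComplexD(i,0.0);
  // iVector -> iScalar -> ComplexD: each level forwards to timesI one level down
  iVector<iScalar<ComplexD>,4> iv = timesI(v);
  // iv._internal[3]._internal is now ComplexD(0.0,3.0)
}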
///////////////////////////////////////////////
// Conj function for scalar, vector, matrix
///////////////////////////////////////////////
template<class vtype> inline iScalar<vtype> conj(const iScalar<vtype>&r)
{
iScalar<vtype> ret;
@@ -31,7 +86,9 @@ template<class vtype,int N> inline iMatrix<vtype,N> conj(const iMatrix<vtype,N>&
return ret;
}
///////////////////////////////////////////////
// Adj function for scalar, vector, matrix
///////////////////////////////////////////////
template<class vtype> inline iScalar<vtype> adj(const iScalar<vtype>&r)
{
iScalar<vtype> ret;

View File

@@ -72,6 +72,13 @@ public:
return _internal;
}
inline const vtype & operator ()(void) const {
return _internal;
}
// inline vtype && operator ()(void) {
// return _internal;
// }
operator ComplexD () const { return(TensorRemove(_internal)); };
operator RealD () const { return(real(TensorRemove(_internal))); }
@@ -156,6 +163,12 @@ public:
inline vtype & operator ()(int i) {
return _internal[i];
}
inline const vtype & operator ()(int i) const {
return _internal[i];
}
// inline vtype && operator ()(int i) {
// return _internal[i];
// }
};
template<class vtype,int N> class iMatrix
@@ -235,12 +248,22 @@ public:
*this = (*this)+r;
return *this;
}
// returns an lvalue reference
inline vtype & operator ()(int i,int j) {
return _internal[i][j];
}
inline const vtype & operator ()(int i,int j) const {
return _internal[i][j];
}
// inline vtype && operator ()(int i,int j) {
// return _internal[i][j];
// }
};
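A usage sketch for the new const accessors, again assuming Grid.h; before this change, reading an entry through operator() on a const object would not compile:

#include <Grid.h>
using namespace Grid;
ComplexD read_entry(const iMatrix<ComplexD,4> &m){
  return m(2,3);               // const operator(): read-only access on a const reference
}
void write_entry(iMatrix<ComplexD,4> &m){
  m(2,3) = ComplexD(0.0,1.0);  // non-const operator(): returns an lvalue reference
}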
template<class vobj> inline
void extract(const vobj &vec,std::vector<typename vobj::scalar_object> &extracted)
{

View File

@@ -143,4 +143,7 @@ namespace QCD {
} //namespace QCD
} // Grid
#include <qcd/Grid_qcd_dirac.h>
#endif

lib/qcd/Grid_qcd_dirac.h (new file, 393 lines)
View File

@@ -0,0 +1,393 @@
#ifndef GRID_QCD_DIRAC_H
#define GRID_QCD_DIRAC_H
namespace Grid{
namespace QCD {
class Gamma {
public:
const int Ns=4;
enum GammaMatrix {
Identity,
GammaX,
GammaY,
GammaZ,
GammaT,
Gamma5,
// GammaXGamma5,
// GammaYGamma5,
// GammaZGamma5,
// GammaTGamma5,
// SigmaXY,
// SigmaXZ,
// SigmaYZ,
// SigmaXT,
// SigmaYT,
// SigmaZT,
MinusIdentity,
MinusGammaX,
MinusGammaY,
MinusGammaZ,
MinusGammaT,
MinusGamma5
// MinusGammaXGamma5, easiest to form by composition
// MinusGammaYGamma5, as performance is not critical for these
// MinusGammaZGamma5,
// MinusGammaTGamma5,
// MinusSigmaXY,
// MinusSigmaXZ,
// MinusSigmaYZ,
// MinusSigmaXT,
// MinusSigmaYT,
// MinusSigmaZT
};
Gamma (GammaMatrix g) { _g=g; }
GammaMatrix _g;
};
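Usage sketch (illustrative, not part of this header): a Gamma object is just a tag carrying one enum value, and the algebra is applied by the operator* overloads further down; SpinMatrix is assumed from the QCD headers, as in the test program.

Gamma gx(Gamma::GammaX);
Gamma minus_g5(Gamma::MinusGamma5);
SpinMatrix sm;                 // assumed QCD spin-matrix type, as used in tests/Grid_gamma.cc
SpinMatrix gx_sm = gx*sm;      // dispatches to multGammaX via the switch in operator* below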
/* Gx
* 0 0 0 i
* 0 0 i 0
* 0 -i 0 0
* -i 0 0 0
*/
template<class vtype> inline void rmultMinusGammaX(iMatrix<vtype,Ns> &ret,const iMatrix<vtype,Ns> &rhs){
for(int i=0;i<Ns;i++){
ret(i,0) = timesI(rhs(i,3));
ret(i,1) = timesI(rhs(i,2));
ret(i,2) = timesMinusI(rhs(i,1));
ret(i,3) = timesMinusI(rhs(i,0));
}
};
template<class vtype> inline void rmultGammaX(iMatrix<vtype,Ns> &ret, const iMatrix<vtype,Ns> &rhs){
for(int i=0;i<Ns;i++){
ret(i,0) = timesMinusI(rhs(i,3));
ret(i,1) = timesMinusI(rhs(i,2));
ret(i,2) = timesI(rhs(i,1));
ret(i,3) = timesI(rhs(i,0));
}
};
template<class vtype> inline void multGammaX(iVector<vtype,Ns> &ret, iVector<vtype,Ns> &rhs){
ret._internal[0] = timesI(rhs._internal[3]);
ret._internal[1] = timesI(rhs._internal[2]);
ret._internal[2] = timesMinusI(rhs._internal[1]);
ret._internal[3] = timesMinusI(rhs._internal[0]);
};
template<class vtype> inline void multMinusGammaX(iVector<vtype,Ns> &ret, iVector<vtype,Ns> &rhs){
ret(0) = timesMinusI(rhs(3));
ret(1) = timesMinusI(rhs(2));
ret(2) = timesI(rhs(1));
ret(3) = timesI(rhs(0));
};
template<class vtype> inline void multGammaX(iMatrix<vtype,Ns> &ret, const iMatrix<vtype,Ns> &rhs){
for(int i=0;i<Ns;i++){
ret(0,i) = timesI(rhs(3,i));
ret(1,i) = timesI(rhs(2,i));
ret(2,i) = timesMinusI(rhs(1,i));
ret(3,i) = timesMinusI(rhs(0,i));
}
};
template<class vtype> inline void multMinusGammaX(iMatrix<vtype,Ns> &ret, const iMatrix<vtype,Ns> &rhs){
for(int i=0;i<Ns;i++){
ret(0,i) = timesMinusI(rhs(3,i));
ret(1,i) = timesMinusI(rhs(2,i));
ret(2,i) = timesI(rhs(1,i));
ret(3,i) = timesI(rhs(0,i));
}
};
/*Gy
* 0 0 0 -1 [0] -+ [3]
* 0 0 1 0 [1] +- [2]
* 0 1 0 0
* -1 0 0 0
*/
template<class vtype> inline void multGammaY(iVector<vtype,Ns> &ret, iVector<vtype,Ns> &rhs){
ret(0) = -rhs(3);
ret(1) = rhs(2);
ret(2) = rhs(1);
ret(3) = -rhs(0);
};
template<class vtype> inline void multMinusGammaY(iVector<vtype,Ns> &ret, iVector<vtype,Ns> &rhs){
ret(0) = rhs(3);
ret(1) = -rhs(2);
ret(2) = -rhs(1);
ret(3) = rhs(0);
};
template<class vtype> inline void multGammaY(iMatrix<vtype,Ns> &ret, const iMatrix<vtype,Ns> &rhs){
for(int i=0;i<Ns;i++){
ret(0,i) = -rhs(3,i);
ret(1,i) = rhs(2,i);
ret(2,i) = rhs(1,i);
ret(3,i) = -rhs(0,i);
}
};
template<class vtype> inline void multMinusGammaY(iMatrix<vtype,Ns> &ret, const iMatrix<vtype,Ns> &rhs){
for(int i=0;i<Ns;i++){
ret(0,i) = rhs(3,i);
ret(1,i) = -rhs(2,i);
ret(2,i) = -rhs(1,i);
ret(3,i) = rhs(0,i);
}
};
/*Gz
* 0 0 i 0 [0]+-i[2]
* 0 0 0 -i [1]-+i[3]
* -i 0 0 0
* 0 i 0 0
*/
template<class vtype> inline void multGammaZ(iVector<vtype,Ns> &ret, iVector<vtype,Ns> &rhs){
ret(0) = timesI(rhs(2));
ret(1) =timesMinusI(rhs(3));
ret(2) =timesMinusI(rhs(0));
ret(3) = timesI(rhs(1));
};
template<class vtype> inline void multMinusGammaZ(iVector<vtype,Ns> &ret, iVector<vtype,Ns> &rhs){
ret(0) = timesMinusI(rhs(2));
ret(1) = timesI(rhs(3));
ret(2) = timesI(rhs(0));
ret(3) = timesMinusI(rhs(1));
};
template<class vtype> inline void multGammaZ(iMatrix<vtype,Ns> &ret, const iMatrix<vtype,Ns> &rhs){
for(int i=0;i<Ns;i++){
ret(0,i) = timesI(rhs(2,i));
ret(1,i) =timesMinusI(rhs(3,i));
ret(2,i) =timesMinusI(rhs(0,i));
ret(3,i) = timesI(rhs(1,i));
}
};
template<class vtype> inline void multMinusGammaZ(iMatrix<vtype,Ns> &ret, const iMatrix<vtype,Ns> &rhs){
for(int i=0;i<Ns;i++){
ret(0,i) = timesMinusI(rhs(2,i));
ret(1,i) = timesI(rhs(3,i));
ret(2,i) = timesI(rhs(0,i));
ret(3,i) = timesMinusI(rhs(1,i));
}
};
/*Gt
* 0 0 1 0 [0]+-[2]
* 0 0 0 1 [1]+-[3]
* 1 0 0 0
* 0 1 0 0
*/
template<class vtype> inline void multGammaT(iVector<vtype,Ns> &ret, iVector<vtype,Ns> &rhs){
ret(0) = rhs(2);
ret(1) = rhs(3);
ret(2) = rhs(0);
ret(3) = rhs(1);
};
template<class vtype> inline void multMinusGammaT(iVector<vtype,Ns> &ret, iVector<vtype,Ns> &rhs){
ret(0) =-rhs(2);
ret(1) =-rhs(3);
ret(2) =-rhs(0);
ret(3) =-rhs(1);
};
template<class vtype> inline void multGammaT(iMatrix<vtype,Ns> &ret, const iMatrix<vtype,Ns> &rhs){
for(int i=0;i<Ns;i++){
ret(0,i) = rhs(2,i);
ret(1,i) = rhs(3,i);
ret(2,i) = rhs(0,i);
ret(3,i) = rhs(1,i);
}
};
template<class vtype> inline void multMinusGammaT(iMatrix<vtype,Ns> &ret, const iMatrix<vtype,Ns> &rhs){
for(int i=0;i<Ns;i++){
ret(0,i) =-rhs(2,i);
ret(1,i) =-rhs(3,i);
ret(2,i) =-rhs(0,i);
ret(3,i) =-rhs(1,i);
}
};
/*G5
* 1 0 0 0 [0]+-[2]
* 0 1 0 0 [1]+-[3]
* 0 0 -1 0
* 0 0 0 -1
*/
template<class vtype> inline void multGamma5(iVector<vtype,Ns> &ret, iVector<vtype,Ns> &rhs){
ret(0) = rhs(0);
ret(1) = rhs(1);
ret(2) =-rhs(2);
ret(3) =-rhs(3);
};
template<class vtype> inline void multMinusGamma5(iVector<vtype,Ns> &ret, iVector<vtype,Ns> &rhs){
ret(0) =-rhs(0);
ret(1) =-rhs(1);
ret(2) = rhs(2);
ret(3) = rhs(3);
};
template<class vtype> inline void multGamma5(iMatrix<vtype,Ns> &ret, const iMatrix<vtype,Ns> &rhs){
for(int i=0;i<Ns;i++){
ret(0,i) = rhs(0,i);
ret(1,i) = rhs(1,i);
ret(2,i) =-rhs(2,i);
ret(3,i) =-rhs(3,i);
}
};
template<class vtype> inline void multMinusGamma5(iMatrix<vtype,Ns> &ret, const iMatrix<vtype,Ns> &rhs){
for(int i=0;i<Ns;i++){
ret(0,i) =-rhs(0,i);
ret(1,i) =-rhs(1,i);
ret(2,i) = rhs(2,i);
ret(3,i) = rhs(3,i);
}
};
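A standalone scalar cross-check of the multGammaX convention against the explicit 4x4 form in the Gx comment above (gx, psi and the main() harness are illustrative names, not part of the library):

#include <cassert>
#include <complex>
typedef std::complex<double> C;
int main(void){
  const C ci(0.0,1.0);
  // explicit gamma_x in this basis (row, column), as in the Gx comment block
  C gx[4][4] = { { C(0), C(0), C(0),  ci  },
                 { C(0), C(0),  ci , C(0) },
                 { C(0), -ci , C(0), C(0) },
                 { -ci , C(0), C(0), C(0) } };
  C psi[4] = { C(1,2), C(3,4), C(5,6), C(7,8) };
  // multGammaX convention: ret(0)=i*psi(3), ret(1)=i*psi(2), ret(2)=-i*psi(1), ret(3)=-i*psi(0)
  C ret[4] = { ci*psi[3], ci*psi[2], -ci*psi[1], -ci*psi[0] };
  for(int i=0;i<4;i++){
    C acc(0.0,0.0);
    for(int j=0;j<4;j++) acc += gx[i][j]*psi[j];
    assert(acc == ret[i]);
  }
  return 0;
}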
///////////////////////////////////////////////////////////////////////////////////////////////////
// Operator * : first case, the index is not the spin index, so recurse
///////////////////////////////////////////////////////////////////////////////////////////////////
// FIXME
//
// Optimisation: switch over to a "multGammaX(ret._internal,arg._internal)" style early; calling the
// helpers directly from the lattice operator avoids the copy back and the case-switch overhead, as
// was done for the tensor maths operators to remove operator * notation early
//
#ifdef GRID_WARN_SUBOPTIMAL
#warning "Optimisation alert switch over to multGammaX early "
#endif
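A sketch of the two-argument form the warning refers to, assuming SpinMatrix wraps its spin-index iMatrix in a single iScalar (one ._internal hop), consistent with the result()(i,j)() accesses in the test program:

SpinMatrix arg, r1, r2;
// ... fill arg ...
r1 = Gamma(Gamma::GammaX)*arg;             // operator* path: recursion, enum switch, copy back
multGammaX(r2._internal, arg._internal);   // two-argument form: writes in place, no switch
// r1 and r2 hold the same result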
template<class vtype> inline auto operator * ( const Gamma &G,const iScalar<vtype> &arg) ->
typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinIndex>::notvalue,iScalar<vtype> >::type
{
iScalar<vtype> ret;
ret._internal=G*arg._internal;
return ret;
}
template<class vtype,int N> inline auto operator * ( const Gamma &G,const iVector<vtype,N> &arg) ->
typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinIndex>::notvalue,iVector<vtype,N> >::type
{
iVector<vtype,N> ret;
ret._internal=G*arg._internal;
return ret;
}
template<class vtype,int N> inline auto operator * ( const Gamma &G,const iMatrix<vtype,N> &arg) ->
typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinIndex>::notvalue,iMatrix<vtype,N> >::type
{
iMatrix<vtype,N> ret;
ret._internal=G*arg._internal;
return ret;
}
////////////////////////////////////////////////////////
// When we hit the spin index this matches and we stop
////////////////////////////////////////////////////////
template<class vtype> inline auto operator * ( const Gamma &G,const iMatrix<vtype,Ns> &arg) ->
typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,Ns>,SpinIndex>::value,iMatrix<vtype,Ns> >::type
{
iMatrix<vtype,Ns> ret;
switch (G._g) {
case Gamma::Identity:
ret = arg;
break;
case Gamma::MinusIdentity:
ret = -arg;
break;
case Gamma::GammaX:
multGammaX(ret,arg);
break;
case Gamma::MinusGammaX:
multMinusGammaX(ret,arg);
break;
case Gamma::GammaY:
multGammaY(ret,arg);
break;
case Gamma::MinusGammaY:
multMinusGammaY(ret,arg);
break;
case Gamma::GammaZ:
multGammaZ(ret,arg);
break;
case Gamma::MinusGammaZ:
multMinusGammaZ(ret,arg);
break;
case Gamma::GammaT:
multGammaT(ret,arg);
break;
case Gamma::MinusGammaT:
multMinusGammaT(ret,arg);
break;
case Gamma::Gamma5:
multGamma5(ret,arg);
break;
case Gamma::MinusGamma5:
multMinusGamma5(ret,arg);
break;
default:
assert(0);
break;
}
return ret;
}
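Products of gammas are not enumerated; as the enum comment above notes, they are easiest to form by composition. An illustrative sketch, with ident the unit SpinMatrix built as in the test program; in this basis gamma_x gamma_y gamma_z gamma_t should reproduce gamma_5:

SpinMatrix g5_direct   = Gamma(Gamma::Gamma5)*ident;
SpinMatrix g5_composed = Gamma(Gamma::GammaX)*( Gamma(Gamma::GammaY)*( Gamma(Gamma::GammaZ)*( Gamma(Gamma::GammaT)*ident )));
// printing both with the loop used in tests/Grid_gamma.cc should give the same diag(1,1,-1,-1) pattern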
/* Output from test
./Grid_gamma
Identity((1,0),(0,0),(0,0),(0,0))
((0,0),(1,0),(0,0),(0,0))
((0,0),(0,0),(1,0),(0,0))
((0,0),(0,0),(0,0),(1,0)) OK
GammaX ((0,0),(0,0),(0,0),(0,1))
((0,0),(0,0),(0,1),(0,0))
((0,0),(0,-1),(0,0),(0,0))
((0,-1),(0,0),(0,0),(0,0)) OK
* Gx
* 0 0 0 i
* 0 0 i 0
* 0 -i 0 0
* -i 0 0 0
GammaY ((-0,-0),(-0,-0),(-0,-0),(-1,-0))
((0,0),(0,0),(1,0),(0,0))
((0,0),(1,0),(0,0),(0,0)) OK
((-1,-0),(-0,-0),(-0,-0),(-0,-0))
*Gy
* 0 0 0 -1 [0] -+ [3]
* 0 0 1 0 [1] +- [2]
* 0 1 0 0
* -1 0 0 0
GammaZ ((0,0),(0,0),(0,1),(0,0))
((0,0),(0,0),(0,0),(0,-1))
((0,-1),(0,0),(0,0),(0,0))
((0,0),(0,1),(0,0),(0,0)) OK
* 0 0 i 0 [0]+-i[2]
* 0 0 0 -i [1]-+i[3]
* -i 0 0 0
* 0 i 0 0
GammaT ((0,0),(0,0),(1,0),(0,0))
((0,0),(0,0),(0,0),(1,0)) OK
((1,0),(0,0),(0,0),(0,0))
((0,0),(1,0),(0,0),(0,0))
* 0 0 1 0 [0]+-[2]
* 0 0 0 1 [1]+-[3]
* 1 0 0 0
* 0 1 0 0
Gamma5 ((1,0),(0,0),(0,0),(0,0))
((0,0),(1,0),(0,0),(0,0))
((-0,-0),(-0,-0),(-1,-0),(-0,-0))
((-0,-0),(-0,-0),(-0,-0),(-1,-0))
* 1 0 0 0 [0]+-[2]
* 0 1 0 0 [1]+-[3] OK
* 0 0 -1 0
* 0 0 0 -1
*/
} //namespace QCD
} // Grid
#endif

View File

@@ -101,13 +101,13 @@ namespace Grid {
IF IMM0[2] = 0
THEN DEST[191:128]=SRC1[191:128] ELSE DEST[191:128]=SRC1[255:192] FI;
IF IMM0[3] = 0
THEN DEST[255:192]=SRC2[191:128] ELSE DEST[255:192]=SRC2[255:192] FI; // 0x5 r<->i ; 0xC unchanged
*/
#if defined (AVX1)|| defined (AVX2)
zvec ymm0,ymm1,ymm2;
ymm0 = _mm256_shuffle_pd(a.v,a.v,0x0); // ymm0 <- ar ar, ar,ar b'00,00
ymm0 = _mm256_mul_pd(ymm0,b.v); // ymm0 <- ar bi, ar br
ymm1 = _mm256_shuffle_pd(b.v,b.v,0x5); // ymm1 <- br,bi b'01,01
ymm2 = _mm256_shuffle_pd(a.v,a.v,0xF); // ymm2 <- ai,ai b'11,11
ymm1 = _mm256_mul_pd(ymm1,ymm2); // ymm1 <- br ai, ai bi
ret.v= _mm256_addsub_pd(ymm0,ymm1);
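A scalar reference for what the shuffle/mul/addsub sequence above computes per complex pair (illustrative; it does not exercise the intrinsics):

#include <cassert>
#include <complex>
int main(void){
  std::complex<double> a(1.5,-2.0), b(0.25,3.0);
  double ar=a.real(), ai=a.imag(), br=b.real(), bi=b.imag();
  std::complex<double> ref( ar*br - ai*bi,    // addsub subtracts in the real (even) slots
                            ar*bi + ai*br );  // and adds in the imaginary (odd) slots
  assert(std::abs(ref - a*b) < 1e-12);
  return 0;
}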
@@ -140,7 +140,7 @@ namespace Grid {
* publisher = {ACM},
* address = {New York, NY, USA},
* keywords = {autovectorization, fourier transform, program generation, simd, super-optimization},
* }
*/
zvec vzero,ymm0,ymm1,real,imag;
vzero = _mm512_setzero();
@@ -252,23 +252,71 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
vComplexD ret ; vzero(ret);
#if defined (AVX1)|| defined (AVX2)
// addsubpd 0, in.v => 0+in.v[3], 0-in.v[2], 0+in.v[1], 0-in.v[0], ...
// __m256d tmp = _mm256_addsub_pd(ret.v,_mm256_shuffle_pd(in.v,in.v,0x5));
// ret.v=_mm256_shuffle_pd(tmp,tmp,0x5);
ret.v = _mm256_addsub_pd(ret.v,in.v);
#endif
#ifdef SSE4
ret.v = _mm_addsub_pd(ret.v,in.v);
#endif
#ifdef AVX512
// the AVX512 target has no fmaddsub or addsub
// with mask 0xa (1010), v[0] -v[1] v[2] -v[3] ....
ret.v = _mm512_mask_sub_pd(in.v, 0xaaaa,ret.v, in.v);
#endif
#ifdef QPX
assert(0);
#endif
return ret;
}
friend inline vComplexD timesI(const vComplexD &in){
vComplexD ret; vzero(ret);
#if defined (AVX1)|| defined (AVX2)
/*
IF IMM0[0] = 0
THEN DEST[63:0]=SRC1[63:0] ELSE DEST[63:0]=SRC1[127:64] FI;
IF IMM0[1] = 0
THEN DEST[127:64]=SRC2[63:0] ELSE DEST[127:64]=SRC2[127:64] FI;
IF IMM0[2] = 0
THEN DEST[191:128]=SRC1[191:128] ELSE DEST[191:128]=SRC1[255:192] FI;
IF IMM0[3] = 0
THEN DEST[255:192]=SRC2[191:128] ELSE DEST[255:192]=SRC2[255:192] FI;
*/
zvec tmp = _mm256_shuffle_pd(in.v,in.v,0x5); // tmp <- i,r
ret.v = _mm256_addsub_pd(ret.v,tmp);         // ret <- -i,r
#endif
#ifdef SSE4
zvec tmp = _mm_shuffle_pd(in.v,in.v,0x1);    // tmp <- i,r
ret.v = _mm_addsub_pd(ret.v,tmp);            // ret <- -i,r
#endif
#ifdef AVX512
ret.v = _mm512_mask_sub_pd(in.v,0xaa,ret.v,in.v);    // ret <- r,-i (conjugate)
ret.v = _mm512_swizzle_pd(ret.v, _MM_SWIZ_REG_CDAB); // ret <- -i,r
#endif
#ifdef QPX
assert(0);
#endif
return ret;
}
friend inline vComplexD timesMinusI(const vComplexD &in){
vComplexD ret; vzero(ret);
#if defined (AVX1)|| defined (AVX2)
zvec tmp = _mm256_addsub_pd(ret.v,in.v);   // tmp <- -r,i
ret.v = _mm256_shuffle_pd(tmp,tmp,0x5);    // ret <- i,-r
#endif
#ifdef SSE4
zvec tmp = _mm_addsub_pd(ret.v,in.v);      // tmp <- -r,i
ret.v = _mm_shuffle_pd(tmp,tmp,0x1);       // ret <- i,-r
#endif
#ifdef AVX512
zvec tmp = _mm512_swizzle_pd(in.v, _MM_SWIZ_REG_CDAB); // tmp <- i,r
ret.v = _mm512_mask_sub_pd(tmp,0xaa,ret.v,tmp);        // ret <- i,-r
#endif
#ifdef QPX
assert(0);
#endif
return ret;
}
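A lane-by-lane scalar reference for the packed layout these routines act on, real parts in even slots and imaginary parts in odd slots; it only states the target result of timesI and does not exercise the intrinsics:

#include <cassert>
int main(void){
  double in[4]  = { 1.0, 2.0, 3.0, 4.0 };   // two complex numbers: 1+2i, 3+4i
  double out[4];
  for(int k=0;k<4;k+=2){                    // timesI per pair: i*(re + i*im) = -im + i*re
    out[k]   = -in[k+1];
    out[k+1] =  in[k];
  }
  assert(out[0]==-2.0 && out[1]==1.0 && out[2]==-4.0 && out[3]==3.0);
  return 0;
}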
// REDUCE FIXME must be a cleaner implementation
friend inline ComplexD Reduce(const vComplexD & in)
{

View File

@@ -329,9 +329,10 @@ friend inline void vstore(const vComplexF &ret, ComplexF *a){
friend inline vComplexF conj(const vComplexF &in){
vComplexF ret ; vzero(ret);
#if defined (AVX1)|| defined (AVX2)
// cvec tmp;
// tmp = _mm256_addsub_ps(ret.v,_mm256_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1))); // ymm1 <- br,bi
// ret.v=_mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1));
ret.v = _mm256_addsub_ps(ret.v,in.v);
#endif
#ifdef SSE4
ret.v = _mm_addsub_ps(ret.v,in.v);
@@ -344,6 +345,44 @@ friend inline void vstore(const vComplexF &ret, ComplexF *a){
#endif
return ret;
}
friend inline vComplexF timesI(const vComplexF &in){
vComplexF ret; vzero(ret);
#if defined (AVX1)|| defined (AVX2)
cvec tmp =_mm256_addsub_ps(ret.v,in.v); // r,-i
ret.v = _mm256_shuffle_ps(tmp,tmp,0x5);
#endif
#ifdef SSE4
cvec tmp =_mm_addsub_ps(ret.v,in.v); // r,-i
ret.v = _mm_shuffle_ps(tmp,tmp,0x5);
#endif
#ifdef AVX512
ret.v = _mm512_mask_sub_ps(in.v,0xaaaa,ret.v,in.v); // real -imag
ret.v = _mm512_swizzle_ps(ret.v, _MM_SWIZ_REG_CDAB);// OK
#endif
#ifdef QPX
assert(0);
#endif
return ret;
}
friend inline vComplexF timesMinusI(const vComplexF &in){
vComplexF ret; vzero(ret);
#if defined (AVX1)|| defined (AVX2)
cvec tmp =_mm256_shuffle_ps(in.v,in.v,0x5);
ret.v = _mm256_addsub_ps(ret.v,tmp); // i,-r
#endif
#ifdef SSE4
cvec tmp =_mm_shuffle_ps(in.v,in.v,0x5);
ret.v = _mm_addsub_ps(ret.v,tmp); // r,-i
#endif
#ifdef AVX512
cvec tmp = _mm512_swizzle_ps(in.v, _MM_SWIZ_REG_CDAB);// OK
ret.v = _mm512_mask_sub_ps(tmp,0xaaaa,ret.v,tmp); // real -imag
#endif
#ifdef QPX
assert(0);
#endif
return ret;
}
// Unary negation
friend inline vComplexF operator -(const vComplexF &r) {

tests/Grid_gamma.cc (new file, 95 lines)
View File

@@ -0,0 +1,95 @@
#include <Grid.h>
#include <parallelIO/GridNerscIO.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> simd_layout({1,1,2,2});
std::vector<int> mpi_layout ({1,1,1,1});
std::vector<int> latt_size ({8,8,8,8});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRNG RNG(&Grid);
SpinMatrix ident=zero;
SpinMatrix ll=zero;
SpinMatrix rr=zero;
SpinMatrix result;
for(int a=0;a<Ns;a++){
ident()(a,a) = 1.0;
}
Gamma::GammaMatrix g [] = {
Gamma::Identity,
Gamma::GammaX,
Gamma::GammaY,
Gamma::GammaZ,
Gamma::GammaT,
Gamma::Gamma5,
Gamma::MinusIdentity,
Gamma::MinusGammaX,
Gamma::MinusGammaY,
Gamma::MinusGammaZ,
Gamma::MinusGammaT,
Gamma::MinusGamma5
};
const char *list[] = {
"Identity ",
"GammaX ",
"GammaY ",
"GammaZ ",
"GammaT ",
"Gamma5 ",
"-Identity",
"-GammaX ",
"-GammaY ",
"-GammaZ ",
"-GammaT ",
"-Gamma5 ",
" "
};
// result == ll*Gamma(g[0])*rr;
// result == ll*Gamma(g[0]);
for(int mu=0;mu<12;mu++){
result = Gamma(g[mu])* ident;
for(int i=0;i<Ns;i++){
if(i==0) std::cout << list[mu];
else std::cout << list[12];
std::cout<<"(";
for(int j=0;j<Ns;j++){
if ( abs(result()(i,j)())==0 ) {
std::cout<< " 0";
} else if ( abs(result()(i,j)() - ComplexF(0,1))==0){
std::cout<< " i";
} else if ( abs(result()(i,j)() + ComplexF(0,1))==0){
std::cout<< "-i";
} else if ( abs(result()(i,j)() - ComplexF(1,0))==0){
std::cout<< " 1";
} else if ( abs(result()(i,j)() + ComplexF(1,0))==0){
std::cout<< "-1";
}
std::cout<< ((j==Ns-1) ? ")" : "," );
}
std::cout << std::endl;
}
std::cout << std::endl;
}
Grid_finalize();
}
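A natural extension of this check, sketched here and not part of the commit: applying GammaX twice should reproduce the Identity pattern printed above, e.g. inside main():

SpinMatrix gx2 = Gamma(Gamma::GammaX)*( Gamma(Gamma::GammaX)*ident );
// printing gx2 with the same nested loop should show the Identity pattern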

View File

@@ -5,7 +5,7 @@ AM_LDFLAGS = -L$(top_srcdir)/lib
#
# Test code
#
bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma
Grid_main_SOURCES = Grid_main.cc
Grid_main_LDADD = -lGrid
@@ -16,5 +16,8 @@ Grid_nersc_io_LDADD = -lGrid
Grid_cshift_SOURCES = Grid_cshift.cc
Grid_cshift_LDADD = -lGrid
Grid_gamma_SOURCES = Grid_gamma.cc
Grid_gamma_LDADD = -lGrid
Grid_stencil_SOURCES = Grid_stencil.cc
Grid_stencil_LDADD = -lGrid