1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Big updates with progress towards wilson matrix

This commit is contained in:
Peter Boyle 2015-04-26 15:51:09 +01:00
parent c678f2d255
commit 35cfef2129
27 changed files with 1008 additions and 355 deletions

18
TODO
View File

@ -2,6 +2,10 @@
- use protocol buffers? replace xmlReader/Writer ec.. - use protocol buffers? replace xmlReader/Writer ec..
- Binary use htonll, htonl - Binary use htonll, htonl
* Reduce implemention is poor
* Bug in SeedFixedIntegers gives same output on each site.
* Bug in RNG with complex numbers ; only filling real values; need helper function -- DONE
* Stencil operator support -----Initial thoughts, trial implementation DONE. * Stencil operator support -----Initial thoughts, trial implementation DONE.
-----some simple tests that Stencil matches Cshift. -----some simple tests that Stencil matches Cshift.
-----do all permute in comms phase, so that copy permute -----do all permute in comms phase, so that copy permute
@ -11,6 +15,7 @@
* CovariantShift support -----Use a class to store gauge field? (parallel transport?) * CovariantShift support -----Use a class to store gauge field? (parallel transport?)
* Strong test for norm2, conj and all primitive types.
* Consider switch std::vector to boost arrays or something lighter weight * Consider switch std::vector to boost arrays or something lighter weight
boost::multi_array<type, 3> A()... to replace multi1d, multi2d etc.. boost::multi_array<type, 3> A()... to replace multi1d, multi2d etc..
@ -33,10 +38,21 @@
* Make the Tensor types and Complex etc... play more nicely. * Make the Tensor types and Complex etc... play more nicely.
* TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > >
- TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > >
QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken TensorRemove, but I QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken TensorRemove, but I
want to introduce a syntax that does not require this. want to introduce a syntax that does not require this.
- Reductions that contract indices on a site should always demote the tensor structure.
norm2(), innerProduct.
- Result of Sum(), SliceSum // spatial sums
trace, traceIndex etc.. do not.
- problem arises because "trace" returns Lattice<TComplex> moving everything down to Scalar,
and then Sum and SliceSum to not remove the Scalars. This would be fixed if we
template specialize the scalar scalar scalar sum and SliceSum, on the basis of being
pure scalar.
* Optimise the extract/merge SIMD routines; Azusa?? * Optimise the extract/merge SIMD routines; Azusa??
- I have collated into single location at least. - I have collated into single location at least.

View File

@ -11,6 +11,7 @@
#define GRID_H #define GRID_H
#include <stdio.h> #include <stdio.h>
#include <complex> #include <complex>
#include <vector> #include <vector>
#include <iostream> #include <iostream>

View File

@ -2,7 +2,7 @@
/* lib/Grid_config.h.in. Generated from configure.ac by autoheader. */ /* lib/Grid_config.h.in. Generated from configure.ac by autoheader. */
/* AVX */ /* AVX */
#define AVX1 1 /* #undef AVX1 */
/* AVX2 */ /* AVX2 */
/* #undef AVX2 */ /* #undef AVX2 */
@ -77,7 +77,7 @@
#define PACKAGE_VERSION "1.0" #define PACKAGE_VERSION "1.0"
/* SSE4 */ /* SSE4 */
/* #undef SSE4 */ #define SSE4 1
/* Define to 1 if you have the ANSI C header files. */ /* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1 #define STDC_HEADERS 1

View File

@ -26,10 +26,15 @@ namespace Grid {
typedef float RealF; typedef float RealF;
typedef double RealD; typedef double RealD;
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
typedef RealD Real;
#else
typedef RealF Real;
#endif
typedef std::complex<RealF> ComplexF; typedef std::complex<RealF> ComplexF;
typedef std::complex<RealD> ComplexD; typedef std::complex<RealD> ComplexD;
typedef std::complex<Real> Complex;
inline RealF adj(const RealF & r){ return r; } inline RealF adj(const RealF & r){ return r; }
inline RealF conj(const RealF & r){ return r; } inline RealF conj(const RealF & r){ return r; }
@ -63,8 +68,8 @@ namespace Grid {
//conj already supported for complex //conj already supported for complex
inline ComplexF timesI(const ComplexF r) { return(r*ComplexF(0.0,1.0));} inline ComplexF timesI(const ComplexF r) { return(r*ComplexF(0.0,1.0));}
inline ComplexF timesMinusI(const ComplexF r){ return(r*ComplexF(0.0,-1.0));}
inline ComplexD timesI(const ComplexD r) { return(r*ComplexD(0.0,1.0));} inline ComplexD timesI(const ComplexD r) { return(r*ComplexD(0.0,1.0));}
inline ComplexF timesMinusI(const ComplexF r){ return(r*ComplexF(0.0,-1.0));}
inline ComplexD timesMinusI(const ComplexD r){ return(r*ComplexD(0.0,-1.0));} inline ComplexD timesMinusI(const ComplexD r){ return(r*ComplexD(0.0,-1.0));}
inline void mac (RealD * __restrict__ y,const RealD * __restrict__ a,const RealD *__restrict__ x){ *y = (*a) * (*x)+(*y);} inline void mac (RealD * __restrict__ y,const RealD * __restrict__ a,const RealD *__restrict__ x){ *y = (*a) * (*x)+(*y);}
@ -280,15 +285,11 @@ namespace Grid {
// Default precision // Default precision
#ifdef GRID_DEFAULT_PRECISION_DOUBLE #ifdef GRID_DEFAULT_PRECISION_DOUBLE
typedef RealD Real;
typedef vRealD vReal; typedef vRealD vReal;
typedef vComplexD vComplex; typedef vComplexD vComplex;
typedef std::complex<Real> Complex;
#else #else
typedef RealF Real;
typedef vRealF vReal; typedef vRealF vReal;
typedef vComplexF vComplex; typedef vComplexF vComplex;
typedef std::complex<Real> Complex;
#endif #endif
} }
#endif #endif

View File

@ -47,6 +47,101 @@ namespace Grid {
int from_rank; int from_rank;
} ; } ;
///////////////////////////////////////////////////////////////////
// Gather for when there is no need to SIMD split with compression
///////////////////////////////////////////////////////////////////
template<class vobj,class cobj,class compressor> void
Gather_plane_simple (Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,int dimension,int plane,int cbmask,compressor &compress)
{
int rd = rhs._grid->_rdimensions[dimension];
if ( !rhs._grid->CheckerBoarded(dimension) ) {
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
int o = 0; // relative offset to base within plane
int bo = 0; // offset in buffer
// Simple block stride gather of SIMD objects
#pragma omp parallel for collapse(2)
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
buffer[bo++]=compress(rhs._odata[so+o+b]);
}
o +=rhs._grid->_slice_stride[dimension];
}
} else {
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
int o = 0; // relative offset to base within plane
int bo = 0; // offset in buffer
#pragma omp parallel for collapse(2)
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
if ( ocb &cbmask ) {
buffer[bo]=compress(rhs._odata[so+o+b]);
bo++;
}
}
o +=rhs._grid->_slice_stride[dimension];
}
}
}
///////////////////////////////////////////////////////////////////
// Gather for when there *is* need to SIMD split with compression
///////////////////////////////////////////////////////////////////
template<class cobj,class vobj,class compressor> void
Gather_plane_extract(Lattice<vobj> &rhs,std::vector<typename cobj::scalar_type *> pointers,int dimension,int plane,int cbmask,compressor &compress)
{
int rd = rhs._grid->_rdimensions[dimension];
if ( !rhs._grid->CheckerBoarded(dimension) ) {
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
int o = 0; // relative offset to base within plane
int bo = 0; // offset in buffer
// Simple block stride gather of SIMD objects
#pragma omp parallel for collapse(2)
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
cobj temp;
temp=compress(rhs._odata[so+o+b]);
extract(temp,pointers);
}
o +=rhs._grid->_slice_stride[dimension];
}
} else {
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
int o = 0; // relative offset to base within plane
int bo = 0; // offset in buffer
#pragma omp parallel for collapse(2)
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
if ( ocb & cbmask ) {
cobj temp;
temp =compress(rhs._odata[so+o+b]);
extract(temp,pointers);
}
}
o +=rhs._grid->_slice_stride[dimension];
}
}
}
class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal fill in. class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal fill in.
public: public:
@ -86,8 +181,8 @@ namespace Grid {
// Could allow a functional munging of the halo to another type during the comms. // Could allow a functional munging of the halo to another type during the comms.
// this could implement the 16bit/32bit/64bit compression. // this could implement the 16bit/32bit/64bit compression.
template<class vobj> void HaloExchange(Lattice<vobj> &source, template<class vobj,class cobj, class compressor> void
std::vector<vobj,alignedAllocator<vobj> > &u_comm_buf) HaloExchange(Lattice<vobj> &source,std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,compressor &compress)
{ {
// conformable(source._grid,_grid); // conformable(source._grid,_grid);
assert(source._grid==_grid); assert(source._grid==_grid);
@ -95,12 +190,10 @@ namespace Grid {
int u_comm_offset=0; int u_comm_offset=0;
// Gather all comms buffers // Gather all comms buffers
typedef typename vobj::vector_type vector_type;
typedef typename vobj::scalar_type scalar_type;
for(int point = 0 ; point < _npoints; point++) { for(int point = 0 ; point < _npoints; point++) {
printf("Point %d \n",point);fflush(stdout); compress.Point(point);
int dimension = _directions[point]; int dimension = _directions[point];
int displacement = _distances[point]; int displacement = _distances[point];
@ -126,33 +219,30 @@ namespace Grid {
sshift[1] = _grid->CheckerBoardShift(_checkerboard,dimension,shift,1); sshift[1] = _grid->CheckerBoardShift(_checkerboard,dimension,shift,1);
if ( sshift[0] == sshift[1] ) { if ( sshift[0] == sshift[1] ) {
if (splice_dim) { if (splice_dim) {
printf("splice 0x3 \n");fflush(stdout); GatherStartCommsSimd(source,dimension,shift,0x3,u_comm_buf,u_comm_offset,compress);
GatherStartCommsSimd(source,dimension,shift,0x3,u_comm_buf,u_comm_offset);
} else { } else {
printf("NO splice 0x3 \n");fflush(stdout); GatherStartComms(source,dimension,shift,0x3,u_comm_buf,u_comm_offset,compress);
GatherStartComms(source,dimension,shift,0x3,u_comm_buf,u_comm_offset);
} }
} else { } else {
if(splice_dim){ if(splice_dim){
printf("splice 0x1,2 \n");fflush(stdout); GatherStartCommsSimd(source,dimension,shift,0x1,u_comm_buf,u_comm_offset,compress);// if checkerboard is unfavourable take two passes
GatherStartCommsSimd(source,dimension,shift,0x1,u_comm_buf,u_comm_offset);// if checkerboard is unfavourable take two passes GatherStartCommsSimd(source,dimension,shift,0x2,u_comm_buf,u_comm_offset,compress);// both with block stride loop iteration
GatherStartCommsSimd(source,dimension,shift,0x2,u_comm_buf,u_comm_offset);// both with block stride loop iteration
} else { } else {
printf("NO splice 0x1,2 \n");fflush(stdout); GatherStartComms(source,dimension,shift,0x1,u_comm_buf,u_comm_offset,compress);
GatherStartComms(source,dimension,shift,0x1,u_comm_buf,u_comm_offset); GatherStartComms(source,dimension,shift,0x2,u_comm_buf,u_comm_offset,compress);
GatherStartComms(source,dimension,shift,0x2,u_comm_buf,u_comm_offset);
} }
} }
} }
} }
} }
template<class vobj> void GatherStartComms(Lattice<vobj> &rhs,int dimension,int shift,int cbmask, template<class vobj,class cobj, class compressor>
std::vector<vobj,alignedAllocator<vobj> > &u_comm_buf, void GatherStartComms(Lattice<vobj> &rhs,int dimension,int shift,int cbmask,
int &u_comm_offset) std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,
int &u_comm_offset,compressor & compress)
{ {
typedef typename vobj::vector_type vector_type; typedef typename cobj::vector_type vector_type;
typedef typename vobj::scalar_type scalar_type; typedef typename cobj::scalar_type scalar_type;
GridBase *grid=_grid; GridBase *grid=_grid;
assert(rhs._grid==_grid); assert(rhs._grid==_grid);
@ -169,31 +259,26 @@ namespace Grid {
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension]; int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
std::vector<vobj,alignedAllocator<vobj> > send_buf(buffer_size); // hmm... std::vector<cobj,alignedAllocator<cobj> > send_buf(buffer_size); // hmm...
std::vector<vobj,alignedAllocator<vobj> > recv_buf(buffer_size); std::vector<cobj,alignedAllocator<cobj> > recv_buf(buffer_size);
int cb= (cbmask==0x2)? 1 : 0; int cb= (cbmask==0x2)? 1 : 0;
int sshift= _grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,cb); int sshift= _grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,cb);
for(int x=0;x<rd;x++){ for(int x=0;x<rd;x++){
printf("GatherStartComms x %d/%d\n",x,rd);fflush(stdout);
int offnode = ( x+sshift >= rd ); int offnode = ( x+sshift >= rd );
int sx = (x+sshift)%rd; int sx = (x+sshift)%rd;
int comm_proc = (x+sshift)/rd; int comm_proc = (x+sshift)/rd;
if (offnode) { if (offnode) {
printf("GatherStartComms offnode x %d\n",x);fflush(stdout);
int words = send_buf.size(); int words = send_buf.size();
if (cbmask != 0x3) words=words>>1; if (cbmask != 0x3) words=words>>1;
int bytes = words * sizeof(vobj); int bytes = words * sizeof(cobj);
printf("Gather_plane_simple dimension %d sx %d cbmask %d\n",dimension,sx,cbmask);fflush(stdout); Gather_plane_simple (rhs,send_buf,dimension,sx,cbmask,compress);
Gather_plane_simple (rhs,send_buf,dimension,sx,cbmask);
printf("GatherStartComms gathered offnode x %d\n",x);fflush(stdout);
int rank = _grid->_processor; int rank = _grid->_processor;
int recv_from_rank; int recv_from_rank;
@ -219,14 +304,14 @@ namespace Grid {
} }
template<class vobj> template<class vobj,class cobj, class compressor>
void GatherStartCommsSimd(Lattice<vobj> &rhs,int dimension,int shift,int cbmask, void GatherStartCommsSimd(Lattice<vobj> &rhs,int dimension,int shift,int cbmask,
std::vector<vobj,alignedAllocator<vobj> > &u_comm_buf, std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,
int &u_comm_offset) int &u_comm_offset,compressor &compress)
{ {
const int Nsimd = _grid->Nsimd(); const int Nsimd = _grid->Nsimd();
typedef typename vobj::vector_type vector_type; typedef typename cobj::vector_type vector_type;
typedef typename vobj::scalar_type scalar_type; typedef typename cobj::scalar_type scalar_type;
int fd = _grid->_fdimensions[dimension]; int fd = _grid->_fdimensions[dimension];
int rd = _grid->_rdimensions[dimension]; int rd = _grid->_rdimensions[dimension];
@ -245,7 +330,7 @@ namespace Grid {
// Simd direction uses an extract/merge pair // Simd direction uses an extract/merge pair
/////////////////////////////////////////////// ///////////////////////////////////////////////
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension]; int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
int words = sizeof(vobj)/sizeof(vector_type); int words = sizeof(cobj)/sizeof(vector_type);
/* FIXME ALTERNATE BUFFER DETERMINATION */ /* FIXME ALTERNATE BUFFER DETERMINATION */
std::vector<std::vector<scalar_type> > send_buf_extract(Nsimd,std::vector<scalar_type>(buffer_size*words) ); std::vector<std::vector<scalar_type> > send_buf_extract(Nsimd,std::vector<scalar_type>(buffer_size*words) );
@ -285,7 +370,7 @@ namespace Grid {
for(int i=0;i<Nsimd;i++){ for(int i=0;i<Nsimd;i++){
pointers[Nsimd-1-i] = (scalar_type *)&send_buf_extract[i][0]; pointers[Nsimd-1-i] = (scalar_type *)&send_buf_extract[i][0];
} }
Gather_plane_extract(rhs,pointers,dimension,sx,cbmask); Gather_plane_extract<cobj>(rhs,pointers,dimension,sx,cbmask,compress);
for(int i=0;i<Nsimd;i++){ for(int i=0;i<Nsimd;i++){

View File

@ -18,6 +18,7 @@ libGrid_a_SOURCES =\
Grid_init.cc\ Grid_init.cc\
stencil/Grid_stencil_common.cc\ stencil/Grid_stencil_common.cc\
qcd/Grid_qcd_dirac.cc\ qcd/Grid_qcd_dirac.cc\
qcd/Grid_qcd_wilson_dop.cc\
$(extra_sources) $(extra_sources)
# #

View File

@ -79,18 +79,18 @@ namespace Grid {
return ret; return ret;
} }
template<class left,class right> template<class left,class right>
inline auto operator + (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])> inline auto operator + (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]+rhs._odata[0])>
{ {
//NB mult performs conformable check. Do not reapply here for performance. //NB mult performs conformable check. Do not reapply here for performance.
Lattice<decltype(lhs._odata[0]*rhs._odata[0])> ret(rhs._grid); Lattice<decltype(lhs._odata[0]+rhs._odata[0])> ret(rhs._grid);
add(ret,lhs,rhs); add(ret,lhs,rhs);
return ret; return ret;
} }
template<class left,class right> template<class left,class right>
inline auto operator - (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])> inline auto operator - (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]-rhs._odata[0])>
{ {
//NB mult performs conformable check. Do not reapply here for performance. //NB mult performs conformable check. Do not reapply here for performance.
Lattice<decltype(lhs._odata[0]*rhs._odata[0])> ret(rhs._grid); Lattice<decltype(lhs._odata[0]-rhs._odata[0])> ret(rhs._grid);
sub(ret,lhs,rhs); sub(ret,lhs,rhs);
return ret; return ret;
} }
@ -107,9 +107,9 @@ namespace Grid {
return ret; return ret;
} }
template<class left,class right> template<class left,class right>
inline auto operator + (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])> inline auto operator + (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs+rhs._odata[0])>
{ {
Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid); Lattice<decltype(lhs+rhs._odata[0])> ret(rhs._grid);
#pragma omp parallel for #pragma omp parallel for
for(int ss=0;ss<rhs._grid->oSites(); ss++){ for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=lhs+rhs._odata[ss]; ret._odata[ss]=lhs+rhs._odata[ss];
@ -117,9 +117,9 @@ namespace Grid {
return ret; return ret;
} }
template<class left,class right> template<class left,class right>
inline auto operator - (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])> inline auto operator - (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs-rhs._odata[0])>
{ {
Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid); Lattice<decltype(lhs-rhs._odata[0])> ret(rhs._grid);
#pragma omp parallel for #pragma omp parallel for
for(int ss=0;ss<rhs._grid->oSites(); ss++){ for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=lhs-rhs._odata[ss]; ret._odata[ss]=lhs-rhs._odata[ss];
@ -137,9 +137,9 @@ namespace Grid {
return ret; return ret;
} }
template<class left,class right> template<class left,class right>
inline auto operator + (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)> inline auto operator + (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]+rhs)>
{ {
Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid); Lattice<decltype(lhs._odata[0]+rhs)> ret(lhs._grid);
#pragma omp parallel for #pragma omp parallel for
for(int ss=0;ss<rhs._grid->oSites(); ss++){ for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=lhs._odata[ss]+rhs; ret._odata[ss]=lhs._odata[ss]+rhs;
@ -147,9 +147,9 @@ namespace Grid {
return ret; return ret;
} }
template<class left,class right> template<class left,class right>
inline auto operator - (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)> inline auto operator - (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]-rhs)>
{ {
Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid); Lattice<decltype(lhs._odata[0]-rhs)> ret(lhs._grid);
#pragma omp parallel for #pragma omp parallel for
for(int ss=0;ss<rhs._grid->oSites(); ss++){ for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=lhs._odata[ss]-rhs; ret._odata[ss]=lhs._odata[ss]-rhs;

View File

@ -14,7 +14,7 @@ namespace Grid {
typedef typename vobj::scalar_type scalar; typedef typename vobj::scalar_type scalar;
typedef typename vobj::vector_type vector; typedef typename vobj::vector_type vector;
decltype(innerProduct(arg._odata[0],arg._odata[0])) vnrm=zero; decltype(innerProduct(arg._odata[0],arg._odata[0])) vnrm;
scalar nrm; scalar nrm;
//FIXME make this loop parallelisable //FIXME make this loop parallelisable
vnrm=zero; vnrm=zero;
@ -33,10 +33,11 @@ namespace Grid {
//->decltype(innerProduct(left._odata[0],right._odata[0])) //->decltype(innerProduct(left._odata[0],right._odata[0]))
{ {
typedef typename vobj::scalar_type scalar; typedef typename vobj::scalar_type scalar;
decltype(innerProduct(left._odata[0],right._odata[0])) vnrm=zero; decltype(innerProduct(left._odata[0],right._odata[0])) vnrm;
scalar nrm; scalar nrm;
//FIXME make this loop parallelisable //FIXME make this loop parallelisable
vnrm=zero;
for(int ss=0;ss<left._grid->oSites(); ss++){ for(int ss=0;ss<left._grid->oSites(); ss++){
vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]); vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]);
} }
@ -94,8 +95,10 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
int ld=grid->_ldimensions[orthogdim]; int ld=grid->_ldimensions[orthogdim];
int rd=grid->_rdimensions[orthogdim]; int rd=grid->_rdimensions[orthogdim];
sobj szero; szero=zero;
std::vector<vobj,alignedAllocator<vobj> > lvSum(rd); // will locally sum vectors first std::vector<vobj,alignedAllocator<vobj> > lvSum(rd); // will locally sum vectors first
std::vector<sobj> lsSum(ld,sobj(zero)); // sum across these down to scalars std::vector<sobj> lsSum(ld,szero); // sum across these down to scalars
std::vector<sobj> extracted(Nsimd); // splitting the SIMD std::vector<sobj> extracted(Nsimd); // splitting the SIMD
result.resize(fd); // And then global sum to return the same vector to every node for IO to file result.resize(fd); // And then global sum to return the same vector to every node for IO to file

View File

@ -26,6 +26,8 @@ namespace Grid {
} }
}; };
class GridRNGbase { class GridRNGbase {
public: public:
@ -62,6 +64,21 @@ namespace Grid {
} }
// real scalars are one component
template<class scalar,class distribution> void fillScalar(scalar &s,distribution &dist)
{
s=dist(_generators[0]);
}
template<class distribution> void fillScalar(ComplexF &s,distribution &dist)
{
s=ComplexF(dist(_generators[0]),dist(_generators[0]));
}
template<class distribution> void fillScalar(ComplexD &s,distribution &dist)
{
s=ComplexD(dist(_generators[0]),dist(_generators[0]));
}
template <class sobj,class distribution> inline void fill(sobj &l,distribution &dist){ template <class sobj,class distribution> inline void fill(sobj &l,distribution &dist){
typedef typename sobj::scalar_type scalar_type; typedef typename sobj::scalar_type scalar_type;
@ -71,13 +88,60 @@ namespace Grid {
scalar_type *buf = (scalar_type *) & l; scalar_type *buf = (scalar_type *) & l;
for(int idx=0;idx<words;idx++){ for(int idx=0;idx<words;idx++){
buf[idx] = dist(_generators[0]); fillScalar(buf[idx],dist);
} }
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}; };
template <class distribution> inline void fill(ComplexF &l,distribution &dist){
fillScalar(l,dist);
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
template <class distribution> inline void fill(ComplexD &l,distribution &dist){
fillScalar(l,dist);
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
template <class distribution> inline void fill(RealF &l,distribution &dist){
fillScalar(l,dist);
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
template <class distribution> inline void fill(RealD &l,distribution &dist){
fillScalar(l,dist);
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
// vector fill
template <class distribution> inline void fill(vComplexF &l,distribution &dist){
RealF *pointer=(RealF *)&l;
for(int i=0;i<2*vComplexF::Nsimd();i++){
fillScalar(pointer[i],dist);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
template <class distribution> inline void fill(vComplexD &l,distribution &dist){
RealD *pointer=(RealD *)&l;
for(int i=0;i<2*vComplexD::Nsimd();i++){
fillScalar(pointer[i],dist);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
template <class distribution> inline void fill(vRealF &l,distribution &dist){
RealF *pointer=(RealF *)&l;
for(int i=0;i<vRealF::Nsimd();i++){
fillScalar(pointer[i],dist);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
template <class distribution> inline void fill(vRealD &l,distribution &dist){
RealD *pointer=(RealD *)&l;
for(int i=0;i<vRealD::Nsimd();i++){
fillScalar(pointer[i],dist);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
void SeedRandomDevice(void){ void SeedRandomDevice(void){
std::random_device rd; std::random_device rd;
Seed(rd); Seed(rd);
@ -186,7 +250,6 @@ namespace Grid {
}; };
template <class vobj> inline void random(GridParallelRNG &rng,Lattice<vobj> &l){ template <class vobj> inline void random(GridParallelRNG &rng,Lattice<vobj> &l){
rng.fill(l,rng._uniform); rng.fill(l,rng._uniform);
} }

View File

@ -11,21 +11,21 @@ namespace Grid {
// multiplication by fundamental scalar type // multiplication by fundamental scalar type
template<class l,int N> inline iScalar<l> operator * (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs) template<class l,int N> inline iScalar<l> operator * (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
{ {
typename iScalar<l>::tensor_reduced srhs(rhs); typename iScalar<l>::tensor_reduced srhs; srhs=rhs;
return lhs*srhs; return lhs*srhs;
} }
template<class l,int N> inline iScalar<l> operator * (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs) { return rhs*lhs; } template<class l,int N> inline iScalar<l> operator * (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs) { return rhs*lhs; }
template<class l,int N> inline iVector<l,N> operator * (const iVector<l,N>& lhs,const typename iScalar<l>::scalar_type rhs) template<class l,int N> inline iVector<l,N> operator * (const iVector<l,N>& lhs,const typename iScalar<l>::scalar_type rhs)
{ {
typename iVector<l,N>::tensor_reduced srhs(rhs); typename iVector<l,N>::tensor_reduced srhs; srhs=rhs;
return lhs*srhs; return lhs*srhs;
} }
template<class l,int N> inline iVector<l,N> operator * (const typename iScalar<l>::scalar_type lhs,const iVector<l,N>& rhs) { return rhs*lhs; } template<class l,int N> inline iVector<l,N> operator * (const typename iScalar<l>::scalar_type lhs,const iVector<l,N>& rhs) { return rhs*lhs; }
template<class l,int N> inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type &rhs) template<class l,int N> inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type &rhs)
{ {
typename iMatrix<l,N>::tensor_reduced srhs(rhs); typename iMatrix<l,N>::tensor_reduced srhs; srhs=rhs;
return lhs*srhs; return lhs*srhs;
} }
template<class l,int N> inline iMatrix<l,N> operator * (const typename iScalar<l>::scalar_type & lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; } template<class l,int N> inline iMatrix<l,N> operator * (const typename iScalar<l>::scalar_type & lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
@ -35,24 +35,24 @@ template<class l,int N> inline iMatrix<l,N> operator * (const typename iScalar<l
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
template<class l> inline iScalar<l> operator * (const iScalar<l>& lhs,double rhs) template<class l> inline iScalar<l> operator * (const iScalar<l>& lhs,double rhs)
{ {
typename iScalar<l>::scalar_type t(rhs); typename iScalar<l>::scalar_type t; t=rhs;
typename iScalar<l>::tensor_reduced srhs(t); typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs; return lhs*srhs;
} }
template<class l> inline iScalar<l> operator * (double lhs,const iScalar<l>& rhs) { return rhs*lhs; } template<class l> inline iScalar<l> operator * (double lhs,const iScalar<l>& rhs) { return rhs*lhs; }
template<class l,int N> inline iVector<l,N> operator * (const iVector<l,N>& lhs,double rhs) template<class l,int N> inline iVector<l,N> operator * (const iVector<l,N>& lhs,double rhs)
{ {
typename iScalar<l>::scalar_type t(rhs); typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs(t); typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs; return lhs*srhs;
} }
template<class l,int N> inline iVector<l,N> operator * (double lhs,const iVector<l,N>& rhs) { return rhs*lhs; } template<class l,int N> inline iVector<l,N> operator * (double lhs,const iVector<l,N>& rhs) { return rhs*lhs; }
template<class l,int N> inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,double rhs) template<class l,int N> inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,double rhs)
{ {
typename iScalar<l>::scalar_type t(rhs); typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs(t); typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs; return lhs*srhs;
} }
template<class l,int N> inline iMatrix<l,N> operator * (double lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; } template<class l,int N> inline iMatrix<l,N> operator * (double lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
@ -62,24 +62,26 @@ template<class l,int N> inline iMatrix<l,N> operator * (double lhs,const iMatrix
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
template<class l> inline iScalar<l> operator * (const iScalar<l>& lhs,ComplexD rhs) template<class l> inline iScalar<l> operator * (const iScalar<l>& lhs,ComplexD rhs)
{ {
typename iScalar<l>::scalar_type t(rhs); typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs(t); typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs; return lhs*srhs;
} }
template<class l> inline iScalar<l> operator * (ComplexD lhs,const iScalar<l>& rhs) { return rhs*lhs; } template<class l> inline iScalar<l> operator * (ComplexD lhs,const iScalar<l>& rhs) { return rhs*lhs; }
template<class l,int N> inline iVector<l,N> operator * (const iVector<l,N>& lhs,ComplexD rhs) template<class l,int N> inline iVector<l,N> operator * (const iVector<l,N>& lhs,ComplexD rhs)
{ {
typename iScalar<l>::scalar_type t(rhs); typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs(t); typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs; return lhs*srhs;
} }
template<class l,int N> inline iVector<l,N> operator * (ComplexD lhs,const iVector<l,N>& rhs) { return rhs*lhs; } template<class l,int N> inline iVector<l,N> operator * (ComplexD lhs,const iVector<l,N>& rhs) { return rhs*lhs; }
template<class l,int N> inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,ComplexD rhs) template<class l,int N> inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,ComplexD rhs)
{ {
typename iScalar<l>::scalar_type t(rhs); typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs(t); typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs; return lhs*srhs;
} }
template<class l,int N> inline iMatrix<l,N> operator * (ComplexD lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; } template<class l,int N> inline iMatrix<l,N> operator * (ComplexD lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
@ -89,24 +91,24 @@ template<class l,int N> inline iMatrix<l,N> operator * (ComplexD lhs,const iMatr
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
template<class l> inline iScalar<l> operator * (const iScalar<l>& lhs,Integer rhs) template<class l> inline iScalar<l> operator * (const iScalar<l>& lhs,Integer rhs)
{ {
typename iScalar<l>::scalar_type t(rhs); typename iScalar<l>::scalar_type t; t=rhs;
typename iScalar<l>::tensor_reduced srhs(t); typename iScalar<l>::tensor_reduced srhs; srhs=t;
return lhs*srhs; return lhs*srhs;
} }
template<class l> inline iScalar<l> operator * (Integer lhs,const iScalar<l>& rhs) { return rhs*lhs; } template<class l> inline iScalar<l> operator * (Integer lhs,const iScalar<l>& rhs) { return rhs*lhs; }
template<class l,int N> inline iVector<l,N> operator * (const iVector<l,N>& lhs,Integer rhs) template<class l,int N> inline iVector<l,N> operator * (const iVector<l,N>& lhs,Integer rhs)
{ {
typename iScalar<l>::scalar_type t(rhs); typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs(t); typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs; return lhs*srhs;
} }
template<class l,int N> inline iVector<l,N> operator * (Integer lhs,const iVector<l,N>& rhs) { return rhs*lhs; } template<class l,int N> inline iVector<l,N> operator * (Integer lhs,const iVector<l,N>& rhs) { return rhs*lhs; }
template<class l,int N> inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,Integer rhs) template<class l,int N> inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,Integer rhs)
{ {
typename iScalar<l>::scalar_type t(rhs); typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs(t); typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs; return lhs*srhs;
} }
template<class l,int N> inline iMatrix<l,N> operator * (Integer lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; } template<class l,int N> inline iMatrix<l,N> operator * (Integer lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
@ -118,14 +120,14 @@ template<class l,int N> inline iMatrix<l,N> operator * (Integer lhs,const iMatri
/////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////
template<class l,int N> inline iScalar<l> operator + (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs) template<class l,int N> inline iScalar<l> operator + (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
{ {
typename iScalar<l>::tensor_reduced srhs(rhs); typename iScalar<l>::tensor_reduced srhs; srhs=rhs;
return lhs+srhs; return lhs+srhs;
} }
template<class l,int N> inline iScalar<l> operator + (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs) { return rhs+lhs; } template<class l,int N> inline iScalar<l> operator + (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs) { return rhs+lhs; }
template<class l,int N> inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type rhs) template<class l,int N> inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type rhs)
{ {
typename iMatrix<l,N>::tensor_reduced srhs(rhs); typename iMatrix<l,N>::tensor_reduced srhs; srhs=rhs;
return lhs+srhs; return lhs+srhs;
} }
template<class l,int N> inline iMatrix<l,N> operator + (const typename iScalar<l>::scalar_type lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; } template<class l,int N> inline iMatrix<l,N> operator + (const typename iScalar<l>::scalar_type lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; }
@ -135,16 +137,16 @@ template<class l,int N> inline iMatrix<l,N> operator + (const typename iScalar<l
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
template<class l> inline iScalar<l> operator + (const iScalar<l>& lhs,double rhs) template<class l> inline iScalar<l> operator + (const iScalar<l>& lhs,double rhs)
{ {
typename iScalar<l>::scalar_type t(rhs); typename iScalar<l>::scalar_type t; t=rhs;
typename iScalar<l>::tensor_reduced srhs(t); typename iScalar<l>::tensor_reduced srhs; srhs=t;
return lhs+srhs; return lhs+srhs;
} }
template<class l> inline iScalar<l> operator + (double lhs,const iScalar<l>& rhs) { return rhs+lhs; } template<class l> inline iScalar<l> operator + (double lhs,const iScalar<l>& rhs) { return rhs+lhs; }
template<class l,int N> inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,double rhs) template<class l,int N> inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,double rhs)
{ {
typename iScalar<l>::scalar_type t(rhs); typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs(t); typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs+srhs; return lhs+srhs;
} }
template<class l,int N> inline iMatrix<l,N> operator + (double lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; } template<class l,int N> inline iMatrix<l,N> operator + (double lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; }
@ -155,8 +157,8 @@ template<class l,int N> inline iMatrix<l,N> operator + (double lhs,const iMatrix
template<class l> inline iScalar<l> operator + (const iScalar<l>& lhs,Integer rhs) template<class l> inline iScalar<l> operator + (const iScalar<l>& lhs,Integer rhs)
{ {
typename iScalar<l>::scalar_type t(rhs); typename iScalar<l>::scalar_type t; t=rhs;
typename iScalar<l>::tensor_reduced srhs(t); typename iScalar<l>::tensor_reduced srhs; srhs=t;
return lhs+srhs; return lhs+srhs;
} }
@ -164,8 +166,8 @@ template<class l> inline iScalar<l> operator + (Integer lhs,const iScalar<l>& rh
template<class l,int N> inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,Integer rhs) template<class l,int N> inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,Integer rhs)
{ {
typename iScalar<l>::scalar_type t(rhs); typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs(t); typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs+srhs; return lhs+srhs;
} }
template<class l,int N> inline iMatrix<l,N> operator + (Integer lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; } template<class l,int N> inline iMatrix<l,N> operator + (Integer lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; }
@ -176,23 +178,23 @@ template<class l,int N> inline iMatrix<l,N> operator + (Integer lhs,const iMatri
/////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////
template<class l,int N> inline iScalar<l> operator - (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs) template<class l,int N> inline iScalar<l> operator - (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
{ {
typename iScalar<l>::tensor_reduced srhs(rhs); typename iScalar<l>::tensor_reduced srhs; srhs=rhs;
return lhs-srhs; return lhs-srhs;
} }
template<class l,int N> inline iScalar<l> operator - (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs) template<class l,int N> inline iScalar<l> operator - (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs)
{ {
typename iScalar<l>::tensor_reduced slhs(lhs); typename iScalar<l>::tensor_reduced slhs;slhs=lhs;
return slhs-rhs; return slhs-rhs;
} }
template<class l,int N> inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type rhs) template<class l,int N> inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type rhs)
{ {
typename iScalar<l>::tensor_reduced srhs(rhs); typename iScalar<l>::tensor_reduced srhs; srhs=rhs;
return lhs-srhs; return lhs-srhs;
} }
template<class l,int N> inline iMatrix<l,N> operator - (const typename iScalar<l>::scalar_type lhs,const iMatrix<l,N>& rhs) template<class l,int N> inline iMatrix<l,N> operator - (const typename iScalar<l>::scalar_type lhs,const iMatrix<l,N>& rhs)
{ {
typename iScalar<l>::tensor_reduced slhs(lhs); typename iScalar<l>::tensor_reduced slhs;slhs=lhs;
return slhs-rhs; return slhs-rhs;
} }
@ -201,27 +203,27 @@ template<class l,int N> inline iMatrix<l,N> operator - (const typename iScalar<l
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
template<class l> inline iScalar<l> operator - (const iScalar<l>& lhs,double rhs) template<class l> inline iScalar<l> operator - (const iScalar<l>& lhs,double rhs)
{ {
typename iScalar<l>::scalar_type t(rhs); typename iScalar<l>::scalar_type t; t=rhs;
typename iScalar<l>::tensor_reduced srhs(t); typename iScalar<l>::tensor_reduced srhs; srhs=t;
return lhs-srhs; return lhs-srhs;
} }
template<class l> inline iScalar<l> operator - (double lhs,const iScalar<l>& rhs) template<class l> inline iScalar<l> operator - (double lhs,const iScalar<l>& rhs)
{ {
typename iScalar<l>::scalar_type t(lhs); typename iScalar<l>::scalar_type t(lhs);
typename iScalar<l>::tensor_reduced slhs(t); typename iScalar<l>::tensor_reduced slhs;slhs=t;
return slhs-rhs; return slhs-rhs;
} }
template<class l,int N> inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,double rhs) template<class l,int N> inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,double rhs)
{ {
typename iScalar<l>::scalar_type t(rhs); typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs(t); typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs-srhs; return lhs-srhs;
} }
template<class l,int N> inline iMatrix<l,N> operator - (double lhs,const iMatrix<l,N>& rhs) template<class l,int N> inline iMatrix<l,N> operator - (double lhs,const iMatrix<l,N>& rhs)
{ {
typename iScalar<l>::scalar_type t(lhs); typename iScalar<l>::scalar_type t(lhs);
typename iScalar<l>::tensor_reduced slhs(t); typename iScalar<l>::tensor_reduced slhs;slhs=t;
return slhs-rhs; return slhs-rhs;
} }
@ -230,26 +232,26 @@ template<class l,int N> inline iMatrix<l,N> operator - (double lhs,const iMatrix
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
template<class l> inline iScalar<l> operator - (const iScalar<l>& lhs,Integer rhs) template<class l> inline iScalar<l> operator - (const iScalar<l>& lhs,Integer rhs)
{ {
typename iScalar<l>::scalar_type t(rhs); typename iScalar<l>::scalar_type t; t=rhs;
typename iScalar<l>::tensor_reduced srhs(t); typename iScalar<l>::tensor_reduced srhs; srhs=t;
return lhs-srhs; return lhs-srhs;
} }
template<class l> inline iScalar<l> operator - (Integer lhs,const iScalar<l>& rhs) template<class l> inline iScalar<l> operator - (Integer lhs,const iScalar<l>& rhs)
{ {
typename iScalar<l>::scalar_type t(lhs); typename iScalar<l>::scalar_type t;t=lhs;
typename iScalar<l>::tensor_reduced slhs(t); typename iScalar<l>::tensor_reduced slhs;slhs=t;
return slhs-rhs; return slhs-rhs;
} }
template<class l,int N> inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,Integer rhs) template<class l,int N> inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,Integer rhs)
{ {
typename iScalar<l>::scalar_type t(rhs); typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs(t); typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs-srhs; return lhs-srhs;
} }
template<class l,int N> inline iMatrix<l,N> operator - (Integer lhs,const iMatrix<l,N>& rhs) template<class l,int N> inline iMatrix<l,N> operator - (Integer lhs,const iMatrix<l,N>& rhs)
{ {
typename iScalar<l>::scalar_type t(lhs); typename iScalar<l>::scalar_type t;t=lhs;
typename iScalar<l>::tensor_reduced slhs(t); typename iScalar<l>::tensor_reduced slhs;slhs=t;
return slhs-rhs; return slhs-rhs;
} }

View File

@ -17,7 +17,8 @@ namespace Grid {
auto innerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0],rhs._internal[0]))> auto innerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0],rhs._internal[0]))>
{ {
typedef decltype(innerProduct(lhs._internal[0],rhs._internal[0])) ret_t; typedef decltype(innerProduct(lhs._internal[0],rhs._internal[0])) ret_t;
iScalar<ret_t> ret=zero; iScalar<ret_t> ret;
ret=zero;
for(int c1=0;c1<N;c1++){ for(int c1=0;c1<N;c1++){
ret._internal += innerProduct(lhs._internal[c1],rhs._internal[c1]); ret._internal += innerProduct(lhs._internal[c1],rhs._internal[c1]);
} }
@ -27,8 +28,9 @@ namespace Grid {
auto innerProduct (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0]))> auto innerProduct (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0]))>
{ {
typedef decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0])) ret_t; typedef decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0])) ret_t;
iScalar<ret_t> ret=zero; iScalar<ret_t> ret;
iScalar<ret_t> tmp; iScalar<ret_t> tmp;
ret=zero;
for(int c1=0;c1<N;c1++){ for(int c1=0;c1<N;c1++){
for(int c2=0;c2<N;c2++){ for(int c2=0;c2<N;c2++){
ret._internal+=innerProduct(lhs._internal[c1][c2],rhs._internal[c1][c2]); ret._internal+=innerProduct(lhs._internal[c1][c2],rhs._internal[c1][c2]);

View File

@ -8,6 +8,16 @@ namespace Grid {
// These can be composed to form tensor products of internal indices. // These can be composed to form tensor products of internal indices.
/////////////////////////////////////////////////// ///////////////////////////////////////////////////
// It is useful to NOT have any constructors
// so that these classes assert "is_pod<class> == true"
// because then the standard C++ valarray container eliminates fill overhead on new allocation and
// non-move copying.
//
// However note that doing this eliminates some syntactical sugar such as
// calling the constructor explicitly or implicitly
//
#define TENSOR_IS_POD
template<class vtype> class iScalar template<class vtype> class iScalar
{ {
public: public:
@ -25,16 +35,22 @@ public:
// Scalar no action // Scalar no action
// template<int Level> using tensor_reduce_level = typename iScalar<GridTypeMapper<vtype>::tensor_reduce_level<Level> >; // template<int Level> using tensor_reduce_level = typename iScalar<GridTypeMapper<vtype>::tensor_reduce_level<Level> >;
iScalar(){}; #ifndef TENSOR_IS_POD
iScalar(){;};
iScalar(scalar_type s) : _internal(s) {};// recurse down and hit the constructor for vector_type iScalar(scalar_type s) : _internal(s) {};// recurse down and hit the constructor for vector_type
iScalar(const Zero &z){ *this = zero; };
iScalar(const Zero &z){ *this = zero; }; #endif
iScalar<vtype> & operator= (const Zero &hero){ iScalar<vtype> & operator= (const Zero &hero){
zeroit(*this); zeroit(*this);
return *this; return *this;
} }
iScalar<vtype> & operator= (const scalar_type s){
_internal=s;
return *this;
}
friend void zeroit(iScalar<vtype> &that){ friend void zeroit(iScalar<vtype> &that){
zeroit(that._internal); zeroit(that._internal);
} }
@ -114,8 +130,10 @@ public:
enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1}; enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1};
iVector(const Zero &z){ *this = zero; }; #ifndef TENSOR_IS_POD
iVector() {};// Empty constructure iVector(const Zero &z){ *this = zero; };
iVector() {};// Empty constructure
#endif
iVector<vtype,N> & operator= (const Zero &hero){ iVector<vtype,N> & operator= (const Zero &hero){
zeroit(*this); zeroit(*this);
@ -185,8 +203,11 @@ public:
enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1}; enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1};
#ifndef TENSOR_IS_POD
iMatrix(const Zero &z){ *this = zero; }; iMatrix(const Zero &z){ *this = zero; };
iMatrix() {}; iMatrix() {};
#endif
iMatrix<vtype,N> & operator= (const Zero &hero){ iMatrix<vtype,N> & operator= (const Zero &hero){
zeroit(*this); zeroit(*this);
return *this; return *this;

View File

@ -8,6 +8,7 @@ namespace QCD {
static const int Ns=4; static const int Ns=4;
static const int Nd=4; static const int Nd=4;
static const int Nhs=2; // half spinor static const int Nhs=2; // half spinor
static const int Nds=8; // double stored gauge field
static const int CbRed =0; static const int CbRed =0;
static const int CbBlack=1; static const int CbBlack=1;
@ -28,79 +29,216 @@ namespace QCD {
// //
// That probably makes for GridRedBlack4dCartesian grid. // That probably makes for GridRedBlack4dCartesian grid.
template<typename vtype> using iSinglet = iScalar<iScalar<iScalar<vtype> > >; // s,sp,c,spc,lc
template<typename vtype> using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >; template<typename vtype> using iSinglet = iScalar<iScalar<iScalar<vtype> > >;
template<typename vtype> using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >; template<typename vtype> using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > > ; template<typename vtype> using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
template<typename vtype> using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ; template<typename vtype> using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
template<typename vtype> using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;
template<typename vtype> using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ;
template<typename vtype> using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
template<typename vtype> using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
template<typename vtype> using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
// Spin matrix
typedef iSpinMatrix<Complex > SpinMatrix;
typedef iSpinMatrix<ComplexF > SpinMatrixF;
typedef iSpinMatrix<ComplexD > SpinMatrixD;
template<typename vtype> using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >; typedef iSpinMatrix<vComplex > vSpinMatrix;
template<typename vtype> using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >; typedef iSpinMatrix<vComplexF> vSpinMatrixF;
template<typename vtype> using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >; typedef iSpinMatrix<vComplexD> vSpinMatrixD;
template<typename vtype> using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >; // Colour Matrix
template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >; typedef iColourMatrix<Complex > ColourMatrix;
typedef iColourMatrix<ComplexF > ColourMatrixF;
typedef iColourMatrix<ComplexD > ColourMatrixD;
typedef iSpinMatrix<Complex > SpinMatrix; typedef iColourMatrix<vComplex > vColourMatrix;
typedef iColourMatrix<Complex > ColourMatrix; typedef iColourMatrix<vComplexF> vColourMatrixF;
typedef iSpinColourMatrix<Complex > SpinColourMatrix; typedef iColourMatrix<vComplexD> vColourMatrixD;
typedef iLorentzColourMatrix<Complex > LorentzColourMatrix;
// SpinColour matrix
typedef iSpinColourMatrix<Complex > SpinColourMatrix;
typedef iSpinColourMatrix<ComplexF > SpinColourMatrixF;
typedef iSpinColourMatrix<ComplexD > SpinColourMatrixD;
typedef iSpinColourMatrix<vComplex > vSpinColourMatrix;
typedef iSpinColourMatrix<vComplexF> vSpinColourMatrixF;
typedef iSpinColourMatrix<vComplexD> vSpinColourMatrixD;
// LorentzColour
typedef iLorentzColourMatrix<Complex > LorentzColourMatrix;
typedef iLorentzColourMatrix<ComplexF > LorentzColourMatrixF; typedef iLorentzColourMatrix<ComplexF > LorentzColourMatrixF;
typedef iLorentzColourMatrix<ComplexD > LorentzColourMatrixD; typedef iLorentzColourMatrix<ComplexD > LorentzColourMatrixD;
typedef iSpinVector<Complex > SpinVector;
typedef iColourVector<Complex > ColourVector;
typedef iSpinColourVector<Complex > SpinColourVector;
typedef iHalfSpinVector<Complex > HalfSpinVector;
typedef iHalfSpinColourVector<Complex > HalfSpinColourVector;
typedef iSpinMatrix<vComplex > vSpinMatrix;
typedef iColourMatrix<vComplex > vColourMatrix;
typedef iSpinColourMatrix<vComplex > vSpinColourMatrix;
typedef iLorentzColourMatrix<vComplex > vLorentzColourMatrix; typedef iLorentzColourMatrix<vComplex > vLorentzColourMatrix;
typedef iLorentzColourMatrix<vComplexF> vLorentzColourMatrixF;
typedef iLorentzColourMatrix<vComplexD> vLorentzColourMatrixD;
// DoubleStored gauge field
typedef iDoubleStoredColourMatrix<Complex > DoubleStoredColourMatrix;
typedef iDoubleStoredColourMatrix<ComplexF > DoubleStoredColourMatrixF;
typedef iDoubleStoredColourMatrix<ComplexD > DoubleStoredColourMatrixD;
typedef iDoubleStoredColourMatrix<vComplex > vDoubleStoredColourMatrix;
typedef iDoubleStoredColourMatrix<vComplexF> vDoubleStoredColourMatrixF;
typedef iDoubleStoredColourMatrix<vComplexD> vDoubleStoredColourMatrixD;
// Spin vector
typedef iSpinVector<Complex > SpinVector;
typedef iSpinVector<ComplexF> SpinVectorF;
typedef iSpinVector<ComplexD> SpinVectorD;
typedef iSpinVector<vComplex > vSpinVector; typedef iSpinVector<vComplex > vSpinVector;
typedef iSpinVector<vComplexF> vSpinVectorF;
typedef iSpinVector<vComplexD> vSpinVectorD;
// Colour vector
typedef iColourVector<Complex > ColourVector;
typedef iColourVector<ComplexF> ColourVectorF;
typedef iColourVector<ComplexD> ColourVectorD;
typedef iColourVector<vComplex > vColourVector; typedef iColourVector<vComplex > vColourVector;
typedef iColourVector<vComplexF> vColourVectorF;
typedef iColourVector<vComplexD> vColourVectorD;
// SpinColourVector
typedef iSpinColourVector<Complex > SpinColourVector;
typedef iSpinColourVector<ComplexF> SpinColourVectorF;
typedef iSpinColourVector<ComplexD> SpinColourVectorD;
typedef iSpinColourVector<vComplex > vSpinColourVector; typedef iSpinColourVector<vComplex > vSpinColourVector;
typedef iSpinColourVector<vComplexF> vSpinColourVectorF;
typedef iSpinColourVector<vComplexD> vSpinColourVectorD;
// HalfSpin vector
typedef iHalfSpinVector<Complex > HalfSpinVector;
typedef iHalfSpinVector<ComplexF> HalfSpinVectorF;
typedef iHalfSpinVector<ComplexD> HalfSpinVectorD;
typedef iHalfSpinVector<vComplex > vHalfSpinVector; typedef iHalfSpinVector<vComplex > vHalfSpinVector;
typedef iHalfSpinColourVector<vComplex > vHalfSpinColourVector; typedef iHalfSpinVector<vComplexF> vHalfSpinVectorF;
typedef iHalfSpinVector<vComplexD> vHalfSpinVectorD;
// HalfSpinColour vector
typedef iHalfSpinColourVector<Complex > HalfSpinColourVector;
typedef iHalfSpinColourVector<ComplexF> HalfSpinColourVectorF;
typedef iHalfSpinColourVector<ComplexD> HalfSpinColourVectorD;
typedef iHalfSpinColourVector<vComplex > vHalfSpinColourVector;
typedef iHalfSpinColourVector<vComplexF> vHalfSpinColourVectorF;
typedef iHalfSpinColourVector<vComplexD> vHalfSpinColourVectorD;
// singlets
typedef iSinglet<Complex > TComplex; // FIXME This is painful. Tensor singlet complex type. typedef iSinglet<Complex > TComplex; // FIXME This is painful. Tensor singlet complex type.
typedef iSinglet<vComplex > vTComplex; // what if we don't know the tensor structure typedef iSinglet<ComplexF> TComplexF; // FIXME This is painful. Tensor singlet complex type.
typedef iSinglet<ComplexD> TComplexD; // FIXME This is painful. Tensor singlet complex type.
typedef iSinglet<vComplex > vTComplex ; // what if we don't know the tensor structure
typedef iSinglet<vComplexF> vTComplexF; // what if we don't know the tensor structure
typedef iSinglet<vComplexD> vTComplexD; // what if we don't know the tensor structure
typedef iSinglet<Real > TReal; // Shouldn't need these; can I make it work without? typedef iSinglet<Real > TReal; // Shouldn't need these; can I make it work without?
typedef iSinglet<RealF> TRealF; // Shouldn't need these; can I make it work without?
typedef iSinglet<RealD> TRealD; // Shouldn't need these; can I make it work without?
typedef iSinglet<vReal > vTReal; typedef iSinglet<vReal > vTReal;
typedef iSinglet<vInteger > vTInteger; typedef iSinglet<vRealF> vTRealF;
typedef iSinglet<vRealD> vTRealD;
typedef iSinglet<vInteger> vTInteger;
typedef iSinglet<Integer > TInteger; typedef iSinglet<Integer > TInteger;
typedef Lattice<vTReal> LatticeReal;
typedef Lattice<vTComplex> LatticeComplex;
typedef Lattice<vTInteger> LatticeInteger; // Predicates for "where"
typedef Lattice<vColourMatrix> LatticeColourMatrix;
typedef Lattice<vSpinMatrix> LatticeSpinMatrix;
typedef Lattice<vSpinColourMatrix> LatticeSpinColourMatrix;
typedef Lattice<vSpinVector> LatticeSpinVector; // Lattices of these
typedef Lattice<vColourVector> LatticeColourVector; typedef Lattice<vColourMatrix> LatticeColourMatrix;
typedef Lattice<vSpinColourVector> LatticeSpinColourVector; typedef Lattice<vColourMatrixF> LatticeColourMatrixF;
typedef Lattice<vHalfSpinVector> LatticeHalfSpinVector; typedef Lattice<vColourMatrixD> LatticeColourMatrixD;
typedef Lattice<vHalfSpinColourVector> LatticeHalfSpinColourVector;
typedef Lattice<vSpinMatrix> LatticeSpinMatrix;
typedef Lattice<vSpinMatrixF> LatticeSpinMatrixF;
typedef Lattice<vSpinMatrixD> LatticeSpinMatrixD;
typedef Lattice<vSpinColourMatrix> LatticeSpinColourMatrix;
typedef Lattice<vSpinColourMatrixF> LatticeSpinColourMatrixF;
typedef Lattice<vSpinColourMatrixD> LatticeSpinColourMatrixD;
typedef Lattice<vLorentzColourMatrix> LatticeLorentzColourMatrix;
typedef Lattice<vLorentzColourMatrixF> LatticeLorentzColourMatrixF;
typedef Lattice<vLorentzColourMatrixD> LatticeLorentzColourMatrixD;
// DoubleStored gauge field
typedef Lattice<vDoubleStoredColourMatrix> LatticeDoubleStoredColourMatrix;
typedef Lattice<vDoubleStoredColourMatrixF> LatticeDoubleStoredColourMatrixF;
typedef Lattice<vDoubleStoredColourMatrixD> LatticeDoubleStoredColourMatrixD;
typedef Lattice<vSpinVector> LatticeSpinVector;
typedef Lattice<vSpinVectorF> LatticeSpinVectorF;
typedef Lattice<vSpinVectorD> LatticeSpinVectorD;
typedef Lattice<vColourVector> LatticeColourVector;
typedef Lattice<vColourVectorF> LatticeColourVectorF;
typedef Lattice<vColourVectorD> LatticeColourVectorD;
typedef Lattice<vSpinColourVector> LatticeSpinColourVector;
typedef Lattice<vSpinColourVectorF> LatticeSpinColourVectorF;
typedef Lattice<vSpinColourVectorD> LatticeSpinColourVectorD;
typedef Lattice<vHalfSpinVector> LatticeHalfSpinVector;
typedef Lattice<vHalfSpinVectorF> LatticeHalfSpinVectorF;
typedef Lattice<vHalfSpinVectorD> LatticeHalfSpinVectorD;
typedef Lattice<vHalfSpinColourVector> LatticeHalfSpinColourVector;
typedef Lattice<vHalfSpinColourVectorF> LatticeHalfSpinColourVectorF;
typedef Lattice<vHalfSpinColourVectorD> LatticeHalfSpinColourVectorD;
typedef Lattice<vTReal> LatticeReal;
typedef Lattice<vTRealF> LatticeRealF;
typedef Lattice<vTRealD> LatticeRealD;
typedef Lattice<vTComplex> LatticeComplex;
typedef Lattice<vTComplexF> LatticeComplexF;
typedef Lattice<vTComplexD> LatticeComplexD;
typedef Lattice<vTInteger> LatticeInteger; // Predicates for "where"
/////////////////////////////////////////// ///////////////////////////////////////////
// Physical names for things // Physical names for things
/////////////////////////////////////////// ///////////////////////////////////////////
typedef Lattice<vHalfSpinColourVector> LatticeHalfFermion; typedef LatticeHalfSpinColourVector LatticeHalfFermion;
typedef Lattice<vSpinColourVector> LatticeFermion; typedef LatticeHalfSpinColourVectorF LatticeHalfFermionF;
typedef LatticeHalfSpinColourVectorF LatticeHalfFermionD;
typedef Lattice<vSpinColourMatrix> LatticePropagator; typedef LatticeSpinColourVector LatticeFermion;
typedef Lattice<vLorentzColourMatrix> LatticeGaugeField; typedef LatticeSpinColourVectorF LatticeFermionF;
typedef LatticeSpinColourVectorD LatticeFermionD;
typedef LatticeSpinColourMatrix LatticePropagator;
typedef LatticeSpinColourMatrixF LatticePropagatorF;
typedef LatticeSpinColourMatrixD LatticePropagatorD;
typedef LatticeLorentzColourMatrix LatticeGaugeField;
typedef LatticeLorentzColourMatrixF LatticeGaugeFieldF;
typedef LatticeLorentzColourMatrixD LatticeGaugeFieldD;
typedef LatticeDoubleStoredColourMatrix LatticeDoubledGaugeField;
typedef LatticeDoubleStoredColourMatrixF LatticeDoubledGaugeFieldF;
typedef LatticeDoubleStoredColourMatrixD LatticeDoubledGaugeFieldD;
// Uhgg... typing this hurt ;) // Uhgg... typing this hurt ;)
// (my keyboard got burning hot when I typed this, must be the anti-Fermion) // (my keyboard got burning hot when I typed this, must be the anti-Fermion)
typedef Lattice<vColourVector> LatticeStaggeredFermion; typedef Lattice<vColourVector> LatticeStaggeredFermion;
typedef Lattice<vColourMatrix> LatticeStaggeredPropagator; typedef Lattice<vColourVectorF> LatticeStaggeredFermionF;
typedef Lattice<vColourVectorD> LatticeStaggeredFermionD;
typedef Lattice<vColourMatrix> LatticeStaggeredPropagator;
typedef Lattice<vColourMatrixF> LatticeStaggeredPropagatorF;
typedef Lattice<vColourMatrixD> LatticeStaggeredPropagatorD;
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
// Peek and Poke named after physics attributes // Peek and Poke named after physics attributes
@ -157,11 +295,14 @@ namespace QCD {
return peekIndex<LorentzIndex>(rhs,i,j); return peekIndex<LorentzIndex>(rhs,i,j);
} }
// FIXME transpose Colour, transpose Spin, traceColour traceSpin
} //namespace QCD } //namespace QCD
} // Grid } // Grid
#include <qcd/Grid_qcd_dirac.h> #include <qcd/Grid_qcd_dirac.h>
#include <qcd/Grid_qcd_2spinor.h> #include <qcd/Grid_qcd_2spinor.h>
//#include <qcd/Grid_qcd_pauli.h> //#include <qcd/Grid_qcd_pauli.h>
#include <qcd/Grid_qcd_wilson_dop.h>
#endif #endif

View File

@ -17,8 +17,14 @@ namespace QCD {
GammaZ, GammaZ,
GammaT, GammaT,
Gamma5, Gamma5,
// GammaXGamma5, MinusIdentity,
// GammaYGamma5, MinusGammaX,
MinusGammaY,
MinusGammaZ,
MinusGammaT,
MinusGamma5
// GammaXGamma5, // Rest are composite (willing to take hit for two calls sequentially)
// GammaYGamma5, // as they are less commonly used.
// GammaZGamma5, // GammaZGamma5,
// GammaTGamma5, // GammaTGamma5,
// SigmaXY, // SigmaXY,
@ -27,12 +33,6 @@ namespace QCD {
// SigmaXT, // SigmaXT,
// SigmaYT, // SigmaYT,
// SigmaZT, // SigmaZT,
MinusIdentity,
MinusGammaX,
MinusGammaY,
MinusGammaZ,
MinusGammaT,
MinusGamma5
// MinusGammaXGamma5, easiest to form by composition // MinusGammaXGamma5, easiest to form by composition
// MinusGammaYGamma5, as performance is not critical for these // MinusGammaYGamma5, as performance is not critical for these
// MinusGammaZGamma5, // MinusGammaZGamma5,
@ -54,7 +54,6 @@ namespace QCD {
}; };
/* Gx /* Gx
* 0 0 0 i * 0 0 0 i
* 0 0 i 0 * 0 0 i 0

View File

@ -1,157 +1,220 @@
#ifnfdef GRID_QCD_WILSON_DOP_H
#define GRID_QCD_WILSON_DOP_H
#include <Grid.h> #include <Grid.h>
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
const std::vector<int> WilsonMatrix::directions ({0,1,2,3, 0, 1, 2, 3,0}); const std::vector<int> WilsonMatrix::directions ({0,1,2,3, 0, 1, 2, 3,0});
const std::vector<int> WilsonMatrix::displacements({1,1,1,1,-1,-1,-1,-1,0}); const std::vector<int> WilsonMatrix::displacements({1,1,1,1,-1,-1,-1,-1,0});
// Should be in header? // Should be in header?
static const int WilsonMatrix::Xp = 0; const int WilsonMatrix::Xp = 0;
static const int WilsonMatrix::Yp = 1; const int WilsonMatrix::Yp = 1;
static const int WilsonMatrix::Zp = 2; const int WilsonMatrix::Zp = 2;
static const int WilsonMatrix::Tp = 3; const int WilsonMatrix::Tp = 3;
static const int WilsonMatrix::Xm = 4; const int WilsonMatrix::Xm = 4;
static const int WilsonMatrix::Ym = 5; const int WilsonMatrix::Ym = 5;
static const int WilsonMatrix::Zm = 6; const int WilsonMatrix::Zm = 6;
static const int WilsonMatrix::Tm = 7; const int WilsonMatrix::Tm = 7;
static const int WilsonMatrix::X0 = 8; //const int WilsonMatrix::X0 = 8;
static const int WilsonMatrix::npoint=9;
class WilsonCompressor {
public:
int mu;
void Point(int p) { mu=p;};
vHalfSpinColourVector operator () (vSpinColourVector &in)
{
vHalfSpinColourVector ret;
switch(mu) {
case WilsonMatrix::Xp:
spProjXp(ret,in);
break;
case WilsonMatrix::Yp:
spProjYp(ret,in);
break;
case WilsonMatrix::Zp:
spProjZp(ret,in);
break;
case WilsonMatrix::Tp:
spProjTp(ret,in);
break;
case WilsonMatrix::Xm:
spProjXm(ret,in);
break;
case WilsonMatrix::Ym:
spProjYm(ret,in);
break;
case WilsonMatrix::Zm:
spProjZm(ret,in);
break;
case WilsonMatrix::Tm:
spProjTm(ret,in);
break;
default:
assert(0);
break;
}
return ret;
}
};
WilsonMatrix::WilsonMatrix(LatticeGaugeField &_Umu,int _mass) WilsonMatrix::WilsonMatrix(LatticeGaugeField &_Umu,double _mass)
: Stencil((&Umu._grid,npoint,0,directions,displacements), : Stencil(Umu._grid,npoint,0,directions,displacements),
mass(_mass), mass(_mass),
Umu(_Umu) Umu(_Umu._grid)
{ {
// Allocate the required comms buffer // Allocate the required comms buffer
grid = _Umu._grid; grid = _Umu._grid;
comm_buf.resize(Stencil._unified_buffer_size); comm_buf.resize(Stencil._unified_buffer_size);
DoubleStore(Umu,_Umu);
} }
void WilsonMatrix::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu)
{
LatticeColourMatrix U(grid);
for(int mu=0;mu<Nd;mu++){
U = peekIndex<LorentzIndex>(Umu,mu);
pokeIndex<LorentzIndex>(Uds,U,mu);
U = adj(Cshift(U,mu,-1));
pokeIndex<LorentzIndex>(Uds,U,mu+4);
}
}
void WilsonMatrix::multiply(const LatticeFermion &in, LatticeFermion &out) void WilsonMatrix::multiply(const LatticeFermion &in, LatticeFermion &out)
{ {
Dhop(in,out);
return;
} }
void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out) void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
{ {
Stencil.HaloExchange(in,comm_buf); // Stencil.HaloExchange(in,comm_buf);
for(int ss=0;ss<_grid->oSites();ss++){ for(int ss=0;ss<grid->oSites();ss++){
int offset,local; int offset,local;
vSpinColourVector result; vSpinColourVector result;
vHalfSpinColourVector UChi; vHalfSpinColourVector chi;
vHalfSpinColourVector Uchi;
vHalfSpinColourVector *chi_p;
// Xp // Xp
offset = Stencil._offsets [Xp][ss]; offset = Stencil._offsets [Xp][ss];
local = Stencil._is_local[Xp][ss]; local = Stencil._is_local[Xp][ss];
if ( local ) {
Uchi = U[]*spProjXp(in._odata[offset]);
} else {
Uchi = U[]*comm_buf._odata[offset]
}
result = ReconXp(Uchi);
chi_p = &comm_buf[offset];
if ( local ) {
spProjXp(chi,in._odata[offset]);
chi_p = &chi;
}
mult(&(Uchi()),&(Umu._odata[ss](Xp)),&(*chi_p)());
spReconXp(result,Uchi);
#if 0
// Yp // Yp
offset = Stencil._offsets [Yp][ss]; offset = Stencil._offsets [Yp][ss];
local = Stencil._is_local[Yp][ss]; local = Stencil._is_local[Yp][ss];
chi_p = &comm_buf[offset];
if ( local ) { if ( local ) {
Uchi = U[]*spProjYp(in._odata[offset]); spProjYp(chi,in._odata[offset]);
} else { chi_p = &chi;
Uchi = U[]*comm_buf._odata[offset] }
} mult(&(Uchi()),&(Umu._odata[ss](Yp)),&(*chi_p)());
result+= ReconYp(Uchi); accumReconYp(result,Uchi);
// Zp // Zp
offset = Stencil._offsets [Zp][ss]; offset = Stencil._offsets [Zp][ss];
local = Stencil._is_local[Zp][ss]; local = Stencil._is_local[Zp][ss];
chi_p = &comm_buf[offset];
if ( local ) { if ( local ) {
Uchi = U[]*spProjZp(in._odata[offset]); spProjZp(chi,in._odata[offset]);
} else { chi_p = &chi;
Uchi = U[]*comm_buf._odata[offset] }
} mult(&(Uchi()),&(Umu._odata[ss](Zp)),&(*chi_p)() );
result+= ReconZp(Uchi); accumReconZp(result,Uchi);
// Tp // Tp
offset = Stencil._offsets [Tp][ss]; offset = Stencil._offsets [Tp][ss];
local = Stencil._is_local[Tp][ss]; local = Stencil._is_local[Tp][ss];
chi_p = &comm_buf[offset];
if ( local ) { if ( local ) {
Uchi = U[]*spProjTp(in._odata[offset]); spProjTp(chi,in._odata[offset]);
} else { chi_p = &chi;
Uchi = U[]*comm_buf._odata[offset] }
} mult(&(Uchi()),&(Umu._odata[ss](Tp)),&(*chi_p)());
result+= ReconTp(Uchi); accumReconTp(result,Uchi);
// Xm // Xm
offset = Stencil._offsets [Xm][ss]; offset = Stencil._offsets [Xm][ss];
local = Stencil._is_local[Xm][ss]; local = Stencil._is_local[Xm][ss];
chi_p = &comm_buf[offset];
if ( local ) { if ( local ) {
Uchi = U[]*spProjXm(in._odata[offset]); spProjXm(chi,in._odata[offset]);
} else { chi_p = &chi;
Uchi = U[]*comm_buf._odata[offset] }
} mult(&(Uchi()),&(Umu._odata[ss](Xm)),&(*chi_p)());
result+= ReconXm(Uchi); accumReconXm(result,Uchi);
// Ym // Ym
offset = Stencil._offsets [Ym][ss]; offset = Stencil._offsets [Ym][ss];
local = Stencil._is_local[Ym][ss]; local = Stencil._is_local[Ym][ss];
chi_p = &comm_buf[offset];
if ( local ) { if ( local ) {
Uchi = U[]*spProjYm(in._odata[offset]); spProjYm(chi,in._odata[offset]);
} else { chi_p = &chi;
Uchi = U[]*comm_buf._odata[offset] }
} mult(&(Uchi()),&(Umu._odata[ss](Ym)),&(*chi_p)());
result+= ReconYm(Uchi); accumReconYm(result,Uchi);
// Zm // Zm
offset = Stencil._offsets [Zm][ss]; offset = Stencil._offsets [Zm][ss];
local = Stencil._is_local[Zm][ss]; local = Stencil._is_local[Zm][ss];
chi_p = &comm_buf[offset];
if ( local ) { if ( local ) {
Uchi = U[]*spProjZm(in._odata[offset]); spProjZm(chi,in._odata[offset]);
} else { chi_p = &chi;
Uchi = U[]*comm_buf._odata[offset] }
} mult(&(Uchi()),&(Umu._odata[ss](Zm)),&(*chi_p)());
result+= ReconZm(Uchi); accumReconZm(result,Uchi);
// Tm // Tm
offset = Stencil._offsets [Tm][ss]; offset = Stencil._offsets [Tm][ss];
local = Stencil._is_local[Tm][ss]; local = Stencil._is_local[Tm][ss];
chi_p = &comm_buf[offset];
if ( local ) { if ( local ) {
Uchi = U[]*spProjTm(in._odata[offset]); spProjTm(chi,in._odata[offset]);
} else { chi_p = &chi;
Uchi = U[]*comm_buf._odata[offset] }
} mult(&(Uchi()),&(Umu._odata[ss](Tm)),&(*chi_p)());
result+= ReconTm(Uchi); accumReconTm(result,Uchi);
#endif
out._odata[ss] = result; out._odata[ss] = result;
} }
} }
void WilsonMatrix::Dw(const LatticeFermion &in, LatticeFermion &out) void WilsonMatrix::Dw(const LatticeFermion &in, LatticeFermion &out)
{ {
return;
} }
void WilsonMatrix::MpcDag (const LatticeFermion &in, LatticeFermion &out) void WilsonMatrix::MpcDag (const LatticeFermion &in, LatticeFermion &out)
{ {
return;
} }
void WilsonMatrix::Mpc (const LatticeFermion &in, LatticeFermion &out) void WilsonMatrix::Mpc (const LatticeFermion &in, LatticeFermion &out)
{ {
return;
} }
void WilsonMatrix::MpcDagMpc(const LatticeFermion &in, LatticeFermion &out) void WilsonMatrix::MpcDagMpc(const LatticeFermion &in, LatticeFermion &out)
{ {
return;
} }
void WilsonMatrix::MDagM (const LatticeFermion &in, LatticeFermion &out) void WilsonMatrix::MDagM (const LatticeFermion &in, LatticeFermion &out)
{ {
return;
} }
}} }}
#endif

View File

@ -1,4 +1,4 @@
#ifnfdef GRID_QCD_WILSON_DOP_H #ifndef GRID_QCD_WILSON_DOP_H
#define GRID_QCD_WILSON_DOP_H #define GRID_QCD_WILSON_DOP_H
#include <Grid.h> #include <Grid.h>
@ -21,21 +21,23 @@ namespace Grid {
GridBase *grid; GridBase *grid;
// Copy of the gauge field // Copy of the gauge field
LatticeGaugeField Umu; LatticeDoubledGaugeField Umu;
//Defines the stencil //Defines the stencil
CartesianStencil Stencil; CartesianStencil Stencil;
static const int npoint=9; static const int npoint=9;
static const std::vector<int> directions ; static const std::vector<int> directions ;
static const std::vector<int> displacements; static const std::vector<int> displacements;
static const int Xp,Xm,Yp,Ym,Zp,Zm,Tp,Tm; static const int Xp,Xm,Yp,Ym,Zp,Zm,Tp,Tm;
// Comms buffer // Comms buffer
std::vector<vSpinColourVector,alignedAllocator<vSpinColourVector> > comm_buf; std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> > comm_buf;
// Constructor // Constructor
WilsonMatrix(LatticeGaugeField &Umu,int mass); WilsonMatrix(LatticeGaugeField &Umu,double mass);
// DoubleStore
void DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu);
// override multiply // override multiply
void multiply(const LatticeFermion &in, LatticeFermion &out); void multiply(const LatticeFermion &in, LatticeFermion &out);

View File

@ -251,13 +251,13 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
friend inline vComplexD conj(const vComplexD &in){ friend inline vComplexD conj(const vComplexD &in){
vComplexD ret ; vzero(ret); vComplexD ret ; vzero(ret);
#if defined (AVX1)|| defined (AVX2) #if defined (AVX1)|| defined (AVX2)
// addsubps 0, inv=>0+in.v[3] 0-in.v[2], 0+in.v[1], 0-in.v[0], ... // addsubps 0, inv=>0+in.v[3] 0-in.v[2], 0+in.v[1], 0-in.v[0], ...
// __m256d tmp = _mm256_addsub_pd(ret.v,_mm256_shuffle_pd(in.v,in.v,0x5)); zvec tmp = _mm256_addsub_pd(ret.v,_mm256_shuffle_pd(in.v,in.v,0x5));
// ret.v=_mm256_shuffle_pd(tmp,tmp,0x5); ret.v =_mm256_shuffle_pd(tmp,tmp,0x5);
ret.v = _mm256_addsub_pd(ret.v,in.v);
#endif #endif
#ifdef SSE4 #ifdef SSE4
ret.v = _mm_addsub_pd(ret.v,in.v); zvec tmp = _mm_addsub_pd(ret.v,_mm_shuffle_pd(in.v,in.v,0x1));
ret.v = _mm_shuffle_pd(tmp,tmp,0x1);
#endif #endif
#ifdef AVX512 #ifdef AVX512
ret.v = _mm512_mask_sub_pd(in.v, 0xaaaa,ret.v, in.v); ret.v = _mm512_mask_sub_pd(in.v, 0xaaaa,ret.v, in.v);
@ -268,48 +268,41 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
return ret; return ret;
} }
friend inline vComplexD timesI(const vComplexD &in){ friend inline vComplexD timesMinusI(const vComplexD &in){
vComplexD ret; vzero(ret); vComplexD ret; vzero(ret);
vComplexD tmp;
#if defined (AVX1)|| defined (AVX2) #if defined (AVX1)|| defined (AVX2)
cvec tmp =_mm256_addsub_ps(ret.v,in.v); // r,-i tmp.v =_mm256_addsub_pd(ret.v,in.v); // r,-i
/* ret.v =_mm256_shuffle_pd(tmp.v,tmp.v,0x5);
IF IMM0[0] = 0
THEN DEST[63:0]=SRC1[63:0] ELSE DEST[63:0]=SRC1[127:64] FI;
IF IMM0[1] = 0
THEN DEST[127:64]=SRC2[63:0] ELSE DEST[127:64]=SRC2[127:64] FI;
IF IMM0[2] = 0
THEN DEST[191:128]=SRC1[191:128] ELSE DEST[191:128]=SRC1[255:192] FI;
IF IMM0[3] = 0
THEN DEST[255:192]=SRC2[191:128] ELSE DEST[255:192]=SRC2[255:192] FI;
*/
ret.v =_mm256_shuffle_ps(tmp,tmp,0x5);
#endif #endif
#ifdef SSE4 #ifdef SSE4
cvec tmp =_mm_addsub_ps(ret.v,in.v); // r,-i tmp.v =_mm_addsub_pd(ret.v,in.v); // r,-i
ret.v =_mm_shuffle_ps(tmp,tmp,0x5); ret.v =_mm_shuffle_pd(tmp.v,tmp.v,0x1);
#endif #endif
#ifdef AVX512 #ifdef AVX512
ret.v = _mm512_mask_sub_ps(in.v,0xaaaa,ret.v,in.v); // real -imag ret.v = _mm512_mask_sub_pd(in.v,0xaaaa,ret.v,in.v); // real -imag
ret.v = _mm512_swizzle_ps(ret.v, _MM_SWIZ_REG_CDAB);// OK ret.v = _mm512_swizzle_pd(ret.v, _MM_SWIZ_REG_CDAB);// OK
#endif #endif
#ifdef QPX #ifdef QPX
assert(0); assert(0);
#endif #endif
return ret; return ret;
} }
friend inline vComplexD timesMinusI(const vComplexD &in){
friend inline vComplexD timesI(const vComplexD &in){
vComplexD ret; vzero(ret); vComplexD ret; vzero(ret);
vComplexD tmp;
#if defined (AVX1)|| defined (AVX2) #if defined (AVX1)|| defined (AVX2)
cvec tmp =_mm256_shuffle_ps(in.v,in.v,0x5); tmp.v =_mm256_shuffle_pd(in.v,in.v,0x5);
ret.v =_mm256_addsub_ps(ret.v,tmp); // i,-r ret.v =_mm256_addsub_pd(ret.v,tmp.v); // i,-r
#endif #endif
#ifdef SSE4 #ifdef SSE4
cvec tmp =_mm_shuffle_ps(in.v,in.v,0x5); tmp.v =_mm_shuffle_pd(in.v,in.v,0x1);
ret.v =_mm_addsub_ps(ret.v,tmp); // r,-i ret.v =_mm_addsub_pd(ret.v,tmp.v); // r,-i
#endif #endif
#ifdef AVX512 #ifdef AVX512
cvec tmp = _mm512_swizzle_ps(in.v, _MM_SWIZ_REG_CDAB);// OK tmp.v = _mm512_swizzle_pd(in.v, _MM_SWIZ_REG_CDAB);// OK
ret.v = _mm512_mask_sub_ps(tmp,0xaaaa,ret.v,tmp); // real -imag ret.v = _mm512_mask_sub_pd(tmp.v,0xaaaa,ret.v,tmp.v); // real -imag
#endif #endif
#ifdef QPX #ifdef QPX
assert(0); assert(0);

View File

@ -214,10 +214,10 @@ friend inline void vstore(const vComplexF &ret, ComplexF *a){
{ {
#ifdef SSE4 #ifdef SSE4
union { union {
__m128 v1; // SSE 4 x float vector cvec v1; // SSE 4 x float vector
float f[4]; // scalar array of 4 floats float f[4]; // scalar array of 4 floats
} u128; } u128;
u128.v1= _mm_add_ps(v, _mm_shuffle_ps(v, v, 0b01001110)); // FIXME Prefer to use _MM_SHUFFLE macros u128.v1= _mm_add_ps(in.v, _mm_shuffle_ps(in.v,in.v, 0b01001110)); // FIXME Prefer to use _MM_SHUFFLE macros
return ComplexF(u128.f[0], u128.f[1]); return ComplexF(u128.f[0], u128.f[1]);
#endif #endif
#ifdef AVX1 #ifdef AVX1
@ -329,13 +329,15 @@ friend inline void vstore(const vComplexF &ret, ComplexF *a){
friend inline vComplexF conj(const vComplexF &in){ friend inline vComplexF conj(const vComplexF &in){
vComplexF ret ; vzero(ret); vComplexF ret ; vzero(ret);
#if defined (AVX1)|| defined (AVX2) #if defined (AVX1)|| defined (AVX2)
// cvec tmp; cvec tmp;
// tmp = _mm256_addsub_ps(ret.v,_mm256_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1))); // ymm1 <- br,bi tmp = _mm256_addsub_ps(ret.v,_mm256_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1))); // ymm1 <- br,bi
// ret.v=_mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1)); ret.v=_mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1));
ret.v = _mm256_addsub_ps(ret.v,in.v);
#endif #endif
#ifdef SSE4 #ifdef SSE4
ret.v = _mm_addsub_ps(ret.v,in.v); cvec tmp;
tmp = _mm_addsub_ps(ret.v,_mm_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1))); // ymm1 <- br,bi
ret.v=_mm_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1));
#endif #endif
#ifdef AVX512 #ifdef AVX512
ret.v = _mm512_mask_sub_ps(in.v,0xaaaa,ret.v,in.v); // Zero out 0+real 0-imag ret.v = _mm512_mask_sub_ps(in.v,0xaaaa,ret.v,in.v); // Zero out 0+real 0-imag
@ -345,15 +347,16 @@ friend inline void vstore(const vComplexF &ret, ComplexF *a){
#endif #endif
return ret; return ret;
} }
friend inline vComplexF timesI(const vComplexF &in){ friend inline vComplexF timesMinusI(const vComplexF &in){
vComplexF ret; vzero(ret); vComplexF ret;
vzero(ret);
#if defined (AVX1)|| defined (AVX2) #if defined (AVX1)|| defined (AVX2)
cvec tmp =_mm256_addsub_ps(ret.v,in.v); // r,-i cvec tmp =_mm256_addsub_ps(ret.v,in.v); // r,-i
ret.v = _mm256_shuffle_ps(tmp,tmp,0x5); ret.v = _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1)); //-i,r
#endif #endif
#ifdef SSE4 #ifdef SSE4
cvec tmp =_mm_addsub_ps(ret.v,in.v); // r,-i cvec tmp =_mm_addsub_ps(ret.v,in.v); // r,-i
ret.v = _mm_shuffle_ps(tmp,tmp,0x5); ret.v = _mm_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1));
#endif #endif
#ifdef AVX512 #ifdef AVX512
ret.v = _mm512_mask_sub_ps(in.v,0xaaaa,ret.v,in.v); // real -imag ret.v = _mm512_mask_sub_ps(in.v,0xaaaa,ret.v,in.v); // real -imag
@ -364,14 +367,14 @@ friend inline void vstore(const vComplexF &ret, ComplexF *a){
#endif #endif
return ret; return ret;
} }
friend inline vComplexF timesMinusI(const vComplexF &in){ friend inline vComplexF timesI(const vComplexF &in){
vComplexF ret; vzero(ret); vComplexF ret; vzero(ret);
#if defined (AVX1)|| defined (AVX2) #if defined (AVX1)|| defined (AVX2)
cvec tmp =_mm256_shuffle_ps(in.v,in.v,0x5); cvec tmp =_mm256_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1));//i,r
ret.v = _mm256_addsub_ps(ret.v,tmp); // i,-r ret.v =_mm256_addsub_ps(ret.v,tmp); //i,-r
#endif #endif
#ifdef SSE4 #ifdef SSE4
cvec tmp =_mm_shuffle_ps(in.v,in.v,0x5); cvec tmp =_mm_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1));
ret.v = _mm_addsub_ps(ret.v,tmp); // r,-i ret.v = _mm_addsub_ps(ret.v,tmp); // r,-i
#endif #endif
#ifdef AVX512 #ifdef AVX512
@ -443,5 +446,8 @@ friend inline void vstore(const vComplexF &ret, ComplexF *a){
inline vComplexF trace(const vComplexF &arg){ inline vComplexF trace(const vComplexF &arg){
return arg; return arg;
} }
} }
#endif #endif

View File

@ -146,7 +146,7 @@ namespace Grid {
ret.v = _mm256_set_pd(a[3],a[2],a[1],a[0]); ret.v = _mm256_set_pd(a[3],a[2],a[1],a[0]);
#endif #endif
#ifdef SSE4 #ifdef SSE4
ret.v = _mm_set_pd(a[0],a[1]); ret.v = _mm_set_pd(a[1],a[0]);
#endif #endif
#ifdef AVX512 #ifdef AVX512
ret.v = _mm512_set_pd(a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]); ret.v = _mm512_set_pd(a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
@ -186,6 +186,15 @@ namespace Grid {
friend inline RealD Reduce(const vRealD & in) friend inline RealD Reduce(const vRealD & in)
{ {
#if defined (SSE4)
// FIXME Hack
const RealD * ptr =(const RealD *) &in;
RealD ret = 0;
for(int i=0;i<vRealD::Nsimd();i++){
ret = ret+ptr[i];
}
return ret;
#endif
#if defined (AVX1) || defined(AVX2) #if defined (AVX1) || defined(AVX2)
typedef union { typedef union {
uint64_t l; uint64_t l;

View File

@ -175,7 +175,7 @@ namespace Grid {
ret.v = _mm256_set_ps(a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]); ret.v = _mm256_set_ps(a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
#endif #endif
#ifdef SSE4 #ifdef SSE4
ret.v = _mm_set_ps(a[0],a[1],a[2],a[3]); ret.v = _mm_set_ps(a[3],a[2],a[1],a[0]);
#endif #endif
#ifdef AVX512 #ifdef AVX512
ret.v = _mm512_set_ps( a[15],a[14],a[13],a[12],a[11],a[10],a[9],a[8], ret.v = _mm512_set_ps( a[15],a[14],a[13],a[12],a[11],a[10],a[9],a[8],
@ -220,6 +220,15 @@ friend inline void vstore(const vRealF &ret, float *a){
} }
friend inline RealF Reduce(const vRealF & in) friend inline RealF Reduce(const vRealF & in)
{ {
#if defined (SSE4)
// FIXME Hack
const RealF * ptr = (const RealF *) &in;
RealF ret = 0;
for(int i=0;i<vRealF::Nsimd();i++){
ret = ret+ptr[i];
}
return ret;
#endif
#if defined (AVX1) || defined(AVX2) #if defined (AVX1) || defined(AVX2)
__attribute__ ((aligned(32))) float c_[16]; __attribute__ ((aligned(32))) float c_[16];
__m256 tmp = _mm256_permute2f128_ps(in.v,in.v,0x01); __m256 tmp = _mm256_permute2f128_ps(in.v,in.v,0x01);

View File

@ -121,8 +121,9 @@ namespace Grid {
int simd_layout = _grid->_simd_layout[dimension]; int simd_layout = _grid->_simd_layout[dimension];
int comm_dim = _grid->_processors[dimension] >1 ; int comm_dim = _grid->_processors[dimension] >1 ;
assert(simd_layout==1); // assert(simd_layout==1); // Why?
assert(comm_dim==1); assert(comm_dim==1);
shift = (shift + fd) %fd;
assert(shift>=0); assert(shift>=0);
assert(shift<fd); assert(shift<fd);

View File

@ -5,6 +5,11 @@ using namespace std;
using namespace Grid; using namespace Grid;
using namespace Grid::QCD; using namespace Grid::QCD;
template<class d>
struct scal {
d internal;
};
int main (int argc, char ** argv) int main (int argc, char ** argv)
{ {
@ -22,22 +27,33 @@ int main (int argc, char ** argv)
GridSerialRNG sRNG; GridSerialRNG sRNG;
sRNG.SeedRandomDevice(); sRNG.SeedRandomDevice();
SpinMatrix ident=zero; SpinMatrix ident; ident=zero;
SpinMatrix rnd ; random(sRNG,rnd); SpinMatrix rnd ; random(sRNG,rnd);
SpinMatrix ll=zero; SpinMatrix ll; ll=zero;
SpinMatrix rr=zero; SpinMatrix rr; rr=zero;
SpinMatrix result; SpinMatrix result;
SpinVector lv; random(sRNG,lv); SpinVector lv; random(sRNG,lv);
SpinVector rv; random(sRNG,rv); SpinVector rv; random(sRNG,rv);
std::cout << " Is pod " << std::is_pod<SpinVector>::value << std::endl;
std::cout << " Is pod double " << std::is_pod<double>::value << std::endl;
std::cout << " Is pod ComplexF " << std::is_pod<ComplexF>::value << std::endl;
std::cout << " Is pod scal<double> " << std::is_pod<scal<double> >::value << std::endl;
std::cout << " Is pod Scalar<double> " << std::is_pod<iScalar<double> >::value << std::endl;
std::cout << " Is pod Scalar<ComplexF> " << std::is_pod<iScalar<ComplexF> >::value << std::endl;
std::cout << " Is pod Scalar<vComplexF> " << std::is_pod<iScalar<vComplexF> >::value << std::endl;
std::cout << " Is pod Scalar<vComplexD> " << std::is_pod<iScalar<vComplexD> >::value << std::endl;
std::cout << " Is pod Scalar<vRealF> " << std::is_pod<iScalar<vRealF> >::value << std::endl;
std::cout << " Is pod Scalar<vRealD> " << std::is_pod<iScalar<vRealD> >::value << std::endl;
for(int a=0;a<Ns;a++){ for(int a=0;a<Ns;a++){
ident()(a,a) = 1.0; ident()(a,a) = 1.0;
} }
const Gamma::GammaMatrix *g = Gamma::GammaMatrices; const Gamma::GammaMatrix *g = Gamma::GammaMatrices;
const char **list = Gamma::GammaMatrixNames; const char **list = Gamma::GammaMatrixNames;
result =ll*Gamma(g[0])*rr; result =ll*Gamma(g[0])*rr;
result =ll*Gamma(g[0]); result =ll*Gamma(g[0]);

View File

@ -49,7 +49,6 @@ int main (int argc, char ** argv)
Plaq = Plaq + trace(U[mu]*Cshift(U[nu],mu,1)*adj(Cshift(U[mu],nu,1))*adj(U[nu])); Plaq = Plaq + trace(U[mu]*Cshift(U[nu],mu,1)*adj(Cshift(U[mu],nu,1))*adj(U[nu]));
} }
} }
double vol = Fine.gSites(); double vol = Fine.gSites();
Complex PlaqScale(1.0/vol/6.0/3.0); Complex PlaqScale(1.0/vol/6.0/3.0);
@ -58,7 +57,8 @@ int main (int argc, char ** argv)
sliceSum(Plaq,Plaq_T,Nd-1); sliceSum(Plaq,Plaq_T,Nd-1);
int Nt = Plaq_T.size(); int Nt = Plaq_T.size();
TComplex Plaq_T_sum=zero; TComplex Plaq_T_sum;
Plaq_T_sum=zero;
for(int t=0;t<Nt;t++){ for(int t=0;t<Nt;t++){
Plaq_T_sum = Plaq_T_sum+Plaq_T[t]; Plaq_T_sum = Plaq_T_sum+Plaq_T[t];
Complex Pt=TensorRemove(Plaq_T[t]); Complex Pt=TensorRemove(Plaq_T[t]);

166
tests/Grid_simd.cc Normal file
View File

@ -0,0 +1,166 @@
#include <Grid.h>
#include <parallelIO/GridNerscIO.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
class funcPlus {
public:
funcPlus() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1+i2;}
std::string name(void) const { return std::string("Plus"); }
};
class funcMinus {
public:
funcMinus() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1-i2;}
std::string name(void) const { return std::string("Minus"); }
};
class funcTimes {
public:
funcTimes() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1*i2;}
std::string name(void) const { return std::string("Times"); }
};
class funcConj {
public:
funcConj() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = conj(i1);}
std::string name(void) const { return std::string("Conj"); }
};
class funcAdj {
public:
funcAdj() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = adj(i1);}
std::string name(void) const { return std::string("Adj"); }
};
class funcTimesI {
public:
funcTimesI() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = timesI(i1);}
std::string name(void) const { return std::string("timesI"); }
};
class funcTimesMinusI {
public:
funcTimesMinusI() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = timesMinusI(i1);}
std::string name(void) const { return std::string("timesMinusI"); }
};
template<class scal, class vec,class functor >
void Tester(const functor &func)
{
GridSerialRNG sRNG;
sRNG.SeedRandomDevice();
int Nsimd = vec::Nsimd();
std::vector<scal> input1(Nsimd);
std::vector<scal> input2(Nsimd);
std::vector<scal> result(Nsimd);
std::vector<scal> reference(Nsimd);
std::vector<vec,alignedAllocator<vec> > buf(3);
vec & v_input1 = buf[0];
vec & v_input2 = buf[1];
vec & v_result = buf[2];
for(int i=0;i<Nsimd;i++){
random(sRNG,input1[i]);
random(sRNG,input2[i]);
random(sRNG,result[i]);
}
Gmerge(v_input1,input1);
Gmerge(v_input2,input2);
Gmerge(v_result,result);
func(v_result,v_input1,v_input2);
for(int i=0;i<Nsimd;i++) {
func(reference[i],input1[i],input2[i]);
}
Gextract(v_result,result);
std::cout << " " << func.name()<<std::endl;
int ok=0;
for(int i=0;i<Nsimd;i++){
if ( abs(reference[i]-result[i])>0){
std::cout<< "*****" << std::endl;
std::cout<< "["<<i<<"] "<< abs(reference[i]-result[i]) << " " <<reference[i]<< " " << result[i]<<std::endl;
ok++;
}
}
if ( ok==0 ) std::cout << " OK!" <<std::endl;
}
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> simd_layout({1,1,2,2});
std::vector<int> mpi_layout ({1,1,1,1});
std::vector<int> latt_size ({8,8,8,8});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
std::vector<int> seeds({1,2,3,4});
// Insist that operations on random scalars gives
// identical results to on vectors.
Tester<RealD,vRealD>(funcPlus());
std::cout << "==================================="<< std::endl;
std::cout << "Testing vComplexF "<<std::endl;
std::cout << "==================================="<< std::endl;
Tester<ComplexF,vComplexF>(funcTimesI());
Tester<ComplexF,vComplexF>(funcTimesMinusI());
Tester<ComplexF,vComplexF>(funcPlus());
Tester<ComplexF,vComplexF>(funcMinus());
Tester<ComplexF,vComplexF>(funcTimes());
Tester<ComplexF,vComplexF>(funcConj());
Tester<ComplexF,vComplexF>(funcAdj());
std::cout << "==================================="<< std::endl;
std::cout << "Testing vComplexD "<<std::endl;
std::cout << "==================================="<< std::endl;
Tester<ComplexD,vComplexD>(funcTimesI());
Tester<ComplexD,vComplexD>(funcTimesMinusI());
Tester<ComplexD,vComplexD>(funcPlus());
Tester<ComplexD,vComplexD>(funcMinus());
Tester<ComplexD,vComplexD>(funcTimes());
Tester<ComplexD,vComplexD>(funcConj());
Tester<ComplexD,vComplexD>(funcAdj());
std::cout << "==================================="<< std::endl;
std::cout << "Testing vRealF "<<std::endl;
std::cout << "==================================="<< std::endl;
Tester<RealF,vRealF>(funcPlus());
Tester<RealF,vRealF>(funcMinus());
Tester<RealF,vRealF>(funcTimes());
Tester<RealF,vRealF>(funcAdj());
std::cout << "==================================="<< std::endl;
std::cout << "Testing vRealD "<<std::endl;
std::cout << "==================================="<< std::endl;
Tester<RealD,vRealD>(funcPlus());
Tester<RealD,vRealD>(funcMinus());
Tester<RealD,vRealD>(funcTimes());
Tester<RealD,vRealD>(funcAdj());
Grid_finalize();
}

View File

@ -4,60 +4,37 @@ using namespace std;
using namespace Grid; using namespace Grid;
using namespace Grid::QCD; using namespace Grid::QCD;
template<class vobj>
class SimpleCompressor {
public:
void Point(int) {};
vobj operator() (vobj &arg) {
return arg;
}
};
int main (int argc, char ** argv) int main (int argc, char ** argv)
{ {
Grid_init(&argc,&argv); Grid_init(&argc,&argv);
std::vector<int> latt_size (4); std::vector<int> simd_layout({1,1,2,2});
std::vector<int> simd_layout(4); std::vector<int> mpi_layout ({2,2,2,2});
std::vector<int> mpi_layout (4); std::vector<int> latt_size ({8,8,8,8});
int omp=1; double volume = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
int lat=8;
mpi_layout[0]=1;
mpi_layout[1]=1;
mpi_layout[2]=1;
mpi_layout[3]=1;
latt_size[0] = lat;
latt_size[1] = lat;
latt_size[2] = lat;
latt_size[3] = lat;
double volume = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
#ifdef AVX512 GridCartesian Fine(latt_size,simd_layout,mpi_layout);
simd_layout[0] = 1; GridRedBlackCartesian rbFine(latt_size,simd_layout,mpi_layout);
simd_layout[1] = 2; GridParallelRNG fRNG(&Fine);
simd_layout[2] = 2; fRNG.SeedRandomDevice();
simd_layout[3] = 2;
#endif LatticeColourMatrix Foo(&Fine);
#if defined (AVX1)|| defined (AVX2) LatticeColourMatrix Bar(&Fine);
simd_layout[0] = 1; LatticeColourMatrix Check(&Fine);
simd_layout[1] = 1; LatticeColourMatrix Diff(&Fine);
simd_layout[2] = 2;
simd_layout[3] = 2; random(fRNG,Foo);
#endif gaussian(fRNG,Bar);
#if defined (SSE2)
simd_layout[0] = 1;
simd_layout[1] = 1;
simd_layout[2] = 1;
simd_layout[3] = 2;
#endif
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian rbFine(latt_size,simd_layout,mpi_layout);
GridParallelRNG fRNG(&Fine);
fRNG.SeedRandomDevice();
LatticeColourMatrix Foo(&Fine);
LatticeColourMatrix Bar(&Fine);
LatticeColourMatrix Check(&Fine);
LatticeColourMatrix Diff(&Fine);
random(fRNG,Foo);
gaussian(fRNG,Bar);
for(int dir=0;dir<4;dir++){ for(int dir=0;dir<4;dir++){
@ -86,7 +63,8 @@ int main (int argc, char ** argv)
fflush(stdout); fflush(stdout);
std::vector<vColourMatrix,alignedAllocator<vColourMatrix> > comm_buf(myStencil._unified_buffer_size); std::vector<vColourMatrix,alignedAllocator<vColourMatrix> > comm_buf(myStencil._unified_buffer_size);
printf("calling halo exchange\n");fflush(stdout); printf("calling halo exchange\n");fflush(stdout);
myStencil.HaloExchange(Foo,comm_buf); SimpleCompressor<vColourMatrix> compress;
myStencil.HaloExchange(Foo,comm_buf,compress);
Bar = Cshift(Foo,dir,disp); Bar = Cshift(Foo,dir,disp);

69
tests/Grid_wilson.cc Normal file
View File

@ -0,0 +1,69 @@
#include <Grid.h>
#include <parallelIO/GridNerscIO.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
template<class d>
struct scal {
d internal;
};
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> simd_layout({1,1,2,2});
std::vector<int> mpi_layout ({1,1,1,1});
std::vector<int> latt_size ({8,8,8,8});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid);
// pRNG.SeedFixedIntegers(seeds);
pRNG.SeedRandomDevice();
LatticeFermion src(&Grid); random(pRNG,src);
LatticeFermion result(&Grid); result=zero;
LatticeFermion ref(&Grid); ref=zero;
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
std::vector<LatticeColourMatrix> U(4,&Grid);
for(int mu=0;mu<Nd;mu++){
U[mu] = 1.0;
pokeIndex<3>(Umu,U[mu],mu);
}
{
int mu=0;
// ref = src + Gamma(Gamma::GammaX)* src ; // 1-gamma_x
ref = src;
ref = U[0]*Cshift(ref,0,1);
}
RealD mass=0.1;
WilsonMatrix Dw(Umu,mass);
std::cout << "Calling Dw"<<std::endl;
Dw.multiply(src,result);
std::cout << "Called Dw"<<std::endl;
std::cout << "norm result "<< norm2(result)<<std::endl;
std::cout << "norm ref "<< norm2(ref)<<std::endl;
for(int ss=0;ss<10;ss++ ){
for(int i=0;i<Ns;i++){
for(int j=0;j<Nc;j++){
ComplexF * ref_p = (ComplexF *)&ref._odata[ss]()(i)(j);
ComplexF * res_p = (ComplexF *)&result._odata[ss]()(i)(j);
std::cout << ss<< " "<<i<<" "<<j<<" "<< (*ref_p)<<" " <<(*res_p)<<std::endl;
}
}
}
ref = ref -result;
std::cout << "norm diff "<< norm2(ref)<<std::endl;
Grid_finalize();
}

View File

@ -5,7 +5,7 @@ AM_LDFLAGS = -L$(top_srcdir)/lib
# #
# Test code # Test code
# #
bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma Grid_wilson Grid_simd
Grid_main_SOURCES = Grid_main.cc Grid_main_SOURCES = Grid_main.cc
Grid_main_LDADD = -lGrid Grid_main_LDADD = -lGrid
@ -21,3 +21,9 @@ Grid_gamma_LDADD = -lGrid
Grid_stencil_SOURCES = Grid_stencil.cc Grid_stencil_SOURCES = Grid_stencil.cc
Grid_stencil_LDADD = -lGrid Grid_stencil_LDADD = -lGrid
Grid_wilson_SOURCES = Grid_wilson.cc
Grid_wilson_LDADD = -lGrid
Grid_simd_SOURCES = Grid_simd.cc
Grid_simd_LDADD = -lGrid