mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
Big updates with progress towards wilson matrix
This commit is contained in:
parent
51f0da7b93
commit
94f728bee4
18
TODO
18
TODO
@ -2,6 +2,10 @@
|
|||||||
- use protocol buffers? replace xmlReader/Writer ec..
|
- use protocol buffers? replace xmlReader/Writer ec..
|
||||||
- Binary use htonll, htonl
|
- Binary use htonll, htonl
|
||||||
|
|
||||||
|
* Reduce implemention is poor
|
||||||
|
* Bug in SeedFixedIntegers gives same output on each site.
|
||||||
|
* Bug in RNG with complex numbers ; only filling real values; need helper function -- DONE
|
||||||
|
|
||||||
* Stencil operator support -----Initial thoughts, trial implementation DONE.
|
* Stencil operator support -----Initial thoughts, trial implementation DONE.
|
||||||
-----some simple tests that Stencil matches Cshift.
|
-----some simple tests that Stencil matches Cshift.
|
||||||
-----do all permute in comms phase, so that copy permute
|
-----do all permute in comms phase, so that copy permute
|
||||||
@ -11,6 +15,7 @@
|
|||||||
|
|
||||||
* CovariantShift support -----Use a class to store gauge field? (parallel transport?)
|
* CovariantShift support -----Use a class to store gauge field? (parallel transport?)
|
||||||
|
|
||||||
|
* Strong test for norm2, conj and all primitive types.
|
||||||
|
|
||||||
* Consider switch std::vector to boost arrays or something lighter weight
|
* Consider switch std::vector to boost arrays or something lighter weight
|
||||||
boost::multi_array<type, 3> A()... to replace multi1d, multi2d etc..
|
boost::multi_array<type, 3> A()... to replace multi1d, multi2d etc..
|
||||||
@ -33,10 +38,21 @@
|
|||||||
|
|
||||||
|
|
||||||
* Make the Tensor types and Complex etc... play more nicely.
|
* Make the Tensor types and Complex etc... play more nicely.
|
||||||
* TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > >
|
|
||||||
|
- TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > >
|
||||||
QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken TensorRemove, but I
|
QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken TensorRemove, but I
|
||||||
want to introduce a syntax that does not require this.
|
want to introduce a syntax that does not require this.
|
||||||
|
|
||||||
|
- Reductions that contract indices on a site should always demote the tensor structure.
|
||||||
|
norm2(), innerProduct.
|
||||||
|
|
||||||
|
- Result of Sum(), SliceSum // spatial sums
|
||||||
|
trace, traceIndex etc.. do not.
|
||||||
|
|
||||||
|
- problem arises because "trace" returns Lattice<TComplex> moving everything down to Scalar,
|
||||||
|
and then Sum and SliceSum to not remove the Scalars. This would be fixed if we
|
||||||
|
template specialize the scalar scalar scalar sum and SliceSum, on the basis of being
|
||||||
|
pure scalar.
|
||||||
|
|
||||||
* Optimise the extract/merge SIMD routines; Azusa??
|
* Optimise the extract/merge SIMD routines; Azusa??
|
||||||
- I have collated into single location at least.
|
- I have collated into single location at least.
|
||||||
|
@ -11,6 +11,7 @@
|
|||||||
#define GRID_H
|
#define GRID_H
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
#include <complex>
|
#include <complex>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
/* lib/Grid_config.h.in. Generated from configure.ac by autoheader. */
|
/* lib/Grid_config.h.in. Generated from configure.ac by autoheader. */
|
||||||
|
|
||||||
/* AVX */
|
/* AVX */
|
||||||
#define AVX1 1
|
/* #undef AVX1 */
|
||||||
|
|
||||||
/* AVX2 */
|
/* AVX2 */
|
||||||
/* #undef AVX2 */
|
/* #undef AVX2 */
|
||||||
@ -77,7 +77,7 @@
|
|||||||
#define PACKAGE_VERSION "1.0"
|
#define PACKAGE_VERSION "1.0"
|
||||||
|
|
||||||
/* SSE4 */
|
/* SSE4 */
|
||||||
/* #undef SSE4 */
|
#define SSE4 1
|
||||||
|
|
||||||
/* Define to 1 if you have the ANSI C header files. */
|
/* Define to 1 if you have the ANSI C header files. */
|
||||||
#define STDC_HEADERS 1
|
#define STDC_HEADERS 1
|
||||||
|
@ -26,10 +26,15 @@ namespace Grid {
|
|||||||
|
|
||||||
typedef float RealF;
|
typedef float RealF;
|
||||||
typedef double RealD;
|
typedef double RealD;
|
||||||
|
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
|
||||||
|
typedef RealD Real;
|
||||||
|
#else
|
||||||
|
typedef RealF Real;
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef std::complex<RealF> ComplexF;
|
typedef std::complex<RealF> ComplexF;
|
||||||
typedef std::complex<RealD> ComplexD;
|
typedef std::complex<RealD> ComplexD;
|
||||||
|
typedef std::complex<Real> Complex;
|
||||||
|
|
||||||
inline RealF adj(const RealF & r){ return r; }
|
inline RealF adj(const RealF & r){ return r; }
|
||||||
inline RealF conj(const RealF & r){ return r; }
|
inline RealF conj(const RealF & r){ return r; }
|
||||||
@ -63,8 +68,8 @@ namespace Grid {
|
|||||||
//conj already supported for complex
|
//conj already supported for complex
|
||||||
|
|
||||||
inline ComplexF timesI(const ComplexF r) { return(r*ComplexF(0.0,1.0));}
|
inline ComplexF timesI(const ComplexF r) { return(r*ComplexF(0.0,1.0));}
|
||||||
inline ComplexF timesMinusI(const ComplexF r){ return(r*ComplexF(0.0,-1.0));}
|
|
||||||
inline ComplexD timesI(const ComplexD r) { return(r*ComplexD(0.0,1.0));}
|
inline ComplexD timesI(const ComplexD r) { return(r*ComplexD(0.0,1.0));}
|
||||||
|
inline ComplexF timesMinusI(const ComplexF r){ return(r*ComplexF(0.0,-1.0));}
|
||||||
inline ComplexD timesMinusI(const ComplexD r){ return(r*ComplexD(0.0,-1.0));}
|
inline ComplexD timesMinusI(const ComplexD r){ return(r*ComplexD(0.0,-1.0));}
|
||||||
|
|
||||||
inline void mac (RealD * __restrict__ y,const RealD * __restrict__ a,const RealD *__restrict__ x){ *y = (*a) * (*x)+(*y);}
|
inline void mac (RealD * __restrict__ y,const RealD * __restrict__ a,const RealD *__restrict__ x){ *y = (*a) * (*x)+(*y);}
|
||||||
@ -280,15 +285,11 @@ namespace Grid {
|
|||||||
|
|
||||||
// Default precision
|
// Default precision
|
||||||
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
|
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
|
||||||
typedef RealD Real;
|
|
||||||
typedef vRealD vReal;
|
typedef vRealD vReal;
|
||||||
typedef vComplexD vComplex;
|
typedef vComplexD vComplex;
|
||||||
typedef std::complex<Real> Complex;
|
|
||||||
#else
|
#else
|
||||||
typedef RealF Real;
|
|
||||||
typedef vRealF vReal;
|
typedef vRealF vReal;
|
||||||
typedef vComplexF vComplex;
|
typedef vComplexF vComplex;
|
||||||
typedef std::complex<Real> Complex;
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -47,6 +47,101 @@ namespace Grid {
|
|||||||
int from_rank;
|
int from_rank;
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////
|
||||||
|
// Gather for when there is no need to SIMD split with compression
|
||||||
|
///////////////////////////////////////////////////////////////////
|
||||||
|
template<class vobj,class cobj,class compressor> void
|
||||||
|
Gather_plane_simple (Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,int dimension,int plane,int cbmask,compressor &compress)
|
||||||
|
{
|
||||||
|
int rd = rhs._grid->_rdimensions[dimension];
|
||||||
|
|
||||||
|
if ( !rhs._grid->CheckerBoarded(dimension) ) {
|
||||||
|
|
||||||
|
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||||
|
int o = 0; // relative offset to base within plane
|
||||||
|
int bo = 0; // offset in buffer
|
||||||
|
|
||||||
|
// Simple block stride gather of SIMD objects
|
||||||
|
#pragma omp parallel for collapse(2)
|
||||||
|
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
|
||||||
|
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
|
||||||
|
buffer[bo++]=compress(rhs._odata[so+o+b]);
|
||||||
|
}
|
||||||
|
o +=rhs._grid->_slice_stride[dimension];
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||||
|
int o = 0; // relative offset to base within plane
|
||||||
|
int bo = 0; // offset in buffer
|
||||||
|
|
||||||
|
#pragma omp parallel for collapse(2)
|
||||||
|
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
|
||||||
|
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
|
||||||
|
|
||||||
|
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
|
||||||
|
if ( ocb &cbmask ) {
|
||||||
|
buffer[bo]=compress(rhs._odata[so+o+b]);
|
||||||
|
bo++;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
o +=rhs._grid->_slice_stride[dimension];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////
|
||||||
|
// Gather for when there *is* need to SIMD split with compression
|
||||||
|
///////////////////////////////////////////////////////////////////
|
||||||
|
template<class cobj,class vobj,class compressor> void
|
||||||
|
Gather_plane_extract(Lattice<vobj> &rhs,std::vector<typename cobj::scalar_type *> pointers,int dimension,int plane,int cbmask,compressor &compress)
|
||||||
|
{
|
||||||
|
int rd = rhs._grid->_rdimensions[dimension];
|
||||||
|
|
||||||
|
if ( !rhs._grid->CheckerBoarded(dimension) ) {
|
||||||
|
|
||||||
|
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||||
|
int o = 0; // relative offset to base within plane
|
||||||
|
int bo = 0; // offset in buffer
|
||||||
|
|
||||||
|
// Simple block stride gather of SIMD objects
|
||||||
|
#pragma omp parallel for collapse(2)
|
||||||
|
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
|
||||||
|
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
|
||||||
|
cobj temp;
|
||||||
|
temp=compress(rhs._odata[so+o+b]);
|
||||||
|
extract(temp,pointers);
|
||||||
|
}
|
||||||
|
o +=rhs._grid->_slice_stride[dimension];
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||||
|
int o = 0; // relative offset to base within plane
|
||||||
|
int bo = 0; // offset in buffer
|
||||||
|
|
||||||
|
#pragma omp parallel for collapse(2)
|
||||||
|
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
|
||||||
|
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
|
||||||
|
|
||||||
|
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
|
||||||
|
if ( ocb & cbmask ) {
|
||||||
|
cobj temp;
|
||||||
|
temp =compress(rhs._odata[so+o+b]);
|
||||||
|
extract(temp,pointers);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
o +=rhs._grid->_slice_stride[dimension];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal fill in.
|
class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal fill in.
|
||||||
public:
|
public:
|
||||||
|
|
||||||
@ -86,8 +181,8 @@ namespace Grid {
|
|||||||
|
|
||||||
// Could allow a functional munging of the halo to another type during the comms.
|
// Could allow a functional munging of the halo to another type during the comms.
|
||||||
// this could implement the 16bit/32bit/64bit compression.
|
// this could implement the 16bit/32bit/64bit compression.
|
||||||
template<class vobj> void HaloExchange(Lattice<vobj> &source,
|
template<class vobj,class cobj, class compressor> void
|
||||||
std::vector<vobj,alignedAllocator<vobj> > &u_comm_buf)
|
HaloExchange(Lattice<vobj> &source,std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,compressor &compress)
|
||||||
{
|
{
|
||||||
// conformable(source._grid,_grid);
|
// conformable(source._grid,_grid);
|
||||||
assert(source._grid==_grid);
|
assert(source._grid==_grid);
|
||||||
@ -95,12 +190,10 @@ namespace Grid {
|
|||||||
int u_comm_offset=0;
|
int u_comm_offset=0;
|
||||||
|
|
||||||
// Gather all comms buffers
|
// Gather all comms buffers
|
||||||
typedef typename vobj::vector_type vector_type;
|
|
||||||
typedef typename vobj::scalar_type scalar_type;
|
|
||||||
|
|
||||||
for(int point = 0 ; point < _npoints; point++) {
|
for(int point = 0 ; point < _npoints; point++) {
|
||||||
|
|
||||||
printf("Point %d \n",point);fflush(stdout);
|
compress.Point(point);
|
||||||
|
|
||||||
int dimension = _directions[point];
|
int dimension = _directions[point];
|
||||||
int displacement = _distances[point];
|
int displacement = _distances[point];
|
||||||
|
|
||||||
@ -126,33 +219,30 @@ namespace Grid {
|
|||||||
sshift[1] = _grid->CheckerBoardShift(_checkerboard,dimension,shift,1);
|
sshift[1] = _grid->CheckerBoardShift(_checkerboard,dimension,shift,1);
|
||||||
if ( sshift[0] == sshift[1] ) {
|
if ( sshift[0] == sshift[1] ) {
|
||||||
if (splice_dim) {
|
if (splice_dim) {
|
||||||
printf("splice 0x3 \n");fflush(stdout);
|
GatherStartCommsSimd(source,dimension,shift,0x3,u_comm_buf,u_comm_offset,compress);
|
||||||
GatherStartCommsSimd(source,dimension,shift,0x3,u_comm_buf,u_comm_offset);
|
|
||||||
} else {
|
} else {
|
||||||
printf("NO splice 0x3 \n");fflush(stdout);
|
GatherStartComms(source,dimension,shift,0x3,u_comm_buf,u_comm_offset,compress);
|
||||||
GatherStartComms(source,dimension,shift,0x3,u_comm_buf,u_comm_offset);
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if(splice_dim){
|
if(splice_dim){
|
||||||
printf("splice 0x1,2 \n");fflush(stdout);
|
GatherStartCommsSimd(source,dimension,shift,0x1,u_comm_buf,u_comm_offset,compress);// if checkerboard is unfavourable take two passes
|
||||||
GatherStartCommsSimd(source,dimension,shift,0x1,u_comm_buf,u_comm_offset);// if checkerboard is unfavourable take two passes
|
GatherStartCommsSimd(source,dimension,shift,0x2,u_comm_buf,u_comm_offset,compress);// both with block stride loop iteration
|
||||||
GatherStartCommsSimd(source,dimension,shift,0x2,u_comm_buf,u_comm_offset);// both with block stride loop iteration
|
|
||||||
} else {
|
} else {
|
||||||
printf("NO splice 0x1,2 \n");fflush(stdout);
|
GatherStartComms(source,dimension,shift,0x1,u_comm_buf,u_comm_offset,compress);
|
||||||
GatherStartComms(source,dimension,shift,0x1,u_comm_buf,u_comm_offset);
|
GatherStartComms(source,dimension,shift,0x2,u_comm_buf,u_comm_offset,compress);
|
||||||
GatherStartComms(source,dimension,shift,0x2,u_comm_buf,u_comm_offset);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class vobj> void GatherStartComms(Lattice<vobj> &rhs,int dimension,int shift,int cbmask,
|
template<class vobj,class cobj, class compressor>
|
||||||
std::vector<vobj,alignedAllocator<vobj> > &u_comm_buf,
|
void GatherStartComms(Lattice<vobj> &rhs,int dimension,int shift,int cbmask,
|
||||||
int &u_comm_offset)
|
std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,
|
||||||
|
int &u_comm_offset,compressor & compress)
|
||||||
{
|
{
|
||||||
typedef typename vobj::vector_type vector_type;
|
typedef typename cobj::vector_type vector_type;
|
||||||
typedef typename vobj::scalar_type scalar_type;
|
typedef typename cobj::scalar_type scalar_type;
|
||||||
|
|
||||||
GridBase *grid=_grid;
|
GridBase *grid=_grid;
|
||||||
assert(rhs._grid==_grid);
|
assert(rhs._grid==_grid);
|
||||||
@ -169,31 +259,26 @@ namespace Grid {
|
|||||||
|
|
||||||
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
|
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
|
||||||
|
|
||||||
std::vector<vobj,alignedAllocator<vobj> > send_buf(buffer_size); // hmm...
|
std::vector<cobj,alignedAllocator<cobj> > send_buf(buffer_size); // hmm...
|
||||||
std::vector<vobj,alignedAllocator<vobj> > recv_buf(buffer_size);
|
std::vector<cobj,alignedAllocator<cobj> > recv_buf(buffer_size);
|
||||||
|
|
||||||
int cb= (cbmask==0x2)? 1 : 0;
|
int cb= (cbmask==0x2)? 1 : 0;
|
||||||
int sshift= _grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,cb);
|
int sshift= _grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,cb);
|
||||||
|
|
||||||
for(int x=0;x<rd;x++){
|
for(int x=0;x<rd;x++){
|
||||||
|
|
||||||
printf("GatherStartComms x %d/%d\n",x,rd);fflush(stdout);
|
|
||||||
int offnode = ( x+sshift >= rd );
|
int offnode = ( x+sshift >= rd );
|
||||||
int sx = (x+sshift)%rd;
|
int sx = (x+sshift)%rd;
|
||||||
int comm_proc = (x+sshift)/rd;
|
int comm_proc = (x+sshift)/rd;
|
||||||
|
|
||||||
if (offnode) {
|
if (offnode) {
|
||||||
|
|
||||||
printf("GatherStartComms offnode x %d\n",x);fflush(stdout);
|
|
||||||
int words = send_buf.size();
|
int words = send_buf.size();
|
||||||
if (cbmask != 0x3) words=words>>1;
|
if (cbmask != 0x3) words=words>>1;
|
||||||
|
|
||||||
int bytes = words * sizeof(vobj);
|
int bytes = words * sizeof(cobj);
|
||||||
|
|
||||||
printf("Gather_plane_simple dimension %d sx %d cbmask %d\n",dimension,sx,cbmask);fflush(stdout);
|
Gather_plane_simple (rhs,send_buf,dimension,sx,cbmask,compress);
|
||||||
Gather_plane_simple (rhs,send_buf,dimension,sx,cbmask);
|
|
||||||
|
|
||||||
printf("GatherStartComms gathered offnode x %d\n",x);fflush(stdout);
|
|
||||||
|
|
||||||
int rank = _grid->_processor;
|
int rank = _grid->_processor;
|
||||||
int recv_from_rank;
|
int recv_from_rank;
|
||||||
@ -219,14 +304,14 @@ namespace Grid {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<class vobj>
|
template<class vobj,class cobj, class compressor>
|
||||||
void GatherStartCommsSimd(Lattice<vobj> &rhs,int dimension,int shift,int cbmask,
|
void GatherStartCommsSimd(Lattice<vobj> &rhs,int dimension,int shift,int cbmask,
|
||||||
std::vector<vobj,alignedAllocator<vobj> > &u_comm_buf,
|
std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,
|
||||||
int &u_comm_offset)
|
int &u_comm_offset,compressor &compress)
|
||||||
{
|
{
|
||||||
const int Nsimd = _grid->Nsimd();
|
const int Nsimd = _grid->Nsimd();
|
||||||
typedef typename vobj::vector_type vector_type;
|
typedef typename cobj::vector_type vector_type;
|
||||||
typedef typename vobj::scalar_type scalar_type;
|
typedef typename cobj::scalar_type scalar_type;
|
||||||
|
|
||||||
int fd = _grid->_fdimensions[dimension];
|
int fd = _grid->_fdimensions[dimension];
|
||||||
int rd = _grid->_rdimensions[dimension];
|
int rd = _grid->_rdimensions[dimension];
|
||||||
@ -245,7 +330,7 @@ namespace Grid {
|
|||||||
// Simd direction uses an extract/merge pair
|
// Simd direction uses an extract/merge pair
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
|
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
|
||||||
int words = sizeof(vobj)/sizeof(vector_type);
|
int words = sizeof(cobj)/sizeof(vector_type);
|
||||||
|
|
||||||
/* FIXME ALTERNATE BUFFER DETERMINATION */
|
/* FIXME ALTERNATE BUFFER DETERMINATION */
|
||||||
std::vector<std::vector<scalar_type> > send_buf_extract(Nsimd,std::vector<scalar_type>(buffer_size*words) );
|
std::vector<std::vector<scalar_type> > send_buf_extract(Nsimd,std::vector<scalar_type>(buffer_size*words) );
|
||||||
@ -285,7 +370,7 @@ namespace Grid {
|
|||||||
for(int i=0;i<Nsimd;i++){
|
for(int i=0;i<Nsimd;i++){
|
||||||
pointers[Nsimd-1-i] = (scalar_type *)&send_buf_extract[i][0];
|
pointers[Nsimd-1-i] = (scalar_type *)&send_buf_extract[i][0];
|
||||||
}
|
}
|
||||||
Gather_plane_extract(rhs,pointers,dimension,sx,cbmask);
|
Gather_plane_extract<cobj>(rhs,pointers,dimension,sx,cbmask,compress);
|
||||||
|
|
||||||
for(int i=0;i<Nsimd;i++){
|
for(int i=0;i<Nsimd;i++){
|
||||||
|
|
||||||
|
@ -18,6 +18,7 @@ libGrid_a_SOURCES =\
|
|||||||
Grid_init.cc\
|
Grid_init.cc\
|
||||||
stencil/Grid_stencil_common.cc\
|
stencil/Grid_stencil_common.cc\
|
||||||
qcd/Grid_qcd_dirac.cc\
|
qcd/Grid_qcd_dirac.cc\
|
||||||
|
qcd/Grid_qcd_wilson_dop.cc\
|
||||||
$(extra_sources)
|
$(extra_sources)
|
||||||
|
|
||||||
#
|
#
|
||||||
|
@ -79,18 +79,18 @@ namespace Grid {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
template<class left,class right>
|
template<class left,class right>
|
||||||
inline auto operator + (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
|
inline auto operator + (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]+rhs._odata[0])>
|
||||||
{
|
{
|
||||||
//NB mult performs conformable check. Do not reapply here for performance.
|
//NB mult performs conformable check. Do not reapply here for performance.
|
||||||
Lattice<decltype(lhs._odata[0]*rhs._odata[0])> ret(rhs._grid);
|
Lattice<decltype(lhs._odata[0]+rhs._odata[0])> ret(rhs._grid);
|
||||||
add(ret,lhs,rhs);
|
add(ret,lhs,rhs);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
template<class left,class right>
|
template<class left,class right>
|
||||||
inline auto operator - (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
|
inline auto operator - (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]-rhs._odata[0])>
|
||||||
{
|
{
|
||||||
//NB mult performs conformable check. Do not reapply here for performance.
|
//NB mult performs conformable check. Do not reapply here for performance.
|
||||||
Lattice<decltype(lhs._odata[0]*rhs._odata[0])> ret(rhs._grid);
|
Lattice<decltype(lhs._odata[0]-rhs._odata[0])> ret(rhs._grid);
|
||||||
sub(ret,lhs,rhs);
|
sub(ret,lhs,rhs);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@ -107,9 +107,9 @@ namespace Grid {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
template<class left,class right>
|
template<class left,class right>
|
||||||
inline auto operator + (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
|
inline auto operator + (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs+rhs._odata[0])>
|
||||||
{
|
{
|
||||||
Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
|
Lattice<decltype(lhs+rhs._odata[0])> ret(rhs._grid);
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||||
ret._odata[ss]=lhs+rhs._odata[ss];
|
ret._odata[ss]=lhs+rhs._odata[ss];
|
||||||
@ -117,9 +117,9 @@ namespace Grid {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
template<class left,class right>
|
template<class left,class right>
|
||||||
inline auto operator - (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
|
inline auto operator - (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs-rhs._odata[0])>
|
||||||
{
|
{
|
||||||
Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
|
Lattice<decltype(lhs-rhs._odata[0])> ret(rhs._grid);
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||||
ret._odata[ss]=lhs-rhs._odata[ss];
|
ret._odata[ss]=lhs-rhs._odata[ss];
|
||||||
@ -137,9 +137,9 @@ namespace Grid {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
template<class left,class right>
|
template<class left,class right>
|
||||||
inline auto operator + (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
|
inline auto operator + (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]+rhs)>
|
||||||
{
|
{
|
||||||
Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
|
Lattice<decltype(lhs._odata[0]+rhs)> ret(lhs._grid);
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||||
ret._odata[ss]=lhs._odata[ss]+rhs;
|
ret._odata[ss]=lhs._odata[ss]+rhs;
|
||||||
@ -147,9 +147,9 @@ namespace Grid {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
template<class left,class right>
|
template<class left,class right>
|
||||||
inline auto operator - (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
|
inline auto operator - (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]-rhs)>
|
||||||
{
|
{
|
||||||
Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
|
Lattice<decltype(lhs._odata[0]-rhs)> ret(lhs._grid);
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||||
ret._odata[ss]=lhs._odata[ss]-rhs;
|
ret._odata[ss]=lhs._odata[ss]-rhs;
|
||||||
|
@ -14,7 +14,7 @@ namespace Grid {
|
|||||||
|
|
||||||
typedef typename vobj::scalar_type scalar;
|
typedef typename vobj::scalar_type scalar;
|
||||||
typedef typename vobj::vector_type vector;
|
typedef typename vobj::vector_type vector;
|
||||||
decltype(innerProduct(arg._odata[0],arg._odata[0])) vnrm=zero;
|
decltype(innerProduct(arg._odata[0],arg._odata[0])) vnrm;
|
||||||
scalar nrm;
|
scalar nrm;
|
||||||
//FIXME make this loop parallelisable
|
//FIXME make this loop parallelisable
|
||||||
vnrm=zero;
|
vnrm=zero;
|
||||||
@ -33,10 +33,11 @@ namespace Grid {
|
|||||||
//->decltype(innerProduct(left._odata[0],right._odata[0]))
|
//->decltype(innerProduct(left._odata[0],right._odata[0]))
|
||||||
{
|
{
|
||||||
typedef typename vobj::scalar_type scalar;
|
typedef typename vobj::scalar_type scalar;
|
||||||
decltype(innerProduct(left._odata[0],right._odata[0])) vnrm=zero;
|
decltype(innerProduct(left._odata[0],right._odata[0])) vnrm;
|
||||||
|
|
||||||
scalar nrm;
|
scalar nrm;
|
||||||
//FIXME make this loop parallelisable
|
//FIXME make this loop parallelisable
|
||||||
|
vnrm=zero;
|
||||||
for(int ss=0;ss<left._grid->oSites(); ss++){
|
for(int ss=0;ss<left._grid->oSites(); ss++){
|
||||||
vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]);
|
vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]);
|
||||||
}
|
}
|
||||||
@ -94,8 +95,10 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
|
|||||||
int ld=grid->_ldimensions[orthogdim];
|
int ld=grid->_ldimensions[orthogdim];
|
||||||
int rd=grid->_rdimensions[orthogdim];
|
int rd=grid->_rdimensions[orthogdim];
|
||||||
|
|
||||||
|
sobj szero; szero=zero;
|
||||||
|
|
||||||
std::vector<vobj,alignedAllocator<vobj> > lvSum(rd); // will locally sum vectors first
|
std::vector<vobj,alignedAllocator<vobj> > lvSum(rd); // will locally sum vectors first
|
||||||
std::vector<sobj> lsSum(ld,sobj(zero)); // sum across these down to scalars
|
std::vector<sobj> lsSum(ld,szero); // sum across these down to scalars
|
||||||
std::vector<sobj> extracted(Nsimd); // splitting the SIMD
|
std::vector<sobj> extracted(Nsimd); // splitting the SIMD
|
||||||
|
|
||||||
result.resize(fd); // And then global sum to return the same vector to every node for IO to file
|
result.resize(fd); // And then global sum to return the same vector to every node for IO to file
|
||||||
|
@ -26,6 +26,8 @@ namespace Grid {
|
|||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
class GridRNGbase {
|
class GridRNGbase {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@ -62,6 +64,21 @@ namespace Grid {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// real scalars are one component
|
||||||
|
template<class scalar,class distribution> void fillScalar(scalar &s,distribution &dist)
|
||||||
|
{
|
||||||
|
s=dist(_generators[0]);
|
||||||
|
}
|
||||||
|
template<class distribution> void fillScalar(ComplexF &s,distribution &dist)
|
||||||
|
{
|
||||||
|
s=ComplexF(dist(_generators[0]),dist(_generators[0]));
|
||||||
|
}
|
||||||
|
template<class distribution> void fillScalar(ComplexD &s,distribution &dist)
|
||||||
|
{
|
||||||
|
s=ComplexD(dist(_generators[0]),dist(_generators[0]));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
template <class sobj,class distribution> inline void fill(sobj &l,distribution &dist){
|
template <class sobj,class distribution> inline void fill(sobj &l,distribution &dist){
|
||||||
|
|
||||||
typedef typename sobj::scalar_type scalar_type;
|
typedef typename sobj::scalar_type scalar_type;
|
||||||
@ -71,13 +88,60 @@ namespace Grid {
|
|||||||
scalar_type *buf = (scalar_type *) & l;
|
scalar_type *buf = (scalar_type *) & l;
|
||||||
|
|
||||||
for(int idx=0;idx<words;idx++){
|
for(int idx=0;idx<words;idx++){
|
||||||
buf[idx] = dist(_generators[0]);
|
fillScalar(buf[idx],dist);
|
||||||
}
|
}
|
||||||
|
|
||||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <class distribution> inline void fill(ComplexF &l,distribution &dist){
|
||||||
|
fillScalar(l,dist);
|
||||||
|
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||||
|
}
|
||||||
|
template <class distribution> inline void fill(ComplexD &l,distribution &dist){
|
||||||
|
fillScalar(l,dist);
|
||||||
|
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||||
|
}
|
||||||
|
template <class distribution> inline void fill(RealF &l,distribution &dist){
|
||||||
|
fillScalar(l,dist);
|
||||||
|
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||||
|
}
|
||||||
|
template <class distribution> inline void fill(RealD &l,distribution &dist){
|
||||||
|
fillScalar(l,dist);
|
||||||
|
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||||
|
}
|
||||||
|
// vector fill
|
||||||
|
template <class distribution> inline void fill(vComplexF &l,distribution &dist){
|
||||||
|
RealF *pointer=(RealF *)&l;
|
||||||
|
for(int i=0;i<2*vComplexF::Nsimd();i++){
|
||||||
|
fillScalar(pointer[i],dist);
|
||||||
|
}
|
||||||
|
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||||
|
}
|
||||||
|
template <class distribution> inline void fill(vComplexD &l,distribution &dist){
|
||||||
|
RealD *pointer=(RealD *)&l;
|
||||||
|
for(int i=0;i<2*vComplexD::Nsimd();i++){
|
||||||
|
fillScalar(pointer[i],dist);
|
||||||
|
}
|
||||||
|
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||||
|
}
|
||||||
|
template <class distribution> inline void fill(vRealF &l,distribution &dist){
|
||||||
|
RealF *pointer=(RealF *)&l;
|
||||||
|
for(int i=0;i<vRealF::Nsimd();i++){
|
||||||
|
fillScalar(pointer[i],dist);
|
||||||
|
}
|
||||||
|
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||||
|
}
|
||||||
|
template <class distribution> inline void fill(vRealD &l,distribution &dist){
|
||||||
|
RealD *pointer=(RealD *)&l;
|
||||||
|
for(int i=0;i<vRealD::Nsimd();i++){
|
||||||
|
fillScalar(pointer[i],dist);
|
||||||
|
}
|
||||||
|
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void SeedRandomDevice(void){
|
void SeedRandomDevice(void){
|
||||||
std::random_device rd;
|
std::random_device rd;
|
||||||
Seed(rd);
|
Seed(rd);
|
||||||
@ -186,7 +250,6 @@ namespace Grid {
|
|||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
template <class vobj> inline void random(GridParallelRNG &rng,Lattice<vobj> &l){
|
template <class vobj> inline void random(GridParallelRNG &rng,Lattice<vobj> &l){
|
||||||
rng.fill(l,rng._uniform);
|
rng.fill(l,rng._uniform);
|
||||||
}
|
}
|
||||||
|
@ -11,21 +11,21 @@ namespace Grid {
|
|||||||
// multiplication by fundamental scalar type
|
// multiplication by fundamental scalar type
|
||||||
template<class l,int N> inline iScalar<l> operator * (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
|
template<class l,int N> inline iScalar<l> operator * (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
|
||||||
{
|
{
|
||||||
typename iScalar<l>::tensor_reduced srhs(rhs);
|
typename iScalar<l>::tensor_reduced srhs; srhs=rhs;
|
||||||
return lhs*srhs;
|
return lhs*srhs;
|
||||||
}
|
}
|
||||||
template<class l,int N> inline iScalar<l> operator * (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs) { return rhs*lhs; }
|
template<class l,int N> inline iScalar<l> operator * (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs) { return rhs*lhs; }
|
||||||
|
|
||||||
template<class l,int N> inline iVector<l,N> operator * (const iVector<l,N>& lhs,const typename iScalar<l>::scalar_type rhs)
|
template<class l,int N> inline iVector<l,N> operator * (const iVector<l,N>& lhs,const typename iScalar<l>::scalar_type rhs)
|
||||||
{
|
{
|
||||||
typename iVector<l,N>::tensor_reduced srhs(rhs);
|
typename iVector<l,N>::tensor_reduced srhs; srhs=rhs;
|
||||||
return lhs*srhs;
|
return lhs*srhs;
|
||||||
}
|
}
|
||||||
template<class l,int N> inline iVector<l,N> operator * (const typename iScalar<l>::scalar_type lhs,const iVector<l,N>& rhs) { return rhs*lhs; }
|
template<class l,int N> inline iVector<l,N> operator * (const typename iScalar<l>::scalar_type lhs,const iVector<l,N>& rhs) { return rhs*lhs; }
|
||||||
|
|
||||||
template<class l,int N> inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type &rhs)
|
template<class l,int N> inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type &rhs)
|
||||||
{
|
{
|
||||||
typename iMatrix<l,N>::tensor_reduced srhs(rhs);
|
typename iMatrix<l,N>::tensor_reduced srhs; srhs=rhs;
|
||||||
return lhs*srhs;
|
return lhs*srhs;
|
||||||
}
|
}
|
||||||
template<class l,int N> inline iMatrix<l,N> operator * (const typename iScalar<l>::scalar_type & lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
|
template<class l,int N> inline iMatrix<l,N> operator * (const typename iScalar<l>::scalar_type & lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
|
||||||
@ -35,24 +35,24 @@ template<class l,int N> inline iMatrix<l,N> operator * (const typename iScalar<l
|
|||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
template<class l> inline iScalar<l> operator * (const iScalar<l>& lhs,double rhs)
|
template<class l> inline iScalar<l> operator * (const iScalar<l>& lhs,double rhs)
|
||||||
{
|
{
|
||||||
typename iScalar<l>::scalar_type t(rhs);
|
typename iScalar<l>::scalar_type t; t=rhs;
|
||||||
typename iScalar<l>::tensor_reduced srhs(t);
|
typename iScalar<l>::tensor_reduced srhs;srhs=t;
|
||||||
return lhs*srhs;
|
return lhs*srhs;
|
||||||
}
|
}
|
||||||
template<class l> inline iScalar<l> operator * (double lhs,const iScalar<l>& rhs) { return rhs*lhs; }
|
template<class l> inline iScalar<l> operator * (double lhs,const iScalar<l>& rhs) { return rhs*lhs; }
|
||||||
|
|
||||||
template<class l,int N> inline iVector<l,N> operator * (const iVector<l,N>& lhs,double rhs)
|
template<class l,int N> inline iVector<l,N> operator * (const iVector<l,N>& lhs,double rhs)
|
||||||
{
|
{
|
||||||
typename iScalar<l>::scalar_type t(rhs);
|
typename iScalar<l>::scalar_type t;t=rhs;
|
||||||
typename iScalar<l>::tensor_reduced srhs(t);
|
typename iScalar<l>::tensor_reduced srhs;srhs=t;
|
||||||
return lhs*srhs;
|
return lhs*srhs;
|
||||||
}
|
}
|
||||||
template<class l,int N> inline iVector<l,N> operator * (double lhs,const iVector<l,N>& rhs) { return rhs*lhs; }
|
template<class l,int N> inline iVector<l,N> operator * (double lhs,const iVector<l,N>& rhs) { return rhs*lhs; }
|
||||||
|
|
||||||
template<class l,int N> inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,double rhs)
|
template<class l,int N> inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,double rhs)
|
||||||
{
|
{
|
||||||
typename iScalar<l>::scalar_type t(rhs);
|
typename iScalar<l>::scalar_type t;t=rhs;
|
||||||
typename iScalar<l>::tensor_reduced srhs(t);
|
typename iScalar<l>::tensor_reduced srhs;srhs=t;
|
||||||
return lhs*srhs;
|
return lhs*srhs;
|
||||||
}
|
}
|
||||||
template<class l,int N> inline iMatrix<l,N> operator * (double lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
|
template<class l,int N> inline iMatrix<l,N> operator * (double lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
|
||||||
@ -62,24 +62,26 @@ template<class l,int N> inline iMatrix<l,N> operator * (double lhs,const iMatrix
|
|||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
template<class l> inline iScalar<l> operator * (const iScalar<l>& lhs,ComplexD rhs)
|
template<class l> inline iScalar<l> operator * (const iScalar<l>& lhs,ComplexD rhs)
|
||||||
{
|
{
|
||||||
typename iScalar<l>::scalar_type t(rhs);
|
typename iScalar<l>::scalar_type t;t=rhs;
|
||||||
typename iScalar<l>::tensor_reduced srhs(t);
|
typename iScalar<l>::tensor_reduced srhs;srhs=t;
|
||||||
|
|
||||||
|
|
||||||
return lhs*srhs;
|
return lhs*srhs;
|
||||||
}
|
}
|
||||||
template<class l> inline iScalar<l> operator * (ComplexD lhs,const iScalar<l>& rhs) { return rhs*lhs; }
|
template<class l> inline iScalar<l> operator * (ComplexD lhs,const iScalar<l>& rhs) { return rhs*lhs; }
|
||||||
|
|
||||||
template<class l,int N> inline iVector<l,N> operator * (const iVector<l,N>& lhs,ComplexD rhs)
|
template<class l,int N> inline iVector<l,N> operator * (const iVector<l,N>& lhs,ComplexD rhs)
|
||||||
{
|
{
|
||||||
typename iScalar<l>::scalar_type t(rhs);
|
typename iScalar<l>::scalar_type t;t=rhs;
|
||||||
typename iScalar<l>::tensor_reduced srhs(t);
|
typename iScalar<l>::tensor_reduced srhs;srhs=t;
|
||||||
return lhs*srhs;
|
return lhs*srhs;
|
||||||
}
|
}
|
||||||
template<class l,int N> inline iVector<l,N> operator * (ComplexD lhs,const iVector<l,N>& rhs) { return rhs*lhs; }
|
template<class l,int N> inline iVector<l,N> operator * (ComplexD lhs,const iVector<l,N>& rhs) { return rhs*lhs; }
|
||||||
|
|
||||||
template<class l,int N> inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,ComplexD rhs)
|
template<class l,int N> inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,ComplexD rhs)
|
||||||
{
|
{
|
||||||
typename iScalar<l>::scalar_type t(rhs);
|
typename iScalar<l>::scalar_type t;t=rhs;
|
||||||
typename iScalar<l>::tensor_reduced srhs(t);
|
typename iScalar<l>::tensor_reduced srhs;srhs=t;
|
||||||
return lhs*srhs;
|
return lhs*srhs;
|
||||||
}
|
}
|
||||||
template<class l,int N> inline iMatrix<l,N> operator * (ComplexD lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
|
template<class l,int N> inline iMatrix<l,N> operator * (ComplexD lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
|
||||||
@ -89,24 +91,24 @@ template<class l,int N> inline iMatrix<l,N> operator * (ComplexD lhs,const iMatr
|
|||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
template<class l> inline iScalar<l> operator * (const iScalar<l>& lhs,Integer rhs)
|
template<class l> inline iScalar<l> operator * (const iScalar<l>& lhs,Integer rhs)
|
||||||
{
|
{
|
||||||
typename iScalar<l>::scalar_type t(rhs);
|
typename iScalar<l>::scalar_type t; t=rhs;
|
||||||
typename iScalar<l>::tensor_reduced srhs(t);
|
typename iScalar<l>::tensor_reduced srhs; srhs=t;
|
||||||
return lhs*srhs;
|
return lhs*srhs;
|
||||||
}
|
}
|
||||||
template<class l> inline iScalar<l> operator * (Integer lhs,const iScalar<l>& rhs) { return rhs*lhs; }
|
template<class l> inline iScalar<l> operator * (Integer lhs,const iScalar<l>& rhs) { return rhs*lhs; }
|
||||||
|
|
||||||
template<class l,int N> inline iVector<l,N> operator * (const iVector<l,N>& lhs,Integer rhs)
|
template<class l,int N> inline iVector<l,N> operator * (const iVector<l,N>& lhs,Integer rhs)
|
||||||
{
|
{
|
||||||
typename iScalar<l>::scalar_type t(rhs);
|
typename iScalar<l>::scalar_type t;t=rhs;
|
||||||
typename iScalar<l>::tensor_reduced srhs(t);
|
typename iScalar<l>::tensor_reduced srhs;srhs=t;
|
||||||
return lhs*srhs;
|
return lhs*srhs;
|
||||||
}
|
}
|
||||||
template<class l,int N> inline iVector<l,N> operator * (Integer lhs,const iVector<l,N>& rhs) { return rhs*lhs; }
|
template<class l,int N> inline iVector<l,N> operator * (Integer lhs,const iVector<l,N>& rhs) { return rhs*lhs; }
|
||||||
|
|
||||||
template<class l,int N> inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,Integer rhs)
|
template<class l,int N> inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,Integer rhs)
|
||||||
{
|
{
|
||||||
typename iScalar<l>::scalar_type t(rhs);
|
typename iScalar<l>::scalar_type t;t=rhs;
|
||||||
typename iScalar<l>::tensor_reduced srhs(t);
|
typename iScalar<l>::tensor_reduced srhs;srhs=t;
|
||||||
return lhs*srhs;
|
return lhs*srhs;
|
||||||
}
|
}
|
||||||
template<class l,int N> inline iMatrix<l,N> operator * (Integer lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
|
template<class l,int N> inline iMatrix<l,N> operator * (Integer lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
|
||||||
@ -118,14 +120,14 @@ template<class l,int N> inline iMatrix<l,N> operator * (Integer lhs,const iMatri
|
|||||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
template<class l,int N> inline iScalar<l> operator + (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
|
template<class l,int N> inline iScalar<l> operator + (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
|
||||||
{
|
{
|
||||||
typename iScalar<l>::tensor_reduced srhs(rhs);
|
typename iScalar<l>::tensor_reduced srhs; srhs=rhs;
|
||||||
return lhs+srhs;
|
return lhs+srhs;
|
||||||
}
|
}
|
||||||
template<class l,int N> inline iScalar<l> operator + (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs) { return rhs+lhs; }
|
template<class l,int N> inline iScalar<l> operator + (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs) { return rhs+lhs; }
|
||||||
|
|
||||||
template<class l,int N> inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type rhs)
|
template<class l,int N> inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type rhs)
|
||||||
{
|
{
|
||||||
typename iMatrix<l,N>::tensor_reduced srhs(rhs);
|
typename iMatrix<l,N>::tensor_reduced srhs; srhs=rhs;
|
||||||
return lhs+srhs;
|
return lhs+srhs;
|
||||||
}
|
}
|
||||||
template<class l,int N> inline iMatrix<l,N> operator + (const typename iScalar<l>::scalar_type lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; }
|
template<class l,int N> inline iMatrix<l,N> operator + (const typename iScalar<l>::scalar_type lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; }
|
||||||
@ -135,16 +137,16 @@ template<class l,int N> inline iMatrix<l,N> operator + (const typename iScalar<l
|
|||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
template<class l> inline iScalar<l> operator + (const iScalar<l>& lhs,double rhs)
|
template<class l> inline iScalar<l> operator + (const iScalar<l>& lhs,double rhs)
|
||||||
{
|
{
|
||||||
typename iScalar<l>::scalar_type t(rhs);
|
typename iScalar<l>::scalar_type t; t=rhs;
|
||||||
typename iScalar<l>::tensor_reduced srhs(t);
|
typename iScalar<l>::tensor_reduced srhs; srhs=t;
|
||||||
return lhs+srhs;
|
return lhs+srhs;
|
||||||
}
|
}
|
||||||
template<class l> inline iScalar<l> operator + (double lhs,const iScalar<l>& rhs) { return rhs+lhs; }
|
template<class l> inline iScalar<l> operator + (double lhs,const iScalar<l>& rhs) { return rhs+lhs; }
|
||||||
|
|
||||||
template<class l,int N> inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,double rhs)
|
template<class l,int N> inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,double rhs)
|
||||||
{
|
{
|
||||||
typename iScalar<l>::scalar_type t(rhs);
|
typename iScalar<l>::scalar_type t;t=rhs;
|
||||||
typename iScalar<l>::tensor_reduced srhs(t);
|
typename iScalar<l>::tensor_reduced srhs;srhs=t;
|
||||||
return lhs+srhs;
|
return lhs+srhs;
|
||||||
}
|
}
|
||||||
template<class l,int N> inline iMatrix<l,N> operator + (double lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; }
|
template<class l,int N> inline iMatrix<l,N> operator + (double lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; }
|
||||||
@ -155,8 +157,8 @@ template<class l,int N> inline iMatrix<l,N> operator + (double lhs,const iMatrix
|
|||||||
|
|
||||||
template<class l> inline iScalar<l> operator + (const iScalar<l>& lhs,Integer rhs)
|
template<class l> inline iScalar<l> operator + (const iScalar<l>& lhs,Integer rhs)
|
||||||
{
|
{
|
||||||
typename iScalar<l>::scalar_type t(rhs);
|
typename iScalar<l>::scalar_type t; t=rhs;
|
||||||
typename iScalar<l>::tensor_reduced srhs(t);
|
typename iScalar<l>::tensor_reduced srhs; srhs=t;
|
||||||
return lhs+srhs;
|
return lhs+srhs;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -164,8 +166,8 @@ template<class l> inline iScalar<l> operator + (Integer lhs,const iScalar<l>& rh
|
|||||||
|
|
||||||
template<class l,int N> inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,Integer rhs)
|
template<class l,int N> inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,Integer rhs)
|
||||||
{
|
{
|
||||||
typename iScalar<l>::scalar_type t(rhs);
|
typename iScalar<l>::scalar_type t;t=rhs;
|
||||||
typename iScalar<l>::tensor_reduced srhs(t);
|
typename iScalar<l>::tensor_reduced srhs;srhs=t;
|
||||||
return lhs+srhs;
|
return lhs+srhs;
|
||||||
}
|
}
|
||||||
template<class l,int N> inline iMatrix<l,N> operator + (Integer lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; }
|
template<class l,int N> inline iMatrix<l,N> operator + (Integer lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; }
|
||||||
@ -176,23 +178,23 @@ template<class l,int N> inline iMatrix<l,N> operator + (Integer lhs,const iMatri
|
|||||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
template<class l,int N> inline iScalar<l> operator - (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
|
template<class l,int N> inline iScalar<l> operator - (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
|
||||||
{
|
{
|
||||||
typename iScalar<l>::tensor_reduced srhs(rhs);
|
typename iScalar<l>::tensor_reduced srhs; srhs=rhs;
|
||||||
return lhs-srhs;
|
return lhs-srhs;
|
||||||
}
|
}
|
||||||
template<class l,int N> inline iScalar<l> operator - (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs)
|
template<class l,int N> inline iScalar<l> operator - (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs)
|
||||||
{
|
{
|
||||||
typename iScalar<l>::tensor_reduced slhs(lhs);
|
typename iScalar<l>::tensor_reduced slhs;slhs=lhs;
|
||||||
return slhs-rhs;
|
return slhs-rhs;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class l,int N> inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type rhs)
|
template<class l,int N> inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type rhs)
|
||||||
{
|
{
|
||||||
typename iScalar<l>::tensor_reduced srhs(rhs);
|
typename iScalar<l>::tensor_reduced srhs; srhs=rhs;
|
||||||
return lhs-srhs;
|
return lhs-srhs;
|
||||||
}
|
}
|
||||||
template<class l,int N> inline iMatrix<l,N> operator - (const typename iScalar<l>::scalar_type lhs,const iMatrix<l,N>& rhs)
|
template<class l,int N> inline iMatrix<l,N> operator - (const typename iScalar<l>::scalar_type lhs,const iMatrix<l,N>& rhs)
|
||||||
{
|
{
|
||||||
typename iScalar<l>::tensor_reduced slhs(lhs);
|
typename iScalar<l>::tensor_reduced slhs;slhs=lhs;
|
||||||
return slhs-rhs;
|
return slhs-rhs;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -201,27 +203,27 @@ template<class l,int N> inline iMatrix<l,N> operator - (const typename iScalar<l
|
|||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
template<class l> inline iScalar<l> operator - (const iScalar<l>& lhs,double rhs)
|
template<class l> inline iScalar<l> operator - (const iScalar<l>& lhs,double rhs)
|
||||||
{
|
{
|
||||||
typename iScalar<l>::scalar_type t(rhs);
|
typename iScalar<l>::scalar_type t; t=rhs;
|
||||||
typename iScalar<l>::tensor_reduced srhs(t);
|
typename iScalar<l>::tensor_reduced srhs; srhs=t;
|
||||||
return lhs-srhs;
|
return lhs-srhs;
|
||||||
}
|
}
|
||||||
template<class l> inline iScalar<l> operator - (double lhs,const iScalar<l>& rhs)
|
template<class l> inline iScalar<l> operator - (double lhs,const iScalar<l>& rhs)
|
||||||
{
|
{
|
||||||
typename iScalar<l>::scalar_type t(lhs);
|
typename iScalar<l>::scalar_type t(lhs);
|
||||||
typename iScalar<l>::tensor_reduced slhs(t);
|
typename iScalar<l>::tensor_reduced slhs;slhs=t;
|
||||||
return slhs-rhs;
|
return slhs-rhs;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class l,int N> inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,double rhs)
|
template<class l,int N> inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,double rhs)
|
||||||
{
|
{
|
||||||
typename iScalar<l>::scalar_type t(rhs);
|
typename iScalar<l>::scalar_type t;t=rhs;
|
||||||
typename iScalar<l>::tensor_reduced srhs(t);
|
typename iScalar<l>::tensor_reduced srhs;srhs=t;
|
||||||
return lhs-srhs;
|
return lhs-srhs;
|
||||||
}
|
}
|
||||||
template<class l,int N> inline iMatrix<l,N> operator - (double lhs,const iMatrix<l,N>& rhs)
|
template<class l,int N> inline iMatrix<l,N> operator - (double lhs,const iMatrix<l,N>& rhs)
|
||||||
{
|
{
|
||||||
typename iScalar<l>::scalar_type t(lhs);
|
typename iScalar<l>::scalar_type t(lhs);
|
||||||
typename iScalar<l>::tensor_reduced slhs(t);
|
typename iScalar<l>::tensor_reduced slhs;slhs=t;
|
||||||
return slhs-rhs;
|
return slhs-rhs;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -230,26 +232,26 @@ template<class l,int N> inline iMatrix<l,N> operator - (double lhs,const iMatrix
|
|||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
template<class l> inline iScalar<l> operator - (const iScalar<l>& lhs,Integer rhs)
|
template<class l> inline iScalar<l> operator - (const iScalar<l>& lhs,Integer rhs)
|
||||||
{
|
{
|
||||||
typename iScalar<l>::scalar_type t(rhs);
|
typename iScalar<l>::scalar_type t; t=rhs;
|
||||||
typename iScalar<l>::tensor_reduced srhs(t);
|
typename iScalar<l>::tensor_reduced srhs; srhs=t;
|
||||||
return lhs-srhs;
|
return lhs-srhs;
|
||||||
}
|
}
|
||||||
template<class l> inline iScalar<l> operator - (Integer lhs,const iScalar<l>& rhs)
|
template<class l> inline iScalar<l> operator - (Integer lhs,const iScalar<l>& rhs)
|
||||||
{
|
{
|
||||||
typename iScalar<l>::scalar_type t(lhs);
|
typename iScalar<l>::scalar_type t;t=lhs;
|
||||||
typename iScalar<l>::tensor_reduced slhs(t);
|
typename iScalar<l>::tensor_reduced slhs;slhs=t;
|
||||||
return slhs-rhs;
|
return slhs-rhs;
|
||||||
}
|
}
|
||||||
template<class l,int N> inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,Integer rhs)
|
template<class l,int N> inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,Integer rhs)
|
||||||
{
|
{
|
||||||
typename iScalar<l>::scalar_type t(rhs);
|
typename iScalar<l>::scalar_type t;t=rhs;
|
||||||
typename iScalar<l>::tensor_reduced srhs(t);
|
typename iScalar<l>::tensor_reduced srhs;srhs=t;
|
||||||
return lhs-srhs;
|
return lhs-srhs;
|
||||||
}
|
}
|
||||||
template<class l,int N> inline iMatrix<l,N> operator - (Integer lhs,const iMatrix<l,N>& rhs)
|
template<class l,int N> inline iMatrix<l,N> operator - (Integer lhs,const iMatrix<l,N>& rhs)
|
||||||
{
|
{
|
||||||
typename iScalar<l>::scalar_type t(lhs);
|
typename iScalar<l>::scalar_type t;t=lhs;
|
||||||
typename iScalar<l>::tensor_reduced slhs(t);
|
typename iScalar<l>::tensor_reduced slhs;slhs=t;
|
||||||
return slhs-rhs;
|
return slhs-rhs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -17,7 +17,8 @@ namespace Grid {
|
|||||||
auto innerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0],rhs._internal[0]))>
|
auto innerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0],rhs._internal[0]))>
|
||||||
{
|
{
|
||||||
typedef decltype(innerProduct(lhs._internal[0],rhs._internal[0])) ret_t;
|
typedef decltype(innerProduct(lhs._internal[0],rhs._internal[0])) ret_t;
|
||||||
iScalar<ret_t> ret=zero;
|
iScalar<ret_t> ret;
|
||||||
|
ret=zero;
|
||||||
for(int c1=0;c1<N;c1++){
|
for(int c1=0;c1<N;c1++){
|
||||||
ret._internal += innerProduct(lhs._internal[c1],rhs._internal[c1]);
|
ret._internal += innerProduct(lhs._internal[c1],rhs._internal[c1]);
|
||||||
}
|
}
|
||||||
@ -27,8 +28,9 @@ namespace Grid {
|
|||||||
auto innerProduct (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0]))>
|
auto innerProduct (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0]))>
|
||||||
{
|
{
|
||||||
typedef decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0])) ret_t;
|
typedef decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0])) ret_t;
|
||||||
iScalar<ret_t> ret=zero;
|
iScalar<ret_t> ret;
|
||||||
iScalar<ret_t> tmp;
|
iScalar<ret_t> tmp;
|
||||||
|
ret=zero;
|
||||||
for(int c1=0;c1<N;c1++){
|
for(int c1=0;c1<N;c1++){
|
||||||
for(int c2=0;c2<N;c2++){
|
for(int c2=0;c2<N;c2++){
|
||||||
ret._internal+=innerProduct(lhs._internal[c1][c2],rhs._internal[c1][c2]);
|
ret._internal+=innerProduct(lhs._internal[c1][c2],rhs._internal[c1][c2]);
|
||||||
|
@ -8,6 +8,16 @@ namespace Grid {
|
|||||||
// These can be composed to form tensor products of internal indices.
|
// These can be composed to form tensor products of internal indices.
|
||||||
///////////////////////////////////////////////////
|
///////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// It is useful to NOT have any constructors
|
||||||
|
// so that these classes assert "is_pod<class> == true"
|
||||||
|
// because then the standard C++ valarray container eliminates fill overhead on new allocation and
|
||||||
|
// non-move copying.
|
||||||
|
//
|
||||||
|
// However note that doing this eliminates some syntactical sugar such as
|
||||||
|
// calling the constructor explicitly or implicitly
|
||||||
|
//
|
||||||
|
#define TENSOR_IS_POD
|
||||||
|
|
||||||
template<class vtype> class iScalar
|
template<class vtype> class iScalar
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -25,16 +35,22 @@ public:
|
|||||||
// Scalar no action
|
// Scalar no action
|
||||||
// template<int Level> using tensor_reduce_level = typename iScalar<GridTypeMapper<vtype>::tensor_reduce_level<Level> >;
|
// template<int Level> using tensor_reduce_level = typename iScalar<GridTypeMapper<vtype>::tensor_reduce_level<Level> >;
|
||||||
|
|
||||||
iScalar(){};
|
#ifndef TENSOR_IS_POD
|
||||||
|
iScalar(){;};
|
||||||
iScalar(scalar_type s) : _internal(s) {};// recurse down and hit the constructor for vector_type
|
iScalar(scalar_type s) : _internal(s) {};// recurse down and hit the constructor for vector_type
|
||||||
|
iScalar(const Zero &z){ *this = zero; };
|
||||||
iScalar(const Zero &z){ *this = zero; };
|
#endif
|
||||||
|
|
||||||
iScalar<vtype> & operator= (const Zero &hero){
|
iScalar<vtype> & operator= (const Zero &hero){
|
||||||
zeroit(*this);
|
zeroit(*this);
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
iScalar<vtype> & operator= (const scalar_type s){
|
||||||
|
_internal=s;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
friend void zeroit(iScalar<vtype> &that){
|
friend void zeroit(iScalar<vtype> &that){
|
||||||
zeroit(that._internal);
|
zeroit(that._internal);
|
||||||
}
|
}
|
||||||
@ -114,8 +130,10 @@ public:
|
|||||||
|
|
||||||
enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1};
|
enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1};
|
||||||
|
|
||||||
iVector(const Zero &z){ *this = zero; };
|
#ifndef TENSOR_IS_POD
|
||||||
iVector() {};// Empty constructure
|
iVector(const Zero &z){ *this = zero; };
|
||||||
|
iVector() {};// Empty constructure
|
||||||
|
#endif
|
||||||
|
|
||||||
iVector<vtype,N> & operator= (const Zero &hero){
|
iVector<vtype,N> & operator= (const Zero &hero){
|
||||||
zeroit(*this);
|
zeroit(*this);
|
||||||
@ -185,8 +203,11 @@ public:
|
|||||||
|
|
||||||
enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1};
|
enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1};
|
||||||
|
|
||||||
|
#ifndef TENSOR_IS_POD
|
||||||
iMatrix(const Zero &z){ *this = zero; };
|
iMatrix(const Zero &z){ *this = zero; };
|
||||||
iMatrix() {};
|
iMatrix() {};
|
||||||
|
#endif
|
||||||
|
|
||||||
iMatrix<vtype,N> & operator= (const Zero &hero){
|
iMatrix<vtype,N> & operator= (const Zero &hero){
|
||||||
zeroit(*this);
|
zeroit(*this);
|
||||||
return *this;
|
return *this;
|
||||||
|
@ -8,6 +8,7 @@ namespace QCD {
|
|||||||
static const int Ns=4;
|
static const int Ns=4;
|
||||||
static const int Nd=4;
|
static const int Nd=4;
|
||||||
static const int Nhs=2; // half spinor
|
static const int Nhs=2; // half spinor
|
||||||
|
static const int Nds=8; // double stored gauge field
|
||||||
|
|
||||||
static const int CbRed =0;
|
static const int CbRed =0;
|
||||||
static const int CbBlack=1;
|
static const int CbBlack=1;
|
||||||
@ -28,79 +29,216 @@ namespace QCD {
|
|||||||
//
|
//
|
||||||
// That probably makes for GridRedBlack4dCartesian grid.
|
// That probably makes for GridRedBlack4dCartesian grid.
|
||||||
|
|
||||||
template<typename vtype> using iSinglet = iScalar<iScalar<iScalar<vtype> > >;
|
// s,sp,c,spc,lc
|
||||||
template<typename vtype> using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
|
template<typename vtype> using iSinglet = iScalar<iScalar<iScalar<vtype> > >;
|
||||||
template<typename vtype> using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
|
template<typename vtype> using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
|
||||||
template<typename vtype> using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
|
template<typename vtype> using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
|
||||||
template<typename vtype> using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;
|
template<typename vtype> using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
|
||||||
|
template<typename vtype> using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;
|
||||||
|
template<typename vtype> using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ;
|
||||||
|
template<typename vtype> using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
|
||||||
|
template<typename vtype> using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
|
||||||
|
template<typename vtype> using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
|
||||||
|
template<typename vtype> using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
|
||||||
|
template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
|
||||||
|
|
||||||
|
// Spin matrix
|
||||||
|
typedef iSpinMatrix<Complex > SpinMatrix;
|
||||||
|
typedef iSpinMatrix<ComplexF > SpinMatrixF;
|
||||||
|
typedef iSpinMatrix<ComplexD > SpinMatrixD;
|
||||||
|
|
||||||
template<typename vtype> using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
|
typedef iSpinMatrix<vComplex > vSpinMatrix;
|
||||||
template<typename vtype> using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
|
typedef iSpinMatrix<vComplexF> vSpinMatrixF;
|
||||||
template<typename vtype> using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
|
typedef iSpinMatrix<vComplexD> vSpinMatrixD;
|
||||||
|
|
||||||
template<typename vtype> using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
|
// Colour Matrix
|
||||||
template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
|
typedef iColourMatrix<Complex > ColourMatrix;
|
||||||
|
typedef iColourMatrix<ComplexF > ColourMatrixF;
|
||||||
|
typedef iColourMatrix<ComplexD > ColourMatrixD;
|
||||||
|
|
||||||
typedef iSpinMatrix<Complex > SpinMatrix;
|
typedef iColourMatrix<vComplex > vColourMatrix;
|
||||||
typedef iColourMatrix<Complex > ColourMatrix;
|
typedef iColourMatrix<vComplexF> vColourMatrixF;
|
||||||
typedef iSpinColourMatrix<Complex > SpinColourMatrix;
|
typedef iColourMatrix<vComplexD> vColourMatrixD;
|
||||||
typedef iLorentzColourMatrix<Complex > LorentzColourMatrix;
|
|
||||||
|
// SpinColour matrix
|
||||||
|
typedef iSpinColourMatrix<Complex > SpinColourMatrix;
|
||||||
|
typedef iSpinColourMatrix<ComplexF > SpinColourMatrixF;
|
||||||
|
typedef iSpinColourMatrix<ComplexD > SpinColourMatrixD;
|
||||||
|
|
||||||
|
typedef iSpinColourMatrix<vComplex > vSpinColourMatrix;
|
||||||
|
typedef iSpinColourMatrix<vComplexF> vSpinColourMatrixF;
|
||||||
|
typedef iSpinColourMatrix<vComplexD> vSpinColourMatrixD;
|
||||||
|
|
||||||
|
// LorentzColour
|
||||||
|
typedef iLorentzColourMatrix<Complex > LorentzColourMatrix;
|
||||||
typedef iLorentzColourMatrix<ComplexF > LorentzColourMatrixF;
|
typedef iLorentzColourMatrix<ComplexF > LorentzColourMatrixF;
|
||||||
typedef iLorentzColourMatrix<ComplexD > LorentzColourMatrixD;
|
typedef iLorentzColourMatrix<ComplexD > LorentzColourMatrixD;
|
||||||
|
|
||||||
typedef iSpinVector<Complex > SpinVector;
|
|
||||||
typedef iColourVector<Complex > ColourVector;
|
|
||||||
typedef iSpinColourVector<Complex > SpinColourVector;
|
|
||||||
typedef iHalfSpinVector<Complex > HalfSpinVector;
|
|
||||||
typedef iHalfSpinColourVector<Complex > HalfSpinColourVector;
|
|
||||||
|
|
||||||
|
|
||||||
typedef iSpinMatrix<vComplex > vSpinMatrix;
|
|
||||||
typedef iColourMatrix<vComplex > vColourMatrix;
|
|
||||||
typedef iSpinColourMatrix<vComplex > vSpinColourMatrix;
|
|
||||||
typedef iLorentzColourMatrix<vComplex > vLorentzColourMatrix;
|
typedef iLorentzColourMatrix<vComplex > vLorentzColourMatrix;
|
||||||
|
typedef iLorentzColourMatrix<vComplexF> vLorentzColourMatrixF;
|
||||||
|
typedef iLorentzColourMatrix<vComplexD> vLorentzColourMatrixD;
|
||||||
|
|
||||||
|
// DoubleStored gauge field
|
||||||
|
typedef iDoubleStoredColourMatrix<Complex > DoubleStoredColourMatrix;
|
||||||
|
typedef iDoubleStoredColourMatrix<ComplexF > DoubleStoredColourMatrixF;
|
||||||
|
typedef iDoubleStoredColourMatrix<ComplexD > DoubleStoredColourMatrixD;
|
||||||
|
|
||||||
|
typedef iDoubleStoredColourMatrix<vComplex > vDoubleStoredColourMatrix;
|
||||||
|
typedef iDoubleStoredColourMatrix<vComplexF> vDoubleStoredColourMatrixF;
|
||||||
|
typedef iDoubleStoredColourMatrix<vComplexD> vDoubleStoredColourMatrixD;
|
||||||
|
|
||||||
|
// Spin vector
|
||||||
|
typedef iSpinVector<Complex > SpinVector;
|
||||||
|
typedef iSpinVector<ComplexF> SpinVectorF;
|
||||||
|
typedef iSpinVector<ComplexD> SpinVectorD;
|
||||||
|
|
||||||
typedef iSpinVector<vComplex > vSpinVector;
|
typedef iSpinVector<vComplex > vSpinVector;
|
||||||
|
typedef iSpinVector<vComplexF> vSpinVectorF;
|
||||||
|
typedef iSpinVector<vComplexD> vSpinVectorD;
|
||||||
|
|
||||||
|
// Colour vector
|
||||||
|
typedef iColourVector<Complex > ColourVector;
|
||||||
|
typedef iColourVector<ComplexF> ColourVectorF;
|
||||||
|
typedef iColourVector<ComplexD> ColourVectorD;
|
||||||
|
|
||||||
typedef iColourVector<vComplex > vColourVector;
|
typedef iColourVector<vComplex > vColourVector;
|
||||||
|
typedef iColourVector<vComplexF> vColourVectorF;
|
||||||
|
typedef iColourVector<vComplexD> vColourVectorD;
|
||||||
|
|
||||||
|
// SpinColourVector
|
||||||
|
typedef iSpinColourVector<Complex > SpinColourVector;
|
||||||
|
typedef iSpinColourVector<ComplexF> SpinColourVectorF;
|
||||||
|
typedef iSpinColourVector<ComplexD> SpinColourVectorD;
|
||||||
|
|
||||||
typedef iSpinColourVector<vComplex > vSpinColourVector;
|
typedef iSpinColourVector<vComplex > vSpinColourVector;
|
||||||
|
typedef iSpinColourVector<vComplexF> vSpinColourVectorF;
|
||||||
|
typedef iSpinColourVector<vComplexD> vSpinColourVectorD;
|
||||||
|
|
||||||
|
// HalfSpin vector
|
||||||
|
typedef iHalfSpinVector<Complex > HalfSpinVector;
|
||||||
|
typedef iHalfSpinVector<ComplexF> HalfSpinVectorF;
|
||||||
|
typedef iHalfSpinVector<ComplexD> HalfSpinVectorD;
|
||||||
|
|
||||||
typedef iHalfSpinVector<vComplex > vHalfSpinVector;
|
typedef iHalfSpinVector<vComplex > vHalfSpinVector;
|
||||||
typedef iHalfSpinColourVector<vComplex > vHalfSpinColourVector;
|
typedef iHalfSpinVector<vComplexF> vHalfSpinVectorF;
|
||||||
|
typedef iHalfSpinVector<vComplexD> vHalfSpinVectorD;
|
||||||
|
|
||||||
|
// HalfSpinColour vector
|
||||||
|
typedef iHalfSpinColourVector<Complex > HalfSpinColourVector;
|
||||||
|
typedef iHalfSpinColourVector<ComplexF> HalfSpinColourVectorF;
|
||||||
|
typedef iHalfSpinColourVector<ComplexD> HalfSpinColourVectorD;
|
||||||
|
|
||||||
|
typedef iHalfSpinColourVector<vComplex > vHalfSpinColourVector;
|
||||||
|
typedef iHalfSpinColourVector<vComplexF> vHalfSpinColourVectorF;
|
||||||
|
typedef iHalfSpinColourVector<vComplexD> vHalfSpinColourVectorD;
|
||||||
|
|
||||||
|
// singlets
|
||||||
typedef iSinglet<Complex > TComplex; // FIXME This is painful. Tensor singlet complex type.
|
typedef iSinglet<Complex > TComplex; // FIXME This is painful. Tensor singlet complex type.
|
||||||
typedef iSinglet<vComplex > vTComplex; // what if we don't know the tensor structure
|
typedef iSinglet<ComplexF> TComplexF; // FIXME This is painful. Tensor singlet complex type.
|
||||||
|
typedef iSinglet<ComplexD> TComplexD; // FIXME This is painful. Tensor singlet complex type.
|
||||||
|
|
||||||
|
typedef iSinglet<vComplex > vTComplex ; // what if we don't know the tensor structure
|
||||||
|
typedef iSinglet<vComplexF> vTComplexF; // what if we don't know the tensor structure
|
||||||
|
typedef iSinglet<vComplexD> vTComplexD; // what if we don't know the tensor structure
|
||||||
|
|
||||||
typedef iSinglet<Real > TReal; // Shouldn't need these; can I make it work without?
|
typedef iSinglet<Real > TReal; // Shouldn't need these; can I make it work without?
|
||||||
|
typedef iSinglet<RealF> TRealF; // Shouldn't need these; can I make it work without?
|
||||||
|
typedef iSinglet<RealD> TRealD; // Shouldn't need these; can I make it work without?
|
||||||
|
|
||||||
typedef iSinglet<vReal > vTReal;
|
typedef iSinglet<vReal > vTReal;
|
||||||
typedef iSinglet<vInteger > vTInteger;
|
typedef iSinglet<vRealF> vTRealF;
|
||||||
|
typedef iSinglet<vRealD> vTRealD;
|
||||||
|
|
||||||
|
typedef iSinglet<vInteger> vTInteger;
|
||||||
typedef iSinglet<Integer > TInteger;
|
typedef iSinglet<Integer > TInteger;
|
||||||
|
|
||||||
typedef Lattice<vTReal> LatticeReal;
|
|
||||||
typedef Lattice<vTComplex> LatticeComplex;
|
|
||||||
typedef Lattice<vTInteger> LatticeInteger; // Predicates for "where"
|
|
||||||
|
|
||||||
typedef Lattice<vColourMatrix> LatticeColourMatrix;
|
|
||||||
typedef Lattice<vSpinMatrix> LatticeSpinMatrix;
|
|
||||||
typedef Lattice<vSpinColourMatrix> LatticeSpinColourMatrix;
|
|
||||||
|
|
||||||
typedef Lattice<vSpinVector> LatticeSpinVector;
|
// Lattices of these
|
||||||
typedef Lattice<vColourVector> LatticeColourVector;
|
typedef Lattice<vColourMatrix> LatticeColourMatrix;
|
||||||
typedef Lattice<vSpinColourVector> LatticeSpinColourVector;
|
typedef Lattice<vColourMatrixF> LatticeColourMatrixF;
|
||||||
typedef Lattice<vHalfSpinVector> LatticeHalfSpinVector;
|
typedef Lattice<vColourMatrixD> LatticeColourMatrixD;
|
||||||
typedef Lattice<vHalfSpinColourVector> LatticeHalfSpinColourVector;
|
|
||||||
|
typedef Lattice<vSpinMatrix> LatticeSpinMatrix;
|
||||||
|
typedef Lattice<vSpinMatrixF> LatticeSpinMatrixF;
|
||||||
|
typedef Lattice<vSpinMatrixD> LatticeSpinMatrixD;
|
||||||
|
|
||||||
|
typedef Lattice<vSpinColourMatrix> LatticeSpinColourMatrix;
|
||||||
|
typedef Lattice<vSpinColourMatrixF> LatticeSpinColourMatrixF;
|
||||||
|
typedef Lattice<vSpinColourMatrixD> LatticeSpinColourMatrixD;
|
||||||
|
|
||||||
|
|
||||||
|
typedef Lattice<vLorentzColourMatrix> LatticeLorentzColourMatrix;
|
||||||
|
typedef Lattice<vLorentzColourMatrixF> LatticeLorentzColourMatrixF;
|
||||||
|
typedef Lattice<vLorentzColourMatrixD> LatticeLorentzColourMatrixD;
|
||||||
|
|
||||||
|
// DoubleStored gauge field
|
||||||
|
typedef Lattice<vDoubleStoredColourMatrix> LatticeDoubleStoredColourMatrix;
|
||||||
|
typedef Lattice<vDoubleStoredColourMatrixF> LatticeDoubleStoredColourMatrixF;
|
||||||
|
typedef Lattice<vDoubleStoredColourMatrixD> LatticeDoubleStoredColourMatrixD;
|
||||||
|
|
||||||
|
typedef Lattice<vSpinVector> LatticeSpinVector;
|
||||||
|
typedef Lattice<vSpinVectorF> LatticeSpinVectorF;
|
||||||
|
typedef Lattice<vSpinVectorD> LatticeSpinVectorD;
|
||||||
|
|
||||||
|
typedef Lattice<vColourVector> LatticeColourVector;
|
||||||
|
typedef Lattice<vColourVectorF> LatticeColourVectorF;
|
||||||
|
typedef Lattice<vColourVectorD> LatticeColourVectorD;
|
||||||
|
|
||||||
|
typedef Lattice<vSpinColourVector> LatticeSpinColourVector;
|
||||||
|
typedef Lattice<vSpinColourVectorF> LatticeSpinColourVectorF;
|
||||||
|
typedef Lattice<vSpinColourVectorD> LatticeSpinColourVectorD;
|
||||||
|
|
||||||
|
typedef Lattice<vHalfSpinVector> LatticeHalfSpinVector;
|
||||||
|
typedef Lattice<vHalfSpinVectorF> LatticeHalfSpinVectorF;
|
||||||
|
typedef Lattice<vHalfSpinVectorD> LatticeHalfSpinVectorD;
|
||||||
|
|
||||||
|
typedef Lattice<vHalfSpinColourVector> LatticeHalfSpinColourVector;
|
||||||
|
typedef Lattice<vHalfSpinColourVectorF> LatticeHalfSpinColourVectorF;
|
||||||
|
typedef Lattice<vHalfSpinColourVectorD> LatticeHalfSpinColourVectorD;
|
||||||
|
|
||||||
|
typedef Lattice<vTReal> LatticeReal;
|
||||||
|
typedef Lattice<vTRealF> LatticeRealF;
|
||||||
|
typedef Lattice<vTRealD> LatticeRealD;
|
||||||
|
|
||||||
|
typedef Lattice<vTComplex> LatticeComplex;
|
||||||
|
typedef Lattice<vTComplexF> LatticeComplexF;
|
||||||
|
typedef Lattice<vTComplexD> LatticeComplexD;
|
||||||
|
|
||||||
|
typedef Lattice<vTInteger> LatticeInteger; // Predicates for "where"
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////
|
///////////////////////////////////////////
|
||||||
// Physical names for things
|
// Physical names for things
|
||||||
///////////////////////////////////////////
|
///////////////////////////////////////////
|
||||||
typedef Lattice<vHalfSpinColourVector> LatticeHalfFermion;
|
typedef LatticeHalfSpinColourVector LatticeHalfFermion;
|
||||||
typedef Lattice<vSpinColourVector> LatticeFermion;
|
typedef LatticeHalfSpinColourVectorF LatticeHalfFermionF;
|
||||||
|
typedef LatticeHalfSpinColourVectorF LatticeHalfFermionD;
|
||||||
|
|
||||||
typedef Lattice<vSpinColourMatrix> LatticePropagator;
|
typedef LatticeSpinColourVector LatticeFermion;
|
||||||
typedef Lattice<vLorentzColourMatrix> LatticeGaugeField;
|
typedef LatticeSpinColourVectorF LatticeFermionF;
|
||||||
|
typedef LatticeSpinColourVectorD LatticeFermionD;
|
||||||
|
|
||||||
|
typedef LatticeSpinColourMatrix LatticePropagator;
|
||||||
|
typedef LatticeSpinColourMatrixF LatticePropagatorF;
|
||||||
|
typedef LatticeSpinColourMatrixD LatticePropagatorD;
|
||||||
|
|
||||||
|
typedef LatticeLorentzColourMatrix LatticeGaugeField;
|
||||||
|
typedef LatticeLorentzColourMatrixF LatticeGaugeFieldF;
|
||||||
|
typedef LatticeLorentzColourMatrixD LatticeGaugeFieldD;
|
||||||
|
|
||||||
|
typedef LatticeDoubleStoredColourMatrix LatticeDoubledGaugeField;
|
||||||
|
typedef LatticeDoubleStoredColourMatrixF LatticeDoubledGaugeFieldF;
|
||||||
|
typedef LatticeDoubleStoredColourMatrixD LatticeDoubledGaugeFieldD;
|
||||||
|
|
||||||
// Uhgg... typing this hurt ;)
|
// Uhgg... typing this hurt ;)
|
||||||
// (my keyboard got burning hot when I typed this, must be the anti-Fermion)
|
// (my keyboard got burning hot when I typed this, must be the anti-Fermion)
|
||||||
typedef Lattice<vColourVector> LatticeStaggeredFermion;
|
typedef Lattice<vColourVector> LatticeStaggeredFermion;
|
||||||
typedef Lattice<vColourMatrix> LatticeStaggeredPropagator;
|
typedef Lattice<vColourVectorF> LatticeStaggeredFermionF;
|
||||||
|
typedef Lattice<vColourVectorD> LatticeStaggeredFermionD;
|
||||||
|
|
||||||
|
typedef Lattice<vColourMatrix> LatticeStaggeredPropagator;
|
||||||
|
typedef Lattice<vColourMatrixF> LatticeStaggeredPropagatorF;
|
||||||
|
typedef Lattice<vColourMatrixD> LatticeStaggeredPropagatorD;
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// Peek and Poke named after physics attributes
|
// Peek and Poke named after physics attributes
|
||||||
@ -157,11 +295,14 @@ namespace QCD {
|
|||||||
return peekIndex<LorentzIndex>(rhs,i,j);
|
return peekIndex<LorentzIndex>(rhs,i,j);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FIXME transpose Colour, transpose Spin, traceColour traceSpin
|
||||||
|
|
||||||
} //namespace QCD
|
} //namespace QCD
|
||||||
} // Grid
|
} // Grid
|
||||||
|
|
||||||
#include <qcd/Grid_qcd_dirac.h>
|
#include <qcd/Grid_qcd_dirac.h>
|
||||||
#include <qcd/Grid_qcd_2spinor.h>
|
#include <qcd/Grid_qcd_2spinor.h>
|
||||||
//#include <qcd/Grid_qcd_pauli.h>
|
//#include <qcd/Grid_qcd_pauli.h>
|
||||||
|
#include <qcd/Grid_qcd_wilson_dop.h>
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -17,8 +17,14 @@ namespace QCD {
|
|||||||
GammaZ,
|
GammaZ,
|
||||||
GammaT,
|
GammaT,
|
||||||
Gamma5,
|
Gamma5,
|
||||||
// GammaXGamma5,
|
MinusIdentity,
|
||||||
// GammaYGamma5,
|
MinusGammaX,
|
||||||
|
MinusGammaY,
|
||||||
|
MinusGammaZ,
|
||||||
|
MinusGammaT,
|
||||||
|
MinusGamma5
|
||||||
|
// GammaXGamma5, // Rest are composite (willing to take hit for two calls sequentially)
|
||||||
|
// GammaYGamma5, // as they are less commonly used.
|
||||||
// GammaZGamma5,
|
// GammaZGamma5,
|
||||||
// GammaTGamma5,
|
// GammaTGamma5,
|
||||||
// SigmaXY,
|
// SigmaXY,
|
||||||
@ -27,12 +33,6 @@ namespace QCD {
|
|||||||
// SigmaXT,
|
// SigmaXT,
|
||||||
// SigmaYT,
|
// SigmaYT,
|
||||||
// SigmaZT,
|
// SigmaZT,
|
||||||
MinusIdentity,
|
|
||||||
MinusGammaX,
|
|
||||||
MinusGammaY,
|
|
||||||
MinusGammaZ,
|
|
||||||
MinusGammaT,
|
|
||||||
MinusGamma5
|
|
||||||
// MinusGammaXGamma5, easiest to form by composition
|
// MinusGammaXGamma5, easiest to form by composition
|
||||||
// MinusGammaYGamma5, as performance is not critical for these
|
// MinusGammaYGamma5, as performance is not critical for these
|
||||||
// MinusGammaZGamma5,
|
// MinusGammaZGamma5,
|
||||||
@ -54,7 +54,6 @@ namespace QCD {
|
|||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
/* Gx
|
/* Gx
|
||||||
* 0 0 0 i
|
* 0 0 0 i
|
||||||
* 0 0 i 0
|
* 0 0 i 0
|
||||||
|
@ -1,157 +1,220 @@
|
|||||||
#ifnfdef GRID_QCD_WILSON_DOP_H
|
|
||||||
#define GRID_QCD_WILSON_DOP_H
|
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
|
|
||||||
const std::vector<int> WilsonMatrix::directions ({0,1,2,3, 0, 1, 2, 3,0});
|
const std::vector<int> WilsonMatrix::directions ({0,1,2,3, 0, 1, 2, 3,0});
|
||||||
const std::vector<int> WilsonMatrix::displacements({1,1,1,1,-1,-1,-1,-1,0});
|
const std::vector<int> WilsonMatrix::displacements({1,1,1,1,-1,-1,-1,-1,0});
|
||||||
|
|
||||||
// Should be in header?
|
// Should be in header?
|
||||||
static const int WilsonMatrix::Xp = 0;
|
const int WilsonMatrix::Xp = 0;
|
||||||
static const int WilsonMatrix::Yp = 1;
|
const int WilsonMatrix::Yp = 1;
|
||||||
static const int WilsonMatrix::Zp = 2;
|
const int WilsonMatrix::Zp = 2;
|
||||||
static const int WilsonMatrix::Tp = 3;
|
const int WilsonMatrix::Tp = 3;
|
||||||
static const int WilsonMatrix::Xm = 4;
|
const int WilsonMatrix::Xm = 4;
|
||||||
static const int WilsonMatrix::Ym = 5;
|
const int WilsonMatrix::Ym = 5;
|
||||||
static const int WilsonMatrix::Zm = 6;
|
const int WilsonMatrix::Zm = 6;
|
||||||
static const int WilsonMatrix::Tm = 7;
|
const int WilsonMatrix::Tm = 7;
|
||||||
static const int WilsonMatrix::X0 = 8;
|
//const int WilsonMatrix::X0 = 8;
|
||||||
static const int WilsonMatrix::npoint=9;
|
|
||||||
|
|
||||||
|
class WilsonCompressor {
|
||||||
|
public:
|
||||||
|
int mu;
|
||||||
|
|
||||||
|
void Point(int p) { mu=p;};
|
||||||
|
vHalfSpinColourVector operator () (vSpinColourVector &in)
|
||||||
|
{
|
||||||
|
vHalfSpinColourVector ret;
|
||||||
|
switch(mu) {
|
||||||
|
case WilsonMatrix::Xp:
|
||||||
|
spProjXp(ret,in);
|
||||||
|
break;
|
||||||
|
case WilsonMatrix::Yp:
|
||||||
|
spProjYp(ret,in);
|
||||||
|
break;
|
||||||
|
case WilsonMatrix::Zp:
|
||||||
|
spProjZp(ret,in);
|
||||||
|
break;
|
||||||
|
case WilsonMatrix::Tp:
|
||||||
|
spProjTp(ret,in);
|
||||||
|
break;
|
||||||
|
case WilsonMatrix::Xm:
|
||||||
|
spProjXm(ret,in);
|
||||||
|
break;
|
||||||
|
case WilsonMatrix::Ym:
|
||||||
|
spProjYm(ret,in);
|
||||||
|
break;
|
||||||
|
case WilsonMatrix::Zm:
|
||||||
|
spProjZm(ret,in);
|
||||||
|
break;
|
||||||
|
case WilsonMatrix::Tm:
|
||||||
|
spProjTm(ret,in);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
WilsonMatrix::WilsonMatrix(LatticeGaugeField &_Umu,int _mass)
|
WilsonMatrix::WilsonMatrix(LatticeGaugeField &_Umu,double _mass)
|
||||||
: Stencil((&Umu._grid,npoint,0,directions,displacements),
|
: Stencil(Umu._grid,npoint,0,directions,displacements),
|
||||||
mass(_mass),
|
mass(_mass),
|
||||||
Umu(_Umu)
|
Umu(_Umu._grid)
|
||||||
{
|
{
|
||||||
// Allocate the required comms buffer
|
// Allocate the required comms buffer
|
||||||
grid = _Umu._grid;
|
grid = _Umu._grid;
|
||||||
comm_buf.resize(Stencil._unified_buffer_size);
|
comm_buf.resize(Stencil._unified_buffer_size);
|
||||||
|
DoubleStore(Umu,_Umu);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void WilsonMatrix::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu)
|
||||||
|
{
|
||||||
|
LatticeColourMatrix U(grid);
|
||||||
|
|
||||||
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
U = peekIndex<LorentzIndex>(Umu,mu);
|
||||||
|
pokeIndex<LorentzIndex>(Uds,U,mu);
|
||||||
|
U = adj(Cshift(U,mu,-1));
|
||||||
|
pokeIndex<LorentzIndex>(Uds,U,mu+4);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void WilsonMatrix::multiply(const LatticeFermion &in, LatticeFermion &out)
|
void WilsonMatrix::multiply(const LatticeFermion &in, LatticeFermion &out)
|
||||||
{
|
{
|
||||||
|
Dhop(in,out);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
|
void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
|
||||||
{
|
{
|
||||||
Stencil.HaloExchange(in,comm_buf);
|
// Stencil.HaloExchange(in,comm_buf);
|
||||||
|
|
||||||
for(int ss=0;ss<_grid->oSites();ss++){
|
for(int ss=0;ss<grid->oSites();ss++){
|
||||||
|
|
||||||
int offset,local;
|
int offset,local;
|
||||||
|
|
||||||
vSpinColourVector result;
|
vSpinColourVector result;
|
||||||
vHalfSpinColourVector UChi;
|
vHalfSpinColourVector chi;
|
||||||
|
vHalfSpinColourVector Uchi;
|
||||||
|
vHalfSpinColourVector *chi_p;
|
||||||
// Xp
|
// Xp
|
||||||
offset = Stencil._offsets [Xp][ss];
|
offset = Stencil._offsets [Xp][ss];
|
||||||
local = Stencil._is_local[Xp][ss];
|
local = Stencil._is_local[Xp][ss];
|
||||||
if ( local ) {
|
|
||||||
Uchi = U[]*spProjXp(in._odata[offset]);
|
|
||||||
} else {
|
|
||||||
Uchi = U[]*comm_buf._odata[offset]
|
|
||||||
}
|
|
||||||
result = ReconXp(Uchi);
|
|
||||||
|
|
||||||
|
chi_p = &comm_buf[offset];
|
||||||
|
if ( local ) {
|
||||||
|
spProjXp(chi,in._odata[offset]);
|
||||||
|
chi_p = χ
|
||||||
|
}
|
||||||
|
mult(&(Uchi()),&(Umu._odata[ss](Xp)),&(*chi_p)());
|
||||||
|
spReconXp(result,Uchi);
|
||||||
|
|
||||||
|
#if 0
|
||||||
// Yp
|
// Yp
|
||||||
offset = Stencil._offsets [Yp][ss];
|
offset = Stencil._offsets [Yp][ss];
|
||||||
local = Stencil._is_local[Yp][ss];
|
local = Stencil._is_local[Yp][ss];
|
||||||
|
chi_p = &comm_buf[offset];
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
Uchi = U[]*spProjYp(in._odata[offset]);
|
spProjYp(chi,in._odata[offset]);
|
||||||
} else {
|
chi_p = χ
|
||||||
Uchi = U[]*comm_buf._odata[offset]
|
}
|
||||||
}
|
mult(&(Uchi()),&(Umu._odata[ss](Yp)),&(*chi_p)());
|
||||||
result+= ReconYp(Uchi);
|
accumReconYp(result,Uchi);
|
||||||
|
|
||||||
// Zp
|
// Zp
|
||||||
offset = Stencil._offsets [Zp][ss];
|
offset = Stencil._offsets [Zp][ss];
|
||||||
local = Stencil._is_local[Zp][ss];
|
local = Stencil._is_local[Zp][ss];
|
||||||
|
chi_p = &comm_buf[offset];
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
Uchi = U[]*spProjZp(in._odata[offset]);
|
spProjZp(chi,in._odata[offset]);
|
||||||
} else {
|
chi_p = χ
|
||||||
Uchi = U[]*comm_buf._odata[offset]
|
}
|
||||||
}
|
mult(&(Uchi()),&(Umu._odata[ss](Zp)),&(*chi_p)() );
|
||||||
result+= ReconZp(Uchi);
|
accumReconZp(result,Uchi);
|
||||||
|
|
||||||
// Tp
|
// Tp
|
||||||
offset = Stencil._offsets [Tp][ss];
|
offset = Stencil._offsets [Tp][ss];
|
||||||
local = Stencil._is_local[Tp][ss];
|
local = Stencil._is_local[Tp][ss];
|
||||||
|
chi_p = &comm_buf[offset];
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
Uchi = U[]*spProjTp(in._odata[offset]);
|
spProjTp(chi,in._odata[offset]);
|
||||||
} else {
|
chi_p = χ
|
||||||
Uchi = U[]*comm_buf._odata[offset]
|
}
|
||||||
}
|
mult(&(Uchi()),&(Umu._odata[ss](Tp)),&(*chi_p)());
|
||||||
result+= ReconTp(Uchi);
|
accumReconTp(result,Uchi);
|
||||||
|
|
||||||
// Xm
|
// Xm
|
||||||
offset = Stencil._offsets [Xm][ss];
|
offset = Stencil._offsets [Xm][ss];
|
||||||
local = Stencil._is_local[Xm][ss];
|
local = Stencil._is_local[Xm][ss];
|
||||||
|
chi_p = &comm_buf[offset];
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
Uchi = U[]*spProjXm(in._odata[offset]);
|
spProjXm(chi,in._odata[offset]);
|
||||||
} else {
|
chi_p = χ
|
||||||
Uchi = U[]*comm_buf._odata[offset]
|
}
|
||||||
}
|
mult(&(Uchi()),&(Umu._odata[ss](Xm)),&(*chi_p)());
|
||||||
result+= ReconXm(Uchi);
|
accumReconXm(result,Uchi);
|
||||||
|
|
||||||
// Ym
|
// Ym
|
||||||
offset = Stencil._offsets [Ym][ss];
|
offset = Stencil._offsets [Ym][ss];
|
||||||
local = Stencil._is_local[Ym][ss];
|
local = Stencil._is_local[Ym][ss];
|
||||||
|
chi_p = &comm_buf[offset];
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
Uchi = U[]*spProjYm(in._odata[offset]);
|
spProjYm(chi,in._odata[offset]);
|
||||||
} else {
|
chi_p = χ
|
||||||
Uchi = U[]*comm_buf._odata[offset]
|
}
|
||||||
}
|
mult(&(Uchi()),&(Umu._odata[ss](Ym)),&(*chi_p)());
|
||||||
result+= ReconYm(Uchi);
|
accumReconYm(result,Uchi);
|
||||||
|
|
||||||
// Zm
|
// Zm
|
||||||
offset = Stencil._offsets [Zm][ss];
|
offset = Stencil._offsets [Zm][ss];
|
||||||
local = Stencil._is_local[Zm][ss];
|
local = Stencil._is_local[Zm][ss];
|
||||||
|
chi_p = &comm_buf[offset];
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
Uchi = U[]*spProjZm(in._odata[offset]);
|
spProjZm(chi,in._odata[offset]);
|
||||||
} else {
|
chi_p = χ
|
||||||
Uchi = U[]*comm_buf._odata[offset]
|
}
|
||||||
}
|
mult(&(Uchi()),&(Umu._odata[ss](Zm)),&(*chi_p)());
|
||||||
result+= ReconZm(Uchi);
|
accumReconZm(result,Uchi);
|
||||||
|
|
||||||
// Tm
|
// Tm
|
||||||
offset = Stencil._offsets [Tm][ss];
|
offset = Stencil._offsets [Tm][ss];
|
||||||
local = Stencil._is_local[Tm][ss];
|
local = Stencil._is_local[Tm][ss];
|
||||||
|
chi_p = &comm_buf[offset];
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
Uchi = U[]*spProjTm(in._odata[offset]);
|
spProjTm(chi,in._odata[offset]);
|
||||||
} else {
|
chi_p = χ
|
||||||
Uchi = U[]*comm_buf._odata[offset]
|
}
|
||||||
}
|
mult(&(Uchi()),&(Umu._odata[ss](Tm)),&(*chi_p)());
|
||||||
result+= ReconTm(Uchi);
|
accumReconTm(result,Uchi);
|
||||||
|
#endif
|
||||||
out._odata[ss] = result;
|
out._odata[ss] = result;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
void WilsonMatrix::Dw(const LatticeFermion &in, LatticeFermion &out)
|
void WilsonMatrix::Dw(const LatticeFermion &in, LatticeFermion &out)
|
||||||
{
|
{
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
void WilsonMatrix::MpcDag (const LatticeFermion &in, LatticeFermion &out)
|
void WilsonMatrix::MpcDag (const LatticeFermion &in, LatticeFermion &out)
|
||||||
{
|
{
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
void WilsonMatrix::Mpc (const LatticeFermion &in, LatticeFermion &out)
|
void WilsonMatrix::Mpc (const LatticeFermion &in, LatticeFermion &out)
|
||||||
{
|
{
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
void WilsonMatrix::MpcDagMpc(const LatticeFermion &in, LatticeFermion &out)
|
void WilsonMatrix::MpcDagMpc(const LatticeFermion &in, LatticeFermion &out)
|
||||||
{
|
{
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
void WilsonMatrix::MDagM (const LatticeFermion &in, LatticeFermion &out)
|
void WilsonMatrix::MDagM (const LatticeFermion &in, LatticeFermion &out)
|
||||||
{
|
{
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}}
|
}}
|
||||||
#endif
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
#ifnfdef GRID_QCD_WILSON_DOP_H
|
#ifndef GRID_QCD_WILSON_DOP_H
|
||||||
#define GRID_QCD_WILSON_DOP_H
|
#define GRID_QCD_WILSON_DOP_H
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid.h>
|
||||||
@ -21,21 +21,23 @@ namespace Grid {
|
|||||||
GridBase *grid;
|
GridBase *grid;
|
||||||
|
|
||||||
// Copy of the gauge field
|
// Copy of the gauge field
|
||||||
LatticeGaugeField Umu;
|
LatticeDoubledGaugeField Umu;
|
||||||
|
|
||||||
//Defines the stencil
|
//Defines the stencil
|
||||||
CartesianStencil Stencil;
|
CartesianStencil Stencil;
|
||||||
static const int npoint=9;
|
static const int npoint=9;
|
||||||
static const std::vector<int> directions ;
|
static const std::vector<int> directions ;
|
||||||
static const std::vector<int> displacements;
|
static const std::vector<int> displacements;
|
||||||
|
|
||||||
static const int Xp,Xm,Yp,Ym,Zp,Zm,Tp,Tm;
|
static const int Xp,Xm,Yp,Ym,Zp,Zm,Tp,Tm;
|
||||||
|
|
||||||
// Comms buffer
|
// Comms buffer
|
||||||
std::vector<vSpinColourVector,alignedAllocator<vSpinColourVector> > comm_buf;
|
std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> > comm_buf;
|
||||||
|
|
||||||
// Constructor
|
// Constructor
|
||||||
WilsonMatrix(LatticeGaugeField &Umu,int mass);
|
WilsonMatrix(LatticeGaugeField &Umu,double mass);
|
||||||
|
|
||||||
|
// DoubleStore
|
||||||
|
void DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu);
|
||||||
|
|
||||||
// override multiply
|
// override multiply
|
||||||
void multiply(const LatticeFermion &in, LatticeFermion &out);
|
void multiply(const LatticeFermion &in, LatticeFermion &out);
|
||||||
|
@ -251,13 +251,13 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
|
|||||||
friend inline vComplexD conj(const vComplexD &in){
|
friend inline vComplexD conj(const vComplexD &in){
|
||||||
vComplexD ret ; vzero(ret);
|
vComplexD ret ; vzero(ret);
|
||||||
#if defined (AVX1)|| defined (AVX2)
|
#if defined (AVX1)|| defined (AVX2)
|
||||||
// addsubps 0, inv=>0+in.v[3] 0-in.v[2], 0+in.v[1], 0-in.v[0], ...
|
// addsubps 0, inv=>0+in.v[3] 0-in.v[2], 0+in.v[1], 0-in.v[0], ...
|
||||||
// __m256d tmp = _mm256_addsub_pd(ret.v,_mm256_shuffle_pd(in.v,in.v,0x5));
|
zvec tmp = _mm256_addsub_pd(ret.v,_mm256_shuffle_pd(in.v,in.v,0x5));
|
||||||
// ret.v=_mm256_shuffle_pd(tmp,tmp,0x5);
|
ret.v =_mm256_shuffle_pd(tmp,tmp,0x5);
|
||||||
ret.v = _mm256_addsub_pd(ret.v,in.v);
|
|
||||||
#endif
|
#endif
|
||||||
#ifdef SSE4
|
#ifdef SSE4
|
||||||
ret.v = _mm_addsub_pd(ret.v,in.v);
|
zvec tmp = _mm_addsub_pd(ret.v,_mm_shuffle_pd(in.v,in.v,0x1));
|
||||||
|
ret.v = _mm_shuffle_pd(tmp,tmp,0x1);
|
||||||
#endif
|
#endif
|
||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
ret.v = _mm512_mask_sub_pd(in.v, 0xaaaa,ret.v, in.v);
|
ret.v = _mm512_mask_sub_pd(in.v, 0xaaaa,ret.v, in.v);
|
||||||
@ -268,48 +268,41 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
friend inline vComplexD timesI(const vComplexD &in){
|
friend inline vComplexD timesMinusI(const vComplexD &in){
|
||||||
vComplexD ret; vzero(ret);
|
vComplexD ret; vzero(ret);
|
||||||
|
vComplexD tmp;
|
||||||
#if defined (AVX1)|| defined (AVX2)
|
#if defined (AVX1)|| defined (AVX2)
|
||||||
cvec tmp =_mm256_addsub_ps(ret.v,in.v); // r,-i
|
tmp.v =_mm256_addsub_pd(ret.v,in.v); // r,-i
|
||||||
/*
|
ret.v =_mm256_shuffle_pd(tmp.v,tmp.v,0x5);
|
||||||
IF IMM0[0] = 0
|
|
||||||
THEN DEST[63:0]=SRC1[63:0] ELSE DEST[63:0]=SRC1[127:64] FI;
|
|
||||||
IF IMM0[1] = 0
|
|
||||||
THEN DEST[127:64]=SRC2[63:0] ELSE DEST[127:64]=SRC2[127:64] FI;
|
|
||||||
IF IMM0[2] = 0
|
|
||||||
THEN DEST[191:128]=SRC1[191:128] ELSE DEST[191:128]=SRC1[255:192] FI;
|
|
||||||
IF IMM0[3] = 0
|
|
||||||
THEN DEST[255:192]=SRC2[191:128] ELSE DEST[255:192]=SRC2[255:192] FI;
|
|
||||||
*/
|
|
||||||
ret.v =_mm256_shuffle_ps(tmp,tmp,0x5);
|
|
||||||
#endif
|
#endif
|
||||||
#ifdef SSE4
|
#ifdef SSE4
|
||||||
cvec tmp =_mm_addsub_ps(ret.v,in.v); // r,-i
|
tmp.v =_mm_addsub_pd(ret.v,in.v); // r,-i
|
||||||
ret.v =_mm_shuffle_ps(tmp,tmp,0x5);
|
ret.v =_mm_shuffle_pd(tmp.v,tmp.v,0x1);
|
||||||
#endif
|
#endif
|
||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
ret.v = _mm512_mask_sub_ps(in.v,0xaaaa,ret.v,in.v); // real -imag
|
ret.v = _mm512_mask_sub_pd(in.v,0xaaaa,ret.v,in.v); // real -imag
|
||||||
ret.v = _mm512_swizzle_ps(ret.v, _MM_SWIZ_REG_CDAB);// OK
|
ret.v = _mm512_swizzle_pd(ret.v, _MM_SWIZ_REG_CDAB);// OK
|
||||||
#endif
|
#endif
|
||||||
#ifdef QPX
|
#ifdef QPX
|
||||||
assert(0);
|
assert(0);
|
||||||
#endif
|
#endif
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
friend inline vComplexD timesMinusI(const vComplexD &in){
|
|
||||||
|
friend inline vComplexD timesI(const vComplexD &in){
|
||||||
vComplexD ret; vzero(ret);
|
vComplexD ret; vzero(ret);
|
||||||
|
vComplexD tmp;
|
||||||
#if defined (AVX1)|| defined (AVX2)
|
#if defined (AVX1)|| defined (AVX2)
|
||||||
cvec tmp =_mm256_shuffle_ps(in.v,in.v,0x5);
|
tmp.v =_mm256_shuffle_pd(in.v,in.v,0x5);
|
||||||
ret.v =_mm256_addsub_ps(ret.v,tmp); // i,-r
|
ret.v =_mm256_addsub_pd(ret.v,tmp.v); // i,-r
|
||||||
#endif
|
#endif
|
||||||
#ifdef SSE4
|
#ifdef SSE4
|
||||||
cvec tmp =_mm_shuffle_ps(in.v,in.v,0x5);
|
tmp.v =_mm_shuffle_pd(in.v,in.v,0x1);
|
||||||
ret.v =_mm_addsub_ps(ret.v,tmp); // r,-i
|
ret.v =_mm_addsub_pd(ret.v,tmp.v); // r,-i
|
||||||
#endif
|
#endif
|
||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
cvec tmp = _mm512_swizzle_ps(in.v, _MM_SWIZ_REG_CDAB);// OK
|
tmp.v = _mm512_swizzle_pd(in.v, _MM_SWIZ_REG_CDAB);// OK
|
||||||
ret.v = _mm512_mask_sub_ps(tmp,0xaaaa,ret.v,tmp); // real -imag
|
ret.v = _mm512_mask_sub_pd(tmp.v,0xaaaa,ret.v,tmp.v); // real -imag
|
||||||
#endif
|
#endif
|
||||||
#ifdef QPX
|
#ifdef QPX
|
||||||
assert(0);
|
assert(0);
|
||||||
|
@ -214,10 +214,10 @@ friend inline void vstore(const vComplexF &ret, ComplexF *a){
|
|||||||
{
|
{
|
||||||
#ifdef SSE4
|
#ifdef SSE4
|
||||||
union {
|
union {
|
||||||
__m128 v1; // SSE 4 x float vector
|
cvec v1; // SSE 4 x float vector
|
||||||
float f[4]; // scalar array of 4 floats
|
float f[4]; // scalar array of 4 floats
|
||||||
} u128;
|
} u128;
|
||||||
u128.v1= _mm_add_ps(v, _mm_shuffle_ps(v, v, 0b01001110)); // FIXME Prefer to use _MM_SHUFFLE macros
|
u128.v1= _mm_add_ps(in.v, _mm_shuffle_ps(in.v,in.v, 0b01001110)); // FIXME Prefer to use _MM_SHUFFLE macros
|
||||||
return ComplexF(u128.f[0], u128.f[1]);
|
return ComplexF(u128.f[0], u128.f[1]);
|
||||||
#endif
|
#endif
|
||||||
#ifdef AVX1
|
#ifdef AVX1
|
||||||
@ -329,13 +329,15 @@ friend inline void vstore(const vComplexF &ret, ComplexF *a){
|
|||||||
friend inline vComplexF conj(const vComplexF &in){
|
friend inline vComplexF conj(const vComplexF &in){
|
||||||
vComplexF ret ; vzero(ret);
|
vComplexF ret ; vzero(ret);
|
||||||
#if defined (AVX1)|| defined (AVX2)
|
#if defined (AVX1)|| defined (AVX2)
|
||||||
// cvec tmp;
|
cvec tmp;
|
||||||
// tmp = _mm256_addsub_ps(ret.v,_mm256_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1))); // ymm1 <- br,bi
|
tmp = _mm256_addsub_ps(ret.v,_mm256_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1))); // ymm1 <- br,bi
|
||||||
// ret.v=_mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1));
|
ret.v=_mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1));
|
||||||
ret.v = _mm256_addsub_ps(ret.v,in.v);
|
|
||||||
#endif
|
#endif
|
||||||
#ifdef SSE4
|
#ifdef SSE4
|
||||||
ret.v = _mm_addsub_ps(ret.v,in.v);
|
cvec tmp;
|
||||||
|
tmp = _mm_addsub_ps(ret.v,_mm_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1))); // ymm1 <- br,bi
|
||||||
|
ret.v=_mm_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1));
|
||||||
#endif
|
#endif
|
||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
ret.v = _mm512_mask_sub_ps(in.v,0xaaaa,ret.v,in.v); // Zero out 0+real 0-imag
|
ret.v = _mm512_mask_sub_ps(in.v,0xaaaa,ret.v,in.v); // Zero out 0+real 0-imag
|
||||||
@ -345,15 +347,16 @@ friend inline void vstore(const vComplexF &ret, ComplexF *a){
|
|||||||
#endif
|
#endif
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
friend inline vComplexF timesI(const vComplexF &in){
|
friend inline vComplexF timesMinusI(const vComplexF &in){
|
||||||
vComplexF ret; vzero(ret);
|
vComplexF ret;
|
||||||
|
vzero(ret);
|
||||||
#if defined (AVX1)|| defined (AVX2)
|
#if defined (AVX1)|| defined (AVX2)
|
||||||
cvec tmp =_mm256_addsub_ps(ret.v,in.v); // r,-i
|
cvec tmp =_mm256_addsub_ps(ret.v,in.v); // r,-i
|
||||||
ret.v = _mm256_shuffle_ps(tmp,tmp,0x5);
|
ret.v = _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1)); //-i,r
|
||||||
#endif
|
#endif
|
||||||
#ifdef SSE4
|
#ifdef SSE4
|
||||||
cvec tmp =_mm_addsub_ps(ret.v,in.v); // r,-i
|
cvec tmp =_mm_addsub_ps(ret.v,in.v); // r,-i
|
||||||
ret.v = _mm_shuffle_ps(tmp,tmp,0x5);
|
ret.v = _mm_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1));
|
||||||
#endif
|
#endif
|
||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
ret.v = _mm512_mask_sub_ps(in.v,0xaaaa,ret.v,in.v); // real -imag
|
ret.v = _mm512_mask_sub_ps(in.v,0xaaaa,ret.v,in.v); // real -imag
|
||||||
@ -364,14 +367,14 @@ friend inline void vstore(const vComplexF &ret, ComplexF *a){
|
|||||||
#endif
|
#endif
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
friend inline vComplexF timesMinusI(const vComplexF &in){
|
friend inline vComplexF timesI(const vComplexF &in){
|
||||||
vComplexF ret; vzero(ret);
|
vComplexF ret; vzero(ret);
|
||||||
#if defined (AVX1)|| defined (AVX2)
|
#if defined (AVX1)|| defined (AVX2)
|
||||||
cvec tmp =_mm256_shuffle_ps(in.v,in.v,0x5);
|
cvec tmp =_mm256_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1));//i,r
|
||||||
ret.v = _mm256_addsub_ps(ret.v,tmp); // i,-r
|
ret.v =_mm256_addsub_ps(ret.v,tmp); //i,-r
|
||||||
#endif
|
#endif
|
||||||
#ifdef SSE4
|
#ifdef SSE4
|
||||||
cvec tmp =_mm_shuffle_ps(in.v,in.v,0x5);
|
cvec tmp =_mm_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1));
|
||||||
ret.v = _mm_addsub_ps(ret.v,tmp); // r,-i
|
ret.v = _mm_addsub_ps(ret.v,tmp); // r,-i
|
||||||
#endif
|
#endif
|
||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
@ -443,5 +446,8 @@ friend inline void vstore(const vComplexF &ret, ComplexF *a){
|
|||||||
inline vComplexF trace(const vComplexF &arg){
|
inline vComplexF trace(const vComplexF &arg){
|
||||||
return arg;
|
return arg;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -146,7 +146,7 @@ namespace Grid {
|
|||||||
ret.v = _mm256_set_pd(a[3],a[2],a[1],a[0]);
|
ret.v = _mm256_set_pd(a[3],a[2],a[1],a[0]);
|
||||||
#endif
|
#endif
|
||||||
#ifdef SSE4
|
#ifdef SSE4
|
||||||
ret.v = _mm_set_pd(a[0],a[1]);
|
ret.v = _mm_set_pd(a[1],a[0]);
|
||||||
#endif
|
#endif
|
||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
ret.v = _mm512_set_pd(a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
|
ret.v = _mm512_set_pd(a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
|
||||||
@ -186,6 +186,15 @@ namespace Grid {
|
|||||||
|
|
||||||
friend inline RealD Reduce(const vRealD & in)
|
friend inline RealD Reduce(const vRealD & in)
|
||||||
{
|
{
|
||||||
|
#if defined (SSE4)
|
||||||
|
// FIXME Hack
|
||||||
|
const RealD * ptr =(const RealD *) ∈
|
||||||
|
RealD ret = 0;
|
||||||
|
for(int i=0;i<vRealD::Nsimd();i++){
|
||||||
|
ret = ret+ptr[i];
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
#endif
|
||||||
#if defined (AVX1) || defined(AVX2)
|
#if defined (AVX1) || defined(AVX2)
|
||||||
typedef union {
|
typedef union {
|
||||||
uint64_t l;
|
uint64_t l;
|
||||||
|
@ -175,7 +175,7 @@ namespace Grid {
|
|||||||
ret.v = _mm256_set_ps(a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
|
ret.v = _mm256_set_ps(a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
|
||||||
#endif
|
#endif
|
||||||
#ifdef SSE4
|
#ifdef SSE4
|
||||||
ret.v = _mm_set_ps(a[0],a[1],a[2],a[3]);
|
ret.v = _mm_set_ps(a[3],a[2],a[1],a[0]);
|
||||||
#endif
|
#endif
|
||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
ret.v = _mm512_set_ps( a[15],a[14],a[13],a[12],a[11],a[10],a[9],a[8],
|
ret.v = _mm512_set_ps( a[15],a[14],a[13],a[12],a[11],a[10],a[9],a[8],
|
||||||
@ -220,6 +220,15 @@ friend inline void vstore(const vRealF &ret, float *a){
|
|||||||
}
|
}
|
||||||
friend inline RealF Reduce(const vRealF & in)
|
friend inline RealF Reduce(const vRealF & in)
|
||||||
{
|
{
|
||||||
|
#if defined (SSE4)
|
||||||
|
// FIXME Hack
|
||||||
|
const RealF * ptr = (const RealF *) ∈
|
||||||
|
RealF ret = 0;
|
||||||
|
for(int i=0;i<vRealF::Nsimd();i++){
|
||||||
|
ret = ret+ptr[i];
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
#endif
|
||||||
#if defined (AVX1) || defined(AVX2)
|
#if defined (AVX1) || defined(AVX2)
|
||||||
__attribute__ ((aligned(32))) float c_[16];
|
__attribute__ ((aligned(32))) float c_[16];
|
||||||
__m256 tmp = _mm256_permute2f128_ps(in.v,in.v,0x01);
|
__m256 tmp = _mm256_permute2f128_ps(in.v,in.v,0x01);
|
||||||
|
@ -121,8 +121,9 @@ namespace Grid {
|
|||||||
int simd_layout = _grid->_simd_layout[dimension];
|
int simd_layout = _grid->_simd_layout[dimension];
|
||||||
int comm_dim = _grid->_processors[dimension] >1 ;
|
int comm_dim = _grid->_processors[dimension] >1 ;
|
||||||
|
|
||||||
assert(simd_layout==1);
|
// assert(simd_layout==1); // Why?
|
||||||
assert(comm_dim==1);
|
assert(comm_dim==1);
|
||||||
|
shift = (shift + fd) %fd;
|
||||||
assert(shift>=0);
|
assert(shift>=0);
|
||||||
assert(shift<fd);
|
assert(shift<fd);
|
||||||
|
|
||||||
|
@ -5,6 +5,11 @@ using namespace std;
|
|||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
using namespace Grid::QCD;
|
using namespace Grid::QCD;
|
||||||
|
|
||||||
|
template<class d>
|
||||||
|
struct scal {
|
||||||
|
d internal;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
int main (int argc, char ** argv)
|
int main (int argc, char ** argv)
|
||||||
{
|
{
|
||||||
@ -22,22 +27,33 @@ int main (int argc, char ** argv)
|
|||||||
GridSerialRNG sRNG;
|
GridSerialRNG sRNG;
|
||||||
sRNG.SeedRandomDevice();
|
sRNG.SeedRandomDevice();
|
||||||
|
|
||||||
SpinMatrix ident=zero;
|
SpinMatrix ident; ident=zero;
|
||||||
SpinMatrix rnd ; random(sRNG,rnd);
|
SpinMatrix rnd ; random(sRNG,rnd);
|
||||||
|
|
||||||
SpinMatrix ll=zero;
|
SpinMatrix ll; ll=zero;
|
||||||
SpinMatrix rr=zero;
|
SpinMatrix rr; rr=zero;
|
||||||
SpinMatrix result;
|
SpinMatrix result;
|
||||||
|
|
||||||
SpinVector lv; random(sRNG,lv);
|
SpinVector lv; random(sRNG,lv);
|
||||||
SpinVector rv; random(sRNG,rv);
|
SpinVector rv; random(sRNG,rv);
|
||||||
|
|
||||||
|
std::cout << " Is pod " << std::is_pod<SpinVector>::value << std::endl;
|
||||||
|
std::cout << " Is pod double " << std::is_pod<double>::value << std::endl;
|
||||||
|
std::cout << " Is pod ComplexF " << std::is_pod<ComplexF>::value << std::endl;
|
||||||
|
std::cout << " Is pod scal<double> " << std::is_pod<scal<double> >::value << std::endl;
|
||||||
|
std::cout << " Is pod Scalar<double> " << std::is_pod<iScalar<double> >::value << std::endl;
|
||||||
|
std::cout << " Is pod Scalar<ComplexF> " << std::is_pod<iScalar<ComplexF> >::value << std::endl;
|
||||||
|
std::cout << " Is pod Scalar<vComplexF> " << std::is_pod<iScalar<vComplexF> >::value << std::endl;
|
||||||
|
std::cout << " Is pod Scalar<vComplexD> " << std::is_pod<iScalar<vComplexD> >::value << std::endl;
|
||||||
|
std::cout << " Is pod Scalar<vRealF> " << std::is_pod<iScalar<vRealF> >::value << std::endl;
|
||||||
|
std::cout << " Is pod Scalar<vRealD> " << std::is_pod<iScalar<vRealD> >::value << std::endl;
|
||||||
|
|
||||||
for(int a=0;a<Ns;a++){
|
for(int a=0;a<Ns;a++){
|
||||||
ident()(a,a) = 1.0;
|
ident()(a,a) = 1.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
const Gamma::GammaMatrix *g = Gamma::GammaMatrices;
|
const Gamma::GammaMatrix *g = Gamma::GammaMatrices;
|
||||||
const char **list = Gamma::GammaMatrixNames;
|
const char **list = Gamma::GammaMatrixNames;
|
||||||
|
|
||||||
result =ll*Gamma(g[0])*rr;
|
result =ll*Gamma(g[0])*rr;
|
||||||
result =ll*Gamma(g[0]);
|
result =ll*Gamma(g[0]);
|
||||||
|
@ -49,7 +49,6 @@ int main (int argc, char ** argv)
|
|||||||
Plaq = Plaq + trace(U[mu]*Cshift(U[nu],mu,1)*adj(Cshift(U[mu],nu,1))*adj(U[nu]));
|
Plaq = Plaq + trace(U[mu]*Cshift(U[nu],mu,1)*adj(Cshift(U[mu],nu,1))*adj(U[nu]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
double vol = Fine.gSites();
|
double vol = Fine.gSites();
|
||||||
Complex PlaqScale(1.0/vol/6.0/3.0);
|
Complex PlaqScale(1.0/vol/6.0/3.0);
|
||||||
@ -58,7 +57,8 @@ int main (int argc, char ** argv)
|
|||||||
sliceSum(Plaq,Plaq_T,Nd-1);
|
sliceSum(Plaq,Plaq_T,Nd-1);
|
||||||
int Nt = Plaq_T.size();
|
int Nt = Plaq_T.size();
|
||||||
|
|
||||||
TComplex Plaq_T_sum=zero;
|
TComplex Plaq_T_sum;
|
||||||
|
Plaq_T_sum=zero;
|
||||||
for(int t=0;t<Nt;t++){
|
for(int t=0;t<Nt;t++){
|
||||||
Plaq_T_sum = Plaq_T_sum+Plaq_T[t];
|
Plaq_T_sum = Plaq_T_sum+Plaq_T[t];
|
||||||
Complex Pt=TensorRemove(Plaq_T[t]);
|
Complex Pt=TensorRemove(Plaq_T[t]);
|
||||||
|
166
tests/Grid_simd.cc
Normal file
166
tests/Grid_simd.cc
Normal file
@ -0,0 +1,166 @@
|
|||||||
|
#include <Grid.h>
|
||||||
|
#include <parallelIO/GridNerscIO.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
using namespace Grid::QCD;
|
||||||
|
|
||||||
|
class funcPlus {
|
||||||
|
public:
|
||||||
|
funcPlus() {};
|
||||||
|
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1+i2;}
|
||||||
|
std::string name(void) const { return std::string("Plus"); }
|
||||||
|
};
|
||||||
|
class funcMinus {
|
||||||
|
public:
|
||||||
|
funcMinus() {};
|
||||||
|
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1-i2;}
|
||||||
|
std::string name(void) const { return std::string("Minus"); }
|
||||||
|
};
|
||||||
|
class funcTimes {
|
||||||
|
public:
|
||||||
|
funcTimes() {};
|
||||||
|
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1*i2;}
|
||||||
|
std::string name(void) const { return std::string("Times"); }
|
||||||
|
};
|
||||||
|
class funcConj {
|
||||||
|
public:
|
||||||
|
funcConj() {};
|
||||||
|
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = conj(i1);}
|
||||||
|
std::string name(void) const { return std::string("Conj"); }
|
||||||
|
};
|
||||||
|
class funcAdj {
|
||||||
|
public:
|
||||||
|
funcAdj() {};
|
||||||
|
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = adj(i1);}
|
||||||
|
std::string name(void) const { return std::string("Adj"); }
|
||||||
|
};
|
||||||
|
|
||||||
|
class funcTimesI {
|
||||||
|
public:
|
||||||
|
funcTimesI() {};
|
||||||
|
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = timesI(i1);}
|
||||||
|
std::string name(void) const { return std::string("timesI"); }
|
||||||
|
};
|
||||||
|
|
||||||
|
class funcTimesMinusI {
|
||||||
|
public:
|
||||||
|
funcTimesMinusI() {};
|
||||||
|
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = timesMinusI(i1);}
|
||||||
|
std::string name(void) const { return std::string("timesMinusI"); }
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class scal, class vec,class functor >
|
||||||
|
void Tester(const functor &func)
|
||||||
|
{
|
||||||
|
GridSerialRNG sRNG;
|
||||||
|
sRNG.SeedRandomDevice();
|
||||||
|
|
||||||
|
int Nsimd = vec::Nsimd();
|
||||||
|
|
||||||
|
std::vector<scal> input1(Nsimd);
|
||||||
|
std::vector<scal> input2(Nsimd);
|
||||||
|
std::vector<scal> result(Nsimd);
|
||||||
|
std::vector<scal> reference(Nsimd);
|
||||||
|
|
||||||
|
std::vector<vec,alignedAllocator<vec> > buf(3);
|
||||||
|
vec & v_input1 = buf[0];
|
||||||
|
vec & v_input2 = buf[1];
|
||||||
|
vec & v_result = buf[2];
|
||||||
|
|
||||||
|
|
||||||
|
for(int i=0;i<Nsimd;i++){
|
||||||
|
random(sRNG,input1[i]);
|
||||||
|
random(sRNG,input2[i]);
|
||||||
|
random(sRNG,result[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
Gmerge(v_input1,input1);
|
||||||
|
Gmerge(v_input2,input2);
|
||||||
|
Gmerge(v_result,result);
|
||||||
|
|
||||||
|
func(v_result,v_input1,v_input2);
|
||||||
|
|
||||||
|
for(int i=0;i<Nsimd;i++) {
|
||||||
|
func(reference[i],input1[i],input2[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
Gextract(v_result,result);
|
||||||
|
std::cout << " " << func.name()<<std::endl;
|
||||||
|
|
||||||
|
int ok=0;
|
||||||
|
for(int i=0;i<Nsimd;i++){
|
||||||
|
if ( abs(reference[i]-result[i])>0){
|
||||||
|
std::cout<< "*****" << std::endl;
|
||||||
|
std::cout<< "["<<i<<"] "<< abs(reference[i]-result[i]) << " " <<reference[i]<< " " << result[i]<<std::endl;
|
||||||
|
ok++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ( ok==0 ) std::cout << " OK!" <<std::endl;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
std::vector<int> simd_layout({1,1,2,2});
|
||||||
|
std::vector<int> mpi_layout ({1,1,1,1});
|
||||||
|
std::vector<int> latt_size ({8,8,8,8});
|
||||||
|
|
||||||
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
std::vector<int> seeds({1,2,3,4});
|
||||||
|
|
||||||
|
// Insist that operations on random scalars gives
|
||||||
|
// identical results to on vectors.
|
||||||
|
|
||||||
|
Tester<RealD,vRealD>(funcPlus());
|
||||||
|
|
||||||
|
std::cout << "==================================="<< std::endl;
|
||||||
|
std::cout << "Testing vComplexF "<<std::endl;
|
||||||
|
std::cout << "==================================="<< std::endl;
|
||||||
|
|
||||||
|
Tester<ComplexF,vComplexF>(funcTimesI());
|
||||||
|
Tester<ComplexF,vComplexF>(funcTimesMinusI());
|
||||||
|
Tester<ComplexF,vComplexF>(funcPlus());
|
||||||
|
Tester<ComplexF,vComplexF>(funcMinus());
|
||||||
|
Tester<ComplexF,vComplexF>(funcTimes());
|
||||||
|
Tester<ComplexF,vComplexF>(funcConj());
|
||||||
|
Tester<ComplexF,vComplexF>(funcAdj());
|
||||||
|
|
||||||
|
std::cout << "==================================="<< std::endl;
|
||||||
|
std::cout << "Testing vComplexD "<<std::endl;
|
||||||
|
std::cout << "==================================="<< std::endl;
|
||||||
|
|
||||||
|
|
||||||
|
Tester<ComplexD,vComplexD>(funcTimesI());
|
||||||
|
Tester<ComplexD,vComplexD>(funcTimesMinusI());
|
||||||
|
Tester<ComplexD,vComplexD>(funcPlus());
|
||||||
|
Tester<ComplexD,vComplexD>(funcMinus());
|
||||||
|
Tester<ComplexD,vComplexD>(funcTimes());
|
||||||
|
Tester<ComplexD,vComplexD>(funcConj());
|
||||||
|
Tester<ComplexD,vComplexD>(funcAdj());
|
||||||
|
|
||||||
|
std::cout << "==================================="<< std::endl;
|
||||||
|
std::cout << "Testing vRealF "<<std::endl;
|
||||||
|
std::cout << "==================================="<< std::endl;
|
||||||
|
|
||||||
|
|
||||||
|
Tester<RealF,vRealF>(funcPlus());
|
||||||
|
Tester<RealF,vRealF>(funcMinus());
|
||||||
|
Tester<RealF,vRealF>(funcTimes());
|
||||||
|
Tester<RealF,vRealF>(funcAdj());
|
||||||
|
|
||||||
|
std::cout << "==================================="<< std::endl;
|
||||||
|
std::cout << "Testing vRealD "<<std::endl;
|
||||||
|
std::cout << "==================================="<< std::endl;
|
||||||
|
|
||||||
|
Tester<RealD,vRealD>(funcPlus());
|
||||||
|
Tester<RealD,vRealD>(funcMinus());
|
||||||
|
Tester<RealD,vRealD>(funcTimes());
|
||||||
|
Tester<RealD,vRealD>(funcAdj());
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
@ -4,60 +4,37 @@ using namespace std;
|
|||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
using namespace Grid::QCD;
|
using namespace Grid::QCD;
|
||||||
|
|
||||||
|
template<class vobj>
|
||||||
|
class SimpleCompressor {
|
||||||
|
public:
|
||||||
|
void Point(int) {};
|
||||||
|
|
||||||
|
vobj operator() (vobj &arg) {
|
||||||
|
return arg;
|
||||||
|
}
|
||||||
|
};
|
||||||
int main (int argc, char ** argv)
|
int main (int argc, char ** argv)
|
||||||
{
|
{
|
||||||
Grid_init(&argc,&argv);
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
std::vector<int> latt_size (4);
|
std::vector<int> simd_layout({1,1,2,2});
|
||||||
std::vector<int> simd_layout(4);
|
std::vector<int> mpi_layout ({2,2,2,2});
|
||||||
std::vector<int> mpi_layout (4);
|
std::vector<int> latt_size ({8,8,8,8});
|
||||||
|
|
||||||
int omp=1;
|
double volume = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||||
int lat=8;
|
|
||||||
|
|
||||||
mpi_layout[0]=1;
|
|
||||||
mpi_layout[1]=1;
|
|
||||||
mpi_layout[2]=1;
|
|
||||||
mpi_layout[3]=1;
|
|
||||||
|
|
||||||
latt_size[0] = lat;
|
|
||||||
latt_size[1] = lat;
|
|
||||||
latt_size[2] = lat;
|
|
||||||
latt_size[3] = lat;
|
|
||||||
double volume = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
|
||||||
|
|
||||||
#ifdef AVX512
|
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
||||||
simd_layout[0] = 1;
|
GridRedBlackCartesian rbFine(latt_size,simd_layout,mpi_layout);
|
||||||
simd_layout[1] = 2;
|
GridParallelRNG fRNG(&Fine);
|
||||||
simd_layout[2] = 2;
|
fRNG.SeedRandomDevice();
|
||||||
simd_layout[3] = 2;
|
|
||||||
#endif
|
LatticeColourMatrix Foo(&Fine);
|
||||||
#if defined (AVX1)|| defined (AVX2)
|
LatticeColourMatrix Bar(&Fine);
|
||||||
simd_layout[0] = 1;
|
LatticeColourMatrix Check(&Fine);
|
||||||
simd_layout[1] = 1;
|
LatticeColourMatrix Diff(&Fine);
|
||||||
simd_layout[2] = 2;
|
|
||||||
simd_layout[3] = 2;
|
random(fRNG,Foo);
|
||||||
#endif
|
gaussian(fRNG,Bar);
|
||||||
#if defined (SSE2)
|
|
||||||
simd_layout[0] = 1;
|
|
||||||
simd_layout[1] = 1;
|
|
||||||
simd_layout[2] = 1;
|
|
||||||
simd_layout[3] = 2;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
|
||||||
GridRedBlackCartesian rbFine(latt_size,simd_layout,mpi_layout);
|
|
||||||
GridParallelRNG fRNG(&Fine);
|
|
||||||
fRNG.SeedRandomDevice();
|
|
||||||
|
|
||||||
LatticeColourMatrix Foo(&Fine);
|
|
||||||
LatticeColourMatrix Bar(&Fine);
|
|
||||||
LatticeColourMatrix Check(&Fine);
|
|
||||||
LatticeColourMatrix Diff(&Fine);
|
|
||||||
|
|
||||||
random(fRNG,Foo);
|
|
||||||
gaussian(fRNG,Bar);
|
|
||||||
|
|
||||||
|
|
||||||
for(int dir=0;dir<4;dir++){
|
for(int dir=0;dir<4;dir++){
|
||||||
@ -86,7 +63,8 @@ int main (int argc, char ** argv)
|
|||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
std::vector<vColourMatrix,alignedAllocator<vColourMatrix> > comm_buf(myStencil._unified_buffer_size);
|
std::vector<vColourMatrix,alignedAllocator<vColourMatrix> > comm_buf(myStencil._unified_buffer_size);
|
||||||
printf("calling halo exchange\n");fflush(stdout);
|
printf("calling halo exchange\n");fflush(stdout);
|
||||||
myStencil.HaloExchange(Foo,comm_buf);
|
SimpleCompressor<vColourMatrix> compress;
|
||||||
|
myStencil.HaloExchange(Foo,comm_buf,compress);
|
||||||
|
|
||||||
Bar = Cshift(Foo,dir,disp);
|
Bar = Cshift(Foo,dir,disp);
|
||||||
|
|
||||||
|
69
tests/Grid_wilson.cc
Normal file
69
tests/Grid_wilson.cc
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
#include <Grid.h>
|
||||||
|
#include <parallelIO/GridNerscIO.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
using namespace Grid::QCD;
|
||||||
|
|
||||||
|
template<class d>
|
||||||
|
struct scal {
|
||||||
|
d internal;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
std::vector<int> simd_layout({1,1,2,2});
|
||||||
|
std::vector<int> mpi_layout ({1,1,1,1});
|
||||||
|
std::vector<int> latt_size ({8,8,8,8});
|
||||||
|
|
||||||
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
std::vector<int> seeds({1,2,3,4});
|
||||||
|
|
||||||
|
GridParallelRNG pRNG(&Grid);
|
||||||
|
// pRNG.SeedFixedIntegers(seeds);
|
||||||
|
pRNG.SeedRandomDevice();
|
||||||
|
|
||||||
|
LatticeFermion src(&Grid); random(pRNG,src);
|
||||||
|
LatticeFermion result(&Grid); result=zero;
|
||||||
|
LatticeFermion ref(&Grid); ref=zero;
|
||||||
|
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
|
||||||
|
std::vector<LatticeColourMatrix> U(4,&Grid);
|
||||||
|
|
||||||
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
U[mu] = 1.0;
|
||||||
|
pokeIndex<3>(Umu,U[mu],mu);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
int mu=0;
|
||||||
|
// ref = src + Gamma(Gamma::GammaX)* src ; // 1-gamma_x
|
||||||
|
ref = src;
|
||||||
|
ref = U[0]*Cshift(ref,0,1);
|
||||||
|
}
|
||||||
|
|
||||||
|
RealD mass=0.1;
|
||||||
|
WilsonMatrix Dw(Umu,mass);
|
||||||
|
std::cout << "Calling Dw"<<std::endl;
|
||||||
|
Dw.multiply(src,result);
|
||||||
|
std::cout << "Called Dw"<<std::endl;
|
||||||
|
std::cout << "norm result "<< norm2(result)<<std::endl;
|
||||||
|
std::cout << "norm ref "<< norm2(ref)<<std::endl;
|
||||||
|
|
||||||
|
for(int ss=0;ss<10;ss++ ){
|
||||||
|
for(int i=0;i<Ns;i++){
|
||||||
|
for(int j=0;j<Nc;j++){
|
||||||
|
ComplexF * ref_p = (ComplexF *)&ref._odata[ss]()(i)(j);
|
||||||
|
ComplexF * res_p = (ComplexF *)&result._odata[ss]()(i)(j);
|
||||||
|
std::cout << ss<< " "<<i<<" "<<j<<" "<< (*ref_p)<<" " <<(*res_p)<<std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ref = ref -result;
|
||||||
|
std::cout << "norm diff "<< norm2(ref)<<std::endl;
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
@ -5,7 +5,7 @@ AM_LDFLAGS = -L$(top_srcdir)/lib
|
|||||||
#
|
#
|
||||||
# Test code
|
# Test code
|
||||||
#
|
#
|
||||||
bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma
|
bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma Grid_wilson Grid_simd
|
||||||
|
|
||||||
Grid_main_SOURCES = Grid_main.cc
|
Grid_main_SOURCES = Grid_main.cc
|
||||||
Grid_main_LDADD = -lGrid
|
Grid_main_LDADD = -lGrid
|
||||||
@ -21,3 +21,9 @@ Grid_gamma_LDADD = -lGrid
|
|||||||
|
|
||||||
Grid_stencil_SOURCES = Grid_stencil.cc
|
Grid_stencil_SOURCES = Grid_stencil.cc
|
||||||
Grid_stencil_LDADD = -lGrid
|
Grid_stencil_LDADD = -lGrid
|
||||||
|
|
||||||
|
Grid_wilson_SOURCES = Grid_wilson.cc
|
||||||
|
Grid_wilson_LDADD = -lGrid
|
||||||
|
|
||||||
|
Grid_simd_SOURCES = Grid_simd.cc
|
||||||
|
Grid_simd_LDADD = -lGrid
|
||||||
|
Loading…
Reference in New Issue
Block a user