Mirror of https://github.com/paboyle/Grid.git

commit 94f728bee4 (parent 51f0da7b93)

    Big updates with progress towards wilson matrix
TODO
@ -2,6 +2,10 @@
- use protocol buffers? replace xmlReader/Writer etc..
- Binary use htonll, htonl

* Reduce implementation is poor
* Bug in SeedFixedIntegers gives same output on each site.
* Bug in RNG with complex numbers; only filling real values; need helper function -- DONE

* Stencil operator support -----Initial thoughts, trial implementation DONE.
                           -----some simple tests that Stencil matches Cshift.
                           -----do all permute in comms phase, so that copy permute
@ -11,6 +15,7 @@

* CovariantShift support -----Use a class to store gauge field? (parallel transport?)

* Strong test for norm2, conj and all primitive types.

* Consider switching std::vector to boost arrays or something lighter weight
  boost::multi_array<type, 3> A()... to replace multi1d, multi2d etc..
@ -33,10 +38,21 @@

* Make the Tensor types and Complex etc... play more nicely.
* TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > >

- TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > >
  QDP forces use of "toDouble" to get back to a non-tensor scalar. This role is presently taken by TensorRemove, but I
  want to introduce a syntax that does not require this.

- Reductions that contract indices on a site should always demote the tensor structure:
  norm2(), innerProduct.

- Result of Sum(), SliceSum // spatial sums
  trace, traceIndex etc.. do not.

- The problem arises because "trace" returns Lattice<TComplex>, moving everything down to Scalar,
  and then Sum and SliceSum do not remove the Scalars. This would be fixed if we
  template specialise the scalar-scalar-scalar Sum and SliceSum on the basis of being
  pure scalar (see the sketch below).

* Optimise the extract/merge SIMD routines; Azusa??
  - I have collated into single location at least.
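The demotion described above can be prototyped with a recursive trait. A minimal stand-alone sketch (illustrative only, not Grid code; it assumes a nested iScalar-style wrapper):

#include <complex>

template<class vtype> struct iScalar { vtype _internal; };

// Recursively peel iScalar wrappers until a plain scalar remains; the
// primary template is the "pure scalar" base case the note above suggests
// specialising Sum/SliceSum on.
template<class T> struct RemoveTensor {
  typedef T type;
  static type strip(const T &t) { return t; }
};
template<class T> struct RemoveTensor<iScalar<T> > {
  typedef typename RemoveTensor<T>::type type;
  static type strip(const iScalar<T> &t) { return RemoveTensor<T>::strip(t._internal); }
};

int main() {
  iScalar<iScalar<iScalar<std::complex<double> > > > nested{{{{1.0,2.0}}}};
  std::complex<double> c = RemoveTensor<decltype(nested)>::strip(nested);
  return (c.real()==1.0 && c.imag()==2.0) ? 0 : 1;
}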
Grid.h
@ -11,6 +11,7 @@
#define GRID_H

#include <stdio.h>

#include <complex>
#include <vector>
#include <iostream>
Grid_config.h
@ -2,7 +2,7 @@
/* lib/Grid_config.h.in.  Generated from configure.ac by autoheader.  */

/* AVX */
#define AVX1 1
/* #undef AVX1 */

/* AVX2 */
/* #undef AVX2 */
@ -77,7 +77,7 @@
#define PACKAGE_VERSION "1.0"

/* SSE4 */
/* #undef SSE4 */
#define SSE4 1

/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1
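The flip from AVX1 to SSE4 is just autoconf regenerating this header on a different build host. As a sketch of how such macros typically steer a SIMD layer (illustrative only, not Grid's actual selection logic):

// Compile-time SIMD width selection driven by the generated macros.
#if defined(AVX1) || defined(AVX2)
static const int simd_bytes = 32;              // 256-bit AVX registers
#elif defined(SSE4)
static const int simd_bytes = 16;              // 128-bit SSE registers
#else
static const int simd_bytes = sizeof(double);  // scalar fallback
#endif

int main() { return simd_bytes; }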
@ -26,10 +26,15 @@ namespace Grid {

  typedef float  RealF;
  typedef double RealD;
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
  typedef RealD Real;
#else
  typedef RealF Real;
#endif

  typedef std::complex<RealF> ComplexF;
  typedef std::complex<RealD> ComplexD;

  typedef std::complex<Real> Complex;

  inline RealF adj(const RealF & r){ return r; }
  inline RealF conj(const RealF & r){ return r; }
@ -63,8 +68,8 @@ namespace Grid {
  //conj already supported for complex

  inline ComplexF timesI(const ComplexF r)     { return(r*ComplexF(0.0,1.0));}
  inline ComplexF timesMinusI(const ComplexF r){ return(r*ComplexF(0.0,-1.0));}
  inline ComplexD timesI(const ComplexD r)     { return(r*ComplexD(0.0,1.0));}
  inline ComplexF timesMinusI(const ComplexF r){ return(r*ComplexF(0.0,-1.0));}
  inline ComplexD timesMinusI(const ComplexD r){ return(r*ComplexD(0.0,-1.0));}

  inline void mac (RealD * __restrict__ y,const RealD * __restrict__ a,const RealD *__restrict__ x){ *y = (*a) * (*x)+(*y);}
@ -280,15 +285,11 @@ namespace Grid {

  // Default precision
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
  typedef RealD  Real;
  typedef vRealD vReal;
  typedef vComplexD vComplex;
  typedef std::complex<Real> Complex;
#else
  typedef RealF  Real;
  typedef vRealF vReal;
  typedef vComplexF vComplex;
  typedef std::complex<Real> Complex;
#endif
}
#endif
@ -47,6 +47,101 @@ namespace Grid {
    int from_rank;
  } ;

  ///////////////////////////////////////////////////////////////////
  // Gather for when there is no need to SIMD split with compression
  ///////////////////////////////////////////////////////////////////
  template<class vobj,class cobj,class compressor> void
  Gather_plane_simple (Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,int dimension,int plane,int cbmask,compressor &compress)
  {
    int rd = rhs._grid->_rdimensions[dimension];

    if ( !rhs._grid->CheckerBoarded(dimension) ) {

      int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
      int o  = 0;                                    // relative offset to base within plane
      int bo = 0;                                    // offset in buffer

      // Simple block stride gather of SIMD objects
#pragma omp parallel for collapse(2)
      for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
        for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
          buffer[bo++]=compress(rhs._odata[so+o+b]);
        }
        o +=rhs._grid->_slice_stride[dimension];
      }

    } else {

      int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
      int o  = 0;                                    // relative offset to base within plane
      int bo = 0;                                    // offset in buffer

#pragma omp parallel for collapse(2)
      for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
        for(int b=0;b<rhs._grid->_slice_block[dimension];b++){

          int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
          if ( ocb &cbmask ) {
            buffer[bo]=compress(rhs._odata[so+o+b]);
            bo++;
          }

        }
        o +=rhs._grid->_slice_stride[dimension];
      }
    }
  }

  ///////////////////////////////////////////////////////////////////
  // Gather for when there *is* need to SIMD split with compression
  ///////////////////////////////////////////////////////////////////
  template<class cobj,class vobj,class compressor> void
  Gather_plane_extract(Lattice<vobj> &rhs,std::vector<typename cobj::scalar_type *> pointers,int dimension,int plane,int cbmask,compressor &compress)
  {
    int rd = rhs._grid->_rdimensions[dimension];

    if ( !rhs._grid->CheckerBoarded(dimension) ) {

      int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
      int o  = 0;                                    // relative offset to base within plane
      int bo = 0;                                    // offset in buffer

      // Simple block stride gather of SIMD objects
#pragma omp parallel for collapse(2)
      for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
        for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
          cobj temp;
          temp=compress(rhs._odata[so+o+b]);
          extract(temp,pointers);
        }
        o +=rhs._grid->_slice_stride[dimension];
      }

    } else {

      int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
      int o  = 0;                                    // relative offset to base within plane
      int bo = 0;                                    // offset in buffer

#pragma omp parallel for collapse(2)
      for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
        for(int b=0;b<rhs._grid->_slice_block[dimension];b++){

          int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
          if ( ocb & cbmask ) {
            cobj temp;
            temp =compress(rhs._odata[so+o+b]);
            extract(temp,pointers);
          }

        }
        o +=rhs._grid->_slice_stride[dimension];
      }
    }
  }

  class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal fill in.
  public:
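Both gathers above, and HaloExchange below, are parameterised on a "compressor". From the call sites its required shape is a Point(int) hook plus a call operator mapping vobj to cobj; a pass-through version (a sketch, not code from this commit) would look like:

// Minimal compressor satisfying the interface the gathers expect.
template<class vobj>
class SimpleCompressor {
public:
  // Called once per stencil point before the gathers run; unused here.
  void Point(int) {}
  // Identity "compression": cobj == vobj, nothing is shrunk.
  vobj operator()(const vobj &arg) { return arg; }
};

WilsonCompressor at the bottom of this commit is the non-trivial case: it projects a four-spinor to a half spinor, halving the communicated volume.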
@ -86,8 +181,8 @@ namespace Grid {

    // Could allow a functional munging of the halo to another type during the comms.
    // this could implement the 16bit/32bit/64bit compression.
    template<class vobj> void HaloExchange(Lattice<vobj> &source,
                                           std::vector<vobj,alignedAllocator<vobj> > &u_comm_buf)
    template<class vobj,class cobj, class compressor> void
    HaloExchange(Lattice<vobj> &source,std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,compressor &compress)
    {
      // conformable(source._grid,_grid);
      assert(source._grid==_grid);
@ -95,12 +190,10 @@ namespace Grid {
      int u_comm_offset=0;

      // Gather all comms buffers
      typedef typename vobj::vector_type vector_type;
      typedef typename vobj::scalar_type scalar_type;

      for(int point = 0 ; point < _npoints; point++) {

        printf("Point %d \n",point);fflush(stdout);
        compress.Point(point);

        int dimension    = _directions[point];
        int displacement = _distances[point];

@ -126,33 +219,30 @@ namespace Grid {
        sshift[1] = _grid->CheckerBoardShift(_checkerboard,dimension,shift,1);
        if ( sshift[0] == sshift[1] ) {
          if (splice_dim) {
            printf("splice 0x3 \n");fflush(stdout);
            GatherStartCommsSimd(source,dimension,shift,0x3,u_comm_buf,u_comm_offset);
            GatherStartCommsSimd(source,dimension,shift,0x3,u_comm_buf,u_comm_offset,compress);
          } else {
            printf("NO splice 0x3 \n");fflush(stdout);
            GatherStartComms(source,dimension,shift,0x3,u_comm_buf,u_comm_offset);
            GatherStartComms(source,dimension,shift,0x3,u_comm_buf,u_comm_offset,compress);
          }
        } else {
          if(splice_dim){
            printf("splice 0x1,2 \n");fflush(stdout);
            GatherStartCommsSimd(source,dimension,shift,0x1,u_comm_buf,u_comm_offset);// if checkerboard is unfavourable take two passes
            GatherStartCommsSimd(source,dimension,shift,0x2,u_comm_buf,u_comm_offset);// both with block stride loop iteration
            GatherStartCommsSimd(source,dimension,shift,0x1,u_comm_buf,u_comm_offset,compress);// if checkerboard is unfavourable take two passes
            GatherStartCommsSimd(source,dimension,shift,0x2,u_comm_buf,u_comm_offset,compress);// both with block stride loop iteration
          } else {
            printf("NO splice 0x1,2 \n");fflush(stdout);
            GatherStartComms(source,dimension,shift,0x1,u_comm_buf,u_comm_offset);
            GatherStartComms(source,dimension,shift,0x2,u_comm_buf,u_comm_offset);
            GatherStartComms(source,dimension,shift,0x1,u_comm_buf,u_comm_offset,compress);
            GatherStartComms(source,dimension,shift,0x2,u_comm_buf,u_comm_offset,compress);
          }
        }
      }
    }

    template<class vobj> void GatherStartComms(Lattice<vobj> &rhs,int dimension,int shift,int cbmask,
                                               std::vector<vobj,alignedAllocator<vobj> > &u_comm_buf,
                                               int &u_comm_offset)
    template<class vobj,class cobj, class compressor>
    void GatherStartComms(Lattice<vobj> &rhs,int dimension,int shift,int cbmask,
                          std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,
                          int &u_comm_offset,compressor & compress)
    {
      typedef typename vobj::vector_type vector_type;
      typedef typename vobj::scalar_type scalar_type;
      typedef typename cobj::vector_type vector_type;
      typedef typename cobj::scalar_type scalar_type;

      GridBase *grid=_grid;
      assert(rhs._grid==_grid);
@ -169,31 +259,26 @@ namespace Grid {

      int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];

      std::vector<vobj,alignedAllocator<vobj> > send_buf(buffer_size); // hmm...
      std::vector<vobj,alignedAllocator<vobj> > recv_buf(buffer_size);
      std::vector<cobj,alignedAllocator<cobj> > send_buf(buffer_size); // hmm...
      std::vector<cobj,alignedAllocator<cobj> > recv_buf(buffer_size);

      int cb    = (cbmask==0x2)? 1 : 0;
      int sshift= _grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,cb);

      for(int x=0;x<rd;x++){

        printf("GatherStartComms x %d/%d\n",x,rd);fflush(stdout);
        int offnode   = ( x+sshift >= rd );
        int sx        = (x+sshift)%rd;
        int comm_proc = (x+sshift)/rd;

        if (offnode) {

          printf("GatherStartComms offnode x %d\n",x);fflush(stdout);
          int words = send_buf.size();
          if (cbmask != 0x3) words=words>>1;

          int bytes = words * sizeof(vobj);
          int bytes = words * sizeof(cobj);

          printf("Gather_plane_simple dimension %d sx %d cbmask %d\n",dimension,sx,cbmask);fflush(stdout);
          Gather_plane_simple (rhs,send_buf,dimension,sx,cbmask);

          printf("GatherStartComms gathered offnode x %d\n",x);fflush(stdout);
          Gather_plane_simple (rhs,send_buf,dimension,sx,cbmask,compress);

          int rank = _grid->_processor;
          int recv_from_rank;
@ -219,14 +304,14 @@ namespace Grid {
      }


      template<class vobj>
      template<class vobj,class cobj, class compressor>
      void GatherStartCommsSimd(Lattice<vobj> &rhs,int dimension,int shift,int cbmask,
                                std::vector<vobj,alignedAllocator<vobj> > &u_comm_buf,
                                int &u_comm_offset)
                                std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,
                                int &u_comm_offset,compressor &compress)
      {
        const int Nsimd = _grid->Nsimd();
        typedef typename vobj::vector_type vector_type;
        typedef typename vobj::scalar_type scalar_type;
        typedef typename cobj::vector_type vector_type;
        typedef typename cobj::scalar_type scalar_type;

        int fd = _grid->_fdimensions[dimension];
        int rd = _grid->_rdimensions[dimension];
@ -245,7 +330,7 @@ namespace Grid {
        // Simd direction uses an extract/merge pair
        ///////////////////////////////////////////////
        int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
        int words = sizeof(vobj)/sizeof(vector_type);
        int words = sizeof(cobj)/sizeof(vector_type);

        /* FIXME ALTERNATE BUFFER DETERMINATION */
        std::vector<std::vector<scalar_type> > send_buf_extract(Nsimd,std::vector<scalar_type>(buffer_size*words) );
@ -285,7 +370,7 @@ namespace Grid {
        for(int i=0;i<Nsimd;i++){
          pointers[Nsimd-1-i] = (scalar_type *)&send_buf_extract[i][0];
        }
        Gather_plane_extract(rhs,pointers,dimension,sx,cbmask);
        Gather_plane_extract<cobj>(rhs,pointers,dimension,sx,cbmask,compress);

        for(int i=0;i<Nsimd;i++){
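The extract used by Gather_plane_extract splits each SIMD vector into Nsimd scalar streams, one per destination pointer. A stand-alone sketch of that step (names here are illustrative, not Grid's implementation):

#include <vector>

static const int Nsimd = 4;
struct vRealSketch { double lane[Nsimd]; };   // stand-in for a SIMD vector

// Scatter one lane of v into each destination stream, advancing each pointer.
inline void extract_sketch(const vRealSketch &v, std::vector<double *> &pointers) {
  for (int i = 0; i < Nsimd; i++) {
    *pointers[i]++ = v.lane[i];
  }
}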
Makefile.am
@ -18,6 +18,7 @@ libGrid_a_SOURCES =\
	Grid_init.cc\
	stencil/Grid_stencil_common.cc\
	qcd/Grid_qcd_dirac.cc\
	qcd/Grid_qcd_wilson_dop.cc\
	$(extra_sources)

#
@ -79,18 +79,18 @@ namespace Grid {
        return ret;
    }
    template<class left,class right>
    inline auto operator + (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
    inline auto operator + (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]+rhs._odata[0])>
    {
        //NB mult performs conformable check. Do not reapply here for performance.
        Lattice<decltype(lhs._odata[0]*rhs._odata[0])> ret(rhs._grid);
        Lattice<decltype(lhs._odata[0]+rhs._odata[0])> ret(rhs._grid);
        add(ret,lhs,rhs);
        return ret;
    }
    template<class left,class right>
    inline auto operator - (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
    inline auto operator - (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]-rhs._odata[0])>
    {
        //NB mult performs conformable check. Do not reapply here for performance.
        Lattice<decltype(lhs._odata[0]*rhs._odata[0])> ret(rhs._grid);
        Lattice<decltype(lhs._odata[0]-rhs._odata[0])> ret(rhs._grid);
        sub(ret,lhs,rhs);
        return ret;
    }
@ -107,9 +107,9 @@ namespace Grid {
        return ret;
    }
    template<class left,class right>
    inline auto operator + (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
    inline auto operator + (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs+rhs._odata[0])>
    {
        Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
        Lattice<decltype(lhs+rhs._odata[0])> ret(rhs._grid);
#pragma omp parallel for
        for(int ss=0;ss<rhs._grid->oSites(); ss++){
            ret._odata[ss]=lhs+rhs._odata[ss];
@ -117,9 +117,9 @@ namespace Grid {
        return ret;
    }
    template<class left,class right>
    inline auto operator - (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
    inline auto operator - (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs-rhs._odata[0])>
    {
        Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
        Lattice<decltype(lhs-rhs._odata[0])> ret(rhs._grid);
#pragma omp parallel for
        for(int ss=0;ss<rhs._grid->oSites(); ss++){
            ret._odata[ss]=lhs-rhs._odata[ss];
@ -137,9 +137,9 @@ namespace Grid {
        return ret;
    }
    template<class left,class right>
    inline auto operator + (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
    inline auto operator + (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]+rhs)>
    {
        Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
        Lattice<decltype(lhs._odata[0]+rhs)> ret(lhs._grid);
#pragma omp parallel for
        for(int ss=0;ss<rhs._grid->oSites(); ss++){
            ret._odata[ss]=lhs._odata[ss]+rhs;
@ -147,9 +147,9 @@ namespace Grid {
        return ret;
    }
    template<class left,class right>
    inline auto operator - (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
    inline auto operator - (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]-rhs)>
    {
        Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
        Lattice<decltype(lhs._odata[0]-rhs)> ret(lhs._grid);
#pragma omp parallel for
        for(int ss=0;ss<rhs._grid->oSites(); ss++){
            ret._odata[ss]=lhs._odata[ss]-rhs;
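The corrected trailing return types matter whenever operator* and operator+ yield different types for mixed operands, which is routine with expression-template tensors. A minimal illustration (not Grid code):

#include <type_traits>

struct A {}; struct B {};
struct Prod {}; struct Sum {};
Prod operator*(A,B) { return Prod(); }
Sum  operator+(A,B) { return Sum();  }

// The return type must be deduced from the operation actually performed:
template<class L,class R>
auto add(L l,R r) -> decltype(l+r) { return l+r; }  // decltype(l*r) here would not compile

static_assert(!std::is_same<decltype(A()*B()),decltype(A()+B())>::value,
              "* and + can differ, so the old decltype(lhs*rhs) was wrong for + and -");

int main(){ add(A(),B()); return 0; }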
@ -14,7 +14,7 @@ namespace Grid {

    typedef typename vobj::scalar_type scalar;
    typedef typename vobj::vector_type vector;
    decltype(innerProduct(arg._odata[0],arg._odata[0])) vnrm=zero;
    decltype(innerProduct(arg._odata[0],arg._odata[0])) vnrm;
    scalar nrm;
    //FIXME make this loop parallelisable
    vnrm=zero;
@ -33,10 +33,11 @@ namespace Grid {
    //->decltype(innerProduct(left._odata[0],right._odata[0]))
    {
    typedef typename vobj::scalar_type scalar;
    decltype(innerProduct(left._odata[0],right._odata[0])) vnrm=zero;
    decltype(innerProduct(left._odata[0],right._odata[0])) vnrm;

    scalar nrm;
    //FIXME make this loop parallelisable
    vnrm=zero;
    for(int ss=0;ss<left._grid->oSites(); ss++){
        vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]);
    }
@ -94,8 +95,10 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
  int ld=grid->_ldimensions[orthogdim];
  int rd=grid->_rdimensions[orthogdim];

  sobj szero; szero=zero;

  std::vector<vobj,alignedAllocator<vobj> > lvSum(rd); // will locally sum vectors first
  std::vector<sobj> lsSum(ld,sobj(zero)); // sum across these down to scalars
  std::vector<sobj> lsSum(ld,szero);      // sum across these down to scalars
  std::vector<sobj> extracted(Nsimd);     // splitting the SIMD

  result.resize(fd); // And then global sum to return the same vector to every node for IO to file
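The moved vnrm=zero and the szero temporary follow from the POD change to the tensor classes later in this commit: without constructors, copy-initialisation from zero is unavailable and only assignment works. A reduced sketch of the pattern:

struct Zero {};
struct PodTensor {
  double v;                                        // no constructors: stays a POD
  PodTensor &operator=(const Zero &) { v = 0.0; return *this; }
};

int main() {
  Zero zero;
  // PodTensor t = zero;   // ill-formed: would need a PodTensor(Zero) constructor
  PodTensor t; t = zero;   // declare, then assign -- the pattern used throughout
  return (t.v == 0.0) ? 0 : 1;
}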
@ -26,6 +26,8 @@ namespace Grid {
    }

  };


  class GridRNGbase {

  public:
@ -62,6 +64,21 @@ namespace Grid {
    }

    // real scalars are one component
    template<class scalar,class distribution> void fillScalar(scalar &s,distribution &dist)
    {
      s=dist(_generators[0]);
    }
    template<class distribution> void fillScalar(ComplexF &s,distribution &dist)
    {
      s=ComplexF(dist(_generators[0]),dist(_generators[0]));
    }
    template<class distribution> void fillScalar(ComplexD &s,distribution &dist)
    {
      s=ComplexD(dist(_generators[0]),dist(_generators[0]));
    }


    template <class sobj,class distribution> inline void fill(sobj &l,distribution &dist){

      typedef typename sobj::scalar_type scalar_type;
@ -71,13 +88,60 @@ namespace Grid {
      scalar_type *buf = (scalar_type *) & l;

      for(int idx=0;idx<words;idx++){
        buf[idx] = dist(_generators[0]);
        fillScalar(buf[idx],dist);
      }

      CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));

    };

    template <class distribution> inline void fill(ComplexF &l,distribution &dist){
      fillScalar(l,dist);
      CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
    }
    template <class distribution> inline void fill(ComplexD &l,distribution &dist){
      fillScalar(l,dist);
      CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
    }
    template <class distribution> inline void fill(RealF &l,distribution &dist){
      fillScalar(l,dist);
      CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
    }
    template <class distribution> inline void fill(RealD &l,distribution &dist){
      fillScalar(l,dist);
      CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
    }
    // vector fill
    template <class distribution> inline void fill(vComplexF &l,distribution &dist){
      RealF *pointer=(RealF *)&l;
      for(int i=0;i<2*vComplexF::Nsimd();i++){
        fillScalar(pointer[i],dist);
      }
      CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
    }
    template <class distribution> inline void fill(vComplexD &l,distribution &dist){
      RealD *pointer=(RealD *)&l;
      for(int i=0;i<2*vComplexD::Nsimd();i++){
        fillScalar(pointer[i],dist);
      }
      CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
    }
    template <class distribution> inline void fill(vRealF &l,distribution &dist){
      RealF *pointer=(RealF *)&l;
      for(int i=0;i<vRealF::Nsimd();i++){
        fillScalar(pointer[i],dist);
      }
      CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
    }
    template <class distribution> inline void fill(vRealD &l,distribution &dist){
      RealD *pointer=(RealD *)&l;
      for(int i=0;i<vRealD::Nsimd();i++){
        fillScalar(pointer[i],dist);
      }
      CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
    }


    void SeedRandomDevice(void){
      std::random_device rd;
      Seed(rd);
@ -186,7 +250,6 @@ namespace Grid {

  };


  template <class vobj> inline void random(GridParallelRNG &rng,Lattice<vobj> &l){
    rng.fill(l,rng._uniform);
  }
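This is the fix for the RNG bug flagged in the TODO: treating a complex as one "word" left the imaginary part unfilled. A stand-alone illustration of the before/after behaviour (not Grid code):

#include <complex>
#include <random>

int main() {
  std::mt19937 gen(5);
  std::uniform_real_distribution<double> dist(0.0,1.0);

  std::complex<double> bad(dist(gen));             // one draw: imaginary part stays 0
  std::complex<double> good(dist(gen),dist(gen));  // two draws, as in fillScalar(ComplexD&,...)

  return (bad.imag()==0.0 && good.imag()!=0.0) ? 0 : 1;
}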
@ -11,21 +11,21 @@ namespace Grid {
// multiplication by fundamental scalar type
template<class l,int N> inline iScalar<l> operator * (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
{
    typename iScalar<l>::tensor_reduced srhs(rhs);
    typename iScalar<l>::tensor_reduced srhs; srhs=rhs;
    return lhs*srhs;
}
template<class l,int N> inline iScalar<l> operator * (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs) { return rhs*lhs; }

template<class l,int N> inline iVector<l,N> operator * (const iVector<l,N>& lhs,const typename iScalar<l>::scalar_type rhs)
{
    typename iVector<l,N>::tensor_reduced srhs(rhs);
    typename iVector<l,N>::tensor_reduced srhs; srhs=rhs;
    return lhs*srhs;
}
template<class l,int N> inline iVector<l,N> operator * (const typename iScalar<l>::scalar_type lhs,const iVector<l,N>& rhs) { return rhs*lhs; }

template<class l,int N> inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type &rhs)
{
    typename iMatrix<l,N>::tensor_reduced srhs(rhs);
    typename iMatrix<l,N>::tensor_reduced srhs; srhs=rhs;
    return lhs*srhs;
}
template<class l,int N> inline iMatrix<l,N> operator * (const typename iScalar<l>::scalar_type & lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
@ -35,24 +35,24 @@ template<class l,int N> inline iMatrix<l,N> operator * (const typename iScalar<l
////////////////////////////////////////////////////////////////////
template<class l> inline iScalar<l> operator * (const iScalar<l>& lhs,double rhs)
{
    typename iScalar<l>::scalar_type t(rhs);
    typename iScalar<l>::tensor_reduced srhs(t);
    typename iScalar<l>::scalar_type t; t=rhs;
    typename iScalar<l>::tensor_reduced srhs;srhs=t;
    return lhs*srhs;
}
template<class l> inline iScalar<l> operator * (double lhs,const iScalar<l>& rhs) { return rhs*lhs; }

template<class l,int N> inline iVector<l,N> operator * (const iVector<l,N>& lhs,double rhs)
{
    typename iScalar<l>::scalar_type t(rhs);
    typename iScalar<l>::tensor_reduced srhs(t);
    typename iScalar<l>::scalar_type t;t=rhs;
    typename iScalar<l>::tensor_reduced srhs;srhs=t;
    return lhs*srhs;
}
template<class l,int N> inline iVector<l,N> operator * (double lhs,const iVector<l,N>& rhs) { return rhs*lhs; }

template<class l,int N> inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,double rhs)
{
    typename iScalar<l>::scalar_type t(rhs);
    typename iScalar<l>::tensor_reduced srhs(t);
    typename iScalar<l>::scalar_type t;t=rhs;
    typename iScalar<l>::tensor_reduced srhs;srhs=t;
    return lhs*srhs;
}
template<class l,int N> inline iMatrix<l,N> operator * (double lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
@ -62,24 +62,26 @@ template<class l,int N> inline iMatrix<l,N> operator * (double lhs,const iMatrix
////////////////////////////////////////////////////////////////////
template<class l> inline iScalar<l> operator * (const iScalar<l>& lhs,ComplexD rhs)
{
    typename iScalar<l>::scalar_type t(rhs);
    typename iScalar<l>::tensor_reduced srhs(t);
    typename iScalar<l>::scalar_type t;t=rhs;
    typename iScalar<l>::tensor_reduced srhs;srhs=t;


    return lhs*srhs;
}
template<class l> inline iScalar<l> operator * (ComplexD lhs,const iScalar<l>& rhs) { return rhs*lhs; }

template<class l,int N> inline iVector<l,N> operator * (const iVector<l,N>& lhs,ComplexD rhs)
{
    typename iScalar<l>::scalar_type t(rhs);
    typename iScalar<l>::tensor_reduced srhs(t);
    typename iScalar<l>::scalar_type t;t=rhs;
    typename iScalar<l>::tensor_reduced srhs;srhs=t;
    return lhs*srhs;
}
template<class l,int N> inline iVector<l,N> operator * (ComplexD lhs,const iVector<l,N>& rhs) { return rhs*lhs; }

template<class l,int N> inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,ComplexD rhs)
{
    typename iScalar<l>::scalar_type t(rhs);
    typename iScalar<l>::tensor_reduced srhs(t);
    typename iScalar<l>::scalar_type t;t=rhs;
    typename iScalar<l>::tensor_reduced srhs;srhs=t;
    return lhs*srhs;
}
template<class l,int N> inline iMatrix<l,N> operator * (ComplexD lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
@ -89,24 +91,24 @@ template<class l,int N> inline iMatrix<l,N> operator * (ComplexD lhs,const iMatr
////////////////////////////////////////////////////////////////////
template<class l> inline iScalar<l> operator * (const iScalar<l>& lhs,Integer rhs)
{
    typename iScalar<l>::scalar_type t(rhs);
    typename iScalar<l>::tensor_reduced srhs(t);
    typename iScalar<l>::scalar_type t; t=rhs;
    typename iScalar<l>::tensor_reduced srhs; srhs=t;
    return lhs*srhs;
}
template<class l> inline iScalar<l> operator * (Integer lhs,const iScalar<l>& rhs) { return rhs*lhs; }

template<class l,int N> inline iVector<l,N> operator * (const iVector<l,N>& lhs,Integer rhs)
{
    typename iScalar<l>::scalar_type t(rhs);
    typename iScalar<l>::tensor_reduced srhs(t);
    typename iScalar<l>::scalar_type t;t=rhs;
    typename iScalar<l>::tensor_reduced srhs;srhs=t;
    return lhs*srhs;
}
template<class l,int N> inline iVector<l,N> operator * (Integer lhs,const iVector<l,N>& rhs) { return rhs*lhs; }

template<class l,int N> inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,Integer rhs)
{
    typename iScalar<l>::scalar_type t(rhs);
    typename iScalar<l>::tensor_reduced srhs(t);
    typename iScalar<l>::scalar_type t;t=rhs;
    typename iScalar<l>::tensor_reduced srhs;srhs=t;
    return lhs*srhs;
}
template<class l,int N> inline iMatrix<l,N> operator * (Integer lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
@ -118,14 +120,14 @@ template<class l,int N> inline iMatrix<l,N> operator * (Integer lhs,const iMatri
///////////////////////////////////////////////////////////////////////////////////////////////
template<class l,int N> inline iScalar<l> operator + (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
{
    typename iScalar<l>::tensor_reduced srhs(rhs);
    typename iScalar<l>::tensor_reduced srhs; srhs=rhs;
    return lhs+srhs;
}
template<class l,int N> inline iScalar<l> operator + (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs) { return rhs+lhs; }

template<class l,int N> inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type rhs)
{
    typename iMatrix<l,N>::tensor_reduced srhs(rhs);
    typename iMatrix<l,N>::tensor_reduced srhs; srhs=rhs;
    return lhs+srhs;
}
template<class l,int N> inline iMatrix<l,N> operator + (const typename iScalar<l>::scalar_type lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; }
@ -135,16 +137,16 @@ template<class l,int N> inline iMatrix<l,N> operator + (const typename iScalar<l
////////////////////////////////////////////////////////////////////
template<class l> inline iScalar<l> operator + (const iScalar<l>& lhs,double rhs)
{
    typename iScalar<l>::scalar_type t(rhs);
    typename iScalar<l>::tensor_reduced srhs(t);
    typename iScalar<l>::scalar_type t; t=rhs;
    typename iScalar<l>::tensor_reduced srhs; srhs=t;
    return lhs+srhs;
}
template<class l> inline iScalar<l> operator + (double lhs,const iScalar<l>& rhs) { return rhs+lhs; }

template<class l,int N> inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,double rhs)
{
    typename iScalar<l>::scalar_type t(rhs);
    typename iScalar<l>::tensor_reduced srhs(t);
    typename iScalar<l>::scalar_type t;t=rhs;
    typename iScalar<l>::tensor_reduced srhs;srhs=t;
    return lhs+srhs;
}
template<class l,int N> inline iMatrix<l,N> operator + (double lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; }
@ -155,8 +157,8 @@ template<class l,int N> inline iMatrix<l,N> operator + (double lhs,const iMatrix

template<class l> inline iScalar<l> operator + (const iScalar<l>& lhs,Integer rhs)
{
    typename iScalar<l>::scalar_type t(rhs);
    typename iScalar<l>::tensor_reduced srhs(t);
    typename iScalar<l>::scalar_type t; t=rhs;
    typename iScalar<l>::tensor_reduced srhs; srhs=t;
    return lhs+srhs;
}

@ -164,8 +166,8 @@ template<class l> inline iScalar<l> operator + (Integer lhs,const iScalar<l>& rh

template<class l,int N> inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,Integer rhs)
{
    typename iScalar<l>::scalar_type t(rhs);
    typename iScalar<l>::tensor_reduced srhs(t);
    typename iScalar<l>::scalar_type t;t=rhs;
    typename iScalar<l>::tensor_reduced srhs;srhs=t;
    return lhs+srhs;
}
template<class l,int N> inline iMatrix<l,N> operator + (Integer lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; }
@ -176,23 +178,23 @@ template<class l,int N> inline iMatrix<l,N> operator + (Integer lhs,const iMatri
///////////////////////////////////////////////////////////////////////////////////////////////
template<class l,int N> inline iScalar<l> operator - (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
{
    typename iScalar<l>::tensor_reduced srhs(rhs);
    typename iScalar<l>::tensor_reduced srhs; srhs=rhs;
    return lhs-srhs;
}
template<class l,int N> inline iScalar<l> operator - (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs)
{
    typename iScalar<l>::tensor_reduced slhs(lhs);
    typename iScalar<l>::tensor_reduced slhs;slhs=lhs;
    return slhs-rhs;
}

template<class l,int N> inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type rhs)
{
    typename iScalar<l>::tensor_reduced srhs(rhs);
    typename iScalar<l>::tensor_reduced srhs; srhs=rhs;
    return lhs-srhs;
}
template<class l,int N> inline iMatrix<l,N> operator - (const typename iScalar<l>::scalar_type lhs,const iMatrix<l,N>& rhs)
{
    typename iScalar<l>::tensor_reduced slhs(lhs);
    typename iScalar<l>::tensor_reduced slhs;slhs=lhs;
    return slhs-rhs;
}

@ -201,27 +203,27 @@ template<class l,int N> inline iMatrix<l,N> operator - (const typename iScalar<l
////////////////////////////////////////////////////////////////////
template<class l> inline iScalar<l> operator - (const iScalar<l>& lhs,double rhs)
{
    typename iScalar<l>::scalar_type t(rhs);
    typename iScalar<l>::tensor_reduced srhs(t);
    typename iScalar<l>::scalar_type t; t=rhs;
    typename iScalar<l>::tensor_reduced srhs; srhs=t;
    return lhs-srhs;
}
template<class l> inline iScalar<l> operator - (double lhs,const iScalar<l>& rhs)
{
    typename iScalar<l>::scalar_type t(lhs);
    typename iScalar<l>::tensor_reduced slhs(t);
    typename iScalar<l>::tensor_reduced slhs;slhs=t;
    return slhs-rhs;
}

template<class l,int N> inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,double rhs)
{
    typename iScalar<l>::scalar_type t(rhs);
    typename iScalar<l>::tensor_reduced srhs(t);
    typename iScalar<l>::scalar_type t;t=rhs;
    typename iScalar<l>::tensor_reduced srhs;srhs=t;
    return lhs-srhs;
}
template<class l,int N> inline iMatrix<l,N> operator - (double lhs,const iMatrix<l,N>& rhs)
{
    typename iScalar<l>::scalar_type t(lhs);
    typename iScalar<l>::tensor_reduced slhs(t);
    typename iScalar<l>::tensor_reduced slhs;slhs=t;
    return slhs-rhs;
}

@ -230,26 +232,26 @@ template<class l,int N> inline iMatrix<l,N> operator - (double lhs,const iMatrix
////////////////////////////////////////////////////////////////////
template<class l> inline iScalar<l> operator - (const iScalar<l>& lhs,Integer rhs)
{
    typename iScalar<l>::scalar_type t(rhs);
    typename iScalar<l>::tensor_reduced srhs(t);
    typename iScalar<l>::scalar_type t; t=rhs;
    typename iScalar<l>::tensor_reduced srhs; srhs=t;
    return lhs-srhs;
}
template<class l> inline iScalar<l> operator - (Integer lhs,const iScalar<l>& rhs)
{
    typename iScalar<l>::scalar_type t(lhs);
    typename iScalar<l>::tensor_reduced slhs(t);
    typename iScalar<l>::scalar_type t;t=lhs;
    typename iScalar<l>::tensor_reduced slhs;slhs=t;
    return slhs-rhs;
}
template<class l,int N> inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,Integer rhs)
{
    typename iScalar<l>::scalar_type t(rhs);
    typename iScalar<l>::tensor_reduced srhs(t);
    typename iScalar<l>::scalar_type t;t=rhs;
    typename iScalar<l>::tensor_reduced srhs;srhs=t;
    return lhs-srhs;
}
template<class l,int N> inline iMatrix<l,N> operator - (Integer lhs,const iMatrix<l,N>& rhs)
{
    typename iScalar<l>::scalar_type t(lhs);
    typename iScalar<l>::tensor_reduced slhs(t);
    typename iScalar<l>::scalar_type t;t=lhs;
    typename iScalar<l>::tensor_reduced slhs;slhs=t;
    return slhs-rhs;
}
@ -17,7 +17,8 @@ namespace Grid {
    auto innerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0],rhs._internal[0]))>
    {
        typedef decltype(innerProduct(lhs._internal[0],rhs._internal[0])) ret_t;
        iScalar<ret_t> ret=zero;
        iScalar<ret_t> ret;
        ret=zero;
        for(int c1=0;c1<N;c1++){
            ret._internal += innerProduct(lhs._internal[c1],rhs._internal[c1]);
        }
@ -27,8 +28,9 @@ namespace Grid {
    auto innerProduct (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0]))>
    {
        typedef decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0])) ret_t;
        iScalar<ret_t> ret=zero;
        iScalar<ret_t> ret;
        iScalar<ret_t> tmp;
        ret=zero;
        for(int c1=0;c1<N;c1++){
            for(int c2=0;c2<N;c2++){
                ret._internal+=innerProduct(lhs._internal[c1][c2],rhs._internal[c1][c2]);
@ -8,6 +8,16 @@ namespace Grid {
//  These can be composed to form tensor products of internal indices.
///////////////////////////////////////////////////

// It is useful to NOT have any constructors
// so that these classes assert "is_pod<class> == true"
// because then the standard C++ valarray container eliminates fill overhead on new allocation and
// non-move copying.
//
// However note that doing this eliminates some syntactical sugar such as
// calling the constructor explicitly or implicitly
//
#define TENSOR_IS_POD

template<class vtype> class iScalar
{
public:
@ -25,16 +35,22 @@ public:
  // Scalar no action
  //  template<int Level> using tensor_reduce_level = typename iScalar<GridTypeMapper<vtype>::tensor_reduce_level<Level> >;

  iScalar(){};

#ifndef TENSOR_IS_POD
  iScalar(){;};
  iScalar(scalar_type s) : _internal(s) {};// recurse down and hit the constructor for vector_type
  iScalar(const Zero &z){ *this = zero; };
#endif

  iScalar<vtype> & operator= (const Zero &hero){
    zeroit(*this);
    return *this;
  }
  iScalar<vtype> & operator= (const scalar_type s){
    _internal=s;
    return *this;
  }

  friend void zeroit(iScalar<vtype> &that){
    zeroit(that._internal);
  }
@ -114,8 +130,10 @@ public:

  enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1};

#ifndef TENSOR_IS_POD
  iVector(const Zero &z){ *this = zero; };
  iVector() {};// Empty constructor
#endif

  iVector<vtype,N> & operator= (const Zero &hero){
    zeroit(*this);
@ -185,8 +203,11 @@ public:

  enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1};

#ifndef TENSOR_IS_POD
  iMatrix(const Zero &z){ *this = zero; };
  iMatrix() {};
#endif

  iMatrix<vtype,N> & operator= (const Zero &hero){
    zeroit(*this);
    return *this;
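What the TENSOR_IS_POD guard buys can be checked directly. A minimal sketch (illustrative, not Grid code), assuming the payload type is itself trivial:

#include <type_traits>

template<class vtype> struct iScalarSketch {
  vtype _internal;                 // no user-declared constructors
};

// With the constructors compiled out, POD-ness is inherited from the payload,
// so containers such as std::valarray may skip value-initialisation on allocation.
static_assert(std::is_pod<iScalarSketch<double> >::value,
              "POD tensors permit fill-free allocation");

int main() { return 0; }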
@ -8,6 +8,7 @@ namespace QCD {
    static const int Ns=4;
    static const int Nd=4;
    static const int Nhs=2; // half spinor
    static const int Nds=8; // double stored gauge field

    static const int CbRed  =0;
    static const int CbBlack=1;
@ -28,79 +29,216 @@ namespace QCD {
    //
    // That probably makes for GridRedBlack4dCartesian grid.

    // s,sp,c,spc,lc
    template<typename vtype> using iSinglet                 = iScalar<iScalar<iScalar<vtype> > >;
    template<typename vtype> using iSpinMatrix              = iScalar<iMatrix<iScalar<vtype>, Ns> >;
    template<typename vtype> using iSpinColourMatrix        = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
    template<typename vtype> using iColourMatrix            = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
    template<typename vtype> using iSpinColourMatrix        = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
    template<typename vtype> using iLorentzColourMatrix     = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;

    template<typename vtype> using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ;
    template<typename vtype> using iSpinVector              = iScalar<iVector<iScalar<vtype>, Ns> >;
    template<typename vtype> using iColourVector            = iScalar<iScalar<iVector<vtype, Nc> > >;
    template<typename vtype> using iSpinColourVector        = iScalar<iVector<iVector<vtype, Nc>, Ns> >;

    template<typename vtype> using iHalfSpinVector          = iScalar<iVector<iScalar<vtype>, Nhs> >;
    template<typename vtype> using iHalfSpinColourVector    = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;

    // Spin matrix
    typedef iSpinMatrix<Complex >         SpinMatrix;
    typedef iSpinMatrix<ComplexF >        SpinMatrixF;
    typedef iSpinMatrix<ComplexD >        SpinMatrixD;

    typedef iSpinMatrix<vComplex >        vSpinMatrix;
    typedef iSpinMatrix<vComplexF>        vSpinMatrixF;
    typedef iSpinMatrix<vComplexD>        vSpinMatrixD;

    // Colour Matrix
    typedef iColourMatrix<Complex >       ColourMatrix;
    typedef iColourMatrix<ComplexF >      ColourMatrixF;
    typedef iColourMatrix<ComplexD >      ColourMatrixD;

    typedef iColourMatrix<vComplex >      vColourMatrix;
    typedef iColourMatrix<vComplexF>      vColourMatrixF;
    typedef iColourMatrix<vComplexD>      vColourMatrixD;

    // SpinColour matrix
    typedef iSpinColourMatrix<Complex >   SpinColourMatrix;
    typedef iSpinColourMatrix<ComplexF >  SpinColourMatrixF;
    typedef iSpinColourMatrix<ComplexD >  SpinColourMatrixD;

    typedef iSpinColourMatrix<vComplex >  vSpinColourMatrix;
    typedef iSpinColourMatrix<vComplexF>  vSpinColourMatrixF;
    typedef iSpinColourMatrix<vComplexD>  vSpinColourMatrixD;

    // LorentzColour
    typedef iLorentzColourMatrix<Complex >  LorentzColourMatrix;
    typedef iLorentzColourMatrix<ComplexF > LorentzColourMatrixF;
    typedef iLorentzColourMatrix<ComplexD > LorentzColourMatrixD;

    typedef iSpinVector<Complex >           SpinVector;
    typedef iColourVector<Complex >         ColourVector;
    typedef iSpinColourVector<Complex >     SpinColourVector;
    typedef iHalfSpinVector<Complex >       HalfSpinVector;
    typedef iHalfSpinColourVector<Complex > HalfSpinColourVector;


    typedef iSpinMatrix<vComplex >          vSpinMatrix;
    typedef iColourMatrix<vComplex >        vColourMatrix;
    typedef iSpinColourMatrix<vComplex >    vSpinColourMatrix;
    typedef iLorentzColourMatrix<vComplex > vLorentzColourMatrix;
    typedef iLorentzColourMatrix<vComplexF> vLorentzColourMatrixF;
    typedef iLorentzColourMatrix<vComplexD> vLorentzColourMatrixD;

    // DoubleStored gauge field
    typedef iDoubleStoredColourMatrix<Complex >  DoubleStoredColourMatrix;
    typedef iDoubleStoredColourMatrix<ComplexF > DoubleStoredColourMatrixF;
    typedef iDoubleStoredColourMatrix<ComplexD > DoubleStoredColourMatrixD;

    typedef iDoubleStoredColourMatrix<vComplex > vDoubleStoredColourMatrix;
    typedef iDoubleStoredColourMatrix<vComplexF> vDoubleStoredColourMatrixF;
    typedef iDoubleStoredColourMatrix<vComplexD> vDoubleStoredColourMatrixD;

    // Spin vector
    typedef iSpinVector<Complex >  SpinVector;
    typedef iSpinVector<ComplexF>  SpinVectorF;
    typedef iSpinVector<ComplexD>  SpinVectorD;

    typedef iSpinVector<vComplex >           vSpinVector;
    typedef iColourVector<vComplex >         vColourVector;
    typedef iSpinColourVector<vComplex >     vSpinColourVector;
    typedef iHalfSpinVector<vComplex >       vHalfSpinVector;
    typedef iHalfSpinColourVector<vComplex > vHalfSpinColourVector;
    typedef iSpinVector<vComplexF> vSpinVectorF;
    typedef iSpinVector<vComplexD> vSpinVectorD;

    // Colour vector
    typedef iColourVector<Complex >  ColourVector;
    typedef iColourVector<ComplexF>  ColourVectorF;
    typedef iColourVector<ComplexD>  ColourVectorD;

    typedef iColourVector<vComplex > vColourVector;
    typedef iColourVector<vComplexF> vColourVectorF;
    typedef iColourVector<vComplexD> vColourVectorD;

    // SpinColourVector
    typedef iSpinColourVector<Complex >  SpinColourVector;
    typedef iSpinColourVector<ComplexF>  SpinColourVectorF;
    typedef iSpinColourVector<ComplexD>  SpinColourVectorD;

    typedef iSpinColourVector<vComplex > vSpinColourVector;
    typedef iSpinColourVector<vComplexF> vSpinColourVectorF;
    typedef iSpinColourVector<vComplexD> vSpinColourVectorD;

    // HalfSpin vector
    typedef iHalfSpinVector<Complex >  HalfSpinVector;
    typedef iHalfSpinVector<ComplexF>  HalfSpinVectorF;
    typedef iHalfSpinVector<ComplexD>  HalfSpinVectorD;

    typedef iHalfSpinVector<vComplex > vHalfSpinVector;
    typedef iHalfSpinVector<vComplexF> vHalfSpinVectorF;
    typedef iHalfSpinVector<vComplexD> vHalfSpinVectorD;

    // HalfSpinColour vector
    typedef iHalfSpinColourVector<Complex >  HalfSpinColourVector;
    typedef iHalfSpinColourVector<ComplexF>  HalfSpinColourVectorF;
    typedef iHalfSpinColourVector<ComplexD>  HalfSpinColourVectorD;

    typedef iHalfSpinColourVector<vComplex > vHalfSpinColourVector;
    typedef iHalfSpinColourVector<vComplexF> vHalfSpinColourVectorF;
    typedef iHalfSpinColourVector<vComplexD> vHalfSpinColourVectorD;

    // singlets
    typedef iSinglet<Complex >  TComplex;  // FIXME This is painful. Tensor singlet complex type.
    typedef iSinglet<ComplexF>  TComplexF; // FIXME This is painful. Tensor singlet complex type.
    typedef iSinglet<ComplexD>  TComplexD; // FIXME This is painful. Tensor singlet complex type.

    typedef iSinglet<vComplex > vTComplex ; // what if we don't know the tensor structure
    typedef iSinglet<vComplexF> vTComplexF; // what if we don't know the tensor structure
    typedef iSinglet<vComplexD> vTComplexD; // what if we don't know the tensor structure

    typedef iSinglet<Real >     TReal;  // Shouldn't need these; can I make it work without?
    typedef iSinglet<RealF>     TRealF; // Shouldn't need these; can I make it work without?
    typedef iSinglet<RealD>     TRealD; // Shouldn't need these; can I make it work without?

    typedef iSinglet<vReal >    vTReal;
    typedef iSinglet<vRealF>    vTRealF;
    typedef iSinglet<vRealD>    vTRealD;

    typedef iSinglet<vInteger>  vTInteger;
    typedef iSinglet<Integer >  TInteger;

    typedef Lattice<vTReal>     LatticeReal;
    typedef Lattice<vTComplex>  LatticeComplex;
    typedef Lattice<vTInteger>  LatticeInteger; // Predicates for "where"

    // Lattices of these
    typedef Lattice<vColourMatrix>  LatticeColourMatrix;
    typedef Lattice<vColourMatrixF> LatticeColourMatrixF;
    typedef Lattice<vColourMatrixD> LatticeColourMatrixD;

    typedef Lattice<vSpinMatrix>  LatticeSpinMatrix;
    typedef Lattice<vSpinMatrixF> LatticeSpinMatrixF;
    typedef Lattice<vSpinMatrixD> LatticeSpinMatrixD;

    typedef Lattice<vSpinColourMatrix>  LatticeSpinColourMatrix;
    typedef Lattice<vSpinColourMatrixF> LatticeSpinColourMatrixF;
    typedef Lattice<vSpinColourMatrixD> LatticeSpinColourMatrixD;


    typedef Lattice<vLorentzColourMatrix>  LatticeLorentzColourMatrix;
    typedef Lattice<vLorentzColourMatrixF> LatticeLorentzColourMatrixF;
    typedef Lattice<vLorentzColourMatrixD> LatticeLorentzColourMatrixD;

    // DoubleStored gauge field
    typedef Lattice<vDoubleStoredColourMatrix>  LatticeDoubleStoredColourMatrix;
    typedef Lattice<vDoubleStoredColourMatrixF> LatticeDoubleStoredColourMatrixF;
    typedef Lattice<vDoubleStoredColourMatrixD> LatticeDoubleStoredColourMatrixD;

    typedef Lattice<vSpinVector>  LatticeSpinVector;
    typedef Lattice<vSpinVectorF> LatticeSpinVectorF;
    typedef Lattice<vSpinVectorD> LatticeSpinVectorD;

    typedef Lattice<vColourVector>  LatticeColourVector;
    typedef Lattice<vColourVectorF> LatticeColourVectorF;
    typedef Lattice<vColourVectorD> LatticeColourVectorD;

    typedef Lattice<vSpinColourVector>  LatticeSpinColourVector;
    typedef Lattice<vSpinColourVectorF> LatticeSpinColourVectorF;
    typedef Lattice<vSpinColourVectorD> LatticeSpinColourVectorD;

    typedef Lattice<vHalfSpinVector>  LatticeHalfSpinVector;
    typedef Lattice<vHalfSpinVectorF> LatticeHalfSpinVectorF;
    typedef Lattice<vHalfSpinVectorD> LatticeHalfSpinVectorD;

    typedef Lattice<vHalfSpinColourVector>  LatticeHalfSpinColourVector;
    typedef Lattice<vHalfSpinColourVectorF> LatticeHalfSpinColourVectorF;
    typedef Lattice<vHalfSpinColourVectorD> LatticeHalfSpinColourVectorD;

    typedef Lattice<vTReal>  LatticeReal;
    typedef Lattice<vTRealF> LatticeRealF;
    typedef Lattice<vTRealD> LatticeRealD;

    typedef Lattice<vTComplex>  LatticeComplex;
    typedef Lattice<vTComplexF> LatticeComplexF;
    typedef Lattice<vTComplexD> LatticeComplexD;

    typedef Lattice<vTInteger> LatticeInteger; // Predicates for "where"


    ///////////////////////////////////////////
    // Physical names for things
    ///////////////////////////////////////////
    typedef Lattice<vHalfSpinColourVector> LatticeHalfFermion;
    typedef Lattice<vSpinColourVector>     LatticeFermion;
    typedef LatticeHalfSpinColourVector  LatticeHalfFermion;
    typedef LatticeHalfSpinColourVectorF LatticeHalfFermionF;
    typedef LatticeHalfSpinColourVectorD LatticeHalfFermionD;

    typedef Lattice<vSpinColourMatrix>    LatticePropagator;
    typedef Lattice<vLorentzColourMatrix> LatticeGaugeField;
    typedef LatticeSpinColourVector  LatticeFermion;
    typedef LatticeSpinColourVectorF LatticeFermionF;
    typedef LatticeSpinColourVectorD LatticeFermionD;

    typedef LatticeSpinColourMatrix  LatticePropagator;
    typedef LatticeSpinColourMatrixF LatticePropagatorF;
    typedef LatticeSpinColourMatrixD LatticePropagatorD;

    typedef LatticeLorentzColourMatrix  LatticeGaugeField;
    typedef LatticeLorentzColourMatrixF LatticeGaugeFieldF;
    typedef LatticeLorentzColourMatrixD LatticeGaugeFieldD;

    typedef LatticeDoubleStoredColourMatrix  LatticeDoubledGaugeField;
    typedef LatticeDoubleStoredColourMatrixF LatticeDoubledGaugeFieldF;
    typedef LatticeDoubleStoredColourMatrixD LatticeDoubledGaugeFieldD;

    // Uhgg... typing this hurt ;)
    // (my keyboard got burning hot when I typed this, must be the anti-Fermion)
    typedef Lattice<vColourVector>  LatticeStaggeredFermion;
    typedef Lattice<vColourVectorF> LatticeStaggeredFermionF;
    typedef Lattice<vColourVectorD> LatticeStaggeredFermionD;

    typedef Lattice<vColourMatrix>  LatticeStaggeredPropagator;
    typedef Lattice<vColourMatrixF> LatticeStaggeredPropagatorF;
    typedef Lattice<vColourMatrixD> LatticeStaggeredPropagatorD;

    //////////////////////////////////////////////////////////////////////////////
    // Peek and Poke named after physics attributes
@ -157,11 +295,14 @@ namespace QCD {
        return peekIndex<LorentzIndex>(rhs,i,j);
    }

    // FIXME transpose Colour, transpose Spin, traceColour traceSpin

} //namespace QCD
} // Grid

#include <qcd/Grid_qcd_dirac.h>
#include <qcd/Grid_qcd_2spinor.h>
//#include <qcd/Grid_qcd_pauli.h>
#include <qcd/Grid_qcd_wilson_dop.h>

#endif
@ -17,8 +17,14 @@ namespace QCD {
      GammaZ,
      GammaT,
      Gamma5,
      //      GammaXGamma5,
      //      GammaYGamma5,
      MinusIdentity,
      MinusGammaX,
      MinusGammaY,
      MinusGammaZ,
      MinusGammaT,
      MinusGamma5
      //      GammaXGamma5, // Rest are composite (willing to take hit for two calls sequentially)
      //      GammaYGamma5, // as they are less commonly used.
      //      GammaZGamma5,
      //      GammaTGamma5,
      //      SigmaXY,
@ -27,12 +33,6 @@ namespace QCD {
      //      SigmaXT,
      //      SigmaYT,
      //      SigmaZT,
      MinusIdentity,
      MinusGammaX,
      MinusGammaY,
      MinusGammaZ,
      MinusGammaT,
      MinusGamma5
      //      MinusGammaXGamma5, easiest to form by composition
      //      MinusGammaYGamma5, as performance is not critical for these
      //      MinusGammaZGamma5,
@ -54,7 +54,6 @@ namespace QCD {

  };

  /* Gx
   *  0 0 0 i
   *  0 0 i 0
@ -1,157 +1,220 @@
|
||||
#ifnfdef GRID_QCD_WILSON_DOP_H
|
||||
#define GRID_QCD_WILSON_DOP_H
|
||||
|
||||
#include <Grid.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
|
||||
const std::vector<int> WilsonMatrix::directions ({0,1,2,3, 0, 1, 2, 3,0});
|
||||
const std::vector<int> WilsonMatrix::displacements({1,1,1,1,-1,-1,-1,-1,0});
|
||||
|
||||
// Should be in header?
|
||||
static const int WilsonMatrix::Xp = 0;
|
||||
static const int WilsonMatrix::Yp = 1;
|
||||
static const int WilsonMatrix::Zp = 2;
|
||||
static const int WilsonMatrix::Tp = 3;
|
||||
static const int WilsonMatrix::Xm = 4;
|
||||
static const int WilsonMatrix::Ym = 5;
|
||||
static const int WilsonMatrix::Zm = 6;
|
||||
static const int WilsonMatrix::Tm = 7;
|
||||
static const int WilsonMatrix::X0 = 8;
|
||||
static const int WilsonMatrix::npoint=9;
|
||||
const int WilsonMatrix::Xp = 0;
|
||||
const int WilsonMatrix::Yp = 1;
|
||||
const int WilsonMatrix::Zp = 2;
|
||||
const int WilsonMatrix::Tp = 3;
|
||||
const int WilsonMatrix::Xm = 4;
|
||||
const int WilsonMatrix::Ym = 5;
|
||||
const int WilsonMatrix::Zm = 6;
|
||||
const int WilsonMatrix::Tm = 7;
|
||||
//const int WilsonMatrix::X0 = 8;
|
||||
|
||||
class WilsonCompressor {
|
||||
public:
|
||||
int mu;
|
||||
|
||||
WilsonMatrix::WilsonMatrix(LatticeGaugeField &_Umu,int _mass)
|
||||
: Stencil((&Umu._grid,npoint,0,directions,displacements),
|
||||
void Point(int p) { mu=p;};
|
||||
vHalfSpinColourVector operator () (vSpinColourVector &in)
|
||||
{
|
||||
vHalfSpinColourVector ret;
|
||||
switch(mu) {
|
||||
case WilsonMatrix::Xp:
|
||||
spProjXp(ret,in);
|
||||
break;
|
||||
case WilsonMatrix::Yp:
|
||||
spProjYp(ret,in);
|
||||
break;
|
||||
case WilsonMatrix::Zp:
|
||||
spProjZp(ret,in);
|
||||
break;
|
||||
case WilsonMatrix::Tp:
|
||||
spProjTp(ret,in);
|
||||
break;
|
||||
case WilsonMatrix::Xm:
|
||||
spProjXm(ret,in);
|
||||
break;
|
||||
case WilsonMatrix::Ym:
|
||||
spProjYm(ret,in);
|
||||
break;
|
||||
case WilsonMatrix::Zm:
|
||||
spProjZm(ret,in);
|
||||
break;
|
||||
case WilsonMatrix::Tm:
|
||||
spProjTm(ret,in);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
|
||||
WilsonMatrix::WilsonMatrix(LatticeGaugeField &_Umu,double _mass)
  : Stencil(Umu._grid,npoint,0,directions,displacements),
    mass(_mass),
    Umu(_Umu)
    Umu(_Umu._grid)
{
  // Allocate the required comms buffer
  grid = _Umu._grid;
  comm_buf.resize(Stencil._unified_buffer_size);
  DoubleStore(Umu,_Umu);
}

void WilsonMatrix::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu)
{
  LatticeColourMatrix U(grid);

  for(int mu=0;mu<Nd;mu++){
    U = peekIndex<LorentzIndex>(Umu,mu);
    pokeIndex<LorentzIndex>(Uds,U,mu);
    U = adj(Cshift(U,mu,-1));
    pokeIndex<LorentzIndex>(Uds,U,mu+4);
  }
}
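Note: DoubleStore packs both hopping directions into one field so the site loop never needs a second, shifted gauge field. With directions {0,1,2,3, 0,1,2,3} and displacements {+1,+1,+1,+1,-1,-1,-1,-1} this means

    Uds(x)(mu)   = U_mu(x)                 // forward hop,  mu = 0..3
    Uds(x)(mu+4) = adj(U_mu(x - mu_hat))   // backward hop, stored pre-daggered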
void WilsonMatrix::multiply(const LatticeFermion &in, LatticeFermion &out)
{
  Dhop(in,out);
  return;
}

void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
{
  Stencil.HaloExchange(in,comm_buf);
  // Stencil.HaloExchange(in,comm_buf);

  for(int ss=0;ss<_grid->oSites();ss++){
  for(int ss=0;ss<grid->oSites();ss++){

    int offset,local;

    vSpinColourVector result;
    vHalfSpinColourVector UChi;
    vHalfSpinColourVector chi;
    vHalfSpinColourVector Uchi;
    vHalfSpinColourVector *chi_p;

    // Xp
    offset = Stencil._offsets [Xp][ss];
    local  = Stencil._is_local[Xp][ss];
    if ( local ) {
      Uchi = U[]*spProjXp(in._odata[offset]);
    } else {
      Uchi = U[]*comm_buf._odata[offset]
    }
    result = ReconXp(Uchi);

    chi_p = &comm_buf[offset];
    if ( local ) {
      spProjXp(chi,in._odata[offset]);
      chi_p = &chi;
    }
    mult(&(Uchi()),&(Umu._odata[ss](Xp)),&(*chi_p)());
    spReconXp(result,Uchi);

#if 0
    // Yp
    offset = Stencil._offsets [Yp][ss];
    local  = Stencil._is_local[Yp][ss];
    chi_p  = &comm_buf[offset];
    if ( local ) {
      Uchi = U[]*spProjYp(in._odata[offset]);
    } else {
      Uchi = U[]*comm_buf._odata[offset]
      spProjYp(chi,in._odata[offset]);
      chi_p = &chi;
    }
    result+= ReconYp(Uchi);
    mult(&(Uchi()),&(Umu._odata[ss](Yp)),&(*chi_p)());
    accumReconYp(result,Uchi);

    // Zp
    offset = Stencil._offsets [Zp][ss];
    local  = Stencil._is_local[Zp][ss];
    chi_p  = &comm_buf[offset];
    if ( local ) {
      Uchi = U[]*spProjZp(in._odata[offset]);
    } else {
      Uchi = U[]*comm_buf._odata[offset]
      spProjZp(chi,in._odata[offset]);
      chi_p = &chi;
    }
    result+= ReconZp(Uchi);
    mult(&(Uchi()),&(Umu._odata[ss](Zp)),&(*chi_p)() );
    accumReconZp(result,Uchi);

    // Tp
    offset = Stencil._offsets [Tp][ss];
    local  = Stencil._is_local[Tp][ss];
    chi_p  = &comm_buf[offset];
    if ( local ) {
      Uchi = U[]*spProjTp(in._odata[offset]);
    } else {
      Uchi = U[]*comm_buf._odata[offset]
      spProjTp(chi,in._odata[offset]);
      chi_p = &chi;
    }
    result+= ReconTp(Uchi);
    mult(&(Uchi()),&(Umu._odata[ss](Tp)),&(*chi_p)());
    accumReconTp(result,Uchi);

    // Xm
    offset = Stencil._offsets [Xm][ss];
    local  = Stencil._is_local[Xm][ss];
    chi_p  = &comm_buf[offset];
    if ( local ) {
      Uchi = U[]*spProjXm(in._odata[offset]);
    } else {
      Uchi = U[]*comm_buf._odata[offset]
      spProjXm(chi,in._odata[offset]);
      chi_p = &chi;
    }
    result+= ReconXm(Uchi);
    mult(&(Uchi()),&(Umu._odata[ss](Xm)),&(*chi_p)());
    accumReconXm(result,Uchi);

    // Ym
    offset = Stencil._offsets [Ym][ss];
    local  = Stencil._is_local[Ym][ss];
    chi_p  = &comm_buf[offset];
    if ( local ) {
      Uchi = U[]*spProjYm(in._odata[offset]);
    } else {
      Uchi = U[]*comm_buf._odata[offset]
      spProjYm(chi,in._odata[offset]);
      chi_p = &chi;
    }
    result+= ReconYm(Uchi);
    mult(&(Uchi()),&(Umu._odata[ss](Ym)),&(*chi_p)());
    accumReconYm(result,Uchi);

    // Zm
    offset = Stencil._offsets [Zm][ss];
    local  = Stencil._is_local[Zm][ss];
    chi_p  = &comm_buf[offset];
    if ( local ) {
      Uchi = U[]*spProjZm(in._odata[offset]);
    } else {
      Uchi = U[]*comm_buf._odata[offset]
      spProjZm(chi,in._odata[offset]);
      chi_p = &chi;
    }
    result+= ReconZm(Uchi);
    mult(&(Uchi()),&(Umu._odata[ss](Zm)),&(*chi_p)());
    accumReconZm(result,Uchi);

    // Tm
    offset = Stencil._offsets [Tm][ss];
    local  = Stencil._is_local[Tm][ss];
    chi_p  = &comm_buf[offset];
    if ( local ) {
      Uchi = U[]*spProjTm(in._odata[offset]);
    } else {
      Uchi = U[]*comm_buf._odata[offset]
      spProjTm(chi,in._odata[offset]);
      chi_p = &chi;
    }
    result+= ReconTm(Uchi);

    mult(&(Uchi()),&(Umu._odata[ss](Tm)),&(*chi_p)());
    accumReconTm(result,Uchi);
#endif
    out._odata[ss] = result;
  }

}
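Note: each direction in Dhop follows the same four steps, unrolled per direction above: look up the stencil entry; spin-project to a half spinor (locally, or take the comms-buffer entry that the compressor already projected); multiply by the doubled gauge link; reconstruct to a full spinor, accumulating into result. Schematically, with Dir standing for any of Xp..Tm (hypothetical name; the real code is unrolled):

    offset = Stencil._offsets [Dir][ss];
    local  = Stencil._is_local[Dir][ss];
    chi_p  = &comm_buf[offset];             // remote neighbour: already projected
    if ( local ) {
      spProjDir(chi,in._odata[offset]);     // local neighbour: project here
      chi_p = &chi;
    }
    mult(&(Uchi()),&(Umu._odata[ss](Dir)),&(*chi_p)());
    accumReconDir(result,Uchi);             // first direction uses spReconDir to initialise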
void WilsonMatrix::Dw(const LatticeFermion &in, LatticeFermion &out)
{
  return;
}
void WilsonMatrix::MpcDag   (const LatticeFermion &in, LatticeFermion &out)
{
  return;
}
void WilsonMatrix::Mpc      (const LatticeFermion &in, LatticeFermion &out)
{
  return;
}
void WilsonMatrix::MpcDagMpc(const LatticeFermion &in, LatticeFermion &out)
{
  return;
}
void WilsonMatrix::MDagM    (const LatticeFermion &in, LatticeFermion &out)
{
  return;
}

}}
#endif
@ -1,4 +1,4 @@
#ifnfdef GRID_QCD_WILSON_DOP_H
#ifndef GRID_QCD_WILSON_DOP_H
#define GRID_QCD_WILSON_DOP_H

#include <Grid.h>
@ -21,21 +21,23 @@ namespace Grid {
    GridBase *grid;

    // Copy of the gauge field
    LatticeGaugeField Umu;
    LatticeDoubledGaugeField Umu;

    //Defines the stencil
    CartesianStencil Stencil;
    static const int npoint=9;
    static const std::vector<int> directions   ;
    static const std::vector<int> displacements;

    static const int Xp,Xm,Yp,Ym,Zp,Zm,Tp,Tm;

    // Comms buffer
    std::vector<vSpinColourVector,alignedAllocator<vSpinColourVector> > comm_buf;
    std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> > comm_buf;

    // Constructor
    WilsonMatrix(LatticeGaugeField &Umu,int mass);
    WilsonMatrix(LatticeGaugeField &Umu,double mass);

    // DoubleStore
    void DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu);

    // override multiply
    void multiply(const LatticeFermion &in, LatticeFermion &out);
@ -252,12 +252,12 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
    vComplexD ret ; vzero(ret);
#if defined (AVX1)|| defined (AVX2)
    // addsubps 0, inv=>0+in.v[3] 0-in.v[2], 0+in.v[1], 0-in.v[0], ...
    // __m256d tmp = _mm256_addsub_pd(ret.v,_mm256_shuffle_pd(in.v,in.v,0x5));
    // ret.v=_mm256_shuffle_pd(tmp,tmp,0x5);
    ret.v = _mm256_addsub_pd(ret.v,in.v);
    zvec tmp = _mm256_addsub_pd(ret.v,_mm256_shuffle_pd(in.v,in.v,0x5));
    ret.v =_mm256_shuffle_pd(tmp,tmp,0x5);
#endif
#ifdef SSE4
    ret.v = _mm_addsub_pd(ret.v,in.v);
    zvec tmp = _mm_addsub_pd(ret.v,_mm_shuffle_pd(in.v,in.v,0x1));
    ret.v = _mm_shuffle_pd(tmp,tmp,0x1);
#endif
#ifdef AVX512
    ret.v = _mm512_mask_sub_pd(in.v, 0xaaaa,ret.v, in.v);
@ -268,48 +268,41 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
    return ret;
  }

  friend inline vComplexD timesI(const vComplexD &in){
  friend inline vComplexD timesMinusI(const vComplexD &in){
    vComplexD ret; vzero(ret);
    vComplexD tmp;
#if defined (AVX1)|| defined (AVX2)
    cvec tmp =_mm256_addsub_ps(ret.v,in.v); // r,-i
    /*
      IF IMM0[0] = 0
      THEN DEST[63:0]=SRC1[63:0] ELSE DEST[63:0]=SRC1[127:64] FI;
      IF IMM0[1] = 0
      THEN DEST[127:64]=SRC2[63:0] ELSE DEST[127:64]=SRC2[127:64] FI;
      IF IMM0[2] = 0
      THEN DEST[191:128]=SRC1[191:128] ELSE DEST[191:128]=SRC1[255:192] FI;
      IF IMM0[3] = 0
      THEN DEST[255:192]=SRC2[191:128] ELSE DEST[255:192]=SRC2[255:192] FI;
    */
    ret.v =_mm256_shuffle_ps(tmp,tmp,0x5);
    tmp.v =_mm256_addsub_pd(ret.v,in.v); // r,-i
    ret.v =_mm256_shuffle_pd(tmp.v,tmp.v,0x5);
#endif
#ifdef SSE4
    cvec tmp =_mm_addsub_ps(ret.v,in.v); // r,-i
    ret.v =_mm_shuffle_ps(tmp,tmp,0x5);
    tmp.v =_mm_addsub_pd(ret.v,in.v); // r,-i
    ret.v =_mm_shuffle_pd(tmp.v,tmp.v,0x1);
#endif
#ifdef AVX512
    ret.v = _mm512_mask_sub_ps(in.v,0xaaaa,ret.v,in.v); // real -imag
    ret.v = _mm512_swizzle_ps(ret.v, _MM_SWIZ_REG_CDAB);// OK
    ret.v = _mm512_mask_sub_pd(in.v,0xaaaa,ret.v,in.v); // real -imag
    ret.v = _mm512_swizzle_pd(ret.v, _MM_SWIZ_REG_CDAB);// OK
#endif
#ifdef QPX
    assert(0);
#endif
    return ret;
  }
  friend inline vComplexD timesMinusI(const vComplexD &in){

  friend inline vComplexD timesI(const vComplexD &in){
    vComplexD ret; vzero(ret);
    vComplexD tmp;
#if defined (AVX1)|| defined (AVX2)
    cvec tmp =_mm256_shuffle_ps(in.v,in.v,0x5);
    ret.v =_mm256_addsub_ps(ret.v,tmp); // i,-r
    tmp.v =_mm256_shuffle_pd(in.v,in.v,0x5);
    ret.v =_mm256_addsub_pd(ret.v,tmp.v); // i,-r
#endif
#ifdef SSE4
    cvec tmp =_mm_shuffle_ps(in.v,in.v,0x5);
    ret.v =_mm_addsub_ps(ret.v,tmp); // r,-i
    tmp.v =_mm_shuffle_pd(in.v,in.v,0x1);
    ret.v =_mm_addsub_pd(ret.v,tmp.v); // r,-i
#endif
#ifdef AVX512
    cvec tmp = _mm512_swizzle_ps(in.v, _MM_SWIZ_REG_CDAB);// OK
    ret.v = _mm512_mask_sub_ps(tmp,0xaaaa,ret.v,tmp); // real -imag
    tmp.v = _mm512_swizzle_pd(in.v, _MM_SWIZ_REG_CDAB);// OK
    ret.v = _mm512_mask_sub_pd(tmp.v,0xaaaa,ret.v,tmp.v); // real -imag
#endif
#ifdef QPX
    assert(0);
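Note: every one of these kernels must reproduce the obvious scalar complex arithmetic; the shuffle/addsub pairings are just lane-wise ways of negating one component and swapping the pair. A scalar reference (the *Ref names are ours, for illustration) that tests/Grid_simd.cc later in this commit effectively checks the vector code against:

    inline ComplexD conjRef       (const ComplexD &z){ return ComplexD( real(z),-imag(z)); }
    inline ComplexD timesIRef     (const ComplexD &z){ return ComplexD(-imag(z), real(z)); } //  i*z
    inline ComplexD timesMinusIRef(const ComplexD &z){ return ComplexD( imag(z),-real(z)); } // -i*z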
@ -214,10 +214,10 @@ friend inline void vstore(const vComplexF &ret, ComplexF *a){
  {
#ifdef SSE4
    union {
      __m128 v1;  // SSE 4 x float vector
      cvec v1;    // SSE 4 x float vector
      float f[4]; // scalar array of 4 floats
    } u128;
    u128.v1= _mm_add_ps(v, _mm_shuffle_ps(v, v, 0b01001110));         // FIXME Prefer to use _MM_SHUFFLE macros
    u128.v1= _mm_add_ps(in.v, _mm_shuffle_ps(in.v,in.v, 0b01001110)); // FIXME Prefer to use _MM_SHUFFLE macros
    return ComplexF(u128.f[0], u128.f[1]);
#endif
#ifdef AVX1
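Note on the FIXME: the magic constant is already the macro value, since _MM_SHUFFLE(z,y,x,w) expands to (z<<6)|(y<<4)|(x<<2)|w:

    _mm_shuffle_ps(in.v,in.v,_MM_SHUFFLE(1,0,3,2)); // == 0b01001110, swaps the two complex pairs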
@ -329,13 +329,15 @@ friend inline void vstore(const vComplexF &ret, ComplexF *a){
  friend inline vComplexF conj(const vComplexF &in){
    vComplexF ret ; vzero(ret);
#if defined (AVX1)|| defined (AVX2)
    // cvec tmp;
    // tmp = _mm256_addsub_ps(ret.v,_mm256_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1))); // ymm1 <- br,bi
    // ret.v=_mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1));
    ret.v = _mm256_addsub_ps(ret.v,in.v);
    cvec tmp;
    tmp = _mm256_addsub_ps(ret.v,_mm256_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1))); // ymm1 <- br,bi
    ret.v=_mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1));

#endif
#ifdef SSE4
    ret.v = _mm_addsub_ps(ret.v,in.v);
    cvec tmp;
    tmp = _mm_addsub_ps(ret.v,_mm_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1))); // ymm1 <- br,bi
    ret.v=_mm_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1));
#endif
#ifdef AVX512
    ret.v = _mm512_mask_sub_ps(in.v,0xaaaa,ret.v,in.v); // Zero out 0+real 0-imag
@ -345,15 +347,16 @@ friend inline void vstore(const vComplexF &ret, ComplexF *a){
#endif
    return ret;
  }
  friend inline vComplexF timesI(const vComplexF &in){
    vComplexF ret; vzero(ret);
  friend inline vComplexF timesMinusI(const vComplexF &in){
    vComplexF ret;
    vzero(ret);
#if defined (AVX1)|| defined (AVX2)
    cvec tmp =_mm256_addsub_ps(ret.v,in.v); // r,-i
    ret.v = _mm256_shuffle_ps(tmp,tmp,0x5);
    ret.v = _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1)); //-i,r
#endif
#ifdef SSE4
    cvec tmp =_mm_addsub_ps(ret.v,in.v); // r,-i
    ret.v = _mm_shuffle_ps(tmp,tmp,0x5);
    ret.v = _mm_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1));
#endif
#ifdef AVX512
    ret.v = _mm512_mask_sub_ps(in.v,0xaaaa,ret.v,in.v); // real -imag
@ -364,14 +367,14 @@ friend inline void vstore(const vComplexF &ret, ComplexF *a){
#endif
    return ret;
  }
  friend inline vComplexF timesMinusI(const vComplexF &in){
  friend inline vComplexF timesI(const vComplexF &in){
    vComplexF ret; vzero(ret);
#if defined (AVX1)|| defined (AVX2)
    cvec tmp =_mm256_shuffle_ps(in.v,in.v,0x5);
    cvec tmp =_mm256_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1));//i,r
    ret.v =_mm256_addsub_ps(ret.v,tmp); //i,-r
#endif
#ifdef SSE4
    cvec tmp =_mm_shuffle_ps(in.v,in.v,0x5);
    cvec tmp =_mm_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1));
    ret.v = _mm_addsub_ps(ret.v,tmp); // r,-i
#endif
#ifdef AVX512
@ -443,5 +446,8 @@ friend inline void vstore(const vComplexF &ret, ComplexF *a){
  inline vComplexF trace(const vComplexF &arg){
    return arg;
  }

}

#endif
@ -146,7 +146,7 @@ namespace Grid {
    ret.v = _mm256_set_pd(a[3],a[2],a[1],a[0]);
#endif
#ifdef SSE4
    ret.v = _mm_set_pd(a[0],a[1]);
    ret.v = _mm_set_pd(a[1],a[0]);
#endif
#ifdef AVX512
    ret.v = _mm512_set_pd(a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
@ -186,6 +186,15 @@ namespace Grid {

    friend inline RealD Reduce(const vRealD & in)
    {
#if defined (SSE4)
      // FIXME Hack
      const RealD * ptr = (const RealD *) &in;
      RealD ret = 0;
      for(int i=0;i<vRealD::Nsimd();i++){
        ret = ret+ptr[i];
      }
      return ret;
#endif
#if defined (AVX1) || defined(AVX2)
      typedef union {
        uint64_t l;
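Note: the pointer-cast loop works but forces the value through memory. A register-only alternative for the SSE double case, as an untested sketch:

    __m128d t = _mm_add_pd(in.v,_mm_shuffle_pd(in.v,in.v,0x1)); // lane0+lane1 in both lanes
    return _mm_cvtsd_f64(t);                                    // extract lane 0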
@ -175,7 +175,7 @@ namespace Grid {
    ret.v = _mm256_set_ps(a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
#endif
#ifdef SSE4
    ret.v = _mm_set_ps(a[0],a[1],a[2],a[3]);
    ret.v = _mm_set_ps(a[3],a[2],a[1],a[0]);
#endif
#ifdef AVX512
    ret.v = _mm512_set_ps( a[15],a[14],a[13],a[12],a[11],a[10],a[9],a[8],
@ -220,6 +220,15 @@ friend inline void vstore(const vRealF &ret, float *a){
    }
    friend inline RealF Reduce(const vRealF & in)
    {
#if defined (SSE4)
      // FIXME Hack
      const RealF * ptr = (const RealF *) &in;
      RealF ret = 0;
      for(int i=0;i<vRealF::Nsimd();i++){
        ret = ret+ptr[i];
      }
      return ret;
#endif
#if defined (AVX1) || defined(AVX2)
      __attribute__ ((aligned(32))) float c_[16];
      __m256 tmp = _mm256_permute2f128_ps(in.v,in.v,0x01);
@ -121,8 +121,9 @@ namespace Grid {
    int simd_layout = _grid->_simd_layout[dimension];
    int comm_dim    = _grid->_processors[dimension] >1 ;

    assert(simd_layout==1);
    // assert(simd_layout==1); // Why?
    assert(comm_dim==1);
    shift = (shift + fd) %fd;
    assert(shift>=0);
    assert(shift<fd);
@ -5,6 +5,11 @@ using namespace std;
using namespace Grid;
using namespace Grid::QCD;

template<class d>
struct scal {
  d internal;
};

int main (int argc, char ** argv)
{
@ -22,16 +27,27 @@ int main (int argc, char ** argv)
  GridSerialRNG sRNG;
  sRNG.SeedRandomDevice();

  SpinMatrix ident=zero;
  SpinMatrix ident; ident=zero;
  SpinMatrix rnd ; random(sRNG,rnd);

  SpinMatrix ll=zero;
  SpinMatrix rr=zero;
  SpinMatrix ll; ll=zero;
  SpinMatrix rr; rr=zero;
  SpinMatrix result;

  SpinVector lv; random(sRNG,lv);
  SpinVector rv; random(sRNG,rv);

  std::cout << " Is pod " << std::is_pod<SpinVector>::value << std::endl;
  std::cout << " Is pod double " << std::is_pod<double>::value << std::endl;
  std::cout << " Is pod ComplexF " << std::is_pod<ComplexF>::value << std::endl;
  std::cout << " Is pod scal<double> " << std::is_pod<scal<double> >::value << std::endl;
  std::cout << " Is pod Scalar<double> " << std::is_pod<iScalar<double> >::value << std::endl;
  std::cout << " Is pod Scalar<ComplexF> " << std::is_pod<iScalar<ComplexF> >::value << std::endl;
  std::cout << " Is pod Scalar<vComplexF> " << std::is_pod<iScalar<vComplexF> >::value << std::endl;
  std::cout << " Is pod Scalar<vComplexD> " << std::is_pod<iScalar<vComplexD> >::value << std::endl;
  std::cout << " Is pod Scalar<vRealF> " << std::is_pod<iScalar<vRealF> >::value << std::endl;
  std::cout << " Is pod Scalar<vRealD> " << std::is_pod<iScalar<vRealD> >::value << std::endl;

  for(int a=0;a<Ns;a++){
    ident()(a,a) = 1.0;
  }
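Note: the declare-then-assign rewrites here (and for Plaq_T_sum below) suggest these tensor types support assignment from zero but not construction from it, which is presumably why the one-line initialisers had to be split.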
@ -50,7 +50,6 @@ int main (int argc, char ** argv)
    }
  }

  double vol = Fine.gSites();
  Complex PlaqScale(1.0/vol/6.0/3.0);

@ -58,7 +57,8 @@ int main (int argc, char ** argv)
  sliceSum(Plaq,Plaq_T,Nd-1);
  int Nt = Plaq_T.size();

  TComplex Plaq_T_sum=zero;
  TComplex Plaq_T_sum;
  Plaq_T_sum=zero;
  for(int t=0;t<Nt;t++){
    Plaq_T_sum = Plaq_T_sum+Plaq_T[t];
    Complex Pt=TensorRemove(Plaq_T[t]);
tests/Grid_simd.cc (new file, 166 lines)
@ -0,0 +1,166 @@
#include <Grid.h>
#include <parallelIO/GridNerscIO.h>

using namespace std;
using namespace Grid;
using namespace Grid::QCD;

class funcPlus {
public:
  funcPlus() {};
  template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1+i2;}
  std::string name(void) const { return std::string("Plus"); }
};
class funcMinus {
public:
  funcMinus() {};
  template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1-i2;}
  std::string name(void) const { return std::string("Minus"); }
};
class funcTimes {
public:
  funcTimes() {};
  template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1*i2;}
  std::string name(void) const { return std::string("Times"); }
};
class funcConj {
public:
  funcConj() {};
  template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = conj(i1);}
  std::string name(void) const { return std::string("Conj"); }
};
class funcAdj {
public:
  funcAdj() {};
  template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = adj(i1);}
  std::string name(void) const { return std::string("Adj"); }
};

class funcTimesI {
public:
  funcTimesI() {};
  template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = timesI(i1);}
  std::string name(void) const { return std::string("timesI"); }
};

class funcTimesMinusI {
public:
  funcTimesMinusI() {};
  template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = timesMinusI(i1);}
  std::string name(void) const { return std::string("timesMinusI"); }
};
template<class scal, class vec,class functor >
void Tester(const functor &func)
{
  GridSerialRNG sRNG;
  sRNG.SeedRandomDevice();

  int Nsimd = vec::Nsimd();

  std::vector<scal> input1(Nsimd);
  std::vector<scal> input2(Nsimd);
  std::vector<scal> result(Nsimd);
  std::vector<scal> reference(Nsimd);

  std::vector<vec,alignedAllocator<vec> > buf(3);
  vec & v_input1 = buf[0];
  vec & v_input2 = buf[1];
  vec & v_result = buf[2];

  for(int i=0;i<Nsimd;i++){
    random(sRNG,input1[i]);
    random(sRNG,input2[i]);
    random(sRNG,result[i]);
  }

  Gmerge(v_input1,input1);
  Gmerge(v_input2,input2);
  Gmerge(v_result,result);

  func(v_result,v_input1,v_input2);

  for(int i=0;i<Nsimd;i++) {
    func(reference[i],input1[i],input2[i]);
  }

  Gextract(v_result,result);
  std::cout << " " << func.name()<<std::endl;

  int ok=0;
  for(int i=0;i<Nsimd;i++){
    if ( abs(reference[i]-result[i])>0){
      std::cout<< "*****" << std::endl;
      std::cout<< "["<<i<<"] "<< abs(reference[i]-result[i]) << " " <<reference[i]<< " " << result[i]<<std::endl;
      ok++;
    }
  }
  if ( ok==0 ) std::cout << " OK!" <<std::endl;
}
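Note: the harness closes the loop in both directions. Gmerge packs Nsimd random scalars into one SIMD vector, the functor is applied once vector-wise and Nsimd times scalar-wise, then Gextract unpacks the vector result for an element-by-element comparison; any nonzero difference is printed and counted before the final OK.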
int main (int argc, char ** argv)
{
  Grid_init(&argc,&argv);

  std::vector<int> simd_layout({1,1,2,2});
  std::vector<int> mpi_layout ({1,1,1,1});
  std::vector<int> latt_size  ({8,8,8,8});

  GridCartesian Grid(latt_size,simd_layout,mpi_layout);
  std::vector<int> seeds({1,2,3,4});

  // Insist that operations on random scalars give
  // identical results to the same operations on vectors.

  Tester<RealD,vRealD>(funcPlus());

  std::cout << "==================================="<< std::endl;
  std::cout << "Testing vComplexF "<<std::endl;
  std::cout << "==================================="<< std::endl;

  Tester<ComplexF,vComplexF>(funcTimesI());
  Tester<ComplexF,vComplexF>(funcTimesMinusI());
  Tester<ComplexF,vComplexF>(funcPlus());
  Tester<ComplexF,vComplexF>(funcMinus());
  Tester<ComplexF,vComplexF>(funcTimes());
  Tester<ComplexF,vComplexF>(funcConj());
  Tester<ComplexF,vComplexF>(funcAdj());

  std::cout << "==================================="<< std::endl;
  std::cout << "Testing vComplexD "<<std::endl;
  std::cout << "==================================="<< std::endl;

  Tester<ComplexD,vComplexD>(funcTimesI());
  Tester<ComplexD,vComplexD>(funcTimesMinusI());
  Tester<ComplexD,vComplexD>(funcPlus());
  Tester<ComplexD,vComplexD>(funcMinus());
  Tester<ComplexD,vComplexD>(funcTimes());
  Tester<ComplexD,vComplexD>(funcConj());
  Tester<ComplexD,vComplexD>(funcAdj());

  std::cout << "==================================="<< std::endl;
  std::cout << "Testing vRealF "<<std::endl;
  std::cout << "==================================="<< std::endl;

  Tester<RealF,vRealF>(funcPlus());
  Tester<RealF,vRealF>(funcMinus());
  Tester<RealF,vRealF>(funcTimes());
  Tester<RealF,vRealF>(funcAdj());

  std::cout << "==================================="<< std::endl;
  std::cout << "Testing vRealD "<<std::endl;
  std::cout << "==================================="<< std::endl;

  Tester<RealD,vRealD>(funcPlus());
  Tester<RealD,vRealD>(funcMinus());
  Tester<RealD,vRealD>(funcTimes());
  Tester<RealD,vRealD>(funcAdj());

  Grid_finalize();
}
@ -4,48 +4,25 @@ using namespace std;
using namespace Grid;
using namespace Grid::QCD;

template<class vobj>
class SimpleCompressor {
public:
  void Point(int) {};

  vobj operator() (vobj &arg) {
    return arg;
  }
};

int main (int argc, char ** argv)
{
  Grid_init(&argc,&argv);

  std::vector<int> latt_size  (4);
  std::vector<int> simd_layout(4);
  std::vector<int> mpi_layout (4);
  std::vector<int> simd_layout({1,1,2,2});
  std::vector<int> mpi_layout ({2,2,2,2});
  std::vector<int> latt_size  ({8,8,8,8});

  int omp=1;
  int lat=8;

  mpi_layout[0]=1;
  mpi_layout[1]=1;
  mpi_layout[2]=1;
  mpi_layout[3]=1;

  latt_size[0] = lat;
  latt_size[1] = lat;
  latt_size[2] = lat;
  latt_size[3] = lat;
  double volume = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];

#ifdef AVX512
  simd_layout[0] = 1;
  simd_layout[1] = 2;
  simd_layout[2] = 2;
  simd_layout[3] = 2;
#endif
#if defined (AVX1)|| defined (AVX2)
  simd_layout[0] = 1;
  simd_layout[1] = 1;
  simd_layout[2] = 2;
  simd_layout[3] = 2;
#endif
#if defined (SSE2)
  simd_layout[0] = 1;
  simd_layout[1] = 1;
  simd_layout[2] = 1;
  simd_layout[3] = 2;
#endif

  GridCartesian Fine(latt_size,simd_layout,mpi_layout);
  GridRedBlackCartesian rbFine(latt_size,simd_layout,mpi_layout);
  GridParallelRNG fRNG(&Fine);
@ -86,7 +63,8 @@ int main (int argc, char ** argv)
  fflush(stdout);
  std::vector<vColourMatrix,alignedAllocator<vColourMatrix> > comm_buf(myStencil._unified_buffer_size);
  printf("calling halo exchange\n");fflush(stdout);
  myStencil.HaloExchange(Foo,comm_buf);
  SimpleCompressor<vColourMatrix> compress;
  myStencil.HaloExchange(Foo,comm_buf,compress);

  Bar = Cshift(Foo,dir,disp);
tests/Grid_wilson.cc (new file, 69 lines)
@ -0,0 +1,69 @@
#include <Grid.h>
#include <parallelIO/GridNerscIO.h>

using namespace std;
using namespace Grid;
using namespace Grid::QCD;

template<class d>
struct scal {
  d internal;
};

int main (int argc, char ** argv)
{
  Grid_init(&argc,&argv);

  std::vector<int> simd_layout({1,1,2,2});
  std::vector<int> mpi_layout ({1,1,1,1});
  std::vector<int> latt_size  ({8,8,8,8});

  GridCartesian Grid(latt_size,simd_layout,mpi_layout);
  std::vector<int> seeds({1,2,3,4});

  GridParallelRNG pRNG(&Grid);
  // pRNG.SeedFixedIntegers(seeds);
  pRNG.SeedRandomDevice();

  LatticeFermion src(&Grid);    random(pRNG,src);
  LatticeFermion result(&Grid); result=zero;
  LatticeFermion ref(&Grid);    ref=zero;
  LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
  std::vector<LatticeColourMatrix> U(4,&Grid);

  for(int mu=0;mu<Nd;mu++){
    U[mu] = 1.0;
    pokeIndex<3>(Umu,U[mu],mu);
  }

  {
    int mu=0;
    // ref = src + Gamma(Gamma::GammaX)* src ; // 1-gamma_x
    ref = src;
    ref = U[0]*Cshift(ref,0,1);
  }

  RealD mass=0.1;
  WilsonMatrix Dw(Umu,mass);
  std::cout << "Calling Dw"<<std::endl;
  Dw.multiply(src,result);
  std::cout << "Called Dw"<<std::endl;
  std::cout << "norm result "<< norm2(result)<<std::endl;
  std::cout << "norm ref    "<< norm2(ref)<<std::endl;

  for(int ss=0;ss<10;ss++ ){
    for(int i=0;i<Ns;i++){
      for(int j=0;j<Nc;j++){
        ComplexF * ref_p = (ComplexF *)&ref._odata[ss]()(i)(j);
        ComplexF * res_p = (ComplexF *)&result._odata[ss]()(i)(j);
        std::cout << ss<< " "<<i<<" "<<j<<" "<< (*ref_p)<<" " <<(*res_p)<<std::endl;
      }
    }
  }

  ref = ref -result;
  std::cout << "norm diff "<< norm2(ref)<<std::endl;

  Grid_finalize();
}
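Note: this test matches the work-in-progress state of Dhop above: only the Xp block is active there, so the hand-built reference covers just the Xp hop, ref = U[0]*Cshift(src,0,1), with the spin-projection part (the commented 1-gamma_x line) still to be added on both sides before norm diff can be expected to vanish.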
@ -5,7 +5,7 @@ AM_LDFLAGS = -L$(top_srcdir)/lib
#
# Test code
#
bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma
bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma Grid_wilson Grid_simd

Grid_main_SOURCES = Grid_main.cc
Grid_main_LDADD = -lGrid
@ -21,3 +21,9 @@ Grid_gamma_LDADD = -lGrid

Grid_stencil_SOURCES = Grid_stencil.cc
Grid_stencil_LDADD = -lGrid

Grid_wilson_SOURCES = Grid_wilson.cc
Grid_wilson_LDADD = -lGrid

Grid_simd_SOURCES = Grid_simd.cc
Grid_simd_LDADD = -lGrid