mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 01:05:38 +01:00

Big updates with progress towards the Wilson matrix

This commit is contained in:
Peter Boyle 2015-04-26 15:51:09 +01:00
parent 51f0da7b93
commit 94f728bee4
27 changed files with 1008 additions and 355 deletions

TODO
View File

@ -2,6 +2,10 @@
- use protocol buffers? replace xmlReader/Writer etc..
- Binary use htonll, htonl
* Reduce implementation is poor
* Bug in SeedFixedIntegers gives same output on each site.
* Bug in RNG with complex numbers; only filling real values; need helper function -- DONE
* Stencil operator support -----Initial thoughts, trial implementation DONE.
-----some simple tests that Stencil matches Cshift.
-----do all permute in comms phase, so that copy permute
@ -11,6 +15,7 @@
* CovariantShift support -----Use a class to store gauge field? (parallel transport?)
* Strong test for norm2, conj and all primitive types.
* Consider switching std::vector to boost arrays or something lighter weight
boost::multi_array<type, 3> A()... to replace multi1d, multi2d etc..
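A minimal sketch of what that replacement could look like (assumes a Boost dependency, which Grid does not currently have; the Ns extents are illustrative):

    #include <boost/multi_array.hpp>
    // 2-index array sized at runtime, standing in for multi2d<Complex>
    boost::multi_array<Complex,2> A(boost::extents[Ns][Ns]);
    A[0][1] = Complex(0.0,1.0);   // element access mirrors multi2d indexing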
@ -33,10 +38,21 @@
* Make the Tensor types and Complex etc... play more nicely.
* TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > >
- TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > >
QDP forces use of "toDouble" to get back to a non-tensor scalar. This role is presently taken by TensorRemove, but I
want to introduce a syntax that does not require this.
- Reductions that contract indices on a site should always demote the tensor structure.
norm2(), innerProduct.
- Result of Sum(), SliceSum // spatial sums
trace, traceIndex etc.. do not.
- problem arises because "trace" returns Lattice<TComplex> moving everything down to Scalar,
and then Sum and SliceSum do not remove the Scalars (see the sketch after this list). This would be fixed if we
template specialize the scalar scalar scalar sum and SliceSum, on the basis of being
pure scalar.
* Optimise the extract/merge SIMD routines; Azusa??
- I have collated these into a single location at least.
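An illustration of the demotion problem described above, using names from elsewhere in this commit (a sketch of the current behaviour, not a proposal for the final API):

    LatticeColourMatrix U(&grid);     // assumes a GridCartesian 'grid'
    LatticeComplex      t = trace(U); // trace demotes to Lattice<TComplex>
    TComplex s = Sum(t);              // Sum keeps the Scalar<Scalar<Scalar<Complex>>> wrapping
    Complex  c = TensorRemove(s);     // the hack this item wants to retire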

View File

@ -11,6 +11,7 @@
#define GRID_H
#include <stdio.h>
#include <complex>
#include <vector>
#include <iostream>

View File

@ -2,7 +2,7 @@
/* lib/Grid_config.h.in. Generated from configure.ac by autoheader. */
/* AVX */
#define AVX1 1
/* #undef AVX1 */
/* AVX2 */
/* #undef AVX2 */
@ -77,7 +77,7 @@
#define PACKAGE_VERSION "1.0"
/* SSE4 */
/* #undef SSE4 */
#define SSE4 1
/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1

View File

@ -26,10 +26,15 @@ namespace Grid {
typedef float RealF;
typedef double RealD;
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
typedef RealD Real;
#else
typedef RealF Real;
#endif
typedef std::complex<RealF> ComplexF;
typedef std::complex<RealD> ComplexD;
typedef std::complex<Real> Complex;
inline RealF adj(const RealF & r){ return r; }
inline RealF conj(const RealF & r){ return r; }
@ -63,8 +68,8 @@ namespace Grid {
//conj already supported for complex
inline ComplexF timesI(const ComplexF r) { return(r*ComplexF(0.0,1.0));}
inline ComplexF timesMinusI(const ComplexF r){ return(r*ComplexF(0.0,-1.0));}
inline ComplexD timesI(const ComplexD r) { return(r*ComplexD(0.0,1.0));}
inline ComplexF timesMinusI(const ComplexF r){ return(r*ComplexF(0.0,-1.0));}
inline ComplexD timesMinusI(const ComplexD r){ return(r*ComplexD(0.0,-1.0));}
inline void mac (RealD * __restrict__ y,const RealD * __restrict__ a,const RealD *__restrict__ x){ *y = (*a) * (*x)+(*y);}
@ -280,15 +285,11 @@ namespace Grid {
// Default precision
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
typedef RealD Real;
typedef vRealD vReal;
typedef vComplexD vComplex;
typedef std::complex<Real> Complex;
#else
typedef RealF Real;
typedef vRealF vReal;
typedef vComplexF vComplex;
typedef std::complex<Real> Complex;
#endif
}
#endif

View File

@ -47,6 +47,101 @@ namespace Grid {
int from_rank;
} ;
///////////////////////////////////////////////////////////////////
// Gather for when there is no need to SIMD split with compression
///////////////////////////////////////////////////////////////////
template<class vobj,class cobj,class compressor> void
Gather_plane_simple (Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,int dimension,int plane,int cbmask,compressor &compress)
{
int rd = rhs._grid->_rdimensions[dimension];
if ( !rhs._grid->CheckerBoarded(dimension) ) {
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
int o = 0; // relative offset to base within plane
int bo = 0; // offset in buffer
// Simple block stride gather of SIMD objects
#pragma omp parallel for collapse(2)
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
buffer[bo++]=compress(rhs._odata[so+o+b]);
}
o +=rhs._grid->_slice_stride[dimension];
}
} else {
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
int o = 0; // relative offset to base within plane
int bo = 0; // offset in buffer
#pragma omp parallel for collapse(2)
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
if ( ocb &cbmask ) {
buffer[bo]=compress(rhs._odata[so+o+b]);
bo++;
}
}
o +=rhs._grid->_slice_stride[dimension];
}
}
}
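The compressor argument is any callable mapping the lattice object to the (possibly smaller) communication object; the simplest instance satisfying the interface is a hypothetical identity pass-through (the real use case, spin projection, appears in WilsonCompressor later in this commit):

    template<class vobj>
    class IdentityCompressor {        // hypothetical: cobj == vobj, no compression
    public:
      void Point(int p) {};           // direction hook called by HaloExchange, unused here
      vobj operator () (const vobj &in) { return in; }
    };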
///////////////////////////////////////////////////////////////////
// Gather for when there *is* need to SIMD split with compression
///////////////////////////////////////////////////////////////////
template<class cobj,class vobj,class compressor> void
Gather_plane_extract(Lattice<vobj> &rhs,std::vector<typename cobj::scalar_type *> pointers,int dimension,int plane,int cbmask,compressor &compress)
{
int rd = rhs._grid->_rdimensions[dimension];
if ( !rhs._grid->CheckerBoarded(dimension) ) {
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
int o = 0; // relative offset to base within plane
int bo = 0; // offset in buffer
// Simple block stride gather of SIMD objects
#pragma omp parallel for collapse(2)
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
cobj temp;
temp=compress(rhs._odata[so+o+b]);
extract(temp,pointers);
}
o +=rhs._grid->_slice_stride[dimension];
}
} else {
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
int o = 0; // relative offset to base within plane
int bo = 0; // offset in buffer
#pragma omp parallel for collapse(2)
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
if ( ocb & cbmask ) {
cobj temp;
temp =compress(rhs._odata[so+o+b]);
extract(temp,pointers);
}
}
o +=rhs._grid->_slice_stride[dimension];
}
}
}
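Here extract(temp,pointers) scatters the Nsimd lanes of each compressed object into separate per-lane scalar buffers, so every remote target receives a contiguous, lane-pure stream; a lane-layout sketch (assuming scalar_type words per lane):

    // cobj with Nsimd = 4 lanes:  [ l0 | l1 | l2 | l3 ]
    // after extract(temp,pointers):
    //   pointers[0][k] <- lane 0 of word k
    //   pointers[1][k] <- lane 1 of word k, ... one buffer per SIMD lane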
class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal fill in.
public:
@ -86,8 +181,8 @@ namespace Grid {
// Could allow a functional munging of the halo to another type during the comms.
// This could implement the 16bit/32bit/64bit compression.
template<class vobj> void HaloExchange(Lattice<vobj> &source,
std::vector<vobj,alignedAllocator<vobj> > &u_comm_buf)
template<class vobj,class cobj, class compressor> void
HaloExchange(Lattice<vobj> &source,std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,compressor &compress)
{
// conformable(source._grid,_grid);
assert(source._grid==_grid);
@ -95,12 +190,10 @@ namespace Grid {
int u_comm_offset=0;
// Gather all comms buffers
typedef typename vobj::vector_type vector_type;
typedef typename vobj::scalar_type scalar_type;
for(int point = 0 ; point < _npoints; point++) {
printf("Point %d \n",point);fflush(stdout);
compress.Point(point);
int dimension = _directions[point];
int displacement = _distances[point];
@ -126,33 +219,30 @@ namespace Grid {
sshift[1] = _grid->CheckerBoardShift(_checkerboard,dimension,shift,1);
if ( sshift[0] == sshift[1] ) {
if (splice_dim) {
printf("splice 0x3 \n");fflush(stdout);
GatherStartCommsSimd(source,dimension,shift,0x3,u_comm_buf,u_comm_offset);
GatherStartCommsSimd(source,dimension,shift,0x3,u_comm_buf,u_comm_offset,compress);
} else {
printf("NO splice 0x3 \n");fflush(stdout);
GatherStartComms(source,dimension,shift,0x3,u_comm_buf,u_comm_offset);
GatherStartComms(source,dimension,shift,0x3,u_comm_buf,u_comm_offset,compress);
}
} else {
if(splice_dim){
printf("splice 0x1,2 \n");fflush(stdout);
GatherStartCommsSimd(source,dimension,shift,0x1,u_comm_buf,u_comm_offset);// if checkerboard is unfavourable take two passes
GatherStartCommsSimd(source,dimension,shift,0x2,u_comm_buf,u_comm_offset);// both with block stride loop iteration
GatherStartCommsSimd(source,dimension,shift,0x1,u_comm_buf,u_comm_offset,compress);// if checkerboard is unfavourable take two passes
GatherStartCommsSimd(source,dimension,shift,0x2,u_comm_buf,u_comm_offset,compress);// both with block stride loop iteration
} else {
printf("NO splice 0x1,2 \n");fflush(stdout);
GatherStartComms(source,dimension,shift,0x1,u_comm_buf,u_comm_offset);
GatherStartComms(source,dimension,shift,0x2,u_comm_buf,u_comm_offset);
GatherStartComms(source,dimension,shift,0x1,u_comm_buf,u_comm_offset,compress);
GatherStartComms(source,dimension,shift,0x2,u_comm_buf,u_comm_offset,compress);
}
}
}
}
}
template<class vobj> void GatherStartComms(Lattice<vobj> &rhs,int dimension,int shift,int cbmask,
std::vector<vobj,alignedAllocator<vobj> > &u_comm_buf,
int &u_comm_offset)
template<class vobj,class cobj, class compressor>
void GatherStartComms(Lattice<vobj> &rhs,int dimension,int shift,int cbmask,
std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,
int &u_comm_offset,compressor & compress)
{
typedef typename vobj::vector_type vector_type;
typedef typename vobj::scalar_type scalar_type;
typedef typename cobj::vector_type vector_type;
typedef typename cobj::scalar_type scalar_type;
GridBase *grid=_grid;
assert(rhs._grid==_grid);
@ -169,31 +259,26 @@ namespace Grid {
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
std::vector<vobj,alignedAllocator<vobj> > send_buf(buffer_size); // hmm...
std::vector<vobj,alignedAllocator<vobj> > recv_buf(buffer_size);
std::vector<cobj,alignedAllocator<cobj> > send_buf(buffer_size); // hmm...
std::vector<cobj,alignedAllocator<cobj> > recv_buf(buffer_size);
int cb= (cbmask==0x2)? 1 : 0;
int sshift= _grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,cb);
for(int x=0;x<rd;x++){
printf("GatherStartComms x %d/%d\n",x,rd);fflush(stdout);
int offnode = ( x+sshift >= rd );
int sx = (x+sshift)%rd;
int comm_proc = (x+sshift)/rd;
if (offnode) {
printf("GatherStartComms offnode x %d\n",x);fflush(stdout);
int words = send_buf.size();
if (cbmask != 0x3) words=words>>1;
int bytes = words * sizeof(vobj);
int bytes = words * sizeof(cobj);
printf("Gather_plane_simple dimension %d sx %d cbmask %d\n",dimension,sx,cbmask);fflush(stdout);
Gather_plane_simple (rhs,send_buf,dimension,sx,cbmask);
printf("GatherStartComms gathered offnode x %d\n",x);fflush(stdout);
Gather_plane_simple (rhs,send_buf,dimension,sx,cbmask,compress);
int rank = _grid->_processor;
int recv_from_rank;
@ -219,14 +304,14 @@ namespace Grid {
}
template<class vobj>
template<class vobj,class cobj, class compressor>
void GatherStartCommsSimd(Lattice<vobj> &rhs,int dimension,int shift,int cbmask,
std::vector<vobj,alignedAllocator<vobj> > &u_comm_buf,
int &u_comm_offset)
std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,
int &u_comm_offset,compressor &compress)
{
const int Nsimd = _grid->Nsimd();
typedef typename vobj::vector_type vector_type;
typedef typename vobj::scalar_type scalar_type;
typedef typename cobj::vector_type vector_type;
typedef typename cobj::scalar_type scalar_type;
int fd = _grid->_fdimensions[dimension];
int rd = _grid->_rdimensions[dimension];
@ -245,7 +330,7 @@ namespace Grid {
// Simd direction uses an extract/merge pair
///////////////////////////////////////////////
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
int words = sizeof(vobj)/sizeof(vector_type);
int words = sizeof(cobj)/sizeof(vector_type);
/* FIXME ALTERNATE BUFFER DETERMINATION */
std::vector<std::vector<scalar_type> > send_buf_extract(Nsimd,std::vector<scalar_type>(buffer_size*words) );
@ -285,7 +370,7 @@ namespace Grid {
for(int i=0;i<Nsimd;i++){
pointers[Nsimd-1-i] = (scalar_type *)&send_buf_extract[i][0];
}
Gather_plane_extract(rhs,pointers,dimension,sx,cbmask);
Gather_plane_extract<cobj>(rhs,pointers,dimension,sx,cbmask,compress);
for(int i=0;i<Nsimd;i++){

View File

@ -18,6 +18,7 @@ libGrid_a_SOURCES =\
Grid_init.cc\
stencil/Grid_stencil_common.cc\
qcd/Grid_qcd_dirac.cc\
qcd/Grid_qcd_wilson_dop.cc\
$(extra_sources)
#

View File

@ -79,18 +79,18 @@ namespace Grid {
return ret;
}
template<class left,class right>
inline auto operator + (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
inline auto operator + (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]+rhs._odata[0])>
{
//NB mult performs conformable check. Do not reapply here for performance.
Lattice<decltype(lhs._odata[0]*rhs._odata[0])> ret(rhs._grid);
Lattice<decltype(lhs._odata[0]+rhs._odata[0])> ret(rhs._grid);
add(ret,lhs,rhs);
return ret;
}
template<class left,class right>
inline auto operator - (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
inline auto operator - (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]-rhs._odata[0])>
{
//NB mult performs conformable check. Do not reapply here for performance.
Lattice<decltype(lhs._odata[0]*rhs._odata[0])> ret(rhs._grid);
Lattice<decltype(lhs._odata[0]-rhs._odata[0])> ret(rhs._grid);
sub(ret,lhs,rhs);
return ret;
}
@ -107,9 +107,9 @@ namespace Grid {
return ret;
}
template<class left,class right>
inline auto operator + (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
inline auto operator + (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs+rhs._odata[0])>
{
Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
Lattice<decltype(lhs+rhs._odata[0])> ret(rhs._grid);
#pragma omp parallel for
for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=lhs+rhs._odata[ss];
@ -117,9 +117,9 @@ namespace Grid {
return ret;
}
template<class left,class right>
inline auto operator - (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
inline auto operator - (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs-rhs._odata[0])>
{
Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
Lattice<decltype(lhs-rhs._odata[0])> ret(rhs._grid);
#pragma omp parallel for
for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=lhs-rhs._odata[ss];
@ -137,9 +137,9 @@ namespace Grid {
return ret;
}
template<class left,class right>
inline auto operator + (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
inline auto operator + (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]+rhs)>
{
Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
Lattice<decltype(lhs._odata[0]+rhs)> ret(lhs._grid);
#pragma omp parallel for
for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=lhs._odata[ss]+rhs;
@ -147,9 +147,9 @@ namespace Grid {
return ret;
}
template<class left,class right>
inline auto operator - (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
inline auto operator - (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]-rhs)>
{
Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
Lattice<decltype(lhs._odata[0]-rhs)> ret(lhs._grid);
#pragma omp parallel for
for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=lhs._odata[ss]-rhs;

View File

@ -14,7 +14,7 @@ namespace Grid {
typedef typename vobj::scalar_type scalar;
typedef typename vobj::vector_type vector;
decltype(innerProduct(arg._odata[0],arg._odata[0])) vnrm=zero;
decltype(innerProduct(arg._odata[0],arg._odata[0])) vnrm;
scalar nrm;
//FIXME make this loop parallelisable
vnrm=zero;
@ -33,10 +33,11 @@ namespace Grid {
//->decltype(innerProduct(left._odata[0],right._odata[0]))
{
typedef typename vobj::scalar_type scalar;
decltype(innerProduct(left._odata[0],right._odata[0])) vnrm=zero;
decltype(innerProduct(left._odata[0],right._odata[0])) vnrm;
scalar nrm;
//FIXME make this loop parallelisable
vnrm=zero;
for(int ss=0;ss<left._grid->oSites(); ss++){
vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]);
}
@ -94,8 +95,10 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
int ld=grid->_ldimensions[orthogdim];
int rd=grid->_rdimensions[orthogdim];
sobj szero; szero=zero;
std::vector<vobj,alignedAllocator<vobj> > lvSum(rd); // will locally sum vectors first
std::vector<sobj> lsSum(ld,sobj(zero)); // sum across these down to scalars
std::vector<sobj> lsSum(ld,szero); // sum across these down to scalars
std::vector<sobj> extracted(Nsimd); // splitting the SIMD
result.resize(fd); // And then global sum to return the same vector to every node for IO to file

View File

@ -26,6 +26,8 @@ namespace Grid {
}
};
class GridRNGbase {
public:
@ -62,6 +64,21 @@ namespace Grid {
}
// real scalars are one component
template<class scalar,class distribution> void fillScalar(scalar &s,distribution &dist)
{
s=dist(_generators[0]);
}
template<class distribution> void fillScalar(ComplexF &s,distribution &dist)
{
s=ComplexF(dist(_generators[0]),dist(_generators[0]));
}
template<class distribution> void fillScalar(ComplexD &s,distribution &dist)
{
s=ComplexD(dist(_generators[0]),dist(_generators[0]));
}
template <class sobj,class distribution> inline void fill(sobj &l,distribution &dist){
typedef typename sobj::scalar_type scalar_type;
@ -71,13 +88,60 @@ namespace Grid {
scalar_type *buf = (scalar_type *) & l;
for(int idx=0;idx<words;idx++){
buf[idx] = dist(_generators[0]);
fillScalar(buf[idx],dist);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
};
template <class distribution> inline void fill(ComplexF &l,distribution &dist){
fillScalar(l,dist);
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
template <class distribution> inline void fill(ComplexD &l,distribution &dist){
fillScalar(l,dist);
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
template <class distribution> inline void fill(RealF &l,distribution &dist){
fillScalar(l,dist);
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
template <class distribution> inline void fill(RealD &l,distribution &dist){
fillScalar(l,dist);
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
// vector fill
template <class distribution> inline void fill(vComplexF &l,distribution &dist){
RealF *pointer=(RealF *)&l;
for(int i=0;i<2*vComplexF::Nsimd();i++){
fillScalar(pointer[i],dist);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
template <class distribution> inline void fill(vComplexD &l,distribution &dist){
RealD *pointer=(RealD *)&l;
for(int i=0;i<2*vComplexD::Nsimd();i++){
fillScalar(pointer[i],dist);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
template <class distribution> inline void fill(vRealF &l,distribution &dist){
RealF *pointer=(RealF *)&l;
for(int i=0;i<vRealF::Nsimd();i++){
fillScalar(pointer[i],dist);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
template <class distribution> inline void fill(vRealD &l,distribution &dist){
RealD *pointer=(RealD *)&l;
for(int i=0;i<vRealD::Nsimd();i++){
fillScalar(pointer[i],dist);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
void SeedRandomDevice(void){
std::random_device rd;
Seed(rd);
@ -186,7 +250,6 @@ namespace Grid {
};
template <class vobj> inline void random(GridParallelRNG &rng,Lattice<vobj> &l){
rng.fill(l,rng._uniform);
}
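Typical use of these fill overloads, as a sketch (the grid construction and the GridParallelRNG constructor signature are assumptions; SeedRandomDevice and random are as defined above):

    GridParallelRNG rng(&grid);   // assumes a GridCartesian 'grid' already exists
    rng.SeedRandomDevice();       // seed from std::random_device
    LatticeFermion psi(&grid);
    random(rng,psi);              // fills every site via fill()/fillScalar()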

View File

@ -11,21 +11,21 @@ namespace Grid {
// multiplication by fundamental scalar type
template<class l,int N> inline iScalar<l> operator * (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
{
typename iScalar<l>::tensor_reduced srhs(rhs);
typename iScalar<l>::tensor_reduced srhs; srhs=rhs;
return lhs*srhs;
}
template<class l,int N> inline iScalar<l> operator * (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs) { return rhs*lhs; }
template<class l,int N> inline iVector<l,N> operator * (const iVector<l,N>& lhs,const typename iScalar<l>::scalar_type rhs)
{
typename iVector<l,N>::tensor_reduced srhs(rhs);
typename iVector<l,N>::tensor_reduced srhs; srhs=rhs;
return lhs*srhs;
}
template<class l,int N> inline iVector<l,N> operator * (const typename iScalar<l>::scalar_type lhs,const iVector<l,N>& rhs) { return rhs*lhs; }
template<class l,int N> inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type &rhs)
{
typename iMatrix<l,N>::tensor_reduced srhs(rhs);
typename iMatrix<l,N>::tensor_reduced srhs; srhs=rhs;
return lhs*srhs;
}
template<class l,int N> inline iMatrix<l,N> operator * (const typename iScalar<l>::scalar_type & lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
@ -35,24 +35,24 @@ template<class l,int N> inline iMatrix<l,N> operator * (const typename iScalar<l
////////////////////////////////////////////////////////////////////
template<class l> inline iScalar<l> operator * (const iScalar<l>& lhs,double rhs)
{
typename iScalar<l>::scalar_type t(rhs);
typename iScalar<l>::tensor_reduced srhs(t);
typename iScalar<l>::scalar_type t; t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs;
}
template<class l> inline iScalar<l> operator * (double lhs,const iScalar<l>& rhs) { return rhs*lhs; }
template<class l,int N> inline iVector<l,N> operator * (const iVector<l,N>& lhs,double rhs)
{
typename iScalar<l>::scalar_type t(rhs);
typename iScalar<l>::tensor_reduced srhs(t);
typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs;
}
template<class l,int N> inline iVector<l,N> operator * (double lhs,const iVector<l,N>& rhs) { return rhs*lhs; }
template<class l,int N> inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,double rhs)
{
typename iScalar<l>::scalar_type t(rhs);
typename iScalar<l>::tensor_reduced srhs(t);
typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs;
}
template<class l,int N> inline iMatrix<l,N> operator * (double lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
@ -62,24 +62,26 @@ template<class l,int N> inline iMatrix<l,N> operator * (double lhs,const iMatrix
////////////////////////////////////////////////////////////////////
template<class l> inline iScalar<l> operator * (const iScalar<l>& lhs,ComplexD rhs)
{
typename iScalar<l>::scalar_type t(rhs);
typename iScalar<l>::tensor_reduced srhs(t);
typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs;
}
template<class l> inline iScalar<l> operator * (ComplexD lhs,const iScalar<l>& rhs) { return rhs*lhs; }
template<class l,int N> inline iVector<l,N> operator * (const iVector<l,N>& lhs,ComplexD rhs)
{
typename iScalar<l>::scalar_type t(rhs);
typename iScalar<l>::tensor_reduced srhs(t);
typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs;
}
template<class l,int N> inline iVector<l,N> operator * (ComplexD lhs,const iVector<l,N>& rhs) { return rhs*lhs; }
template<class l,int N> inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,ComplexD rhs)
{
typename iScalar<l>::scalar_type t(rhs);
typename iScalar<l>::tensor_reduced srhs(t);
typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs;
}
template<class l,int N> inline iMatrix<l,N> operator * (ComplexD lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
@ -89,24 +91,24 @@ template<class l,int N> inline iMatrix<l,N> operator * (ComplexD lhs,const iMatr
////////////////////////////////////////////////////////////////////
template<class l> inline iScalar<l> operator * (const iScalar<l>& lhs,Integer rhs)
{
typename iScalar<l>::scalar_type t(rhs);
typename iScalar<l>::tensor_reduced srhs(t);
typename iScalar<l>::scalar_type t; t=rhs;
typename iScalar<l>::tensor_reduced srhs; srhs=t;
return lhs*srhs;
}
template<class l> inline iScalar<l> operator * (Integer lhs,const iScalar<l>& rhs) { return rhs*lhs; }
template<class l,int N> inline iVector<l,N> operator * (const iVector<l,N>& lhs,Integer rhs)
{
typename iScalar<l>::scalar_type t(rhs);
typename iScalar<l>::tensor_reduced srhs(t);
typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs;
}
template<class l,int N> inline iVector<l,N> operator * (Integer lhs,const iVector<l,N>& rhs) { return rhs*lhs; }
template<class l,int N> inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,Integer rhs)
{
typename iScalar<l>::scalar_type t(rhs);
typename iScalar<l>::tensor_reduced srhs(t);
typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs;
}
template<class l,int N> inline iMatrix<l,N> operator * (Integer lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
@ -118,14 +120,14 @@ template<class l,int N> inline iMatrix<l,N> operator * (Integer lhs,const iMatri
///////////////////////////////////////////////////////////////////////////////////////////////
template<class l,int N> inline iScalar<l> operator + (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
{
typename iScalar<l>::tensor_reduced srhs(rhs);
typename iScalar<l>::tensor_reduced srhs; srhs=rhs;
return lhs+srhs;
}
template<class l,int N> inline iScalar<l> operator + (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs) { return rhs+lhs; }
template<class l,int N> inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type rhs)
{
typename iMatrix<l,N>::tensor_reduced srhs(rhs);
typename iMatrix<l,N>::tensor_reduced srhs; srhs=rhs;
return lhs+srhs;
}
template<class l,int N> inline iMatrix<l,N> operator + (const typename iScalar<l>::scalar_type lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; }
@ -135,16 +137,16 @@ template<class l,int N> inline iMatrix<l,N> operator + (const typename iScalar<l
////////////////////////////////////////////////////////////////////
template<class l> inline iScalar<l> operator + (const iScalar<l>& lhs,double rhs)
{
typename iScalar<l>::scalar_type t(rhs);
typename iScalar<l>::tensor_reduced srhs(t);
typename iScalar<l>::scalar_type t; t=rhs;
typename iScalar<l>::tensor_reduced srhs; srhs=t;
return lhs+srhs;
}
template<class l> inline iScalar<l> operator + (double lhs,const iScalar<l>& rhs) { return rhs+lhs; }
template<class l,int N> inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,double rhs)
{
typename iScalar<l>::scalar_type t(rhs);
typename iScalar<l>::tensor_reduced srhs(t);
typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs+srhs;
}
template<class l,int N> inline iMatrix<l,N> operator + (double lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; }
@ -155,8 +157,8 @@ template<class l,int N> inline iMatrix<l,N> operator + (double lhs,const iMatrix
template<class l> inline iScalar<l> operator + (const iScalar<l>& lhs,Integer rhs)
{
typename iScalar<l>::scalar_type t(rhs);
typename iScalar<l>::tensor_reduced srhs(t);
typename iScalar<l>::scalar_type t; t=rhs;
typename iScalar<l>::tensor_reduced srhs; srhs=t;
return lhs+srhs;
}
@ -164,8 +166,8 @@ template<class l> inline iScalar<l> operator + (Integer lhs,const iScalar<l>& rh
template<class l,int N> inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,Integer rhs)
{
typename iScalar<l>::scalar_type t(rhs);
typename iScalar<l>::tensor_reduced srhs(t);
typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs+srhs;
}
template<class l,int N> inline iMatrix<l,N> operator + (Integer lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; }
@ -176,23 +178,23 @@ template<class l,int N> inline iMatrix<l,N> operator + (Integer lhs,const iMatri
///////////////////////////////////////////////////////////////////////////////////////////////
template<class l,int N> inline iScalar<l> operator - (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
{
typename iScalar<l>::tensor_reduced srhs(rhs);
typename iScalar<l>::tensor_reduced srhs; srhs=rhs;
return lhs-srhs;
}
template<class l,int N> inline iScalar<l> operator - (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs)
{
typename iScalar<l>::tensor_reduced slhs(lhs);
typename iScalar<l>::tensor_reduced slhs;slhs=lhs;
return slhs-rhs;
}
template<class l,int N> inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type rhs)
{
typename iScalar<l>::tensor_reduced srhs(rhs);
typename iScalar<l>::tensor_reduced srhs; srhs=rhs;
return lhs-srhs;
}
template<class l,int N> inline iMatrix<l,N> operator - (const typename iScalar<l>::scalar_type lhs,const iMatrix<l,N>& rhs)
{
typename iScalar<l>::tensor_reduced slhs(lhs);
typename iScalar<l>::tensor_reduced slhs;slhs=lhs;
return slhs-rhs;
}
@ -201,27 +203,27 @@ template<class l,int N> inline iMatrix<l,N> operator - (const typename iScalar<l
////////////////////////////////////////////////////////////////////
template<class l> inline iScalar<l> operator - (const iScalar<l>& lhs,double rhs)
{
typename iScalar<l>::scalar_type t(rhs);
typename iScalar<l>::tensor_reduced srhs(t);
typename iScalar<l>::scalar_type t; t=rhs;
typename iScalar<l>::tensor_reduced srhs; srhs=t;
return lhs-srhs;
}
template<class l> inline iScalar<l> operator - (double lhs,const iScalar<l>& rhs)
{
typename iScalar<l>::scalar_type t(lhs);
typename iScalar<l>::tensor_reduced slhs(t);
typename iScalar<l>::tensor_reduced slhs;slhs=t;
return slhs-rhs;
}
template<class l,int N> inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,double rhs)
{
typename iScalar<l>::scalar_type t(rhs);
typename iScalar<l>::tensor_reduced srhs(t);
typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs-srhs;
}
template<class l,int N> inline iMatrix<l,N> operator - (double lhs,const iMatrix<l,N>& rhs)
{
typename iScalar<l>::scalar_type t(lhs);
typename iScalar<l>::tensor_reduced slhs(t);
typename iScalar<l>::tensor_reduced slhs;slhs=t;
return slhs-rhs;
}
@ -230,26 +232,26 @@ template<class l,int N> inline iMatrix<l,N> operator - (double lhs,const iMatrix
////////////////////////////////////////////////////////////////////
template<class l> inline iScalar<l> operator - (const iScalar<l>& lhs,Integer rhs)
{
typename iScalar<l>::scalar_type t(rhs);
typename iScalar<l>::tensor_reduced srhs(t);
typename iScalar<l>::scalar_type t; t=rhs;
typename iScalar<l>::tensor_reduced srhs; srhs=t;
return lhs-srhs;
}
template<class l> inline iScalar<l> operator - (Integer lhs,const iScalar<l>& rhs)
{
typename iScalar<l>::scalar_type t(lhs);
typename iScalar<l>::tensor_reduced slhs(t);
typename iScalar<l>::scalar_type t;t=lhs;
typename iScalar<l>::tensor_reduced slhs;slhs=t;
return slhs-rhs;
}
template<class l,int N> inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,Integer rhs)
{
typename iScalar<l>::scalar_type t(rhs);
typename iScalar<l>::tensor_reduced srhs(t);
typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs-srhs;
}
template<class l,int N> inline iMatrix<l,N> operator - (Integer lhs,const iMatrix<l,N>& rhs)
{
typename iScalar<l>::scalar_type t(lhs);
typename iScalar<l>::tensor_reduced slhs(t);
typename iScalar<l>::scalar_type t;t=lhs;
typename iScalar<l>::tensor_reduced slhs;slhs=t;
return slhs-rhs;
}

View File

@ -17,7 +17,8 @@ namespace Grid {
auto innerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0],rhs._internal[0]))>
{
typedef decltype(innerProduct(lhs._internal[0],rhs._internal[0])) ret_t;
iScalar<ret_t> ret=zero;
iScalar<ret_t> ret;
ret=zero;
for(int c1=0;c1<N;c1++){
ret._internal += innerProduct(lhs._internal[c1],rhs._internal[c1]);
}
@ -27,8 +28,9 @@ namespace Grid {
auto innerProduct (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0]))>
{
typedef decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0])) ret_t;
iScalar<ret_t> ret=zero;
iScalar<ret_t> ret;
iScalar<ret_t> tmp;
ret=zero;
for(int c1=0;c1<N;c1++){
for(int c2=0;c2<N;c2++){
ret._internal+=innerProduct(lhs._internal[c1][c2],rhs._internal[c1][c2]);

View File

@ -8,6 +8,16 @@ namespace Grid {
// These can be composed to form tensor products of internal indices.
///////////////////////////////////////////////////
// It is useful to NOT have any constructors
// so that these classes assert "is_pod<class> == true"
// because then the standard C++ valarray container eliminates fill overhead on new allocation and
// non-move copying.
//
// However note that doing this eliminates some syntactical sugar such as
// calling the constructor explicitly or implicitly
//
#define TENSOR_IS_POD
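The intent can be checked once the types are defined; a sketch of the kind of compile-time check this enables (std::is_pod from <type_traits>, placed after the class definitions):

    #include <type_traits>
    static_assert(std::is_pod<iScalar<vComplexF> >::value,
                  "iScalar must stay POD so containers can skip default-fill on allocation");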
template<class vtype> class iScalar
{
public:
@ -25,16 +35,22 @@ public:
// Scalar no action
// template<int Level> using tensor_reduce_level = typename iScalar<GridTypeMapper<vtype>::tensor_reduce_level<Level> >;
iScalar(){};
iScalar(scalar_type s) : _internal(s) {};// recurse down and hit the constructor for vector_type
iScalar(const Zero &z){ *this = zero; };
#ifndef TENSOR_IS_POD
iScalar(){;};
iScalar(scalar_type s) : _internal(s) {};// recurse down and hit the constructor for vector_type
iScalar(const Zero &z){ *this = zero; };
#endif
iScalar<vtype> & operator= (const Zero &hero){
zeroit(*this);
return *this;
zeroit(*this);
return *this;
}
iScalar<vtype> & operator= (const scalar_type s){
_internal=s;
return *this;
}
friend void zeroit(iScalar<vtype> &that){
zeroit(that._internal);
}
@ -114,8 +130,10 @@ public:
enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1};
iVector(const Zero &z){ *this = zero; };
iVector() {};// Empty constructor
#ifndef TENSOR_IS_POD
iVector(const Zero &z){ *this = zero; };
iVector() {};// Empty constructor
#endif
iVector<vtype,N> & operator= (const Zero &hero){
zeroit(*this);
@ -185,8 +203,11 @@ public:
enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1};
#ifndef TENSOR_IS_POD
iMatrix(const Zero &z){ *this = zero; };
iMatrix() {};
#endif
iMatrix<vtype,N> & operator= (const Zero &hero){
zeroit(*this);
return *this;

View File

@ -8,6 +8,7 @@ namespace QCD {
static const int Ns=4;
static const int Nd=4;
static const int Nhs=2; // half spinor
static const int Nds=8; // double stored gauge field
static const int CbRed =0;
static const int CbBlack=1;
@ -28,79 +29,216 @@ namespace QCD {
//
// That probably makes for GridRedBlack4dCartesian grid.
template<typename vtype> using iSinglet = iScalar<iScalar<iScalar<vtype> > >;
template<typename vtype> using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
template<typename vtype> using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
template<typename vtype> using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
template<typename vtype> using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;
// s,sp,c,spc,lc
template<typename vtype> using iSinglet = iScalar<iScalar<iScalar<vtype> > >;
template<typename vtype> using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
template<typename vtype> using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
template<typename vtype> using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;
template<typename vtype> using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ;
template<typename vtype> using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
template<typename vtype> using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
template<typename vtype> using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
// Spin matrix
typedef iSpinMatrix<Complex > SpinMatrix;
typedef iSpinMatrix<ComplexF > SpinMatrixF;
typedef iSpinMatrix<ComplexD > SpinMatrixD;
template<typename vtype> using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
template<typename vtype> using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
typedef iSpinMatrix<vComplex > vSpinMatrix;
typedef iSpinMatrix<vComplexF> vSpinMatrixF;
typedef iSpinMatrix<vComplexD> vSpinMatrixD;
template<typename vtype> using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
// Colour Matrix
typedef iColourMatrix<Complex > ColourMatrix;
typedef iColourMatrix<ComplexF > ColourMatrixF;
typedef iColourMatrix<ComplexD > ColourMatrixD;
typedef iSpinMatrix<Complex > SpinMatrix;
typedef iColourMatrix<Complex > ColourMatrix;
typedef iSpinColourMatrix<Complex > SpinColourMatrix;
typedef iLorentzColourMatrix<Complex > LorentzColourMatrix;
typedef iColourMatrix<vComplex > vColourMatrix;
typedef iColourMatrix<vComplexF> vColourMatrixF;
typedef iColourMatrix<vComplexD> vColourMatrixD;
// SpinColour matrix
typedef iSpinColourMatrix<Complex > SpinColourMatrix;
typedef iSpinColourMatrix<ComplexF > SpinColourMatrixF;
typedef iSpinColourMatrix<ComplexD > SpinColourMatrixD;
typedef iSpinColourMatrix<vComplex > vSpinColourMatrix;
typedef iSpinColourMatrix<vComplexF> vSpinColourMatrixF;
typedef iSpinColourMatrix<vComplexD> vSpinColourMatrixD;
// LorentzColour
typedef iLorentzColourMatrix<Complex > LorentzColourMatrix;
typedef iLorentzColourMatrix<ComplexF > LorentzColourMatrixF;
typedef iLorentzColourMatrix<ComplexD > LorentzColourMatrixD;
typedef iSpinVector<Complex > SpinVector;
typedef iColourVector<Complex > ColourVector;
typedef iSpinColourVector<Complex > SpinColourVector;
typedef iHalfSpinVector<Complex > HalfSpinVector;
typedef iHalfSpinColourVector<Complex > HalfSpinColourVector;
typedef iSpinMatrix<vComplex > vSpinMatrix;
typedef iColourMatrix<vComplex > vColourMatrix;
typedef iSpinColourMatrix<vComplex > vSpinColourMatrix;
typedef iLorentzColourMatrix<vComplex > vLorentzColourMatrix;
typedef iLorentzColourMatrix<vComplexF> vLorentzColourMatrixF;
typedef iLorentzColourMatrix<vComplexD> vLorentzColourMatrixD;
// DoubleStored gauge field
typedef iDoubleStoredColourMatrix<Complex > DoubleStoredColourMatrix;
typedef iDoubleStoredColourMatrix<ComplexF > DoubleStoredColourMatrixF;
typedef iDoubleStoredColourMatrix<ComplexD > DoubleStoredColourMatrixD;
typedef iDoubleStoredColourMatrix<vComplex > vDoubleStoredColourMatrix;
typedef iDoubleStoredColourMatrix<vComplexF> vDoubleStoredColourMatrixF;
typedef iDoubleStoredColourMatrix<vComplexD> vDoubleStoredColourMatrixD;
// Spin vector
typedef iSpinVector<Complex > SpinVector;
typedef iSpinVector<ComplexF> SpinVectorF;
typedef iSpinVector<ComplexD> SpinVectorD;
typedef iSpinVector<vComplex > vSpinVector;
typedef iColourVector<vComplex > vColourVector;
typedef iSpinColourVector<vComplex > vSpinColourVector;
typedef iHalfSpinVector<vComplex > vHalfSpinVector;
typedef iHalfSpinColourVector<vComplex > vHalfSpinColourVector;
typedef iSpinVector<vComplexF> vSpinVectorF;
typedef iSpinVector<vComplexD> vSpinVectorD;
// Colour vector
typedef iColourVector<Complex > ColourVector;
typedef iColourVector<ComplexF> ColourVectorF;
typedef iColourVector<ComplexD> ColourVectorD;
typedef iColourVector<vComplex > vColourVector;
typedef iColourVector<vComplexF> vColourVectorF;
typedef iColourVector<vComplexD> vColourVectorD;
// SpinColourVector
typedef iSpinColourVector<Complex > SpinColourVector;
typedef iSpinColourVector<ComplexF> SpinColourVectorF;
typedef iSpinColourVector<ComplexD> SpinColourVectorD;
typedef iSpinColourVector<vComplex > vSpinColourVector;
typedef iSpinColourVector<vComplexF> vSpinColourVectorF;
typedef iSpinColourVector<vComplexD> vSpinColourVectorD;
// HalfSpin vector
typedef iHalfSpinVector<Complex > HalfSpinVector;
typedef iHalfSpinVector<ComplexF> HalfSpinVectorF;
typedef iHalfSpinVector<ComplexD> HalfSpinVectorD;
typedef iHalfSpinVector<vComplex > vHalfSpinVector;
typedef iHalfSpinVector<vComplexF> vHalfSpinVectorF;
typedef iHalfSpinVector<vComplexD> vHalfSpinVectorD;
// HalfSpinColour vector
typedef iHalfSpinColourVector<Complex > HalfSpinColourVector;
typedef iHalfSpinColourVector<ComplexF> HalfSpinColourVectorF;
typedef iHalfSpinColourVector<ComplexD> HalfSpinColourVectorD;
typedef iHalfSpinColourVector<vComplex > vHalfSpinColourVector;
typedef iHalfSpinColourVector<vComplexF> vHalfSpinColourVectorF;
typedef iHalfSpinColourVector<vComplexD> vHalfSpinColourVectorD;
// singlets
typedef iSinglet<Complex > TComplex; // FIXME This is painful. Tensor singlet complex type.
typedef iSinglet<vComplex > vTComplex; // what if we don't know the tensor structure
typedef iSinglet<ComplexF> TComplexF; // FIXME This is painful. Tensor singlet complex type.
typedef iSinglet<ComplexD> TComplexD; // FIXME This is painful. Tensor singlet complex type.
typedef iSinglet<vComplex > vTComplex ; // what if we don't know the tensor structure
typedef iSinglet<vComplexF> vTComplexF; // what if we don't know the tensor structure
typedef iSinglet<vComplexD> vTComplexD; // what if we don't know the tensor structure
typedef iSinglet<Real > TReal; // Shouldn't need these; can I make it work without?
typedef iSinglet<RealF> TRealF; // Shouldn't need these; can I make it work without?
typedef iSinglet<RealD> TRealD; // Shouldn't need these; can I make it work without?
typedef iSinglet<vReal > vTReal;
typedef iSinglet<vInteger > vTInteger;
typedef iSinglet<vRealF> vTRealF;
typedef iSinglet<vRealD> vTRealD;
typedef iSinglet<vInteger> vTInteger;
typedef iSinglet<Integer > TInteger;
// Lattices of these
typedef Lattice<vColourMatrix> LatticeColourMatrix;
typedef Lattice<vColourMatrixF> LatticeColourMatrixF;
typedef Lattice<vColourMatrixD> LatticeColourMatrixD;
typedef Lattice<vSpinMatrix> LatticeSpinMatrix;
typedef Lattice<vSpinMatrixF> LatticeSpinMatrixF;
typedef Lattice<vSpinMatrixD> LatticeSpinMatrixD;
typedef Lattice<vSpinColourMatrix> LatticeSpinColourMatrix;
typedef Lattice<vSpinColourMatrixF> LatticeSpinColourMatrixF;
typedef Lattice<vSpinColourMatrixD> LatticeSpinColourMatrixD;
typedef Lattice<vLorentzColourMatrix> LatticeLorentzColourMatrix;
typedef Lattice<vLorentzColourMatrixF> LatticeLorentzColourMatrixF;
typedef Lattice<vLorentzColourMatrixD> LatticeLorentzColourMatrixD;
// DoubleStored gauge field
typedef Lattice<vDoubleStoredColourMatrix> LatticeDoubleStoredColourMatrix;
typedef Lattice<vDoubleStoredColourMatrixF> LatticeDoubleStoredColourMatrixF;
typedef Lattice<vDoubleStoredColourMatrixD> LatticeDoubleStoredColourMatrixD;
typedef Lattice<vSpinVector> LatticeSpinVector;
typedef Lattice<vSpinVectorF> LatticeSpinVectorF;
typedef Lattice<vSpinVectorD> LatticeSpinVectorD;
typedef Lattice<vColourVector> LatticeColourVector;
typedef Lattice<vColourVectorF> LatticeColourVectorF;
typedef Lattice<vColourVectorD> LatticeColourVectorD;
typedef Lattice<vSpinColourVector> LatticeSpinColourVector;
typedef Lattice<vSpinColourVectorF> LatticeSpinColourVectorF;
typedef Lattice<vSpinColourVectorD> LatticeSpinColourVectorD;
typedef Lattice<vHalfSpinVector> LatticeHalfSpinVector;
typedef Lattice<vHalfSpinVectorF> LatticeHalfSpinVectorF;
typedef Lattice<vHalfSpinVectorD> LatticeHalfSpinVectorD;
typedef Lattice<vHalfSpinColourVector> LatticeHalfSpinColourVector;
typedef Lattice<vHalfSpinColourVectorF> LatticeHalfSpinColourVectorF;
typedef Lattice<vHalfSpinColourVectorD> LatticeHalfSpinColourVectorD;
typedef Lattice<vTReal> LatticeReal;
typedef Lattice<vTRealF> LatticeRealF;
typedef Lattice<vTRealD> LatticeRealD;
typedef Lattice<vTComplex> LatticeComplex;
typedef Lattice<vTComplexF> LatticeComplexF;
typedef Lattice<vTComplexD> LatticeComplexD;
typedef Lattice<vTInteger> LatticeInteger; // Predicates for "where"
typedef Lattice<vColourMatrix> LatticeColourMatrix;
typedef Lattice<vSpinMatrix> LatticeSpinMatrix;
typedef Lattice<vSpinColourMatrix> LatticeSpinColourMatrix;
typedef Lattice<vSpinVector> LatticeSpinVector;
typedef Lattice<vColourVector> LatticeColourVector;
typedef Lattice<vSpinColourVector> LatticeSpinColourVector;
typedef Lattice<vHalfSpinVector> LatticeHalfSpinVector;
typedef Lattice<vHalfSpinColourVector> LatticeHalfSpinColourVector;
///////////////////////////////////////////
// Physical names for things
///////////////////////////////////////////
typedef Lattice<vHalfSpinColourVector> LatticeHalfFermion;
typedef Lattice<vSpinColourVector> LatticeFermion;
typedef LatticeHalfSpinColourVector LatticeHalfFermion;
typedef LatticeHalfSpinColourVectorF LatticeHalfFermionF;
typedef LatticeHalfSpinColourVectorD LatticeHalfFermionD;
typedef Lattice<vSpinColourMatrix> LatticePropagator;
typedef Lattice<vLorentzColourMatrix> LatticeGaugeField;
typedef LatticeSpinColourVector LatticeFermion;
typedef LatticeSpinColourVectorF LatticeFermionF;
typedef LatticeSpinColourVectorD LatticeFermionD;
typedef LatticeSpinColourMatrix LatticePropagator;
typedef LatticeSpinColourMatrixF LatticePropagatorF;
typedef LatticeSpinColourMatrixD LatticePropagatorD;
typedef LatticeLorentzColourMatrix LatticeGaugeField;
typedef LatticeLorentzColourMatrixF LatticeGaugeFieldF;
typedef LatticeLorentzColourMatrixD LatticeGaugeFieldD;
typedef LatticeDoubleStoredColourMatrix LatticeDoubledGaugeField;
typedef LatticeDoubleStoredColourMatrixF LatticeDoubledGaugeFieldF;
typedef LatticeDoubleStoredColourMatrixD LatticeDoubledGaugeFieldD;
// Uhgg... typing this hurt ;)
// (my keyboard got burning hot when I typed this, must be the anti-Fermion)
typedef Lattice<vColourVector> LatticeStaggeredFermion;
typedef Lattice<vColourMatrix> LatticeStaggeredPropagator;
typedef Lattice<vColourVector> LatticeStaggeredFermion;
typedef Lattice<vColourVectorF> LatticeStaggeredFermionF;
typedef Lattice<vColourVectorD> LatticeStaggeredFermionD;
typedef Lattice<vColourMatrix> LatticeStaggeredPropagator;
typedef Lattice<vColourMatrixF> LatticeStaggeredPropagatorF;
typedef Lattice<vColourMatrixD> LatticeStaggeredPropagatorD;
//////////////////////////////////////////////////////////////////////////////
// Peek and Poke named after physics attributes
@ -157,11 +295,14 @@ namespace QCD {
return peekIndex<LorentzIndex>(rhs,i,j);
}
// FIXME transpose Colour, transpose Spin, traceColour traceSpin
} //namespace QCD
} // Grid
#include <qcd/Grid_qcd_dirac.h>
#include <qcd/Grid_qcd_2spinor.h>
//#include <qcd/Grid_qcd_pauli.h>
#include <qcd/Grid_qcd_wilson_dop.h>
#endif

View File

@ -17,8 +17,14 @@ namespace QCD {
GammaZ,
GammaT,
Gamma5,
// GammaXGamma5,
// GammaYGamma5,
MinusIdentity,
MinusGammaX,
MinusGammaY,
MinusGammaZ,
MinusGammaT,
MinusGamma5
// GammaXGamma5, // Rest are composite (willing to take hit for two calls sequentially)
// GammaYGamma5, // as they are less commonly used.
// GammaZGamma5,
// GammaTGamma5,
// SigmaXY,
@ -27,12 +33,6 @@ namespace QCD {
// SigmaXT,
// SigmaYT,
// SigmaZT,
MinusIdentity,
MinusGammaX,
MinusGammaY,
MinusGammaZ,
MinusGammaT,
MinusGamma5
// MinusGammaXGamma5, easiest to form by composition
// MinusGammaYGamma5, as performance is not critical for these
// MinusGammaZGamma5,
@ -54,7 +54,6 @@ namespace QCD {
};
/* Gx
* 0 0 0 i
* 0 0 i 0

View File

@ -1,157 +1,220 @@
#ifnfdef GRID_QCD_WILSON_DOP_H
#define GRID_QCD_WILSON_DOP_H
#include <Grid.h>
namespace Grid {
namespace QCD {
const std::vector<int> WilsonMatrix::directions ({0,1,2,3, 0, 1, 2, 3,0});
const std::vector<int> WilsonMatrix::displacements({1,1,1,1,-1,-1,-1,-1,0});
// Should be in header?
static const int WilsonMatrix::Xp = 0;
static const int WilsonMatrix::Yp = 1;
static const int WilsonMatrix::Zp = 2;
static const int WilsonMatrix::Tp = 3;
static const int WilsonMatrix::Xm = 4;
static const int WilsonMatrix::Ym = 5;
static const int WilsonMatrix::Zm = 6;
static const int WilsonMatrix::Tm = 7;
static const int WilsonMatrix::X0 = 8;
static const int WilsonMatrix::npoint=9;
const int WilsonMatrix::Xp = 0;
const int WilsonMatrix::Yp = 1;
const int WilsonMatrix::Zp = 2;
const int WilsonMatrix::Tp = 3;
const int WilsonMatrix::Xm = 4;
const int WilsonMatrix::Ym = 5;
const int WilsonMatrix::Zm = 6;
const int WilsonMatrix::Tm = 7;
//const int WilsonMatrix::X0 = 8;
class WilsonCompressor {
public:
int mu;
WilsonMatrix::WilsonMatrix(LatticeGaugeField &_Umu,int _mass)
: Stencil((&Umu._grid,npoint,0,directions,displacements),
void Point(int p) { mu=p;};
vHalfSpinColourVector operator () (vSpinColourVector &in)
{
vHalfSpinColourVector ret;
switch(mu) {
case WilsonMatrix::Xp:
spProjXp(ret,in);
break;
case WilsonMatrix::Yp:
spProjYp(ret,in);
break;
case WilsonMatrix::Zp:
spProjZp(ret,in);
break;
case WilsonMatrix::Tp:
spProjTp(ret,in);
break;
case WilsonMatrix::Xm:
spProjXm(ret,in);
break;
case WilsonMatrix::Ym:
spProjYm(ret,in);
break;
case WilsonMatrix::Zm:
spProjZm(ret,in);
break;
case WilsonMatrix::Tm:
spProjTm(ret,in);
break;
default:
assert(0);
break;
}
return ret;
}
};
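Wired into the stencil machinery, the compressor would be used roughly as follows (a sketch matching the HaloExchange signature introduced in this commit; the corresponding call in Dhop is still commented out below):

    WilsonCompressor compressor;
    Stencil.HaloExchange(in,comm_buf,compressor); // gathers spin-projected half spinors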
WilsonMatrix::WilsonMatrix(LatticeGaugeField &_Umu,double _mass)
: Stencil(Umu._grid,npoint,0,directions,displacements),
mass(_mass),
Umu(_Umu)
Umu(_Umu._grid)
{
// Allocate the required comms buffer
grid = _Umu._grid;
comm_buf.resize(Stencil._unified_buffer_size);
DoubleStore(Umu,_Umu);
}
void WilsonMatrix::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu)
{
LatticeColourMatrix U(grid);
for(int mu=0;mu<Nd;mu++){
U = peekIndex<LorentzIndex>(Umu,mu);
pokeIndex<LorentzIndex>(Uds,U,mu);
U = adj(Cshift(U,mu,-1));
pokeIndex<LorentzIndex>(Uds,U,mu+4);
}
}
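In formulas, the loop above builds the doubled field from the thin links: the forward entry stores the link itself and the backward entry stores the adjoint of the link pulled back one site (a sketch of what DoubleStore computes):

    Uds(x, mu)   = U_mu(x)                  for mu = 0..3
    Uds(x, mu+4) = adj( U_mu(x - mu_hat) )  i.e. adj(Cshift(U,mu,-1)) above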
void WilsonMatrix::multiply(const LatticeFermion &in, LatticeFermion &out)
{
Dhop(in,out);
return;
}
void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
{
Stencil.HaloExchange(in,comm_buf);
// Stencil.HaloExchange(in,comm_buf);
for(int ss=0;ss<_grid->oSites();ss++){
for(int ss=0;ss<grid->oSites();ss++){
int offset,local;
vSpinColourVector result;
vHalfSpinColourVector UChi;
vHalfSpinColourVector chi;
vHalfSpinColourVector Uchi;
vHalfSpinColourVector *chi_p;
// Xp
offset = Stencil._offsets [Xp][ss];
local = Stencil._is_local[Xp][ss];
if ( local ) {
Uchi = U[]*spProjXp(in._odata[offset]);
} else {
Uchi = U[]*comm_buf._odata[offset]
}
result = ReconXp(Uchi);
chi_p = &comm_buf[offset];
if ( local ) {
spProjXp(chi,in._odata[offset]);
chi_p = &chi;
}
mult(&(Uchi()),&(Umu._odata[ss](Xp)),&(*chi_p)());
spReconXp(result,Uchi);
#if 0
// Yp
offset = Stencil._offsets [Yp][ss];
local = Stencil._is_local[Yp][ss];
chi_p = &comm_buf[offset];
if ( local ) {
Uchi = U[]*spProjYp(in._odata[offset]);
} else {
Uchi = U[]*comm_buf._odata[offset]
spProjYp(chi,in._odata[offset]);
chi_p = &chi;
}
result+= ReconYp(Uchi);
mult(&(Uchi()),&(Umu._odata[ss](Yp)),&(*chi_p)());
accumReconYp(result,Uchi);
// Zp
offset = Stencil._offsets [Zp][ss];
local = Stencil._is_local[Zp][ss];
chi_p = &comm_buf[offset];
if ( local ) {
Uchi = U[]*spProjZp(in._odata[offset]);
} else {
Uchi = U[]*comm_buf._odata[offset]
spProjZp(chi,in._odata[offset]);
chi_p = &chi;
}
result+= ReconZp(Uchi);
mult(&(Uchi()),&(Umu._odata[ss](Zp)),&(*chi_p)() );
accumReconZp(result,Uchi);
// Tp
offset = Stencil._offsets [Tp][ss];
local = Stencil._is_local[Tp][ss];
chi_p = &comm_buf[offset];
if ( local ) {
Uchi = U[]*spProjTp(in._odata[offset]);
} else {
Uchi = U[]*comm_buf._odata[offset]
spProjTp(chi,in._odata[offset]);
chi_p = &chi;
}
result+= ReconTp(Uchi);
mult(&(Uchi()),&(Umu._odata[ss](Tp)),&(*chi_p)());
accumReconTp(result,Uchi);
// Xm
offset = Stencil._offsets [Xm][ss];
local = Stencil._is_local[Xm][ss];
chi_p = &comm_buf[offset];
if ( local ) {
Uchi = U[]*spProjXm(in._odata[offset]);
} else {
Uchi = U[]*comm_buf._odata[offset]
spProjXm(chi,in._odata[offset]);
chi_p = &chi;
}
result+= ReconXm(Uchi);
mult(&(Uchi()),&(Umu._odata[ss](Xm)),&(*chi_p)());
accumReconXm(result,Uchi);
// Ym
offset = Stencil._offsets [Ym][ss];
local = Stencil._is_local[Ym][ss];
chi_p = &comm_buf[offset];
if ( local ) {
Uchi = U[]*spProjYm(in._odata[offset]);
} else {
Uchi = U[]*comm_buf._odata[offset]
spProjYm(chi,in._odata[offset]);
chi_p = &chi;
}
result+= ReconYm(Uchi);
mult(&(Uchi()),&(Umu._odata[ss](Ym)),&(*chi_p)());
accumReconYm(result,Uchi);
// Zm
offset = Stencil._offsets [Zm][ss];
local = Stencil._is_local[Zm][ss];
chi_p = &comm_buf[offset];
if ( local ) {
Uchi = U[]*spProjZm(in._odata[offset]);
} else {
Uchi = U[]*comm_buf._odata[offset]
spProjZm(chi,in._odata[offset]);
chi_p = &chi;
}
result+= ReconZm(Uchi);
mult(&(Uchi()),&(Umu._odata[ss](Zm)),&(*chi_p)());
accumReconZm(result,Uchi);
// Tm
offset = Stencil._offsets [Tm][ss];
local = Stencil._is_local[Tm][ss];
chi_p = &comm_buf[offset];
if ( local ) {
Uchi = U[]*spProjTm(in._odata[offset]);
} else {
Uchi = U[]*comm_buf._odata[offset]
spProjTm(chi,in._odata[offset]);
chi_p = &chi;
}
result+= ReconTm(Uchi);
mult(&(Uchi()),&(Umu._odata[ss](Tm)),&(*chi_p)());
accumReconTm(result,Uchi);
#endif
out._odata[ss] = result;
}
}
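Once all eight directions are enabled (the #if 0 block above), each site performs the standard Wilson hop; in outline:

    // for each direction d in {Xp,...,Tm}:
    //   chi     = spProj_d( psi(x + d) )   // project 4-spinor to a half spinor
    //   Uchi    = Uds(x,d) * chi           // multiply by the doubled gauge link
    //   result += spRecon_d( Uchi )        // reconstruct 4 spin components and accumulate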
void WilsonMatrix::Dw(const LatticeFermion &in, LatticeFermion &out)
{
return;
}
void WilsonMatrix::MpcDag (const LatticeFermion &in, LatticeFermion &out)
{
return;
}
void WilsonMatrix::Mpc (const LatticeFermion &in, LatticeFermion &out)
{
return;
}
void WilsonMatrix::MpcDagMpc(const LatticeFermion &in, LatticeFermion &out)
{
return;
}
void WilsonMatrix::MDagM (const LatticeFermion &in, LatticeFermion &out)
{
return;
}
}}
#endif

View File

@ -1,4 +1,4 @@
#ifnfdef GRID_QCD_WILSON_DOP_H
#ifndef GRID_QCD_WILSON_DOP_H
#define GRID_QCD_WILSON_DOP_H
#include <Grid.h>
@ -21,21 +21,23 @@ namespace Grid {
GridBase *grid;
// Copy of the gauge field
LatticeGaugeField Umu;
LatticeDoubledGaugeField Umu;
//Defines the stencil
CartesianStencil Stencil;
static const int npoint=9;
static const std::vector<int> directions ;
static const std::vector<int> displacements;
static const int Xp,Xm,Yp,Ym,Zp,Zm,Tp,Tm;
// Comms buffer
std::vector<vSpinColourVector,alignedAllocator<vSpinColourVector> > comm_buf;
std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> > comm_buf;
// Constructor
WilsonMatrix(LatticeGaugeField &Umu,int mass);
WilsonMatrix(LatticeGaugeField &Umu,double mass);
// DoubleStore
void DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu);
// override multiply
void multiply(const LatticeFermion &in, LatticeFermion &out);

View File

@ -251,13 +251,13 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
friend inline vComplexD conj(const vComplexD &in){
vComplexD ret ; vzero(ret);
#if defined (AVX1)|| defined (AVX2)
// addsubps 0, inv=>0+in.v[3] 0-in.v[2], 0+in.v[1], 0-in.v[0], ...
// __m256d tmp = _mm256_addsub_pd(ret.v,_mm256_shuffle_pd(in.v,in.v,0x5));
// ret.v=_mm256_shuffle_pd(tmp,tmp,0x5);
ret.v = _mm256_addsub_pd(ret.v,in.v);
// addsubps 0, inv=>0+in.v[3] 0-in.v[2], 0+in.v[1], 0-in.v[0], ...
zvec tmp = _mm256_addsub_pd(ret.v,_mm256_shuffle_pd(in.v,in.v,0x5));
ret.v =_mm256_shuffle_pd(tmp,tmp,0x5);
#endif
#ifdef SSE4
ret.v = _mm_addsub_pd(ret.v,in.v);
zvec tmp = _mm_addsub_pd(ret.v,_mm_shuffle_pd(in.v,in.v,0x1));
ret.v = _mm_shuffle_pd(tmp,tmp,0x1);
#endif
#ifdef AVX512
ret.v = _mm512_mask_sub_pd(in.v, 0xaaaa,ret.v, in.v);
@ -268,48 +268,41 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
return ret;
}
friend inline vComplexD timesI(const vComplexD &in){
friend inline vComplexD timesMinusI(const vComplexD &in){
vComplexD ret; vzero(ret);
vComplexD tmp;
#if defined (AVX1)|| defined (AVX2)
cvec tmp =_mm256_addsub_ps(ret.v,in.v); // r,-i
/*
IF IMM0[0] = 0
THEN DEST[63:0]=SRC1[63:0] ELSE DEST[63:0]=SRC1[127:64] FI;
IF IMM0[1] = 0
THEN DEST[127:64]=SRC2[63:0] ELSE DEST[127:64]=SRC2[127:64] FI;
IF IMM0[2] = 0
THEN DEST[191:128]=SRC1[191:128] ELSE DEST[191:128]=SRC1[255:192] FI;
IF IMM0[3] = 0
THEN DEST[255:192]=SRC2[191:128] ELSE DEST[255:192]=SRC2[255:192] FI;
*/
ret.v =_mm256_shuffle_ps(tmp,tmp,0x5);
tmp.v =_mm256_addsub_pd(ret.v,in.v); // r,-i
ret.v =_mm256_shuffle_pd(tmp.v,tmp.v,0x5);
#endif
#ifdef SSE4
cvec tmp =_mm_addsub_ps(ret.v,in.v); // r,-i
ret.v =_mm_shuffle_ps(tmp,tmp,0x5);
tmp.v =_mm_addsub_pd(ret.v,in.v); // r,-i
ret.v =_mm_shuffle_pd(tmp.v,tmp.v,0x1);
#endif
#ifdef AVX512
ret.v = _mm512_mask_sub_ps(in.v,0xaaaa,ret.v,in.v); // real -imag
ret.v = _mm512_swizzle_ps(ret.v, _MM_SWIZ_REG_CDAB);// OK
ret.v = _mm512_mask_sub_pd(in.v,0xaaaa,ret.v,in.v); // real -imag
ret.v = _mm512_swizzle_pd(ret.v, _MM_SWIZ_REG_CDAB);// OK
#endif
#ifdef QPX
assert(0);
#endif
return ret;
}
friend inline vComplexD timesMinusI(const vComplexD &in){
friend inline vComplexD timesI(const vComplexD &in){
vComplexD ret; vzero(ret);
vComplexD tmp;
#if defined (AVX1)|| defined (AVX2)
cvec tmp =_mm256_shuffle_ps(in.v,in.v,0x5);
ret.v =_mm256_addsub_ps(ret.v,tmp); // i,-r
tmp.v =_mm256_shuffle_pd(in.v,in.v,0x5);
ret.v =_mm256_addsub_pd(ret.v,tmp.v); // i,-r
#endif
#ifdef SSE4
cvec tmp =_mm_shuffle_ps(in.v,in.v,0x5);
ret.v =_mm_addsub_ps(ret.v,tmp); // r,-i
tmp.v =_mm_shuffle_pd(in.v,in.v,0x1);
ret.v =_mm_addsub_pd(ret.v,tmp.v); // r,-i
#endif
#ifdef AVX512
cvec tmp = _mm512_swizzle_ps(in.v, _MM_SWIZ_REG_CDAB);// OK
ret.v = _mm512_mask_sub_ps(tmp,0xaaaa,ret.v,tmp); // real -imag
tmp.v = _mm512_swizzle_pd(in.v, _MM_SWIZ_REG_CDAB);// OK
ret.v = _mm512_mask_sub_pd(tmp.v,0xaaaa,ret.v,tmp.v); // real -imag
#endif
#ifdef QPX
assert(0);

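As a lane-level convention check for the pd kernels above (and the ps kernels below), here is a scalar reference for the three maps; this is a sketch for verifying signs, not Grid code. With (re,im) adjacent in each 128-bit pair, addsub against zero negates the even (real) slots and the 0x1/0x5 shuffles swap re and im, so each map is one addsub composed with at most two swaps.

#include <cassert>
#include <complex>
// conj:        (re,im) -> ( re,-im) : swap, addsub-with-zero, swap back
// timesI:      (re,im) -> (-im, re) : swap to (im,re), then addsub-with-zero
// timesMinusI: (re,im) -> ( im,-re) : addsub-with-zero to (-re,im), then swap
static std::complex<double> conjRef(std::complex<double> z)       { return { z.real(), -z.imag()}; }
static std::complex<double> timesIRef(std::complex<double> z)     { return {-z.imag(),  z.real()}; }
static std::complex<double> timesMinusIRef(std::complex<double> z){ return { z.imag(), -z.real()}; }
int main(){
  std::complex<double> z(3.0,4.0);
  assert(conjRef(z)        == std::complex<double>(3.0,-4.0));
  assert(timesIRef(z)      == std::complex<double>(-4.0, 3.0));
  assert(timesMinusIRef(z) == std::complex<double>(4.0,-3.0));
  return 0;
}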

@@ -214,10 +214,10 @@ friend inline void vstore(const vComplexF &ret, ComplexF *a){
{
#ifdef SSE4
union {
__m128 v1; // SSE 4 x float vector
cvec v1; // SSE 4 x float vector
float f[4]; // scalar array of 4 floats
} u128;
u128.v1= _mm_add_ps(v, _mm_shuffle_ps(v, v, 0b01001110)); // FIXME Prefer to use _MM_SHUFFLE macros
u128.v1= _mm_add_ps(in.v, _mm_shuffle_ps(in.v,in.v, 0b01001110)); // FIXME Prefer to use _MM_SHUFFLE macros
return ComplexF(u128.f[0], u128.f[1]);
#endif
#ifdef AVX1
@@ -329,13 +329,15 @@ friend inline void vstore(const vComplexF &ret, ComplexF *a){
friend inline vComplexF conj(const vComplexF &in){
vComplexF ret ; vzero(ret);
#if defined (AVX1)|| defined (AVX2)
// cvec tmp;
// tmp = _mm256_addsub_ps(ret.v,_mm256_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1))); // ymm1 <- br,bi
// ret.v=_mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1));
ret.v = _mm256_addsub_ps(ret.v,in.v);
cvec tmp;
tmp = _mm256_addsub_ps(ret.v,_mm256_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1))); // ymm1 <- br,bi
ret.v=_mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1));
#endif
#ifdef SSE4
ret.v = _mm_addsub_ps(ret.v,in.v);
cvec tmp;
tmp = _mm_addsub_ps(ret.v,_mm_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1))); // ymm1 <- br,bi
ret.v=_mm_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1));
#endif
#ifdef AVX512
ret.v = _mm512_mask_sub_ps(in.v,0xaaaa,ret.v,in.v); // Zero out 0+real 0-imag
@@ -345,15 +347,16 @@ friend inline void vstore(const vComplexF &ret, ComplexF *a){
#endif
return ret;
}
friend inline vComplexF timesI(const vComplexF &in){
vComplexF ret; vzero(ret);
friend inline vComplexF timesMinusI(const vComplexF &in){
vComplexF ret;
vzero(ret);
#if defined (AVX1)|| defined (AVX2)
cvec tmp =_mm256_addsub_ps(ret.v,in.v); // r,-i
ret.v = _mm256_shuffle_ps(tmp,tmp,0x5);
ret.v = _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1)); //-i,r
#endif
#ifdef SSE4
cvec tmp =_mm_addsub_ps(ret.v,in.v); // r,-i
ret.v = _mm_shuffle_ps(tmp,tmp,0x5);
ret.v = _mm_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1));
#endif
#ifdef AVX512
ret.v = _mm512_mask_sub_ps(in.v,0xaaaa,ret.v,in.v); // real -imag
@ -364,14 +367,14 @@ friend inline void vstore(const vComplexF &ret, ComplexF *a){
#endif
return ret;
}
friend inline vComplexF timesMinusI(const vComplexF &in){
friend inline vComplexF timesI(const vComplexF &in){
vComplexF ret; vzero(ret);
#if defined (AVX1)|| defined (AVX2)
cvec tmp =_mm256_shuffle_ps(in.v,in.v,0x5);
ret.v = _mm256_addsub_ps(ret.v,tmp); // i,-r
cvec tmp =_mm256_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1));//i,r
ret.v =_mm256_addsub_ps(ret.v,tmp); //i,-r
#endif
#ifdef SSE4
cvec tmp =_mm_shuffle_ps(in.v,in.v,0x5);
cvec tmp =_mm_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1));
ret.v = _mm_addsub_ps(ret.v,tmp); // r,-i
#endif
#ifdef AVX512
@@ -443,5 +446,8 @@ friend inline void vstore(const vComplexF &ret, ComplexF *a){
inline vComplexF trace(const vComplexF &arg){
return arg;
}
}
#endif


@@ -146,7 +146,7 @@ namespace Grid {
ret.v = _mm256_set_pd(a[3],a[2],a[1],a[0]);
#endif
#ifdef SSE4
ret.v = _mm_set_pd(a[0],a[1]);
ret.v = _mm_set_pd(a[1],a[0]);
#endif
#ifdef AVX512
ret.v = _mm512_set_pd(a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
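// Note: the _mm_set_* intrinsics list lanes from highest to lowest, so
// _mm_set_pd(a[1],a[0]) places a[0] in lane 0, consistent with the AVX
// and AVX512 initialisers above.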
@@ -186,6 +186,15 @@ namespace Grid {
friend inline RealD Reduce(const vRealD & in)
{
#if defined (SSE4)
// FIXME Hack
const RealD * ptr =(const RealD *) &in;
RealD ret = 0;
for(int i=0;i<vRealD::Nsimd();i++){
ret = ret+ptr[i];
}
return ret;
#endif
#if defined (AVX1) || defined(AVX2)
typedef union {
uint64_t l;

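A possible branch-free replacement for the scalar-loop FIXME above, assuming SSE2 is available; a sketch only, not tested against Grid's build flags.

#include <emmintrin.h>
// Sum the two double lanes of an __m128d without a scalar loop.
static inline double ReduceSSE2(__m128d v){
  __m128d hi = _mm_unpackhi_pd(v, v);      // broadcast the upper lane
  return _mm_cvtsd_f64(_mm_add_sd(v, hi)); // lane0 + lane1
}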

@@ -175,7 +175,7 @@ namespace Grid {
ret.v = _mm256_set_ps(a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
#endif
#ifdef SSE4
ret.v = _mm_set_ps(a[0],a[1],a[2],a[3]);
ret.v = _mm_set_ps(a[3],a[2],a[1],a[0]);
#endif
#ifdef AVX512
ret.v = _mm512_set_ps( a[15],a[14],a[13],a[12],a[11],a[10],a[9],a[8],
@@ -220,6 +220,15 @@ friend inline void vstore(const vRealF &ret, float *a){
}
friend inline RealF Reduce(const vRealF & in)
{
#if defined (SSE4)
// FIXME Hack
const RealF * ptr = (const RealF *) &in;
RealF ret = 0;
for(int i=0;i<vRealF::Nsimd();i++){
ret = ret+ptr[i];
}
return ret;
#endif
#if defined (AVX1) || defined(AVX2)
__attribute__ ((aligned(32))) float c_[16];
__m256 tmp = _mm256_permute2f128_ps(in.v,in.v,0x01);

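The single-precision FIXME above admits the same treatment; a hedged sketch assuming plain SSE.

#include <xmmintrin.h>
// Sum the four float lanes of an __m128 in two adds.
static inline float ReduceSSE(__m128 v){
  __m128 t = _mm_add_ps(v, _mm_movehl_ps(v, v)); // (v0+v2, v1+v3, ...)
  t = _mm_add_ss(t, _mm_shuffle_ps(t, t, 0x1));  // lane0 += lane1
  return _mm_cvtss_f32(t);
}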

@@ -121,8 +121,9 @@ namespace Grid {
int simd_layout = _grid->_simd_layout[dimension];
int comm_dim = _grid->_processors[dimension] >1 ;
assert(simd_layout==1);
// assert(simd_layout==1); // Why?
assert(comm_dim==1);
shift = (shift + fd) %fd;
assert(shift>=0);
assert(shift<fd);
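// Example: with fd=8 a requested shift of -3 becomes ((-3)+8)%8 = 5, so
// negative displacements wrap to their equivalent positive ones before
// the bounds asserts fire.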


@@ -5,6 +5,11 @@ using namespace std;
using namespace Grid;
using namespace Grid::QCD;
template<class d>
struct scal {
d internal;
};
int main (int argc, char ** argv)
{
@@ -22,22 +27,33 @@ int main (int argc, char ** argv)
GridSerialRNG sRNG;
sRNG.SeedRandomDevice();
SpinMatrix ident=zero;
SpinMatrix ident; ident=zero;
SpinMatrix rnd ; random(sRNG,rnd);
SpinMatrix ll=zero;
SpinMatrix rr=zero;
SpinMatrix ll; ll=zero;
SpinMatrix rr; rr=zero;
SpinMatrix result;
SpinVector lv; random(sRNG,lv);
SpinVector rv; random(sRNG,rv);
std::cout << " Is pod " << std::is_pod<SpinVector>::value << std::endl;
std::cout << " Is pod double " << std::is_pod<double>::value << std::endl;
std::cout << " Is pod ComplexF " << std::is_pod<ComplexF>::value << std::endl;
std::cout << " Is pod scal<double> " << std::is_pod<scal<double> >::value << std::endl;
std::cout << " Is pod Scalar<double> " << std::is_pod<iScalar<double> >::value << std::endl;
std::cout << " Is pod Scalar<ComplexF> " << std::is_pod<iScalar<ComplexF> >::value << std::endl;
std::cout << " Is pod Scalar<vComplexF> " << std::is_pod<iScalar<vComplexF> >::value << std::endl;
std::cout << " Is pod Scalar<vComplexD> " << std::is_pod<iScalar<vComplexD> >::value << std::endl;
std::cout << " Is pod Scalar<vRealF> " << std::is_pod<iScalar<vRealF> >::value << std::endl;
std::cout << " Is pod Scalar<vRealD> " << std::is_pod<iScalar<vRealD> >::value << std::endl;
for(int a=0;a<Ns;a++){
ident()(a,a) = 1.0;
}
const Gamma::GammaMatrix *g = Gamma::GammaMatrices;
const char **list = Gamma::GammaMatrixNames;
const char **list = Gamma::GammaMatrixNames;
result =ll*Gamma(g[0])*rr;
result =ll*Gamma(g[0]);


@@ -50,7 +50,6 @@ int main (int argc, char ** argv)
}
}
double vol = Fine.gSites();
Complex PlaqScale(1.0/vol/6.0/3.0);
@@ -58,7 +57,8 @@
sliceSum(Plaq,Plaq_T,Nd-1);
int Nt = Plaq_T.size();
TComplex Plaq_T_sum=zero;
TComplex Plaq_T_sum;
Plaq_T_sum=zero;
for(int t=0;t<Nt;t++){
Plaq_T_sum = Plaq_T_sum+Plaq_T[t];
Complex Pt=TensorRemove(Plaq_T[t]);
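// For the usual Nd=4, Nc=3: a unit gauge field traces each plaquette to
// Nc=3 and there are Nd*(Nd-1)/2 = 6 planes per site, so the
// 1.0/vol/6.0/3.0 scale above normalises the volume-summed plaquette to 1.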

tests/Grid_simd.cc Normal file

@@ -0,0 +1,166 @@
#include <Grid.h>
#include <parallelIO/GridNerscIO.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
class funcPlus {
public:
funcPlus() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1+i2;}
std::string name(void) const { return std::string("Plus"); }
};
class funcMinus {
public:
funcMinus() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1-i2;}
std::string name(void) const { return std::string("Minus"); }
};
class funcTimes {
public:
funcTimes() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1*i2;}
std::string name(void) const { return std::string("Times"); }
};
class funcConj {
public:
funcConj() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = conj(i1);}
std::string name(void) const { return std::string("Conj"); }
};
class funcAdj {
public:
funcAdj() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = adj(i1);}
std::string name(void) const { return std::string("Adj"); }
};
class funcTimesI {
public:
funcTimesI() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = timesI(i1);}
std::string name(void) const { return std::string("timesI"); }
};
class funcTimesMinusI {
public:
funcTimesMinusI() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = timesMinusI(i1);}
std::string name(void) const { return std::string("timesMinusI"); }
};
template<class scal, class vec,class functor >
void Tester(const functor &func)
{
GridSerialRNG sRNG;
sRNG.SeedRandomDevice();
int Nsimd = vec::Nsimd();
std::vector<scal> input1(Nsimd);
std::vector<scal> input2(Nsimd);
std::vector<scal> result(Nsimd);
std::vector<scal> reference(Nsimd);
std::vector<vec,alignedAllocator<vec> > buf(3);
vec & v_input1 = buf[0];
vec & v_input2 = buf[1];
vec & v_result = buf[2];
for(int i=0;i<Nsimd;i++){
random(sRNG,input1[i]);
random(sRNG,input2[i]);
random(sRNG,result[i]);
}
Gmerge(v_input1,input1);
Gmerge(v_input2,input2);
Gmerge(v_result,result);
func(v_result,v_input1,v_input2);
for(int i=0;i<Nsimd;i++) {
func(reference[i],input1[i],input2[i]);
}
Gextract(v_result,result);
std::cout << " " << func.name()<<std::endl;
int ok=0;
for(int i=0;i<Nsimd;i++){
if ( abs(reference[i]-result[i])>0){
std::cout<< "*****" << std::endl;
std::cout<< "["<<i<<"] "<< abs(reference[i]-result[i]) << " " <<reference[i]<< " " << result[i]<<std::endl;
ok++;
}
}
if ( ok==0 ) std::cout << " OK!" <<std::endl;
}
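// Tester merges Nsimd random scalars into one SIMD vector, applies the
// functor once vectorised and Nsimd times in scalar form, then extracts
// and demands exact lane-by-lane agreement between the two paths.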
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> simd_layout({1,1,2,2});
std::vector<int> mpi_layout ({1,1,1,1});
std::vector<int> latt_size ({8,8,8,8});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
std::vector<int> seeds({1,2,3,4});
// Insist that operations on random scalars give results identical
// to the same operations applied on vectors.
Tester<RealD,vRealD>(funcPlus());
std::cout << "==================================="<< std::endl;
std::cout << "Testing vComplexF "<<std::endl;
std::cout << "==================================="<< std::endl;
Tester<ComplexF,vComplexF>(funcTimesI());
Tester<ComplexF,vComplexF>(funcTimesMinusI());
Tester<ComplexF,vComplexF>(funcPlus());
Tester<ComplexF,vComplexF>(funcMinus());
Tester<ComplexF,vComplexF>(funcTimes());
Tester<ComplexF,vComplexF>(funcConj());
Tester<ComplexF,vComplexF>(funcAdj());
std::cout << "==================================="<< std::endl;
std::cout << "Testing vComplexD "<<std::endl;
std::cout << "==================================="<< std::endl;
Tester<ComplexD,vComplexD>(funcTimesI());
Tester<ComplexD,vComplexD>(funcTimesMinusI());
Tester<ComplexD,vComplexD>(funcPlus());
Tester<ComplexD,vComplexD>(funcMinus());
Tester<ComplexD,vComplexD>(funcTimes());
Tester<ComplexD,vComplexD>(funcConj());
Tester<ComplexD,vComplexD>(funcAdj());
std::cout << "==================================="<< std::endl;
std::cout << "Testing vRealF "<<std::endl;
std::cout << "==================================="<< std::endl;
Tester<RealF,vRealF>(funcPlus());
Tester<RealF,vRealF>(funcMinus());
Tester<RealF,vRealF>(funcTimes());
Tester<RealF,vRealF>(funcAdj());
std::cout << "==================================="<< std::endl;
std::cout << "Testing vRealD "<<std::endl;
std::cout << "==================================="<< std::endl;
Tester<RealD,vRealD>(funcPlus());
Tester<RealD,vRealD>(funcMinus());
Tester<RealD,vRealD>(funcTimes());
Tester<RealD,vRealD>(funcAdj());
Grid_finalize();
}


@@ -4,60 +4,37 @@ using namespace std;
using namespace Grid;
using namespace Grid::QCD;
template<class vobj>
class SimpleCompressor {
public:
void Point(int) {};
vobj operator() (vobj &arg) {
return arg;
}
};
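// The compressor is applied to each object as it is gathered into the
// comms buffer. SimpleCompressor forwards the payload unchanged; a Wilson
// dslash compressor could instead spin-project to a half spinor here and
// halve the message volume (cf. the vHalfSpinColourVector comm_buf above).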
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size (4);
std::vector<int> simd_layout(4);
std::vector<int> mpi_layout (4);
std::vector<int> simd_layout({1,1,2,2});
std::vector<int> mpi_layout ({2,2,2,2});
std::vector<int> latt_size ({8,8,8,8});
int omp=1;
int lat=8;
double volume = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
mpi_layout[0]=1;
mpi_layout[1]=1;
mpi_layout[2]=1;
mpi_layout[3]=1;
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian rbFine(latt_size,simd_layout,mpi_layout);
GridParallelRNG fRNG(&Fine);
fRNG.SeedRandomDevice();
latt_size[0] = lat;
latt_size[1] = lat;
latt_size[2] = lat;
latt_size[3] = lat;
double volume = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
LatticeColourMatrix Foo(&Fine);
LatticeColourMatrix Bar(&Fine);
LatticeColourMatrix Check(&Fine);
LatticeColourMatrix Diff(&Fine);
#ifdef AVX512
simd_layout[0] = 1;
simd_layout[1] = 2;
simd_layout[2] = 2;
simd_layout[3] = 2;
#endif
#if defined (AVX1)|| defined (AVX2)
simd_layout[0] = 1;
simd_layout[1] = 1;
simd_layout[2] = 2;
simd_layout[3] = 2;
#endif
#if defined (SSE2)
simd_layout[0] = 1;
simd_layout[1] = 1;
simd_layout[2] = 1;
simd_layout[3] = 2;
#endif
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian rbFine(latt_size,simd_layout,mpi_layout);
GridParallelRNG fRNG(&Fine);
fRNG.SeedRandomDevice();
LatticeColourMatrix Foo(&Fine);
LatticeColourMatrix Bar(&Fine);
LatticeColourMatrix Check(&Fine);
LatticeColourMatrix Diff(&Fine);
random(fRNG,Foo);
gaussian(fRNG,Bar);
random(fRNG,Foo);
gaussian(fRNG,Bar);
for(int dir=0;dir<4;dir++){
@@ -86,7 +63,8 @@ int main (int argc, char ** argv)
fflush(stdout);
std::vector<vColourMatrix,alignedAllocator<vColourMatrix> > comm_buf(myStencil._unified_buffer_size);
printf("calling halo exchange\n");fflush(stdout);
myStencil.HaloExchange(Foo,comm_buf);
SimpleCompressor<vColourMatrix> compress;
myStencil.HaloExchange(Foo,comm_buf,compress);
Bar = Cshift(Foo,dir,disp);

tests/Grid_wilson.cc Normal file

@@ -0,0 +1,69 @@
#include <Grid.h>
#include <parallelIO/GridNerscIO.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
template<class d>
struct scal {
d internal;
};
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> simd_layout({1,1,2,2});
std::vector<int> mpi_layout ({1,1,1,1});
std::vector<int> latt_size ({8,8,8,8});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid);
// pRNG.SeedFixedIntegers(seeds);
pRNG.SeedRandomDevice();
LatticeFermion src(&Grid); random(pRNG,src);
LatticeFermion result(&Grid); result=zero;
LatticeFermion ref(&Grid); ref=zero;
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
std::vector<LatticeColourMatrix> U(4,&Grid);
for(int mu=0;mu<Nd;mu++){
U[mu] = 1.0;
pokeIndex<3>(Umu,U[mu],mu);
}
{
int mu=0;
// ref = src + Gamma(Gamma::GammaX)* src ; // 1-gamma_x
ref = src;
ref = U[0]*Cshift(ref,0,1);
}
RealD mass=0.1;
WilsonMatrix Dw(Umu,mass);
std::cout << "Calling Dw"<<std::endl;
Dw.multiply(src,result);
std::cout << "Called Dw"<<std::endl;
std::cout << "norm result "<< norm2(result)<<std::endl;
std::cout << "norm ref "<< norm2(ref)<<std::endl;
for(int ss=0;ss<10;ss++ ){
for(int i=0;i<Ns;i++){
for(int j=0;j<Nc;j++){
ComplexF * ref_p = (ComplexF *)&ref._odata[ss]()(i)(j);
ComplexF * res_p = (ComplexF *)&result._odata[ss]()(i)(j);
std::cout << ss<< " "<<i<<" "<<j<<" "<< (*ref_p)<<" " <<(*res_p)<<std::endl;
}
}
}
ref = ref -result;
std::cout << "norm diff "<< norm2(ref)<<std::endl;
Grid_finalize();
}
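// With unit links poked in above, the hand-built ref is just the forward
// X-hop U_0 * Cshift(src,0,1); the (1 -/+ gamma_mu) projector terms in the
// commented-out line are not yet applied, so this exercises only the
// gauge-hopping plumbing of Dw.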


@@ -5,7 +5,7 @@ AM_LDFLAGS = -L$(top_srcdir)/lib
#
# Test code
#
bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma
bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma Grid_wilson Grid_simd
Grid_main_SOURCES = Grid_main.cc
Grid_main_LDADD = -lGrid
@@ -21,3 +21,9 @@ Grid_gamma_LDADD = -lGrid
Grid_stencil_SOURCES = Grid_stencil.cc
Grid_stencil_LDADD = -lGrid
Grid_wilson_SOURCES = Grid_wilson.cc
Grid_wilson_LDADD = -lGrid
Grid_simd_SOURCES = Grid_simd.cc
Grid_simd_LDADD = -lGrid