1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-12 20:27:06 +01:00

Comms and memory benchmarks added

This commit is contained in:
Peter Boyle
2015-05-03 09:44:47 +01:00
parent 99a1ff423d
commit 193860dbc8
14 changed files with 300 additions and 59 deletions

View File

@ -14,6 +14,7 @@
#include <complex>
#include <vector>
#include <valarray>
#include <iostream>
#include <cassert>
#include <random>

View File

@ -26,7 +26,8 @@ class Lattice
public:
GridBase *_grid;
int checkerboard;
std::vector<vobj,alignedAllocator<vobj> > _odata;
//std::vector<vobj,alignedAllocator<vobj> > _odata;
std::valarray<vobj> _odata;
public:
typedef typename vobj::scalar_type scalar_type;
@ -36,9 +37,9 @@ public:
// Constructor requires "grid" passed.
// what about a default grid?
//////////////////////////////////////////////////////////////////
Lattice(GridBase *grid) : _grid(grid) {
Lattice(GridBase *grid) : _grid(grid), _odata(_grid->oSites()) {
// _odata.reserve(_grid->oSites());
_odata.resize(_grid->oSites());
// _odata.resize(_grid->oSites());
assert((((uint64_t)&_odata[0])&0xF) ==0);
checkerboard=0;
}

View File

@ -93,7 +93,7 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
MPI_Request rrq;
int rank = _processor;
int ierr;
ierr=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
assert(ierr==0);

View File

@ -3,6 +3,9 @@
namespace Grid {
//////////////////////////////////////////////////////////////////////////////////////////////////////
// unary negation
//////////////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj>
inline Lattice<vobj> operator -(const Lattice<vobj> &r)
{
@ -13,25 +16,10 @@ namespace Grid {
}
return ret;
}
// Lattice-wide axpy with a real (double) coefficient.
// Checks that lhs and rhs are conformable, then hands each site of ret, lhs and rhs
// (indices 0 .. lhs._grid->oSites()-1) to the site-level axpy overload together with a.
// ret itself is not checked against lhs/rhs here.
template<class vobj>
inline void axpy(Lattice<vobj> &ret,double a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs)
{
  conformable(lhs,rhs);
  const auto nsites = lhs._grid->oSites();
#pragma omp parallel for
  for(int site=0; site<nsites; ++site){
    axpy(&ret._odata[site], a, &lhs._odata[site], &rhs._odata[site]);
  }
}
// Lattice-wide axpy with a std::complex<double> coefficient.
// Checks that lhs and rhs are conformable, then hands each site of ret, lhs and rhs
// (indices 0 .. lhs._grid->oSites()-1) to the site-level axpy overload together with a.
// ret itself is not checked against lhs/rhs here.
template<class vobj>
inline void axpy(Lattice<vobj> &ret,std::complex<double> a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs)
{
  conformable(lhs,rhs);
  const auto nsites = lhs._grid->oSites();
#pragma omp parallel for
  for(int site=0; site<nsites; ++site){
    axpy(&ret._odata[site], a, &lhs._odata[site], &rhs._odata[site]);
  }
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
// avoid copy back routines for mult, mac, sub, add
//////////////////////////////////////////////////////////////////////////////////////////////////////
template<class obj1,class obj2,class obj3>
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
conformable(lhs,rhs);
@ -69,7 +57,89 @@ namespace Grid {
}
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
// avoid copy back routines for mult, mac, sub, add: Lattice (op) non-Lattice object
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Site-by-site mult of a Lattice (lhs) with one non-Lattice object (rhs), written into ret.
// For every site index ss in [0, lhs._grid->oSites()) this calls the site-level
// mult(&ret._odata[ss], &lhs._odata[ss], &rhs); the same rhs object is used at every site.
template<class obj1,class obj2,class obj3>
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
  // rhs is not a Lattice, so the conformability check is between the two Lattice
  // arguments: ret is indexed over lhs's sites below.
  conformable(ret,lhs);
  uint32_t vec_len = lhs._grid->oSites();
#pragma omp parallel for
  for(int ss=0;ss<vec_len;ss++){
    mult(&ret._odata[ss],&lhs._odata[ss],&rhs);
  }
}
// Site-by-site mac of a Lattice (lhs) with one non-Lattice object (rhs), accumulated via
// the site-level mac(&ret._odata[ss], &lhs._odata[ss], &rhs) for every site index ss in
// [0, lhs._grid->oSites()); the same rhs object is used at every site.
template<class obj1,class obj2,class obj3>
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
  // rhs is not a Lattice, so the conformability check is between the two Lattice
  // arguments: ret is indexed over lhs's sites below.
  conformable(ret,lhs);
  uint32_t vec_len = lhs._grid->oSites();
#pragma omp parallel for
  for(int ss=0;ss<vec_len;ss++){
    mac(&ret._odata[ss],&lhs._odata[ss],&rhs);
  }
}
// Site-by-site sub of one non-Lattice object (rhs) from a Lattice (lhs), written into ret.
// For every site index ss in [0, lhs._grid->oSites()) this calls the site-level
// sub(&ret._odata[ss], &lhs._odata[ss], &rhs); the same rhs object is used at every site.
template<class obj1,class obj2,class obj3>
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
  // rhs is not a Lattice, so the conformability check is between the two Lattice
  // arguments: ret is indexed over lhs's sites below.
  conformable(ret,lhs);
#pragma omp parallel for
  for(int ss=0;ss<lhs._grid->oSites();ss++){
    sub(&ret._odata[ss],&lhs._odata[ss],&rhs);
  }
}
// Site-by-site add of a Lattice (lhs) and one non-Lattice object (rhs), written into ret.
// For every site index ss in [0, lhs._grid->oSites()) this calls the site-level
// add(&ret._odata[ss], &lhs._odata[ss], &rhs); the same rhs object is used at every site.
template<class obj1,class obj2,class obj3>
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
  // rhs is not a Lattice, so the conformability check is between the two Lattice
  // arguments: ret is indexed over lhs's sites below.
  conformable(ret,lhs);
#pragma omp parallel for
  for(int ss=0;ss<lhs._grid->oSites();ss++){
    add(&ret._odata[ss],&lhs._odata[ss],&rhs);
  }
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
// avoid copy back routines for mult, mac, sub, add: non-Lattice object (op) Lattice
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Site-by-site mult of one non-Lattice object (lhs) with a Lattice (rhs), written into ret.
// For every site index ss in [0, rhs._grid->oSites()) this calls the site-level
// mult(&ret._odata[ss], &lhs, &rhs._odata[ss]); the same lhs object is used at every site.
template<class obj1,class obj2,class obj3>
void mult(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
  // lhs is not a Lattice: the grid (and hence the site count) comes from rhs, and the
  // conformability check is between the two Lattice arguments.
  conformable(ret,rhs);
  uint32_t vec_len = rhs._grid->oSites();
#pragma omp parallel for
  for(int ss=0;ss<vec_len;ss++){
    mult(&ret._odata[ss],&lhs,&rhs._odata[ss]);
  }
}
// Site-by-site mac of one non-Lattice object (lhs) with a Lattice (rhs), accumulated via
// the site-level mac(&ret._odata[ss], &lhs, &rhs._odata[ss]) for every site index ss in
// [0, rhs._grid->oSites()); the same lhs object is used at every site.
template<class obj1,class obj2,class obj3>
void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
  // lhs is not a Lattice: the grid (and hence the site count) comes from rhs, and the
  // conformability check is between the two Lattice arguments.
  conformable(ret,rhs);
  uint32_t vec_len = rhs._grid->oSites();
#pragma omp parallel for
  for(int ss=0;ss<vec_len;ss++){
    mac(&ret._odata[ss],&lhs,&rhs._odata[ss]);
  }
}
// Site-by-site sub of a Lattice (rhs) from one non-Lattice object (lhs), written into ret.
// For every site index ss in [0, rhs._grid->oSites()) this calls the site-level
// sub(&ret._odata[ss], &lhs, &rhs._odata[ss]); the same lhs object is used at every site.
template<class obj1,class obj2,class obj3>
void sub(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
  // lhs is not a Lattice: the grid (and hence the site count) comes from rhs, and the
  // conformability check is between the two Lattice arguments.
  conformable(ret,rhs);
#pragma omp parallel for
  for(int ss=0;ss<rhs._grid->oSites();ss++){
    sub(&ret._odata[ss],&lhs,&rhs._odata[ss]);
  }
}
// Site-by-site add of one non-Lattice object (lhs) and a Lattice (rhs), written into ret.
// For every site index ss in [0, rhs._grid->oSites()) this calls the site-level
// add(&ret._odata[ss], &lhs, &rhs._odata[ss]); the same lhs object is used at every site.
template<class obj1,class obj2,class obj3>
void add(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
  // lhs is not a Lattice: the grid (and hence the site count) comes from rhs, and the
  // conformability check is between the two Lattice arguments.
  conformable(ret,rhs);
#pragma omp parallel for
  for(int ss=0;ss<rhs._grid->oSites();ss++){
    add(&ret._odata[ss],&lhs,&rhs._odata[ss]);
  }
}
/////////////////////////////////////////////////////////////////////////////////////
// Lattice BinOp Lattice,
/////////////////////////////////////////////////////////////////////////////////////
template<class left,class right>
inline auto operator * (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
{
@ -156,5 +226,17 @@ namespace Grid {
}
return ret;
}
// Generic lattice axpy: for every site index ss in [0, lhs._grid->oSites()) stores
// a*lhs._odata[ss] + rhs._odata[ss] into ret._odata[ss]. The coefficient type sobj is
// any type for which a*vobj is defined and assignable to vobj.
// lhs and rhs are checked for conformability; ret is not checked here.
template<class sobj,class vobj>
inline void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
  conformable(lhs,rhs);
#pragma omp parallel for
  for(int ss=0;ss<lhs._grid->oSites();ss++){
    // The temporary lives inside the loop body so that each thread works on its own
    // copy; a variable declared before the parallel-for would be shared between
    // threads and written concurrently.
    vobj tmp;
    tmp = a*lhs._odata[ss];
    ret._odata[ss]= tmp+rhs._odata[ss];
  }
}
}
#endif

View File

@ -7,6 +7,7 @@ namespace Grid {
///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////// MAC ///////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////
///////////////////////////
// Legal multiplication table
@ -74,8 +75,6 @@ inline void mac(iVector<rrtype,N> * __restrict__ ret,const iVector<ltype,N> * __
}
return;
}
}
#endif

View File

@ -7,7 +7,6 @@ namespace Grid {
///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////// MUL ///////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////
template<class rtype,class vtype,class mtype>
inline void mult(iScalar<rtype> * __restrict__ ret,const iScalar<mtype> * __restrict__ lhs,const iScalar<vtype> * __restrict__ rhs){

View File

@ -16,7 +16,7 @@ namespace Grid {
// However note that doing this eliminates some syntactical sugar such as
// calling the constructor explicitly or implicitly
//
#define TENSOR_IS_POD
#undef TENSOR_IS_POD
template<class vtype> class iScalar
{
@ -36,7 +36,7 @@ public:
// template<int Level> using tensor_reduce_level = typename iScalar<GridTypeMapper<vtype>::tensor_reduce_level<Level> >;
#ifndef TENSOR_IS_POD
iScalar(){;};
iScalar()=default;
// Build from a scalar_type value by forwarding it to _internal's constructor
// (recurses down until the constructor for vector_type is reached).
iScalar(scalar_type s)
  : _internal(s)
{
}
// Build from a Zero tag. The parameter z is not read; the object is assigned from
// the name `zero`, which is resolved outside this constructor.
iScalar(const Zero &z)
{
  *this = zero;
}
#endif
@ -126,7 +126,7 @@ public:
#ifndef TENSOR_IS_POD
// Build from a Zero tag. The parameter z is not read; the object is assigned from
// the name `zero`, which is resolved outside this constructor.
iVector(const Zero &z)
{
  *this = zero;
}
iVector() {};// Empty constructure
iVector() =default;
#endif
iVector<vtype,N> & operator= (const Zero &hero){
@ -189,7 +189,7 @@ public:
#ifndef TENSOR_IS_POD
// Build from a Zero tag. The parameter z is not read; the object is assigned from
// the name `zero`, which is resolved outside this constructor.
iMatrix(const Zero &z)
{
  *this = zero;
}
iMatrix() {};
iMatrix() =default;
#endif
iMatrix<vtype,N> & operator= (const Zero &hero){

View File

@ -13,7 +13,7 @@ namespace Grid {
vzero(*this);
return (*this);
}
vComplexD(){};
vComplexD()=default;
// Build from a single ComplexD value by passing it to vsplat(*this, a).
vComplexD(ComplexD a)
{
  vsplat(*this,a);
}

View File

@ -28,7 +28,7 @@ namespace Grid {
vzero(*this);
return (*this);
}
vComplexF(){};
vComplexF()=default;
// Build from a single ComplexF value by passing it to vsplat(*this, a).
vComplexF(ComplexF a)
{
  vsplat(*this,a);
}

View File

@ -10,10 +10,13 @@ namespace Grid {
typedef dvec vector_type;
typedef RealD scalar_type;
vRealD(){};
vRealD()=default;
// Build from a single RealD value by passing it to vsplat(*this, a).
vRealD(RealD a)
{
  vsplat(*this,a);
}
vRealD(Zero &zero){
zeroit(*this);
}
// Site-level product for vRealD: stores (*l) * (*r) into *y.
// All three pointers are __restrict__-qualified.
friend inline void mult(vRealD * __restrict__ y,const vRealD * __restrict__ l,const vRealD *__restrict__ r)
{
  *y = (*l) * (*r);
}
// Site-level difference for vRealD: stores (*l) - (*r) into *y.
// All three pointers are __restrict__-qualified.
friend inline void sub (vRealD * __restrict__ y,const vRealD * __restrict__ l,const vRealD *__restrict__ r)
{
  *y = (*l) - (*r);
}

View File

@ -8,14 +8,16 @@ namespace Grid {
fvec v;
public:
typedef fvec vector_type;
typedef RealF scalar_type;
vRealF(){};
vRealF()=default;
// Build from a single RealF value by passing it to vsplat(*this, a).
vRealF(RealF a)
{
  vsplat(*this,a);
}
vRealF(Zero &zero){
zeroit(*this);
}
////////////////////////////////////
// Arithmetic operator overloads +,-,*
////////////////////////////////////