mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Comms and memory benchmarks added
This commit is contained in:
parent
99a1ff423d
commit
193860dbc8
@ -25,17 +25,19 @@ int main (int argc, char ** argv)
|
||||
for(int lat=4;lat<=16;lat+=4){
|
||||
for(int Ls=1;Ls<=16;Ls*=2){
|
||||
|
||||
std::vector<int> latt_size ({lat,lat,lat,lat});
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
|
||||
std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
|
||||
|
||||
int ncomm;
|
||||
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||
|
||||
double start=usecond();
|
||||
int ncomm=0;
|
||||
for(int i=0;i<Nloop;i++){
|
||||
std::vector<int> latt_size ({lat,lat,lat,lat});
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
|
||||
std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
|
||||
std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
|
||||
std::vector<CartesianCommunicator::CommsRequest_t> requests;
|
||||
|
||||
ncomm=0;
|
||||
@ -68,11 +70,10 @@ int main (int argc, char ** argv)
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
Grid.SendToRecvFromComplete(requests);
|
||||
Grid.Barrier();
|
||||
}
|
||||
|
||||
}
|
||||
double stop=usecond();
|
||||
|
||||
double xbytes = Nloop*bytes*2*ncomm;
|
||||
@ -96,18 +97,20 @@ int main (int argc, char ** argv)
|
||||
for(int lat=4;lat<=16;lat+=4){
|
||||
for(int Ls=1;Ls<=16;Ls*=2){
|
||||
|
||||
std::vector<int> latt_size ({lat,lat,lat,lat});
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
|
||||
std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
|
||||
|
||||
|
||||
int ncomm;
|
||||
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||
|
||||
double start=usecond();
|
||||
int ncomm=0;
|
||||
for(int i=0;i<Nloop;i++){
|
||||
std::vector<int> latt_size ({lat,lat,lat,lat});
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
|
||||
std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
|
||||
std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
|
||||
|
||||
ncomm=0;
|
||||
for(int mu=0;mu<4;mu++){
|
||||
|
||||
@ -131,7 +134,6 @@ int main (int argc, char ** argv)
|
||||
}
|
||||
|
||||
comm_proc = mpi_layout[mu]-1;
|
||||
|
||||
{
|
||||
std::vector<CartesianCommunicator::CommsRequest_t> requests;
|
||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||
|
150
benchmarks/Grid_memory_bandwidth.cc
Normal file
150
benchmarks/Grid_memory_bandwidth.cc
Normal file
@ -0,0 +1,150 @@
|
||||
#include <Grid.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
std::vector<int> simd_layout({1,2,2,2});
|
||||
std::vector<int> mpi_layout ({1,1,1,1});
|
||||
|
||||
const int Nvec=8;
|
||||
typedef Lattice< iVector< vReal,Nvec> > LatticeVec;
|
||||
|
||||
int Nloop=100;
|
||||
|
||||
std::cout << "===================================================================================================="<<std::endl;
|
||||
std::cout << "= Benchmarking AXPY bandwidth"<<std::endl;
|
||||
std::cout << "===================================================================================================="<<std::endl;
|
||||
std::cout << " L "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s"<<std::endl;
|
||||
|
||||
|
||||
for(int lat=4;lat<=32;lat+=4){
|
||||
|
||||
std::vector<int> latt_size ({lat,lat,lat,lat});
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
|
||||
LatticeVec z(&Grid); random(pRNG,z);
|
||||
LatticeVec x(&Grid); random(pRNG,x);
|
||||
LatticeVec y(&Grid); random(pRNG,y);
|
||||
double a=1.0;
|
||||
|
||||
|
||||
double start=usecond();
|
||||
for(int i=0;i<Nloop;i++){
|
||||
// z=a*x+y;
|
||||
// inline void axpy(Lattice<vobj> &ret,double a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
|
||||
axpy(z,a,x,y);
|
||||
}
|
||||
double stop=usecond();
|
||||
double time = stop-start;
|
||||
|
||||
double bytes=3*lat*lat*lat*lat*Nvec*sizeof(Real)*Nloop;
|
||||
std::cout << lat<<"\t\t"<<bytes<<"\t\t"<<bytes/time<<std::endl;
|
||||
|
||||
}
|
||||
|
||||
std::cout << "===================================================================================================="<<std::endl;
|
||||
std::cout << "= Benchmarking a*x + y bandwidth"<<std::endl;
|
||||
std::cout << "===================================================================================================="<<std::endl;
|
||||
std::cout << " L "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s"<<std::endl;
|
||||
|
||||
|
||||
for(int lat=4;lat<=32;lat+=4){
|
||||
|
||||
std::vector<int> latt_size ({lat,lat,lat,lat});
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
|
||||
LatticeVec z(&Grid); random(pRNG,z);
|
||||
LatticeVec x(&Grid); random(pRNG,x);
|
||||
LatticeVec y(&Grid); random(pRNG,y);
|
||||
double a=1.0;
|
||||
|
||||
|
||||
double start=usecond();
|
||||
for(int i=0;i<Nloop;i++){
|
||||
z=a*x+y;
|
||||
}
|
||||
double stop=usecond();
|
||||
double time = stop-start;
|
||||
|
||||
double bytes=3*lat*lat*lat*lat*Nvec*sizeof(Real)*Nloop;
|
||||
std::cout << lat<<"\t\t"<<bytes<<"\t\t"<<bytes/time<<std::endl;
|
||||
|
||||
}
|
||||
|
||||
std::cout << "===================================================================================================="<<std::endl;
|
||||
std::cout << "= Benchmarking COPY bandwidth"<<std::endl;
|
||||
std::cout << "===================================================================================================="<<std::endl;
|
||||
std::cout << " L "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s"<<std::endl;
|
||||
|
||||
|
||||
for(int lat=4;lat<=32;lat+=4){
|
||||
|
||||
std::vector<int> latt_size ({lat,lat,lat,lat});
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
|
||||
LatticeVec z(&Grid); random(pRNG,z);
|
||||
LatticeVec x(&Grid); random(pRNG,x);
|
||||
LatticeVec y(&Grid); random(pRNG,y);
|
||||
RealD a=1.0;
|
||||
|
||||
|
||||
double start=usecond();
|
||||
for(int i=0;i<Nloop;i++){
|
||||
x=z;
|
||||
}
|
||||
double stop=usecond();
|
||||
double time = stop-start;
|
||||
|
||||
double bytes=2*lat*lat*lat*lat*Nvec*sizeof(Real)*Nloop;
|
||||
std::cout << lat<<"\t\t"<<bytes<<"\t\t"<<bytes/time<<std::endl;
|
||||
|
||||
}
|
||||
|
||||
std::cout << "===================================================================================================="<<std::endl;
|
||||
std::cout << "= Benchmarking READ bandwidth"<<std::endl;
|
||||
std::cout << "===================================================================================================="<<std::endl;
|
||||
std::cout << " L "<<"\t\t"<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s"<<std::endl;
|
||||
|
||||
|
||||
for(int lat=4;lat<=32;lat+=4){
|
||||
|
||||
std::vector<int> latt_size ({lat,lat,lat,lat});
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
|
||||
LatticeVec z(&Grid); random(pRNG,z);
|
||||
LatticeVec x(&Grid); random(pRNG,x);
|
||||
LatticeVec y(&Grid); random(pRNG,y);
|
||||
RealD a=1.0;
|
||||
ComplexD nn;
|
||||
|
||||
double start=usecond();
|
||||
for(int i=0;i<Nloop;i++){
|
||||
nn=norm2(x);
|
||||
}
|
||||
double stop=usecond();
|
||||
double time = stop-start;
|
||||
|
||||
double bytes=lat*lat*lat*lat*Nvec*sizeof(Real)*Nloop;
|
||||
std::cout << lat<<"\t\t"<<bytes<<"\t\t"<<bytes/time<<std::endl;
|
||||
|
||||
}
|
||||
|
||||
Grid_finalize();
|
||||
}
|
@ -14,6 +14,7 @@
|
||||
|
||||
#include <complex>
|
||||
#include <vector>
|
||||
#include <valarray>
|
||||
#include <iostream>
|
||||
#include <cassert>
|
||||
#include <random>
|
||||
|
@ -26,7 +26,8 @@ class Lattice
|
||||
public:
|
||||
GridBase *_grid;
|
||||
int checkerboard;
|
||||
std::vector<vobj,alignedAllocator<vobj> > _odata;
|
||||
//std::vector<vobj,alignedAllocator<vobj> > _odata;
|
||||
std::valarray<vobj> _odata;
|
||||
public:
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
@ -36,9 +37,9 @@ public:
|
||||
// Constructor requires "grid" passed.
|
||||
// what about a default grid?
|
||||
//////////////////////////////////////////////////////////////////
|
||||
Lattice(GridBase *grid) : _grid(grid) {
|
||||
Lattice(GridBase *grid) : _grid(grid), _odata(_grid->oSites()) {
|
||||
// _odata.reserve(_grid->oSites());
|
||||
_odata.resize(_grid->oSites());
|
||||
// _odata.resize(_grid->oSites());
|
||||
assert((((uint64_t)&_odata[0])&0xF) ==0);
|
||||
checkerboard=0;
|
||||
}
|
||||
|
@ -93,7 +93,7 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
|
||||
MPI_Request rrq;
|
||||
int rank = _processor;
|
||||
int ierr;
|
||||
ierr=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
|
||||
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
|
||||
ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
|
||||
|
||||
assert(ierr==0);
|
||||
|
@ -3,6 +3,9 @@
|
||||
|
||||
namespace Grid {
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// unary negation
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
inline Lattice<vobj> operator -(const Lattice<vobj> &r)
|
||||
{
|
||||
@ -13,25 +16,10 @@ namespace Grid {
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
inline void axpy(Lattice<vobj> &ret,double a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
|
||||
conformable(lhs,rhs);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
axpy(&ret._odata[ss],a,&lhs._odata[ss],&rhs._odata[ss]);
|
||||
}
|
||||
}
|
||||
template<class vobj>
|
||||
inline void axpy(Lattice<vobj> &ret,std::complex<double> a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
|
||||
conformable(lhs,rhs);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
axpy(&ret._odata[ss],a,&lhs._odata[ss],&rhs._odata[ss]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// avoid copy back routines for mult, mac, sub, add
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class obj1,class obj2,class obj3>
|
||||
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
conformable(lhs,rhs);
|
||||
@ -69,7 +57,89 @@ namespace Grid {
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// avoid copy back routines for mult, mac, sub, add: Lattice lhs, non-Lattice rhs
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class obj1,class obj2,class obj3>
|
||||
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
conformable(lhs,rhs);
|
||||
uint32_t vec_len = lhs._grid->oSites();
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<vec_len;ss++){
|
||||
mult(&ret._odata[ss],&lhs._odata[ss],&rhs);
|
||||
}
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3>
|
||||
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
conformable(lhs,rhs);
|
||||
uint32_t vec_len = lhs._grid->oSites();
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<vec_len;ss++){
|
||||
mac(&ret._odata[ss],&lhs._odata[ss],&rhs);
|
||||
}
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3>
|
||||
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
conformable(lhs,rhs);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
sub(&ret._odata[ss],&lhs._odata[ss],&rhs);
|
||||
}
|
||||
}
|
||||
template<class obj1,class obj2,class obj3>
|
||||
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
conformable(lhs,rhs);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
add(&ret._odata[ss],&lhs._odata[ss],&rhs);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// avoid copy back routines for mult, mac, sub, add: non-Lattice lhs, Lattice rhs
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class obj1,class obj2,class obj3>
|
||||
void mult(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
conformable(lhs,rhs);
|
||||
uint32_t vec_len = lhs._grid->oSites();
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<vec_len;ss++){
|
||||
mult(&ret._odata[ss],&lhs,&rhs._odata[ss]);
|
||||
}
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3>
|
||||
void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
conformable(lhs,rhs);
|
||||
uint32_t vec_len = lhs._grid->oSites();
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<vec_len;ss++){
|
||||
mac(&ret._odata[ss],&lhs,&rhs._odata[ss]);
|
||||
}
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3>
|
||||
void sub(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
conformable(lhs,rhs);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
sub(&ret._odata[ss],&lhs,&rhs._odata[ss]);
|
||||
}
|
||||
}
|
||||
template<class obj1,class obj2,class obj3>
|
||||
void add(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
conformable(lhs,rhs);
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
add(&ret._odata[ss],&lhs,&rhs._odata[ss]);
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////
|
||||
// Lattice BinOp Lattice,
|
||||
/////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class left,class right>
|
||||
inline auto operator * (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
|
||||
{
|
||||
@ -156,5 +226,17 @@ namespace Grid {
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class sobj,class vobj>
|
||||
inline void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
|
||||
conformable(lhs,rhs);
|
||||
vobj tmp;
|
||||
#pragma omp parallel for
|
||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
tmp = a*lhs._odata[ss];
|
||||
ret._odata[ss]= tmp+rhs._odata[ss];
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
|
@ -7,6 +7,7 @@ namespace Grid {
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////////////// MAC ///////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////
|
||||
|
||||
///////////////////////////
|
||||
// Legal multiplication table
|
||||
@ -74,8 +75,6 @@ inline void mac(iVector<rrtype,N> * __restrict__ ret,const iVector<ltype,N> * __
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -7,7 +7,6 @@ namespace Grid {
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////////////// MUL ///////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
template<class rtype,class vtype,class mtype>
|
||||
inline void mult(iScalar<rtype> * __restrict__ ret,const iScalar<mtype> * __restrict__ lhs,const iScalar<vtype> * __restrict__ rhs){
|
||||
|
@ -16,7 +16,7 @@ namespace Grid {
|
||||
// However note that doing this eliminates some syntactical sugar such as
|
||||
// calling the constructor explicitly or implicitly
|
||||
//
|
||||
#define TENSOR_IS_POD
|
||||
#undef TENSOR_IS_POD
|
||||
|
||||
template<class vtype> class iScalar
|
||||
{
|
||||
@ -36,7 +36,7 @@ public:
|
||||
// template<int Level> using tensor_reduce_level = typename iScalar<GridTypeMapper<vtype>::tensor_reduce_level<Level> >;
|
||||
|
||||
#ifndef TENSOR_IS_POD
|
||||
iScalar(){;};
|
||||
iScalar()=default;
|
||||
iScalar(scalar_type s) : _internal(s) {};// recurse down and hit the constructor for vector_type
|
||||
iScalar(const Zero &z){ *this = zero; };
|
||||
#endif
|
||||
@ -126,7 +126,7 @@ public:
|
||||
|
||||
#ifndef TENSOR_IS_POD
|
||||
iVector(const Zero &z){ *this = zero; };
|
||||
iVector() {};// Empty constructure
|
||||
iVector() =default;
|
||||
#endif
|
||||
|
||||
iVector<vtype,N> & operator= (const Zero &hero){
|
||||
@ -189,7 +189,7 @@ public:
|
||||
|
||||
#ifndef TENSOR_IS_POD
|
||||
iMatrix(const Zero &z){ *this = zero; };
|
||||
iMatrix() {};
|
||||
iMatrix() =default;
|
||||
#endif
|
||||
|
||||
iMatrix<vtype,N> & operator= (const Zero &hero){
|
||||
|
@ -13,7 +13,7 @@ namespace Grid {
|
||||
vzero(*this);
|
||||
return (*this);
|
||||
}
|
||||
vComplexD(){};
|
||||
vComplexD()=default;
|
||||
vComplexD(ComplexD a){
|
||||
vsplat(*this,a);
|
||||
};
|
||||
|
@ -28,7 +28,7 @@ namespace Grid {
|
||||
vzero(*this);
|
||||
return (*this);
|
||||
}
|
||||
vComplexF(){};
|
||||
vComplexF()=default;
|
||||
vComplexF(ComplexF a){
|
||||
vsplat(*this,a);
|
||||
};
|
||||
|
@ -10,10 +10,13 @@ namespace Grid {
|
||||
typedef dvec vector_type;
|
||||
typedef RealD scalar_type;
|
||||
|
||||
vRealD(){};
|
||||
vRealD()=default;
|
||||
vRealD(RealD a){
|
||||
vsplat(*this,a);
|
||||
};
|
||||
vRealD(Zero &zero){
|
||||
zeroit(*this);
|
||||
}
|
||||
|
||||
friend inline void mult(vRealD * __restrict__ y,const vRealD * __restrict__ l,const vRealD *__restrict__ r) {*y = (*l) * (*r);}
|
||||
friend inline void sub (vRealD * __restrict__ y,const vRealD * __restrict__ l,const vRealD *__restrict__ r) {*y = (*l) - (*r);}
|
||||
|
@ -8,14 +8,16 @@ namespace Grid {
|
||||
fvec v;
|
||||
|
||||
public:
|
||||
|
||||
typedef fvec vector_type;
|
||||
typedef RealF scalar_type;
|
||||
|
||||
vRealF(){};
|
||||
vRealF()=default;
|
||||
vRealF(RealF a){
|
||||
vsplat(*this,a);
|
||||
};
|
||||
vRealF(Zero &zero){
|
||||
zeroit(*this);
|
||||
}
|
||||
////////////////////////////////////
|
||||
// Arithmetic operator overloads +,-,*
|
||||
////////////////////////////////////
|
||||
|
@ -5,11 +5,10 @@ using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
// Bare single-member aggregate holding one value of type T.  It declares no
// constructors or other special members, so its triviality follows directly
// from that of T.
template<typename T>
struct scal {
  T internal;  // the wrapped value
};
|
||||
|
||||
//template<class vobj> class is_pod< iScalar<vobj> >
|
||||
//{
|
||||
//
|
||||
//};
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
@ -40,13 +39,16 @@ int main (int argc, char ** argv)
|
||||
std::cout << " Is pod " << std::is_pod<SpinVector>::value << std::endl;
|
||||
std::cout << " Is pod double " << std::is_pod<double>::value << std::endl;
|
||||
std::cout << " Is pod ComplexF " << std::is_pod<ComplexF>::value << std::endl;
|
||||
std::cout << " Is pod scal<double> " << std::is_pod<scal<double> >::value << std::endl;
|
||||
std::cout << " Is triv double " << std::is_trivially_default_constructible<double>::value << std::endl;
|
||||
std::cout << " Is triv ComplexF " << std::is_trivially_default_constructible<ComplexF>::value << std::endl;
|
||||
std::cout << " Is pod Scalar<double> " << std::is_pod<iScalar<double> >::value << std::endl;
|
||||
std::cout << " Is pod Scalar<ComplexF> " << std::is_pod<iScalar<ComplexF> >::value << std::endl;
|
||||
std::cout << " Is pod Scalar<vComplexF> " << std::is_pod<iScalar<vComplexF> >::value << std::endl;
|
||||
std::cout << " Is pod Scalar<vComplexD> " << std::is_pod<iScalar<vComplexD> >::value << std::endl;
|
||||
std::cout << " Is pod Scalar<vRealF> " << std::is_pod<iScalar<vRealF> >::value << std::endl;
|
||||
std::cout << " Is pod Scalar<vRealD> " << std::is_pod<iScalar<vRealD> >::value << std::endl;
|
||||
std::cout << " Is triv Scalar<double> " <<std::is_trivially_default_constructible<iScalar<double> >::value << std::endl;
|
||||
std::cout << " Is triv Scalar<vComplexD> "<<std::is_trivially_default_constructible<iScalar<vComplexD> >::value << std::endl;
|
||||
|
||||
for(int a=0;a<Ns;a++){
|
||||
ident()(a,a) = 1.0;
|
||||
|
Loading…
Reference in New Issue
Block a user