1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-04 19:25:56 +01:00

Comms and memory benchmarks added

This commit is contained in:
Peter Boyle 2015-05-03 09:44:47 +01:00
parent 253362f978
commit 9d93d1e6d4
14 changed files with 300 additions and 59 deletions

View File

@ -25,17 +25,19 @@ int main (int argc, char ** argv)
for(int lat=4;lat<=16;lat+=4){ for(int lat=4;lat<=16;lat+=4){
for(int Ls=1;Ls<=16;Ls*=2){ for(int Ls=1;Ls<=16;Ls*=2){
std::vector<int> latt_size ({lat,lat,lat,lat});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
int ncomm;
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
double start=usecond(); double start=usecond();
int ncomm=0;
for(int i=0;i<Nloop;i++){ for(int i=0;i<Nloop;i++){
std::vector<int> latt_size ({lat,lat,lat,lat});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
std::vector<CartesianCommunicator::CommsRequest_t> requests; std::vector<CartesianCommunicator::CommsRequest_t> requests;
ncomm=0; ncomm=0;
@ -68,11 +70,10 @@ int main (int argc, char ** argv)
} }
} }
Grid.SendToRecvFromComplete(requests); Grid.SendToRecvFromComplete(requests);
Grid.Barrier(); Grid.Barrier();
}
}
double stop=usecond(); double stop=usecond();
double xbytes = Nloop*bytes*2*ncomm; double xbytes = Nloop*bytes*2*ncomm;
@ -96,18 +97,20 @@ int main (int argc, char ** argv)
for(int lat=4;lat<=16;lat+=4){ for(int lat=4;lat<=16;lat+=4){
for(int Ls=1;Ls<=16;Ls*=2){ for(int Ls=1;Ls<=16;Ls*=2){
std::vector<int> latt_size ({lat,lat,lat,lat});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
int ncomm;
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
double start=usecond(); double start=usecond();
int ncomm=0;
for(int i=0;i<Nloop;i++){ for(int i=0;i<Nloop;i++){
std::vector<int> latt_size ({lat,lat,lat,lat});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
ncomm=0; ncomm=0;
for(int mu=0;mu<4;mu++){ for(int mu=0;mu<4;mu++){
@ -131,7 +134,6 @@ int main (int argc, char ** argv)
} }
comm_proc = mpi_layout[mu]-1; comm_proc = mpi_layout[mu]-1;
{ {
std::vector<CartesianCommunicator::CommsRequest_t> requests; std::vector<CartesianCommunicator::CommsRequest_t> requests;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);

View File

@ -0,0 +1,150 @@
#include <Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> simd_layout({1,2,2,2});
std::vector<int> mpi_layout ({1,1,1,1});
const int Nvec=8;
typedef Lattice< iVector< vReal,Nvec> > LatticeVec;
int Nloop=100;
std::cout << "===================================================================================================="<<std::endl;
std::cout << "= Benchmarking AXPY bandwidth"<<std::endl;
std::cout << "===================================================================================================="<<std::endl;
std::cout << " L "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s"<<std::endl;
for(int lat=4;lat<=32;lat+=4){
std::vector<int> latt_size ({lat,lat,lat,lat});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
LatticeVec z(&Grid); random(pRNG,z);
LatticeVec x(&Grid); random(pRNG,x);
LatticeVec y(&Grid); random(pRNG,y);
double a=1.0;
double start=usecond();
for(int i=0;i<Nloop;i++){
// z=a*x+y;
// inline void axpy(Lattice<vobj> &ret,double a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
axpy(z,a,x,y);
}
double stop=usecond();
double time = stop-start;
double bytes=3*lat*lat*lat*lat*Nvec*sizeof(Real)*Nloop;
std::cout << lat<<"\t\t"<<bytes<<"\t\t"<<bytes/time<<std::endl;
}
std::cout << "===================================================================================================="<<std::endl;
std::cout << "= Benchmarking a*x + y bandwidth"<<std::endl;
std::cout << "===================================================================================================="<<std::endl;
std::cout << " L "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s"<<std::endl;
for(int lat=4;lat<=32;lat+=4){
std::vector<int> latt_size ({lat,lat,lat,lat});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
LatticeVec z(&Grid); random(pRNG,z);
LatticeVec x(&Grid); random(pRNG,x);
LatticeVec y(&Grid); random(pRNG,y);
double a=1.0;
double start=usecond();
for(int i=0;i<Nloop;i++){
z=a*x+y;
}
double stop=usecond();
double time = stop-start;
double bytes=3*lat*lat*lat*lat*Nvec*sizeof(Real)*Nloop;
std::cout << lat<<"\t\t"<<bytes<<"\t\t"<<bytes/time<<std::endl;
}
std::cout << "===================================================================================================="<<std::endl;
std::cout << "= Benchmarking COPY bandwidth"<<std::endl;
std::cout << "===================================================================================================="<<std::endl;
std::cout << " L "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s"<<std::endl;
for(int lat=4;lat<=32;lat+=4){
std::vector<int> latt_size ({lat,lat,lat,lat});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
LatticeVec z(&Grid); random(pRNG,z);
LatticeVec x(&Grid); random(pRNG,x);
LatticeVec y(&Grid); random(pRNG,y);
RealD a=1.0;
double start=usecond();
for(int i=0;i<Nloop;i++){
x=z;
}
double stop=usecond();
double time = stop-start;
double bytes=2*lat*lat*lat*lat*Nvec*sizeof(Real)*Nloop;
std::cout << lat<<"\t\t"<<bytes<<"\t\t"<<bytes/time<<std::endl;
}
std::cout << "===================================================================================================="<<std::endl;
std::cout << "= Benchmarking READ bandwidth"<<std::endl;
std::cout << "===================================================================================================="<<std::endl;
std::cout << " L "<<"\t\t"<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s"<<std::endl;
for(int lat=4;lat<=32;lat+=4){
std::vector<int> latt_size ({lat,lat,lat,lat});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
LatticeVec z(&Grid); random(pRNG,z);
LatticeVec x(&Grid); random(pRNG,x);
LatticeVec y(&Grid); random(pRNG,y);
RealD a=1.0;
ComplexD nn;
double start=usecond();
for(int i=0;i<Nloop;i++){
nn=norm2(x);
}
double stop=usecond();
double time = stop-start;
double bytes=lat*lat*lat*lat*Nvec*sizeof(Real)*Nloop;
std::cout << lat<<"\t\t"<<bytes<<"\t\t"<<bytes/time<<std::endl;
}
Grid_finalize();
}

View File

@ -14,6 +14,7 @@
#include <complex> #include <complex>
#include <vector> #include <vector>
#include <valarray>
#include <iostream> #include <iostream>
#include <cassert> #include <cassert>
#include <random> #include <random>

View File

@ -26,7 +26,8 @@ class Lattice
public: public:
GridBase *_grid; GridBase *_grid;
int checkerboard; int checkerboard;
std::vector<vobj,alignedAllocator<vobj> > _odata; //std::vector<vobj,alignedAllocator<vobj> > _odata;
std::valarray<vobj> _odata;
public: public:
typedef typename vobj::scalar_type scalar_type; typedef typename vobj::scalar_type scalar_type;
@ -36,9 +37,9 @@ public:
// Constructor requires "grid" passed. // Constructor requires "grid" passed.
// what about a default grid? // what about a default grid?
////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////
Lattice(GridBase *grid) : _grid(grid) { Lattice(GridBase *grid) : _grid(grid), _odata(_grid->oSites()) {
// _odata.reserve(_grid->oSites()); // _odata.reserve(_grid->oSites());
_odata.resize(_grid->oSites()); // _odata.resize(_grid->oSites());
assert((((uint64_t)&_odata[0])&0xF) ==0); assert((((uint64_t)&_odata[0])&0xF) ==0);
checkerboard=0; checkerboard=0;
} }

View File

@ -93,7 +93,7 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
MPI_Request rrq; MPI_Request rrq;
int rank = _processor; int rank = _processor;
int ierr; int ierr;
ierr=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq); ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq); ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
assert(ierr==0); assert(ierr==0);

View File

@ -3,6 +3,9 @@
namespace Grid { namespace Grid {
//////////////////////////////////////////////////////////////////////////////////////////////////////
// unary negation
//////////////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj> template<class vobj>
inline Lattice<vobj> operator -(const Lattice<vobj> &r) inline Lattice<vobj> operator -(const Lattice<vobj> &r)
{ {
@ -13,25 +16,10 @@ namespace Grid {
} }
return ret; return ret;
} }
template<class vobj> //////////////////////////////////////////////////////////////////////////////////////////////////////
inline void axpy(Lattice<vobj> &ret,double a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){ // avoid copy back routines for mult, mac, sub, add
conformable(lhs,rhs); //////////////////////////////////////////////////////////////////////////////////////////////////////
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
axpy(&ret._odata[ss],a,&lhs._odata[ss],&rhs._odata[ss]);
}
}
template<class vobj>
inline void axpy(Lattice<vobj> &ret,std::complex<double> a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
conformable(lhs,rhs);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
axpy(&ret._odata[ss],a,&lhs._odata[ss],&rhs._odata[ss]);
}
}
template<class obj1,class obj2,class obj3> template<class obj1,class obj2,class obj3>
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){ void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
conformable(lhs,rhs); conformable(lhs,rhs);
@ -69,7 +57,89 @@ namespace Grid {
} }
} }
//////////////////////////////////////////////////////////////////////////////////////////////////////
// avoid copy back routines for mult, mac, sub, add
//////////////////////////////////////////////////////////////////////////////////////////////////////
template<class obj1,class obj2,class obj3>
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
conformable(lhs,rhs);
uint32_t vec_len = lhs._grid->oSites();
#pragma omp parallel for
for(int ss=0;ss<vec_len;ss++){
mult(&ret._odata[ss],&lhs._odata[ss],&rhs);
}
}
template<class obj1,class obj2,class obj3>
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
conformable(lhs,rhs);
uint32_t vec_len = lhs._grid->oSites();
#pragma omp parallel for
for(int ss=0;ss<vec_len;ss++){
mac(&ret._odata[ss],&lhs._odata[ss],&rhs);
}
}
template<class obj1,class obj2,class obj3>
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
conformable(lhs,rhs);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
sub(&ret._odata[ss],&lhs._odata[ss],&rhs);
}
}
template<class obj1,class obj2,class obj3>
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
conformable(lhs,rhs);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
add(&ret._odata[ss],&lhs._odata[ss],&rhs);
}
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
// avoid copy back routines for mult, mac, sub, add
//////////////////////////////////////////////////////////////////////////////////////////////////////
template<class obj1,class obj2,class obj3>
void mult(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
conformable(lhs,rhs);
uint32_t vec_len = lhs._grid->oSites();
#pragma omp parallel for
for(int ss=0;ss<vec_len;ss++){
mult(&ret._odata[ss],&lhs,&rhs._odata[ss]);
}
}
template<class obj1,class obj2,class obj3>
void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
conformable(lhs,rhs);
uint32_t vec_len = lhs._grid->oSites();
#pragma omp parallel for
for(int ss=0;ss<vec_len;ss++){
mac(&ret._odata[ss],&lhs,&rhs._odata[ss]);
}
}
template<class obj1,class obj2,class obj3>
void sub(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
conformable(lhs,rhs);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
sub(&ret._odata[ss],&lhs,&rhs._odata[ss]);
}
}
template<class obj1,class obj2,class obj3>
void add(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
conformable(lhs,rhs);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
add(&ret._odata[ss],&lhs,&rhs._odata[ss]);
}
}
/////////////////////////////////////////////////////////////////////////////////////
// Lattice BinOp Lattice, // Lattice BinOp Lattice,
/////////////////////////////////////////////////////////////////////////////////////
template<class left,class right> template<class left,class right>
inline auto operator * (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])> inline auto operator * (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
{ {
@ -156,5 +226,17 @@ namespace Grid {
} }
return ret; return ret;
} }
template<class sobj,class vobj>
inline void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
conformable(lhs,rhs);
vobj tmp;
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
tmp = a*lhs._odata[ss];
ret._odata[ss]= tmp+rhs._odata[ss];
}
}
} }
#endif #endif

View File

@ -7,6 +7,7 @@ namespace Grid {
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////// MAC /////////////////////////////////////////// /////////////////////////////////////////// MAC ///////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////
/////////////////////////// ///////////////////////////
// Legal multiplication table // Legal multiplication table
@ -74,8 +75,6 @@ inline void mac(iVector<rrtype,N> * __restrict__ ret,const iVector<ltype,N> * __
} }
return; return;
} }
} }
#endif #endif

View File

@ -7,7 +7,6 @@ namespace Grid {
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////// MUL /////////////////////////////////////////// /////////////////////////////////////////// MUL ///////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
template<class rtype,class vtype,class mtype> template<class rtype,class vtype,class mtype>
inline void mult(iScalar<rtype> * __restrict__ ret,const iScalar<mtype> * __restrict__ lhs,const iScalar<vtype> * __restrict__ rhs){ inline void mult(iScalar<rtype> * __restrict__ ret,const iScalar<mtype> * __restrict__ lhs,const iScalar<vtype> * __restrict__ rhs){

View File

@ -16,7 +16,7 @@ namespace Grid {
// However note that doing this eliminates some syntactical sugar such as // However note that doing this eliminates some syntactical sugar such as
// calling the constructor explicitly or implicitly // calling the constructor explicitly or implicitly
// //
#define TENSOR_IS_POD #undef TENSOR_IS_POD
template<class vtype> class iScalar template<class vtype> class iScalar
{ {
@ -36,7 +36,7 @@ public:
// template<int Level> using tensor_reduce_level = typename iScalar<GridTypeMapper<vtype>::tensor_reduce_level<Level> >; // template<int Level> using tensor_reduce_level = typename iScalar<GridTypeMapper<vtype>::tensor_reduce_level<Level> >;
#ifndef TENSOR_IS_POD #ifndef TENSOR_IS_POD
iScalar(){;}; iScalar()=default;
iScalar(scalar_type s) : _internal(s) {};// recurse down and hit the constructor for vector_type iScalar(scalar_type s) : _internal(s) {};// recurse down and hit the constructor for vector_type
iScalar(const Zero &z){ *this = zero; }; iScalar(const Zero &z){ *this = zero; };
#endif #endif
@ -126,7 +126,7 @@ public:
#ifndef TENSOR_IS_POD #ifndef TENSOR_IS_POD
iVector(const Zero &z){ *this = zero; }; iVector(const Zero &z){ *this = zero; };
iVector() {};// Empty constructure iVector() =default;
#endif #endif
iVector<vtype,N> & operator= (const Zero &hero){ iVector<vtype,N> & operator= (const Zero &hero){
@ -189,7 +189,7 @@ public:
#ifndef TENSOR_IS_POD #ifndef TENSOR_IS_POD
iMatrix(const Zero &z){ *this = zero; }; iMatrix(const Zero &z){ *this = zero; };
iMatrix() {}; iMatrix() =default;
#endif #endif
iMatrix<vtype,N> & operator= (const Zero &hero){ iMatrix<vtype,N> & operator= (const Zero &hero){

View File

@ -13,7 +13,7 @@ namespace Grid {
vzero(*this); vzero(*this);
return (*this); return (*this);
} }
vComplexD(){}; vComplexD()=default;
vComplexD(ComplexD a){ vComplexD(ComplexD a){
vsplat(*this,a); vsplat(*this,a);
}; };

View File

@ -28,7 +28,7 @@ namespace Grid {
vzero(*this); vzero(*this);
return (*this); return (*this);
} }
vComplexF(){}; vComplexF()=default;
vComplexF(ComplexF a){ vComplexF(ComplexF a){
vsplat(*this,a); vsplat(*this,a);
}; };

View File

@ -10,10 +10,13 @@ namespace Grid {
typedef dvec vector_type; typedef dvec vector_type;
typedef RealD scalar_type; typedef RealD scalar_type;
vRealD(){}; vRealD()=default;
vRealD(RealD a){ vRealD(RealD a){
vsplat(*this,a); vsplat(*this,a);
}; };
vRealD(Zero &zero){
zeroit(*this);
}
friend inline void mult(vRealD * __restrict__ y,const vRealD * __restrict__ l,const vRealD *__restrict__ r) {*y = (*l) * (*r);} friend inline void mult(vRealD * __restrict__ y,const vRealD * __restrict__ l,const vRealD *__restrict__ r) {*y = (*l) * (*r);}
friend inline void sub (vRealD * __restrict__ y,const vRealD * __restrict__ l,const vRealD *__restrict__ r) {*y = (*l) - (*r);} friend inline void sub (vRealD * __restrict__ y,const vRealD * __restrict__ l,const vRealD *__restrict__ r) {*y = (*l) - (*r);}

View File

@ -8,14 +8,16 @@ namespace Grid {
fvec v; fvec v;
public: public:
typedef fvec vector_type; typedef fvec vector_type;
typedef RealF scalar_type; typedef RealF scalar_type;
vRealF(){}; vRealF()=default;
vRealF(RealF a){ vRealF(RealF a){
vsplat(*this,a); vsplat(*this,a);
}; };
vRealF(Zero &zero){
zeroit(*this);
}
//////////////////////////////////// ////////////////////////////////////
// Arithmetic operator overloads +,-,* // Arithmetic operator overloads +,-,*
//////////////////////////////////// ////////////////////////////////////

View File

@ -5,11 +5,10 @@ using namespace std;
using namespace Grid; using namespace Grid;
using namespace Grid::QCD; using namespace Grid::QCD;
template<class d> //template<class vobj> class is_pod< iScalar<vobj> >
struct scal { //{
d internal; //
}; //};
int main (int argc, char ** argv) int main (int argc, char ** argv)
{ {
@ -40,13 +39,16 @@ int main (int argc, char ** argv)
std::cout << " Is pod " << std::is_pod<SpinVector>::value << std::endl; std::cout << " Is pod " << std::is_pod<SpinVector>::value << std::endl;
std::cout << " Is pod double " << std::is_pod<double>::value << std::endl; std::cout << " Is pod double " << std::is_pod<double>::value << std::endl;
std::cout << " Is pod ComplexF " << std::is_pod<ComplexF>::value << std::endl; std::cout << " Is pod ComplexF " << std::is_pod<ComplexF>::value << std::endl;
std::cout << " Is pod scal<double> " << std::is_pod<scal<double> >::value << std::endl; std::cout << " Is triv double " << std::is_trivially_default_constructible<double>::value << std::endl;
std::cout << " Is triv ComplexF " << std::is_trivially_default_constructible<ComplexF>::value << std::endl;
std::cout << " Is pod Scalar<double> " << std::is_pod<iScalar<double> >::value << std::endl; std::cout << " Is pod Scalar<double> " << std::is_pod<iScalar<double> >::value << std::endl;
std::cout << " Is pod Scalar<ComplexF> " << std::is_pod<iScalar<ComplexF> >::value << std::endl; std::cout << " Is pod Scalar<ComplexF> " << std::is_pod<iScalar<ComplexF> >::value << std::endl;
std::cout << " Is pod Scalar<vComplexF> " << std::is_pod<iScalar<vComplexF> >::value << std::endl; std::cout << " Is pod Scalar<vComplexF> " << std::is_pod<iScalar<vComplexF> >::value << std::endl;
std::cout << " Is pod Scalar<vComplexD> " << std::is_pod<iScalar<vComplexD> >::value << std::endl; std::cout << " Is pod Scalar<vComplexD> " << std::is_pod<iScalar<vComplexD> >::value << std::endl;
std::cout << " Is pod Scalar<vRealF> " << std::is_pod<iScalar<vRealF> >::value << std::endl; std::cout << " Is pod Scalar<vRealF> " << std::is_pod<iScalar<vRealF> >::value << std::endl;
std::cout << " Is pod Scalar<vRealD> " << std::is_pod<iScalar<vRealD> >::value << std::endl; std::cout << " Is pod Scalar<vRealD> " << std::is_pod<iScalar<vRealD> >::value << std::endl;
std::cout << " Is triv Scalar<double> " <<std::is_trivially_default_constructible<iScalar<double> >::value << std::endl;
std::cout << " Is triv Scalar<vComplexD> "<<std::is_trivially_default_constructible<iScalar<vComplexD> >::value << std::endl;
for(int a=0;a<Ns;a++){ for(int a=0;a<Ns;a++){
ident()(a,a) = 1.0; ident()(a,a) = 1.0;