mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 21:44:33 +00:00 
			
		
		
		
	Comms and memory benchmarks added
This commit is contained in:
		@@ -25,17 +25,19 @@ int main (int argc, char ** argv)
 | 
			
		||||
  for(int lat=4;lat<=16;lat+=4){
 | 
			
		||||
    for(int Ls=1;Ls<=16;Ls*=2){
 | 
			
		||||
 | 
			
		||||
      std::vector<int> latt_size  ({lat,lat,lat,lat});
 | 
			
		||||
 | 
			
		||||
      GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
 | 
			
		||||
 | 
			
		||||
      std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
 | 
			
		||||
      std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
 | 
			
		||||
 | 
			
		||||
      int ncomm;
 | 
			
		||||
      int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
 | 
			
		||||
 | 
			
		||||
      double start=usecond();
 | 
			
		||||
      int ncomm=0;
 | 
			
		||||
      for(int i=0;i<Nloop;i++){
 | 
			
		||||
	std::vector<int> latt_size  ({lat,lat,lat,lat});
 | 
			
		||||
    
 | 
			
		||||
	GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
	std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
 | 
			
		||||
	std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
 | 
			
		||||
	std::vector<CartesianCommunicator::CommsRequest_t> requests;
 | 
			
		||||
 | 
			
		||||
	ncomm=0;
 | 
			
		||||
@@ -68,11 +70,10 @@ int main (int argc, char ** argv)
 | 
			
		||||
	  
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	Grid.SendToRecvFromComplete(requests);
 | 
			
		||||
	Grid.Barrier();
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      }
 | 
			
		||||
      double stop=usecond();
 | 
			
		||||
 | 
			
		||||
      double xbytes    = Nloop*bytes*2*ncomm;
 | 
			
		||||
@@ -96,18 +97,20 @@ int main (int argc, char ** argv)
 | 
			
		||||
  for(int lat=4;lat<=16;lat+=4){
 | 
			
		||||
    for(int Ls=1;Ls<=16;Ls*=2){
 | 
			
		||||
 | 
			
		||||
      std::vector<int> latt_size  ({lat,lat,lat,lat});
 | 
			
		||||
 | 
			
		||||
      GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
 | 
			
		||||
 | 
			
		||||
      std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
 | 
			
		||||
      std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
      int ncomm;
 | 
			
		||||
      int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
 | 
			
		||||
 | 
			
		||||
      double start=usecond();
 | 
			
		||||
      int ncomm=0;
 | 
			
		||||
      for(int i=0;i<Nloop;i++){
 | 
			
		||||
	std::vector<int> latt_size  ({lat,lat,lat,lat});
 | 
			
		||||
    
 | 
			
		||||
	GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
	std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
 | 
			
		||||
	std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
 | 
			
		||||
 | 
			
		||||
	ncomm=0;
 | 
			
		||||
	for(int mu=0;mu<4;mu++){
 | 
			
		||||
	
 | 
			
		||||
@@ -131,7 +134,6 @@ int main (int argc, char ** argv)
 | 
			
		||||
	    }
 | 
			
		||||
 | 
			
		||||
	    comm_proc = mpi_layout[mu]-1;
 | 
			
		||||
	  
 | 
			
		||||
	    {
 | 
			
		||||
	      std::vector<CartesianCommunicator::CommsRequest_t> requests;
 | 
			
		||||
	      Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										150
									
								
								benchmarks/Grid_memory_bandwidth.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										150
									
								
								benchmarks/Grid_memory_bandwidth.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,150 @@
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
using namespace Grid::QCD;
 | 
			
		||||
 | 
			
		||||
int main (int argc, char ** argv)
 | 
			
		||||
{
 | 
			
		||||
  Grid_init(&argc,&argv);
 | 
			
		||||
 | 
			
		||||
  std::vector<int> simd_layout({1,2,2,2});
 | 
			
		||||
  std::vector<int> mpi_layout ({1,1,1,1});
 | 
			
		||||
 | 
			
		||||
  const int Nvec=8;
 | 
			
		||||
  typedef Lattice< iVector< vReal,Nvec> > LatticeVec;
 | 
			
		||||
 | 
			
		||||
  int Nloop=100;
 | 
			
		||||
 | 
			
		||||
  std::cout << "===================================================================================================="<<std::endl;
 | 
			
		||||
  std::cout << "= Benchmarking AXPY bandwidth"<<std::endl;
 | 
			
		||||
  std::cout << "===================================================================================================="<<std::endl;
 | 
			
		||||
  std::cout << "  L  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s"<<std::endl;
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
  for(int lat=4;lat<=32;lat+=4){
 | 
			
		||||
 | 
			
		||||
      std::vector<int> latt_size  ({lat,lat,lat,lat});
 | 
			
		||||
 | 
			
		||||
      GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
 | 
			
		||||
 | 
			
		||||
      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice();
 | 
			
		||||
 | 
			
		||||
      LatticeVec z(&Grid); random(pRNG,z);
 | 
			
		||||
      LatticeVec x(&Grid); random(pRNG,x);
 | 
			
		||||
      LatticeVec y(&Grid); random(pRNG,y);
 | 
			
		||||
      double a=1.0;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
      double start=usecond();
 | 
			
		||||
      for(int i=0;i<Nloop;i++){
 | 
			
		||||
	//	z=a*x+y;
 | 
			
		||||
	//   inline void axpy(Lattice<vobj> &ret,double a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
 | 
			
		||||
	axpy(z,a,x,y);
 | 
			
		||||
      }
 | 
			
		||||
      double stop=usecond();
 | 
			
		||||
      double time = stop-start;
 | 
			
		||||
      
 | 
			
		||||
      double bytes=3*lat*lat*lat*lat*Nvec*sizeof(Real)*Nloop;
 | 
			
		||||
      std::cout << lat<<"\t\t"<<bytes<<"\t\t"<<bytes/time<<std::endl;
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  std::cout << "===================================================================================================="<<std::endl;
 | 
			
		||||
  std::cout << "= Benchmarking a*x + y bandwidth"<<std::endl;
 | 
			
		||||
  std::cout << "===================================================================================================="<<std::endl;
 | 
			
		||||
  std::cout << "  L  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s"<<std::endl;
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
  for(int lat=4;lat<=32;lat+=4){
 | 
			
		||||
 | 
			
		||||
      std::vector<int> latt_size  ({lat,lat,lat,lat});
 | 
			
		||||
 | 
			
		||||
      GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
 | 
			
		||||
 | 
			
		||||
      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice();
 | 
			
		||||
 | 
			
		||||
      LatticeVec z(&Grid); random(pRNG,z);
 | 
			
		||||
      LatticeVec x(&Grid); random(pRNG,x);
 | 
			
		||||
      LatticeVec y(&Grid); random(pRNG,y);
 | 
			
		||||
      double a=1.0;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
      double start=usecond();
 | 
			
		||||
      for(int i=0;i<Nloop;i++){
 | 
			
		||||
	z=a*x+y;
 | 
			
		||||
      }
 | 
			
		||||
      double stop=usecond();
 | 
			
		||||
      double time = stop-start;
 | 
			
		||||
      
 | 
			
		||||
      double bytes=3*lat*lat*lat*lat*Nvec*sizeof(Real)*Nloop;
 | 
			
		||||
      std::cout << lat<<"\t\t"<<bytes<<"\t\t"<<bytes/time<<std::endl;
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  std::cout << "===================================================================================================="<<std::endl;
 | 
			
		||||
  std::cout << "= Benchmarking COPY bandwidth"<<std::endl;
 | 
			
		||||
  std::cout << "===================================================================================================="<<std::endl;
 | 
			
		||||
  std::cout << "  L  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s"<<std::endl;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  for(int lat=4;lat<=32;lat+=4){
 | 
			
		||||
 | 
			
		||||
      std::vector<int> latt_size  ({lat,lat,lat,lat});
 | 
			
		||||
 | 
			
		||||
      GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
 | 
			
		||||
 | 
			
		||||
      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice();
 | 
			
		||||
 | 
			
		||||
      LatticeVec z(&Grid); random(pRNG,z);
 | 
			
		||||
      LatticeVec x(&Grid); random(pRNG,x);
 | 
			
		||||
      LatticeVec y(&Grid); random(pRNG,y);
 | 
			
		||||
      RealD a=1.0;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
      double start=usecond();
 | 
			
		||||
      for(int i=0;i<Nloop;i++){
 | 
			
		||||
	x=z;
 | 
			
		||||
      }
 | 
			
		||||
      double stop=usecond();
 | 
			
		||||
      double time = stop-start;
 | 
			
		||||
      
 | 
			
		||||
      double bytes=2*lat*lat*lat*lat*Nvec*sizeof(Real)*Nloop;
 | 
			
		||||
      std::cout << lat<<"\t\t"<<bytes<<"\t\t"<<bytes/time<<std::endl;
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  std::cout << "===================================================================================================="<<std::endl;
 | 
			
		||||
  std::cout << "= Benchmarking READ bandwidth"<<std::endl;
 | 
			
		||||
  std::cout << "===================================================================================================="<<std::endl;
 | 
			
		||||
  std::cout << "  L  "<<"\t\t"<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s"<<std::endl;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  for(int lat=4;lat<=32;lat+=4){
 | 
			
		||||
 | 
			
		||||
      std::vector<int> latt_size  ({lat,lat,lat,lat});
 | 
			
		||||
 | 
			
		||||
      GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
 | 
			
		||||
 | 
			
		||||
      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice();
 | 
			
		||||
 | 
			
		||||
      LatticeVec z(&Grid); random(pRNG,z);
 | 
			
		||||
      LatticeVec x(&Grid); random(pRNG,x);
 | 
			
		||||
      LatticeVec y(&Grid); random(pRNG,y);
 | 
			
		||||
      RealD a=1.0;
 | 
			
		||||
      ComplexD nn;
 | 
			
		||||
 | 
			
		||||
      double start=usecond();
 | 
			
		||||
      for(int i=0;i<Nloop;i++){
 | 
			
		||||
	nn=norm2(x);
 | 
			
		||||
      }
 | 
			
		||||
      double stop=usecond();
 | 
			
		||||
      double time = stop-start;
 | 
			
		||||
      
 | 
			
		||||
      double bytes=lat*lat*lat*lat*Nvec*sizeof(Real)*Nloop;
 | 
			
		||||
      std::cout << lat<<"\t\t"<<bytes<<"\t\t"<<bytes/time<<std::endl;
 | 
			
		||||
 | 
			
		||||
  }    
 | 
			
		||||
 | 
			
		||||
  Grid_finalize();
 | 
			
		||||
}
 | 
			
		||||
@@ -14,6 +14,7 @@
 | 
			
		||||
 | 
			
		||||
#include <complex>
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include <valarray>
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <cassert>
 | 
			
		||||
#include <random>
 | 
			
		||||
 
 | 
			
		||||
@@ -26,7 +26,8 @@ class Lattice
 | 
			
		||||
public:
 | 
			
		||||
    GridBase *_grid;
 | 
			
		||||
    int checkerboard;
 | 
			
		||||
    std::vector<vobj,alignedAllocator<vobj> > _odata;
 | 
			
		||||
    //std::vector<vobj,alignedAllocator<vobj> > _odata;
 | 
			
		||||
    std::valarray<vobj> _odata;
 | 
			
		||||
public:
 | 
			
		||||
 | 
			
		||||
    typedef typename vobj::scalar_type scalar_type;
 | 
			
		||||
@@ -36,9 +37,9 @@ public:
 | 
			
		||||
    // Constructor requires "grid" passed.
 | 
			
		||||
    // what about a default grid?
 | 
			
		||||
    //////////////////////////////////////////////////////////////////
 | 
			
		||||
    Lattice(GridBase *grid) : _grid(grid) {
 | 
			
		||||
 Lattice(GridBase *grid) : _grid(grid), _odata(_grid->oSites()) {
 | 
			
		||||
      //        _odata.reserve(_grid->oSites());
 | 
			
		||||
        _odata.resize(_grid->oSites());
 | 
			
		||||
      //        _odata.resize(_grid->oSites());
 | 
			
		||||
        assert((((uint64_t)&_odata[0])&0xF) ==0);
 | 
			
		||||
        checkerboard=0;
 | 
			
		||||
    }
 | 
			
		||||
 
 | 
			
		||||
@@ -93,7 +93,7 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
 | 
			
		||||
  MPI_Request rrq;
 | 
			
		||||
  int rank = _processor;
 | 
			
		||||
  int ierr;
 | 
			
		||||
  ierr=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
 | 
			
		||||
  ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
 | 
			
		||||
  ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
 | 
			
		||||
  
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
 
 | 
			
		||||
@@ -3,6 +3,9 @@
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // unary negation
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  template<class vobj>
 | 
			
		||||
  inline Lattice<vobj> operator -(const Lattice<vobj> &r)
 | 
			
		||||
  {
 | 
			
		||||
@@ -13,25 +16,10 @@ namespace Grid {
 | 
			
		||||
    }
 | 
			
		||||
    return ret;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template<class vobj>
 | 
			
		||||
  inline void axpy(Lattice<vobj> &ret,double a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
 | 
			
		||||
    conformable(lhs,rhs);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
      axpy(&ret._odata[ss],a,&lhs._odata[ss],&rhs._odata[ss]);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  template<class vobj>
 | 
			
		||||
  inline void axpy(Lattice<vobj> &ret,std::complex<double> a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
 | 
			
		||||
    conformable(lhs,rhs);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
      axpy(&ret._odata[ss],a,&lhs._odata[ss],&rhs._odata[ss]);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  //  avoid copy back routines for mult, mac, sub, add
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  template<class obj1,class obj2,class obj3>
 | 
			
		||||
    void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
 | 
			
		||||
    conformable(lhs,rhs);
 | 
			
		||||
@@ -69,7 +57,89 @@ namespace Grid {
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  //  avoid copy back routines for mult, mac, sub, add
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  template<class obj1,class obj2,class obj3>
 | 
			
		||||
    void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
 | 
			
		||||
    conformable(lhs,rhs);
 | 
			
		||||
    uint32_t vec_len = lhs._grid->oSites();
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<vec_len;ss++){
 | 
			
		||||
      mult(&ret._odata[ss],&lhs._odata[ss],&rhs);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template<class obj1,class obj2,class obj3>
 | 
			
		||||
    void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
 | 
			
		||||
    conformable(lhs,rhs);
 | 
			
		||||
    uint32_t vec_len = lhs._grid->oSites();
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<vec_len;ss++){
 | 
			
		||||
      mac(&ret._odata[ss],&lhs._odata[ss],&rhs);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template<class obj1,class obj2,class obj3>
 | 
			
		||||
    void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
 | 
			
		||||
    conformable(lhs,rhs);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
      sub(&ret._odata[ss],&lhs._odata[ss],&rhs);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  template<class obj1,class obj2,class obj3>
 | 
			
		||||
    void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
 | 
			
		||||
    conformable(lhs,rhs);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
      add(&ret._odata[ss],&lhs._odata[ss],&rhs);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  //  avoid copy back routines for mult, mac, sub, add
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  template<class obj1,class obj2,class obj3>
 | 
			
		||||
    void mult(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
 | 
			
		||||
    conformable(lhs,rhs);
 | 
			
		||||
    uint32_t vec_len = lhs._grid->oSites();
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<vec_len;ss++){
 | 
			
		||||
      mult(&ret._odata[ss],&lhs,&rhs._odata[ss]);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template<class obj1,class obj2,class obj3>
 | 
			
		||||
    void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
 | 
			
		||||
    conformable(lhs,rhs);
 | 
			
		||||
    uint32_t vec_len = lhs._grid->oSites();
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<vec_len;ss++){
 | 
			
		||||
      mac(&ret._odata[ss],&lhs,&rhs._odata[ss]);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template<class obj1,class obj2,class obj3>
 | 
			
		||||
    void sub(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
 | 
			
		||||
    conformable(lhs,rhs);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
      sub(&ret._odata[ss],&lhs,&rhs._odata[ss]);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  template<class obj1,class obj2,class obj3>
 | 
			
		||||
    void add(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
 | 
			
		||||
    conformable(lhs,rhs);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
      add(&ret._odata[ss],&lhs,&rhs._odata[ss]);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Lattice BinOp Lattice,
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  template<class left,class right>
 | 
			
		||||
    inline auto operator * (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
 | 
			
		||||
  {
 | 
			
		||||
@@ -156,5 +226,17 @@ namespace Grid {
 | 
			
		||||
      }
 | 
			
		||||
      return ret;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  template<class sobj,class vobj>
 | 
			
		||||
  inline void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
 | 
			
		||||
    conformable(lhs,rhs);
 | 
			
		||||
    vobj tmp;
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
      tmp = a*lhs._odata[ss];
 | 
			
		||||
      ret._odata[ss]= tmp+rhs._odata[ss];
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -7,6 +7,7 @@ namespace Grid {
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
/////////////////////////////////////////// MAC         ///////////////////////////////////////////
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    ///////////////////////////
 | 
			
		||||
 | 
			
		||||
    ///////////////////////////
 | 
			
		||||
    // Legal multiplication table
 | 
			
		||||
@@ -74,8 +75,6 @@ inline void mac(iVector<rrtype,N> * __restrict__ ret,const iVector<ltype,N> * __
 | 
			
		||||
    }
 | 
			
		||||
    return;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -7,7 +7,6 @@ namespace Grid {
 | 
			
		||||
    ///////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    /////////////////////////////////////////// MUL         ///////////////////////////////////////////
 | 
			
		||||
    ///////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
template<class rtype,class vtype,class mtype>
 | 
			
		||||
inline void mult(iScalar<rtype> * __restrict__ ret,const iScalar<mtype> * __restrict__ lhs,const iScalar<vtype> * __restrict__ rhs){
 | 
			
		||||
 
 | 
			
		||||
@@ -16,7 +16,7 @@ namespace Grid {
 | 
			
		||||
// However note that doing this eliminates some syntactical sugar such as 
 | 
			
		||||
// calling the constructor explicitly or implicitly
 | 
			
		||||
//
 | 
			
		||||
#define TENSOR_IS_POD
 | 
			
		||||
#undef TENSOR_IS_POD
 | 
			
		||||
 | 
			
		||||
template<class vtype> class iScalar
 | 
			
		||||
{
 | 
			
		||||
@@ -36,7 +36,7 @@ public:
 | 
			
		||||
  //  template<int Level> using tensor_reduce_level = typename iScalar<GridTypeMapper<vtype>::tensor_reduce_level<Level> >;
 | 
			
		||||
 | 
			
		||||
#ifndef TENSOR_IS_POD
 | 
			
		||||
  iScalar(){;};
 | 
			
		||||
  iScalar()=default;
 | 
			
		||||
  iScalar(scalar_type s) : _internal(s) {};// recurse down and hit the constructor for vector_type
 | 
			
		||||
  iScalar(const Zero &z){ *this = zero; };
 | 
			
		||||
#endif
 | 
			
		||||
@@ -126,7 +126,7 @@ public:
 | 
			
		||||
 | 
			
		||||
#ifndef TENSOR_IS_POD
 | 
			
		||||
  iVector(const Zero &z){ *this = zero; };
 | 
			
		||||
  iVector() {};// Empty constructure
 | 
			
		||||
  iVector() =default;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
    iVector<vtype,N> & operator= (const Zero &hero){
 | 
			
		||||
@@ -189,7 +189,7 @@ public:
 | 
			
		||||
 | 
			
		||||
#ifndef TENSOR_IS_POD
 | 
			
		||||
  iMatrix(const Zero &z){ *this = zero; };
 | 
			
		||||
  iMatrix() {};
 | 
			
		||||
  iMatrix() =default;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  iMatrix<vtype,N> & operator= (const Zero &hero){
 | 
			
		||||
 
 | 
			
		||||
@@ -13,7 +13,7 @@ namespace Grid {
 | 
			
		||||
            vzero(*this);
 | 
			
		||||
            return (*this);
 | 
			
		||||
        }
 | 
			
		||||
        vComplexD(){};
 | 
			
		||||
        vComplexD()=default;
 | 
			
		||||
        vComplexD(ComplexD a){
 | 
			
		||||
	  vsplat(*this,a);
 | 
			
		||||
	};
 | 
			
		||||
 
 | 
			
		||||
@@ -28,7 +28,7 @@ namespace Grid {
 | 
			
		||||
            vzero(*this);
 | 
			
		||||
            return (*this);
 | 
			
		||||
        }
 | 
			
		||||
        vComplexF(){};
 | 
			
		||||
        vComplexF()=default;
 | 
			
		||||
        vComplexF(ComplexF a){
 | 
			
		||||
	  vsplat(*this,a);
 | 
			
		||||
	};
 | 
			
		||||
 
 | 
			
		||||
@@ -10,10 +10,13 @@ namespace Grid {
 | 
			
		||||
	typedef dvec  vector_type;
 | 
			
		||||
	typedef RealD scalar_type;
 | 
			
		||||
 | 
			
		||||
        vRealD(){};
 | 
			
		||||
        vRealD()=default;
 | 
			
		||||
        vRealD(RealD a){
 | 
			
		||||
	  vsplat(*this,a);
 | 
			
		||||
	};
 | 
			
		||||
        vRealD(Zero &zero){
 | 
			
		||||
	  zeroit(*this);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
        friend inline void mult(vRealD * __restrict__ y,const vRealD * __restrict__ l,const vRealD *__restrict__ r) {*y = (*l) * (*r);}
 | 
			
		||||
        friend inline void sub (vRealD * __restrict__ y,const vRealD * __restrict__ l,const vRealD *__restrict__ r) {*y = (*l) - (*r);}
 | 
			
		||||
 
 | 
			
		||||
@@ -8,14 +8,16 @@ namespace Grid {
 | 
			
		||||
        fvec v;
 | 
			
		||||
 | 
			
		||||
    public:
 | 
			
		||||
 | 
			
		||||
	typedef fvec  vector_type;
 | 
			
		||||
	typedef RealF scalar_type;
 | 
			
		||||
 | 
			
		||||
        vRealF(){};
 | 
			
		||||
        vRealF()=default;
 | 
			
		||||
        vRealF(RealF a){
 | 
			
		||||
	  vsplat(*this,a);
 | 
			
		||||
	};
 | 
			
		||||
        vRealF(Zero &zero){
 | 
			
		||||
	  zeroit(*this);
 | 
			
		||||
	}
 | 
			
		||||
        ////////////////////////////////////
 | 
			
		||||
        // Arithmetic operator overloads +,-,*
 | 
			
		||||
        ////////////////////////////////////
 | 
			
		||||
 
 | 
			
		||||
@@ -5,11 +5,10 @@ using namespace std;
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
using namespace Grid::QCD;
 | 
			
		||||
 | 
			
		||||
template<class d>
 | 
			
		||||
struct scal {
 | 
			
		||||
  d internal;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
//template<class vobj> class is_pod< iScalar<vobj> >
 | 
			
		||||
//{
 | 
			
		||||
//
 | 
			
		||||
//};
 | 
			
		||||
 | 
			
		||||
int main (int argc, char ** argv)
 | 
			
		||||
{
 | 
			
		||||
@@ -40,13 +39,16 @@ int main (int argc, char ** argv)
 | 
			
		||||
  std::cout << " Is pod " << std::is_pod<SpinVector>::value  << std::endl;
 | 
			
		||||
  std::cout << " Is pod double   " << std::is_pod<double>::value  << std::endl;
 | 
			
		||||
  std::cout << " Is pod ComplexF " << std::is_pod<ComplexF>::value  << std::endl;
 | 
			
		||||
  std::cout << " Is pod scal<double> " << std::is_pod<scal<double> >::value  << std::endl;
 | 
			
		||||
  std::cout << " Is triv double " << std::is_trivially_default_constructible<double>::value  << std::endl;
 | 
			
		||||
  std::cout << " Is triv ComplexF " << std::is_trivially_default_constructible<ComplexF>::value  << std::endl;
 | 
			
		||||
  std::cout << " Is pod Scalar<double> " << std::is_pod<iScalar<double> >::value  << std::endl;
 | 
			
		||||
  std::cout << " Is pod Scalar<ComplexF> " << std::is_pod<iScalar<ComplexF> >::value  << std::endl;
 | 
			
		||||
  std::cout << " Is pod Scalar<vComplexF> " << std::is_pod<iScalar<vComplexF> >::value  << std::endl;
 | 
			
		||||
  std::cout << " Is pod Scalar<vComplexD> " << std::is_pod<iScalar<vComplexD> >::value  << std::endl;
 | 
			
		||||
  std::cout << " Is pod Scalar<vRealF> " << std::is_pod<iScalar<vRealF> >::value  << std::endl;
 | 
			
		||||
  std::cout << " Is pod Scalar<vRealD> " << std::is_pod<iScalar<vRealD> >::value  << std::endl;
 | 
			
		||||
  std::cout << " Is triv Scalar<double> " <<std::is_trivially_default_constructible<iScalar<double> >::value << std::endl;
 | 
			
		||||
  std::cout << " Is triv Scalar<vComplexD> "<<std::is_trivially_default_constructible<iScalar<vComplexD> >::value  << std::endl;
 | 
			
		||||
 | 
			
		||||
  for(int a=0;a<Ns;a++){
 | 
			
		||||
    ident()(a,a) = 1.0;
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user