Mirror of https://github.com/paboyle/Grid.git (synced 2025-11-04 05:54:32 +00:00)

Merge commit '20a091c3eddfdb67a82ece6413740a93650a2f98' into feature/feynman-rules
@@ -244,6 +244,9 @@ case ${ac_COMMS} in
     mpi)
       AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
     ;;
+    mpi3)
+      AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] )
+    ;;
     shmem)
       AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] )
     ;;
@@ -253,6 +256,7 @@ case ${ac_COMMS} in
 esac
 AM_CONDITIONAL(BUILD_COMMS_SHMEM,[ test "X${ac_COMMS}X" == "XshmemX" ])
 AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" == "XmpiX" || test "X${ac_COMMS}X" == "Xmpi-autoX" ])
+AM_CONDITIONAL(BUILD_COMMS_MPI3,[ test "X${ac_COMMS}X" == "Xmpi3X"] )
 AM_CONDITIONAL(BUILD_COMMS_NONE,[ test "X${ac_COMMS}X" == "XnoneX" ])
 
 ############### RNG selection
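Note: taken together, the two configure hunks above add mpi3 as a third comms target beside mpi and shmem. Assuming the existing --enable-comms option is what sets ${ac_COMMS} (as it does for the other targets), the new transport would be selected with ./configure --enable-comms=mpi3, and the BUILD_COMMS_MPI3 conditional then pulls lib/communicator/Communicator_mpi3.cc into the build (see the Makefile.am hunk below).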
@@ -40,14 +40,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #include <mm_malloc.h>
 #endif
 
-#ifdef GRID_COMMS_SHMEM
-extern "C" { 
-#include <mpp/shmem.h>
-extern void * shmem_align(size_t, size_t);
-extern void  shmem_free(void *);
-}
-#endif
-
 namespace Grid {
 
 ////////////////////////////////////////////////////////////////////
@@ -65,28 +57,85 @@ public:
   typedef _Tp        value_type;
 
   template<typename _Tp1>  struct rebind { typedef alignedAllocator<_Tp1> other; };
-
   alignedAllocator() throw() { }
-
   alignedAllocator(const alignedAllocator&) throw() { }
-
   template<typename _Tp1> alignedAllocator(const alignedAllocator<_Tp1>&) throw() { }
-
   ~alignedAllocator() throw() { }
-
   pointer       address(reference __x)       const { return &__x; }
-  //  const_pointer address(const_reference __x) const { return &__x; }
-
   size_type  max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
 
   pointer allocate(size_type __n, const void* _p= 0)
   { 
+#ifdef HAVE_MM_MALLOC_H
+    _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128);
+#else
+    _Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp));
+#endif
+
+    _Tp tmp;
+#ifdef GRID_NUMA
+#pragma omp parallel for schedule(static)
+  for(int i=0;i<__n;i++){
+    ptr[i]=tmp;
+  }
+#endif 
+    return ptr;
+  }
+
+  void deallocate(pointer __p, size_type) { 
+#ifdef HAVE_MM_MALLOC_H
+    _mm_free((void *)__p); 
+#else
+    free((void *)__p);
+#endif
+  }
+  void construct(pointer __p, const _Tp& __val) { };
+  void construct(pointer __p) { };
+  void destroy(pointer __p) { };
+};
+template<typename _Tp>  inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
+template<typename _Tp>  inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// MPI3 : comms must use shm region
+// SHMEM: comms must use symmetric heap
+//////////////////////////////////////////////////////////////////////////////////////////
 #ifdef GRID_COMMS_SHMEM
-    _Tp *ptr = (_Tp *) shmem_align(__n*sizeof(_Tp),64);
+extern "C" { 
+#include <mpp/shmem.h>
+extern void * shmem_align(size_t, size_t);
+extern void  shmem_free(void *);
+}
 #define PARANOID_SYMMETRIC_HEAP
+#endif
+
+template<typename _Tp>
+class commAllocator {
+public: 
+  typedef std::size_t     size_type;
+  typedef std::ptrdiff_t  difference_type;
+  typedef _Tp*       pointer;
+  typedef const _Tp* const_pointer;
+  typedef _Tp&       reference;
+  typedef const _Tp& const_reference;
+  typedef _Tp        value_type;
+
+  template<typename _Tp1>  struct rebind { typedef commAllocator<_Tp1> other; };
+  commAllocator() throw() { }
+  commAllocator(const commAllocator&) throw() { }
+  template<typename _Tp1> commAllocator(const commAllocator<_Tp1>&) throw() { }
+  ~commAllocator() throw() { }
+  pointer       address(reference __x)       const { return &__x; }
+  size_type  max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
+
+#ifdef GRID_COMMS_SHMEM
+  pointer allocate(size_type __n, const void* _p= 0)
+  {
+#ifdef CRAY
+    _Tp *ptr = (_Tp *) shmem_align(__n*sizeof(_Tp),64);
+#else
+    _Tp *ptr = (_Tp *) shmem_align(64,__n*sizeof(_Tp));
+#endif
 #ifdef PARANOID_SYMMETRIC_HEAP
     static void * bcast;
     static long  psync[_SHMEM_REDUCE_SYNC_SIZE];
@@ -99,51 +148,53 @@ public:
       BACKTRACEFILE();
       exit(0);
     }
-
     assert( bcast == (void *) ptr);
-
 #endif 
-#else
+    return ptr;
+  }
+  void deallocate(pointer __p, size_type) { 
+    shmem_free((void *)__p);
+  }
+#elif defined(GRID_COMMS_MPI3)
+  pointer allocate(size_type __n, const void* _p= 0)
+  { 
+#error "implement MPI3 windowed allocate"
     
+  }
+  void deallocate(pointer __p, size_type) { 
+#error "implement MPI3 windowed allocate"
+  }
+#else
+  pointer allocate(size_type __n, const void* _p= 0) 
+  {
 #ifdef HAVE_MM_MALLOC_H
     _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128);
 #else
     _Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp));
 #endif
-
-#endif
-    _Tp tmp;
-#ifdef GRID_NUMA
-#pragma omp parallel for schedule(static)
-  for(int i=0;i<__n;i++){
-    ptr[i]=tmp;
-  }
-#endif 
     return ptr;
   }
-
   void deallocate(pointer __p, size_type) { 
-#ifdef GRID_COMMS_SHMEM
-    shmem_free((void *)__p);
-#else
 #ifdef HAVE_MM_MALLOC_H
     _mm_free((void *)__p); 
 #else
     free((void *)__p);
 #endif
-#endif
   }
+#endif
   void construct(pointer __p, const _Tp& __val) { };
   void construct(pointer __p) { };
-
   void destroy(pointer __p) { };
 };
-template<typename _Tp>  inline bool
-operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
-template<typename _Tp>  inline bool
-operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
+template<typename _Tp>  inline bool operator==(const commAllocator<_Tp>&, const commAllocator<_Tp>&){ return true; }
+template<typename _Tp>  inline bool operator!=(const commAllocator<_Tp>&, const commAllocator<_Tp>&){ return false; }
+
+////////////////////////////////////////////////////////////////////////////////
+// Template typedefs
+////////////////////////////////////////////////////////////////////////////////
+template<class T> using Vector     = std::vector<T,alignedAllocator<T> >;
+template<class T> using commVector = std::vector<T,commAllocator<T> >;
+template<class T> using Matrix     = std::vector<std::vector<T,alignedAllocator<T> > >;
     
 }; // namespace Grid
 #endif
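For readers unfamiliar with the std::allocator plumbing these hunks rely on, here is a standalone sketch (not part of the commit) of the same pattern: a minimal vector-compatible allocator returning 128-byte aligned storage, playing the role alignedAllocator fills via _mm_malloc/memalign. The real commAllocator has the same shape with the allocation primitive swapped for the transport's (shmem_align under SHMEM, an MPI3 shm window once implemented), which is exactly why commVector buffers are eligible for the comms layer.

// Minimal sketch, assuming only POSIX and C++11; not Grid's code.
#include <cstdlib>
#include <new>
#include <vector>

template<typename T>
struct AlignedAlloc {
  typedef T value_type;
  AlignedAlloc() {}
  template<typename U> AlignedAlloc(const AlignedAlloc<U>&) {}
  T* allocate(std::size_t n) {
    void *p = 0;
    // 128-byte alignment, mirroring the _mm_malloc/memalign calls above
    if (posix_memalign(&p, 128, n*sizeof(T)) != 0) throw std::bad_alloc();
    return static_cast<T*>(p);
  }
  void deallocate(T *p, std::size_t) { std::free(p); }
};
template<typename T, typename U>
bool operator==(const AlignedAlloc<T>&, const AlignedAlloc<U>&) { return true; }
template<typename T, typename U>
bool operator!=(const AlignedAlloc<T>&, const AlignedAlloc<U>&) { return false; }

// Usage mirrors the Vector/commVector template typedefs introduced above.
template<class T> using AVector = std::vector<T, AlignedAlloc<T> >;

int main() {
  AVector<double> buf(1024); // storage comes back 128-byte aligned
  buf[512] = 1.0;
  return (buf[512] == 1.0) ? 0 : 1;
}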
@@ -38,6 +38,10 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #include <Grid/cshift/Cshift_mpi.h>
 #endif 
 
+#ifdef GRID_COMMS_MPI3
+#include <Grid/cshift/Cshift_mpi.h>
+#endif 
+
 #ifdef GRID_COMMS_SHMEM
 #include <Grid/cshift/Cshift_mpi.h> // uses same implementation of communicator
 #endif 
@@ -3,6 +3,10 @@ if BUILD_COMMS_MPI
   extra_sources+=communicator/Communicator_mpi.cc
 endif
 
+if BUILD_COMMS_MPI3
+  extra_sources+=communicator/Communicator_mpi3.cc
+endif
+
 if BUILD_COMMS_SHMEM
   extra_sources+=communicator/Communicator_shmem.cc
 endif
@@ -71,7 +71,7 @@
 namespace Grid {
 
 template<class vobj,class cobj,class compressor> void 
-Gather_plane_simple_table_compute (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,int dimension,int plane,int cbmask,compressor &compress, int off,std::vector<std::pair<int,int> >& table)
+Gather_plane_simple_table_compute (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimension,int plane,int cbmask,compressor &compress, int off,std::vector<std::pair<int,int> >& table)
 {
   table.resize(0);
   int rd = rhs._grid->_rdimensions[dimension];
@@ -109,7 +109,7 @@ Gather_plane_simple_table_compute (const Lattice<vobj> &rhs,std::vector<cobj,ali
 }
 
 template<class vobj,class cobj,class compressor> void 
-Gather_plane_simple_table (std::vector<std::pair<int,int> >& table,const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,
+Gather_plane_simple_table (std::vector<std::pair<int,int> >& table,const Lattice<vobj> &rhs,commVector<cobj> &buffer,
 			   compressor &compress, int off,int so)
 {
 PARALLEL_FOR_LOOP     
@@ -119,7 +119,7 @@ PARALLEL_FOR_LOOP
 }
 
 template<class vobj,class cobj,class compressor> void 
-Gather_plane_simple_stencil (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,int dimension,int plane,int cbmask,compressor &compress, int off,
+Gather_plane_simple_stencil (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimension,int plane,int cbmask,compressor &compress, int off,
 			     double &t_table ,double & t_data )
 {
   std::vector<std::pair<int,int> > table;
@@ -347,10 +347,10 @@ Gather_plane_simple_stencil (const Lattice<vobj> &rhs,std::vector<cobj,alignedAl
       }
 
       // Comms buffers
-      std::vector<Vector<scalar_object> > u_simd_send_buf;
-      std::vector<Vector<scalar_object> > u_simd_recv_buf;
-      Vector<cobj>          u_send_buf;
-      Vector<cobj>          comm_buf;
+      std::vector<commVector<scalar_object> > u_simd_send_buf;
+      std::vector<commVector<scalar_object> > u_simd_recv_buf;
+      commVector<cobj>          u_send_buf;
+      commVector<cobj>          comm_buf;
       int u_comm_offset;
       int _unified_buffer_size;
@@ -34,6 +34,9 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #ifdef GRID_COMMS_MPI
 #include <mpi.h>
 #endif
+#ifdef GRID_COMMS_MPI3
+#include <mpi.h>
+#endif
 #ifdef GRID_COMMS_SHMEM
 #include <mpp/shmem.h>
 #endif
@@ -52,6 +55,29 @@ class CartesianCommunicator {
 #ifdef GRID_COMMS_MPI
     MPI_Comm communicator;
     typedef MPI_Request CommsRequest_t;
+#elif  GRID_COMMS_MPI3
+    MPI_Comm communicator;
+    typedef MPI_Request CommsRequest_t;
+
+    const int MAXLOG2RANKSPERNODE = 16; // 65536 ranks per node adequate for now
+
+    std::vector<int>  WorldDims;
+    std::vector<int>  GroupDims;
+    std::vector<int>  ShmDims;
+
+    std::vector<int> GroupCoor;
+    std::vector<int> ShmCoor;
+    std::vector<int> WorldCoor;
+
+    int GroupRank;
+    int ShmRank;
+    int WorldRank;
+
+    int GroupSize;
+    int ShmSize;
+    int WorldSize;
+
+    std::vector<int>  LexicographicToWorldRank;
 #else 
     typedef int CommsRequest_t;
 #endif
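The new MPI3 fields encode a two-level decomposition: WorldDims, the full processor grid, factorizes elementwise into GroupDims (the inter-node torus) times ShmDims (the ranks sharing a node). A standalone sketch (not from the commit) of the dimension-spreading loop that Communicator_mpi3.cc uses to build ShmDims, with an assumed 4^4 rank grid and 8 ranks per node:

// Minimal sketch under assumed example dimensions; not Grid's code.
#include <iostream>
#include <vector>

int main() {
  std::vector<int> WorldDims = {4, 4, 4, 4};
  int nd = WorldDims.size();
  int log2size = 3; // 2^3 = 8 ranks per node (assumed)

  // Peel factors of two off successive dimensions, as the constructor in
  // Communicator_mpi3.cc does, spreading node-local ranks symmetrically.
  std::vector<int> ShmDims(nd, 1), GroupDims(nd);
  int dim = 0;
  for (int l2 = 0; l2 < log2size; l2++) {
    while (WorldDims[dim] / ShmDims[dim] <= 1) dim = (dim + 1) % nd;
    ShmDims[dim] *= 2;
    dim = (dim + 1) % nd;
  }
  for (int d = 0; d < nd; d++) GroupDims[d] = WorldDims[d] / ShmDims[d];

  // Prints "4 = 2 x 2" three times and "4 = 4 x 1" for the last dimension.
  for (int d = 0; d < nd; d++)
    std::cout << WorldDims[d] << " = " << GroupDims[d] << " x " << ShmDims[d] << std::endl;
  return 0;
}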
							
								
								
									
lib/communicator/Communicator_mpi3.cc (new file, 358 lines)
@@ -0,0 +1,358 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./lib/communicator/Communicator_mpi.cc
+
+    Copyright (C) 2015
+
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#include "Grid.h"
+#include <mpi.h>
+
+namespace Grid {
+
+// Global used by Init and nowhere else. How to hide?
+int Rank(void) {
+  int pe;
+  MPI_Comm_rank(MPI_COMM_WORLD,&pe);
+  return pe;
+}
+  // Should error check all MPI calls.
+void CartesianCommunicator::Init(int *argc, char ***argv) {
+  int flag;
+  MPI_Initialized(&flag); // needed to coexist with other libs apparently
+  if ( !flag ) {
+    MPI_Init(argc,argv);
+  }
+}
+  ////////////////////////////////////////////////////////////////////////////////////////////////////////////
+  // Want to implement some magic ... Group sub-cubes into those on same node
+  //
+  ////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
+{
+  std::vector<int> coor = _processor_coor;
+
+  assert(std::abs(shift) <_processors[dim]);
+
+  coor[dim] = (_processor_coor[dim] + shift + _processors[dim])%_processors[dim];
+  Lexicographic::IndexFromCoor(coor,source,_processors);
+  source = LexicographicToWorldRank[source];
+
+  coor[dim] = (_processor_coor[dim] - shift + _processors[dim])%_processors[dim];
+  Lexicographic::IndexFromCoor(coor,dest,_processors);
+  dest = LexicographicToWorldRank[dest];
+}
+int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
+{
+  int rank;
+  Lexicographic::IndexFromCoor(coor,rank,_processors);
+  rank = LexicographicToWorldRank[rank];
+  return rank;
+}
+void  CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
+{
+  Lexicographic::CoorFromIndex(coor,rank,_processors);
+  rank = LexicographicToWorldRank[rank];
+}
+
+CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
+{ 
+  _ndimension = processors.size();
+  std::cout << "Creating "<< _ndimension << " dim communicator "<<std::endl;
+  for(int d =0;d<_ndimension;d++){
+    std::cout << processors[d]<<" ";
+  };
+  std::cout << std::endl;
+
+  WorldDims = processors;
+
+  communicator = MPI_COMM_WORLD;
+  MPI_Comm shmcomm;
+  MPI_Comm_split_type(communicator, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&shmcomm);
+  MPI_Comm_rank(communicator,&WorldRank);
+  MPI_Comm_size(communicator,&WorldSize);
+  MPI_Comm_rank(shmcomm     ,&ShmRank);
+  MPI_Comm_size(shmcomm     ,&ShmSize);
+  GroupSize = WorldSize/ShmSize;
+
+  std::cout<< "Ranks per node "<< ShmSize << std::endl;
+  std::cout<< "Nodes          "<< GroupSize << std::endl;
+  std::cout<< "Ranks          "<< WorldSize << std::endl;
+
+  ////////////////////////////////////////////////////////////////
+  // Assert power of two shm_size.
+  ////////////////////////////////////////////////////////////////
+  int log2size = -1;
+  for(int i=0;i<=MAXLOG2RANKSPERNODE;i++){  
+    if ( (0x1<<i) == ShmSize ) {
+      log2size = i;
+      break;
+    }
+  }
+  assert(log2size != -1);
+
+  ////////////////////////////////////////////////////////////////
+  // Identify subblock of ranks on node spreading across dims
+  // in a maximally symmetrical way
+  ////////////////////////////////////////////////////////////////
+  int dim = 0;
+
+  ShmDims.resize(_ndimension,1);
+  GroupDims.resize(_ndimension);
+
+  ShmCoor.resize(_ndimension);
+  GroupCoor.resize(_ndimension);
+  WorldCoor.resize(_ndimension);
+  for(int l2=0;l2<log2size;l2++){
+    while ( WorldDims[dim] / ShmDims[dim] <= 1 ) dim=(dim+1)%_ndimension;
+    ShmDims[dim]*=2;
+    dim=(dim+1)%_ndimension;
+  }
+
+  std::cout << "Shm group dims "<<std::endl;
+  for(int d =0;d<_ndimension;d++){
+    std::cout << ShmDims[d]<<" ";
+  };
+  std::cout << std::endl;
+
+  ////////////////////////////////////////////////////////////////
+  // Establish torus of processes and nodes with sub-blockings
+  ////////////////////////////////////////////////////////////////
+  for(int d=0;d<_ndimension;d++){
+    GroupDims[d] = WorldDims[d]/ShmDims[d];
+  }
+  std::cout << "Group dims "<<std::endl;
+  for(int d =0;d<_ndimension;d++){
+    std::cout << GroupDims[d]<<" ";
+  };
+  std::cout << std::endl;
+
+  MPI_Group WorldGroup, ShmGroup;
+  MPI_Comm_group (communicator, &WorldGroup); 
+  MPI_Comm_group (shmcomm, &ShmGroup);
+
+  std::vector<int> world_ranks(WorldSize); 
+  std::vector<int> group_ranks(WorldSize); 
+  std::vector<int> mygroup(GroupSize);
+  for(int r=0;r<WorldSize;r++) world_ranks[r]=r;
+
+  MPI_Group_translate_ranks (WorldGroup,WorldSize,&world_ranks[0],ShmGroup, &group_ranks[0]); 
+
+  ////////////////////////////////////////////////////////////////
+  // Check processor counts match
+  ////////////////////////////////////////////////////////////////
+  _Nprocessors=1;
+  _processors = processors;
+  _processor_coor.resize(_ndimension);
+  for(int i=0;i<_ndimension;i++){
+    std::cout << " p " << _processors[i]<<std::endl;
+    _Nprocessors*=_processors[i];
+  }
+  std::cout << " World " <<WorldSize <<" Nproc "<<_Nprocessors<<std::endl;
+  assert(WorldSize==_Nprocessors);
+
+  ///////////////////////////////////////////////////////////////////
+  // Identify who is in my group and nominate the leader
+  ///////////////////////////////////////////////////////////////////
+  int g=0;
+  for(int rank=0;rank<WorldSize;rank++){
+    if(group_ranks[rank]!=MPI_UNDEFINED){
+      mygroup[g] = rank;
+    }
+  }
+
+  std::sort(mygroup.begin(),mygroup.end(),std::greater<int>());
+  int myleader = mygroup[0];
+
+  std::vector<int> leaders_1hot(WorldSize,0);
+  std::vector<int> leaders_group(GroupSize,0);
+  leaders_1hot [ myleader ] = 1;
+
+  ///////////////////////////////////////////////////////////////////
+  // global sum leaders over comm world
+  ///////////////////////////////////////////////////////////////////
+  int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,communicator);
+  assert(ierr==0);
+
+  ///////////////////////////////////////////////////////////////////
+  // find the group leaders world rank
+  ///////////////////////////////////////////////////////////////////
+  int group=0;
+  for(int l=0;l<WorldSize;l++){
+    if(leaders_1hot[l]){
+      leaders_group[group++] = l;
+    }
+  }
+
+  ///////////////////////////////////////////////////////////////////
+  // Identify the rank of the group in which I (and my leader) live
+  ///////////////////////////////////////////////////////////////////
+  GroupRank=-1;
+  for(int g=0;g<GroupSize;g++){
+    if (myleader == leaders_group[g]){
+      GroupRank=g;
+    }
+  }
+  assert(GroupRank!=-1);
+
+  ////////////////////////////////////////////////////////////////
+  // Establish mapping between lexico physics coord and WorldRank
+  // 
+  ////////////////////////////////////////////////////////////////
+  LexicographicToWorldRank.resize(WorldSize,0);
+  Lexicographic::CoorFromIndex(GroupCoor,GroupRank,GroupDims);
+  Lexicographic::CoorFromIndex(ShmCoor,ShmRank,ShmDims);
+  for(int d=0;d<_ndimension;d++){
+    WorldCoor[d] = GroupCoor[d]*ShmDims[d]+ShmCoor[d];
+  }
+  _processor_coor = WorldCoor;
+
+  int lexico;
+  Lexicographic::IndexFromCoor(WorldCoor,lexico,WorldDims);
+  LexicographicToWorldRank[lexico]=WorldRank;
+  _processor = lexico;
+
+  ///////////////////////////////////////////////////////////////////
+  // global sum Lexico to World mapping
+  ///////////////////////////////////////////////////////////////////
+  ierr=MPI_Allreduce(MPI_IN_PLACE,&LexicographicToWorldRank[0],WorldSize,MPI_INT,MPI_SUM,communicator);
+  assert(ierr==0);
+
+};
+
+void CartesianCommunicator::GlobalSum(uint32_t &u){
+  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
+  assert(ierr==0);
+}
+void CartesianCommunicator::GlobalSum(uint64_t &u){
+  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
+  assert(ierr==0);
+}
+void CartesianCommunicator::GlobalSum(float &f){
+  int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
+  assert(ierr==0);
+}
+void CartesianCommunicator::GlobalSumVector(float *f,int N)
+{
+  int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator);
+  assert(ierr==0);
+}
+void CartesianCommunicator::GlobalSum(double &d)
+{
+  int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
+  assert(ierr==0);
+}
+void CartesianCommunicator::GlobalSumVector(double *d,int N)
+{
+  int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator);
+  assert(ierr==0);
+}
+
+
+// Basic Halo comms primitive
+void CartesianCommunicator::SendToRecvFrom(void *xmit,
+					   int dest,
+					   void *recv,
+					   int from,
+					   int bytes)
+{
+  std::vector<CommsRequest_t> reqs(0);
+  SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
+  SendToRecvFromComplete(reqs);
+}
+
+void CartesianCommunicator::SendRecvPacket(void *xmit,
+					   void *recv,
+					   int sender,
+					   int receiver,
+					   int bytes)
+{
+  MPI_Status stat;
+  assert(sender != receiver);
+  int tag = sender;
+  if ( _processor == sender ) {
+    MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
+  }
+  if ( _processor == receiver ) { 
+    MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
+  }
+}
+
+// Basic Halo comms primitive
+void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
+						void *xmit,
+						int dest,
+						void *recv,
+						int from,
+						int bytes)
+{
+  MPI_Request xrq;
+  MPI_Request rrq;
+  int rank = _processor;
+  int ierr;
+  ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
+  ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
+
+  assert(ierr==0);
+
+  list.push_back(xrq);
+  list.push_back(rrq);
+}
+void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
+{
+  int nreq=list.size();
+  std::vector<MPI_Status> status(nreq);
+  int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
+
+  assert(ierr==0);
+}
+
+void CartesianCommunicator::Barrier(void)
+{
+  int ierr = MPI_Barrier(communicator);
+  assert(ierr==0);
+}
+
+void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
+{
+  int ierr=MPI_Bcast(data,
+		     bytes,
+		     MPI_BYTE,
+		     root,
+		     communicator);
+  assert(ierr==0);
+}
+
+void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
+{
+  int ierr= MPI_Bcast(data,
+		      bytes,
+		      MPI_BYTE,
+		      root,
+		      MPI_COMM_WORLD);
+  assert(ierr==0);
+}
+
+}
					
 | 
				
			||||||
@@ -45,7 +45,7 @@ public:
 | 
				
			|||||||
// Gather for when there is no need to SIMD split with compression
 | 
					// Gather for when there is no need to SIMD split with compression
 | 
				
			||||||
///////////////////////////////////////////////////////////////////
 | 
					///////////////////////////////////////////////////////////////////
 | 
				
			||||||
template<class vobj,class cobj,class compressor> void 
 | 
					template<class vobj,class cobj,class compressor> void 
 | 
				
			||||||
Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,int dimension,int plane,int cbmask,compressor &compress, int off=0)
 | 
					Gather_plane_simple (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimension,int plane,int cbmask,compressor &compress, int off=0)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  int rd = rhs._grid->_rdimensions[dimension];
 | 
					  int rd = rhs._grid->_rdimensions[dimension];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -114,6 +114,7 @@ PARALLEL_NESTED_LOOP2
 | 
				
			|||||||
	int o      =   n*n1;
 | 
						int o      =   n*n1;
 | 
				
			||||||
	int offset = b+n*n2;
 | 
						int offset = b+n*n2;
 | 
				
			||||||
	cobj temp =compress(rhs._odata[so+o+b]);
 | 
						cobj temp =compress(rhs._odata[so+o+b]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	extract<cobj>(temp,pointers,offset);
 | 
						extract<cobj>(temp,pointers,offset);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
@@ -121,6 +122,7 @@ PARALLEL_NESTED_LOOP2
 | 
				
			|||||||
  } else { 
 | 
					  } else { 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    assert(0); //Fixme think this is buggy
 | 
					    assert(0); //Fixme think this is buggy
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for(int n=0;n<e1;n++){
 | 
					    for(int n=0;n<e1;n++){
 | 
				
			||||||
      for(int b=0;b<e2;b++){
 | 
					      for(int b=0;b<e2;b++){
 | 
				
			||||||
	int o=n*rhs._grid->_slice_stride[dimension];
 | 
						int o=n*rhs._grid->_slice_stride[dimension];
 | 
				
			||||||
@@ -139,7 +141,7 @@ PARALLEL_NESTED_LOOP2
 | 
				
			|||||||
//////////////////////////////////////////////////////
 | 
					//////////////////////////////////////////////////////
 | 
				
			||||||
// Gather for when there is no need to SIMD split
 | 
					// Gather for when there is no need to SIMD split
 | 
				
			||||||
//////////////////////////////////////////////////////
 | 
					//////////////////////////////////////////////////////
 | 
				
			||||||
template<class vobj> void Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<vobj,alignedAllocator<vobj> > &buffer,             int dimension,int plane,int cbmask)
 | 
					template<class vobj> void Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer, int dimension,int plane,int cbmask)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  SimpleCompressor<vobj> dontcompress;
 | 
					  SimpleCompressor<vobj> dontcompress;
 | 
				
			||||||
  Gather_plane_simple (rhs,buffer,dimension,plane,cbmask,dontcompress);
 | 
					  Gather_plane_simple (rhs,buffer,dimension,plane,cbmask,dontcompress);
 | 
				
			||||||
@@ -157,7 +159,7 @@ template<class vobj> void Gather_plane_extract(const Lattice<vobj> &rhs,std::vec
 | 
				
			|||||||
//////////////////////////////////////////////////////
 | 
					//////////////////////////////////////////////////////
 | 
				
			||||||
// Scatter for when there is no need to SIMD split
 | 
					// Scatter for when there is no need to SIMD split
 | 
				
			||||||
//////////////////////////////////////////////////////
 | 
					//////////////////////////////////////////////////////
 | 
				
			||||||
template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,std::vector<vobj,alignedAllocator<vobj> > &buffer, int dimension,int plane,int cbmask)
 | 
					template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vobj> &buffer, int dimension,int plane,int cbmask)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  int rd = rhs._grid->_rdimensions[dimension];
 | 
					  int rd = rhs._grid->_rdimensions[dimension];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -119,8 +119,8 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
   assert(shift<fd);
   
   int buffer_size = rhs._grid->_slice_nblock[dimension]*rhs._grid->_slice_block[dimension];
-  std::vector<vobj,alignedAllocator<vobj> > send_buf(buffer_size);
-  std::vector<vobj,alignedAllocator<vobj> > recv_buf(buffer_size);
+  commVector<vobj> send_buf(buffer_size);
+  commVector<vobj> recv_buf(buffer_size);
 
   int cb= (cbmask==0x2)? Odd : Even;
   int sshift= rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
@@ -191,8 +191,8 @@ template<class vobj> void  Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
   int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
   int words = sizeof(vobj)/sizeof(vector_type);
 
-  std::vector<Vector<scalar_object> >   send_buf_extract(Nsimd,Vector<scalar_object>(buffer_size) );
-  std::vector<Vector<scalar_object> >   recv_buf_extract(Nsimd,Vector<scalar_object>(buffer_size) );
+  std::vector<commVector<scalar_object> >   send_buf_extract(Nsimd,commVector<scalar_object>(buffer_size) );
+  std::vector<commVector<scalar_object> >   recv_buf_extract(Nsimd,commVector<scalar_object>(buffer_size) );
 
   int bytes = buffer_size*sizeof(scalar_object);
 
@@ -65,9 +65,6 @@ public:
     
 class LatticeExpressionBase {};
 
-template<class T> using Vector = std::vector<T,alignedAllocator<T> >;               // Aligned allocator??
-template<class T> using Matrix = std::vector<std::vector<T,alignedAllocator<T> > >; // Aligned allocator??
-
 template <typename Op, typename T1>                           
 class LatticeUnaryExpression  : public std::pair<Op,std::tuple<T1> > , public LatticeExpressionBase {
  public:
@@ -45,7 +45,7 @@ WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
 template <class Impl>
 void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(
     StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
-    std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF,
+    commVector<SiteHalfSpinor> &buf, int sF,
     int sU, const FermionField &in, FermionField &out) {
   SiteHalfSpinor tmp;
   SiteHalfSpinor chi;
@@ -222,7 +222,7 @@ void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(
 template <class Impl>
 void WilsonKernels<Impl>::DiracOptGenericDhopSite(
     StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
-    std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF,
+    commVector<SiteHalfSpinor> &buf, int sF,
    int sU, const FermionField &in, FermionField &out) {
   SiteHalfSpinor tmp;
   SiteHalfSpinor chi;
@@ -398,7 +398,7 @@ void WilsonKernels<Impl>::DiracOptGenericDhopSite(
 template <class Impl>
 void WilsonKernels<Impl>::DiracOptDhopDir(
     StencilImpl &st, DoubledGaugeField &U,
-    std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF,
+    commVector<SiteHalfSpinor> &buf, int sF,
     int sU, const FermionField &in, FermionField &out, int dir, int gamma) {
   SiteHalfSpinor tmp;
   SiteHalfSpinor chi;
@@ -58,7 +58,7 @@ namespace Grid {
 | 
				
			|||||||
      typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
 | 
					      typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
 | 
				
			||||||
	DiracOptDhopSite(
 | 
						DiracOptDhopSite(
 | 
				
			||||||
			 StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
								 StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
				
			||||||
			 std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
								 commVector<SiteHalfSpinor> &buf,
 | 
				
			||||||
			 int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
								 int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
				
			||||||
			 FermionField &out) {
 | 
								 FermionField &out) {
 | 
				
			||||||
#ifdef AVX512
 | 
					#ifdef AVX512
 | 
				
			||||||
@@ -89,7 +89,7 @@ namespace Grid {
 | 
				
			|||||||
	  typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
 | 
						  typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
 | 
				
			||||||
	  DiracOptDhopSite(
 | 
						  DiracOptDhopSite(
 | 
				
			||||||
			   StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
								   StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
				
			||||||
			   std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
								   commVector<SiteHalfSpinor> &buf,
 | 
				
			||||||
			   int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
								   int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
				
			||||||
			   FermionField &out) {
 | 
								   FermionField &out) {
 | 
				
			||||||
	  for (int site = 0; site < Ns; site++) {
 | 
						  for (int site = 0; site < Ns; site++) {
 | 
				
			||||||
@@ -107,7 +107,7 @@ namespace Grid {
 | 
				
			|||||||
				  void>::type
 | 
									  void>::type
 | 
				
			||||||
	  DiracOptDhopSiteDag(
 | 
						  DiracOptDhopSiteDag(
 | 
				
			||||||
			      StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
								      StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
				
			||||||
			      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
								      commVector<SiteHalfSpinor> &buf,
 | 
				
			||||||
			      int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
								      int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
				
			||||||
			      FermionField &out) {
 | 
								      FermionField &out) {
 | 
				
			||||||
#ifdef AVX512
 | 
					#ifdef AVX512
 | 
				
			||||||
@@ -139,7 +139,7 @@ namespace Grid {
 | 
				
			|||||||
				      void>::type
 | 
									      void>::type
 | 
				
			||||||
				      DiracOptDhopSiteDag(
 | 
									      DiracOptDhopSiteDag(
 | 
				
			||||||
							  StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
												  StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
				
			||||||
							  std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
												  commVector<SiteHalfSpinor> &buf,
 | 
				
			||||||
							  int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
												  int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
				
			||||||
							  FermionField &out) {
 | 
												  FermionField &out) {
 | 
				
			||||||
					for (int site = 0; site < Ns; site++) {
 | 
										for (int site = 0; site < Ns; site++) {
 | 
				
			||||||
@@ -154,7 +154,7 @@ namespace Grid {
 
 		    void DiracOptDhopDir(
 				 StencilImpl &st, DoubledGaugeField &U,
-				 std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
+				 commVector<SiteHalfSpinor> &buf,
 				 int sF, int sU, const FermionField &in, FermionField &out, int dirdisp,
 				 int gamma);
 
@@ -162,34 +162,34 @@ namespace Grid {
 		    // Specialised variants
 		    void DiracOptGenericDhopSite(
 					 StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
-					 std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
+					 commVector<SiteHalfSpinor> &buf,
 					 int sF, int sU, const FermionField &in, FermionField &out);
 
 		    void DiracOptGenericDhopSiteDag(
 					    StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
-					    std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
+					    commVector<SiteHalfSpinor> &buf,
 					    int sF, int sU, const FermionField &in, FermionField &out);
 
 		    void DiracOptAsmDhopSite(
 				     StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
-				     std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
+				     commVector<SiteHalfSpinor> &buf,
 				     int sF, int sU, int Ls, int Ns, const FermionField &in,
 				     FermionField &out);
 
 		    void DiracOptAsmDhopSiteDag(
 					StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
-					std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
+					commVector<SiteHalfSpinor> &buf,
 					int sF, int sU, int Ls, int Ns, const FermionField &in,
 					FermionField &out);
 
 		    void DiracOptHandDhopSite(
 			      StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
-			      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
+			      commVector<SiteHalfSpinor> &buf,
 			      int sF, int sU, const FermionField &in, FermionField &out);
 
 		    void DiracOptHandDhopSiteDag(
 					 StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
-					 std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
+					 commVector<SiteHalfSpinor> &buf,
 					 int sF, int sU, const FermionField &in, FermionField &out);
 
 	public:

@@ -40,14 +40,14 @@ namespace Grid {
    ///////////////////////////////////////////////////////////
    template<class Impl>
      void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
-                            std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
+                            commVector<SiteHalfSpinor>  &buf,
                             int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
    {
      assert(0);
    }
    template<class Impl>
      void WilsonKernels<Impl >::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
-                               std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
+                               commVector<SiteHalfSpinor>  &buf,
                                int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
    {
      assert(0);
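The generic DiracOptAsmDhopSite bodies in this hunk are deliberate stubs: optimised assembler kernels exist only for the explicitly specialised implementations further down, and any other instantiation aborts at run time. The same specialise-or-abort idiom in miniature, with illustrative types that are not Grid's:

#include <cassert>

// Generic template aborts; only explicit specialisations do real work.
template<class Impl> struct Kernel {
  static void run() { assert(0); }  // no optimised path for generic Impl
};

template<> struct Kernel<float> {   // stands in for WilsonImplF
  static void run() { /* optimised single-precision body would live here */ }
};

int main() {
  Kernel<float>::run();             // fine; Kernel<double>::run() would abort
  return 0;
}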
@@ -86,14 +86,14 @@ namespace Grid {
 #undef KERNEL_DAG
    template<>
    void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
-				 std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
+				 commVector<SiteHalfSpinor>  &buf,
 				 int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 #include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 
 #define KERNEL_DAG
    template<>
    void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
-				    std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
+				    commVector<SiteHalfSpinor>  &buf,
 				    int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 #include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 
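Note the pattern here: the function head ends without a brace, and the entire body is pulled in textually from WilsonKernelsAsmBody.h, re-included once per variant with KERNEL_DAG toggled in between. A one-file simulation of the same trick, using a macro in place of the shared header so the example compiles standalone (names are illustrative):

#include <cassert>

// The macro stands in for "#include <...AsmBody.h>"; the SIGN parameter
// stands in for the #undef/#define KERNEL_DAG toggle.
#define KERNEL_BODY(SIGN) { out = SIGN(in); }

void site(int in, int &out)     KERNEL_BODY(+)   // plain variant
void site_dag(int in, int &out) KERNEL_BODY(-)   // dagger variant

int main() {
  int o = 0;
  site(3, o);     assert(o ==  3);
  site_dag(3, o); assert(o == -3);
  return 0;
}

Sharing one body this way keeps the hand-scheduled assembler in a single file while still emitting separate dagger and non-dagger symbols.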
@@ -111,14 +111,14 @@ namespace Grid {
 #undef KERNEL_DAG
    template<>
    void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
-					  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
+					  commVector<SiteHalfSpinor>  &buf,
 					  int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 #include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 
 #define KERNEL_DAG
    template<>
    void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
-					     std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
+					     commVector<SiteHalfSpinor>  &buf,
 					     int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 #include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 
@@ -127,10 +127,10 @@ namespace Grid {
 
 #define INSTANTIATE_ASM(A)\
 template void WilsonKernels<A>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\
-                                   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,\
+                                   commVector<SiteHalfSpinor>  &buf,\
                                   int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
 template void WilsonKernels<A>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\
-                                   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,\
+                                   commVector<SiteHalfSpinor>  &buf,\
                                   int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
 
 
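INSTANTIATE_ASM bundles the pair of explicit instantiations each implementation needs, forcing the kernel definitions above to be emitted in this translation unit. The diff shows only the macro definition; a plausible invocation, mirroring the INSTANTIATE_THEM(WilsonImplF); call visible in the next file, would be:

// Hypothetical use of the macro defined above; one line per implementation.
INSTANTIATE_ASM(WilsonImplF);
INSTANTIATE_ASM(DomainWallVec5dImplF);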
@@ -313,7 +313,7 @@ namespace QCD {
 
   template<class Impl>
   void WilsonKernels<Impl>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
-		       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
+		       commVector<SiteHalfSpinor>  &buf,
 		       int ss,int sU,const FermionField &in, FermionField &out)
 {
   typedef typename Simd::scalar_type S;
@@ -556,7 +556,7 @@ namespace QCD {
 
   template<class Impl>
   void WilsonKernels<Impl>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
-		       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
+		       commVector<SiteHalfSpinor>  &buf,
 		       int ss,int sU,const FermionField &in, FermionField &out)
 {
   //  std::cout << "Hand op Dhop "<<std::endl;
@@ -804,7 +804,7 @@ namespace QCD {
   ////////////////////////////////////////////////
 template<>
 void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
-			     std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
+			     commVector<SiteHalfSpinor>  &buf,
 			     int sF,int sU,const FermionField &in, FermionField &out)
 {
   assert(0);
@@ -812,7 +812,7 @@ void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,Leb
 
 template<>
 void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
-				std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
+				commVector<SiteHalfSpinor>  &buf,
 				int sF,int sU,const FermionField &in, FermionField &out)
 {
   assert(0);
@@ -820,7 +820,7 @@ void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,
 
 template<>
 void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
-			     std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
+			     commVector<SiteHalfSpinor>  &buf,
 			     int sF,int sU,const FermionField &in, FermionField &out)
 {
   assert(0);
@@ -828,7 +828,7 @@ void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,Leb
 
 template<>
 void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
-				std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
+				commVector<SiteHalfSpinor>  &buf,
 				int sF,int sU,const FermionField &in, FermionField &out)
 {
   assert(0);
@@ -841,10 +841,10 @@ void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,
 
 #define INSTANTIATE_THEM(A) \
 template void WilsonKernels<A>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
-			       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,\
+			       commVector<SiteHalfSpinor>  &buf,\
			       int ss,int sU,const FermionField &in, FermionField &out);\
 template void WilsonKernels<A>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
-				  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,\
+				  commVector<SiteHalfSpinor>  &buf,\
 				  int ss,int sU,const FermionField &in, FermionField &out);
 
 INSTANTIATE_THEM(WilsonImplF);

@@ -42,6 +42,22 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 namespace Grid{
 namespace Optimization {
 
+  template<class vtype>
+  union uconv {
+    __m512 f;
+    vtype v;
+  };
+
+  union u512f {
+    __m512 v;
+    float f[8];
+  };
+
+  union u512d {
+    __m512 v;
+    double f[4];
+  };
+
   struct Vsplat{
     //Complex float
     inline __m512 operator()(float a, float b){
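A caution on the unions just added: an AVX-512 __m512 packs sixteen 32-bit floats, and the double-precision register type is __m512d with eight lanes, so u512f and u512d as committed alias only the low half of the register, and u512d puns doubles onto the single-precision register type. If full-width lane access is the intent, the shapes would be as below (an editorial sketch, not the repository's code):

#include <immintrin.h>

// Full-width lane views of the 512-bit registers.
union u512f_full {
  __m512  v;
  float   f[16];   // 512 bits / 32-bit float  = 16 lanes
};
union u512d_full {
  __m512d v;       // note: __m512d, not __m512
  double  f[8];    // 512 bits / 64-bit double = 8 lanes
};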
@@ -372,8 +388,9 @@ namespace Optimization {
   // Some Template specialization
 
   // Hack for CLANG until mm512_reduce_add_ps etc... are implemented in GCC and Clang releases
-#undef GNU_CLANG_COMPILER 
-#ifdef GNU_CLANG_COMPILER
+#ifndef __INTEL_COMPILER
+#warning "Slow reduction due to incomplete reduce intrinsics"
   //Complex float Reduce
   template<>
     inline Grid::ComplexF Reduce<Grid::ComplexF, __m512>::operator()(__m512 in){

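The old code #undef'ed GNU_CLANG_COMPILER immediately before testing it, so the hand-rolled reduction was dead code on every compiler; the new guard enables it for any non-Intel compiler and announces the cost with a #warning. The fallback the warning refers to is a lane-by-lane horizontal sum; a self-contained sketch of that shape, where the repository's actual loop may differ:

#include <immintrin.h>

// Hedged sketch of a "slow" horizontal add for compilers lacking
// _mm512_reduce_add_ps: spill the register through a union and sum
// the sixteen lanes in scalar code.
static inline float slow_reduce_add_ps(__m512 in) {
  union { __m512 v; float f[16]; } u;
  u.v = in;
  float sum = 0.f;
  for (int i = 0; i < 16; i++) sum += u.f[i];
  return sum;
}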
@@ -250,8 +250,7 @@ void merge(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int
   }
  }
 
-template<class vobj> inline 
-void merge1(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int offset)
+template<class vobj> inline void merge1(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int offset)
 {
   typedef typename vobj::scalar_type scalar_type ;
   typedef typename vobj::vector_type vector_type ;
@@ -269,8 +268,7 @@ void merge1(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int
   }}
 }
 
-template<class vobj> inline 
-void merge2(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int offset)
+template<class vobj> inline void merge2(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int offset)
 {
   typedef typename vobj::scalar_type scalar_type ;
   typedef typename vobj::vector_type vector_type ;

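These last two hunks only join the split "template<class vobj> inline" / "void mergeN(...)" headers onto one line; behaviour is unchanged. For orientation, the job of a merge-style routine is to interleave one scalar object per SIMD lane back into a vectorised object. An illustrative version of that idea, whose types mimic the merge1/merge2 signatures above while the body is an editorial sketch rather than the repository's:

#include <vector>

// Illustrative only: pack the offset-th scalar object from each per-lane
// pointer into consecutive SIMD lanes of vec, word by word.
template<class vobj>
inline void merge_sketch(vobj &vec,
                         std::vector<typename vobj::scalar_object *> &extracted,
                         int offset) {
  typedef typename vobj::scalar_type scalar_type;
  typedef typename vobj::vector_type vector_type;
  const int Nsimd = vector_type::Nsimd();
  const int words = sizeof(typename vobj::scalar_object) / sizeof(scalar_type);
  scalar_type *vp = (scalar_type *)&vec;
  for (int w = 0; w < words; w++)
    for (int lane = 0; lane < Nsimd; lane++) {
      scalar_type *sp = (scalar_type *)&extracted[lane][offset];
      vp[w * Nsimd + lane] = sp[w];
    }
}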