mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 05:24:32 +00:00 
			
		
		
		
	Checked performance of new vector libraries.
Added a check for C++11 support to configure.ac.
This commit is contained in:
		@@ -16,6 +16,9 @@
 | 
			
		||||
/* GRID_COMMS_NONE */
 | 
			
		||||
#define GRID_COMMS_NONE 1
 | 
			
		||||
 | 
			
		||||
/* define if the compiler supports basic C++11 syntax */
 | 
			
		||||
/* #undef HAVE_CXX11 */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the declaration of `be64toh', and to 0 if you
 | 
			
		||||
   don't. */
 | 
			
		||||
#define HAVE_DECL_BE64TOH 1
 | 
			
		||||
 
 | 
			
		||||
@@ -15,6 +15,9 @@
 | 
			
		||||
/* GRID_COMMS_NONE */
 | 
			
		||||
#undef GRID_COMMS_NONE
 | 
			
		||||
 | 
			
		||||
/* define if the compiler supports basic C++11 syntax */
 | 
			
		||||
#undef HAVE_CXX11
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the declaration of `be64toh', and to 0 if you
 | 
			
		||||
   don't. */
 | 
			
		||||
#undef HAVE_DECL_BE64TOH
 | 
			
		||||
 
 | 
			
		||||
@@ -103,6 +103,10 @@ namespace Grid {
 | 
			
		||||
    inline void sub (RealF * __restrict__ y,const RealF * __restrict__ l,const RealF *__restrict__ r){ *y = (*l) - (*r); } // stores the difference (*l) - (*r) into *y
 | 
			
		||||
    inline void add (RealF * __restrict__ y,const RealF * __restrict__ l,const RealF *__restrict__ r){ *y = (*l) + (*r); } // stores the sum (*l) + (*r) into *y
 | 
			
		||||
    
 | 
			
		||||
    inline void vstream(ComplexF &l, const ComplexF &r){ l=r;} // vstream overload for ComplexF: plain assignment of r to l
 | 
			
		||||
    inline void vstream(ComplexD &l, const ComplexD &r){ l=r;} // vstream overload for ComplexD: plain assignment of r to l
 | 
			
		||||
    inline void vstream(RealF &l, const RealF &r){ l=r;} // vstream overload for RealF: plain assignment of r to l
 | 
			
		||||
    inline void vstream(RealD &l, const RealD &r){ l=r;} // vstream overload for RealD: plain assignment of r to l
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  class Zero{}; // empty class with no members; presumably a tag type for overloads taking `const Zero &` — confirm against callers
 | 
			
		||||
 
 | 
			
		||||
@@ -160,13 +160,21 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,Lattice<vobj> &rhs, int
 | 
			
		||||
PARALLEL_NESTED_LOOP2
 | 
			
		||||
  for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
 | 
			
		||||
    for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
 | 
			
		||||
      
 | 
			
		||||
      /*
 | 
			
		||||
      int o =n*rhs._grid->_slice_stride[dimension];
 | 
			
		||||
      int ocb=1<<lhs._grid->CheckerBoardFromOindex(o+b);
 | 
			
		||||
      if ( ocb&cbmask ) {
 | 
			
		||||
	lhs._odata[lo+o+b]=rhs._odata[ro+o+b];
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      */
 | 
			
		||||
 | 
			
		||||
      int o =n*rhs._grid->_slice_stride[dimension]+b;
 | 
			
		||||
      int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
 | 
			
		||||
      if ( ocb&cbmask ) {
 | 
			
		||||
	//lhs._odata[lo+o]=rhs._odata[ro+o];
 | 
			
		||||
	vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
 
 | 
			
		||||
@@ -38,6 +38,10 @@ public:
 | 
			
		||||
  iScalar(scalar_type s) : _internal(s) {};// recurse down and hit the constructor for vector_type
 | 
			
		||||
  iScalar(const Zero &z){ *this = zero; };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    iScalar<vtype> & operator= (const Zero &hero){
 | 
			
		||||
      zeroit(*this);
 | 
			
		||||
      return *this;
 | 
			
		||||
@@ -206,6 +210,16 @@ public:
 | 
			
		||||
  iMatrix(const Zero &z){ *this = zero; };
 | 
			
		||||
  iMatrix() =default;
 | 
			
		||||
 | 
			
		||||
  // No copy constructor...
 | 
			
		||||
  
 | 
			
		||||
  iMatrix& operator=(const iMatrix& rhs){
 | 
			
		||||
    for(int i=0;i<N;i++)
 | 
			
		||||
      for(int j=0;j<N;j++)
 | 
			
		||||
	vstream(_internal[i][j],rhs._internal[i][j]);
 | 
			
		||||
    return *this;
 | 
			
		||||
  }; 
 | 
			
		||||
  
 | 
			
		||||
 
 | 
			
		||||
 | 
			
		||||
  iMatrix<vtype,N> & operator= (const Zero &hero){
 | 
			
		||||
    zeroit(*this);
 | 
			
		||||
 
 | 
			
		||||
@@ -4,7 +4,7 @@
 | 
			
		||||
 | 
			
		||||
  Using intrinsics
 | 
			
		||||
*/
 | 
			
		||||
// Time-stamp: <2015-05-22 15:51:24 neo>
 | 
			
		||||
// Time-stamp: <2015-05-22 18:58:27 neo>
 | 
			
		||||
//----------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
#include <immintrin.h>
 | 
			
		||||
@@ -307,9 +307,7 @@ namespace Optimization {
 | 
			
		||||
    conv.v = b;
 | 
			
		||||
    switch (perm){
 | 
			
		||||
      // 8x32 bits=>3 permutes
 | 
			
		||||
    case 2: 
 | 
			
		||||
      conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); 
 | 
			
		||||
      break;
 | 
			
		||||
    case 2: conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); break;
 | 
			
		||||
    case 1: conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2)); break;
 | 
			
		||||
    case 0: conv.f = _mm256_permute2f128_ps(conv.f,conv.f,0x01); break;
 | 
			
		||||
    default: assert(0); break;
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user