mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	BG/Q compatibility fix
This commit is contained in:
		@@ -71,7 +71,7 @@ AC_CHECK_FUNCS([gettimeofday])
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVX2|AVX512|IMCI],\
 | 
					AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVX2|AVX512|IMCI],\
 | 
				
			||||||
	[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, IMCI])],\
 | 
						[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, IMCI])],\
 | 
				
			||||||
	[ac_SIMD=${enable_simd}],[ac_SIMD=AVX2])
 | 
						[ac_SIMD=${enable_simd}],[ac_SIMD=DEBUG])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
supported=no
 | 
					supported=no
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -36,7 +36,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
#include <malloc.h>
 | 
					#include <malloc.h>
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <immintrin.h>
 | 
					 | 
				
			||||||
#ifdef HAVE_MM_MALLOC_H
 | 
					#ifdef HAVE_MM_MALLOC_H
 | 
				
			||||||
#include <mm_malloc.h>
 | 
					#include <mm_malloc.h>
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -30,15 +30,6 @@
 | 
				
			|||||||
/* GRID_DEFAULT_PRECISION is SINGLE */
 | 
					/* GRID_DEFAULT_PRECISION is SINGLE */
 | 
				
			||||||
#undef GRID_DEFAULT_PRECISION_SINGLE
 | 
					#undef GRID_DEFAULT_PRECISION_SINGLE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Support Altivec instructions */
 | 
					 | 
				
			||||||
#undef HAVE_ALTIVEC
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Support AVX (Advanced Vector Extensions) instructions */
 | 
					 | 
				
			||||||
#undef HAVE_AVX
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Support AVX2 (Advanced Vector Extensions 2) instructions */
 | 
					 | 
				
			||||||
#undef HAVE_AVX2
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Define to 1 if you have the declaration of `be64toh', and to 0 if you
 | 
					/* Define to 1 if you have the declaration of `be64toh', and to 0 if you
 | 
				
			||||||
   don't. */
 | 
					   don't. */
 | 
				
			||||||
#undef HAVE_DECL_BE64TOH
 | 
					#undef HAVE_DECL_BE64TOH
 | 
				
			||||||
@@ -53,9 +44,6 @@
 | 
				
			|||||||
/* Define to 1 if you have the <execinfo.h> header file. */
 | 
					/* Define to 1 if you have the <execinfo.h> header file. */
 | 
				
			||||||
#undef HAVE_EXECINFO_H
 | 
					#undef HAVE_EXECINFO_H
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Support FMA3 (Fused Multiply-Add) instructions */
 | 
					 | 
				
			||||||
#undef HAVE_FMA
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Define to 1 if you have the `gettimeofday' function. */
 | 
					/* Define to 1 if you have the `gettimeofday' function. */
 | 
				
			||||||
#undef HAVE_GETTIMEOFDAY
 | 
					#undef HAVE_GETTIMEOFDAY
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -74,30 +62,9 @@
 | 
				
			|||||||
/* Define to 1 if you have the <memory.h> header file. */
 | 
					/* Define to 1 if you have the <memory.h> header file. */
 | 
				
			||||||
#undef HAVE_MEMORY_H
 | 
					#undef HAVE_MEMORY_H
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Support mmx instructions */
 | 
					 | 
				
			||||||
#undef HAVE_MMX
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Define to 1 if you have the <mm_malloc.h> header file. */
 | 
					/* Define to 1 if you have the <mm_malloc.h> header file. */
 | 
				
			||||||
#undef HAVE_MM_MALLOC_H
 | 
					#undef HAVE_MM_MALLOC_H
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Support SSE (Streaming SIMD Extensions) instructions */
 | 
					 | 
				
			||||||
#undef HAVE_SSE
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Support SSE2 (Streaming SIMD Extensions 2) instructions */
 | 
					 | 
				
			||||||
#undef HAVE_SSE2
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Support SSE3 (Streaming SIMD Extensions 3) instructions */
 | 
					 | 
				
			||||||
#undef HAVE_SSE3
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions */
 | 
					 | 
				
			||||||
#undef HAVE_SSE4_1
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions */
 | 
					 | 
				
			||||||
#undef HAVE_SSE4_2
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions */
 | 
					 | 
				
			||||||
#undef HAVE_SSSE3
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Define to 1 if you have the <stdint.h> header file. */
 | 
					/* Define to 1 if you have the <stdint.h> header file. */
 | 
				
			||||||
#undef HAVE_STDINT_H
 | 
					#undef HAVE_STDINT_H
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -289,14 +289,13 @@ void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
 | 
				
			|||||||
  printf("         code %d\n",si->si_code);
 | 
					  printf("         code %d\n",si->si_code);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // Linux/Posix
 | 
					  // Linux/Posix
 | 
				
			||||||
#ifdef __linux__ 
 | 
					#ifdef __linux__
 | 
				
			||||||
  // And x86 64bit
 | 
					  // And x86 64bit
 | 
				
			||||||
    ucontext_t * uc= (ucontext_t *)ptr;
 | 
					#ifdef __x86_64__
 | 
				
			||||||
 | 
					  ucontext_t * uc= (ucontext_t *)ptr;
 | 
				
			||||||
  struct sigcontext *sc = (struct sigcontext *)&uc->uc_mcontext;
 | 
					  struct sigcontext *sc = (struct sigcontext *)&uc->uc_mcontext;
 | 
				
			||||||
  printf("  instruction %llx\n",(unsigned long long)sc->rip);
 | 
					  printf("  instruction %llx\n",(unsigned long long)sc->rip);
 | 
				
			||||||
 | 
					 | 
				
			||||||
#define REG(A)  printf("  %s %lx\n",#A,sc-> A);
 | 
					#define REG(A)  printf("  %s %lx\n",#A,sc-> A);
 | 
				
			||||||
 | 
					 | 
				
			||||||
  REG(rdi);
 | 
					  REG(rdi);
 | 
				
			||||||
  REG(rsi);
 | 
					  REG(rsi);
 | 
				
			||||||
  REG(rbp);
 | 
					  REG(rbp);
 | 
				
			||||||
@@ -316,6 +315,7 @@ void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
 | 
				
			|||||||
  REG(r13);
 | 
					  REG(r13);
 | 
				
			||||||
  REG(r14);
 | 
					  REG(r14);
 | 
				
			||||||
  REG(r15);
 | 
					  REG(r15);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
  BACKTRACE();
 | 
					  BACKTRACE();
 | 
				
			||||||
  exit(0);
 | 
					  exit(0);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -43,8 +43,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
#else
 | 
					#else
 | 
				
			||||||
#include <sys/syscall.h>
 | 
					#include <sys/syscall.h>
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
namespace Grid {
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace Grid {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef __linux__
 | 
					#ifdef __linux__
 | 
				
			||||||
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
 | 
					static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
 | 
				
			||||||
@@ -58,6 +58,22 @@ static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef __bgq__
 | 
				
			||||||
 | 
					inline uint64_t cyclecount(void){ 
 | 
				
			||||||
 | 
					   uint64_t tmp;
 | 
				
			||||||
 | 
					   asm volatile ("mfspr %0,0x10C" : "=&r" (tmp)  );
 | 
				
			||||||
 | 
					   return tmp;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					#elif defined __i386__
 | 
				
			||||||
 | 
					inline uint64_t cyclecount(void){ 
 | 
				
			||||||
 | 
					   return __rdtsc();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#warning No cycle counter implemented for this architecture
 | 
				
			||||||
 | 
					inline uint64_t cyclecount(void){ 
 | 
				
			||||||
 | 
					   return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class PerformanceCounter {
 | 
					class PerformanceCounter {
 | 
				
			||||||
private:
 | 
					private:
 | 
				
			||||||
@@ -149,7 +165,7 @@ public:
 | 
				
			|||||||
      ioctl(fd, PERF_EVENT_IOC_RESET, 0);
 | 
					      ioctl(fd, PERF_EVENT_IOC_RESET, 0);
 | 
				
			||||||
      ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
 | 
					      ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    begin  =__rdtsc();
 | 
					    begin  =cyclecount();
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
    begin = 0;
 | 
					    begin = 0;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
@@ -162,7 +178,7 @@ public:
 | 
				
			|||||||
      ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
 | 
					      ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
 | 
				
			||||||
      ::read(fd, &count, sizeof(long long));
 | 
					      ::read(fd, &count, sizeof(long long));
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    elapsed = __rdtsc() - begin;
 | 
					    elapsed = cyclecount() - begin;
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
    elapsed = 0;
 | 
					    elapsed = 0;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -118,12 +118,12 @@ void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField
 | 
				
			|||||||
  
 | 
					  
 | 
				
			||||||
  StencilEntry *SE;
 | 
					  StencilEntry *SE;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  //#define STAMP(i) timers[i] = __rdtsc() ; 
 | 
					  //#define STAMP(i) timers[i] = cyclecount() ; 
 | 
				
			||||||
#define STAMP(i) //timers[i] = __rdtsc() ; 
 | 
					#define STAMP(i) //timers[i] = cyclecount() ; 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  MASK_REGS;
 | 
					  MASK_REGS;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  first = __rdtsc();
 | 
					  first = cyclecount();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  SE=st.GetEntry(ptype,Xm,ss);
 | 
					  SE=st.GetEntry(ptype,Xm,ss);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -35,6 +35,7 @@ Author: neo <cossu@post.kek.jp>
 | 
				
			|||||||
// Time-stamp: <2015-06-09 14:28:02 neo>
 | 
					// Time-stamp: <2015-06-09 14:28:02 neo>
 | 
				
			||||||
//----------------------------------------------------------------------
 | 
					//----------------------------------------------------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace Grid {
 | 
				
			||||||
namespace Optimization {
 | 
					namespace Optimization {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  template<class vtype>
 | 
					  template<class vtype>
 | 
				
			||||||
@@ -243,6 +244,36 @@ namespace Optimization {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  //////////////////////////////////////////////
 | 
					  //////////////////////////////////////////////
 | 
				
			||||||
  // Some Template specialization
 | 
					  // Some Template specialization
 | 
				
			||||||
 | 
					  struct Permute{
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    static inline float Permute0(float in){
 | 
				
			||||||
 | 
					      return in;
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					    static inline float Permute1(float in){
 | 
				
			||||||
 | 
					      return in;
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					    static inline float Permute2(float in){
 | 
				
			||||||
 | 
					      return in;
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					    static inline float Permute3(float in){
 | 
				
			||||||
 | 
					      return in;
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    static inline double Permute0(double in){
 | 
				
			||||||
 | 
					      return in;
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					    static inline double Permute1(double in){
 | 
				
			||||||
 | 
					      return in;
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					    static inline double Permute2(double in){
 | 
				
			||||||
 | 
					      return in;
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					    static inline double Permute3(double in){
 | 
				
			||||||
 | 
					      return in;
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
  template < typename vtype > 
 | 
					  template < typename vtype > 
 | 
				
			||||||
    void permute(vtype &a, vtype b, int perm) {
 | 
					    void permute(vtype &a, vtype b, int perm) {
 | 
				
			||||||
   }; 
 | 
					   }; 
 | 
				
			||||||
@@ -282,7 +313,6 @@ namespace Optimization {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
//////////////////////////////////////////////////////////////////////////////////////
 | 
					//////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
// Here assign types 
 | 
					// Here assign types 
 | 
				
			||||||
namespace Grid {
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  typedef float SIMD_Ftype;  // Single precision type
 | 
					  typedef float SIMD_Ftype;  // Single precision type
 | 
				
			||||||
  typedef double SIMD_Dtype; // Double precision type
 | 
					  typedef double SIMD_Dtype; // Double precision type
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user