From 69ae817d1c59b94c9e1dfc0860831982bc49b337 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 8 Dec 2016 16:43:28 +0000 Subject: [PATCH 01/37] Updates for supporting Mobius better --- lib/simd/Grid_avx.h | 27 ++++++++++++++++++++++++++- lib/simd/Grid_avx512.h | 25 +++++++++++++++++++++++++ lib/simd/Grid_generic.h | 16 ++++++++++++++++ lib/simd/Grid_qpx.h | 9 +++++++++ lib/simd/Grid_sse4.h | 25 +++++++++++++++++++++++++ lib/simd/Grid_vector_types.h | 33 +++++++++++++++++++++++++++++++-- lib/simd/Intel512avx.h | 16 ++++++++++++---- lib/simd/Intel512common.h | 11 ++++++----- lib/simd/Intel512double.h | 2 ++ lib/simd/Intel512single.h | 2 ++ 10 files changed, 154 insertions(+), 12 deletions(-) diff --git a/lib/simd/Grid_avx.h b/lib/simd/Grid_avx.h index 36360102..e2729187 100644 --- a/lib/simd/Grid_avx.h +++ b/lib/simd/Grid_avx.h @@ -213,6 +213,29 @@ namespace Optimization { } }; + struct MultRealPart{ + inline __m256 operator()(__m256 a, __m256 b){ + __m256 ymm0; + ymm0 = _mm256_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(2,2,0,0)); // ymm0 <- ar ar, + return _mm256_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br + } + inline __m256d operator()(__m256d a, __m256d b){ + __m256d ymm0; + ymm0 = _mm256_shuffle_pd(a,a,0x0); // ymm0 <- ar ar, ar,ar b'00,00 + return _mm256_mul_pd(ymm0,b); // ymm0 <- ar bi, ar br + } + }; + struct MaddRealPart{ + inline __m256 operator()(__m256 a, __m256 b, __m256 c){ + __m256 ymm0 = _mm256_moveldup_ps(a); // ymm0 <- ar ar, + return _mm256_add_ps(_mm256_mul_ps( ymm0, b),c); // result <- ar*bi+ci, ar*br+cr (return was missing) + } + inline __m256d operator()(__m256d a, __m256d b, __m256d c){ + __m256d ymm0 = _mm256_shuffle_pd( a, a, 0x0 ); + return _mm256_add_pd(_mm256_mul_pd( ymm0, b),c); + } + }; + struct MultComplex{ // Complex float inline __m256 operator()(__m256 a, __m256 b){ @@ -627,7 +650,9 @@ namespace Optimization { typedef Optimization::Sub SubSIMD; typedef Optimization::Div DivSIMD; typedef Optimization::Mult MultSIMD; - typedef Optimization::MultComplex MultComplexSIMD; + typedef 
Optimization::MultComplex MultComplexSIMD; + typedef Optimization::MultRealPart MultRealPartSIMD; + typedef Optimization::MaddRealPart MaddRealPartSIMD; typedef Optimization::Conj ConjSIMD; typedef Optimization::TimesMinusI TimesMinusISIMD; typedef Optimization::TimesI TimesISIMD; diff --git a/lib/simd/Grid_avx512.h b/lib/simd/Grid_avx512.h index d6531d57..ebf99e16 100644 --- a/lib/simd/Grid_avx512.h +++ b/lib/simd/Grid_avx512.h @@ -189,6 +189,29 @@ namespace Optimization { // 2mul,4 mac +add+sub = 8 flop type insns // 3shuf + 2 (+shuf) = 5/6 simd perm and 1/2 the load. + struct MultRealPart{ + inline __m512 operator()(__m512 a, __m512 b){ + __m512 ymm0; + ymm0 = _mm512_moveldup_ps(a); // ymm0 <- ar ar, + return _mm512_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br + } + inline __m512d operator()(__m512d a, __m512d b){ + __m512d ymm0; + ymm0 = _mm512_shuffle_pd(a,a,0x00); // ymm0 <- ar ar, ar,ar b'00,00 + return _mm512_mul_pd(ymm0,b); // ymm0 <- ar bi, ar br + } + }; + struct MaddRealPart{ + inline __m512 operator()(__m512 a, __m512 b, __m512 c){ + __m512 ymm0 = _mm512_moveldup_ps(a); // ymm0 <- ar ar, + return _mm512_fmadd_ps( ymm0, b, c); + } + inline __m512d operator()(__m512d a, __m512d b, __m512d c){ + __m512d ymm0 = _mm512_shuffle_pd( a, a, 0x00 ); + return _mm512_fmadd_pd( ymm0, b, c); + } + }; + struct MultComplex{ // Complex float inline __m512 operator()(__m512 a, __m512 b){ @@ -501,6 +524,8 @@ namespace Optimization { typedef Optimization::Mult MultSIMD; typedef Optimization::Div DivSIMD; typedef Optimization::MultComplex MultComplexSIMD; + typedef Optimization::MultRealPart MultRealPartSIMD; + typedef Optimization::MaddRealPart MaddRealPartSIMD; typedef Optimization::Conj ConjSIMD; typedef Optimization::TimesMinusI TimesMinusISIMD; typedef Optimization::TimesI TimesISIMD; diff --git a/lib/simd/Grid_generic.h b/lib/simd/Grid_generic.h index 62c78afb..91e9cda2 100644 --- a/lib/simd/Grid_generic.h +++ b/lib/simd/Grid_generic.h @@ -224,6 +224,21 @@ namespace 
Optimization { #define cmul(a, b, c, i)\ c[i] = a[i]*b[i] - a[i+1]*b[i+1];\ c[i+1] = a[i]*b[i+1] + a[i+1]*b[i]; + + struct MultRealPart{ + template + inline vec operator()(vec a, vec b){ + vec out; + + VECTOR_FOR(i, W::c, 1) + { + out.v[2*i] = a.v[2*i]*b.v[2*i]; // re(out) <- ar*br; index the .v storage like out does + out.v[2*i+1] = a.v[2*i]*b.v[2*i+1]; // im(out) <- ar*bi + } + return out; + }; + }; + struct MultComplex{ // Complex @@ -456,6 +471,7 @@ namespace Optimization { typedef Optimization::Div DivSIMD; typedef Optimization::Mult MultSIMD; typedef Optimization::MultComplex MultComplexSIMD; + typedef Optimization::MultRealPart MultRealPartSIMD; typedef Optimization::Conj ConjSIMD; typedef Optimization::TimesMinusI TimesMinusISIMD; typedef Optimization::TimesI TimesISIMD; diff --git a/lib/simd/Grid_qpx.h b/lib/simd/Grid_qpx.h index bc86291d..99a9ea68 100644 --- a/lib/simd/Grid_qpx.h +++ b/lib/simd/Grid_qpx.h @@ -220,6 +220,14 @@ namespace Optimization { } }; + struct MultRealPart{ + // Complex double + inline vector4double operator()(vector4double a, vector4double b){ + // return vec_xmul(b, a); + return vec_xmul(a, b); + } + FLOAT_WRAP_2(operator(), inline) + }; struct MultComplex{ // Complex double inline vector4double operator()(vector4double a, vector4double b){ @@ -430,6 +438,7 @@ typedef Optimization::Sub SubSIMD; typedef Optimization::Mult MultSIMD; typedef Optimization::Div DivSIMD; typedef Optimization::MultComplex MultComplexSIMD; +typedef Optimization::MultRealPart MultRealPartSIMD; typedef Optimization::Conj ConjSIMD; typedef Optimization::TimesMinusI TimesMinusISIMD; typedef Optimization::TimesI TimesISIMD; diff --git a/lib/simd/Grid_sse4.h b/lib/simd/Grid_sse4.h index 560eda11..abd688ab 100644 --- a/lib/simd/Grid_sse4.h +++ b/lib/simd/Grid_sse4.h @@ -177,6 +177,29 @@ namespace Optimization { } }; + struct MultRealPart{ + inline __m128 operator()(__m128 a, __m128 b){ + __m128 ymm0; + ymm0 = _mm_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(2,2,0,0)); // ymm0 <- ar ar, + return _mm_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br + } + inline 
__m128d operator()(__m128d a, __m128d b){ + __m128d ymm0; + ymm0 = _mm_shuffle_pd(a,a,0x0); // ymm0 <- ar ar, ar,ar b'00,00 + return _mm_mul_pd(ymm0,b); // ymm0 <- ar bi, ar br + } + }; + struct MaddRealPart{ + inline __m128 operator()(__m128 a, __m128 b, __m128 c){ + __m128 ymm0 = _mm_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(2,2,0,0)); // ymm0 <- ar ar, + return _mm_add_ps(_mm_mul_ps( ymm0, b),c); // result <- ar*bi+ci, ar*br+cr (return was missing) + } + inline __m128d operator()(__m128d a, __m128d b, __m128d c){ + __m128d ymm0 = _mm_shuffle_pd( a, a, 0x0 ); + return _mm_add_pd(_mm_mul_pd( ymm0, b),c); + } + }; + struct MultComplex{ // Complex float inline __m128 operator()(__m128 a, __m128 b){ @@ -415,6 +438,8 @@ namespace Optimization { typedef Optimization::Div DivSIMD; typedef Optimization::Mult MultSIMD; typedef Optimization::MultComplex MultComplexSIMD; + typedef Optimization::MultRealPart MultRealPartSIMD; + typedef Optimization::MaddRealPart MaddRealPartSIMD; typedef Optimization::Conj ConjSIMD; typedef Optimization::TimesMinusI TimesMinusISIMD; typedef Optimization::TimesI TimesISIMD; diff --git a/lib/simd/Grid_vector_types.h b/lib/simd/Grid_vector_types.h index 42f28b34..8a6ab2e7 100644 --- a/lib/simd/Grid_vector_types.h +++ b/lib/simd/Grid_vector_types.h @@ -101,6 +101,11 @@ template using IfNotInteger = Invoke +Out trinary(Input1 src_1, Input2 src_2, Input3 src_3, Operation op) { + return op(src_1, src_2, src_3); +} + template Out binary(Input1 src_1, Input2 src_2, Operation op) { return op(src_1, src_2); @@ -178,6 +183,7 @@ class Grid_simd { const Grid_simd *__restrict__ r) { *y = (*l) * (*r); } + friend inline void sub(Grid_simd *__restrict__ y, const Grid_simd *__restrict__ l, const Grid_simd *__restrict__ r) { @@ -188,7 +194,6 @@ class Grid_simd { const Grid_simd *__restrict__ r) { *y = (*l) + (*r); } - friend inline void mac(Grid_simd *__restrict__ y, const Scalar_type *__restrict__ a, const Grid_simd *__restrict__ x) { @@ -260,7 +265,7 @@ class Grid_simd { } //////////////////////////// - // opreator scalar * 
simd + // operator scalar * simd //////////////////////////// friend inline Grid_simd operator*(const Scalar_type &a, Grid_simd b) { Grid_simd va; @@ -433,6 +438,11 @@ inline void vbroadcast(Grid_simd &ret,const Grid_simd &src,int lane){ S* typepun =(S*) &src; vsplat(ret,typepun[lane]); } +template =0> +inline void rbroadcast(Grid_simd &ret,const Grid_simd &src,int lane){ + S* typepun =(S*) &src; + ret.v = unary(real(typepun[lane]), VsplatSIMD()); +} /////////////////////// // Splat @@ -449,6 +459,10 @@ template inline void vsplat(Grid_simd &ret, EnableIf, S> c) { vsplat(ret, real(c), imag(c)); } +template +inline void rsplat(Grid_simd &ret, EnableIf, S> c) { + vsplat(ret, real(c), real(c)); +} // if real fill with a, if complex fill with a in the real part (first function // above) @@ -550,6 +564,21 @@ inline Grid_simd operator-(Grid_simd a, Grid_simd b) { return ret; }; +// Distinguish between complex types and others +template = 0> +inline Grid_simd real_mult(Grid_simd a, Grid_simd b) { + Grid_simd ret; + ret.v = binary(a.v, b.v, MultRealPartSIMD()); + return ret; +}; +template = 0> +inline Grid_simd real_madd(Grid_simd a, Grid_simd b, Grid_simd c) { + Grid_simd ret; + ret.v = trinary(a.v, b.v, c.v, MaddRealPartSIMD()); + return ret; +}; + + // Distinguish between complex types and others template = 0> inline Grid_simd operator*(Grid_simd a, Grid_simd b) { diff --git a/lib/simd/Intel512avx.h b/lib/simd/Intel512avx.h index 19157db4..7b5964ad 100644 --- a/lib/simd/Intel512avx.h +++ b/lib/simd/Intel512avx.h @@ -95,10 +95,14 @@ Author: paboyle #define VIDUPd(SRC,DEST) "vpshufd $0xee," #SRC"," #DEST ";\n" // 32 bit level: 3,2,3,2 #define VIDUPf(SRC,DEST) "vmovshdup " #SRC ", " #DEST ";\n" -#define VBCASTRDUPd(OFF,A,DEST) "vbroadcastsd (" #OFF "*16+0)(" #A ")," #DEST ";\n" -#define VBCASTIDUPd(OFF,A,DEST) "vbroadcastsd (" #OFF "*16+8)(" #A ")," #DEST ";\n" -#define VBCASTRDUPf(OFF,PTR,DEST) "vbroadcastss (" #OFF "*8 +0)(" #PTR "), " #DEST ";\n" -#define 
VBCASTIDUPf(OFF,PTR,DEST) "vbroadcastss (" #OFF "*8 +4)(" #PTR "), " #DEST ";\n" +#define VBCASTRDUPd(OFF,A,DEST) "vbroadcastsd (" #OFF "*16+0)(" #A ")," #DEST ";\n" +#define VBCASTIDUPd(OFF,A,DEST) "vbroadcastsd (" #OFF "*16+8)(" #A ")," #DEST ";\n" +#define VBCASTRDUPf(OFF,PTR,DEST) "vbroadcastss (" #OFF "*8 +0)(" #PTR "), " #DEST ";\n" +#define VBCASTIDUPf(OFF,PTR,DEST) "vbroadcastss (" #OFF "*8 +4)(" #PTR "), " #DEST ";\n" +#define VBCASTCDUPf(OFF,A,DEST) "vbroadcastsd (" #OFF "*64 )(" #A ")," #DEST ";\n" +#define VBCASTZDUPf(OFF,A,DEST) "vbroadcastf32x4 (" #OFF "*64 )(" #A ")," #DEST ";\n" +#define VBCASTCDUP(OFF,A,DEST) VBCASTCDUPf(OFF,A,DEST) +#define VBCASTZDUP(OFF,A,DEST) VBCASTZDUPf(OFF,A,DEST) #define VMADDSUBf(A,B,accum) "vfmaddsub231ps " #A "," #B "," #accum ";\n" #define VMADDSUBd(A,B,accum) "vfmaddsub231pd " #A "," #B "," #accum ";\n" @@ -106,11 +110,15 @@ Author: paboyle #define VMADDSUBMEMd(O,P,B,accum) "vfmaddsub231pd " #O"*64("#P "),"#B "," #accum ";\n" +#define VMADDRDUPf(O,P,B,accum) "vfmadd231ps (" #O"*8+0)("#P "){1to16},"#B "," #accum ";\n" +#define VMADDIDUPf(O,P,B,accum) "vfmadd231ps (" #O"*8+4)("#P "){1to16},"#B "," #accum ";\n" #define VMADDSUBRDUPf(O,P,B,accum) "vfmaddsub231ps (" #O"*8+0)("#P "){1to16},"#B "," #accum ";\n" #define VMADDSUBIDUPf(O,P,B,accum) "vfmaddsub231ps (" #O"*8+4)("#P "){1to16},"#B "," #accum ";\n" #define VMULRDUPf(O,P,B,accum) "vmulps (" #O"*8+0)("#P "){1to16},"#B "," #accum ";\n" #define VMULIDUPf(O,P,B,accum) "vmulps (" #O"*8+4)("#P "){1to16},"#B "," #accum ";\n" +#define VMADDRDUPd(O,P,B,accum) "vfmadd231pd (" #O"*16+0)("#P "){1to8},"#B "," #accum ";\n" +#define VMADDIDUPd(O,P,B,accum) "vfmadd231pd (" #O"*16+8)("#P "){1to8},"#B "," #accum ";\n" #define VMADDSUBRDUPd(O,P,B,accum) "vfmaddsub231pd (" #O"*16+0)("#P "){1to8},"#B "," #accum ";\n" #define VMADDSUBIDUPd(O,P,B,accum) "vfmaddsub231pd (" #O"*16+8)("#P "){1to8},"#B "," #accum ";\n" #define VMULRDUPd(O,P,B,accum) "vmulpd (" #O"*16+0)("#P "){1to8},"#B "," 
#accum ";\n" diff --git a/lib/simd/Intel512common.h b/lib/simd/Intel512common.h index cfa20c26..e69e541c 100644 --- a/lib/simd/Intel512common.h +++ b/lib/simd/Intel512common.h @@ -87,7 +87,8 @@ Author: paboyle VACCTIMESMINUSI1d(A,ACC,tmp) \ VACCTIMESMINUSI2d(A,ACC,tmp) -#define LOAD64i(A,ptr) __asm__ ( "movq %0, %" #A : : "r"(ptr) : #A ); +#define LOAD64a(A,ptr) "movq %0, %" #A : : "r"(ptr) : #A +#define LOAD64i(A,ptr) __asm__ ( LOAD64a(A,ptr)); #define LOAD64(A,ptr) LOAD64i(A,ptr) #define VMOVf(A,DEST) "vmovaps " #A ", " #DEST ";\n" @@ -108,8 +109,8 @@ Author: paboyle //"vprefetche0 "#O"*64("#A");\n" "vprefetche1 ("#O"+12)*64("#A");\n" // "clevict0 "#O"*64("#A");\n" -#define VLOADf(OFF,PTR,DEST) "vmovaps " #OFF "*64(" #PTR "), " #DEST ";\n" -#define VLOADd(OFF,PTR,DEST) "vmovapd " #OFF "*64(" #PTR "), " #DEST ";\n" +#define VLOADf(OFF,PTR,DEST) "vmovups " #OFF "*64(" #PTR "), " #DEST ";\n" +#define VLOADd(OFF,PTR,DEST) "vmovupd " #OFF "*64(" #PTR "), " #DEST ";\n" #define VADDf(A,B,DEST) "vaddps " #A "," #B "," #DEST ";\n" #define VADDd(A,B,DEST) "vaddpd " #A "," #B "," #DEST ";\n" @@ -143,8 +144,8 @@ Author: paboyle #define VSTOREf(OFF,PTR,SRC) "vmovntps " #SRC "," #OFF "*64(" #PTR ")" ";\n" #define VSTOREd(OFF,PTR,SRC) "vmovntpd " #SRC "," #OFF "*64(" #PTR ")" ";\n" #else -#define VSTOREf(OFF,PTR,SRC) "vmovaps " #SRC "," #OFF "*64(" #PTR ")" ";\n" -#define VSTOREd(OFF,PTR,SRC) "vmovapd " #SRC "," #OFF "*64(" #PTR ")" ";\n" +#define VSTOREf(OFF,PTR,SRC) "vmovups " #SRC "," #OFF "*64(" #PTR ")" ";\n" +#define VSTOREd(OFF,PTR,SRC) "vmovupd " #SRC "," #OFF "*64(" #PTR ")" ";\n" #endif // Swaps Re/Im ; could unify this with IMCI diff --git a/lib/simd/Intel512double.h b/lib/simd/Intel512double.h index 224c593d..632b5639 100644 --- a/lib/simd/Intel512double.h +++ b/lib/simd/Intel512double.h @@ -144,10 +144,12 @@ Author: paboyle #define VMADDSUBMEM(O,P,B,accum) VMADDSUBMEMd(O,P,B,accum) #define VMADDMEM(O,P,B,accum) VMADDMEMd(O,P,B,accum) #define VMULMEM(O,P,B,accum) 
VMULMEMd(O,P,B,accum) +#undef VMADDRDUP #undef VMADDSUBRDUP #undef VMADDSUBIDUP #undef VMULRDUP #undef VMULIDUP +#define VMADDRDUP(O,P,B,accum) VMADDRDUPd(O,P,B,accum) #define VMADDSUBRDUP(O,P,B,accum) VMADDSUBRDUPd(O,P,B,accum) #define VMADDSUBIDUP(O,P,B,accum) VMADDSUBIDUPd(O,P,B,accum) #define VMULRDUP(O,P,B,accum) VMULRDUPd(O,P,B,accum) diff --git a/lib/simd/Intel512single.h b/lib/simd/Intel512single.h index 3fa47668..ed135651 100644 --- a/lib/simd/Intel512single.h +++ b/lib/simd/Intel512single.h @@ -144,10 +144,12 @@ Author: paboyle #define VMADDMEM(O,P,B,accum) VMADDMEMf(O,P,B,accum) #define VMULMEM(O,P,B,accum) VMULMEMf(O,P,B,accum) +#undef VMADDRDUP #undef VMADDSUBRDUP #undef VMADDSUBIDUP #undef VMULRDUP #undef VMULIDUP +#define VMADDRDUP(O,P,B,accum) VMADDRDUPf(O,P,B,accum) #define VMADDSUBRDUP(O,P,B,accum) VMADDSUBRDUPf(O,P,B,accum) #define VMADDSUBIDUP(O,P,B,accum) VMADDSUBIDUPf(O,P,B,accum) #define VMULRDUP(O,P,B,accum) VMULRDUPf(O,P,B,accum) From 7a61feb6d3c49637ab3afb4182a9915734ba6e48 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 8 Dec 2016 16:58:01 +0000 Subject: [PATCH 02/37] Allocator added with caching for Linux VM subsystem optimisation --- lib/AlignedAllocator.cc | 65 +++++++++++++++++++++++++++++++++++++++++ lib/AlignedAllocator.h | 49 ++++++++++++++++++++++--------- 2 files changed, 101 insertions(+), 13 deletions(-) create mode 100644 lib/AlignedAllocator.cc diff --git a/lib/AlignedAllocator.cc b/lib/AlignedAllocator.cc new file mode 100644 index 00000000..9df4ec1c --- /dev/null +++ b/lib/AlignedAllocator.cc @@ -0,0 +1,65 @@ + + + +#include + +namespace Grid { + +int PointerCache::victim; + + PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::Ncache]; + +void *PointerCache::Insert(void *ptr,size_t bytes) { + + if (bytes < 4096 ) return NULL; + +#ifdef _OPENMP + assert(omp_in_parallel()==0); +#endif + void * ret = NULL; + int v = -1; + + for(int e=0;e namespace Grid { + class PointerCache { + private: + + static const 
int Ncache=8; + static int victim; + + typedef struct { + void *address; + size_t bytes; + int valid; + } PointerCacheEntry; + + static PointerCacheEntry Entries[Ncache]; + + public: + + + static void *Insert(void *ptr,size_t bytes) ; + static void *Lookup(size_t bytes) ; + + }; + //////////////////////////////////////////////////////////////////// // A lattice of something, but assume the something is SIMDized. //////////////////////////////////////////////////////////////////// + template class alignedAllocator { public: @@ -66,27 +89,27 @@ public: pointer allocate(size_type __n, const void* _p= 0) { + size_type bytes = __n*sizeof(_Tp); + + _Tp *ptr = (_Tp *) PointerCache::Lookup(bytes); + #ifdef HAVE_MM_MALLOC_H - _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128); + if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,128); #else - _Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp)); + if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(128,bytes); #endif - _Tp tmp; -#ifdef GRID_NUMA -#pragma omp parallel for schedule(static) - for(int i=0;i<__n;i++){ - ptr[i]=tmp; - } -#endif return ptr; } - void deallocate(pointer __p, size_type) { + void deallocate(pointer __p, size_type __n) { + size_type bytes = __n * sizeof(_Tp); + pointer __freeme = (pointer)PointerCache::Insert((void *)__p,bytes); + #ifdef HAVE_MM_MALLOC_H - _mm_free((void *)__p); + if ( __freeme ) _mm_free((void *)__freeme); #else - free((void *)__p); + if ( __freeme ) free((void *)__freeme); #endif } void construct(pointer __p, const _Tp& __val) { }; From 83fa038bdfb9fbaf9ecbeca0d7150a7dfc903edb Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 8 Dec 2016 16:58:42 +0000 Subject: [PATCH 03/37] Streaming stores --- lib/Stencil.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Stencil.h b/lib/Stencil.h index 5c3a5ef9..89533b82 100644 --- a/lib/Stencil.h +++ b/lib/Stencil.h @@ -113,7 +113,7 @@ Gather_plane_simple_table (std::vector >& table,const Lattice { PARALLEL_FOR_LOOP 
for(int i=0;i Date: Thu, 8 Dec 2016 17:00:32 +0000 Subject: [PATCH 04/37] Ready for sim --- benchmarks/Benchmark_mooee.cc | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/benchmarks/Benchmark_mooee.cc b/benchmarks/Benchmark_mooee.cc index dfaea627..e8f0d16b 100644 --- a/benchmarks/Benchmark_mooee.cc +++ b/benchmarks/Benchmark_mooee.cc @@ -113,6 +113,22 @@ int main (int argc, char ** argv) std::cout<Barrier(); \ + t0=usecond(); \ + for(int i=0;iBarrier(); \ + Dw.CayleyReport(); \ + std::cout< Date: Thu, 8 Dec 2016 17:28:28 +0000 Subject: [PATCH 05/37] Lots of debug on performance Mobius --- lib/qcd/action/fermion/CayleyFermion5D.cc | 34 +- lib/qcd/action/fermion/CayleyFermion5D.h | 5 + lib/qcd/action/fermion/CayleyFermion5Dvec.cc | 337 +++++++++++++++---- lib/qcd/action/fermion/FermionOperator.h | 2 + lib/qcd/action/fermion/WilsonFermion.cc | 4 +- lib/qcd/action/fermion/WilsonFermion.h | 3 + lib/qcd/action/fermion/WilsonFermion5D.cc | 3 +- lib/qcd/action/fermion/WilsonFermion5D.h | 3 + 8 files changed, 304 insertions(+), 87 deletions(-) diff --git a/lib/qcd/action/fermion/CayleyFermion5D.cc b/lib/qcd/action/fermion/CayleyFermion5D.cc index b8e98dce..d8978890 100644 --- a/lib/qcd/action/fermion/CayleyFermion5D.cc +++ b/lib/qcd/action/fermion/CayleyFermion5D.cc @@ -54,12 +54,11 @@ template void CayleyFermion5D::Dminus(const FermionField &psi, FermionField &chi) { int Ls=this->Ls; - FermionField tmp(psi._grid); - this->DW(psi,tmp,DaggerNo); + this->DW(psi,this->tmp(),DaggerNo); for(int s=0;stmp(),s,s);// chi = (1-c[s] D_W) psi } } @@ -87,8 +86,8 @@ template void CayleyFermion5D::CayleyReport(void) std::cout << GridLogMessage << "CayleyFermion5D Number of MooeeInv Calls : " << MooeeInvCalls << std::endl; std::cout << GridLogMessage << "CayleyFermion5D ComputeTime/Calls : " << MooeeInvTime / MooeeInvCalls << " us" << std::endl; - // Flops = 9*12*Ls*vol/2 - RealD mflops = 9.0*12*volume*MooeeInvCalls/MooeeInvTime/2; // 2 for red black 
counting + // Flops = MADD * Ls *Ls *4dvol * spin/colour/complex + RealD mflops = 2.0*24*this->Ls*volume*MooeeInvCalls/MooeeInvTime/2; // 2 for red black counting std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl; std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl; } @@ -110,12 +109,11 @@ template void CayleyFermion5D::DminusDag(const FermionField &psi, FermionField &chi) { int Ls=this->Ls; - FermionField tmp(psi._grid); - this->DW(psi,tmp,DaggerYes); + this->DW(psi,this->tmp(),DaggerYes); for(int s=0;stmp(),s,s);// chi = (1-c[s] D_W) psi } } template @@ -138,6 +136,7 @@ void CayleyFermion5D::Meooe5D (const FermionField &psi, FermionField &D lower[0] =-mass*lower[0]; M5D(psi,psi,Din,lower,diag,upper); } +// FIXME Redunant with the above routine; check this and eliminate template void CayleyFermion5D::Meo5D (const FermionField &psi, FermionField &chi) { int Ls=this->Ls; @@ -259,36 +258,33 @@ template void CayleyFermion5D::Meooe (const FermionField &psi, FermionField &chi) { int Ls=this->Ls; - FermionField tmp(psi._grid); - Meooe5D(psi,tmp); + Meooe5D(psi,this->tmp()); if ( psi.checkerboard == Odd ) { - this->DhopEO(tmp,chi,DaggerNo); + this->DhopEO(this->tmp(),chi,DaggerNo); } else { - this->DhopOE(tmp,chi,DaggerNo); + this->DhopOE(this->tmp(),chi,DaggerNo); } } template void CayleyFermion5D::MeooeDag (const FermionField &psi, FermionField &chi) { - FermionField tmp(psi._grid); // Apply 4d dslash if ( psi.checkerboard == Odd ) { - this->DhopEO(psi,tmp,DaggerYes); + this->DhopEO(psi,this->tmp(),DaggerYes); } else { - this->DhopOE(psi,tmp,DaggerYes); + this->DhopOE(psi,this->tmp(),DaggerYes); } - MeooeDag5D(tmp,chi); + MeooeDag5D(this->tmp(),chi); } template void CayleyFermion5D::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){ - FermionField tmp(psi._grid); - Meo5D(psi,tmp); + Meo5D(psi,this->tmp()); // Apply 4d dslash fragment - this->DhopDir(tmp,chi,dir,disp); + 
this->DhopDir(this->tmp(),chi,dir,disp); } // force terms; five routines; default to Dhop on diagonal template diff --git a/lib/qcd/action/fermion/CayleyFermion5D.h b/lib/qcd/action/fermion/CayleyFermion5D.h index 6fb58234..0eb68034 100644 --- a/lib/qcd/action/fermion/CayleyFermion5D.h +++ b/lib/qcd/action/fermion/CayleyFermion5D.h @@ -76,6 +76,11 @@ namespace Grid { std::vector &diag, std::vector &upper); void MooeeInternal(const FermionField &in, FermionField &out,int dag,int inv); + void MooeeInternalAsm(const FermionField &in, FermionField &out, + int LLs, int site, + Vector > &Matp, + Vector > &Matm); + virtual void Instantiatable(void)=0; diff --git a/lib/qcd/action/fermion/CayleyFermion5Dvec.cc b/lib/qcd/action/fermion/CayleyFermion5Dvec.cc index 35a10de2..29f10b0a 100644 --- a/lib/qcd/action/fermion/CayleyFermion5Dvec.cc +++ b/lib/qcd/action/fermion/CayleyFermion5Dvec.cc @@ -34,8 +34,7 @@ Author: paboyle namespace Grid { -namespace QCD { - /* +namespace QCD { /* * Dense matrix versions of routines */ template @@ -126,7 +125,6 @@ PARALLEL_FOR_LOOP for(int v=0;v(hp_00.v); hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v); @@ -165,42 +160,20 @@ PARALLEL_FOR_LOOP hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v); } - /* - if ( ss==0) std::cout << " dphi_00 " <::M5Ddag(const FermionField &psi, M5Dtime-=usecond(); PARALLEL_FOR_LOOP for(int ss=0;ssoSites();ss+=LLs){ // adds LLs - +#if 0 alignas(64) SiteHalfSpinor hp; alignas(64) SiteHalfSpinor hm; alignas(64) SiteSpinor fp; @@ -287,9 +260,231 @@ PARALLEL_FOR_LOOP chi[ss+v] = chi[ss+v] +l[v]*fm; } +#else + for(int v=0;v(hp_00.v); + hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v); + hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v); + hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v); + hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v); + hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v); + } + if ( vm>=v ) { + hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v); + 
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v); + hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v); + hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v); + hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v); + hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v); + } + + Simd p_00 = real_mult(d[v]()()(), phi[ss+v]()(0)(0)) + real_mult(u[v]()()(),hp_00); + Simd p_01 = real_mult(d[v]()()(), phi[ss+v]()(0)(1)) + real_mult(u[v]()()(),hp_01); + Simd p_02 = real_mult(d[v]()()(), phi[ss+v]()(0)(2)) + real_mult(u[v]()()(),hp_02); + Simd p_10 = real_mult(d[v]()()(), phi[ss+v]()(1)(0)) + real_mult(u[v]()()(),hp_10); + Simd p_11 = real_mult(d[v]()()(), phi[ss+v]()(1)(1)) + real_mult(u[v]()()(),hp_11); + Simd p_12 = real_mult(d[v]()()(), phi[ss+v]()(1)(2)) + real_mult(u[v]()()(),hp_12); + + Simd p_20 = real_mult(d[v]()()(), phi[ss+v]()(2)(0)) + real_mult(l[v]()()(),hm_00); + Simd p_21 = real_mult(d[v]()()(), phi[ss+v]()(2)(1)) + real_mult(l[v]()()(),hm_01); + Simd p_22 = real_mult(d[v]()()(), phi[ss+v]()(2)(2)) + real_mult(l[v]()()(),hm_02); + Simd p_30 = real_mult(d[v]()()(), phi[ss+v]()(3)(0)) + real_mult(l[v]()()(),hm_10); + Simd p_31 = real_mult(d[v]()()(), phi[ss+v]()(3)(1)) + real_mult(l[v]()()(),hm_11); + Simd p_32 = real_mult(d[v]()()(), phi[ss+v]()(3)(2)) + real_mult(l[v]()()(),hm_12); + + vstream(chi[ss+v]()(0)(0),p_00); + vstream(chi[ss+v]()(0)(1),p_01); + vstream(chi[ss+v]()(0)(2),p_02); + vstream(chi[ss+v]()(1)(0),p_10); + vstream(chi[ss+v]()(1)(1),p_11); + vstream(chi[ss+v]()(1)(2),p_12); + vstream(chi[ss+v]()(2)(0),p_20); + vstream(chi[ss+v]()(2)(1),p_21); + vstream(chi[ss+v]()(2)(2),p_22); + vstream(chi[ss+v]()(3)(0),p_30); + vstream(chi[ss+v]()(3)(1),p_31); + vstream(chi[ss+v]()(3)(2),p_32); + } +#endif } M5Dtime+=usecond(); } + + +#include +#include +#include + + +template +void CayleyFermion5D::MooeeInternalAsm(const FermionField &psi, FermionField &chi, + int LLs, int 
site, + Vector > &Matp, + Vector > &Matm) +{ +#if 0 + { + SiteHalfSpinor BcastP; + SiteHalfSpinor BcastM; + SiteHalfSpinor SiteChiP; + SiteHalfSpinor SiteChiM; + + // Ls*Ls * 2 * 12 * vol flops + for(int s1=0;s1); + for(int s1=0;s1 void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv) { @@ -342,37 +537,38 @@ void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField for(int s1=0;s1 SitePplus(LLs); - Vector SitePminus(LLs); - Vector SiteChiP(LLs); - Vector SiteChiM(LLs); - Vector SiteChi(LLs); - - SiteHalfSpinor BcastP; - SiteHalfSpinor BcastM; + std::vector SitePplus(LLs); + std::vector SitePminus(LLs); + std::vector SiteChiP(LLs); + std::vector SiteChiM(LLs); + std::vector SiteChi(LLs); #pragma omp for for(auto site=0;site::MooeeInternal(const FermionField &psi, FermionField SiteChiP[s1]=SiteChiP[s1]+Matp[LLs*s+s1]*BcastP; SiteChiM[s1]=SiteChiM[s1]+Matm[LLs*s+s1]*BcastM; } - s++; - }} + s++; + } + } for(int s=0;s::MooeeInternal(const FermionField &psi, FermionField accumRecon5m(SiteChi[s],SiteChiM[s]); chi[lex] = SiteChi[s]*0.5; } + }} +#else + PARALLEL_FOR_LOOP + for(auto site=0;site::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid, LebesgueEvenOdd(_cbgrid), Umu(&Fgrid), UmuEven(&Hgrid), - UmuOdd(&Hgrid) { + UmuOdd(&Hgrid), + _tmp(&Hgrid) +{ // Allocate the required comms buffer ImportGauge(_Umu); } diff --git a/lib/qcd/action/fermion/WilsonFermion.h b/lib/qcd/action/fermion/WilsonFermion.h index 40fbd1bf..933be732 100644 --- a/lib/qcd/action/fermion/WilsonFermion.h +++ b/lib/qcd/action/fermion/WilsonFermion.h @@ -58,6 +58,9 @@ class WilsonFermion : public WilsonKernels, public WilsonFermionStatic { GridBase *FermionGrid(void) { return _grid; } GridBase *FermionRedBlackGrid(void) { return _cbgrid; } + FermionField _tmp; + FermionField &tmp(void) { return _tmp; } + ////////////////////////////////////////////////////////////////// // override multiply; cut number routines if pass dagger argument // and 
also make interface more uniformly consistent diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index d2ac96e3..d70c98c3 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -60,7 +60,8 @@ WilsonFermion5D::WilsonFermion5D(GaugeField &_Umu, UmuEven(_FourDimRedBlackGrid), UmuOdd (_FourDimRedBlackGrid), Lebesgue(_FourDimGrid), - LebesgueEvenOdd(_FourDimRedBlackGrid) + LebesgueEvenOdd(_FourDimRedBlackGrid), + _tmp(&FiveDimRedBlackGrid) { if (Impl::LsVectorised) { diff --git a/lib/qcd/action/fermion/WilsonFermion5D.h b/lib/qcd/action/fermion/WilsonFermion5D.h index ffb5c58e..fb4fa925 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.h +++ b/lib/qcd/action/fermion/WilsonFermion5D.h @@ -74,6 +74,9 @@ namespace QCD { typedef WilsonKernels Kernels; PmuStat stat; + FermionField _tmp; + FermionField &tmp(void) { return _tmp; } + void Report(void); void ZeroCounters(void); double DhopCalls; From 0091b50f499c17b17208febe24b466aa9cf8ed97 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 9 Dec 2016 22:51:32 +0000 Subject: [PATCH 06/37] Zmobius working -- not asm yet --- lib/qcd/action/fermion/CayleyFermion5D.cc | 86 +++++- lib/qcd/action/fermion/CayleyFermion5D.h | 18 ++ lib/qcd/action/fermion/CayleyFermion5Dvec.cc | 305 ++++++++++++------- 3 files changed, 303 insertions(+), 106 deletions(-) diff --git a/lib/qcd/action/fermion/CayleyFermion5D.cc b/lib/qcd/action/fermion/CayleyFermion5D.cc index d8978890..781380e5 100644 --- a/lib/qcd/action/fermion/CayleyFermion5D.cc +++ b/lib/qcd/action/fermion/CayleyFermion5D.cc @@ -29,6 +29,7 @@ Author: paboyle *************************************************************************************/ /* END LEGAL */ +#include #include @@ -48,7 +49,8 @@ namespace QCD { FourDimGrid, FourDimRedBlackGrid,_M5,p), mass(_mass) - { } + { + } template void CayleyFermion5D::Dminus(const FermionField &psi, FermionField &chi) @@ -455,9 +457,91 @@ void 
CayleyFermion5D::SetCoefficientsInternal(RealD zolo_hi,std::vectorMooeeInternalCompute(0,inv,MatpInv,MatmInv); + this->MooeeInternalCompute(1,inv,MatpInvDag,MatmInvDag); + } +template +void CayleyFermion5D::MooeeInternalCompute(int dag, int inv, + Vector > & Matp, + Vector > & Matm) +{ + int Ls=this->Ls; + + GridBase *grid = this->FermionRedBlackGrid(); + int LLs = grid->_rdimensions[0]; + + if ( LLs == Ls ) return; // Not vectorised in 5th direction + + Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls); + Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls); + + for(int s=0;s::iscomplex() ) { + sp[l] = PplusMat (l*istride+s1*ostride,s2); + sm[l] = PminusMat(l*istride+s1*ostride,s2); + } else { + // if real + scalar_type tmp; + tmp = PplusMat (l*istride+s1*ostride,s2); + sp[l] = scalar_type(tmp.real(),tmp.real()); + tmp = PminusMat(l*istride+s1*ostride,s2); + sm[l] = scalar_type(tmp.real(),tmp.real()); + } + } + Matp[LLs*s2+s1] = Vp; + Matm[LLs*s2+s1] = Vm; + }} +} + FermOpTemplateInstantiate(CayleyFermion5D); GparityFermOpTemplateInstantiate(CayleyFermion5D); diff --git a/lib/qcd/action/fermion/CayleyFermion5D.h b/lib/qcd/action/fermion/CayleyFermion5D.h index 0eb68034..2392fcf0 100644 --- a/lib/qcd/action/fermion/CayleyFermion5D.h +++ b/lib/qcd/action/fermion/CayleyFermion5D.h @@ -33,6 +33,11 @@ namespace Grid { namespace QCD { + template struct switcheroo { static int iscomplex() { return 0; } }; + template<> struct switcheroo { static int iscomplex() { return 1; } }; + template<> struct switcheroo { static int iscomplex() { return 1; } }; + + template class CayleyFermion5D : public WilsonFermion5D { @@ -75,11 +80,18 @@ namespace Grid { std::vector &lower, std::vector &diag, std::vector &upper); + void MooeeInternal(const FermionField &in, FermionField &out,int dag,int inv); + void MooeeInternalCompute(int dag, int inv, Vector > & Matp, Vector > & Matm); + void MooeeInternalAsm(const FermionField &in, FermionField &out, int LLs, int site, Vector > &Matp, 
Vector > &Matm); + void MooeeInternalZAsm(const FermionField &in, FermionField &out, + int LLs, int site, + Vector > &Matp, + Vector > &Matm); virtual void Instantiatable(void)=0; @@ -117,6 +129,12 @@ namespace Grid { std::vector ueem; std::vector dee; + // Matrices of 5d ee inverse params + Vector > MatpInv; + Vector > MatmInv; + Vector > MatpInvDag; + Vector > MatmInvDag; + // Constructors CayleyFermion5D(GaugeField &_Umu, GridCartesian &FiveDimGrid, diff --git a/lib/qcd/action/fermion/CayleyFermion5Dvec.cc b/lib/qcd/action/fermion/CayleyFermion5Dvec.cc index 29f10b0a..6d07d5de 100644 --- a/lib/qcd/action/fermion/CayleyFermion5Dvec.cc +++ b/lib/qcd/action/fermion/CayleyFermion5Dvec.cc @@ -29,7 +29,7 @@ Author: paboyle *************************************************************************************/ /* END LEGAL */ -#include + #include @@ -343,7 +343,7 @@ void CayleyFermion5D::MooeeInternalAsm(const FermionField &psi, FermionFie Vector > &Matp, Vector > &Matm) { -#if 0 +#ifndef AVX512 { SiteHalfSpinor BcastP; SiteHalfSpinor BcastM; @@ -485,6 +485,177 @@ void CayleyFermion5D::MooeeInternalAsm(const FermionField &psi, FermionFie #endif }; + // Z-mobius version +template +void CayleyFermion5D::MooeeInternalZAsm(const FermionField &psi, FermionField &chi, + int LLs, int site, Vector > &Matp, Vector > &Matm) +{ +#if 1 + { + SiteHalfSpinor BcastP; + SiteHalfSpinor BcastM; + SiteHalfSpinor SiteChiP; + SiteHalfSpinor SiteChiM; + + // Ls*Ls * 2 * 12 * vol flops + for(int s1=0;s1); + for(int s1=0;s1 void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv) { @@ -494,118 +665,41 @@ void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField chi.checkerboard=psi.checkerboard; - Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls); - Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls); + Vector > Matp; + Vector > Matm; + Vector > *_Matp; + Vector > *_Matm; - for(int s=0;s > Matp(Ls*LLs); - Vector > Matm(Ls*LLs); + 
assert(_Matp->size()==Ls*LLs); - for(int s2=0;s2 SitePplus(LLs); - std::vector SitePminus(LLs); - std::vector SiteChiP(LLs); - std::vector SiteChiM(LLs); - std::vector SiteChi(LLs); - -#pragma omp for - for(auto site=0;site::iscomplex() ) { PARALLEL_FOR_LOOP - for(auto site=0;site::MooeeInternal(const Fermion template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); + }} From fe187e9ed3960772417cc49845abb6a18ecdaaf0 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sat, 10 Dec 2016 00:47:48 +0000 Subject: [PATCH 07/37] Compiles and passes under ZMobius with assembler --- lib/qcd/action/fermion/CayleyFermion5Dvec.cc | 194 ++++++++++++------- 1 file changed, 125 insertions(+), 69 deletions(-) diff --git a/lib/qcd/action/fermion/CayleyFermion5Dvec.cc b/lib/qcd/action/fermion/CayleyFermion5Dvec.cc index 6d07d5de..91ab386a 100644 --- a/lib/qcd/action/fermion/CayleyFermion5Dvec.cc +++ b/lib/qcd/action/fermion/CayleyFermion5Dvec.cc @@ -336,7 +336,6 @@ PARALLEL_FOR_LOOP #include #include - template void CayleyFermion5D::MooeeInternalAsm(const FermionField &psi, FermionField &chi, int LLs, int site, @@ -482,6 +481,31 @@ void CayleyFermion5D::MooeeInternalAsm(const FermionField &psi, FermionFie } } } +#undef Chi_00 +#undef Chi_01 +#undef Chi_02 +#undef Chi_10 +#undef Chi_11 +#undef Chi_12 +#undef Chi_20 +#undef Chi_21 +#undef Chi_22 +#undef Chi_30 +#undef Chi_31 +#undef Chi_32 + +#undef BCAST0 +#undef BCAST1 +#undef BCAST2 +#undef BCAST3 +#undef BCAST4 +#undef BCAST5 +#undef BCAST6 +#undef BCAST7 +#undef BCAST8 +#undef BCAST9 +#undef BCAST10 +#undef BCAST11 #endif }; @@ -541,31 +565,31 @@ void CayleyFermion5D::MooeeInternalZAsm(const FermionField &psi, FermionFi { // pointers // MASK_REGS; -#define Chi_00 %%zmm0 -#define Chi_01 %%zmm1 -#define Chi_02 %%zmm2 -#define Chi_10 %%zmm3 -#define Chi_11 %%zmm4 -#define Chi_12 
%%zmm5 -#define Chi_20 %%zmm6 -#define Chi_21 %%zmm7 -#define Chi_22 %%zmm8 -#define Chi_30 %%zmm9 -#define Chi_31 %%zmm10 -#define Chi_32 %%zmm11 +#define Chi_00 %zmm0 +#define Chi_01 %zmm1 +#define Chi_02 %zmm2 +#define Chi_10 %zmm3 +#define Chi_11 %zmm4 +#define Chi_12 %zmm5 +#define Chi_20 %zmm6 +#define Chi_21 %zmm7 +#define Chi_22 %zmm8 +#define Chi_30 %zmm9 +#define Chi_31 %zmm10 +#define Chi_32 %zmm11 -#define BCAST0 %%zmm12 -#define BCAST1 %%zmm13 -#define BCAST2 %%zmm14 -#define BCAST3 %%zmm15 -#define BCAST4 %%zmm16 -#define BCAST5 %%zmm17 -#define BCAST6 %%zmm18 -#define BCAST7 %%zmm19 -#define BCAST8 %%zmm20 -#define BCAST9 %%zmm21 -#define BCAST10 %%zmm22 -#define BCAST11 %%zmm23 +#define BCAST0 %zmm12 +#define BCAST1 %zmm13 +#define BCAST2 %zmm14 +#define BCAST3 %zmm15 +#define BCAST4 %zmm16 +#define BCAST5 %zmm17 +#define BCAST6 %zmm18 +#define BCAST7 %zmm19 +#define BCAST8 %zmm20 +#define BCAST9 %zmm21 +#define BCAST10 %zmm22 +#define BCAST11 %zmm23 int incr=LLs*LLs*sizeof(iSinglet); for(int s1=0;s1::MooeeInternalZAsm(const FermionField &psi, FermionFi uint64_t a2 = (uint64_t)&psi[lex]; for(int l=0; l::MooeeInternalZAsm(const FermionField &psi, FermionFi } } } +#undef Chi_00 +#undef Chi_01 +#undef Chi_02 +#undef Chi_10 +#undef Chi_11 +#undef Chi_12 +#undef Chi_20 +#undef Chi_21 +#undef Chi_22 +#undef Chi_30 +#undef Chi_31 +#undef Chi_32 + +#undef BCAST0 +#undef BCAST1 +#undef BCAST2 +#undef BCAST3 +#undef BCAST4 +#undef BCAST5 +#undef BCAST6 +#undef BCAST7 +#undef BCAST8 +#undef BCAST9 +#undef BCAST10 +#undef BCAST11 + #endif }; From 55cb22ad674610d12bff674e6a0d47c0f14be097 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sun, 18 Dec 2016 00:55:37 +0000 Subject: [PATCH 08/37] Z mobius bmark --- benchmarks/Benchmark_mooee.cc | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/benchmarks/Benchmark_mooee.cc b/benchmarks/Benchmark_mooee.cc index e8f0d16b..1e51c9d2 100644 --- a/benchmarks/Benchmark_mooee.cc +++ 
b/benchmarks/Benchmark_mooee.cc @@ -113,6 +113,20 @@ int main (int argc, char ** argv) std::cout<Barrier(); \ + t0=usecond(); \ + for(int i=0;iBarrier(); \ + zDw.CayleyReport(); \ + std::cout< gamma(Ls,std::complex(1.0,0.0)); + ZMobiusFermionVec5dR zDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,mass,M5,gamma,b,c); + std::cout<Barrier(); @@ -193,6 +213,9 @@ int main (int argc, char ** argv) BENCH_DW(Mooee ,src_o,r_o); BENCH_DW(MooeeInv,src_o,r_o); + BENCH_ZDW(Mooee ,src_o,r_o); + BENCH_ZDW(MooeeInv,src_o,r_o); + } Grid_finalize(); From fa6acccf556480f1eff84784750c350cc8c3f672 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sun, 18 Dec 2016 00:56:19 +0000 Subject: [PATCH 09/37] Zmobius asm --- lib/qcd/action/fermion/CayleyFermion5Dvec.cc | 193 ++++++++++++------- 1 file changed, 119 insertions(+), 74 deletions(-) diff --git a/lib/qcd/action/fermion/CayleyFermion5Dvec.cc b/lib/qcd/action/fermion/CayleyFermion5Dvec.cc index 91ab386a..38bceafe 100644 --- a/lib/qcd/action/fermion/CayleyFermion5Dvec.cc +++ b/lib/qcd/action/fermion/CayleyFermion5Dvec.cc @@ -514,7 +514,8 @@ template void CayleyFermion5D::MooeeInternalZAsm(const FermionField &psi, FermionField &chi, int LLs, int site, Vector > &Matp, Vector > &Matm) { -#if 1 +#ifndef AVX512 + //#if 0 { SiteHalfSpinor BcastP; SiteHalfSpinor BcastM; @@ -542,12 +543,13 @@ void CayleyFermion5D::MooeeInternalZAsm(const FermionField &psi, FermionFi for(int co=0;co::MooeeInternalZAsm(const FermionField &psi, FermionFi #else { // pointers - // MASK_REGS; + // MASK_REGS; #define Chi_00 %zmm0 #define Chi_01 %zmm1 #define Chi_02 %zmm2 @@ -577,20 +579,37 @@ void CayleyFermion5D::MooeeInternalZAsm(const FermionField &psi, FermionFi #define Chi_30 %zmm9 #define Chi_31 %zmm10 #define Chi_32 %zmm11 +#define pChi_00 %%zmm0 +#define pChi_01 %%zmm1 +#define pChi_02 %%zmm2 +#define pChi_10 %%zmm3 +#define pChi_11 %%zmm4 +#define pChi_12 %%zmm5 +#define pChi_20 %%zmm6 +#define pChi_21 %%zmm7 +#define pChi_22 %%zmm8 +#define pChi_30 %%zmm9 +#define 
pChi_31 %%zmm10 +#define pChi_32 %%zmm11 -#define BCAST0 %zmm12 -#define BCAST1 %zmm13 -#define BCAST2 %zmm14 -#define BCAST3 %zmm15 -#define BCAST4 %zmm16 -#define BCAST5 %zmm17 -#define BCAST6 %zmm18 -#define BCAST7 %zmm19 -#define BCAST8 %zmm20 -#define BCAST9 %zmm21 -#define BCAST10 %zmm22 -#define BCAST11 %zmm23 +#define BCAST_00 %zmm12 +#define SHUF_00 %zmm13 +#define BCAST_01 %zmm14 +#define SHUF_01 %zmm15 +#define BCAST_02 %zmm16 +#define SHUF_02 %zmm17 +#define BCAST_10 %zmm18 +#define SHUF_10 %zmm19 +#define BCAST_11 %zmm20 +#define SHUF_11 %zmm21 +#define BCAST_12 %zmm22 +#define SHUF_12 %zmm23 +#define Mp %zmm24 +#define Mps %zmm25 +#define Mm %zmm26 +#define Mms %zmm27 +#define N 8 int incr=LLs*LLs*sizeof(iSinglet); for(int s1=0;s1::MooeeInternalZAsm(const FermionField &psi, FermionFi LOAD64(%r9,a1); LOAD64(%r10,a2); asm ( - VPREFETCH1(0,%r10) VPREFETCH1(0,%r9) - VPREFETCH1(12,%r10) VPREFETCH1(13,%r10) - VPREFETCH1(14,%r10) VPREFETCH1(15,%r10) - VBCASTCDUP(0,%r10,BCAST0) VBCASTCDUP(1,%r10,BCAST1) - VBCASTCDUP(2,%r10,BCAST2) VBCASTCDUP(3,%r10,BCAST3) - VBCASTCDUP(4,%r10,BCAST4) VBCASTCDUP(5,%r10,BCAST5) - VBCASTCDUP(6,%r10,BCAST6) VBCASTCDUP(7,%r10,BCAST7) - VBCASTCDUP(8,%r10,BCAST8) VBCASTCDUP(9,%r10,BCAST9) - VBCASTCDUP(10,%r10,BCAST10) VBCASTCDUP(11,%r10,BCAST11) - VMULIDUP (0,%r8,BCAST0,Chi_00) VMULIDUP(0,%r8,BCAST1,Chi_01) // II RI from Mat / Psi - VMULIDUP (0,%r8,BCAST2,Chi_02) VMULIDUP(0,%r8,BCAST3,Chi_10) - VMULIDUP (0,%r8,BCAST4,Chi_11) VMULIDUP(0,%r8,BCAST5,Chi_12) - VMULIDUP (0,%r9,BCAST6,Chi_20) VMULIDUP(0,%r9,BCAST7,Chi_21) - VMULIDUP (0,%r9,BCAST8,Chi_22) VMULIDUP(0,%r9,BCAST9,Chi_30) - VMULIDUP (0,%r9,BCAST10,Chi_31) VMULIDUP(0,%r9,BCAST11,Chi_32) - VSHUF(BCAST0,BCAST0) VSHUF(BCAST1,BCAST1) - VSHUF(BCAST2,BCAST2) VSHUF(BCAST3,BCAST3) - VSHUF(BCAST4,BCAST4) VSHUF(BCAST5,BCAST5) - VSHUF(BCAST6,BCAST6) VSHUF(BCAST7,BCAST7) - VSHUF(BCAST8,BCAST8) VSHUF(BCAST9,BCAST9) - VSHUF(BCAST10,BCAST10) VSHUF(BCAST11,BCAST11) - 
VMADDSUBRDUP(0,%r8,BCAST0,Chi_00) VMADDSUBRDUP(0,%r8,BCAST1,Chi_01) - VMADDSUBRDUP(0,%r8,BCAST2,Chi_02) VMADDSUBRDUP(0,%r8,BCAST3,Chi_10) - VMADDSUBRDUP(0,%r8,BCAST4,Chi_11) VMADDSUBRDUP(0,%r8,BCAST5,Chi_12) - VMADDSUBRDUP(0,%r9,BCAST6,Chi_20) VMADDSUBRDUP(0,%r9,BCAST7,Chi_21) - VMADDSUBRDUP(0,%r9,BCAST8,Chi_22) VMADDSUBRDUP(0,%r9,BCAST9,Chi_30) - VMADDSUBRDUP(0,%r9,BCAST10,Chi_31) VMADDSUBRDUP(0,%r9,BCAST11,Chi_32) ); + VLOAD(0,%r8,Mp)// i r + VLOAD(0,%r9,Mm) + VSHUF(Mp,Mps) // r i + VSHUF(Mm,Mms) + VPREFETCH1(12,%r10) VPREFETCH1(13,%r10) + VPREFETCH1(14,%r10) VPREFETCH1(15,%r10) + VMULIDUP(0*N,%r10,Mps,Chi_00) + VMULIDUP(1*N,%r10,Mps,Chi_01) + VMULIDUP(2*N,%r10,Mps,Chi_02) + VMULIDUP(3*N,%r10,Mps,Chi_10) + VMULIDUP(4*N,%r10,Mps,Chi_11) + VMULIDUP(5*N,%r10,Mps,Chi_12) + + VMULIDUP(6*N ,%r10,Mms,Chi_20) + VMULIDUP(7*N ,%r10,Mms,Chi_21) + VMULIDUP(8*N ,%r10,Mms,Chi_22) + VMULIDUP(9*N ,%r10,Mms,Chi_30) + VMULIDUP(10*N,%r10,Mms,Chi_31) + VMULIDUP(11*N,%r10,Mms,Chi_32) + + VMADDSUBRDUP(0*N,%r10,Mp,Chi_00) + VMADDSUBRDUP(1*N,%r10,Mp,Chi_01) + VMADDSUBRDUP(2*N,%r10,Mp,Chi_02) + VMADDSUBRDUP(3*N,%r10,Mp,Chi_10) + VMADDSUBRDUP(4*N,%r10,Mp,Chi_11) + VMADDSUBRDUP(5*N,%r10,Mp,Chi_12) + + VMADDSUBRDUP(6*N ,%r10,Mm,Chi_20) + VMADDSUBRDUP(7*N ,%r10,Mm,Chi_21) + VMADDSUBRDUP(8*N ,%r10,Mm,Chi_22) + VMADDSUBRDUP(9*N ,%r10,Mm,Chi_30) + VMADDSUBRDUP(10*N,%r10,Mm,Chi_31) + VMADDSUBRDUP(11*N,%r10,Mm,Chi_32) + ); } else { LOAD64(%r8,a0); LOAD64(%r9,a1); LOAD64(%r10,a2); asm ( - VPREFETCH1(0,%r10) VPREFETCH1(0,%r9) - VPREFETCH1(12,%r10) VPREFETCH1(13,%r10) - VPREFETCH1(14,%r10) VPREFETCH1(15,%r10) - VBCASTCDUP(0,%r10,BCAST0) VBCASTCDUP(1,%r10,BCAST1) - VBCASTCDUP(2,%r10,BCAST2) VBCASTCDUP(3,%r10,BCAST3) - VBCASTCDUP(4,%r10,BCAST4) VBCASTCDUP(5,%r10,BCAST5) - VBCASTCDUP(6,%r10,BCAST6) VBCASTCDUP(7,%r10,BCAST7) - VBCASTCDUP(8,%r10,BCAST8) VBCASTCDUP(9,%r10,BCAST9) - VBCASTCDUP(10,%r10,BCAST10) VBCASTCDUP(11,%r10,BCAST11) - VMADDSUBIDUP (0,%r8,BCAST0,Chi_00) 
VMADDSUBIDUP(0,%r8,BCAST1,Chi_01) // II RI from Mat / Psi - VMADDSUBIDUP (0,%r8,BCAST2,Chi_02) VMADDSUBIDUP(0,%r8,BCAST3,Chi_10) - VMADDSUBIDUP (0,%r8,BCAST4,Chi_11) VMADDSUBIDUP(0,%r8,BCAST5,Chi_12) - VMADDSUBIDUP (0,%r9,BCAST6,Chi_20) VMADDSUBIDUP(0,%r9,BCAST7,Chi_21) - VMADDSUBIDUP (0,%r9,BCAST8,Chi_22) VMADDSUBIDUP(0,%r9,BCAST9,Chi_30) - VMADDSUBIDUP (0,%r9,BCAST10,Chi_31) VMADDSUBIDUP(0,%r9,BCAST11,Chi_32) - VSHUF(BCAST0,BCAST0) VSHUF(BCAST1,BCAST1) - VSHUF(BCAST2,BCAST2) VSHUF(BCAST3,BCAST3) - VSHUF(BCAST4,BCAST4) VSHUF(BCAST5,BCAST5) - VSHUF(BCAST6,BCAST6) VSHUF(BCAST7,BCAST7) - VSHUF(BCAST8,BCAST8) VSHUF(BCAST9,BCAST9) - VSHUF(BCAST10,BCAST10) VSHUF(BCAST11,BCAST11) - VMADDSUBRDUP(0,%r8,BCAST0,Chi_00) VMADDSUBRDUP(0,%r8,BCAST1,Chi_01) - VMADDSUBRDUP(0,%r8,BCAST2,Chi_02) VMADDSUBRDUP(0,%r8,BCAST3,Chi_10) - VMADDSUBRDUP(0,%r8,BCAST4,Chi_11) VMADDSUBRDUP(0,%r8,BCAST5,Chi_12) - VMADDSUBRDUP(0,%r9,BCAST6,Chi_20) VMADDSUBRDUP(0,%r9,BCAST7,Chi_21) - VMADDSUBRDUP(0,%r9,BCAST8,Chi_22) VMADDSUBRDUP(0,%r9,BCAST9,Chi_30) - VMADDSUBRDUP(0,%r9,BCAST10,Chi_31) VMADDSUBRDUP(0,%r9,BCAST11,Chi_32) - ); + VLOAD(0,%r8,Mp) + VSHUF(Mp,Mps) + + VLOAD(0,%r9,Mm) + VSHUF(Mm,Mms) + + VMADDSUBIDUP(0*N,%r10,Mps,Chi_00) // Mri * Pii +- Cir + VMADDSUBIDUP(1*N,%r10,Mps,Chi_01) + VMADDSUBIDUP(2*N,%r10,Mps,Chi_02) + VMADDSUBIDUP(3*N,%r10,Mps,Chi_10) + VMADDSUBIDUP(4*N,%r10,Mps,Chi_11) + VMADDSUBIDUP(5*N,%r10,Mps,Chi_12) + + VMADDSUBIDUP(6 *N,%r10,Mms,Chi_20) + VMADDSUBIDUP(7 *N,%r10,Mms,Chi_21) + VMADDSUBIDUP(8 *N,%r10,Mms,Chi_22) + VMADDSUBIDUP(9 *N,%r10,Mms,Chi_30) + VMADDSUBIDUP(10*N,%r10,Mms,Chi_31) + VMADDSUBIDUP(11*N,%r10,Mms,Chi_32) + + VMADDSUBRDUP(0*N,%r10,Mp,Chi_00) // Cir = Mir * Prr +- ( Mri * Pii +- Cir) + VMADDSUBRDUP(1*N,%r10,Mp,Chi_01) // Ci = MiPr + Ci + MrPi ; Cr = MrPr - ( MiPi - Cr) + VMADDSUBRDUP(2*N,%r10,Mp,Chi_02) + VMADDSUBRDUP(3*N,%r10,Mp,Chi_10) + VMADDSUBRDUP(4*N,%r10,Mp,Chi_11) + VMADDSUBRDUP(5*N,%r10,Mp,Chi_12) + + VMADDSUBRDUP(6 *N,%r10,Mm,Chi_20) + 
VMADDSUBRDUP(7 *N,%r10,Mm,Chi_21) + VMADDSUBRDUP(8 *N,%r10,Mm,Chi_22) + VMADDSUBRDUP(9 *N,%r10,Mm,Chi_30) + VMADDSUBRDUP(10*N,%r10,Mm,Chi_31) + VMADDSUBRDUP(11*N,%r10,Mm,Chi_32) + ); } a0 = a0+incr; a1 = a1+incr; @@ -672,13 +704,26 @@ void CayleyFermion5D::MooeeInternalZAsm(const FermionField &psi, FermionFi }} { int lexa = s1+LLs*site; + /* + SiteSpinor tmp; asm ( - VSTORE(0,%0,Chi_00) VSTORE(1 ,%0,Chi_01) VSTORE(2 ,%0,Chi_02) - VSTORE(3,%0,Chi_10) VSTORE(4 ,%0,Chi_11) VSTORE(5 ,%0,Chi_12) - VSTORE(6,%0,Chi_20) VSTORE(7 ,%0,Chi_21) VSTORE(8 ,%0,Chi_22) - VSTORE(9,%0,Chi_30) VSTORE(10,%0,Chi_31) VSTORE(11,%0,Chi_32) + VSTORE(0,%0,pChi_00) VSTORE(1 ,%0,pChi_01) VSTORE(2 ,%0,pChi_02) + VSTORE(3,%0,pChi_10) VSTORE(4 ,%0,pChi_11) VSTORE(5 ,%0,pChi_12) + VSTORE(6,%0,pChi_20) VSTORE(7 ,%0,pChi_21) VSTORE(8 ,%0,pChi_22) + VSTORE(9,%0,pChi_30) VSTORE(10,%0,pChi_31) VSTORE(11,%0,pChi_32) + : : "r" ((uint64_t)&tmp) : "memory" ); + */ + + asm ( + VSTORE(0,%0,pChi_00) VSTORE(1 ,%0,pChi_01) VSTORE(2 ,%0,pChi_02) + VSTORE(3,%0,pChi_10) VSTORE(4 ,%0,pChi_11) VSTORE(5 ,%0,pChi_12) + VSTORE(6,%0,pChi_20) VSTORE(7 ,%0,pChi_21) VSTORE(8 ,%0,pChi_22) + VSTORE(9,%0,pChi_30) VSTORE(10,%0,pChi_31) VSTORE(11,%0,pChi_32) : : "r" ((uint64_t)&chi[lexa]) : "memory" ); + // if ( 1 || (site==0) ) { + // std::cout< Date: Sun, 18 Dec 2016 01:27:34 +0000 Subject: [PATCH 10/37] Bad commit fixed --- lib/simd/Grid_avx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/simd/Grid_avx.h b/lib/simd/Grid_avx.h index e2729187..724f52bb 100644 --- a/lib/simd/Grid_avx.h +++ b/lib/simd/Grid_avx.h @@ -228,7 +228,7 @@ namespace Optimization { struct MaddRealPart{ inline __m256 operator()(__m256 a, __m256 b, __m256 c){ __m256 ymm0 = _mm256_moveldup_ps(a); // ymm0 <- ar ar, - _mm256_add_ps(_mm256_mul_ps( ymm0, b),c); + return _mm256_add_ps(_mm256_mul_ps( ymm0, b),c); } inline __m256d operator()(__m256d a, __m256d b, __m256d c){ __m256d ymm0 = _mm256_shuffle_pd( a, a, 0x0 ); From 
87be03006abe7d6da0ca966bed84e63bbba95a41 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 18 Dec 2016 01:45:09 +0000 Subject: [PATCH 11/37] AVX 512 code broke other compiles; fixing --- lib/qcd/action/fermion/CayleyFermion5Dvec.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/qcd/action/fermion/CayleyFermion5Dvec.cc b/lib/qcd/action/fermion/CayleyFermion5Dvec.cc index 38bceafe..f8c64b91 100644 --- a/lib/qcd/action/fermion/CayleyFermion5Dvec.cc +++ b/lib/qcd/action/fermion/CayleyFermion5Dvec.cc @@ -332,9 +332,11 @@ PARALLEL_FOR_LOOP } +#ifdef AVX512 #include #include #include +#endif template void CayleyFermion5D::MooeeInternalAsm(const FermionField &psi, FermionField &chi, @@ -515,7 +517,6 @@ void CayleyFermion5D::MooeeInternalZAsm(const FermionField &psi, FermionFi int LLs, int site, Vector > &Matp, Vector > &Matm) { #ifndef AVX512 - //#if 0 { SiteHalfSpinor BcastP; SiteHalfSpinor BcastM; From 3e6945cd656e3703eb5042564aec71d8bb9f1b78 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 18 Dec 2016 02:05:11 +0000 Subject: [PATCH 12/37] Fixing AVX Z-mobius --- lib/qcd/action/fermion/CayleyFermion5D.h | 26 ++++++++-- lib/qcd/action/fermion/CayleyFermion5Dvec.cc | 52 ++++++++++---------- 2 files changed, 49 insertions(+), 29 deletions(-) diff --git a/lib/qcd/action/fermion/CayleyFermion5D.h b/lib/qcd/action/fermion/CayleyFermion5D.h index 2392fcf0..86255be6 100644 --- a/lib/qcd/action/fermion/CayleyFermion5D.h +++ b/lib/qcd/action/fermion/CayleyFermion5D.h @@ -33,9 +33,29 @@ namespace Grid { namespace QCD { - template struct switcheroo { static int iscomplex() { return 0; } }; - template<> struct switcheroo { static int iscomplex() { return 1; } }; - template<> struct switcheroo { static int iscomplex() { return 1; } }; + template struct switcheroo { + static inline int iscomplex() { return 0; } + + template + static inline vec mult(vec a, vec b) { + return real_mult(a,b); + } + }; + template<> struct switcheroo { + static inline int 
iscomplex() { return 1; } + + template + static inline vec mult(vec a, vec b) { + return a*b; + } + }; + template<> struct switcheroo { + static inline int iscomplex() { return 1; } + template + static inline vec mult(vec a, vec b) { + return a*b; + } + }; template diff --git a/lib/qcd/action/fermion/CayleyFermion5Dvec.cc b/lib/qcd/action/fermion/CayleyFermion5Dvec.cc index f8c64b91..ed742ea3 100644 --- a/lib/qcd/action/fermion/CayleyFermion5Dvec.cc +++ b/lib/qcd/action/fermion/CayleyFermion5Dvec.cc @@ -161,18 +161,18 @@ PARALLEL_FOR_LOOP } // Can force these to real arithmetic and save 2x. - Simd p_00 = real_mult(d[v]()()(), phi[ss+v]()(0)(0)) + real_mult(l[v]()()(),hm_00); - Simd p_01 = real_mult(d[v]()()(), phi[ss+v]()(0)(1)) + real_mult(l[v]()()(),hm_01); - Simd p_02 = real_mult(d[v]()()(), phi[ss+v]()(0)(2)) + real_mult(l[v]()()(),hm_02); - Simd p_10 = real_mult(d[v]()()(), phi[ss+v]()(1)(0)) + real_mult(l[v]()()(),hm_10); - Simd p_11 = real_mult(d[v]()()(), phi[ss+v]()(1)(1)) + real_mult(l[v]()()(),hm_11); - Simd p_12 = real_mult(d[v]()()(), phi[ss+v]()(1)(2)) + real_mult(l[v]()()(),hm_12); - Simd p_20 = real_mult(d[v]()()(), phi[ss+v]()(2)(0)) + real_mult(u[v]()()(),hp_00); - Simd p_21 = real_mult(d[v]()()(), phi[ss+v]()(2)(1)) + real_mult(u[v]()()(),hp_01); - Simd p_22 = real_mult(d[v]()()(), phi[ss+v]()(2)(2)) + real_mult(u[v]()()(),hp_02); - Simd p_30 = real_mult(d[v]()()(), phi[ss+v]()(3)(0)) + real_mult(u[v]()()(),hp_10); - Simd p_31 = real_mult(d[v]()()(), phi[ss+v]()(3)(1)) + real_mult(u[v]()()(),hp_11); - Simd p_32 = real_mult(d[v]()()(), phi[ss+v]()(3)(2)) + real_mult(u[v]()()(),hp_12); + Simd p_00 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(l[v]()()(),hm_00); + Simd p_01 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(l[v]()()(),hm_01); + Simd p_02 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(l[v]()()(),hm_02); + Simd p_10 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + 
switcheroo::mult(l[v]()()(),hm_10); + Simd p_11 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo::mult(l[v]()()(),hm_11); + Simd p_12 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(l[v]()()(),hm_12); + Simd p_20 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(u[v]()()(),hp_00); + Simd p_21 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(u[v]()()(),hp_01); + Simd p_22 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(u[v]()()(),hp_02); + Simd p_30 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(u[v]()()(),hp_10); + Simd p_31 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(u[v]()()(),hp_11); + Simd p_32 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(u[v]()()(),hp_12); vstream(chi[ss+v]()(0)(0),p_00); vstream(chi[ss+v]()(0)(1),p_01); @@ -299,19 +299,19 @@ PARALLEL_FOR_LOOP hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v); } - Simd p_00 = real_mult(d[v]()()(), phi[ss+v]()(0)(0)) + real_mult(u[v]()()(),hp_00); - Simd p_01 = real_mult(d[v]()()(), phi[ss+v]()(0)(1)) + real_mult(u[v]()()(),hp_01); - Simd p_02 = real_mult(d[v]()()(), phi[ss+v]()(0)(2)) + real_mult(u[v]()()(),hp_02); - Simd p_10 = real_mult(d[v]()()(), phi[ss+v]()(1)(0)) + real_mult(u[v]()()(),hp_10); - Simd p_11 = real_mult(d[v]()()(), phi[ss+v]()(1)(1)) + real_mult(u[v]()()(),hp_11); - Simd p_12 = real_mult(d[v]()()(), phi[ss+v]()(1)(2)) + real_mult(u[v]()()(),hp_12); + Simd p_00 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(u[v]()()(),hp_00); + Simd p_01 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(u[v]()()(),hp_01); + Simd p_02 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(u[v]()()(),hp_02); + Simd p_10 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo::mult(u[v]()()(),hp_10); + Simd p_11 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + 
switcheroo::mult(u[v]()()(),hp_11); + Simd p_12 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(u[v]()()(),hp_12); - Simd p_20 = real_mult(d[v]()()(), phi[ss+v]()(2)(0)) + real_mult(l[v]()()(),hm_00); - Simd p_21 = real_mult(d[v]()()(), phi[ss+v]()(2)(1)) + real_mult(l[v]()()(),hm_01); - Simd p_22 = real_mult(d[v]()()(), phi[ss+v]()(2)(2)) + real_mult(l[v]()()(),hm_02); - Simd p_30 = real_mult(d[v]()()(), phi[ss+v]()(3)(0)) + real_mult(l[v]()()(),hm_10); - Simd p_31 = real_mult(d[v]()()(), phi[ss+v]()(3)(1)) + real_mult(l[v]()()(),hm_11); - Simd p_32 = real_mult(d[v]()()(), phi[ss+v]()(3)(2)) + real_mult(l[v]()()(),hm_12); + Simd p_20 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(l[v]()()(),hm_00); + Simd p_21 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(l[v]()()(),hm_01); + Simd p_22 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(l[v]()()(),hm_02); + Simd p_30 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(l[v]()()(),hm_10); + Simd p_31 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(l[v]()()(),hm_11); + Simd p_32 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(l[v]()()(),hm_12); vstream(chi[ss+v]()(0)(0),p_00); vstream(chi[ss+v]()(0)(1),p_01); @@ -544,13 +544,13 @@ void CayleyFermion5D::MooeeInternalZAsm(const FermionField &psi, FermionFi for(int co=0;co Date: Sun, 18 Dec 2016 02:07:45 +0000 Subject: [PATCH 13/37] Precision error --- lib/simd/Grid_sse4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/simd/Grid_sse4.h b/lib/simd/Grid_sse4.h index abd688ab..398a8691 100644 --- a/lib/simd/Grid_sse4.h +++ b/lib/simd/Grid_sse4.h @@ -194,7 +194,7 @@ namespace Optimization { __m128 ymm0 = _mm_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(2,2,0,0)); // ymm0 <- ar ar, _mm_add_ps(_mm_mul_ps( ymm0, b),c); } - inline __m128d operator()(__m128d a, __m128d b, __m128 c){ + inline __m128d operator()(__m128d a, 
__m128d b, __m128d c){ __m128d ymm0 = _mm_shuffle_pd( a, a, 0x0 ); return _mm_add_pd(_mm_mul_pd( ymm0, b),c); } From 629f43e36c14fe60eba53a651cafac7d3862a7fc Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 18 Dec 2016 02:09:37 +0000 Subject: [PATCH 14/37] Return statement needed --- lib/simd/Grid_sse4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/simd/Grid_sse4.h b/lib/simd/Grid_sse4.h index 398a8691..090f0cc9 100644 --- a/lib/simd/Grid_sse4.h +++ b/lib/simd/Grid_sse4.h @@ -192,7 +192,7 @@ namespace Optimization { struct MaddRealPart{ inline __m128 operator()(__m128 a, __m128 b, __m128 c){ __m128 ymm0 = _mm_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(2,2,0,0)); // ymm0 <- ar ar, - _mm_add_ps(_mm_mul_ps( ymm0, b),c); + return _mm_add_ps(_mm_mul_ps( ymm0, b),c); } inline __m128d operator()(__m128d a, __m128d b, __m128d c){ __m128d ymm0 = _mm_shuffle_pd( a, a, 0x0 ); From 4b220972ac1d7551c27f0ca8c1d535487530400f Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 18 Dec 2016 02:14:17 +0000 Subject: [PATCH 15/37] Warning fix --- lib/simd/Grid_sse4.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/simd/Grid_sse4.h b/lib/simd/Grid_sse4.h index 090f0cc9..943756b2 100644 --- a/lib/simd/Grid_sse4.h +++ b/lib/simd/Grid_sse4.h @@ -348,9 +348,11 @@ namespace Optimization { } } +#ifndef _mm_alignr_epi64 #define _mm_alignr_epi32(a,b,n) _mm_alignr_epi8(a,b,(n*4)%16) #define _mm_alignr_epi64(a,b,n) _mm_alignr_epi8(a,b,(n*8)%16) - +#endif + template static inline __m128 tRotate(__m128 in){ return (__m128)_mm_alignr_epi32((__m128i)in,(__m128i)in,n); }; template static inline __m128d tRotate(__m128d in){ return (__m128d)_mm_alignr_epi64((__m128i)in,(__m128i)in,n); }; From a59f5374d724b40cc08908577acffc3ea6d50a44 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 18 Dec 2016 02:23:55 +0000 Subject: [PATCH 16/37] Evade warning --- lib/PerfCount.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/PerfCount.h 
b/lib/PerfCount.h index 5ab07c02..749441c5 100644 --- a/lib/PerfCount.h +++ b/lib/PerfCount.h @@ -205,12 +205,13 @@ public: void Stop(void) { count=0; cycles=0; + size_t ign; #ifdef __linux__ if ( fd!= -1) { ::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); ::ioctl(cyclefd, PERF_EVENT_IOC_DISABLE, 0); - ::read(fd, &count, sizeof(long long)); - ::read(cyclefd, &cycles, sizeof(long long)); + ign=::read(fd, &count, sizeof(long long)); + ign=::read(cyclefd, &cycles, sizeof(long long)); } elapsed = cyclecount() - begin; #else From 8a337f307074a81e429c24ce00403bbe434d302e Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 18 Dec 2016 02:35:31 +0000 Subject: [PATCH 17/37] Move cayley into mainstream tests --- tests/{debug => }/Test_cayley_even_odd_vec.cc | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{debug => }/Test_cayley_even_odd_vec.cc (100%) diff --git a/tests/debug/Test_cayley_even_odd_vec.cc b/tests/Test_cayley_even_odd_vec.cc similarity index 100% rename from tests/debug/Test_cayley_even_odd_vec.cc rename to tests/Test_cayley_even_odd_vec.cc From f8d11ff67315f3030733a74b55cb0ab9a5cf538c Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Tue, 20 Dec 2016 12:31:49 +0100 Subject: [PATCH 18/37] better serialisable enums (can be encapsulated into classes) --- lib/serialisation/BaseIO.h | 39 +--------- lib/serialisation/MacroMagic.h | 130 +++++++++++++++++---------------- 2 files changed, 72 insertions(+), 97 deletions(-) diff --git a/lib/serialisation/BaseIO.h b/lib/serialisation/BaseIO.h index 7761a8e6..1095baf1 100644 --- a/lib/serialisation/BaseIO.h +++ b/lib/serialisation/BaseIO.h @@ -83,12 +83,7 @@ namespace Grid { typename std::enable_if::value, void>::type write(const std::string& s, const U &output); template - typename std::enable_if::value, void>::type - write(const std::string& s, const U &output); - template - typename std::enable_if< - !(std::is_base_of::value or std::is_enum::value), - void>::type + typename std::enable_if::value, void>::type 
write(const std::string& s, const U &output); private: T *upcast; @@ -107,12 +102,7 @@ namespace Grid { typename std::enable_if::value, void>::type read(const std::string& s, U &output); template - typename std::enable_if::value, void>::type - read(const std::string& s, U &output); - template - typename std::enable_if< - !(std::is_base_of::value or std::is_enum::value), - void>::type + typename std::enable_if::value, void>::type read(const std::string& s, U &output); protected: template @@ -221,17 +211,7 @@ namespace Grid { template template - typename std::enable_if::value, void>::type - Writer::write(const std::string &s, const U &output) - { - EnumIO::write(*this, s, output); - } - - template - template - typename std::enable_if< - !(std::is_base_of::value or std::is_enum::value), - void>::type + typename std::enable_if::value, void>::type Writer::write(const std::string &s, const U &output) { upcast->writeDefault(s, output); @@ -266,17 +246,7 @@ namespace Grid { template template - typename std::enable_if::value, void>::type - Reader::read(const std::string &s, U &output) - { - EnumIO::read(*this, s, output); - } - - template - template - typename std::enable_if< - !(std::is_base_of::value or std::is_enum::value), - void>::type + typename std::enable_if::value, void>::type Reader::read(const std::string &s, U &output) { upcast->readDefault(s, output); @@ -300,7 +270,6 @@ namespace Grid { abort(); } } - } #endif diff --git a/lib/serialisation/MacroMagic.h b/lib/serialisation/MacroMagic.h index c78bba0c..c9137dfe 100644 --- a/lib/serialisation/MacroMagic.h +++ b/lib/serialisation/MacroMagic.h @@ -114,35 +114,33 @@ THE SOFTWARE. #define GRID_MACRO_WRITE_MEMBER(A,B) Grid::write(WR,#B,obj. B); #define GRID_SERIALIZABLE_CLASS_MEMBERS(cname,...) 
\ - \ - \ - GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_MEMBER,__VA_ARGS__)) \ - \ - \ - template \ - static inline void write(Writer &WR,const std::string &s, const cname &obj){ \ - push(WR,s);\ - GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_WRITE_MEMBER,__VA_ARGS__)) \ - pop(WR);\ - } \ - \ - \ - template \ - static inline void read(Reader &RD,const std::string &s, cname &obj){ \ - push(RD,s);\ - GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_READ_MEMBER,__VA_ARGS__)) \ - pop(RD);\ - } \ - \ - \ - friend inline std::ostream & operator << (std::ostream &os, const cname &obj ) { \ - os<<"class "<<#cname<<" {"<\ +static inline void write(Writer &WR,const std::string &s, const cname &obj){ \ + push(WR,s);\ + GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_WRITE_MEMBER,__VA_ARGS__)) \ + pop(WR);\ +} \ +\ +\ +template \ +static inline void read(Reader &RD,const std::string &s, cname &obj){ \ + push(RD,s);\ + GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_READ_MEMBER,__VA_ARGS__)) \ + pop(RD);\ +} \ +\ +\ +friend inline std::ostream & operator << (std::ostream &os, const cname &obj ) { \ + os<<"class "<<#cname<<" {"<::type #define GRID_MACRO_ENUMVAL(A,B) A = B, @@ -150,44 +148,52 @@ THE SOFTWARE. 
#define GRID_MACRO_ENUMTEST(A,B) else if (buf == #A) {obj = GRID_ENUM_TYPE(obj)::A;} #define GRID_MACRO_ENUMCASEIO(A,B) case GRID_ENUM_TYPE(obj)::A: os << #A; break; -namespace Grid { - template - class EnumIO {}; -} - #define GRID_SERIALIZABLE_ENUM(name,undefname,...)\ - enum class name {\ - GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_ENUMVAL,__VA_ARGS__))\ - undefname = -1\ +class name: public Serializable\ +{\ +public:\ + enum EnumType\ + {\ + GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_ENUMVAL,__VA_ARGS__))\ + undefname = -1\ };\ +public:\ + name(void): value_(undefname) {};\ + name(EnumType value): value_(value) {};\ + template \ + static inline void write(Writer &WR,const std::string &s, const name &obj)\ + {\ + switch (obj.value_)\ + {\ + GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_ENUMCASE,__VA_ARGS__))\ + default: Grid::write(WR,s,#undefname); break;\ + }\ + }\ \ - template<>\ - class EnumIO {\ - public:\ - template \ - static inline void write(Writer &WR,const std::string &s, const name &obj){ \ - switch (obj) {\ - GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_ENUMCASE,__VA_ARGS__))\ - default: Grid::write(WR,s,#undefname); break;\ - }\ - }\ - \ - template \ - static inline void read(Reader &RD,const std::string &s, name &obj){ \ - std::string buf;\ - Grid::read(RD, s, buf);\ - if (buf == #undefname) {obj = name::undefname;}\ - GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_ENUMTEST,__VA_ARGS__))\ - else {obj = name::undefname;}\ - }\ - };\ - \ - inline std::ostream & operator << (std::ostream &os, const name &obj ) { \ + template \ + static inline void read(Reader &RD,const std::string &s, name &obj)\ + {\ + std::string buf;\ + Grid::read(RD, s, buf);\ + if (buf == #undefname) {obj = name::undefname;}\ + GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_ENUMTEST,__VA_ARGS__))\ + else {obj = name::undefname;}\ + }\ + inline operator EnumType(void) const\ + {\ + return value_;\ + }\ + inline friend std::ostream & operator<<(std::ostream &os, const name &obj)\ + {\ switch (obj) 
{\ - GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_ENUMCASEIO,__VA_ARGS__))\ - default: os << #undefname; break;\ + GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_ENUMCASEIO,__VA_ARGS__))\ + default: os << #undefname; break;\ }\ return os;\ - }; + }\ +private:\ + EnumType value_;\ +}; + #endif From 41df1db811dacca67cead6c7ca5512b72ba40fbe Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 11 Jan 2017 18:37:49 +0000 Subject: [PATCH 19/37] Hadrons: number of dimensions entirely determined by the initial grid --- extras/Hadrons/Environment.cc | 8 +++++++- extras/Hadrons/Environment.hpp | 2 ++ extras/Hadrons/Modules/MSource/SeqGamma.hpp | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/extras/Hadrons/Environment.cc b/extras/Hadrons/Environment.cc index 68c170b8..37f2a3d7 100644 --- a/extras/Hadrons/Environment.cc +++ b/extras/Hadrons/Environment.cc @@ -41,8 +41,9 @@ using namespace Hadrons; // constructor ///////////////////////////////////////////////////////////////// Environment::Environment(void) { + nd_ = GridDefaultLatt().size(); grid4d_.reset(SpaceTimeGrid::makeFourDimGrid( - GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()), + GridDefaultLatt(), GridDefaultSimd(nd_, vComplex::Nsimd()), GridDefaultMpi())); gridRb4d_.reset(SpaceTimeGrid::makeFourDimRedBlackGrid(grid4d_.get())); auto loc = getGrid()->LocalDimensions(); @@ -126,6 +127,11 @@ GridRedBlackCartesian * Environment::getRbGrid(const unsigned int Ls) const } } +unsigned int Environment::getNd(void) const +{ + return nd_; +} + // random number generator ///////////////////////////////////////////////////// void Environment::setSeed(const std::vector &seed) { diff --git a/extras/Hadrons/Environment.hpp b/extras/Hadrons/Environment.hpp index 041bcc0e..2628e5a0 100644 --- a/extras/Hadrons/Environment.hpp +++ b/extras/Hadrons/Environment.hpp @@ -106,6 +106,7 @@ public: void createGrid(const unsigned int Ls); GridCartesian * getGrid(const unsigned int Ls = 1) const; 
GridRedBlackCartesian * getRbGrid(const unsigned int Ls = 1) const; + unsigned int getNd(void) const; // random number generator void setSeed(const std::vector &seed); GridParallelRNG * get4dRng(void) const; @@ -200,6 +201,7 @@ private: std::map grid5d_; GridRbPt gridRb4d_; std::map gridRb5d_; + unsigned int nd_; // random number generator RngPt rng4d_; // module and related maps diff --git a/extras/Hadrons/Modules/MSource/SeqGamma.hpp b/extras/Hadrons/Modules/MSource/SeqGamma.hpp index 181f9532..611b0108 100644 --- a/extras/Hadrons/Modules/MSource/SeqGamma.hpp +++ b/extras/Hadrons/Modules/MSource/SeqGamma.hpp @@ -147,7 +147,7 @@ void TSeqGamma::execute(void) g = makeGammaProd(par().gamma); p = strToVec(par().mom); ph = zero; - for(unsigned int mu = 0; mu < Nd; mu++) + for(unsigned int mu = 0; mu < env().getNd(); mu++) { LatticeCoordinate(coor, mu); ph = ph + p[mu]*coor; From 16a8e3d0d432bb71183c05c09cde3de86a7eff83 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Mon, 16 Jan 2017 06:32:05 +0000 Subject: [PATCH 20/37] gitignore update for ST3 --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index da7de5e4..5838caf7 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ ################ *~ *# +*.sublime-* # Precompiled Headers # ####################### From 91a35340543217dbb7ed6a0a3ab049cbd575976f Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Mon, 16 Jan 2017 06:32:25 +0000 Subject: [PATCH 21/37] Lattice slice utilities now thread safe --- lib/lattice/Lattice_transfer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index cc4617de..a49b1b5f 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -386,7 +386,7 @@ void InsertSlice(Lattice &lowDim,Lattice & higherDim,int slice, int } // the above should guarantee that the operations are local - //PARALLEL_FOR_LOOP + PARALLEL_FOR_LOOP for(int 
idx=0;idxlSites();idx++){ std::vector lcoor(nl); std::vector hcoor(nh); @@ -428,7 +428,7 @@ void ExtractSlice(Lattice &lowDim, Lattice & higherDim,int slice, in } } // the above should guarantee that the operations are local - //PARALLEL_FOR_LOOP + PARALLEL_FOR_LOOP for(int idx=0;idxlSites();idx++){ std::vector lcoor(nl); std::vector hcoor(nh); From 5803933aea55df67c733cf1e4201f354d8a8965b Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Tue, 17 Jan 2017 16:21:18 -0800 Subject: [PATCH 22/37] First implementation of HDF5 serial IO writer, reader is still empty --- configure.ac | 14 +++ lib/Grid.h | 2 +- lib/Makefile.am | 13 ++- lib/serialisation/Hdf5IO.cc | 84 +++++++++++++++ lib/serialisation/Hdf5IO.h | 169 ++++++++++++++++++++++++++++++ lib/serialisation/Hdf5Type.cc | 8 ++ lib/serialisation/Hdf5Type.h | 48 +++++++++ lib/serialisation/Serialisation.h | 3 + scripts/filelist | 5 +- tests/IO/Test_serialisation.cc | 16 +++ 10 files changed, 357 insertions(+), 5 deletions(-) create mode 100644 lib/serialisation/Hdf5IO.cc create mode 100644 lib/serialisation/Hdf5IO.h create mode 100644 lib/serialisation/Hdf5Type.cc create mode 100644 lib/serialisation/Hdf5Type.h diff --git a/configure.ac b/configure.ac index f413cde8..f848bd23 100644 --- a/configure.ac +++ b/configure.ac @@ -99,6 +99,13 @@ case ${ac_MKL} in AC_DEFINE([USE_MKL], [1], [Define to 1 if you use the Intel MKL]);; esac +############### HDF5 +AC_ARG_WITH([hdf5], + [AS_HELP_STRING([--with-hdf5=prefix], + [try this for a non-standard install prefix of the HDF5 library])], + [AM_CXXFLAGS="-I$with_hdf5/include $AM_CXXFLAGS"] + [AM_LDFLAGS="-L$with_hdf5/lib $AM_LDFLAGS"]) + ############### first-touch AC_ARG_ENABLE([numa], [AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])], @@ -145,6 +152,12 @@ AC_SEARCH_LIBS([fftw_execute], [fftw3], [AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])] [have_fftw=true]) +AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp], + 
[AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])] + [have_hdf5=true] + [LIBS="${LIBS} -lhdf5"], [], [-lhdf5]) +AM_CONDITIONAL(BUILD_HDF5, [ test "${have_hdf5}X" == "trueX" ]) + CXXFLAGS=$CXXFLAGS_CPY LDFLAGS=$LDFLAGS_CPY @@ -410,6 +423,7 @@ RNG choice : ${ac_RNG} GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi` LAPACK : ${ac_LAPACK} FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi` +HDF5 : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi` build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi` ----- BUILD FLAGS ------------------------------------- CXXFLAGS: diff --git a/lib/Grid.h b/lib/Grid.h index 0c5983f3..0f57c8a6 100644 --- a/lib/Grid.h +++ b/lib/Grid.h @@ -59,8 +59,8 @@ Author: paboyle /////////////////// // Grid headers /////////////////// -#include #include "Config.h" +#include #include #include #include diff --git a/lib/Makefile.am b/lib/Makefile.am index a779135f..9aa6af92 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -1,4 +1,5 @@ extra_sources= +extra_headers= if BUILD_COMMS_MPI extra_sources+=communicator/Communicator_mpi.cc extra_sources+=communicator/Communicator_base.cc @@ -24,6 +25,13 @@ if BUILD_COMMS_NONE extra_sources+=communicator/Communicator_base.cc endif +if BUILD_HDF5 + extra_sources+=serialisation/Hdf5IO.cc + extra_sources+=serialisation/Hdf5Type.cc + extra_headers+=serialisation/Hdf5IO.h + extra_headers+=serialisation/Hdf5Type.h +endif + # # Libraries # @@ -32,6 +40,9 @@ include Eigen.inc lib_LIBRARIES = libGrid.a -libGrid_a_SOURCES = $(CCFILES) $(extra_sources) +CCFILES += $(extra_sources) +HFILES += $(extra_headers) + +libGrid_a_SOURCES = $(CCFILES) libGrid_adir = $(pkgincludedir) nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h diff --git a/lib/serialisation/Hdf5IO.cc b/lib/serialisation/Hdf5IO.cc new file mode 100644 index 00000000..0d62fdd8 --- /dev/null +++ b/lib/serialisation/Hdf5IO.cc 
@@ -0,0 +1,84 @@ +#include + +using namespace Grid; +#ifndef H5_NO_NAMESPACE +using namespace H5NS; +#endif + +// Writer implementation /////////////////////////////////////////////////////// +Hdf5Writer::Hdf5Writer(const std::string &fileName) +: fileName_(fileName) +, file_(fileName.c_str(), H5F_ACC_TRUNC) +{ + group_ = file_.openGroup("/"); +} + +Hdf5Writer::~Hdf5Writer(void) +{ + file_.close(); +} + +void Hdf5Writer::push(const std::string &s) +{ + group_ = group_.createGroup(s); + path_.push_back(s); +} + +void Hdf5Writer::pop(void) +{ + path_.pop_back(); + if (path_.empty()) + { + group_ = file_.openGroup("/"); + } + else + { + auto binOp = [](const std::string &a, const std::string &b)->std::string + { + return a + "/" + b; + }; + + group_ = group_.openGroup(std::accumulate(path_.begin(), path_.end(), + std::string(""), binOp)); + } +} + +template <> +void Hdf5Writer::writeDefault(const std::string &s, const std::string &x) +{ + StrType strType(PredType::C_S1, x.size()); + Attribute attribute; + hsize_t attrDim = 1; + DataSpace attrSpace(1, &attrDim); + + attribute = group_.createAttribute(s, strType, attrSpace); + attribute.write(strType, x.data()); +} + +void Hdf5Writer::writeDefault(const std::string &s, const char *x) +{ + std::string sx(x); + + writeDefault(s, sx); +} + +// Reader implementation /////////////////////////////////////////////////////// +Hdf5Reader::Hdf5Reader(const std::string &fileName) +{ + +} + +Hdf5Reader::~Hdf5Reader(void) +{ + +} + +void Hdf5Reader::push(const std::string &s) +{ + +} + +void Hdf5Reader::pop(void) +{ + +} diff --git a/lib/serialisation/Hdf5IO.h b/lib/serialisation/Hdf5IO.h new file mode 100644 index 00000000..481fa1cf --- /dev/null +++ b/lib/serialisation/Hdf5IO.h @@ -0,0 +1,169 @@ +#ifndef GRID_SERIALISATION_HDF5_H +#define GRID_SERIALISATION_HDF5_H + +#include +#include +#include +#include +#include "Hdf5Type.h" + +#ifndef H5_NO_NAMESPACE +#define H5NS H5 +#endif + +// default thresold above which datasets are used 
instead of attributes +#ifndef H5_DEF_DATASET_THRES +#define H5_DEF_DATASET_THRES 6u +#endif + +namespace Grid +{ + template + struct is_arithmetic_vector + { + static constexpr bool value = false; + }; + + template + struct is_arithmetic_vector> + { + static constexpr bool value = std::is_arithmetic::value + or is_arithmetic_vector::value; + }; + + class Hdf5Writer: public Writer + { + public: + Hdf5Writer(const std::string &fileName); + virtual ~Hdf5Writer(void); + void push(const std::string &s); + void pop(void); + void writeDefault(const std::string &s, const char *x); + template + void writeDefault(const std::string &s, const U &x); + template + typename std::enable_if>::value + and std::is_arithmetic::value, void>::type + writeDefault(const std::string &s, const std::vector &x); + template + typename std::enable_if>::value + and !std::is_arithmetic::value, void>::type + writeDefault(const std::string &s, const std::vector &x); + template + typename std::enable_if>::value, void>::type + writeDefault(const std::string &s, const std::vector &x); + private: + std::string fileName_; + std::vector path_; + std::vector dim_; + bool multiDim_{true}; + H5NS::H5File file_; + H5NS::Group group_; + unsigned int datasetThres_{H5_DEF_DATASET_THRES}; + }; + + class Hdf5Reader: public Reader + { + public: + Hdf5Reader(const std::string &fileName); + virtual ~Hdf5Reader(void); + void push(const std::string &s); + void pop(void); + template + void readDefault(const std::string &s, U &output); + template + void readDefault(const std::string &s, std::vector &output); + private: + }; + + // Writer template implementation //////////////////////////////////////////// + template + void Hdf5Writer::writeDefault(const std::string &s, const U &x) + { + H5NS::Attribute attribute; + hsize_t attrDim = 1; + H5NS::DataSpace attrSpace(1, &attrDim); + + attribute = group_.createAttribute(s, *Hdf5Type::type, attrSpace); + attribute.write(*Hdf5Type::type, &x); + } + + template <> + void 
Hdf5Writer::writeDefault(const std::string &s, const std::string &x); + + template + typename std::enable_if>::value + and std::is_arithmetic::value, void>::type + Hdf5Writer::writeDefault(const std::string &s, const std::vector &x) + { + hsize_t size = 1; + + dim_.push_back(x.size()); + for (auto d: dim_) + { + size *= d; + } + + H5NS::DataSpace dataspace(dim_.size(), dim_.data()); + + if (size > datasetThres_) + { + H5NS::DataSet dataset; + + dataset = group_.createDataSet(s, *Hdf5Type::type, dataspace); + dataset.write(x.data(), *Hdf5Type::type); + } + else + { + H5NS::Attribute attribute; + + attribute = group_.createAttribute(s, *Hdf5Type::type, dataspace); + attribute.write(*Hdf5Type::type, x.data()); + } + dim_.clear(); + multiDim_ = true; + } + + template + typename std::enable_if>::value + and !std::is_arithmetic::value, void>::type + Hdf5Writer::writeDefault(const std::string &s, const std::vector &x) + { + hsize_t firstSize = x[0].size(); + + for (auto &v: x) + { + multiDim_ = (multiDim_ and (v.size() == firstSize)); + } + assert(multiDim_); + dim_.push_back(x.size()); + writeDefault(s, x[0]); + } + + template + typename std::enable_if>::value, void>::type + Hdf5Writer::writeDefault(const std::string &s, const std::vector &x) + { + push(s); + for (hsize_t i = 0; i < x.size(); ++i) + { + write(s + "_" + std::to_string(i), x[i]); + } + pop(); + } + + // Reader template implementation //////////////////////////////////////////// + template + void Hdf5Reader::readDefault(const std::string &s, U &output) + { + + } + + template + void Hdf5Reader::readDefault(const std::string &s, std::vector &output) + { + + } +} + +#endif diff --git a/lib/serialisation/Hdf5Type.cc b/lib/serialisation/Hdf5Type.cc new file mode 100644 index 00000000..75c7692e --- /dev/null +++ b/lib/serialisation/Hdf5Type.cc @@ -0,0 +1,8 @@ +#include "Hdf5Type.h" + +using namespace Grid; + +#define HDF5_NATIVE_TYPE(predType, cType)\ +const H5NS::PredType * Hdf5Type::type = 
&H5NS::PredType::predType; + +DEFINE_HDF5_NATIVE_TYPES; diff --git a/lib/serialisation/Hdf5Type.h b/lib/serialisation/Hdf5Type.h new file mode 100644 index 00000000..beb509c2 --- /dev/null +++ b/lib/serialisation/Hdf5Type.h @@ -0,0 +1,48 @@ +#ifndef GRID_SERIALISATION_HDF5_TYPE_H +#define GRID_SERIALISATION_HDF5_TYPE_H + +#include +#include + +#ifndef H5_NO_NAMESPACE +#define H5NS H5 +#endif + +#define HDF5_NATIVE_TYPE(predType, cType)\ +template <>\ +struct Hdf5Type\ +{\ +static const H5NS::PredType *type;\ +static constexpr bool isNative = true;\ +}; + +#define DEFINE_HDF5_NATIVE_TYPES \ +HDF5_NATIVE_TYPE(NATIVE_B8, bool);\ +HDF5_NATIVE_TYPE(NATIVE_CHAR, char);\ +HDF5_NATIVE_TYPE(NATIVE_SCHAR, signed char);\ +HDF5_NATIVE_TYPE(NATIVE_UCHAR, unsigned char);\ +HDF5_NATIVE_TYPE(NATIVE_SHORT, short);\ +HDF5_NATIVE_TYPE(NATIVE_USHORT, unsigned short);\ +HDF5_NATIVE_TYPE(NATIVE_INT, int);\ +HDF5_NATIVE_TYPE(NATIVE_UINT, unsigned int);\ +HDF5_NATIVE_TYPE(NATIVE_LONG, long);\ +HDF5_NATIVE_TYPE(NATIVE_ULONG, unsigned long);\ +HDF5_NATIVE_TYPE(NATIVE_LLONG, long long);\ +HDF5_NATIVE_TYPE(NATIVE_ULLONG, unsigned long long);\ +HDF5_NATIVE_TYPE(NATIVE_FLOAT, float);\ +HDF5_NATIVE_TYPE(NATIVE_DOUBLE, double);\ +HDF5_NATIVE_TYPE(NATIVE_LDOUBLE, long double); + +namespace Grid +{ + template struct Hdf5Type + { + static constexpr bool isNative = false; + }; + + DEFINE_HDF5_NATIVE_TYPES; +} + +#undef HDF5_NATIVE_TYPE + +#endif /* GRID_SERIALISATION_HDF5_TYPE_H */ diff --git a/lib/serialisation/Serialisation.h b/lib/serialisation/Serialisation.h index 8f405d73..aa84e989 100644 --- a/lib/serialisation/Serialisation.h +++ b/lib/serialisation/Serialisation.h @@ -36,6 +36,9 @@ Author: Peter Boyle #include "BinaryIO.h" #include "TextIO.h" #include "XmlIO.h" +#ifdef HAVE_HDF5 +#include "Hdf5IO.h" +#endif ////////////////////////////////////////// // Todo: ////////////////////////////////////////// diff --git a/scripts/filelist b/scripts/filelist index 1ab95c7c..bf2fbc41 100755 --- 
a/scripts/filelist +++ b/scripts/filelist @@ -4,9 +4,8 @@ home=`pwd` # library Make.inc cd $home/lib -HFILES=`find . -type f -name '*.h' -not -path '*/Old/*' -not -path '*/Eigen/*'` -HFILES="$HFILES" -CCFILES=`find . -type f -name '*.cc' -not -name '*ommunicator*.cc'` +HFILES=`find . -type f -name '*.h' -not -name '*Hdf5*' -not -path '*/Old/*' -not -path '*/Eigen/*'` +CCFILES=`find . -type f -name '*.cc' -not -name '*Communicator*.cc' -not -name '*Hdf5*'` echo HFILES=$HFILES > Make.inc echo >> Make.inc echo CCFILES=$CCFILES >> Make.inc diff --git a/tests/IO/Test_serialisation.cc b/tests/IO/Test_serialisation.cc index e23aa1a3..7250d618 100644 --- a/tests/IO/Test_serialisation.cc +++ b/tests/IO/Test_serialisation.cc @@ -140,6 +140,22 @@ int main(int argc,char **argv) std::cout << "Loaded (txt) -----------------" << std::endl; std::cout << copy3 << std::endl << veccopy3 << std::endl; } +#ifdef HAVE_HDF5 + //// HDF5 + //// HDF5 does not accept elements with the duplicated names, hence "discard2" + { + Hdf5Writer TWR("bother.h5"); + write(TWR,"discard",copy1 ); + write(TWR,"discard2",veccopy1 ); + } + { + Hdf5Reader TRD("bother.h5"); + read (TRD,"discard",copy3 ); + read (TRD,"discard2",veccopy3 ); + std::cout << "Loaded (h5) -----------------" << std::endl; + std::cout << copy3 << std::endl << veccopy3 << std::endl; + } +#endif std::vector iv = strToVec("1 2 2 4"); std::vector sv = strToVec("bli bla blu"); From f599cb5b177ce38a2f4acc5a97bbf556cc6d0784 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 18 Jan 2017 16:50:21 -0800 Subject: [PATCH 23/37] HDF5 serial IO implemented and tested --- lib/serialisation/BaseIO.h | 196 ++++++++++++++++++++++++++++++- lib/serialisation/Hdf5IO.cc | 61 +++++++--- lib/serialisation/Hdf5IO.h | 206 +++++++++++++++++++++++---------- tests/IO/Test_serialisation.cc | 47 +++++--- 4 files changed, 412 insertions(+), 98 deletions(-) diff --git a/lib/serialisation/BaseIO.h b/lib/serialisation/BaseIO.h index 1095baf1..5b5ef427 100644 
--- a/lib/serialisation/BaseIO.h +++ b/lib/serialisation/BaseIO.h @@ -32,6 +32,7 @@ Author: Peter Boyle #include namespace Grid { + // Vector IO utilities /////////////////////////////////////////////////////// // helper function to read space-separated values template std::vector strToVec(const std::string s) @@ -67,6 +68,76 @@ namespace Grid { return os; } + // Vector element trait ////////////////////////////////////////////////////// + template + struct element + { + typedef T type; + static constexpr bool is_arithmetic = false; + }; + + template + struct element> + { + typedef typename element::type type; + static constexpr bool is_arithmetic = std::is_arithmetic::value + or element::is_arithmetic; + }; + + // Vector flatening utility class //////////////////////////////////////////// + // Class to flatten a multidimensional std::vector + template + class Flatten + { + public: + typedef typename element::type Element; + public: + explicit Flatten(const V &vector); + const V & getVector(void); + const std::vector & getFlatVector(void); + const std::vector & getDim(void); + private: + void accumulate(const Element &e); + template + void accumulate(const W &v); + void accumulateDim(const Element &e); + template + void accumulateDim(const W &v); + private: + const V &vector_; + std::vector flatVector_; + std::vector dim_; + }; + + + // Class to reconstruct a multidimensional std::vector + template + class Reconstruct + { + public: + typedef typename element::type Element; + public: + Reconstruct(const std::vector &flatVector, + const std::vector &dim); + const V & getVector(void); + const std::vector & getFlatVector(void); + const std::vector & getDim(void); + private: + void fill(std::vector &v); + template + void fill(W &v); + void resize(std::vector &v, const unsigned int dim); + template + void resize(W &v, const unsigned int dim); + private: + V vector_; + const std::vector &flatVector_; + std::vector dim_; + size_t ind_{0}; + unsigned int dimInd_{0}; + }; + 
+ // Abstract writer/reader classes //////////////////////////////////////////// // static polymorphism implemented using CRTP idiom class Serializable; @@ -132,7 +203,128 @@ namespace Grid { } }; - // Generic writer interface + // Flatten class template implementation ///////////////////////////////////// + template + void Flatten::accumulate(const Element &e) + { + flatVector_.push_back(e); + } + + template + template + void Flatten::accumulate(const W &v) + { + for (auto &e: v) + { + accumulate(e); + } + } + + template + void Flatten::accumulateDim(const Element &e) {}; + + template + template + void Flatten::accumulateDim(const W &v) + { + dim_.push_back(v.size()); + accumulateDim(v[0]); + } + + template + Flatten::Flatten(const V &vector) + : vector_(vector) + { + accumulate(vector_); + accumulateDim(vector_); + } + + template + const V & Flatten::getVector(void) + { + return vector_; + } + + template + const std::vector::Element> & + Flatten::getFlatVector(void) + { + return flatVector_; + } + + template + const std::vector & Flatten::getDim(void) + { + return dim_; + } + + // Reconstruct class template implementation ///////////////////////////////// + template + void Reconstruct::fill(std::vector &v) + { + for (auto &e: v) + { + e = flatVector_[ind_++]; + } + } + + template + template + void Reconstruct::fill(W &v) + { + for (auto &e: v) + { + fill(e); + } + } + + template + void Reconstruct::resize(std::vector &v, const unsigned int dim) + { + v.resize(dim_[dim]); + } + + template + template + void Reconstruct::resize(W &v, const unsigned int dim) + { + v.resize(dim_[dim]); + for (auto &e: v) + { + resize(e, dim + 1); + } + } + + template + Reconstruct::Reconstruct(const std::vector &flatVector, + const std::vector &dim) + : flatVector_(flatVector) + , dim_(dim) + { + resize(vector_, 0); + fill(vector_); + } + + template + const V & Reconstruct::Reconstruct::getVector(void) + { + return vector_; + } + + template + const std::vector::Element> & + 
Reconstruct::getFlatVector(void) + { + return flatVector_; + } + + template + const std::vector & Reconstruct::getDim(void) + { + return dim_; + } + + // Generic writer interface ////////////////////////////////////////////////// template inline void push(Writer &w, const std::string &s) { @@ -217,7 +409,7 @@ namespace Grid { upcast->writeDefault(s, output); } - // Reader template implementation //////////////////////////////////////////// + // Reader template implementation template Reader::Reader(void) { diff --git a/lib/serialisation/Hdf5IO.cc b/lib/serialisation/Hdf5IO.cc index 0d62fdd8..02356220 100644 --- a/lib/serialisation/Hdf5IO.cc +++ b/lib/serialisation/Hdf5IO.cc @@ -11,11 +11,8 @@ Hdf5Writer::Hdf5Writer(const std::string &fileName) , file_(fileName.c_str(), H5F_ACC_TRUNC) { group_ = file_.openGroup("/"); -} - -Hdf5Writer::~Hdf5Writer(void) -{ - file_.close(); + writeSingleAttribute(dataSetThres_, HDF5_GRID_GUARD "dataset_threshold", + *Hdf5Type::type); } void Hdf5Writer::push(const std::string &s) @@ -47,12 +44,8 @@ template <> void Hdf5Writer::writeDefault(const std::string &s, const std::string &x) { StrType strType(PredType::C_S1, x.size()); - Attribute attribute; - hsize_t attrDim = 1; - DataSpace attrSpace(1, &attrDim); - attribute = group_.createAttribute(s, strType, attrSpace); - attribute.write(strType, x.data()); + writeSingleAttribute(*(x.data()), s, strType); } void Hdf5Writer::writeDefault(const std::string &s, const char *x) @@ -64,21 +57,55 @@ void Hdf5Writer::writeDefault(const std::string &s, const char *x) // Reader implementation /////////////////////////////////////////////////////// Hdf5Reader::Hdf5Reader(const std::string &fileName) +: fileName_(fileName) +, file_(fileName.c_str(), H5F_ACC_RDONLY) { - -} - -Hdf5Reader::~Hdf5Reader(void) -{ - + group_ = file_.openGroup("/"); + readSingleAttribute(dataSetThres_, HDF5_GRID_GUARD "dataset_threshold", + *Hdf5Type::type); } void Hdf5Reader::push(const std::string &s) { - + group_ = 
group_.openGroup(s); + path_.push_back(s); } void Hdf5Reader::pop(void) { + path_.pop_back(); + if (path_.empty()) + { + group_ = file_.openGroup("/"); + } + else + { + auto binOp = [](const std::string &a, const std::string &b)->std::string + { + return a + "/" + b; + }; + group_ = group_.openGroup(std::accumulate(path_.begin(), path_.end(), + std::string(""), binOp)); + } +} + +template <> +void Hdf5Reader::readDefault(const std::string &s, std::string &x) +{ + Attribute attribute; + + attribute = group_.openAttribute(s); + StrType strType = attribute.getStrType(); + + x.resize(strType.getSize()); + attribute.read(strType, &(x[0])); + + std::cout << "length: " << strType.getSize() << std::endl; + std::cout << "string: |"; + for (auto &c: x) + { + std::cout << "'" << c << "'|"; + } + std::cout << std::endl; } diff --git a/lib/serialisation/Hdf5IO.h b/lib/serialisation/Hdf5IO.h index 481fa1cf..b58c86ed 100644 --- a/lib/serialisation/Hdf5IO.h +++ b/lib/serialisation/Hdf5IO.h @@ -12,139 +12,135 @@ #endif // default thresold above which datasets are used instead of attributes -#ifndef H5_DEF_DATASET_THRES -#define H5_DEF_DATASET_THRES 6u +#ifndef HDF5_DEF_DATASET_THRES +#define HDF5_DEF_DATASET_THRES 6u #endif +// name guard for Grid metadata +#define HDF5_GRID_GUARD "_Grid_" + namespace Grid { - template - struct is_arithmetic_vector - { - static constexpr bool value = false; - }; - - template - struct is_arithmetic_vector> - { - static constexpr bool value = std::is_arithmetic::value - or is_arithmetic_vector::value; - }; - class Hdf5Writer: public Writer { public: Hdf5Writer(const std::string &fileName); - virtual ~Hdf5Writer(void); + virtual ~Hdf5Writer(void) = default; void push(const std::string &s); void pop(void); void writeDefault(const std::string &s, const char *x); template void writeDefault(const std::string &s, const U &x); template - typename std::enable_if>::value - and std::is_arithmetic::value, void>::type + typename std::enable_if>::is_arithmetic, 
void>::type writeDefault(const std::string &s, const std::vector &x); template - typename std::enable_if>::value - and !std::is_arithmetic::value, void>::type + typename std::enable_if>::is_arithmetic, void>::type writeDefault(const std::string &s, const std::vector &x); + private: template - typename std::enable_if>::value, void>::type - writeDefault(const std::string &s, const std::vector &x); + void writeSingleAttribute(const U &x, const std::string &name, + const H5NS::DataType &type); private: std::string fileName_; std::vector path_; - std::vector dim_; - bool multiDim_{true}; H5NS::H5File file_; H5NS::Group group_; - unsigned int datasetThres_{H5_DEF_DATASET_THRES}; + unsigned int dataSetThres_{HDF5_DEF_DATASET_THRES}; }; class Hdf5Reader: public Reader { public: Hdf5Reader(const std::string &fileName); - virtual ~Hdf5Reader(void); + virtual ~Hdf5Reader(void) = default; void push(const std::string &s); void pop(void); template void readDefault(const std::string &s, U &output); template - void readDefault(const std::string &s, std::vector &output); + typename std::enable_if>::is_arithmetic, void>::type + readDefault(const std::string &s, std::vector &x); + template + typename std::enable_if>::is_arithmetic, void>::type + readDefault(const std::string &s, std::vector &x); private: + template + void readSingleAttribute(U &x, const std::string &name, + const H5NS::DataType &type); + private: + std::string fileName_; + std::vector path_; + H5NS::H5File file_; + H5NS::Group group_; + unsigned int dataSetThres_; }; // Writer template implementation //////////////////////////////////////////// template - void Hdf5Writer::writeDefault(const std::string &s, const U &x) + void Hdf5Writer::writeSingleAttribute(const U &x, const std::string &name, + const H5NS::DataType &type) { H5NS::Attribute attribute; hsize_t attrDim = 1; H5NS::DataSpace attrSpace(1, &attrDim); - attribute = group_.createAttribute(s, *Hdf5Type::type, attrSpace); - attribute.write(*Hdf5Type::type, 
&x); + attribute = group_.createAttribute(name, type, attrSpace); + attribute.write(type, &x); + } + + template + void Hdf5Writer::writeDefault(const std::string &s, const U &x) + { + writeSingleAttribute(x, s, *Hdf5Type::type); } template <> void Hdf5Writer::writeDefault(const std::string &s, const std::string &x); template - typename std::enable_if>::value - and std::is_arithmetic::value, void>::type + typename std::enable_if>::is_arithmetic, void>::type Hdf5Writer::writeDefault(const std::string &s, const std::vector &x) { - hsize_t size = 1; + // alias to element type + typedef typename element>::type Element; - dim_.push_back(x.size()); - for (auto d: dim_) + // flatten the vector and getting dimensions + Flatten> flat(x); + std::vector dim; + const auto &flatx = flat.getFlatVector(); + + for (auto &d: flat.getDim()) { - size *= d; + dim.push_back(d); } - H5NS::DataSpace dataspace(dim_.size(), dim_.data()); + // write to file + H5NS::DataSpace dataSpace(dim.size(), dim.data()); - if (size > datasetThres_) + if (flatx.size() > dataSetThres_) { - H5NS::DataSet dataset; + H5NS::DataSet dataSet; - dataset = group_.createDataSet(s, *Hdf5Type::type, dataspace); - dataset.write(x.data(), *Hdf5Type::type); + dataSet = group_.createDataSet(s, *Hdf5Type::type, dataSpace); + dataSet.write(flatx.data(), *Hdf5Type::type); } else { H5NS::Attribute attribute; - attribute = group_.createAttribute(s, *Hdf5Type::type, dataspace); - attribute.write(*Hdf5Type::type, x.data()); + attribute = group_.createAttribute(s, *Hdf5Type::type, dataSpace); + attribute.write(*Hdf5Type::type, flatx.data()); } - dim_.clear(); - multiDim_ = true; } template - typename std::enable_if>::value - and !std::is_arithmetic::value, void>::type - Hdf5Writer::writeDefault(const std::string &s, const std::vector &x) - { - hsize_t firstSize = x[0].size(); - - for (auto &v: x) - { - multiDim_ = (multiDim_ and (v.size() == firstSize)); - } - assert(multiDim_); - dim_.push_back(x.size()); - writeDefault(s, 
x[0]); - } - - template - typename std::enable_if>::value, void>::type + typename std::enable_if>::is_arithmetic, void>::type Hdf5Writer::writeDefault(const std::string &s, const std::vector &x) { push(s); + writeSingleAttribute(x.size(), HDF5_GRID_GUARD "vector_size", + *Hdf5Type::type); for (hsize_t i = 0; i < x.size(); ++i) { write(s + "_" + std::to_string(i), x[i]); @@ -154,15 +150,97 @@ namespace Grid // Reader template implementation //////////////////////////////////////////// template - void Hdf5Reader::readDefault(const std::string &s, U &output) + void Hdf5Reader::readSingleAttribute(U &x, const std::string &name, + const H5NS::DataType &type) { + H5NS::Attribute attribute; + attribute = group_.openAttribute(name); + attribute.read(type, &x); } template - void Hdf5Reader::readDefault(const std::string &s, std::vector &output) + void Hdf5Reader::readDefault(const std::string &s, U &output) { + readSingleAttribute(output, s, *Hdf5Type::type); + } + + template <> + void Hdf5Reader::readDefault(const std::string &s, std::string &x); + + template + typename std::enable_if>::is_arithmetic, void>::type + Hdf5Reader::readDefault(const std::string &s, std::vector &x) + { + // alias to element type + typedef typename element>::type Element; + // read the dimensions + H5NS::DataSpace dataSpace; + H5E_auto2_t func; + void * client_data; + std::vector hdim; + std::vector dim; + hsize_t size = 1; + + H5NS::Exception::getAutoPrint(func, &client_data); + try + { + H5NS::Exception::dontPrint(); + dataSpace = group_.openDataSet(s).getSpace(); + } + catch (H5NS::Exception &e) + { + H5NS::Exception::setAutoPrint(func, client_data); + dataSpace = group_.openAttribute(s).getSpace(); + } + hdim.resize(dataSpace.getSimpleExtentNdims()); + dataSpace.getSimpleExtentDims(hdim.data()); + for (auto &d: hdim) + { + dim.push_back(d); + size *= d; + } + + // read the flat vector + std::vector buf(size); + + if (size > dataSetThres_) + { + H5NS::DataSet dataSet; + + dataSet = 
group_.openDataSet(s); + dataSet.read(buf.data(), *Hdf5Type::type); + } + else + { + H5NS::Attribute attribute; + + attribute = group_.openAttribute(s); + attribute.read(*Hdf5Type::type, buf.data()); + } + + // reconstruct the multidimensional vector + Reconstruct> r(buf, dim); + + x = r.getVector(); + } + + template + typename std::enable_if>::is_arithmetic, void>::type + Hdf5Reader::readDefault(const std::string &s, std::vector &x) + { + uint64_t size; + + push(s); + readSingleAttribute(size, HDF5_GRID_GUARD "vector_size", + *Hdf5Type::type); + x.resize(size); + for (hsize_t i = 0; i < x.size(); ++i) + { + read(s + "_" + std::to_string(i), x[i]); + } + pop(); } } diff --git a/tests/IO/Test_serialisation.cc b/tests/IO/Test_serialisation.cc index 7250d618..d3bbabe4 100644 --- a/tests/IO/Test_serialisation.cc +++ b/tests/IO/Test_serialisation.cc @@ -104,8 +104,8 @@ int main(int argc,char **argv) }; // read tests - myclass copy1, copy2, copy3; - std::vector veccopy1, veccopy2, veccopy3; + myclass copy1, copy2, copy3, copy4; + std::vector veccopy1, veccopy2, veccopy3, veccopy4; //// XML { XmlReader RD("bother.xml"); @@ -150,24 +150,41 @@ int main(int argc,char **argv) } { Hdf5Reader TRD("bother.h5"); - read (TRD,"discard",copy3 ); - read (TRD,"discard2",veccopy3 ); + std::cout << "read single" << std::endl; + read (TRD,"discard",copy4 ); + std::cout << "read vec" << std::endl; + read (TRD,"discard2",veccopy4 ); std::cout << "Loaded (h5) -----------------" << std::endl; - std::cout << copy3 << std::endl << veccopy3 << std::endl; + std::cout << copy3 << std::endl << veccopy4 << std::endl; } #endif - std::vector iv = strToVec("1 2 2 4"); - std::vector sv = strToVec("bli bla blu"); + typedef std::vector>> vec3d; - for (auto &e: iv) + vec3d dv, buf; + double d = 0.; + + dv.resize(4); + for (auto &v1: dv) { - std::cout << e << " "; + v1.resize(3); + for (auto &v2: v1) + { + v2.resize(5); + for (auto &x: v2) + { + x = d++; + } + } } - std::cout << std::endl; - for (auto &e: 
sv) - { - std::cout << e << " "; - } - std::cout << std::endl; + std::cout << dv << std::endl; + + Flatten flatdv(dv); + + std::cout << flatdv.getDim() << std::endl; + std::cout << flatdv.getFlatVector() << std::endl; + + Reconstruct rec(flatdv.getFlatVector(), flatdv.getDim()); + + std::cout << flatdv.getVector() << std::endl; } From 4be08ebccc3caa704dbb7552b353c92ecd43b782 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 18 Jan 2017 17:39:59 -0800 Subject: [PATCH 24/37] debug code cleaning --- lib/serialisation/Hdf5IO.cc | 8 -------- 1 file changed, 8 deletions(-) diff --git a/lib/serialisation/Hdf5IO.cc b/lib/serialisation/Hdf5IO.cc index 02356220..8b6581ea 100644 --- a/lib/serialisation/Hdf5IO.cc +++ b/lib/serialisation/Hdf5IO.cc @@ -100,12 +100,4 @@ void Hdf5Reader::readDefault(const std::string &s, std::string &x) x.resize(strType.getSize()); attribute.read(strType, &(x[0])); - - std::cout << "length: " << strType.getSize() << std::endl; - std::cout << "string: |"; - for (auto &c: x) - { - std::cout << "'" << c << "'|"; - } - std::cout << std::endl; } From 654e0b0fd0b23ad468c405ae053dd3f44748d28e Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 18 Jan 2017 17:40:32 -0800 Subject: [PATCH 25/37] Serialisable object are now comparable with == --- lib/serialisation/MacroMagic.h | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/lib/serialisation/MacroMagic.h b/lib/serialisation/MacroMagic.h index c9137dfe..8b027f30 100644 --- a/lib/serialisation/MacroMagic.h +++ b/lib/serialisation/MacroMagic.h @@ -109,38 +109,36 @@ THE SOFTWARE. ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #define GRID_MACRO_MEMBER(A,B) A B; +#define GRID_MACRO_COMP_MEMBER(A,B) result = (result and (lhs. B == rhs. B)); #define GRID_MACRO_OS_WRITE_MEMBER(A,B) os<< #A <<" "#B <<" = "<< obj. 
B <<" ; " <\ static inline void write(Writer &WR,const std::string &s, const cname &obj){ \ push(WR,s);\ GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_WRITE_MEMBER,__VA_ARGS__)) \ pop(WR);\ -} \ -\ -\ +}\ template \ static inline void read(Reader &RD,const std::string &s, cname &obj){ \ push(RD,s);\ GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_READ_MEMBER,__VA_ARGS__)) \ pop(RD);\ -} \ -\ -\ +}\ friend inline std::ostream & operator << (std::ostream &os, const cname &obj ) { \ os<<"class "<<#cname<<" {"<::type #define GRID_MACRO_ENUMVAL(A,B) A = B, @@ -149,7 +147,7 @@ friend inline std::ostream & operator << (std::ostream &os, const cname &obj ) { #define GRID_MACRO_ENUMCASEIO(A,B) case GRID_ENUM_TYPE(obj)::A: os << #A; break; #define GRID_SERIALIZABLE_ENUM(name,undefname,...)\ -class name: public Serializable\ +class name: public Grid::Serializable\ {\ public:\ enum EnumType\ @@ -161,7 +159,7 @@ public:\ name(void): value_(undefname) {};\ name(EnumType value): value_(value) {};\ template \ - static inline void write(Writer &WR,const std::string &s, const name &obj)\ + static inline void write(Grid::Writer &WR,const std::string &s, const name &obj)\ {\ switch (obj.value_)\ {\ @@ -171,7 +169,7 @@ public:\ }\ \ template \ - static inline void read(Reader &RD,const std::string &s, name &obj)\ + static inline void read(Grid::Reader &RD,const std::string &s, name &obj)\ {\ std::string buf;\ Grid::read(RD, s, buf);\ From f3f0b6fef99b25cbc6857f4d8b87731a2aa1d2e7 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 18 Jan 2017 17:41:05 -0800 Subject: [PATCH 26/37] serious rewriting of Test_serialisation, now crashes if IO inconsistent --- tests/IO/Test_serialisation.cc | 217 +++++++++++++++------------------ 1 file changed, 101 insertions(+), 116 deletions(-) diff --git a/tests/IO/Test_serialisation.cc b/tests/IO/Test_serialisation.cc index d3bbabe4..b7158b2b 100644 --- a/tests/IO/Test_serialisation.cc +++ b/tests/IO/Test_serialisation.cc @@ -28,137 +28,119 @@ Author: Peter 
Boyle /* END LEGAL */ #include -namespace Grid { - - GRID_SERIALIZABLE_ENUM(myenum, undef, red, 1, blue, 2, green, 3); - - class myclass: Serializable { - public: - - GRID_SERIALIZABLE_CLASS_MEMBERS(myclass, - myenum, e, - std::vector, ve, - std::string, name, - int, x, - double, y, - bool , b, - std::vector, array, - std::vector>, twodimarray, - ); - - myclass() {} - myclass(int i) - : array(4,5.1), twodimarray(3,std::vector(2,1.23456)), ve(2, myenum::blue) - { - e=myenum::red; - x=i; - y=2*i; - b=true; - name="bother said pooh"; - } - }; - -} - using namespace Grid; -int16_t i16 = 1; +GRID_SERIALIZABLE_ENUM(myenum, undef, red, 1, blue, 2, green, 3); + +class myclass: Serializable { +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(myclass, + myenum, e, + std::vector, ve, + std::string, name, + int, x, + double, y, + bool , b, + std::vector, array, + std::vector>, twodimarray, + ); + myclass() {} + myclass(int i) + : array(4,5.1), twodimarray(3,std::vector(2,1.23456)), ve(2, myenum::blue) + { + e=myenum::red; + x=i; + y=2*i; + b=true; + name="bother said pooh"; + } +}; + +int16_t i16 = 1; uint16_t u16 = 2; -int32_t i32 = 3; +int32_t i32 = 3; uint32_t u32 = 4; -int64_t i64 = 5; +int64_t i64 = 5; uint64_t u64 = 6; -float f = M_PI; -double d = 2*M_PI; -bool b = false; +float f = M_PI; +double d = 2*M_PI; +bool b = false; + +template +void ioTest(const std::string &filename, const O &object, const std::string &name) +{ + // writer needs to be destroyed so that writing physically happens + { + W writer(filename); + + write(writer, "testobject", object); + } + + R reader(filename); + O buf; + bool good; + + read(reader, "testobject", buf); + good = (object == buf); + std::cout << name << " IO test: " << (good ? 
"success" : "failure"); + std::cout << std::endl; + if (!good) exit(EXIT_FAILURE); +} int main(int argc,char **argv) { - { - XmlWriter WR("bother.xml"); - - // test basic type writing - push(WR,"BasicTypes"); - write(WR,std::string("i16"),i16); - write(WR,"u16",u16); - write(WR,"i32",i32); - write(WR,"u32",u32); - write(WR,"i64",i64); - write(WR,"u64",u64); - write(WR,"f",f); - write(WR,"d",d); - write(WR,"b",b); - pop(WR); - - // test serializable class writing - myclass obj(1234); // non-trivial constructor - write(WR,"obj",obj); - WR.write("obj2", obj); - std::cout << obj << std::endl; - - std::vector vec; - vec.push_back(myclass(1234)); - vec.push_back(myclass(5678)); - vec.push_back(myclass(3838)); - write(WR, "objvec", vec); - }; + std::cout << "==== basic IO" << std::endl; + XmlWriter WR("bother.xml"); + + // test basic type writing + std::cout << "-- basic writing to 'bother.xml'..." << std::endl; + push(WR,"BasicTypes"); + write(WR,std::string("i16"),i16); + write(WR,"u16",u16); + write(WR,"i32",i32); + write(WR,"u32",u32); + write(WR,"i64",i64); + write(WR,"u64",u64); + write(WR,"f",f); + write(WR,"d",d); + write(WR,"b",b); + pop(WR); + + // test serializable class writing + myclass obj(1234); // non-trivial constructor + std::vector vec; + + std::cout << "-- serialisable class writing to 'bother.xml'..." << std::endl; + write(WR,"obj",obj); + WR.write("obj2", obj); + vec.push_back(myclass(1234)); + vec.push_back(myclass(5678)); + vec.push_back(myclass(3838)); + write(WR, "objvec", vec); + std::cout << "-- serialisable class writing to std::cout:" << std::endl; + std::cout << obj << std::endl; + std::cout << "-- serialisable class comparison:" << std::endl; + std::cout << "vec[0] == obj: " << ((vec[0] == obj) ? "true" : "false") << std::endl; + std::cout << "vec[1] == obj: " << ((vec[1] == obj) ? 
"true" : "false") << std::endl; // read tests - myclass copy1, copy2, copy3, copy4; - std::vector veccopy1, veccopy2, veccopy3, veccopy4; + std::cout << "\n==== IO self-consistency tests" << std::endl; //// XML - { - XmlReader RD("bother.xml"); - read(RD,"obj",copy1); - read(RD,"objvec", veccopy1); - std::cout << "Loaded (XML) -----------------" << std::endl; - std::cout << copy1 << std::endl << veccopy1 << std::endl; - } + ioTest("iotest.xml", obj, "XML (object) "); + ioTest("iotest.xml", vec, "XML (vector of objects)"); //// binary - { - BinaryWriter BWR("bother.bin"); - write(BWR,"discard",copy1 ); - write(BWR,"discard",veccopy1 ); - } - { - BinaryReader BRD("bother.bin"); - read (BRD,"discard",copy2 ); - read (BRD,"discard",veccopy2 ); - std::cout << "Loaded (bin) -----------------" << std::endl; - std::cout << copy2 << std::endl << veccopy2 << std::endl; - } + ioTest("iotest.bin", obj, "binary (object) "); + ioTest("iotest.bin", vec, "binary (vector of objects)"); //// text - { - TextWriter TWR("bother.txt"); - write(TWR,"discard",copy1 ); - write(TWR,"discard",veccopy1 ); - } - { - TextReader TRD("bother.txt"); - read (TRD,"discard",copy3 ); - read (TRD,"discard",veccopy3 ); - std::cout << "Loaded (txt) -----------------" << std::endl; - std::cout << copy3 << std::endl << veccopy3 << std::endl; - } -#ifdef HAVE_HDF5 + ioTest("iotest.dat", obj, "text (object) "); + ioTest("iotest.dat", vec, "text (vector of objects)"); //// HDF5 - //// HDF5 does not accept elements with the duplicated names, hence "discard2" - { - Hdf5Writer TWR("bother.h5"); - write(TWR,"discard",copy1 ); - write(TWR,"discard2",veccopy1 ); - } - { - Hdf5Reader TRD("bother.h5"); - std::cout << "read single" << std::endl; - read (TRD,"discard",copy4 ); - std::cout << "read vec" << std::endl; - read (TRD,"discard2",veccopy4 ); - std::cout << "Loaded (h5) -----------------" << std::endl; - std::cout << copy3 << std::endl << veccopy4 << std::endl; - } +#ifdef HAVE_HDF5 + ioTest("iotest.h5", obj, 
"HDF5 (object) "); + ioTest("iotest.h5", vec, "HDF5 (vector of objects)"); #endif + std::cout << "\n==== vector flattening/reconstruction" << std::endl; typedef std::vector>> vec3d; vec3d dv, buf; @@ -177,14 +159,17 @@ int main(int argc,char **argv) } } } + std::cout << "original 3D vector:" << std::endl; std::cout << dv << std::endl; Flatten flatdv(dv); + std::cout << "\ndimensions:" << std::endl; std::cout << flatdv.getDim() << std::endl; + std::cout << "\nflattened vector:" << std::endl; std::cout << flatdv.getFlatVector() << std::endl; Reconstruct rec(flatdv.getFlatVector(), flatdv.getDim()); - + std::cout << "\nreconstructed vector:" << std::endl; std::cout << flatdv.getVector() << std::endl; } From 5405526424a43ef7c78a186c831b1eecc2de985f Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 18 Jan 2017 22:42:19 -0800 Subject: [PATCH 27/37] Code typo --- lib/serialisation/BaseIO.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/serialisation/BaseIO.h b/lib/serialisation/BaseIO.h index 5b5ef427..eca5cff2 100644 --- a/lib/serialisation/BaseIO.h +++ b/lib/serialisation/BaseIO.h @@ -306,7 +306,7 @@ namespace Grid { } template - const V & Reconstruct::Reconstruct::getVector(void) + const V & Reconstruct::getVector(void) { return vector_; } From 24d3d31b01416b5b6e7cc98bcf341023f696982c Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 19 Jan 2017 14:08:22 -0800 Subject: [PATCH 28/37] Genetic scheduler: uses insert instead of emplace for better compiler compatibility --- extras/Hadrons/GeneticScheduler.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/extras/Hadrons/GeneticScheduler.hpp b/extras/Hadrons/GeneticScheduler.hpp index c9256d96..d0c52596 100644 --- a/extras/Hadrons/GeneticScheduler.hpp +++ b/extras/Hadrons/GeneticScheduler.hpp @@ -166,7 +166,7 @@ void GeneticScheduler::initPopulation(void) { auto p = graph_.topoSort(gen_); - population_.emplace(func_(p), p); + 
population_.insert(std::make_pair(func_(p), p)); } } @@ -180,8 +180,8 @@ void GeneticScheduler::doCrossover(void) crossover(c1, c2, p1, p2); PARALLEL_CRITICAL { - population_.emplace(func_(c1), c1); - population_.emplace(func_(c2), c2); + population_.insert(std::make_pair(func_(c1), c1)); + population_.insert(std::make_pair(func_(c2), c2)); } } @@ -200,7 +200,7 @@ void GeneticScheduler::doMutation(void) mutation(m, it->second); PARALLEL_CRITICAL { - population_.emplace(func_(m), m); + population_.insert(std::make_pair(func_(m), m)); } } } From 2c673666dacec2087306309b4f5824073aa13f0e Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 19 Jan 2017 17:19:12 -0800 Subject: [PATCH 29/37] Standardisation of HDF5 types --- lib/serialisation/Hdf5Type.h | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/lib/serialisation/Hdf5Type.h b/lib/serialisation/Hdf5Type.h index beb509c2..52c6ed24 100644 --- a/lib/serialisation/Hdf5Type.h +++ b/lib/serialisation/Hdf5Type.h @@ -17,21 +17,20 @@ static constexpr bool isNative = true;\ }; #define DEFINE_HDF5_NATIVE_TYPES \ -HDF5_NATIVE_TYPE(NATIVE_B8, bool);\ -HDF5_NATIVE_TYPE(NATIVE_CHAR, char);\ -HDF5_NATIVE_TYPE(NATIVE_SCHAR, signed char);\ -HDF5_NATIVE_TYPE(NATIVE_UCHAR, unsigned char);\ -HDF5_NATIVE_TYPE(NATIVE_SHORT, short);\ -HDF5_NATIVE_TYPE(NATIVE_USHORT, unsigned short);\ -HDF5_NATIVE_TYPE(NATIVE_INT, int);\ -HDF5_NATIVE_TYPE(NATIVE_UINT, unsigned int);\ -HDF5_NATIVE_TYPE(NATIVE_LONG, long);\ -HDF5_NATIVE_TYPE(NATIVE_ULONG, unsigned long);\ -HDF5_NATIVE_TYPE(NATIVE_LLONG, long long);\ -HDF5_NATIVE_TYPE(NATIVE_ULLONG, unsigned long long);\ -HDF5_NATIVE_TYPE(NATIVE_FLOAT, float);\ -HDF5_NATIVE_TYPE(NATIVE_DOUBLE, double);\ -HDF5_NATIVE_TYPE(NATIVE_LDOUBLE, long double); +HDF5_NATIVE_TYPE(STD_B8LE, bool);\ +HDF5_NATIVE_TYPE(STD_I8LE, char);\ +HDF5_NATIVE_TYPE(STD_U8LE, unsigned char);\ +HDF5_NATIVE_TYPE(STD_I16LE, short);\ +HDF5_NATIVE_TYPE(STD_U16LE, unsigned short);\ 
+HDF5_NATIVE_TYPE(STD_I32LE, int);\ +HDF5_NATIVE_TYPE(STD_U32LE, unsigned int);\ +HDF5_NATIVE_TYPE(STD_I64LE, long);\ +HDF5_NATIVE_TYPE(STD_U64LE, unsigned long);\ +HDF5_NATIVE_TYPE(STD_I64LE, long long);\ +HDF5_NATIVE_TYPE(STD_U64LE, unsigned long long);\ +HDF5_NATIVE_TYPE(IEEE_F32LE, float);\ +HDF5_NATIVE_TYPE(IEEE_F64LE, double);\ +HDF5_NATIVE_TYPE(IEEE_F64LE, long double); namespace Grid { From 6eea9e4da71227d205a971adfb968721fac09bc5 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 19 Jan 2017 18:02:53 -0800 Subject: [PATCH 30/37] HDF5 types static initialisation is mysteriously buggy on BG/Q, changing strategy --- lib/Makefile.am | 1 - lib/serialisation/Hdf5IO.cc | 4 ++-- lib/serialisation/Hdf5IO.h | 20 ++++++++++---------- lib/serialisation/Hdf5Type.cc | 8 -------- lib/serialisation/Hdf5Type.h | 5 ++++- 5 files changed, 16 insertions(+), 22 deletions(-) delete mode 100644 lib/serialisation/Hdf5Type.cc diff --git a/lib/Makefile.am b/lib/Makefile.am index 9aa6af92..fac622ca 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -27,7 +27,6 @@ endif if BUILD_HDF5 extra_sources+=serialisation/Hdf5IO.cc - extra_sources+=serialisation/Hdf5Type.cc extra_headers+=serialisation/Hdf5IO.h extra_headers+=serialisation/Hdf5Type.h endif diff --git a/lib/serialisation/Hdf5IO.cc b/lib/serialisation/Hdf5IO.cc index 8b6581ea..4d5a2df5 100644 --- a/lib/serialisation/Hdf5IO.cc +++ b/lib/serialisation/Hdf5IO.cc @@ -12,7 +12,7 @@ Hdf5Writer::Hdf5Writer(const std::string &fileName) { group_ = file_.openGroup("/"); writeSingleAttribute(dataSetThres_, HDF5_GRID_GUARD "dataset_threshold", - *Hdf5Type::type); + *Hdf5Type::type()); } void Hdf5Writer::push(const std::string &s) @@ -62,7 +62,7 @@ Hdf5Reader::Hdf5Reader(const std::string &fileName) { group_ = file_.openGroup("/"); readSingleAttribute(dataSetThres_, HDF5_GRID_GUARD "dataset_threshold", - *Hdf5Type::type); + *Hdf5Type::type()); } void Hdf5Reader::push(const std::string &s) diff --git a/lib/serialisation/Hdf5IO.h 
b/lib/serialisation/Hdf5IO.h index b58c86ed..9281e5a0 100644 --- a/lib/serialisation/Hdf5IO.h +++ b/lib/serialisation/Hdf5IO.h @@ -92,7 +92,7 @@ namespace Grid template void Hdf5Writer::writeDefault(const std::string &s, const U &x) { - writeSingleAttribute(x, s, *Hdf5Type::type); + writeSingleAttribute(x, s, *Hdf5Type::type()); } template <> @@ -122,15 +122,15 @@ namespace Grid { H5NS::DataSet dataSet; - dataSet = group_.createDataSet(s, *Hdf5Type::type, dataSpace); - dataSet.write(flatx.data(), *Hdf5Type::type); + dataSet = group_.createDataSet(s, *Hdf5Type::type(), dataSpace); + dataSet.write(flatx.data(), *Hdf5Type::type()); } else { H5NS::Attribute attribute; - attribute = group_.createAttribute(s, *Hdf5Type::type, dataSpace); - attribute.write(*Hdf5Type::type, flatx.data()); + attribute = group_.createAttribute(s, *Hdf5Type::type(), dataSpace); + attribute.write(*Hdf5Type::type(), flatx.data()); } } @@ -140,7 +140,7 @@ namespace Grid { push(s); writeSingleAttribute(x.size(), HDF5_GRID_GUARD "vector_size", - *Hdf5Type::type); + *Hdf5Type::type()); for (hsize_t i = 0; i < x.size(); ++i) { write(s + "_" + std::to_string(i), x[i]); @@ -162,7 +162,7 @@ namespace Grid template void Hdf5Reader::readDefault(const std::string &s, U &output) { - readSingleAttribute(output, s, *Hdf5Type::type); + readSingleAttribute(output, s, *Hdf5Type::type()); } template <> @@ -210,14 +210,14 @@ namespace Grid H5NS::DataSet dataSet; dataSet = group_.openDataSet(s); - dataSet.read(buf.data(), *Hdf5Type::type); + dataSet.read(buf.data(), *Hdf5Type::type()); } else { H5NS::Attribute attribute; attribute = group_.openAttribute(s); - attribute.read(*Hdf5Type::type, buf.data()); + attribute.read(*Hdf5Type::type(), buf.data()); } // reconstruct the multidimensional vector @@ -234,7 +234,7 @@ namespace Grid push(s); readSingleAttribute(size, HDF5_GRID_GUARD "vector_size", - *Hdf5Type::type); + *Hdf5Type::type()); x.resize(size); for (hsize_t i = 0; i < x.size(); ++i) { diff --git 
a/lib/serialisation/Hdf5Type.cc b/lib/serialisation/Hdf5Type.cc deleted file mode 100644 index 75c7692e..00000000 --- a/lib/serialisation/Hdf5Type.cc +++ /dev/null @@ -1,8 +0,0 @@ -#include "Hdf5Type.h" - -using namespace Grid; - -#define HDF5_NATIVE_TYPE(predType, cType)\ -const H5NS::PredType * Hdf5Type::type = &H5NS::PredType::predType; - -DEFINE_HDF5_NATIVE_TYPES; diff --git a/lib/serialisation/Hdf5Type.h b/lib/serialisation/Hdf5Type.h index 52c6ed24..8b56c406 100644 --- a/lib/serialisation/Hdf5Type.h +++ b/lib/serialisation/Hdf5Type.h @@ -12,7 +12,10 @@ template <>\ struct Hdf5Type\ {\ -static const H5NS::PredType *type;\ +static inline const H5NS::PredType *type(void)\ +{\ + return &H5NS::PredType::predType;\ +}\ static constexpr bool isNative = true;\ }; From ade1058e5f249531217c7f6874f3596ad85479fe Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 19 Jan 2017 18:23:55 -0800 Subject: [PATCH 31/37] Hdf5Type does not need to be a pointer anymore --- lib/serialisation/Hdf5IO.cc | 4 ++-- lib/serialisation/Hdf5IO.h | 14 +++++++------- lib/serialisation/Hdf5Type.h | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/lib/serialisation/Hdf5IO.cc b/lib/serialisation/Hdf5IO.cc index 4d5a2df5..c5313495 100644 --- a/lib/serialisation/Hdf5IO.cc +++ b/lib/serialisation/Hdf5IO.cc @@ -12,7 +12,7 @@ Hdf5Writer::Hdf5Writer(const std::string &fileName) { group_ = file_.openGroup("/"); writeSingleAttribute(dataSetThres_, HDF5_GRID_GUARD "dataset_threshold", - *Hdf5Type::type()); + Hdf5Type::type()); } void Hdf5Writer::push(const std::string &s) @@ -62,7 +62,7 @@ Hdf5Reader::Hdf5Reader(const std::string &fileName) { group_ = file_.openGroup("/"); readSingleAttribute(dataSetThres_, HDF5_GRID_GUARD "dataset_threshold", - *Hdf5Type::type()); + Hdf5Type::type()); } void Hdf5Reader::push(const std::string &s) diff --git a/lib/serialisation/Hdf5IO.h b/lib/serialisation/Hdf5IO.h index 9281e5a0..1c73bea3 100644 --- a/lib/serialisation/Hdf5IO.h +++ 
b/lib/serialisation/Hdf5IO.h @@ -92,7 +92,7 @@ namespace Grid template void Hdf5Writer::writeDefault(const std::string &s, const U &x) { - writeSingleAttribute(x, s, *Hdf5Type::type()); + writeSingleAttribute(x, s, Hdf5Type::type()); } template <> @@ -122,7 +122,7 @@ namespace Grid { H5NS::DataSet dataSet; - dataSet = group_.createDataSet(s, *Hdf5Type::type(), dataSpace); + dataSet = group_.createDataSet(s, Hdf5Type::type(), dataSpace); dataSet.write(flatx.data(), *Hdf5Type::type()); } else @@ -140,7 +140,7 @@ namespace Grid { push(s); writeSingleAttribute(x.size(), HDF5_GRID_GUARD "vector_size", - *Hdf5Type::type()); + Hdf5Type::type()); for (hsize_t i = 0; i < x.size(); ++i) { write(s + "_" + std::to_string(i), x[i]); @@ -162,7 +162,7 @@ namespace Grid template void Hdf5Reader::readDefault(const std::string &s, U &output) { - readSingleAttribute(output, s, *Hdf5Type::type()); + readSingleAttribute(output, s, Hdf5Type::type()); } template <> @@ -210,14 +210,14 @@ namespace Grid H5NS::DataSet dataSet; dataSet = group_.openDataSet(s); - dataSet.read(buf.data(), *Hdf5Type::type()); + dataSet.read(buf.data(), Hdf5Type::type()); } else { H5NS::Attribute attribute; attribute = group_.openAttribute(s); - attribute.read(*Hdf5Type::type(), buf.data()); + attribute.read(Hdf5Type::type(), buf.data()); } // reconstruct the multidimensional vector @@ -234,7 +234,7 @@ namespace Grid push(s); readSingleAttribute(size, HDF5_GRID_GUARD "vector_size", - *Hdf5Type::type()); + Hdf5Type::type()); x.resize(size); for (hsize_t i = 0; i < x.size(); ++i) { diff --git a/lib/serialisation/Hdf5Type.h b/lib/serialisation/Hdf5Type.h index 8b56c406..75575bf9 100644 --- a/lib/serialisation/Hdf5Type.h +++ b/lib/serialisation/Hdf5Type.h @@ -12,9 +12,9 @@ template <>\ struct Hdf5Type\ {\ -static inline const H5NS::PredType *type(void)\ +static inline const H5NS::PredType & type(void)\ {\ - return &H5NS::PredType::predType;\ + return H5NS::PredType::predType;\ }\ static constexpr bool isNative = 
true;\ }; From 81e66d6631ab07ee52522a50478ff3bb304a09d6 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 19 Jan 2017 18:24:53 -0800 Subject: [PATCH 32/37] HDF5: revert back to native types --- lib/serialisation/Hdf5Type.h | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/lib/serialisation/Hdf5Type.h b/lib/serialisation/Hdf5Type.h index 75575bf9..2e02128e 100644 --- a/lib/serialisation/Hdf5Type.h +++ b/lib/serialisation/Hdf5Type.h @@ -20,20 +20,21 @@ static constexpr bool isNative = true;\ }; #define DEFINE_HDF5_NATIVE_TYPES \ -HDF5_NATIVE_TYPE(STD_B8LE, bool);\ -HDF5_NATIVE_TYPE(STD_I8LE, char);\ -HDF5_NATIVE_TYPE(STD_U8LE, unsigned char);\ -HDF5_NATIVE_TYPE(STD_I16LE, short);\ -HDF5_NATIVE_TYPE(STD_U16LE, unsigned short);\ -HDF5_NATIVE_TYPE(STD_I32LE, int);\ -HDF5_NATIVE_TYPE(STD_U32LE, unsigned int);\ -HDF5_NATIVE_TYPE(STD_I64LE, long);\ -HDF5_NATIVE_TYPE(STD_U64LE, unsigned long);\ -HDF5_NATIVE_TYPE(STD_I64LE, long long);\ -HDF5_NATIVE_TYPE(STD_U64LE, unsigned long long);\ -HDF5_NATIVE_TYPE(IEEE_F32LE, float);\ -HDF5_NATIVE_TYPE(IEEE_F64LE, double);\ -HDF5_NATIVE_TYPE(IEEE_F64LE, long double); +HDF5_NATIVE_TYPE(NATIVE_B8, bool);\ +HDF5_NATIVE_TYPE(NATIVE_CHAR, char);\ +HDF5_NATIVE_TYPE(NATIVE_SCHAR, signed char);\ +HDF5_NATIVE_TYPE(NATIVE_UCHAR, unsigned char);\ +HDF5_NATIVE_TYPE(NATIVE_SHORT, short);\ +HDF5_NATIVE_TYPE(NATIVE_USHORT, unsigned short);\ +HDF5_NATIVE_TYPE(NATIVE_INT, int);\ +HDF5_NATIVE_TYPE(NATIVE_UINT, unsigned int);\ +HDF5_NATIVE_TYPE(NATIVE_LONG, long);\ +HDF5_NATIVE_TYPE(NATIVE_ULONG, unsigned long);\ +HDF5_NATIVE_TYPE(NATIVE_LLONG, long long);\ +HDF5_NATIVE_TYPE(NATIVE_ULLONG, unsigned long long);\ +HDF5_NATIVE_TYPE(NATIVE_FLOAT, float);\ +HDF5_NATIVE_TYPE(NATIVE_DOUBLE, double);\ +HDF5_NATIVE_TYPE(NATIVE_LDOUBLE, long double); namespace Grid { From 7423a352c5b2f97e800667000533c7df9ab06c07 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 19 Jan 2017 18:33:04 -0800 Subject: 
[PATCH 33/37] HDF5: typos --- lib/serialisation/Hdf5IO.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/serialisation/Hdf5IO.h b/lib/serialisation/Hdf5IO.h index 1c73bea3..0fb277b0 100644 --- a/lib/serialisation/Hdf5IO.h +++ b/lib/serialisation/Hdf5IO.h @@ -123,14 +123,14 @@ namespace Grid H5NS::DataSet dataSet; dataSet = group_.createDataSet(s, Hdf5Type::type(), dataSpace); - dataSet.write(flatx.data(), *Hdf5Type::type()); + dataSet.write(flatx.data(), Hdf5Type::type()); } else { H5NS::Attribute attribute; - attribute = group_.createAttribute(s, *Hdf5Type::type(), dataSpace); - attribute.write(*Hdf5Type::type(), flatx.data()); + attribute = group_.createAttribute(s, Hdf5Type::type(), dataSpace); + attribute.write(Hdf5Type::type(), flatx.data()); } } From 6b5259cc104d79b714ec93ffbb95e31b039a95b0 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Fri, 20 Jan 2017 11:03:19 -0800 Subject: [PATCH 34/37] HDF5 detects if a name is a dataset or not without using exception catching --- lib/serialisation/Hdf5IO.h | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/lib/serialisation/Hdf5IO.h b/lib/serialisation/Hdf5IO.h index 0fb277b0..3edb7d10 100644 --- a/lib/serialisation/Hdf5IO.h +++ b/lib/serialisation/Hdf5IO.h @@ -177,23 +177,18 @@ namespace Grid // read the dimensions H5NS::DataSpace dataSpace; - H5E_auto2_t func; - void * client_data; std::vector hdim; std::vector dim; hsize_t size = 1; - H5NS::Exception::getAutoPrint(func, &client_data); - try + if (group_.attrExists(s)) { - H5NS::Exception::dontPrint(); - dataSpace = group_.openDataSet(s).getSpace(); - } - catch (H5NS::Exception &e) - { - H5NS::Exception::setAutoPrint(func, client_data); dataSpace = group_.openAttribute(s).getSpace(); } + else + { + dataSpace = group_.openDataSet(s).getSpace(); + } hdim.resize(dataSpace.getSimpleExtentNdims()); dataSpace.getSimpleExtentDims(hdim.data()); for (auto &d: hdim) From afa095d33d1665dbf7648dddef95f78901e7e6cd 
Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Fri, 20 Jan 2017 12:10:41 -0800 Subject: [PATCH 35/37] HDF5: better complex number support --- lib/Grid.h | 2 +- lib/serialisation/BaseIO.h | 9 ++++---- lib/serialisation/Hdf5IO.h | 16 +++++++------- lib/serialisation/Hdf5Type.h | 39 ++++++++++++++++++++++++++++------ tests/IO/Test_serialisation.cc | 6 +++++- 5 files changed, 51 insertions(+), 21 deletions(-) diff --git a/lib/Grid.h b/lib/Grid.h index 0f57c8a6..cb55d0c8 100644 --- a/lib/Grid.h +++ b/lib/Grid.h @@ -60,12 +60,12 @@ Author: paboyle // Grid headers /////////////////// #include "Config.h" -#include #include #include #include #include #include +#include #include #include #include diff --git a/lib/serialisation/BaseIO.h b/lib/serialisation/BaseIO.h index eca5cff2..0357915d 100644 --- a/lib/serialisation/BaseIO.h +++ b/lib/serialisation/BaseIO.h @@ -68,20 +68,21 @@ namespace Grid { return os; } - // Vector element trait ////////////////////////////////////////////////////// + // Vector element trait ////////////////////////////////////////////////////// template struct element { typedef T type; - static constexpr bool is_arithmetic = false; + static constexpr bool is_number = false; }; template struct element> { typedef typename element::type type; - static constexpr bool is_arithmetic = std::is_arithmetic::value - or element::is_arithmetic; + static constexpr bool is_number = std::is_arithmetic::value + or is_complex::value + or element::is_number; }; // Vector flatening utility class //////////////////////////////////////////// diff --git a/lib/serialisation/Hdf5IO.h b/lib/serialisation/Hdf5IO.h index 3edb7d10..2f891cd4 100644 --- a/lib/serialisation/Hdf5IO.h +++ b/lib/serialisation/Hdf5IO.h @@ -32,10 +32,10 @@ namespace Grid template void writeDefault(const std::string &s, const U &x); template - typename std::enable_if>::is_arithmetic, void>::type + typename std::enable_if>::is_number, void>::type writeDefault(const std::string &s, const std::vector 
&x); template - typename std::enable_if>::is_arithmetic, void>::type + typename std::enable_if>::is_number, void>::type writeDefault(const std::string &s, const std::vector &x); private: template @@ -59,10 +59,10 @@ namespace Grid template void readDefault(const std::string &s, U &output); template - typename std::enable_if>::is_arithmetic, void>::type + typename std::enable_if>::is_number, void>::type readDefault(const std::string &s, std::vector &x); template - typename std::enable_if>::is_arithmetic, void>::type + typename std::enable_if>::is_number, void>::type readDefault(const std::string &s, std::vector &x); private: template @@ -99,7 +99,7 @@ namespace Grid void Hdf5Writer::writeDefault(const std::string &s, const std::string &x); template - typename std::enable_if>::is_arithmetic, void>::type + typename std::enable_if>::is_number, void>::type Hdf5Writer::writeDefault(const std::string &s, const std::vector &x) { // alias to element type @@ -135,7 +135,7 @@ namespace Grid } template - typename std::enable_if>::is_arithmetic, void>::type + typename std::enable_if>::is_number, void>::type Hdf5Writer::writeDefault(const std::string &s, const std::vector &x) { push(s); @@ -169,7 +169,7 @@ namespace Grid void Hdf5Reader::readDefault(const std::string &s, std::string &x); template - typename std::enable_if>::is_arithmetic, void>::type + typename std::enable_if>::is_number, void>::type Hdf5Reader::readDefault(const std::string &s, std::vector &x) { // alias to element type @@ -222,7 +222,7 @@ namespace Grid } template - typename std::enable_if>::is_arithmetic, void>::type + typename std::enable_if>::is_number, void>::type Hdf5Reader::readDefault(const std::string &s, std::vector &x) { uint64_t size; diff --git a/lib/serialisation/Hdf5Type.h b/lib/serialisation/Hdf5Type.h index 2e02128e..cf682138 100644 --- a/lib/serialisation/Hdf5Type.h +++ b/lib/serialisation/Hdf5Type.h @@ -10,13 +10,14 @@ #define HDF5_NATIVE_TYPE(predType, cType)\ template <>\ -struct Hdf5Type\ 
+class Hdf5Type\ {\ -static inline const H5NS::PredType & type(void)\ -{\ - return H5NS::PredType::predType;\ -}\ -static constexpr bool isNative = true;\ +public:\ + static inline const H5NS::DataType & type(void)\ + {\ + return H5NS::PredType::predType;\ + }\ + static constexpr bool isNative = true;\ }; #define DEFINE_HDF5_NATIVE_TYPES \ @@ -38,12 +39,36 @@ HDF5_NATIVE_TYPE(NATIVE_LDOUBLE, long double); namespace Grid { - template struct Hdf5Type + template class Hdf5Type { + public: static constexpr bool isNative = false; }; DEFINE_HDF5_NATIVE_TYPES; + + template + class Hdf5Type> + { + public: + static inline const H5NS::DataType & type(void) + { + if (typePtr_ == nullptr) + { + typePtr_.reset(new H5NS::CompType(sizeof(std::complex))); + typePtr_->insertMember("re", 0, Hdf5Type::type()); + typePtr_->insertMember("im", sizeof(R), Hdf5Type::type()); + } + + return *typePtr_; + } + static constexpr bool isNative = false; + private: + static std::unique_ptr typePtr_; + }; + + template + std::unique_ptr Hdf5Type>::typePtr_ = nullptr; } #undef HDF5_NATIVE_TYPE diff --git a/tests/IO/Test_serialisation.cc b/tests/IO/Test_serialisation.cc index b7158b2b..8204b05b 100644 --- a/tests/IO/Test_serialisation.cc +++ b/tests/IO/Test_serialisation.cc @@ -43,10 +43,14 @@ public: bool , b, std::vector, array, std::vector>, twodimarray, + std::vector>>, cmplx3darray ); myclass() {} myclass(int i) - : array(4,5.1), twodimarray(3,std::vector(2,1.23456)), ve(2, myenum::blue) + : array(4,5.1) + , twodimarray(3,std::vector(5, 1.23456)) + , cmplx3darray(3,std::vector>(5, std::vector(7, Complex(1.2, 3.4)))) + , ve(2, myenum::blue) { e=myenum::red; x=i; From 4c75095c616c6c246b268c775fee13cbe7ae84da Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Fri, 20 Jan 2017 12:14:01 -0800 Subject: [PATCH 36/37] HDF5: header fix --- lib/serialisation/Hdf5Type.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/serialisation/Hdf5Type.h b/lib/serialisation/Hdf5Type.h index 
cf682138..8634f35b 100644 --- a/lib/serialisation/Hdf5Type.h +++ b/lib/serialisation/Hdf5Type.h @@ -2,7 +2,8 @@ #define GRID_SERIALISATION_HDF5_TYPE_H #include -#include +#include +#include #ifndef H5_NO_NAMESPACE #define H5NS H5 From b7da264b0a64c21bd9f45174c4d831e70243bc63 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Sat, 21 Jan 2017 13:40:23 -0800 Subject: [PATCH 37/37] Hadrons: Application is not storing the environment ref but calling getInstance() each time, solving a very nasty seg fault on Linux/KNL --- extras/Hadrons/Application.cc | 43 +++++++++++++++++++--------------- extras/Hadrons/Application.hpp | 8 ++++--- 2 files changed, 29 insertions(+), 22 deletions(-) diff --git a/extras/Hadrons/Application.cc b/extras/Hadrons/Application.cc index 4bb3b383..62674f30 100644 --- a/extras/Hadrons/Application.cc +++ b/extras/Hadrons/Application.cc @@ -42,7 +42,6 @@ using namespace Hadrons; ******************************************************************************/ // constructors //////////////////////////////////////////////////////////////// Application::Application(void) -: env_(Environment::getInstance()) { LOG(Message) << "Modules available:" << std::endl; auto list = ModuleFactory::getInstance().getBuilderList(); @@ -74,11 +73,17 @@ Application::Application(const std::string parameterFileName) parameterFileName_ = parameterFileName; } +// environment shortcut //////////////////////////////////////////////////////// +Environment & Application::env(void) const +{ + return Environment::getInstance(); +} + // access ////////////////////////////////////////////////////////////////////// void Application::setPar(const Application::GlobalPar &par) { par_ = par; - env_.setSeed(strToVec(par_.seed)); + env().setSeed(strToVec(par_.seed)); } const Application::GlobalPar & Application::getPar(void) @@ -89,7 +94,7 @@ const Application::GlobalPar & Application::getPar(void) // execute ///////////////////////////////////////////////////////////////////// void 
Application::run(void) { - if (!parameterFileName_.empty() and (env_.getNModule() == 0)) + if (!parameterFileName_.empty() and (env().getNModule() == 0)) { parseParameterFile(parameterFileName_); } @@ -124,7 +129,7 @@ void Application::parseParameterFile(const std::string parameterFileName) do { read(reader, "id", id); - env_.createModule(id.name, id.type, reader); + env().createModule(id.name, id.type, reader); } while (reader.nextElement("module")); pop(reader); pop(reader); @@ -134,7 +139,7 @@ void Application::saveParameterFile(const std::string parameterFileName) { XmlWriter writer(parameterFileName); ObjectId id; - const unsigned int nMod = env_.getNModule(); + const unsigned int nMod = env().getNModule(); LOG(Message) << "Saving application to '" << parameterFileName << "'..." << std::endl; write(writer, "parameters", getPar()); @@ -142,10 +147,10 @@ void Application::saveParameterFile(const std::string parameterFileName) for (unsigned int i = 0; i < nMod; ++i) { push(writer, "module"); - id.name = env_.getModuleName(i); - id.type = env_.getModule(i)->getRegisteredName(); + id.name = env().getModuleName(i); + id.type = env().getModule(i)->getRegisteredName(); write(writer, "id", id); - env_.getModule(i)->saveParameters(writer, "options"); + env().getModule(i)->saveParameters(writer, "options"); pop(writer); } pop(writer); @@ -164,10 +169,10 @@ auto memPeak = [this](const std::vector &program)\ \ msg = HadronsLogMessage.isActive();\ HadronsLogMessage.Active(false);\ - env_.dryRun(true);\ - memPeak = env_.executeProgram(program);\ - env_.dryRun(false);\ - env_.freeAll();\ + env().dryRun(true);\ + memPeak = env().executeProgram(program);\ + env().dryRun(false);\ + env().freeAll();\ HadronsLogMessage.Active(true);\ \ return memPeak;\ @@ -179,7 +184,7 @@ void Application::schedule(void) // build module dependency graph LOG(Message) << "Building module graph..." 
<< std::endl; - auto graph = env_.makeModuleGraph(); + auto graph = env().makeModuleGraph(); auto con = graph.getConnectedComponents(); // constrained topological sort using a genetic algorithm @@ -256,7 +261,7 @@ void Application::saveSchedule(const std::string filename) << std::endl; for (auto address: program_) { - program.push_back(env_.getModuleName(address)); + program.push_back(env().getModuleName(address)); } write(writer, "schedule", program); } @@ -274,7 +279,7 @@ void Application::loadSchedule(const std::string filename) program_.clear(); for (auto &name: program) { - program_.push_back(env_.getModuleAddress(name)); + program_.push_back(env().getModuleAddress(name)); } scheduled_ = true; memPeak_ = memPeak(program_); @@ -291,7 +296,7 @@ void Application::printSchedule(void) for (unsigned int i = 0; i < program_.size(); ++i) { LOG(Message) << std::setw(4) << i + 1 << ": " - << env_.getModuleName(program_[i]) << std::endl; + << env().getModuleName(program_[i]) << std::endl; } } @@ -304,9 +309,9 @@ void Application::configLoop(void) { LOG(Message) << BIG_SEP << " Starting measurement for trajectory " << t << " " << BIG_SEP << std::endl; - env_.setTrajectory(t); - env_.executeProgram(program_); + env().setTrajectory(t); + env().executeProgram(program_); } LOG(Message) << BIG_SEP << " End of measurement " << BIG_SEP << std::endl; - env_.freeAll(); + env().freeAll(); } diff --git a/extras/Hadrons/Application.hpp b/extras/Hadrons/Application.hpp index 4ea262df..fce9b6eb 100644 --- a/extras/Hadrons/Application.hpp +++ b/extras/Hadrons/Application.hpp @@ -98,11 +98,13 @@ public: void printSchedule(void); // loop on configurations void configLoop(void); +private: + // environment shortcut + Environment & env(void) const; private: long unsigned int locVol_; std::string parameterFileName_{""}; GlobalPar par_; - Environment &env_; std::vector program_; Environment::Size memPeak_; bool scheduled_{false}; @@ -115,14 +117,14 @@ private: template void 
Application::createModule(const std::string name) { - env_.createModule(name); + env().createModule(name); } template void Application::createModule(const std::string name, const typename M::Par &par) { - env_.createModule(name, par); + env().createModule(name, par); } END_HADRONS_NAMESPACE