From 4d2198ea567dc1a04acda8f8e9e556870308f7f2 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 23 Apr 2015 11:04:59 +0100 Subject: [PATCH] Begginings of slice summation and subblocking --- TODO | 4 +-- configure | 46 ++++++++++++++------------------- lib/Grid_config.h | 7 ++--- lib/Grid_config.h.in | 7 ++--- lib/Grid_stencil.h | 6 ++--- lib/Grid_summation.h | 61 +++++++++++++++++++++++++++++++++++++++++++- 6 files changed, 88 insertions(+), 43 deletions(-) diff --git a/TODO b/TODO index a8fa5ce9..256f2609 100644 --- a/TODO +++ b/TODO @@ -10,7 +10,7 @@ FUNCTIONALITY: * How to do U[mu] ... lorentz part of type structure or not. more like chroma if not. -- DONE * subdirs lib, tests ?? ----- DONE - - lib/math + - lib/math - lib/cartesian - lib/cshift - lib/stencil @@ -26,7 +26,7 @@ FUNCTIONALITY: Not done, or just incomplete * random number generation -* Consider switch std::vector to boost arrays. +* Consider switch std::vector to boost arrays or something lighter weight boost::multi_array A()... to replace multi1d, multi2d etc.. * How to define simple matrix operations, such as flavour matrices? diff --git a/configure b/configure index 48fafcf1..9e554925 100755 --- a/configure +++ b/configure @@ -1365,8 +1365,9 @@ Optional Features: --disable-dependency-tracking speeds up one-time build --disable-openmp do not use OpenMP - --enable-simd=SSE|AVX|AVX2|AVX512 - Select instructions + --enable-simd=SSE|AVX|AVX2|AVX512|MIC + Select instructions to be SSE4.0, AVX 1.0, AVX + 2.0+FMA, AVX 512, MIC --enable-comms=none|mpi Select communications Some influential environment variables: @@ -4945,6 +4946,18 @@ fi done +for ac_header in endian.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "endian.h" "ac_cv_header_endian_h" "$ac_includes_default" +if test "x$ac_cv_header_endian_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_ENDIAN_H 1 +_ACEOF + +fi + +done + # Checks for typedefs, structures, and compiler characteristics. ac_fn_c_check_type "$LINENO" "size_t" "ac_cv_type_size_t" "$ac_includes_default" @@ -4999,32 +5012,11 @@ _ACEOF fi done -for ac_func in ntohll -do : - ac_fn_c_check_func "$LINENO" "ntohll" "ac_cv_func_ntohll" -if test "x$ac_cv_func_ntohll" = xyes; then : - cat >>confdefs.h <<_ACEOF -#define HAVE_NTOHLL 1 -_ACEOF - -fi -done - -for ac_func in be64toh -do : - ac_fn_c_check_func "$LINENO" "be64toh" "ac_cv_func_be64toh" -if test "x$ac_cv_func_be64toh" = xyes; then : - cat >>confdefs.h <<_ACEOF -#define HAVE_BE64TOH 1 -_ACEOF - -fi -done - # Check whether --enable-simd was given. if test "${enable_simd+set}" = set; then : - enableval=$enable_simd; ac_SIMD=${enable_simd} + enableval=$enable_simd; \ + ac_SIMD=${enable_simd} else ac_SIMD=AVX2 fi @@ -5049,8 +5041,8 @@ $as_echo "#define AVX1 1" >>confdefs.h $as_echo "#define AVX2 1" >>confdefs.h ;; - AVX512) - echo Configuring for AVX512 + AVX512|MIC) + echo Configuring for AVX512 and MIC $as_echo "#define AVX512 1" >>confdefs.h diff --git a/lib/Grid_config.h b/lib/Grid_config.h index 2171ea6b..4152540e 100644 --- a/lib/Grid_config.h +++ b/lib/Grid_config.h @@ -16,8 +16,8 @@ /* GRID_COMMS_NONE */ /* #undef GRID_COMMS_NONE */ -/* Define to 1 if you have the `be64toh' function. */ -/* #undef HAVE_BE64TOH */ +/* Define to 1 if you have the header file. */ +/* #undef HAVE_ENDIAN_H */ /* Define to 1 if you have the `gettimeofday' function. */ #define HAVE_GETTIMEOFDAY 1 @@ -34,9 +34,6 @@ /* Define to 1 if you have the header file. */ #define HAVE_MEMORY_H 1 -/* Define to 1 if you have the `ntohll' function. */ -/* #undef HAVE_NTOHLL */ - /* Define to 1 if you have the header file. */ #define HAVE_STDINT_H 1 diff --git a/lib/Grid_config.h.in b/lib/Grid_config.h.in index 2381525f..2dc0bda4 100644 --- a/lib/Grid_config.h.in +++ b/lib/Grid_config.h.in @@ -15,8 +15,8 @@ /* GRID_COMMS_NONE */ #undef GRID_COMMS_NONE -/* Define to 1 if you have the `be64toh' function. */ -#undef HAVE_BE64TOH +/* Define to 1 if you have the header file. */ +#undef HAVE_ENDIAN_H /* Define to 1 if you have the `gettimeofday' function. */ #undef HAVE_GETTIMEOFDAY @@ -33,9 +33,6 @@ /* Define to 1 if you have the header file. */ #undef HAVE_MEMORY_H -/* Define to 1 if you have the `ntohll' function. */ -#undef HAVE_NTOHLL - /* Define to 1 if you have the header file. */ #undef HAVE_STDINT_H diff --git a/lib/Grid_stencil.h b/lib/Grid_stencil.h index d8debac0..7bafb879 100644 --- a/lib/Grid_stencil.h +++ b/lib/Grid_stencil.h @@ -283,7 +283,7 @@ namespace Grid { if ( comm_any ) { for(int i=0;i_ndimension == fine->_ndimension); + + int _ndimension = coarse->_ndimension; + + // local and global volumes subdivide cleanly after SIMDization + for(int d=0;d<_ndimension;d++){ + assert((fine->_rdimensions[d] / coarse->_rdimensions[d])* coarse->_rdimensions[d]==fine->_rdimensions[d]); + assert((fine->_ldimensions[d] / coarse->_ldimensions[d])* coarse->_ldimensions[d]==fine->_ldimensions[d]); + assert((fine->_gdimensions[d] / coarse->_gdimensions[d])* coarse->_gdimensions[d]==fine->_gdimensions[d]); + assert((fine->_fdimensions[d] / coarse->_fdimensions[d])* coarse->_fdimensions[d]==fine->_fdimensions[d]); + } +} +// Generic name : Coarsen? +// : SubMeshSum? +// template -inline void sumBlocks(Lattice &coarseData,const Lattice &coarseData,const Lattice &fineData) { GridBase * fine = findData._grid; GridBase * coarse= findData._grid; + + subdivides(coars,fine); // require they map + + int _ndimension = coarse->_ndimension; + std::vector replicated(_ndimension,false); + std::vector block_r (_dimension); + std::vector block_f (_dimension); + + /////////////////////////////////////////////////////////// + // Detect whether the result is replicated in dimension d + /////////////////////////////////////////////////////////// + for(int d=0 ; d<_ndimension;d++){ + if ( (_fdimensions[d] == 1) && (coarse->_processors[d]>1) ) { + replicated[d]=true; + } + block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d]; + block_l[d] = fine->_ldimensions[d] / coarse->_ldimensions[d]; + block_f[d] = fine->_fdimensions[d] / coarse->_fdimensions[d]; + } + + coaseData=zero; + + //FIXME Bagel's strategy: loop over fine sites + // identify corresponding coarse site, but coarse sites are + // divided across threads. Not so easy to do in openmp but + // there must be a way + for(int sf=0;sfoSites();sf++){ + + int sc; + vobj sum=zero; + std::vector coor_c(_ndimension); + std::vector coor_f(_ndimension); + + GridBase::CoorFromIndex(coor_f,sf,fine->_rdimensions); + + for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/fine->_rdimensions; + + GridBase::IndexFromCoor(coor_c,sc,coarse->_rdimensions); + + coarseData._odata[sc]=coarseData._odata[sc]+fineData._odata[sf]; + + } return; } #endif