mirror of
https://github.com/paboyle/Grid.git
synced 2025-10-23 17:24:47 +01:00
Merge branch 'master' of https://github.com/paboyle/Grid
Conflicts: lib/tensors/Tensor_trace.h
This commit is contained in:
31
.gitignore
vendored
31
.gitignore
vendored
@@ -5,6 +5,7 @@
|
|||||||
*.obj
|
*.obj
|
||||||
*~
|
*~
|
||||||
errs
|
errs
|
||||||
|
*#
|
||||||
|
|
||||||
# Precompiled Headers
|
# Precompiled Headers
|
||||||
*.gch
|
*.gch
|
||||||
@@ -48,3 +49,33 @@ config.status
|
|||||||
/stamp-h1
|
/stamp-h1
|
||||||
/config.sub
|
/config.sub
|
||||||
/config.guess
|
/config.guess
|
||||||
|
|
||||||
|
|
||||||
|
# Packages #
|
||||||
|
############
|
||||||
|
# it's better to unpack these files and commit the raw source
|
||||||
|
# git has its own built-in compression methods
|
||||||
|
*.7z
|
||||||
|
*.dmg
|
||||||
|
*.gz
|
||||||
|
*.iso
|
||||||
|
*.jar
|
||||||
|
*.rar
|
||||||
|
*.tar
|
||||||
|
*.zip
|
||||||
|
|
||||||
|
# Logs and databases #
|
||||||
|
######################
|
||||||
|
*.log
|
||||||
|
*.sql
|
||||||
|
*.sqlite
|
||||||
|
|
||||||
|
# OS generated files #
|
||||||
|
######################
|
||||||
|
.DS_Store
|
||||||
|
.DS_Store?
|
||||||
|
._*
|
||||||
|
.Spotlight-V100
|
||||||
|
.Trashes
|
||||||
|
ehthumbs.db
|
||||||
|
Thumbs.db
|
2
INSTALL
2
INSTALL
@@ -1 +1 @@
|
|||||||
/opt/local/share/automake-1.15/INSTALL
|
/usr/share/automake-1.14/INSTALL
|
@@ -20,7 +20,7 @@ optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a signifi
|
|||||||
for most programmers.
|
for most programmers.
|
||||||
|
|
||||||
The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture.
|
The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture.
|
||||||
Presently SSE2 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported.
|
Presently SSE4 (128 bit), AVX and AVX2 (256 bit), and IMCI and AVX512 (512 bit) targets are supported.
|
||||||
|
|
||||||
These are presented as
|
These are presented as
|
||||||
|
|
||||||
@@ -46,3 +46,5 @@ are examples:
|
|||||||
./configure CXX=icpc CXXFLAGS="-std=c++11 -O3 -mmic" --enable-simd=AVX512 --host=none
|
./configure CXX=icpc CXXFLAGS="-std=c++11 -O3 -mmic" --enable-simd=AVX512 --host=none
|
||||||
|
|
||||||
|
|
||||||
|
For developers:
|
||||||
|
Use reconfigure_script in the scripts/ directory to create the autotools environment
|
||||||
|
3
TODO
3
TODO
@@ -66,6 +66,9 @@ Insert/Extract
|
|||||||
|
|
||||||
* Support for ILDG
|
* Support for ILDG
|
||||||
|
|
||||||
|
* Support different boundary conditions (finite temp, chem. potential ... )
|
||||||
|
|
||||||
|
* Support different fermion representations?
|
||||||
|
|
||||||
Actions -- coherent framework for implementing actions and their forces.
|
Actions -- coherent framework for implementing actions and their forces.
|
||||||
|
|
||||||
|
32
configure.ac
32
configure.ac
@@ -3,7 +3,7 @@
|
|||||||
#
|
#
|
||||||
# Project Grid package
|
# Project Grid package
|
||||||
#
|
#
|
||||||
# Time-stamp: <2015-05-26 17:18:54 neo>
|
# Time-stamp: <2015-05-27 18:51:47 neo>
|
||||||
|
|
||||||
AC_PREREQ([2.63])
|
AC_PREREQ([2.63])
|
||||||
AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk])
|
AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk])
|
||||||
@@ -27,7 +27,7 @@ AC_PROG_CXX
|
|||||||
AC_OPENMP
|
AC_OPENMP
|
||||||
AC_PROG_RANLIB
|
AC_PROG_RANLIB
|
||||||
AX_CXX_COMPILE_STDCXX_11(noext, mandatory)
|
AX_CXX_COMPILE_STDCXX_11(noext, mandatory)
|
||||||
|
AX_EXT
|
||||||
|
|
||||||
# Checks for libraries.
|
# Checks for libraries.
|
||||||
#AX_GCC_VAR_ATTRIBUTE(aligned)
|
#AX_GCC_VAR_ATTRIBUTE(aligned)
|
||||||
@@ -69,26 +69,44 @@ Info at: http://www.mpfr.org/)])
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE|AVX|AVX2|AVX512|MIC],\
|
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVX2|AVX512|MIC],\
|
||||||
[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, MIC])],\
|
[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, MIC])],\
|
||||||
[ac_SIMD=${enable_simd}],[ac_SIMD=AVX2])
|
[ac_SIMD=${enable_simd}],[ac_SIMD=AVX2])
|
||||||
|
|
||||||
|
supported=no
|
||||||
|
|
||||||
case ${ac_SIMD} in
|
case ${ac_SIMD} in
|
||||||
SSE4)
|
SSE4)
|
||||||
echo Configuring for SSE4
|
echo Configuring for SSE4
|
||||||
|
if test x"$ax_cv_support_ssse3_ext" = x"yes"; then dnl minimal support for SSE4
|
||||||
AC_DEFINE([SSE4],[1],[SSE4] )
|
AC_DEFINE([SSE4],[1],[SSE4] )
|
||||||
|
supported=yes
|
||||||
|
else
|
||||||
|
AC_MSG_WARN([Your processor does not support SSE4 instructions])
|
||||||
|
fi
|
||||||
;;
|
;;
|
||||||
AVX)
|
AVX)
|
||||||
echo Configuring for AVX
|
echo Configuring for AVX
|
||||||
|
if test x"$ax_cv_support_avx_ext" = x"yes"; then dnl minimal support for AVX
|
||||||
AC_DEFINE([AVX1],[1],[AVX] )
|
AC_DEFINE([AVX1],[1],[AVX] )
|
||||||
|
supported=yes
|
||||||
|
else
|
||||||
|
AC_MSG_WARN([Your processor does not support AVX instructions])
|
||||||
|
fi
|
||||||
;;
|
;;
|
||||||
AVX2)
|
AVX2)
|
||||||
echo Configuring for AVX2
|
echo Configuring for AVX2
|
||||||
|
if test x"$ax_cv_support_avx2_ext" = x"yes"; then dnl minimal support for AVX2
|
||||||
AC_DEFINE([AVX2],[1],[AVX2] )
|
AC_DEFINE([AVX2],[1],[AVX2] )
|
||||||
|
supported=yes
|
||||||
|
else
|
||||||
|
AC_MSG_WARN([Your processor does not support AVX2 instructions])
|
||||||
|
fi
|
||||||
;;
|
;;
|
||||||
AVX512|MIC)
|
AVX512|MIC)
|
||||||
echo Configuring for AVX512 and MIC
|
echo Configuring for AVX512 and MIC
|
||||||
AC_DEFINE([AVX512],[1],[AVX512] )
|
AC_DEFINE([AVX512],[1],[AVX512] )
|
||||||
|
supported="cross compilation"
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
AC_MSG_ERROR([${ac_SIMD} unsupported --enable-simd option]);
|
AC_MSG_ERROR([${ac_SIMD} unsupported --enable-simd option]);
|
||||||
@@ -129,7 +147,9 @@ then
|
|||||||
AC_CONFIG_FILES([docs/doxy.cfg])
|
AC_CONFIG_FILES([docs/doxy.cfg])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
echo
|
||||||
|
echo Creating configuration files
|
||||||
|
echo :::::::::::::::::::::::::::::::::::::::::::
|
||||||
AC_CONFIG_FILES(Makefile)
|
AC_CONFIG_FILES(Makefile)
|
||||||
AC_CONFIG_FILES(lib/Makefile)
|
AC_CONFIG_FILES(lib/Makefile)
|
||||||
AC_CONFIG_FILES(tests/Makefile)
|
AC_CONFIG_FILES(tests/Makefile)
|
||||||
@@ -150,9 +170,9 @@ The following features are enabled:
|
|||||||
- os (target) : $target_os
|
- os (target) : $target_os
|
||||||
- build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi`
|
- build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi`
|
||||||
- graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi`
|
- graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi`
|
||||||
|
- Supported SIMD flags : $SIMD_FLAGS
|
||||||
----------------------------------------------------------
|
----------------------------------------------------------
|
||||||
- enabled simd support : ${ac_SIMD}
|
- enabled simd support : ${ac_SIMD} (supported: $supported )
|
||||||
- communications type : ${ac_COMMS}
|
- communications type : ${ac_COMMS}
|
||||||
|
|
||||||
|
|
||||||
|
2305
docs/doxy.cfg.test
2305
docs/doxy.cfg.test
File diff suppressed because it is too large
Load Diff
0
lib/.dirstamp
Normal file
0
lib/.dirstamp
Normal file
@@ -1,5 +1,5 @@
|
|||||||
/* lib/Grid_config.h. Generated from Grid_config.h.in by configure. */
|
/* lib/GridConfig.h. Generated from GridConfig.h.in by configure. */
|
||||||
/* lib/Grid_config.h.in. Generated from configure.ac by autoheader. */
|
/* lib/GridConfig.h.in. Generated from configure.ac by autoheader. */
|
||||||
|
|
||||||
/* AVX */
|
/* AVX */
|
||||||
/* #undef AVX1 */
|
/* #undef AVX1 */
|
||||||
@@ -16,6 +16,15 @@
|
|||||||
/* GRID_COMMS_NONE */
|
/* GRID_COMMS_NONE */
|
||||||
#define GRID_COMMS_NONE 1
|
#define GRID_COMMS_NONE 1
|
||||||
|
|
||||||
|
/* Support Altivec instructions */
|
||||||
|
/* #undef HAVE_ALTIVEC */
|
||||||
|
|
||||||
|
/* Support AVX (Advanced Vector Extensions) instructions */
|
||||||
|
/* #undef HAVE_AVX */
|
||||||
|
|
||||||
|
/* Support AVX2 (Advanced Vector Extensions 2) instructions */
|
||||||
|
/* #undef HAVE_AVX2 */
|
||||||
|
|
||||||
/* define if the compiler supports basic C++11 syntax */
|
/* define if the compiler supports basic C++11 syntax */
|
||||||
/* #undef HAVE_CXX11 */
|
/* #undef HAVE_CXX11 */
|
||||||
|
|
||||||
@@ -30,6 +39,9 @@
|
|||||||
/* Define to 1 if you have the <endian.h> header file. */
|
/* Define to 1 if you have the <endian.h> header file. */
|
||||||
#define HAVE_ENDIAN_H 1
|
#define HAVE_ENDIAN_H 1
|
||||||
|
|
||||||
|
/* Support FMA3 (Fused Multiply-Add) instructions */
|
||||||
|
/* #undef HAVE_FMA */
|
||||||
|
|
||||||
/* Define to 1 if you have the `gettimeofday' function. */
|
/* Define to 1 if you have the `gettimeofday' function. */
|
||||||
#define HAVE_GETTIMEOFDAY 1
|
#define HAVE_GETTIMEOFDAY 1
|
||||||
|
|
||||||
@@ -54,9 +66,30 @@
|
|||||||
/* Define to 1 if you have the <memory.h> header file. */
|
/* Define to 1 if you have the <memory.h> header file. */
|
||||||
#define HAVE_MEMORY_H 1
|
#define HAVE_MEMORY_H 1
|
||||||
|
|
||||||
|
/* Support mmx instructions */
|
||||||
|
#define HAVE_MMX /**/
|
||||||
|
|
||||||
/* Define to 1 if you have the <mm_malloc.h> header file. */
|
/* Define to 1 if you have the <mm_malloc.h> header file. */
|
||||||
#define HAVE_MM_MALLOC_H 1
|
#define HAVE_MM_MALLOC_H 1
|
||||||
|
|
||||||
|
/* Support SSE (Streaming SIMD Extensions) instructions */
|
||||||
|
#define HAVE_SSE /**/
|
||||||
|
|
||||||
|
/* Support SSE2 (Streaming SIMD Extensions 2) instructions */
|
||||||
|
#define HAVE_SSE2 /**/
|
||||||
|
|
||||||
|
/* Support SSE3 (Streaming SIMD Extensions 3) instructions */
|
||||||
|
#define HAVE_SSE3 /**/
|
||||||
|
|
||||||
|
/* Support SSE4.1 (Streaming SIMD Extensions 4.1) instructions */
|
||||||
|
#define HAVE_SSE4_1 /**/
|
||||||
|
|
||||||
|
/* Support SSE4.2 (Streaming SIMD Extensions 4.2) instructions */
|
||||||
|
#define HAVE_SSE4_2 /**/
|
||||||
|
|
||||||
|
/* Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions */
|
||||||
|
#define HAVE_SSSE3 /**/
|
||||||
|
|
||||||
/* Define to 1 if you have the <stdint.h> header file. */
|
/* Define to 1 if you have the <stdint.h> header file. */
|
||||||
#define HAVE_STDINT_H 1
|
#define HAVE_STDINT_H 1
|
||||||
|
|
||||||
|
@@ -15,6 +15,15 @@
|
|||||||
/* GRID_COMMS_NONE */
|
/* GRID_COMMS_NONE */
|
||||||
#undef GRID_COMMS_NONE
|
#undef GRID_COMMS_NONE
|
||||||
|
|
||||||
|
/* Support Altivec instructions */
|
||||||
|
#undef HAVE_ALTIVEC
|
||||||
|
|
||||||
|
/* Support AVX (Advanced Vector Extensions) instructions */
|
||||||
|
#undef HAVE_AVX
|
||||||
|
|
||||||
|
/* Support AVX2 (Advanced Vector Extensions 2) instructions */
|
||||||
|
#undef HAVE_AVX2
|
||||||
|
|
||||||
/* define if the compiler supports basic C++11 syntax */
|
/* define if the compiler supports basic C++11 syntax */
|
||||||
#undef HAVE_CXX11
|
#undef HAVE_CXX11
|
||||||
|
|
||||||
@@ -29,6 +38,9 @@
|
|||||||
/* Define to 1 if you have the <endian.h> header file. */
|
/* Define to 1 if you have the <endian.h> header file. */
|
||||||
#undef HAVE_ENDIAN_H
|
#undef HAVE_ENDIAN_H
|
||||||
|
|
||||||
|
/* Support FMA3 (Fused Multiply-Add) instructions */
|
||||||
|
#undef HAVE_FMA
|
||||||
|
|
||||||
/* Define to 1 if you have the `gettimeofday' function. */
|
/* Define to 1 if you have the `gettimeofday' function. */
|
||||||
#undef HAVE_GETTIMEOFDAY
|
#undef HAVE_GETTIMEOFDAY
|
||||||
|
|
||||||
@@ -53,9 +65,30 @@
|
|||||||
/* Define to 1 if you have the <memory.h> header file. */
|
/* Define to 1 if you have the <memory.h> header file. */
|
||||||
#undef HAVE_MEMORY_H
|
#undef HAVE_MEMORY_H
|
||||||
|
|
||||||
|
/* Support mmx instructions */
|
||||||
|
#undef HAVE_MMX
|
||||||
|
|
||||||
/* Define to 1 if you have the <mm_malloc.h> header file. */
|
/* Define to 1 if you have the <mm_malloc.h> header file. */
|
||||||
#undef HAVE_MM_MALLOC_H
|
#undef HAVE_MM_MALLOC_H
|
||||||
|
|
||||||
|
/* Support SSE (Streaming SIMD Extensions) instructions */
|
||||||
|
#undef HAVE_SSE
|
||||||
|
|
||||||
|
/* Support SSE2 (Streaming SIMD Extensions 2) instructions */
|
||||||
|
#undef HAVE_SSE2
|
||||||
|
|
||||||
|
/* Support SSE3 (Streaming SIMD Extensions 3) instructions */
|
||||||
|
#undef HAVE_SSE3
|
||||||
|
|
||||||
|
/* Support SSE4.1 (Streaming SIMD Extensions 4.1) instructions */
|
||||||
|
#undef HAVE_SSE4_1
|
||||||
|
|
||||||
|
/* Support SSE4.2 (Streaming SIMD Extensions 4.2) instructions */
|
||||||
|
#undef HAVE_SSE4_2
|
||||||
|
|
||||||
|
/* Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions */
|
||||||
|
#undef HAVE_SSSE3
|
||||||
|
|
||||||
/* Define to 1 if you have the <stdint.h> header file. */
|
/* Define to 1 if you have the <stdint.h> header file. */
|
||||||
#undef HAVE_STDINT_H
|
#undef HAVE_STDINT_H
|
||||||
|
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
|
|
||||||
HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./Comparison.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./GridConfig.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_where.h ./Lattice.h ./parallelIO/NerscIO.h ./qcd/action/Actions.h ./qcd/action/DiffAction.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/Dirac.h ./qcd/LinalgUtils.h ./qcd/QCD.h ./qcd/SpaceTimeGrid.h ./qcd/TwoSpinor.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_qpx.h 
./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Old/Grid_vComplexD.h ./simd/Old/Grid_vComplexF.h ./simd/Old/Grid_vInteger.h ./simd/Old/Grid_vRealD.h ./simd/Old/Grid_vRealF.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_inner.h ./tensors/Tensor_outer.h ./tensors/Tensor_peek.h ./tensors/Tensor_poke.h ./tensors/Tensor_reality.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./Tensors.h ./Threads.h
|
HFILES=./Cshift.h ./simd/Grid_avx.h ./simd/Grid_vector_types.h ./simd/Grid_sse4.h ./simd/Grid_avx512.h ./simd/Old/Grid_vRealD.h ./simd/Old/Grid_vComplexD.h ./simd/Old/Grid_vInteger.h ./simd/Old/Grid_vComplexF.h ./simd/Old/Grid_vRealF.h ./simd/Grid_qpx.h ./Tensors.h ./Algorithms.h ./communicator/Communicator_base.h ./lattice/Lattice_rng.h ./lattice/Lattice_reduction.h ./lattice/Lattice_transfer.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_comparison.h ./lattice/Lattice_overload.h ./lattice/Lattice_reality.h ./lattice/Lattice_local.h ./lattice/Lattice_conformable.h ./lattice/Lattice_where.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_ET.h ./lattice/Lattice_transpose.h ./lattice/Lattice_trace.h ./Stencil.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_poke.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_class.h ./tensors/Tensor_transpose.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_reality.h ./tensors/Tensor_trace.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_outer.h ./tensors/Tensor_inner.h ./tensors/Tensor_traits.h ./tensors/Tensor_Ta.h ./tensors/Tensor_peek.h ./tensors/Tensor_arith.h ./tensors/Tensor_extract_merge.h ./Communicator.h ./Cartesian.h ./parallelIO/NerscIO.h ./qcd/QCD.h ./qcd/SpaceTimeGrid.h ./qcd/LinalgUtils.h ./qcd/TwoSpinor.h ./qcd/action/Actions.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h 
./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/Dirac.h ./cshift/Cshift_common.h ./cshift/Cshift_none.h ./cshift/Cshift_mpi.h ./Simd.h ./GridConfig.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_red_black.h ./cartesian/Cartesian_full.h ./AlignedAllocator.h ./Lattice.h ./Threads.h ./Comparison.h ./Grid.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/Zolotarev.h ./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Remez.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./stencil/Lebesgue.h
|
||||||
|
|
||||||
CCFILES=./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./GridInit.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/Dirac.cc ./qcd/SpaceTimeGrid.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc
|
CCFILES=./qcd/SpaceTimeGrid.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/Dirac.cc ./GridInit.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc
|
||||||
|
@@ -8,6 +8,7 @@
|
|||||||
#include <tensors/Tensor_outer.h>
|
#include <tensors/Tensor_outer.h>
|
||||||
#include <tensors/Tensor_transpose.h>
|
#include <tensors/Tensor_transpose.h>
|
||||||
#include <tensors/Tensor_trace.h>
|
#include <tensors/Tensor_trace.h>
|
||||||
|
#include <tensors/Tensor_Ta.h>
|
||||||
#include <tensors/Tensor_peek.h>
|
#include <tensors/Tensor_peek.h>
|
||||||
#include <tensors/Tensor_poke.h>
|
#include <tensors/Tensor_poke.h>
|
||||||
#include <tensors/Tensor_reality.h>
|
#include <tensors/Tensor_reality.h>
|
||||||
|
@@ -48,5 +48,16 @@ PARALLEL_FOR_LOOP
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Lattice-level Ta: applies the element-level Ta() to every entry of z._odata
// (z._grid->oSites() of them) and returns the results in a new Lattice built
// on the same grid as z. The element type of the result is whatever the
// element-level Ta() returns for z._odata[0].
template<class vobj> inline auto Ta(const Lattice<vobj> &z) -> Lattice<decltype(Ta(z._odata[0]))>
|
||||||
|
{
|
||||||
|
Lattice<decltype(Ta(z._odata[0]))> ret(z._grid);
|
||||||
|
// PARALLEL_FOR_LOOP is a macro defined elsewhere; presumably it parallelises
// the loop that follows (each iteration writes only ret._odata[ss]) -- confirm.
PARALLEL_FOR_LOOP
|
||||||
|
for(int ss=0;ss<z._grid->oSites();ss++){
|
||||||
|
ret._odata[ss] = Ta(z._odata[ss]);
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
0
lib/qcd/action/fermion/.dirstamp
Normal file
0
lib/qcd/action/fermion/.dirstamp
Normal file
@@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
Using intrinsics
|
Using intrinsics
|
||||||
*/
|
*/
|
||||||
// Time-stamp: <2015-05-27 12:07:15 neo>
|
// Time-stamp: <2015-05-29 14:13:30 neo>
|
||||||
//----------------------------------------------------------------------
|
//----------------------------------------------------------------------
|
||||||
|
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
@@ -261,13 +261,7 @@ namespace Optimization {
|
|||||||
}
|
}
|
||||||
// Complex double
|
// Complex double
|
||||||
inline __m256d operator()(__m256d in){
|
inline __m256d operator()(__m256d in){
|
||||||
return _mm256_xor_pd(_mm256_addsub_pd(_mm256_setzero_pd(),in), _mm256_set1_pd(-0.f));//untested
|
return _mm256_xor_pd(_mm256_addsub_pd(_mm256_setzero_pd(),in), _mm256_set1_pd(-0.f));
|
||||||
/*
|
|
||||||
// original
|
|
||||||
// addsubps 0, inv=>0+in.v[3] 0-in.v[2], 0+in.v[1], 0-in.v[0], ...
|
|
||||||
__m256d tmp = _mm256_addsub_pd(_mm256_setzero_pd(),_mm256_shuffle_pd(in,in,0x5));
|
|
||||||
return _mm256_shuffle_pd(tmp,tmp,0x5);
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
// do not define for integer input
|
// do not define for integer input
|
||||||
};
|
};
|
||||||
|
@@ -2,7 +2,7 @@
|
|||||||
/*! @file Grid_vector_types.h
|
/*! @file Grid_vector_types.h
|
||||||
@brief Defines templated class Grid_simd to deal with inner vector types
|
@brief Defines templated class Grid_simd to deal with inner vector types
|
||||||
*/
|
*/
|
||||||
// Time-stamp: <2015-05-27 12:04:06 neo>
|
// Time-stamp: <2015-05-29 14:19:48 neo>
|
||||||
//---------------------------------------------------------------------------
|
//---------------------------------------------------------------------------
|
||||||
#ifndef GRID_VECTOR_TYPES
|
#ifndef GRID_VECTOR_TYPES
|
||||||
#define GRID_VECTOR_TYPES
|
#define GRID_VECTOR_TYPES
|
||||||
@@ -55,7 +55,6 @@ namespace Grid {
|
|||||||
// general forms to allow for vsplat syntax
|
// general forms to allow for vsplat syntax
|
||||||
// need explicit declaration of types when used since
|
// need explicit declaration of types when used since
|
||||||
// clang cannot automatically determine the output type sometimes
|
// clang cannot automatically determine the output type sometimes
|
||||||
// use decltype?
|
|
||||||
template < class Out, class Input1, class Input2, class Operation >
|
template < class Out, class Input1, class Input2, class Operation >
|
||||||
Out binary(Input1 src_1, Input2 src_2, Operation op){
|
Out binary(Input1 src_1, Input2 src_2, Operation op){
|
||||||
return op(src_1, src_2);
|
return op(src_1, src_2);
|
||||||
|
@@ -1 +1 @@
|
|||||||
timestamp for lib/Grid_config.h
|
timestamp for lib/GridConfig.h
|
||||||
|
43
lib/tensors/Tensor_Ta.h
Normal file
43
lib/tensors/Tensor_Ta.h
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
#ifndef GRID_MATH_TA_H
|
||||||
|
#define GRID_MATH_TA_H
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
///////////////////////////////////////////////
|
||||||
|
// Ta function for scalar, vector, matrix
|
||||||
|
///////////////////////////////////////////////
|
||||||
|
// Base cases of the Ta overload set: a plain real or complex number is
// returned unchanged. The tensor overloads of Ta call Ta on their contents,
// so these end the recursion at the innermost level.
inline ComplexF Ta( const ComplexF &arg){ return arg;}
|
||||||
|
inline ComplexD Ta( const ComplexD &arg){ return arg;}
|
||||||
|
inline RealF Ta( const RealF &arg){ return arg;}
|
||||||
|
inline RealD Ta( const RealD &arg){ return arg;}
|
||||||
|
|
||||||
|
|
||||||
|
// iScalar overload: applies Ta to the wrapped value r._internal and returns
// the result wrapped in a new iScalar of the same type.
template<class vtype> inline iScalar<vtype> Ta(const iScalar<vtype>&r)
|
||||||
|
{
|
||||||
|
iScalar<vtype> ret;
|
||||||
|
ret._internal = Ta(r._internal);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
// iVector overload: applies Ta independently to each of the N components of
// r and returns them in a new iVector; the vector index itself is untouched.
template<class vtype,int N> inline iVector<vtype,N> Ta(const iVector<vtype,N>&r)
|
||||||
|
{
|
||||||
|
iVector<vtype,N> ret;
|
||||||
|
for(int i=0;i<N;i++){
|
||||||
|
ret._internal[i] = Ta(r._internal[i]);
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
// iMatrix overload: computes 0.5*(arg - adj(arg)) element by element and then
// subtracts trace(ret)/N from the result, i.e. the anti-Hermitian part of arg
// with its trace removed. Unlike the iScalar/iVector overloads, this one does
// not call Ta on the matrix elements.
template<class vtype,int N> inline iMatrix<vtype,N> Ta(const iMatrix<vtype,N> &arg)
|
||||||
|
{
|
||||||
|
iMatrix<vtype,N> ret(arg);
|
||||||
|
// 1/N, used below to scale the trace before it is subtracted.
double factor = (1/(double)N);
|
||||||
|
// ret starts as a copy of arg and each element is written exactly once, so
// after this double loop ret[c1][c2] == 0.5*(arg[c1][c2] - adj(arg[c2][c1])).
for(int c1=0;c1<N;c1++){
|
||||||
|
for(int c2=0;c2<N;c2++){
|
||||||
|
ret._internal[c1][c2]= (ret._internal[c1][c2] - adj(arg._internal[c2][c1]));
|
||||||
|
ret._internal[c1][c2] *= 0.5;
|
||||||
|
}}
|
||||||
|
//ret = (ret - adj(arg))*0.5;
|
||||||
|
// Remove the trace. NOTE(review): how the scaled trace is subtracted from the
// matrix is decided by operator-= defined elsewhere; presumably it acts on the
// diagonal only -- confirm against the matrix-minus-scalar sub() overload.
ret -= trace(ret)*factor;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif
|
@@ -45,7 +45,10 @@ namespace Grid {
|
|||||||
{
|
{
|
||||||
for(int c2=0;c2<N;c2++){
|
for(int c2=0;c2<N;c2++){
|
||||||
for(int c1=0;c1<N;c1++){
|
for(int c1=0;c1<N;c1++){
|
||||||
|
if ( c1==c2)
|
||||||
add(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]);
|
add(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]);
|
||||||
|
else
|
||||||
|
ret->_internal[c1][c2]=lhs->_internal[c1][c2];
|
||||||
}}
|
}}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@@ -44,7 +44,7 @@ template<class vtype,class ltype,class rtype, int N> strong_inline void sub(iMat
|
|||||||
const iMatrix<rtype,N> * __restrict__ rhs){
|
const iMatrix<rtype,N> * __restrict__ rhs){
|
||||||
for(int c2=0;c2<N;c2++){
|
for(int c2=0;c2<N;c2++){
|
||||||
for(int c1=0;c1<N;c1++){
|
for(int c1=0;c1<N;c1++){
|
||||||
if ( c1!=c2) {
|
if ( c1==c2) {
|
||||||
sub(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]);
|
sub(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]);
|
||||||
} else {
|
} else {
|
||||||
// Fails -- need unary minus. Catalogue other unops?
|
// Fails -- need unary minus. Catalogue other unops?
|
||||||
@@ -60,7 +60,7 @@ template<class vtype,class ltype,class rtype, int N> strong_inline void sub(iMat
|
|||||||
const iScalar<rtype> * __restrict__ rhs){
|
const iScalar<rtype> * __restrict__ rhs){
|
||||||
for(int c2=0;c2<N;c2++){
|
for(int c2=0;c2<N;c2++){
|
||||||
for(int c1=0;c1<N;c1++){
|
for(int c1=0;c1<N;c1++){
|
||||||
if ( c1!=c2)
|
if ( c1==c2)
|
||||||
sub(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal);
|
sub(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal);
|
||||||
else
|
else
|
||||||
ret->_internal[c1][c2]=lhs->_internal[c1][c2];
|
ret->_internal[c1][c2]=lhs->_internal[c1][c2];
|
||||||
|
@@ -2,10 +2,6 @@
|
|||||||
#define GRID_MATH_REALITY_H
|
#define GRID_MATH_REALITY_H
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
/////////////////////////////////////////// CONJ ///////////////////////////////////////////
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
// multiply by I; make recursive.
|
// multiply by I; make recursive.
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
@@ -151,6 +147,9 @@ template<class vtype,int N> inline iMatrix<vtype,N> adj(const iMatrix<vtype,N> &
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////
|
||||||
// Can only take the real/imag part of scalar objects, since
|
// Can only take the real/imag part of scalar objects, since
|
||||||
// lattice objects of different complex nature are non-conformable.
|
// lattice objects of different complex nature are non-conformable.
|
||||||
|
@@ -75,7 +75,7 @@ auto traceIndex(const iMatrix<vtype,N> &arg) -> iMatrix<decltype(traceIndex<Lev
|
|||||||
// Allow to recurse if vector, but never terminate on a vector
|
// Allow to recurse if vector, but never terminate on a vector
|
||||||
// trace of a different index can distribute across the vector index in a replicated way
|
// trace of a different index can distribute across the vector index in a replicated way
|
||||||
// but we do not trace a vector index.
|
// but we do not trace a vector index.
|
||||||
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
|
template<int Level,class vtype,int N,typename std::enable_if< iVector<vtype, N>::TensorLevel != Level >::type * =nullptr> inline
|
||||||
auto traceIndex(const iVector<vtype,N> &arg) -> iVector<decltype(traceIndex<Level>(arg._internal[0])),N>
|
auto traceIndex(const iVector<vtype,N> &arg) -> iVector<decltype(traceIndex<Level>(arg._internal[0])),N>
|
||||||
{
|
{
|
||||||
iVector<decltype(traceIndex<Level>(arg._internal[0])),N> ret;
|
iVector<decltype(traceIndex<Level>(arg._internal[0])),N> ret;
|
||||||
|
72
m4/ax_check_compile_flag.m4
Normal file
72
m4/ax_check_compile_flag.m4
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
# ===========================================================================
|
||||||
|
# http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html
|
||||||
|
# ===========================================================================
|
||||||
|
#
|
||||||
|
# SYNOPSIS
|
||||||
|
#
|
||||||
|
# AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS])
|
||||||
|
#
|
||||||
|
# DESCRIPTION
|
||||||
|
#
|
||||||
|
# Check whether the given FLAG works with the current language's compiler
|
||||||
|
# or gives an error. (Warnings, however, are ignored)
|
||||||
|
#
|
||||||
|
# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on
|
||||||
|
# success/failure.
|
||||||
|
#
|
||||||
|
# If EXTRA-FLAGS is defined, it is added to the current language's default
|
||||||
|
# flags (e.g. CFLAGS) when the check is done. The check is thus made with
|
||||||
|
# the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to
|
||||||
|
# force the compiler to issue an error when a bad flag is given.
|
||||||
|
#
|
||||||
|
# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this
|
||||||
|
# macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG.
|
||||||
|
#
|
||||||
|
# LICENSE
|
||||||
|
#
|
||||||
|
# Copyright (c) 2008 Guido U. Draheim <guidod@gmx.de>
|
||||||
|
# Copyright (c) 2011 Maarten Bosmans <mkbosmans@gmail.com>
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify it
|
||||||
|
# under the terms of the GNU General Public License as published by the
|
||||||
|
# Free Software Foundation, either version 3 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||||
|
# Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License along
|
||||||
|
# with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
#
|
||||||
|
# As a special exception, the respective Autoconf Macro's copyright owner
|
||||||
|
# gives unlimited permission to copy, distribute and modify the configure
|
||||||
|
# scripts that are the output of Autoconf when processing the Macro. You
|
||||||
|
# need not follow the terms of the GNU General Public License when using
|
||||||
|
# or distributing such scripts, even though portions of the text of the
|
||||||
|
# Macro appear in them. The GNU General Public License (GPL) does govern
|
||||||
|
# all other use of the material that constitutes the Autoconf Macro.
|
||||||
|
#
|
||||||
|
# This special exception to the GPL applies to versions of the Autoconf
|
||||||
|
# Macro released by the Autoconf Archive. When you make and distribute a
|
||||||
|
# modified version of the Autoconf Macro, you may extend this special
|
||||||
|
# exception to the GPL to apply to your modified version as well.
|
||||||
|
|
||||||
|
#serial 2
|
||||||
|
|
||||||
|
# AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS])
#
# Checks whether the compiler of the current language accepts FLAG by
# compiling an empty program with EXTRA-FLAGS and FLAG appended to that
# language's FLAGS variable. The previous FLAGS value is restored afterwards.
# The yes/no result is cached in a variable whose name is built from the
# language abbreviation, EXTRA-FLAGS and FLAG. ACTION-SUCCESS is run when the
# result is yes, ACTION-FAILURE otherwise; each defaults to the shell no-op.
AC_DEFUN([AX_CHECK_COMPILE_FLAG],
|
||||||
|
[AC_PREREQ(2.59)dnl for _AC_LANG_PREFIX
|
||||||
|
AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl
|
||||||
|
AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [
|
||||||
|
ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS
|
||||||
|
_AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1"
|
||||||
|
AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],
|
||||||
|
[AS_VAR_SET(CACHEVAR,[yes])],
|
||||||
|
[AS_VAR_SET(CACHEVAR,[no])])
|
||||||
|
_AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags])
|
||||||
|
AS_IF([test x"AS_VAR_GET(CACHEVAR)" = xyes],
|
||||||
|
[m4_default([$2], :)],
|
||||||
|
[m4_default([$3], :)])
|
||||||
|
AS_VAR_POPDEF([CACHEVAR])dnl
|
||||||
|
])dnl AX_CHECK_COMPILE_FLAG
|
288
m4/ax_ext.m4
Normal file
288
m4/ax_ext.m4
Normal file
@@ -0,0 +1,288 @@
|
|||||||
|
# ===========================================================================
|
||||||
|
# http://www.gnu.org/software/autoconf-archive/ax_ext.html
|
||||||
|
# ===========================================================================
|
||||||
|
#
|
||||||
|
# SYNOPSIS
|
||||||
|
#
|
||||||
|
# AX_EXT
|
||||||
|
#
|
||||||
|
# DESCRIPTION
|
||||||
|
#
|
||||||
|
# Find supported SIMD extensions by requesting cpuid. When an SIMD
|
||||||
|
# extension is found, the -m"simdextensionname" is added to SIMD_FLAGS if
|
||||||
|
# compiler supports it. For example, if "sse2" is available, then "-msse2"
|
||||||
|
# is added to SIMD_FLAGS.
|
||||||
|
#
|
||||||
|
# This macro calls:
|
||||||
|
#
|
||||||
|
# AC_SUBST(SIMD_FLAGS)
|
||||||
|
#
|
||||||
|
# And defines:
|
||||||
|
#
|
||||||
|
# HAVE_MMX / HAVE_SSE / HAVE_SSE2 / HAVE_SSE3 / HAVE_SSSE3 / HAVE_SSE4_1 / HAVE_SSE4_2 / HAVE_AVX / HAVE_AVX2 / HAVE_FMA / HAVE_ALTIVEC
|
||||||
|
#
|
||||||
|
# LICENSE
|
||||||
|
#
|
||||||
|
# Copyright (c) 2007 Christophe Tournayre <turn3r@users.sourceforge.net>
|
||||||
|
# Copyright (c) 2013 Michael Petch <mpetch@capp-sysware.com>
|
||||||
|
#
|
||||||
|
# Copying and distribution of this file, with or without modification, are
|
||||||
|
# permitted in any medium without royalty provided the copyright notice
|
||||||
|
# and this notice are preserved. This file is offered as-is, without any
|
||||||
|
# warranty.
|
||||||
|
|
||||||
|
#serial 13
|
||||||
|
|
||||||
|
AC_DEFUN([AX_EXT],
|
||||||
|
[
|
||||||
|
AC_REQUIRE([AC_CANONICAL_HOST])
|
||||||
|
|
||||||
|
case $host_cpu in
|
||||||
|
powerpc*)
|
||||||
|
AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext],
|
||||||
|
[
|
||||||
|
if test `/usr/sbin/sysctl -a 2>/dev/null| grep -c hw.optional.altivec` != 0; then
|
||||||
|
if test `/usr/sbin/sysctl -n hw.optional.altivec` = 1; then
|
||||||
|
ax_cv_have_altivec_ext=yes
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
])
|
||||||
|
|
||||||
|
if test "$ax_cv_have_altivec_ext" = yes; then
|
||||||
|
AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions])
|
||||||
|
AX_CHECK_COMPILE_FLAG(-faltivec, [SIMD_FLAGS="$SIMD_FLAGS -faltivec"], [])
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
|
||||||
|
|
||||||
|
i[[3456]]86*|x86_64*|amd64*)
|
||||||
|
|
||||||
|
AC_REQUIRE([AX_GCC_X86_CPUID])
|
||||||
|
AC_REQUIRE([AX_GCC_X86_AVX_XGETBV])
|
||||||
|
|
||||||
|
AX_GCC_X86_CPUID(0x00000001)
|
||||||
|
ecx=0
|
||||||
|
edx=0
|
||||||
|
ebx=0
|
||||||
|
if test "$ax_cv_gcc_x86_cpuid_0x00000001" != "unknown";
|
||||||
|
then
|
||||||
|
ecx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 3`
|
||||||
|
edx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 4`
|
||||||
|
fi
|
||||||
|
|
||||||
|
AX_GCC_X86_CPUID(0x00000007)
|
||||||
|
if test "$ax_cv_gcc_x86_cpuid_0x00000007" != "unknown";
|
||||||
|
then
|
||||||
|
ebx=`echo $ax_cv_gcc_x86_cpuid_0x00000007 | cut -d ":" -f 2`
|
||||||
|
fi
|
||||||
|
|
||||||
|
AC_CACHE_CHECK([whether mmx is supported], [ax_cv_have_mmx_ext],
|
||||||
|
[
|
||||||
|
ax_cv_have_mmx_ext=no
|
||||||
|
if test "$((0x$edx>>23&0x01))" = 1; then
|
||||||
|
ax_cv_have_mmx_ext=yes
|
||||||
|
fi
|
||||||
|
])
|
||||||
|
|
||||||
|
AC_CACHE_CHECK([whether sse is supported], [ax_cv_have_sse_ext],
|
||||||
|
[
|
||||||
|
ax_cv_have_sse_ext=no
|
||||||
|
if test "$((0x$edx>>25&0x01))" = 1; then
|
||||||
|
ax_cv_have_sse_ext=yes
|
||||||
|
fi
|
||||||
|
])
|
||||||
|
|
||||||
|
AC_CACHE_CHECK([whether sse2 is supported], [ax_cv_have_sse2_ext],
|
||||||
|
[
|
||||||
|
ax_cv_have_sse2_ext=no
|
||||||
|
if test "$((0x$edx>>26&0x01))" = 1; then
|
||||||
|
ax_cv_have_sse2_ext=yes
|
||||||
|
fi
|
||||||
|
])
|
||||||
|
|
||||||
|
AC_CACHE_CHECK([whether sse3 is supported], [ax_cv_have_sse3_ext],
|
||||||
|
[
|
||||||
|
ax_cv_have_sse3_ext=no
|
||||||
|
if test "$((0x$ecx&0x01))" = 1; then
|
||||||
|
ax_cv_have_sse3_ext=yes
|
||||||
|
fi
|
||||||
|
])
|
||||||
|
|
||||||
|
AC_CACHE_CHECK([whether ssse3 is supported], [ax_cv_have_ssse3_ext],
|
||||||
|
[
|
||||||
|
ax_cv_have_ssse3_ext=no
|
||||||
|
if test "$((0x$ecx>>9&0x01))" = 1; then
|
||||||
|
ax_cv_have_ssse3_ext=yes
|
||||||
|
fi
|
||||||
|
])
|
||||||
|
|
||||||
|
AC_CACHE_CHECK([whether sse4.1 is supported], [ax_cv_have_sse41_ext],
|
||||||
|
[
|
||||||
|
ax_cv_have_sse41_ext=no
|
||||||
|
if test "$((0x$ecx>>19&0x01))" = 1; then
|
||||||
|
ax_cv_have_sse41_ext=yes
|
||||||
|
fi
|
||||||
|
])
|
||||||
|
|
||||||
|
AC_CACHE_CHECK([whether sse4.2 is supported], [ax_cv_have_sse42_ext],
|
||||||
|
[
|
||||||
|
ax_cv_have_sse42_ext=no
|
||||||
|
if test "$((0x$ecx>>20&0x01))" = 1; then
|
||||||
|
ax_cv_have_sse42_ext=yes
|
||||||
|
fi
|
||||||
|
])
|
||||||
|
|
||||||
|
AC_CACHE_CHECK([whether avx is supported by processor], [ax_cv_have_avx_cpu_ext],
|
||||||
|
[
|
||||||
|
ax_cv_have_avx_cpu_ext=no
|
||||||
|
if test "$((0x$ecx>>28&0x01))" = 1; then
|
||||||
|
ax_cv_have_avx_cpu_ext=yes
|
||||||
|
fi
|
||||||
|
])
|
||||||
|
|
||||||
|
AC_CACHE_CHECK([whether avx2 is supported by processor], [ax_cv_have_avx2_cpu_ext],
|
||||||
|
[
|
||||||
|
ax_cv_have_avx2_cpu_ext=no
|
||||||
|
if test "$((0x$ebx>>5&0x01))" = 1; then
|
||||||
|
ax_cv_have_avx2_cpu_ext=yes
|
||||||
|
fi
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
|
AC_CACHE_CHECK([whether fma is supported by processor], [ax_cv_have_fma_cpu_ext],
|
||||||
|
[
|
||||||
|
ax_cv_have_fma_cpu_ext=no
|
||||||
|
if test "$((0x$ecx>>12&0x01))" = 1; then
|
||||||
|
ax_cv_have_fma_cpu_ext=yes
|
||||||
|
fi
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
|
if test x"$ax_cv_have_avx_cpu_ext" = x"yes"; then
|
||||||
|
AX_GCC_X86_AVX_XGETBV(0x00000000)
|
||||||
|
|
||||||
|
xgetbv_eax="0"
|
||||||
|
if test x"$ax_cv_gcc_x86_avx_xgetbv_0x00000000" != x"unknown"; then
|
||||||
|
xgetbv_eax=`echo $ax_cv_gcc_x86_avx_xgetbv_0x00000000 | cut -d ":" -f 1`
|
||||||
|
fi
|
||||||
|
|
||||||
|
AC_CACHE_CHECK([whether avx is supported by operating system], [ax_cv_have_avx_ext],
|
||||||
|
[
|
||||||
|
ax_cv_have_avx_ext=no
|
||||||
|
|
||||||
|
if test "$((0x$ecx>>27&0x01))" = 1; then
|
||||||
|
if test "$((0x$xgetbv_eax&0x6))" = 6; then
|
||||||
|
ax_cv_have_avx_ext=yes
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
])
|
||||||
|
if test x"$ax_cv_have_avx_ext" = x"no"; then
|
||||||
|
AC_MSG_WARN([Your processor supports AVX, but your operating system doesn't])
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$ax_cv_have_mmx_ext" = yes; then
|
||||||
|
AX_CHECK_COMPILE_FLAG(-mmmx, ax_cv_support_mmx_ext=yes, [])
|
||||||
|
if test x"$ax_cv_support_mmx_ext" = x"yes"; then
|
||||||
|
SIMD_FLAGS="$SIMD_FLAGS -mmmx"
|
||||||
|
AC_DEFINE(HAVE_MMX,,[Support mmx instructions])
|
||||||
|
else
|
||||||
|
AC_MSG_WARN([Your processor supports mmx instructions but not your compiler, can you try another compiler?])
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$ax_cv_have_sse_ext" = yes; then
|
||||||
|
AX_CHECK_COMPILE_FLAG(-msse, ax_cv_support_sse_ext=yes, [])
|
||||||
|
if test x"$ax_cv_support_sse_ext" = x"yes"; then
|
||||||
|
SIMD_FLAGS="$SIMD_FLAGS -msse"
|
||||||
|
AC_DEFINE(HAVE_SSE,,[Support SSE (Streaming SIMD Extensions) instructions])
|
||||||
|
else
|
||||||
|
AC_MSG_WARN([Your processor supports sse instructions but not your compiler, can you try another compiler?])
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$ax_cv_have_sse2_ext" = yes; then
|
||||||
|
AX_CHECK_COMPILE_FLAG(-msse2, ax_cv_support_sse2_ext=yes, [])
|
||||||
|
if test x"$ax_cv_support_sse2_ext" = x"yes"; then
|
||||||
|
SIMD_FLAGS="$SIMD_FLAGS -msse2"
|
||||||
|
AC_DEFINE(HAVE_SSE2,,[Support SSE2 (Streaming SIMD Extensions 2) instructions])
|
||||||
|
else
|
||||||
|
AC_MSG_WARN([Your processor supports sse2 instructions but not your compiler, can you try another compiler?])
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$ax_cv_have_sse3_ext" = yes; then
|
||||||
|
AX_CHECK_COMPILE_FLAG(-msse3, ax_cv_support_sse3_ext=yes, [])
|
||||||
|
if test x"$ax_cv_support_sse3_ext" = x"yes"; then
|
||||||
|
SIMD_FLAGS="$SIMD_FLAGS -msse3"
|
||||||
|
AC_DEFINE(HAVE_SSE3,,[Support SSE3 (Streaming SIMD Extensions 3) instructions])
|
||||||
|
else
|
||||||
|
AC_MSG_WARN([Your processor supports sse3 instructions but not your compiler, can you try another compiler?])
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$ax_cv_have_ssse3_ext" = yes; then
|
||||||
|
AX_CHECK_COMPILE_FLAG(-mssse3, ax_cv_support_ssse3_ext=yes, [])
|
||||||
|
if test x"$ax_cv_support_ssse3_ext" = x"yes"; then
|
||||||
|
SIMD_FLAGS="$SIMD_FLAGS -mssse3"
|
||||||
|
AC_DEFINE(HAVE_SSSE3,,[Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions])
|
||||||
|
else
|
||||||
|
AC_MSG_WARN([Your processor supports ssse3 instructions but not your compiler, can you try another compiler?])
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
dnl SSE4.1: when the processor reports the feature, probe the compiler for
dnl -msse4.1; on success extend SIMD_FLAGS and define HAVE_SSE4_1, otherwise
dnl warn that the compiler is the limiting factor.
if test "$ax_cv_have_sse41_ext" = yes; then
  AX_CHECK_COMPILE_FLAG(-msse4.1, ax_cv_support_sse41_ext=yes, [])
  if test x"$ax_cv_support_sse41_ext" = x"yes"; then
    SIMD_FLAGS="$SIMD_FLAGS -msse4.1"
    AC_DEFINE(HAVE_SSE4_1,,[Support SSE4.1 (Streaming SIMD Extensions 4.1) instructions])
  else
    AC_MSG_WARN([Your processor supports sse4.1 instructions but not your compiler, can you try another compiler?])
  fi
fi
|
||||||
|
|
||||||
|
dnl SSE4.2: when the processor reports the feature, probe the compiler for
dnl -msse4.2; on success extend SIMD_FLAGS and define HAVE_SSE4_2, otherwise
dnl warn that the compiler is the limiting factor.
if test "$ax_cv_have_sse42_ext" = yes; then
  AX_CHECK_COMPILE_FLAG(-msse4.2, ax_cv_support_sse42_ext=yes, [])
  if test x"$ax_cv_support_sse42_ext" = x"yes"; then
    SIMD_FLAGS="$SIMD_FLAGS -msse4.2"
    AC_DEFINE(HAVE_SSE4_2,,[Support SSE4.2 (Streaming SIMD Extensions 4.2) instructions])
  else
    AC_MSG_WARN([Your processor supports sse4.2 instructions but not your compiler, can you try another compiler?])
  fi
fi
|
||||||
|
|
||||||
|
if test "$ax_cv_have_avx_ext" = yes; then
|
||||||
|
AX_CHECK_COMPILE_FLAG(-mavx, ax_cv_support_avx_ext=yes, [])
|
||||||
|
if test x"$ax_cv_support_avx_ext" = x"yes"; then
|
||||||
|
SIMD_FLAGS="$SIMD_FLAGS -mavx"
|
||||||
|
AC_DEFINE(HAVE_AVX,,[Support AVX (Advanced Vector Extensions) instructions])
|
||||||
|
else
|
||||||
|
AC_MSG_WARN([Your processor supports avx instructions but not your compiler, can you try another compiler?])
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
dnl AVX2: the only AVX2 result computed earlier is the processor feature bit
dnl in ax_cv_have_avx2_cpu_ext, so gate on that. AVX2 uses the same YMM
dnl register state as AVX, so also require the operating-system AVX check
dnl stored in ax_cv_have_avx_ext to have succeeded.
if test "$ax_cv_have_avx2_cpu_ext" = yes && test "$ax_cv_have_avx_ext" = yes; then
  AX_CHECK_COMPILE_FLAG(-mavx2, ax_cv_support_avx2_ext=yes, [])
  if test x"$ax_cv_support_avx2_ext" = x"yes"; then
    SIMD_FLAGS="$SIMD_FLAGS -mavx2"
    AC_DEFINE(HAVE_AVX2,,[Support AVX2 (Advanced Vector Extensions 2) instructions])
  else
    AC_MSG_WARN([Your processor supports avx2 instructions but not your compiler, can you try another compiler?])
  fi
fi
|
||||||
|
|
||||||
|
dnl FMA: the only FMA result computed earlier is the processor feature bit
dnl in ax_cv_have_fma_cpu_ext, so gate on that. FMA3 instructions operate on
dnl the AVX register state, so also require the operating-system AVX check
dnl stored in ax_cv_have_avx_ext to have succeeded.
if test "$ax_cv_have_fma_cpu_ext" = yes && test "$ax_cv_have_avx_ext" = yes; then
  AX_CHECK_COMPILE_FLAG(-mfma, ax_cv_support_fma_ext=yes, [])
  if test x"$ax_cv_support_fma_ext" = x"yes"; then
    SIMD_FLAGS="$SIMD_FLAGS -mfma"
    AC_DEFINE(HAVE_FMA,,[Support FMA3 (Fused Multiply-Add) instructions])
  else
    AC_MSG_WARN([Your processor supports fma instructions but not your compiler, can you try another compiler?])
  fi
fi
|
||||||
|
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
AC_SUBST(SIMD_FLAGS)
|
||||||
|
])
|
79
m4/ax_gcc_x86_avx_xgetbv.m4
Normal file
79
m4/ax_gcc_x86_avx_xgetbv.m4
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
# ===========================================================================
|
||||||
|
# http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_avx_xgetbv.html
|
||||||
|
# ===========================================================================
|
||||||
|
#
|
||||||
|
# SYNOPSIS
|
||||||
|
#
|
||||||
|
# AX_GCC_X86_AVX_XGETBV
|
||||||
|
#
|
||||||
|
# DESCRIPTION
|
||||||
|
#
|
||||||
|
# On later x86 processors with AVX SIMD support, with gcc or a compiler
|
||||||
|
# that has a compatible syntax for inline assembly instructions, run a
|
||||||
|
# small program that executes the xgetbv instruction with input OP. This
|
||||||
|
# can be used to detect if the OS supports AVX instruction usage.
|
||||||
|
#
|
||||||
|
# On output, the values of the eax and edx registers are stored as
|
||||||
|
# hexadecimal strings as "eax:edx" in the cache variable
|
||||||
|
# ax_cv_gcc_x86_avx_xgetbv.
|
||||||
|
#
|
||||||
|
# If the xgetbv instruction fails (because you are running a
|
||||||
|
# cross-compiler, or because you are not using gcc, or because you are on
|
||||||
|
# a processor that doesn't have this instruction),
|
||||||
|
# ax_cv_gcc_x86_avx_xgetbv_OP is set to the string "unknown".
|
||||||
|
#
|
||||||
|
# This macro mainly exists to be used in AX_EXT.
|
||||||
|
#
|
||||||
|
# LICENSE
|
||||||
|
#
|
||||||
|
# Copyright (c) 2013 Michael Petch <mpetch@capp-sysware.com>
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify it
|
||||||
|
# under the terms of the GNU General Public License as published by the
|
||||||
|
# Free Software Foundation, either version 3 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||||
|
# Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License along
|
||||||
|
# with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
#
|
||||||
|
# As a special exception, the respective Autoconf Macro's copyright owner
|
||||||
|
# gives unlimited permission to copy, distribute and modify the configure
|
||||||
|
# scripts that are the output of Autoconf when processing the Macro. You
|
||||||
|
# need not follow the terms of the GNU General Public License when using
|
||||||
|
# or distributing such scripts, even though portions of the text of the
|
||||||
|
# Macro appear in them. The GNU General Public License (GPL) does govern
|
||||||
|
# all other use of the material that constitutes the Autoconf Macro.
|
||||||
|
#
|
||||||
|
# This special exception to the GPL applies to versions of the Autoconf
|
||||||
|
# Macro released by the Autoconf Archive. When you make and distribute a
|
||||||
|
# modified version of the Autoconf Macro, you may extend this special
|
||||||
|
# exception to the GPL to apply to your modified version as well.
|
||||||
|
|
||||||
|
#serial 1
|
||||||
|
|
||||||
|
# AX_GCC_X86_AVX_XGETBV(OP)
#
# Builds and runs a small C program that executes xgetbv with OP in ecx and
# writes the resulting eax and edx values, in hexadecimal, to a scratch file.
# The cache variable ax_cv_gcc_x86_avx_xgetbv_OP receives "eax:edx", or
# "unknown" when the program cannot be compiled or run, or when
# cross-compiling.
AC_DEFUN([AX_GCC_X86_AVX_XGETBV],
[AC_REQUIRE([AC_PROG_CC])
AC_LANG_PUSH([C])
AC_CACHE_CHECK(for x86-AVX xgetbv $1 output, ax_cv_gcc_x86_avx_xgetbv_$1,
 [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
     int op = $1, eax, edx;
     FILE *f;
      /* Opcodes for xgetbv, emitted as raw bytes so that assemblers
         without the mnemonic still accept the program.
         Outputs: eax and edx. Input: ecx holds the register index. */
      __asm__(".byte 0x0f, 0x01, 0xd0"
        : "=a" (eax), "=d" (edx)
        : "c" (op));
     f = fopen("conftest_xgetbv", "w"); if (!f) return 1;
     fprintf(f, "%x:%x\n", eax, edx);
     fclose(f);
     return 0;
])],
     [ax_cv_gcc_x86_avx_xgetbv_$1=`cat conftest_xgetbv`; rm -f conftest_xgetbv],
     [ax_cv_gcc_x86_avx_xgetbv_$1=unknown; rm -f conftest_xgetbv],
     [ax_cv_gcc_x86_avx_xgetbv_$1=unknown])])
AC_LANG_POP([C])
])
|
45
m4/ax_gcc_x86_cpuid.m4
Normal file
45
m4/ax_gcc_x86_cpuid.m4
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
dnl @synopsis AX_GCC_X86_CPUID(OP)
|
||||||
|
dnl
|
||||||
|
dnl @summary run x86 cpuid instruction OP using gcc inline assembler
|
||||||
|
dnl
|
||||||
|
dnl On Pentium and later x86 processors, with gcc or a compiler that
|
||||||
|
dnl has a compatible syntax for inline assembly instructions, run a
|
||||||
|
dnl small program that executes the cpuid instruction with input OP.
|
||||||
|
dnl This can be used to detect the CPU type.
|
||||||
|
dnl
|
||||||
|
dnl On output, the values of the eax, ebx, ecx, and edx registers are
|
||||||
|
dnl stored as hexadecimal strings as "eax:ebx:ecx:edx" in the cache
|
||||||
|
dnl variable ax_cv_gcc_x86_cpuid_OP.
|
||||||
|
dnl
|
||||||
|
dnl If the cpuid instruction fails (because you are running a
|
||||||
|
dnl cross-compiler, or because you are not using gcc, or because you
|
||||||
|
dnl are on a processor that doesn't have this instruction),
|
||||||
|
dnl ax_cv_gcc_x86_cpuid_OP is set to the string "unknown".
|
||||||
|
dnl
|
||||||
|
dnl This macro mainly exists to be used in AX_GCC_ARCHFLAG.
|
||||||
|
dnl
|
||||||
|
dnl @category Misc
|
||||||
|
dnl @author Steven G. Johnson <stevenj@alum.mit.edu> and Matteo Frigo.
|
||||||
|
dnl @version 2005-05-30
|
||||||
|
dnl @license GPLWithACException
|
||||||
|
|
||||||
|
dnl AX_GCC_X86_CPUID(OP)
dnl
dnl Builds and runs a small C program that executes cpuid with OP in eax and
dnl zero in ecx, then stores "eax:ebx:ecx:edx" in hexadecimal in the cache
dnl variable ax_cv_gcc_x86_cpuid_OP. The variable is set to "unknown" when
dnl the program cannot be compiled or run, or when cross-compiling.
AC_DEFUN([AX_GCC_X86_CPUID],
[AC_REQUIRE([AC_PROG_CC])
AC_LANG_PUSH([C])
AC_CACHE_CHECK(for x86 cpuid $1 output, ax_cv_gcc_x86_cpuid_$1,
 [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
     int op = $1, eax, ebx, ecx, edx;
     FILE *f;
      /* Some leaves, such as 0x07, select a sub-leaf through ecx.
         Pass 0 explicitly so the result never depends on whatever
         value happened to be left in the register. */
      __asm__("cpuid"
        : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
        : "a" (op), "c" (0));
     f = fopen("conftest_cpuid", "w"); if (!f) return 1;
     fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx);
     fclose(f);
     return 0;
])],
     [ax_cv_gcc_x86_cpuid_$1=`cat conftest_cpuid`; rm -f conftest_cpuid],
     [ax_cv_gcc_x86_cpuid_$1=unknown; rm -f conftest_cpuid],
     [ax_cv_gcc_x86_cpuid_$1=unknown])])
AC_LANG_POP([C])
])
|
@@ -54,3 +54,5 @@ echo ${BNAME}_SOURCES=$f >> Make.inc
|
|||||||
echo ${BNAME}_LDADD=-lGrid>> Make.inc
|
echo ${BNAME}_LDADD=-lGrid>> Make.inc
|
||||||
echo >> Make.inc
|
echo >> Make.inc
|
||||||
done
|
done
|
||||||
|
|
||||||
|
cd ..
|
||||||
|
@@ -56,6 +56,7 @@ int main (int argc, char ** argv)
|
|||||||
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
||||||
GridRedBlackCartesian rbFine(latt_size,simd_layout,mpi_layout);
|
GridRedBlackCartesian rbFine(latt_size,simd_layout,mpi_layout);
|
||||||
GridParallelRNG FineRNG(&Fine);
|
GridParallelRNG FineRNG(&Fine);
|
||||||
|
GridSerialRNG SerialRNG;
|
||||||
FineRNG.SeedRandomDevice();
|
FineRNG.SeedRandomDevice();
|
||||||
|
|
||||||
LatticeColourMatrix Foo(&Fine);
|
LatticeColourMatrix Foo(&Fine);
|
||||||
@@ -83,6 +84,9 @@ int main (int argc, char ** argv)
|
|||||||
LatticeSpinMatrix sMat(&Fine);
|
LatticeSpinMatrix sMat(&Fine);
|
||||||
LatticeSpinColourMatrix scMat(&Fine);
|
LatticeSpinColourMatrix scMat(&Fine);
|
||||||
|
|
||||||
|
LatticeLorentzColourMatrix lcMat(&Fine);
|
||||||
|
|
||||||
|
|
||||||
LatticeComplex scalar(&Fine);
|
LatticeComplex scalar(&Fine);
|
||||||
LatticeReal rscalar(&Fine);
|
LatticeReal rscalar(&Fine);
|
||||||
LatticeReal iscalar(&Fine);
|
LatticeReal iscalar(&Fine);
|
||||||
@@ -99,12 +103,15 @@ int main (int argc, char ** argv)
|
|||||||
random(FineRNG,cMat);
|
random(FineRNG,cMat);
|
||||||
random(FineRNG,sMat);
|
random(FineRNG,sMat);
|
||||||
random(FineRNG,scMat);
|
random(FineRNG,scMat);
|
||||||
|
random(FineRNG,lcMat);
|
||||||
random(FineRNG,cVec);
|
random(FineRNG,cVec);
|
||||||
random(FineRNG,sVec);
|
random(FineRNG,sVec);
|
||||||
random(FineRNG,scVec);
|
random(FineRNG,scVec);
|
||||||
|
|
||||||
|
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
|
|
||||||
|
TComplex tr = trace(cmat);
|
||||||
|
|
||||||
|
|
||||||
cVec = cMat * cVec; // LatticeColourVector = LatticeColourMatrix * LatticeColourVector
|
cVec = cMat * cVec; // LatticeColourVector = LatticeColourMatrix * LatticeColourVector
|
||||||
@@ -116,6 +123,8 @@ int main (int argc, char ** argv)
|
|||||||
cMat = outerProduct(cVec,cVec);
|
cMat = outerProduct(cVec,cVec);
|
||||||
scalar = localInnerProduct(cVec,cVec);
|
scalar = localInnerProduct(cVec,cVec);
|
||||||
|
|
||||||
|
cMat = Ta(cMat); //traceless antihermitian
|
||||||
|
|
||||||
|
|
||||||
scalar += scalar;
|
scalar += scalar;
|
||||||
scalar -= scalar;
|
scalar -= scalar;
|
||||||
@@ -207,6 +216,12 @@ int main (int argc, char ** argv)
|
|||||||
scm=transposeIndex<1>(scm);
|
scm=transposeIndex<1>(scm);
|
||||||
|
|
||||||
|
|
||||||
|
//random(SerialRNG, cm);
|
||||||
|
//std::cout << cm << std::endl;
|
||||||
|
|
||||||
|
cm = Ta(cm);
|
||||||
|
//TComplex tracecm= trace(cm);
|
||||||
|
//std::cout << cm << " "<< tracecm << std::endl;
|
||||||
|
|
||||||
|
|
||||||
// Foo = Foo+scalar; // LatticeColourMatrix+Scalar
|
// Foo = Foo+scalar; // LatticeColourMatrix+Scalar
|
||||||
@@ -219,6 +234,10 @@ int main (int argc, char ** argv)
|
|||||||
LatticeComplex trscMat(&Fine);
|
LatticeComplex trscMat(&Fine);
|
||||||
trscMat = trace(scMat); // Trace
|
trscMat = trace(scMat); // Trace
|
||||||
|
|
||||||
|
// LatticeComplex trlcMat(&Fine);
|
||||||
|
// trlcMat = trace(lcMat); // Trace involving iVector - now generates error
|
||||||
|
|
||||||
|
|
||||||
{ // Peek-ology and Poke-ology, with a little app-ology
|
{ // Peek-ology and Poke-ology, with a little app-ology
|
||||||
TComplex c;
|
TComplex c;
|
||||||
ColourMatrix c_m;
|
ColourMatrix c_m;
|
||||||
|
Reference in New Issue
Block a user