1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00
Conflicts:
	lib/tensors/Tensor_trace.h
This commit is contained in:
Peter Boyle 2015-06-04 12:17:00 +01:00
commit 1e4eca8321
29 changed files with 2580 additions and 2351 deletions

31
.gitignore vendored
View File

@ -5,6 +5,7 @@
*.obj
*~
errs
*#
# Precompiled Headers
*.gch
@ -48,3 +49,33 @@ config.status
/stamp-h1
/config.sub
/config.guess
# Packages #
############
# it's better to unpack these files and commit the raw source
# git has its own built in compression methods
*.7z
*.dmg
*.gz
*.iso
*.jar
*.rar
*.tar
*.zip
# Logs and databases #
######################
*.log
*.sql
*.sqlite
# OS generated files #
######################
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db

View File

@ -1 +1 @@
/opt/local/share/automake-1.15/INSTALL
/usr/share/automake-1.14/INSTALL

View File

@ -20,7 +20,7 @@ optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a signifi
for most programmers.
The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture.
Presently SSE2 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported.
Presently SSE4 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported.
These are presented as
@ -46,3 +46,5 @@ are examples:
./configure CXX=icpc CXXFLAGS="-std=c++11 -O3 -mmic" --enable-simd=AVX512 --host=none
For developers:
Use reconfigure_script in the scripts/ directory to create the autotools environment

3
TODO
View File

@ -66,6 +66,9 @@ Insert/Extract
* Support for ILDG
* Support different boundary conditions (finite temp, chem. potential ... )
* Support different fermion representations?
Actions -- coherent framework for implementing actions and their forces.

1885
configure vendored

File diff suppressed because it is too large Load Diff

View File

@ -3,7 +3,7 @@
#
# Project Grid package
#
# Time-stamp: <2015-05-26 17:18:54 neo>
# Time-stamp: <2015-05-27 18:51:47 neo>
AC_PREREQ([2.63])
AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk])
@ -27,7 +27,7 @@ AC_PROG_CXX
AC_OPENMP
AC_PROG_RANLIB
AX_CXX_COMPILE_STDCXX_11(noext, mandatory)
AX_EXT
# Checks for libraries.
#AX_GCC_VAR_ATTRIBUTE(aligned)
@ -69,26 +69,44 @@ Info at: http://www.mpfr.org/)])
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE|AVX|AVX2|AVX512|MIC],\
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVX2|AVX512|MIC],\
[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, MIC])],\
[ac_SIMD=${enable_simd}],[ac_SIMD=AVX2])
supported=no
case ${ac_SIMD} in
SSE4)
echo Configuring for SSE4
if test x"$ax_cv_support_ssse3_ext" = x"yes"; then dnl minimal support for SSE4
AC_DEFINE([SSE4],[1],[SSE4] )
supported=yes
else
AC_MSG_WARN([Your processor does not support SSE4 instructions])
fi
;;
AVX)
echo Configuring for AVX
if test x"$ax_cv_support_avx_ext" = x"yes"; then dnl minimal support for AVX
AC_DEFINE([AVX1],[1],[AVX] )
supported=yes
else
AC_MSG_WARN([Your processor does not support AVX instructions])
fi
;;
AVX2)
echo Configuring for AVX2
if test x"$ax_cv_support_avx2_ext" = x"yes"; then dnl minimal support for AVX2
AC_DEFINE([AVX2],[1],[AVX2] )
supported=yes
else
AC_MSG_WARN([Your processor does not support AVX2 instructions])
fi
;;
AVX512|MIC)
echo Configuring for AVX512 and MIC
AC_DEFINE([AVX512],[1],[AVX512] )
supported="cross compilation"
;;
*)
AC_MSG_ERROR([${ac_SIMD} unsupported --enable-simd option]);
@ -129,7 +147,9 @@ then
AC_CONFIG_FILES([docs/doxy.cfg])
fi
echo
echo Creating configuration files
echo :::::::::::::::::::::::::::::::::::::::::::
AC_CONFIG_FILES(Makefile)
AC_CONFIG_FILES(lib/Makefile)
AC_CONFIG_FILES(tests/Makefile)
@ -150,9 +170,9 @@ The following features are enabled:
- os (target) : $target_os
- build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi`
- graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi`
- Supported SIMD flags : $SIMD_FLAGS
----------------------------------------------------------
- enabled simd support : ${ac_SIMD}
- enabled simd support : ${ac_SIMD} (supported: $supported )
- communications type : ${ac_COMMS}

File diff suppressed because it is too large Load Diff

0
lib/.dirstamp Normal file
View File

View File

@ -1,5 +1,5 @@
/* lib/Grid_config.h. Generated from Grid_config.h.in by configure. */
/* lib/Grid_config.h.in. Generated from configure.ac by autoheader. */
/* lib/GridConfig.h. Generated from GridConfig.h.in by configure. */
/* lib/GridConfig.h.in. Generated from configure.ac by autoheader. */
/* AVX */
/* #undef AVX1 */
@ -16,6 +16,15 @@
/* GRID_COMMS_NONE */
#define GRID_COMMS_NONE 1
/* Support Altivec instructions */
/* #undef HAVE_ALTIVEC */
/* Support AVX (Advanced Vector Extensions) instructions */
/* #undef HAVE_AVX */
/* Support AVX2 (Advanced Vector Extensions 2) instructions */
/* #undef HAVE_AVX2 */
/* define if the compiler supports basic C++11 syntax */
/* #undef HAVE_CXX11 */
@ -30,6 +39,9 @@
/* Define to 1 if you have the <endian.h> header file. */
#define HAVE_ENDIAN_H 1
/* Support FMA3 (Fused Multiply-Add) instructions */
/* #undef HAVE_FMA */
/* Define to 1 if you have the `gettimeofday' function. */
#define HAVE_GETTIMEOFDAY 1
@ -54,9 +66,30 @@
/* Define to 1 if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1
/* Support mmx instructions */
#define HAVE_MMX /**/
/* Define to 1 if you have the <mm_malloc.h> header file. */
#define HAVE_MM_MALLOC_H 1
/* Support SSE (Streaming SIMD Extensions) instructions */
#define HAVE_SSE /**/
/* Support SSE2 (Streaming SIMD Extensions 2) instructions */
#define HAVE_SSE2 /**/
/* Support SSE3 (Streaming SIMD Extensions 3) instructions */
#define HAVE_SSE3 /**/
/* Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions */
#define HAVE_SSE4_1 /**/
/* Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions */
#define HAVE_SSE4_2 /**/
/* Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions */
#define HAVE_SSSE3 /**/
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1

View File

@ -15,6 +15,15 @@
/* GRID_COMMS_NONE */
#undef GRID_COMMS_NONE
/* Support Altivec instructions */
#undef HAVE_ALTIVEC
/* Support AVX (Advanced Vector Extensions) instructions */
#undef HAVE_AVX
/* Support AVX2 (Advanced Vector Extensions 2) instructions */
#undef HAVE_AVX2
/* define if the compiler supports basic C++11 syntax */
#undef HAVE_CXX11
@ -29,6 +38,9 @@
/* Define to 1 if you have the <endian.h> header file. */
#undef HAVE_ENDIAN_H
/* Support FMA3 (Fused Multiply-Add) instructions */
#undef HAVE_FMA
/* Define to 1 if you have the `gettimeofday' function. */
#undef HAVE_GETTIMEOFDAY
@ -53,9 +65,30 @@
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
/* Support mmx instructions */
#undef HAVE_MMX
/* Define to 1 if you have the <mm_malloc.h> header file. */
#undef HAVE_MM_MALLOC_H
/* Support SSE (Streaming SIMD Extensions) instructions */
#undef HAVE_SSE
/* Support SSE2 (Streaming SIMD Extensions 2) instructions */
#undef HAVE_SSE2
/* Support SSE3 (Streaming SIMD Extensions 3) instructions */
#undef HAVE_SSE3
/* Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions */
#undef HAVE_SSE4_1
/* Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions */
#undef HAVE_SSE4_2
/* Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions */
#undef HAVE_SSSE3
/* Define to 1 if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H

View File

@ -1,4 +1,4 @@
HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./Comparison.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./GridConfig.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_where.h ./Lattice.h ./parallelIO/NerscIO.h ./qcd/action/Actions.h ./qcd/action/DiffAction.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/Dirac.h ./qcd/LinalgUtils.h ./qcd/QCD.h ./qcd/SpaceTimeGrid.h ./qcd/TwoSpinor.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Old/Grid_vComplexD.h ./simd/Old/Grid_vComplexF.h ./simd/Old/Grid_vInteger.h ./simd/Old/Grid_vRealD.h ./simd/Old/Grid_vRealF.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_inner.h ./tensors/Tensor_outer.h ./tensors/Tensor_peek.h ./tensors/Tensor_poke.h ./tensors/Tensor_reality.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./Tensors.h ./Threads.h
HFILES=./Cshift.h ./simd/Grid_avx.h ./simd/Grid_vector_types.h ./simd/Grid_sse4.h ./simd/Grid_avx512.h ./simd/Old/Grid_vRealD.h ./simd/Old/Grid_vComplexD.h ./simd/Old/Grid_vInteger.h ./simd/Old/Grid_vComplexF.h ./simd/Old/Grid_vRealF.h ./simd/Grid_qpx.h ./Tensors.h ./Algorithms.h ./communicator/Communicator_base.h ./lattice/Lattice_rng.h ./lattice/Lattice_reduction.h ./lattice/Lattice_transfer.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_comparison.h ./lattice/Lattice_overload.h ./lattice/Lattice_reality.h ./lattice/Lattice_local.h ./lattice/Lattice_conformable.h ./lattice/Lattice_where.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_ET.h ./lattice/Lattice_transpose.h ./lattice/Lattice_trace.h ./Stencil.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_poke.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_class.h ./tensors/Tensor_transpose.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_reality.h ./tensors/Tensor_trace.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_outer.h ./tensors/Tensor_inner.h ./tensors/Tensor_traits.h ./tensors/Tensor_Ta.h ./tensors/Tensor_peek.h ./tensors/Tensor_arith.h ./tensors/Tensor_extract_merge.h ./Communicator.h ./Cartesian.h ./parallelIO/NerscIO.h ./qcd/QCD.h ./qcd/SpaceTimeGrid.h ./qcd/LinalgUtils.h ./qcd/TwoSpinor.h ./qcd/action/Actions.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/Dirac.h ./cshift/Cshift_common.h ./cshift/Cshift_none.h ./cshift/Cshift_mpi.h ./Simd.h ./GridConfig.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_red_black.h ./cartesian/Cartesian_full.h ./AlignedAllocator.h ./Lattice.h ./Threads.h ./Comparison.h ./Grid.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/Zolotarev.h ./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Remez.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./stencil/Lebesgue.h
CCFILES=./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./GridInit.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/Dirac.cc ./qcd/SpaceTimeGrid.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc
CCFILES=./qcd/SpaceTimeGrid.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/Dirac.cc ./GridInit.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc

View File

@ -8,6 +8,7 @@
#include <tensors/Tensor_outer.h>
#include <tensors/Tensor_transpose.h>
#include <tensors/Tensor_trace.h>
#include <tensors/Tensor_Ta.h>
#include <tensors/Tensor_peek.h>
#include <tensors/Tensor_poke.h>
#include <tensors/Tensor_reality.h>

View File

@ -48,5 +48,16 @@ PARALLEL_FOR_LOOP
}
template<class vobj> inline auto Ta(const Lattice<vobj> &z) -> Lattice<decltype(Ta(z._odata[0]))>
{
Lattice<decltype(Ta(z._odata[0]))> ret(z._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<z._grid->oSites();ss++){
ret._odata[ss] = Ta(z._odata[ss]);
}
return ret;
}
}
#endif

View File

View File

@ -4,7 +4,7 @@
Using intrinsics
*/
// Time-stamp: <2015-05-27 12:07:15 neo>
// Time-stamp: <2015-05-29 14:13:30 neo>
//----------------------------------------------------------------------
#include <immintrin.h>
@ -261,13 +261,7 @@ namespace Optimization {
}
// Complex double
inline __m256d operator()(__m256d in){
return _mm256_xor_pd(_mm256_addsub_pd(_mm256_setzero_pd(),in), _mm256_set1_pd(-0.f));//untested
/*
// original
// addsubps 0, inv=>0+in.v[3] 0-in.v[2], 0+in.v[1], 0-in.v[0], ...
__m256d tmp = _mm256_addsub_pd(_mm256_setzero_pd(),_mm256_shuffle_pd(in,in,0x5));
return _mm256_shuffle_pd(tmp,tmp,0x5);
*/
return _mm256_xor_pd(_mm256_addsub_pd(_mm256_setzero_pd(),in), _mm256_set1_pd(-0.f));
}
// do not define for integer input
};

View File

@ -2,7 +2,7 @@
/*! @file Grid_vector_types.h
@brief Defines templated class Grid_simd to deal with inner vector types
*/
// Time-stamp: <2015-05-27 12:04:06 neo>
// Time-stamp: <2015-05-29 14:19:48 neo>
//---------------------------------------------------------------------------
#ifndef GRID_VECTOR_TYPES
#define GRID_VECTOR_TYPES
@ -55,7 +55,6 @@ namespace Grid {
// general forms to allow for vsplat syntax
// need explicit declaration of types when used since
// clang cannot automatically determine the output type sometimes
// use decltype?
template < class Out, class Input1, class Input2, class Operation >
Out binary(Input1 src_1, Input2 src_2, Operation op){
return op(src_1, src_2);

View File

@ -1 +1 @@
timestamp for lib/Grid_config.h
timestamp for lib/GridConfig.h

43
lib/tensors/Tensor_Ta.h Normal file
View File

@ -0,0 +1,43 @@
#ifndef GRID_MATH_TA_H
#define GRID_MATH_TA_H
namespace Grid {
///////////////////////////////////////////////
// Ta function for scalar, vector, matrix
///////////////////////////////////////////////
inline ComplexF Ta( const ComplexF &arg){ return arg;}
inline ComplexD Ta( const ComplexD &arg){ return arg;}
inline RealF Ta( const RealF &arg){ return arg;}
inline RealD Ta( const RealD &arg){ return arg;}
template<class vtype> inline iScalar<vtype> Ta(const iScalar<vtype>&r)
{
iScalar<vtype> ret;
ret._internal = Ta(r._internal);
return ret;
}
template<class vtype,int N> inline iVector<vtype,N> Ta(const iVector<vtype,N>&r)
{
iVector<vtype,N> ret;
for(int i=0;i<N;i++){
ret._internal[i] = Ta(r._internal[i]);
}
return ret;
}
template<class vtype,int N> inline iMatrix<vtype,N> Ta(const iMatrix<vtype,N> &arg)
{
iMatrix<vtype,N> ret(arg);
double factor = (1/(double)N);
for(int c1=0;c1<N;c1++){
for(int c2=0;c2<N;c2++){
ret._internal[c1][c2]= (ret._internal[c1][c2] - adj(arg._internal[c2][c1]));
ret._internal[c1][c2] *= 0.5;
}}
//ret = (ret - adj(arg))*0.5;
ret -= trace(ret)*factor;
return ret;
}
}
#endif

View File

@ -45,7 +45,10 @@ namespace Grid {
{
for(int c2=0;c2<N;c2++){
for(int c1=0;c1<N;c1++){
add(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]);
if ( c1==c2)
add(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]);
else
ret->_internal[c1][c2]=lhs->_internal[c1][c2];
}}
return;
}

View File

@ -44,7 +44,7 @@ template<class vtype,class ltype,class rtype, int N> strong_inline void sub(iMat
const iMatrix<rtype,N> * __restrict__ rhs){
for(int c2=0;c2<N;c2++){
for(int c1=0;c1<N;c1++){
if ( c1!=c2) {
if ( c1==c2) {
sub(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]);
} else {
// Fails -- need unary minus. Catalogue other unops?
@ -60,7 +60,7 @@ template<class vtype,class ltype,class rtype, int N> strong_inline void sub(iMat
const iScalar<rtype> * __restrict__ rhs){
for(int c2=0;c2<N;c2++){
for(int c1=0;c1<N;c1++){
if ( c1!=c2)
if ( c1==c2)
sub(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal);
else
ret->_internal[c1][c2]=lhs->_internal[c1][c2];

View File

@ -2,10 +2,6 @@
#define GRID_MATH_REALITY_H
namespace Grid {
///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////// CONJ ///////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////
// multiply by I; make recursive.
///////////////////////////////////////////////
@ -151,6 +147,9 @@ template<class vtype,int N> inline iMatrix<vtype,N> adj(const iMatrix<vtype,N> &
/////////////////////////////////////////////////////////////////
// Can only take the real/imag part of scalar objects, since
// lattice objects of different complex nature are non-conformable.

View File

@ -75,7 +75,7 @@ auto traceIndex(const iMatrix<vtype,N> &arg) -> iMatrix<decltype(traceIndex<Lev
// Allow to recurse if vector, but never terminate on a vector
// trace of a different index can distribute across the vector index in a replicated way
// but we do not trace a vector index.
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
template<int Level,class vtype,int N,typename std::enable_if< iVector<vtype, N>::TensorLevel != Level >::type * =nullptr> inline
auto traceIndex(const iVector<vtype,N> &arg) -> iVector<decltype(traceIndex<Level>(arg._internal[0])),N>
{
iVector<decltype(traceIndex<Level>(arg._internal[0])),N> ret;

View File

@ -0,0 +1,72 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS])
#
# DESCRIPTION
#
# Check whether the given FLAG works with the current language's compiler
# or gives an error. (Warnings, however, are ignored)
#
# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on
# success/failure.
#
# If EXTRA-FLAGS is defined, it is added to the current language's default
# flags (e.g. CFLAGS) when the check is done. The check is thus made with
# the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to
# force the compiler to issue an error when a bad flag is given.
#
# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this
# macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG.
#
# LICENSE
#
# Copyright (c) 2008 Guido U. Draheim <guidod@gmx.de>
# Copyright (c) 2011 Maarten Bosmans <mkbosmans@gmail.com>
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 2
AC_DEFUN([AX_CHECK_COMPILE_FLAG],
[AC_PREREQ(2.59)dnl for _AC_LANG_PREFIX
AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl
AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [
ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS
_AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1"
AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],
[AS_VAR_SET(CACHEVAR,[yes])],
[AS_VAR_SET(CACHEVAR,[no])])
_AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags])
AS_IF([test x"AS_VAR_GET(CACHEVAR)" = xyes],
[m4_default([$2], :)],
[m4_default([$3], :)])
AS_VAR_POPDEF([CACHEVAR])dnl
])dnl AX_CHECK_COMPILE_FLAGS

288
m4/ax_ext.m4 Normal file
View File

@ -0,0 +1,288 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_ext.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_EXT
#
# DESCRIPTION
#
# Find supported SIMD extensions by requesting cpuid. When an SIMD
# extension is found, the -m"simdextensionname" is added to SIMD_FLAGS if
# compiler supports it. For example, if "sse2" is available, then "-msse2"
# is added to SIMD_FLAGS.
#
# This macro calls:
#
# AC_SUBST(SIMD_FLAGS)
#
# And defines:
#
# HAVE_MMX / HAVE_SSE / HAVE_SSE2 / HAVE_SSE3 / HAVE_SSSE3 / HAVE_SSE4.1 / HAVE_SSE4.2 / HAVE_AVX
#
# LICENSE
#
# Copyright (c) 2007 Christophe Tournayre <turn3r@users.sourceforge.net>
# Copyright (c) 2013 Michael Petch <mpetch@capp-sysware.com>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 13
AC_DEFUN([AX_EXT],
[
AC_REQUIRE([AC_CANONICAL_HOST])
case $host_cpu in
powerpc*)
AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext],
[
if test `/usr/sbin/sysctl -a 2>/dev/null| grep -c hw.optional.altivec` != 0; then
if test `/usr/sbin/sysctl -n hw.optional.altivec` = 1; then
ax_cv_have_altivec_ext=yes
fi
fi
])
if test "$ax_cv_have_altivec_ext" = yes; then
AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions])
AX_CHECK_COMPILE_FLAG(-faltivec, [SIMD_FLAGS="$SIMD_FLAGS -faltivec"], [])
fi
;;
i[[3456]]86*|x86_64*|amd64*)
AC_REQUIRE([AX_GCC_X86_CPUID])
AC_REQUIRE([AX_GCC_X86_AVX_XGETBV])
AX_GCC_X86_CPUID(0x00000001)
ecx=0
edx=0
ebx=0
if test "$ax_cv_gcc_x86_cpuid_0x00000001" != "unknown";
then
ecx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 3`
edx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 4`
fi
AX_GCC_X86_CPUID(0x00000007)
if test "$ax_cv_gcc_x86_cpuid_0x00000007" != "unknown";
then
ebx=`echo $ax_cv_gcc_x86_cpuid_0x00000007 | cut -d ":" -f 2`
fi
AC_CACHE_CHECK([whether mmx is supported], [ax_cv_have_mmx_ext],
[
ax_cv_have_mmx_ext=no
if test "$((0x$edx>>23&0x01))" = 1; then
ax_cv_have_mmx_ext=yes
fi
])
AC_CACHE_CHECK([whether sse is supported], [ax_cv_have_sse_ext],
[
ax_cv_have_sse_ext=no
if test "$((0x$edx>>25&0x01))" = 1; then
ax_cv_have_sse_ext=yes
fi
])
AC_CACHE_CHECK([whether sse2 is supported], [ax_cv_have_sse2_ext],
[
ax_cv_have_sse2_ext=no
if test "$((0x$edx>>26&0x01))" = 1; then
ax_cv_have_sse2_ext=yes
fi
])
AC_CACHE_CHECK([whether sse3 is supported], [ax_cv_have_sse3_ext],
[
ax_cv_have_sse3_ext=no
if test "$((0x$ecx&0x01))" = 1; then
ax_cv_have_sse3_ext=yes
fi
])
AC_CACHE_CHECK([whether ssse3 is supported], [ax_cv_have_ssse3_ext],
[
ax_cv_have_ssse3_ext=no
if test "$((0x$ecx>>9&0x01))" = 1; then
ax_cv_have_ssse3_ext=yes
fi
])
AC_CACHE_CHECK([whether sse4.1 is supported], [ax_cv_have_sse41_ext],
[
ax_cv_have_sse41_ext=no
if test "$((0x$ecx>>19&0x01))" = 1; then
ax_cv_have_sse41_ext=yes
fi
])
AC_CACHE_CHECK([whether sse4.2 is supported], [ax_cv_have_sse42_ext],
[
ax_cv_have_sse42_ext=no
if test "$((0x$ecx>>20&0x01))" = 1; then
ax_cv_have_sse42_ext=yes
fi
])
AC_CACHE_CHECK([whether avx is supported by processor], [ax_cv_have_avx_cpu_ext],
[
ax_cv_have_avx_cpu_ext=no
if test "$((0x$ecx>>28&0x01))" = 1; then
ax_cv_have_avx_cpu_ext=yes
fi
])
AC_CACHE_CHECK([whether avx2 is supported by processor], [ax_cv_have_avx2_cpu_ext],
[
ax_cv_have_avx2_cpu_ext=no
if test "$((0x$ebx>>5&0x01))" = 1; then
ax_cv_have_avx2_cpu_ext=yes
fi
])
AC_CACHE_CHECK([whether fma is supported by processor], [ax_cv_have_fma_cpu_ext],
[
ax_cv_have_fma_cpu_ext=no
if test "$((0x$ecx>>12&0x01))" = 1; then
ax_cv_have_fma_cpu_ext=yes
fi
])
if test x"$ax_cv_have_avx_cpu_ext" = x"yes"; then
AX_GCC_X86_AVX_XGETBV(0x00000000)
xgetbv_eax="0"
if test x"$ax_cv_gcc_x86_avx_xgetbv_0x00000000" != x"unknown"; then
xgetbv_eax=`echo $ax_cv_gcc_x86_avx_xgetbv_0x00000000 | cut -d ":" -f 1`
fi
AC_CACHE_CHECK([whether avx is supported by operating system], [ax_cv_have_avx_ext],
[
ax_cv_have_avx_ext=no
if test "$((0x$ecx>>27&0x01))" = 1; then
if test "$((0x$xgetbv_eax&0x6))" = 6; then
ax_cv_have_avx_ext=yes
fi
fi
])
if test x"$ax_cv_have_avx_ext" = x"no"; then
AC_MSG_WARN([Your processor supports AVX, but your operating system doesn't])
fi
fi
if test "$ax_cv_have_mmx_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-mmmx, ax_cv_support_mmx_ext=yes, [])
if test x"$ax_cv_support_mmx_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -mmmx"
AC_DEFINE(HAVE_MMX,,[Support mmx instructions])
else
AC_MSG_WARN([Your processor supports mmx instructions but not your compiler, can you try another compiler?])
fi
fi
if test "$ax_cv_have_sse_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-msse, ax_cv_support_sse_ext=yes, [])
if test x"$ax_cv_support_sse_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -msse"
AC_DEFINE(HAVE_SSE,,[Support SSE (Streaming SIMD Extensions) instructions])
else
AC_MSG_WARN([Your processor supports sse instructions but not your compiler, can you try another compiler?])
fi
fi
if test "$ax_cv_have_sse2_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-msse2, ax_cv_support_sse2_ext=yes, [])
if test x"$ax_cv_support_sse2_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -msse2"
AC_DEFINE(HAVE_SSE2,,[Support SSE2 (Streaming SIMD Extensions 2) instructions])
else
AC_MSG_WARN([Your processor supports sse2 instructions but not your compiler, can you try another compiler?])
fi
fi
if test "$ax_cv_have_sse3_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-msse3, ax_cv_support_sse3_ext=yes, [])
if test x"$ax_cv_support_sse3_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -msse3"
AC_DEFINE(HAVE_SSE3,,[Support SSE3 (Streaming SIMD Extensions 3) instructions])
else
AC_MSG_WARN([Your processor supports sse3 instructions but not your compiler, can you try another compiler?])
fi
fi
if test "$ax_cv_have_ssse3_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-mssse3, ax_cv_support_ssse3_ext=yes, [])
if test x"$ax_cv_support_ssse3_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -mssse3"
AC_DEFINE(HAVE_SSSE3,,[Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions])
else
AC_MSG_WARN([Your processor supports ssse3 instructions but not your compiler, can you try another compiler?])
fi
fi
if test "$ax_cv_have_sse41_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-msse4.1, ax_cv_support_sse41_ext=yes, [])
if test x"$ax_cv_support_sse41_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -msse4.1"
AC_DEFINE(HAVE_SSE4_1,,[Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions])
else
AC_MSG_WARN([Your processor supports sse4.1 instructions but not your compiler, can you try another compiler?])
fi
fi
if test "$ax_cv_have_sse42_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-msse4.2, ax_cv_support_sse42_ext=yes, [])
if test x"$ax_cv_support_sse42_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -msse4.2"
AC_DEFINE(HAVE_SSE4_2,,[Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions])
else
AC_MSG_WARN([Your processor supports sse4.2 instructions but not your compiler, can you try another compiler?])
fi
fi
if test "$ax_cv_have_avx_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-mavx, ax_cv_support_avx_ext=yes, [])
if test x"$ax_cv_support_avx_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -mavx"
AC_DEFINE(HAVE_AVX,,[Support AVX (Advanced Vector Extensions) instructions])
else
AC_MSG_WARN([Your processor supports avx instructions but not your compiler, can you try another compiler?])
fi
fi
if test "$ax_cv_have_avx2_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-mavx2, ax_cv_support_avx2_ext=yes, [])
if test x"$ax_cv_support_avx2_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -mavx2"
AC_DEFINE(HAVE_AVX2,,[Support AVX2 (Advanced Vector Extensions 2) instructions])
else
AC_MSG_WARN([Your processor supports avx2 instructions but not your compiler, can you try another compiler?])
fi
fi
if test "$ax_cv_have_fma_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-mfma, ax_cv_support_fma_ext=yes, [])
if test x"$ax_cv_support_fma_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -mfma"
AC_DEFINE(HAVE_FMA,,[Support FMA3 (Fused Multiply-Add) instructions])
else
AC_MSG_WARN([Your processor supports fma instructions but not your compiler, can you try another compiler?])
fi
fi
;;
esac
AC_SUBST(SIMD_FLAGS)
])

View File

@ -0,0 +1,79 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_avx_xgetbv.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_GCC_X86_AVX_XGETBV
#
# DESCRIPTION
#
# On later x86 processors with AVX SIMD support, with gcc or a compiler
# that has a compatible syntax for inline assembly instructions, run a
# small program that executes the xgetbv instruction with input OP. This
# can be used to detect if the OS supports AVX instruction usage.
#
# On output, the values of the eax and edx registers are stored as
# hexadecimal strings as "eax:edx" in the cache variable
# ax_cv_gcc_x86_avx_xgetbv.
#
# If the xgetbv instruction fails (because you are running a
# cross-compiler, or because you are not using gcc, or because you are on
# a processor that doesn't have this instruction),
# ax_cv_gcc_x86_avx_xgetbv_OP is set to the string "unknown".
#
# This macro mainly exists to be used in AX_EXT.
#
# LICENSE
#
# Copyright (c) 2013 Michael Petch <mpetch@capp-sysware.com>
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 1
AC_DEFUN([AX_GCC_X86_AVX_XGETBV],
[AC_REQUIRE([AC_PROG_CC])
AC_LANG_PUSH([C])
AC_CACHE_CHECK(for x86-AVX xgetbv $1 output, ax_cv_gcc_x86_avx_xgetbv_$1,
[AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
int op = $1, eax, edx;
FILE *f;
/* Opcodes for xgetbv */
__asm__(".byte 0x0f, 0x01, 0xd0"
: "=a" (eax), "=d" (edx),
: "c" (op));
f = fopen("conftest_xgetbv", "w"); if (!f) return 1;
fprintf(f, "%x:%x\n", eax, edx);
fclose(f);
return 0;
])],
[ax_cv_gcc_x86_avx_xgetbv_$1=`cat conftest_xgetbv`; rm -f conftest_xgetbv],
[ax_cv_gcc_x86_avx_xgetbv_$1=unknown; rm -f conftest_xgetbv],
[ax_cv_gcc_x86_avx_xgetbv_$1=unknown])])
AC_LANG_POP([C])
])

45
m4/ax_gcc_x86_cpuid.m4 Normal file
View File

@ -0,0 +1,45 @@
dnl @synopsis AX_GCC_X86_CPUID(OP)
dnl
dnl @summary run x86 cpuid instruction OP using gcc inline assembler
dnl
dnl On Pentium and later x86 processors, with gcc or a compiler that
dnl has a compatible syntax for inline assembly instructions, run a
dnl small program that executes the cpuid instruction with input OP.
dnl This can be used to detect the CPU type.
dnl
dnl On output, the values of the eax, ebx, ecx, and edx registers are
dnl stored as hexadecimal strings as "eax:ebx:ecx:edx" in the cache
dnl variable ax_cv_gcc_x86_cpuid_OP.
dnl
dnl If the cpuid instruction fails (because you are running a
dnl cross-compiler, or because you are not using gcc, or because you
dnl are on a processor that doesn't have this instruction),
dnl ax_cv_gcc_x86_cpuid_OP is set to the string "unknown".
dnl
dnl This macro mainly exists to be used in AX_GCC_ARCHFLAG.
dnl
dnl @category Misc
dnl @author Steven G. Johnson <stevenj@alum.mit.edu> and Matteo Frigo.
dnl @version 2005-05-30
dnl @license GPLWithACException
AC_DEFUN([AX_GCC_X86_CPUID],
[AC_REQUIRE([AC_PROG_CC])
AC_LANG_PUSH([C])
AC_CACHE_CHECK(for x86 cpuid $1 output, ax_cv_gcc_x86_cpuid_$1,
[AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
int op = $1, eax, ebx, ecx, edx;
FILE *f;
__asm__("cpuid"
: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
: "a" (op));
f = fopen("conftest_cpuid", "w"); if (!f) return 1;
fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx);
fclose(f);
return 0;
])],
[ax_cv_gcc_x86_cpuid_$1=`cat conftest_cpuid`; rm -f conftest_cpuid],
[ax_cv_gcc_x86_cpuid_$1=unknown; rm -f conftest_cpuid],
[ax_cv_gcc_x86_cpuid_$1=unknown])])
AC_LANG_POP([C])
])

View File

@ -54,3 +54,5 @@ echo ${BNAME}_SOURCES=$f >> Make.inc
echo ${BNAME}_LDADD=-lGrid>> Make.inc
echo >> Make.inc
done
cd ..

View File

@ -56,6 +56,7 @@ int main (int argc, char ** argv)
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian rbFine(latt_size,simd_layout,mpi_layout);
GridParallelRNG FineRNG(&Fine);
GridSerialRNG SerialRNG;
FineRNG.SeedRandomDevice();
LatticeColourMatrix Foo(&Fine);
@ -83,6 +84,9 @@ int main (int argc, char ** argv)
LatticeSpinMatrix sMat(&Fine);
LatticeSpinColourMatrix scMat(&Fine);
LatticeLorentzColourMatrix lcMat(&Fine);
LatticeComplex scalar(&Fine);
LatticeReal rscalar(&Fine);
LatticeReal iscalar(&Fine);
@ -99,12 +103,15 @@ int main (int argc, char ** argv)
random(FineRNG,cMat);
random(FineRNG,sMat);
random(FineRNG,scMat);
random(FineRNG,lcMat);
random(FineRNG,cVec);
random(FineRNG,sVec);
random(FineRNG,scVec);
fflush(stdout);
TComplex tr = trace(cmat);
cVec = cMat * cVec; // LatticeColourVector = LatticeColourMatrix * LatticeColourVector
@ -116,7 +123,9 @@ int main (int argc, char ** argv)
cMat = outerProduct(cVec,cVec);
scalar = localInnerProduct(cVec,cVec);
cMat = Ta(cMat); //traceless antihermitian
scalar += scalar;
scalar -= scalar;
scalar *= scalar;
@ -206,7 +215,13 @@ int main (int argc, char ** argv)
scm=transpose(scm);
scm=transposeIndex<1>(scm);
//random(SerialRNG, cm);
//std::cout << cm << std::endl;
cm = Ta(cm);
//TComplex tracecm= trace(cm);
//std::cout << cm << " "<< tracecm << std::endl;
// Foo = Foo+scalar; // LatticeColourMatrix+Scalar
@ -219,6 +234,10 @@ int main (int argc, char ** argv)
LatticeComplex trscMat(&Fine);
trscMat = trace(scMat); // Trace
// LatticeComplex trlcMat(&Fine);
// trlcMat = trace(lcMat); // Trace involving iVector - now generates error
{ // Peek-ology and Poke-ology, with a little app-ology
TComplex c;
ColourMatrix c_m;