mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Merge branch 'master' of https://github.com/paboyle/Grid
This commit is contained in:
commit
562cdc805b
2
.gitignore
vendored
2
.gitignore
vendored
@ -49,7 +49,7 @@ config.status
|
||||
/stamp-h1
|
||||
/config.sub
|
||||
/config.guess
|
||||
|
||||
/INSTALL
|
||||
|
||||
# Packages #
|
||||
############
|
||||
|
2
INSTALL
2
INSTALL
@ -1 +1 @@
|
||||
/opt/local/share/automake-1.15/INSTALL
|
||||
/usr/share/automake-1.14/INSTALL
|
29
configure
vendored
29
configure
vendored
@ -2574,7 +2574,7 @@ test -n "$target_alias" &&
|
||||
NONENONEs,x,x, &&
|
||||
program_prefix=${target_alias}-
|
||||
|
||||
am__api_version='1.15'
|
||||
am__api_version='1.14'
|
||||
|
||||
# Find a good install program. We prefer a C program (faster),
|
||||
# so one script is as good as another. But avoid the broken or
|
||||
@ -2746,8 +2746,8 @@ test "$program_suffix" != NONE &&
|
||||
ac_script='s/[\\$]/&&/g;s/;s,x,x,$//'
|
||||
program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"`
|
||||
|
||||
# Expand $ac_aux_dir to an absolute path.
|
||||
am_aux_dir=`cd "$ac_aux_dir" && pwd`
|
||||
# expand $ac_aux_dir to an absolute path
|
||||
am_aux_dir=`cd $ac_aux_dir && pwd`
|
||||
|
||||
if test x"${MISSING+set}" != xset; then
|
||||
case $am_aux_dir in
|
||||
@ -2766,7 +2766,7 @@ else
|
||||
$as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;}
|
||||
fi
|
||||
|
||||
if test x"${install_sh+set}" != xset; then
|
||||
if test x"${install_sh}" != xset; then
|
||||
case $am_aux_dir in
|
||||
*\ * | *\ *)
|
||||
install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
|
||||
@ -3094,8 +3094,8 @@ MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"}
|
||||
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
|
||||
mkdir_p='$(MKDIR_P)'
|
||||
|
||||
# We need awk for the "check" target (and possibly the TAP driver). The
|
||||
# system "awk" is bad on some platforms.
|
||||
# We need awk for the "check" target. The system "awk" is bad on
|
||||
# some platforms.
|
||||
# Always define AMTAR for backward compatibility. Yes, it's still used
|
||||
# in the wild :-( We should find a proper way to deprecate it ...
|
||||
AMTAR='$${TAR-tar}'
|
||||
@ -3154,7 +3154,6 @@ END
|
||||
fi
|
||||
|
||||
|
||||
|
||||
ac_config_headers="$ac_config_headers lib/GridConfig.h"
|
||||
|
||||
# Check whether --enable-silent-rules was given.
|
||||
@ -6003,6 +6002,7 @@ $as_echo "$as_me: WARNING: Your processor supports fma instructions but not your
|
||||
|
||||
|
||||
|
||||
|
||||
# Checks for libraries.
|
||||
#AX_GCC_VAR_ATTRIBUTE(aligned)
|
||||
|
||||
@ -6709,8 +6709,21 @@ $as_echo "#define AVX512 1" >>confdefs.h
|
||||
|
||||
supported="cross compilation"
|
||||
;;
|
||||
NEONv7)
|
||||
echo Configuring for experimental ARMv7 support
|
||||
|
||||
$as_echo "#define NEONv7 1" >>confdefs.h
|
||||
|
||||
supported="cross compilation"
|
||||
;;
|
||||
DEBUG)
|
||||
echo Configuring without SIMD support - only for compiler DEBUGGING!
|
||||
|
||||
$as_echo "#define EMPTY_SIMD 1" >>confdefs.h
|
||||
|
||||
;;
|
||||
*)
|
||||
as_fn_error $? "${ac_SIMD} unsupported --enable-simd option" "$LINENO" 5;
|
||||
as_fn_error $? "${ac_SIMD} flag unsupported as --enable-simd option\nRun ./configure --help for the list of options" "$LINENO" 5;
|
||||
;;
|
||||
esac
|
||||
|
||||
|
22
configure.ac
22
configure.ac
@ -3,7 +3,7 @@
|
||||
#
|
||||
# Project Grid package
|
||||
#
|
||||
# Time-stamp: <2015-05-27 18:51:47 neo>
|
||||
# Time-stamp: <2015-06-09 15:26:39 neo>
|
||||
|
||||
AC_PREREQ([2.63])
|
||||
AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk])
|
||||
@ -29,6 +29,7 @@ AC_PROG_RANLIB
|
||||
AX_CXX_COMPILE_STDCXX_11(noext, mandatory)
|
||||
AX_EXT
|
||||
|
||||
|
||||
# Checks for libraries.
|
||||
#AX_GCC_VAR_ATTRIBUTE(aligned)
|
||||
|
||||
@ -75,7 +76,7 @@ supported=no
|
||||
case ${ac_SIMD} in
|
||||
SSE4)
|
||||
echo Configuring for SSE4
|
||||
AC_DEFINE([SSE4],[1],[SSE4] )
|
||||
AC_DEFINE([SSE4],[1],[SSE4 Intrinsics] )
|
||||
if test x"$ax_cv_support_ssse3_ext" = x"yes"; then dnl minimal support for SSE4
|
||||
supported=yes
|
||||
else
|
||||
@ -84,7 +85,7 @@ case ${ac_SIMD} in
|
||||
;;
|
||||
AVX)
|
||||
echo Configuring for AVX
|
||||
AC_DEFINE([AVX1],[1],[AVX] )
|
||||
AC_DEFINE([AVX1],[1],[AVX Intrinsics] )
|
||||
if test x"$ax_cv_support_avx_ext" = x"yes"; then dnl minimal support for AVX
|
||||
supported=yes
|
||||
else
|
||||
@ -93,7 +94,7 @@ case ${ac_SIMD} in
|
||||
;;
|
||||
AVX2)
|
||||
echo Configuring for AVX2
|
||||
AC_DEFINE([AVX2],[1],[AVX2] )
|
||||
AC_DEFINE([AVX2],[1],[AVX2 Intrinsics] )
|
||||
if test x"$ax_cv_support_avx2_ext" = x"yes"; then dnl minimal support for AVX2
|
||||
supported=yes
|
||||
else
|
||||
@ -102,11 +103,20 @@ case ${ac_SIMD} in
|
||||
;;
|
||||
AVX512|MIC)
|
||||
echo Configuring for AVX512 and MIC
|
||||
AC_DEFINE([AVX512],[1],[AVX512] )
|
||||
AC_DEFINE([AVX512],[1],[AVX512 Intrinsics for Knights Corner] )
|
||||
supported="cross compilation"
|
||||
;;
|
||||
NEONv7)
|
||||
echo Configuring for experimental ARMv7 support
|
||||
AC_DEFINE([NEONv7],[1],[NEON ARMv7 Experimental support ] )
|
||||
supported="cross compilation"
|
||||
;;
|
||||
DEBUG)
|
||||
echo Configuring without SIMD support - only for compiler DEBUGGING!
|
||||
AC_DEFINE([EMPTY_SIMD],[1],[EMPTY_SIMD only for DEBUGGING] )
|
||||
;;
|
||||
*)
|
||||
AC_MSG_ERROR([${ac_SIMD} unsupported --enable-simd option]);
|
||||
AC_MSG_ERROR([${ac_SIMD} flag unsupported as --enable-simd option\nRun ./configure --help for the list of options]);
|
||||
;;
|
||||
esac
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
|
||||
#include <algorithms/SparseMatrix.h>
|
||||
#include <algorithms/LinearOperator.h>
|
||||
#include <algorithms/CoarsenedMatrix.h>
|
||||
|
||||
#include <algorithms/approx/Zolotarev.h>
|
||||
#include <algorithms/approx/Chebyshev.h>
|
||||
@ -10,6 +11,7 @@
|
||||
#include <algorithms/approx/MultiShiftFunction.h>
|
||||
|
||||
#include <algorithms/iterative/ConjugateGradient.h>
|
||||
#include <algorithms/iterative/ConjugateResidual.h>
|
||||
#include <algorithms/iterative/NormalEquations.h>
|
||||
#include <algorithms/iterative/SchurRedBlack.h>
|
||||
|
||||
|
@ -55,7 +55,6 @@
|
||||
#include <Cartesian.h> // subdir aggregate
|
||||
#include <Tensors.h> // subdir aggregate
|
||||
#include <Lattice.h> // subdir aggregate
|
||||
#include <Comparison.h>
|
||||
#include <Cshift.h> // subdir aggregate
|
||||
#include <Stencil.h> // subdir aggregate
|
||||
#include <Algorithms.h>// subdir aggregate
|
||||
|
@ -1,15 +1,18 @@
|
||||
/* lib/GridConfig.h. Generated from GridConfig.h.in by configure. */
|
||||
/* lib/GridConfig.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
/* AVX */
|
||||
/* AVX Intrinsics */
|
||||
/* #undef AVX1 */
|
||||
|
||||
/* AVX2 */
|
||||
/* AVX2 Intrinsics */
|
||||
/* #undef AVX2 */
|
||||
|
||||
/* AVX512 */
|
||||
/* AVX512 Intrinsics for Knights Corner */
|
||||
/* #undef AVX512 */
|
||||
|
||||
/* EMPTY_SIMD only for DEBUGGING */
|
||||
/* #undef EMPTY_SIMD */
|
||||
|
||||
/* GRID_COMMS_MPI */
|
||||
/* #undef GRID_COMMS_MPI */
|
||||
|
||||
@ -111,6 +114,9 @@
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#define HAVE_UNISTD_H 1
|
||||
|
||||
/* NEON ARMv7 Experimental support */
|
||||
/* #undef NEONv7 */
|
||||
|
||||
/* Name of package */
|
||||
#define PACKAGE "grid"
|
||||
|
||||
@ -132,7 +138,7 @@
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "1.0"
|
||||
|
||||
/* SSE4 */
|
||||
/* SSE4 Intrinsics */
|
||||
#define SSE4 1
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
|
@ -1,14 +1,17 @@
|
||||
/* lib/GridConfig.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
/* AVX */
|
||||
/* AVX Intrinsics */
|
||||
#undef AVX1
|
||||
|
||||
/* AVX2 */
|
||||
/* AVX2 Intrinsics */
|
||||
#undef AVX2
|
||||
|
||||
/* AVX512 */
|
||||
/* AVX512 Intrinsics for Knights Corner */
|
||||
#undef AVX512
|
||||
|
||||
/* EMPTY_SIMD only for DEBUGGING */
|
||||
#undef EMPTY_SIMD
|
||||
|
||||
/* GRID_COMMS_MPI */
|
||||
#undef GRID_COMMS_MPI
|
||||
|
||||
@ -110,6 +113,9 @@
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#undef HAVE_UNISTD_H
|
||||
|
||||
/* NEON ARMv7 Experimental support */
|
||||
#undef NEONv7
|
||||
|
||||
/* Name of package */
|
||||
#undef PACKAGE
|
||||
|
||||
@ -131,7 +137,7 @@
|
||||
/* Define to the version of this package. */
|
||||
#undef PACKAGE_VERSION
|
||||
|
||||
/* SSE4 */
|
||||
/* SSE4 Intrinsics */
|
||||
#undef SSE4
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
|
@ -1,4 +1,4 @@
|
||||
|
||||
HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./Comparison.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./GridConfig.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_where.h ./Lattice.h ./parallelIO/NerscIO.h ./qcd/action/Actions.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/GaugeActionBase.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/Dirac.h ./qcd/LinalgUtils.h ./qcd/QCD.h ./qcd/SpaceTimeGrid.h ./qcd/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/WilsonLoops.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Old/Grid_vComplexD.h ./simd/Old/Grid_vComplexF.h ./simd/Old/Grid_vInteger.h ./simd/Old/Grid_vRealD.h ./simd/Old/Grid_vRealF.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_inner.h ./tensors/Tensor_outer.h ./tensors/Tensor_peek.h ./tensors/Tensor_poke.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./Tensors.h ./Threads.h
|
||||
HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/CoarsenedMatrix.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./GridConfig.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_comparison_utils.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_unary.h ./lattice/Lattice_where.h ./Lattice.h ./parallelIO/NerscIO.h ./qcd/action/Actions.h ./qcd/action/DiffAction.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/GaugeActionBase.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/QCD.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/WilsonLoops.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_empty.h ./simd/Grid_neon.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Grid_vector_unops.h ./simd/Old/Grid_vComplexD.h ./simd/Old/Grid_vComplexF.h ./simd/Old/Grid_vInteger.h ./simd/Old/Grid_vRealD.h ./simd/Old/Grid_vRealF.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_inner.h ./tensors/Tensor_outer.h ./tensors/Tensor_peek.h ./tensors/Tensor_poke.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./tensors/Tensor_unary.h ./Tensors.h ./Threads.h
|
||||
|
||||
CCFILES=./algorithms/approx/MultiShiftFunction.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./GridInit.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/Dirac.cc ./qcd/SpaceTimeGrid.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc
|
||||
CCFILES=./algorithms/approx/MultiShiftFunction.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./GridInit.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/spin/Dirac.cc ./qcd/utils/SpaceTimeGrid.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc
|
||||
|
@ -35,6 +35,8 @@ namespace Grid {
|
||||
inline RealD conjugate(const RealD & r){ return r; }
|
||||
inline RealD real(const RealD & r){ return r; }
|
||||
|
||||
inline RealD sqrt(const RealD & r){ return std::sqrt(r); }
|
||||
|
||||
inline ComplexD conjugate(const ComplexD& r){ return(conj(r)); }
|
||||
inline ComplexD adj(const ComplexD& r){ return(conjugate(r)); }
|
||||
inline ComplexF conjugate(const ComplexF& r ){ return(conj(r)); }
|
||||
@ -112,6 +114,7 @@ namespace Grid {
|
||||
};
|
||||
|
||||
#include <simd/Grid_vector_types.h>
|
||||
#include <simd/Grid_vector_unops.h>
|
||||
|
||||
namespace Grid {
|
||||
// Default precision
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include <tensors/Tensor_peek.h>
|
||||
#include <tensors/Tensor_poke.h>
|
||||
#include <tensors/Tensor_reality.h>
|
||||
#include <tensors/Tensor_unary.h>
|
||||
#include <tensors/Tensor_extract_merge.h>
|
||||
|
||||
#endif
|
||||
|
308
lib/algorithms/CoarsenedMatrix.h
Normal file
308
lib/algorithms/CoarsenedMatrix.h
Normal file
@ -0,0 +1,308 @@
|
||||
#ifndef GRID_ALGORITHM_COARSENED_MATRIX_H
|
||||
#define GRID_ALGORITHM_COARSENED_MATRIX_H
|
||||
|
||||
#include <Grid.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
class Geometry {
|
||||
// int dimension;
|
||||
public:
|
||||
int npoint;
|
||||
std::vector<int> directions ;
|
||||
std::vector<int> displacements;
|
||||
|
||||
// FIXME -- don't like xposing the operator directions
|
||||
// as different to the geometrical dirs
|
||||
// Also don't like special casing five dim.. should pass an object in template
|
||||
Geometry(int _d) {
|
||||
|
||||
int base = (_d==5) ? 1:0;
|
||||
|
||||
// make coarse grid stencil for 4d , not 5d
|
||||
if ( _d==5 ) _d=4;
|
||||
|
||||
npoint = 2*_d+1;
|
||||
directions.resize(npoint);
|
||||
displacements.resize(npoint);
|
||||
for(int d=0;d<_d;d++){
|
||||
directions[2*d ] = d+base;
|
||||
directions[2*d+1] = d+base;
|
||||
displacements[2*d ] = +1;
|
||||
displacements[2*d+1] = -1;
|
||||
}
|
||||
directions [2*_d]=0;
|
||||
displacements[2*_d]=0;
|
||||
|
||||
//// report back
|
||||
std::cout<<"directions :";
|
||||
for(int d=0;d<npoint;d++) std::cout<< directions[d]<< " ";
|
||||
std::cout <<std::endl;
|
||||
std::cout<<"displacements :";
|
||||
for(int d=0;d<npoint;d++) std::cout<< displacements[d]<< " ";
|
||||
std::cout <<std::endl;
|
||||
}
|
||||
|
||||
/*
|
||||
// Original cleaner code
|
||||
Geometry(int _d) : dimension(_d), npoint(2*_d+1), directions(npoint), displacements(npoint) {
|
||||
for(int d=0;d<dimension;d++){
|
||||
directions[2*d ] = d;
|
||||
directions[2*d+1] = d;
|
||||
displacements[2*d ] = +1;
|
||||
displacements[2*d+1] = -1;
|
||||
}
|
||||
directions [2*dimension]=0;
|
||||
displacements[2*dimension]=0;
|
||||
}
|
||||
std::vector<int> GetDelta(int point) {
|
||||
std::vector<int> delta(dimension,0);
|
||||
delta[directions[point]] = displacements[point];
|
||||
return delta;
|
||||
};
|
||||
*/
|
||||
|
||||
};
|
||||
|
||||
// Fine Object == (per site) type of fine field
|
||||
// nbasis == number of deflation vectors
|
||||
template<class Fobj,class CComplex,int nbasis>
|
||||
class CoarsenedMatrix : public SparseMatrixBase<Lattice<iVector<CComplex,nbasis > > > {
|
||||
public:
|
||||
|
||||
typedef iVector<CComplex,nbasis > siteVector;
|
||||
typedef Lattice<siteVector> CoarseVector;
|
||||
typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix;
|
||||
|
||||
typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field
|
||||
typedef Lattice<Fobj > FineField;
|
||||
|
||||
////////////////////
|
||||
// Data members
|
||||
////////////////////
|
||||
Geometry geom;
|
||||
GridBase * _grid;
|
||||
CartesianStencil Stencil;
|
||||
|
||||
std::vector<CoarseMatrix> A;
|
||||
|
||||
std::vector<siteVector,alignedAllocator<siteVector> > comm_buf;
|
||||
|
||||
///////////////////////
|
||||
// Interface
|
||||
///////////////////////
|
||||
GridBase * Grid(void) { return _grid; }; // this is all the linalg routines need to know
|
||||
|
||||
RealD M (const CoarseVector &in, CoarseVector &out){
|
||||
|
||||
conformable(_grid,in._grid);
|
||||
conformable(in._grid,out._grid);
|
||||
|
||||
SimpleCompressor<siteVector> compressor;
|
||||
Stencil.HaloExchange(in,comm_buf,compressor);
|
||||
|
||||
//PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<Grid()->oSites();ss++){
|
||||
siteVector res = zero;
|
||||
siteVector nbr;
|
||||
int offset,local,perm,ptype;
|
||||
|
||||
for(int point=0;point<geom.npoint;point++){
|
||||
offset = Stencil._offsets [point][ss];
|
||||
local = Stencil._is_local[point][ss];
|
||||
perm = Stencil._permute [point][ss];
|
||||
ptype = Stencil._permute_type[point];
|
||||
|
||||
if(local&&perm) {
|
||||
permute(nbr,in._odata[offset],ptype);
|
||||
} else if(local) {
|
||||
nbr = in._odata[offset];
|
||||
} else {
|
||||
nbr = comm_buf[offset];
|
||||
}
|
||||
res = res + A[point]._odata[ss]*nbr;
|
||||
}
|
||||
vstream(out._odata[ss],res);
|
||||
}
|
||||
return norm2(out);
|
||||
};
|
||||
|
||||
RealD Mdag (const CoarseVector &in, CoarseVector &out){
|
||||
return M(in,out);
|
||||
};
|
||||
|
||||
// Defer support for further coarsening for now
|
||||
void Mdiag (const CoarseVector &in, CoarseVector &out){};
|
||||
void Mdir (const CoarseVector &in, CoarseVector &out,int dir, int disp){};
|
||||
|
||||
CoarsenedMatrix(GridCartesian &CoarseGrid) :
|
||||
|
||||
_grid(&CoarseGrid),
|
||||
geom(CoarseGrid._ndimension),
|
||||
Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements),
|
||||
A(geom.npoint,&CoarseGrid)
|
||||
{
|
||||
comm_buf.resize(Stencil._unified_buffer_size);
|
||||
};
|
||||
|
||||
void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop,std::vector<Lattice<Fobj> > & subspace){
|
||||
|
||||
FineField iblock(FineGrid); // contributions from within this block
|
||||
FineField oblock(FineGrid); // contributions from outwith this block
|
||||
|
||||
FineField phi(FineGrid);
|
||||
FineField tmp(FineGrid);
|
||||
FineField zz(FineGrid); zz=zero;
|
||||
FineField Mphi(FineGrid);
|
||||
|
||||
Lattice<iScalar<vInteger> > coor(FineGrid);
|
||||
|
||||
CoarseVector iProj(Grid());
|
||||
CoarseVector oProj(Grid());
|
||||
CoarseScalar InnerProd(Grid());
|
||||
|
||||
// Orthogonalise the subblocks over the basis
|
||||
blockOrthogonalise(InnerProd,subspace);
|
||||
blockProject(iProj,subspace[0],subspace);
|
||||
|
||||
// Compute the matrix elements of linop between this orthonormal
|
||||
// set of vectors.
|
||||
int self_stencil=-1;
|
||||
for(int p=0;p<geom.npoint;p++){
|
||||
A[p]=zero;
|
||||
if( geom.displacements[p]==0){
|
||||
self_stencil=p;
|
||||
}
|
||||
}
|
||||
assert(self_stencil!=-1);
|
||||
|
||||
for(int i=0;i<nbasis;i++){
|
||||
phi=subspace[i];
|
||||
for(int p=0;p<geom.npoint;p++){
|
||||
|
||||
int dir = geom.directions[p];
|
||||
int disp = geom.displacements[p];
|
||||
|
||||
int block=(FineGrid->_rdimensions[dir])/(Grid()->_rdimensions[dir]);
|
||||
|
||||
LatticeCoordinate(coor,dir);
|
||||
|
||||
if ( disp==0 ){
|
||||
linop.OpDiag(phi,Mphi);
|
||||
}
|
||||
else {
|
||||
linop.OpDir(phi,Mphi,dir,disp);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Pick out contributions coming from this cell and neighbour cell
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
if ( disp==0 ) {
|
||||
iblock = Mphi;
|
||||
oblock = zero;
|
||||
} else if ( disp==1 ) {
|
||||
oblock = where(mod(coor,block)==(block-1),Mphi,zz);
|
||||
iblock = where(mod(coor,block)!=(block-1),Mphi,zz);
|
||||
} else if ( disp==-1 ) {
|
||||
oblock = where(mod(coor,block)==0,Mphi,zz);
|
||||
iblock = where(mod(coor,block)!=0,Mphi,zz);
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
blockProject(iProj,iblock,subspace);
|
||||
blockProject(oProj,oblock,subspace);
|
||||
for(int ss=0;ss<Grid()->oSites();ss++){
|
||||
for(int j=0;j<nbasis;j++){
|
||||
if( disp!= 0 ) {
|
||||
A[p]._odata[ss](j,i) = oProj._odata[ss](j);
|
||||
}
|
||||
A[self_stencil]._odata[ss](j,i) = A[self_stencil]._odata[ss](j,i) + iProj._odata[ss](j);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
///////////////////////////
|
||||
// test code worth preserving in if block
|
||||
///////////////////////////
|
||||
std::cout<< " Computed matrix elements "<< self_stencil <<std::endl;
|
||||
for(int p=0;p<geom.npoint;p++){
|
||||
std::cout<< "A["<<p<<"]" << std::endl;
|
||||
std::cout<< A[p] << std::endl;
|
||||
}
|
||||
std::cout<< " picking by block0 "<< self_stencil <<std::endl;
|
||||
|
||||
phi=subspace[0];
|
||||
std::vector<int> bc(FineGrid->_ndimension,0);
|
||||
|
||||
blockPick(Grid(),phi,tmp,bc); // Pick out a block
|
||||
linop.Op(tmp,Mphi); // Apply big dop
|
||||
blockProject(iProj,Mphi,subspace); // project it and print it
|
||||
std::cout<< " Computed matrix elements from block zero only "<<std::endl;
|
||||
std::cout<< iProj <<std::endl;
|
||||
std::cout<<"Computed Coarse Operator"<<std::endl;
|
||||
#endif
|
||||
// AssertHermitian();
|
||||
// ForceHermitian();
|
||||
// ForceDiagonal();
|
||||
}
|
||||
void ForceDiagonal(void) {
|
||||
|
||||
|
||||
std::cout<<"**************************************************"<<std::endl;
|
||||
std::cout<<"**** Forcing coarse operator to be diagonal ****"<<std::endl;
|
||||
std::cout<<"**************************************************"<<std::endl;
|
||||
for(int p=0;p<8;p++){
|
||||
A[p]=zero;
|
||||
}
|
||||
|
||||
GridParallelRNG RNG(Grid()); RNG.SeedRandomDevice();
|
||||
Lattice<iScalar<CComplex> > val(Grid()); random(RNG,val);
|
||||
|
||||
Complex one(1.0);
|
||||
|
||||
iMatrix<Complex,nbasis> ident; ident=one;
|
||||
|
||||
val = val*adj(val);
|
||||
val = val + 1.0;
|
||||
|
||||
A[8] = val*ident;
|
||||
|
||||
// for(int s=0;s<Grid()->oSites();s++) {
|
||||
// A[8]._odata[s]=val._odata[s];
|
||||
// }
|
||||
}
|
||||
void ForceHermitian(void) {
|
||||
for(int d=0;d<4;d++){
|
||||
int dd=d+1;
|
||||
A[2*d] = adj(Cshift(A[2*d+1],dd,1));
|
||||
}
|
||||
A[8] = 0.5*(A[8] + adj(A[8]));
|
||||
}
|
||||
void AssertHermitian(void) {
|
||||
CoarseMatrix AA (Grid());
|
||||
CoarseMatrix AAc (Grid());
|
||||
CoarseMatrix Diff (Grid());
|
||||
for(int d=0;d<4;d++){
|
||||
|
||||
int dd=d+1;
|
||||
AAc = Cshift(A[2*d+1],dd,1);
|
||||
AA = A[2*d];
|
||||
|
||||
Diff = AA - adj(AAc);
|
||||
|
||||
std::cout<<"Norm diff dim "<<d<<" "<< norm2(Diff)<<std::endl;
|
||||
std::cout<<"Norm dim "<<d<<" "<< norm2(AA)<<std::endl;
|
||||
|
||||
}
|
||||
Diff = A[8] - adj(A[8]);
|
||||
std::cout<<"Norm diff local "<< norm2(Diff)<<std::endl;
|
||||
std::cout<<"Norm local "<< norm2(A[8])<<std::endl;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
@ -16,9 +16,15 @@ namespace Grid {
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class LinearOperatorBase {
|
||||
public:
|
||||
|
||||
// Support for coarsening to a multigrid
|
||||
virtual void OpDiag (const Field &in, Field &out) = 0; // Abstract base
|
||||
virtual void OpDir (const Field &in, Field &out,int dir,int disp) = 0; // Abstract base
|
||||
|
||||
virtual void Op (const Field &in, Field &out) = 0; // Abstract base
|
||||
virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base
|
||||
virtual void HermOpAndNorm(const Field &in, Field &out,double &n1,double &n2)=0;
|
||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2)=0;
|
||||
virtual void HermOp(const Field &in, Field &out)=0;
|
||||
};
|
||||
|
||||
|
||||
@ -42,15 +48,27 @@ namespace Grid {
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
MdagMLinearOperator(Matrix &Mat): _Mat(Mat){};
|
||||
|
||||
// Support for coarsening to a multigrid
|
||||
void OpDiag (const Field &in, Field &out) {
|
||||
_Mat.Mdiag(in,out);
|
||||
}
|
||||
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||
_Mat.Mdir(in,out,dir,disp);
|
||||
}
|
||||
void Op (const Field &in, Field &out){
|
||||
_Mat.M(in,out);
|
||||
}
|
||||
void AdjOp (const Field &in, Field &out){
|
||||
_Mat.Mdag(in,out);
|
||||
}
|
||||
void HermOpAndNorm(const Field &in, Field &out,double &n1,double &n2){
|
||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
_Mat.MdagM(in,out,n1,n2);
|
||||
}
|
||||
void HermOp(const Field &in, Field &out){
|
||||
RealD n1,n2;
|
||||
HermOpAndNorm(in,out,n1,n2);
|
||||
}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
@ -61,13 +79,20 @@ namespace Grid {
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
HermitianLinearOperator(Matrix &Mat): _Mat(Mat){};
|
||||
// Support for coarsening to a multigrid
|
||||
void OpDiag (const Field &in, Field &out) {
|
||||
_Mat.Mdiag(in,out);
|
||||
}
|
||||
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||
_Mat.Mdir(in,out,dir,disp);
|
||||
}
|
||||
void Op (const Field &in, Field &out){
|
||||
_Mat.M(in,out);
|
||||
}
|
||||
void AdjOp (const Field &in, Field &out){
|
||||
_Mat.M(in,out);
|
||||
}
|
||||
void HermOpAndNorm(const Field &in, Field &out,double &n1,double &n2){
|
||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
ComplexD dot;
|
||||
|
||||
_Mat.M(in,out);
|
||||
@ -78,6 +103,9 @@ namespace Grid {
|
||||
dot = innerProduct(out,out);
|
||||
n2=real(dot);
|
||||
}
|
||||
void HermOp(const Field &in, Field &out){
|
||||
_Mat.M(in,out);
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
@ -98,12 +126,24 @@ namespace Grid {
|
||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
MpcDagMpc(in,out,n1,n2);
|
||||
}
|
||||
void HermOp(const Field &in, Field &out){
|
||||
RealD n1,n2;
|
||||
HermOpAndNorm(in,out,n1,n2);
|
||||
}
|
||||
void Op (const Field &in, Field &out){
|
||||
Mpc(in,out);
|
||||
}
|
||||
void AdjOp (const Field &in, Field &out){
|
||||
MpcDag(in,out);
|
||||
}
|
||||
// Support for coarsening to a multigrid
|
||||
void OpDiag (const Field &in, Field &out) {
|
||||
assert(0); // must coarsen the unpreconditioned system
|
||||
}
|
||||
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
};
|
||||
template<class Matrix,class Field>
|
||||
class SchurDiagMooeeOperator : public SchurOperatorBase<Field> {
|
||||
|
@ -19,6 +19,8 @@ namespace Grid {
|
||||
ni=M(in,tmp);
|
||||
no=Mdag(tmp,out);
|
||||
}
|
||||
virtual void Mdiag (const Field &in, Field &out)=0;
|
||||
virtual void Mdir (const Field &in, Field &out,int dir, int disp)=0;
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
0
lib/algorithms/approx/.dirstamp
Normal file
0
lib/algorithms/approx/.dirstamp
Normal file
@ -42,6 +42,14 @@ namespace Grid {
|
||||
double lo;
|
||||
|
||||
public:
|
||||
void csv(std::ostream &out){
|
||||
for (double x=lo; x<hi; x+=(hi-lo)/1000) {
|
||||
double f = approx(x);
|
||||
out<< x<<" "<<f<<std::endl;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
Chebyshev(double _lo,double _hi,int _order, double (* func)(double) ){
|
||||
lo=_lo;
|
||||
hi=_hi;
|
||||
@ -52,16 +60,16 @@ namespace Grid {
|
||||
for(int j=0;j<order;j++){
|
||||
double s=0;
|
||||
for(int k=0;k<order;k++){
|
||||
double y=cos(M_PI*(k+0.5)/order);
|
||||
double y=std::cos(M_PI*(k+0.5)/order);
|
||||
double x=0.5*(y*(hi-lo)+(hi+lo));
|
||||
double f=func(x);
|
||||
s=s+f*cos( j*M_PI*(k+0.5)/order );
|
||||
s=s+f*std::cos( j*M_PI*(k+0.5)/order );
|
||||
}
|
||||
Coeffs[j] = s * 2.0/order;
|
||||
}
|
||||
};
|
||||
|
||||
double Evaluate(double x) // Convenience for plotting the approximation
|
||||
double approx(double x) // Convenience for plotting the approximation
|
||||
{
|
||||
double Tn;
|
||||
double Tnm;
|
||||
@ -91,7 +99,7 @@ namespace Grid {
|
||||
void PlotApprox(std::ostream &out) {
|
||||
out<<"Polynomial approx ["<<lo<<","<<hi<<"]"<<std::endl;
|
||||
for(double x=lo;x<hi;x+=(hi-lo)/50.0){
|
||||
out <<x<<"\t"<<Evaluate(x)<<std::endl;
|
||||
out <<x<<"\t"<<approx(x)<<std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
|
85
lib/algorithms/iterative/ConjugateResidual.h
Normal file
85
lib/algorithms/iterative/ConjugateResidual.h
Normal file
@ -0,0 +1,85 @@
|
||||
#ifndef GRID_CONJUGATE_RESIDUAL_H
|
||||
#define GRID_CONJUGATE_RESIDUAL_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Base classes for iterative processes based on operators
|
||||
// single input vec, single output vec.
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
||||
template<class Field>
|
||||
class ConjugateResidual : public OperatorFunction<Field> {
|
||||
public:
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
int verbose;
|
||||
|
||||
ConjugateResidual(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) {
|
||||
verbose=1;
|
||||
};
|
||||
|
||||
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
|
||||
|
||||
RealD a, b, c, d;
|
||||
RealD cp, ssq,rsq;
|
||||
|
||||
RealD rAr, rAAr, rArp;
|
||||
RealD pAp, pAAp;
|
||||
|
||||
GridBase *grid = src._grid;
|
||||
psi=zero;
|
||||
Field r(grid), p(grid), Ap(grid), Ar(grid);
|
||||
|
||||
r=src;
|
||||
p=src;
|
||||
|
||||
Linop.HermOpAndNorm(p,Ap,pAp,pAAp);
|
||||
Linop.HermOpAndNorm(r,Ar,rAr,rAAr);
|
||||
|
||||
std::cout << "pAp, pAAp"<< pAp<<" "<<pAAp<<std::endl;
|
||||
std::cout << "rAr, rAAr"<< rAr<<" "<<rAAr<<std::endl;
|
||||
|
||||
cp =norm2(r);
|
||||
ssq=norm2(src);
|
||||
rsq=Tolerance*Tolerance*ssq;
|
||||
|
||||
std::cout<<"ConjugateResidual: iteration " <<0<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
||||
|
||||
for(int k=1;k<MaxIterations;k++){
|
||||
|
||||
a = rAr/pAAp;
|
||||
|
||||
axpy(psi,a,p,psi);
|
||||
|
||||
cp = axpy_norm(r,-a,Ap,r);
|
||||
|
||||
rArp=rAr;
|
||||
|
||||
Linop.HermOpAndNorm(r,Ar,rAr,rAAr);
|
||||
|
||||
b =rAr/rArp;
|
||||
|
||||
axpy(p,b,p,r);
|
||||
pAAp=axpy_norm(Ap,b,Ap,Ar);
|
||||
|
||||
std::cout<<"ConjugateResidual: iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
||||
|
||||
if(cp<rsq) {
|
||||
Linop.HermOp(psi,Ap);
|
||||
axpy(r,-1.0,src,Ap);
|
||||
RealD true_resid = norm2(r);
|
||||
std::cout<<"ConjugateResidual: Converged on iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
||||
std::cout<<"ConjugateResidual: true residual is "<<true_resid<<std::endl;
|
||||
std::cout<<"ConjugateResidual: target residual was "<<Tolerance <<std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
std::cout<<"ConjugateResidual did NOT converge"<<std::endl;
|
||||
assert(0);
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
@ -108,7 +108,7 @@ namespace Grid {
|
||||
RealD ns = norm2(in);
|
||||
RealD nr = norm2(resid);
|
||||
|
||||
std::cout << "SchurRedBlackDiagMooee solver true unprec resid "<< sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
|
||||
std::cout << "SchurRedBlackDiagMooee solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
|
0
lib/communicator/.dirstamp
Normal file
0
lib/communicator/.dirstamp
Normal file
@ -297,10 +297,13 @@ PARALLEL_FOR_LOOP
|
||||
#include <lattice/Lattice_reduction.h>
|
||||
#include <lattice/Lattice_peekpoke.h>
|
||||
#include <lattice/Lattice_reality.h>
|
||||
#include <lattice/Lattice_comparison_utils.h>
|
||||
#include <lattice/Lattice_comparison.h>
|
||||
#include <lattice/Lattice_coordinate.h>
|
||||
#include <lattice/Lattice_where.h>
|
||||
#include <lattice/Lattice_rng.h>
|
||||
#include <lattice/Lattice_unary.h>
|
||||
#include <lattice/Lattice_transfer.h>
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -141,7 +141,5 @@ namespace Grid {
|
||||
}
|
||||
}
|
||||
|
||||
#include <lattice/Lattice_comparison.h>
|
||||
#include <lattice/Lattice_where.h>
|
||||
|
||||
#endif
|
@ -3,20 +3,8 @@
|
||||
|
||||
namespace Grid {
|
||||
|
||||
/*
|
||||
depbase=`echo Grid_main.o | sed 's|[^/]*$|.deps/&|;s|\.o$||'`;\
|
||||
icpc -DHAVE_CONFIG_H -I. -I../lib -I../lib -mmic -O3 -std=c++11 -fopenmp -MT Grid_main.o -MD -MP -MF $depbase.Tpo -c -o Grid_main.o Grid_main.cc &&\
|
||||
mv -f $depbase.Tpo $depbase.Po
|
||||
../lib/lattice/Grid_lattice_coordinate.h(25): error: no suitable user-defined conversion from "vector_type" to "const Grid::iScalar<Grid::iScalar<Grid::iScalar<Grid::vInteger>>>" exists
|
||||
l._odata[o]=vI;
|
||||
^
|
||||
detected during instantiation of "void Grid::LatticeCoordinate(Grid::Lattice<iobj> &, int) [with iobj=Grid::QCD::vTInteger]" at line 283 of "Grid_main.cc"
|
||||
|
||||
compilation aborted for Grid_main.cc (code 2)
|
||||
*/
|
||||
template<class iobj> inline void LatticeCoordinate(Lattice<iobj> &l,int mu)
|
||||
{
|
||||
typedef typename iobj::scalar_object scalar_object;
|
||||
typedef typename iobj::scalar_type scalar_type;
|
||||
typedef typename iobj::vector_type vector_type;
|
||||
|
||||
@ -33,7 +21,7 @@ namespace Grid {
|
||||
mergebuf[i]=(Integer)gcoor[mu];
|
||||
}
|
||||
merge<vector_type,scalar_type>(vI,mergebuf);
|
||||
l._odata[o]._internal._internal._internal=vI;
|
||||
l._odata[o]=vI;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -25,8 +25,7 @@ PARALLEL_FOR_LOOP
|
||||
|
||||
// localInnerProduct
|
||||
template<class vobj>
|
||||
inline auto localInnerProduct (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs)
|
||||
-> Lattice<typename vobj::tensor_reduced>
|
||||
inline auto localInnerProduct (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs) -> Lattice<typename vobj::tensor_reduced>
|
||||
{
|
||||
Lattice<typename vobj::tensor_reduced> ret(rhs._grid);
|
||||
PARALLEL_FOR_LOOP
|
||||
|
@ -27,9 +27,9 @@ PARALLEL_FOR_LOOP
|
||||
return ret;
|
||||
};
|
||||
|
||||
template<class vobj> inline auto real(const Lattice<vobj> &z) -> Lattice<decltype(real(z._odata[0]))>
|
||||
template<class vobj> inline auto real(const Lattice<vobj> &z) -> Lattice<vobj>
|
||||
{
|
||||
Lattice<decltype(real(z._odata[0]))> ret(z._grid);
|
||||
Lattice<vobj> ret(z._grid);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<z._grid->oSites();ss++){
|
||||
ret._odata[ss] = real(z._odata[ss]);
|
||||
@ -37,9 +37,9 @@ PARALLEL_FOR_LOOP
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class vobj> inline auto imag(const Lattice<vobj> &z) -> Lattice<decltype(imag(z._odata[0]))>
|
||||
template<class vobj> inline auto imag(const Lattice<vobj> &z) -> Lattice<vobj>
|
||||
{
|
||||
Lattice<decltype(imag(z._odata[0]))> ret(z._grid);
|
||||
Lattice<vobj> ret(z._grid);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<z._grid->oSites();ss++){
|
||||
ret._odata[ss] = imag(z._odata[ss]);
|
||||
|
@ -56,10 +56,10 @@ PARALLEL_FOR_LOOP
|
||||
}
|
||||
|
||||
|
||||
template<class vobj,int nbasis>
|
||||
inline void projectBlockBasis(Lattice<iVector<vComplex,nbasis > > &coarseData,
|
||||
const Lattice<vobj> &fineData,
|
||||
const std::vector<Lattice<vobj> > &Basis)
|
||||
template<class vobj,class CComplex,int nbasis>
|
||||
inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
||||
const Lattice<vobj> &fineData,
|
||||
const std::vector<Lattice<vobj> > &Basis)
|
||||
{
|
||||
GridBase * fine = fineData._grid;
|
||||
GridBase * coarse= coarseData._grid;
|
||||
@ -69,13 +69,14 @@ inline void projectBlockBasis(Lattice<iVector<vComplex,nbasis > > &coarseData,
|
||||
assert( nbasis == Basis.size() );
|
||||
subdivides(coarse,fine);
|
||||
for(int i=0;i<nbasis;i++){
|
||||
conformable(Basis,fineData);
|
||||
conformable(Basis[i],fineData);
|
||||
}
|
||||
|
||||
std::vector<int> block_r (_ndimension);
|
||||
|
||||
for(int d=0 ; d<_ndimension;d++){
|
||||
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
|
||||
assert(block_r[d]*coarse->_rdimensions[d] == fine->_rdimensions[d]);
|
||||
}
|
||||
|
||||
coarseData=zero;
|
||||
@ -92,41 +93,41 @@ inline void projectBlockBasis(Lattice<iVector<vComplex,nbasis > > &coarseData,
|
||||
|
||||
for(int i=0;i<nbasis;i++) {
|
||||
|
||||
coarseData._odata[sc][i]=coarseData._odata[sc][i]
|
||||
coarseData._odata[sc](i)=coarseData._odata[sc](i)
|
||||
+ innerProduct(Basis[i]._odata[sf],fineData._odata[sf]);
|
||||
|
||||
}
|
||||
}
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
|
||||
template<class vobj,int nbasis>
|
||||
inline void promoteBlockBasis(const Lattice<iVector<vComplex,nbasis > > &coarseData,
|
||||
Lattice<vobj> &fineData,
|
||||
const std::vector<Lattice<vobj> > &Basis)
|
||||
template<class vobj,class CComplex>
|
||||
inline void blockZAXPY(Lattice<vobj> &fineZ,
|
||||
const Lattice<CComplex> &coarseA,
|
||||
const Lattice<vobj> &fineX,
|
||||
const Lattice<vobj> &fineY)
|
||||
{
|
||||
GridBase * fine = fineData._grid;
|
||||
GridBase * coarse= coarseData._grid;
|
||||
int _ndimension = coarse->_ndimension;
|
||||
GridBase * fine = fineZ._grid;
|
||||
GridBase * coarse= coarseA._grid;
|
||||
|
||||
// checks
|
||||
assert( nbasis == Basis.size() );
|
||||
subdivides(coarse,fine);
|
||||
for(int i=0;i<nbasis;i++){
|
||||
conformable(Basis,fineData);
|
||||
}
|
||||
fineZ.checkerboard=fineX.checkerboard;
|
||||
subdivides(coarse,fine); // require they map
|
||||
conformable(fineX,fineY);
|
||||
conformable(fineX,fineZ);
|
||||
|
||||
std::vector<int> block_r (_ndimension);
|
||||
int _ndimension = coarse->_ndimension;
|
||||
|
||||
std::vector<int> block_r (_ndimension);
|
||||
|
||||
// FIXME merge with subdivide checking routine as this is redundant
|
||||
for(int d=0 ; d<_ndimension;d++){
|
||||
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
|
||||
assert(block_r[d]*coarse->_rdimensions[d]==fine->_rdimensions[d]);
|
||||
}
|
||||
|
||||
// Loop with a cache friendly loop ordering
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int sf=0;sf<fine->oSites();sf++){
|
||||
|
||||
|
||||
int sc;
|
||||
std::vector<int> coor_c(_ndimension);
|
||||
std::vector<int> coor_f(_ndimension);
|
||||
@ -135,21 +136,45 @@ inline void promoteBlockBasis(const Lattice<iVector<vComplex,nbasis > > &coarseD
|
||||
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
|
||||
GridBase::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
|
||||
|
||||
for(int i=0;i<nbasis;i++) {
|
||||
// z = A x + y
|
||||
fineZ._odata[sf]=coarseA._odata[sc]*fineX._odata[sf]+fineY._odata[sf];
|
||||
|
||||
if(i==0) fineData._odata[sf]= coarseData._odata[sc][i]*Basis[i]._odata[sf];
|
||||
else fineData._odata[sf]=fineData._odata[sf]+coarseData._odata[sc][i]*Basis[i]._odata[sf];
|
||||
|
||||
}
|
||||
}
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
template<class vobj,class CComplex>
|
||||
inline void blockInnerProduct(Lattice<CComplex> &CoarseInner,
|
||||
const Lattice<vobj> &fineX,
|
||||
const Lattice<vobj> &fineY)
|
||||
{
|
||||
typedef decltype(innerProduct(fineX._odata[0],fineY._odata[0])) dotp;
|
||||
|
||||
GridBase *coarse(CoarseInner._grid);
|
||||
GridBase *fine (fineX._grid);
|
||||
|
||||
Lattice<dotp> fine_inner(fine);
|
||||
Lattice<dotp> coarse_inner(coarse);
|
||||
|
||||
fine_inner = localInnerProduct(fineX,fineY);
|
||||
blockSum(coarse_inner,fine_inner);
|
||||
for(int ss=0;ss<coarse->oSites();ss++){
|
||||
CoarseInner._odata[ss] = coarse_inner._odata[ss];
|
||||
}
|
||||
}
|
||||
template<class vobj,class CComplex>
|
||||
inline void blockNormalise(Lattice<CComplex> &ip,Lattice<vobj> &fineX)
|
||||
{
|
||||
GridBase *coarse = ip._grid;
|
||||
Lattice<vobj> zz(fineX._grid); zz=zero;
|
||||
blockInnerProduct(ip,fineX,fineX);
|
||||
ip = rsqrt(ip);
|
||||
blockZAXPY(fineX,ip,fineX,zz);
|
||||
}
|
||||
// useful in multigrid project;
|
||||
// Generic name : Coarsen?
|
||||
template<class vobj>
|
||||
inline void sumBlocks(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
|
||||
inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
|
||||
{
|
||||
GridBase * fine = fineData._grid;
|
||||
GridBase * coarse= coarseData._grid;
|
||||
@ -181,5 +206,96 @@ inline void sumBlocks(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
|
||||
return;
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
inline void blockPick(GridBase *coarse,const Lattice<vobj> &unpicked,Lattice<vobj> &picked,std::vector<int> coor)
|
||||
{
|
||||
GridBase * fine = unpicked._grid;
|
||||
|
||||
Lattice<vobj> zz(fine);
|
||||
Lattice<iScalar<vInteger> > fcoor(fine);
|
||||
|
||||
zz = zero;
|
||||
|
||||
picked = unpicked;
|
||||
for(int d=0;d<fine->_ndimension;d++){
|
||||
LatticeCoordinate(fcoor,d);
|
||||
int block= fine->_rdimensions[d] / coarse->_rdimensions[d];
|
||||
int lo = (coor[d])*block;
|
||||
int hi = (coor[d]+1)*block;
|
||||
picked = where( (fcoor<hi) , picked, zz);
|
||||
picked = where( (fcoor>=lo), picked, zz);
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj,class CComplex>
|
||||
inline void blockOrthogonalise(Lattice<CComplex> &ip,std::vector<Lattice<vobj> > &Basis)
|
||||
{
|
||||
GridBase *coarse = ip._grid;
|
||||
GridBase *fine = Basis[0]._grid;
|
||||
|
||||
int nbasis = Basis.size() ;
|
||||
int _ndimension = coarse->_ndimension;
|
||||
|
||||
// checks
|
||||
subdivides(coarse,fine);
|
||||
for(int i=0;i<nbasis;i++){
|
||||
conformable(Basis[i]._grid,fine);
|
||||
}
|
||||
|
||||
for(int v=0;v<nbasis;v++) {
|
||||
for(int u=0;u<v;u++) {
|
||||
//Inner product & remove component
|
||||
blockInnerProduct(ip,Basis[u],Basis[v]);
|
||||
ip = -ip;
|
||||
blockZAXPY<vobj,CComplex> (Basis[v],ip,Basis[u],Basis[v]);
|
||||
}
|
||||
blockNormalise(ip,Basis[v]);
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj,class CComplex,int nbasis>
|
||||
inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
|
||||
Lattice<vobj> &fineData,
|
||||
const std::vector<Lattice<vobj> > &Basis)
|
||||
{
|
||||
GridBase * fine = fineData._grid;
|
||||
GridBase * coarse= coarseData._grid;
|
||||
int _ndimension = coarse->_ndimension;
|
||||
|
||||
// checks
|
||||
assert( nbasis == Basis.size() );
|
||||
subdivides(coarse,fine);
|
||||
for(int i=0;i<nbasis;i++){
|
||||
conformable(Basis[i]._grid,fine);
|
||||
}
|
||||
|
||||
std::vector<int> block_r (_ndimension);
|
||||
|
||||
for(int d=0 ; d<_ndimension;d++){
|
||||
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
|
||||
}
|
||||
|
||||
// Loop with a cache friendly loop ordering
|
||||
for(int sf=0;sf<fine->oSites();sf++){
|
||||
|
||||
int sc;
|
||||
std::vector<int> coor_c(_ndimension);
|
||||
std::vector<int> coor_f(_ndimension);
|
||||
|
||||
GridBase::CoorFromIndex(coor_f,sf,fine->_rdimensions);
|
||||
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
|
||||
GridBase::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
|
||||
|
||||
for(int i=0;i<nbasis;i++) {
|
||||
if(i==0) fineData._odata[sf]=coarseData._odata[sc](i) * Basis[i]._odata[sf];
|
||||
else fineData._odata[sf]=fineData._odata[sf]+coarseData._odata[sc](i)*Basis[i]._odata[sf];
|
||||
|
||||
}
|
||||
}
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
|
73
lib/lattice/Lattice_unary.h
Normal file
73
lib/lattice/Lattice_unary.h
Normal file
@ -0,0 +1,73 @@
|
||||
#ifndef GRID_LATTICE_UNARY_H
|
||||
#define GRID_LATTICE_UNARY_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// avoid copy back routines for mult, mac, sub, add
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class obj> Lattice<obj> sqrt(const Lattice<obj> &rhs){
|
||||
Lattice<obj> ret(rhs._grid);
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
ret._odata[ss]=sqrt(rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class obj> Lattice<obj> rsqrt(const Lattice<obj> &rhs){
|
||||
Lattice<obj> ret(rhs._grid);
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
ret._odata[ss]=rsqrt(rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class obj> Lattice<obj> sin(const Lattice<obj> &rhs){
|
||||
Lattice<obj> ret(rhs._grid);
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
ret._odata[ss]=sin(rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class obj> Lattice<obj> cos(const Lattice<obj> &rhs){
|
||||
Lattice<obj> ret(rhs._grid);
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
ret._odata[ss]=cos(rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
template<class obj> Lattice<obj> pow(const Lattice<obj> &rhs,RealD y){
|
||||
Lattice<obj> ret(rhs._grid);
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
ret._odata[ss]=pow(rhs._odata[ss],y);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class obj> Lattice<obj> mod(const Lattice<obj> &rhs,Integer y){
|
||||
Lattice<obj> ret(rhs._grid);
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
ret._odata[ss]=mod(rhs._odata[ss],y);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
@ -164,37 +164,37 @@ inline int readNerscHeader(std::string file,GridBase *grid, NerscField &field)
|
||||
// chomp the values
|
||||
//////////////////////////////////////////////////
|
||||
|
||||
field.hdr_version = header[std::string("HDR_VERSION")];
|
||||
field.data_type = header[std::string("DATATYPE")];
|
||||
field.storage_format = header[std::string("STORAGE_FORMAT")];
|
||||
field.hdr_version = header["HDR_VERSION"];
|
||||
field.data_type = header["DATATYPE"];
|
||||
field.storage_format = header["STORAGE_FORMAT"];
|
||||
|
||||
field.dimension[0] = std::stol(header[std::string("DIMENSION_1")]);
|
||||
field.dimension[1] = std::stol(header[std::string("DIMENSION_2")]);
|
||||
field.dimension[2] = std::stol(header[std::string("DIMENSION_3")]);
|
||||
field.dimension[3] = std::stol(header[std::string("DIMENSION_4")]);
|
||||
field.dimension[0] = std::stol(header["DIMENSION_1"]);
|
||||
field.dimension[1] = std::stol(header["DIMENSION_2"]);
|
||||
field.dimension[2] = std::stol(header["DIMENSION_3"]);
|
||||
field.dimension[3] = std::stol(header["DIMENSION_4"]);
|
||||
|
||||
assert(grid->_ndimension == 4);
|
||||
for(int d=0;d<4;d++){
|
||||
assert(grid->_fdimensions[d]==field.dimension[d]);
|
||||
}
|
||||
|
||||
field.link_trace = std::stod(header[std::string("LINK_TRACE")]);
|
||||
field.plaquette = std::stod(header[std::string("PLAQUETTE")]);
|
||||
field.link_trace = std::stod(header["LINK_TRACE"]);
|
||||
field.plaquette = std::stod(header["PLAQUETTE"]);
|
||||
|
||||
field.boundary[0] = header[std::string("BOUNDARY_1")];
|
||||
field.boundary[1] = header[std::string("BOUNDARY_2")];
|
||||
field.boundary[2] = header[std::string("BOUNDARY_3")];
|
||||
field.boundary[3] = header[std::string("BOUNDARY_4")];
|
||||
field.boundary[0] = header["BOUNDARY_1"];
|
||||
field.boundary[1] = header["BOUNDARY_2"];
|
||||
field.boundary[2] = header["BOUNDARY_3"];
|
||||
field.boundary[3] = header["BOUNDARY_4"];
|
||||
|
||||
field.checksum = std::stoul(header[std::string("CHECKSUM")],0,16);
|
||||
field.ensemble_id = header[std::string("ENSEMBLE_ID")];
|
||||
field.ensemble_label = header[std::string("ENSEMBLE_LABEL")];
|
||||
field.sequence_number = std::stol(header[std::string("SEQUENCE_NUMBER")]);
|
||||
field.creator = header[std::string("CREATOR")];
|
||||
field.creator_hardware = header[std::string("CREATOR_HARDWARE")];
|
||||
field.creation_date = header[std::string("CREATION_DATE")];
|
||||
field.archive_date = header[std::string("ARCHIVE_DATE")];
|
||||
field.floating_point = header[std::string("FLOATING_POINT")];
|
||||
field.checksum = std::stoul(header["CHECKSUM"],0,16);
|
||||
field.ensemble_id = header["ENSEMBLE_ID"];
|
||||
field.ensemble_label = header["ENSEMBLE_LABEL"];
|
||||
field.sequence_number = std::stol(header["SEQUENCE_NUMBER"]);
|
||||
field.creator = header["CREATOR"];
|
||||
field.creator_hardware = header["CREATOR_HARDWARE"];
|
||||
field.creation_date = header["CREATION_DATE"];
|
||||
field.archive_date = header["ARCHIVE_DATE"];
|
||||
field.floating_point = header["FLOATING_POINT"];
|
||||
|
||||
return field.data_start;
|
||||
}
|
||||
|
@ -307,10 +307,10 @@ namespace QCD {
|
||||
} //namespace QCD
|
||||
} // Grid
|
||||
|
||||
#include <qcd/SpaceTimeGrid.h>
|
||||
#include <qcd/Dirac.h>
|
||||
#include <qcd/TwoSpinor.h>
|
||||
#include <qcd/LinalgUtils.h>
|
||||
#include <qcd/utils/SpaceTimeGrid.h>
|
||||
#include <qcd/spin/Dirac.h>
|
||||
#include <qcd/spin/TwoSpinor.h>
|
||||
#include <qcd/utils/LinalgUtils.h>
|
||||
#include <qcd/utils/CovariantCshift.h>
|
||||
#include <qcd/utils/WilsonLoops.h>
|
||||
#include <qcd/action/Actions.h>
|
||||
|
@ -65,36 +65,9 @@
|
||||
#include <qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h>
|
||||
#include <qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h>
|
||||
|
||||
|
||||
// Chroma interface defining FermionAction
|
||||
/*
|
||||
template<typename T, typename P, typename Q> class FermAct4D : public FermionAction<T,P,Q>
|
||||
virtual LinearOperator<T>* linOp(Handle< FermState<T,P,Q> > state) const = 0;
|
||||
virtual LinearOperator<T>* lMdagM(Handle< FermState<T,P,Q> > state) const = 0;
|
||||
virtual LinOpSystemSolver<T>* invLinOp(Handle< FermState<T,P,Q> > state,
|
||||
virtual MdagMSystemSolver<T>* invMdagM(Handle< FermState<T,P,Q> > state,
|
||||
virtual LinOpMultiSystemSolver<T>* mInvLinOp(Handle< FermState<T,P,Q> > state,
|
||||
virtual MdagMMultiSystemSolver<T>* mInvMdagM(Handle< FermState<T,P,Q> > state,
|
||||
virtual MdagMMultiSystemSolverAccumulate<T>* mInvMdagMAcc(Handle< FermState<T,P,Q> > state,
|
||||
virtual SystemSolver<T>* qprop(Handle< FermState<T,P,Q> > state,
|
||||
class DiffFermAct4D : public FermAct4D<T,P,Q>
|
||||
virtual DiffLinearOperator<T,Q,P>* linOp(Handle< FermState<T,P,Q> > state) const = 0;
|
||||
virtual DiffLinearOperator<T,Q,P>* lMdagM(Handle< FermState<T,P,Q> > state) const = 0;
|
||||
*/
|
||||
|
||||
|
||||
// Chroma interface defining GaugeAction
|
||||
/*
|
||||
template<typename P, typename Q> class GaugeAction
|
||||
virtual const CreateGaugeState<P,Q>& getCreateState() const = 0;
|
||||
virtual GaugeState<P,Q>* createState(const Q& q) const
|
||||
virtual const GaugeBC<P,Q>& getGaugeBC() const
|
||||
virtual const Set& getSet(void) const = 0;
|
||||
virtual void deriv(P& result, const Handle< GaugeState<P,Q> >& state) const
|
||||
virtual Double S(const Handle< GaugeState<P,Q> >& state) const = 0;
|
||||
|
||||
class LinearGaugeAction : public GaugeAction< multi1d<LatticeColorMatrix>, multi1d<LatticeColorMatrix> >
|
||||
typedef multi1d<LatticeColorMatrix> P;
|
||||
*/
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// G5 herm -- this has to live in QCD since dirac matrix is not in the broader sector of code
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
#include <qcd/action/fermion/g5HermitianLinop.h>
|
||||
|
||||
#endif
|
||||
|
@ -165,6 +165,27 @@ namespace QCD {
|
||||
}
|
||||
}
|
||||
|
||||
void CayleyFermion5D::Mdir (const LatticeFermion &psi, LatticeFermion &chi,int dir,int disp){
|
||||
LatticeFermion tmp(psi._grid);
|
||||
// Assemble the 5d matrix
|
||||
for(int s=0;s<Ls;s++){
|
||||
if ( s==0 ) {
|
||||
// tmp = bs psi[s] + cs[s] psi[s+1}
|
||||
// tmp+= -mass*cs[s] psi[s+1}
|
||||
axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi ,s, s+1);
|
||||
axpby_ssp_pplus(tmp,1.0,tmp,mass*ceo[s],psi,s,Ls-1);
|
||||
} else if ( s==(Ls-1)) {
|
||||
axpby_ssp_pminus(tmp,beo[s],psi,mass*ceo[s],psi,s,0);
|
||||
axpby_ssp_pplus(tmp,1.0,tmp,-ceo[s],psi,s,s-1);
|
||||
} else {
|
||||
axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi,s,s+1);
|
||||
axpby_ssp_pplus (tmp,1.0,tmp,-ceo[s],psi,s,s-1);
|
||||
}
|
||||
}
|
||||
// Apply 4d dslash fragment
|
||||
DhopDir(tmp,chi,dir,disp);
|
||||
}
|
||||
|
||||
void CayleyFermion5D::MooeeDag (const LatticeFermion &psi, LatticeFermion &chi)
|
||||
{
|
||||
for (int s=0;s<Ls;s++){
|
||||
|
@ -21,6 +21,10 @@ namespace Grid {
|
||||
virtual void MooeeInv (const LatticeFermion &in, LatticeFermion &out);
|
||||
virtual void MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
|
||||
virtual void Instantiatable(void)=0;
|
||||
|
||||
// Efficient support for multigrid coarsening
|
||||
virtual void Mdir (const LatticeFermion &in, LatticeFermion &out,int dir,int disp);
|
||||
|
||||
// protected:
|
||||
RealD mass;
|
||||
|
||||
|
@ -90,6 +90,18 @@ namespace Grid {
|
||||
// Can ignore "dag"
|
||||
return M(psi,chi);
|
||||
}
|
||||
void ContinuedFractionFermion5D::Mdir (const LatticeFermion &psi, LatticeFermion &chi,int dir,int disp){
|
||||
DhopDir(psi,chi,dir,disp); // Dslash on diagonal. g5 Dslash is hermitian
|
||||
int sign=1;
|
||||
for(int s=0;s<Ls;s++){
|
||||
if ( s==(Ls-1) ){
|
||||
ag5xpby_ssp(chi,Beta[s]*ZoloHiInv,chi,0.0,chi,s,s);
|
||||
} else {
|
||||
ag5xpby_ssp(chi,cc[s]*Beta[s]*sign*ZoloHiInv,chi,0.0,chi,s,s);
|
||||
}
|
||||
sign=-sign;
|
||||
}
|
||||
}
|
||||
void ContinuedFractionFermion5D::Meooe (const LatticeFermion &psi, LatticeFermion &chi)
|
||||
{
|
||||
// Apply 4d dslash
|
||||
|
@ -24,6 +24,9 @@ namespace Grid {
|
||||
// virtual void Instantiatable(void)=0;
|
||||
virtual void Instantiatable(void) =0;
|
||||
|
||||
// Efficient support for multigrid coarsening
|
||||
virtual void Mdir (const LatticeFermion &in, LatticeFermion &out,int dir,int disp);
|
||||
|
||||
// Constructors
|
||||
ContinuedFractionFermion5D(LatticeGaugeField &_Umu,
|
||||
GridCartesian &FiveDimGrid,
|
||||
|
@ -40,6 +40,9 @@ namespace Grid {
|
||||
virtual void DhopOE(const FermionField &in, FermionField &out,int dag)=0;
|
||||
virtual void DhopEO(const FermionField &in, FermionField &out,int dag)=0;
|
||||
|
||||
virtual void Mdiag(const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's
|
||||
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
|
||||
virtual void DhopDir(const FermionField &in, FermionField &out,int dir,int disp)=0; // implemented by WilsonFermion and WilsonFermion5D
|
||||
|
||||
};
|
||||
|
||||
|
@ -2,6 +2,22 @@
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
void PartialFractionFermion5D::Mdir (const LatticeFermion &psi, LatticeFermion &chi,int dir,int disp){
|
||||
// this does both dag and undag but is trivial; make a common helper routing
|
||||
|
||||
int sign = 1;
|
||||
|
||||
DhopDir(psi,chi,dir,disp);
|
||||
|
||||
int nblock=(Ls-1)/2;
|
||||
for(int b=0;b<nblock;b++){
|
||||
int s = 2*b;
|
||||
ag5xpby_ssp(chi,-scale,chi,0.0,chi,s,s);
|
||||
ag5xpby_ssp(chi, scale,chi,0.0,chi,s+1,s+1);
|
||||
}
|
||||
ag5xpby_ssp(chi,p[nblock]*scale/amax,chi,0.0,chi,Ls-1,Ls-1);
|
||||
|
||||
}
|
||||
void PartialFractionFermion5D::Meooe_internal(const LatticeFermion &psi, LatticeFermion &chi,int dag)
|
||||
{
|
||||
// this does both dag and undag but is trivial; make a common helper routing
|
||||
|
@ -30,6 +30,9 @@ namespace Grid {
|
||||
|
||||
virtual void Instantiatable(void) =0; // ensure no make-eee
|
||||
|
||||
// Efficient support for multigrid coarsening
|
||||
virtual void Mdir (const LatticeFermion &in, LatticeFermion &out,int dir,int disp);
|
||||
|
||||
// Constructors
|
||||
PartialFractionFermion5D(LatticeGaugeField &_Umu,
|
||||
GridCartesian &FiveDimGrid,
|
||||
|
@ -93,6 +93,26 @@ void WilsonFermion::MooeeInvDag(const LatticeFermion &in, LatticeFermion &out)
|
||||
out.checkerboard = in.checkerboard;
|
||||
MooeeInv(in,out);
|
||||
}
|
||||
void WilsonFermion::Mdir (const LatticeFermion &in, LatticeFermion &out,int dir,int disp)
|
||||
{
|
||||
DhopDir(in,out,dir,disp);
|
||||
}
|
||||
void WilsonFermion::DhopDir(const LatticeFermion &in, LatticeFermion &out,int dir,int disp){
|
||||
WilsonCompressor compressor(DaggerNo);
|
||||
Stencil.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(in,comm_buf,compressor);
|
||||
|
||||
assert( (disp==1)||(disp==-1) );
|
||||
|
||||
int skip = (disp==1) ? 0 : 1;
|
||||
|
||||
int dirdisp = dir+skip*4;
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int sss=0;sss<in._grid->oSites();sss++){
|
||||
DiracOpt::DhopDir(Stencil,Umu,comm_buf,sss,sss,in,out,dirdisp);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
void WilsonFermion::DhopInternal(CartesianStencil & st,LatticeDoubledGaugeField & U,
|
||||
const LatticeFermion &in, LatticeFermion &out,int dag)
|
||||
|
@ -26,7 +26,7 @@ namespace Grid {
|
||||
void MeooeDag (const LatticeFermion &in, LatticeFermion &out);
|
||||
virtual void Mooee (const LatticeFermion &in, LatticeFermion &out); // remain virtual so we
|
||||
virtual void MooeeDag (const LatticeFermion &in, LatticeFermion &out); // can derive Clover
|
||||
virtual void MooeeInv (const LatticeFermion &in, LatticeFermion &out); // from Wilson bas
|
||||
virtual void MooeeInv (const LatticeFermion &in, LatticeFermion &out); // from Wilson base
|
||||
virtual void MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
|
||||
|
||||
// non-hermitian hopping term; half cb or both
|
||||
@ -34,6 +34,10 @@ namespace Grid {
|
||||
void DhopOE(const LatticeFermion &in, LatticeFermion &out,int dag);
|
||||
void DhopEO(const LatticeFermion &in, LatticeFermion &out,int dag);
|
||||
|
||||
// Multigrid assistance
|
||||
void Mdir (const LatticeFermion &in, LatticeFermion &out,int dir,int disp);
|
||||
void DhopDir(const LatticeFermion &in, LatticeFermion &out,int dir,int disp);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Extra methods added by derived
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
@ -72,6 +72,7 @@ namespace QCD {
|
||||
}
|
||||
void WilsonFermion5D::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu)
|
||||
{
|
||||
assert(GaugeGrid()->_ndimension==4);
|
||||
conformable(Uds._grid,GaugeGrid());
|
||||
conformable(Umu._grid,GaugeGrid());
|
||||
LatticeColourMatrix U(GaugeGrid());
|
||||
@ -82,6 +83,34 @@ void WilsonFermion5D::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGau
|
||||
pokeIndex<LorentzIndex>(Uds,U,mu+4);
|
||||
}
|
||||
}
|
||||
void WilsonFermion5D::DhopDir(const LatticeFermion &in, LatticeFermion &out,int dir5,int disp)
|
||||
{
|
||||
int dir = dir5-1; // Maps to the ordering above in "directions" that is passed to stencil
|
||||
// we drop off the innermost fifth dimension
|
||||
assert( (disp==1)||(disp==-1) );
|
||||
assert( (dir>=0)&&(dir<4) ); //must do x,y,z or t;
|
||||
|
||||
WilsonCompressor compressor(DaggerNo);
|
||||
Stencil.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(in,comm_buf,compressor);
|
||||
|
||||
int skip = (disp==1) ? 0 : 1;
|
||||
|
||||
int dirdisp = dir+skip*4;
|
||||
|
||||
assert(dirdisp<=7);
|
||||
assert(dirdisp>=0);
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<Umu._grid->oSites();ss++){
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sU=ss;
|
||||
int sF = s+Ls*sU;
|
||||
DiracOpt::DhopDir(Stencil,Umu,comm_buf,sF,sU,in,out,dirdisp);
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
void WilsonFermion5D::DhopInternal(CartesianStencil & st, LebesgueOrder &lo,
|
||||
LatticeDoubledGaugeField & U,
|
||||
const LatticeFermion &in, LatticeFermion &out,int dag)
|
||||
|
@ -57,6 +57,10 @@ namespace Grid {
|
||||
void DhopOE(const LatticeFermion &in, LatticeFermion &out,int dag);
|
||||
void DhopEO(const LatticeFermion &in, LatticeFermion &out,int dag);
|
||||
|
||||
// add a DhopComm
|
||||
// -- suboptimal interface will presently trigger multiple comms.
|
||||
void DhopDir(const LatticeFermion &in, LatticeFermion &out,int dir,int disp);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// New methods added
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
@ -13,7 +13,6 @@ void DiracOpt::DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
|
||||
vHalfSpinColourVector Uchi;
|
||||
int offset,local,perm, ptype;
|
||||
|
||||
//#define VERBOSE( A) if ( ss<10 ) { std::cout << "site " <<ss << " " #A " neigh " << offset << " perm "<< perm <<std::endl;}
|
||||
|
||||
// Xp
|
||||
int ss = sF;
|
||||
@ -33,12 +32,6 @@ void DiracOpt::DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
|
||||
mult(&Uchi(),&U._odata[sU](Xp),&chi());
|
||||
spReconXp(result,Uchi);
|
||||
|
||||
// std::cout << "XP_RECON"<<std::endl;
|
||||
// std::cout << result()(0)(0) <<" "<<result()(0)(1) <<" "<<result()(0)(2) <<std::endl;
|
||||
// std::cout << result()(1)(0) <<" "<<result()(1)(1) <<" "<<result()(1)(2) <<std::endl;
|
||||
// std::cout << result()(2)(0) <<" "<<result()(2)(1) <<" "<<result()(2)(2) <<std::endl;
|
||||
// std::cout << result()(3)(0) <<" "<<result()(3)(1) <<" "<<result()(3)(2) <<std::endl;
|
||||
|
||||
// Yp
|
||||
offset = st._offsets [Yp][ss];
|
||||
local = st._is_local[Yp][ss];
|
||||
@ -93,8 +86,7 @@ void DiracOpt::DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
|
||||
perm = st._permute[Xm][ss];
|
||||
ptype = st._permute_type[Xm];
|
||||
|
||||
if ( local && perm )
|
||||
{
|
||||
if ( local && perm ) {
|
||||
spProjXm(tmp,in._odata[offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else if ( local ) {
|
||||
@ -104,12 +96,6 @@ void DiracOpt::DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
|
||||
}
|
||||
mult(&Uchi(),&U._odata[sU](Xm),&chi());
|
||||
accumReconXm(result,Uchi);
|
||||
// std::cout << "XM_RECON_ACCUM"<<std::endl;
|
||||
// std::cout << result()(0)(0) <<" "<<result()(0)(1) <<" "<<result()(0)(2) <<std::endl;
|
||||
// std::cout << result()(1)(0) <<" "<<result()(1)(1) <<" "<<result()(1)(2) <<std::endl;
|
||||
// std::cout << result()(2)(0) <<" "<<result()(2)(1) <<" "<<result()(2)(2) <<std::endl;
|
||||
// std::cout << result()(3)(0) <<" "<<result()(3)(1) <<" "<<result()(3)(2) <<std::endl;
|
||||
|
||||
|
||||
// Ym
|
||||
offset = st._offsets [Ym][ss];
|
||||
@ -308,4 +294,136 @@ void DiracOpt::DhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U,
|
||||
|
||||
vstream(out._odata[ss],result*(-0.5));
|
||||
}
|
||||
|
||||
void DiracOpt::DhopDir(CartesianStencil &st,LatticeDoubledGaugeField &U,
|
||||
std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> > &buf,
|
||||
int sF,int sU,const LatticeFermion &in, LatticeFermion &out,int dirdisp)
|
||||
{
|
||||
vHalfSpinColourVector tmp;
|
||||
vHalfSpinColourVector chi;
|
||||
vSpinColourVector result;
|
||||
vHalfSpinColourVector Uchi;
|
||||
int offset,local,perm, ptype;
|
||||
int ss=sF;
|
||||
|
||||
offset = st._offsets [dirdisp][ss];
|
||||
local = st._is_local[dirdisp][ss];
|
||||
perm = st._permute[dirdisp][ss];
|
||||
ptype = st._permute_type[dirdisp];
|
||||
|
||||
// Xp
|
||||
if(dirdisp==Xp){
|
||||
if ( local && perm ) {
|
||||
spProjXp(tmp,in._odata[offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else if ( local ) {
|
||||
spProjXp(chi,in._odata[offset]);
|
||||
} else {
|
||||
chi=buf[offset];
|
||||
}
|
||||
mult(&Uchi(),&U._odata[sU](Xp),&chi());
|
||||
spReconXp(result,Uchi);
|
||||
}
|
||||
|
||||
// Yp
|
||||
if ( dirdisp==Yp ){
|
||||
if ( local && perm ) {
|
||||
spProjYp(tmp,in._odata[offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else if ( local ) {
|
||||
spProjYp(chi,in._odata[offset]);
|
||||
} else {
|
||||
chi=buf[offset];
|
||||
}
|
||||
mult(&Uchi(),&U._odata[sU](Yp),&chi());
|
||||
spReconYp(result,Uchi);
|
||||
}
|
||||
|
||||
// Zp
|
||||
if ( dirdisp ==Zp ){
|
||||
if ( local && perm ) {
|
||||
spProjZp(tmp,in._odata[offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else if ( local ) {
|
||||
spProjZp(chi,in._odata[offset]);
|
||||
} else {
|
||||
chi=buf[offset];
|
||||
}
|
||||
mult(&Uchi(),&U._odata[sU](Zp),&chi());
|
||||
spReconZp(result,Uchi);
|
||||
}
|
||||
|
||||
// Tp
|
||||
if ( dirdisp ==Tp ){
|
||||
if ( local && perm ) {
|
||||
spProjTp(tmp,in._odata[offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else if ( local ) {
|
||||
spProjTp(chi,in._odata[offset]);
|
||||
} else {
|
||||
chi=buf[offset];
|
||||
}
|
||||
mult(&Uchi(),&U._odata[sU](Tp),&chi());
|
||||
spReconTp(result,Uchi);
|
||||
}
|
||||
|
||||
// Xm
|
||||
if ( dirdisp==Xm ){
|
||||
if ( local && perm ) {
|
||||
spProjXm(tmp,in._odata[offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else if ( local ) {
|
||||
spProjXm(chi,in._odata[offset]);
|
||||
} else {
|
||||
chi=buf[offset];
|
||||
}
|
||||
mult(&Uchi(),&U._odata[sU](Xm),&chi());
|
||||
spReconXm(result,Uchi);
|
||||
}
|
||||
|
||||
// Ym
|
||||
if ( dirdisp == Ym ){
|
||||
if ( local && perm ) {
|
||||
spProjYm(tmp,in._odata[offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else if ( local ) {
|
||||
spProjYm(chi,in._odata[offset]);
|
||||
} else {
|
||||
chi=buf[offset];
|
||||
}
|
||||
mult(&Uchi(),&U._odata[sU](Ym),&chi());
|
||||
spReconYm(result,Uchi);
|
||||
}
|
||||
|
||||
// Zm
|
||||
if ( dirdisp == Zm ){
|
||||
if ( local && perm ) {
|
||||
spProjZm(tmp,in._odata[offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else if ( local ) {
|
||||
spProjZm(chi,in._odata[offset]);
|
||||
} else {
|
||||
chi=buf[offset];
|
||||
}
|
||||
mult(&Uchi(),&U._odata[sU](Zm),&chi());
|
||||
spReconZm(result,Uchi);
|
||||
}
|
||||
|
||||
// Tm
|
||||
if ( dirdisp==Tm ) {
|
||||
if ( local && perm ) {
|
||||
spProjTm(tmp,in._odata[offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else if ( local ) {
|
||||
spProjTm(chi,in._odata[offset]);
|
||||
} else {
|
||||
chi=buf[offset];
|
||||
}
|
||||
mult(&Uchi(),&U._odata[sU](Tm),&chi());
|
||||
spReconTm(result,Uchi);
|
||||
}
|
||||
|
||||
vstream(out._odata[ss],result*(-0.5));
|
||||
}
|
||||
|
||||
}}
|
||||
|
@ -20,6 +20,9 @@ namespace Grid {
|
||||
static void DhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U,
|
||||
std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> > &buf,
|
||||
int sF,int sU,const LatticeFermion &in, LatticeFermion &out);
|
||||
static void DhopDir(CartesianStencil &st,LatticeDoubledGaugeField &U,
|
||||
std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> > &buf,
|
||||
int sF,int sU,const LatticeFermion &in, LatticeFermion &out,int dirdisp);
|
||||
|
||||
};
|
||||
|
||||
|
92
lib/qcd/action/fermion/g5HermitianLinop.h
Normal file
92
lib/qcd/action/fermion/g5HermitianLinop.h
Normal file
@ -0,0 +1,92 @@
|
||||
#ifndef G5_HERMITIAN_LINOP
|
||||
#define G5_HERMITIAN_LINOP
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Wrap an already herm matrix
|
||||
////////////////////////////////////////////////////////////////////
|
||||
template<class Matrix,class Field>
|
||||
class Gamma5R5HermitianLinearOperator : public LinearOperatorBase<Field> {
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
Gamma5R5HermitianLinearOperator(Matrix &Mat): _Mat(Mat){};
|
||||
void Op (const Field &in, Field &out){
|
||||
HermOp(in,out);
|
||||
}
|
||||
void AdjOp (const Field &in, Field &out){
|
||||
HermOp(in,out);
|
||||
}
|
||||
void OpDiag (const Field &in, Field &out) {
|
||||
Field tmp(in._grid);
|
||||
_Mat.Mdiag(in,tmp);
|
||||
G5R5(out,tmp);
|
||||
}
|
||||
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||
Field tmp(in._grid);
|
||||
_Mat.Mdir(in,tmp,dir,disp);
|
||||
G5R5(out,tmp);
|
||||
}
|
||||
|
||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
|
||||
HermOp(in,out);
|
||||
|
||||
ComplexD dot;
|
||||
dot= innerProduct(in,out);
|
||||
n1=real(dot);
|
||||
|
||||
dot = innerProduct(out,out);
|
||||
n2=real(dot);
|
||||
}
|
||||
void HermOp(const Field &in, Field &out){
|
||||
Field tmp(in._grid);
|
||||
_Mat.M(in,tmp);
|
||||
G5R5(out,tmp);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<class Matrix,class Field>
|
||||
class Gamma5HermitianLinearOperator : public LinearOperatorBase<Field> {
|
||||
Matrix &_Mat;
|
||||
Gamma g5;
|
||||
public:
|
||||
Gamma5HermitianLinearOperator(Matrix &Mat): _Mat(Mat), g5(Gamma::Gamma5) {};
|
||||
void Op (const Field &in, Field &out){
|
||||
HermOp(in,out);
|
||||
}
|
||||
void AdjOp (const Field &in, Field &out){
|
||||
HermOp(in,out);
|
||||
}
|
||||
void OpDiag (const Field &in, Field &out) {
|
||||
Field tmp(in._grid);
|
||||
_Mat.Mdiag(in,tmp);
|
||||
out=g5*tmp;
|
||||
}
|
||||
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||
Field tmp(in._grid);
|
||||
_Mat.Mdir(in,tmp,dir,disp);
|
||||
out=g5*tmp;
|
||||
}
|
||||
|
||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
|
||||
HermOp(in,out);
|
||||
|
||||
ComplexD dot;
|
||||
dot= innerProduct(in,out);
|
||||
n1=real(dot);
|
||||
|
||||
dot = innerProduct(out,out);
|
||||
n2=real(dot);
|
||||
}
|
||||
void HermOp(const Field &in, Field &out){
|
||||
Field tmp(in._grid);
|
||||
_Mat.M(in,tmp);
|
||||
out=g5*tmp;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
}}
|
||||
#endif
|
0
lib/qcd/spin/.dirstamp
Normal file
0
lib/qcd/spin/.dirstamp
Normal file
0
lib/qcd/utils/.dirstamp
Normal file
0
lib/qcd/utils/.dirstamp
Normal file
@ -109,5 +109,24 @@ PARALLEL_FOR_LOOP
|
||||
vstream(z._odata[ss+s],tmp);
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
void G5R5(Lattice<vobj> &z,const Lattice<vobj> &x)
|
||||
{
|
||||
GridBase *grid=x._grid;
|
||||
z.checkerboard = x.checkerboard;
|
||||
conformable(x,z);
|
||||
int Ls = grid->_rdimensions[0];
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
|
||||
vobj tmp;
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sp = Ls-1-s;
|
||||
multGamma5(tmp(),x._odata[ss+s]());
|
||||
vstream(z._odata[ss+sp],tmp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}}
|
||||
#endif
|
@ -4,7 +4,7 @@
|
||||
|
||||
Using intrinsics
|
||||
*/
|
||||
// Time-stamp: <2015-05-29 14:13:30 neo>
|
||||
// Time-stamp: <2015-06-09 14:26:59 neo>
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
#include <immintrin.h>
|
||||
@ -314,9 +314,9 @@ namespace Optimization {
|
||||
template<>
|
||||
inline Grid::ComplexF Reduce<Grid::ComplexF, __m256>::operator()(__m256 in){
|
||||
__m256 v1,v2;
|
||||
Optimization::permute(v1,in,0); // sse 128; paired complex single
|
||||
Optimization::permute(v1,in,0); // avx 256; quad complex single
|
||||
v1 = _mm256_add_ps(v1,in);
|
||||
Optimization::permute(v2,v1,1); // avx 256; quad complex single
|
||||
Optimization::permute(v2,v1,1);
|
||||
v1 = _mm256_add_ps(v1,v2);
|
||||
u256f conv; conv.v = v1;
|
||||
return Grid::ComplexF(conv.f[0],conv.f[1]);
|
||||
@ -367,7 +367,6 @@ namespace Optimization {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -384,6 +383,12 @@ namespace Grid {
|
||||
_mm_prefetch(ptr+i+512,_MM_HINT_T0);
|
||||
}
|
||||
}
|
||||
inline void prefetch_HINT_T0(const char *ptr){
|
||||
_mm_prefetch(ptr,_MM_HINT_T0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template < typename VectorSIMD >
|
||||
inline void Gpermute(VectorSIMD &y,const VectorSIMD &b, int perm ) {
|
||||
Optimization::permute(y.v,b.v,perm);
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
Using intrinsics
|
||||
*/
|
||||
// Time-stamp: <2015-05-27 12:08:50 neo>
|
||||
// Time-stamp: <2015-06-09 14:27:28 neo>
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
#include <immintrin.h>
|
||||
@ -309,6 +309,12 @@ namespace Grid {
|
||||
_mm_prefetch(ptr+i+512,_MM_HINT_T0);
|
||||
}
|
||||
}
|
||||
inline void prefetch_HINT_T0(const char *ptr){
|
||||
_mm_prefetch(ptr,_MM_HINT_T0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Gpermute utilities consider coalescing into 1 Gpermute
|
||||
template < typename VectorSIMD >
|
||||
|
289
lib/simd/Grid_empty.h
Normal file
289
lib/simd/Grid_empty.h
Normal file
@ -0,0 +1,289 @@
|
||||
//----------------------------------------------------------------------
|
||||
/*! @file Grid_sse4.h
|
||||
@brief Empty Optimization libraries for debugging
|
||||
|
||||
Using intrinsics
|
||||
*/
|
||||
// Time-stamp: <2015-06-09 14:28:02 neo>
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
namespace Optimization {
|
||||
|
||||
template<class vtype>
|
||||
union uconv {
|
||||
float f;
|
||||
vtype v;
|
||||
};
|
||||
|
||||
union u128f {
|
||||
float v;
|
||||
float f[4];
|
||||
};
|
||||
union u128d {
|
||||
double v;
|
||||
double f[2];
|
||||
};
|
||||
|
||||
struct Vsplat{
|
||||
//Complex float
|
||||
inline float operator()(float a, float b){
|
||||
return 0;
|
||||
}
|
||||
// Real float
|
||||
inline float operator()(float a){
|
||||
return 0;
|
||||
}
|
||||
//Complex double
|
||||
inline double operator()(double a, double b){
|
||||
return 0;
|
||||
}
|
||||
//Real double
|
||||
inline double operator()(double a){
|
||||
return 0;
|
||||
}
|
||||
//Integer
|
||||
inline int operator()(Integer a){
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
struct Vstore{
|
||||
//Float
|
||||
inline void operator()(float a, float* F){
|
||||
|
||||
}
|
||||
//Double
|
||||
inline void operator()(double a, double* D){
|
||||
|
||||
}
|
||||
//Integer
|
||||
inline void operator()(int a, Integer* I){
|
||||
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
struct Vstream{
|
||||
//Float
|
||||
inline void operator()(float * a, float b){
|
||||
|
||||
}
|
||||
//Double
|
||||
inline void operator()(double * a, double b){
|
||||
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
struct Vset{
|
||||
// Complex float
|
||||
inline float operator()(Grid::ComplexF *a){
|
||||
return 0;
|
||||
}
|
||||
// Complex double
|
||||
inline double operator()(Grid::ComplexD *a){
|
||||
return 0;
|
||||
}
|
||||
// Real float
|
||||
inline float operator()(float *a){
|
||||
return 0;
|
||||
}
|
||||
// Real double
|
||||
inline double operator()(double *a){
|
||||
return 0;
|
||||
}
|
||||
// Integer
|
||||
inline int operator()(Integer *a){
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
template <typename Out_type, typename In_type>
|
||||
struct Reduce{
|
||||
//Need templated class to overload output type
|
||||
//General form must generate error if compiled
|
||||
inline Out_type operator()(In_type in){
|
||||
printf("Error, using wrong Reduce function\n");
|
||||
exit(1);
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Arithmetic operations
|
||||
/////////////////////////////////////////////////////
|
||||
struct Sum{
|
||||
//Complex/Real float
|
||||
inline float operator()(float a, float b){
|
||||
return 0;
|
||||
}
|
||||
//Complex/Real double
|
||||
inline double operator()(double a, double b){
|
||||
return 0;
|
||||
}
|
||||
//Integer
|
||||
inline int operator()(int a, int b){
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
struct Sub{
|
||||
//Complex/Real float
|
||||
inline float operator()(float a, float b){
|
||||
return 0;
|
||||
}
|
||||
//Complex/Real double
|
||||
inline double operator()(double a, double b){
|
||||
return 0;
|
||||
}
|
||||
//Integer
|
||||
inline int operator()(int a, int b){
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
struct MultComplex{
|
||||
// Complex float
|
||||
inline float operator()(float a, float b){
|
||||
return 0;
|
||||
}
|
||||
// Complex double
|
||||
inline double operator()(double a, double b){
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
struct Mult{
|
||||
// Real float
|
||||
inline float operator()(float a, float b){
|
||||
return 0;
|
||||
}
|
||||
// Real double
|
||||
inline double operator()(double a, double b){
|
||||
return 0;
|
||||
}
|
||||
// Integer
|
||||
inline int operator()(int a, int b){
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
struct Conj{
|
||||
// Complex single
|
||||
inline float operator()(float in){
|
||||
return 0;
|
||||
}
|
||||
// Complex double
|
||||
inline double operator()(double in){
|
||||
return 0;
|
||||
}
|
||||
// do not define for integer input
|
||||
};
|
||||
|
||||
struct TimesMinusI{
|
||||
//Complex single
|
||||
inline float operator()(float in, float ret){
|
||||
return 0;
|
||||
}
|
||||
//Complex double
|
||||
inline double operator()(double in, double ret){
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
struct TimesI{
|
||||
//Complex single
|
||||
inline float operator()(float in, float ret){
|
||||
return 0;
|
||||
}
|
||||
//Complex double
|
||||
inline double operator()(double in, double ret){
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// Some Template specialization
|
||||
template < typename vtype >
|
||||
void permute(vtype &a, vtype b, int perm) {
|
||||
};
|
||||
|
||||
//Complex float Reduce
|
||||
template<>
|
||||
inline Grid::ComplexF Reduce<Grid::ComplexF, float>::operator()(float in){
|
||||
return 0;
|
||||
}
|
||||
//Real float Reduce
|
||||
template<>
|
||||
inline Grid::RealF Reduce<Grid::RealF, float>::operator()(float in){
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
//Complex double Reduce
|
||||
template<>
|
||||
inline Grid::ComplexD Reduce<Grid::ComplexD, double>::operator()(double in){
|
||||
return 0;
|
||||
}
|
||||
|
||||
//Real double Reduce
|
||||
template<>
|
||||
inline Grid::RealD Reduce<Grid::RealD, double>::operator()(double in){
|
||||
return 0;
|
||||
}
|
||||
|
||||
//Integer Reduce
|
||||
template<>
|
||||
inline Integer Reduce<Integer, int>::operator()(int in){
|
||||
// FIXME unimplemented
|
||||
printf("Reduce : Missing integer implementation -> FIX\n");
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Here assign types
|
||||
namespace Grid {
|
||||
|
||||
typedef float SIMD_Ftype; // Single precision type
|
||||
typedef double SIMD_Dtype; // Double precision type
|
||||
typedef int SIMD_Itype; // Integer type
|
||||
|
||||
// prefetch utilities
|
||||
inline void v_prefetch0(int size, const char *ptr){};
|
||||
inline void prefetch_HINT_T0(const char *ptr){};
|
||||
|
||||
|
||||
|
||||
// Gpermute function
|
||||
template < typename VectorSIMD >
|
||||
inline void Gpermute(VectorSIMD &y,const VectorSIMD &b, int perm ) {
|
||||
Optimization::permute(y.v,b.v,perm);
|
||||
}
|
||||
|
||||
|
||||
// Function name aliases
|
||||
typedef Optimization::Vsplat VsplatSIMD;
|
||||
typedef Optimization::Vstore VstoreSIMD;
|
||||
typedef Optimization::Vset VsetSIMD;
|
||||
typedef Optimization::Vstream VstreamSIMD;
|
||||
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
|
||||
|
||||
|
||||
|
||||
|
||||
// Arithmetic operations
|
||||
typedef Optimization::Sum SumSIMD;
|
||||
typedef Optimization::Sub SubSIMD;
|
||||
typedef Optimization::Mult MultSIMD;
|
||||
typedef Optimization::MultComplex MultComplexSIMD;
|
||||
typedef Optimization::Conj ConjSIMD;
|
||||
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
||||
typedef Optimization::TimesI TimesISIMD;
|
||||
|
||||
}
|
308
lib/simd/Grid_neon.h
Normal file
308
lib/simd/Grid_neon.h
Normal file
@ -0,0 +1,308 @@
|
||||
//----------------------------------------------------------------------
|
||||
/*! @file Grid_sse4.h
|
||||
@brief Optimization libraries for NEON (ARM) instructions set ARMv7
|
||||
|
||||
Experimental - Using intrinsics - DEVELOPING!
|
||||
*/
|
||||
// Time-stamp: <2015-06-09 15:25:40 neo>
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
namespace Optimization {
|
||||
|
||||
template<class vtype>
|
||||
union uconv {
|
||||
float32x4_t f;
|
||||
vtype v;
|
||||
};
|
||||
|
||||
union u128f {
|
||||
float32x4_t v;
|
||||
float f[4];
|
||||
};
|
||||
union u128d {
|
||||
float32x4_t v;
|
||||
float f[4];
|
||||
};
|
||||
|
||||
struct Vsplat{
|
||||
//Complex float
|
||||
inline float32x4_t operator()(float a, float b){
|
||||
float32x4_t foo;
|
||||
return foo;
|
||||
}
|
||||
// Real float
|
||||
inline float32x4_t operator()(float a){
|
||||
float32x4_t foo;
|
||||
return foo;
|
||||
}
|
||||
//Complex double
|
||||
inline float32x4_t operator()(double a, double b){
|
||||
float32x4_t foo;
|
||||
return foo;
|
||||
}
|
||||
//Real double
|
||||
inline float32x4_t operator()(double a){
|
||||
float32x4_t foo;
|
||||
return foo;
|
||||
}
|
||||
//Integer
|
||||
inline uint32x4_t operator()(Integer a){
|
||||
uint32x4_t foo;
|
||||
return foo;
|
||||
}
|
||||
};
|
||||
|
||||
struct Vstore{
|
||||
//Float
|
||||
inline void operator()(float32x4_t a, float* F){
|
||||
|
||||
}
|
||||
//Double
|
||||
inline void operator()(float32x4_t a, double* D){
|
||||
|
||||
}
|
||||
//Integer
|
||||
inline void operator()(uint32x4_t a, Integer* I){
|
||||
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
struct Vstream{
|
||||
//Float
|
||||
inline void operator()(float * a, float32x4_t b){
|
||||
|
||||
}
|
||||
//Double
|
||||
inline void operator()(double * a, float32x4_t b){
|
||||
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
struct Vset{
|
||||
// Complex float
|
||||
inline float32x4_t operator()(Grid::ComplexF *a){
|
||||
float32x4_t foo;
|
||||
return foo;
|
||||
}
|
||||
// Complex double
|
||||
inline float32x4_t operator()(Grid::ComplexD *a){
|
||||
float32x4_t foo;
|
||||
return foo;
|
||||
}
|
||||
// Real float
|
||||
inline float32x4_t operator()(float *a){
|
||||
float32x4_t foo;
|
||||
return foo;
|
||||
}
|
||||
// Real double
|
||||
inline float32x4_t operator()(double *a){
|
||||
float32x4_t foo;
|
||||
return foo;
|
||||
}
|
||||
// Integer
|
||||
inline uint32x4_t operator()(Integer *a){
|
||||
uint32x4_t foo;
|
||||
return foo;
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
template <typename Out_type, typename In_type>
|
||||
struct Reduce{
|
||||
//Need templated class to overload output type
|
||||
//General form must generate error if compiled
|
||||
inline Out_type operator()(In_type in){
|
||||
printf("Error, using wrong Reduce function\n");
|
||||
exit(1);
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Arithmetic operations
|
||||
/////////////////////////////////////////////////////
|
||||
struct Sum{
|
||||
//Complex/Real float
|
||||
inline float32x4_t operator()(float32x4_t a, float32x4_t b){
|
||||
float32x4_t foo;
|
||||
return foo;
|
||||
}
|
||||
//Complex/Real double
|
||||
//inline float32x4_t operator()(float32x4_t a, float32x4_t b){
|
||||
// float32x4_t foo;
|
||||
// return foo;
|
||||
//}
|
||||
//Integer
|
||||
inline uint32x4_t operator()(uint32x4_t a, uint32x4_t b){
|
||||
uint32x4_t foo;
|
||||
return foo;
|
||||
}
|
||||
};
|
||||
|
||||
struct Sub{
|
||||
//Complex/Real float
|
||||
inline float32x4_t operator()(float32x4_t a, float32x4_t b){
|
||||
float32x4_t foo;
|
||||
return foo;
|
||||
}
|
||||
//Complex/Real double
|
||||
//inline float32x4_t operator()(float32x4_t a, float32x4_t b){
|
||||
// float32x4_t foo;
|
||||
// return foo;
|
||||
//}
|
||||
//Integer
|
||||
inline uint32x4_t operator()(uint32x4_t a, uint32x4_t b){
|
||||
uint32x4_t foo;
|
||||
return foo;
|
||||
}
|
||||
};
|
||||
|
||||
struct MultComplex{
|
||||
// Complex float
|
||||
inline float32x4_t operator()(float32x4_t a, float32x4_t b){
|
||||
float32x4_t foo;
|
||||
return foo;
|
||||
}
|
||||
// Complex double
|
||||
//inline float32x4_t operator()(float32x4_t a, float32x4_t b){
|
||||
// float32x4_t foo;
|
||||
// return foo;
|
||||
//}
|
||||
};
|
||||
|
||||
struct Mult{
|
||||
// Real float
|
||||
inline float32x4_t operator()(float32x4_t a, float32x4_t b){
|
||||
return a;
|
||||
}
|
||||
// Real double
|
||||
//inline float32x4_t operator()(float32x4_t a, float32x4_t b){
|
||||
// return 0;
|
||||
//}
|
||||
// Integer
|
||||
inline uint32x4_t operator()(uint32x4_t a, uint32x4_t b){
|
||||
return a;
|
||||
}
|
||||
};
|
||||
|
||||
struct Conj{
|
||||
// Complex single
|
||||
inline float32x4_t operator()(float32x4_t in){
|
||||
return in;
|
||||
}
|
||||
// Complex double
|
||||
//inline float32x4_t operator()(float32x4_t in){
|
||||
// return 0;
|
||||
//}
|
||||
// do not define for integer input
|
||||
};
|
||||
|
||||
struct TimesMinusI{
|
||||
//Complex single
|
||||
inline float32x4_t operator()(float32x4_t in, float32x4_t ret){
|
||||
return in;
|
||||
}
|
||||
//Complex double
|
||||
//inline float32x4_t operator()(float32x4_t in, float32x4_t ret){
|
||||
// return in;
|
||||
//}
|
||||
|
||||
|
||||
};
|
||||
|
||||
struct TimesI{
|
||||
//Complex single
|
||||
inline float32x4_t operator()(float32x4_t in, float32x4_t ret){
|
||||
return in;
|
||||
}
|
||||
//Complex double
|
||||
//inline float32x4_t operator()(float32x4_t in, float32x4_t ret){
|
||||
// return 0;
|
||||
//}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// Some Template specialization
|
||||
template < typename vtype >
|
||||
void permute(vtype &a, vtype b, int perm) {
|
||||
|
||||
};
|
||||
|
||||
//Complex float Reduce
|
||||
template<>
|
||||
inline Grid::ComplexF Reduce<Grid::ComplexF, float32x4_t>::operator()(float32x4_t in){
|
||||
return 0;
|
||||
}
|
||||
//Real float Reduce
|
||||
template<>
|
||||
inline Grid::RealF Reduce<Grid::RealF, float32x4_t>::operator()(float32x4_t in){
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
//Complex double Reduce
|
||||
template<>
|
||||
inline Grid::ComplexD Reduce<Grid::ComplexD, float32x4_t>::operator()(float32x4_t in){
|
||||
return 0;
|
||||
}
|
||||
|
||||
//Real double Reduce
|
||||
template<>
|
||||
inline Grid::RealD Reduce<Grid::RealD, float32x4_t>::operator()(float32x4_t in){
|
||||
return 0;
|
||||
}
|
||||
|
||||
//Integer Reduce
|
||||
template<>
|
||||
inline Integer Reduce<Integer, uint32x4_t>::operator()(uint32x4_t in){
|
||||
// FIXME unimplemented
|
||||
printf("Reduce : Missing integer implementation -> FIX\n");
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Here assign types
|
||||
namespace Grid {
|
||||
|
||||
typedef float32x4_t SIMD_Ftype; // Single precision type
|
||||
typedef float32x4_t SIMD_Dtype; // Double precision type - no double on ARMv7
|
||||
typedef uint32x4_t SIMD_Itype; // Integer type
|
||||
|
||||
inline void v_prefetch0(int size, const char *ptr){}; // prefetch utilities
|
||||
inline void prefetch_HINT_T0(const char *ptr){};
|
||||
|
||||
|
||||
// Gpermute function
|
||||
template < typename VectorSIMD >
|
||||
inline void Gpermute(VectorSIMD &y,const VectorSIMD &b, int perm ) {
|
||||
Optimization::permute(y.v,b.v,perm);
|
||||
}
|
||||
|
||||
|
||||
// Function name aliases
|
||||
typedef Optimization::Vsplat VsplatSIMD;
|
||||
typedef Optimization::Vstore VstoreSIMD;
|
||||
typedef Optimization::Vset VsetSIMD;
|
||||
typedef Optimization::Vstream VstreamSIMD;
|
||||
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
|
||||
|
||||
|
||||
|
||||
|
||||
// Arithmetic operations
|
||||
typedef Optimization::Sum SumSIMD;
|
||||
typedef Optimization::Sub SubSIMD;
|
||||
typedef Optimization::Mult MultSIMD;
|
||||
typedef Optimization::MultComplex MultComplexSIMD;
|
||||
typedef Optimization::Conj ConjSIMD;
|
||||
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
||||
typedef Optimization::TimesI TimesISIMD;
|
||||
|
||||
}
|
@ -4,7 +4,7 @@
|
||||
|
||||
Using intrinsics
|
||||
*/
|
||||
// Time-stamp: <2015-05-27 12:02:07 neo>
|
||||
// Time-stamp: <2015-06-09 14:24:01 neo>
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
#include <pmmintrin.h>
|
||||
@ -297,7 +297,12 @@ namespace Grid {
|
||||
typedef __m128d SIMD_Dtype; // Double precision type
|
||||
typedef __m128i SIMD_Itype; // Integer type
|
||||
|
||||
inline void v_prefetch0(int size, const char *ptr){}; // prefetch utilities
|
||||
// prefetch utilities
|
||||
inline void v_prefetch0(int size, const char *ptr){};
|
||||
inline void prefetch_HINT_T0(const char *ptr){
|
||||
_mm_prefetch(ptr,_MM_HINT_T0);
|
||||
}
|
||||
|
||||
|
||||
// Gpermute function
|
||||
template < typename VectorSIMD >
|
||||
|
@ -2,11 +2,14 @@
|
||||
/*! @file Grid_vector_types.h
|
||||
@brief Defines templated class Grid_simd to deal with inner vector types
|
||||
*/
|
||||
// Time-stamp: <2015-05-29 14:19:48 neo>
|
||||
// Time-stamp: <2015-06-09 15:00:47 neo>
|
||||
//---------------------------------------------------------------------------
|
||||
#ifndef GRID_VECTOR_TYPES
|
||||
#define GRID_VECTOR_TYPES
|
||||
|
||||
#ifdef EMPTY_SIMD
|
||||
#include "Grid_empty.h"
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
#include "Grid_sse4.h"
|
||||
#endif
|
||||
@ -19,6 +22,9 @@
|
||||
#if defined QPX
|
||||
#include "Grid_qpx.h"
|
||||
#endif
|
||||
#ifdef NEONv7
|
||||
#include "Grid_neon.h"
|
||||
#endif
|
||||
|
||||
namespace Grid {
|
||||
|
||||
@ -78,11 +84,18 @@ namespace Grid {
|
||||
typedef typename RealPart < Scalar_type >::type Real;
|
||||
typedef Vector_type vector_type;
|
||||
typedef Scalar_type scalar_type;
|
||||
|
||||
|
||||
typedef union conv_t_union {
|
||||
Vector_type v;
|
||||
Scalar_type s[sizeof(Vector_type)/sizeof(Scalar_type)];
|
||||
conv_t_union(){};
|
||||
} conv_t;
|
||||
|
||||
|
||||
Vector_type v;
|
||||
|
||||
static inline int Nsimd(void) { return sizeof(Vector_type)/sizeof(Scalar_type);}
|
||||
|
||||
|
||||
Grid_simd& operator=(const Grid_simd&& rhs){v=rhs.v;return *this;};
|
||||
Grid_simd& operator=(const Grid_simd& rhs){v=rhs.v;return *this;}; //faster than not declaring it and leaving to the compiler
|
||||
@ -145,7 +158,7 @@ namespace Grid {
|
||||
///////////////////////
|
||||
friend inline void vprefetch(const Grid_simd &v)
|
||||
{
|
||||
_mm_prefetch((const char*)&v.v,_MM_HINT_T0);
|
||||
prefetch_HINT_T0((const char*)&v.v);
|
||||
}
|
||||
|
||||
///////////////////////
|
||||
@ -192,6 +205,27 @@ namespace Grid {
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////
|
||||
// Not all functions are supported
|
||||
// through SIMD and must breakout to
|
||||
// scalar type and back again. This
|
||||
// provides support
|
||||
///////////////////////////////////////
|
||||
|
||||
template<class functor> friend inline Grid_simd SimdApply (const functor &func,const Grid_simd &v) {
|
||||
|
||||
Grid_simd ret;
|
||||
Grid_simd::conv_t conv;
|
||||
|
||||
conv.v = v.v;
|
||||
for(int i=0;i<Nsimd();i++){
|
||||
conv.s[i]=func(conv.s[i]);
|
||||
}
|
||||
ret.v = conv.v;
|
||||
return ret;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// General permute; assumes vector length is same across
|
||||
// all subtypes; may not be a good assumption, but could
|
||||
|
72
lib/simd/Grid_vector_unops.h
Normal file
72
lib/simd/Grid_vector_unops.h
Normal file
@ -0,0 +1,72 @@
|
||||
#ifndef GRID_VECTOR_UNOPS
|
||||
#define GRID_VECTOR_UNOPS
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class scalar> struct SqrtRealFunctor {
|
||||
scalar operator()(const scalar &a) const {
|
||||
return sqrt(real(a));
|
||||
}
|
||||
};
|
||||
|
||||
template<class scalar> struct RSqrtRealFunctor {
|
||||
scalar operator()(const scalar &a) const {
|
||||
return scalar(1.0/sqrt(real(a)));
|
||||
}
|
||||
};
|
||||
|
||||
template<class scalar> struct CosRealFunctor {
|
||||
scalar operator()(const scalar &a) const {
|
||||
return cos(real(a));
|
||||
}
|
||||
};
|
||||
|
||||
template<class scalar> struct SinRealFunctor {
|
||||
scalar operator()(const scalar &a) const {
|
||||
return sin(real(a));
|
||||
}
|
||||
};
|
||||
|
||||
template<class scalar> struct PowRealFunctor {
|
||||
double y;
|
||||
PowRealFunctor(double _y) : y(_y) {};
|
||||
scalar operator()(const scalar &a) const {
|
||||
return pow(real(a),y);
|
||||
}
|
||||
};
|
||||
|
||||
template<class scalar> struct ModIntFunctor {
|
||||
Integer y;
|
||||
ModIntFunctor(Integer _y) : y(_y) {};
|
||||
scalar operator()(const scalar &a) const {
|
||||
return Integer(a)%y;
|
||||
}
|
||||
};
|
||||
|
||||
template < class S, class V >
|
||||
inline Grid_simd<S,V> sqrt(const Grid_simd<S,V> &r) {
|
||||
return SimdApply(SqrtRealFunctor<S>(),r);
|
||||
}
|
||||
template < class S, class V >
|
||||
inline Grid_simd<S,V> rsqrt(const Grid_simd<S,V> &r) {
|
||||
return SimdApply(RSqrtRealFunctor<S>(),r);
|
||||
}
|
||||
template < class S, class V >
|
||||
inline Grid_simd<S,V> cos(const Grid_simd<S,V> &r) {
|
||||
return SimdApply(CosRealFunctor<S>(),r);
|
||||
}
|
||||
template < class S, class V >
|
||||
inline Grid_simd<S,V> sin(const Grid_simd<S,V> &r) {
|
||||
return SimdApply(CosRealFunctor<S>(),r);
|
||||
}
|
||||
template < class S, class V >
|
||||
inline Grid_simd<S,V> pow(const Grid_simd<S,V> &r,double y) {
|
||||
return SimdApply(PowRealFunctor<S>(y),r);
|
||||
}
|
||||
template < class S, class V >
|
||||
inline Grid_simd<S,V> mod(const Grid_simd<S,V> &r,Integer y) {
|
||||
return SimdApply(ModIntFunctor<S>(y),r);
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
@ -1,430 +0,0 @@
|
||||
#ifndef GRID_VCOMPLEXD_H
|
||||
#define GRID_VCOMPLEXD_H
|
||||
|
||||
namespace Grid {
|
||||
class vComplexD {
|
||||
public:
|
||||
zvec v;
|
||||
public:
|
||||
typedef zvec vector_type;
|
||||
typedef ComplexD scalar_type;
|
||||
|
||||
vComplexD & operator = ( Zero & z){
|
||||
vzero(*this);
|
||||
return (*this);
|
||||
}
|
||||
vComplexD( Zero & z){
|
||||
vzero(*this);
|
||||
}
|
||||
vComplexD()=default;
|
||||
vComplexD(ComplexD a){
|
||||
vsplat(*this,a);
|
||||
};
|
||||
vComplexD(double a){
|
||||
vsplat(*this,ComplexD(a));
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// mac, mult, sub, add, adj
|
||||
// Should do an AVX2 version with mac.
|
||||
///////////////////////////////////////////////
|
||||
friend inline void mac (vComplexD * __restrict__ y,const vComplexD * __restrict__ a,const vComplexD *__restrict__ x) {*y = (*a)*(*x)+(*y);};
|
||||
friend inline void mult(vComplexD * __restrict__ y,const vComplexD * __restrict__ l,const vComplexD *__restrict__ r) {*y = (*l) * (*r);}
|
||||
friend inline void sub (vComplexD * __restrict__ y,const vComplexD * __restrict__ l,const vComplexD *__restrict__ r) {*y = (*l) - (*r);}
|
||||
friend inline void add (vComplexD * __restrict__ y,const vComplexD * __restrict__ l,const vComplexD *__restrict__ r) {*y = (*l) + (*r);}
|
||||
friend inline vComplexD adj(const vComplexD &in){ return conjugate(in); }
|
||||
|
||||
//////////////////////////////////
|
||||
// Initialise to 1,0,i
|
||||
//////////////////////////////////
|
||||
friend inline void vone (vComplexD &ret){ vsplat(ret,1.0,0.0);}
|
||||
friend inline void vzero (vComplexD &ret){ vsplat(ret,0.0,0.0);}
|
||||
friend inline void vcomplex_i(vComplexD &ret){ vsplat(ret,0.0,1.0);}
|
||||
|
||||
////////////////////////////////////
|
||||
// Arithmetic operator overloads +,-,*
|
||||
////////////////////////////////////
|
||||
friend inline vComplexD operator + (vComplexD a, vComplexD b)
|
||||
{
|
||||
vComplexD ret;
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
ret.v = _mm256_add_pd(a.v,b.v);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
ret.v = _mm_add_pd(a.v,b.v);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
ret.v = _mm512_add_pd(a.v,b.v);
|
||||
#endif
|
||||
#ifdef QPX
|
||||
ret.v = vec_add(a.v,b.v);
|
||||
#endif
|
||||
return ret;
|
||||
};
|
||||
|
||||
friend inline vComplexD operator - (vComplexD a, vComplexD b)
|
||||
{
|
||||
vComplexD ret;
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
ret.v = _mm256_sub_pd(a.v,b.v);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
ret.v = _mm_sub_pd(a.v,b.v);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
ret.v = _mm512_sub_pd(a.v,b.v);
|
||||
#endif
|
||||
#ifdef QPX
|
||||
ret.v = vec_sub(a.v,b.v);
|
||||
#endif
|
||||
return ret;
|
||||
};
|
||||
|
||||
friend inline vComplexD operator * (vComplexD a, vComplexD b)
|
||||
{
|
||||
vComplexD ret;
|
||||
|
||||
//Multiplicationof (ak+ibk)*(ck+idk)
|
||||
// a + i b can be stored as a data structure
|
||||
//From intel optimisation reference guide
|
||||
/*
|
||||
movsldup xmm0, Src1; load real parts into the destination,
|
||||
; a1, a1, a0, a0
|
||||
movaps xmm1, src2; load the 2nd pair of complex values, ; i.e. d1, c1, d0, c0
|
||||
mulps xmm0, xmm1; temporary results, a1d1, a1c1, a0d0, ; a0c0
|
||||
shufps xmm1, xmm1, b1; reorder the real and imaginary ; parts, c1, d1, c0, d0
|
||||
movshdup xmm2, Src1; load the imaginary parts into the ; destination, b1, b1, b0, b0
|
||||
mulps xmm2, xmm1; temporary results, b1c1, b1d1, b0c0, ; b0d0
|
||||
addsubps xmm0, xmm2; b1c1+a1d1, a1c1 -b1d1, b0c0+a0d
|
||||
VSHUFPD (VEX.256 encoded version)
|
||||
IF IMM0[0] = 0
|
||||
THEN DEST[63:0]=SRC1[63:0] ELSE DEST[63:0]=SRC1[127:64] FI;
|
||||
IF IMM0[1] = 0
|
||||
THEN DEST[127:64]=SRC2[63:0] ELSE DEST[127:64]=SRC2[127:64] FI;
|
||||
IF IMM0[2] = 0
|
||||
THEN DEST[191:128]=SRC1[191:128] ELSE DEST[191:128]=SRC1[255:192] FI;
|
||||
IF IMM0[3] = 0
|
||||
THEN DEST[255:192]=SRC2[191:128] ELSE DEST[255:192]=SRC2[255:192] FI; // Ox5 r<->i ; 0xC unchanged
|
||||
*/
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
zvec ymm0,ymm1,ymm2;
|
||||
ymm0 = _mm256_shuffle_pd(a.v,a.v,0x0); // ymm0 <- ar ar, ar,ar b'00,00
|
||||
ymm0 = _mm256_mul_pd(ymm0,b.v); // ymm0 <- ar bi, ar br
|
||||
ymm1 = _mm256_shuffle_pd(b.v,b.v,0x5); // ymm1 <- br,bi b'01,01
|
||||
ymm2 = _mm256_shuffle_pd(a.v,a.v,0xF); // ymm2 <- ai,ai b'11,11
|
||||
ymm1 = _mm256_mul_pd(ymm1,ymm2); // ymm1 <- br ai, ai bi
|
||||
ret.v= _mm256_addsub_pd(ymm0,ymm1);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
zvec ymm0,ymm1,ymm2;
|
||||
ymm0 = _mm_shuffle_pd(a.v,a.v,0x0); // ymm0 <- ar ar,
|
||||
ymm0 = _mm_mul_pd(ymm0,b.v); // ymm0 <- ar bi, ar br
|
||||
ymm1 = _mm_shuffle_pd(b.v,b.v,0x1); // ymm1 <- br,bi b01
|
||||
ymm2 = _mm_shuffle_pd(a.v,a.v,0x3); // ymm2 <- ai,ai b11
|
||||
ymm1 = _mm_mul_pd(ymm1,ymm2); // ymm1 <- br ai, ai bi
|
||||
ret.v= _mm_addsub_pd(ymm0,ymm1);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
/* This is from
|
||||
* Automatic SIMD Vectorization of Fast Fourier Transforms for the Larrabee and AVX Instruction Sets
|
||||
* @inproceedings{McFarlin:2011:ASV:1995896.1995938,
|
||||
* author = {McFarlin, Daniel S. and Arbatov, Volodymyr and Franchetti, Franz and P\"{u}schel, Markus},
|
||||
* title = {Automatic SIMD Vectorization of Fast Fourier Transforms for the Larrabee and AVX Instruction Sets},
|
||||
* booktitle = {Proceedings of the International Conference on Supercomputing},
|
||||
* series = {ICS '11},
|
||||
* year = {2011},
|
||||
* isbn = {978-1-4503-0102-2},
|
||||
* location = {Tucson, Arizona, USA},
|
||||
* pages = {265--274},
|
||||
* numpages = {10},
|
||||
* url = {http://doi.acm.org/10.1145/1995896.1995938},
|
||||
* doi = {10.1145/1995896.1995938},
|
||||
* acmid = {1995938},
|
||||
* publisher = {ACM},
|
||||
* address = {New York, NY, USA},
|
||||
* keywords = {autovectorization, fourier transform, program generation, simd, super-optimization},
|
||||
* }
|
||||
*/
|
||||
zvec vzero,ymm0,ymm1,real,imag;
|
||||
vzero =(zvec)_mm512_setzero();
|
||||
ymm0 = _mm512_swizzle_pd(a.v, _MM_SWIZ_REG_CDAB); //
|
||||
real =(zvec)_mm512_mask_or_epi64((__m512i)a.v, 0xAA,(__m512i)vzero,(__m512i) ymm0);
|
||||
imag = _mm512_mask_sub_pd(a.v, 0x55,vzero, ymm0);
|
||||
ymm1 = _mm512_mul_pd(real, b.v);
|
||||
ymm0 = _mm512_swizzle_pd(b.v, _MM_SWIZ_REG_CDAB); // OK
|
||||
ret.v= _mm512_fmadd_pd(ymm0,imag,ymm1);
|
||||
/* Imag OK */
|
||||
#endif
|
||||
#ifdef QPX
|
||||
ret.v = vec_mul(a.v,b.v);
|
||||
#endif
|
||||
return ret;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// General permute; assumes vector length is same across
|
||||
// all subtypes; may not be a good assumption, but could
|
||||
// add the vector width as a template param for BG/Q for example
|
||||
////////////////////////////////////////////////////////////////////
|
||||
friend inline void permute(vComplexD &y,vComplexD b,int perm)
|
||||
{
|
||||
Gpermute<vComplexD>(y,b,perm);
|
||||
}
|
||||
/*
|
||||
friend inline void merge(vComplexD &y,std::vector<ComplexD *> &extracted)
|
||||
{
|
||||
Gmerge<vComplexD,ComplexD >(y,extracted);
|
||||
}
|
||||
friend inline void extract(const vComplexD &y,std::vector<ComplexD *> &extracted)
|
||||
{
|
||||
Gextract<vComplexD,ComplexD>(y,extracted);
|
||||
}
|
||||
friend inline void merge(vComplexD &y,std::vector<ComplexD > &extracted)
|
||||
{
|
||||
Gmerge<vComplexD,ComplexD >(y,extracted);
|
||||
}
|
||||
friend inline void extract(const vComplexD &y,std::vector<ComplexD > &extracted)
|
||||
{
|
||||
Gextract<vComplexD,ComplexD>(y,extracted);
|
||||
}
|
||||
*/
|
||||
|
||||
///////////////////////
|
||||
// Splat
|
||||
///////////////////////
|
||||
friend inline void vsplat(vComplexD &ret,ComplexD c){
|
||||
float a= real(c);
|
||||
float b= imag(c);
|
||||
vsplat(ret,a,b);
|
||||
}
|
||||
|
||||
|
||||
friend inline void vsplat(vComplexD &ret,double rl,double ig){
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
ret.v = _mm256_set_pd(ig,rl,ig,rl);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
ret.v = _mm_set_pd(ig,rl);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
ret.v = _mm512_set_pd(ig,rl,ig,rl,ig,rl,ig,rl);
|
||||
#endif
|
||||
#ifdef QPX
|
||||
ret.v = {ig,rl,ig,rl};
|
||||
#endif
|
||||
}
|
||||
|
||||
friend inline void vset(vComplexD &ret,ComplexD *a){
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
ret.v = _mm256_set_pd(a[1].imag(),a[1].real(),a[0].imag(),a[0].real());
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
ret.v = _mm_set_pd(a[0].imag(),a[0].real());
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
ret.v = _mm512_set_pd(a[3].imag(),a[3].real(),a[2].imag(),a[2].real(),a[1].imag(),a[1].real(),a[0].imag(),a[0].real());
|
||||
// Note v has a0 a1 a2 a3
|
||||
#endif
|
||||
#ifdef QPX
|
||||
ret.v = {a[0].real(),a[0].imag(),a[1].real(),a[3].imag()};
|
||||
#endif
|
||||
}
|
||||
|
||||
friend inline void vstore(const vComplexD &ret, ComplexD *a){
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
_mm256_store_pd((double *)a,ret.v);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
_mm_store_pd((double *)a,ret.v);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
_mm512_store_pd((double *)a,ret.v);
|
||||
//Note v has a3 a2 a1 a0
|
||||
#endif
|
||||
#ifdef QPX
|
||||
assert(0);
|
||||
#endif
|
||||
}
|
||||
friend inline void vstream(vComplexD &out,const vComplexD &in){
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
_mm256_stream_pd((double *)&out.v,in.v);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
_mm_stream_pd((double *)&out.v,in.v);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
_mm512_storenrngo_pd((double *)&out.v,in.v);
|
||||
// _mm512_stream_pd((double *)&out.v,in.v);
|
||||
//Note v has a3 a2 a1 a0
|
||||
#endif
|
||||
#ifdef QPX
|
||||
assert(0);
|
||||
#endif
|
||||
}
|
||||
friend inline void prefetch(const vComplexD &v)
|
||||
{
|
||||
_mm_prefetch((const char*)&v.v,_MM_HINT_T0);
|
||||
}
|
||||
|
||||
////////////////////////
|
||||
// Conjugate
|
||||
////////////////////////
|
||||
friend inline vComplexD conjugate(const vComplexD &in){
|
||||
vComplexD ret ; vzero(ret);
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
// addsubps 0, inv=>0+in.v[3] 0-in.v[2], 0+in.v[1], 0-in.v[0], ...
|
||||
zvec tmp = _mm256_addsub_pd(ret.v,_mm256_shuffle_pd(in.v,in.v,0x5));
|
||||
ret.v =_mm256_shuffle_pd(tmp,tmp,0x5);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
zvec tmp = _mm_addsub_pd(ret.v,_mm_shuffle_pd(in.v,in.v,0x1));
|
||||
ret.v = _mm_shuffle_pd(tmp,tmp,0x1);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
ret.v = _mm512_mask_sub_pd(in.v, 0xaa,ret.v, in.v);
|
||||
#endif
|
||||
#ifdef QPX
|
||||
assert(0);
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
friend inline void timesMinusI(vComplexD &ret,const vComplexD &in){
|
||||
vzero(ret);
|
||||
vComplexD tmp;
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
tmp.v =_mm256_addsub_pd(ret.v,in.v); // r,-i
|
||||
ret.v =_mm256_shuffle_pd(tmp.v,tmp.v,0x5);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
tmp.v =_mm_addsub_pd(ret.v,in.v); // r,-i
|
||||
ret.v =_mm_shuffle_pd(tmp.v,tmp.v,0x1);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
ret.v = _mm512_mask_sub_pd(in.v,0xaa,ret.v,in.v); // real -imag
|
||||
ret.v = _mm512_swizzle_pd(ret.v, _MM_SWIZ_REG_CDAB);// OK
|
||||
#endif
|
||||
#ifdef QPX
|
||||
assert(0);
|
||||
#endif
|
||||
}
|
||||
|
||||
friend inline void timesI(vComplexD &ret, const vComplexD &in){
|
||||
vzero(ret);
|
||||
vComplexD tmp;
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
tmp.v =_mm256_shuffle_pd(in.v,in.v,0x5);
|
||||
ret.v =_mm256_addsub_pd(ret.v,tmp.v); // i,-r
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
tmp.v =_mm_shuffle_pd(in.v,in.v,0x1);
|
||||
ret.v =_mm_addsub_pd(ret.v,tmp.v); // r,-i
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
tmp.v = _mm512_swizzle_pd(in.v, _MM_SWIZ_REG_CDAB);// OK
|
||||
ret.v = _mm512_mask_sub_pd(tmp.v,0xaa,ret.v,tmp.v); // real -imag
|
||||
#endif
|
||||
#ifdef QPX
|
||||
assert(0);
|
||||
#endif
|
||||
}
|
||||
|
||||
friend inline vComplexD timesMinusI(const vComplexD &in){
|
||||
vComplexD ret;
|
||||
timesMinusI(ret,in);
|
||||
return ret;
|
||||
}
|
||||
|
||||
friend inline vComplexD timesI(const vComplexD &in){
|
||||
vComplexD ret;
|
||||
timesI(ret,in);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
// REDUCE FIXME must be a cleaner implementation
|
||||
friend inline ComplexD Reduce(const vComplexD & in)
|
||||
{
|
||||
vComplexD v1,v2;
|
||||
union {
|
||||
zvec v;
|
||||
double f[sizeof(zvec)/sizeof(double)];
|
||||
} conv;
|
||||
|
||||
#ifdef SSE4
|
||||
v1=in;
|
||||
#endif
|
||||
#if defined(AVX1) || defined (AVX2)
|
||||
permute(v1,in,0); // sse 128; paired complex single
|
||||
v1=v1+in;
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
permute(v1,in,0); // sse 128; paired complex single
|
||||
v1=v1+in;
|
||||
permute(v2,v1,1); // avx 256; quad complex single
|
||||
v1=v1+v2;
|
||||
#endif
|
||||
#ifdef QPX
|
||||
#error
|
||||
#endif
|
||||
conv.v = v1.v;
|
||||
return ComplexD(conv.f[0],conv.f[1]);
|
||||
}
|
||||
|
||||
// Unary negation
|
||||
friend inline vComplexD operator -(const vComplexD &r) {
|
||||
vComplexD ret;
|
||||
vzero(ret);
|
||||
ret = ret - r;
|
||||
return ret;
|
||||
}
|
||||
// *=,+=,-= operators
|
||||
inline vComplexD &operator *=(const vComplexD &r) {
|
||||
*this = (*this)*r;
|
||||
return *this;
|
||||
}
|
||||
inline vComplexD &operator +=(const vComplexD &r) {
|
||||
*this = *this+r;
|
||||
return *this;
|
||||
}
|
||||
inline vComplexD &operator -=(const vComplexD &r) {
|
||||
*this = *this-r;
|
||||
return *this;
|
||||
}
|
||||
|
||||
public:
|
||||
static int Nsimd(void) { return sizeof(zvec)/sizeof(double)/2;}
|
||||
};
|
||||
|
||||
|
||||
inline vComplexD innerProduct(const vComplexD & l, const vComplexD & r) { return conjugate(l)*r; }
|
||||
|
||||
|
||||
typedef vComplexD vDComplex;
|
||||
inline void zeroit(vComplexD &z){ vzero(z);}
|
||||
|
||||
inline vComplexD outerProduct(const vComplexD &l, const vComplexD& r)
|
||||
{
|
||||
return l*r;
|
||||
}
|
||||
inline vComplexD trace(const vComplexD &arg){
|
||||
return arg;
|
||||
}
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//// Generic routine to promote object<complex> -> object<vcomplex>
|
||||
//// Supports the array reordering transformation that gives me SIMD utilisation
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
/*
|
||||
template<template<class> class object>
|
||||
inline object<vComplex> splat(object<Complex >s){
|
||||
object<vComplex> ret;
|
||||
vComplex * v_ptr = (vComplex *)& ret;
|
||||
Complex * s_ptr = (Complex *) &s;
|
||||
for(int i=0;i<sizeof(ret);i+=sizeof(vComplex)){
|
||||
vsplat(*(v_ptr++),*(s_ptr++));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
*/
|
||||
}
|
||||
#endif
|
@ -1,463 +0,0 @@
|
||||
#ifndef GRID_VCOMPLEXF
|
||||
#define GRID_VCOMPLEXF
|
||||
|
||||
namespace Grid {
|
||||
|
||||
/*
|
||||
inline void Print(const char *A,cvec c) {
|
||||
float *fp=(float *)&c;
|
||||
printf(A);
|
||||
printf(" %le %le %le %le %le %le %le %le\n",
|
||||
fp[0],fp[1],fp[2],fp[3],fp[4],fp[5],fp[6],fp[7]);
|
||||
}
|
||||
*/
|
||||
|
||||
class vComplexF {
|
||||
// protected:
|
||||
|
||||
public:
|
||||
cvec v;
|
||||
|
||||
public:
|
||||
static inline int Nsimd(void) { return sizeof(cvec)/sizeof(float)/2;}
|
||||
public:
|
||||
typedef cvec vector_type;
|
||||
typedef ComplexF scalar_type;
|
||||
|
||||
vComplexF & operator = ( Zero & z){
|
||||
vzero(*this);
|
||||
return (*this);
|
||||
}
|
||||
// vComplexF( Zero & z){
|
||||
// vzero(*this);
|
||||
// }
|
||||
vComplexF()=default;
|
||||
vComplexF(ComplexF a){
|
||||
vsplat(*this,a);
|
||||
};
|
||||
vComplexF(double a){
|
||||
vsplat(*this,ComplexF(a));
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// mac, mult, sub, add, adj
|
||||
// Should do an AVX2 version with mac.
|
||||
///////////////////////////////////////////////
|
||||
friend inline void mac (vComplexF * __restrict__ y,const vComplexF * __restrict__ a,const vComplexF *__restrict__ x){ *y = (*a)*(*x)+(*y); };
|
||||
friend inline void mult(vComplexF * __restrict__ y,const vComplexF * __restrict__ l,const vComplexF *__restrict__ r){ *y = (*l) * (*r); }
|
||||
friend inline void sub (vComplexF * __restrict__ y,const vComplexF * __restrict__ l,const vComplexF *__restrict__ r){ *y = (*l) - (*r); }
|
||||
friend inline void add (vComplexF * __restrict__ y,const vComplexF * __restrict__ l,const vComplexF *__restrict__ r){ *y = (*l) + (*r); }
|
||||
friend inline vComplexF adj(const vComplexF &in){ return conjugate(in); }
|
||||
|
||||
//////////////////////////////////
|
||||
// Initialise to 1,0,i
|
||||
//////////////////////////////////
|
||||
friend inline void vone(vComplexF &ret) { vsplat(ret,1.0,0.0); }
|
||||
friend inline void vzero(vComplexF &ret) { vsplat(ret,0.0,0.0); }
|
||||
friend inline void vcomplex_i(vComplexF &ret){ vsplat(ret,0.0,1.0); }
|
||||
|
||||
////////////////////////////////////
|
||||
// Arithmetic operator overloads +,-,*
|
||||
////////////////////////////////////
|
||||
friend inline vComplexF operator + (vComplexF a, vComplexF b)
|
||||
{
|
||||
vComplexF ret;
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
ret.v = _mm256_add_ps(a.v,b.v);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
ret.v = _mm_add_ps(a.v,b.v);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
ret.v = _mm512_add_ps(a.v,b.v);
|
||||
#endif
|
||||
#ifdef QPX
|
||||
#error
|
||||
#endif
|
||||
return ret;
|
||||
};
|
||||
|
||||
friend inline vComplexF operator - (vComplexF a, vComplexF b)
|
||||
{
|
||||
vComplexF ret;
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
ret.v = _mm256_sub_ps(a.v,b.v);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
ret.v = _mm_sub_ps(a.v,b.v);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
ret.v = _mm512_sub_ps(a.v,b.v);
|
||||
#endif
|
||||
#ifdef QPX
|
||||
#error
|
||||
#endif
|
||||
return ret;
|
||||
};
|
||||
|
||||
friend inline vComplexF operator * (vComplexF a, vComplexF b)
|
||||
{
|
||||
vComplexF ret;
|
||||
|
||||
//Multiplicationof (ak+ibk)*(ck+idk)
|
||||
// a + i b can be stored as a data structure
|
||||
//From intel optimisation reference
|
||||
/*
|
||||
movsldup xmm0, Src1; load real parts into the destination,
|
||||
; a1, a1, a0, a0
|
||||
movaps xmm1, src2; load the 2nd pair of complex values, ; i.e. d1, c1, d0, c0
|
||||
mulps xmm0, xmm1; temporary results, a1d1, a1c1, a0d0, ; a0c0
|
||||
shufps xmm1, xmm1, b1; reorder the real and imaginary ; parts, c1, d1, c0, d0
|
||||
movshdup xmm2, Src1; load the imaginary parts into the ; destination, b1, b1, b0, b0
|
||||
mulps xmm2, xmm1; temporary results, b1c1, b1d1, b0c0, ; b0d0
|
||||
addsubps xmm0, xmm2; b1c1+a1d1, a1c1 -b1d1, b0c0+a0d
|
||||
*/
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
cvec ymm0,ymm1,ymm2;
|
||||
ymm0 = _mm256_shuffle_ps(a.v,a.v,_MM_SHUFFLE(2,2,0,0)); // ymm0 <- ar ar,
|
||||
ymm0 = _mm256_mul_ps(ymm0,b.v); // ymm0 <- ar bi, ar br
|
||||
// FIXME AVX2 could MAC
|
||||
ymm1 = _mm256_shuffle_ps(b.v,b.v,_MM_SHUFFLE(2,3,0,1)); // ymm1 <- br,bi
|
||||
ymm2 = _mm256_shuffle_ps(a.v,a.v,_MM_SHUFFLE(3,3,1,1)); // ymm2 <- ai,ai
|
||||
ymm1 = _mm256_mul_ps(ymm1,ymm2); // ymm1 <- br ai, ai bi
|
||||
ret.v= _mm256_addsub_ps(ymm0,ymm1);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
cvec ymm0,ymm1,ymm2;
|
||||
ymm0 = _mm_shuffle_ps(a.v,a.v,_MM_SHUFFLE(2,2,0,0)); // ymm0 <- ar ar,
|
||||
ymm0 = _mm_mul_ps(ymm0,b.v); // ymm0 <- ar bi, ar br
|
||||
ymm1 = _mm_shuffle_ps(b.v,b.v,_MM_SHUFFLE(2,3,0,1)); // ymm1 <- br,bi
|
||||
ymm2 = _mm_shuffle_ps(a.v,a.v,_MM_SHUFFLE(3,3,1,1)); // ymm2 <- ai,ai
|
||||
ymm1 = _mm_mul_ps(ymm1,ymm2); // ymm1 <- br ai, ai bi
|
||||
ret.v= _mm_addsub_ps(ymm0,ymm1);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
//
|
||||
cvec vzero,ymm0,ymm1,real, imag;
|
||||
vzero = _mm512_setzero();
|
||||
ymm0 = _mm512_swizzle_ps(a.v, _MM_SWIZ_REG_CDAB); //
|
||||
real = (__m512)_mm512_mask_or_epi32((__m512i)a.v, 0xAAAA,(__m512i)vzero,(__m512i)ymm0);
|
||||
imag = _mm512_mask_sub_ps(a.v, 0x5555,vzero, ymm0);
|
||||
ymm1 = _mm512_mul_ps(real, b.v);
|
||||
ymm0 = _mm512_swizzle_ps(b.v, _MM_SWIZ_REG_CDAB); // OK
|
||||
ret.v = _mm512_fmadd_ps(ymm0,imag,ymm1);
|
||||
|
||||
|
||||
#endif
|
||||
#ifdef QPX
|
||||
ret.v = vec_mul(a.v,b.v);
|
||||
#endif
|
||||
return ret;
|
||||
};
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// FIXME: gonna remove these load/store, get, set, prefetch
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
friend inline void vset(vComplexF &ret, ComplexF *a){
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
ret.v = _mm256_set_ps(a[3].imag(),a[3].real(),a[2].imag(),a[2].real(),a[1].imag(),a[1].real(),a[0].imag(),a[0].real());
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
ret.v = _mm_set_ps(a[1].imag(), a[1].real(),a[0].imag(),a[0].real());
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
ret.v = _mm512_set_ps(a[7].imag(),a[7].real(),a[6].imag(),a[6].real(),a[5].imag(),a[5].real(),a[4].imag(),a[4].real(),a[3].imag(),a[3].real(),a[2].imag(),a[2].real(),a[1].imag(),a[1].real(),a[0].imag(),a[0].real());
|
||||
// Note v has a0 a1 a2 a3 a4 a5 a6 a7
|
||||
#endif
|
||||
#ifdef QPX
|
||||
ret.v = {a[0].real(),a[0].imag(),a[1].real(),a[1].imag(),a[2].real(),a[2].imag(),a[3].real(),a[3].imag()};
|
||||
#endif
|
||||
}
|
||||
|
||||
///////////////////////
|
||||
// Splat
|
||||
///////////////////////
|
||||
friend inline void vsplat(vComplexF &ret,ComplexF c){
|
||||
float a= real(c);
|
||||
float b= imag(c);
|
||||
vsplat(ret,a,b);
|
||||
}
|
||||
|
||||
friend inline void vstore(const vComplexF &ret, ComplexF *a){
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
_mm256_store_ps((float *)a,ret.v);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
_mm_store_ps((float *)a,ret.v);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
_mm512_store_ps((float *)a,ret.v);
|
||||
//Note v has a3 a2 a1 a0
|
||||
#endif
|
||||
#ifdef QPX
|
||||
assert(0);
|
||||
#endif
|
||||
}
|
||||
friend inline void vstream(vComplexF &out,const vComplexF &in){
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
_mm256_stream_ps((float *)&out.v,in.v);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
_mm_stream_ps((float *)&out.v,in.v);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
_mm512_storenrngo_ps((float *)&out.v,in.v);
|
||||
//Note v has a3 a2 a1 a0
|
||||
#endif
|
||||
#ifdef QPX
|
||||
assert(0);
|
||||
#endif
|
||||
}
|
||||
friend inline void prefetch(const vComplexF &v)
|
||||
{
|
||||
_mm_prefetch((const char*)&v.v,_MM_HINT_T0);
|
||||
}
|
||||
|
||||
friend inline void vsplat(vComplexF &ret,float a,float b){
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
ret.v = _mm256_set_ps(b,a,b,a,b,a,b,a);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
ret.v = _mm_set_ps(b,a,b,a);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
ret.v = _mm512_set_ps(b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a);
|
||||
#endif
|
||||
#ifdef QPX
|
||||
ret.v = {a,b,a,b};
|
||||
#endif
|
||||
}
|
||||
friend inline void permute(vComplexF &y,vComplexF b,int perm)
|
||||
{
|
||||
Gpermute<vComplexF>(y,b,perm);
|
||||
}
|
||||
friend inline ComplexF Reduce(const vComplexF & in)
|
||||
{
|
||||
vComplexF v1,v2;
|
||||
union {
|
||||
cvec v;
|
||||
float f[sizeof(cvec)/sizeof(float)];
|
||||
} conv;
|
||||
#ifdef SSE4
|
||||
permute(v1,in,0); // sse 128; paired complex single
|
||||
v1=v1+in;
|
||||
#endif
|
||||
#if defined(AVX1) || defined (AVX2)
|
||||
permute(v1,in,0); // sse 128; paired complex single
|
||||
v1=v1+in;
|
||||
permute(v2,v1,1); // avx 256; quad complex single
|
||||
v1=v1+v2;
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
permute(v1,in,0); // avx512 octo-complex single
|
||||
v1=v1+in;
|
||||
permute(v2,v1,1);
|
||||
v1=v1+v2;
|
||||
permute(v2,v1,2);
|
||||
v1=v1+v2;
|
||||
#endif
|
||||
#ifdef QPX
|
||||
#error
|
||||
#endif
|
||||
conv.v = v1.v;
|
||||
return ComplexF(conv.f[0],conv.f[1]);
|
||||
}
|
||||
|
||||
friend inline vComplexF operator * (const ComplexF &a, vComplexF b){
|
||||
vComplexF va;
|
||||
vsplat(va,a);
|
||||
return va*b;
|
||||
}
|
||||
friend inline vComplexF operator * (vComplexF b,const ComplexF &a){
|
||||
return a*b;
|
||||
}
|
||||
|
||||
/*
|
||||
template<class real>
|
||||
friend inline vComplexF operator * (vComplexF b,const real &a){
|
||||
vComplexF va;
|
||||
Complex ca(a,0);
|
||||
vsplat(va,ca);
|
||||
return va*b;
|
||||
}
|
||||
template<class real>
|
||||
friend inline vComplexF operator * (const real &a,vComplexF b){
|
||||
return a*b;
|
||||
}
|
||||
|
||||
friend inline vComplexF operator + (const Complex &a, vComplexF b){
|
||||
vComplexF va;
|
||||
vsplat(va,a);
|
||||
return va+b;
|
||||
}
|
||||
friend inline vComplexF operator + (vComplexF b,const Complex &a){
|
||||
return a+b;
|
||||
}
|
||||
template<class real>
|
||||
friend inline vComplexF operator + (vComplexF b,const real &a){
|
||||
vComplexF va;
|
||||
Complex ca(a,0);
|
||||
vsplat(va,ca);
|
||||
return va+b;
|
||||
}
|
||||
template<class real>
|
||||
friend inline vComplexF operator + (const real &a,vComplexF b){
|
||||
return a+b;
|
||||
}
|
||||
friend inline vComplexF operator - (const Complex &a, vComplexF b){
|
||||
vComplexF va;
|
||||
vsplat(va,a);
|
||||
return va-b;
|
||||
}
|
||||
friend inline vComplexF operator - (vComplexF b,const Complex &a){
|
||||
vComplexF va;
|
||||
vsplat(va,a);
|
||||
return b-va;
|
||||
}
|
||||
template<class real>
|
||||
friend inline vComplexF operator - (vComplexF b,const real &a){
|
||||
vComplexF va;
|
||||
Complex ca(a,0);
|
||||
vsplat(va,ca);
|
||||
return b-va;
|
||||
}
|
||||
template<class real>
|
||||
friend inline vComplexF operator - (const real &a,vComplexF b){
|
||||
vComplexF va;
|
||||
Complex ca(a,0);
|
||||
vsplat(va,ca);
|
||||
return va-b;
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
///////////////////////
|
||||
// Conjugate
|
||||
///////////////////////
|
||||
|
||||
friend inline vComplexF conjugate(const vComplexF &in){
|
||||
vComplexF ret ; vzero(ret);
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
ret.v = _mm256_xor_ps(_mm256_addsub_ps(ret.v,in.v), _mm256_set1_ps(-0.f));
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
ret.v = _mm_xor_ps(_mm_addsub_ps(ret.v,in.v), _mm_set1_ps(-0.f));
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
ret.v = _mm512_mask_sub_ps(in.v,0xaaaa,ret.v,in.v); // Zero out 0+real 0-imag
|
||||
#endif
|
||||
#ifdef QPX
|
||||
assert(0);
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
friend inline void timesMinusI( vComplexF &ret,const vComplexF &in){
|
||||
vzero(ret);
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
cvec tmp =_mm256_addsub_ps(ret.v,in.v); // r,-i
|
||||
ret.v = _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1)); //-i,r
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
cvec tmp =_mm_addsub_ps(ret.v,in.v); // r,-i
|
||||
ret.v = _mm_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1));
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
ret.v = _mm512_mask_sub_ps(in.v,0xaaaa,ret.v,in.v); // real -imag
|
||||
ret.v = _mm512_swizzle_ps(ret.v, _MM_SWIZ_REG_CDAB);// OK
|
||||
#endif
|
||||
#ifdef QPX
|
||||
assert(0);
|
||||
#endif
|
||||
}
|
||||
friend inline void timesI(vComplexF &ret,const vComplexF &in){
|
||||
vzero(ret);
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
cvec tmp =_mm256_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1));//i,r
|
||||
ret.v =_mm256_addsub_ps(ret.v,tmp); //i,-r
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
cvec tmp =_mm_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1));
|
||||
ret.v = _mm_addsub_ps(ret.v,tmp); // r,-i
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
cvec tmp = _mm512_swizzle_ps(in.v, _MM_SWIZ_REG_CDAB);// OK
|
||||
ret.v = _mm512_mask_sub_ps(tmp,0xaaaa,ret.v,tmp); // real -imag
|
||||
#endif
|
||||
#ifdef QPX
|
||||
assert(0);
|
||||
#endif
|
||||
}
|
||||
friend inline vComplexF timesMinusI(const vComplexF &in){
|
||||
vComplexF ret;
|
||||
timesMinusI(ret,in);
|
||||
return ret;
|
||||
}
|
||||
friend inline vComplexF timesI(const vComplexF &in){
|
||||
vComplexF ret;
|
||||
timesI(ret,in);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
// Unary negation
|
||||
friend inline vComplexF operator -(const vComplexF &r) {
|
||||
vComplexF ret;
|
||||
vzero(ret);
|
||||
ret = ret - r;
|
||||
return ret;
|
||||
}
|
||||
// *=,+=,-= operators
|
||||
inline vComplexF &operator *=(const vComplexF &r) {
|
||||
*this = (*this)*r;
|
||||
return *this;
|
||||
}
|
||||
inline vComplexF &operator +=(const vComplexF &r) {
|
||||
*this = *this+r;
|
||||
return *this;
|
||||
}
|
||||
inline vComplexF &operator -=(const vComplexF &r) {
|
||||
*this = *this-r;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/*
|
||||
friend inline void merge(vComplexF &y,std::vector<ComplexF *> &extracted)
|
||||
{
|
||||
Gmerge<vComplexF,ComplexF >(y,extracted);
|
||||
}
|
||||
friend inline void extract(const vComplexF &y,std::vector<ComplexF *> &extracted)
|
||||
{
|
||||
Gextract<vComplexF,ComplexF>(y,extracted);
|
||||
}
|
||||
friend inline void merge(vComplexF &y,std::vector<ComplexF > &extracted)
|
||||
{
|
||||
Gmerge<vComplexF,ComplexF >(y,extracted);
|
||||
}
|
||||
friend inline void extract(const vComplexF &y,std::vector<ComplexF > &extracted)
|
||||
{
|
||||
Gextract<vComplexF,ComplexF>(y,extracted);
|
||||
}
|
||||
*/
|
||||
|
||||
};
|
||||
|
||||
inline vComplexF innerProduct(const vComplexF & l, const vComplexF & r)
|
||||
{
|
||||
return conjugate(l)*r;
|
||||
}
|
||||
|
||||
inline void zeroit(vComplexF &z){ vzero(z);}
|
||||
|
||||
inline vComplexF outerProduct(const vComplexF &l, const vComplexF& r)
|
||||
{
|
||||
return l*r;
|
||||
}
|
||||
inline vComplexF trace(const vComplexF &arg){
|
||||
return arg;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@ -1,259 +0,0 @@
|
||||
#ifndef GRID_VINTEGER_H
|
||||
#define GRID_VINTEGER_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
|
||||
class vInteger {
|
||||
protected:
|
||||
|
||||
public:
|
||||
|
||||
ivec v;
|
||||
|
||||
typedef ivec vector_type;
|
||||
typedef Integer scalar_type;
|
||||
|
||||
vInteger(){};
|
||||
vInteger & operator = (const Zero & z){
|
||||
vzero(*this);
|
||||
return (*this);
|
||||
}
|
||||
vInteger(Integer a){
|
||||
vsplat(*this,a);
|
||||
};
|
||||
////////////////////////////////////
|
||||
// Arithmetic operator overloads +,-,*
|
||||
////////////////////////////////////
|
||||
friend inline vInteger operator + ( vInteger a, vInteger b)
|
||||
{
|
||||
vInteger ret;
|
||||
#if defined (AVX1)
|
||||
__m128i a0,a1;
|
||||
__m128i b0,b1;
|
||||
a0 = _mm256_extractf128_si256(a.v,0);
|
||||
b0 = _mm256_extractf128_si256(b.v,0);
|
||||
a1 = _mm256_extractf128_si256(a.v,1);
|
||||
b1 = _mm256_extractf128_si256(b.v,1);
|
||||
a0 = _mm_add_epi32(a0,b0);
|
||||
a1 = _mm_add_epi32(a1,b1);
|
||||
ret.v = _mm256_set_m128i(a1,a0);
|
||||
#endif
|
||||
#if defined (AVX2)
|
||||
ret.v = _mm256_add_epi32(a.v,b.v);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
ret.v = _mm_add_epi32(a.v,b.v);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
ret.v = _mm512_add_epi32(a.v,b.v);
|
||||
#endif
|
||||
#ifdef QPX
|
||||
// Implement as array of ints is only option
|
||||
#error
|
||||
#endif
|
||||
return ret;
|
||||
};
|
||||
|
||||
friend inline vInteger operator - ( vInteger a, vInteger b)
|
||||
{
|
||||
vInteger ret;
|
||||
#if defined (AVX1)
|
||||
__m128i a0,a1;
|
||||
__m128i b0,b1;
|
||||
a0 = _mm256_extractf128_si256(a.v,0);
|
||||
b0 = _mm256_extractf128_si256(b.v,0);
|
||||
a1 = _mm256_extractf128_si256(a.v,1);
|
||||
b1 = _mm256_extractf128_si256(b.v,1);
|
||||
a0 = _mm_sub_epi32(a0,b0);
|
||||
a1 = _mm_sub_epi32(a1,b1);
|
||||
ret.v = _mm256_set_m128i(a1,a0);
|
||||
#endif
|
||||
#if defined (AVX2)
|
||||
ret.v = _mm256_sub_epi32(a.v,b.v);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
ret.v = _mm_sub_epi32(a.v,b.v);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
ret.v = _mm512_sub_epi32(a.v,b.v);
|
||||
#endif
|
||||
#ifdef QPX
|
||||
// Implement as array of ints is only option
|
||||
#error
|
||||
#endif
|
||||
return ret;
|
||||
};
|
||||
|
||||
friend inline vInteger operator * ( vInteger a, vInteger b)
|
||||
{
|
||||
vInteger ret;
|
||||
#if defined (AVX1)
|
||||
__m128i a0,a1;
|
||||
__m128i b0,b1;
|
||||
a0 = _mm256_extractf128_si256(a.v,0);
|
||||
b0 = _mm256_extractf128_si256(b.v,0);
|
||||
a1 = _mm256_extractf128_si256(a.v,1);
|
||||
b1 = _mm256_extractf128_si256(b.v,1);
|
||||
a0 = _mm_mul_epi32(a0,b0);
|
||||
a1 = _mm_mul_epi32(a1,b1);
|
||||
ret.v = _mm256_set_m128i(a1,a0);
|
||||
#endif
|
||||
#if defined (AVX2)
|
||||
ret.v = _mm256_mul_epi32(a.v,b.v);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
ret.v = _mm_mul_epi32(a.v,b.v);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
// ret.v = _mm512_mul_epi32(a.v,b.v);
|
||||
ret.v = _mm512_mullo_epi32(a.v,b.v);
|
||||
#endif
|
||||
#ifdef QPX
|
||||
// Implement as array of ints is only option
|
||||
#error
|
||||
#endif
|
||||
return ret;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// mult, sub, add, adj,conjugate, mac functions
|
||||
///////////////////////////////////////////////
|
||||
friend inline void mult(vInteger * __restrict__ y,const vInteger * __restrict__ l,const vInteger *__restrict__ r) {*y = (*l) * (*r);}
|
||||
friend inline void sub (vInteger * __restrict__ y,const vInteger * __restrict__ l,const vInteger *__restrict__ r) {*y = (*l) - (*r);}
|
||||
friend inline void add (vInteger * __restrict__ y,const vInteger * __restrict__ l,const vInteger *__restrict__ r) {*y = (*l) + (*r);}
|
||||
friend inline void mac (vInteger &y,const vInteger a,const vInteger x){
|
||||
y = a*x+y;
|
||||
}
|
||||
|
||||
//////////////////////////////////
|
||||
// Initialise to 1,0,i
|
||||
//////////////////////////////////
|
||||
friend inline void vone (vInteger &ret){vsplat(ret,1);}
|
||||
friend inline void vzero(vInteger &ret){vsplat(ret,0);}
|
||||
friend inline void vtrue (vInteger &ret){vsplat(ret,0xFFFFFFFF);}
|
||||
friend inline void vfalse(vInteger &ret){vsplat(ret,0);}
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Broadcast a value across Nsimd copies.
|
||||
/////////////////////////////////////////////////////
|
||||
friend inline void vsplat(vInteger &ret,scalar_type a){
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
ret.v = _mm256_set1_epi32(a);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
ret.v = _mm_set1_epi32(a);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
ret.v = _mm512_set1_epi32(a);
|
||||
#endif
|
||||
#ifdef QPX
|
||||
#error
|
||||
#endif
|
||||
}
|
||||
friend inline void vset(vInteger &ret,scalar_type *a){
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
ret.v = _mm256_set_epi32(a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
ret.v = _mm_set_epi32(a[0],a[1],a[2],a[3]);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
ret.v = _mm512_set_epi32( a[15],a[14],a[13],a[12],a[11],a[10],a[9],a[8],
|
||||
a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
|
||||
#endif
|
||||
#ifdef QPX
|
||||
#error
|
||||
#endif
|
||||
}
|
||||
|
||||
friend inline void vstore(const vInteger &ret, Integer *a){
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
_mm256_store_si256((__m256i*)a,ret.v);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
_mm_store_si128((__m128i *)a,ret.v);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
_mm512_store_si512(a,ret.v);
|
||||
#endif
|
||||
#ifdef QPX
|
||||
assert(0);
|
||||
#endif
|
||||
}
|
||||
|
||||
friend inline void vstream(vInteger & out,const vInteger &in){
|
||||
out=in;
|
||||
}
|
||||
|
||||
friend inline void prefetch(const vInteger &v)
|
||||
{
|
||||
_mm_prefetch((const char*)&v.v,_MM_HINT_T0);
|
||||
}
|
||||
|
||||
// Unary negation
|
||||
friend inline vInteger operator -(const vInteger &r) {
|
||||
vInteger ret;
|
||||
vzero(ret);
|
||||
ret = ret - r;
|
||||
return ret;
|
||||
}
|
||||
friend inline Integer Reduce(const vInteger & in)
|
||||
{
|
||||
// unimplemented
|
||||
assert(0);
|
||||
}
|
||||
// *=,+=,-= operators
|
||||
inline vInteger &operator *=(const vInteger &r) {
|
||||
*this = (*this)*r;
|
||||
return *this;
|
||||
}
|
||||
inline vInteger &operator +=(const vInteger &r) {
|
||||
*this = *this+r;
|
||||
return *this;
|
||||
}
|
||||
inline vInteger &operator -=(const vInteger &r) {
|
||||
*this = *this-r;
|
||||
return *this;
|
||||
}
|
||||
|
||||
friend inline void permute(vInteger &y,const vInteger b,int perm)
|
||||
{
|
||||
Gpermute<vInteger>(y,b,perm);
|
||||
}
|
||||
/*
|
||||
friend inline void merge(vInteger &y,std::vector<Integer *> &extracted)
|
||||
{
|
||||
Gmerge<vInteger,Integer>(y,extracted);
|
||||
}
|
||||
friend inline void extract(const vInteger &y,std::vector<Integer *> &extracted)
|
||||
{
|
||||
Gextract<vInteger,Integer>(y,extracted);
|
||||
}
|
||||
friend inline void merge(vInteger &y,std::vector<Integer> &extracted)
|
||||
{
|
||||
Gmerge<vInteger,Integer>(y,extracted);
|
||||
}
|
||||
friend inline void extract(const vInteger &y,std::vector<Integer> &extracted)
|
||||
{
|
||||
Gextract<vInteger,Integer>(y,extracted);
|
||||
}
|
||||
*/
|
||||
|
||||
public:
|
||||
static inline int Nsimd(void) { return sizeof(ivec)/sizeof(Integer);}
|
||||
};
|
||||
|
||||
inline vInteger localInnerProduct(const vInteger & l, const vInteger & r) { return l*r; }
|
||||
|
||||
inline void zeroit(vInteger &z){ vzero(z);}
|
||||
|
||||
inline vInteger outerProduct(const vInteger &l, const vInteger& r)
|
||||
{
|
||||
return l*r;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@ -1,276 +0,0 @@
|
||||
#ifndef GRID_VREALD_H
|
||||
#define GRID_VREALD_H
|
||||
|
||||
namespace Grid {
|
||||
class vRealD {
|
||||
public:
|
||||
dvec v; // dvec is double precision vector
|
||||
|
||||
public:
|
||||
typedef dvec vector_type;
|
||||
typedef RealD scalar_type;
|
||||
|
||||
vRealD()=default;
|
||||
vRealD(RealD a){
|
||||
vsplat(*this,a);
|
||||
};
|
||||
vRealD(Zero &zero){
|
||||
zeroit(*this);
|
||||
}
|
||||
vRealD & operator = ( Zero & z){
|
||||
vzero(*this);
|
||||
return (*this);
|
||||
}
|
||||
|
||||
friend inline void mult(vRealD * __restrict__ y,const vRealD * __restrict__ l,const vRealD *__restrict__ r) {*y = (*l) * (*r);}
|
||||
friend inline void sub (vRealD * __restrict__ y,const vRealD * __restrict__ l,const vRealD *__restrict__ r) {*y = (*l) - (*r);}
|
||||
friend inline void add (vRealD * __restrict__ y,const vRealD * __restrict__ l,const vRealD *__restrict__ r) {*y = (*l) + (*r);}
|
||||
friend inline vRealD adj(const vRealD &in) { return in; }
|
||||
friend inline vRealD conjugate(const vRealD &in){ return in; }
|
||||
|
||||
friend inline void mac (vRealD &y,const vRealD a,const vRealD x){
|
||||
#if defined (AVX1) || defined (SSE4)
|
||||
y = a*x+y;
|
||||
#endif
|
||||
#ifdef AVX2 // AVX 2 introduced FMA support. FMA4 eliminates a copy, but AVX only has FMA3
|
||||
// accelerates multiply accumulate, but not general multiply add
|
||||
y.v = _mm256_fmadd_pd(a.v,x.v,y.v);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
// here precision of vector are still single
|
||||
y.v = _mm512_fmadd_pd(a.v,x.v,y.v);
|
||||
#endif
|
||||
#ifdef QPX
|
||||
y.v = vec_madd(a.v,x.v,y.v);
|
||||
#endif
|
||||
}
|
||||
//////////////////////////////////
|
||||
// Initialise to 1,0
|
||||
//////////////////////////////////
|
||||
friend inline void vone (vRealD &ret){ vsplat(ret,1.0);}
|
||||
friend inline void vzero(vRealD &ret){ vsplat(ret,0.0);}
|
||||
|
||||
|
||||
////////////////////////////////////
|
||||
// Arithmetic operator overloads +,-,*
|
||||
////////////////////////////////////
|
||||
friend inline vRealD operator + (vRealD a, vRealD b)
|
||||
{
|
||||
vRealD ret;
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
ret.v = _mm256_add_pd(a.v,b.v);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
ret.v = _mm_add_pd(a.v,b.v);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
ret.v = _mm512_add_pd(a.v,b.v);
|
||||
#endif
|
||||
#ifdef QPX
|
||||
ret.v = vec_add(a.v,b.v);
|
||||
#endif
|
||||
return ret;
|
||||
};
|
||||
friend inline vRealD operator - (vRealD a, vRealD b)
|
||||
{
|
||||
vRealD ret;
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
ret.v = _mm256_sub_pd(a.v,b.v);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
ret.v = _mm_sub_pd(a.v,b.v);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
ret.v = _mm512_sub_pd(a.v,b.v);
|
||||
#endif
|
||||
#ifdef QPX
|
||||
ret.v = vec_sub(a.v,b.v);
|
||||
#endif
|
||||
return ret;
|
||||
};
|
||||
|
||||
friend inline vRealD operator * (vRealD a, vRealD b)
|
||||
{
|
||||
vRealD ret;
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
ret.v = _mm256_mul_pd(a.v,b.v);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
ret.v = _mm_mul_pd(a.v,b.v);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
ret.v = _mm512_mul_pd(a.v,b.v);
|
||||
#endif
|
||||
#ifdef QPX
|
||||
ret.v = vec_mul(a.v,b.v);
|
||||
#endif
|
||||
return ret;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// General permute; assumes vector length is same across
|
||||
// all subtypes; may not be a good assumption, but could
|
||||
// add the vector width as a template param for BG/Q for example
|
||||
////////////////////////////////////////////////////////////////////
|
||||
|
||||
friend inline void permute(vRealD &y,vRealD b,int perm)
|
||||
{
|
||||
Gpermute<vRealD>(y,b,perm);
|
||||
}
|
||||
/*
|
||||
friend inline void merge(vRealD &y,std::vector<RealD *> &extracted)
|
||||
{
|
||||
Gmerge<vRealD,RealD >(y,extracted);
|
||||
}
|
||||
friend inline void extract(const vRealD &y,std::vector<RealD *> &extracted)
|
||||
{
|
||||
Gextract<vRealD,RealD>(y,extracted);
|
||||
}
|
||||
friend inline void merge(vRealD &y,std::vector<RealD > &extracted)
|
||||
{
|
||||
Gmerge<vRealD,RealD >(y,extracted);
|
||||
}
|
||||
friend inline void extract(const vRealD &y,std::vector<RealD > &extracted)
|
||||
{
|
||||
Gextract<vRealD,RealD>(y,extracted);
|
||||
}
|
||||
*/
|
||||
|
||||
friend inline void vsplat(vRealD &ret,double a){
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
ret.v = _mm256_set_pd(a,a,a,a);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
ret.v = _mm_set_pd(a,a);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
ret.v = _mm512_set1_pd(a);
|
||||
#endif
|
||||
#ifdef QPX
|
||||
ret.v = {a,a,a,a};
|
||||
#endif
|
||||
}
|
||||
friend inline void vset(vRealD &ret, double *a){
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
ret.v = _mm256_set_pd(a[3],a[2],a[1],a[0]);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
ret.v = _mm_set_pd(a[1],a[0]);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
ret.v = _mm512_set_pd(a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
|
||||
// Note v has a0 a1 a2 a3 a4 a5 a6 a7
|
||||
#endif
|
||||
#ifdef QPX
|
||||
ret.v = {a[0],a[1],a[2],a[3]};
|
||||
#endif
|
||||
}
|
||||
|
||||
friend inline void vstore(const vRealD &ret, double *a){
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
_mm256_store_pd(a,ret.v);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
_mm_store_pd(a,ret.v);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
_mm512_store_pd(a,ret.v);
|
||||
// Note v has a7 a6 a5ba4 a3 a2 a1 a0
|
||||
#endif
|
||||
#ifdef QPX
|
||||
assert(0);
|
||||
#endif
|
||||
}
|
||||
friend inline void vstream(vRealD &out,const vRealD &in){
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
_mm256_stream_pd((double *)&out.v,in.v);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
_mm_stream_pd((double *)&out.v,in.v);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
_mm512_storenrngo_pd((double *)&out.v,in.v);
|
||||
//Note v has a3 a2 a1 a0
|
||||
#endif
|
||||
#ifdef QPX
|
||||
assert(0);
|
||||
#endif
|
||||
}
|
||||
friend inline void prefetch(const vRealD &v)
|
||||
{
|
||||
_mm_prefetch((const char*)&v.v,_MM_HINT_T0);
|
||||
}
|
||||
// Unary negation
|
||||
friend inline vRealD operator -(const vRealD &r) {
|
||||
vRealD ret;
|
||||
vzero(ret);
|
||||
ret = ret - r;
|
||||
return ret;
|
||||
}
|
||||
|
||||
friend inline RealD Reduce(const vRealD & in)
|
||||
{
|
||||
vRealD v1,v2;
|
||||
union {
|
||||
dvec v;
|
||||
double f[sizeof(dvec)/sizeof(double)];
|
||||
} conv;
|
||||
#ifdef SSE4
|
||||
permute(v1,in,0); // sse 128; paired real double
|
||||
v1=v1+in;
|
||||
#endif
|
||||
#if defined(AVX1) || defined (AVX2)
|
||||
permute(v1,in,0); // avx 256; quad double
|
||||
v1=v1+in;
|
||||
permute(v2,v1,1);
|
||||
v1=v1+v2;
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
permute(v1,in,0); // avx 512; octo-double
|
||||
v1=v1+in;
|
||||
permute(v2,v1,1);
|
||||
v1=v1+v2;
|
||||
permute(v2,v1,2);
|
||||
v1=v1+v2;
|
||||
#endif
|
||||
#ifdef QPX
|
||||
#endif
|
||||
conv.v=v1.v;
|
||||
return conv.f[0];
|
||||
}
|
||||
|
||||
// *=,+=,-= operators
|
||||
inline vRealD &operator *=(const vRealD &r) {
|
||||
*this = (*this)*r;
|
||||
return *this;
|
||||
}
|
||||
inline vRealD &operator +=(const vRealD &r) {
|
||||
*this = *this+r;
|
||||
return *this;
|
||||
}
|
||||
inline vRealD &operator -=(const vRealD &r) {
|
||||
*this = *this-r;
|
||||
return *this;
|
||||
}
|
||||
|
||||
public:
|
||||
static int Nsimd(void) { return sizeof(dvec)/sizeof(double);}
|
||||
};
|
||||
|
||||
inline vRealD innerProduct(const vRealD & l, const vRealD & r) { return conjugate(l)*r; }
|
||||
inline void zeroit(vRealD &z){ vzero(z);}
|
||||
|
||||
inline vRealD outerProduct(const vRealD &l, const vRealD& r)
|
||||
{
|
||||
return l*r;
|
||||
}
|
||||
inline vRealD trace(const vRealD &arg){
|
||||
return arg;
|
||||
}
|
||||
inline vRealD real(const vRealD &arg){
|
||||
return arg;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
#endif
|
@ -1,313 +0,0 @@
|
||||
#ifndef GRID_VREALF_H
|
||||
#define GRID_VREALF_H
|
||||
|
||||
|
||||
namespace Grid {
|
||||
class vRealF {
|
||||
public:
|
||||
fvec v;
|
||||
|
||||
public:
|
||||
typedef fvec vector_type;
|
||||
typedef RealF scalar_type;
|
||||
|
||||
vRealF()=default;
|
||||
vRealF(RealF a){
|
||||
vsplat(*this,a);
|
||||
};
|
||||
vRealF(Zero &zero){
|
||||
zeroit(*this);
|
||||
}
|
||||
vRealF & operator = ( Zero & z){
|
||||
vzero(*this);
|
||||
return (*this);
|
||||
}
|
||||
////////////////////////////////////
|
||||
// Arithmetic operator overloads +,-,*
|
||||
////////////////////////////////////
|
||||
friend inline vRealF operator + ( vRealF a, vRealF b)
|
||||
{
|
||||
vRealF ret;
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
ret.v = _mm256_add_ps(a.v,b.v);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
ret.v = _mm_add_ps(a.v,b.v);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
ret.v = _mm512_add_ps(a.v,b.v);
|
||||
#endif
|
||||
#ifdef QPX
|
||||
vector4double aa,bb,cc;
|
||||
aa = vec_lda(0,(float *)&a);
|
||||
bb = vec_lda(0,(float *)&b);
|
||||
cc = vec_add(aa,bb);
|
||||
vec_sta(cc,0,(float *)&ret.v);
|
||||
#endif
|
||||
return ret;
|
||||
};
|
||||
|
||||
friend inline vRealF operator - ( vRealF a, vRealF b)
|
||||
{
|
||||
vRealF ret;
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
ret.v = _mm256_sub_ps(a.v,b.v);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
ret.v = _mm_sub_ps(a.v,b.v);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
ret.v = _mm512_sub_ps(a.v,b.v);
|
||||
#endif
|
||||
#ifdef QPX
|
||||
vector4double aa,bb,cc;
|
||||
aa = vec_lda(0,(float *)&a);
|
||||
bb = vec_lda(0,(float *)&b);
|
||||
cc = vec_sub(aa,bb);
|
||||
vec_sta(cc,0,(float *)&ret.v);
|
||||
#endif
|
||||
return ret;
|
||||
};
|
||||
|
||||
friend inline vRealF operator * ( vRealF a, vRealF b)
|
||||
{
|
||||
vRealF ret;
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
ret.v = _mm256_mul_ps(a.v,b.v);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
ret.v = _mm_mul_ps(a.v,b.v);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
ret.v = _mm512_mul_ps(a.v,b.v);
|
||||
#endif
|
||||
#ifdef QPX
|
||||
vector4double aa,bb,cc; // QPX single we are forced to load as this promotes single mem->double regs.
|
||||
aa = vec_lda(0,(float *)&a);
|
||||
bb = vec_lda(0,(float *)&b);
|
||||
cc = vec_mul(aa,bb);
|
||||
vec_sta(cc,0,(float *)&ret.v);
|
||||
#endif
|
||||
return ret;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// mult, sub, add, adj,conjugate, mac functions
|
||||
///////////////////////////////////////////////
|
||||
friend inline void mult(vRealF * __restrict__ y,const vRealF * __restrict__ l,const vRealF *__restrict__ r) {*y = (*l) * (*r);}
|
||||
friend inline void sub (vRealF * __restrict__ y,const vRealF * __restrict__ l,const vRealF *__restrict__ r) {*y = (*l) - (*r);}
|
||||
friend inline void add (vRealF * __restrict__ y,const vRealF * __restrict__ l,const vRealF *__restrict__ r) {*y = (*l) + (*r);}
|
||||
friend inline vRealF adj(const vRealF &in) { return in; }
|
||||
friend inline vRealF conjugate(const vRealF &in){ return in; }
|
||||
|
||||
friend inline void mac (vRealF &y,const vRealF a,const vRealF x){
|
||||
#if defined (AVX1) || defined (SSE4)
|
||||
y = a*x+y;
|
||||
#endif
|
||||
#ifdef AVX2 // AVX 2 introduced FMA support. FMA4 eliminates a copy, but AVX only has FMA3
|
||||
// accelerates multiply accumulate, but not general multiply add
|
||||
y.v = _mm256_fmadd_ps(a.v,x.v,y.v);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
y.v = _mm512_fmadd_ps(a.v,x.v,y.v);
|
||||
#endif
|
||||
#ifdef QPX
|
||||
vector4double aa,xx,yy; // QPX single we are forced to load as this promotes single mem->double regs.
|
||||
aa = vec_lda(0,(float *)&a.v);
|
||||
xx = vec_lda(0,(float *)&x.v);
|
||||
yy = vec_lda(0,(float *)&y.v);
|
||||
yy = vec_madd(aa,xx,yy);
|
||||
vec_sta(yy,0,(float *)&y.v);
|
||||
#endif
|
||||
}
|
||||
|
||||
//////////////////////////////////
|
||||
// Initialise to 1,0,i
|
||||
//////////////////////////////////
|
||||
friend inline void vone (vRealF &ret){vsplat(ret,1.0);}
|
||||
friend inline void vzero(vRealF &ret){vsplat(ret,0.0);}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// General permute; assumes vector length is same across
|
||||
// all subtypes; may not be a good assumption, but could
|
||||
// add the vector width as a template param for BG/Q for example
|
||||
////////////////////////////////////////////////////////////////////
|
||||
|
||||
friend inline void permute(vRealF &y,vRealF b,int perm)
|
||||
{
|
||||
Gpermute<vRealF>(y,b,perm);
|
||||
}
|
||||
/*
|
||||
friend inline void merge(vRealF &y,std::vector<RealF *> &extracted)
|
||||
{
|
||||
Gmerge<vRealF,RealF >(y,extracted);
|
||||
}
|
||||
friend inline void extract(const vRealF &y,std::vector<RealF *> &extracted)
|
||||
{
|
||||
Gextract<vRealF,RealF>(y,extracted);
|
||||
}
|
||||
friend inline void merge(vRealF &y,std::vector<RealF> &extracted)
|
||||
{
|
||||
Gmerge<vRealF,RealF >(y,extracted);
|
||||
}
|
||||
friend inline void extract(const vRealF &y,std::vector<RealF> &extracted)
|
||||
{
|
||||
Gextract<vRealF,RealF>(y,extracted);
|
||||
}
|
||||
*/
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Broadcast a value across Nsimd copies.
|
||||
/////////////////////////////////////////////////////
|
||||
friend inline void vsplat(vRealF &ret,float a){
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
ret.v = _mm256_set_ps(a,a,a,a,a,a,a,a);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
ret.v = _mm_set_ps(a,a,a,a);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
//ret.v = _mm512_set_ps(a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a);
|
||||
ret.v = _mm512_set1_ps(a);
|
||||
#endif
|
||||
#ifdef QPX
|
||||
ret.v = {a,a,a,a};
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
friend inline void vset(vRealF &ret, float *a){
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
ret.v = _mm256_set_ps(a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
ret.v = _mm_set_ps(a[3],a[2],a[1],a[0]);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
ret.v = _mm512_set_ps( a[15],a[14],a[13],a[12],a[11],a[10],a[9],a[8],
|
||||
a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
|
||||
// Note v has a0 a1 a2 a3 a4 a5 a6 a7
|
||||
#endif
|
||||
#ifdef QPX
|
||||
ret.v = {a[0],a[1],a[2],a[3],a[4],a[5],a[6],a[7]};
|
||||
#endif
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// FIXME: gonna remove these load/store, get, set, prefetch
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
friend inline void vstore(const vRealF &ret, float *a){
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
_mm256_store_ps(a,ret.v);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
_mm_store_ps(a,ret.v);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
_mm512_store_ps(a,ret.v);
|
||||
// Note v has a7 a6 a5ba4 a3 a2 a1 a0
|
||||
#endif
|
||||
#ifdef QPX
|
||||
assert(0);
|
||||
#endif
|
||||
}
|
||||
friend inline void vstream(vRealF &out,const vRealF &in){
|
||||
#if defined (AVX1)|| defined (AVX2)
|
||||
_mm256_stream_ps((float *)&out.v,in.v);
|
||||
#endif
|
||||
#ifdef SSE4
|
||||
_mm_stream_ps((float *)&out.v,in.v);
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
_mm512_storenrngo_ps((float *)&out.v,in.v);
|
||||
// _mm512_stream_ps((float *)&out.v,in.v);
|
||||
//Note v has a3 a2 a1 a0
|
||||
#endif
|
||||
#ifdef QPX
|
||||
assert(0);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
friend inline void prefetch(const vRealF &v)
|
||||
{
|
||||
_mm_prefetch((const char*)&v.v,_MM_HINT_T0);
|
||||
}
|
||||
// Unary negation
|
||||
friend inline vRealF operator -(const vRealF &r) {
|
||||
vRealF ret;
|
||||
vzero(ret);
|
||||
ret = ret - r;
|
||||
return ret;
|
||||
}
|
||||
friend inline RealF Reduce(const vRealF & in)
|
||||
{
|
||||
vRealF v1,v2;
|
||||
union {
|
||||
fvec v;
|
||||
float f[sizeof(fvec)/sizeof(double)];
|
||||
} conv;
|
||||
#ifdef SSE4
|
||||
permute(v1,in,0); // sse 128; quad single
|
||||
v1=v1+in;
|
||||
permute(v2,v1,1);
|
||||
v1=v1+v2;
|
||||
#endif
|
||||
#if defined(AVX1) || defined (AVX2)
|
||||
permute(v1,in,0); // avx 256; octo-double
|
||||
v1=v1+in;
|
||||
permute(v2,v1,1);
|
||||
v1=v1+v2;
|
||||
permute(v2,v1,2);
|
||||
v1=v1+v2;
|
||||
#endif
|
||||
#ifdef AVX512
|
||||
permute(v1,in,0); // avx 256; octo-double
|
||||
v1=v1+in;
|
||||
permute(v2,v1,1);
|
||||
v1=v1+v2;
|
||||
permute(v2,v1,2);
|
||||
v1=v1+v2;
|
||||
permute(v2,v1,3);
|
||||
v1=v1+v2;
|
||||
#endif
|
||||
#ifdef QPX
|
||||
#endif
|
||||
conv.v=v1.v;
|
||||
return conv.f[0];
|
||||
}
|
||||
|
||||
// *=,+=,-= operators
|
||||
inline vRealF &operator *=(const vRealF &r) {
|
||||
*this = (*this)*r;
|
||||
return *this;
|
||||
}
|
||||
inline vRealF &operator +=(const vRealF &r) {
|
||||
*this = *this+r;
|
||||
return *this;
|
||||
}
|
||||
inline vRealF &operator -=(const vRealF &r) {
|
||||
*this = *this-r;
|
||||
return *this;
|
||||
}
|
||||
public:
|
||||
static inline int Nsimd(void) { return sizeof(fvec)/sizeof(float);}
|
||||
};
|
||||
inline vRealF innerProduct(const vRealF & l, const vRealF & r) { return conjugate(l)*r; }
|
||||
inline void zeroit(vRealF &z){ vzero(z);}
|
||||
|
||||
inline vRealF outerProduct(const vRealF &l, const vRealF& r)
|
||||
{
|
||||
return l*r;
|
||||
}
|
||||
inline vRealF trace(const vRealF &arg){
|
||||
return arg;
|
||||
}
|
||||
inline vRealF real(const vRealF &arg){
|
||||
return arg;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
#endif
|
0
lib/stencil/.dirstamp
Normal file
0
lib/stencil/.dirstamp
Normal file
@ -29,15 +29,87 @@ namespace Grid {
|
||||
{
|
||||
iMatrix<vtype,N> ret(arg);
|
||||
double factor = (1/(double)N);
|
||||
for(int c1=0;c1<N;c1++){
|
||||
for(int c2=0;c2<N;c2++){
|
||||
ret._internal[c1][c2]= (ret._internal[c1][c2] - adj(arg._internal[c2][c1]));
|
||||
ret._internal[c1][c2] *= 0.5;
|
||||
}}
|
||||
//ret = (ret - adj(arg))*0.5;
|
||||
ret = (ret - adj(arg))*0.5;
|
||||
ret -= trace(ret)*factor;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// ProjectOnGroup function for scalar, vector, matrix
|
||||
///////////////////////////////////////////////
|
||||
|
||||
|
||||
template<class vtype> inline iScalar<vtype> ProjectOnGroup(const iScalar<vtype>&r)
|
||||
{
|
||||
iScalar<vtype> ret;
|
||||
ret._internal = ProjectOnGroup(r._internal);
|
||||
return ret;
|
||||
}
|
||||
template<class vtype,int N> inline iVector<vtype,N> ProjectOnGroup(const iVector<vtype,N>&r)
|
||||
{
|
||||
iVector<vtype,N> ret;
|
||||
for(int i=0;i<N;i++){
|
||||
ret._internal[i] = ProjectOnGroup(r._internal[i]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr>
|
||||
inline iMatrix<vtype,N> ProjectOnGroup(const iMatrix<vtype,N> &arg)
|
||||
{
|
||||
// need a check for the group type?
|
||||
iMatrix<vtype,N> ret(arg);
|
||||
double nrm;
|
||||
for(int c1=0;c1<N;c1++){
|
||||
nrm = 0.0;
|
||||
for(int c2=0;c2<N;c2++)
|
||||
nrm = real(innerProduct(ret._internal[c1][c2],ret._internal[c1][c2]));
|
||||
nrm = 1.0/sqrt(nrm);
|
||||
for(int c2=0;c2<N;c2++)
|
||||
ret._internal[c1][c2]*= nrm;
|
||||
|
||||
for (int b=c1+1; b<N; ++b){
|
||||
decltype(ret._internal[b][b]*ret._internal[b][b]) pr = 0.0;
|
||||
for(int c=0; c<N; ++c)
|
||||
pr += ret._internal[c1][c]*ret._internal[b][c];
|
||||
|
||||
for(int c=0; c<N; ++c){
|
||||
ret._internal[b][c] -= pr * ret._internal[c1][c];
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Exponentiate function for scalar, vector, matrix
|
||||
///////////////////////////////////////////////
|
||||
|
||||
|
||||
template<class vtype> inline iScalar<vtype> Exponentiate(const iScalar<vtype>&r, double alpha, int Nexp)
|
||||
{
|
||||
iScalar<vtype> ret;
|
||||
ret._internal = Exponentiate(r._internal, alpha, Nexp);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
template<class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr>
|
||||
inline iMatrix<vtype,N> Exponentiate(const iMatrix<vtype,N> &arg, double alpha, int Nexp)
|
||||
{
|
||||
iMatrix<vtype,N> unit(1.0);
|
||||
iMatrix<vtype,N> temp(unit);
|
||||
|
||||
for(int i=Nexp; i>=1;--i){
|
||||
temp *= alpha/double(i);
|
||||
temp = unit + temp*arg;
|
||||
}
|
||||
return ProjectOnGroup(temp);
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
|
@ -90,10 +90,10 @@ public:
|
||||
operator ComplexD () const { return(TensorRemove(_internal)); };
|
||||
operator RealD () const { return(real(TensorRemove(_internal))); }
|
||||
|
||||
// convert from a something to a scalar
|
||||
template<class T,typename std::enable_if<!isGridTensor<T>::value, T>::type* = nullptr > strong_inline auto operator = (T arg) -> iScalar<vtype>
|
||||
// convert from a something to a scalar via constructor of something arg
|
||||
template<class T,typename std::enable_if<!isGridTensor<T>::value, T>::type* = nullptr > strong_inline iScalar<vtype> operator = (T arg)
|
||||
{
|
||||
_internal = vtype(arg);
|
||||
_internal = arg;
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -123,6 +123,13 @@ public:
|
||||
typedef iScalar<tensor_reduced_v> tensor_reduced;
|
||||
typedef iVector<recurse_scalar_object,N> scalar_object;
|
||||
|
||||
template<class T,typename std::enable_if<!isGridTensor<T>::value, T>::type* = nullptr > strong_inline auto operator = (T arg) -> iVector<vtype,N>
|
||||
{
|
||||
zeroit(*this);
|
||||
for(int i=0;i<N;i++)
|
||||
_internal[i] = arg;
|
||||
return *this;
|
||||
}
|
||||
|
||||
enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1};
|
||||
iVector(const Zero &z){ *this = zero; };
|
||||
@ -309,7 +316,8 @@ public:
|
||||
stream<<o._internal[i][j];
|
||||
if (i<N-1) stream<<",";
|
||||
}
|
||||
stream<<"}\n\t\t";
|
||||
stream<<"}";
|
||||
if(i!=N-1) stream<<"\n\t\t";
|
||||
}
|
||||
stream<<"}";
|
||||
return stream;
|
||||
|
@ -119,7 +119,6 @@ namespace Grid {
|
||||
static const bool value = true;
|
||||
static const bool notvalue = false;
|
||||
};
|
||||
|
||||
template<> struct isGridTensor<int > {
|
||||
static const bool value = false;
|
||||
static const bool notvalue = true;
|
||||
|
66
lib/tensors/Tensor_unary.h
Normal file
66
lib/tensors/Tensor_unary.h
Normal file
@ -0,0 +1,66 @@
|
||||
#ifndef GRID_TENSOR_UNARY_H
|
||||
#define GRID_TENSOR_UNARY_H
|
||||
namespace Grid {
|
||||
|
||||
#define UNARY_REAL(func)\
|
||||
template<class obj> inline auto func(const iScalar<obj> &z) -> iScalar<obj>\
|
||||
{\
|
||||
iScalar<obj> ret;\
|
||||
ret._internal = func( (z._internal));\
|
||||
return ret;\
|
||||
}\
|
||||
template<class obj,int N> inline auto func(const iVector<obj,N> &z) -> iVector<obj,N>\
|
||||
{\
|
||||
iVector<obj,N> ret;\
|
||||
for(int c1=0;c1<N;c1++){\
|
||||
ret._internal[c1] = func( (z._internal[c1]));\
|
||||
}\
|
||||
return ret;\
|
||||
}\
|
||||
template<class obj,int N> inline auto func(const iMatrix<obj,N> &z) -> iMatrix<obj,N>\
|
||||
{\
|
||||
iMatrix<obj,N> ret;\
|
||||
for(int c1=0;c1<N;c1++){\
|
||||
for(int c2=0;c2<N;c2++){\
|
||||
ret._internal[c1][c2] = func( (z._internal[c1][c2]));\
|
||||
}}\
|
||||
return ret;\
|
||||
}
|
||||
|
||||
|
||||
#define BINARY_RSCALAR(func,scal) \
|
||||
template<class obj> inline iScalar<obj> func(const iScalar<obj> &z,scal y) \
|
||||
{\
|
||||
iScalar<obj> ret;\
|
||||
ret._internal = func(z._internal,y); \
|
||||
return ret;\
|
||||
}\
|
||||
template<class obj,int N> inline iVector<obj,N> func(const iVector<obj,N> &z,scal y) \
|
||||
{\
|
||||
iVector<obj,N> ret;\
|
||||
for(int c1=0;c1<N;c1++){\
|
||||
ret._internal[c1] = func(z._internal[c1],y); \
|
||||
}\
|
||||
return ret;\
|
||||
}\
|
||||
template<class obj,int N> inline iMatrix<obj,N> func(const iMatrix<obj,N> &z, scal y) \
|
||||
{\
|
||||
iMatrix<obj,N> ret;\
|
||||
for(int c1=0;c1<N;c1++){\
|
||||
for(int c2=0;c2<N;c2++){\
|
||||
ret._internal[c1][c2] = func(z._internal[c1][c2],y); \
|
||||
}}\
|
||||
return ret;\
|
||||
}
|
||||
|
||||
UNARY_REAL(sqrt);
|
||||
UNARY_REAL(rsqrt);
|
||||
UNARY_REAL(sin);
|
||||
UNARY_REAL(cos);
|
||||
|
||||
BINARY_RSCALAR(mod,Integer);
|
||||
BINARY_RSCALAR(pow,RealD);
|
||||
|
||||
|
||||
}
|
||||
#endif
|
1
scripts/arm_configure.experimental
Normal file
1
scripts/arm_configure.experimental
Normal file
@ -0,0 +1 @@
|
||||
./configure --host=arm-linux-gnueabihf CXX=clang++-3.5 CXXFLAGS='-std=c++11 -O3 -target arm-linux-gnueabihf -I/usr/arm-linux-gnueabihf/include/ -I/home/neo/Codes/gmp6.0/gmp-arm/include/ -I/usr/arm-linux-gnueabihf/include/c++/4.8.2/arm-linux-gnueabihf/ -L/home/neo/Codes/gmp6.0/gmp-arm/lib/ -I/home/neo/Codes/mpfr3.1.2/mpfr-arm/include/ -L/home/neo/Codes/mpfr3.1.2/mpfr-arm/lib/ -static -mcpu=cortex-a7' --enable-simd=NEONv7
|
@ -1,3 +1,30 @@
|
||||
#!/bin/sh
|
||||
if [ $# -eq 1 ]
|
||||
then
|
||||
wc -l lib/*.h lib/*/*.h lib/*/*/*.h lib/*.cc lib/*/*.cc lib/*/*/*.cc tests/*.cc benchmarks/*.cc lib/*/*/*/*.cc lib/*/*/*/*.h >& tmp.fil
|
||||
count=`grep total tmp.fil`
|
||||
echo $count " in Grid library"
|
||||
else
|
||||
wc -l lib/*.h lib/*/*.h lib/*/*/*.h lib/*.cc lib/*/*.cc lib/*/*/*.cc tests/*.cc benchmarks/*.cc lib/*/*/*/*.cc lib/*/*/*/*.h
|
||||
fi
|
||||
|
||||
wc -l lib/*.h lib/*/*.h lib/*/*/*.h lib/*.cc lib/*/*.cc lib/*/*/*.cc tests/*.cc benchmarks/*.cc lib/*/*/*/*.cc lib/*/*/*/*.h
|
||||
wc -l lib/*.h lib/*/*.h lib/*/*/*.h lib/*.cc lib/*/*.cc lib/*/*/*.cc lib/*/*/*/*.cc lib/*/*/*/*.h >& tmp.fil
|
||||
count=`grep total tmp.fil`
|
||||
echo $count " in lib"
|
||||
|
||||
for sdir in `ls -F lib/| grep /`
|
||||
do
|
||||
wc -l lib/${sdir}/*.h lib/${sdir}/*/*.h lib/${sdir}/*.cc lib/${sdir}/*/*.cc lib/${sdir}/*/*/*.cc lib/${sdir}/*/*/*.h >& tmp.fil
|
||||
count=`grep total tmp.fil`
|
||||
echo $count " in lib/${sdir}"
|
||||
done
|
||||
|
||||
wc -l tests/*.cc | grep total >& tmp.fil
|
||||
count=`grep total tmp.fil`
|
||||
echo $count " in tests"
|
||||
|
||||
wc -l benchmarks/*.cc | grep total >& tmp.fil
|
||||
count=`grep total tmp.fil`
|
||||
echo $count " in benchmarks"
|
||||
|
||||
rm tmp.fil
|
||||
|
0
tests/InvSqrt.gnu
Normal file
0
tests/InvSqrt.gnu
Normal file
@ -1,5 +1,5 @@
|
||||
|
||||
bin_PROGRAMS = Test_GaugeAction Test_cayley_cg Test_cayley_even_odd Test_contfrac_cg Test_contfrac_even_odd Test_cshift Test_cshift_red_black Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_even_odd Test_gamma Test_main Test_multishift_sqrt Test_nersc_io Test_remez Test_rng Test_rng_fixed Test_simd Test_stencil Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_even_odd
|
||||
bin_PROGRAMS = Test_GaugeAction Test_cayley_cg Test_cayley_coarsen_support Test_cayley_even_odd Test_cayley_ldop_cg Test_cayley_ldop_cr Test_cf_coarsen_support Test_cf_cr_unprec Test_contfrac_cg Test_contfrac_even_odd Test_cshift Test_cshift_red_black Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_cr_unprec Test_dwf_even_odd Test_gamma Test_main Test_multishift_sqrt Test_nersc_io Test_remez Test_rng Test_rng_fixed Test_simd Test_stencil Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_cr_unprec Test_wilson_even_odd
|
||||
|
||||
|
||||
Test_GaugeAction_SOURCES=Test_GaugeAction.cc
|
||||
@ -10,10 +10,30 @@ Test_cayley_cg_SOURCES=Test_cayley_cg.cc
|
||||
Test_cayley_cg_LDADD=-lGrid
|
||||
|
||||
|
||||
Test_cayley_coarsen_support_SOURCES=Test_cayley_coarsen_support.cc
|
||||
Test_cayley_coarsen_support_LDADD=-lGrid
|
||||
|
||||
|
||||
Test_cayley_even_odd_SOURCES=Test_cayley_even_odd.cc
|
||||
Test_cayley_even_odd_LDADD=-lGrid
|
||||
|
||||
|
||||
Test_cayley_ldop_cg_SOURCES=Test_cayley_ldop_cg.cc
|
||||
Test_cayley_ldop_cg_LDADD=-lGrid
|
||||
|
||||
|
||||
Test_cayley_ldop_cr_SOURCES=Test_cayley_ldop_cr.cc
|
||||
Test_cayley_ldop_cr_LDADD=-lGrid
|
||||
|
||||
|
||||
Test_cf_coarsen_support_SOURCES=Test_cf_coarsen_support.cc
|
||||
Test_cf_coarsen_support_LDADD=-lGrid
|
||||
|
||||
|
||||
Test_cf_cr_unprec_SOURCES=Test_cf_cr_unprec.cc
|
||||
Test_cf_cr_unprec_LDADD=-lGrid
|
||||
|
||||
|
||||
Test_contfrac_cg_SOURCES=Test_contfrac_cg.cc
|
||||
Test_contfrac_cg_LDADD=-lGrid
|
||||
|
||||
@ -42,6 +62,10 @@ Test_dwf_cg_unprec_SOURCES=Test_dwf_cg_unprec.cc
|
||||
Test_dwf_cg_unprec_LDADD=-lGrid
|
||||
|
||||
|
||||
Test_dwf_cr_unprec_SOURCES=Test_dwf_cr_unprec.cc
|
||||
Test_dwf_cr_unprec_LDADD=-lGrid
|
||||
|
||||
|
||||
Test_dwf_even_odd_SOURCES=Test_dwf_even_odd.cc
|
||||
Test_dwf_even_odd_LDADD=-lGrid
|
||||
|
||||
@ -94,6 +118,10 @@ Test_wilson_cg_unprec_SOURCES=Test_wilson_cg_unprec.cc
|
||||
Test_wilson_cg_unprec_LDADD=-lGrid
|
||||
|
||||
|
||||
Test_wilson_cr_unprec_SOURCES=Test_wilson_cr_unprec.cc
|
||||
Test_wilson_cr_unprec_LDADD=-lGrid
|
||||
|
||||
|
||||
Test_wilson_even_odd_SOURCES=Test_wilson_even_odd.cc
|
||||
Test_wilson_even_odd_LDADD=-lGrid
|
||||
|
||||
|
2
tests/Sqrt.gnu
Normal file
2
tests/Sqrt.gnu
Normal file
@ -0,0 +1,2 @@
|
||||
f(x) = 6.81384+(-2.34645e-06/(x+0.000228091))+(-1.51593e-05/(x+0.00112084))+(-6.89254e-05/(x+0.003496))+(-0.000288983/(x+0.00954309))+(-0.00119277/(x+0.024928))+(-0.0050183/(x+0.0646627))+(-0.0226449/(x+0.171576))+(-0.123767/(x+0.491792))+(-1.1705/(x+1.78667))+(-102.992/(x+18.4866));
|
||||
f(x) = 0.14676+(0.00952992/(x+5.40933e-05))+(0.0115952/(x+0.000559699))+(0.0161824/(x+0.00203338))+(0.0243252/(x+0.00582831))+(0.0379533/(x+0.0154649))+(0.060699/(x+0.0401156))+(0.100345/(x+0.104788))+(0.178335/(x+0.286042))+(0.381586/(x+0.892189))+(1.42625/(x+4.38422));
|
@ -115,7 +115,7 @@ int main (int argc, char ** argv)
|
||||
Complex l = TensorRemove(Tl);
|
||||
std::cout << "calculated link trace " <<l*LinkTraceScale<<std::endl;
|
||||
|
||||
sumBlocks(cPlaq,Plaq);
|
||||
blockSum(cPlaq,Plaq);
|
||||
TComplex TcP = sum(cPlaq);
|
||||
Complex ll= TensorRemove(TcP);
|
||||
std::cout << "coarsened plaquettes sum to " <<ll*PlaqScale<<std::endl;
|
||||
|
174
tests/Test_cayley_coarsen_support.cc
Normal file
174
tests/Test_cayley_coarsen_support.cc
Normal file
@ -0,0 +1,174 @@
|
||||
#include <Grid.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
|
||||
template<class d>
|
||||
struct scal {
|
||||
d internal;
|
||||
};
|
||||
|
||||
Gamma::GammaMatrix Gmu [] = {
|
||||
Gamma::GammaX,
|
||||
Gamma::GammaY,
|
||||
Gamma::GammaZ,
|
||||
Gamma::GammaT
|
||||
};
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
const int Ls=8;
|
||||
|
||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
|
||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
|
||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||
|
||||
// Construct a coarsened grid
|
||||
std::vector<int> clatt = GridDefaultLatt();
|
||||
for(int d=0;d<clatt.size();d++){
|
||||
clatt[d] = clatt[d]/2;
|
||||
}
|
||||
GridCartesian *Coarse4d = SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());;
|
||||
GridCartesian *Coarse5d = SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
|
||||
|
||||
std::vector<int> seeds4({1,2,3,4});
|
||||
std::vector<int> seeds5({5,6,7,8});
|
||||
std::vector<int> cseeds({5,6,7,8});
|
||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
GridParallelRNG CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds);
|
||||
|
||||
LatticeFermion src(FGrid); random(RNG5,src);
|
||||
LatticeFermion result(FGrid); result=zero;
|
||||
LatticeFermion ref(FGrid); ref=zero;
|
||||
LatticeFermion tmp(FGrid);
|
||||
LatticeFermion err(FGrid);
|
||||
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
|
||||
|
||||
#if 0
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
Umu=zero;
|
||||
Complex cone(1.0,0.0);
|
||||
for(int nn=0;nn<Nd;nn++){
|
||||
if(1) {
|
||||
if (nn>2) { U[nn]=zero; std::cout << "zeroing gauge field in dir "<<nn<<std::endl; }
|
||||
else { U[nn]=cone; std::cout << "unit gauge field in dir "<<nn<<std::endl; }
|
||||
}
|
||||
pokeIndex<LorentzIndex>(Umu,U[nn],nn);
|
||||
}
|
||||
#endif
|
||||
|
||||
RealD mass=0.5;
|
||||
RealD M5=1.8;
|
||||
|
||||
DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||
Gamma5R5HermitianLinearOperator<DomainWallFermion,LatticeFermion> HermIndefOp(Ddwf);
|
||||
|
||||
HermIndefOp.Op(src,ref);
|
||||
HermIndefOp.OpDiag(src,result);
|
||||
|
||||
for(int d=0;d<4;d++){
|
||||
HermIndefOp.OpDir(src,tmp,d+1,+1); result=result+tmp;
|
||||
std::cout<<"dir "<<d<<" tmp "<<norm2(tmp)<<std::endl;
|
||||
HermIndefOp.OpDir(src,tmp,d+1,-1); result=result+tmp;
|
||||
std::cout<<"dir "<<d<<" tmp "<<norm2(tmp)<<std::endl;
|
||||
}
|
||||
err = result-ref;
|
||||
std::cout<<"Error "<<norm2(err)<<std::endl;
|
||||
|
||||
const int nbasis = 2;
|
||||
std::vector<LatticeFermion> subspace(nbasis,FGrid);
|
||||
LatticeFermion prom(FGrid);
|
||||
|
||||
for(int b=0;b<nbasis;b++){
|
||||
random(RNG5,subspace[b]);
|
||||
}
|
||||
std::cout << "Computed randoms"<< std::endl;
|
||||
|
||||
typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperator;
|
||||
typedef LittleDiracOperator::CoarseVector CoarseVector;
|
||||
|
||||
LittleDiracOperator LittleDiracOp(*Coarse5d);
|
||||
|
||||
LittleDiracOp.CoarsenOperator(FGrid,HermIndefOp,subspace);
|
||||
|
||||
CoarseVector c_src (Coarse5d);
|
||||
CoarseVector c_res (Coarse5d);
|
||||
CoarseVector c_proj(Coarse5d);
|
||||
|
||||
// TODO
|
||||
// -- promote from subspace, check we get the vector we wanted
|
||||
// -- apply ldop; check we get the same as inner product of M times big vec
|
||||
// -- pick blocks one by one. Evaluate matrix elements.
|
||||
Complex one(1.0);
|
||||
c_src = one; // 1 in every element for vector 1.
|
||||
|
||||
blockPromote(c_src,err,subspace);
|
||||
|
||||
|
||||
prom=zero;
|
||||
for(int b=0;b<nbasis;b++){
|
||||
prom=prom+subspace[b];
|
||||
}
|
||||
err=err-prom;
|
||||
std::cout<<"Promoted back from subspace err "<<norm2(err)<<std::endl;
|
||||
|
||||
HermIndefOp.HermOp(prom,tmp);
|
||||
blockProject(c_proj,tmp,subspace);
|
||||
|
||||
LittleDiracOp.M(c_src,c_res);
|
||||
|
||||
c_proj = c_proj - c_res;
|
||||
std::cout<<"Representation of ldop within subspace "<<norm2(c_proj)<<std::endl;
|
||||
|
||||
std::cout << "Multiplying by LittleDiracOp "<< std::endl;
|
||||
LittleDiracOp.M(c_src,c_res);
|
||||
|
||||
std::cout<<"Testing hermiticity explicitly by inspecting matrix elements"<<std::endl;
|
||||
LittleDiracOp.AssertHermitian();
|
||||
|
||||
std::cout << "Testing Hermiticity stochastically "<< std::endl;
|
||||
CoarseVector phi(Coarse5d);
|
||||
CoarseVector chi(Coarse5d);
|
||||
CoarseVector Aphi(Coarse5d);
|
||||
CoarseVector Achi(Coarse5d);
|
||||
|
||||
random(CRNG,phi);
|
||||
random(CRNG,chi);
|
||||
|
||||
|
||||
std::cout<<"Made randoms"<<std::endl;
|
||||
|
||||
LittleDiracOp.M(phi,Aphi);
|
||||
LittleDiracOp.Mdag(chi,Achi);
|
||||
|
||||
ComplexD pAc = innerProduct(chi,Aphi);
|
||||
ComplexD cAp = innerProduct(phi,Achi);
|
||||
ComplexD cAc = innerProduct(chi,Achi);
|
||||
ComplexD pAp = innerProduct(phi,Aphi);
|
||||
|
||||
std::cout<< "pAc "<<pAc<<" cAp "<< cAp<< " diff "<<pAc-adj(cAp)<<std::endl;
|
||||
|
||||
std::cout<< "pAp "<<pAp<<" cAc "<< cAc<<"Should be real"<< std::endl;
|
||||
|
||||
std::cout<<"Testing linearity"<<std::endl;
|
||||
CoarseVector PhiPlusChi(Coarse5d);
|
||||
CoarseVector APhiPlusChi(Coarse5d);
|
||||
CoarseVector linerr(Coarse5d);
|
||||
PhiPlusChi = phi+chi;
|
||||
LittleDiracOp.M(PhiPlusChi,APhiPlusChi);
|
||||
|
||||
linerr= APhiPlusChi-Aphi;
|
||||
linerr= linerr-Achi;
|
||||
std::cout<<"**Diff "<<norm2(linerr)<<std::endl;
|
||||
|
||||
|
||||
std::cout << "Done "<< std::endl;
|
||||
Grid_finalize();
|
||||
}
|
136
tests/Test_cayley_ldop_cg.cc
Normal file
136
tests/Test_cayley_ldop_cg.cc
Normal file
@ -0,0 +1,136 @@
|
||||
#include <Grid.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
|
||||
template<class d>
|
||||
struct scal {
|
||||
d internal;
|
||||
};
|
||||
|
||||
Gamma::GammaMatrix Gmu [] = {
|
||||
Gamma::GammaX,
|
||||
Gamma::GammaY,
|
||||
Gamma::GammaZ,
|
||||
Gamma::GammaT
|
||||
};
|
||||
|
||||
double lowpass(double x)
|
||||
{
|
||||
return pow(x*x+1.0,-2);
|
||||
}
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
Chebyshev<LatticeFermion> filter(-150.0, 150.0,16, lowpass);
|
||||
ofstream csv(std::string("filter.dat"),std::ios::out|std::ios::trunc);
|
||||
filter.csv(csv);
|
||||
csv.close();
|
||||
|
||||
const int Ls=8;
|
||||
|
||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
|
||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
|
||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||
|
||||
// Construct a coarsened grid
|
||||
std::vector<int> clatt = GridDefaultLatt();
|
||||
for(int d=0;d<clatt.size();d++){
|
||||
clatt[d] = clatt[d]/2;
|
||||
}
|
||||
GridCartesian *Coarse4d = SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());;
|
||||
GridCartesian *Coarse5d = SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
|
||||
|
||||
std::vector<int> seeds4({1,2,3,4});
|
||||
std::vector<int> seeds5({5,6,7,8});
|
||||
std::vector<int> cseeds({5,6,7,8});
|
||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
GridParallelRNG CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds);
|
||||
|
||||
LatticeFermion src(FGrid); gaussian(RNG5,src);
|
||||
LatticeFermion result(FGrid); result=zero;
|
||||
LatticeFermion ref(FGrid); ref=zero;
|
||||
LatticeFermion tmp(FGrid);
|
||||
LatticeFermion err(FGrid);
|
||||
LatticeGaugeField Umu(UGrid); gaussian(RNG4,Umu);
|
||||
|
||||
|
||||
//random(RNG4,Umu);
|
||||
NerscField header;
|
||||
std::string file("./ckpoint_lat.400");
|
||||
readNerscConfiguration(Umu,header,file);
|
||||
|
||||
#if 0
|
||||
LatticeColourMatrix U(UGrid);
|
||||
U=zero;
|
||||
for(int nn=0;nn<Nd;nn++){
|
||||
if (nn>2) {
|
||||
pokeIndex<LorentzIndex>(Umu,U,nn);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
RealD mass=0.1;
|
||||
RealD M5=1.0;
|
||||
|
||||
DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||
Gamma5R5HermitianLinearOperator<DomainWallFermion,LatticeFermion> HermIndefOp(Ddwf);
|
||||
|
||||
const int nbasis = 8;
|
||||
std::vector<LatticeFermion> subspace(nbasis,FGrid);
|
||||
LatticeFermion noise(FGrid);
|
||||
LatticeFermion ms(FGrid);
|
||||
for(int b=0;b<nbasis;b++){
|
||||
Gamma g5(Gamma::Gamma5);
|
||||
gaussian(RNG5,noise);
|
||||
RealD scale = pow(norm2(noise),-0.5);
|
||||
noise=noise*scale;
|
||||
|
||||
HermIndefOp.HermOp(noise,ms); std::cout << "Noise "<<b<<" Ms "<<norm2(ms)<< " "<< norm2(noise)<<std::endl;
|
||||
|
||||
|
||||
// filter(HermIndefOp,noise,subspace[b]);
|
||||
// inverse iteration
|
||||
MdagMLinearOperator<DomainWallFermion,LatticeFermion> HermDefOp(Ddwf);
|
||||
ConjugateGradient<LatticeFermion> CG(1.0e-5,10000);
|
||||
|
||||
for(int i=0;i<4;i++){
|
||||
|
||||
CG(HermDefOp,noise,subspace[b]);
|
||||
noise = subspace[b];
|
||||
|
||||
scale = pow(norm2(noise),-0.5);
|
||||
noise=noise*scale;
|
||||
HermDefOp.HermOp(noise,ms); std::cout << "filt "<<b<<" <u|H|u> "<<norm2(ms)<< " "<< norm2(noise)<<std::endl;
|
||||
}
|
||||
|
||||
subspace[b] = noise;
|
||||
HermIndefOp.HermOp(subspace[b],ms); std::cout << "Filtered "<<b<<" Ms "<<norm2(ms)<< " "<<norm2(subspace[b]) <<std::endl;
|
||||
}
|
||||
std::cout << "Computed randoms"<< std::endl;
|
||||
|
||||
typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperator;
|
||||
typedef LittleDiracOperator::CoarseVector CoarseVector;
|
||||
|
||||
LittleDiracOperator LittleDiracOp(*Coarse5d);
|
||||
LittleDiracOp.CoarsenOperator(FGrid,HermIndefOp,subspace);
|
||||
|
||||
CoarseVector c_src (Coarse5d);
|
||||
CoarseVector c_res (Coarse5d);
|
||||
gaussian(CRNG,c_src);
|
||||
c_res=zero;
|
||||
|
||||
std::cout << "Solving CG on coarse space "<< std::endl;
|
||||
MdagMLinearOperator<LittleDiracOperator,CoarseVector> PosdefLdop(LittleDiracOp);
|
||||
ConjugateGradient<CoarseVector> CG(1.0e-8,10000);
|
||||
CG(PosdefLdop,c_src,c_res);
|
||||
|
||||
std::cout << "Done "<< std::endl;
|
||||
Grid_finalize();
|
||||
}
|
138
tests/Test_cayley_ldop_cr.cc
Normal file
138
tests/Test_cayley_ldop_cr.cc
Normal file
@ -0,0 +1,138 @@
|
||||
#include <Grid.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
|
||||
template<class d>
|
||||
struct scal {
|
||||
d internal;
|
||||
};
|
||||
|
||||
Gamma::GammaMatrix Gmu [] = {
|
||||
Gamma::GammaX,
|
||||
Gamma::GammaY,
|
||||
Gamma::GammaZ,
|
||||
Gamma::GammaT
|
||||
};
|
||||
|
||||
double lowpass(double x)
|
||||
{
|
||||
return pow(x*x+1.0,-2);
|
||||
}
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
Chebyshev<LatticeFermion> filter(-150.0, 150.0,16, lowpass);
|
||||
ofstream csv(std::string("filter.dat"),std::ios::out|std::ios::trunc);
|
||||
filter.csv(csv);
|
||||
csv.close();
|
||||
|
||||
const int Ls=8;
|
||||
|
||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
|
||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
|
||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||
|
||||
// Construct a coarsened grid
|
||||
std::vector<int> clatt = GridDefaultLatt();
|
||||
for(int d=0;d<clatt.size();d++){
|
||||
clatt[d] = clatt[d]/2;
|
||||
}
|
||||
GridCartesian *Coarse4d = SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());;
|
||||
GridCartesian *Coarse5d = SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
|
||||
|
||||
std::vector<int> seeds4({1,2,3,4});
|
||||
std::vector<int> seeds5({5,6,7,8});
|
||||
std::vector<int> cseeds({5,6,7,8});
|
||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
GridParallelRNG CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds);
|
||||
|
||||
LatticeFermion src(FGrid); gaussian(RNG5,src);
|
||||
LatticeFermion result(FGrid); result=zero;
|
||||
LatticeFermion ref(FGrid); ref=zero;
|
||||
LatticeFermion tmp(FGrid);
|
||||
LatticeFermion err(FGrid);
|
||||
LatticeGaugeField Umu(UGrid); gaussian(RNG4,Umu);
|
||||
|
||||
|
||||
//random(RNG4,Umu);
|
||||
NerscField header;
|
||||
std::string file("./ckpoint_lat.400");
|
||||
readNerscConfiguration(Umu,header,file);
|
||||
|
||||
#if 0
|
||||
LatticeColourMatrix U(UGrid);
|
||||
Complex cone(1.0,0.0);
|
||||
for(int nn=0;nn<Nd;nn++){
|
||||
if (nn==3) {
|
||||
U=zero; std::cout << "zeroing gauge field in dir "<<nn<<std::endl;
|
||||
// else { U[nn]= cone;std::cout << "unit gauge field in dir "<<nn<<std::endl; }
|
||||
pokeIndex<LorentzIndex>(Umu,U,nn);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
RealD mass=0.5;
|
||||
RealD M5=1.8;
|
||||
|
||||
DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||
Gamma5R5HermitianLinearOperator<DomainWallFermion,LatticeFermion> HermIndefOp(Ddwf);
|
||||
|
||||
const int nbasis = 8;
|
||||
std::vector<LatticeFermion> subspace(nbasis,FGrid);
|
||||
LatticeFermion noise(FGrid);
|
||||
LatticeFermion ms(FGrid);
|
||||
for(int b=0;b<nbasis;b++){
|
||||
Gamma g5(Gamma::Gamma5);
|
||||
gaussian(RNG5,noise);
|
||||
RealD scale = pow(norm2(noise),-0.5);
|
||||
noise=noise*scale;
|
||||
|
||||
HermIndefOp.HermOp(noise,ms); std::cout << "Noise "<<b<<" Ms "<<norm2(ms)<< " "<< norm2(noise)<<std::endl;
|
||||
|
||||
|
||||
// filter(HermIndefOp,noise,subspace[b]);
|
||||
// inverse iteration
|
||||
MdagMLinearOperator<DomainWallFermion,LatticeFermion> HermDefOp(Ddwf);
|
||||
ConjugateGradient<LatticeFermion> CG(1.0e-5,10000);
|
||||
|
||||
for(int i=0;i<4;i++){
|
||||
|
||||
CG(HermDefOp,noise,subspace[b]);
|
||||
noise = subspace[b];
|
||||
|
||||
scale = pow(norm2(noise),-0.5);
|
||||
noise=noise*scale;
|
||||
HermDefOp.HermOp(noise,ms); std::cout << "filt "<<b<<" <u|H|u> "<<norm2(ms)<< " "<< norm2(noise)<<std::endl;
|
||||
}
|
||||
|
||||
subspace[b] = noise;
|
||||
HermIndefOp.HermOp(subspace[b],ms); std::cout << "Filtered "<<b<<" Ms "<<norm2(ms)<< " "<<norm2(subspace[b]) <<std::endl;
|
||||
}
|
||||
std::cout << "Computed randoms"<< std::endl;
|
||||
|
||||
typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperator;
|
||||
typedef LittleDiracOperator::CoarseVector CoarseVector;
|
||||
|
||||
LittleDiracOperator LittleDiracOp(*Coarse5d);
|
||||
LittleDiracOp.CoarsenOperator(FGrid,HermIndefOp,subspace);
|
||||
|
||||
CoarseVector c_src (Coarse5d);
|
||||
CoarseVector c_res (Coarse5d);
|
||||
gaussian(CRNG,c_src);
|
||||
c_res=zero;
|
||||
|
||||
std::cout << "Solving MCR on coarse space "<< std::endl;
|
||||
HermitianLinearOperator<LittleDiracOperator,CoarseVector> HermIndefLdop(LittleDiracOp);
|
||||
ConjugateResidual<CoarseVector> MCR(1.0e-8,10000);
|
||||
MCR(HermIndefLdop,c_src,c_res);
|
||||
|
||||
std::cout << "Done "<< std::endl;
|
||||
Grid_finalize();
|
||||
}
|
87
tests/Test_cf_coarsen_support.cc
Normal file
87
tests/Test_cf_coarsen_support.cc
Normal file
@ -0,0 +1,87 @@
|
||||
#include <Grid.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
template<class d>
|
||||
struct scal {
|
||||
d internal;
|
||||
};
|
||||
|
||||
Gamma::GammaMatrix Gmu [] = {
|
||||
Gamma::GammaX,
|
||||
Gamma::GammaY,
|
||||
Gamma::GammaZ,
|
||||
Gamma::GammaT
|
||||
};
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
const int Ls=9;
|
||||
|
||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
|
||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||
|
||||
std::vector<int> seeds4({1,2,3,4});
|
||||
std::vector<int> seeds5({5,6,7,8});
|
||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
|
||||
LatticeFermion src(FGrid); random(RNG5,src);
|
||||
LatticeFermion result(FGrid); result=zero;
|
||||
LatticeFermion ref(FGrid); ref=zero;
|
||||
LatticeFermion tmp(FGrid);
|
||||
LatticeFermion err(FGrid);
|
||||
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
|
||||
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U[mu] = peekIndex<LorentzIndex>(Umu,mu);
|
||||
}
|
||||
|
||||
RealD mass=0.1;
|
||||
RealD M5=1.8;
|
||||
|
||||
{
|
||||
OverlapWilsonContFracTanhFermion Dcf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
|
||||
HermitianLinearOperator<OverlapWilsonContFracTanhFermion,LatticeFermion> HermIndefOp(Dcf);
|
||||
|
||||
HermIndefOp.Op(src,ref);
|
||||
HermIndefOp.OpDiag(src,result);
|
||||
|
||||
for(int d=0;d<4;d++){
|
||||
HermIndefOp.OpDir(src,tmp,d,+1); result=result+tmp;
|
||||
std::cout<<"dir "<<d<<" tmp "<<norm2(tmp)<<std::endl;
|
||||
HermIndefOp.OpDir(src,tmp,d,-1); result=result+tmp;
|
||||
std::cout<<"dir "<<d<<" tmp "<<norm2(tmp)<<std::endl;
|
||||
}
|
||||
err = result-ref;
|
||||
std::cout<<"Error "<<norm2(err)<<std::endl;
|
||||
}
|
||||
|
||||
{
|
||||
OverlapWilsonPartialFractionTanhFermion Dpf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
|
||||
HermitianLinearOperator<OverlapWilsonPartialFractionTanhFermion,LatticeFermion> HermIndefOp(Dpf);
|
||||
|
||||
HermIndefOp.Op(src,ref);
|
||||
HermIndefOp.OpDiag(src,result);
|
||||
|
||||
for(int d=0;d<4;d++){
|
||||
HermIndefOp.OpDir(src,tmp,d,+1); result=result+tmp;
|
||||
std::cout<<"dir "<<d<<" tmp "<<norm2(tmp)<<std::endl;
|
||||
HermIndefOp.OpDir(src,tmp,d,-1); result=result+tmp;
|
||||
std::cout<<"dir "<<d<<" tmp "<<norm2(tmp)<<std::endl;
|
||||
}
|
||||
|
||||
err = result-ref;
|
||||
std::cout<<"Error "<<norm2(err)<<std::endl;
|
||||
}
|
||||
|
||||
|
||||
Grid_finalize();
|
||||
}
|
58
tests/Test_cf_cr_unprec.cc
Normal file
58
tests/Test_cf_cr_unprec.cc
Normal file
@ -0,0 +1,58 @@
|
||||
#include <Grid.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
template<class d>
|
||||
struct scal {
|
||||
d internal;
|
||||
};
|
||||
|
||||
Gamma::GammaMatrix Gmu [] = {
|
||||
Gamma::GammaX,
|
||||
Gamma::GammaY,
|
||||
Gamma::GammaZ,
|
||||
Gamma::GammaT
|
||||
};
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
const int Ls=9;
|
||||
|
||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
|
||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||
|
||||
std::vector<int> seeds4({1,2,3,4});
|
||||
std::vector<int> seeds5({5,6,7,8});
|
||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
|
||||
LatticeFermion src(FGrid); random(RNG5,src);
|
||||
LatticeFermion result(FGrid); result=zero;
|
||||
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
|
||||
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U[mu] = peekIndex<LorentzIndex>(Umu,mu);
|
||||
}
|
||||
|
||||
RealD mass=0.1;
|
||||
RealD M5=1.8;
|
||||
|
||||
OverlapWilsonContFracTanhFermion Dcf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
|
||||
|
||||
ConjugateResidual<LatticeFermion> MCR(1.0e-8,10000);
|
||||
|
||||
MdagMLinearOperator<OverlapWilsonContFracTanhFermion,LatticeFermion> HermPosDefOp(Dcf);
|
||||
MCR(HermPosDefOp,src,result);
|
||||
|
||||
HermitianLinearOperator<OverlapWilsonContFracTanhFermion,LatticeFermion> HermIndefOp(Dcf);
|
||||
MCR(HermIndefOp,src,result);
|
||||
|
||||
Grid_finalize();
|
||||
}
|
@ -34,7 +34,6 @@ int main (int argc, char ** argv)
|
||||
}
|
||||
|
||||
|
||||
|
||||
TComplex cm;
|
||||
|
||||
for(int dir=0;dir<4;dir++){
|
||||
|
63
tests/Test_dwf_cr_unprec.cc
Normal file
63
tests/Test_dwf_cr_unprec.cc
Normal file
@ -0,0 +1,63 @@
|
||||
#include <Grid.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
|
||||
template<class d>
|
||||
struct scal {
|
||||
d internal;
|
||||
};
|
||||
|
||||
Gamma::GammaMatrix Gmu [] = {
|
||||
Gamma::GammaX,
|
||||
Gamma::GammaY,
|
||||
Gamma::GammaZ,
|
||||
Gamma::GammaT
|
||||
};
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
|
||||
|
||||
const int Ls=8;
|
||||
|
||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
|
||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||
|
||||
|
||||
std::vector<int> seeds4({1,2,3,4});
|
||||
std::vector<int> seeds5({5,6,7,8});
|
||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
|
||||
LatticeFermion src(FGrid); random(RNG5,src);
|
||||
LatticeFermion result(FGrid); result=zero;
|
||||
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
|
||||
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U[mu] = peekIndex<LorentzIndex>(Umu,mu);
|
||||
}
|
||||
|
||||
ConjugateResidual<LatticeFermion> MCR(1.0e-8,10000);
|
||||
|
||||
RealD mass=0.5;
|
||||
RealD M5=1.8;
|
||||
DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||
|
||||
MdagMLinearOperator<DomainWallFermion,LatticeFermion> HermOp(Ddwf);
|
||||
MCR(HermOp,src,result);
|
||||
|
||||
Gamma5R5HermitianLinearOperator<DomainWallFermion,LatticeFermion> g5HermOp(Ddwf);
|
||||
MCR(g5HermOp,src,result);
|
||||
|
||||
|
||||
Grid_finalize();
|
||||
}
|
@ -223,6 +223,9 @@ int main (int argc, char ** argv)
|
||||
//TComplex tracecm= trace(cm);
|
||||
//std::cout << cm << " "<< tracecm << std::endl;
|
||||
|
||||
cm = ProjectOnGroup(cm);
|
||||
|
||||
cm = Exponentiate(cm, 1.0, 10);
|
||||
|
||||
// Foo = Foo+scalar; // LatticeColourMatrix+Scalar
|
||||
// Foo = Foo*scalar; // LatticeColourMatrix*Scalar
|
||||
|
@ -20,12 +20,20 @@ public:
|
||||
sqrtscale = sqrtscale * adj(sqrtscale);// force real pos def
|
||||
scale = sqrtscale * sqrtscale;
|
||||
}
|
||||
// Support for coarsening to a multigrid
|
||||
void OpDiag (const Field &in, Field &out) {};
|
||||
void OpDir (const Field &in, Field &out,int dir,int disp){};
|
||||
|
||||
void Op (const Field &in, Field &out){
|
||||
out = scale * in;
|
||||
}
|
||||
void AdjOp (const Field &in, Field &out){
|
||||
out = scale * in;
|
||||
}
|
||||
void HermOp(const Field &in, Field &out){
|
||||
double n1, n2;
|
||||
HermOpAndNorm(in,out,n1,n2);
|
||||
}
|
||||
void HermOpAndNorm(const Field &in, Field &out,double &n1,double &n2){
|
||||
ComplexD dot;
|
||||
|
||||
|
@ -25,7 +25,6 @@ int main (int argc, char ** argv)
|
||||
std::vector<LatticeColourMatrix> U(4,&Fine);
|
||||
|
||||
NerscField header;
|
||||
|
||||
std::string file("./ckpoint_lat.4000");
|
||||
readNerscConfiguration(Umu,header,file);
|
||||
|
||||
@ -83,7 +82,7 @@ int main (int argc, char ** argv)
|
||||
Complex l = TensorRemove(Tl);
|
||||
std::cout << "calculated link trace " <<l*LinkTraceScale<<std::endl;
|
||||
|
||||
sumBlocks(cPlaq,Plaq);
|
||||
blockSum(cPlaq,Plaq);
|
||||
TComplex TcP = sum(cPlaq);
|
||||
Complex ll= TensorRemove(TcP);
|
||||
std::cout << "coarsened plaquettes sum to " <<ll*PlaqScale<<std::endl;
|
||||
|
@ -39,8 +39,8 @@ int main (int argc, char ** argv)
|
||||
InvSqrt.gnuplot(gnuplot);
|
||||
|
||||
double x=0.6789;
|
||||
double sx=sqrt(x);
|
||||
double ssx=sqrt(sx);
|
||||
double sx=std::sqrt(x);
|
||||
double ssx=std::sqrt(sx);
|
||||
double isx=1.0/sx;
|
||||
double issx=1.0/ssx;
|
||||
|
||||
|
59
tests/Test_wilson_cr_unprec.cc
Normal file
59
tests/Test_wilson_cr_unprec.cc
Normal file
@ -0,0 +1,59 @@
|
||||
#include <Grid.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
template<class d>
|
||||
struct scal {
|
||||
d internal;
|
||||
};
|
||||
|
||||
Gamma::GammaMatrix Gmu [] = {
|
||||
Gamma::GammaX,
|
||||
Gamma::GammaY,
|
||||
Gamma::GammaZ,
|
||||
Gamma::GammaT
|
||||
};
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
|
||||
std::vector<int> latt_size = GridDefaultLatt();
|
||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexF::Nsimd());
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
std::vector<int> seeds({1,2,3,4});
|
||||
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
|
||||
|
||||
LatticeFermion src(&Grid); random(pRNG,src);
|
||||
RealD nrm = norm2(src);
|
||||
LatticeFermion result(&Grid); result=zero;
|
||||
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
|
||||
|
||||
std::vector<LatticeColourMatrix> U(4,&Grid);
|
||||
|
||||
double volume=1;
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
volume=volume*latt_size[mu];
|
||||
}
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U[mu] = peekIndex<LorentzIndex>(Umu,mu);
|
||||
}
|
||||
|
||||
RealD mass=0.5;
|
||||
WilsonFermion Dw(Umu,Grid,RBGrid,mass);
|
||||
|
||||
MdagMLinearOperator<WilsonFermion,LatticeFermion> HermOp(Dw);
|
||||
|
||||
ConjugateResidual<LatticeFermion> MCR(1.0e-8,10000);
|
||||
|
||||
MCR(HermOp,src,result);
|
||||
|
||||
Grid_finalize();
|
||||
}
|
Loading…
Reference in New Issue
Block a user