1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-03 18:55:56 +01:00

Got unpreconditioned conjugate gradient to run and converge on a random (uniform random,

not even SU(3) for now) gauge field. Convergence history is correctly indepdendent of decomposition
on 1,2,4,8,16 mpi tasks.
Found a couple of simd bugs which required fixed and enhanced the Grid_simd.cc test suite.
Implemented the Mdag, M, MdagM, Meooe Mooee schur type stuff in the wilson dop.
This commit is contained in:
Peter Boyle 2015-05-19 13:57:35 +01:00
parent c7314e526e
commit ffc00caea3
33 changed files with 566 additions and 316 deletions

View File

@ -1,7 +1,7 @@
# Makefile.in generated by automake 1.14.1 from Makefile.am. # Makefile.in generated by automake 1.15 from Makefile.am.
# @configure_input@ # @configure_input@
# Copyright (C) 1994-2013 Free Software Foundation, Inc. # Copyright (C) 1994-2014 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation # This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -14,7 +14,17 @@
@SET_MAKE@ @SET_MAKE@
VPATH = @srcdir@ VPATH = @srcdir@
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' am__is_gnu_make = { \
if test -z '$(MAKELEVEL)'; then \
false; \
elif test -n '$(MAKE_HOST)'; then \
true; \
elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
true; \
else \
false; \
fi; \
}
am__make_running_with_option = \ am__make_running_with_option = \
case $${target_option-} in \ case $${target_option-} in \
?) ;; \ ?) ;; \
@ -79,14 +89,12 @@ build_triplet = @build@
host_triplet = @host@ host_triplet = @host@
target_triplet = @target@ target_triplet = @target@
subdir = . subdir = .
DIST_COMMON = INSTALL NEWS README AUTHORS ChangeLog \
$(srcdir)/Makefile.in $(srcdir)/Makefile.am \
$(top_srcdir)/configure $(am__configure_deps) COPYING TODO \
compile config.guess config.sub depcomp install-sh missing
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/configure.ac am__aclocal_m4_deps = $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4) $(ACLOCAL_M4)
DIST_COMMON = $(srcdir)/Makefile.am $(top_srcdir)/configure \
$(am__configure_deps) $(am__DIST_COMMON)
am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
configure.lineno config.status.lineno configure.lineno config.status.lineno
mkinstalldirs = $(install_sh) -d mkinstalldirs = $(install_sh) -d
@ -149,6 +157,9 @@ ETAGS = etags
CTAGS = ctags CTAGS = ctags
CSCOPE = cscope CSCOPE = cscope
DIST_SUBDIRS = $(SUBDIRS) DIST_SUBDIRS = $(SUBDIRS)
am__DIST_COMMON = $(srcdir)/Makefile.in AUTHORS COPYING ChangeLog \
INSTALL NEWS README TODO compile config.guess config.sub \
depcomp install-sh missing
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
distdir = $(PACKAGE)-$(VERSION) distdir = $(PACKAGE)-$(VERSION)
top_distdir = $(distdir) top_distdir = $(distdir)
@ -314,7 +325,6 @@ $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \
$(am__cd) $(top_srcdir) && \ $(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --gnu Makefile $(AUTOMAKE) --gnu Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \ @case '$?' in \
*config.status*) \ *config.status*) \
@ -521,15 +531,15 @@ dist-xz: distdir
$(am__post_remove_distdir) $(am__post_remove_distdir)
dist-tarZ: distdir dist-tarZ: distdir
@echo WARNING: "Support for shar distribution archives is" \ @echo WARNING: "Support for distribution archives compressed with" \
"deprecated." >&2 "legacy program 'compress' is deprecated." >&2
@echo WARNING: "It will be removed altogether in Automake 2.0" >&2 @echo WARNING: "It will be removed altogether in Automake 2.0" >&2
tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z
$(am__post_remove_distdir) $(am__post_remove_distdir)
dist-shar: distdir dist-shar: distdir
@echo WARNING: "Support for distribution archives compressed with" \ @echo WARNING: "Support for shar distribution archives is" \
"legacy program 'compress' is deprecated." >&2 "deprecated." >&2
@echo WARNING: "It will be removed altogether in Automake 2.0" >&2 @echo WARNING: "It will be removed altogether in Automake 2.0" >&2
shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz
$(am__post_remove_distdir) $(am__post_remove_distdir)
@ -565,17 +575,17 @@ distcheck: dist
esac esac
chmod -R a-w $(distdir) chmod -R a-w $(distdir)
chmod u+w $(distdir) chmod u+w $(distdir)
mkdir $(distdir)/_build $(distdir)/_inst mkdir $(distdir)/_build $(distdir)/_build/sub $(distdir)/_inst
chmod a-w $(distdir) chmod a-w $(distdir)
test -d $(distdir)/_build || exit 0; \ test -d $(distdir)/_build || exit 0; \
dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \ dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \
&& dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \ && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \
&& am__cwd=`pwd` \ && am__cwd=`pwd` \
&& $(am__cd) $(distdir)/_build \ && $(am__cd) $(distdir)/_build/sub \
&& ../configure \ && ../../configure \
$(AM_DISTCHECK_CONFIGURE_FLAGS) \ $(AM_DISTCHECK_CONFIGURE_FLAGS) \
$(DISTCHECK_CONFIGURE_FLAGS) \ $(DISTCHECK_CONFIGURE_FLAGS) \
--srcdir=.. --prefix="$$dc_install_base" \ --srcdir=../.. --prefix="$$dc_install_base" \
&& $(MAKE) $(AM_MAKEFLAGS) \ && $(MAKE) $(AM_MAKEFLAGS) \
&& $(MAKE) $(AM_MAKEFLAGS) dvi \ && $(MAKE) $(AM_MAKEFLAGS) dvi \
&& $(MAKE) $(AM_MAKEFLAGS) check \ && $(MAKE) $(AM_MAKEFLAGS) check \
@ -749,6 +759,8 @@ uninstall-am:
maintainer-clean-generic mostlyclean mostlyclean-generic pdf \ maintainer-clean-generic mostlyclean mostlyclean-generic pdf \
pdf-am ps ps-am tags tags-am uninstall uninstall-am pdf-am ps ps-am tags tags-am uninstall uninstall-am
.PRECIOUS: Makefile
# Tell versions [3.59,3.63) of GNU make to not export all variables. # Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded. # Otherwise a system limit (for SysV at least) may be exceeded.

61
aclocal.m4 vendored
View File

@ -1,6 +1,6 @@
# generated automatically by aclocal 1.14.1 -*- Autoconf -*- # generated automatically by aclocal 1.15 -*- Autoconf -*-
# Copyright (C) 1996-2013 Free Software Foundation, Inc. # Copyright (C) 1996-2014 Free Software Foundation, Inc.
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -20,7 +20,7 @@ You have another version of autoconf. It may work, but is not guaranteed to.
If you have problems, you may need to regenerate the build system entirely. If you have problems, you may need to regenerate the build system entirely.
To do so, use the procedure documented by the package, typically 'autoreconf'.])]) To do so, use the procedure documented by the package, typically 'autoreconf'.])])
# Copyright (C) 2002-2013 Free Software Foundation, Inc. # Copyright (C) 2002-2014 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -32,10 +32,10 @@ To do so, use the procedure documented by the package, typically 'autoreconf'.])
# generated from the m4 files accompanying Automake X.Y. # generated from the m4 files accompanying Automake X.Y.
# (This private macro should not be called outside this file.) # (This private macro should not be called outside this file.)
AC_DEFUN([AM_AUTOMAKE_VERSION], AC_DEFUN([AM_AUTOMAKE_VERSION],
[am__api_version='1.14' [am__api_version='1.15'
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
dnl require some minimum version. Point them to the right macro. dnl require some minimum version. Point them to the right macro.
m4_if([$1], [1.14.1], [], m4_if([$1], [1.15], [],
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
]) ])
@ -51,14 +51,14 @@ m4_define([_AM_AUTOCONF_VERSION], [])
# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced. # Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced.
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE. # This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
[AM_AUTOMAKE_VERSION([1.14.1])dnl [AM_AUTOMAKE_VERSION([1.15])dnl
m4_ifndef([AC_AUTOCONF_VERSION], m4_ifndef([AC_AUTOCONF_VERSION],
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
# AM_AUX_DIR_EXPAND -*- Autoconf -*- # AM_AUX_DIR_EXPAND -*- Autoconf -*-
# Copyright (C) 2001-2013 Free Software Foundation, Inc. # Copyright (C) 2001-2014 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -103,15 +103,14 @@ _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
# configured tree to be moved without reconfiguration. # configured tree to be moved without reconfiguration.
AC_DEFUN([AM_AUX_DIR_EXPAND], AC_DEFUN([AM_AUX_DIR_EXPAND],
[dnl Rely on autoconf to set up CDPATH properly. [AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl
AC_PREREQ([2.50])dnl # Expand $ac_aux_dir to an absolute path.
# expand $ac_aux_dir to an absolute path am_aux_dir=`cd "$ac_aux_dir" && pwd`
am_aux_dir=`cd $ac_aux_dir && pwd`
]) ])
# AM_CONDITIONAL -*- Autoconf -*- # AM_CONDITIONAL -*- Autoconf -*-
# Copyright (C) 1997-2013 Free Software Foundation, Inc. # Copyright (C) 1997-2014 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -142,7 +141,7 @@ AC_CONFIG_COMMANDS_PRE(
Usually this means the macro was only invoked conditionally.]]) Usually this means the macro was only invoked conditionally.]])
fi])]) fi])])
# Copyright (C) 1999-2013 Free Software Foundation, Inc. # Copyright (C) 1999-2014 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -333,7 +332,7 @@ _AM_SUBST_NOTMAKE([am__nodep])dnl
# Generate code to set up dependency tracking. -*- Autoconf -*- # Generate code to set up dependency tracking. -*- Autoconf -*-
# Copyright (C) 1999-2013 Free Software Foundation, Inc. # Copyright (C) 1999-2014 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -409,7 +408,7 @@ AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
# Do all the work for Automake. -*- Autoconf -*- # Do all the work for Automake. -*- Autoconf -*-
# Copyright (C) 1996-2013 Free Software Foundation, Inc. # Copyright (C) 1996-2014 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -499,8 +498,8 @@ AC_REQUIRE([AC_PROG_MKDIR_P])dnl
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00001.html> # <http://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html> # <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
AC_SUBST([mkdir_p], ['$(MKDIR_P)']) AC_SUBST([mkdir_p], ['$(MKDIR_P)'])
# We need awk for the "check" target. The system "awk" is bad on # We need awk for the "check" target (and possibly the TAP driver). The
# some platforms. # system "awk" is bad on some platforms.
AC_REQUIRE([AC_PROG_AWK])dnl AC_REQUIRE([AC_PROG_AWK])dnl
AC_REQUIRE([AC_PROG_MAKE_SET])dnl AC_REQUIRE([AC_PROG_MAKE_SET])dnl
AC_REQUIRE([AM_SET_LEADING_DOT])dnl AC_REQUIRE([AM_SET_LEADING_DOT])dnl
@ -573,7 +572,11 @@ to "yes", and re-run configure.
END END
AC_MSG_ERROR([Your 'rm' program is bad, sorry.]) AC_MSG_ERROR([Your 'rm' program is bad, sorry.])
fi fi
fi]) fi
dnl The trailing newline in this macro's definition is deliberate, for
dnl backward compatibility and to allow trailing 'dnl'-style comments
dnl after the AM_INIT_AUTOMAKE invocation. See automake bug#16841.
])
dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not
dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further
@ -602,7 +605,7 @@ for _am_header in $config_headers :; do
done done
echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count]) echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
# Copyright (C) 2001-2013 Free Software Foundation, Inc. # Copyright (C) 2001-2014 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -613,7 +616,7 @@ echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_co
# Define $install_sh. # Define $install_sh.
AC_DEFUN([AM_PROG_INSTALL_SH], AC_DEFUN([AM_PROG_INSTALL_SH],
[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl [AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
if test x"${install_sh}" != xset; then if test x"${install_sh+set}" != xset; then
case $am_aux_dir in case $am_aux_dir in
*\ * | *\ *) *\ * | *\ *)
install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
@ -623,7 +626,7 @@ if test x"${install_sh}" != xset; then
fi fi
AC_SUBST([install_sh])]) AC_SUBST([install_sh])])
# Copyright (C) 2003-2013 Free Software Foundation, Inc. # Copyright (C) 2003-2014 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -644,7 +647,7 @@ AC_SUBST([am__leading_dot])])
# Check to see how 'make' treats includes. -*- Autoconf -*- # Check to see how 'make' treats includes. -*- Autoconf -*-
# Copyright (C) 2001-2013 Free Software Foundation, Inc. # Copyright (C) 2001-2014 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -694,7 +697,7 @@ rm -f confinc confmf
# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*- # Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
# Copyright (C) 1997-2013 Free Software Foundation, Inc. # Copyright (C) 1997-2014 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -733,7 +736,7 @@ fi
# Helper functions for option handling. -*- Autoconf -*- # Helper functions for option handling. -*- Autoconf -*-
# Copyright (C) 2001-2013 Free Software Foundation, Inc. # Copyright (C) 2001-2014 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -764,7 +767,7 @@ AC_DEFUN([_AM_IF_OPTION],
# Check to make sure that the build environment is sane. -*- Autoconf -*- # Check to make sure that the build environment is sane. -*- Autoconf -*-
# Copyright (C) 1996-2013 Free Software Foundation, Inc. # Copyright (C) 1996-2014 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -845,7 +848,7 @@ AC_CONFIG_COMMANDS_PRE(
rm -f conftest.file rm -f conftest.file
]) ])
# Copyright (C) 2009-2013 Free Software Foundation, Inc. # Copyright (C) 2009-2014 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -905,7 +908,7 @@ AC_SUBST([AM_BACKSLASH])dnl
_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl _AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl
]) ])
# Copyright (C) 2001-2013 Free Software Foundation, Inc. # Copyright (C) 2001-2014 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -933,7 +936,7 @@ fi
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
AC_SUBST([INSTALL_STRIP_PROGRAM])]) AC_SUBST([INSTALL_STRIP_PROGRAM])])
# Copyright (C) 2006-2013 Free Software Foundation, Inc. # Copyright (C) 2006-2014 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -952,7 +955,7 @@ AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
# Check how to create a tarball. -*- Autoconf -*- # Check how to create a tarball. -*- Autoconf -*-
# Copyright (C) 2004-2013 Free Software Foundation, Inc. # Copyright (C) 2004-2014 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,

View File

@ -46,6 +46,13 @@ int main (int argc, char ** argv)
volume=volume*latt_size[mu]; volume=volume*latt_size[mu];
} }
// Only one non-zero (y)
Umu=zero;
for(int nn=0;nn<Nd;nn++){
random(pRNG,U[nn]);
pokeIndex<LorentzIndex>(Umu,U[nn],nn);
}
for(int mu=0;mu<Nd;mu++){ for(int mu=0;mu<Nd;mu++){
U[mu] = peekIndex<LorentzIndex>(Umu,mu); U[mu] = peekIndex<LorentzIndex>(Umu,mu);
} }
@ -53,7 +60,6 @@ int main (int argc, char ** argv)
{ // Naive wilson implementation { // Naive wilson implementation
ref = zero; ref = zero;
for(int mu=0;mu<Nd;mu++){ for(int mu=0;mu<Nd;mu++){
// ref = src + Gamma(Gamma::GammaX)* src ; // 1-gamma_x // ref = src + Gamma(Gamma::GammaX)* src ; // 1-gamma_x
tmp = U[mu]*Cshift(src,mu,1); tmp = U[mu]*Cshift(src,mu,1);
for(int i=0;i<ref._odata.size();i++){ for(int i=0;i<ref._odata.size();i++){
@ -75,7 +81,7 @@ int main (int argc, char ** argv)
int ncall=1000; int ncall=1000;
double t0=usecond(); double t0=usecond();
for(int i=0;i<ncall;i++){ for(int i=0;i<ncall;i++){
Dw.M(src,result); Dw.Dhop(src,result,0);
} }
double t1=usecond(); double t1=usecond();
double flops=1320*volume*ncall; double flops=1320*volume*ncall;
@ -99,5 +105,30 @@ int main (int argc, char ** argv)
} }
} }
{ // Naive wilson dag implementation
ref = zero;
for(int mu=0;mu<Nd;mu++){
// ref = src - Gamma(Gamma::GammaX)* src ; // 1+gamma_x
tmp = U[mu]*Cshift(src,mu,1);
for(int i=0;i<ref._odata.size();i++){
ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
}
tmp =adj(U[mu])*src;
tmp =Cshift(tmp,mu,-1);
for(int i=0;i<ref._odata.size();i++){
ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
}
}
}
Dw.Dhop(src,result,1);
std::cout << "Called DwDag"<<std::endl;
std::cout << "norm result "<< norm2(result)<<std::endl;
std::cout << "norm ref "<< norm2(ref)<<std::endl;
err = ref -result;
std::cout << "norm diff "<< norm2(err)<<std::endl;
Grid_finalize(); Grid_finalize();
} }

View File

@ -0,0 +1,56 @@
#include <Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
template<class d>
struct scal {
d internal;
};
Gamma::GammaMatrix Gmu [] = {
Gamma::GammaX,
Gamma::GammaY,
Gamma::GammaZ,
Gamma::GammaT
};
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexF::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
LatticeFermion src(&Grid); random(pRNG,src);
std::cout << "src norm" << norm2(src)<<std::endl;
LatticeFermion result(&Grid); result=zero;
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
std::vector<LatticeColourMatrix> U(4,&Grid);
double volume=1;
for(int mu=0;mu<Nd;mu++){
volume=volume*latt_size[mu];
}
for(int mu=0;mu<Nd;mu++){
U[mu] = peekIndex<LorentzIndex>(Umu,mu);
}
RealD mass=0.5;
WilsonMatrix Dw(Umu,mass);
HermitianOperator<WilsonMatrix,LatticeFermion> HermOp(Dw);
ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
CG(HermOp,src,result);
Grid_finalize();
}

View File

@ -5,12 +5,14 @@ AM_LDFLAGS = -L$(top_builddir)/lib
# #
# Test code # Test code
# #
bin_PROGRAMS = Grid_wilson Grid_comms Grid_memory_bandwidth Grid_su3 bin_PROGRAMS = Grid_wilson Grid_comms Grid_memory_bandwidth Grid_su3 Grid_wilson_cg_unprec
Grid_wilson_SOURCES = Grid_wilson.cc Grid_wilson_SOURCES = Grid_wilson.cc
Grid_wilson_LDADD = -lGrid Grid_wilson_LDADD = -lGrid
Grid_wilson_cg_unprec_SOURCES = Grid_wilson_cg_unprec.cc
Grid_wilson_cg_unprec_LDADD = -lGrid
Grid_comms_SOURCES = Grid_comms.cc Grid_comms_SOURCES = Grid_comms.cc
Grid_comms_LDADD = -lGrid Grid_comms_LDADD = -lGrid

2
config.guess vendored
View File

@ -1 +1 @@
/usr/share/automake-1.14/config.guess /opt/local/share/automake-1.15/config.guess

2
config.sub vendored
View File

@ -1 +1 @@
/usr/share/automake-1.14/config.sub /opt/local/share/automake-1.15/config.sub

13
configure vendored
View File

@ -2466,7 +2466,7 @@ test -n "$target_alias" &&
NONENONEs,x,x, && NONENONEs,x,x, &&
program_prefix=${target_alias}- program_prefix=${target_alias}-
am__api_version='1.14' am__api_version='1.15'
# Find a good install program. We prefer a C program (faster), # Find a good install program. We prefer a C program (faster),
# so one script is as good as another. But avoid the broken or # so one script is as good as another. But avoid the broken or
@ -2638,8 +2638,8 @@ test "$program_suffix" != NONE &&
ac_script='s/[\\$]/&&/g;s/;s,x,x,$//' ac_script='s/[\\$]/&&/g;s/;s,x,x,$//'
program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"` program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"`
# expand $ac_aux_dir to an absolute path # Expand $ac_aux_dir to an absolute path.
am_aux_dir=`cd $ac_aux_dir && pwd` am_aux_dir=`cd "$ac_aux_dir" && pwd`
if test x"${MISSING+set}" != xset; then if test x"${MISSING+set}" != xset; then
case $am_aux_dir in case $am_aux_dir in
@ -2658,7 +2658,7 @@ else
$as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;} $as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;}
fi fi
if test x"${install_sh}" != xset; then if test x"${install_sh+set}" != xset; then
case $am_aux_dir in case $am_aux_dir in
*\ * | *\ *) *\ * | *\ *)
install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
@ -2986,8 +2986,8 @@ MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"}
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html> # <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
mkdir_p='$(MKDIR_P)' mkdir_p='$(MKDIR_P)'
# We need awk for the "check" target. The system "awk" is bad on # We need awk for the "check" target (and possibly the TAP driver). The
# some platforms. # system "awk" is bad on some platforms.
# Always define AMTAR for backward compatibility. Yes, it's still used # Always define AMTAR for backward compatibility. Yes, it's still used
# in the wild :-( We should find a proper way to deprecate it ... # in the wild :-( We should find a proper way to deprecate it ...
AMTAR='$${TAR-tar}' AMTAR='$${TAR-tar}'
@ -3046,6 +3046,7 @@ END
fi fi
ac_config_headers="$ac_config_headers lib/Grid_config.h" ac_config_headers="$ac_config_headers lib/Grid_config.h"
# Check whether --enable-silent-rules was given. # Check whether --enable-silent-rules was given.

View File

@ -50,15 +50,20 @@ namespace Grid {
typedef std::complex<Real> Complex; typedef std::complex<Real> Complex;
inline RealF adj(const RealF & r){ return r; } inline RealF adj(const RealF & r){ return r; }
inline RealF conj(const RealF & r){ return r; } inline RealF conjugate(const RealF & r){ return r; }
inline RealF real(const RealF & r){ return r; } inline RealF real(const RealF & r){ return r; }
inline RealD adj(const RealD & r){ return r; } inline RealD adj(const RealD & r){ return r; }
inline RealD conj(const RealD & r){ return r; } inline RealD conjugate(const RealD & r){ return r; }
inline RealD real(const RealD & r){ return r; } inline RealD real(const RealD & r){ return r; }
inline ComplexD innerProduct(const ComplexD & l, const ComplexD & r) { return conj(l)*r; } inline ComplexD conjugate(const ComplexD& r){ return(conj(r)); }
inline ComplexF innerProduct(const ComplexF & l, const ComplexF & r) { return conj(l)*r; } inline ComplexD adj(const ComplexD& r){ return(conjugate(r)); }
inline ComplexF conjugate(const ComplexF& r ){ return(conj(r)); }
inline ComplexF adj(const ComplexF& r ){ return(conjugate(r)); }
inline ComplexD innerProduct(const ComplexD & l, const ComplexD & r) { return conjugate(l)*r; }
inline ComplexF innerProduct(const ComplexF & l, const ComplexF & r) { return conjugate(l)*r; }
inline RealD innerProduct(const RealD & l, const RealD & r) { return l*r; } inline RealD innerProduct(const RealD & l, const RealD & r) { return l*r; }
inline RealF innerProduct(const RealF & l, const RealF & r) { return l*r; } inline RealF innerProduct(const RealF & l, const RealF & r) { return l*r; }
@ -70,15 +75,14 @@ namespace Grid {
inline void mult(ComplexD * __restrict__ y,const ComplexD * __restrict__ l,const ComplexD *__restrict__ r){ *y = (*l) * (*r);} inline void mult(ComplexD * __restrict__ y,const ComplexD * __restrict__ l,const ComplexD *__restrict__ r){ *y = (*l) * (*r);}
inline void sub (ComplexD * __restrict__ y,const ComplexD * __restrict__ l,const ComplexD *__restrict__ r){ *y = (*l) - (*r);} inline void sub (ComplexD * __restrict__ y,const ComplexD * __restrict__ l,const ComplexD *__restrict__ r){ *y = (*l) - (*r);}
inline void add (ComplexD * __restrict__ y,const ComplexD * __restrict__ l,const ComplexD *__restrict__ r){ *y = (*l) + (*r);} inline void add (ComplexD * __restrict__ y,const ComplexD * __restrict__ l,const ComplexD *__restrict__ r){ *y = (*l) + (*r);}
inline ComplexD adj(const ComplexD& r){ return(conj(r)); } // conjugate already supported for complex
// conj already supported for complex
inline void mac (ComplexF * __restrict__ y,const ComplexF * __restrict__ a,const ComplexF *__restrict__ x){ *y = (*a) * (*x)+(*y); } inline void mac (ComplexF * __restrict__ y,const ComplexF * __restrict__ a,const ComplexF *__restrict__ x){ *y = (*a) * (*x)+(*y); }
inline void mult(ComplexF * __restrict__ y,const ComplexF * __restrict__ l,const ComplexF *__restrict__ r){ *y = (*l) * (*r); } inline void mult(ComplexF * __restrict__ y,const ComplexF * __restrict__ l,const ComplexF *__restrict__ r){ *y = (*l) * (*r); }
inline void sub (ComplexF * __restrict__ y,const ComplexF * __restrict__ l,const ComplexF *__restrict__ r){ *y = (*l) - (*r); } inline void sub (ComplexF * __restrict__ y,const ComplexF * __restrict__ l,const ComplexF *__restrict__ r){ *y = (*l) - (*r); }
inline void add (ComplexF * __restrict__ y,const ComplexF * __restrict__ l,const ComplexF *__restrict__ r){ *y = (*l) + (*r); } inline void add (ComplexF * __restrict__ y,const ComplexF * __restrict__ l,const ComplexF *__restrict__ r){ *y = (*l) + (*r); }
inline ComplexF adj(const ComplexF& r ){ return(conj(r)); }
//conj already supported for complex //conjugate already supported for complex
inline ComplexF timesI(const ComplexF &r) { return(r*ComplexF(0.0,1.0));} inline ComplexF timesI(const ComplexF &r) { return(r*ComplexF(0.0,1.0));}
inline ComplexD timesI(const ComplexD &r) { return(r*ComplexD(0.0,1.0));} inline ComplexD timesI(const ComplexD &r) { return(r*ComplexD(0.0,1.0));}

View File

@ -7,8 +7,8 @@ namespace Grid {
// LinearOperators Take a something and return a something. // LinearOperators Take a something and return a something.
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
// //
// Hopefully linearity is satisfied and the AdjOp is indeed the Hermitian conjugate (transpose if real): // Hopefully linearity is satisfied and the AdjOp is indeed the Hermitian conjugateugate (transpose if real):
// //SBase
// i) F(a x + b y) = aF(x) + b F(y). // i) F(a x + b y) = aF(x) + b F(y).
// ii) <x|Op|y> = <y|AdjOp|x>^\ast // ii) <x|Op|y> = <y|AdjOp|x>^\ast
// //
@ -25,12 +25,13 @@ namespace Grid {
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class HermitianOperatorBase : public LinearOperatorBase<Field> { template<class Field> class HermitianOperatorBase : public LinearOperatorBase<Field> {
public: public:
virtual RealD OpAndNorm(const Field &in, Field &out); virtual void OpAndNorm(const Field &in, Field &out,double &n1,double &n2);
void AdjOp(const Field &in, Field &out) { void AdjOp(const Field &in, Field &out) {
Op(in,out); Op(in,out);
}; };
void Op(const Field &in, Field &out) { void Op(const Field &in, Field &out) {
OpAndNorm(in,out); double n1,n2;
OpAndNorm(in,out,n1,n2);
}; };
}; };
@ -80,8 +81,8 @@ namespace Grid {
Matrix &_Mat; Matrix &_Mat;
public: public:
HermitianOperator(Matrix &Mat): _Mat(Mat) {}; HermitianOperator(Matrix &Mat): _Mat(Mat) {};
RealD OpAndNorm(const Field &in, Field &out){ void OpAndNorm(const Field &in, Field &out,double &n1,double &n2){
return _Mat.MdagM(in,out); return _Mat.MdagM(in,out,n1,n2);
} }
}; };

View File

@ -18,6 +18,7 @@ public:
std::cout << Tolerance<<std::endl; std::cout << Tolerance<<std::endl;
}; };
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi) {assert(0);};
void operator() (HermitianOperatorBase<Field> &Linop,const Field &src, Field &psi){ void operator() (HermitianOperatorBase<Field> &Linop,const Field &src, Field &psi){
RealD cp,c,a,d,b,ssq,qq,b_pred; RealD cp,c,a,d,b,ssq,qq,b_pred;
@ -61,21 +62,27 @@ public:
Linop.OpAndNorm(p,mmp,d,qq); Linop.OpAndNorm(p,mmp,d,qq);
// std::cout <<std::setprecision(4)<< "ConjugateGradient: d,qq "<<d<< " "<<qq <<std::endl;
a = c/d; a = c/d;
b_pred = a*(a*qq-d)/c; b_pred = a*(a*qq-d)/c;
cp = axpy_norm(r,mmp,r,-a); // std::cout <<std::setprecision(4)<< "ConjugateGradient: a,bp "<<a<< " "<<b_pred <<std::endl;
cp = axpy_norm(r,-a,mmp,r);
b = cp/c; b = cp/c;
// std::cout <<std::setprecision(4)<< "ConjugateGradient: cp,b "<<cp<< " "<<b <<std::endl;
// Fuse these loops ; should be really easy // Fuse these loops ; should be really easy
psi= a*p+psi; psi= a*p+psi;
p = p*b+r; p = p*b+r;
std::cout << "Iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl; std::cout<<"ConjugateGradient: Iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
// Stopping condition // Stopping condition
if ( cp <= rsq ) { if ( cp <= rsq ) {
Linop.Op(p,mmp); Linop.Op(psi,mmp);
p=mmp-src; p=mmp-src;
RealD mmpnorm = sqrt(norm2(mmp)); RealD mmpnorm = sqrt(norm2(mmp));
@ -83,8 +90,11 @@ public:
RealD srcnorm = sqrt(norm2(src)); RealD srcnorm = sqrt(norm2(src));
RealD resnorm = sqrt(norm2(p)); RealD resnorm = sqrt(norm2(p));
RealD true_residual = resnorm/srcnorm; RealD true_residual = resnorm/srcnorm;
std::cout<<"ConjugateGradient: Converged on iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
std::cout<<"ConjugateGradient: true residual is "<<true_residual<<" sol "<<psinorm<<" src "<<srcnorm<<std::endl; std::cout<<"ConjugateGradient: true residual is "<<true_residual<<" sol "<<psinorm<<" src "<<srcnorm<<std::endl;
std::cout<<"ConjugateGradient: target residual was "<<Tolerance<<std::endl; std::cout<<"ConjugateGradient: target residual was "<<Tolerance<<std::endl;
return;
} }
} }
std::cout<<"ConjugateGradient did NOT converge"<<std::endl; std::cout<<"ConjugateGradient did NOT converge"<<std::endl;

View File

@ -102,7 +102,7 @@ template <class arg> struct name\
GridUnopClass(UnarySub,-a); GridUnopClass(UnarySub,-a);
GridUnopClass(UnaryAdj,adj(a)); GridUnopClass(UnaryAdj,adj(a));
GridUnopClass(UnaryConj,conj(a)); GridUnopClass(UnaryConj,conjugate(a));
GridUnopClass(UnaryTrace,trace(a)); GridUnopClass(UnaryTrace,trace(a));
GridUnopClass(UnaryTranspose,transpose(a)); GridUnopClass(UnaryTranspose,transpose(a));
@ -178,7 +178,7 @@ template <typename T1,typename T2,typename T3> inline auto op(const T1 &pred,con
GRID_DEF_UNOP(operator -,UnarySub); GRID_DEF_UNOP(operator -,UnarySub);
GRID_DEF_UNOP(adj,UnaryAdj); GRID_DEF_UNOP(adj,UnaryAdj);
GRID_DEF_UNOP(conj,UnaryConj); GRID_DEF_UNOP(conjugate,UnaryConj);
GRID_DEF_UNOP(trace,UnaryTrace); GRID_DEF_UNOP(trace,UnaryTrace);
GRID_DEF_UNOP(transpose,UnaryTranspose); GRID_DEF_UNOP(transpose,UnaryTranspose);

View File

@ -144,14 +144,44 @@ PARALLEL_FOR_LOOP
} }
template<class sobj,class vobj> strong_inline template<class sobj,class vobj> strong_inline
void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){ void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
conformable(lhs,rhs); conformable(x,y);
#pragma omp parallel for #pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){ for(int ss=0;ss<x._grid->oSites();ss++){
vobj tmp = a*lhs._odata[ss]; vobj tmp = a*x._odata[ss]+y._odata[ss];
vstream(ret._odata[ss],tmp+rhs._odata[ss]); vstream(ret._odata[ss],tmp);
} }
} }
template<class sobj,class vobj> strong_inline
void axpby(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
conformable(x,y);
#pragma omp parallel for
for(int ss=0;ss<x._grid->oSites();ss++){
vobj tmp = a*x._odata[ss]+b*y._odata[ss];
vstream(ret._odata[ss],tmp);
}
}
template<class sobj,class vobj> strong_inline
RealD axpy_norm(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
conformable(x,y);
#pragma omp parallel for
for(int ss=0;ss<x._grid->oSites();ss++){
vobj tmp = a*x._odata[ss]+y._odata[ss];
vstream(ret._odata[ss],tmp);
}
return norm2(ret);
}
template<class sobj,class vobj> strong_inline
RealD axpby_norm(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
conformable(x,y);
#pragma omp parallel for
for(int ss=0;ss<x._grid->oSites();ss++){
vobj tmp = a*x._odata[ss]+b*y._odata[ss];
vstream(ret._odata[ss],tmp);
}
return norm2(ret); // FIXME implement parallel norm in ss loop
}
} }
#endif #endif

View File

@ -9,7 +9,7 @@ namespace Grid {
// Functionality: // Functionality:
// -=,+=,*=,() // -=,+=,*=,()
// add,+,sub,-,mult,mac,* // add,+,sub,-,mult,mac,*
// adj,conj // adj,conjugate
// real,imag // real,imag
// transpose,transposeIndex // transpose,transposeIndex
// trace,traceIndex // trace,traceIndex

View File

@ -18,11 +18,11 @@ PARALLEL_FOR_LOOP
return ret; return ret;
}; };
template<class vobj> inline Lattice<vobj> conj(const Lattice<vobj> &lhs){ template<class vobj> inline Lattice<vobj> conjugate(const Lattice<vobj> &lhs){
Lattice<vobj> ret(lhs._grid); Lattice<vobj> ret(lhs._grid);
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){ for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = conj(lhs._odata[ss]); ret._odata[ss] = conjugate(lhs._odata[ss]);
} }
return ret; return ret;
}; };

View File

@ -98,26 +98,26 @@ template<class vtype,int N> inline void timesMinusI(iMatrix<vtype,N> &ret,const
/////////////////////////////////////////////// ///////////////////////////////////////////////
// Conj function for scalar, vector, matrix // Conj function for scalar, vector, matrix
/////////////////////////////////////////////// ///////////////////////////////////////////////
template<class vtype> inline iScalar<vtype> conj(const iScalar<vtype>&r) template<class vtype> inline iScalar<vtype> conjugate(const iScalar<vtype>&r)
{ {
iScalar<vtype> ret; iScalar<vtype> ret;
ret._internal = conj(r._internal); ret._internal = conjugate(r._internal);
return ret; return ret;
} }
template<class vtype,int N> inline iVector<vtype,N> conj(const iVector<vtype,N>&r) template<class vtype,int N> inline iVector<vtype,N> conjugate(const iVector<vtype,N>&r)
{ {
iVector<vtype,N> ret; iVector<vtype,N> ret;
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
ret._internal[i] = conj(r._internal[i]); ret._internal[i] = conjugate(r._internal[i]);
} }
return ret; return ret;
} }
template<class vtype,int N> inline iMatrix<vtype,N> conj(const iMatrix<vtype,N>&r) template<class vtype,int N> inline iMatrix<vtype,N> conjugate(const iMatrix<vtype,N>&r)
{ {
iMatrix<vtype,N> ret; iMatrix<vtype,N> ret;
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
for(int j=0;j<N;j++){ for(int j=0;j<N;j++){
ret._internal[i][j] = conj(r._internal[i][j]); ret._internal[i][j] = conjugate(r._internal[i][j]);
}} }}
return ret; return ret;
} }

View File

@ -21,7 +21,13 @@ const int WilsonMatrix::Tm = 7;
class WilsonCompressor { class WilsonCompressor {
public: public:
int mu; int mu;
int dag;
WilsonCompressor(int _dag){
mu=0;
dag=_dag;
assert((dag==0)||(dag==1));
}
void Point(int p) { void Point(int p) {
mu=p; mu=p;
}; };
@ -29,7 +35,11 @@ const int WilsonMatrix::Tm = 7;
vHalfSpinColourVector operator () (const vSpinColourVector &in) vHalfSpinColourVector operator () (const vSpinColourVector &in)
{ {
vHalfSpinColourVector ret; vHalfSpinColourVector ret;
switch(mu) { int mudag=mu;
if (dag) {
mudag=(mu+Nd)%(2*Nd);
}
switch(mudag) {
case WilsonMatrix::Xp: case WilsonMatrix::Xp:
spProjXp(ret,in); spProjXp(ret,in);
break; break;
@ -87,43 +97,52 @@ void WilsonMatrix::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeF
RealD WilsonMatrix::M(const LatticeFermion &in, LatticeFermion &out) RealD WilsonMatrix::M(const LatticeFermion &in, LatticeFermion &out)
{ {
Dhop(in,out); Dhop(in,out,0);
return 0.0; out = (4+mass)*in - 0.5*out ; // FIXME : axpby_norm! fusion fun
return norm2(out);
} }
RealD WilsonMatrix::Mdag(const LatticeFermion &in, LatticeFermion &out) RealD WilsonMatrix::Mdag(const LatticeFermion &in, LatticeFermion &out)
{ {
Dhop(in,out); Dhop(in,out,1);
return 0.0; out = (4+mass)*in - 0.5*out ; // FIXME : axpby_norm! fusion fun
return norm2(out);
} }
void WilsonMatrix::Meooe(const LatticeFermion &in, LatticeFermion &out) void WilsonMatrix::Meooe(const LatticeFermion &in, LatticeFermion &out)
{ {
Dhop(in,out); Dhop(in,out,0);
out = 0.5*out; // FIXME : scale factor in Dhop
} }
void WilsonMatrix::MeooeDag(const LatticeFermion &in, LatticeFermion &out) void WilsonMatrix::MeooeDag(const LatticeFermion &in, LatticeFermion &out)
{ {
Dhop(in,out); Dhop(in,out,1);
} }
void WilsonMatrix::Mooee(const LatticeFermion &in, LatticeFermion &out) void WilsonMatrix::Mooee(const LatticeFermion &in, LatticeFermion &out)
{ {
out = (4.0+mass)*in;
return ; return ;
} }
void WilsonMatrix::MooeeInv(const LatticeFermion &in, LatticeFermion &out) void WilsonMatrix::MooeeInv(const LatticeFermion &in, LatticeFermion &out)
{ {
out = (1.0/(4.0+mass))*in;
return ; return ;
} }
void WilsonMatrix::MooeeDag(const LatticeFermion &in, LatticeFermion &out) void WilsonMatrix::MooeeDag(const LatticeFermion &in, LatticeFermion &out)
{ {
out = (1.0/(4.0+mass))*in;
return ; return ;
} }
void WilsonMatrix::MooeeInvDag(const LatticeFermion &in, LatticeFermion &out) void WilsonMatrix::MooeeInvDag(const LatticeFermion &in, LatticeFermion &out)
{ {
out = (1.0/(4.0+mass))*in;
return ; return ;
} }
void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out) void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out,int dag)
{ {
WilsonCompressor compressor; assert((dag==0) ||(dag==1));
WilsonCompressor compressor(dag);
Stencil.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(in,comm_buf,compressor); Stencil.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(in,comm_buf,compressor);
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
@ -140,13 +159,13 @@ PARALLEL_FOR_LOOP
int ssu= ss; int ssu= ss;
// Xp // Xp
offset = Stencil._offsets [Xp][ss]; int idx = (Xp+dag*4)%8;
local = Stencil._is_local[Xp][ss]; offset = Stencil._offsets [idx][ss];
perm = Stencil._permute[Xp][ss]; local = Stencil._is_local[idx][ss];
ptype = Stencil._permute_type[Xp]; perm = Stencil._permute[idx][ss];
if ( local && perm ) ptype = Stencil._permute_type[idx];
{ if ( local && perm ) {
spProjXp(tmp,in._odata[offset]); spProjXp(tmp,in._odata[offset]);
permute(chi,tmp,ptype); permute(chi,tmp,ptype);
} else if ( local ) { } else if ( local ) {
@ -154,18 +173,17 @@ PARALLEL_FOR_LOOP
} else { } else {
chi=comm_buf[offset]; chi=comm_buf[offset];
} }
mult(&Uchi(),&Umu._odata[ssu](Xp),&chi()); mult(&Uchi(),&Umu._odata[ssu](idx),&chi());
//prefetch(Umu._odata[ssu](Yp));
spReconXp(result,Uchi); spReconXp(result,Uchi);
// Yp // Yp
offset = Stencil._offsets [Yp][ss]; idx = (Yp+dag*4)%8;
local = Stencil._is_local[Yp][ss]; offset = Stencil._offsets [idx][ss];
perm = Stencil._permute[Yp][ss]; local = Stencil._is_local[idx][ss];
ptype = Stencil._permute_type[Yp]; perm = Stencil._permute[idx][ss];
ptype = Stencil._permute_type[idx];
if ( local && perm ) if ( local && perm ) {
{
spProjYp(tmp,in._odata[offset]); spProjYp(tmp,in._odata[offset]);
permute(chi,tmp,ptype); permute(chi,tmp,ptype);
} else if ( local ) { } else if ( local ) {
@ -173,18 +191,16 @@ PARALLEL_FOR_LOOP
} else { } else {
chi=comm_buf[offset]; chi=comm_buf[offset];
} }
mult(&Uchi(),&Umu._odata[ssu](Yp),&chi()); mult(&Uchi(),&Umu._odata[ssu](idx),&chi());
// prefetch(Umu._odata[ssu](Zp));
accumReconYp(result,Uchi); accumReconYp(result,Uchi);
// Zp // Zp
offset = Stencil._offsets [Zp][ss]; idx = (Zp+dag*4)%8;
local = Stencil._is_local[Zp][ss]; offset = Stencil._offsets [idx][ss];
perm = Stencil._permute[Zp][ss]; local = Stencil._is_local[idx][ss];
ptype = Stencil._permute_type[Zp]; perm = Stencil._permute[idx][ss];
ptype = Stencil._permute_type[idx];
if ( local && perm ) if ( local && perm ) {
{
spProjZp(tmp,in._odata[offset]); spProjZp(tmp,in._odata[offset]);
permute(chi,tmp,ptype); permute(chi,tmp,ptype);
} else if ( local ) { } else if ( local ) {
@ -192,18 +208,16 @@ PARALLEL_FOR_LOOP
} else { } else {
chi=comm_buf[offset]; chi=comm_buf[offset];
} }
mult(&Uchi(),&Umu._odata[ssu](Zp),&chi()); mult(&Uchi(),&Umu._odata[ssu](idx),&chi());
// prefetch(Umu._odata[ssu](Tp));
accumReconZp(result,Uchi); accumReconZp(result,Uchi);
// Tp // Tp
offset = Stencil._offsets [Tp][ss]; idx = (Tp+dag*4)%8;
local = Stencil._is_local[Tp][ss]; offset = Stencil._offsets [idx][ss];
perm = Stencil._permute[Tp][ss]; local = Stencil._is_local[idx][ss];
ptype = Stencil._permute_type[Tp]; perm = Stencil._permute[idx][ss];
ptype = Stencil._permute_type[idx];
if ( local && perm ) if ( local && perm ) {
{
spProjTp(tmp,in._odata[offset]); spProjTp(tmp,in._odata[offset]);
permute(chi,tmp,ptype); permute(chi,tmp,ptype);
} else if ( local ) { } else if ( local ) {
@ -211,15 +225,15 @@ PARALLEL_FOR_LOOP
} else { } else {
chi=comm_buf[offset]; chi=comm_buf[offset];
} }
mult(&Uchi(),&Umu._odata[ssu](Tp),&chi()); mult(&Uchi(),&Umu._odata[ssu](idx),&chi());
// prefetch(Umu._odata[ssu](Xm));
accumReconTp(result,Uchi); accumReconTp(result,Uchi);
// Xm // Xm
offset = Stencil._offsets [Xm][ss]; idx = (Xm+dag*4)%8;
local = Stencil._is_local[Xm][ss]; offset = Stencil._offsets [idx][ss];
perm = Stencil._permute[Xm][ss]; local = Stencil._is_local[idx][ss];
ptype = Stencil._permute_type[Xm]; perm = Stencil._permute[idx][ss];
ptype = Stencil._permute_type[idx];
if ( local && perm ) if ( local && perm )
{ {
@ -230,18 +244,18 @@ PARALLEL_FOR_LOOP
} else { } else {
chi=comm_buf[offset]; chi=comm_buf[offset];
} }
mult(&Uchi(),&Umu._odata[ssu](Xm),&chi()); mult(&Uchi(),&Umu._odata[ssu](idx),&chi());
accumReconXm(result,Uchi); accumReconXm(result,Uchi);
// Ym // Ym
offset = Stencil._offsets [Ym][ss]; idx = (Ym+dag*4)%8;
local = Stencil._is_local[Ym][ss]; offset = Stencil._offsets [idx][ss];
perm = Stencil._permute[Ym][ss]; local = Stencil._is_local[idx][ss];
ptype = Stencil._permute_type[Ym]; perm = Stencil._permute[idx][ss];
ptype = Stencil._permute_type[idx];
if ( local && perm ) if ( local && perm ) {
{
spProjYm(tmp,in._odata[offset]); spProjYm(tmp,in._odata[offset]);
permute(chi,tmp,ptype); permute(chi,tmp,ptype);
} else if ( local ) { } else if ( local ) {
@ -249,17 +263,16 @@ PARALLEL_FOR_LOOP
} else { } else {
chi=comm_buf[offset]; chi=comm_buf[offset];
} }
mult(&Uchi(),&Umu._odata[ssu](Ym),&chi()); mult(&Uchi(),&Umu._odata[ssu](idx),&chi());
accumReconYm(result,Uchi); accumReconYm(result,Uchi);
// Zm // Zm
offset = Stencil._offsets [Zm][ss]; idx = (Zm+dag*4)%8;
local = Stencil._is_local[Zm][ss]; offset = Stencil._offsets [idx][ss];
perm = Stencil._permute[Zm][ss]; local = Stencil._is_local[idx][ss];
ptype = Stencil._permute_type[Zm]; perm = Stencil._permute[idx][ss];
ptype = Stencil._permute_type[idx];
if ( local && perm ) if ( local && perm ) {
{
spProjZm(tmp,in._odata[offset]); spProjZm(tmp,in._odata[offset]);
permute(chi,tmp,ptype); permute(chi,tmp,ptype);
} else if ( local ) { } else if ( local ) {
@ -267,17 +280,16 @@ PARALLEL_FOR_LOOP
} else { } else {
chi=comm_buf[offset]; chi=comm_buf[offset];
} }
mult(&Uchi(),&Umu._odata[ssu](Zm),&chi()); mult(&Uchi(),&Umu._odata[ssu](idx),&chi());
accumReconZm(result,Uchi); accumReconZm(result,Uchi);
// Tm // Tm
offset = Stencil._offsets [Tm][ss]; idx = (Tm+dag*4)%8;
local = Stencil._is_local[Tm][ss]; offset = Stencil._offsets [idx][ss];
perm = Stencil._permute[Tm][ss]; local = Stencil._is_local[idx][ss];
ptype = Stencil._permute_type[Tm]; perm = Stencil._permute[idx][ss];
ptype = Stencil._permute_type[idx];
if ( local && perm ) if ( local && perm ) {
{
spProjTm(tmp,in._odata[offset]); spProjTm(tmp,in._odata[offset]);
permute(chi,tmp,ptype); permute(chi,tmp,ptype);
} else if ( local ) { } else if ( local ) {
@ -285,7 +297,7 @@ PARALLEL_FOR_LOOP
} else { } else {
chi=comm_buf[offset]; chi=comm_buf[offset];
} }
mult(&Uchi(),&Umu._odata[ssu](Tm),&chi()); mult(&Uchi(),&Umu._odata[ssu](idx),&chi());
accumReconTm(result,Uchi); accumReconTm(result,Uchi);
vstream(out._odata[ss],result); vstream(out._odata[ss],result);
@ -294,10 +306,6 @@ PARALLEL_FOR_LOOP
} }
void WilsonMatrix::Dw(const LatticeFermion &in, LatticeFermion &out)
{
return;
}

View File

@ -45,10 +45,7 @@ namespace Grid {
virtual void MooeeInvDag (const LatticeFermion &in, LatticeFermion &out); virtual void MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
// non-hermitian hopping term; half cb or both // non-hermitian hopping term; half cb or both
void Dhop(const LatticeFermion &in, LatticeFermion &out); void Dhop(const LatticeFermion &in, LatticeFermion &out,int dag);
// m+4r -1/2 Dhop; both cb's
void Dw(const LatticeFermion &in, LatticeFermion &out);
typedef iScalar<iMatrix<vComplex, Nc> > matrix; typedef iScalar<iMatrix<vComplex, Nc> > matrix;

View File

@ -32,7 +32,7 @@ namespace Grid {
friend inline void mult(vComplexD * __restrict__ y,const vComplexD * __restrict__ l,const vComplexD *__restrict__ r) {*y = (*l) * (*r);} friend inline void mult(vComplexD * __restrict__ y,const vComplexD * __restrict__ l,const vComplexD *__restrict__ r) {*y = (*l) * (*r);}
friend inline void sub (vComplexD * __restrict__ y,const vComplexD * __restrict__ l,const vComplexD *__restrict__ r) {*y = (*l) - (*r);} friend inline void sub (vComplexD * __restrict__ y,const vComplexD * __restrict__ l,const vComplexD *__restrict__ r) {*y = (*l) - (*r);}
friend inline void add (vComplexD * __restrict__ y,const vComplexD * __restrict__ l,const vComplexD *__restrict__ r) {*y = (*l) + (*r);} friend inline void add (vComplexD * __restrict__ y,const vComplexD * __restrict__ l,const vComplexD *__restrict__ r) {*y = (*l) + (*r);}
friend inline vComplexD adj(const vComplexD &in){ return conj(in); } friend inline vComplexD adj(const vComplexD &in){ return conjugate(in); }
////////////////////////////////// //////////////////////////////////
// Initialise to 1,0,i // Initialise to 1,0,i
@ -166,11 +166,11 @@ namespace Grid {
// all subtypes; may not be a good assumption, but could // all subtypes; may not be a good assumption, but could
// add the vector width as a template param for BG/Q for example // add the vector width as a template param for BG/Q for example
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
/*
friend inline void permute(vComplexD &y,vComplexD b,int perm) friend inline void permute(vComplexD &y,vComplexD b,int perm)
{ {
Gpermute<vComplexD>(y,b,perm); Gpermute<vComplexD>(y,b,perm);
} }
/*
friend inline void merge(vComplexD &y,std::vector<ComplexD *> &extracted) friend inline void merge(vComplexD &y,std::vector<ComplexD *> &extracted)
{ {
Gmerge<vComplexD,ComplexD >(y,extracted); Gmerge<vComplexD,ComplexD >(y,extracted);
@ -269,7 +269,7 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
//////////////////////// ////////////////////////
// Conjugate // Conjugate
//////////////////////// ////////////////////////
friend inline vComplexD conj(const vComplexD &in){ friend inline vComplexD conjugate(const vComplexD &in){
vComplexD ret ; vzero(ret); vComplexD ret ; vzero(ret);
#if defined (AVX1)|| defined (AVX2) #if defined (AVX1)|| defined (AVX2)
// addsubps 0, inv=>0+in.v[3] 0-in.v[2], 0+in.v[1], 0-in.v[0], ... // addsubps 0, inv=>0+in.v[3] 0-in.v[2], 0+in.v[1], 0-in.v[0], ...
@ -345,17 +345,17 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
// REDUCE FIXME must be a cleaner implementation // REDUCE FIXME must be a cleaner implementation
friend inline ComplexD Reduce(const vComplexD & in) friend inline ComplexD Reduce(const vComplexD & in)
{ {
#if defined SSE4 #ifdef SSE4
return ComplexD(in.v[0], in.v[1]); // inefficient return ComplexD(in.v[0],in.v[1]);
#endif
#if defined(AVX1) || defined (AVX2)
vComplexD v1;
permute(v1,in,0); // sse 128; paired complex single
v1=v1+in;
return ComplexD(v1.v[0],v1.v[1]);
#endif #endif
#if defined (AVX1) || defined(AVX2)
// return std::complex<double>(_mm256_mask_reduce_add_pd(0x55, in.v),_mm256_mask_reduce_add_pd(0xAA, in.v));
__attribute__ ((aligned(32))) double c_[4];
_mm256_store_pd(c_,in.v);
return ComplexD(c_[0]+c_[2],c_[1]+c_[3]);
#endif
#ifdef AVX512 #ifdef AVX512
return ComplexD(_mm512_mask_reduce_add_pd(0x55, in.v),_mm512_mask_reduce_add_pd(0xAA, in.v)); return ComplexD(_mm512_mask_reduce_add_pd(0x55, in.v),_mm512_mask_reduce_add_pd(0xAA, in.v));
#endif #endif
#ifdef QPX #ifdef QPX
#endif #endif
@ -387,7 +387,7 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
}; };
inline vComplexD innerProduct(const vComplexD & l, const vComplexD & r) { return conj(l)*r; } inline vComplexD innerProduct(const vComplexD & l, const vComplexD & r) { return conjugate(l)*r; }
typedef vComplexD vDComplex; typedef vComplexD vDComplex;

View File

@ -47,7 +47,7 @@ namespace Grid {
friend inline void mult(vComplexF * __restrict__ y,const vComplexF * __restrict__ l,const vComplexF *__restrict__ r){ *y = (*l) * (*r); } friend inline void mult(vComplexF * __restrict__ y,const vComplexF * __restrict__ l,const vComplexF *__restrict__ r){ *y = (*l) * (*r); }
friend inline void sub (vComplexF * __restrict__ y,const vComplexF * __restrict__ l,const vComplexF *__restrict__ r){ *y = (*l) - (*r); } friend inline void sub (vComplexF * __restrict__ y,const vComplexF * __restrict__ l,const vComplexF *__restrict__ r){ *y = (*l) - (*r); }
friend inline void add (vComplexF * __restrict__ y,const vComplexF * __restrict__ l,const vComplexF *__restrict__ r){ *y = (*l) + (*r); } friend inline void add (vComplexF * __restrict__ y,const vComplexF * __restrict__ l,const vComplexF *__restrict__ r){ *y = (*l) + (*r); }
friend inline vComplexF adj(const vComplexF &in){ return conj(in); } friend inline vComplexF adj(const vComplexF &in){ return conjugate(in); }
////////////////////////////////// //////////////////////////////////
// Initialise to 1,0,i // Initialise to 1,0,i
@ -228,42 +228,25 @@ namespace Grid {
ret.v = {a,b,a,b}; ret.v = {a,b,a,b};
#endif #endif
} }
friend inline ComplexF Reduce(const vComplexF & in) friend inline void permute(vComplexF &y,vComplexF b,int perm)
{ {
Gpermute<vComplexF>(y,b,perm);
}
friend inline ComplexF Reduce(const vComplexF & in)
{
#ifdef SSE4 #ifdef SSE4
union { vComplexF v1;
cvec v1; // SSE 4 x float vector permute(v1,in,0); // sse 128; paired complex single
float f[4]; // scalar array of 4 floats v1=v1+in;
} u128; return ComplexF(v1.v[0],v1.v[1]);
u128.v1= _mm_add_ps(in.v, _mm_shuffle_ps(in.v,in.v, 0b01001110)); // FIXME Prefer to use _MM_SHUFFLE macros
return ComplexF(u128.f[0], u128.f[1]);
#endif #endif
#ifdef AVX1 #if defined(AVX1) || defined (AVX2)
//it would be better passing 2 arguments to saturate the vector lanes vComplexF v1,v2;
union { permute(v1,in,0); // sse 128; paired complex single
__m256 v1; v1=v1+in;
float f[8]; permute(v2,v1,1); // avx 256; quad complex single
} u256; v1=v1+v2;
//SWAP lanes return ComplexF(v1.v[0],v1.v[1]);
// FIXME .. icc complains with lib/lattice/Grid_lattice_reduction.h (49): (col. 20) warning #13211: Immediate parameter to intrinsic call too large
__m256 t0 = _mm256_permute2f128_ps(in.v, in.v, 1);
__m256 t1 = _mm256_permute_ps(in.v , 0b11011000);//real (0,2,1,3)
__m256 t2 = _mm256_permute_ps(t0 , 0b10001101);//imag (1,3,0,2)
t0 = _mm256_blend_ps(t1, t2, 0b0101000001010000);// (0,0,1,1,0,0,1,1)
t1 = _mm256_hadd_ps(t0,t0);
u256.v1 = _mm256_hadd_ps(t1, t1);
return ComplexF(u256.f[0], u256.f[4]);
#endif
#ifdef AVX2
union {
__m256 v1;
float f[8];
} u256;
const __m256i mask= _mm256_set_epi32( 7, 5, 3, 1, 6, 4, 2, 0);
__m256 tmp1 = _mm256_permutevar8x32_ps(in.v, mask);
__m256 tmp2 = _mm256_hadd_ps(tmp1, tmp1);
u256.v1 = _mm256_hadd_ps(tmp2, tmp2);
return ComplexF(u256.f[0], u256.f[4]);
#endif #endif
#ifdef AVX512 #ifdef AVX512
return ComplexF(_mm512_mask_reduce_add_ps(0x5555, in.v),_mm512_mask_reduce_add_ps(0xAAAA, in.v)); return ComplexF(_mm512_mask_reduce_add_ps(0x5555, in.v),_mm512_mask_reduce_add_ps(0xAAAA, in.v));
@ -345,13 +328,10 @@ namespace Grid {
// Conjugate // Conjugate
/////////////////////// ///////////////////////
friend inline vComplexF conj(const vComplexF &in){ friend inline vComplexF conjugate(const vComplexF &in){
vComplexF ret ; vzero(ret); vComplexF ret ; vzero(ret);
#if defined (AVX1)|| defined (AVX2) #if defined (AVX1)|| defined (AVX2)
cvec tmp; ret.v = _mm256_xor_ps(_mm256_addsub_ps(ret.v,in.v), _mm256_set1_ps(-0.f));
tmp = _mm256_addsub_ps(ret.v,_mm256_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1))); // ymm1 <- br,bi
ret.v=_mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1));
#endif #endif
#ifdef SSE4 #ifdef SSE4
ret.v = _mm_xor_ps(_mm_addsub_ps(ret.v,in.v), _mm_set1_ps(-0.f)); ret.v = _mm_xor_ps(_mm_addsub_ps(ret.v,in.v), _mm_set1_ps(-0.f));
@ -433,10 +413,6 @@ namespace Grid {
return *this; return *this;
} }
friend inline void permute(vComplexF &y,vComplexF b,int perm)
{
Gpermute<vComplexF>(y,b,perm);
}
/* /*
friend inline void merge(vComplexF &y,std::vector<ComplexF *> &extracted) friend inline void merge(vComplexF &y,std::vector<ComplexF *> &extracted)
{ {
@ -460,7 +436,7 @@ namespace Grid {
inline vComplexF innerProduct(const vComplexF & l, const vComplexF & r) inline vComplexF innerProduct(const vComplexF & l, const vComplexF & r)
{ {
return conj(l)*r; return conjugate(l)*r;
} }
inline void zeroit(vComplexF &z){ vzero(z);} inline void zeroit(vComplexF &z){ vzero(z);}

View File

@ -117,7 +117,7 @@ namespace Grid {
}; };
/////////////////////////////////////////////// ///////////////////////////////////////////////
// mult, sub, add, adj,conj, mac functions // mult, sub, add, adj,conjugate, mac functions
/////////////////////////////////////////////// ///////////////////////////////////////////////
friend inline void mult(vInteger * __restrict__ y,const vInteger * __restrict__ l,const vInteger *__restrict__ r) {*y = (*l) * (*r);} friend inline void mult(vInteger * __restrict__ y,const vInteger * __restrict__ l,const vInteger *__restrict__ r) {*y = (*l) * (*r);}
friend inline void sub (vInteger * __restrict__ y,const vInteger * __restrict__ l,const vInteger *__restrict__ r) {*y = (*l) - (*r);} friend inline void sub (vInteger * __restrict__ y,const vInteger * __restrict__ l,const vInteger *__restrict__ r) {*y = (*l) - (*r);}

View File

@ -26,7 +26,7 @@ namespace Grid {
friend inline void sub (vRealD * __restrict__ y,const vRealD * __restrict__ l,const vRealD *__restrict__ r) {*y = (*l) - (*r);} friend inline void sub (vRealD * __restrict__ y,const vRealD * __restrict__ l,const vRealD *__restrict__ r) {*y = (*l) - (*r);}
friend inline void add (vRealD * __restrict__ y,const vRealD * __restrict__ l,const vRealD *__restrict__ r) {*y = (*l) + (*r);} friend inline void add (vRealD * __restrict__ y,const vRealD * __restrict__ l,const vRealD *__restrict__ r) {*y = (*l) + (*r);}
friend inline vRealD adj(const vRealD &in) { return in; } friend inline vRealD adj(const vRealD &in) { return in; }
friend inline vRealD conj(const vRealD &in){ return in; } friend inline vRealD conjugate(const vRealD &in){ return in; }
friend inline void mac (vRealD &y,const vRealD a,const vRealD x){ friend inline void mac (vRealD &y,const vRealD a,const vRealD x){
#if defined (AVX1) || defined (SSE4) #if defined (AVX1) || defined (SSE4)
@ -112,11 +112,12 @@ namespace Grid {
// all subtypes; may not be a good assumption, but could // all subtypes; may not be a good assumption, but could
// add the vector width as a template param for BG/Q for example // add the vector width as a template param for BG/Q for example
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
/*
friend inline void permute(vRealD &y,vRealD b,int perm) friend inline void permute(vRealD &y,vRealD b,int perm)
{ {
Gpermute<vRealD>(y,b,perm); Gpermute<vRealD>(y,b,perm);
} }
/*
friend inline void merge(vRealD &y,std::vector<RealD *> &extracted) friend inline void merge(vRealD &y,std::vector<RealD *> &extracted)
{ {
Gmerge<vRealD,RealD >(y,extracted); Gmerge<vRealD,RealD >(y,extracted);
@ -209,48 +210,26 @@ namespace Grid {
friend inline RealD Reduce(const vRealD & in) friend inline RealD Reduce(const vRealD & in)
{ {
#if defined (SSE4) #ifdef SSE4
// FIXME Hack vRealD v1;
const RealD * ptr =(const RealD *) &in; permute(v1,in,0); // sse 128; paired real double
RealD ret = 0; v1=v1+in;
for(int i=0;i<vRealD::Nsimd();i++){ return RealD(v1.v[0]);
ret = ret+ptr[i];
}
return ret;
#endif #endif
#if defined (AVX1) || defined(AVX2) #if defined(AVX1) || defined (AVX2)
typedef union { vRealD v1,v2;
uint64_t l; permute(v1,in,0); // avx 256; quad double
double d; v1=v1+in;
} my_conv_t; permute(v2,v1,1);
my_conv_t converter; v1=v1+v2;
// more reduce_add return v1.v[0];
/*
__attribute__ ((aligned(32))) double c_[16];
__m256d tmp = _mm256_permute2f128_pd(in.v,in.v,0x01); // tmp 1032; in= 3210
__m256d hadd = _mm256_hadd_pd(in.v,tmp); // hadd = 1+0,3+2,3+2,1+0
tmp = _mm256_permute2f128_pd(hadd,hadd,0x01);// tmp = 3+2,1+0,1+0,3+2
hadd = _mm256_hadd_pd(tmp,tmp); // tmp = 3+2+1+0,3+2+1+0,1+0+3+2,1+0+3+2
_mm256_store_pd(c_,hadd);ô
return c[0]
*/
__m256d tmp = _mm256_permute2f128_pd(in.v,in.v,0x01); // tmp 1032; in= 3210
__m256d hadd = _mm256_hadd_pd(in.v,tmp); // hadd = 1+0,3+2,3+2,1+0
hadd = _mm256_hadd_pd(hadd,hadd); // hadd = 1+0+3+2...
converter.l = _mm256_extract_epi64((ivec)hadd,0);
return converter.d;
#endif #endif
#ifdef AVX512 #ifdef AVX512
return _mm512_reduce_add_pd(in.v); return _mm512_reduce_add_pd(in.v);
/*
__attribute__ ((aligned(32))) double c_[8];
_mm512_store_pd(c_,in.v);
return c_[0]+c_[1]+c_[2]+c_[3]+c_[4]+c_[5]+c_[6]+c_[7];
*/
#endif #endif
#ifdef QPX #ifdef QPX
#endif #endif
} }
// *=,+=,-= operators // *=,+=,-= operators
inline vRealD &operator *=(const vRealD &r) { inline vRealD &operator *=(const vRealD &r) {
@ -270,7 +249,7 @@ namespace Grid {
static int Nsimd(void) { return sizeof(dvec)/sizeof(double);} static int Nsimd(void) { return sizeof(dvec)/sizeof(double);}
}; };
inline vRealD innerProduct(const vRealD & l, const vRealD & r) { return conj(l)*r; } inline vRealD innerProduct(const vRealD & l, const vRealD & r) { return conjugate(l)*r; }
inline void zeroit(vRealD &z){ vzero(z);} inline void zeroit(vRealD &z){ vzero(z);}
inline vRealD outerProduct(const vRealD &l, const vRealD& r) inline vRealD outerProduct(const vRealD &l, const vRealD& r)

View File

@ -92,13 +92,13 @@ namespace Grid {
}; };
/////////////////////////////////////////////// ///////////////////////////////////////////////
// mult, sub, add, adj,conj, mac functions // mult, sub, add, adj,conjugate, mac functions
/////////////////////////////////////////////// ///////////////////////////////////////////////
friend inline void mult(vRealF * __restrict__ y,const vRealF * __restrict__ l,const vRealF *__restrict__ r) {*y = (*l) * (*r);} friend inline void mult(vRealF * __restrict__ y,const vRealF * __restrict__ l,const vRealF *__restrict__ r) {*y = (*l) * (*r);}
friend inline void sub (vRealF * __restrict__ y,const vRealF * __restrict__ l,const vRealF *__restrict__ r) {*y = (*l) - (*r);} friend inline void sub (vRealF * __restrict__ y,const vRealF * __restrict__ l,const vRealF *__restrict__ r) {*y = (*l) - (*r);}
friend inline void add (vRealF * __restrict__ y,const vRealF * __restrict__ l,const vRealF *__restrict__ r) {*y = (*l) + (*r);} friend inline void add (vRealF * __restrict__ y,const vRealF * __restrict__ l,const vRealF *__restrict__ r) {*y = (*l) + (*r);}
friend inline vRealF adj(const vRealF &in) { return in; } friend inline vRealF adj(const vRealF &in) { return in; }
friend inline vRealF conj(const vRealF &in){ return in; } friend inline vRealF conjugate(const vRealF &in){ return in; }
friend inline void mac (vRealF &y,const vRealF a,const vRealF x){ friend inline void mac (vRealF &y,const vRealF a,const vRealF x){
#if defined (AVX1) || defined (SSE4) #if defined (AVX1) || defined (SSE4)
@ -133,11 +133,12 @@ namespace Grid {
// all subtypes; may not be a good assumption, but could // all subtypes; may not be a good assumption, but could
// add the vector width as a template param for BG/Q for example // add the vector width as a template param for BG/Q for example
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
/*
friend inline void permute(vRealF &y,vRealF b,int perm) friend inline void permute(vRealF &y,vRealF b,int perm)
{ {
Gpermute<vRealF>(y,b,perm); Gpermute<vRealF>(y,b,perm);
} }
/*
friend inline void merge(vRealF &y,std::vector<RealF *> &extracted) friend inline void merge(vRealF &y,std::vector<RealF *> &extracted)
{ {
Gmerge<vRealF,RealF >(y,extracted); Gmerge<vRealF,RealF >(y,extracted);
@ -155,7 +156,6 @@ namespace Grid {
Gextract<vRealF,RealF>(y,extracted); Gextract<vRealF,RealF>(y,extracted);
} }
*/ */
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
// Broadcast a value across Nsimd copies. // Broadcast a value across Nsimd copies.
@ -243,33 +243,26 @@ friend inline void vstore(const vRealF &ret, float *a){
} }
friend inline RealF Reduce(const vRealF & in) friend inline RealF Reduce(const vRealF & in)
{ {
#if defined (SSE4) #ifdef SSE4
// FIXME Hack vRealF v1,v2;
const RealF * ptr = (const RealF *) &in; permute(v1,in,0); // sse 128; quad single
RealF ret = 0; v1=v1+in;
for(int i=0;i<vRealF::Nsimd();i++){ permute(v2,v1,1);
ret = ret+ptr[i]; v1=v1+v2;
} return v1.v[0];
return ret;
#endif #endif
#if defined (AVX1) || defined(AVX2) #if defined(AVX1) || defined (AVX2)
__attribute__ ((aligned(32))) float c_[16]; vRealF v1,v2;
__m256 tmp = _mm256_permute2f128_ps(in.v,in.v,0x01); permute(v1,in,0); // avx 256; octo-double
__m256 hadd = _mm256_hadd_ps(in.v,tmp); v1=v1+in;
tmp = _mm256_permute2f128_ps(hadd,hadd,0x01); permute(v2,v1,1);
hadd = _mm256_hadd_ps(tmp,tmp); v1=v1+v2;
_mm256_store_ps(c_,hadd); permute(v2,v1,2);
return (float)c_[0]; v1=v1+v2;
return v1.v[0];
#endif #endif
#ifdef AVX512 #ifdef AVX512
return _mm512_reduce_add_ps(in.v); return _mm512_reduce_add_ps(in.v);
/*
__attribute__ ((aligned(64))) float c_[16];
_mm512_store_ps(c_,in.v);
return c_[0]+c_[1]+c_[2]+c_[3]+c_[4]+c_[5]+c_[6]+c_[7]
+c_[8]+c_[9]+c_[10]+c_[11]+c_[12]+c_[13]+c_[14]+c_[15];
*/
#endif #endif
#ifdef QPX #ifdef QPX
#endif #endif
@ -291,7 +284,7 @@ friend inline void vstore(const vRealF &ret, float *a){
public: public:
static inline int Nsimd(void) { return sizeof(fvec)/sizeof(float);} static inline int Nsimd(void) { return sizeof(fvec)/sizeof(float);}
}; };
inline vRealF innerProduct(const vRealF & l, const vRealF & r) { return conj(l)*r; } inline vRealF innerProduct(const vRealF & l, const vRealF & r) { return conjugate(l)*r; }
inline void zeroit(vRealF &z){ vzero(z);} inline void zeroit(vRealF &z){ vzero(z);}
inline vRealF outerProduct(const vRealF &l, const vRealF& r) inline vRealF outerProduct(const vRealF &l, const vRealF& r)

View File

@ -79,7 +79,7 @@ namespace Grid {
friend inline void mult(Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) * (*r); } friend inline void mult(Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) * (*r); }
friend inline void sub (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) - (*r); } friend inline void sub (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) - (*r); }
friend inline void add (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) + (*r); } friend inline void add (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) + (*r); }
friend inline Grid_simd adj(const Grid_simd &in){ return conj(in); } friend inline Grid_simd adj(const Grid_simd &in){ return conjugate(in); }
////////////////////////////////// //////////////////////////////////
// Initialise to 1,0,i // Initialise to 1,0,i
@ -193,7 +193,7 @@ namespace Grid {
// Conjugate // Conjugate
/////////////////////// ///////////////////////
friend inline Grid_simd conj(const Grid_simd &in){ friend inline Grid_simd conjugate(const Grid_simd &in){
Grid_simd ret ; vzero(ret); Grid_simd ret ; vzero(ret);
// FIXME add operator // FIXME add operator
return ret; return ret;
@ -265,7 +265,7 @@ namespace Grid {
template<class scalar_type, class vector_type > template<class scalar_type, class vector_type >
inline Grid_simd< scalar_type, vector_type> innerProduct(const Grid_simd< scalar_type, vector_type> & l, const Grid_simd< scalar_type, vector_type> & r) inline Grid_simd< scalar_type, vector_type> innerProduct(const Grid_simd< scalar_type, vector_type> & l, const Grid_simd< scalar_type, vector_type> & r)
{ {
return conj(l)*r; return conjugate(l)*r;
} }
template<class scalar_type, class vector_type > template<class scalar_type, class vector_type >

View File

@ -1,6 +1,6 @@
#!/bin/bash #!/bin/bash -e
DIRS="clang-avx clang-avx-openmp clang-avx-openmp-mpi clang-avx-mpi clang-avx2 clang-avx2-openmp clang-avx2-openmp-mpi clang-avx2-mpi" DIRS="clang-avx clang-avx-openmp clang-avx-openmp-mpi clang-avx-mpi clang-avx2 clang-avx2-openmp clang-avx2-openmp-mpi clang-avx2-mpi clang-sse"
EXTRADIRS="g++-avx g++-sse4 icpc-avx icpc-avx2 icpc-avx512" EXTRADIRS="g++-avx g++-sse4 icpc-avx icpc-avx2 icpc-avx512"
BLACK="\033[30m" BLACK="\033[30m"
RED="\033[31m" RED="\033[31m"

View File

@ -1,6 +1,6 @@
#!/bin/bash #!/bin/bash
DIRS="clang-avx clang-avx-openmp clang-avx-openmp-mpi clang-avx-mpi clang-avx2 clang-avx2-openmp clang-avx2-openmp-mpi clang-avx2-mpi icpc-avx icpc-avx2 icpc-avx512 g++-sse4 g++-avx" DIRS="clang-avx clang-avx-openmp clang-avx-openmp-mpi clang-avx-mpi clang-avx2 clang-avx2-openmp clang-avx2-openmp-mpi clang-avx2-mpi icpc-avx icpc-avx2 icpc-avx512 g++-sse4 g++-avx clang-sse"
for D in $DIRS for D in $DIRS
do do

View File

@ -34,6 +34,9 @@ icpc-avx512)
icpc-mic) icpc-mic)
CXX=icpc ../../configure --host=none --enable-simd=AVX512 CXXFLAGS="-mmic -O3 -std=c++11" LDFLAGS=-mmic LIBS="-lgmp -lmpfr" --enable-comms=none CXX=icpc ../../configure --host=none --enable-simd=AVX512 CXXFLAGS="-mmic -O3 -std=c++11" LDFLAGS=-mmic LIBS="-lgmp -lmpfr" --enable-comms=none
;; ;;
clang-sse)
CXX=clang++ ../../configure --enable-simd=SSE4 CXXFLAGS="-msse4 -O3 -std=c++11" LIBS="-lgmp -lmpfr" --enable-comms=none
;;
clang-avx) clang-avx)
CXX=clang++ ../../configure --enable-simd=AVX CXXFLAGS="-mavx -O3 -std=c++11" LIBS="-lgmp -lmpfr" --enable-comms=none CXX=clang++ ../../configure --enable-simd=AVX CXXFLAGS="-mavx -O3 -std=c++11" LIBS="-lgmp -lmpfr" --enable-comms=none
;; ;;
@ -47,16 +50,16 @@ clang-avx2-openmp)
CXX=clang-omp++ ../../configure --enable-simd=AVX2 CXXFLAGS="-mavx2 -mfma -fopenmp -O3 -std=c++11" LDFLAGS="-fopenmp" LIBS="-lgmp -lmpfr" --enable-comms=none CXX=clang-omp++ ../../configure --enable-simd=AVX2 CXXFLAGS="-mavx2 -mfma -fopenmp -O3 -std=c++11" LDFLAGS="-fopenmp" LIBS="-lgmp -lmpfr" --enable-comms=none
;; ;;
clang-avx-openmp-mpi) clang-avx-openmp-mpi)
CXX=clang-omp++ ../../configure --enable-simd=AVX CXXFLAGS="-mavx -fopenmp -O3 -I/opt/local/include/openmpi-mp/ -std=c++11" LDFLAGS=-L/opt/local/lib/openmpi-mp/ LIBS="-lmpi -lmpi_cxx -fopenmp" LIBS="-lgmp -lmpfr" --enable-comms=mpi CXX=clang-omp++ ../../configure --enable-simd=AVX CXXFLAGS="-mavx -fopenmp -O3 -I/opt/local/include/openmpi-mp/ -std=c++11" LDFLAGS=-L/opt/local/lib/openmpi-mp/ LIBS="-lmpi -lmpi_cxx -fopenmp -lgmp -lmpfr" --enable-comms=mpi
;; ;;
clang-avx2-openmp-mpi) clang-avx2-openmp-mpi)
CXX=clang-omp++ ../../configure --enable-simd=AVX2 CXXFLAGS="-mavx2 -mfma -fopenmp -O3 -I/opt/local/include/openmpi-mp/ -std=c++11" LDFLAGS=-L/opt/local/lib/openmpi-mp/ LIBS="-lmpi -lmpi_cxx -fopenmp" LIBS="-lgmp -lmpfr" --enable-comms=mpi CXX=clang-omp++ ../../configure --enable-simd=AVX2 CXXFLAGS="-mavx2 -mfma -fopenmp -O3 -I/opt/local/include/openmpi-mp/ -std=c++11" LDFLAGS=-L/opt/local/lib/openmpi-mp/ LIBS="-lmpi -lmpi_cxx -fopenmp -lgmp -lmpfr" --enable-comms=mpi
;; ;;
clang-avx-mpi) clang-avx-mpi)
CXX=clang++ ../../configure --enable-simd=AVX CXXFLAGS="-mavx -O3 -I/opt/local/include/openmpi-mp/ -std=c++11" LDFLAGS=-L/opt/local/lib/openmpi-mp/ LIBS="-lmpi -lmpi_cxx " LIBS="-lgmp -lmpfr" --enable-comms=mpi CXX=clang++ ../../configure --enable-simd=AVX CXXFLAGS="-mavx -O3 -I/opt/local/include/openmpi-mp/ -std=c++11" LDFLAGS=-L/opt/local/lib/openmpi-mp/ LIBS="-lmpi -lmpi_cxx -lgmp -lmpfr" --enable-comms=mpi
;; ;;
clang-avx2-mpi) clang-avx2-mpi)
CXX=clang++ ../../configure --enable-simd=AVX2 CXXFLAGS="-mavx2 -mfma -O3 -I/opt/local/include/openmpi-mp/ -std=c++11" LDFLAGS=-L/opt/local/lib/openmpi-mp/ LIBS="-lmpi -lmpi_cxx " LIBS="-lgmp -lmpfr" --enable-comms=mpi CXX=clang++ ../../configure --enable-simd=AVX2 CXXFLAGS="-mavx2 -mfma -O3 -I/opt/local/include/openmpi-mp/ -std=c++11" LDFLAGS=-L/opt/local/lib/openmpi-mp/ LIBS="-lmpi -lmpi_cxx -lgmp -lmpfr" --enable-comms=mpi
;; ;;
esac esac
echo -e $NORMAL echo -e $NORMAL

View File

@ -140,7 +140,7 @@ int main (int argc, char ** argv)
// -=,+=,*=,() // -=,+=,*=,()
// add,+,sub,-,mult,mac,* // add,+,sub,-,mult,mac,*
// adj,conj // adj,conjugate
// real,imag // real,imag
// transpose,transposeIndex // transpose,transposeIndex
// trace,traceIndex // trace,traceIndex
@ -437,7 +437,7 @@ int main (int argc, char ** argv)
for(int r=0;r<3;r++){ for(int r=0;r<3;r++){
for(int c=0;c<3;c++){ for(int c=0;c<3;c++){
diff =shifted1()()(r,c)-shifted2()()(r,c); diff =shifted1()()(r,c)-shifted2()()(r,c);
nn=real(conj(diff)*diff); nn=real(conjugate(diff)*diff);
if ( nn > 0 ) if ( nn > 0 )
cout<<"Shift fail (shifted1/shifted2-ref) "<<coor[0]<<coor[1]<<coor[2]<<coor[3] <<" " cout<<"Shift fail (shifted1/shifted2-ref) "<<coor[0]<<coor[1]<<coor[2]<<coor[3] <<" "
<<shifted1()()(r,c)<<" "<<shifted2()()(r,c) <<shifted1()()(r,c)<<" "<<shifted2()()(r,c)
@ -451,7 +451,7 @@ int main (int argc, char ** argv)
for(int r=0;r<3;r++){ for(int r=0;r<3;r++){
for(int c=0;c<3;c++){ for(int c=0;c<3;c++){
diff =shifted3()()(r,c)-shifted2()()(r,c); diff =shifted3()()(r,c)-shifted2()()(r,c);
nn=real(conj(diff)*diff); nn=real(conjugate(diff)*diff);
if ( nn > 0 ) if ( nn > 0 )
cout<<"Shift rb fail (shifted3/shifted2-ref) "<<coor[0]<<coor[1]<<coor[2]<<coor[3] <<" " cout<<"Shift rb fail (shifted3/shifted2-ref) "<<coor[0]<<coor[1]<<coor[2]<<coor[3] <<" "
<<shifted3()()(r,c)<<" "<<shifted2()()(r,c) <<shifted3()()(r,c)<<" "<<shifted2()()(r,c)
@ -468,7 +468,7 @@ int main (int argc, char ** argv)
for(int r=0;r<Nc;r++){ for(int r=0;r<Nc;r++){
for(int c=0;c<Nc;c++){ for(int c=0;c<Nc;c++){
diff =foobar2()()(r,c)-foobar1()()(r,c); diff =foobar2()()(r,c)-foobar1()()(r,c);
nrm = nrm + real(conj(diff)*diff); nrm = nrm + real(conjugate(diff)*diff);
}} }}
}}}} }}}}
if( Fine.IsBoss() ){ if( Fine.IsBoss() ){

52
tests/Grid_rng_fixed.cc Normal file
View File

@ -0,0 +1,52 @@
#include <Grid.h>
#include <parallelIO/GridNerscIO.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(4,vComplexF::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
std::vector<int> seeds({1,2,3,4});
GridSerialRNG fsRNG; fsRNG.SeedFixedIntegers(seeds);
GridParallelRNG fpRNG(&Grid); fpRNG.SeedFixedIntegers(seeds);
vComplexF tmp; random(fsRNG,tmp);
std::cout<<"Random vComplexF (fixed seed)\n"<< tmp<<std::endl;
std::cout<<"conjugate(tmp)\n"<< conjugate(tmp)<<std::endl;
std::cout<<"conjugate(tmp)*tmp\n"<< conjugate(tmp)*tmp<<std::endl;
std::cout<<"innerProduct"<< innerProduct(tmp,tmp)<<std::endl;
std::cout<<"Reduce(innerProduct)"<< Reduce(innerProduct(tmp,tmp))<<std::endl;
SpinMatrix rnd ;
random(fsRNG,rnd);
std::cout<<"Random Spin Matrix (fixed seed)\n"<< rnd<<std::endl;
SpinVector rv;
random(fsRNG,rv);
std::cout<<"Random Spin Vector (fixed seed)\n"<< rv<<std::endl;
gaussian(fsRNG,rv);
std::cout<<"Gaussian Spin Vector (fixed seed)\n"<< rv<<std::endl;
LatticeColourVector lcv(&Grid);
LatticeFermion src(&Grid); random(fpRNG,src);
std::cout << "src norm : " << norm2(src)<<std::endl;
std::cout << "src " << src<<std::endl;
random(fpRNG,lcv);
std::cout<<"Random Lattice Colour Vector (fixed seed)\n"<< lcv<<std::endl;
Grid_finalize();
}

View File

@ -26,7 +26,7 @@ public:
class funcConj { class funcConj {
public: public:
funcConj() {}; funcConj() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = conj(i1);} template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = conjugate(i1);}
std::string name(void) const { return std::string("Conj"); } std::string name(void) const { return std::string("Conj"); }
}; };
class funcAdj { class funcAdj {
@ -49,6 +49,34 @@ public:
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = timesMinusI(i1);} template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = timesMinusI(i1);}
std::string name(void) const { return std::string("timesMinusI"); } std::string name(void) const { return std::string("timesMinusI"); }
}; };
class funcInnerProduct {
public:
funcInnerProduct() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = innerProduct(i1,i2);}
std::string name(void) const { return std::string("innerProduct"); }
};
// FIXME still to test:
//
// innerProduct,
// norm2,
// Reduce,
//
// mac,mult,sub,add, vone,vzero,vcomplex_i, =zero,
// vset,vsplat,vstore,vstream,vload, scalar*vec, vec*scalar
// unary -,
// *= , -=, +=
// outerproduct,
// zeroit
// permute
class funcReduce {
public:
funcReduce() {};
template<class reduce,class vec> void vfunc(reduce &rr,vec &i1,vec &i2) const { rr = Reduce(i1);}
template<class reduce,class scal> void sfunc(reduce &rr,scal &i1,scal &i2) const { rr = i1;}
std::string name(void) const { return std::string("Reduce"); }
};
template<class scal, class vec,class functor > template<class scal, class vec,class functor >
void Tester(const functor &func) void Tester(const functor &func)
@ -96,8 +124,59 @@ void Tester(const functor &func)
ok++; ok++;
} }
} }
if ( ok==0 ) std::cout << " OK!" <<std::endl; if ( ok==0 ) {
std::cout << " OK!" <<std::endl;
}
assert(ok==0);
}
template<class reduced,class scal, class vec,class functor >
void ReductionTester(const functor &func)
{
GridSerialRNG sRNG;
sRNG.SeedRandomDevice();
int Nsimd = vec::Nsimd();
std::vector<scal> input1(Nsimd);
std::vector<scal> input2(Nsimd);
reduced result(0);
reduced reference(0);
reduced tmp;
std::vector<vec,alignedAllocator<vec> > buf(3);
vec & v_input1 = buf[0];
vec & v_input2 = buf[1];
for(int i=0;i<Nsimd;i++){
random(sRNG,input1[i]);
random(sRNG,input2[i]);
}
merge<vec,scal>(v_input1,input1);
merge<vec,scal>(v_input2,input2);
func.template vfunc<reduced,vec>(result,v_input1,v_input2);
for(int i=0;i<Nsimd;i++) {
func.template sfunc<reduced,scal>(tmp,input1[i],input2[i]);
reference+=tmp;
}
std::cout << " " << func.name()<<std::endl;
int ok=0;
if ( abs(reference-result)/abs(reference) > 1.0e-6 ){ // rounding is possible for reduce order
std::cout<< "*****" << std::endl;
std::cout<< abs(reference-result) << " " <<reference<< " " << result<<std::endl;
ok++;
}
if ( ok==0 ) {
std::cout << " OK!" <<std::endl;
}
assert(ok==0);
} }
@ -127,6 +206,8 @@ int main (int argc, char ** argv)
Tester<ComplexF,vComplexF>(funcTimes()); Tester<ComplexF,vComplexF>(funcTimes());
Tester<ComplexF,vComplexF>(funcConj()); Tester<ComplexF,vComplexF>(funcConj());
Tester<ComplexF,vComplexF>(funcAdj()); Tester<ComplexF,vComplexF>(funcAdj());
Tester<ComplexF,vComplexF>(funcInnerProduct());
ReductionTester<ComplexF,ComplexF,vComplexF>(funcReduce());
std::cout << "==================================="<< std::endl; std::cout << "==================================="<< std::endl;
std::cout << "Testing vComplexD "<<std::endl; std::cout << "Testing vComplexD "<<std::endl;
@ -140,6 +221,8 @@ int main (int argc, char ** argv)
Tester<ComplexD,vComplexD>(funcTimes()); Tester<ComplexD,vComplexD>(funcTimes());
Tester<ComplexD,vComplexD>(funcConj()); Tester<ComplexD,vComplexD>(funcConj());
Tester<ComplexD,vComplexD>(funcAdj()); Tester<ComplexD,vComplexD>(funcAdj());
Tester<ComplexD,vComplexD>(funcInnerProduct());
ReductionTester<ComplexD,ComplexD,vComplexD>(funcReduce());
std::cout << "==================================="<< std::endl; std::cout << "==================================="<< std::endl;
std::cout << "Testing vRealF "<<std::endl; std::cout << "Testing vRealF "<<std::endl;
@ -150,6 +233,9 @@ int main (int argc, char ** argv)
Tester<RealF,vRealF>(funcMinus()); Tester<RealF,vRealF>(funcMinus());
Tester<RealF,vRealF>(funcTimes()); Tester<RealF,vRealF>(funcTimes());
Tester<RealF,vRealF>(funcAdj()); Tester<RealF,vRealF>(funcAdj());
Tester<RealF,vRealF>(funcConj());
Tester<RealF,vRealF>(funcInnerProduct());
ReductionTester<RealF,RealF,vRealF>(funcReduce());
std::cout << "==================================="<< std::endl; std::cout << "==================================="<< std::endl;
std::cout << "Testing vRealD "<<std::endl; std::cout << "Testing vRealD "<<std::endl;
@ -159,6 +245,9 @@ int main (int argc, char ** argv)
Tester<RealD,vRealD>(funcMinus()); Tester<RealD,vRealD>(funcMinus());
Tester<RealD,vRealD>(funcTimes()); Tester<RealD,vRealD>(funcTimes());
Tester<RealD,vRealD>(funcAdj()); Tester<RealD,vRealD>(funcAdj());
Tester<RealD,vRealD>(funcConj());
Tester<RealD,vRealD>(funcInnerProduct());
ReductionTester<RealD,RealD,vRealD>(funcReduce());
Grid_finalize(); Grid_finalize();
} }

View File

@ -93,7 +93,7 @@ int main (int argc, char ** argv)
for(int r=0;r<3;r++){ for(int r=0;r<3;r++){
for(int c=0;c<3;c++){ for(int c=0;c<3;c++){
diff =check()()(r,c)-bar()()(r,c); diff =check()()(r,c)-bar()()(r,c);
double nn=real(conj(diff)*diff); double nn=real(conjugate(diff)*diff);
if ( nn > 0){ if ( nn > 0){
printf("Coor (%d %d %d %d) \t rc %d%d \t %le (%le,%le) %le\n", printf("Coor (%d %d %d %d) \t rc %d%d \t %le (%le,%le) %le\n",
coor[0],coor[1],coor[2],coor[3],r,c, coor[0],coor[1],coor[2],coor[3],r,c,
@ -103,8 +103,8 @@ int main (int argc, char ** argv)
real(bar()()(r,c)) real(bar()()(r,c))
); );
} }
snrmC=snrmC+real(conj(check()()(r,c))*check()()(r,c)); snrmC=snrmC+real(conjugate(check()()(r,c))*check()()(r,c));
snrmB=snrmB+real(conj(bar()()(r,c))*bar()()(r,c)); snrmB=snrmB+real(conjugate(bar()()(r,c))*bar()()(r,c));
snrm=snrm+nn; snrm=snrm+nn;
}} }}

View File

@ -5,7 +5,7 @@ AM_LDFLAGS = -L$(top_builddir)/lib
# #
# Test code # Test code
# #
bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma Grid_simd Grid_rng Grid_remez bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma Grid_simd Grid_rng Grid_remez Grid_rng_fixed
Grid_main_SOURCES = Grid_main.cc Grid_main_SOURCES = Grid_main.cc
Grid_main_LDADD = -lGrid Grid_main_LDADD = -lGrid
@ -13,6 +13,9 @@ Grid_main_LDADD = -lGrid
Grid_rng_SOURCES = Grid_rng.cc Grid_rng_SOURCES = Grid_rng.cc
Grid_rng_LDADD = -lGrid Grid_rng_LDADD = -lGrid
Grid_rng_fixed_SOURCES = Grid_rng_fixed.cc
Grid_rng_fixed_LDADD = -lGrid
Grid_remez_SOURCES = Grid_remez.cc Grid_remez_SOURCES = Grid_remez.cc
Grid_remez_LDADD = -lGrid Grid_remez_LDADD = -lGrid

View File

@ -186,7 +186,7 @@ int main (int argc, char ** argv)
for(int r=0;r<3;r++){ for(int r=0;r<3;r++){
for(int c=0;c<3;c++){ for(int c=0;c<3;c++){
diff =check._internal._internal[r][c]-bar._internal._internal[r][c]; diff =check._internal._internal[r][c]-bar._internal._internal[r][c];
double nn=real(conj(diff)*diff); double nn=real(conjugate(diff)*diff);
if ( nn > 0 ){ if ( nn > 0 ){
printf("Coor (%d %d %d %d) \t rc %d%d \t %le %le %le\n", printf("Coor (%d %d %d %d) \t rc %d%d \t %le %le %le\n",
coor[0],coor[1],coor[2],coor[3],r,c, coor[0],coor[1],coor[2],coor[3],r,c,