1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Got unpreconditioned conjugate gradient to run and converge on a random (uniform random,

not even SU(3) for now) gauge field. Convergence history is correctly indepdendent of decomposition
on 1,2,4,8,16 mpi tasks.
Found a couple of simd bugs which required fixed and enhanced the Grid_simd.cc test suite.
Implemented the Mdag, M, MdagM, Meooe Mooee schur type stuff in the wilson dop.
This commit is contained in:
Peter Boyle 2015-05-19 13:57:35 +01:00
parent 6f387b4916
commit a6e1ea216d
33 changed files with 566 additions and 316 deletions

View File

@ -1,7 +1,7 @@
# Makefile.in generated by automake 1.14.1 from Makefile.am.
# Makefile.in generated by automake 1.15 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
# Copyright (C) 1994-2014 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -14,7 +14,17 @@
@SET_MAKE@
VPATH = @srcdir@
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
am__is_gnu_make = { \
if test -z '$(MAKELEVEL)'; then \
false; \
elif test -n '$(MAKE_HOST)'; then \
true; \
elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
true; \
else \
false; \
fi; \
}
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
@ -79,14 +89,12 @@ build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
subdir = .
DIST_COMMON = INSTALL NEWS README AUTHORS ChangeLog \
$(srcdir)/Makefile.in $(srcdir)/Makefile.am \
$(top_srcdir)/configure $(am__configure_deps) COPYING TODO \
compile config.guess config.sub depcomp install-sh missing
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
DIST_COMMON = $(srcdir)/Makefile.am $(top_srcdir)/configure \
$(am__configure_deps) $(am__DIST_COMMON)
am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
configure.lineno config.status.lineno
mkinstalldirs = $(install_sh) -d
@ -149,6 +157,9 @@ ETAGS = etags
CTAGS = ctags
CSCOPE = cscope
DIST_SUBDIRS = $(SUBDIRS)
am__DIST_COMMON = $(srcdir)/Makefile.in AUTHORS COPYING ChangeLog \
INSTALL NEWS README TODO compile config.guess config.sub \
depcomp install-sh missing
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
distdir = $(PACKAGE)-$(VERSION)
top_distdir = $(distdir)
@ -314,7 +325,6 @@ $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --gnu Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
@ -521,15 +531,15 @@ dist-xz: distdir
$(am__post_remove_distdir)
dist-tarZ: distdir
@echo WARNING: "Support for shar distribution archives is" \
"deprecated." >&2
@echo WARNING: "Support for distribution archives compressed with" \
"legacy program 'compress' is deprecated." >&2
@echo WARNING: "It will be removed altogether in Automake 2.0" >&2
tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z
$(am__post_remove_distdir)
dist-shar: distdir
@echo WARNING: "Support for distribution archives compressed with" \
"legacy program 'compress' is deprecated." >&2
@echo WARNING: "Support for shar distribution archives is" \
"deprecated." >&2
@echo WARNING: "It will be removed altogether in Automake 2.0" >&2
shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz
$(am__post_remove_distdir)
@ -565,17 +575,17 @@ distcheck: dist
esac
chmod -R a-w $(distdir)
chmod u+w $(distdir)
mkdir $(distdir)/_build $(distdir)/_inst
mkdir $(distdir)/_build $(distdir)/_build/sub $(distdir)/_inst
chmod a-w $(distdir)
test -d $(distdir)/_build || exit 0; \
dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \
&& dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \
&& am__cwd=`pwd` \
&& $(am__cd) $(distdir)/_build \
&& ../configure \
&& $(am__cd) $(distdir)/_build/sub \
&& ../../configure \
$(AM_DISTCHECK_CONFIGURE_FLAGS) \
$(DISTCHECK_CONFIGURE_FLAGS) \
--srcdir=.. --prefix="$$dc_install_base" \
--srcdir=../.. --prefix="$$dc_install_base" \
&& $(MAKE) $(AM_MAKEFLAGS) \
&& $(MAKE) $(AM_MAKEFLAGS) dvi \
&& $(MAKE) $(AM_MAKEFLAGS) check \
@ -749,6 +759,8 @@ uninstall-am:
maintainer-clean-generic mostlyclean mostlyclean-generic pdf \
pdf-am ps ps-am tags tags-am uninstall uninstall-am
.PRECIOUS: Makefile
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.

61
aclocal.m4 vendored
View File

@ -1,6 +1,6 @@
# generated automatically by aclocal 1.14.1 -*- Autoconf -*-
# generated automatically by aclocal 1.15 -*- Autoconf -*-
# Copyright (C) 1996-2013 Free Software Foundation, Inc.
# Copyright (C) 1996-2014 Free Software Foundation, Inc.
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -20,7 +20,7 @@ You have another version of autoconf. It may work, but is not guaranteed to.
If you have problems, you may need to regenerate the build system entirely.
To do so, use the procedure documented by the package, typically 'autoreconf'.])])
# Copyright (C) 2002-2013 Free Software Foundation, Inc.
# Copyright (C) 2002-2014 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -32,10 +32,10 @@ To do so, use the procedure documented by the package, typically 'autoreconf'.])
# generated from the m4 files accompanying Automake X.Y.
# (This private macro should not be called outside this file.)
AC_DEFUN([AM_AUTOMAKE_VERSION],
[am__api_version='1.14'
[am__api_version='1.15'
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
dnl require some minimum version. Point them to the right macro.
m4_if([$1], [1.14.1], [],
m4_if([$1], [1.15], [],
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
])
@ -51,14 +51,14 @@ m4_define([_AM_AUTOCONF_VERSION], [])
# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced.
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
[AM_AUTOMAKE_VERSION([1.14.1])dnl
[AM_AUTOMAKE_VERSION([1.15])dnl
m4_ifndef([AC_AUTOCONF_VERSION],
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
# AM_AUX_DIR_EXPAND -*- Autoconf -*-
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
# Copyright (C) 2001-2014 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -103,15 +103,14 @@ _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
# configured tree to be moved without reconfiguration.
AC_DEFUN([AM_AUX_DIR_EXPAND],
[dnl Rely on autoconf to set up CDPATH properly.
AC_PREREQ([2.50])dnl
# expand $ac_aux_dir to an absolute path
am_aux_dir=`cd $ac_aux_dir && pwd`
[AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl
# Expand $ac_aux_dir to an absolute path.
am_aux_dir=`cd "$ac_aux_dir" && pwd`
])
# AM_CONDITIONAL -*- Autoconf -*-
# Copyright (C) 1997-2013 Free Software Foundation, Inc.
# Copyright (C) 1997-2014 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -142,7 +141,7 @@ AC_CONFIG_COMMANDS_PRE(
Usually this means the macro was only invoked conditionally.]])
fi])])
# Copyright (C) 1999-2013 Free Software Foundation, Inc.
# Copyright (C) 1999-2014 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -333,7 +332,7 @@ _AM_SUBST_NOTMAKE([am__nodep])dnl
# Generate code to set up dependency tracking. -*- Autoconf -*-
# Copyright (C) 1999-2013 Free Software Foundation, Inc.
# Copyright (C) 1999-2014 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -409,7 +408,7 @@ AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
# Do all the work for Automake. -*- Autoconf -*-
# Copyright (C) 1996-2013 Free Software Foundation, Inc.
# Copyright (C) 1996-2014 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -499,8 +498,8 @@ AC_REQUIRE([AC_PROG_MKDIR_P])dnl
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
AC_SUBST([mkdir_p], ['$(MKDIR_P)'])
# We need awk for the "check" target. The system "awk" is bad on
# some platforms.
# We need awk for the "check" target (and possibly the TAP driver). The
# system "awk" is bad on some platforms.
AC_REQUIRE([AC_PROG_AWK])dnl
AC_REQUIRE([AC_PROG_MAKE_SET])dnl
AC_REQUIRE([AM_SET_LEADING_DOT])dnl
@ -573,7 +572,11 @@ to "yes", and re-run configure.
END
AC_MSG_ERROR([Your 'rm' program is bad, sorry.])
fi
fi])
fi
dnl The trailing newline in this macro's definition is deliberate, for
dnl backward compatibility and to allow trailing 'dnl'-style comments
dnl after the AM_INIT_AUTOMAKE invocation. See automake bug#16841.
])
dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not
dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further
@ -602,7 +605,7 @@ for _am_header in $config_headers :; do
done
echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
# Copyright (C) 2001-2014 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -613,7 +616,7 @@ echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_co
# Define $install_sh.
AC_DEFUN([AM_PROG_INSTALL_SH],
[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
if test x"${install_sh}" != xset; then
if test x"${install_sh+set}" != xset; then
case $am_aux_dir in
*\ * | *\ *)
install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
@ -623,7 +626,7 @@ if test x"${install_sh}" != xset; then
fi
AC_SUBST([install_sh])])
# Copyright (C) 2003-2013 Free Software Foundation, Inc.
# Copyright (C) 2003-2014 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -644,7 +647,7 @@ AC_SUBST([am__leading_dot])])
# Check to see how 'make' treats includes. -*- Autoconf -*-
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
# Copyright (C) 2001-2014 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -694,7 +697,7 @@ rm -f confinc confmf
# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
# Copyright (C) 1997-2013 Free Software Foundation, Inc.
# Copyright (C) 1997-2014 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -733,7 +736,7 @@ fi
# Helper functions for option handling. -*- Autoconf -*-
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
# Copyright (C) 2001-2014 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -764,7 +767,7 @@ AC_DEFUN([_AM_IF_OPTION],
# Check to make sure that the build environment is sane. -*- Autoconf -*-
# Copyright (C) 1996-2013 Free Software Foundation, Inc.
# Copyright (C) 1996-2014 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -845,7 +848,7 @@ AC_CONFIG_COMMANDS_PRE(
rm -f conftest.file
])
# Copyright (C) 2009-2013 Free Software Foundation, Inc.
# Copyright (C) 2009-2014 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -905,7 +908,7 @@ AC_SUBST([AM_BACKSLASH])dnl
_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl
])
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
# Copyright (C) 2001-2014 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -933,7 +936,7 @@ fi
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
AC_SUBST([INSTALL_STRIP_PROGRAM])])
# Copyright (C) 2006-2013 Free Software Foundation, Inc.
# Copyright (C) 2006-2014 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -952,7 +955,7 @@ AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
# Check how to create a tarball. -*- Autoconf -*-
# Copyright (C) 2004-2013 Free Software Foundation, Inc.
# Copyright (C) 2004-2014 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,

View File

@ -46,6 +46,13 @@ int main (int argc, char ** argv)
volume=volume*latt_size[mu];
}
// Only one non-zero (y)
Umu=zero;
for(int nn=0;nn<Nd;nn++){
random(pRNG,U[nn]);
pokeIndex<LorentzIndex>(Umu,U[nn],nn);
}
for(int mu=0;mu<Nd;mu++){
U[mu] = peekIndex<LorentzIndex>(Umu,mu);
}
@ -53,7 +60,6 @@ int main (int argc, char ** argv)
{ // Naive wilson implementation
ref = zero;
for(int mu=0;mu<Nd;mu++){
// ref = src + Gamma(Gamma::GammaX)* src ; // 1-gamma_x
tmp = U[mu]*Cshift(src,mu,1);
for(int i=0;i<ref._odata.size();i++){
@ -75,7 +81,7 @@ int main (int argc, char ** argv)
int ncall=1000;
double t0=usecond();
for(int i=0;i<ncall;i++){
Dw.M(src,result);
Dw.Dhop(src,result,0);
}
double t1=usecond();
double flops=1320*volume*ncall;
@ -99,5 +105,30 @@ int main (int argc, char ** argv)
}
}
{ // Naive wilson dag implementation
ref = zero;
for(int mu=0;mu<Nd;mu++){
// ref = src - Gamma(Gamma::GammaX)* src ; // 1+gamma_x
tmp = U[mu]*Cshift(src,mu,1);
for(int i=0;i<ref._odata.size();i++){
ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
}
tmp =adj(U[mu])*src;
tmp =Cshift(tmp,mu,-1);
for(int i=0;i<ref._odata.size();i++){
ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
}
}
}
Dw.Dhop(src,result,1);
std::cout << "Called DwDag"<<std::endl;
std::cout << "norm result "<< norm2(result)<<std::endl;
std::cout << "norm ref "<< norm2(ref)<<std::endl;
err = ref -result;
std::cout << "norm diff "<< norm2(err)<<std::endl;
Grid_finalize();
}

View File

@ -0,0 +1,56 @@
#include <Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
template<class d>
struct scal {
d internal;
};
Gamma::GammaMatrix Gmu [] = {
Gamma::GammaX,
Gamma::GammaY,
Gamma::GammaZ,
Gamma::GammaT
};
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexF::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
LatticeFermion src(&Grid); random(pRNG,src);
std::cout << "src norm" << norm2(src)<<std::endl;
LatticeFermion result(&Grid); result=zero;
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
std::vector<LatticeColourMatrix> U(4,&Grid);
double volume=1;
for(int mu=0;mu<Nd;mu++){
volume=volume*latt_size[mu];
}
for(int mu=0;mu<Nd;mu++){
U[mu] = peekIndex<LorentzIndex>(Umu,mu);
}
RealD mass=0.5;
WilsonMatrix Dw(Umu,mass);
HermitianOperator<WilsonMatrix,LatticeFermion> HermOp(Dw);
ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
CG(HermOp,src,result);
Grid_finalize();
}

View File

@ -5,12 +5,14 @@ AM_LDFLAGS = -L$(top_builddir)/lib
#
# Test code
#
bin_PROGRAMS = Grid_wilson Grid_comms Grid_memory_bandwidth Grid_su3
bin_PROGRAMS = Grid_wilson Grid_comms Grid_memory_bandwidth Grid_su3 Grid_wilson_cg_unprec
Grid_wilson_SOURCES = Grid_wilson.cc
Grid_wilson_LDADD = -lGrid
Grid_wilson_cg_unprec_SOURCES = Grid_wilson_cg_unprec.cc
Grid_wilson_cg_unprec_LDADD = -lGrid
Grid_comms_SOURCES = Grid_comms.cc
Grid_comms_LDADD = -lGrid

2
config.guess vendored
View File

@ -1 +1 @@
/usr/share/automake-1.14/config.guess
/opt/local/share/automake-1.15/config.guess

2
config.sub vendored
View File

@ -1 +1 @@
/usr/share/automake-1.14/config.sub
/opt/local/share/automake-1.15/config.sub

13
configure vendored
View File

@ -2466,7 +2466,7 @@ test -n "$target_alias" &&
NONENONEs,x,x, &&
program_prefix=${target_alias}-
am__api_version='1.14'
am__api_version='1.15'
# Find a good install program. We prefer a C program (faster),
# so one script is as good as another. But avoid the broken or
@ -2638,8 +2638,8 @@ test "$program_suffix" != NONE &&
ac_script='s/[\\$]/&&/g;s/;s,x,x,$//'
program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"`
# expand $ac_aux_dir to an absolute path
am_aux_dir=`cd $ac_aux_dir && pwd`
# Expand $ac_aux_dir to an absolute path.
am_aux_dir=`cd "$ac_aux_dir" && pwd`
if test x"${MISSING+set}" != xset; then
case $am_aux_dir in
@ -2658,7 +2658,7 @@ else
$as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;}
fi
if test x"${install_sh}" != xset; then
if test x"${install_sh+set}" != xset; then
case $am_aux_dir in
*\ * | *\ *)
install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
@ -2986,8 +2986,8 @@ MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"}
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
mkdir_p='$(MKDIR_P)'
# We need awk for the "check" target. The system "awk" is bad on
# some platforms.
# We need awk for the "check" target (and possibly the TAP driver). The
# system "awk" is bad on some platforms.
# Always define AMTAR for backward compatibility. Yes, it's still used
# in the wild :-( We should find a proper way to deprecate it ...
AMTAR='$${TAR-tar}'
@ -3046,6 +3046,7 @@ END
fi
ac_config_headers="$ac_config_headers lib/Grid_config.h"
# Check whether --enable-silent-rules was given.

View File

@ -50,15 +50,20 @@ namespace Grid {
typedef std::complex<Real> Complex;
inline RealF adj(const RealF & r){ return r; }
inline RealF conj(const RealF & r){ return r; }
inline RealF conjugate(const RealF & r){ return r; }
inline RealF real(const RealF & r){ return r; }
inline RealD adj(const RealD & r){ return r; }
inline RealD conj(const RealD & r){ return r; }
inline RealD conjugate(const RealD & r){ return r; }
inline RealD real(const RealD & r){ return r; }
inline ComplexD innerProduct(const ComplexD & l, const ComplexD & r) { return conj(l)*r; }
inline ComplexF innerProduct(const ComplexF & l, const ComplexF & r) { return conj(l)*r; }
inline ComplexD conjugate(const ComplexD& r){ return(conj(r)); }
inline ComplexD adj(const ComplexD& r){ return(conjugate(r)); }
inline ComplexF conjugate(const ComplexF& r ){ return(conj(r)); }
inline ComplexF adj(const ComplexF& r ){ return(conjugate(r)); }
inline ComplexD innerProduct(const ComplexD & l, const ComplexD & r) { return conjugate(l)*r; }
inline ComplexF innerProduct(const ComplexF & l, const ComplexF & r) { return conjugate(l)*r; }
inline RealD innerProduct(const RealD & l, const RealD & r) { return l*r; }
inline RealF innerProduct(const RealF & l, const RealF & r) { return l*r; }
@ -70,15 +75,14 @@ namespace Grid {
inline void mult(ComplexD * __restrict__ y,const ComplexD * __restrict__ l,const ComplexD *__restrict__ r){ *y = (*l) * (*r);}
inline void sub (ComplexD * __restrict__ y,const ComplexD * __restrict__ l,const ComplexD *__restrict__ r){ *y = (*l) - (*r);}
inline void add (ComplexD * __restrict__ y,const ComplexD * __restrict__ l,const ComplexD *__restrict__ r){ *y = (*l) + (*r);}
inline ComplexD adj(const ComplexD& r){ return(conj(r)); }
// conj already supported for complex
// conjugate already supported for complex
inline void mac (ComplexF * __restrict__ y,const ComplexF * __restrict__ a,const ComplexF *__restrict__ x){ *y = (*a) * (*x)+(*y); }
inline void mult(ComplexF * __restrict__ y,const ComplexF * __restrict__ l,const ComplexF *__restrict__ r){ *y = (*l) * (*r); }
inline void sub (ComplexF * __restrict__ y,const ComplexF * __restrict__ l,const ComplexF *__restrict__ r){ *y = (*l) - (*r); }
inline void add (ComplexF * __restrict__ y,const ComplexF * __restrict__ l,const ComplexF *__restrict__ r){ *y = (*l) + (*r); }
inline ComplexF adj(const ComplexF& r ){ return(conj(r)); }
//conj already supported for complex
//conjugate already supported for complex
inline ComplexF timesI(const ComplexF &r) { return(r*ComplexF(0.0,1.0));}
inline ComplexD timesI(const ComplexD &r) { return(r*ComplexD(0.0,1.0));}

View File

@ -7,8 +7,8 @@ namespace Grid {
// LinearOperators Take a something and return a something.
/////////////////////////////////////////////////////////////////////////////////////////////
//
// Hopefully linearity is satisfied and the AdjOp is indeed the Hermitian conjugate (transpose if real):
//
// Hopefully linearity is satisfied and the AdjOp is indeed the Hermitian conjugateugate (transpose if real):
//SBase
// i) F(a x + b y) = aF(x) + b F(y).
// ii) <x|Op|y> = <y|AdjOp|x>^\ast
//
@ -25,12 +25,13 @@ namespace Grid {
/////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class HermitianOperatorBase : public LinearOperatorBase<Field> {
public:
virtual RealD OpAndNorm(const Field &in, Field &out);
virtual void OpAndNorm(const Field &in, Field &out,double &n1,double &n2);
void AdjOp(const Field &in, Field &out) {
Op(in,out);
};
void Op(const Field &in, Field &out) {
OpAndNorm(in,out);
double n1,n2;
OpAndNorm(in,out,n1,n2);
};
};
@ -80,8 +81,8 @@ namespace Grid {
Matrix &_Mat;
public:
HermitianOperator(Matrix &Mat): _Mat(Mat) {};
RealD OpAndNorm(const Field &in, Field &out){
return _Mat.MdagM(in,out);
void OpAndNorm(const Field &in, Field &out,double &n1,double &n2){
return _Mat.MdagM(in,out,n1,n2);
}
};

View File

@ -18,6 +18,7 @@ public:
std::cout << Tolerance<<std::endl;
};
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi) {assert(0);};
void operator() (HermitianOperatorBase<Field> &Linop,const Field &src, Field &psi){
RealD cp,c,a,d,b,ssq,qq,b_pred;
@ -61,21 +62,27 @@ public:
Linop.OpAndNorm(p,mmp,d,qq);
// std::cout <<std::setprecision(4)<< "ConjugateGradient: d,qq "<<d<< " "<<qq <<std::endl;
a = c/d;
b_pred = a*(a*qq-d)/c;
cp = axpy_norm(r,mmp,r,-a);
// std::cout <<std::setprecision(4)<< "ConjugateGradient: a,bp "<<a<< " "<<b_pred <<std::endl;
cp = axpy_norm(r,-a,mmp,r);
b = cp/c;
// std::cout <<std::setprecision(4)<< "ConjugateGradient: cp,b "<<cp<< " "<<b <<std::endl;
// Fuse these loops ; should be really easy
psi= a*p+psi;
p = p*b+r;
std::cout << "Iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
std::cout<<"ConjugateGradient: Iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
// Stopping condition
if ( cp <= rsq ) {
Linop.Op(p,mmp);
Linop.Op(psi,mmp);
p=mmp-src;
RealD mmpnorm = sqrt(norm2(mmp));
@ -83,8 +90,11 @@ public:
RealD srcnorm = sqrt(norm2(src));
RealD resnorm = sqrt(norm2(p));
RealD true_residual = resnorm/srcnorm;
std::cout<<"ConjugateGradient: Converged on iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
std::cout<<"ConjugateGradient: true residual is "<<true_residual<<" sol "<<psinorm<<" src "<<srcnorm<<std::endl;
std::cout<<"ConjugateGradient: target residual was "<<Tolerance<<std::endl;
return;
}
}
std::cout<<"ConjugateGradient did NOT converge"<<std::endl;

View File

@ -102,7 +102,7 @@ template <class arg> struct name\
GridUnopClass(UnarySub,-a);
GridUnopClass(UnaryAdj,adj(a));
GridUnopClass(UnaryConj,conj(a));
GridUnopClass(UnaryConj,conjugate(a));
GridUnopClass(UnaryTrace,trace(a));
GridUnopClass(UnaryTranspose,transpose(a));
@ -178,7 +178,7 @@ template <typename T1,typename T2,typename T3> inline auto op(const T1 &pred,con
GRID_DEF_UNOP(operator -,UnarySub);
GRID_DEF_UNOP(adj,UnaryAdj);
GRID_DEF_UNOP(conj,UnaryConj);
GRID_DEF_UNOP(conjugate,UnaryConj);
GRID_DEF_UNOP(trace,UnaryTrace);
GRID_DEF_UNOP(transpose,UnaryTranspose);

View File

@ -144,14 +144,44 @@ PARALLEL_FOR_LOOP
}
template<class sobj,class vobj> strong_inline
void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
conformable(lhs,rhs);
void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
conformable(x,y);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
vobj tmp = a*lhs._odata[ss];
vstream(ret._odata[ss],tmp+rhs._odata[ss]);
for(int ss=0;ss<x._grid->oSites();ss++){
vobj tmp = a*x._odata[ss]+y._odata[ss];
vstream(ret._odata[ss],tmp);
}
}
template<class sobj,class vobj> strong_inline
void axpby(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
conformable(x,y);
#pragma omp parallel for
for(int ss=0;ss<x._grid->oSites();ss++){
vobj tmp = a*x._odata[ss]+b*y._odata[ss];
vstream(ret._odata[ss],tmp);
}
}
template<class sobj,class vobj> strong_inline
RealD axpy_norm(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
conformable(x,y);
#pragma omp parallel for
for(int ss=0;ss<x._grid->oSites();ss++){
vobj tmp = a*x._odata[ss]+y._odata[ss];
vstream(ret._odata[ss],tmp);
}
return norm2(ret);
}
template<class sobj,class vobj> strong_inline
RealD axpby_norm(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
conformable(x,y);
#pragma omp parallel for
for(int ss=0;ss<x._grid->oSites();ss++){
vobj tmp = a*x._odata[ss]+b*y._odata[ss];
vstream(ret._odata[ss],tmp);
}
return norm2(ret); // FIXME implement parallel norm in ss loop
}
}
#endif

View File

@ -9,7 +9,7 @@ namespace Grid {
// Functionality:
// -=,+=,*=,()
// add,+,sub,-,mult,mac,*
// adj,conj
// adj,conjugate
// real,imag
// transpose,transposeIndex
// trace,traceIndex

View File

@ -18,11 +18,11 @@ PARALLEL_FOR_LOOP
return ret;
};
template<class vobj> inline Lattice<vobj> conj(const Lattice<vobj> &lhs){
template<class vobj> inline Lattice<vobj> conjugate(const Lattice<vobj> &lhs){
Lattice<vobj> ret(lhs._grid);
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = conj(lhs._odata[ss]);
ret._odata[ss] = conjugate(lhs._odata[ss]);
}
return ret;
};

View File

@ -98,26 +98,26 @@ template<class vtype,int N> inline void timesMinusI(iMatrix<vtype,N> &ret,const
///////////////////////////////////////////////
// Conj function for scalar, vector, matrix
///////////////////////////////////////////////
template<class vtype> inline iScalar<vtype> conj(const iScalar<vtype>&r)
template<class vtype> inline iScalar<vtype> conjugate(const iScalar<vtype>&r)
{
iScalar<vtype> ret;
ret._internal = conj(r._internal);
ret._internal = conjugate(r._internal);
return ret;
}
template<class vtype,int N> inline iVector<vtype,N> conj(const iVector<vtype,N>&r)
template<class vtype,int N> inline iVector<vtype,N> conjugate(const iVector<vtype,N>&r)
{
iVector<vtype,N> ret;
for(int i=0;i<N;i++){
ret._internal[i] = conj(r._internal[i]);
ret._internal[i] = conjugate(r._internal[i]);
}
return ret;
}
template<class vtype,int N> inline iMatrix<vtype,N> conj(const iMatrix<vtype,N>&r)
template<class vtype,int N> inline iMatrix<vtype,N> conjugate(const iMatrix<vtype,N>&r)
{
iMatrix<vtype,N> ret;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
ret._internal[i][j] = conj(r._internal[i][j]);
ret._internal[i][j] = conjugate(r._internal[i][j]);
}}
return ret;
}

View File

@ -21,7 +21,13 @@ const int WilsonMatrix::Tm = 7;
class WilsonCompressor {
public:
int mu;
int dag;
WilsonCompressor(int _dag){
mu=0;
dag=_dag;
assert((dag==0)||(dag==1));
}
void Point(int p) {
mu=p;
};
@ -29,7 +35,11 @@ const int WilsonMatrix::Tm = 7;
vHalfSpinColourVector operator () (const vSpinColourVector &in)
{
vHalfSpinColourVector ret;
switch(mu) {
int mudag=mu;
if (dag) {
mudag=(mu+Nd)%(2*Nd);
}
switch(mudag) {
case WilsonMatrix::Xp:
spProjXp(ret,in);
break;
@ -87,43 +97,52 @@ void WilsonMatrix::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeF
RealD WilsonMatrix::M(const LatticeFermion &in, LatticeFermion &out)
{
Dhop(in,out);
return 0.0;
Dhop(in,out,0);
out = (4+mass)*in - 0.5*out ; // FIXME : axpby_norm! fusion fun
return norm2(out);
}
RealD WilsonMatrix::Mdag(const LatticeFermion &in, LatticeFermion &out)
{
Dhop(in,out);
return 0.0;
Dhop(in,out,1);
out = (4+mass)*in - 0.5*out ; // FIXME : axpby_norm! fusion fun
return norm2(out);
}
void WilsonMatrix::Meooe(const LatticeFermion &in, LatticeFermion &out)
{
Dhop(in,out);
Dhop(in,out,0);
out = 0.5*out; // FIXME : scale factor in Dhop
}
void WilsonMatrix::MeooeDag(const LatticeFermion &in, LatticeFermion &out)
{
Dhop(in,out);
Dhop(in,out,1);
}
void WilsonMatrix::Mooee(const LatticeFermion &in, LatticeFermion &out)
{
out = (4.0+mass)*in;
return ;
}
void WilsonMatrix::MooeeInv(const LatticeFermion &in, LatticeFermion &out)
{
out = (1.0/(4.0+mass))*in;
return ;
}
void WilsonMatrix::MooeeDag(const LatticeFermion &in, LatticeFermion &out)
{
out = (1.0/(4.0+mass))*in;
return ;
}
void WilsonMatrix::MooeeInvDag(const LatticeFermion &in, LatticeFermion &out)
{
out = (1.0/(4.0+mass))*in;
return ;
}
void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out,int dag)
{
WilsonCompressor compressor;
assert((dag==0) ||(dag==1));
WilsonCompressor compressor(dag);
Stencil.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(in,comm_buf,compressor);
PARALLEL_FOR_LOOP
@ -140,13 +159,13 @@ PARALLEL_FOR_LOOP
int ssu= ss;
// Xp
offset = Stencil._offsets [Xp][ss];
local = Stencil._is_local[Xp][ss];
perm = Stencil._permute[Xp][ss];
ptype = Stencil._permute_type[Xp];
int idx = (Xp+dag*4)%8;
offset = Stencil._offsets [idx][ss];
local = Stencil._is_local[idx][ss];
perm = Stencil._permute[idx][ss];
if ( local && perm )
{
ptype = Stencil._permute_type[idx];
if ( local && perm ) {
spProjXp(tmp,in._odata[offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
@ -154,18 +173,17 @@ PARALLEL_FOR_LOOP
} else {
chi=comm_buf[offset];
}
mult(&Uchi(),&Umu._odata[ssu](Xp),&chi());
//prefetch(Umu._odata[ssu](Yp));
mult(&Uchi(),&Umu._odata[ssu](idx),&chi());
spReconXp(result,Uchi);
// Yp
offset = Stencil._offsets [Yp][ss];
local = Stencil._is_local[Yp][ss];
perm = Stencil._permute[Yp][ss];
ptype = Stencil._permute_type[Yp];
idx = (Yp+dag*4)%8;
offset = Stencil._offsets [idx][ss];
local = Stencil._is_local[idx][ss];
perm = Stencil._permute[idx][ss];
ptype = Stencil._permute_type[idx];
if ( local && perm )
{
if ( local && perm ) {
spProjYp(tmp,in._odata[offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
@ -173,18 +191,16 @@ PARALLEL_FOR_LOOP
} else {
chi=comm_buf[offset];
}
mult(&Uchi(),&Umu._odata[ssu](Yp),&chi());
// prefetch(Umu._odata[ssu](Zp));
mult(&Uchi(),&Umu._odata[ssu](idx),&chi());
accumReconYp(result,Uchi);
// Zp
offset = Stencil._offsets [Zp][ss];
local = Stencil._is_local[Zp][ss];
perm = Stencil._permute[Zp][ss];
ptype = Stencil._permute_type[Zp];
if ( local && perm )
{
idx = (Zp+dag*4)%8;
offset = Stencil._offsets [idx][ss];
local = Stencil._is_local[idx][ss];
perm = Stencil._permute[idx][ss];
ptype = Stencil._permute_type[idx];
if ( local && perm ) {
spProjZp(tmp,in._odata[offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
@ -192,18 +208,16 @@ PARALLEL_FOR_LOOP
} else {
chi=comm_buf[offset];
}
mult(&Uchi(),&Umu._odata[ssu](Zp),&chi());
// prefetch(Umu._odata[ssu](Tp));
mult(&Uchi(),&Umu._odata[ssu](idx),&chi());
accumReconZp(result,Uchi);
// Tp
offset = Stencil._offsets [Tp][ss];
local = Stencil._is_local[Tp][ss];
perm = Stencil._permute[Tp][ss];
ptype = Stencil._permute_type[Tp];
if ( local && perm )
{
idx = (Tp+dag*4)%8;
offset = Stencil._offsets [idx][ss];
local = Stencil._is_local[idx][ss];
perm = Stencil._permute[idx][ss];
ptype = Stencil._permute_type[idx];
if ( local && perm ) {
spProjTp(tmp,in._odata[offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
@ -211,15 +225,15 @@ PARALLEL_FOR_LOOP
} else {
chi=comm_buf[offset];
}
mult(&Uchi(),&Umu._odata[ssu](Tp),&chi());
// prefetch(Umu._odata[ssu](Xm));
mult(&Uchi(),&Umu._odata[ssu](idx),&chi());
accumReconTp(result,Uchi);
// Xm
offset = Stencil._offsets [Xm][ss];
local = Stencil._is_local[Xm][ss];
perm = Stencil._permute[Xm][ss];
ptype = Stencil._permute_type[Xm];
idx = (Xm+dag*4)%8;
offset = Stencil._offsets [idx][ss];
local = Stencil._is_local[idx][ss];
perm = Stencil._permute[idx][ss];
ptype = Stencil._permute_type[idx];
if ( local && perm )
{
@ -230,18 +244,18 @@ PARALLEL_FOR_LOOP
} else {
chi=comm_buf[offset];
}
mult(&Uchi(),&Umu._odata[ssu](Xm),&chi());
mult(&Uchi(),&Umu._odata[ssu](idx),&chi());
accumReconXm(result,Uchi);
// Ym
offset = Stencil._offsets [Ym][ss];
local = Stencil._is_local[Ym][ss];
perm = Stencil._permute[Ym][ss];
ptype = Stencil._permute_type[Ym];
idx = (Ym+dag*4)%8;
offset = Stencil._offsets [idx][ss];
local = Stencil._is_local[idx][ss];
perm = Stencil._permute[idx][ss];
ptype = Stencil._permute_type[idx];
if ( local && perm )
{
if ( local && perm ) {
spProjYm(tmp,in._odata[offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
@ -249,17 +263,16 @@ PARALLEL_FOR_LOOP
} else {
chi=comm_buf[offset];
}
mult(&Uchi(),&Umu._odata[ssu](Ym),&chi());
mult(&Uchi(),&Umu._odata[ssu](idx),&chi());
accumReconYm(result,Uchi);
// Zm
offset = Stencil._offsets [Zm][ss];
local = Stencil._is_local[Zm][ss];
perm = Stencil._permute[Zm][ss];
ptype = Stencil._permute_type[Zm];
if ( local && perm )
{
idx = (Zm+dag*4)%8;
offset = Stencil._offsets [idx][ss];
local = Stencil._is_local[idx][ss];
perm = Stencil._permute[idx][ss];
ptype = Stencil._permute_type[idx];
if ( local && perm ) {
spProjZm(tmp,in._odata[offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
@ -267,17 +280,16 @@ PARALLEL_FOR_LOOP
} else {
chi=comm_buf[offset];
}
mult(&Uchi(),&Umu._odata[ssu](Zm),&chi());
mult(&Uchi(),&Umu._odata[ssu](idx),&chi());
accumReconZm(result,Uchi);
// Tm
offset = Stencil._offsets [Tm][ss];
local = Stencil._is_local[Tm][ss];
perm = Stencil._permute[Tm][ss];
ptype = Stencil._permute_type[Tm];
if ( local && perm )
{
idx = (Tm+dag*4)%8;
offset = Stencil._offsets [idx][ss];
local = Stencil._is_local[idx][ss];
perm = Stencil._permute[idx][ss];
ptype = Stencil._permute_type[idx];
if ( local && perm ) {
spProjTm(tmp,in._odata[offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
@ -285,7 +297,7 @@ PARALLEL_FOR_LOOP
} else {
chi=comm_buf[offset];
}
mult(&Uchi(),&Umu._odata[ssu](Tm),&chi());
mult(&Uchi(),&Umu._odata[ssu](idx),&chi());
accumReconTm(result,Uchi);
vstream(out._odata[ss],result);
@ -294,10 +306,6 @@ PARALLEL_FOR_LOOP
}
void WilsonMatrix::Dw(const LatticeFermion &in, LatticeFermion &out)
{
return;
}

View File

@ -45,10 +45,7 @@ namespace Grid {
virtual void MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
// non-hermitian hopping term; half cb or both
void Dhop(const LatticeFermion &in, LatticeFermion &out);
// m+4r -1/2 Dhop; both cb's
void Dw(const LatticeFermion &in, LatticeFermion &out);
void Dhop(const LatticeFermion &in, LatticeFermion &out,int dag);
typedef iScalar<iMatrix<vComplex, Nc> > matrix;

View File

@ -32,7 +32,7 @@ namespace Grid {
friend inline void mult(vComplexD * __restrict__ y,const vComplexD * __restrict__ l,const vComplexD *__restrict__ r) {*y = (*l) * (*r);}
friend inline void sub (vComplexD * __restrict__ y,const vComplexD * __restrict__ l,const vComplexD *__restrict__ r) {*y = (*l) - (*r);}
friend inline void add (vComplexD * __restrict__ y,const vComplexD * __restrict__ l,const vComplexD *__restrict__ r) {*y = (*l) + (*r);}
friend inline vComplexD adj(const vComplexD &in){ return conj(in); }
friend inline vComplexD adj(const vComplexD &in){ return conjugate(in); }
//////////////////////////////////
// Initialise to 1,0,i
@ -166,11 +166,11 @@ namespace Grid {
// all subtypes; may not be a good assumption, but could
// add the vector width as a template param for BG/Q for example
////////////////////////////////////////////////////////////////////
/*
friend inline void permute(vComplexD &y,vComplexD b,int perm)
{
Gpermute<vComplexD>(y,b,perm);
}
/*
friend inline void merge(vComplexD &y,std::vector<ComplexD *> &extracted)
{
Gmerge<vComplexD,ComplexD >(y,extracted);
@ -269,7 +269,7 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
////////////////////////
// Conjugate
////////////////////////
friend inline vComplexD conj(const vComplexD &in){
friend inline vComplexD conjugate(const vComplexD &in){
vComplexD ret ; vzero(ret);
#if defined (AVX1)|| defined (AVX2)
// addsubps 0, inv=>0+in.v[3] 0-in.v[2], 0+in.v[1], 0-in.v[0], ...
@ -345,17 +345,17 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
// REDUCE FIXME must be a cleaner implementation
friend inline ComplexD Reduce(const vComplexD & in)
{
#if defined SSE4
return ComplexD(in.v[0], in.v[1]); // inefficient
#ifdef SSE4
return ComplexD(in.v[0],in.v[1]);
#endif
#if defined(AVX1) || defined (AVX2)
vComplexD v1;
permute(v1,in,0); // sse 128; paired complex single
v1=v1+in;
return ComplexD(v1.v[0],v1.v[1]);
#endif
#if defined (AVX1) || defined(AVX2)
// return std::complex<double>(_mm256_mask_reduce_add_pd(0x55, in.v),_mm256_mask_reduce_add_pd(0xAA, in.v));
__attribute__ ((aligned(32))) double c_[4];
_mm256_store_pd(c_,in.v);
return ComplexD(c_[0]+c_[2],c_[1]+c_[3]);
#endif
#ifdef AVX512
return ComplexD(_mm512_mask_reduce_add_pd(0x55, in.v),_mm512_mask_reduce_add_pd(0xAA, in.v));
return ComplexD(_mm512_mask_reduce_add_pd(0x55, in.v),_mm512_mask_reduce_add_pd(0xAA, in.v));
#endif
#ifdef QPX
#endif
@ -387,7 +387,7 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
};
inline vComplexD innerProduct(const vComplexD & l, const vComplexD & r) { return conj(l)*r; }
inline vComplexD innerProduct(const vComplexD & l, const vComplexD & r) { return conjugate(l)*r; }
typedef vComplexD vDComplex;

View File

@ -47,7 +47,7 @@ namespace Grid {
friend inline void mult(vComplexF * __restrict__ y,const vComplexF * __restrict__ l,const vComplexF *__restrict__ r){ *y = (*l) * (*r); }
friend inline void sub (vComplexF * __restrict__ y,const vComplexF * __restrict__ l,const vComplexF *__restrict__ r){ *y = (*l) - (*r); }
friend inline void add (vComplexF * __restrict__ y,const vComplexF * __restrict__ l,const vComplexF *__restrict__ r){ *y = (*l) + (*r); }
friend inline vComplexF adj(const vComplexF &in){ return conj(in); }
friend inline vComplexF adj(const vComplexF &in){ return conjugate(in); }
//////////////////////////////////
// Initialise to 1,0,i
@ -228,42 +228,25 @@ namespace Grid {
ret.v = {a,b,a,b};
#endif
}
friend inline ComplexF Reduce(const vComplexF & in)
{
friend inline void permute(vComplexF &y,vComplexF b,int perm)
{
Gpermute<vComplexF>(y,b,perm);
}
friend inline ComplexF Reduce(const vComplexF & in)
{
#ifdef SSE4
union {
cvec v1; // SSE 4 x float vector
float f[4]; // scalar array of 4 floats
} u128;
u128.v1= _mm_add_ps(in.v, _mm_shuffle_ps(in.v,in.v, 0b01001110)); // FIXME Prefer to use _MM_SHUFFLE macros
return ComplexF(u128.f[0], u128.f[1]);
vComplexF v1;
permute(v1,in,0); // sse 128; paired complex single
v1=v1+in;
return ComplexF(v1.v[0],v1.v[1]);
#endif
#ifdef AVX1
//it would be better passing 2 arguments to saturate the vector lanes
union {
__m256 v1;
float f[8];
} u256;
//SWAP lanes
// FIXME .. icc complains with lib/lattice/Grid_lattice_reduction.h (49): (col. 20) warning #13211: Immediate parameter to intrinsic call too large
__m256 t0 = _mm256_permute2f128_ps(in.v, in.v, 1);
__m256 t1 = _mm256_permute_ps(in.v , 0b11011000);//real (0,2,1,3)
__m256 t2 = _mm256_permute_ps(t0 , 0b10001101);//imag (1,3,0,2)
t0 = _mm256_blend_ps(t1, t2, 0b0101000001010000);// (0,0,1,1,0,0,1,1)
t1 = _mm256_hadd_ps(t0,t0);
u256.v1 = _mm256_hadd_ps(t1, t1);
return ComplexF(u256.f[0], u256.f[4]);
#endif
#ifdef AVX2
union {
__m256 v1;
float f[8];
} u256;
const __m256i mask= _mm256_set_epi32( 7, 5, 3, 1, 6, 4, 2, 0);
__m256 tmp1 = _mm256_permutevar8x32_ps(in.v, mask);
__m256 tmp2 = _mm256_hadd_ps(tmp1, tmp1);
u256.v1 = _mm256_hadd_ps(tmp2, tmp2);
return ComplexF(u256.f[0], u256.f[4]);
#if defined(AVX1) || defined (AVX2)
vComplexF v1,v2;
permute(v1,in,0); // sse 128; paired complex single
v1=v1+in;
permute(v2,v1,1); // avx 256; quad complex single
v1=v1+v2;
return ComplexF(v1.v[0],v1.v[1]);
#endif
#ifdef AVX512
return ComplexF(_mm512_mask_reduce_add_ps(0x5555, in.v),_mm512_mask_reduce_add_ps(0xAAAA, in.v));
@ -345,13 +328,10 @@ namespace Grid {
// Conjugate
///////////////////////
friend inline vComplexF conj(const vComplexF &in){
friend inline vComplexF conjugate(const vComplexF &in){
vComplexF ret ; vzero(ret);
#if defined (AVX1)|| defined (AVX2)
cvec tmp;
tmp = _mm256_addsub_ps(ret.v,_mm256_shuffle_ps(in.v,in.v,_MM_SHUFFLE(2,3,0,1))); // ymm1 <- br,bi
ret.v=_mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1));
ret.v = _mm256_xor_ps(_mm256_addsub_ps(ret.v,in.v), _mm256_set1_ps(-0.f));
#endif
#ifdef SSE4
ret.v = _mm_xor_ps(_mm_addsub_ps(ret.v,in.v), _mm_set1_ps(-0.f));
@ -433,10 +413,6 @@ namespace Grid {
return *this;
}
friend inline void permute(vComplexF &y,vComplexF b,int perm)
{
Gpermute<vComplexF>(y,b,perm);
}
/*
friend inline void merge(vComplexF &y,std::vector<ComplexF *> &extracted)
{
@ -460,7 +436,7 @@ namespace Grid {
inline vComplexF innerProduct(const vComplexF & l, const vComplexF & r)
{
return conj(l)*r;
return conjugate(l)*r;
}
inline void zeroit(vComplexF &z){ vzero(z);}

View File

@ -117,7 +117,7 @@ namespace Grid {
};
///////////////////////////////////////////////
// mult, sub, add, adj,conj, mac functions
// mult, sub, add, adj,conjugate, mac functions
///////////////////////////////////////////////
friend inline void mult(vInteger * __restrict__ y,const vInteger * __restrict__ l,const vInteger *__restrict__ r) {*y = (*l) * (*r);}
friend inline void sub (vInteger * __restrict__ y,const vInteger * __restrict__ l,const vInteger *__restrict__ r) {*y = (*l) - (*r);}

View File

@ -26,7 +26,7 @@ namespace Grid {
friend inline void sub (vRealD * __restrict__ y,const vRealD * __restrict__ l,const vRealD *__restrict__ r) {*y = (*l) - (*r);}
friend inline void add (vRealD * __restrict__ y,const vRealD * __restrict__ l,const vRealD *__restrict__ r) {*y = (*l) + (*r);}
friend inline vRealD adj(const vRealD &in) { return in; }
friend inline vRealD conj(const vRealD &in){ return in; }
friend inline vRealD conjugate(const vRealD &in){ return in; }
friend inline void mac (vRealD &y,const vRealD a,const vRealD x){
#if defined (AVX1) || defined (SSE4)
@ -112,11 +112,12 @@ namespace Grid {
// all subtypes; may not be a good assumption, but could
// add the vector width as a template param for BG/Q for example
////////////////////////////////////////////////////////////////////
/*
friend inline void permute(vRealD &y,vRealD b,int perm)
{
Gpermute<vRealD>(y,b,perm);
}
/*
friend inline void merge(vRealD &y,std::vector<RealD *> &extracted)
{
Gmerge<vRealD,RealD >(y,extracted);
@ -209,48 +210,26 @@ namespace Grid {
friend inline RealD Reduce(const vRealD & in)
{
#if defined (SSE4)
// FIXME Hack
const RealD * ptr =(const RealD *) &in;
RealD ret = 0;
for(int i=0;i<vRealD::Nsimd();i++){
ret = ret+ptr[i];
}
return ret;
#ifdef SSE4
vRealD v1;
permute(v1,in,0); // sse 128; paired real double
v1=v1+in;
return RealD(v1.v[0]);
#endif
#if defined (AVX1) || defined(AVX2)
typedef union {
uint64_t l;
double d;
} my_conv_t;
my_conv_t converter;
// more reduce_add
/*
__attribute__ ((aligned(32))) double c_[16];
__m256d tmp = _mm256_permute2f128_pd(in.v,in.v,0x01); // tmp 1032; in= 3210
__m256d hadd = _mm256_hadd_pd(in.v,tmp); // hadd = 1+0,3+2,3+2,1+0
tmp = _mm256_permute2f128_pd(hadd,hadd,0x01);// tmp = 3+2,1+0,1+0,3+2
hadd = _mm256_hadd_pd(tmp,tmp); // tmp = 3+2+1+0,3+2+1+0,1+0+3+2,1+0+3+2
_mm256_store_pd(c_,hadd);ô
return c[0]
*/
__m256d tmp = _mm256_permute2f128_pd(in.v,in.v,0x01); // tmp 1032; in= 3210
__m256d hadd = _mm256_hadd_pd(in.v,tmp); // hadd = 1+0,3+2,3+2,1+0
hadd = _mm256_hadd_pd(hadd,hadd); // hadd = 1+0+3+2...
converter.l = _mm256_extract_epi64((ivec)hadd,0);
return converter.d;
#if defined(AVX1) || defined (AVX2)
vRealD v1,v2;
permute(v1,in,0); // avx 256; quad double
v1=v1+in;
permute(v2,v1,1);
v1=v1+v2;
return v1.v[0];
#endif
#ifdef AVX512
return _mm512_reduce_add_pd(in.v);
/*
__attribute__ ((aligned(32))) double c_[8];
_mm512_store_pd(c_,in.v);
return c_[0]+c_[1]+c_[2]+c_[3]+c_[4]+c_[5]+c_[6]+c_[7];
*/
#endif
#ifdef QPX
#endif
}
}
// *=,+=,-= operators
inline vRealD &operator *=(const vRealD &r) {
@ -270,7 +249,7 @@ namespace Grid {
static int Nsimd(void) { return sizeof(dvec)/sizeof(double);}
};
inline vRealD innerProduct(const vRealD & l, const vRealD & r) { return conj(l)*r; }
inline vRealD innerProduct(const vRealD & l, const vRealD & r) { return conjugate(l)*r; }
inline void zeroit(vRealD &z){ vzero(z);}
inline vRealD outerProduct(const vRealD &l, const vRealD& r)

View File

@ -92,13 +92,13 @@ namespace Grid {
};
///////////////////////////////////////////////
// mult, sub, add, adj,conj, mac functions
// mult, sub, add, adj,conjugate, mac functions
///////////////////////////////////////////////
friend inline void mult(vRealF * __restrict__ y,const vRealF * __restrict__ l,const vRealF *__restrict__ r) {*y = (*l) * (*r);}
friend inline void sub (vRealF * __restrict__ y,const vRealF * __restrict__ l,const vRealF *__restrict__ r) {*y = (*l) - (*r);}
friend inline void add (vRealF * __restrict__ y,const vRealF * __restrict__ l,const vRealF *__restrict__ r) {*y = (*l) + (*r);}
friend inline vRealF adj(const vRealF &in) { return in; }
friend inline vRealF conj(const vRealF &in){ return in; }
friend inline vRealF conjugate(const vRealF &in){ return in; }
friend inline void mac (vRealF &y,const vRealF a,const vRealF x){
#if defined (AVX1) || defined (SSE4)
@ -133,11 +133,12 @@ namespace Grid {
// all subtypes; may not be a good assumption, but could
// add the vector width as a template param for BG/Q for example
////////////////////////////////////////////////////////////////////
/*
friend inline void permute(vRealF &y,vRealF b,int perm)
{
Gpermute<vRealF>(y,b,perm);
}
/*
friend inline void merge(vRealF &y,std::vector<RealF *> &extracted)
{
Gmerge<vRealF,RealF >(y,extracted);
@ -155,7 +156,6 @@ namespace Grid {
Gextract<vRealF,RealF>(y,extracted);
}
*/
/////////////////////////////////////////////////////
// Broadcast a value across Nsimd copies.
@ -243,33 +243,26 @@ friend inline void vstore(const vRealF &ret, float *a){
}
friend inline RealF Reduce(const vRealF & in)
{
#if defined (SSE4)
// FIXME Hack
const RealF * ptr = (const RealF *) &in;
RealF ret = 0;
for(int i=0;i<vRealF::Nsimd();i++){
ret = ret+ptr[i];
}
return ret;
#ifdef SSE4
vRealF v1,v2;
permute(v1,in,0); // sse 128; quad single
v1=v1+in;
permute(v2,v1,1);
v1=v1+v2;
return v1.v[0];
#endif
#if defined (AVX1) || defined(AVX2)
__attribute__ ((aligned(32))) float c_[16];
__m256 tmp = _mm256_permute2f128_ps(in.v,in.v,0x01);
__m256 hadd = _mm256_hadd_ps(in.v,tmp);
tmp = _mm256_permute2f128_ps(hadd,hadd,0x01);
hadd = _mm256_hadd_ps(tmp,tmp);
_mm256_store_ps(c_,hadd);
return (float)c_[0];
#if defined(AVX1) || defined (AVX2)
vRealF v1,v2;
permute(v1,in,0); // avx 256; octo-double
v1=v1+in;
permute(v2,v1,1);
v1=v1+v2;
permute(v2,v1,2);
v1=v1+v2;
return v1.v[0];
#endif
#ifdef AVX512
return _mm512_reduce_add_ps(in.v);
/*
__attribute__ ((aligned(64))) float c_[16];
_mm512_store_ps(c_,in.v);
return c_[0]+c_[1]+c_[2]+c_[3]+c_[4]+c_[5]+c_[6]+c_[7]
+c_[8]+c_[9]+c_[10]+c_[11]+c_[12]+c_[13]+c_[14]+c_[15];
*/
#endif
#ifdef QPX
#endif
@ -291,7 +284,7 @@ friend inline void vstore(const vRealF &ret, float *a){
public:
static inline int Nsimd(void) { return sizeof(fvec)/sizeof(float);}
};
inline vRealF innerProduct(const vRealF & l, const vRealF & r) { return conj(l)*r; }
inline vRealF innerProduct(const vRealF & l, const vRealF & r) { return conjugate(l)*r; }
inline void zeroit(vRealF &z){ vzero(z);}
inline vRealF outerProduct(const vRealF &l, const vRealF& r)

View File

@ -79,7 +79,7 @@ namespace Grid {
friend inline void mult(Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) * (*r); }
friend inline void sub (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) - (*r); }
friend inline void add (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) + (*r); }
friend inline Grid_simd adj(const Grid_simd &in){ return conj(in); }
friend inline Grid_simd adj(const Grid_simd &in){ return conjugate(in); }
//////////////////////////////////
// Initialise to 1,0,i
@ -193,7 +193,7 @@ namespace Grid {
// Conjugate
///////////////////////
friend inline Grid_simd conj(const Grid_simd &in){
friend inline Grid_simd conjugate(const Grid_simd &in){
Grid_simd ret ; vzero(ret);
// FIXME add operator
return ret;
@ -265,7 +265,7 @@ namespace Grid {
template<class scalar_type, class vector_type >
inline Grid_simd< scalar_type, vector_type> innerProduct(const Grid_simd< scalar_type, vector_type> & l, const Grid_simd< scalar_type, vector_type> & r)
{
return conj(l)*r;
return conjugate(l)*r;
}
template<class scalar_type, class vector_type >

View File

@ -1,6 +1,6 @@
#!/bin/bash
#!/bin/bash -e
DIRS="clang-avx clang-avx-openmp clang-avx-openmp-mpi clang-avx-mpi clang-avx2 clang-avx2-openmp clang-avx2-openmp-mpi clang-avx2-mpi"
DIRS="clang-avx clang-avx-openmp clang-avx-openmp-mpi clang-avx-mpi clang-avx2 clang-avx2-openmp clang-avx2-openmp-mpi clang-avx2-mpi clang-sse"
EXTRADIRS="g++-avx g++-sse4 icpc-avx icpc-avx2 icpc-avx512"
BLACK="\033[30m"
RED="\033[31m"

View File

@ -1,6 +1,6 @@
#!/bin/bash
DIRS="clang-avx clang-avx-openmp clang-avx-openmp-mpi clang-avx-mpi clang-avx2 clang-avx2-openmp clang-avx2-openmp-mpi clang-avx2-mpi icpc-avx icpc-avx2 icpc-avx512 g++-sse4 g++-avx"
DIRS="clang-avx clang-avx-openmp clang-avx-openmp-mpi clang-avx-mpi clang-avx2 clang-avx2-openmp clang-avx2-openmp-mpi clang-avx2-mpi icpc-avx icpc-avx2 icpc-avx512 g++-sse4 g++-avx clang-sse"
for D in $DIRS
do

View File

@ -34,6 +34,9 @@ icpc-avx512)
icpc-mic)
CXX=icpc ../../configure --host=none --enable-simd=AVX512 CXXFLAGS="-mmic -O3 -std=c++11" LDFLAGS=-mmic LIBS="-lgmp -lmpfr" --enable-comms=none
;;
clang-sse)
CXX=clang++ ../../configure --enable-simd=SSE4 CXXFLAGS="-msse4 -O3 -std=c++11" LIBS="-lgmp -lmpfr" --enable-comms=none
;;
clang-avx)
CXX=clang++ ../../configure --enable-simd=AVX CXXFLAGS="-mavx -O3 -std=c++11" LIBS="-lgmp -lmpfr" --enable-comms=none
;;
@ -47,16 +50,16 @@ clang-avx2-openmp)
CXX=clang-omp++ ../../configure --enable-simd=AVX2 CXXFLAGS="-mavx2 -mfma -fopenmp -O3 -std=c++11" LDFLAGS="-fopenmp" LIBS="-lgmp -lmpfr" --enable-comms=none
;;
clang-avx-openmp-mpi)
CXX=clang-omp++ ../../configure --enable-simd=AVX CXXFLAGS="-mavx -fopenmp -O3 -I/opt/local/include/openmpi-mp/ -std=c++11" LDFLAGS=-L/opt/local/lib/openmpi-mp/ LIBS="-lmpi -lmpi_cxx -fopenmp" LIBS="-lgmp -lmpfr" --enable-comms=mpi
CXX=clang-omp++ ../../configure --enable-simd=AVX CXXFLAGS="-mavx -fopenmp -O3 -I/opt/local/include/openmpi-mp/ -std=c++11" LDFLAGS=-L/opt/local/lib/openmpi-mp/ LIBS="-lmpi -lmpi_cxx -fopenmp -lgmp -lmpfr" --enable-comms=mpi
;;
clang-avx2-openmp-mpi)
CXX=clang-omp++ ../../configure --enable-simd=AVX2 CXXFLAGS="-mavx2 -mfma -fopenmp -O3 -I/opt/local/include/openmpi-mp/ -std=c++11" LDFLAGS=-L/opt/local/lib/openmpi-mp/ LIBS="-lmpi -lmpi_cxx -fopenmp" LIBS="-lgmp -lmpfr" --enable-comms=mpi
CXX=clang-omp++ ../../configure --enable-simd=AVX2 CXXFLAGS="-mavx2 -mfma -fopenmp -O3 -I/opt/local/include/openmpi-mp/ -std=c++11" LDFLAGS=-L/opt/local/lib/openmpi-mp/ LIBS="-lmpi -lmpi_cxx -fopenmp -lgmp -lmpfr" --enable-comms=mpi
;;
clang-avx-mpi)
CXX=clang++ ../../configure --enable-simd=AVX CXXFLAGS="-mavx -O3 -I/opt/local/include/openmpi-mp/ -std=c++11" LDFLAGS=-L/opt/local/lib/openmpi-mp/ LIBS="-lmpi -lmpi_cxx " LIBS="-lgmp -lmpfr" --enable-comms=mpi
CXX=clang++ ../../configure --enable-simd=AVX CXXFLAGS="-mavx -O3 -I/opt/local/include/openmpi-mp/ -std=c++11" LDFLAGS=-L/opt/local/lib/openmpi-mp/ LIBS="-lmpi -lmpi_cxx -lgmp -lmpfr" --enable-comms=mpi
;;
clang-avx2-mpi)
CXX=clang++ ../../configure --enable-simd=AVX2 CXXFLAGS="-mavx2 -mfma -O3 -I/opt/local/include/openmpi-mp/ -std=c++11" LDFLAGS=-L/opt/local/lib/openmpi-mp/ LIBS="-lmpi -lmpi_cxx " LIBS="-lgmp -lmpfr" --enable-comms=mpi
CXX=clang++ ../../configure --enable-simd=AVX2 CXXFLAGS="-mavx2 -mfma -O3 -I/opt/local/include/openmpi-mp/ -std=c++11" LDFLAGS=-L/opt/local/lib/openmpi-mp/ LIBS="-lmpi -lmpi_cxx -lgmp -lmpfr" --enable-comms=mpi
;;
esac
echo -e $NORMAL

View File

@ -140,7 +140,7 @@ int main (int argc, char ** argv)
// -=,+=,*=,()
// add,+,sub,-,mult,mac,*
// adj,conj
// adj,conjugate
// real,imag
// transpose,transposeIndex
// trace,traceIndex
@ -437,7 +437,7 @@ int main (int argc, char ** argv)
for(int r=0;r<3;r++){
for(int c=0;c<3;c++){
diff =shifted1()()(r,c)-shifted2()()(r,c);
nn=real(conj(diff)*diff);
nn=real(conjugate(diff)*diff);
if ( nn > 0 )
cout<<"Shift fail (shifted1/shifted2-ref) "<<coor[0]<<coor[1]<<coor[2]<<coor[3] <<" "
<<shifted1()()(r,c)<<" "<<shifted2()()(r,c)
@ -451,7 +451,7 @@ int main (int argc, char ** argv)
for(int r=0;r<3;r++){
for(int c=0;c<3;c++){
diff =shifted3()()(r,c)-shifted2()()(r,c);
nn=real(conj(diff)*diff);
nn=real(conjugate(diff)*diff);
if ( nn > 0 )
cout<<"Shift rb fail (shifted3/shifted2-ref) "<<coor[0]<<coor[1]<<coor[2]<<coor[3] <<" "
<<shifted3()()(r,c)<<" "<<shifted2()()(r,c)
@ -468,7 +468,7 @@ int main (int argc, char ** argv)
for(int r=0;r<Nc;r++){
for(int c=0;c<Nc;c++){
diff =foobar2()()(r,c)-foobar1()()(r,c);
nrm = nrm + real(conj(diff)*diff);
nrm = nrm + real(conjugate(diff)*diff);
}}
}}}}
if( Fine.IsBoss() ){

52
tests/Grid_rng_fixed.cc Normal file
View File

@ -0,0 +1,52 @@
#include <Grid.h>
#include <parallelIO/GridNerscIO.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(4,vComplexF::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
std::vector<int> seeds({1,2,3,4});
GridSerialRNG fsRNG; fsRNG.SeedFixedIntegers(seeds);
GridParallelRNG fpRNG(&Grid); fpRNG.SeedFixedIntegers(seeds);
vComplexF tmp; random(fsRNG,tmp);
std::cout<<"Random vComplexF (fixed seed)\n"<< tmp<<std::endl;
std::cout<<"conjugate(tmp)\n"<< conjugate(tmp)<<std::endl;
std::cout<<"conjugate(tmp)*tmp\n"<< conjugate(tmp)*tmp<<std::endl;
std::cout<<"innerProduct"<< innerProduct(tmp,tmp)<<std::endl;
std::cout<<"Reduce(innerProduct)"<< Reduce(innerProduct(tmp,tmp))<<std::endl;
SpinMatrix rnd ;
random(fsRNG,rnd);
std::cout<<"Random Spin Matrix (fixed seed)\n"<< rnd<<std::endl;
SpinVector rv;
random(fsRNG,rv);
std::cout<<"Random Spin Vector (fixed seed)\n"<< rv<<std::endl;
gaussian(fsRNG,rv);
std::cout<<"Gaussian Spin Vector (fixed seed)\n"<< rv<<std::endl;
LatticeColourVector lcv(&Grid);
LatticeFermion src(&Grid); random(fpRNG,src);
std::cout << "src norm : " << norm2(src)<<std::endl;
std::cout << "src " << src<<std::endl;
random(fpRNG,lcv);
std::cout<<"Random Lattice Colour Vector (fixed seed)\n"<< lcv<<std::endl;
Grid_finalize();
}

View File

@ -26,7 +26,7 @@ public:
class funcConj {
public:
funcConj() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = conj(i1);}
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = conjugate(i1);}
std::string name(void) const { return std::string("Conj"); }
};
class funcAdj {
@ -49,6 +49,34 @@ public:
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = timesMinusI(i1);}
std::string name(void) const { return std::string("timesMinusI"); }
};
class funcInnerProduct {
public:
funcInnerProduct() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = innerProduct(i1,i2);}
std::string name(void) const { return std::string("innerProduct"); }
};
// FIXME still to test:
//
// innerProduct,
// norm2,
// Reduce,
//
// mac,mult,sub,add, vone,vzero,vcomplex_i, =zero,
// vset,vsplat,vstore,vstream,vload, scalar*vec, vec*scalar
// unary -,
// *= , -=, +=
// outerproduct,
// zeroit
// permute
class funcReduce {
public:
funcReduce() {};
template<class reduce,class vec> void vfunc(reduce &rr,vec &i1,vec &i2) const { rr = Reduce(i1);}
template<class reduce,class scal> void sfunc(reduce &rr,scal &i1,scal &i2) const { rr = i1;}
std::string name(void) const { return std::string("Reduce"); }
};
template<class scal, class vec,class functor >
void Tester(const functor &func)
@ -96,8 +124,59 @@ void Tester(const functor &func)
ok++;
}
}
if ( ok==0 ) std::cout << " OK!" <<std::endl;
if ( ok==0 ) {
std::cout << " OK!" <<std::endl;
}
assert(ok==0);
}
template<class reduced,class scal, class vec,class functor >
void ReductionTester(const functor &func)
{
GridSerialRNG sRNG;
sRNG.SeedRandomDevice();
int Nsimd = vec::Nsimd();
std::vector<scal> input1(Nsimd);
std::vector<scal> input2(Nsimd);
reduced result(0);
reduced reference(0);
reduced tmp;
std::vector<vec,alignedAllocator<vec> > buf(3);
vec & v_input1 = buf[0];
vec & v_input2 = buf[1];
for(int i=0;i<Nsimd;i++){
random(sRNG,input1[i]);
random(sRNG,input2[i]);
}
merge<vec,scal>(v_input1,input1);
merge<vec,scal>(v_input2,input2);
func.template vfunc<reduced,vec>(result,v_input1,v_input2);
for(int i=0;i<Nsimd;i++) {
func.template sfunc<reduced,scal>(tmp,input1[i],input2[i]);
reference+=tmp;
}
std::cout << " " << func.name()<<std::endl;
int ok=0;
if ( abs(reference-result)/abs(reference) > 1.0e-6 ){ // rounding is possible for reduce order
std::cout<< "*****" << std::endl;
std::cout<< abs(reference-result) << " " <<reference<< " " << result<<std::endl;
ok++;
}
if ( ok==0 ) {
std::cout << " OK!" <<std::endl;
}
assert(ok==0);
}
@ -127,6 +206,8 @@ int main (int argc, char ** argv)
Tester<ComplexF,vComplexF>(funcTimes());
Tester<ComplexF,vComplexF>(funcConj());
Tester<ComplexF,vComplexF>(funcAdj());
Tester<ComplexF,vComplexF>(funcInnerProduct());
ReductionTester<ComplexF,ComplexF,vComplexF>(funcReduce());
std::cout << "==================================="<< std::endl;
std::cout << "Testing vComplexD "<<std::endl;
@ -140,6 +221,8 @@ int main (int argc, char ** argv)
Tester<ComplexD,vComplexD>(funcTimes());
Tester<ComplexD,vComplexD>(funcConj());
Tester<ComplexD,vComplexD>(funcAdj());
Tester<ComplexD,vComplexD>(funcInnerProduct());
ReductionTester<ComplexD,ComplexD,vComplexD>(funcReduce());
std::cout << "==================================="<< std::endl;
std::cout << "Testing vRealF "<<std::endl;
@ -150,6 +233,9 @@ int main (int argc, char ** argv)
Tester<RealF,vRealF>(funcMinus());
Tester<RealF,vRealF>(funcTimes());
Tester<RealF,vRealF>(funcAdj());
Tester<RealF,vRealF>(funcConj());
Tester<RealF,vRealF>(funcInnerProduct());
ReductionTester<RealF,RealF,vRealF>(funcReduce());
std::cout << "==================================="<< std::endl;
std::cout << "Testing vRealD "<<std::endl;
@ -159,6 +245,9 @@ int main (int argc, char ** argv)
Tester<RealD,vRealD>(funcMinus());
Tester<RealD,vRealD>(funcTimes());
Tester<RealD,vRealD>(funcAdj());
Tester<RealD,vRealD>(funcConj());
Tester<RealD,vRealD>(funcInnerProduct());
ReductionTester<RealD,RealD,vRealD>(funcReduce());
Grid_finalize();
}

View File

@ -93,7 +93,7 @@ int main (int argc, char ** argv)
for(int r=0;r<3;r++){
for(int c=0;c<3;c++){
diff =check()()(r,c)-bar()()(r,c);
double nn=real(conj(diff)*diff);
double nn=real(conjugate(diff)*diff);
if ( nn > 0){
printf("Coor (%d %d %d %d) \t rc %d%d \t %le (%le,%le) %le\n",
coor[0],coor[1],coor[2],coor[3],r,c,
@ -103,8 +103,8 @@ int main (int argc, char ** argv)
real(bar()()(r,c))
);
}
snrmC=snrmC+real(conj(check()()(r,c))*check()()(r,c));
snrmB=snrmB+real(conj(bar()()(r,c))*bar()()(r,c));
snrmC=snrmC+real(conjugate(check()()(r,c))*check()()(r,c));
snrmB=snrmB+real(conjugate(bar()()(r,c))*bar()()(r,c));
snrm=snrm+nn;
}}

View File

@ -5,7 +5,7 @@ AM_LDFLAGS = -L$(top_builddir)/lib
#
# Test code
#
bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma Grid_simd Grid_rng Grid_remez
bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma Grid_simd Grid_rng Grid_remez Grid_rng_fixed
Grid_main_SOURCES = Grid_main.cc
Grid_main_LDADD = -lGrid
@ -13,6 +13,9 @@ Grid_main_LDADD = -lGrid
Grid_rng_SOURCES = Grid_rng.cc
Grid_rng_LDADD = -lGrid
Grid_rng_fixed_SOURCES = Grid_rng_fixed.cc
Grid_rng_fixed_LDADD = -lGrid
Grid_remez_SOURCES = Grid_remez.cc
Grid_remez_LDADD = -lGrid

View File

@ -186,7 +186,7 @@ int main (int argc, char ** argv)
for(int r=0;r<3;r++){
for(int c=0;c<3;c++){
diff =check._internal._internal[r][c]-bar._internal._internal[r][c];
double nn=real(conj(diff)*diff);
double nn=real(conjugate(diff)*diff);
if ( nn > 0 ){
printf("Coor (%d %d %d %d) \t rc %d%d \t %le %le %le\n",
coor[0],coor[1],coor[2],coor[3],r,c,