mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-13 20:57:06 +01:00
Compare commits
73 Commits
rmhmc_merg
...
feature/La
Author | SHA1 | Date | |
---|---|---|---|
dc5b909f99 | |||
855b249f57 | |||
8bb11e5039 | |||
3686827df5 | |||
60589a93a3 | |||
bc1f5be265 | |||
1c430ec71c | |||
0b63e2e9cd | |||
386b4fcb04 | |||
11219a8f7a | |||
3caf0e8b09 | |||
bc5ba39278 | |||
fbe1209f7e | |||
ebb1bebf24 | |||
53a9260a94 | |||
dc6f637e70 | |||
44b218a595 | |||
bfc0306a43 | |||
4c0ae75ac5 | |||
3cb8cb7282 | |||
89c4e9b168 | |||
fe406e230d | |||
3dbc8586fa | |||
7305910c95 | |||
5139eaf491 | |||
2c35c89b92 | |||
91cc33e907 | |||
5d44346be3 | |||
3a754fcd51 | |||
137886c316 | |||
ef61b549e6 | |||
3006663b9c | |||
0145685f96 | |||
e73e4b4002 | |||
caa6605b43 | |||
522c9248ae | |||
191fbf85fc | |||
93650f3a61 | |||
cab4b4d063 | |||
cf4b30b2dd | |||
c51d0b4078 | |||
2f4cbeb4d5 | |||
fb7c4fb815 | |||
00bb71e5af | |||
cfed2c1ea0 | |||
b1b15f0b70 | |||
927c7ae3ed | |||
05d04ceff8 | |||
8313367a50 | |||
5c479ce663 | |||
4bf9d65bf8 | |||
3a056c4dff | |||
b0ba651654 | |||
25d4c175c3 | |||
a8d7986e1c | |||
92ec509bfa | |||
e80a87ff7f | |||
867fe93018 | |||
09651c3326 | |||
f87f2a3f8b | |||
a07556dd5f | |||
f80a847aef | |||
93cb5d4e97 | |||
9e48b7dfda | |||
d0c2c9c71f | |||
c8cafa77ca | |||
a3bcad3804 | |||
5a5b66292b | |||
e63be32ad2 | |||
6aa106d906 | |||
33d59c8869 | |||
a833fd8dbf | |||
e9712bc7fb |
54
.github/ISSUE_TEMPLATE/bug-report.yml
vendored
54
.github/ISSUE_TEMPLATE/bug-report.yml
vendored
@ -1,54 +0,0 @@
|
||||
name: Bug report
|
||||
description: Report a bug.
|
||||
title: "<insert title>"
|
||||
labels: [bug]
|
||||
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: >
|
||||
Thank you for taking the time to file a bug report.
|
||||
Please check that the code is pointing to the HEAD of develop
|
||||
or any commit in master which is tagged with a version number.
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: "Describe the issue:"
|
||||
description: >
|
||||
Describe the issue and any previous attempt to solve it.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: "Code example:"
|
||||
description: >
|
||||
If relevant, show how to reproduce the issue using a minimal working
|
||||
example.
|
||||
placeholder: |
|
||||
<< your code here >>
|
||||
render: shell
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: "Target platform:"
|
||||
description: >
|
||||
Give a description of the target platform (CPU, network, compiler).
|
||||
Please give the full CPU part description, using for example
|
||||
`cat /proc/cpuinfo | grep 'model name' | uniq` (Linux)
|
||||
or `sysctl machdep.cpu.brand_string` (macOS) and the full output
|
||||
the `--version` option of your compiler.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: "Configure options:"
|
||||
description: >
|
||||
Please give the exact configure command used and attach
|
||||
`config.log`, `grid.config.summary` and the output of `make V=1`.
|
||||
render: shell
|
||||
validations:
|
||||
required: true
|
34
.gitignore
vendored
34
.gitignore
vendored
@ -83,12 +83,14 @@ ltmain.sh
|
||||
.Trashes
|
||||
ehthumbs.db
|
||||
Thumbs.db
|
||||
.dirstamp
|
||||
|
||||
# build directory #
|
||||
###################
|
||||
build*/*
|
||||
Documentation/_build
|
||||
|
||||
# bootstrap #
|
||||
#############
|
||||
*.tar.bz2*
|
||||
|
||||
# IDE related files #
|
||||
#####################
|
||||
@ -96,11 +98,16 @@ Documentation/_build
|
||||
build.sh
|
||||
.vscode
|
||||
*.code-workspace
|
||||
.ctags
|
||||
tags
|
||||
|
||||
# Eigen source #
|
||||
################
|
||||
Grid/Eigen
|
||||
Eigen/*
|
||||
lib/Eigen/*
|
||||
|
||||
# FFTW source #
|
||||
################
|
||||
lib/fftw/*
|
||||
|
||||
# libtool macros #
|
||||
##################
|
||||
@ -111,8 +118,21 @@ m4/libtool.m4
|
||||
################
|
||||
gh-pages/
|
||||
|
||||
# Buck files #
|
||||
##############
|
||||
.buck*
|
||||
buck-out
|
||||
BUCK
|
||||
make-bin-BUCK.sh
|
||||
|
||||
# generated sources #
|
||||
#####################
|
||||
Grid/qcd/spin/gamma-gen/*.h
|
||||
Grid/qcd/spin/gamma-gen/*.cc
|
||||
Grid/util/Version.h
|
||||
lib/qcd/spin/gamma-gen/*.h
|
||||
lib/qcd/spin/gamma-gen/*.cc
|
||||
lib/version.h
|
||||
|
||||
# vs code editor files #
|
||||
########################
|
||||
.vscode/
|
||||
.vscode/settings.json
|
||||
settings.json
|
||||
|
60
.travis.yml
Normal file
60
.travis.yml
Normal file
@ -0,0 +1,60 @@
|
||||
language: cpp
|
||||
|
||||
cache:
|
||||
directories:
|
||||
- clang
|
||||
|
||||
matrix:
|
||||
include:
|
||||
- os: osx
|
||||
osx_image: xcode8.3
|
||||
compiler: clang
|
||||
|
||||
before_install:
|
||||
- export GRIDDIR=`pwd`
|
||||
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]] && [ ! -e clang/bin ]; then wget $CLANG_LINK; tar -xf `basename $CLANG_LINK`; mkdir clang; mv clang+*/* clang/; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export PATH="${GRIDDIR}/clang/bin:${PATH}"; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
|
||||
|
||||
install:
|
||||
- export CWD=`pwd`
|
||||
- echo $CWD
|
||||
- export CC=$CC$VERSION
|
||||
- export CXX=$CXX$VERSION
|
||||
- echo $PATH
|
||||
- which autoconf
|
||||
- autoconf --version
|
||||
- which automake
|
||||
- automake --version
|
||||
- which $CC
|
||||
- $CC --version
|
||||
- which $CXX
|
||||
- $CXX --version
|
||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi
|
||||
|
||||
script:
|
||||
- ./bootstrap.sh
|
||||
- mkdir build
|
||||
- cd build
|
||||
- mkdir lime
|
||||
- cd lime
|
||||
- mkdir build
|
||||
- cd build
|
||||
- wget http://usqcd-software.github.io/downloads/c-lime/lime-1.3.2.tar.gz
|
||||
- tar xf lime-1.3.2.tar.gz
|
||||
- cd lime-1.3.2
|
||||
- ./configure --prefix=$CWD/build/lime/install
|
||||
- make -j4
|
||||
- make install
|
||||
- cd $CWD/build
|
||||
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install
|
||||
- make -j4
|
||||
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
|
||||
- echo make clean
|
||||
- ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install
|
||||
- make -j4
|
||||
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
|
||||
- make check
|
||||
|
@ -1,5 +0,0 @@
|
||||
Version : 0.8.0
|
||||
|
||||
- Clang 3.5 and above, ICPC v16 and above, GCC 6.3 and above recommended
|
||||
- MPI and MPI3 comms optimisations for KNL and OPA finished
|
||||
- Half precision comms
|
||||
|
@ -1,73 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/DisableWarnings.h
|
||||
|
||||
Copyright (C) 2016
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef DISABLE_WARNINGS_H
|
||||
#define DISABLE_WARNINGS_H
|
||||
|
||||
|
||||
|
||||
#if defined __GNUC__ && __GNUC__>=6
|
||||
#pragma GCC diagnostic ignored "-Wignored-attributes"
|
||||
#endif
|
||||
|
||||
//disables and intel compiler specific warning (in json.hpp)
|
||||
#ifdef __ICC
|
||||
#pragma warning disable 488
|
||||
#endif
|
||||
|
||||
#ifdef __NVCC__
|
||||
//disables nvcc specific warning in json.hpp
|
||||
#pragma clang diagnostic ignored "-Wdeprecated-register"
|
||||
|
||||
#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
|
||||
//disables nvcc specific warning in json.hpp
|
||||
#pragma nv_diag_suppress unsigned_compare_with_zero
|
||||
#pragma nv_diag_suppress cast_to_qualified_type
|
||||
//disables nvcc specific warning in many files
|
||||
#pragma nv_diag_suppress esa_on_defaulted_function_ignored
|
||||
#pragma nv_diag_suppress extra_semicolon
|
||||
#else
|
||||
//disables nvcc specific warning in json.hpp
|
||||
#pragma diag_suppress unsigned_compare_with_zero
|
||||
#pragma diag_suppress cast_to_qualified_type
|
||||
//disables nvcc specific warning in many files
|
||||
#pragma diag_suppress esa_on_defaulted_function_ignored
|
||||
#pragma diag_suppress extra_semicolon
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Disable vectorisation in Eigen on the Power8/9 and PowerPC
|
||||
#ifdef __ALTIVEC__
|
||||
#define EIGEN_DONT_VECTORIZE
|
||||
#endif
|
||||
#ifdef __VSX__
|
||||
#define EIGEN_DONT_VECTORIZE
|
||||
#endif
|
||||
|
||||
#endif
|
@ -1,75 +0,0 @@
|
||||
#include <Grid/GridCore.h>
|
||||
#pragma once
|
||||
// Force Eigen to use MKL if Grid has been configured with --enable-mkl
|
||||
#ifdef USE_MKL
|
||||
#define EIGEN_USE_MKL_ALL
|
||||
#endif
|
||||
|
||||
|
||||
#if defined __GNUC__
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
||||
#endif
|
||||
|
||||
/* NVCC save and restore compile environment*/
|
||||
#ifdef __NVCC__
|
||||
#pragma push
|
||||
#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
|
||||
#pragma nv_diag_suppress code_is_unreachable
|
||||
#else
|
||||
#pragma diag_suppress code_is_unreachable
|
||||
#endif
|
||||
#pragma push_macro("__CUDA_ARCH__")
|
||||
#pragma push_macro("__NVCC__")
|
||||
#pragma push_macro("__CUDACC__")
|
||||
#undef __CUDA_ARCH__
|
||||
#undef __NVCC__
|
||||
#undef __CUDACC__
|
||||
#define __NVCC__REDEFINE__
|
||||
#endif
|
||||
|
||||
/* SYCL save and restore compile environment*/
|
||||
#ifdef GRID_SYCL
|
||||
#pragma push
|
||||
#pragma push_macro("__SYCL_DEVICE_ONLY__")
|
||||
#undef __SYCL_DEVICE_ONLY__
|
||||
#define EIGEN_DONT_VECTORIZE
|
||||
//#undef EIGEN_USE_SYCL
|
||||
#define __SYCL__REDEFINE__
|
||||
#endif
|
||||
|
||||
/* HIP save and restore compile environment*/
|
||||
#ifdef GRID_HIP
|
||||
#pragma push
|
||||
#pragma push_macro("__HIP_DEVICE_COMPILE__")
|
||||
#endif
|
||||
#define EIGEN_NO_HIP
|
||||
|
||||
#include <Grid/Eigen/Dense>
|
||||
#include <Grid/Eigen/unsupported/CXX11/Tensor>
|
||||
|
||||
/* NVCC restore */
|
||||
#ifdef __NVCC__REDEFINE__
|
||||
#pragma pop_macro("__CUDACC__")
|
||||
#pragma pop_macro("__NVCC__")
|
||||
#pragma pop_macro("__CUDA_ARCH__")
|
||||
#pragma pop
|
||||
#endif
|
||||
|
||||
/*SYCL restore*/
|
||||
#ifdef __SYCL__REDEFINE__
|
||||
#pragma pop_macro("__SYCL_DEVICE_ONLY__")
|
||||
#pragma pop
|
||||
#endif
|
||||
|
||||
/*HIP restore*/
|
||||
#ifdef __HIP__REDEFINE__
|
||||
#pragma pop_macro("__HIP_DEVICE_COMPILE__")
|
||||
#pragma pop
|
||||
#endif
|
||||
|
||||
#if defined __GNUC__
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
|
@ -1 +0,0 @@
|
||||
#include <Grid/Grid_Eigen_Dense.h>
|
@ -1,81 +0,0 @@
|
||||
extra_sources=
|
||||
extra_headers=
|
||||
|
||||
if BUILD_COMMS_MPI3
|
||||
extra_sources+=communicator/Communicator_mpi3.cc
|
||||
extra_sources+=communicator/Communicator_base.cc
|
||||
extra_sources+=communicator/SharedMemoryMPI.cc
|
||||
extra_sources+=communicator/SharedMemory.cc
|
||||
endif
|
||||
|
||||
if BUILD_COMMS_NONE
|
||||
extra_sources+=communicator/Communicator_none.cc
|
||||
extra_sources+=communicator/Communicator_base.cc
|
||||
extra_sources+=communicator/SharedMemoryNone.cc
|
||||
extra_sources+=communicator/SharedMemory.cc
|
||||
endif
|
||||
|
||||
if BUILD_HDF5
|
||||
extra_sources+=serialisation/Hdf5IO.cc
|
||||
extra_headers+=serialisation/Hdf5IO.h
|
||||
extra_headers+=serialisation/Hdf5Type.h
|
||||
endif
|
||||
|
||||
|
||||
all: version-cache Version.h
|
||||
|
||||
version-cache:
|
||||
@if [ `git status --porcelain | grep -v '??' | wc -l` -gt 0 ]; then\
|
||||
a="uncommited changes";\
|
||||
else\
|
||||
a="clean";\
|
||||
fi;\
|
||||
echo "`git log -n 1 --format=format:"#define GITHASH \\"%H:%d $$a\\"%n" HEAD`" > vertmp;\
|
||||
if [ -e version-cache ]; then\
|
||||
d=`diff vertmp version-cache`;\
|
||||
if [ "$${d}" != "" ]; then\
|
||||
mv vertmp version-cache;\
|
||||
rm -f Version.h;\
|
||||
fi;\
|
||||
else\
|
||||
mv vertmp version-cache;\
|
||||
rm -f Version.h;\
|
||||
fi;\
|
||||
rm -f vertmp
|
||||
|
||||
Version.h: version-cache
|
||||
cp version-cache Version.h
|
||||
|
||||
.PHONY: version-cache
|
||||
|
||||
#
|
||||
# Libraries
|
||||
#
|
||||
include Make.inc
|
||||
include Eigen.inc
|
||||
|
||||
extra_sources+=$(WILS_FERMION_FILES)
|
||||
extra_sources+=$(STAG_FERMION_FILES)
|
||||
if BUILD_ZMOBIUS
|
||||
extra_sources+=$(ZWILS_FERMION_FILES)
|
||||
endif
|
||||
if BUILD_GPARITY
|
||||
extra_sources+=$(GP_FERMION_FILES)
|
||||
endif
|
||||
if BUILD_FERMION_REPS
|
||||
extra_sources+=$(ADJ_FERMION_FILES)
|
||||
extra_sources+=$(TWOIND_FERMION_FILES)
|
||||
endif
|
||||
if BUILD_SP
|
||||
extra_sources+=$(SP_FERMION_FILES)
|
||||
extra_sources+=$(SP_TWOIND_FERMION_FILES)
|
||||
endif
|
||||
|
||||
lib_LIBRARIES = libGrid.a
|
||||
|
||||
CCFILES += $(extra_sources)
|
||||
HFILES += $(extra_headers) Config.h Version.h
|
||||
|
||||
libGrid_a_SOURCES = $(CCFILES)
|
||||
libGrid_adir = $(includedir)/Grid
|
||||
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) $(eigen_unsupp_files)
|
@ -1,38 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/Namespace.h
|
||||
|
||||
Copyright (C) 2016
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
#include <cassert>
|
||||
|
||||
#define NAMESPACE_BEGIN(A) namespace A {
|
||||
#define NAMESPACE_END(A) }
|
||||
#define GRID_NAMESPACE_BEGIN NAMESPACE_BEGIN(Grid)
|
||||
#define GRID_NAMESPACE_END NAMESPACE_END(Grid)
|
||||
#define NAMESPACE_CHECK(x) struct namespaceTEST##x {}; static_assert(std::is_same<namespaceTEST##x, ::namespaceTEST##x>::value,"Not in :: at" );
|
File diff suppressed because it is too large
Load Diff
@ -1,296 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/Cshift.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef _GRID_FFT_H_
|
||||
#define _GRID_FFT_H_
|
||||
|
||||
#ifdef HAVE_FFTW
|
||||
#ifdef USE_MKL
|
||||
#include <fftw/fftw3.h>
|
||||
#else
|
||||
#include <fftw3.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class scalar> struct FFTW { };
|
||||
|
||||
#ifdef HAVE_FFTW
|
||||
template<> struct FFTW<ComplexD> {
|
||||
public:
|
||||
|
||||
typedef fftw_complex FFTW_scalar;
|
||||
typedef fftw_plan FFTW_plan;
|
||||
|
||||
static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany,
|
||||
FFTW_scalar *in, const int *inembed,
|
||||
int istride, int idist,
|
||||
FFTW_scalar *out, const int *onembed,
|
||||
int ostride, int odist,
|
||||
int sign, unsigned flags) {
|
||||
return ::fftw_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags);
|
||||
}
|
||||
|
||||
static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){
|
||||
::fftw_flops(p,add,mul,fmas);
|
||||
}
|
||||
|
||||
inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) {
|
||||
::fftw_execute_dft(p,in,out);
|
||||
}
|
||||
inline static void fftw_destroy_plan(const FFTW_plan p) {
|
||||
::fftw_destroy_plan(p);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct FFTW<ComplexF> {
|
||||
public:
|
||||
|
||||
typedef fftwf_complex FFTW_scalar;
|
||||
typedef fftwf_plan FFTW_plan;
|
||||
|
||||
static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany,
|
||||
FFTW_scalar *in, const int *inembed,
|
||||
int istride, int idist,
|
||||
FFTW_scalar *out, const int *onembed,
|
||||
int ostride, int odist,
|
||||
int sign, unsigned flags) {
|
||||
return ::fftwf_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags);
|
||||
}
|
||||
|
||||
static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){
|
||||
::fftwf_flops(p,add,mul,fmas);
|
||||
}
|
||||
|
||||
inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) {
|
||||
::fftwf_execute_dft(p,in,out);
|
||||
}
|
||||
inline static void fftw_destroy_plan(const FFTW_plan p) {
|
||||
::fftwf_destroy_plan(p);
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#ifndef FFTW_FORWARD
|
||||
#define FFTW_FORWARD (-1)
|
||||
#define FFTW_BACKWARD (+1)
|
||||
#endif
|
||||
|
||||
class FFT {
|
||||
private:
|
||||
|
||||
GridCartesian *vgrid;
|
||||
GridCartesian *sgrid;
|
||||
|
||||
int Nd;
|
||||
double flops;
|
||||
double flops_call;
|
||||
uint64_t usec;
|
||||
|
||||
Coordinate dimensions;
|
||||
Coordinate processors;
|
||||
Coordinate processor_coor;
|
||||
|
||||
public:
|
||||
|
||||
static const int forward=FFTW_FORWARD;
|
||||
static const int backward=FFTW_BACKWARD;
|
||||
|
||||
double Flops(void) {return flops;}
|
||||
double MFlops(void) {return flops/usec;}
|
||||
double USec(void) {return (double)usec;}
|
||||
|
||||
FFT ( GridCartesian * grid ) :
|
||||
vgrid(grid),
|
||||
Nd(grid->_ndimension),
|
||||
dimensions(grid->_fdimensions),
|
||||
processors(grid->_processors),
|
||||
processor_coor(grid->_processor_coor)
|
||||
{
|
||||
flops=0;
|
||||
usec =0;
|
||||
Coordinate layout(Nd,1);
|
||||
sgrid = new GridCartesian(dimensions,layout,processors,*grid);
|
||||
};
|
||||
|
||||
~FFT ( void) {
|
||||
delete sgrid;
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
void FFT_dim_mask(Lattice<vobj> &result,const Lattice<vobj> &source,Coordinate mask,int sign){
|
||||
|
||||
conformable(result.Grid(),vgrid);
|
||||
conformable(source.Grid(),vgrid);
|
||||
Lattice<vobj> tmp(vgrid);
|
||||
tmp = source;
|
||||
for(int d=0;d<Nd;d++){
|
||||
if( mask[d] ) {
|
||||
FFT_dim(result,tmp,d,sign);
|
||||
tmp=result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
void FFT_all_dim(Lattice<vobj> &result,const Lattice<vobj> &source,int sign){
|
||||
Coordinate mask(Nd,1);
|
||||
FFT_dim_mask(result,source,mask,sign);
|
||||
}
|
||||
|
||||
|
||||
template<class vobj>
|
||||
void FFT_dim(Lattice<vobj> &result,const Lattice<vobj> &source,int dim, int sign){
|
||||
#ifndef HAVE_FFTW
|
||||
assert(0);
|
||||
#else
|
||||
conformable(result.Grid(),vgrid);
|
||||
conformable(source.Grid(),vgrid);
|
||||
|
||||
int L = vgrid->_ldimensions[dim];
|
||||
int G = vgrid->_fdimensions[dim];
|
||||
|
||||
Coordinate layout(Nd,1);
|
||||
Coordinate pencil_gd(vgrid->_fdimensions);
|
||||
|
||||
pencil_gd[dim] = G*processors[dim];
|
||||
|
||||
// Pencil global vol LxLxGxLxL per node
|
||||
GridCartesian pencil_g(pencil_gd,layout,processors,*vgrid);
|
||||
|
||||
// Construct pencils
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename sobj::scalar_type scalar;
|
||||
|
||||
Lattice<sobj> pgbuf(&pencil_g);
|
||||
autoView(pgbuf_v , pgbuf, CpuWrite);
|
||||
|
||||
typedef typename FFTW<scalar>::FFTW_scalar FFTW_scalar;
|
||||
typedef typename FFTW<scalar>::FFTW_plan FFTW_plan;
|
||||
|
||||
int Ncomp = sizeof(sobj)/sizeof(scalar);
|
||||
int Nlow = 1;
|
||||
for(int d=0;d<dim;d++){
|
||||
Nlow*=vgrid->_ldimensions[d];
|
||||
}
|
||||
|
||||
int rank = 1; /* 1d transforms */
|
||||
int n[] = {G}; /* 1d transforms of length G */
|
||||
int howmany = Ncomp;
|
||||
int odist,idist,istride,ostride;
|
||||
idist = odist = 1; /* Distance between consecutive FT's */
|
||||
istride = ostride = Ncomp*Nlow; /* distance between two elements in the same FT */
|
||||
int *inembed = n, *onembed = n;
|
||||
|
||||
scalar div;
|
||||
if ( sign == backward ) div = 1.0/G;
|
||||
else if ( sign == forward ) div = 1.0;
|
||||
else assert(0);
|
||||
|
||||
FFTW_plan p;
|
||||
{
|
||||
FFTW_scalar *in = (FFTW_scalar *)&pgbuf_v[0];
|
||||
FFTW_scalar *out= (FFTW_scalar *)&pgbuf_v[0];
|
||||
p = FFTW<scalar>::fftw_plan_many_dft(rank,n,howmany,
|
||||
in,inembed,
|
||||
istride,idist,
|
||||
out,onembed,
|
||||
ostride, odist,
|
||||
sign,FFTW_ESTIMATE);
|
||||
}
|
||||
|
||||
// Barrel shift and collect global pencil
|
||||
Coordinate lcoor(Nd), gcoor(Nd);
|
||||
result = source;
|
||||
int pc = processor_coor[dim];
|
||||
for(int p=0;p<processors[dim];p++) {
|
||||
{
|
||||
autoView(r_v,result,CpuRead);
|
||||
autoView(p_v,pgbuf,CpuWrite);
|
||||
thread_for(idx, sgrid->lSites(),{
|
||||
Coordinate cbuf(Nd);
|
||||
sobj s;
|
||||
sgrid->LocalIndexToLocalCoor(idx,cbuf);
|
||||
peekLocalSite(s,r_v,cbuf);
|
||||
cbuf[dim]+=((pc+p) % processors[dim])*L;
|
||||
pokeLocalSite(s,p_v,cbuf);
|
||||
});
|
||||
}
|
||||
if (p != processors[dim] - 1) {
|
||||
result = Cshift(result,dim,L);
|
||||
}
|
||||
}
|
||||
|
||||
// Loop over orthog coords
|
||||
int NN=pencil_g.lSites();
|
||||
GridStopWatch timer;
|
||||
timer.Start();
|
||||
thread_for( idx,NN,{
|
||||
Coordinate cbuf(Nd);
|
||||
pencil_g.LocalIndexToLocalCoor(idx, cbuf);
|
||||
if ( cbuf[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0
|
||||
FFTW_scalar *in = (FFTW_scalar *)&pgbuf_v[idx];
|
||||
FFTW_scalar *out= (FFTW_scalar *)&pgbuf_v[idx];
|
||||
FFTW<scalar>::fftw_execute_dft(p,in,out);
|
||||
}
|
||||
});
|
||||
timer.Stop();
|
||||
|
||||
// performance counting
|
||||
double add,mul,fma;
|
||||
FFTW<scalar>::fftw_flops(p,&add,&mul,&fma);
|
||||
flops_call = add+mul+2.0*fma;
|
||||
usec += timer.useconds();
|
||||
flops+= flops_call*NN;
|
||||
|
||||
// writing out result
|
||||
{
|
||||
autoView(pgbuf_v,pgbuf,CpuRead);
|
||||
autoView(result_v,result,CpuWrite);
|
||||
thread_for(idx,sgrid->lSites(),{
|
||||
Coordinate clbuf(Nd), cgbuf(Nd);
|
||||
sobj s;
|
||||
sgrid->LocalIndexToLocalCoor(idx,clbuf);
|
||||
cgbuf = clbuf;
|
||||
cgbuf[dim] = clbuf[dim]+L*pc;
|
||||
peekLocalSite(s,pgbuf_v,cgbuf);
|
||||
pokeLocalSite(s,result_v,clbuf);
|
||||
});
|
||||
}
|
||||
result = result*div;
|
||||
|
||||
// destroying plan
|
||||
FFTW<scalar>::fftw_destroy_plan(p);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
@ -1,688 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/LinearOperator.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// LinearOperators Take a something and return a something.
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Hopefully linearity is satisfied and the AdjOp is indeed the Hermitian Conjugateugate (transpose if real):
|
||||
//SBase
|
||||
// i) F(a x + b y) = aF(x) + b F(y).
|
||||
// ii) <x|Op|y> = <y|AdjOp|x>^\ast
|
||||
//
|
||||
// Would be fun to have a test linearity & Herm Conj function!
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class LinearOperatorBase {
|
||||
public:
|
||||
// Support for coarsening to a multigrid
|
||||
virtual void OpDiag (const Field &in, Field &out) = 0; // Abstract base
|
||||
virtual void OpDir (const Field &in, Field &out,int dir,int disp) = 0; // Abstract base
|
||||
virtual void OpDirAll (const Field &in, std::vector<Field> &out) = 0; // Abstract base
|
||||
|
||||
virtual void Op (const Field &in, Field &out) = 0; // Abstract base
|
||||
virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base
|
||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2)=0;
|
||||
virtual void HermOp(const Field &in, Field &out)=0;
|
||||
virtual ~LinearOperatorBase(){};
|
||||
};
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// By sharing the class for Sparse Matrix across multiple operator wrappers, we can share code
|
||||
// between RB and non-RB variants. Sparse matrix is like the fermion action def, and then
|
||||
// the wrappers implement the specialisation of "Op" and "AdjOp" to the cases minimising
|
||||
// replication of code.
|
||||
//
|
||||
// I'm not entirely happy with implementation; to share the Schur code between herm and non-herm
|
||||
// while still having a "OpAndNorm" in the abstract base I had to implement it in both cases
|
||||
// with an assert trap in the non-herm. This isn't right; there must be a better C++ way to
|
||||
// do it, but I fear it required multiple inheritance and mixed in abstract base classes
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Construct herm op from non-herm matrix
|
||||
////////////////////////////////////////////////////////////////////
|
||||
template<class Matrix,class Field>
|
||||
class MdagMLinearOperator : public LinearOperatorBase<Field> {
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
MdagMLinearOperator(Matrix &Mat): _Mat(Mat){};
|
||||
|
||||
// Support for coarsening to a multigrid
|
||||
void OpDiag (const Field &in, Field &out) {
|
||||
_Mat.Mdiag(in,out);
|
||||
}
|
||||
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||
_Mat.Mdir(in,out,dir,disp);
|
||||
}
|
||||
void OpDirAll (const Field &in, std::vector<Field> &out){
|
||||
_Mat.MdirAll(in,out);
|
||||
};
|
||||
void Op (const Field &in, Field &out){
|
||||
_Mat.M(in,out);
|
||||
}
|
||||
void AdjOp (const Field &in, Field &out){
|
||||
_Mat.Mdag(in,out);
|
||||
}
|
||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
_Mat.MdagM(in,out);
|
||||
ComplexD dot = innerProduct(in,out);
|
||||
n1=real(dot);
|
||||
n2=norm2(out);
|
||||
}
|
||||
void HermOp(const Field &in, Field &out){
|
||||
_Mat.MdagM(in,out);
|
||||
}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Construct herm op and shift it for mgrid smoother
|
||||
////////////////////////////////////////////////////////////////////
|
||||
template<class Matrix,class Field>
|
||||
class ShiftedMdagMLinearOperator : public LinearOperatorBase<Field> {
|
||||
Matrix &_Mat;
|
||||
RealD _shift;
|
||||
public:
|
||||
ShiftedMdagMLinearOperator(Matrix &Mat,RealD shift): _Mat(Mat), _shift(shift){};
|
||||
// Support for coarsening to a multigrid
|
||||
void OpDiag (const Field &in, Field &out) {
|
||||
_Mat.Mdiag(in,out);
|
||||
assert(0);
|
||||
}
|
||||
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||
_Mat.Mdir(in,out,dir,disp);
|
||||
assert(0);
|
||||
}
|
||||
void OpDirAll (const Field &in, std::vector<Field> &out){
|
||||
assert(0);
|
||||
};
|
||||
void Op (const Field &in, Field &out){
|
||||
_Mat.M(in,out);
|
||||
assert(0);
|
||||
}
|
||||
void AdjOp (const Field &in, Field &out){
|
||||
_Mat.Mdag(in,out);
|
||||
assert(0);
|
||||
}
|
||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
HermOp(in,out);
|
||||
ComplexD dot = innerProduct(in,out);
|
||||
n1=real(dot);
|
||||
n2=norm2(out);
|
||||
}
|
||||
void HermOp(const Field &in, Field &out){
|
||||
_Mat.MdagM(in,out);
|
||||
out = out + _shift*in;
|
||||
}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Wrap an already herm matrix
|
||||
////////////////////////////////////////////////////////////////////
|
||||
template<class Matrix,class Field>
|
||||
class HermitianLinearOperator : public LinearOperatorBase<Field> {
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
HermitianLinearOperator(Matrix &Mat): _Mat(Mat){};
|
||||
// Support for coarsening to a multigrid
|
||||
void OpDiag (const Field &in, Field &out) {
|
||||
_Mat.Mdiag(in,out);
|
||||
}
|
||||
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||
_Mat.Mdir(in,out,dir,disp);
|
||||
}
|
||||
void OpDirAll (const Field &in, std::vector<Field> &out){
|
||||
_Mat.MdirAll(in,out);
|
||||
};
|
||||
void Op (const Field &in, Field &out){
|
||||
_Mat.M(in,out);
|
||||
}
|
||||
void AdjOp (const Field &in, Field &out){
|
||||
_Mat.M(in,out);
|
||||
}
|
||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
HermOp(in,out);
|
||||
ComplexD dot= innerProduct(in,out); n1=real(dot);
|
||||
n2=norm2(out);
|
||||
}
|
||||
void HermOp(const Field &in, Field &out){
|
||||
_Mat.M(in,out);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Matrix,class Field>
|
||||
class NonHermitianLinearOperator : public LinearOperatorBase<Field> {
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
NonHermitianLinearOperator(Matrix &Mat): _Mat(Mat){};
|
||||
// Support for coarsening to a multigrid
|
||||
void OpDiag (const Field &in, Field &out) {
|
||||
_Mat.Mdiag(in,out);
|
||||
}
|
||||
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||
_Mat.Mdir(in,out,dir,disp);
|
||||
}
|
||||
void OpDirAll (const Field &in, std::vector<Field> &out){
|
||||
_Mat.MdirAll(in,out);
|
||||
};
|
||||
void Op (const Field &in, Field &out){
|
||||
_Mat.M(in,out);
|
||||
}
|
||||
void AdjOp (const Field &in, Field &out){
|
||||
_Mat.Mdag(in,out);
|
||||
}
|
||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
assert(0);
|
||||
}
|
||||
void HermOp(const Field &in, Field &out){
|
||||
assert(0);
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// Even Odd Schur decomp operators; there are several
|
||||
// ways to introduce the even odd checkerboarding
|
||||
//////////////////////////////////////////////////////////
|
||||
|
||||
template<class Field>
|
||||
class SchurOperatorBase : public LinearOperatorBase<Field> {
|
||||
public:
|
||||
virtual void Mpc (const Field &in, Field &out) =0;
|
||||
virtual void MpcDag (const Field &in, Field &out) =0;
|
||||
virtual void MpcDagMpc(const Field &in, Field &out) {
|
||||
Field tmp(in.Grid());
|
||||
tmp.Checkerboard() = in.Checkerboard();
|
||||
Mpc(in,tmp);
|
||||
MpcDag(tmp,out);
|
||||
}
|
||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
MpcDagMpc(in,out);
|
||||
ComplexD dot= innerProduct(in,out);
|
||||
n1=real(dot);
|
||||
n2=norm2(out);
|
||||
}
|
||||
virtual void HermOp(const Field &in, Field &out){
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
MpcDagMpc(in,out);
|
||||
}
|
||||
void Op (const Field &in, Field &out){
|
||||
Mpc(in,out);
|
||||
}
|
||||
void AdjOp (const Field &in, Field &out){
|
||||
MpcDag(in,out);
|
||||
}
|
||||
// Support for coarsening to a multigrid
|
||||
void OpDiag (const Field &in, Field &out) {
|
||||
assert(0); // must coarsen the unpreconditioned system
|
||||
}
|
||||
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||
assert(0);
|
||||
}
|
||||
void OpDirAll (const Field &in, std::vector<Field> &out){
|
||||
assert(0);
|
||||
};
|
||||
};
|
||||
template<class Matrix,class Field>
|
||||
class SchurDiagMooeeOperator : public SchurOperatorBase<Field> {
|
||||
public:
|
||||
Matrix &_Mat;
|
||||
SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
|
||||
virtual void Mpc (const Field &in, Field &out) {
|
||||
Field tmp(in.Grid());
|
||||
tmp.Checkerboard() = !in.Checkerboard();
|
||||
|
||||
_Mat.Meooe(in,tmp);
|
||||
_Mat.MooeeInv(tmp,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
_Mat.Mooee(in,out);
|
||||
axpy(out,-1.0,tmp,out);
|
||||
}
|
||||
virtual void MpcDag (const Field &in, Field &out){
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.MeooeDag(in,tmp);
|
||||
_Mat.MooeeInvDag(tmp,out);
|
||||
_Mat.MeooeDag(out,tmp);
|
||||
_Mat.MooeeDag(in,out);
|
||||
axpy(out,-1.0,tmp,out);
|
||||
}
|
||||
};
|
||||
template<class Matrix,class Field>
|
||||
class SchurDiagOneOperator : public SchurOperatorBase<Field> {
|
||||
protected:
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
SchurDiagOneOperator (Matrix &Mat): _Mat(Mat){};
|
||||
|
||||
virtual void Mpc (const Field &in, Field &out) {
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.Meooe(in,out);
|
||||
_Mat.MooeeInv(out,tmp);
|
||||
_Mat.Meooe(tmp,out);
|
||||
_Mat.MooeeInv(out,tmp);
|
||||
axpy(out,-1.0,tmp,in);
|
||||
}
|
||||
virtual void MpcDag (const Field &in, Field &out){
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.MooeeInvDag(in,out);
|
||||
_Mat.MeooeDag(out,tmp);
|
||||
_Mat.MooeeInvDag(tmp,out);
|
||||
_Mat.MeooeDag(out,tmp);
|
||||
axpy(out,-1.0,tmp,in);
|
||||
}
|
||||
};
|
||||
template<class Matrix,class Field>
|
||||
class SchurDiagTwoOperator : public SchurOperatorBase<Field> {
|
||||
protected:
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
SchurDiagTwoOperator (Matrix &Mat): _Mat(Mat){};
|
||||
|
||||
virtual void Mpc (const Field &in, Field &out) {
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.MooeeInv(in,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
_Mat.MooeeInv(tmp,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
|
||||
axpy(out,-1.0,tmp,in);
|
||||
}
|
||||
virtual void MpcDag (const Field &in, Field &out){
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.MeooeDag(in,out);
|
||||
_Mat.MooeeInvDag(out,tmp);
|
||||
_Mat.MeooeDag(tmp,out);
|
||||
_Mat.MooeeInvDag(out,tmp);
|
||||
|
||||
axpy(out,-1.0,tmp,in);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Field>
|
||||
class NonHermitianSchurOperatorBase : public LinearOperatorBase<Field>
|
||||
{
|
||||
public:
|
||||
virtual void Mpc (const Field& in, Field& out) = 0;
|
||||
virtual void MpcDag (const Field& in, Field& out) = 0;
|
||||
virtual void MpcDagMpc(const Field& in, Field& out) {
|
||||
Field tmp(in.Grid());
|
||||
tmp.Checkerboard() = in.Checkerboard();
|
||||
Mpc(in,tmp);
|
||||
MpcDag(tmp,out);
|
||||
}
|
||||
virtual void HermOpAndNorm(const Field& in, Field& out, RealD& n1, RealD& n2) {
|
||||
assert(0);
|
||||
}
|
||||
virtual void HermOp(const Field& in, Field& out) {
|
||||
assert(0);
|
||||
}
|
||||
void Op(const Field& in, Field& out) {
|
||||
Mpc(in, out);
|
||||
}
|
||||
void AdjOp(const Field& in, Field& out) {
|
||||
MpcDag(in, out);
|
||||
}
|
||||
// Support for coarsening to a multigrid
|
||||
void OpDiag(const Field& in, Field& out) {
|
||||
assert(0); // must coarsen the unpreconditioned system
|
||||
}
|
||||
void OpDir(const Field& in, Field& out, int dir, int disp) {
|
||||
assert(0);
|
||||
}
|
||||
void OpDirAll(const Field& in, std::vector<Field>& out){
|
||||
assert(0);
|
||||
};
|
||||
};
|
||||
|
||||
template<class Matrix, class Field>
|
||||
class NonHermitianSchurDiagMooeeOperator : public NonHermitianSchurOperatorBase<Field>
|
||||
{
|
||||
public:
|
||||
Matrix& _Mat;
|
||||
NonHermitianSchurDiagMooeeOperator(Matrix& Mat): _Mat(Mat){};
|
||||
virtual void Mpc(const Field& in, Field& out) {
|
||||
Field tmp(in.Grid());
|
||||
tmp.Checkerboard() = !in.Checkerboard();
|
||||
|
||||
_Mat.Meooe(in, tmp);
|
||||
_Mat.MooeeInv(tmp, out);
|
||||
_Mat.Meooe(out, tmp);
|
||||
|
||||
_Mat.Mooee(in, out);
|
||||
|
||||
axpy(out, -1.0, tmp, out);
|
||||
}
|
||||
virtual void MpcDag(const Field& in, Field& out) {
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.MeooeDag(in, tmp);
|
||||
_Mat.MooeeInvDag(tmp, out);
|
||||
_Mat.MeooeDag(out, tmp);
|
||||
|
||||
_Mat.MooeeDag(in, out);
|
||||
|
||||
axpy(out, -1.0, tmp, out);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Matrix,class Field>
|
||||
class NonHermitianSchurDiagOneOperator : public NonHermitianSchurOperatorBase<Field>
|
||||
{
|
||||
protected:
|
||||
Matrix &_Mat;
|
||||
|
||||
public:
|
||||
NonHermitianSchurDiagOneOperator (Matrix& Mat): _Mat(Mat){};
|
||||
virtual void Mpc(const Field& in, Field& out) {
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.Meooe(in, out);
|
||||
_Mat.MooeeInv(out, tmp);
|
||||
_Mat.Meooe(tmp, out);
|
||||
_Mat.MooeeInv(out, tmp);
|
||||
|
||||
axpy(out, -1.0, tmp, in);
|
||||
}
|
||||
virtual void MpcDag(const Field& in, Field& out) {
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.MooeeInvDag(in, out);
|
||||
_Mat.MeooeDag(out, tmp);
|
||||
_Mat.MooeeInvDag(tmp, out);
|
||||
_Mat.MeooeDag(out, tmp);
|
||||
|
||||
axpy(out, -1.0, tmp, in);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Matrix, class Field>
|
||||
class NonHermitianSchurDiagTwoOperator : public NonHermitianSchurOperatorBase<Field>
|
||||
{
|
||||
protected:
|
||||
Matrix& _Mat;
|
||||
|
||||
public:
|
||||
NonHermitianSchurDiagTwoOperator(Matrix& Mat): _Mat(Mat){};
|
||||
|
||||
virtual void Mpc(const Field& in, Field& out) {
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.MooeeInv(in, out);
|
||||
_Mat.Meooe(out, tmp);
|
||||
_Mat.MooeeInv(tmp, out);
|
||||
_Mat.Meooe(out, tmp);
|
||||
|
||||
axpy(out, -1.0, tmp, in);
|
||||
}
|
||||
virtual void MpcDag(const Field& in, Field& out) {
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.MeooeDag(in, out);
|
||||
_Mat.MooeeInvDag(out, tmp);
|
||||
_Mat.MeooeDag(tmp, out);
|
||||
_Mat.MooeeInvDag(out, tmp);
|
||||
|
||||
axpy(out, -1.0, tmp, in);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Matrix,class Field>
|
||||
class QuadLinearOperator : public LinearOperatorBase<Field> {
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
RealD a0,a1,a2;
|
||||
QuadLinearOperator(Matrix &Mat): _Mat(Mat),a0(0.),a1(0.),a2(1.) {};
|
||||
QuadLinearOperator(Matrix &Mat, RealD _a0,RealD _a1,RealD _a2): _Mat(Mat),a0(_a0),a1(_a1),a2(_a2) {};
|
||||
// Support for coarsening to a multigrid
|
||||
void OpDiag (const Field &in, Field &out) {
|
||||
assert(0);
|
||||
_Mat.Mdiag(in,out);
|
||||
}
|
||||
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||
assert(0);
|
||||
_Mat.Mdir(in,out,dir,disp);
|
||||
}
|
||||
void OpDirAll (const Field &in, std::vector<Field> &out){
|
||||
assert(0);
|
||||
_Mat.MdirAll(in,out);
|
||||
}
|
||||
void HermOp (const Field &in, Field &out){
|
||||
// _Mat.M(in,out);
|
||||
Field tmp1(in.Grid());
|
||||
// Linop.HermOpAndNorm(psi, mmp, d, b);
|
||||
_Mat.M(in,tmp1);
|
||||
_Mat.M(tmp1,out);
|
||||
out *= a2;
|
||||
axpy(out, a1, tmp1, out);
|
||||
axpy(out, a0, in, out);
|
||||
// d=real(innerProduct(psi,mmp));
|
||||
// b=norm2(mmp);
|
||||
}
|
||||
void AdjOp (const Field &in, Field &out){
|
||||
assert(0);
|
||||
_Mat.M(in,out);
|
||||
}
|
||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
HermOp(in,out);
|
||||
ComplexD dot= innerProduct(in,out); n1=real(dot);
|
||||
n2=norm2(out);
|
||||
}
|
||||
void Op(const Field &in, Field &out){
|
||||
assert(0);
|
||||
_Mat.M(in,out);
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Left handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) psi = eta --> ( 1 - Moo^-1 Moe Mee^-1 Meo ) psi = Moo^-1 eta
|
||||
// Right handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) Moo^-1 Moo psi = eta --> ( 1 - Moe Mee^-1 Meo Moo^-1) phi=eta ; psi = Moo^-1 phi
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Matrix,class Field> using SchurDiagOneRH = SchurDiagTwoOperator<Matrix,Field> ;
|
||||
template<class Matrix,class Field> using SchurDiagOneLH = SchurDiagOneOperator<Matrix,Field> ;
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Staggered use
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Matrix,class Field>
|
||||
class SchurStaggeredOperator : public SchurOperatorBase<Field> {
|
||||
protected:
|
||||
Matrix &_Mat;
|
||||
Field tmp;
|
||||
RealD mass;
|
||||
public:
|
||||
SchurStaggeredOperator (Matrix &Mat): _Mat(Mat), tmp(_Mat.RedBlackGrid())
|
||||
{
|
||||
assert( _Mat.isTrivialEE() );
|
||||
mass = _Mat.Mass();
|
||||
}
|
||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
Mpc(in,out);
|
||||
ComplexD dot= innerProduct(in,out);
|
||||
n1 = real(dot);
|
||||
n2 =0.0;
|
||||
}
|
||||
virtual void HermOp(const Field &in, Field &out){
|
||||
Mpc(in,out);
|
||||
// _Mat.Meooe(in,out);
|
||||
// _Mat.Meooe(out,tmp);
|
||||
// axpby(out,-1.0,mass*mass,tmp,in);
|
||||
}
|
||||
virtual void Mpc (const Field &in, Field &out)
|
||||
{
|
||||
Field tmp(in.Grid());
|
||||
Field tmp2(in.Grid());
|
||||
|
||||
// _Mat.Mooee(in,out);
|
||||
// _Mat.Mooee(out,tmp);
|
||||
|
||||
_Mat.Meooe(in,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
axpby(out,-1.0,mass*mass,tmp,in);
|
||||
}
|
||||
virtual void MpcDag (const Field &in, Field &out){
|
||||
Mpc(in,out);
|
||||
}
|
||||
virtual void MpcDagMpc(const Field &in, Field &out) {
|
||||
assert(0);// Never need with staggered
|
||||
}
|
||||
};
|
||||
template<class Matrix,class Field> using SchurStagOperator = SchurStaggeredOperator<Matrix,Field>;
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Base classes for functions of operators
|
||||
/////////////////////////////////////////////////////////////
|
||||
template<class Field> class OperatorFunction {
|
||||
public:
|
||||
virtual void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) = 0;
|
||||
virtual void operator() (LinearOperatorBase<Field> &Linop, const std::vector<Field> &in,std::vector<Field> &out) {
|
||||
assert(in.size()==out.size());
|
||||
for(int k=0;k<in.size();k++){
|
||||
(*this)(Linop,in[k],out[k]);
|
||||
}
|
||||
};
|
||||
virtual ~OperatorFunction(){};
|
||||
};
|
||||
|
||||
template<class Field> class LinearFunction {
|
||||
public:
|
||||
virtual void operator() (const Field &in, Field &out) = 0;
|
||||
|
||||
virtual void operator() (const std::vector<Field> &in, std::vector<Field> &out)
|
||||
{
|
||||
assert(in.size() == out.size());
|
||||
|
||||
for (unsigned int i = 0; i < in.size(); ++i)
|
||||
{
|
||||
(*this)(in[i], out[i]);
|
||||
}
|
||||
}
|
||||
virtual ~LinearFunction(){};
|
||||
};
|
||||
|
||||
template<class Field> class IdentityLinearFunction : public LinearFunction<Field> {
|
||||
public:
|
||||
void operator() (const Field &in, Field &out){
|
||||
out = in;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Base classes for Multishift solvers for operators
|
||||
/////////////////////////////////////////////////////////////
|
||||
template<class Field> class OperatorMultiFunction {
|
||||
public:
|
||||
virtual void operator() (LinearOperatorBase<Field> &Linop, const Field &in, std::vector<Field> &out) = 0;
|
||||
};
|
||||
|
||||
// FIXME : To think about
|
||||
|
||||
// Chroma functionality list defining LinearOperator
|
||||
/*
|
||||
virtual void operator() (T& chi, const T& psi, enum PlusMinus isign) const = 0;
|
||||
virtual void operator() (T& chi, const T& psi, enum PlusMinus isign, Real epsilon) const
|
||||
virtual const Subset& subset() const = 0;
|
||||
virtual unsigned long nFlops() const { return 0; }
|
||||
virtual void deriv(P& ds_u, const T& chi, const T& psi, enum PlusMinus isign) const
|
||||
class UnprecLinearOperator : public DiffLinearOperator<T,P,Q>
|
||||
const Subset& subset() const {return all;}
|
||||
};
|
||||
*/
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Hermitian operator Linear function and operator function
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field>
|
||||
class HermOpOperatorFunction : public OperatorFunction<Field> {
|
||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
|
||||
Linop.HermOp(in,out);
|
||||
};
|
||||
};
|
||||
|
||||
template<typename Field>
|
||||
class PlainHermOp : public LinearFunction<Field> {
|
||||
public:
|
||||
using LinearFunction<Field>::operator();
|
||||
LinearOperatorBase<Field> &_Linop;
|
||||
|
||||
PlainHermOp(LinearOperatorBase<Field>& linop) : _Linop(linop)
|
||||
{}
|
||||
|
||||
void operator()(const Field& in, Field& out) {
|
||||
_Linop.HermOp(in,out);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Field>
|
||||
class FunctionHermOp : public LinearFunction<Field> {
|
||||
public:
|
||||
using LinearFunction<Field>::operator();
|
||||
OperatorFunction<Field> & _poly;
|
||||
LinearOperatorBase<Field> &_Linop;
|
||||
|
||||
FunctionHermOp(OperatorFunction<Field> & poly,LinearOperatorBase<Field>& linop)
|
||||
: _poly(poly), _Linop(linop) {};
|
||||
|
||||
void operator()(const Field& in, Field& out) {
|
||||
_poly(_Linop,in,out);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Field>
|
||||
class Polynomial : public OperatorFunction<Field> {
|
||||
private:
|
||||
std::vector<RealD> Coeffs;
|
||||
public:
|
||||
using OperatorFunction<Field>::operator();
|
||||
|
||||
Polynomial(std::vector<RealD> &_Coeffs) : Coeffs(_Coeffs) { };
|
||||
|
||||
// Implement the required interface
|
||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
|
||||
|
||||
Field AtoN(in.Grid());
|
||||
Field Mtmp(in.Grid());
|
||||
AtoN = in;
|
||||
out = AtoN*Coeffs[0];
|
||||
for(int n=1;n<Coeffs.size();n++){
|
||||
Mtmp = AtoN;
|
||||
Linop.HermOp(Mtmp,AtoN);
|
||||
out=out+AtoN*Coeffs[n];
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
@ -1,81 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/SparseMatrix.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_ALGORITHM_SPARSE_MATRIX_H
|
||||
#define GRID_ALGORITHM_SPARSE_MATRIX_H
|
||||
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Interface defining what I expect of a general sparse matrix, such as a Fermion action
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class SparseMatrixBase {
|
||||
public:
|
||||
virtual GridBase *Grid(void) =0;
|
||||
// Full checkerboar operations
|
||||
virtual void M (const Field &in, Field &out)=0;
|
||||
virtual void Mdag (const Field &in, Field &out)=0;
|
||||
virtual void MdagM(const Field &in, Field &out) {
|
||||
Field tmp (in.Grid());
|
||||
M(in,tmp);
|
||||
Mdag(tmp,out);
|
||||
}
|
||||
virtual void Mdiag (const Field &in, Field &out)=0;
|
||||
virtual void Mdir (const Field &in, Field &out,int dir, int disp)=0;
|
||||
virtual void MdirAll (const Field &in, std::vector<Field> &out)=0;
|
||||
virtual ~SparseMatrixBase() {};
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Interface augmented by a red black sparse matrix, such as a Fermion action
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class CheckerBoardedSparseMatrixBase : public SparseMatrixBase<Field> {
|
||||
public:
|
||||
virtual GridBase *RedBlackGrid(void)=0;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Query the even even properties to make algorithmic decisions
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
virtual RealD Mass(void) { return 0.0; };
|
||||
virtual int ConstEE(void) { return 1; }; // Disable assumptions unless overridden
|
||||
virtual int isTrivialEE(void) { return 0; }; // by a derived class that knows better
|
||||
|
||||
// half checkerboard operaions
|
||||
virtual void Meooe (const Field &in, Field &out)=0;
|
||||
virtual void Mooee (const Field &in, Field &out)=0;
|
||||
virtual void MooeeInv (const Field &in, Field &out)=0;
|
||||
|
||||
virtual void MeooeDag (const Field &in, Field &out)=0;
|
||||
virtual void MooeeDag (const Field &in, Field &out)=0;
|
||||
virtual void MooeeInvDag (const Field &in, Field &out)=0;
|
||||
virtual ~CheckerBoardedSparseMatrixBase() {};
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
@ -1,395 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/approx/Chebyshev.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Christoph Lehner <clehner@bnl.gov>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CHEBYSHEV_H
|
||||
#define GRID_CHEBYSHEV_H
|
||||
|
||||
#include <Grid/algorithms/LinearOperator.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
struct ChebyParams : Serializable {
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(ChebyParams,
|
||||
RealD, alpha,
|
||||
RealD, beta,
|
||||
int, Npoly);
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Generic Chebyshev approximations
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field>
|
||||
class Chebyshev : public OperatorFunction<Field> {
|
||||
private:
|
||||
using OperatorFunction<Field>::operator();
|
||||
|
||||
std::vector<RealD> Coeffs;
|
||||
int order;
|
||||
RealD hi;
|
||||
RealD lo;
|
||||
|
||||
public:
|
||||
void csv(std::ostream &out){
|
||||
RealD diff = hi-lo;
|
||||
RealD delta = diff*1.0e-9;
|
||||
for (RealD x=lo; x<hi; x+=delta) {
|
||||
delta*=1.1;
|
||||
RealD f = approx(x);
|
||||
out<< x<<" "<<f<<std::endl;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Convenience for plotting the approximation
|
||||
void PlotApprox(std::ostream &out) {
|
||||
out<<"Polynomial approx ["<<lo<<","<<hi<<"]"<<std::endl;
|
||||
for(RealD x=lo;x<hi;x+=(hi-lo)/50.0){
|
||||
out <<x<<"\t"<<approx(x)<<std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
Chebyshev(){};
|
||||
Chebyshev(ChebyParams p){ Init(p.alpha,p.beta,p.Npoly);};
|
||||
Chebyshev(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD) ) {Init(_lo,_hi,_order,func);};
|
||||
Chebyshev(RealD _lo,RealD _hi,int _order) {Init(_lo,_hi,_order);};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// c.f. numerical recipes "chebft"/"chebev". This is sec 5.8 "Chebyshev approximation".
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// CJ: the one we need for Lanczos
|
||||
void Init(RealD _lo,RealD _hi,int _order)
|
||||
{
|
||||
lo=_lo;
|
||||
hi=_hi;
|
||||
order=_order;
|
||||
|
||||
if(order < 2) exit(-1);
|
||||
Coeffs.resize(order);
|
||||
Coeffs.assign(0.,order);
|
||||
Coeffs[order-1] = 1.;
|
||||
};
|
||||
|
||||
// PB - more efficient low pass drops high modes above the low as 1/x uses all Chebyshev's.
|
||||
// Similar kick effect below the threshold as Lanczos filter approach
|
||||
void InitLowPass(RealD _lo,RealD _hi,int _order)
|
||||
{
|
||||
lo=_lo;
|
||||
hi=_hi;
|
||||
order=_order;
|
||||
|
||||
if(order < 2) exit(-1);
|
||||
Coeffs.resize(order);
|
||||
for(int j=0;j<order;j++){
|
||||
RealD k=(order-1.0);
|
||||
RealD s=std::cos( j*M_PI*(k+0.5)/order );
|
||||
Coeffs[j] = s * 2.0/order;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
void Init(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD))
|
||||
{
|
||||
lo=_lo;
|
||||
hi=_hi;
|
||||
order=_order;
|
||||
|
||||
if(order < 2) exit(-1);
|
||||
Coeffs.resize(order);
|
||||
for(int j=0;j<order;j++){
|
||||
RealD s=0;
|
||||
for(int k=0;k<order;k++){
|
||||
RealD y=std::cos(M_PI*(k+0.5)/order);
|
||||
RealD x=0.5*(y*(hi-lo)+(hi+lo));
|
||||
RealD f=func(x);
|
||||
s=s+f*std::cos( j*M_PI*(k+0.5)/order );
|
||||
}
|
||||
Coeffs[j] = s * 2.0/order;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
void JacksonSmooth(void){
|
||||
RealD M=order;
|
||||
RealD alpha = M_PI/(M+2);
|
||||
RealD lmax = std::cos(alpha);
|
||||
RealD sumUsq =0;
|
||||
std::vector<RealD> U(M);
|
||||
std::vector<RealD> a(M);
|
||||
std::vector<RealD> g(M);
|
||||
for(int n=0;n<=M;n++){
|
||||
U[n] = std::sin((n+1)*std::acos(lmax))/std::sin(std::acos(lmax));
|
||||
sumUsq += U[n]*U[n];
|
||||
}
|
||||
sumUsq = std::sqrt(sumUsq);
|
||||
|
||||
for(int i=1;i<=M;i++){
|
||||
a[i] = U[i]/sumUsq;
|
||||
}
|
||||
g[0] = 1.0;
|
||||
for(int m=1;m<=M;m++){
|
||||
g[m] = 0;
|
||||
for(int i=0;i<=M-m;i++){
|
||||
g[m]+= a[i]*a[m+i];
|
||||
}
|
||||
}
|
||||
for(int m=1;m<=M;m++){
|
||||
Coeffs[m]*=g[m];
|
||||
}
|
||||
}
|
||||
RealD approx(RealD x) // Convenience for plotting the approximation
|
||||
{
|
||||
RealD Tn;
|
||||
RealD Tnm;
|
||||
RealD Tnp;
|
||||
|
||||
RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo));
|
||||
|
||||
RealD T0=1;
|
||||
RealD T1=y;
|
||||
|
||||
RealD sum;
|
||||
sum = 0.5*Coeffs[0]*T0;
|
||||
sum+= Coeffs[1]*T1;
|
||||
|
||||
Tn =T1;
|
||||
Tnm=T0;
|
||||
for(int i=2;i<order;i++){
|
||||
Tnp=2*y*Tn-Tnm;
|
||||
Tnm=Tn;
|
||||
Tn =Tnp;
|
||||
sum+= Tn*Coeffs[i];
|
||||
}
|
||||
return sum;
|
||||
};
|
||||
|
||||
RealD approxD(RealD x)
|
||||
{
|
||||
RealD Un;
|
||||
RealD Unm;
|
||||
RealD Unp;
|
||||
|
||||
RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo));
|
||||
|
||||
RealD U0=1;
|
||||
RealD U1=2*y;
|
||||
|
||||
RealD sum;
|
||||
sum = Coeffs[1]*U0;
|
||||
sum+= Coeffs[2]*U1*2.0;
|
||||
|
||||
Un =U1;
|
||||
Unm=U0;
|
||||
for(int i=2;i<order-1;i++){
|
||||
Unp=2*y*Un-Unm;
|
||||
Unm=Un;
|
||||
Un =Unp;
|
||||
sum+= Un*Coeffs[i+1]*(i+1.0);
|
||||
}
|
||||
return sum/(0.5*(hi-lo));
|
||||
};
|
||||
|
||||
RealD approxInv(RealD z, RealD x0, int maxiter, RealD resid) {
|
||||
RealD x = x0;
|
||||
RealD eps;
|
||||
|
||||
int i;
|
||||
for (i=0;i<maxiter;i++) {
|
||||
eps = approx(x) - z;
|
||||
if (fabs(eps / z) < resid)
|
||||
return x;
|
||||
x = x - eps / approxD(x);
|
||||
}
|
||||
|
||||
return std::numeric_limits<double>::quiet_NaN();
|
||||
}
|
||||
|
||||
// Implement the required interface
|
||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
|
||||
|
||||
GridBase *grid=in.Grid();
|
||||
|
||||
int vol=grid->gSites();
|
||||
typedef typename Field::vector_type vector_type;
|
||||
|
||||
Field T0(grid); T0 = in;
|
||||
Field T1(grid);
|
||||
Field T2(grid);
|
||||
Field y(grid);
|
||||
|
||||
Field *Tnm = &T0;
|
||||
Field *Tn = &T1;
|
||||
Field *Tnp = &T2;
|
||||
|
||||
// Tn=T1 = (xscale M + mscale)in
|
||||
RealD xscale = 2.0/(hi-lo);
|
||||
RealD mscale = -(hi+lo)/(hi-lo);
|
||||
Linop.HermOp(T0,y);
|
||||
axpby(T1,xscale,mscale,y,in);
|
||||
|
||||
// sum = .5 c[0] T0 + c[1] T1
|
||||
// out = ()*T0 + Coeffs[1]*T1;
|
||||
axpby(out,0.5*Coeffs[0],Coeffs[1],T0,T1);
|
||||
for(int n=2;n<order;n++){
|
||||
|
||||
Linop.HermOp(*Tn,y);
|
||||
axpby(y,xscale,mscale,y,(*Tn));
|
||||
axpby(*Tnp,2.0,-1.0,y,(*Tnm));
|
||||
if ( Coeffs[n] != 0.0) {
|
||||
axpy(out,Coeffs[n],*Tnp,out);
|
||||
}
|
||||
|
||||
// Cycle pointers to avoid copies
|
||||
Field *swizzle = Tnm;
|
||||
Tnm =Tn;
|
||||
Tn =Tnp;
|
||||
Tnp =swizzle;
|
||||
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<class Field>
|
||||
class ChebyshevLanczos : public Chebyshev<Field> {
|
||||
private:
|
||||
std::vector<RealD> Coeffs;
|
||||
int order;
|
||||
RealD alpha;
|
||||
RealD beta;
|
||||
RealD mu;
|
||||
|
||||
public:
|
||||
ChebyshevLanczos(RealD _alpha,RealD _beta,RealD _mu,int _order) :
|
||||
alpha(_alpha),
|
||||
beta(_beta),
|
||||
mu(_mu)
|
||||
{
|
||||
order=_order;
|
||||
Coeffs.resize(order);
|
||||
for(int i=0;i<_order;i++){
|
||||
Coeffs[i] = 0.0;
|
||||
}
|
||||
Coeffs[order-1]=1.0;
|
||||
};
|
||||
|
||||
void csv(std::ostream &out){
|
||||
for (RealD x=-1.2*alpha; x<1.2*alpha; x+=(2.0*alpha)/10000) {
|
||||
RealD f = approx(x);
|
||||
out<< x<<" "<<f<<std::endl;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
RealD approx(RealD xx) // Convenience for plotting the approximation
|
||||
{
|
||||
RealD Tn;
|
||||
RealD Tnm;
|
||||
RealD Tnp;
|
||||
Real aa = alpha * alpha;
|
||||
Real bb = beta * beta;
|
||||
|
||||
RealD x = ( 2.0 * (xx-mu)*(xx-mu) - (aa+bb) ) / (aa-bb);
|
||||
|
||||
RealD y= x;
|
||||
|
||||
RealD T0=1;
|
||||
RealD T1=y;
|
||||
|
||||
RealD sum;
|
||||
sum = 0.5*Coeffs[0]*T0;
|
||||
sum+= Coeffs[1]*T1;
|
||||
|
||||
Tn =T1;
|
||||
Tnm=T0;
|
||||
for(int i=2;i<order;i++){
|
||||
Tnp=2*y*Tn-Tnm;
|
||||
Tnm=Tn;
|
||||
Tn =Tnp;
|
||||
sum+= Tn*Coeffs[i];
|
||||
}
|
||||
return sum;
|
||||
};
|
||||
|
||||
// shift_Multiply in Rudy's code
|
||||
void AminusMuSq(LinearOperatorBase<Field> &Linop, const Field &in, Field &out)
|
||||
{
|
||||
GridBase *grid=in.Grid();
|
||||
Field tmp(grid);
|
||||
|
||||
RealD aa= alpha*alpha;
|
||||
RealD bb= beta * beta;
|
||||
|
||||
Linop.HermOp(in,out);
|
||||
out = out - mu*in;
|
||||
|
||||
Linop.HermOp(out,tmp);
|
||||
tmp = tmp - mu * out;
|
||||
|
||||
out = (2.0/ (aa-bb) ) * tmp - ((aa+bb)/(aa-bb))*in;
|
||||
};
|
||||
// Implement the required interface
|
||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
|
||||
|
||||
GridBase *grid=in.Grid();
|
||||
|
||||
int vol=grid->gSites();
|
||||
|
||||
Field T0(grid); T0 = in;
|
||||
Field T1(grid);
|
||||
Field T2(grid);
|
||||
Field y(grid);
|
||||
|
||||
Field *Tnm = &T0;
|
||||
Field *Tn = &T1;
|
||||
Field *Tnp = &T2;
|
||||
|
||||
// Tn=T1 = (xscale M )*in
|
||||
AminusMuSq(Linop,T0,T1);
|
||||
|
||||
// sum = .5 c[0] T0 + c[1] T1
|
||||
out = (0.5*Coeffs[0])*T0 + Coeffs[1]*T1;
|
||||
for(int n=2;n<order;n++){
|
||||
|
||||
AminusMuSq(Linop,*Tn,y);
|
||||
|
||||
*Tnp=2.0*y-(*Tnm);
|
||||
|
||||
out=out+Coeffs[n]* (*Tnp);
|
||||
|
||||
// Cycle pointers to avoid copies
|
||||
Field *swizzle = Tnm;
|
||||
Tnm =Tn;
|
||||
Tn =Tnp;
|
||||
Tnp =swizzle;
|
||||
|
||||
}
|
||||
}
|
||||
};
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
@ -1,154 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/approx/Forecast.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef INCLUDED_FORECAST_H
|
||||
#define INCLUDED_FORECAST_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
// Abstract base class.
|
||||
// Takes a matrix (Mat), a source (phi), and a vector of Fields (chi)
|
||||
// and returns a forecasted solution to the system D*psi = phi (psi).
|
||||
// Changing to operator
|
||||
template<class LinearOperatorBase, class Field>
|
||||
class Forecast
|
||||
{
|
||||
public:
|
||||
virtual Field operator()(LinearOperatorBase &Mat, const Field& phi, const std::vector<Field>& chi) = 0;
|
||||
};
|
||||
|
||||
// Implementation of Brower et al.'s chronological inverter (arXiv:hep-lat/9509012),
|
||||
// used to forecast solutions across poles of the EOFA heatbath.
|
||||
//
|
||||
// Modified from CPS (cps_pp/src/util/dirac_op/d_op_base/comsrc/minresext.C)
|
||||
template<class Matrix, class Field>
|
||||
class ChronoForecast : public Forecast<Matrix,Field>
|
||||
{
|
||||
public:
|
||||
Field operator()(Matrix &Mat, const Field& phi, const std::vector<Field>& prev_solns)
|
||||
{
|
||||
int degree = prev_solns.size();
|
||||
std::cout << GridLogMessage << "ChronoForecast: degree= " << degree << std::endl;
|
||||
Field chi(phi); // forecasted solution
|
||||
|
||||
// Trivial cases
|
||||
if(degree == 0){ chi = Zero(); return chi; }
|
||||
else if(degree == 1){ return prev_solns[0]; }
|
||||
|
||||
ComplexD xp;
|
||||
Field r(phi); // residual
|
||||
Field Mv(phi);
|
||||
std::vector<Field> v(prev_solns); // orthonormalized previous solutions
|
||||
std::vector<Field> MdagMv(degree,phi);
|
||||
|
||||
// Array to hold the matrix elements
|
||||
std::vector<std::vector<ComplexD>> G(degree, std::vector<ComplexD>(degree));
|
||||
|
||||
// Solution and source vectors
|
||||
std::vector<ComplexD> a(degree);
|
||||
std::vector<ComplexD> b(degree);
|
||||
|
||||
// Orthonormalize the vector basis
|
||||
for(int i=0; i<degree; i++){
|
||||
v[i] *= 1.0/std::sqrt(norm2(v[i]));
|
||||
for(int j=i+1; j<degree; j++){ v[j] -= innerProduct(v[i],v[j]) * v[i]; }
|
||||
}
|
||||
|
||||
// Perform sparse matrix multiplication and construct rhs
|
||||
for(int i=0; i<degree; i++){
|
||||
b[i] = innerProduct(v[i],phi);
|
||||
// Mat.M(v[i],Mv);
|
||||
// Mat.Mdag(Mv,MdagMv[i]);
|
||||
Mat.HermOp(v[i],MdagMv[i]);
|
||||
G[i][i] = innerProduct(v[i],MdagMv[i]);
|
||||
}
|
||||
|
||||
// Construct the matrix
|
||||
for(int j=0; j<degree; j++){
|
||||
for(int k=j+1; k<degree; k++){
|
||||
G[j][k] = innerProduct(v[j],MdagMv[k]);
|
||||
G[k][j] = conjugate(G[j][k]);
|
||||
}}
|
||||
|
||||
// Gauss-Jordan elimination with partial pivoting
|
||||
for(int i=0; i<degree; i++){
|
||||
|
||||
// Perform partial pivoting
|
||||
int k = i;
|
||||
for(int j=i+1; j<degree; j++){ if(abs(G[j][j]) > abs(G[k][k])){ k = j; } }
|
||||
if(k != i){
|
||||
xp = b[k];
|
||||
b[k] = b[i];
|
||||
b[i] = xp;
|
||||
for(int j=0; j<degree; j++){
|
||||
xp = G[k][j];
|
||||
G[k][j] = G[i][j];
|
||||
G[i][j] = xp;
|
||||
}
|
||||
}
|
||||
|
||||
// Convert matrix to upper triangular form
|
||||
for(int j=i+1; j<degree; j++){
|
||||
xp = G[j][i]/G[i][i];
|
||||
b[j] -= xp * b[i];
|
||||
for(int k=0; k<degree; k++){ G[j][k] -= xp*G[i][k]; }
|
||||
}
|
||||
}
|
||||
|
||||
// Use Gaussian elimination to solve equations and calculate initial guess
|
||||
chi = Zero();
|
||||
r = phi;
|
||||
for(int i=degree-1; i>=0; i--){
|
||||
a[i] = 0.0;
|
||||
for(int j=i+1; j<degree; j++){ a[i] += G[i][j] * a[j]; }
|
||||
a[i] = (b[i]-a[i])/G[i][i];
|
||||
chi += a[i]*v[i];
|
||||
r -= a[i]*MdagMv[i];
|
||||
}
|
||||
|
||||
RealD true_r(0.0);
|
||||
ComplexD tmp;
|
||||
for(int i=0; i<degree; i++){
|
||||
tmp = -b[i];
|
||||
for(int j=0; j<degree; j++){ tmp += G[i][j]*a[j]; }
|
||||
tmp = conjugate(tmp)*tmp;
|
||||
true_r += std::sqrt(tmp.real());
|
||||
}
|
||||
|
||||
RealD error = std::sqrt(norm2(r)/norm2(phi));
|
||||
std::cout << GridLogMessage << "ChronoForecast: |res|/|src| = " << error << std::endl;
|
||||
|
||||
return chi;
|
||||
};
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
@ -1,129 +0,0 @@
|
||||
#ifndef GRID_JACOBIPOLYNOMIAL_H
|
||||
#define GRID_JACOBIPOLYNOMIAL_H
|
||||
|
||||
#include <Grid/algorithms/LinearOperator.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class Field>
|
||||
class JacobiPolynomial : public OperatorFunction<Field> {
|
||||
private:
|
||||
using OperatorFunction<Field>::operator();
|
||||
|
||||
int order;
|
||||
RealD hi;
|
||||
RealD lo;
|
||||
RealD alpha;
|
||||
RealD beta;
|
||||
|
||||
public:
|
||||
void csv(std::ostream &out){
|
||||
csv(out,lo,hi);
|
||||
}
|
||||
void csv(std::ostream &out,RealD llo,RealD hhi){
|
||||
RealD diff = hhi-llo;
|
||||
RealD delta = diff*1.0e-5;
|
||||
for (RealD x=llo-delta; x<=hhi; x+=delta) {
|
||||
RealD f = approx(x);
|
||||
out<< x<<" "<<f <<std::endl;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
JacobiPolynomial(){};
|
||||
JacobiPolynomial(RealD _lo,RealD _hi,int _order,RealD _alpha, RealD _beta)
|
||||
{
|
||||
lo=_lo;
|
||||
hi=_hi;
|
||||
alpha=_alpha;
|
||||
beta=_beta;
|
||||
order=_order;
|
||||
};
|
||||
|
||||
RealD approx(RealD x) // Convenience for plotting the approximation
|
||||
{
|
||||
RealD Tn;
|
||||
RealD Tnm;
|
||||
RealD Tnp;
|
||||
|
||||
RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo));
|
||||
|
||||
RealD T0=1.0;
|
||||
RealD T1=(alpha-beta)*0.5+(alpha+beta+2.0)*0.5*y;
|
||||
|
||||
Tn =T1;
|
||||
Tnm=T0;
|
||||
for(int n=2;n<=order;n++){
|
||||
RealD cnp = 2.0*n*(n+alpha+beta)*(2.0*n-2.0+alpha+beta);
|
||||
RealD cny = (2.0*n-2.0+alpha+beta)*(2.0*n-1.0+alpha+beta)*(2.0*n+alpha+beta);
|
||||
RealD cn1 = (2.0*n+alpha+beta-1.0)*(alpha*alpha-beta*beta);
|
||||
RealD cnm = - 2.0*(n+alpha-1.0)*(n+beta-1.0)*(2.0*n+alpha+beta);
|
||||
Tnp= ( cny * y *Tn + cn1 * Tn + cnm * Tnm )/ cnp;
|
||||
Tnm=Tn;
|
||||
Tn =Tnp;
|
||||
}
|
||||
return Tnp;
|
||||
};
|
||||
|
||||
// Implement the required interface
|
||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
|
||||
GridBase *grid=in.Grid();
|
||||
|
||||
int vol=grid->gSites();
|
||||
|
||||
Field T0(grid);
|
||||
Field T1(grid);
|
||||
Field T2(grid);
|
||||
Field y(grid);
|
||||
|
||||
|
||||
Field *Tnm = &T0;
|
||||
Field *Tn = &T1;
|
||||
Field *Tnp = &T2;
|
||||
|
||||
// RealD T0=1.0;
|
||||
T0=in;
|
||||
|
||||
// RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo));
|
||||
// = x * 2/(hi-lo) - (hi+lo)/(hi-lo)
|
||||
Linop.HermOp(T0,y);
|
||||
RealD xscale = 2.0/(hi-lo);
|
||||
RealD mscale = -(hi+lo)/(hi-lo);
|
||||
Linop.HermOp(T0,y);
|
||||
y=y*xscale+in*mscale;
|
||||
|
||||
// RealD T1=(alpha-beta)*0.5+(alpha+beta+2.0)*0.5*y;
|
||||
RealD halfAmB = (alpha-beta)*0.5;
|
||||
RealD halfApBp2= (alpha+beta+2.0)*0.5;
|
||||
T1 = halfAmB * in + halfApBp2*y;
|
||||
|
||||
for(int n=2;n<=order;n++){
|
||||
|
||||
Linop.HermOp(*Tn,y);
|
||||
y=xscale*y+mscale*(*Tn);
|
||||
|
||||
RealD cnp = 2.0*n*(n+alpha+beta)*(2.0*n-2.0+alpha+beta);
|
||||
RealD cny = (2.0*n-2.0+alpha+beta)*(2.0*n-1.0+alpha+beta)*(2.0*n+alpha+beta);
|
||||
RealD cn1 = (2.0*n+alpha+beta-1.0)*(alpha*alpha-beta*beta);
|
||||
RealD cnm = - 2.0*(n+alpha-1.0)*(n+beta-1.0)*(2.0*n+alpha+beta);
|
||||
|
||||
// Tnp= ( cny * y *Tn + cn1 * Tn + cnm * Tnm )/ cnp;
|
||||
cny=cny/cnp;
|
||||
cn1=cn1/cnp;
|
||||
cn1=cn1/cnp;
|
||||
cnm=cnm/cnp;
|
||||
|
||||
*Tnp=cny*y + cn1 *(*Tn) + cnm * (*Tnm);
|
||||
|
||||
// Cycle pointers to avoid copies
|
||||
Field *swizzle = Tnm;
|
||||
Tnm =Tn;
|
||||
Tn =Tnp;
|
||||
Tnp =swizzle;
|
||||
}
|
||||
out=*Tnp;
|
||||
|
||||
}
|
||||
};
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
@ -1,473 +0,0 @@
|
||||
#include<math.h>
|
||||
#include<stdio.h>
|
||||
#include<stdlib.h>
|
||||
#include<string>
|
||||
#include<iostream>
|
||||
#include<iomanip>
|
||||
#include<cassert>
|
||||
|
||||
#include<Grid/algorithms/approx/RemezGeneral.h>
|
||||
|
||||
|
||||
// Constructor
|
||||
AlgRemezGeneral::AlgRemezGeneral(double lower, double upper, long precision,
|
||||
bigfloat (*f)(bigfloat x, void *data), void *data): f(f),
|
||||
data(data),
|
||||
prec(precision),
|
||||
apstrt(lower), apend(upper), apwidt(upper - lower),
|
||||
n(0), d(0), pow_n(0), pow_d(0)
|
||||
{
|
||||
bigfloat::setDefaultPrecision(prec);
|
||||
|
||||
std::cout<<"Approximation bounds are ["<<apstrt<<","<<apend<<"]\n";
|
||||
std::cout<<"Precision of arithmetic is "<<precision<<std::endl;
|
||||
}
|
||||
|
||||
//Determine the properties of the numerator and denominator polynomials
|
||||
void AlgRemezGeneral::setupPolyProperties(int num_degree, int den_degree, PolyType num_type_in, PolyType den_type_in){
|
||||
pow_n = num_degree;
|
||||
pow_d = den_degree;
|
||||
|
||||
if(pow_n % 2 == 0 && num_type_in == PolyType::Odd) assert(0);
|
||||
if(pow_n % 2 == 1 && num_type_in == PolyType::Even) assert(0);
|
||||
|
||||
if(pow_d % 2 == 0 && den_type_in == PolyType::Odd) assert(0);
|
||||
if(pow_d % 2 == 1 && den_type_in == PolyType::Even) assert(0);
|
||||
|
||||
num_type = num_type_in;
|
||||
den_type = den_type_in;
|
||||
|
||||
num_pows.resize(pow_n+1);
|
||||
den_pows.resize(pow_d+1);
|
||||
|
||||
int n_in = 0;
|
||||
bool odd = num_type == PolyType::Full || num_type == PolyType::Odd;
|
||||
bool even = num_type == PolyType::Full || num_type == PolyType::Even;
|
||||
for(int i=0;i<=pow_n;i++){
|
||||
num_pows[i] = -1;
|
||||
if(i % 2 == 0 && even) num_pows[i] = n_in++;
|
||||
if(i % 2 == 1 && odd) num_pows[i] = n_in++;
|
||||
}
|
||||
|
||||
std::cout << n_in << " terms in numerator" << std::endl;
|
||||
--n_in; //power is 1 less than the number of terms, eg pow=1 a x^1 + b x^0
|
||||
|
||||
int d_in = 0;
|
||||
odd = den_type == PolyType::Full || den_type == PolyType::Odd;
|
||||
even = den_type == PolyType::Full || den_type == PolyType::Even;
|
||||
for(int i=0;i<=pow_d;i++){
|
||||
den_pows[i] = -1;
|
||||
if(i % 2 == 0 && even) den_pows[i] = d_in++;
|
||||
if(i % 2 == 1 && odd) den_pows[i] = d_in++;
|
||||
}
|
||||
|
||||
std::cout << d_in << " terms in denominator" << std::endl;
|
||||
--d_in;
|
||||
|
||||
n = n_in;
|
||||
d = d_in;
|
||||
}
|
||||
|
||||
//Setup algorithm
|
||||
void AlgRemezGeneral::reinitializeAlgorithm(){
|
||||
spread = 1.0e37;
|
||||
iter = 0;
|
||||
|
||||
neq = n + d + 1; //not +2 because highest-power term in denominator is fixed to 1
|
||||
|
||||
param.resize(neq);
|
||||
yy.resize(neq+1);
|
||||
|
||||
//Initialize linear equation temporaries
|
||||
A.resize(neq*neq);
|
||||
B.resize(neq);
|
||||
IPS.resize(neq);
|
||||
|
||||
//Initialize maximum and minimum errors
|
||||
xx.resize(neq+2);
|
||||
mm.resize(neq+1);
|
||||
initialGuess();
|
||||
|
||||
//Initialize search steps
|
||||
step.resize(neq+1);
|
||||
stpini();
|
||||
}
|
||||
|
||||
double AlgRemezGeneral::generateApprox(const int num_degree, const int den_degree,
|
||||
const PolyType num_type_in, const PolyType den_type_in,
|
||||
const double _tolerance, const int report_freq){
|
||||
//Setup the properties of the polynomial
|
||||
setupPolyProperties(num_degree, den_degree, num_type_in, den_type_in);
|
||||
|
||||
//Setup the algorithm
|
||||
reinitializeAlgorithm();
|
||||
|
||||
bigfloat tolerance = _tolerance;
|
||||
|
||||
//Iterate until convergance
|
||||
while (spread > tolerance) {
|
||||
if (iter++ % report_freq==0)
|
||||
std::cout<<"Iteration " <<iter-1<<" spread "<<(double)spread<<" delta "<<(double)delta << std::endl;
|
||||
|
||||
equations();
|
||||
if (delta < tolerance) {
|
||||
std::cout<<"Iteration " << iter-1 << " delta too small (" << delta << "<" << tolerance << "), try increasing precision\n";
|
||||
assert(0);
|
||||
};
|
||||
assert( delta>= tolerance );
|
||||
|
||||
search();
|
||||
}
|
||||
|
||||
int sign;
|
||||
double error = (double)getErr(mm[0],&sign);
|
||||
std::cout<<"Converged at "<<iter<<" iterations; error = "<<error<<std::endl;
|
||||
|
||||
// Return the maximum error in the approximation
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
// Initial values of maximal and minimal errors
|
||||
void AlgRemezGeneral::initialGuess(){
|
||||
// Supply initial guesses for solution points
|
||||
long ncheb = neq; // Degree of Chebyshev error estimate
|
||||
|
||||
// Find ncheb+1 extrema of Chebyshev polynomial
|
||||
bigfloat a = ncheb;
|
||||
bigfloat r;
|
||||
|
||||
mm[0] = apstrt;
|
||||
for (long i = 1; i < ncheb; i++) {
|
||||
r = 0.5 * (1 - cos((M_PI * i)/(double) a));
|
||||
//r *= sqrt_bf(r);
|
||||
r = (exp((double)r)-1.0)/(exp(1.0)-1.0);
|
||||
mm[i] = apstrt + r * apwidt;
|
||||
}
|
||||
mm[ncheb] = apend;
|
||||
|
||||
a = 2.0 * ncheb;
|
||||
for (long i = 0; i <= ncheb; i++) {
|
||||
r = 0.5 * (1 - cos(M_PI * (2*i+1)/(double) a));
|
||||
//r *= sqrt_bf(r); // Squeeze to low end of interval
|
||||
r = (exp((double)r)-1.0)/(exp(1.0)-1.0);
|
||||
xx[i] = apstrt + r * apwidt;
|
||||
}
|
||||
}
|
||||
|
||||
// Initialise step sizes
|
||||
void AlgRemezGeneral::stpini(){
|
||||
xx[neq+1] = apend;
|
||||
delta = 0.25;
|
||||
step[0] = xx[0] - apstrt;
|
||||
for (int i = 1; i < neq; i++) step[i] = xx[i] - xx[i-1];
|
||||
step[neq] = step[neq-1];
|
||||
}
|
||||
|
||||
// Search for error maxima and minima
|
||||
void AlgRemezGeneral::search(){
|
||||
bigfloat a, q, xm, ym, xn, yn, xx1;
|
||||
int emsign, ensign, steps;
|
||||
|
||||
int meq = neq + 1;
|
||||
|
||||
bigfloat eclose = 1.0e30;
|
||||
bigfloat farther = 0l;
|
||||
|
||||
bigfloat xx0 = apstrt;
|
||||
|
||||
for (int i = 0; i < meq; i++) {
|
||||
steps = 0;
|
||||
xx1 = xx[i]; // Next zero
|
||||
if (i == meq-1) xx1 = apend;
|
||||
xm = mm[i];
|
||||
ym = getErr(xm,&emsign);
|
||||
q = step[i];
|
||||
xn = xm + q;
|
||||
if (xn < xx0 || xn >= xx1) { // Cannot skip over adjacent boundaries
|
||||
q = -q;
|
||||
xn = xm;
|
||||
yn = ym;
|
||||
ensign = emsign;
|
||||
} else {
|
||||
yn = getErr(xn,&ensign);
|
||||
if (yn < ym) {
|
||||
q = -q;
|
||||
xn = xm;
|
||||
yn = ym;
|
||||
ensign = emsign;
|
||||
}
|
||||
}
|
||||
|
||||
while(yn >= ym) { // March until error becomes smaller.
|
||||
if (++steps > 10)
|
||||
break;
|
||||
|
||||
ym = yn;
|
||||
xm = xn;
|
||||
emsign = ensign;
|
||||
a = xm + q;
|
||||
if (a == xm || a <= xx0 || a >= xx1)
|
||||
break;// Must not skip over the zeros either side.
|
||||
|
||||
xn = a;
|
||||
yn = getErr(xn,&ensign);
|
||||
}
|
||||
|
||||
mm[i] = xm; // Position of maximum
|
||||
yy[i] = ym; // Value of maximum
|
||||
|
||||
if (eclose > ym) eclose = ym;
|
||||
if (farther < ym) farther = ym;
|
||||
|
||||
xx0 = xx1; // Walk to next zero.
|
||||
} // end of search loop
|
||||
|
||||
q = (farther - eclose); // Decrease step size if error spread increased
|
||||
|
||||
if (eclose != 0.0) q /= eclose; // Relative error spread
|
||||
|
||||
if (q >= spread)
|
||||
delta *= 0.5; // Spread is increasing; decrease step size
|
||||
|
||||
spread = q;
|
||||
|
||||
for (int i = 0; i < neq; i++) {
|
||||
q = yy[i+1];
|
||||
if (q != 0.0) q = yy[i] / q - (bigfloat)1l;
|
||||
else q = 0.0625;
|
||||
if (q > (bigfloat)0.25) q = 0.25;
|
||||
q *= mm[i+1] - mm[i];
|
||||
step[i] = q * delta;
|
||||
}
|
||||
step[neq] = step[neq-1];
|
||||
|
||||
for (int i = 0; i < neq; i++) { // Insert new locations for the zeros.
|
||||
xm = xx[i] - step[i];
|
||||
|
||||
if (xm <= apstrt)
|
||||
continue;
|
||||
|
||||
if (xm >= apend)
|
||||
continue;
|
||||
|
||||
if (xm <= mm[i])
|
||||
xm = (bigfloat)0.5 * (mm[i] + xx[i]);
|
||||
|
||||
if (xm >= mm[i+1])
|
||||
xm = (bigfloat)0.5 * (mm[i+1] + xx[i]);
|
||||
|
||||
xx[i] = xm;
|
||||
}
|
||||
}
|
||||
|
||||
// Solve the equations
|
||||
void AlgRemezGeneral::equations(){
|
||||
bigfloat x, y, z;
|
||||
bigfloat *aa;
|
||||
|
||||
for (int i = 0; i < neq; i++) { // set up the equations for solution by simq()
|
||||
int ip = neq * i; // offset to 1st element of this row of matrix
|
||||
x = xx[i]; // the guess for this row
|
||||
y = func(x); // right-hand-side vector
|
||||
|
||||
z = (bigfloat)1l;
|
||||
aa = A.data()+ip;
|
||||
int t = 0;
|
||||
for (int j = 0; j <= pow_n; j++) {
|
||||
if(num_pows[j] != -1){ *aa++ = z; t++; }
|
||||
z *= x;
|
||||
}
|
||||
assert(t == n+1);
|
||||
|
||||
z = (bigfloat)1l;
|
||||
t = 0;
|
||||
for (int j = 0; j < pow_d; j++) {
|
||||
if(den_pows[j] != -1){ *aa++ = -y * z; t++; }
|
||||
z *= x;
|
||||
}
|
||||
assert(t == d);
|
||||
|
||||
B[i] = y * z; // Right hand side vector
|
||||
}
|
||||
|
||||
// Solve the simultaneous linear equations.
|
||||
if (simq()){
|
||||
std::cout<<"simq failed\n";
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Evaluate the rational form P(x)/Q(x) using coefficients
|
||||
// from the solution vector param
|
||||
bigfloat AlgRemezGeneral::approx(const bigfloat x) const{
|
||||
// Work backwards toward the constant term.
|
||||
int c = n;
|
||||
bigfloat yn = param[c--]; // Highest order numerator coefficient
|
||||
for (int i = pow_n-1; i >= 0; i--) yn = x * yn + (num_pows[i] != -1 ? param[c--] : bigfloat(0l));
|
||||
|
||||
c = n+d;
|
||||
bigfloat yd = 1l; //Highest degree coefficient is 1.0
|
||||
for (int i = pow_d-1; i >= 0; i--) yd = x * yd + (den_pows[i] != -1 ? param[c--] : bigfloat(0l));
|
||||
|
||||
return(yn/yd);
|
||||
}
|
||||
|
||||
// Compute size and sign of the approximation error at x
|
||||
bigfloat AlgRemezGeneral::getErr(bigfloat x, int *sign) const{
|
||||
bigfloat f = func(x);
|
||||
bigfloat e = approx(x) - f;
|
||||
if (f != 0) e /= f;
|
||||
if (e < (bigfloat)0.0) {
|
||||
*sign = -1;
|
||||
e = -e;
|
||||
}
|
||||
else *sign = 1;
|
||||
|
||||
return(e);
|
||||
}
|
||||
|
||||
// Solve the system AX=B
|
||||
int AlgRemezGeneral::simq(){
|
||||
|
||||
int ip, ipj, ipk, ipn;
|
||||
int idxpiv;
|
||||
int kp, kp1, kpk, kpn;
|
||||
int nip, nkp;
|
||||
bigfloat em, q, rownrm, big, size, pivot, sum;
|
||||
bigfloat *aa;
|
||||
bigfloat *X = param.data();
|
||||
|
||||
int n = neq;
|
||||
int nm1 = n - 1;
|
||||
// Initialize IPS and X
|
||||
|
||||
int ij = 0;
|
||||
for (int i = 0; i < n; i++) {
|
||||
IPS[i] = i;
|
||||
rownrm = 0.0;
|
||||
for(int j = 0; j < n; j++) {
|
||||
q = abs_bf(A[ij]);
|
||||
if(rownrm < q) rownrm = q;
|
||||
++ij;
|
||||
}
|
||||
if (rownrm == (bigfloat)0l) {
|
||||
std::cout<<"simq rownrm=0\n";
|
||||
return(1);
|
||||
}
|
||||
X[i] = (bigfloat)1.0 / rownrm;
|
||||
}
|
||||
|
||||
for (int k = 0; k < nm1; k++) {
|
||||
big = 0.0;
|
||||
idxpiv = 0;
|
||||
|
||||
for (int i = k; i < n; i++) {
|
||||
ip = IPS[i];
|
||||
ipk = n*ip + k;
|
||||
size = abs_bf(A[ipk]) * X[ip];
|
||||
if (size > big) {
|
||||
big = size;
|
||||
idxpiv = i;
|
||||
}
|
||||
}
|
||||
|
||||
if (big == (bigfloat)0l) {
|
||||
std::cout<<"simq big=0\n";
|
||||
return(2);
|
||||
}
|
||||
if (idxpiv != k) {
|
||||
int j = IPS[k];
|
||||
IPS[k] = IPS[idxpiv];
|
||||
IPS[idxpiv] = j;
|
||||
}
|
||||
kp = IPS[k];
|
||||
kpk = n*kp + k;
|
||||
pivot = A[kpk];
|
||||
kp1 = k+1;
|
||||
for (int i = kp1; i < n; i++) {
|
||||
ip = IPS[i];
|
||||
ipk = n*ip + k;
|
||||
em = -A[ipk] / pivot;
|
||||
A[ipk] = -em;
|
||||
nip = n*ip;
|
||||
nkp = n*kp;
|
||||
aa = A.data()+nkp+kp1;
|
||||
for (int j = kp1; j < n; j++) {
|
||||
ipj = nip + j;
|
||||
A[ipj] = A[ipj] + em * *aa++;
|
||||
}
|
||||
}
|
||||
}
|
||||
kpn = n * IPS[n-1] + n - 1; // last element of IPS[n] th row
|
||||
if (A[kpn] == (bigfloat)0l) {
|
||||
std::cout<<"simq A[kpn]=0\n";
|
||||
return(3);
|
||||
}
|
||||
|
||||
|
||||
ip = IPS[0];
|
||||
X[0] = B[ip];
|
||||
for (int i = 1; i < n; i++) {
|
||||
ip = IPS[i];
|
||||
ipj = n * ip;
|
||||
sum = 0.0;
|
||||
for (int j = 0; j < i; j++) {
|
||||
sum += A[ipj] * X[j];
|
||||
++ipj;
|
||||
}
|
||||
X[i] = B[ip] - sum;
|
||||
}
|
||||
|
||||
ipn = n * IPS[n-1] + n - 1;
|
||||
X[n-1] = X[n-1] / A[ipn];
|
||||
|
||||
for (int iback = 1; iback < n; iback++) {
|
||||
//i goes (n-1),...,1
|
||||
int i = nm1 - iback;
|
||||
ip = IPS[i];
|
||||
nip = n*ip;
|
||||
sum = 0.0;
|
||||
aa = A.data()+nip+i+1;
|
||||
for (int j= i + 1; j < n; j++)
|
||||
sum += *aa++ * X[j];
|
||||
X[i] = (X[i] - sum) / A[nip+i];
|
||||
}
|
||||
|
||||
return(0);
|
||||
}
|
||||
|
||||
void AlgRemezGeneral::csv(std::ostream & os) const{
|
||||
os << "Numerator" << std::endl;
|
||||
for(int i=0;i<=pow_n;i++){
|
||||
os << getCoeffNum(i) << "*x^" << i;
|
||||
if(i!=pow_n) os << " + ";
|
||||
}
|
||||
os << std::endl;
|
||||
|
||||
os << "Denominator" << std::endl;
|
||||
for(int i=0;i<=pow_d;i++){
|
||||
os << getCoeffDen(i) << "*x^" << i;
|
||||
if(i!=pow_d) os << " + ";
|
||||
}
|
||||
os << std::endl;
|
||||
|
||||
//For a true minimax solution the errors should all be equal and the signs should oscillate +-+-+- etc
|
||||
int sign;
|
||||
os << "Errors at maxima: coordinate, error, (sign)" << std::endl;
|
||||
for(int i=0;i<neq+1;i++){
|
||||
os << mm[i] << " " << getErr(mm[i],&sign) << " (" << sign << ")" << std::endl;
|
||||
}
|
||||
|
||||
os << "Scan over range:" << std::endl;
|
||||
int npt = 60;
|
||||
bigfloat dlt = (apend - apstrt)/bigfloat(npt-1);
|
||||
|
||||
for (bigfloat x=apstrt; x<=apend; x = x + dlt) {
|
||||
double f = evaluateFunc(x);
|
||||
double r = evaluateApprox(x);
|
||||
os<< x<<","<<r<<","<<f<<","<<r-f<<std::endl;
|
||||
}
|
||||
return;
|
||||
}
|
@ -1,170 +0,0 @@
|
||||
/*
|
||||
C.Kelly Jan 2020 based on implementation by M. Clark May 2005
|
||||
|
||||
AlgRemezGeneral is an implementation of the Remez algorithm for approximating an arbitrary function by a rational polynomial
|
||||
It includes optional restriction to odd/even polynomials for the numerator and/or denominator
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_ALG_REMEZ_GENERAL_H
|
||||
#define INCLUDED_ALG_REMEZ_GENERAL_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <Grid/GridStd.h>
|
||||
|
||||
#ifdef HAVE_LIBGMP
|
||||
#include "bigfloat.h"
|
||||
#else
|
||||
#include "bigfloat_double.h"
|
||||
#endif
|
||||
|
||||
|
||||
class AlgRemezGeneral{
|
||||
public:
|
||||
enum PolyType { Even, Odd, Full };
|
||||
|
||||
private:
|
||||
|
||||
// In GSL-style, pass the function as a function pointer. Any data required to evaluate the function is passed in as a void pointer
|
||||
bigfloat (*f)(bigfloat x, void *data);
|
||||
void *data;
|
||||
|
||||
// The approximation parameters
|
||||
std::vector<bigfloat> param;
|
||||
bigfloat norm;
|
||||
|
||||
// The number of non-zero terms in the numerator and denominator
|
||||
int n, d;
|
||||
// The numerator and denominator degree (i.e. the largest power)
|
||||
int pow_n, pow_d;
|
||||
|
||||
// Specify if the numerator and/or denominator are odd/even polynomials
|
||||
PolyType num_type;
|
||||
PolyType den_type;
|
||||
std::vector<int> num_pows; //contains the mapping, with -1 if not present
|
||||
std::vector<int> den_pows;
|
||||
|
||||
// The bounds of the approximation
|
||||
bigfloat apstrt, apwidt, apend;
|
||||
|
||||
// Variables used to calculate the approximation
|
||||
int nd1, iter;
|
||||
std::vector<bigfloat> xx;
|
||||
std::vector<bigfloat> mm;
|
||||
std::vector<bigfloat> step;
|
||||
|
||||
bigfloat delta, spread;
|
||||
|
||||
// Variables used in search
|
||||
std::vector<bigfloat> yy;
|
||||
|
||||
// Variables used in solving linear equations
|
||||
std::vector<bigfloat> A;
|
||||
std::vector<bigfloat> B;
|
||||
std::vector<int> IPS;
|
||||
|
||||
// The number of equations we must solve at each iteration (n+d+1)
|
||||
int neq;
|
||||
|
||||
// The precision of the GNU MP library
|
||||
long prec;
|
||||
|
||||
// Initialize member variables associated with the polynomial's properties
|
||||
void setupPolyProperties(int num_degree, int den_degree, PolyType num_type_in, PolyType den_type_in);
|
||||
|
||||
// Initial values of maximal and minmal errors
|
||||
void initialGuess();
|
||||
|
||||
// Initialise step sizes
|
||||
void stpini();
|
||||
|
||||
// Initialize the algorithm
|
||||
void reinitializeAlgorithm();
|
||||
|
||||
// Solve the equations
|
||||
void equations();
|
||||
|
||||
// Search for error maxima and minima
|
||||
void search();
|
||||
|
||||
// Calculate function required for the approximation
|
||||
inline bigfloat func(bigfloat x) const{
|
||||
return f(x, data);
|
||||
}
|
||||
|
||||
// Compute size and sign of the approximation error at x
|
||||
bigfloat getErr(bigfloat x, int *sign) const;
|
||||
|
||||
// Solve the system AX=B where X = param
|
||||
int simq();
|
||||
|
||||
// Evaluate the rational form P(x)/Q(x) using coefficients from the solution vector param
|
||||
bigfloat approx(bigfloat x) const;
|
||||
|
||||
public:
|
||||
|
||||
AlgRemezGeneral(double lower, double upper, long prec,
|
||||
bigfloat (*f)(bigfloat x, void *data), void *data);
|
||||
|
||||
inline int getDegree(void) const{
|
||||
assert(n==d);
|
||||
return n;
|
||||
}
|
||||
// Reset the bounds of the approximation
|
||||
inline void setBounds(double lower, double upper) {
|
||||
apstrt = lower;
|
||||
apend = upper;
|
||||
apwidt = apend - apstrt;
|
||||
}
|
||||
|
||||
// Get the bounds of the approximation
|
||||
inline void getBounds(double &lower, double &upper) const{
|
||||
lower=(double)apstrt;
|
||||
upper=(double)apend;
|
||||
}
|
||||
|
||||
// Run the algorithm to generate the rational approximation
|
||||
double generateApprox(int num_degree, int den_degree,
|
||||
PolyType num_type, PolyType den_type,
|
||||
const double tolerance = 1e-15, const int report_freq = 1000);
|
||||
|
||||
inline double generateApprox(int num_degree, int den_degree,
|
||||
const double tolerance = 1e-15, const int report_freq = 1000){
|
||||
return generateApprox(num_degree, den_degree, Full, Full, tolerance, report_freq);
|
||||
}
|
||||
|
||||
// Evaluate the rational form P(x)/Q(x) using coefficients from the
|
||||
// solution vector param
|
||||
inline double evaluateApprox(double x) const{
|
||||
return (double)approx((bigfloat)x);
|
||||
}
|
||||
|
||||
// Evaluate the rational form Q(x)/P(x) using coefficients from the solution vector param
|
||||
inline double evaluateInverseApprox(double x) const{
|
||||
return 1.0/(double)approx((bigfloat)x);
|
||||
}
|
||||
|
||||
// Calculate function required for the approximation
|
||||
inline double evaluateFunc(double x) const{
|
||||
return (double)func((bigfloat)x);
|
||||
}
|
||||
|
||||
// Calculate inverse function required for the approximation
|
||||
inline double evaluateInverseFunc(double x) const{
|
||||
return 1.0/(double)func((bigfloat)x);
|
||||
}
|
||||
|
||||
// Dump csv of function, approx and error
|
||||
void csv(std::ostream &os = std::cout) const;
|
||||
|
||||
// Get the coefficient of the term x^i in the numerator
|
||||
inline double getCoeffNum(const int i) const{
|
||||
return num_pows[i] == -1 ? 0. : double(param[num_pows[i]]);
|
||||
}
|
||||
// Get the coefficient of the term x^i in the denominator
|
||||
inline double getCoeffDen(const int i) const{
|
||||
if(i == pow_d) return 1.0;
|
||||
else return den_pows[i] == -1 ? 0. : double(param[den_pows[i]+n+1]);
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
@ -1,183 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/approx/ZMobius.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Christopher Kelly <ckelly@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/algorithms/approx/ZMobius.h>
|
||||
#include <Grid/algorithms/approx/RemezGeneral.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
NAMESPACE_BEGIN(Approx);
|
||||
|
||||
//Compute the tanh approximation
|
||||
inline double epsilonMobius(const double x, const std::vector<ComplexD> &w){
|
||||
int Ls = w.size();
|
||||
|
||||
ComplexD fxp = 1., fmp = 1.;
|
||||
for(int i=0;i<Ls;i++){
|
||||
fxp = fxp * ( w[i] + x );
|
||||
fmp = fmp * ( w[i] - x );
|
||||
}
|
||||
return ((fxp - fmp)/(fxp + fmp)).real();
|
||||
}
|
||||
inline double epsilonMobius(const double x, const std::vector<RealD> &w){
|
||||
int Ls = w.size();
|
||||
|
||||
double fxp = 1., fmp = 1.;
|
||||
for(int i=0;i<Ls;i++){
|
||||
fxp = fxp * ( w[i] + x );
|
||||
fmp = fmp * ( w[i] - x );
|
||||
}
|
||||
return (fxp - fmp)/(fxp + fmp);
|
||||
}
|
||||
|
||||
|
||||
|
||||
//Compute the tanh approximation in a form suitable for the Remez
|
||||
bigfloat epsilonMobius(bigfloat x, void* data){
|
||||
const std::vector<RealD> &omega = *( (std::vector<RealD> const*)data );
|
||||
bigfloat fxp(1.0);
|
||||
bigfloat fmp(1.0);
|
||||
|
||||
for(int i=0;i<omega.size();i++){
|
||||
fxp = fxp * ( bigfloat(omega[i]) + x);
|
||||
fmp = fmp * ( bigfloat(omega[i]) - x);
|
||||
}
|
||||
return (fxp - fmp)/(fxp + fmp);
|
||||
}
|
||||
|
||||
//Compute the Zmobius Omega parameters suitable for eigenvalue range -lambda_bound <= lambda <= lambda_bound
|
||||
//Note omega_i = 1/(b_i + c_i) where b_i and c_i are the Mobius parameters
|
||||
void computeZmobiusOmega(std::vector<ComplexD> &omega_out, const int Ls_out,
|
||||
const std::vector<RealD> &omega_in, const int Ls_in,
|
||||
const RealD lambda_bound){
|
||||
assert(omega_in.size() == Ls_in);
|
||||
omega_out.resize(Ls_out);
|
||||
|
||||
//Use the Remez algorithm to generate the appropriate rational polynomial
|
||||
//For odd polynomial, to satisfy Haar condition must take either positive or negative half of range (cf https://arxiv.org/pdf/0803.0439.pdf page 6)
|
||||
AlgRemezGeneral remez(0, lambda_bound, 64, &epsilonMobius, (void*)&omega_in);
|
||||
remez.generateApprox(Ls_out-1, Ls_out,AlgRemezGeneral::Odd, AlgRemezGeneral::Even, 1e-15, 100);
|
||||
remez.csv(std::cout);
|
||||
|
||||
//The rational approximation has the form [ f(x) - f(-x) ] / [ f(x) + f(-x) ] where f(x) = \Prod_{i=0}^{L_s-1} ( \omega_i + x )
|
||||
//cf https://academiccommons.columbia.edu/doi/10.7916/D8T72HD7 pg 102
|
||||
//omega_i are therefore the negative of the complex roots of f(x)
|
||||
|
||||
//We can find the roots by recognizing that the eigenvalues of a matrix A are the roots of the characteristic polynomial
|
||||
// \rho(\lambda) = det( A - \lambda I ) where I is the unit matrix
|
||||
//The matrix whose characteristic polynomial is an arbitrary monic polynomial a0 + a1 x + a2 x^2 + ... x^n is the companion matrix
|
||||
// A = | 0 1 0 0 0 .... 0 |
|
||||
// | 0 0 1 0 0 .... 0 |
|
||||
// | : : : : : : |
|
||||
// | 0 0 0 0 0 1
|
||||
// | -a0 -a1 -a2 ... ... -an|
|
||||
|
||||
|
||||
//Note the Remez defines the largest power to have unit coefficient
|
||||
std::vector<RealD> coeffs(Ls_out+1);
|
||||
for(int i=0;i<Ls_out+1;i+=2) coeffs[i] = coeffs[i] = remez.getCoeffDen(i); //even powers
|
||||
for(int i=1;i<Ls_out+1;i+=2) coeffs[i] = coeffs[i] = remez.getCoeffNum(i); //odd powers
|
||||
|
||||
std::vector<std::complex<RealD> > roots(Ls_out);
|
||||
|
||||
//Form the companion matrix
|
||||
Eigen::MatrixXd compn(Ls_out,Ls_out);
|
||||
for(int i=0;i<Ls_out-1;i++) compn(i,0) = 0.;
|
||||
compn(Ls_out - 1, 0) = -coeffs[0];
|
||||
|
||||
for(int j=1;j<Ls_out;j++){
|
||||
for(int i=0;i<Ls_out-1;i++) compn(i,j) = i == j-1 ? 1. : 0.;
|
||||
compn(Ls_out - 1, j) = -coeffs[j];
|
||||
}
|
||||
|
||||
//Eigensolve
|
||||
Eigen::EigenSolver<Eigen::MatrixXd> slv(compn, false);
|
||||
|
||||
const auto & ev = slv.eigenvalues();
|
||||
for(int i=0;i<Ls_out;i++)
|
||||
omega_out[i] = -ev(i);
|
||||
|
||||
//Sort ascending (smallest at start of vector!)
|
||||
std::sort(omega_out.begin(), omega_out.end(),
|
||||
[&](const ComplexD &a, const ComplexD &b){ return a.real() < b.real() || (a.real() == b.real() && a.imag() < b.imag()); });
|
||||
|
||||
//McGlynn thesis pg 122 suggest improved iteration counts if magnitude of omega diminishes towards the center of the 5th dimension
|
||||
std::vector<ComplexD> omega_tmp = omega_out;
|
||||
int s_low=0, s_high=Ls_out-1, ss=0;
|
||||
for(int s_from = Ls_out-1; s_from >= 0; s_from--){ //loop from largest omega
|
||||
int s_to;
|
||||
if(ss % 2 == 0){
|
||||
s_to = s_low++;
|
||||
}else{
|
||||
s_to = s_high--;
|
||||
}
|
||||
omega_out[s_to] = omega_tmp[s_from];
|
||||
++ss;
|
||||
}
|
||||
|
||||
std::cout << "Resulting omega_i:" << std::endl;
|
||||
for(int i=0;i<Ls_out;i++)
|
||||
std::cout << omega_out[i] << std::endl;
|
||||
|
||||
std::cout << "Test result matches the approximate polynomial found by the Remez" << std::endl;
|
||||
std::cout << "<x> <remez approx> <poly approx> <diff poly approx remez approx> <exact> <diff poly approx exact>\n";
|
||||
|
||||
int npt = 60;
|
||||
double dlt = lambda_bound/double(npt-1);
|
||||
|
||||
for (int i =0; i<npt; i++){
|
||||
double x = i*dlt;
|
||||
double r = remez.evaluateApprox(x);
|
||||
double p = epsilonMobius(x, omega_out);
|
||||
double e = epsilonMobius(x, omega_in);
|
||||
|
||||
std::cout << x<< " " << r << " " << p <<" " <<r-p << " " << e << " " << e-p << std::endl;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//mobius_param = b+c with b-c=1
|
||||
void computeZmobiusOmega(std::vector<ComplexD> &omega_out, const int Ls_out, const RealD mobius_param, const int Ls_in, const RealD lambda_bound){
|
||||
std::vector<RealD> omega_in(Ls_in, 1./mobius_param);
|
||||
computeZmobiusOmega(omega_out, Ls_out, omega_in, Ls_in, lambda_bound);
|
||||
}
|
||||
|
||||
//ZMobius class takes gamma_i = (b+c) omega_i as its input, where b, c are factored out
|
||||
void computeZmobiusGamma(std::vector<ComplexD> &gamma_out,
|
||||
const RealD mobius_param_out, const int Ls_out,
|
||||
const RealD mobius_param_in, const int Ls_in,
|
||||
const RealD lambda_bound){
|
||||
computeZmobiusOmega(gamma_out, Ls_out, mobius_param_in, Ls_in, lambda_bound);
|
||||
for(int i=0;i<Ls_out;i++) gamma_out[i] = gamma_out[i] * mobius_param_out;
|
||||
}
|
||||
//Assumes mobius_param_out == mobius_param_in
|
||||
void computeZmobiusGamma(std::vector<ComplexD> &gamma_out, const int Ls_out, const RealD mobius_param, const int Ls_in, const RealD lambda_bound){
|
||||
computeZmobiusGamma(gamma_out, mobius_param, Ls_out, mobius_param, Ls_in, lambda_bound);
|
||||
}
|
||||
|
||||
NAMESPACE_END(Approx);
|
||||
NAMESPACE_END(Grid);
|
@ -1,57 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/approx/ZMobius.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Christopher Kelly <ckelly@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_ZMOBIUS_APPROX_H
|
||||
#define GRID_ZMOBIUS_APPROX_H
|
||||
|
||||
#include <Grid/GridCore.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
NAMESPACE_BEGIN(Approx);
|
||||
|
||||
//Compute the Zmobius Omega parameters suitable for eigenvalue range -lambda_bound <= lambda <= lambda_bound
|
||||
//Note omega_i = 1/(b_i + c_i) where b_i and c_i are the Mobius parameters
|
||||
void computeZmobiusOmega(std::vector<ComplexD> &omega_out, const int Ls_out,
|
||||
const std::vector<RealD> &omega_in, const int Ls_in,
|
||||
const RealD lambda_bound);
|
||||
|
||||
//mobius_param = b+c with b-c=1
|
||||
void computeZmobiusOmega(std::vector<ComplexD> &omega_out, const int Ls_out, const RealD mobius_param, const int Ls_in, const RealD lambda_bound);
|
||||
|
||||
//ZMobius class takes gamma_i = (b+c) omega_i as its input, where b, c are factored out
|
||||
void computeZmobiusGamma(std::vector<ComplexD> &gamma_out,
|
||||
const RealD mobius_param_out, const int Ls_out,
|
||||
const RealD mobius_param_in, const int Ls_in,
|
||||
const RealD lambda_bound);
|
||||
|
||||
//Assumes mobius_param_out == mobius_param_in
|
||||
void computeZmobiusGamma(std::vector<ComplexD> &gamma_out, const int Ls_out, const RealD mobius_param, const int Ls_in, const RealD lambda_bound);
|
||||
|
||||
NAMESPACE_END(Approx);
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
@ -1,234 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/BiCGSTAB.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: juettner <juettner@soton.ac.uk>
|
||||
Author: David Murphy <djmurphy@mit.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef GRID_BICGSTAB_H
|
||||
#define GRID_BICGSTAB_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Base classes for iterative processes based on operators
|
||||
// single input vec, single output vec.
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
||||
template <class Field>
|
||||
class BiCGSTAB : public OperatorFunction<Field>
|
||||
{
|
||||
public:
|
||||
using OperatorFunction<Field>::operator();
|
||||
|
||||
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
|
||||
// Defaults true.
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||
|
||||
BiCGSTAB(RealD tol, Integer maxit, bool err_on_no_conv = true) :
|
||||
Tolerance(tol), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv){};
|
||||
|
||||
void operator()(LinearOperatorBase<Field>& Linop, const Field& src, Field& psi)
|
||||
{
|
||||
psi.Checkerboard() = src.Checkerboard();
|
||||
conformable(psi, src);
|
||||
|
||||
RealD cp(0), rho(1), rho_prev(0), alpha(1), beta(0), omega(1);
|
||||
RealD a(0), bo(0), b(0), ssq(0);
|
||||
|
||||
Field p(src);
|
||||
Field r(src);
|
||||
Field rhat(src);
|
||||
Field v(src);
|
||||
Field s(src);
|
||||
Field t(src);
|
||||
Field h(src);
|
||||
|
||||
v = Zero();
|
||||
p = Zero();
|
||||
|
||||
// Initial residual computation & set up
|
||||
RealD guess = norm2(psi);
|
||||
assert(std::isnan(guess) == 0);
|
||||
|
||||
Linop.Op(psi, v);
|
||||
b = norm2(v);
|
||||
|
||||
r = src - v;
|
||||
rhat = r;
|
||||
a = norm2(r);
|
||||
ssq = norm2(src);
|
||||
|
||||
std::cout << GridLogIterative << std::setprecision(8) << "BiCGSTAB: guess " << guess << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(8) << "BiCGSTAB: src " << ssq << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(8) << "BiCGSTAB: mp " << b << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(8) << "BiCGSTAB: r " << a << std::endl;
|
||||
|
||||
RealD rsq = Tolerance * Tolerance * ssq;
|
||||
|
||||
// Check if guess is really REALLY good :)
|
||||
if(a <= rsq){ return; }
|
||||
|
||||
std::cout << GridLogIterative << std::setprecision(8) << "BiCGSTAB: k=0 residual " << a << " target " << rsq << std::endl;
|
||||
|
||||
GridStopWatch LinalgTimer;
|
||||
GridStopWatch InnerTimer;
|
||||
GridStopWatch AxpyNormTimer;
|
||||
GridStopWatch LinearCombTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
|
||||
SolverTimer.Start();
|
||||
int k;
|
||||
for (k = 1; k <= MaxIterations; k++)
|
||||
{
|
||||
rho_prev = rho;
|
||||
|
||||
LinalgTimer.Start();
|
||||
InnerTimer.Start();
|
||||
ComplexD Crho = innerProduct(rhat,r);
|
||||
InnerTimer.Stop();
|
||||
rho = Crho.real();
|
||||
|
||||
beta = (rho / rho_prev) * (alpha / omega);
|
||||
|
||||
LinearCombTimer.Start();
|
||||
bo = beta * omega;
|
||||
{
|
||||
autoView( p_v , p, AcceleratorWrite);
|
||||
autoView( r_v , r, AcceleratorRead);
|
||||
autoView( v_v , v, AcceleratorRead);
|
||||
accelerator_for(ss, p_v.size(), Field::vector_object::Nsimd(),{
|
||||
coalescedWrite(p_v[ss], beta*p_v(ss) - bo*v_v(ss) + r_v(ss));
|
||||
});
|
||||
}
|
||||
LinearCombTimer.Stop();
|
||||
LinalgTimer.Stop();
|
||||
|
||||
MatrixTimer.Start();
|
||||
Linop.Op(p,v);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
LinalgTimer.Start();
|
||||
InnerTimer.Start();
|
||||
ComplexD Calpha = innerProduct(rhat,v);
|
||||
InnerTimer.Stop();
|
||||
alpha = rho / Calpha.real();
|
||||
|
||||
LinearCombTimer.Start();
|
||||
{
|
||||
autoView( p_v , p, AcceleratorRead);
|
||||
autoView( r_v , r, AcceleratorRead);
|
||||
autoView( v_v , v, AcceleratorRead);
|
||||
autoView( psi_v,psi, AcceleratorRead);
|
||||
autoView( h_v , h, AcceleratorWrite);
|
||||
autoView( s_v , s, AcceleratorWrite);
|
||||
accelerator_for(ss, h_v.size(), Field::vector_object::Nsimd(),{
|
||||
coalescedWrite(h_v[ss], alpha*p_v(ss) + psi_v(ss));
|
||||
});
|
||||
accelerator_for(ss, s_v.size(), Field::vector_object::Nsimd(),{
|
||||
coalescedWrite(s_v[ss], -alpha*v_v(ss) + r_v(ss));
|
||||
});
|
||||
}
|
||||
LinearCombTimer.Stop();
|
||||
LinalgTimer.Stop();
|
||||
|
||||
MatrixTimer.Start();
|
||||
Linop.Op(s,t);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
LinalgTimer.Start();
|
||||
InnerTimer.Start();
|
||||
ComplexD Comega = innerProduct(t,s);
|
||||
InnerTimer.Stop();
|
||||
omega = Comega.real() / norm2(t);
|
||||
|
||||
LinearCombTimer.Start();
|
||||
{
|
||||
autoView( psi_v,psi, AcceleratorWrite);
|
||||
autoView( r_v , r, AcceleratorWrite);
|
||||
autoView( h_v , h, AcceleratorRead);
|
||||
autoView( s_v , s, AcceleratorRead);
|
||||
autoView( t_v , t, AcceleratorRead);
|
||||
accelerator_for(ss, psi_v.size(), Field::vector_object::Nsimd(),{
|
||||
coalescedWrite(psi_v[ss], h_v(ss) + omega * s_v(ss));
|
||||
coalescedWrite(r_v[ss], -omega * t_v(ss) + s_v(ss));
|
||||
});
|
||||
}
|
||||
LinearCombTimer.Stop();
|
||||
|
||||
cp = norm2(r);
|
||||
LinalgTimer.Stop();
|
||||
|
||||
std::cout << GridLogIterative << "BiCGSTAB: Iteration " << k << " residual " << sqrt(cp/ssq) << " target " << Tolerance << std::endl;
|
||||
|
||||
// Stopping condition
|
||||
if(cp <= rsq)
|
||||
{
|
||||
SolverTimer.Stop();
|
||||
Linop.Op(psi, v);
|
||||
p = v - src;
|
||||
|
||||
RealD srcnorm = sqrt(norm2(src));
|
||||
RealD resnorm = sqrt(norm2(p));
|
||||
RealD true_residual = resnorm / srcnorm;
|
||||
|
||||
std::cout << GridLogMessage << "BiCGSTAB Converged on iteration " << k << std::endl;
|
||||
std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp/ssq) << std::endl;
|
||||
std::cout << GridLogMessage << "\tTrue residual " << true_residual << std::endl;
|
||||
std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "Time breakdown " << std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "\tInner " << InnerTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "\tAxpyNorm " << AxpyNormTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() << std::endl;
|
||||
|
||||
if(ErrorOnNoConverge){ assert(true_residual / Tolerance < 10000.0); }
|
||||
|
||||
IterationsToComplete = k;
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "BiCGSTAB did NOT converge" << std::endl;
|
||||
|
||||
if(ErrorOnNoConverge){ assert(0); }
|
||||
IterationsToComplete = k;
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
@ -1,159 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/BiCGSTABMixedPrec.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Christopher Kelly <ckelly@phys.columbia.edu>
|
||||
Author: David Murphy <djmurphy@mit.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef GRID_BICGSTAB_MIXED_PREC_H
|
||||
#define GRID_BICGSTAB_MIXED_PREC_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
// Mixed precision restarted defect correction BiCGSTAB
|
||||
template<class FieldD, class FieldF, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0, typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
|
||||
class MixedPrecisionBiCGSTAB : public LinearFunction<FieldD>
|
||||
{
|
||||
public:
|
||||
using LinearFunction<FieldD>::operator();
|
||||
RealD Tolerance;
|
||||
RealD InnerTolerance; // Initial tolerance for inner CG. Defaults to Tolerance but can be changed
|
||||
Integer MaxInnerIterations;
|
||||
Integer MaxOuterIterations;
|
||||
GridBase* SinglePrecGrid; // Grid for single-precision fields
|
||||
RealD OuterLoopNormMult; // Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
|
||||
LinearOperatorBase<FieldF> &Linop_f;
|
||||
LinearOperatorBase<FieldD> &Linop_d;
|
||||
|
||||
Integer TotalInnerIterations; //Number of inner CG iterations
|
||||
Integer TotalOuterIterations; //Number of restarts
|
||||
Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
|
||||
|
||||
//Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
|
||||
LinearFunction<FieldF> *guesser;
|
||||
|
||||
MixedPrecisionBiCGSTAB(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid,
|
||||
LinearOperatorBase<FieldF>& _Linop_f, LinearOperatorBase<FieldD>& _Linop_d) :
|
||||
Linop_f(_Linop_f), Linop_d(_Linop_d), Tolerance(tol), InnerTolerance(tol), MaxInnerIterations(maxinnerit),
|
||||
MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid), OuterLoopNormMult(100.), guesser(NULL) {};
|
||||
|
||||
void useGuesser(LinearFunction<FieldF>& g){
|
||||
guesser = &g;
|
||||
}
|
||||
|
||||
void operator() (const FieldD& src_d_in, FieldD& sol_d)
|
||||
{
|
||||
TotalInnerIterations = 0;
|
||||
|
||||
GridStopWatch TotalTimer;
|
||||
TotalTimer.Start();
|
||||
|
||||
int cb = src_d_in.Checkerboard();
|
||||
sol_d.Checkerboard() = cb;
|
||||
|
||||
RealD src_norm = norm2(src_d_in);
|
||||
RealD stop = src_norm * Tolerance*Tolerance;
|
||||
|
||||
GridBase* DoublePrecGrid = src_d_in.Grid();
|
||||
FieldD tmp_d(DoublePrecGrid);
|
||||
tmp_d.Checkerboard() = cb;
|
||||
|
||||
FieldD tmp2_d(DoublePrecGrid);
|
||||
tmp2_d.Checkerboard() = cb;
|
||||
|
||||
FieldD src_d(DoublePrecGrid);
|
||||
src_d = src_d_in; //source for next inner iteration, computed from residual during operation
|
||||
|
||||
RealD inner_tol = InnerTolerance;
|
||||
|
||||
FieldF src_f(SinglePrecGrid);
|
||||
src_f.Checkerboard() = cb;
|
||||
|
||||
FieldF sol_f(SinglePrecGrid);
|
||||
sol_f.Checkerboard() = cb;
|
||||
|
||||
BiCGSTAB<FieldF> CG_f(inner_tol, MaxInnerIterations);
|
||||
CG_f.ErrorOnNoConverge = false;
|
||||
|
||||
GridStopWatch InnerCGtimer;
|
||||
|
||||
GridStopWatch PrecChangeTimer;
|
||||
|
||||
Integer &outer_iter = TotalOuterIterations; //so it will be equal to the final iteration count
|
||||
|
||||
for(outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++)
|
||||
{
|
||||
// Compute double precision rsd and also new RHS vector.
|
||||
Linop_d.Op(sol_d, tmp_d);
|
||||
RealD norm = axpy_norm(src_d, -1., tmp_d, src_d_in); //src_d is residual vector
|
||||
|
||||
std::cout << GridLogMessage << "MixedPrecisionBiCGSTAB: Outer iteration " << outer_iter << " residual " << norm << " target " << stop << std::endl;
|
||||
|
||||
if(norm < OuterLoopNormMult * stop){
|
||||
std::cout << GridLogMessage << "MixedPrecisionBiCGSTAB: Outer iteration converged on iteration " << outer_iter << std::endl;
|
||||
break;
|
||||
}
|
||||
while(norm * inner_tol * inner_tol < stop){ inner_tol *= 2; } // inner_tol = sqrt(stop/norm) ??
|
||||
|
||||
PrecChangeTimer.Start();
|
||||
precisionChange(src_f, src_d);
|
||||
PrecChangeTimer.Stop();
|
||||
|
||||
sol_f = Zero();
|
||||
|
||||
//Optionally improve inner solver guess (eg using known eigenvectors)
|
||||
if(guesser != NULL){ (*guesser)(src_f, sol_f); }
|
||||
|
||||
//Inner CG
|
||||
CG_f.Tolerance = inner_tol;
|
||||
InnerCGtimer.Start();
|
||||
CG_f(Linop_f, src_f, sol_f);
|
||||
InnerCGtimer.Stop();
|
||||
TotalInnerIterations += CG_f.IterationsToComplete;
|
||||
|
||||
//Convert sol back to double and add to double prec solution
|
||||
PrecChangeTimer.Start();
|
||||
precisionChange(tmp_d, sol_f);
|
||||
PrecChangeTimer.Stop();
|
||||
|
||||
axpy(sol_d, 1.0, tmp_d, sol_d);
|
||||
}
|
||||
|
||||
//Final trial CG
|
||||
std::cout << GridLogMessage << "MixedPrecisionBiCGSTAB: Starting final patch-up double-precision solve" << std::endl;
|
||||
|
||||
BiCGSTAB<FieldD> CG_d(Tolerance, MaxInnerIterations);
|
||||
CG_d(Linop_d, src_d_in, sol_d);
|
||||
TotalFinalStepIterations = CG_d.IterationsToComplete;
|
||||
|
||||
TotalTimer.Stop();
|
||||
std::cout << GridLogMessage << "MixedPrecisionBiCGSTAB: Inner CG iterations " << TotalInnerIterations << " Restarts " << TotalOuterIterations << " Final CG iterations " << TotalFinalStepIterations << std::endl;
|
||||
std::cout << GridLogMessage << "MixedPrecisionBiCGSTAB: Total time " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
@ -1,248 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/CommunicationAvoidingGeneralisedMinimalResidual.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Daniel Richtmann <daniel.richtmann@ur.de>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
|
||||
#define GRID_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class Field>
|
||||
class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<Field> {
|
||||
public:
|
||||
using OperatorFunction<Field>::operator();
|
||||
|
||||
bool ErrorOnNoConverge; // Throw an assert when CAGMRES fails to converge,
|
||||
// defaults to true
|
||||
|
||||
RealD Tolerance;
|
||||
|
||||
Integer MaxIterations;
|
||||
Integer RestartLength;
|
||||
Integer MaxNumberOfRestarts;
|
||||
Integer IterationCount; // Number of iterations the CAGMRES took to finish,
|
||||
// filled in upon completion
|
||||
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch LinalgTimer;
|
||||
GridStopWatch QrTimer;
|
||||
GridStopWatch CompSolutionTimer;
|
||||
|
||||
Eigen::MatrixXcd H;
|
||||
|
||||
std::vector<ComplexD> y;
|
||||
std::vector<ComplexD> gamma;
|
||||
std::vector<ComplexD> c;
|
||||
std::vector<ComplexD> s;
|
||||
|
||||
CommunicationAvoidingGeneralisedMinimalResidual(RealD tol,
|
||||
Integer maxit,
|
||||
Integer restart_length,
|
||||
bool err_on_no_conv = true)
|
||||
: Tolerance(tol)
|
||||
, MaxIterations(maxit)
|
||||
, RestartLength(restart_length)
|
||||
, MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
|
||||
, ErrorOnNoConverge(err_on_no_conv)
|
||||
, H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
|
||||
, y(RestartLength + 1, 0.)
|
||||
, gamma(RestartLength + 1, 0.)
|
||||
, c(RestartLength + 1, 0.)
|
||||
, s(RestartLength + 1, 0.) {};
|
||||
|
||||
void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
|
||||
|
||||
std::cout << GridLogWarning << "This algorithm currently doesn't differ from regular GMRES" << std::endl;
|
||||
|
||||
psi.Checkerboard() = src.Checkerboard();
|
||||
conformable(psi, src);
|
||||
|
||||
RealD guess = norm2(psi);
|
||||
assert(std::isnan(guess) == 0);
|
||||
|
||||
RealD cp;
|
||||
RealD ssq = norm2(src);
|
||||
RealD rsq = Tolerance * Tolerance * ssq;
|
||||
|
||||
Field r(src.Grid());
|
||||
|
||||
std::cout << std::setprecision(4) << std::scientific;
|
||||
std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual: guess " << guess << std::endl;
|
||||
std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual: src " << ssq << std::endl;
|
||||
|
||||
MatrixTimer.Reset();
|
||||
LinalgTimer.Reset();
|
||||
QrTimer.Reset();
|
||||
CompSolutionTimer.Reset();
|
||||
|
||||
GridStopWatch SolverTimer;
|
||||
SolverTimer.Start();
|
||||
|
||||
IterationCount = 0;
|
||||
|
||||
for (int k=0; k<MaxNumberOfRestarts; k++) {
|
||||
|
||||
cp = outerLoopBody(LinOp, src, psi, rsq);
|
||||
|
||||
// Stopping condition
|
||||
if (cp <= rsq) {
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
LinOp.Op(psi,r);
|
||||
axpy(r,-1.0,src,r);
|
||||
|
||||
RealD srcnorm = sqrt(ssq);
|
||||
RealD resnorm = sqrt(norm2(r));
|
||||
RealD true_residual = resnorm / srcnorm;
|
||||
|
||||
std::cout << GridLogMessage << "CommunicationAvoidingGeneralisedMinimalResidual: Converged on iteration " << IterationCount
|
||||
<< " computed residual " << sqrt(cp / ssq)
|
||||
<< " true residual " << true_residual
|
||||
<< " target " << Tolerance << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "CAGMRES Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "CAGMRES Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "CAGMRES Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "CAGMRES Time elapsed: QR " << QrTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "CAGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "CommunicationAvoidingGeneralisedMinimalResidual did NOT converge" << std::endl;
|
||||
|
||||
if (ErrorOnNoConverge)
|
||||
assert(0);
|
||||
}
|
||||
|
||||
RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
|
||||
|
||||
RealD cp = 0;
|
||||
|
||||
Field w(src.Grid());
|
||||
Field r(src.Grid());
|
||||
|
||||
// this should probably be made a class member so that it is only allocated once, not in every restart
|
||||
std::vector<Field> v(RestartLength + 1, src.Grid()); for (auto &elem : v) elem = Zero();
|
||||
|
||||
MatrixTimer.Start();
|
||||
LinOp.Op(psi, w);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
LinalgTimer.Start();
|
||||
r = src - w;
|
||||
|
||||
gamma[0] = sqrt(norm2(r));
|
||||
|
||||
ComplexD scale = 1.0/gamma[0];
|
||||
v[0] = scale * r;
|
||||
|
||||
LinalgTimer.Stop();
|
||||
|
||||
for (int i=0; i<RestartLength; i++) {
|
||||
|
||||
IterationCount++;
|
||||
|
||||
arnoldiStep(LinOp, v, w, i);
|
||||
|
||||
qrUpdate(i);
|
||||
|
||||
cp = norm(gamma[i+1]);
|
||||
|
||||
std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual: Iteration " << IterationCount
|
||||
<< " residual " << cp << " target " << rsq << std::endl;
|
||||
|
||||
if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
|
||||
|
||||
computeSolution(v, psi, i);
|
||||
|
||||
return cp;
|
||||
}
|
||||
}
|
||||
|
||||
assert(0); // Never reached
|
||||
return cp;
|
||||
}
|
||||
|
||||
void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, Field &w, int iter) {
|
||||
|
||||
MatrixTimer.Start();
|
||||
LinOp.Op(v[iter], w);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
LinalgTimer.Start();
|
||||
for (int i = 0; i <= iter; ++i) {
|
||||
H(iter, i) = innerProduct(v[i], w);
|
||||
w = w - ComplexD(H(iter, i)) * v[i];
|
||||
}
|
||||
|
||||
H(iter, iter + 1) = sqrt(norm2(w));
|
||||
v[iter + 1] = ComplexD(1. / H(iter, iter + 1)) * w;
|
||||
LinalgTimer.Stop();
|
||||
}
|
||||
|
||||
void qrUpdate(int iter) {
|
||||
|
||||
QrTimer.Start();
|
||||
for (int i = 0; i < iter ; ++i) {
|
||||
auto tmp = -s[i] * ComplexD(H(iter, i)) + c[i] * ComplexD(H(iter, i + 1));
|
||||
H(iter, i) = conjugate(c[i]) * ComplexD(H(iter, i)) + conjugate(s[i]) * ComplexD(H(iter, i + 1));
|
||||
H(iter, i + 1) = tmp;
|
||||
}
|
||||
|
||||
// Compute new Givens Rotation
|
||||
auto nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
|
||||
c[iter] = H(iter, iter) / nu;
|
||||
s[iter] = H(iter, iter + 1) / nu;
|
||||
|
||||
// Apply new Givens rotation
|
||||
H(iter, iter) = nu;
|
||||
H(iter, iter + 1) = 0.;
|
||||
|
||||
gamma[iter + 1] = -s[iter] * gamma[iter];
|
||||
gamma[iter] = conjugate(c[iter]) * gamma[iter];
|
||||
QrTimer.Stop();
|
||||
}
|
||||
|
||||
void computeSolution(std::vector<Field> const &v, Field &psi, int iter) {
|
||||
|
||||
CompSolutionTimer.Start();
|
||||
for (int i = iter; i >= 0; i--) {
|
||||
y[i] = gamma[i];
|
||||
for (int k = i + 1; k <= iter; k++)
|
||||
y[i] = y[i] - ComplexD(H(k, i)) * y[k];
|
||||
y[i] = y[i] / ComplexD(H(i, i));
|
||||
}
|
||||
|
||||
for (int i = 0; i <= iter; i++)
|
||||
psi = psi + v[i] * y[i];
|
||||
CompSolutionTimer.Stop();
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
@ -1,170 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ConjugateGradientMixedPrec.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Christopher Kelly <ckelly@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CONJUGATE_GRADIENT_MIXED_PREC_H
|
||||
#define GRID_CONJUGATE_GRADIENT_MIXED_PREC_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
//Mixed precision restarted defect correction CG
|
||||
template<class FieldD,class FieldF,
|
||||
typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,
|
||||
typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
|
||||
class MixedPrecisionConjugateGradient : public LinearFunction<FieldD> {
|
||||
public:
|
||||
using LinearFunction<FieldD>::operator();
|
||||
RealD Tolerance;
|
||||
RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
|
||||
Integer MaxInnerIterations;
|
||||
Integer MaxOuterIterations;
|
||||
GridBase* SinglePrecGrid; //Grid for single-precision fields
|
||||
RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
|
||||
LinearOperatorBase<FieldF> &Linop_f;
|
||||
LinearOperatorBase<FieldD> &Linop_d;
|
||||
|
||||
Integer TotalInnerIterations; //Number of inner CG iterations
|
||||
Integer TotalOuterIterations; //Number of restarts
|
||||
Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
|
||||
RealD TrueResidual;
|
||||
|
||||
//Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
|
||||
LinearFunction<FieldF> *guesser;
|
||||
|
||||
MixedPrecisionConjugateGradient(RealD tol,
|
||||
Integer maxinnerit,
|
||||
Integer maxouterit,
|
||||
GridBase* _sp_grid,
|
||||
LinearOperatorBase<FieldF> &_Linop_f,
|
||||
LinearOperatorBase<FieldD> &_Linop_d) :
|
||||
Linop_f(_Linop_f), Linop_d(_Linop_d),
|
||||
Tolerance(tol), InnerTolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid),
|
||||
OuterLoopNormMult(100.), guesser(NULL){ };
|
||||
|
||||
void useGuesser(LinearFunction<FieldF> &g){
|
||||
guesser = &g;
|
||||
}
|
||||
|
||||
void operator() (const FieldD &src_d_in, FieldD &sol_d){
|
||||
std::cout << GridLogMessage << "MixedPrecisionConjugateGradient: Starting mixed precision CG with outer tolerance " << Tolerance << " and inner tolerance " << InnerTolerance << std::endl;
|
||||
TotalInnerIterations = 0;
|
||||
|
||||
GridStopWatch TotalTimer;
|
||||
TotalTimer.Start();
|
||||
|
||||
int cb = src_d_in.Checkerboard();
|
||||
sol_d.Checkerboard() = cb;
|
||||
|
||||
RealD src_norm = norm2(src_d_in);
|
||||
RealD stop = src_norm * Tolerance*Tolerance;
|
||||
|
||||
GridBase* DoublePrecGrid = src_d_in.Grid();
|
||||
FieldD tmp_d(DoublePrecGrid);
|
||||
tmp_d.Checkerboard() = cb;
|
||||
|
||||
FieldD tmp2_d(DoublePrecGrid);
|
||||
tmp2_d.Checkerboard() = cb;
|
||||
|
||||
FieldD src_d(DoublePrecGrid);
|
||||
src_d = src_d_in; //source for next inner iteration, computed from residual during operation
|
||||
|
||||
RealD inner_tol = InnerTolerance;
|
||||
|
||||
FieldF src_f(SinglePrecGrid);
|
||||
src_f.Checkerboard() = cb;
|
||||
|
||||
FieldF sol_f(SinglePrecGrid);
|
||||
sol_f.Checkerboard() = cb;
|
||||
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Starting initial inner CG with tolerance " << inner_tol << std::endl;
|
||||
ConjugateGradient<FieldF> CG_f(inner_tol, MaxInnerIterations);
|
||||
CG_f.ErrorOnNoConverge = false;
|
||||
|
||||
GridStopWatch InnerCGtimer;
|
||||
|
||||
GridStopWatch PrecChangeTimer;
|
||||
|
||||
Integer &outer_iter = TotalOuterIterations; //so it will be equal to the final iteration count
|
||||
|
||||
precisionChangeWorkspace pc_wk_sp_to_dp(DoublePrecGrid, SinglePrecGrid);
|
||||
precisionChangeWorkspace pc_wk_dp_to_sp(SinglePrecGrid, DoublePrecGrid);
|
||||
|
||||
for(outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){
|
||||
//Compute double precision rsd and also new RHS vector.
|
||||
Linop_d.HermOp(sol_d, tmp_d);
|
||||
RealD norm = axpy_norm(src_d, -1., tmp_d, src_d_in); //src_d is residual vector
|
||||
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration " <<outer_iter<<" residual "<< norm<< " target "<< stop<<std::endl;
|
||||
|
||||
if(norm < OuterLoopNormMult * stop){
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration converged on iteration " <<outer_iter <<std::endl;
|
||||
break;
|
||||
}
|
||||
while(norm * inner_tol * inner_tol < stop) inner_tol *= 2; // inner_tol = sqrt(stop/norm) ??
|
||||
|
||||
PrecChangeTimer.Start();
|
||||
precisionChange(src_f, src_d, pc_wk_dp_to_sp);
|
||||
PrecChangeTimer.Stop();
|
||||
|
||||
sol_f = Zero();
|
||||
|
||||
//Optionally improve inner solver guess (eg using known eigenvectors)
|
||||
if(guesser != NULL)
|
||||
(*guesser)(src_f, sol_f);
|
||||
|
||||
//Inner CG
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration " << outer_iter << " starting inner CG with tolerance " << inner_tol << std::endl;
|
||||
CG_f.Tolerance = inner_tol;
|
||||
InnerCGtimer.Start();
|
||||
CG_f(Linop_f, src_f, sol_f);
|
||||
InnerCGtimer.Stop();
|
||||
TotalInnerIterations += CG_f.IterationsToComplete;
|
||||
|
||||
//Convert sol back to double and add to double prec solution
|
||||
PrecChangeTimer.Start();
|
||||
precisionChange(tmp_d, sol_f, pc_wk_sp_to_dp);
|
||||
PrecChangeTimer.Stop();
|
||||
|
||||
axpy(sol_d, 1.0, tmp_d, sol_d);
|
||||
}
|
||||
|
||||
//Final trial CG
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Starting final patch-up double-precision solve"<<std::endl;
|
||||
|
||||
ConjugateGradient<FieldD> CG_d(Tolerance, MaxInnerIterations);
|
||||
CG_d(Linop_d, src_d_in, sol_d);
|
||||
TotalFinalStepIterations = CG_d.IterationsToComplete;
|
||||
TrueResidual = CG_d.TrueResidual;
|
||||
|
||||
TotalTimer.Stop();
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Inner CG iterations " << TotalInnerIterations << " Restarts " << TotalOuterIterations << " Final CG iterations " << TotalFinalStepIterations << std::endl;
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Total time " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
@ -1,213 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ConjugateGradientMixedPrecBatched.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Raoul Hodgson <raoul.hodgson@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CONJUGATE_GRADIENT_MIXED_PREC_BATCHED_H
|
||||
#define GRID_CONJUGATE_GRADIENT_MIXED_PREC_BATCHED_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
//Mixed precision restarted defect correction CG
|
||||
template<class FieldD,class FieldF,
|
||||
typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,
|
||||
typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
|
||||
class MixedPrecisionConjugateGradientBatched : public LinearFunction<FieldD> {
|
||||
public:
|
||||
using LinearFunction<FieldD>::operator();
|
||||
RealD Tolerance;
|
||||
RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
|
||||
Integer MaxInnerIterations;
|
||||
Integer MaxOuterIterations;
|
||||
Integer MaxPatchupIterations;
|
||||
GridBase* SinglePrecGrid; //Grid for single-precision fields
|
||||
RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
|
||||
LinearOperatorBase<FieldF> &Linop_f;
|
||||
LinearOperatorBase<FieldD> &Linop_d;
|
||||
|
||||
//Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
|
||||
LinearFunction<FieldF> *guesser;
|
||||
bool updateResidual;
|
||||
|
||||
MixedPrecisionConjugateGradientBatched(RealD tol,
|
||||
Integer maxinnerit,
|
||||
Integer maxouterit,
|
||||
Integer maxpatchit,
|
||||
GridBase* _sp_grid,
|
||||
LinearOperatorBase<FieldF> &_Linop_f,
|
||||
LinearOperatorBase<FieldD> &_Linop_d,
|
||||
bool _updateResidual=true) :
|
||||
Linop_f(_Linop_f), Linop_d(_Linop_d),
|
||||
Tolerance(tol), InnerTolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), MaxPatchupIterations(maxpatchit), SinglePrecGrid(_sp_grid),
|
||||
OuterLoopNormMult(100.), guesser(NULL), updateResidual(_updateResidual) { };
|
||||
|
||||
void useGuesser(LinearFunction<FieldF> &g){
|
||||
guesser = &g;
|
||||
}
|
||||
|
||||
void operator() (const FieldD &src_d_in, FieldD &sol_d){
|
||||
std::vector<FieldD> srcs_d_in{src_d_in};
|
||||
std::vector<FieldD> sols_d{sol_d};
|
||||
|
||||
(*this)(srcs_d_in,sols_d);
|
||||
|
||||
sol_d = sols_d[0];
|
||||
}
|
||||
|
||||
void operator() (const std::vector<FieldD> &src_d_in, std::vector<FieldD> &sol_d){
|
||||
assert(src_d_in.size() == sol_d.size());
|
||||
int NBatch = src_d_in.size();
|
||||
|
||||
std::cout << GridLogMessage << "NBatch = " << NBatch << std::endl;
|
||||
|
||||
Integer TotalOuterIterations = 0; //Number of restarts
|
||||
std::vector<Integer> TotalInnerIterations(NBatch,0); //Number of inner CG iterations
|
||||
std::vector<Integer> TotalFinalStepIterations(NBatch,0); //Number of CG iterations in final patch-up step
|
||||
|
||||
GridStopWatch TotalTimer;
|
||||
TotalTimer.Start();
|
||||
|
||||
GridStopWatch InnerCGtimer;
|
||||
GridStopWatch PrecChangeTimer;
|
||||
|
||||
int cb = src_d_in[0].Checkerboard();
|
||||
|
||||
std::vector<RealD> src_norm;
|
||||
std::vector<RealD> norm;
|
||||
std::vector<RealD> stop;
|
||||
|
||||
GridBase* DoublePrecGrid = src_d_in[0].Grid();
|
||||
FieldD tmp_d(DoublePrecGrid);
|
||||
tmp_d.Checkerboard() = cb;
|
||||
|
||||
FieldD tmp2_d(DoublePrecGrid);
|
||||
tmp2_d.Checkerboard() = cb;
|
||||
|
||||
std::vector<FieldD> src_d;
|
||||
std::vector<FieldF> src_f;
|
||||
std::vector<FieldF> sol_f;
|
||||
|
||||
for (int i=0; i<NBatch; i++) {
|
||||
sol_d[i].Checkerboard() = cb;
|
||||
|
||||
src_norm.push_back(norm2(src_d_in[i]));
|
||||
norm.push_back(0.);
|
||||
stop.push_back(src_norm[i] * Tolerance*Tolerance);
|
||||
|
||||
src_d.push_back(src_d_in[i]); //source for next inner iteration, computed from residual during operation
|
||||
|
||||
src_f.push_back(SinglePrecGrid);
|
||||
src_f[i].Checkerboard() = cb;
|
||||
|
||||
sol_f.push_back(SinglePrecGrid);
|
||||
sol_f[i].Checkerboard() = cb;
|
||||
}
|
||||
|
||||
RealD inner_tol = InnerTolerance;
|
||||
|
||||
ConjugateGradient<FieldF> CG_f(inner_tol, MaxInnerIterations);
|
||||
CG_f.ErrorOnNoConverge = false;
|
||||
|
||||
Integer &outer_iter = TotalOuterIterations; //so it will be equal to the final iteration count
|
||||
|
||||
for(outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){
|
||||
std::cout << GridLogMessage << std::endl;
|
||||
std::cout << GridLogMessage << "Outer iteration " << outer_iter << std::endl;
|
||||
|
||||
bool allConverged = true;
|
||||
|
||||
for (int i=0; i<NBatch; i++) {
|
||||
//Compute double precision rsd and also new RHS vector.
|
||||
Linop_d.HermOp(sol_d[i], tmp_d);
|
||||
norm[i] = axpy_norm(src_d[i], -1., tmp_d, src_d_in[i]); //src_d is residual vector
|
||||
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradientBatched: Outer iteration " << outer_iter <<" solve " << i << " residual "<< norm[i] << " target "<< stop[i] <<std::endl;
|
||||
|
||||
PrecChangeTimer.Start();
|
||||
precisionChange(src_f[i], src_d[i]);
|
||||
PrecChangeTimer.Stop();
|
||||
|
||||
sol_f[i] = Zero();
|
||||
|
||||
if(norm[i] > OuterLoopNormMult * stop[i]) {
|
||||
allConverged = false;
|
||||
}
|
||||
}
|
||||
if (allConverged) break;
|
||||
|
||||
if (updateResidual) {
|
||||
RealD normMax = *std::max_element(std::begin(norm), std::end(norm));
|
||||
RealD stopMax = *std::max_element(std::begin(stop), std::end(stop));
|
||||
while( normMax * inner_tol * inner_tol < stopMax) inner_tol *= 2; // inner_tol = sqrt(stop/norm) ??
|
||||
CG_f.Tolerance = inner_tol;
|
||||
}
|
||||
|
||||
//Optionally improve inner solver guess (eg using known eigenvectors)
|
||||
if(guesser != NULL) {
|
||||
(*guesser)(src_f, sol_f);
|
||||
}
|
||||
|
||||
for (int i=0; i<NBatch; i++) {
|
||||
//Inner CG
|
||||
InnerCGtimer.Start();
|
||||
CG_f(Linop_f, src_f[i], sol_f[i]);
|
||||
InnerCGtimer.Stop();
|
||||
TotalInnerIterations[i] += CG_f.IterationsToComplete;
|
||||
|
||||
//Convert sol back to double and add to double prec solution
|
||||
PrecChangeTimer.Start();
|
||||
precisionChange(tmp_d, sol_f[i]);
|
||||
PrecChangeTimer.Stop();
|
||||
|
||||
axpy(sol_d[i], 1.0, tmp_d, sol_d[i]);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//Final trial CG
|
||||
std::cout << GridLogMessage << std::endl;
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradientBatched: Starting final patch-up double-precision solve"<<std::endl;
|
||||
|
||||
for (int i=0; i<NBatch; i++) {
|
||||
ConjugateGradient<FieldD> CG_d(Tolerance, MaxPatchupIterations);
|
||||
CG_d(Linop_d, src_d_in[i], sol_d[i]);
|
||||
TotalFinalStepIterations[i] += CG_d.IterationsToComplete;
|
||||
}
|
||||
|
||||
TotalTimer.Stop();
|
||||
|
||||
std::cout << GridLogMessage << std::endl;
|
||||
for (int i=0; i<NBatch; i++) {
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradientBatched: solve " << i << " Inner CG iterations " << TotalInnerIterations[i] << " Restarts " << TotalOuterIterations << " Final CG iterations " << TotalFinalStepIterations[i] << std::endl;
|
||||
}
|
||||
std::cout << GridLogMessage << std::endl;
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradientBatched: Total time " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl;
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
@ -1,346 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ConjugateGradientMultiShift.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CONJUGATE_MULTI_SHIFT_GRADIENT_H
|
||||
#define GRID_CONJUGATE_MULTI_SHIFT_GRADIENT_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Base classes for iterative processes based on operators
|
||||
// single input vec, single output vec.
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
||||
template<class Field>
|
||||
class ConjugateGradientMultiShift : public OperatorMultiFunction<Field>,
|
||||
public OperatorFunction<Field>
|
||||
{
|
||||
public:
|
||||
|
||||
using OperatorFunction<Field>::operator();
|
||||
|
||||
// RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||
std::vector<int> IterationsToCompleteShift; // Iterations for this shift
|
||||
int verbose;
|
||||
MultiShiftFunction shifts;
|
||||
std::vector<RealD> TrueResidualShift;
|
||||
|
||||
ConjugateGradientMultiShift(Integer maxit, const MultiShiftFunction &_shifts) :
|
||||
MaxIterations(maxit),
|
||||
shifts(_shifts)
|
||||
{
|
||||
verbose=1;
|
||||
IterationsToCompleteShift.resize(_shifts.order);
|
||||
TrueResidualShift.resize(_shifts.order);
|
||||
}
|
||||
|
||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &src, Field &psi)
|
||||
{
|
||||
GridBase *grid = src.Grid();
|
||||
int nshift = shifts.order;
|
||||
std::vector<Field> results(nshift,grid);
|
||||
(*this)(Linop,src,results,psi);
|
||||
}
|
||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector<Field> &results, Field &psi)
|
||||
{
|
||||
int nshift = shifts.order;
|
||||
|
||||
(*this)(Linop,src,results);
|
||||
|
||||
psi = shifts.norm*src;
|
||||
for(int i=0;i<nshift;i++){
|
||||
psi = psi + shifts.residues[i]*results[i];
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector<Field> &psi)
|
||||
{
|
||||
GRID_TRACE("ConjugateGradientMultiShift");
|
||||
|
||||
GridBase *grid = src.Grid();
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Convenience references to the info stored in "MultiShiftFunction"
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
int nshift = shifts.order;
|
||||
|
||||
std::vector<RealD> &mass(shifts.poles); // Make references to array in "shifts"
|
||||
std::vector<RealD> &mresidual(shifts.tolerances);
|
||||
std::vector<RealD> alpha(nshift,1.0);
|
||||
std::vector<Field> ps(nshift,grid);// Search directions
|
||||
|
||||
assert(psi.size()==nshift);
|
||||
assert(mass.size()==nshift);
|
||||
assert(mresidual.size()==nshift);
|
||||
|
||||
// dynamic sized arrays on stack; 2d is a pain with vector
|
||||
RealD bs[nshift];
|
||||
RealD rsq[nshift];
|
||||
RealD z[nshift][2];
|
||||
int converged[nshift];
|
||||
|
||||
const int primary =0;
|
||||
|
||||
//Primary shift fields CG iteration
|
||||
RealD a,b,c,d;
|
||||
RealD cp,bp,qq; //prev
|
||||
|
||||
// Matrix mult fields
|
||||
Field r(grid);
|
||||
Field p(grid);
|
||||
Field tmp(grid);
|
||||
Field mmp(grid);
|
||||
|
||||
// Check lightest mass
|
||||
for(int s=0;s<nshift;s++){
|
||||
assert( mass[s]>= mass[primary] );
|
||||
converged[s]=0;
|
||||
}
|
||||
|
||||
// Wire guess to zero
|
||||
// Residuals "r" are src
|
||||
// First search direction "p" is also src
|
||||
cp = norm2(src);
|
||||
|
||||
// Handle trivial case of zero src.
|
||||
if( cp == 0. ){
|
||||
for(int s=0;s<nshift;s++){
|
||||
psi[s] = Zero();
|
||||
IterationsToCompleteShift[s] = 1;
|
||||
TrueResidualShift[s] = 0.;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
for(int s=0;s<nshift;s++){
|
||||
rsq[s] = cp * mresidual[s] * mresidual[s];
|
||||
std::cout<<GridLogMessage<<"ConjugateGradientMultiShift: shift "<<s
|
||||
<<" target resid "<<rsq[s]<<std::endl;
|
||||
ps[s] = src;
|
||||
}
|
||||
// r and p for primary
|
||||
r=src;
|
||||
p=src;
|
||||
|
||||
//MdagM+m[0]
|
||||
Linop.HermOpAndNorm(p,mmp,d,qq);
|
||||
axpy(mmp,mass[0],p,mmp);
|
||||
RealD rn = norm2(p);
|
||||
d += rn*mass[0];
|
||||
|
||||
// have verified that inner product of
|
||||
// p and mmp is equal to d after this since
|
||||
// the d computation is tricky
|
||||
// qq = real(innerProduct(p,mmp));
|
||||
// std::cout<<GridLogMessage << "debug equal ? qq "<<qq<<" d "<< d<<std::endl;
|
||||
|
||||
b = -cp /d;
|
||||
|
||||
// Set up the various shift variables
|
||||
int iz=0;
|
||||
z[0][1-iz] = 1.0;
|
||||
z[0][iz] = 1.0;
|
||||
bs[0] = b;
|
||||
for(int s=1;s<nshift;s++){
|
||||
z[s][1-iz] = 1.0;
|
||||
z[s][iz] = 1.0/( 1.0 - b*(mass[s]-mass[0]));
|
||||
bs[s] = b*z[s][iz];
|
||||
}
|
||||
|
||||
// r += b[0] A.p[0]
|
||||
// c= norm(r)
|
||||
c=axpy_norm(r,b,mmp,r);
|
||||
|
||||
for(int s=0;s<nshift;s++) {
|
||||
axpby(psi[s],0.,-bs[s]*alpha[s],src,src);
|
||||
}
|
||||
|
||||
std::cout << GridLogIterative << "ConjugateGradientMultiShift: initial rn (|src|^2) =" << rn << " qq (|MdagM src|^2) =" << qq << " d ( dot(src, [MdagM + m_0]src) ) =" << d << " c=" << c << std::endl;
|
||||
|
||||
|
||||
///////////////////////////////////////
|
||||
// Timers
|
||||
///////////////////////////////////////
|
||||
GridStopWatch AXPYTimer;
|
||||
GridStopWatch ShiftTimer;
|
||||
GridStopWatch QRTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
SolverTimer.Start();
|
||||
|
||||
// Iteration loop
|
||||
int k;
|
||||
|
||||
for (k=1;k<=MaxIterations;k++){
|
||||
|
||||
a = c /cp;
|
||||
AXPYTimer.Start();
|
||||
axpy(p,a,p,r);
|
||||
AXPYTimer.Stop();
|
||||
|
||||
// Note to self - direction ps is iterated seperately
|
||||
// for each shift. Does not appear to have any scope
|
||||
// for avoiding linear algebra in "single" case.
|
||||
//
|
||||
// However SAME r is used. Could load "r" and update
|
||||
// ALL ps[s]. 2/3 Bandwidth saving
|
||||
// New Kernel: Load r, vector of coeffs, vector of pointers ps
|
||||
AXPYTimer.Start();
|
||||
for(int s=0;s<nshift;s++){
|
||||
if ( ! converged[s] ) {
|
||||
if (s==0){
|
||||
axpy(ps[s],a,ps[s],r);
|
||||
} else{
|
||||
RealD as =a *z[s][iz]*bs[s] /(z[s][1-iz]*b);
|
||||
axpby(ps[s],z[s][iz],as,r,ps[s]);
|
||||
}
|
||||
}
|
||||
}
|
||||
AXPYTimer.Stop();
|
||||
|
||||
cp=c;
|
||||
MatrixTimer.Start();
|
||||
//Linop.HermOpAndNorm(p,mmp,d,qq); // d is used
|
||||
// The below is faster on KNL
|
||||
Linop.HermOp(p,mmp);
|
||||
d=real(innerProduct(p,mmp));
|
||||
|
||||
MatrixTimer.Stop();
|
||||
|
||||
AXPYTimer.Start();
|
||||
axpy(mmp,mass[0],p,mmp);
|
||||
AXPYTimer.Stop();
|
||||
RealD rn = norm2(p);
|
||||
d += rn*mass[0];
|
||||
|
||||
bp=b;
|
||||
b=-cp/d;
|
||||
|
||||
AXPYTimer.Start();
|
||||
c=axpy_norm(r,b,mmp,r);
|
||||
AXPYTimer.Stop();
|
||||
|
||||
// Toggle the recurrence history
|
||||
bs[0] = b;
|
||||
iz = 1-iz;
|
||||
ShiftTimer.Start();
|
||||
for(int s=1;s<nshift;s++){
|
||||
if((!converged[s])){
|
||||
RealD z0 = z[s][1-iz];
|
||||
RealD z1 = z[s][iz];
|
||||
z[s][iz] = z0*z1*bp
|
||||
/ (b*a*(z1-z0) + z1*bp*(1- (mass[s]-mass[0])*b));
|
||||
bs[s] = b*z[s][iz]/z0; // NB sign rel to Mike
|
||||
}
|
||||
}
|
||||
ShiftTimer.Stop();
|
||||
|
||||
for(int s=0;s<nshift;s++){
|
||||
int ss = s;
|
||||
// Scope for optimisation here in case of "single".
|
||||
// Could load psi[0] and pull all ps[s] in.
|
||||
// if ( single ) ss=primary;
|
||||
// Bandwith saving in single case is Ls * 3 -> 2+Ls, so ~ 3x saving
|
||||
// Pipelined CG gain:
|
||||
//
|
||||
// New Kernel: Load r, vector of coeffs, vector of pointers ps
|
||||
// New Kernel: Load psi[0], vector of coeffs, vector of pointers ps
|
||||
// If can predict the coefficient bs then we can fuse these and avoid write reread cyce
|
||||
// on ps[s].
|
||||
// Before: 3 x npole + 3 x npole
|
||||
// After : 2 x npole (ps[s]) => 3x speed up of multishift CG.
|
||||
|
||||
if( (!converged[s]) ) {
|
||||
axpy(psi[ss],-bs[s]*alpha[s],ps[s],psi[ss]);
|
||||
}
|
||||
}
|
||||
|
||||
// Convergence checks
|
||||
int all_converged = 1;
|
||||
for(int s=0;s<nshift;s++){
|
||||
|
||||
if ( (!converged[s]) ){
|
||||
IterationsToCompleteShift[s] = k;
|
||||
|
||||
RealD css = c * z[s][iz]* z[s][iz];
|
||||
|
||||
if(css<rsq[s]){
|
||||
if ( ! converged[s] )
|
||||
std::cout<<GridLogMessage<<"ConjugateGradientMultiShift k="<<k<<" Shift "<<s<<" has converged"<<std::endl;
|
||||
converged[s]=1;
|
||||
} else {
|
||||
all_converged=0;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if ( all_converged ){
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
|
||||
std::cout<<GridLogMessage<< "CGMultiShift: All shifts have converged iteration "<<k<<std::endl;
|
||||
std::cout<<GridLogMessage<< "CGMultiShift: Checking solutions"<<std::endl;
|
||||
|
||||
// Check answers
|
||||
for(int s=0; s < nshift; s++) {
|
||||
Linop.HermOpAndNorm(psi[s],mmp,d,qq);
|
||||
axpy(tmp,mass[s],psi[s],mmp);
|
||||
axpy(r,-alpha[s],src,tmp);
|
||||
RealD rn = norm2(r);
|
||||
RealD cn = norm2(src);
|
||||
TrueResidualShift[s] = std::sqrt(rn/cn);
|
||||
std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<< TrueResidualShift[s] <<std::endl;
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tAXPY " << AXPYTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tShift " << ShiftTimer.Elapsed() <<std::endl;
|
||||
|
||||
IterationsToComplete = k;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
// ugly hack
|
||||
std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
|
||||
// assert(0);
|
||||
}
|
||||
|
||||
};
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
@ -1,373 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ConjugateGradientMultiShift.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Christopher Kelly <ckelly@bnl.gov>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
//CK 2020: A variant of the multi-shift conjugate gradient with the matrix multiplication in single precision.
|
||||
//The residual is stored in single precision, but the search directions and solution are stored in double precision.
|
||||
//Every update_freq iterations the residual is corrected in double precision.
|
||||
//For safety the a final regular CG is applied to clean up if necessary
|
||||
|
||||
//PB Pure single, then double fixup
|
||||
|
||||
template<class FieldD, class FieldF,
|
||||
typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,
|
||||
typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
|
||||
class ConjugateGradientMultiShiftMixedPrecCleanup : public OperatorMultiFunction<FieldD>,
|
||||
public OperatorFunction<FieldD>
|
||||
{
|
||||
public:
|
||||
|
||||
using OperatorFunction<FieldD>::operator();
|
||||
|
||||
RealD Tolerance;
|
||||
Integer MaxIterationsMshift;
|
||||
Integer MaxIterations;
|
||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||
std::vector<int> IterationsToCompleteShift; // Iterations for this shift
|
||||
int verbose;
|
||||
MultiShiftFunction shifts;
|
||||
std::vector<RealD> TrueResidualShift;
|
||||
|
||||
int ReliableUpdateFreq; //number of iterations between reliable updates
|
||||
|
||||
GridBase* SinglePrecGrid; //Grid for single-precision fields
|
||||
LinearOperatorBase<FieldF> &Linop_f; //single precision
|
||||
|
||||
ConjugateGradientMultiShiftMixedPrecCleanup(Integer maxit, const MultiShiftFunction &_shifts,
|
||||
GridBase* _SinglePrecGrid, LinearOperatorBase<FieldF> &_Linop_f,
|
||||
int _ReliableUpdateFreq) :
|
||||
MaxIterationsMshift(maxit), shifts(_shifts), SinglePrecGrid(_SinglePrecGrid), Linop_f(_Linop_f), ReliableUpdateFreq(_ReliableUpdateFreq),
|
||||
MaxIterations(20000)
|
||||
{
|
||||
verbose=1;
|
||||
IterationsToCompleteShift.resize(_shifts.order);
|
||||
TrueResidualShift.resize(_shifts.order);
|
||||
}
|
||||
|
||||
void operator() (LinearOperatorBase<FieldD> &Linop, const FieldD &src, FieldD &psi)
|
||||
{
|
||||
GridBase *grid = src.Grid();
|
||||
int nshift = shifts.order;
|
||||
std::vector<FieldD> results(nshift,grid);
|
||||
(*this)(Linop,src,results,psi);
|
||||
}
|
||||
void operator() (LinearOperatorBase<FieldD> &Linop, const FieldD &src, std::vector<FieldD> &results, FieldD &psi)
|
||||
{
|
||||
int nshift = shifts.order;
|
||||
|
||||
(*this)(Linop,src,results);
|
||||
|
||||
psi = shifts.norm*src;
|
||||
for(int i=0;i<nshift;i++){
|
||||
psi = psi + shifts.residues[i]*results[i];
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void operator() (LinearOperatorBase<FieldD> &Linop_d, const FieldD &src_d, std::vector<FieldD> &psi_d)
|
||||
{
|
||||
GRID_TRACE("ConjugateGradientMultiShiftMixedPrecCleanup");
|
||||
GridBase *DoublePrecGrid = src_d.Grid();
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Convenience references to the info stored in "MultiShiftFunction"
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
int nshift = shifts.order;
|
||||
|
||||
std::vector<RealD> &mass(shifts.poles); // Make references to array in "shifts"
|
||||
std::vector<RealD> &mresidual(shifts.tolerances);
|
||||
std::vector<RealD> alpha(nshift,1.0);
|
||||
|
||||
//Double precision search directions
|
||||
FieldD p_d(DoublePrecGrid);
|
||||
std::vector<FieldF> ps_f (nshift, SinglePrecGrid);// Search directions (single precision)
|
||||
std::vector<FieldF> psi_f(nshift, SinglePrecGrid);// solutions (single precision)
|
||||
|
||||
FieldD tmp_d(DoublePrecGrid);
|
||||
FieldD r_d(DoublePrecGrid);
|
||||
FieldF r_f(SinglePrecGrid);
|
||||
FieldD mmp_d(DoublePrecGrid);
|
||||
|
||||
assert(psi_d.size()==nshift);
|
||||
assert(mass.size()==nshift);
|
||||
assert(mresidual.size()==nshift);
|
||||
|
||||
// dynamic sized arrays on stack; 2d is a pain with vector
|
||||
RealD bs[nshift];
|
||||
RealD rsq[nshift];
|
||||
RealD rsqf[nshift];
|
||||
RealD z[nshift][2];
|
||||
int converged[nshift];
|
||||
|
||||
const int primary =0;
|
||||
|
||||
//Primary shift fields CG iteration
|
||||
RealD a,b,c,d;
|
||||
RealD cp,bp,qq; //prev
|
||||
|
||||
// Matrix mult fields
|
||||
FieldF p_f(SinglePrecGrid);
|
||||
FieldF mmp_f(SinglePrecGrid);
|
||||
|
||||
// Check lightest mass
|
||||
for(int s=0;s<nshift;s++){
|
||||
assert( mass[s]>= mass[primary] );
|
||||
converged[s]=0;
|
||||
}
|
||||
|
||||
// Wire guess to zero
|
||||
// Residuals "r" are src
|
||||
// First search direction "p" is also src
|
||||
cp = norm2(src_d);
|
||||
|
||||
// Handle trivial case of zero src.
|
||||
if( cp == 0. ){
|
||||
for(int s=0;s<nshift;s++){
|
||||
psi_d[s] = Zero();
|
||||
psi_f[s] = Zero();
|
||||
IterationsToCompleteShift[s] = 1;
|
||||
TrueResidualShift[s] = 0.;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
for(int s=0;s<nshift;s++){
|
||||
rsq[s] = cp * mresidual[s] * mresidual[s];
|
||||
rsqf[s] =rsq[s];
|
||||
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrecCleanup: shift "<< s <<" target resid "<<rsq[s]<<std::endl;
|
||||
// ps_d[s] = src_d;
|
||||
precisionChange(ps_f[s],src_d);
|
||||
}
|
||||
// r and p for primary
|
||||
p_d = src_d; //primary copy --- make this a reference to ps_d to save axpys
|
||||
r_d = p_d;
|
||||
|
||||
//MdagM+m[0]
|
||||
precisionChange(p_f,p_d);
|
||||
Linop_f.HermOpAndNorm(p_f,mmp_f,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
|
||||
precisionChange(tmp_d,mmp_f);
|
||||
Linop_d.HermOpAndNorm(p_d,mmp_d,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
|
||||
tmp_d = tmp_d - mmp_d;
|
||||
std::cout << " Testing operators match "<<norm2(mmp_d)<<" f "<<norm2(mmp_f)<<" diff "<< norm2(tmp_d)<<std::endl;
|
||||
// assert(norm2(tmp_d)< 1.0e-4);
|
||||
|
||||
axpy(mmp_d,mass[0],p_d,mmp_d);
|
||||
RealD rn = norm2(p_d);
|
||||
d += rn*mass[0];
|
||||
|
||||
b = -cp /d;
|
||||
|
||||
// Set up the various shift variables
|
||||
int iz=0;
|
||||
z[0][1-iz] = 1.0;
|
||||
z[0][iz] = 1.0;
|
||||
bs[0] = b;
|
||||
for(int s=1;s<nshift;s++){
|
||||
z[s][1-iz] = 1.0;
|
||||
z[s][iz] = 1.0/( 1.0 - b*(mass[s]-mass[0]));
|
||||
bs[s] = b*z[s][iz];
|
||||
}
|
||||
|
||||
// r += b[0] A.p[0]
|
||||
// c= norm(r)
|
||||
c=axpy_norm(r_d,b,mmp_d,r_d);
|
||||
|
||||
for(int s=0;s<nshift;s++) {
|
||||
axpby(psi_d[s],0.,-bs[s]*alpha[s],src_d,src_d);
|
||||
precisionChange(psi_f[s],psi_d[s]);
|
||||
}
|
||||
|
||||
///////////////////////////////////////
|
||||
// Timers
|
||||
///////////////////////////////////////
|
||||
GridStopWatch AXPYTimer, ShiftTimer, QRTimer, MatrixTimer, SolverTimer, PrecChangeTimer, CleanupTimer;
|
||||
|
||||
SolverTimer.Start();
|
||||
|
||||
// Iteration loop
|
||||
int k;
|
||||
|
||||
for (k=1;k<=MaxIterationsMshift;k++){
|
||||
|
||||
a = c /cp;
|
||||
AXPYTimer.Start();
|
||||
axpy(p_d,a,p_d,r_d);
|
||||
AXPYTimer.Stop();
|
||||
|
||||
PrecChangeTimer.Start();
|
||||
precisionChange(r_f, r_d);
|
||||
PrecChangeTimer.Stop();
|
||||
|
||||
AXPYTimer.Start();
|
||||
for(int s=0;s<nshift;s++){
|
||||
if ( ! converged[s] ) {
|
||||
if (s==0){
|
||||
axpy(ps_f[s],a,ps_f[s],r_f);
|
||||
} else{
|
||||
RealD as =a *z[s][iz]*bs[s] /(z[s][1-iz]*b);
|
||||
axpby(ps_f[s],z[s][iz],as,r_f,ps_f[s]);
|
||||
}
|
||||
}
|
||||
}
|
||||
AXPYTimer.Stop();
|
||||
|
||||
cp=c;
|
||||
PrecChangeTimer.Start();
|
||||
precisionChange(p_f, p_d); //get back single prec search direction for linop
|
||||
PrecChangeTimer.Stop();
|
||||
MatrixTimer.Start();
|
||||
Linop_f.HermOp(p_f,mmp_f);
|
||||
MatrixTimer.Stop();
|
||||
PrecChangeTimer.Start();
|
||||
precisionChange(mmp_d, mmp_f); // From Float to Double
|
||||
PrecChangeTimer.Stop();
|
||||
|
||||
d=real(innerProduct(p_d,mmp_d));
|
||||
axpy(mmp_d,mass[0],p_d,mmp_d);
|
||||
RealD rn = norm2(p_d);
|
||||
d += rn*mass[0];
|
||||
|
||||
bp=b;
|
||||
b=-cp/d;
|
||||
|
||||
// Toggle the recurrence history
|
||||
bs[0] = b;
|
||||
iz = 1-iz;
|
||||
ShiftTimer.Start();
|
||||
for(int s=1;s<nshift;s++){
|
||||
if((!converged[s])){
|
||||
RealD z0 = z[s][1-iz];
|
||||
RealD z1 = z[s][iz];
|
||||
z[s][iz] = z0*z1*bp
|
||||
/ (b*a*(z1-z0) + z1*bp*(1- (mass[s]-mass[0])*b));
|
||||
bs[s] = b*z[s][iz]/z0; // NB sign rel to Mike
|
||||
}
|
||||
}
|
||||
ShiftTimer.Stop();
|
||||
|
||||
//Update single precision solutions
|
||||
AXPYTimer.Start();
|
||||
for(int s=0;s<nshift;s++){
|
||||
int ss = s;
|
||||
if( (!converged[s]) ) {
|
||||
axpy(psi_f[ss],-bs[s]*alpha[s],ps_f[s],psi_f[ss]);
|
||||
}
|
||||
}
|
||||
c = axpy_norm(r_d,b,mmp_d,r_d);
|
||||
AXPYTimer.Stop();
|
||||
|
||||
// Convergence checks
|
||||
int all_converged = 1;
|
||||
for(int s=0;s<nshift;s++){
|
||||
|
||||
if ( (!converged[s]) ){
|
||||
IterationsToCompleteShift[s] = k;
|
||||
|
||||
RealD css = c * z[s][iz]* z[s][iz];
|
||||
|
||||
if(css<rsqf[s]){
|
||||
if ( ! converged[s] )
|
||||
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrecCleanup k="<<k<<" Shift "<<s<<" has converged"<<std::endl;
|
||||
converged[s]=1;
|
||||
} else {
|
||||
all_converged=0;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if ( all_converged || k == MaxIterationsMshift-1){
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
for(int s=0;s<nshift;s++){
|
||||
precisionChange(psi_d[s],psi_f[s]);
|
||||
}
|
||||
|
||||
|
||||
if ( all_converged ){
|
||||
std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrecCleanup: All shifts have converged iteration "<<k<<std::endl;
|
||||
std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrecCleanup: Checking solutions"<<std::endl;
|
||||
} else {
|
||||
std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrecCleanup: Not all shifts have converged iteration "<<k<<std::endl;
|
||||
}
|
||||
|
||||
// Check answers
|
||||
for(int s=0; s < nshift; s++) {
|
||||
Linop_d.HermOpAndNorm(psi_d[s],mmp_d,d,qq);
|
||||
axpy(tmp_d,mass[s],psi_d[s],mmp_d);
|
||||
axpy(r_d,-alpha[s],src_d,tmp_d);
|
||||
RealD rn = norm2(r_d);
|
||||
RealD cn = norm2(src_d);
|
||||
TrueResidualShift[s] = std::sqrt(rn/cn);
|
||||
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrecCleanup: shift["<<s<<"] true residual "<< TrueResidualShift[s] << " target " << mresidual[s] << std::endl;
|
||||
|
||||
//If we have not reached the desired tolerance, do a (mixed precision) CG cleanup
|
||||
if(rn >= rsq[s]){
|
||||
CleanupTimer.Start();
|
||||
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrecCleanup: performing cleanup step for shift " << s << std::endl;
|
||||
|
||||
//Setup linear operators for final cleanup
|
||||
ConjugateGradientMultiShiftMixedPrecSupport::ShiftedLinop<FieldD> Linop_shift_d(Linop_d, mass[s]);
|
||||
ConjugateGradientMultiShiftMixedPrecSupport::ShiftedLinop<FieldF> Linop_shift_f(Linop_f, mass[s]);
|
||||
|
||||
MixedPrecisionConjugateGradient<FieldD,FieldF> cg(mresidual[s], MaxIterations, MaxIterations, SinglePrecGrid, Linop_shift_f, Linop_shift_d);
|
||||
cg(src_d, psi_d[s]);
|
||||
|
||||
TrueResidualShift[s] = cg.TrueResidual;
|
||||
CleanupTimer.Stop();
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "ConjugateGradientMultiShiftMixedPrecCleanup: Time Breakdown for body"<<std::endl;
|
||||
std::cout << GridLogMessage << "\tSolver " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\t\tAXPY " << AXPYTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\t\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\t\tShift " << ShiftTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\t\tPrecision Change " << PrecChangeTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tFinal Cleanup " << CleanupTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tSolver+Cleanup " << SolverTimer.Elapsed() + CleanupTimer.Elapsed() << std::endl;
|
||||
|
||||
IterationsToComplete = k;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
|
||||
assert(0);
|
||||
}
|
||||
|
||||
};
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -1,416 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ConjugateGradientMultiShift.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Christopher Kelly <ckelly@bnl.gov>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CONJUGATE_GRADIENT_MULTI_SHIFT_MIXEDPREC_H
|
||||
#define GRID_CONJUGATE_GRADIENT_MULTI_SHIFT_MIXEDPREC_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
//CK 2020: A variant of the multi-shift conjugate gradient with the matrix multiplication in single precision.
|
||||
//The residual is stored in single precision, but the search directions and solution are stored in double precision.
|
||||
//Every update_freq iterations the residual is corrected in double precision.
|
||||
|
||||
//For safety the a final regular CG is applied to clean up if necessary
|
||||
|
||||
//Linop to add shift to input linop, used in cleanup CG
|
||||
namespace ConjugateGradientMultiShiftMixedPrecSupport{
|
||||
template<typename Field>
|
||||
class ShiftedLinop: public LinearOperatorBase<Field>{
|
||||
public:
|
||||
LinearOperatorBase<Field> &linop_base;
|
||||
RealD shift;
|
||||
|
||||
ShiftedLinop(LinearOperatorBase<Field> &_linop_base, RealD _shift): linop_base(_linop_base), shift(_shift){}
|
||||
|
||||
void OpDiag (const Field &in, Field &out){ assert(0); }
|
||||
void OpDir (const Field &in, Field &out,int dir,int disp){ assert(0); }
|
||||
void OpDirAll (const Field &in, std::vector<Field> &out){ assert(0); }
|
||||
|
||||
void Op (const Field &in, Field &out){ assert(0); }
|
||||
void AdjOp (const Field &in, Field &out){ assert(0); }
|
||||
|
||||
void HermOp(const Field &in, Field &out){
|
||||
linop_base.HermOp(in, out);
|
||||
axpy(out, shift, in, out);
|
||||
}
|
||||
|
||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
HermOp(in,out);
|
||||
ComplexD dot = innerProduct(in,out);
|
||||
n1=real(dot);
|
||||
n2=norm2(out);
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
template<class FieldD, class FieldF,
|
||||
typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,
|
||||
typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
|
||||
class ConjugateGradientMultiShiftMixedPrec : public OperatorMultiFunction<FieldD>,
|
||||
public OperatorFunction<FieldD>
|
||||
{
|
||||
public:
|
||||
|
||||
using OperatorFunction<FieldD>::operator();
|
||||
|
||||
RealD Tolerance;
|
||||
Integer MaxIterationsMshift;
|
||||
Integer MaxIterations;
|
||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||
std::vector<int> IterationsToCompleteShift; // Iterations for this shift
|
||||
int verbose;
|
||||
MultiShiftFunction shifts;
|
||||
std::vector<RealD> TrueResidualShift;
|
||||
|
||||
int ReliableUpdateFreq; //number of iterations between reliable updates
|
||||
|
||||
GridBase* SinglePrecGrid; //Grid for single-precision fields
|
||||
LinearOperatorBase<FieldF> &Linop_f; //single precision
|
||||
|
||||
ConjugateGradientMultiShiftMixedPrec(Integer maxit, const MultiShiftFunction &_shifts,
|
||||
GridBase* _SinglePrecGrid, LinearOperatorBase<FieldF> &_Linop_f,
|
||||
int _ReliableUpdateFreq) :
|
||||
MaxIterationsMshift(maxit), shifts(_shifts), SinglePrecGrid(_SinglePrecGrid), Linop_f(_Linop_f), ReliableUpdateFreq(_ReliableUpdateFreq),
|
||||
MaxIterations(20000)
|
||||
{
|
||||
verbose=1;
|
||||
IterationsToCompleteShift.resize(_shifts.order);
|
||||
TrueResidualShift.resize(_shifts.order);
|
||||
}
|
||||
|
||||
void operator() (LinearOperatorBase<FieldD> &Linop, const FieldD &src, FieldD &psi)
|
||||
{
|
||||
GridBase *grid = src.Grid();
|
||||
int nshift = shifts.order;
|
||||
std::vector<FieldD> results(nshift,grid);
|
||||
(*this)(Linop,src,results,psi);
|
||||
}
|
||||
void operator() (LinearOperatorBase<FieldD> &Linop, const FieldD &src, std::vector<FieldD> &results, FieldD &psi)
|
||||
{
|
||||
int nshift = shifts.order;
|
||||
|
||||
(*this)(Linop,src,results);
|
||||
|
||||
psi = shifts.norm*src;
|
||||
for(int i=0;i<nshift;i++){
|
||||
psi = psi + shifts.residues[i]*results[i];
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void operator() (LinearOperatorBase<FieldD> &Linop_d, const FieldD &src_d, std::vector<FieldD> &psi_d)
|
||||
{
|
||||
GRID_TRACE("ConjugateGradientMultiShiftMixedPrec");
|
||||
GridBase *DoublePrecGrid = src_d.Grid();
|
||||
|
||||
precisionChangeWorkspace pc_wk_s_to_d(DoublePrecGrid,SinglePrecGrid);
|
||||
precisionChangeWorkspace pc_wk_d_to_s(SinglePrecGrid,DoublePrecGrid);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Convenience references to the info stored in "MultiShiftFunction"
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
int nshift = shifts.order;
|
||||
|
||||
std::vector<RealD> &mass(shifts.poles); // Make references to array in "shifts"
|
||||
std::vector<RealD> &mresidual(shifts.tolerances);
|
||||
std::vector<RealD> alpha(nshift,1.0);
|
||||
|
||||
//Double precision search directions
|
||||
FieldD p_d(DoublePrecGrid);
|
||||
std::vector<FieldD> ps_d(nshift, DoublePrecGrid);// Search directions (double precision)
|
||||
|
||||
FieldD tmp_d(DoublePrecGrid);
|
||||
FieldD r_d(DoublePrecGrid);
|
||||
FieldD mmp_d(DoublePrecGrid);
|
||||
|
||||
assert(psi_d.size()==nshift);
|
||||
assert(mass.size()==nshift);
|
||||
assert(mresidual.size()==nshift);
|
||||
|
||||
// dynamic sized arrays on stack; 2d is a pain with vector
|
||||
RealD bs[nshift];
|
||||
RealD rsq[nshift];
|
||||
RealD rsqf[nshift];
|
||||
RealD z[nshift][2];
|
||||
int converged[nshift];
|
||||
|
||||
const int primary =0;
|
||||
|
||||
//Primary shift fields CG iteration
|
||||
RealD a,b,c,d;
|
||||
RealD cp,bp,qq; //prev
|
||||
|
||||
// Matrix mult fields
|
||||
FieldF p_f(SinglePrecGrid);
|
||||
FieldF mmp_f(SinglePrecGrid);
|
||||
|
||||
// Check lightest mass
|
||||
for(int s=0;s<nshift;s++){
|
||||
assert( mass[s]>= mass[primary] );
|
||||
converged[s]=0;
|
||||
}
|
||||
|
||||
// Wire guess to zero
|
||||
// Residuals "r" are src
|
||||
// First search direction "p" is also src
|
||||
cp = norm2(src_d);
|
||||
|
||||
// Handle trivial case of zero src.
|
||||
if( cp == 0. ){
|
||||
for(int s=0;s<nshift;s++){
|
||||
psi_d[s] = Zero();
|
||||
IterationsToCompleteShift[s] = 1;
|
||||
TrueResidualShift[s] = 0.;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
for(int s=0;s<nshift;s++){
|
||||
rsq[s] = cp * mresidual[s] * mresidual[s];
|
||||
rsqf[s] =rsq[s];
|
||||
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec: shift "<< s <<" target resid "<<rsq[s]<<std::endl;
|
||||
ps_d[s] = src_d;
|
||||
}
|
||||
// r and p for primary
|
||||
p_d = src_d; //primary copy --- make this a reference to ps_d to save axpys
|
||||
r_d = p_d;
|
||||
|
||||
//MdagM+m[0]
|
||||
precisionChange(p_f, p_d, pc_wk_d_to_s);
|
||||
|
||||
Linop_f.HermOpAndNorm(p_f,mmp_f,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
|
||||
precisionChange(tmp_d, mmp_f, pc_wk_s_to_d);
|
||||
Linop_d.HermOpAndNorm(p_d,mmp_d,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
|
||||
tmp_d = tmp_d - mmp_d;
|
||||
std::cout << " Testing operators match "<<norm2(mmp_d)<<" f "<<norm2(mmp_f)<<" diff "<< norm2(tmp_d)<<std::endl;
|
||||
assert(norm2(tmp_d)< 1.0);
|
||||
|
||||
axpy(mmp_d,mass[0],p_d,mmp_d);
|
||||
RealD rn = norm2(p_d);
|
||||
d += rn*mass[0];
|
||||
|
||||
b = -cp /d;
|
||||
|
||||
// Set up the various shift variables
|
||||
int iz=0;
|
||||
z[0][1-iz] = 1.0;
|
||||
z[0][iz] = 1.0;
|
||||
bs[0] = b;
|
||||
for(int s=1;s<nshift;s++){
|
||||
z[s][1-iz] = 1.0;
|
||||
z[s][iz] = 1.0/( 1.0 - b*(mass[s]-mass[0]));
|
||||
bs[s] = b*z[s][iz];
|
||||
}
|
||||
|
||||
// r += b[0] A.p[0]
|
||||
// c= norm(r)
|
||||
c=axpy_norm(r_d,b,mmp_d,r_d);
|
||||
|
||||
for(int s=0;s<nshift;s++) {
|
||||
axpby(psi_d[s],0.,-bs[s]*alpha[s],src_d,src_d);
|
||||
}
|
||||
|
||||
///////////////////////////////////////
|
||||
// Timers
|
||||
///////////////////////////////////////
|
||||
GridStopWatch AXPYTimer, ShiftTimer, QRTimer, MatrixTimer, SolverTimer, PrecChangeTimer, CleanupTimer;
|
||||
|
||||
SolverTimer.Start();
|
||||
|
||||
// Iteration loop
|
||||
int k;
|
||||
|
||||
for (k=1;k<=MaxIterationsMshift;k++){
|
||||
|
||||
a = c /cp;
|
||||
AXPYTimer.Start();
|
||||
axpy(p_d,a,p_d,r_d);
|
||||
|
||||
for(int s=0;s<nshift;s++){
|
||||
if ( ! converged[s] ) {
|
||||
if (s==0){
|
||||
axpy(ps_d[s],a,ps_d[s],r_d);
|
||||
} else{
|
||||
RealD as =a *z[s][iz]*bs[s] /(z[s][1-iz]*b);
|
||||
axpby(ps_d[s],z[s][iz],as,r_d,ps_d[s]);
|
||||
}
|
||||
}
|
||||
}
|
||||
AXPYTimer.Stop();
|
||||
|
||||
PrecChangeTimer.Start();
|
||||
precisionChange(p_f, p_d, pc_wk_d_to_s); //get back single prec search direction for linop
|
||||
PrecChangeTimer.Stop();
|
||||
|
||||
cp=c;
|
||||
MatrixTimer.Start();
|
||||
Linop_f.HermOp(p_f,mmp_f);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
PrecChangeTimer.Start();
|
||||
precisionChange(mmp_d, mmp_f, pc_wk_s_to_d); // From Float to Double
|
||||
PrecChangeTimer.Stop();
|
||||
|
||||
AXPYTimer.Start();
|
||||
d=real(innerProduct(p_d,mmp_d));
|
||||
axpy(mmp_d,mass[0],p_d,mmp_d);
|
||||
AXPYTimer.Stop();
|
||||
RealD rn = norm2(p_d);
|
||||
d += rn*mass[0];
|
||||
|
||||
bp=b;
|
||||
b=-cp/d;
|
||||
|
||||
// Toggle the recurrence history
|
||||
bs[0] = b;
|
||||
iz = 1-iz;
|
||||
ShiftTimer.Start();
|
||||
for(int s=1;s<nshift;s++){
|
||||
if((!converged[s])){
|
||||
RealD z0 = z[s][1-iz];
|
||||
RealD z1 = z[s][iz];
|
||||
z[s][iz] = z0*z1*bp
|
||||
/ (b*a*(z1-z0) + z1*bp*(1- (mass[s]-mass[0])*b));
|
||||
bs[s] = b*z[s][iz]/z0; // NB sign rel to Mike
|
||||
}
|
||||
}
|
||||
ShiftTimer.Stop();
|
||||
|
||||
//Update double precision solutions
|
||||
AXPYTimer.Start();
|
||||
for(int s=0;s<nshift;s++){
|
||||
int ss = s;
|
||||
if( (!converged[s]) ) {
|
||||
axpy(psi_d[ss],-bs[s]*alpha[s],ps_d[s],psi_d[ss]);
|
||||
}
|
||||
}
|
||||
|
||||
//Perform reliable update if necessary; otherwise update residual from single-prec mmp
|
||||
c = axpy_norm(r_d,b,mmp_d,r_d);
|
||||
|
||||
AXPYTimer.Stop();
|
||||
|
||||
if(k % ReliableUpdateFreq == 0){
|
||||
RealD c_old = c;
|
||||
//Replace r with true residual
|
||||
MatrixTimer.Start();
|
||||
Linop_d.HermOp(psi_d[0],mmp_d);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
AXPYTimer.Start();
|
||||
axpy(mmp_d,mass[0],psi_d[0],mmp_d);
|
||||
|
||||
c = axpy_norm(r_d, -1.0, mmp_d, src_d);
|
||||
AXPYTimer.Stop();
|
||||
|
||||
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec k="<<k<< ", replaced |r|^2 = "<<c_old <<" with |r|^2 = "<<c<<std::endl;
|
||||
}
|
||||
|
||||
// Convergence checks
|
||||
int all_converged = 1;
|
||||
for(int s=0;s<nshift;s++){
|
||||
|
||||
if ( (!converged[s]) ){
|
||||
IterationsToCompleteShift[s] = k;
|
||||
|
||||
RealD css = c * z[s][iz]* z[s][iz];
|
||||
|
||||
if(css<rsqf[s]){
|
||||
if ( ! converged[s] )
|
||||
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec k="<<k<<" Shift "<<s<<" has converged"<<std::endl;
|
||||
converged[s]=1;
|
||||
} else {
|
||||
all_converged=0;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if ( all_converged || k == MaxIterationsMshift-1){
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
if ( all_converged ){
|
||||
std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: All shifts have converged iteration "<<k<<std::endl;
|
||||
std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: Checking solutions"<<std::endl;
|
||||
} else {
|
||||
std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: Not all shifts have converged iteration "<<k<<std::endl;
|
||||
}
|
||||
|
||||
// Check answers
|
||||
for(int s=0; s < nshift; s++) {
|
||||
Linop_d.HermOpAndNorm(psi_d[s],mmp_d,d,qq);
|
||||
axpy(tmp_d,mass[s],psi_d[s],mmp_d);
|
||||
axpy(r_d,-alpha[s],src_d,tmp_d);
|
||||
RealD rn = norm2(r_d);
|
||||
RealD cn = norm2(src_d);
|
||||
TrueResidualShift[s] = std::sqrt(rn/cn);
|
||||
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec: shift["<<s<<"] true residual "<< TrueResidualShift[s] << " target " << mresidual[s] << std::endl;
|
||||
|
||||
//If we have not reached the desired tolerance, do a (mixed precision) CG cleanup
|
||||
if(rn >= rsq[s]){
|
||||
CleanupTimer.Start();
|
||||
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec: performing cleanup step for shift " << s << std::endl;
|
||||
|
||||
//Setup linear operators for final cleanup
|
||||
ConjugateGradientMultiShiftMixedPrecSupport::ShiftedLinop<FieldD> Linop_shift_d(Linop_d, mass[s]);
|
||||
ConjugateGradientMultiShiftMixedPrecSupport::ShiftedLinop<FieldF> Linop_shift_f(Linop_f, mass[s]);
|
||||
|
||||
MixedPrecisionConjugateGradient<FieldD,FieldF> cg(mresidual[s], MaxIterations, MaxIterations, SinglePrecGrid, Linop_shift_f, Linop_shift_d);
|
||||
cg(src_d, psi_d[s]);
|
||||
|
||||
TrueResidualShift[s] = cg.TrueResidual;
|
||||
CleanupTimer.Stop();
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "ConjugateGradientMultiShiftMixedPrec: Time Breakdown for body"<<std::endl;
|
||||
std::cout << GridLogMessage << "\tSolver " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\t\tAXPY " << AXPYTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\t\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\t\tShift " << ShiftTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\t\tPrecision Change " << PrecChangeTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tFinal Cleanup " << CleanupTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tSolver+Cleanup " << SolverTimer.Elapsed() + CleanupTimer.Elapsed() << std::endl;
|
||||
|
||||
IterationsToComplete = k;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
|
||||
assert(0);
|
||||
}
|
||||
|
||||
};
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
@ -1,277 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ConjugateGradientReliableUpdate.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Christopher Kelly <ckelly@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CONJUGATE_GRADIENT_RELIABLE_UPDATE_H
|
||||
#define GRID_CONJUGATE_GRADIENT_RELIABLE_UPDATE_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class FieldD,class FieldF,
|
||||
typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,
|
||||
typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
|
||||
class ConjugateGradientReliableUpdate : public LinearFunction<FieldD> {
|
||||
public:
|
||||
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
|
||||
// Defaults true.
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||
Integer ReliableUpdatesPerformed;
|
||||
|
||||
bool DoFinalCleanup; //Final DP cleanup, defaults to true
|
||||
Integer IterationsToCleanup; //Final DP cleanup step iterations
|
||||
|
||||
LinearOperatorBase<FieldF> &Linop_f;
|
||||
LinearOperatorBase<FieldD> &Linop_d;
|
||||
GridBase* SinglePrecGrid;
|
||||
RealD Delta; //reliable update parameter. A reliable update is performed when the residual drops by a factor of Delta relative to its value at the last update
|
||||
|
||||
//Optional ability to switch to a different linear operator once the tolerance reaches a certain point. Useful for single/half -> single/single
|
||||
LinearOperatorBase<FieldF> *Linop_fallback;
|
||||
RealD fallback_transition_tol;
|
||||
|
||||
|
||||
ConjugateGradientReliableUpdate(RealD tol, Integer maxit, RealD _delta, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d, bool err_on_no_conv = true)
|
||||
: Tolerance(tol),
|
||||
MaxIterations(maxit),
|
||||
Delta(_delta),
|
||||
Linop_f(_Linop_f),
|
||||
Linop_d(_Linop_d),
|
||||
SinglePrecGrid(_sp_grid),
|
||||
ErrorOnNoConverge(err_on_no_conv),
|
||||
DoFinalCleanup(true),
|
||||
Linop_fallback(NULL)
|
||||
{
|
||||
assert(Delta > 0. && Delta < 1. && "Expect 0 < Delta < 1");
|
||||
};
|
||||
|
||||
void setFallbackLinop(LinearOperatorBase<FieldF> &_Linop_fallback, const RealD _fallback_transition_tol){
|
||||
Linop_fallback = &_Linop_fallback;
|
||||
fallback_transition_tol = _fallback_transition_tol;
|
||||
}
|
||||
|
||||
void operator()(const FieldD &src, FieldD &psi) {
|
||||
GRID_TRACE("ConjugateGradientReliableUpdate");
|
||||
LinearOperatorBase<FieldF> *Linop_f_use = &Linop_f;
|
||||
bool using_fallback = false;
|
||||
|
||||
psi.Checkerboard() = src.Checkerboard();
|
||||
conformable(psi, src);
|
||||
|
||||
RealD cp, c, a, d, b, ssq, qq, b_pred;
|
||||
|
||||
FieldD p(src);
|
||||
FieldD mmp(src);
|
||||
FieldD r(src);
|
||||
|
||||
// Initial residual computation & set up
|
||||
RealD guess = norm2(psi);
|
||||
assert(std::isnan(guess) == 0);
|
||||
|
||||
Linop_d.HermOpAndNorm(psi, mmp, d, b);
|
||||
|
||||
r = src - mmp;
|
||||
p = r;
|
||||
|
||||
a = norm2(p);
|
||||
cp = a;
|
||||
ssq = norm2(src);
|
||||
|
||||
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: guess " << guess << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: src " << ssq << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: mp " << d << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: mmp " << b << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: cp,r " << cp << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: p " << a << std::endl;
|
||||
|
||||
RealD rsq = Tolerance * Tolerance * ssq;
|
||||
|
||||
// Check if guess is really REALLY good :)
|
||||
if (cp <= rsq) {
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate guess was REALLY good\n";
|
||||
std::cout << GridLogMessage << "\tComputed residual " << std::sqrt(cp / ssq)<<std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
//Single prec initialization
|
||||
precisionChangeWorkspace pc_wk_sp_to_dp(src.Grid(), SinglePrecGrid);
|
||||
precisionChangeWorkspace pc_wk_dp_to_sp(SinglePrecGrid, src.Grid());
|
||||
|
||||
FieldF r_f(SinglePrecGrid);
|
||||
r_f.Checkerboard() = r.Checkerboard();
|
||||
precisionChange(r_f, r, pc_wk_dp_to_sp);
|
||||
|
||||
FieldF psi_f(r_f);
|
||||
psi_f = Zero();
|
||||
|
||||
FieldF p_f(r_f);
|
||||
FieldF mmp_f(r_f);
|
||||
|
||||
RealD MaxResidSinceLastRelUp = cp; //initial residual
|
||||
|
||||
std::cout << GridLogIterative << std::setprecision(4)
|
||||
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl;
|
||||
|
||||
GridStopWatch LinalgTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
GridStopWatch PrecChangeTimer;
|
||||
|
||||
SolverTimer.Start();
|
||||
int k = 0;
|
||||
int l = 0;
|
||||
|
||||
for (k = 1; k <= MaxIterations; k++) {
|
||||
c = cp;
|
||||
|
||||
MatrixTimer.Start();
|
||||
Linop_f_use->HermOpAndNorm(p_f, mmp_f, d, qq);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
LinalgTimer.Start();
|
||||
|
||||
a = c / d;
|
||||
b_pred = a * (a * qq - d) / c;
|
||||
|
||||
cp = axpy_norm(r_f, -a, mmp_f, r_f);
|
||||
b = cp / c;
|
||||
|
||||
// Fuse these loops ; should be really easy
|
||||
psi_f = a * p_f + psi_f;
|
||||
//p_f = p_f * b + r_f;
|
||||
|
||||
LinalgTimer.Stop();
|
||||
|
||||
std::cout << GridLogIterative << "ConjugateGradientReliableUpdate: Iteration " << k
|
||||
<< " residual " << cp << " target " << rsq << std::endl;
|
||||
std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << " b = "<< b << std::endl;
|
||||
std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << " c = "<< c << std::endl;
|
||||
|
||||
if(cp > MaxResidSinceLastRelUp){
|
||||
std::cout << GridLogIterative << "ConjugateGradientReliableUpdate: updating MaxResidSinceLastRelUp : " << MaxResidSinceLastRelUp << " -> " << cp << std::endl;
|
||||
MaxResidSinceLastRelUp = cp;
|
||||
}
|
||||
|
||||
// Stopping condition
|
||||
if (cp <= rsq) {
|
||||
//Although not written in the paper, I assume that I have to add on the final solution
|
||||
PrecChangeTimer.Start();
|
||||
precisionChange(mmp, psi_f, pc_wk_sp_to_dp);
|
||||
PrecChangeTimer.Stop();
|
||||
psi = psi + mmp;
|
||||
|
||||
|
||||
SolverTimer.Stop();
|
||||
Linop_d.HermOpAndNorm(psi, mmp, d, qq);
|
||||
p = mmp - src;
|
||||
|
||||
RealD srcnorm = std::sqrt(norm2(src));
|
||||
RealD resnorm = std::sqrt(norm2(p));
|
||||
RealD true_residual = resnorm / srcnorm;
|
||||
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate Converged on iteration " << k << " after " << l << " reliable updates" << std::endl;
|
||||
std::cout << GridLogMessage << "\tComputed residual " << std::sqrt(cp / ssq)<<std::endl;
|
||||
std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
|
||||
std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "Time breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tPrecChange " << PrecChangeTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tPrecChange avg time " << PrecChangeTimer.Elapsed()/(2*l+1) <<std::endl;
|
||||
|
||||
|
||||
IterationsToComplete = k;
|
||||
ReliableUpdatesPerformed = l;
|
||||
|
||||
if(DoFinalCleanup){
|
||||
//Do a final CG to cleanup
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate performing final cleanup.\n";
|
||||
ConjugateGradient<FieldD> CG(Tolerance,MaxIterations);
|
||||
CG.ErrorOnNoConverge = ErrorOnNoConverge;
|
||||
CG(Linop_d,src,psi);
|
||||
IterationsToCleanup = CG.IterationsToComplete;
|
||||
}
|
||||
else if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
|
||||
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate complete.\n";
|
||||
return;
|
||||
}
|
||||
else if(cp < Delta * MaxResidSinceLastRelUp) { //reliable update
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate "
|
||||
<< cp << "(residual) < " << Delta << "(Delta) * " << MaxResidSinceLastRelUp << "(MaxResidSinceLastRelUp) on iteration " << k << " : performing reliable update\n";
|
||||
PrecChangeTimer.Start();
|
||||
precisionChange(mmp, psi_f, pc_wk_sp_to_dp);
|
||||
PrecChangeTimer.Stop();
|
||||
psi = psi + mmp;
|
||||
|
||||
MatrixTimer.Start();
|
||||
Linop_d.HermOpAndNorm(psi, mmp, d, qq);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
r = src - mmp;
|
||||
|
||||
psi_f = Zero();
|
||||
PrecChangeTimer.Start();
|
||||
precisionChange(r_f, r, pc_wk_dp_to_sp);
|
||||
PrecChangeTimer.Stop();
|
||||
cp = norm2(r);
|
||||
MaxResidSinceLastRelUp = cp;
|
||||
|
||||
b = cp/c;
|
||||
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate new residual " << cp << std::endl;
|
||||
|
||||
l = l+1;
|
||||
}
|
||||
|
||||
p_f = p_f * b + r_f; //update search vector after reliable update appears to help convergence
|
||||
|
||||
if(!using_fallback && Linop_fallback != NULL && cp < fallback_transition_tol){
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate switching to fallback linear operator on iteration " << k << " at residual " << cp << std::endl;
|
||||
Linop_f_use = Linop_fallback;
|
||||
using_fallback = true;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate did NOT converge"
|
||||
<< std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(0);
|
||||
IterationsToComplete = k;
|
||||
ReliableUpdatesPerformed = l;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
||||
|
||||
#endif
|
@ -1,113 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ConjugateResidual.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CONJUGATE_RESIDUAL_H
|
||||
#define GRID_CONJUGATE_RESIDUAL_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Base classes for iterative processes based on operators
|
||||
// single input vec, single output vec.
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
||||
template<class Field>
|
||||
class ConjugateResidual : public OperatorFunction<Field> {
|
||||
public:
|
||||
using OperatorFunction<Field>::operator();
|
||||
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
int verbose;
|
||||
|
||||
ConjugateResidual(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) {
|
||||
verbose=0;
|
||||
};
|
||||
|
||||
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
|
||||
|
||||
RealD a, b; // c, d;
|
||||
RealD cp, ssq,rsq;
|
||||
|
||||
RealD rAr, rAAr, rArp;
|
||||
RealD pAp, pAAp;
|
||||
|
||||
GridBase *grid = src.Grid();
|
||||
psi=Zero();
|
||||
Field r(grid), p(grid), Ap(grid), Ar(grid);
|
||||
|
||||
r=src;
|
||||
p=src;
|
||||
|
||||
Linop.HermOpAndNorm(p,Ap,pAp,pAAp);
|
||||
Linop.HermOpAndNorm(r,Ar,rAr,rAAr);
|
||||
|
||||
cp =norm2(r);
|
||||
ssq=norm2(src);
|
||||
rsq=Tolerance*Tolerance*ssq;
|
||||
|
||||
if (verbose) std::cout<<GridLogMessage<<"ConjugateResidual: iteration " <<0<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
||||
|
||||
for(int k=1;k<MaxIterations;k++){
|
||||
|
||||
a = rAr/pAAp;
|
||||
|
||||
axpy(psi,a,p,psi);
|
||||
|
||||
cp = axpy_norm(r,-a,Ap,r);
|
||||
|
||||
rArp=rAr;
|
||||
|
||||
Linop.HermOpAndNorm(r,Ar,rAr,rAAr);
|
||||
|
||||
b =rAr/rArp;
|
||||
|
||||
axpy(p,b,p,r);
|
||||
pAAp=axpy_norm(Ap,b,Ap,Ar);
|
||||
|
||||
if(verbose) std::cout<<GridLogMessage<<"ConjugateResidual: iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
||||
|
||||
if(cp<rsq) {
|
||||
Linop.HermOp(psi,Ap);
|
||||
axpy(r,-1.0,src,Ap);
|
||||
RealD true_resid = norm2(r)/ssq;
|
||||
std::cout<<GridLogMessage<<"ConjugateResidual: Converged on iteration " <<k
|
||||
<< " computed residual "<<std::sqrt(cp/ssq)
|
||||
<< " true residual "<<std::sqrt(true_resid)
|
||||
<< " target " <<Tolerance <<std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage<<"ConjugateResidual did NOT converge"<<std::endl;
|
||||
assert(0);
|
||||
}
|
||||
};
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
@ -1,157 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_DEFLATION_H
|
||||
#define GRID_DEFLATION_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class Field>
|
||||
class ZeroGuesser: public LinearFunction<Field> {
|
||||
public:
|
||||
using LinearFunction<Field>::operator();
|
||||
virtual void operator()(const Field &src, Field &guess) { guess = Zero(); };
|
||||
};
|
||||
template<class Field>
|
||||
class DoNothingGuesser: public LinearFunction<Field> {
|
||||
public:
|
||||
using LinearFunction<Field>::operator();
|
||||
virtual void operator()(const Field &src, Field &guess) { };
|
||||
};
|
||||
template<class Field>
|
||||
class SourceGuesser: public LinearFunction<Field> {
|
||||
public:
|
||||
using LinearFunction<Field>::operator();
|
||||
virtual void operator()(const Field &src, Field &guess) { guess = src; };
|
||||
};
|
||||
|
||||
////////////////////////////////
|
||||
// Fine grid deflation
|
||||
////////////////////////////////
|
||||
template<class Field>
|
||||
class DeflatedGuesser: public LinearFunction<Field> {
|
||||
private:
|
||||
const std::vector<Field> &evec;
|
||||
const std::vector<RealD> &eval;
|
||||
const unsigned int N;
|
||||
|
||||
public:
|
||||
using LinearFunction<Field>::operator();
|
||||
|
||||
DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval)
|
||||
: DeflatedGuesser(_evec, _eval, _evec.size())
|
||||
{}
|
||||
|
||||
DeflatedGuesser(const std::vector<Field> & _evec, const std::vector<RealD> & _eval, const unsigned int _N)
|
||||
: evec(_evec), eval(_eval), N(_N)
|
||||
{
|
||||
assert(evec.size()==eval.size());
|
||||
assert(N <= evec.size());
|
||||
}
|
||||
|
||||
virtual void operator()(const Field &src,Field &guess) {
|
||||
guess = Zero();
|
||||
for (int i=0;i<N;i++) {
|
||||
const Field& tmp = evec[i];
|
||||
axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess);
|
||||
}
|
||||
guess.Checkerboard() = src.Checkerboard();
|
||||
}
|
||||
};
|
||||
|
||||
template<class FineField, class CoarseField>
|
||||
class LocalCoherenceDeflatedGuesser: public LinearFunction<FineField> {
|
||||
private:
|
||||
const std::vector<FineField> &subspace;
|
||||
const std::vector<CoarseField> &evec_coarse;
|
||||
const std::vector<RealD> &eval_coarse;
|
||||
public:
|
||||
|
||||
using LinearFunction<FineField>::operator();
|
||||
LocalCoherenceDeflatedGuesser(const std::vector<FineField> &_subspace,
|
||||
const std::vector<CoarseField> &_evec_coarse,
|
||||
const std::vector<RealD> &_eval_coarse)
|
||||
: subspace(_subspace),
|
||||
evec_coarse(_evec_coarse),
|
||||
eval_coarse(_eval_coarse)
|
||||
{
|
||||
}
|
||||
|
||||
void operator()(const FineField &src,FineField &guess) {
|
||||
int N = (int)evec_coarse.size();
|
||||
CoarseField src_coarse(evec_coarse[0].Grid());
|
||||
CoarseField guess_coarse(evec_coarse[0].Grid()); guess_coarse = Zero();
|
||||
blockProject(src_coarse,src,subspace);
|
||||
for (int i=0;i<N;i++) {
|
||||
const CoarseField & tmp = evec_coarse[i];
|
||||
axpy(guess_coarse,TensorRemove(innerProduct(tmp,src_coarse)) / eval_coarse[i],tmp,guess_coarse);
|
||||
}
|
||||
blockPromote(guess_coarse,guess,subspace);
|
||||
guess.Checkerboard() = src.Checkerboard();
|
||||
};
|
||||
|
||||
void operator()(const std::vector<FineField> &src,std::vector<FineField> &guess) {
|
||||
int Nevec = (int)evec_coarse.size();
|
||||
int Nsrc = (int)src.size();
|
||||
// make temp variables
|
||||
std::vector<CoarseField> src_coarse(Nsrc,evec_coarse[0].Grid());
|
||||
std::vector<CoarseField> guess_coarse(Nsrc,evec_coarse[0].Grid());
|
||||
//Preporcessing
|
||||
std::cout << GridLogMessage << "Start BlockProject for loop" << std::endl;
|
||||
for (int j=0;j<Nsrc;j++)
|
||||
{
|
||||
guess_coarse[j] = Zero();
|
||||
std::cout << GridLogMessage << "BlockProject iter: " << j << std::endl;
|
||||
blockProject(src_coarse[j],src[j],subspace);
|
||||
}
|
||||
//deflation set up for eigen vector batchsize 1 and source batch size equal number of sources
|
||||
std::cout << GridLogMessage << "Start ProjectAccum for loop" << std::endl;
|
||||
for (int i=0;i<Nevec;i++)
|
||||
{
|
||||
std::cout << GridLogMessage << "ProjectAccum Nvec: " << i << std::endl;
|
||||
const CoarseField & tmp = evec_coarse[i];
|
||||
for (int j=0;j<Nsrc;j++)
|
||||
{
|
||||
axpy(guess_coarse[j],TensorRemove(innerProduct(tmp,src_coarse[j])) / eval_coarse[i],tmp,guess_coarse[j]);
|
||||
}
|
||||
}
|
||||
//postprocessing
|
||||
std::cout << GridLogMessage << "Start BlockPromote for loop" << std::endl;
|
||||
for (int j=0;j<Nsrc;j++)
|
||||
{
|
||||
std::cout << GridLogMessage << "BlockProject iter: " << j << std::endl;
|
||||
blockPromote(guess_coarse[j],guess[j],subspace);
|
||||
guess[j].Checkerboard() = src[j].Checkerboard();
|
||||
}
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
}
|
||||
#endif
|
@ -1,258 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Daniel Richtmann <daniel.richtmann@ur.de>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_FLEXIBLE_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
|
||||
#define GRID_FLEXIBLE_COMMUNICATION_AVOIDING_GENERALISED_MINIMAL_RESIDUAL_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class Field>
|
||||
class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction<Field> {
|
||||
public:
|
||||
using OperatorFunction<Field>::operator();
|
||||
|
||||
bool ErrorOnNoConverge; // Throw an assert when FCAGMRES fails to converge,
|
||||
// defaults to true
|
||||
|
||||
RealD Tolerance;
|
||||
|
||||
Integer MaxIterations;
|
||||
Integer RestartLength;
|
||||
Integer MaxNumberOfRestarts;
|
||||
Integer IterationCount; // Number of iterations the FCAGMRES took to finish,
|
||||
// filled in upon completion
|
||||
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch PrecTimer;
|
||||
GridStopWatch LinalgTimer;
|
||||
GridStopWatch QrTimer;
|
||||
GridStopWatch CompSolutionTimer;
|
||||
|
||||
Eigen::MatrixXcd H;
|
||||
|
||||
std::vector<ComplexD> y;
|
||||
std::vector<ComplexD> gamma;
|
||||
std::vector<ComplexD> c;
|
||||
std::vector<ComplexD> s;
|
||||
|
||||
LinearFunction<Field> &Preconditioner;
|
||||
|
||||
FlexibleCommunicationAvoidingGeneralisedMinimalResidual(RealD tol,
|
||||
Integer maxit,
|
||||
LinearFunction<Field> &Prec,
|
||||
Integer restart_length,
|
||||
bool err_on_no_conv = true)
|
||||
: Tolerance(tol)
|
||||
, MaxIterations(maxit)
|
||||
, RestartLength(restart_length)
|
||||
, MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
|
||||
, ErrorOnNoConverge(err_on_no_conv)
|
||||
, H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
|
||||
, y(RestartLength + 1, 0.)
|
||||
, gamma(RestartLength + 1, 0.)
|
||||
, c(RestartLength + 1, 0.)
|
||||
, s(RestartLength + 1, 0.)
|
||||
, Preconditioner(Prec) {};
|
||||
|
||||
void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
|
||||
|
||||
std::cout << GridLogWarning << "This algorithm currently doesn't differ from regular FGMRES" << std::endl;
|
||||
|
||||
psi.Checkerboard() = src.Checkerboard();
|
||||
conformable(psi, src);
|
||||
|
||||
RealD guess = norm2(psi);
|
||||
assert(std::isnan(guess) == 0);
|
||||
|
||||
RealD cp;
|
||||
RealD ssq = norm2(src);
|
||||
RealD rsq = Tolerance * Tolerance * ssq;
|
||||
|
||||
Field r(src.Grid());
|
||||
|
||||
std::cout << std::setprecision(4) << std::scientific;
|
||||
std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: guess " << guess << std::endl;
|
||||
std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: src " << ssq << std::endl;
|
||||
|
||||
PrecTimer.Reset();
|
||||
MatrixTimer.Reset();
|
||||
LinalgTimer.Reset();
|
||||
QrTimer.Reset();
|
||||
CompSolutionTimer.Reset();
|
||||
|
||||
GridStopWatch SolverTimer;
|
||||
SolverTimer.Start();
|
||||
|
||||
IterationCount = 0;
|
||||
|
||||
for (int k=0; k<MaxNumberOfRestarts; k++) {
|
||||
|
||||
cp = outerLoopBody(LinOp, src, psi, rsq);
|
||||
|
||||
// Stopping condition
|
||||
if (cp <= rsq) {
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
LinOp.Op(psi,r);
|
||||
axpy(r,-1.0,src,r);
|
||||
|
||||
RealD srcnorm = sqrt(ssq);
|
||||
RealD resnorm = sqrt(norm2(r));
|
||||
RealD true_residual = resnorm / srcnorm;
|
||||
|
||||
std::cout << GridLogMessage << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: Converged on iteration " << IterationCount
|
||||
<< " computed residual " << sqrt(cp / ssq)
|
||||
<< " true residual " << true_residual
|
||||
<< " target " << Tolerance << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "FCAGMRES Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "FCAGMRES Time elapsed: Precon " << PrecTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "FCAGMRES Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "FCAGMRES Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "FCAGMRES Time elapsed: QR " << QrTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "FCAGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual did NOT converge" << std::endl;
|
||||
|
||||
if (ErrorOnNoConverge)
|
||||
assert(0);
|
||||
}
|
||||
|
||||
RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
|
||||
|
||||
RealD cp = 0;
|
||||
|
||||
Field w(src.Grid());
|
||||
Field r(src.Grid());
|
||||
|
||||
// these should probably be made class members so that they are only allocated once, not in every restart
|
||||
std::vector<Field> v(RestartLength + 1, src.Grid()); for (auto &elem : v) elem = Zero();
|
||||
std::vector<Field> z(RestartLength + 1, src.Grid()); for (auto &elem : z) elem = Zero();
|
||||
|
||||
MatrixTimer.Start();
|
||||
LinOp.Op(psi, w);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
LinalgTimer.Start();
|
||||
r = src - w;
|
||||
|
||||
gamma[0] = sqrt(norm2(r));
|
||||
|
||||
v[0] = (1. / gamma[0]) * r;
|
||||
LinalgTimer.Stop();
|
||||
|
||||
for (int i=0; i<RestartLength; i++) {
|
||||
|
||||
IterationCount++;
|
||||
|
||||
arnoldiStep(LinOp, v, z, w, i);
|
||||
|
||||
qrUpdate(i);
|
||||
|
||||
cp = norm(gamma[i+1]);
|
||||
|
||||
std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: Iteration " << IterationCount
|
||||
<< " residual " << cp << " target " << rsq << std::endl;
|
||||
|
||||
if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
|
||||
|
||||
computeSolution(z, psi, i);
|
||||
|
||||
return cp;
|
||||
}
|
||||
}
|
||||
|
||||
assert(0); // Never reached
|
||||
return cp;
|
||||
}
|
||||
|
||||
void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, std::vector<Field> &z, Field &w, int iter) {
|
||||
|
||||
PrecTimer.Start();
|
||||
Preconditioner(v[iter], z[iter]);
|
||||
PrecTimer.Stop();
|
||||
|
||||
MatrixTimer.Start();
|
||||
LinOp.Op(z[iter], w);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
LinalgTimer.Start();
|
||||
for (int i = 0; i <= iter; ++i) {
|
||||
H(iter, i) = innerProduct(v[i], w);
|
||||
w = w - ComplexD(H(iter, i)) * v[i];
|
||||
}
|
||||
|
||||
H(iter, iter + 1) = sqrt(norm2(w));
|
||||
v[iter + 1] = ComplexD(1. / H(iter, iter + 1)) * w;
|
||||
LinalgTimer.Stop();
|
||||
}
|
||||
|
||||
void qrUpdate(int iter) {
|
||||
|
||||
QrTimer.Start();
|
||||
for (int i = 0; i < iter ; ++i) {
|
||||
auto tmp = -s[i] * ComplexD(H(iter, i)) + c[i] * ComplexD(H(iter, i + 1));
|
||||
H(iter, i) = conjugate(c[i]) * ComplexD(H(iter, i)) + conjugate(s[i]) * ComplexD(H(iter, i + 1));
|
||||
H(iter, i + 1) = tmp;
|
||||
}
|
||||
|
||||
// Compute new Givens Rotation
|
||||
auto nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
|
||||
c[iter] = H(iter, iter) / nu;
|
||||
s[iter] = H(iter, iter + 1) / nu;
|
||||
|
||||
// Apply new Givens rotation
|
||||
H(iter, iter) = nu;
|
||||
H(iter, iter + 1) = 0.;
|
||||
|
||||
gamma[iter + 1] = -s[iter] * gamma[iter];
|
||||
gamma[iter] = conjugate(c[iter]) * gamma[iter];
|
||||
QrTimer.Stop();
|
||||
}
|
||||
|
||||
void computeSolution(std::vector<Field> const &z, Field &psi, int iter) {
|
||||
|
||||
CompSolutionTimer.Start();
|
||||
for (int i = iter; i >= 0; i--) {
|
||||
y[i] = gamma[i];
|
||||
for (int k = i + 1; k <= iter; k++)
|
||||
y[i] = y[i] - ComplexD(H(k, i)) * y[k];
|
||||
y[i] = y[i] / ComplexD(H(i, i));
|
||||
}
|
||||
|
||||
for (int i = 0; i <= iter; i++)
|
||||
psi = psi + z[i] * y[i];
|
||||
CompSolutionTimer.Stop();
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
@ -1,256 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Daniel Richtmann <daniel.richtmann@ur.de>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
|
||||
#define GRID_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class Field>
|
||||
class FlexibleGeneralisedMinimalResidual : public OperatorFunction<Field> {
|
||||
public:
|
||||
using OperatorFunction<Field>::operator();
|
||||
|
||||
bool ErrorOnNoConverge; // Throw an assert when FGMRES fails to converge,
|
||||
// defaults to true
|
||||
|
||||
RealD Tolerance;
|
||||
|
||||
Integer MaxIterations;
|
||||
Integer RestartLength;
|
||||
Integer MaxNumberOfRestarts;
|
||||
Integer IterationCount; // Number of iterations the FGMRES took to finish,
|
||||
// filled in upon completion
|
||||
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch PrecTimer;
|
||||
GridStopWatch LinalgTimer;
|
||||
GridStopWatch QrTimer;
|
||||
GridStopWatch CompSolutionTimer;
|
||||
|
||||
Eigen::MatrixXcd H;
|
||||
|
||||
std::vector<ComplexD> y;
|
||||
std::vector<ComplexD> gamma;
|
||||
std::vector<ComplexD> c;
|
||||
std::vector<ComplexD> s;
|
||||
|
||||
LinearFunction<Field> &Preconditioner;
|
||||
|
||||
FlexibleGeneralisedMinimalResidual(RealD tol,
|
||||
Integer maxit,
|
||||
LinearFunction<Field> &Prec,
|
||||
Integer restart_length,
|
||||
bool err_on_no_conv = true)
|
||||
: Tolerance(tol)
|
||||
, MaxIterations(maxit)
|
||||
, RestartLength(restart_length)
|
||||
, MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
|
||||
, ErrorOnNoConverge(err_on_no_conv)
|
||||
, H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
|
||||
, y(RestartLength + 1, 0.)
|
||||
, gamma(RestartLength + 1, 0.)
|
||||
, c(RestartLength + 1, 0.)
|
||||
, s(RestartLength + 1, 0.)
|
||||
, Preconditioner(Prec) {};
|
||||
|
||||
void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
|
||||
|
||||
psi.Checkerboard() = src.Checkerboard();
|
||||
conformable(psi, src);
|
||||
|
||||
RealD guess = norm2(psi);
|
||||
assert(std::isnan(guess) == 0);
|
||||
|
||||
RealD cp;
|
||||
RealD ssq = norm2(src);
|
||||
RealD rsq = Tolerance * Tolerance * ssq;
|
||||
|
||||
Field r(src.Grid());
|
||||
|
||||
std::cout << std::setprecision(4) << std::scientific;
|
||||
std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: guess " << guess << std::endl;
|
||||
std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: src " << ssq << std::endl;
|
||||
|
||||
PrecTimer.Reset();
|
||||
MatrixTimer.Reset();
|
||||
LinalgTimer.Reset();
|
||||
QrTimer.Reset();
|
||||
CompSolutionTimer.Reset();
|
||||
|
||||
GridStopWatch SolverTimer;
|
||||
SolverTimer.Start();
|
||||
|
||||
IterationCount = 0;
|
||||
|
||||
for (int k=0; k<MaxNumberOfRestarts; k++) {
|
||||
|
||||
cp = outerLoopBody(LinOp, src, psi, rsq);
|
||||
|
||||
// Stopping condition
|
||||
if (cp <= rsq) {
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
LinOp.Op(psi,r);
|
||||
axpy(r,-1.0,src,r);
|
||||
|
||||
RealD srcnorm = sqrt(ssq);
|
||||
RealD resnorm = sqrt(norm2(r));
|
||||
RealD true_residual = resnorm / srcnorm;
|
||||
|
||||
std::cout << GridLogMessage << "FlexibleGeneralisedMinimalResidual: Converged on iteration " << IterationCount
|
||||
<< " computed residual " << sqrt(cp / ssq)
|
||||
<< " true residual " << true_residual
|
||||
<< " target " << Tolerance << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "FGMRES Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "FGMRES Time elapsed: Precon " << PrecTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "FGMRES Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "FGMRES Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "FGMRES Time elapsed: QR " << QrTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "FGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "FlexibleGeneralisedMinimalResidual did NOT converge" << std::endl;
|
||||
|
||||
if (ErrorOnNoConverge)
|
||||
assert(0);
|
||||
}
|
||||
|
||||
RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
|
||||
|
||||
RealD cp = 0;
|
||||
|
||||
Field w(src.Grid());
|
||||
Field r(src.Grid());
|
||||
|
||||
// these should probably be made class members so that they are only allocated once, not in every restart
|
||||
std::vector<Field> v(RestartLength + 1, src.Grid()); for (auto &elem : v) elem = Zero();
|
||||
std::vector<Field> z(RestartLength + 1, src.Grid()); for (auto &elem : z) elem = Zero();
|
||||
|
||||
MatrixTimer.Start();
|
||||
LinOp.Op(psi, w);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
LinalgTimer.Start();
|
||||
r = src - w;
|
||||
|
||||
gamma[0] = sqrt(norm2(r));
|
||||
|
||||
v[0] = (1. / gamma[0]) * r;
|
||||
LinalgTimer.Stop();
|
||||
|
||||
for (int i=0; i<RestartLength; i++) {
|
||||
|
||||
IterationCount++;
|
||||
|
||||
arnoldiStep(LinOp, v, z, w, i);
|
||||
|
||||
qrUpdate(i);
|
||||
|
||||
cp = norm(gamma[i+1]);
|
||||
|
||||
std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: Iteration " << IterationCount
|
||||
<< " residual " << cp << " target " << rsq << std::endl;
|
||||
|
||||
if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
|
||||
|
||||
computeSolution(z, psi, i);
|
||||
|
||||
return cp;
|
||||
}
|
||||
}
|
||||
|
||||
assert(0); // Never reached
|
||||
return cp;
|
||||
}
|
||||
|
||||
void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, std::vector<Field> &z, Field &w, int iter) {
|
||||
|
||||
PrecTimer.Start();
|
||||
Preconditioner(v[iter], z[iter]);
|
||||
PrecTimer.Stop();
|
||||
|
||||
MatrixTimer.Start();
|
||||
LinOp.Op(z[iter], w);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
LinalgTimer.Start();
|
||||
for (int i = 0; i <= iter; ++i) {
|
||||
H(iter, i) = innerProduct(v[i], w);
|
||||
w = w - ComplexD(H(iter, i)) * v[i];
|
||||
}
|
||||
|
||||
H(iter, iter + 1) = sqrt(norm2(w));
|
||||
v[iter + 1] = ComplexD(1. / H(iter, iter + 1)) * w;
|
||||
LinalgTimer.Stop();
|
||||
}
|
||||
|
||||
void qrUpdate(int iter) {
|
||||
|
||||
QrTimer.Start();
|
||||
for (int i = 0; i < iter ; ++i) {
|
||||
auto tmp = -s[i] * ComplexD(H(iter, i)) + c[i] * ComplexD(H(iter, i + 1));
|
||||
H(iter, i) = conjugate(c[i]) * ComplexD(H(iter, i)) + conjugate(s[i]) * ComplexD(H(iter, i + 1));
|
||||
H(iter, i + 1) = tmp;
|
||||
}
|
||||
|
||||
// Compute new Givens Rotation
|
||||
auto nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
|
||||
c[iter] = H(iter, iter) / nu;
|
||||
s[iter] = H(iter, iter + 1) / nu;
|
||||
|
||||
// Apply new Givens rotation
|
||||
H(iter, iter) = nu;
|
||||
H(iter, iter + 1) = 0.;
|
||||
|
||||
gamma[iter + 1] = -s[iter] * gamma[iter];
|
||||
gamma[iter] = conjugate(c[iter]) * gamma[iter];
|
||||
QrTimer.Stop();
|
||||
}
|
||||
|
||||
void computeSolution(std::vector<Field> const &z, Field &psi, int iter) {
|
||||
|
||||
CompSolutionTimer.Start();
|
||||
for (int i = iter; i >= 0; i--) {
|
||||
y[i] = gamma[i];
|
||||
for (int k = i + 1; k <= iter; k++)
|
||||
y[i] = y[i] - ComplexD(H(k, i)) * y[k];
|
||||
y[i] = y[i] / ComplexD(H(i, i));
|
||||
}
|
||||
|
||||
for (int i = 0; i <= iter; i++)
|
||||
psi = psi + z[i] * y[i];
|
||||
CompSolutionTimer.Stop();
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
@ -1,244 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/GeneralisedMinimalResidual.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Daniel Richtmann <daniel.richtmann@ur.de>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_GENERALISED_MINIMAL_RESIDUAL_H
|
||||
#define GRID_GENERALISED_MINIMAL_RESIDUAL_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class Field>
|
||||
class GeneralisedMinimalResidual : public OperatorFunction<Field> {
|
||||
public:
|
||||
using OperatorFunction<Field>::operator();
|
||||
|
||||
bool ErrorOnNoConverge; // Throw an assert when GMRES fails to converge,
|
||||
// defaults to true
|
||||
|
||||
RealD Tolerance;
|
||||
|
||||
Integer MaxIterations;
|
||||
Integer RestartLength;
|
||||
Integer MaxNumberOfRestarts;
|
||||
Integer IterationCount; // Number of iterations the GMRES took to finish,
|
||||
// filled in upon completion
|
||||
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch LinalgTimer;
|
||||
GridStopWatch QrTimer;
|
||||
GridStopWatch CompSolutionTimer;
|
||||
|
||||
Eigen::MatrixXcd H;
|
||||
|
||||
std::vector<ComplexD> y;
|
||||
std::vector<ComplexD> gamma;
|
||||
std::vector<ComplexD> c;
|
||||
std::vector<ComplexD> s;
|
||||
|
||||
GeneralisedMinimalResidual(RealD tol,
|
||||
Integer maxit,
|
||||
Integer restart_length,
|
||||
bool err_on_no_conv = true)
|
||||
: Tolerance(tol)
|
||||
, MaxIterations(maxit)
|
||||
, RestartLength(restart_length)
|
||||
, MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
|
||||
, ErrorOnNoConverge(err_on_no_conv)
|
||||
, H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
|
||||
, y(RestartLength + 1, 0.)
|
||||
, gamma(RestartLength + 1, 0.)
|
||||
, c(RestartLength + 1, 0.)
|
||||
, s(RestartLength + 1, 0.) {};
|
||||
|
||||
void operator()(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi) {
|
||||
|
||||
psi.Checkerboard() = src.Checkerboard();
|
||||
conformable(psi, src);
|
||||
|
||||
RealD guess = norm2(psi);
|
||||
assert(std::isnan(guess) == 0);
|
||||
|
||||
RealD cp;
|
||||
RealD ssq = norm2(src);
|
||||
RealD rsq = Tolerance * Tolerance * ssq;
|
||||
|
||||
Field r(src.Grid());
|
||||
|
||||
std::cout << std::setprecision(4) << std::scientific;
|
||||
std::cout << GridLogIterative << "GeneralisedMinimalResidual: guess " << guess << std::endl;
|
||||
std::cout << GridLogIterative << "GeneralisedMinimalResidual: src " << ssq << std::endl;
|
||||
|
||||
MatrixTimer.Reset();
|
||||
LinalgTimer.Reset();
|
||||
QrTimer.Reset();
|
||||
CompSolutionTimer.Reset();
|
||||
|
||||
GridStopWatch SolverTimer;
|
||||
SolverTimer.Start();
|
||||
|
||||
IterationCount = 0;
|
||||
|
||||
for (int k=0; k<MaxNumberOfRestarts; k++) {
|
||||
|
||||
cp = outerLoopBody(LinOp, src, psi, rsq);
|
||||
|
||||
// Stopping condition
|
||||
if (cp <= rsq) {
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
LinOp.Op(psi,r);
|
||||
axpy(r,-1.0,src,r);
|
||||
|
||||
RealD srcnorm = sqrt(ssq);
|
||||
RealD resnorm = sqrt(norm2(r));
|
||||
RealD true_residual = resnorm / srcnorm;
|
||||
|
||||
std::cout << GridLogMessage << "GeneralisedMinimalResidual: Converged on iteration " << IterationCount
|
||||
<< " computed residual " << sqrt(cp / ssq)
|
||||
<< " true residual " << true_residual
|
||||
<< " target " << Tolerance << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "GMRES Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "GMRES Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "GMRES Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "GMRES Time elapsed: QR " << QrTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "GMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "GeneralisedMinimalResidual did NOT converge" << std::endl;
|
||||
|
||||
if (ErrorOnNoConverge)
|
||||
assert(0);
|
||||
}
|
||||
|
||||
RealD outerLoopBody(LinearOperatorBase<Field> &LinOp, const Field &src, Field &psi, RealD rsq) {
|
||||
|
||||
RealD cp = 0;
|
||||
|
||||
Field w(src.Grid());
|
||||
Field r(src.Grid());
|
||||
|
||||
// this should probably be made a class member so that it is only allocated once, not in every restart
|
||||
std::vector<Field> v(RestartLength + 1, src.Grid()); for (auto &elem : v) elem = Zero();
|
||||
|
||||
MatrixTimer.Start();
|
||||
LinOp.Op(psi, w);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
LinalgTimer.Start();
|
||||
r = src - w;
|
||||
|
||||
gamma[0] = sqrt(norm2(r));
|
||||
|
||||
v[0] = (1. / gamma[0]) * r;
|
||||
LinalgTimer.Stop();
|
||||
|
||||
for (int i=0; i<RestartLength; i++) {
|
||||
|
||||
IterationCount++;
|
||||
|
||||
arnoldiStep(LinOp, v, w, i);
|
||||
|
||||
qrUpdate(i);
|
||||
|
||||
cp = norm(gamma[i+1]);
|
||||
|
||||
std::cout << GridLogIterative << "GeneralisedMinimalResidual: Iteration " << IterationCount
|
||||
<< " residual " << cp << " target " << rsq << std::endl;
|
||||
|
||||
if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
|
||||
|
||||
computeSolution(v, psi, i);
|
||||
|
||||
return cp;
|
||||
}
|
||||
}
|
||||
|
||||
assert(0); // Never reached
|
||||
return cp;
|
||||
}
|
||||
|
||||
void arnoldiStep(LinearOperatorBase<Field> &LinOp, std::vector<Field> &v, Field &w, int iter) {
|
||||
|
||||
MatrixTimer.Start();
|
||||
LinOp.Op(v[iter], w);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
LinalgTimer.Start();
|
||||
for (int i = 0; i <= iter; ++i) {
|
||||
H(iter, i) = innerProduct(v[i], w);
|
||||
w = w - ComplexD(H(iter, i)) * v[i];
|
||||
}
|
||||
|
||||
H(iter, iter + 1) = sqrt(norm2(w));
|
||||
v[iter + 1] = ComplexD(1. / H(iter, iter + 1)) * w;
|
||||
LinalgTimer.Stop();
|
||||
}
|
||||
|
||||
void qrUpdate(int iter) {
|
||||
|
||||
QrTimer.Start();
|
||||
for (int i = 0; i < iter ; ++i) {
|
||||
auto tmp = -s[i] * ComplexD(H(iter, i)) + c[i] * ComplexD(H(iter, i + 1));
|
||||
H(iter, i) = conjugate(c[i]) * ComplexD(H(iter, i)) + conjugate(s[i]) * ComplexD(H(iter, i + 1));
|
||||
H(iter, i + 1) = tmp;
|
||||
}
|
||||
|
||||
// Compute new Givens Rotation
|
||||
auto nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
|
||||
c[iter] = H(iter, iter) / nu;
|
||||
s[iter] = H(iter, iter + 1) / nu;
|
||||
|
||||
// Apply new Givens rotation
|
||||
H(iter, iter) = nu;
|
||||
H(iter, iter + 1) = 0.;
|
||||
|
||||
gamma[iter + 1] = -s[iter] * gamma[iter];
|
||||
gamma[iter] = conjugate(c[iter]) * gamma[iter];
|
||||
QrTimer.Stop();
|
||||
}
|
||||
|
||||
void computeSolution(std::vector<Field> const &v, Field &psi, int iter) {
|
||||
|
||||
CompSolutionTimer.Start();
|
||||
for (int i = iter; i >= 0; i--) {
|
||||
y[i] = gamma[i];
|
||||
for (int k = i + 1; k <= iter; k++)
|
||||
y[i] = y[i] - ComplexD(H(k, i)) * y[k];
|
||||
y[i] = y[i] / ComplexD(H(i, i));
|
||||
}
|
||||
|
||||
for (int i = 0; i <= iter; i++)
|
||||
psi = psi + v[i] * y[i];
|
||||
CompSolutionTimer.Stop();
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
@ -1,157 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/MinimalResidual.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Daniel Richtmann <daniel.richtmann@ur.de>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_MINIMAL_RESIDUAL_H
|
||||
#define GRID_MINIMAL_RESIDUAL_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class Field> class MinimalResidual : public OperatorFunction<Field> {
|
||||
public:
|
||||
using OperatorFunction<Field>::operator();
|
||||
|
||||
bool ErrorOnNoConverge; // throw an assert when the MR fails to converge.
|
||||
// Defaults true.
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
RealD overRelaxParam;
|
||||
Integer IterationsToComplete; // Number of iterations the MR took to finish.
|
||||
// Filled in upon completion
|
||||
|
||||
MinimalResidual(RealD tol, Integer maxit, Real ovrelparam = 1.0, bool err_on_no_conv = true)
|
||||
: Tolerance(tol), MaxIterations(maxit), overRelaxParam(ovrelparam), ErrorOnNoConverge(err_on_no_conv){};
|
||||
|
||||
void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
|
||||
|
||||
psi.Checkerboard() = src.Checkerboard();
|
||||
conformable(psi, src);
|
||||
|
||||
ComplexD a, c;
|
||||
RealD d;
|
||||
|
||||
Field Mr(src);
|
||||
Field r(src);
|
||||
|
||||
// Initial residual computation & set up
|
||||
RealD guess = norm2(psi);
|
||||
assert(std::isnan(guess) == 0);
|
||||
|
||||
RealD ssq = norm2(src);
|
||||
RealD rsq = Tolerance * Tolerance * ssq;
|
||||
|
||||
Linop.Op(psi, Mr);
|
||||
|
||||
r = src - Mr;
|
||||
|
||||
RealD cp = norm2(r);
|
||||
|
||||
std::cout << std::setprecision(4) << std::scientific;
|
||||
std::cout << GridLogIterative << "MinimalResidual: guess " << guess << std::endl;
|
||||
std::cout << GridLogIterative << "MinimalResidual: src " << ssq << std::endl;
|
||||
std::cout << GridLogIterative << "MinimalResidual: cp,r " << cp << std::endl;
|
||||
|
||||
if (cp <= rsq) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::cout << GridLogIterative << "MinimalResidual: k=0 residual " << cp << " target " << rsq << std::endl;
|
||||
|
||||
GridStopWatch LinalgTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
|
||||
SolverTimer.Start();
|
||||
int k;
|
||||
for (k = 1; k <= MaxIterations; k++) {
|
||||
|
||||
MatrixTimer.Start();
|
||||
Linop.Op(r, Mr);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
LinalgTimer.Start();
|
||||
|
||||
c = innerProduct(Mr, r);
|
||||
|
||||
d = norm2(Mr);
|
||||
|
||||
a = c / d;
|
||||
|
||||
a = a * overRelaxParam;
|
||||
|
||||
psi = psi + r * a;
|
||||
|
||||
r = r - Mr * a;
|
||||
|
||||
cp = norm2(r);
|
||||
|
||||
LinalgTimer.Stop();
|
||||
|
||||
std::cout << GridLogIterative << "MinimalResidual: Iteration " << k
|
||||
<< " residual " << cp << " target " << rsq << std::endl;
|
||||
std::cout << GridLogDebug << "a = " << a << " c = " << c << " d = " << d << std::endl;
|
||||
|
||||
// Stopping condition
|
||||
if (cp <= rsq) {
|
||||
SolverTimer.Stop();
|
||||
|
||||
Linop.Op(psi, Mr);
|
||||
r = src - Mr;
|
||||
|
||||
RealD srcnorm = sqrt(ssq);
|
||||
RealD resnorm = sqrt(norm2(r));
|
||||
RealD true_residual = resnorm / srcnorm;
|
||||
|
||||
std::cout << GridLogMessage << "MinimalResidual Converged on iteration " << k
|
||||
<< " computed residual " << sqrt(cp / ssq)
|
||||
<< " true residual " << true_residual
|
||||
<< " target " << Tolerance << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "MR Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "MR Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "MR Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
|
||||
|
||||
if (ErrorOnNoConverge)
|
||||
assert(true_residual / Tolerance < 10000.0);
|
||||
|
||||
IterationsToComplete = k;
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "MinimalResidual did NOT converge"
|
||||
<< std::endl;
|
||||
|
||||
if (ErrorOnNoConverge)
|
||||
assert(0);
|
||||
|
||||
IterationsToComplete = k;
|
||||
}
|
||||
};
|
||||
} // namespace Grid
|
||||
#endif
|
@ -1,276 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Daniel Richtmann <daniel.richtmann@ur.de>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_MIXED_PRECISION_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
|
||||
#define GRID_MIXED_PRECISION_FLEXIBLE_GENERALISED_MINIMAL_RESIDUAL_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class FieldD, class FieldF, typename std::enable_if<getPrecision<FieldD>::value == 2, int>::type = 0, typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
|
||||
class MixedPrecisionFlexibleGeneralisedMinimalResidual : public OperatorFunction<FieldD> {
|
||||
public:
|
||||
|
||||
using OperatorFunction<FieldD>::operator();
|
||||
|
||||
bool ErrorOnNoConverge; // Throw an assert when MPFGMRES fails to converge,
|
||||
// defaults to true
|
||||
|
||||
RealD Tolerance;
|
||||
|
||||
Integer MaxIterations;
|
||||
Integer RestartLength;
|
||||
Integer MaxNumberOfRestarts;
|
||||
Integer IterationCount; // Number of iterations the MPFGMRES took to finish,
|
||||
// filled in upon completion
|
||||
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch PrecTimer;
|
||||
GridStopWatch LinalgTimer;
|
||||
GridStopWatch QrTimer;
|
||||
GridStopWatch CompSolutionTimer;
|
||||
GridStopWatch ChangePrecTimer;
|
||||
|
||||
Eigen::MatrixXcd H;
|
||||
|
||||
std::vector<ComplexD> y;
|
||||
std::vector<ComplexD> gamma;
|
||||
std::vector<ComplexD> c;
|
||||
std::vector<ComplexD> s;
|
||||
|
||||
GridBase* SinglePrecGrid;
|
||||
|
||||
LinearFunction<FieldF> &Preconditioner;
|
||||
|
||||
MixedPrecisionFlexibleGeneralisedMinimalResidual(RealD tol,
|
||||
Integer maxit,
|
||||
GridBase * sp_grid,
|
||||
LinearFunction<FieldF> &Prec,
|
||||
Integer restart_length,
|
||||
bool err_on_no_conv = true)
|
||||
: Tolerance(tol)
|
||||
, MaxIterations(maxit)
|
||||
, RestartLength(restart_length)
|
||||
, MaxNumberOfRestarts(MaxIterations/RestartLength + ((MaxIterations%RestartLength == 0) ? 0 : 1))
|
||||
, ErrorOnNoConverge(err_on_no_conv)
|
||||
, H(Eigen::MatrixXcd::Zero(RestartLength, RestartLength + 1)) // sizes taken from DD-αAMG code base
|
||||
, y(RestartLength + 1, 0.)
|
||||
, gamma(RestartLength + 1, 0.)
|
||||
, c(RestartLength + 1, 0.)
|
||||
, s(RestartLength + 1, 0.)
|
||||
, SinglePrecGrid(sp_grid)
|
||||
, Preconditioner(Prec) {};
|
||||
|
||||
void operator()(LinearOperatorBase<FieldD> &LinOp, const FieldD &src, FieldD &psi) {
|
||||
|
||||
psi.Checkerboard() = src.Checkerboard();
|
||||
conformable(psi, src);
|
||||
|
||||
RealD guess = norm2(psi);
|
||||
assert(std::isnan(guess) == 0);
|
||||
|
||||
RealD cp;
|
||||
RealD ssq = norm2(src);
|
||||
RealD rsq = Tolerance * Tolerance * ssq;
|
||||
|
||||
FieldD r(src.Grid());
|
||||
|
||||
std::cout << std::setprecision(4) << std::scientific;
|
||||
std::cout << GridLogIterative << "MPFGMRES: guess " << guess << std::endl;
|
||||
std::cout << GridLogIterative << "MPFGMRES: src " << ssq << std::endl;
|
||||
|
||||
PrecTimer.Reset();
|
||||
MatrixTimer.Reset();
|
||||
LinalgTimer.Reset();
|
||||
QrTimer.Reset();
|
||||
CompSolutionTimer.Reset();
|
||||
ChangePrecTimer.Reset();
|
||||
|
||||
GridStopWatch SolverTimer;
|
||||
SolverTimer.Start();
|
||||
|
||||
IterationCount = 0;
|
||||
|
||||
for (int k=0; k<MaxNumberOfRestarts; k++) {
|
||||
|
||||
cp = outerLoopBody(LinOp, src, psi, rsq);
|
||||
|
||||
// Stopping condition
|
||||
if (cp <= rsq) {
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
LinOp.Op(psi,r);
|
||||
axpy(r,-1.0,src,r);
|
||||
|
||||
RealD srcnorm = sqrt(ssq);
|
||||
RealD resnorm = sqrt(norm2(r));
|
||||
RealD true_residual = resnorm / srcnorm;
|
||||
|
||||
std::cout << GridLogMessage << "MPFGMRES: Converged on iteration " << IterationCount
|
||||
<< " computed residual " << sqrt(cp / ssq)
|
||||
<< " true residual " << true_residual
|
||||
<< " target " << Tolerance << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "MPFGMRES Time elapsed: Total " << SolverTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "MPFGMRES Time elapsed: Precon " << PrecTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "MPFGMRES Time elapsed: Matrix " << MatrixTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "MPFGMRES Time elapsed: Linalg " << LinalgTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "MPFGMRES Time elapsed: QR " << QrTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "MPFGMRES Time elapsed: CompSol " << CompSolutionTimer.Elapsed() << std::endl;
|
||||
std::cout << GridLogMessage << "MPFGMRES Time elapsed: PrecChange " << ChangePrecTimer.Elapsed() << std::endl;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "MPFGMRES did NOT converge" << std::endl;
|
||||
|
||||
if (ErrorOnNoConverge)
|
||||
assert(0);
|
||||
}
|
||||
|
||||
RealD outerLoopBody(LinearOperatorBase<FieldD> &LinOp, const FieldD &src, FieldD &psi, RealD rsq) {
|
||||
|
||||
RealD cp = 0;
|
||||
|
||||
FieldD w(src.Grid());
|
||||
FieldD r(src.Grid());
|
||||
|
||||
// these should probably be made class members so that they are only allocated once, not in every restart
|
||||
std::vector<FieldD> v(RestartLength + 1, src.Grid()); for (auto &elem : v) elem = Zero();
|
||||
std::vector<FieldD> z(RestartLength + 1, src.Grid()); for (auto &elem : z) elem = Zero();
|
||||
|
||||
MatrixTimer.Start();
|
||||
LinOp.Op(psi, w);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
LinalgTimer.Start();
|
||||
r = src - w;
|
||||
|
||||
gamma[0] = sqrt(norm2(r));
|
||||
|
||||
v[0] = (1. / gamma[0]) * r;
|
||||
LinalgTimer.Stop();
|
||||
|
||||
for (int i=0; i<RestartLength; i++) {
|
||||
|
||||
IterationCount++;
|
||||
|
||||
arnoldiStep(LinOp, v, z, w, i);
|
||||
|
||||
qrUpdate(i);
|
||||
|
||||
cp = norm(gamma[i+1]);
|
||||
|
||||
std::cout << GridLogIterative << "MPFGMRES: Iteration " << IterationCount
|
||||
<< " residual " << cp << " target " << rsq << std::endl;
|
||||
|
||||
if ((i == RestartLength - 1) || (IterationCount == MaxIterations) || (cp <= rsq)) {
|
||||
|
||||
computeSolution(z, psi, i);
|
||||
|
||||
return cp;
|
||||
}
|
||||
}
|
||||
|
||||
assert(0); // Never reached
|
||||
return cp;
|
||||
}
|
||||
|
||||
void arnoldiStep(LinearOperatorBase<FieldD> &LinOp, std::vector<FieldD> &v, std::vector<FieldD> &z, FieldD &w, int iter) {
|
||||
|
||||
FieldF v_f(SinglePrecGrid);
|
||||
FieldF z_f(SinglePrecGrid);
|
||||
|
||||
ChangePrecTimer.Start();
|
||||
precisionChange(v_f, v[iter]);
|
||||
precisionChange(z_f, z[iter]);
|
||||
ChangePrecTimer.Stop();
|
||||
|
||||
PrecTimer.Start();
|
||||
Preconditioner(v_f, z_f);
|
||||
PrecTimer.Stop();
|
||||
|
||||
ChangePrecTimer.Start();
|
||||
precisionChange(z[iter], z_f);
|
||||
ChangePrecTimer.Stop();
|
||||
|
||||
MatrixTimer.Start();
|
||||
LinOp.Op(z[iter], w);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
LinalgTimer.Start();
|
||||
for (int i = 0; i <= iter; ++i) {
|
||||
H(iter, i) = innerProduct(v[i], w);
|
||||
w = w - ComplexD(H(iter, i)) * v[i];
|
||||
}
|
||||
|
||||
H(iter, iter + 1) = sqrt(norm2(w));
|
||||
v[iter + 1] = ComplexD(1. / H(iter, iter + 1)) * w;
|
||||
LinalgTimer.Stop();
|
||||
}
|
||||
|
||||
void qrUpdate(int iter) {
|
||||
|
||||
QrTimer.Start();
|
||||
for (int i = 0; i < iter ; ++i) {
|
||||
auto tmp = -s[i] * ComplexD(H(iter, i)) + c[i] * ComplexD(H(iter, i + 1));
|
||||
H(iter, i) = conjugate(c[i]) * ComplexD(H(iter, i)) + conjugate(s[i]) * ComplexD(H(iter, i + 1));
|
||||
H(iter, i + 1) = tmp;
|
||||
}
|
||||
|
||||
// Compute new Givens Rotation
|
||||
auto nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1)));
|
||||
c[iter] = H(iter, iter) / nu;
|
||||
s[iter] = H(iter, iter + 1) / nu;
|
||||
|
||||
// Apply new Givens rotation
|
||||
H(iter, iter) = nu;
|
||||
H(iter, iter + 1) = 0.;
|
||||
|
||||
gamma[iter + 1] = -s[iter] * gamma[iter];
|
||||
gamma[iter] = conjugate(c[iter]) * gamma[iter];
|
||||
QrTimer.Stop();
|
||||
}
|
||||
|
||||
void computeSolution(std::vector<FieldD> const &z, FieldD &psi, int iter) {
|
||||
|
||||
CompSolutionTimer.Start();
|
||||
for (int i = iter; i >= 0; i--) {
|
||||
y[i] = gamma[i];
|
||||
for (int k = i + 1; k <= iter; k++)
|
||||
y[i] = y[i] - ComplexD(H(k, i)) * y[k];
|
||||
y[i] = y[i] / ComplexD(H(i, i));
|
||||
}
|
||||
|
||||
for (int i = 0; i <= iter; i++)
|
||||
psi = psi + z[i] * y[i];
|
||||
CompSolutionTimer.Stop();
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
@ -1,112 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/NormalEquations.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_NORMAL_EQUATIONS_H
|
||||
#define GRID_NORMAL_EQUATIONS_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Take a matrix and form an NE solver calling a Herm solver
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class NormalEquations {
|
||||
private:
|
||||
SparseMatrixBase<Field> & _Matrix;
|
||||
OperatorFunction<Field> & _HermitianSolver;
|
||||
LinearFunction<Field> & _Guess;
|
||||
public:
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Wrap the usual normal equations trick
|
||||
/////////////////////////////////////////////////////
|
||||
NormalEquations(SparseMatrixBase<Field> &Matrix, OperatorFunction<Field> &HermitianSolver,
|
||||
LinearFunction<Field> &Guess)
|
||||
: _Matrix(Matrix), _HermitianSolver(HermitianSolver), _Guess(Guess) {};
|
||||
|
||||
void operator() (const Field &in, Field &out){
|
||||
|
||||
Field src(in.Grid());
|
||||
Field tmp(in.Grid());
|
||||
|
||||
MdagMLinearOperator<SparseMatrixBase<Field>,Field> MdagMOp(_Matrix);
|
||||
_Matrix.Mdag(in,src);
|
||||
_Guess(src,out);
|
||||
_HermitianSolver(MdagMOp,src,out); // Mdag M out = Mdag in
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
template<class Field> class HPDSolver {
|
||||
private:
|
||||
LinearOperatorBase<Field> & _Matrix;
|
||||
OperatorFunction<Field> & _HermitianSolver;
|
||||
LinearFunction<Field> & _Guess;
|
||||
public:
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Wrap the usual normal equations trick
|
||||
/////////////////////////////////////////////////////
|
||||
HPDSolver(LinearOperatorBase<Field> &Matrix,
|
||||
OperatorFunction<Field> &HermitianSolver,
|
||||
LinearFunction<Field> &Guess)
|
||||
: _Matrix(Matrix), _HermitianSolver(HermitianSolver), _Guess(Guess) {};
|
||||
|
||||
void operator() (const Field &in, Field &out){
|
||||
|
||||
_Guess(in,out);
|
||||
_HermitianSolver(_Matrix,in,out); // Mdag M out = Mdag in
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<class Field> class MdagMSolver {
|
||||
private:
|
||||
SparseMatrixBase<Field> & _Matrix;
|
||||
OperatorFunction<Field> & _HermitianSolver;
|
||||
LinearFunction<Field> & _Guess;
|
||||
public:
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Wrap the usual normal equations trick
|
||||
/////////////////////////////////////////////////////
|
||||
MdagMSolver(SparseMatrixBase<Field> &Matrix, OperatorFunction<Field> &HermitianSolver,
|
||||
LinearFunction<Field> &Guess)
|
||||
: _Matrix(Matrix), _HermitianSolver(HermitianSolver), _Guess(Guess) {};
|
||||
|
||||
void operator() (const Field &in, Field &out){
|
||||
|
||||
MdagMLinearOperator<SparseMatrixBase<Field>,Field> MdagMOp(_Matrix);
|
||||
_Guess(in,out);
|
||||
|
||||
_HermitianSolver(MdagMOp,in,out); // Mdag M out = Mdag in
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
@ -1,47 +0,0 @@
|
||||
#pragma once
|
||||
namespace Grid {
|
||||
template<class Field> class PowerMethod
|
||||
{
|
||||
public:
|
||||
|
||||
template<typename T> static RealD normalise(T& v)
|
||||
{
|
||||
RealD nn = norm2(v);
|
||||
nn = sqrt(nn);
|
||||
v = v * (1.0/nn);
|
||||
return nn;
|
||||
}
|
||||
|
||||
RealD operator()(LinearOperatorBase<Field> &HermOp, const Field &src)
|
||||
{
|
||||
GridBase *grid = src.Grid();
|
||||
|
||||
// quickly get an idea of the largest eigenvalue to more properly normalize the residuum
|
||||
RealD evalMaxApprox = 0.0;
|
||||
auto src_n = src;
|
||||
auto tmp = src;
|
||||
const int _MAX_ITER_EST_ = 50;
|
||||
|
||||
for (int i=0;i<_MAX_ITER_EST_;i++) {
|
||||
|
||||
normalise(src_n);
|
||||
HermOp.HermOp(src_n,tmp);
|
||||
RealD vnum = real(innerProduct(src_n,tmp)); // HermOp.
|
||||
RealD vden = norm2(src_n);
|
||||
RealD na = vnum/vden;
|
||||
|
||||
std::cout << GridLogIterative << "PowerMethod: Current approximation of largest eigenvalue " << na << std::endl;
|
||||
|
||||
if ( (fabs(evalMaxApprox/na - 1.0) < 0.001) || (i==_MAX_ITER_EST_-1) ) {
|
||||
evalMaxApprox = na;
|
||||
std::cout << GridLogMessage << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
|
||||
return evalMaxApprox;
|
||||
}
|
||||
evalMaxApprox = na;
|
||||
src_n = tmp;
|
||||
}
|
||||
assert(0);
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
}
|
@ -1,119 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/PrecConjugateResidual.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_PREC_CONJUGATE_RESIDUAL_H
|
||||
#define GRID_PREC_CONJUGATE_RESIDUAL_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Base classes for iterative processes based on operators
|
||||
// single input vec, single output vec.
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
||||
template<class Field>
|
||||
class PrecConjugateResidual : public OperatorFunction<Field> {
|
||||
public:
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
int verbose;
|
||||
LinearFunction<Field> &Preconditioner;
|
||||
|
||||
PrecConjugateResidual(RealD tol,Integer maxit,LinearFunction<Field> &Prec) : Tolerance(tol), MaxIterations(maxit), Preconditioner(Prec)
|
||||
{
|
||||
verbose=1;
|
||||
};
|
||||
|
||||
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
|
||||
|
||||
RealD a, b, c, d;
|
||||
RealD cp, ssq,rsq;
|
||||
|
||||
RealD rAr, rAAr, rArp;
|
||||
RealD pAp, pAAp;
|
||||
|
||||
GridBase *grid = src.Grid();
|
||||
Field r(grid), p(grid), Ap(grid), Ar(grid), z(grid);
|
||||
|
||||
psi=zero;
|
||||
r = src;
|
||||
Preconditioner(r,p);
|
||||
|
||||
|
||||
|
||||
Linop.HermOpAndNorm(p,Ap,pAp,pAAp);
|
||||
Ar=Ap;
|
||||
rAr=pAp;
|
||||
rAAr=pAAp;
|
||||
|
||||
cp =norm2(r);
|
||||
ssq=norm2(src);
|
||||
rsq=Tolerance*Tolerance*ssq;
|
||||
|
||||
if (verbose) std::cout<<GridLogMessage<<"PrecConjugateResidual: iteration " <<0<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
||||
|
||||
for(int k=0;k<MaxIterations;k++){
|
||||
|
||||
|
||||
Preconditioner(Ap,z);
|
||||
RealD rq= real(innerProduct(Ap,z));
|
||||
|
||||
a = rAr/rq;
|
||||
|
||||
axpy(psi,a,p,psi);
|
||||
cp = axpy_norm(r,-a,z,r);
|
||||
|
||||
rArp=rAr;
|
||||
|
||||
Linop.HermOpAndNorm(r,Ar,rAr,rAAr);
|
||||
|
||||
b =rAr/rArp;
|
||||
|
||||
axpy(p,b,p,r);
|
||||
pAAp=axpy_norm(Ap,b,Ap,Ar);
|
||||
|
||||
if(verbose) std::cout<<GridLogMessage<<"PrecConjugateResidual: iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
||||
|
||||
if(cp<rsq) {
|
||||
Linop.HermOp(psi,Ap);
|
||||
axpy(r,-1.0,src,Ap);
|
||||
RealD true_resid = norm2(r)/ssq;
|
||||
std::cout<<GridLogMessage<<"PrecConjugateResidual: Converged on iteration " <<k
|
||||
<< " computed residual "<<sqrt(cp/ssq)
|
||||
<< " true residual "<<sqrt(true_resid)
|
||||
<< " target " <<Tolerance <<std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage<<"PrecConjugateResidual did NOT converge"<<std::endl;
|
||||
assert(0);
|
||||
}
|
||||
};
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
@ -1,239 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/PrecGeneralisedConjugateResidual.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_PREC_GCR_H
|
||||
#define GRID_PREC_GCR_H
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//VPGCR Abe and Zhang, 2005.
|
||||
//INTERNATIONAL JOURNAL OF NUMERICAL ANALYSIS AND MODELING
|
||||
//Computing and Information Volume 2, Number 2, Pages 147-161
|
||||
//NB. Likely not original reference since they are focussing on a preconditioner variant.
|
||||
// but VPGCR was nicely written up in their paper
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
#define GCRLogLevel std::cout << GridLogMessage <<std::string(level,'\t')<< " Level "<<level<<" "
|
||||
|
||||
template<class Field>
|
||||
class PrecGeneralisedConjugateResidual : public LinearFunction<Field> {
|
||||
public:
|
||||
using LinearFunction<Field>::operator();
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
int verbose;
|
||||
int mmax;
|
||||
int nstep;
|
||||
int steps;
|
||||
int level;
|
||||
GridStopWatch PrecTimer;
|
||||
GridStopWatch MatTimer;
|
||||
GridStopWatch LinalgTimer;
|
||||
|
||||
LinearFunction<Field> &Preconditioner;
|
||||
LinearOperatorBase<Field> &Linop;
|
||||
|
||||
void Level(int lv) { level=lv; };
|
||||
|
||||
PrecGeneralisedConjugateResidual(RealD tol,Integer maxit,LinearOperatorBase<Field> &_Linop,LinearFunction<Field> &Prec,int _mmax,int _nstep) :
|
||||
Tolerance(tol),
|
||||
MaxIterations(maxit),
|
||||
Linop(_Linop),
|
||||
Preconditioner(Prec),
|
||||
mmax(_mmax),
|
||||
nstep(_nstep)
|
||||
{
|
||||
level=1;
|
||||
verbose=1;
|
||||
};
|
||||
|
||||
void operator() (const Field &src, Field &psi){
|
||||
|
||||
psi=Zero();
|
||||
RealD cp, ssq,rsq;
|
||||
ssq=norm2(src);
|
||||
rsq=Tolerance*Tolerance*ssq;
|
||||
|
||||
Field r(src.Grid());
|
||||
|
||||
PrecTimer.Reset();
|
||||
MatTimer.Reset();
|
||||
LinalgTimer.Reset();
|
||||
|
||||
GridStopWatch SolverTimer;
|
||||
SolverTimer.Start();
|
||||
|
||||
steps=0;
|
||||
for(int k=0;k<MaxIterations;k++){
|
||||
|
||||
cp=GCRnStep(src,psi,rsq);
|
||||
|
||||
GCRLogLevel <<"PGCR("<<mmax<<","<<nstep<<") "<< steps <<" steps cp = "<<cp<<" target "<<rsq <<std::endl;
|
||||
|
||||
if(cp<rsq) {
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
Linop.HermOp(psi,r);
|
||||
axpy(r,-1.0,src,r);
|
||||
RealD tr = norm2(r);
|
||||
GCRLogLevel<<"PGCR: Converged on iteration " <<steps
|
||||
<< " computed residual "<<sqrt(cp/ssq)
|
||||
<< " true residual " <<sqrt(tr/ssq)
|
||||
<< " target " <<Tolerance <<std::endl;
|
||||
|
||||
GCRLogLevel<<"PGCR Time elapsed: Total "<< SolverTimer.Elapsed() <<std::endl;
|
||||
/*
|
||||
GCRLogLevel<<"PGCR Time elapsed: Precon "<< PrecTimer.Elapsed() <<std::endl;
|
||||
GCRLogLevel<<"PGCR Time elapsed: Matrix "<< MatTimer.Elapsed() <<std::endl;
|
||||
GCRLogLevel<<"PGCR Time elapsed: Linalg "<< LinalgTimer.Elapsed() <<std::endl;
|
||||
*/
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
GCRLogLevel<<"Variable Preconditioned GCR did not converge"<<std::endl;
|
||||
// assert(0);
|
||||
}
|
||||
|
||||
RealD GCRnStep(const Field &src, Field &psi,RealD rsq){
|
||||
|
||||
RealD cp;
|
||||
RealD a, b;
|
||||
RealD zAz, zAAz;
|
||||
RealD rq;
|
||||
|
||||
GridBase *grid = src.Grid();
|
||||
|
||||
Field r(grid);
|
||||
Field z(grid);
|
||||
Field tmp(grid);
|
||||
Field ttmp(grid);
|
||||
Field Az(grid);
|
||||
|
||||
////////////////////////////////
|
||||
// history for flexible orthog
|
||||
////////////////////////////////
|
||||
std::vector<Field> q(mmax,grid);
|
||||
std::vector<Field> p(mmax,grid);
|
||||
std::vector<RealD> qq(mmax);
|
||||
|
||||
GCRLogLevel<< "PGCR nStep("<<nstep<<")"<<std::endl;
|
||||
|
||||
//////////////////////////////////
|
||||
// initial guess x0 is taken as nonzero.
|
||||
// r0=src-A x0 = src
|
||||
//////////////////////////////////
|
||||
MatTimer.Start();
|
||||
Linop.HermOpAndNorm(psi,Az,zAz,zAAz);
|
||||
MatTimer.Stop();
|
||||
|
||||
|
||||
LinalgTimer.Start();
|
||||
r=src-Az;
|
||||
LinalgTimer.Stop();
|
||||
GCRLogLevel<< "PGCR true residual r = src - A psi "<<norm2(r) <<std::endl;
|
||||
|
||||
/////////////////////
|
||||
// p = Prec(r)
|
||||
/////////////////////
|
||||
|
||||
PrecTimer.Start();
|
||||
Preconditioner(r,z);
|
||||
PrecTimer.Stop();
|
||||
|
||||
MatTimer.Start();
|
||||
Linop.HermOpAndNorm(z,Az,zAz,zAAz);
|
||||
MatTimer.Stop();
|
||||
|
||||
LinalgTimer.Start();
|
||||
|
||||
//p[0],q[0],qq[0]
|
||||
p[0]= z;
|
||||
q[0]= Az;
|
||||
qq[0]= zAAz;
|
||||
|
||||
cp =norm2(r);
|
||||
LinalgTimer.Stop();
|
||||
|
||||
for(int k=0;k<nstep;k++){
|
||||
|
||||
steps++;
|
||||
|
||||
int kp = k+1;
|
||||
int peri_k = k %mmax;
|
||||
int peri_kp= kp%mmax;
|
||||
|
||||
LinalgTimer.Start();
|
||||
rq= real(innerProduct(r,q[peri_k])); // what if rAr not real?
|
||||
a = rq/qq[peri_k];
|
||||
|
||||
axpy(psi,a,p[peri_k],psi);
|
||||
|
||||
cp = axpy_norm(r,-a,q[peri_k],r);
|
||||
LinalgTimer.Stop();
|
||||
|
||||
GCRLogLevel<< "PGCR step["<<steps<<"] resid " << cp << " target " <<rsq<<std::endl;
|
||||
|
||||
if((k==nstep-1)||(cp<rsq)){
|
||||
return cp;
|
||||
}
|
||||
|
||||
|
||||
PrecTimer.Start();
|
||||
Preconditioner(r,z);// solve Az = r
|
||||
PrecTimer.Stop();
|
||||
|
||||
MatTimer.Start();
|
||||
Linop.HermOpAndNorm(z,Az,zAz,zAAz);
|
||||
MatTimer.Stop();
|
||||
|
||||
LinalgTimer.Start();
|
||||
|
||||
q[peri_kp]=Az;
|
||||
p[peri_kp]=z;
|
||||
|
||||
int northog = ((kp)>(mmax-1))?(mmax-1):(kp); // if more than mmax done, we orthog all mmax history.
|
||||
for(int back=0;back<northog;back++){
|
||||
|
||||
int peri_back=(k-back)%mmax; assert((k-back)>=0);
|
||||
|
||||
b=-real(innerProduct(q[peri_back],Az))/qq[peri_back];
|
||||
p[peri_kp]=p[peri_kp]+b*p[peri_back];
|
||||
q[peri_kp]=q[peri_kp]+b*q[peri_back];
|
||||
|
||||
}
|
||||
qq[peri_kp]=norm2(q[peri_kp]); // could use axpy_norm
|
||||
LinalgTimer.Stop();
|
||||
}
|
||||
assert(0); // never reached
|
||||
return cp;
|
||||
}
|
||||
};
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
@ -1,242 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/PrecGeneralisedConjugateResidual.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_PREC_GCR_NON_HERM_H
|
||||
#define GRID_PREC_GCR_NON_HERM_H
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//VPGCR Abe and Zhang, 2005.
|
||||
//INTERNATIONAL JOURNAL OF NUMERICAL ANALYSIS AND MODELING
|
||||
//Computing and Information Volume 2, Number 2, Pages 147-161
|
||||
//NB. Likely not original reference since they are focussing on a preconditioner variant.
|
||||
// but VPGCR was nicely written up in their paper
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
#define GCRLogLevel std::cout << GridLogMessage <<std::string(level,'\t')<< " Level "<<level<<" "
|
||||
|
||||
template<class Field>
|
||||
class PrecGeneralisedConjugateResidualNonHermitian : public LinearFunction<Field> {
|
||||
public:
|
||||
using LinearFunction<Field>::operator();
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
int verbose;
|
||||
int mmax;
|
||||
int nstep;
|
||||
int steps;
|
||||
int level;
|
||||
GridStopWatch PrecTimer;
|
||||
GridStopWatch MatTimer;
|
||||
GridStopWatch LinalgTimer;
|
||||
|
||||
LinearFunction<Field> &Preconditioner;
|
||||
LinearOperatorBase<Field> &Linop;
|
||||
|
||||
void Level(int lv) { level=lv; };
|
||||
|
||||
PrecGeneralisedConjugateResidualNonHermitian(RealD tol,Integer maxit,LinearOperatorBase<Field> &_Linop,LinearFunction<Field> &Prec,int _mmax,int _nstep) :
|
||||
Tolerance(tol),
|
||||
MaxIterations(maxit),
|
||||
Linop(_Linop),
|
||||
Preconditioner(Prec),
|
||||
mmax(_mmax),
|
||||
nstep(_nstep)
|
||||
{
|
||||
level=1;
|
||||
verbose=1;
|
||||
};
|
||||
|
||||
void operator() (const Field &src, Field &psi){
|
||||
|
||||
psi=Zero();
|
||||
RealD cp, ssq,rsq;
|
||||
ssq=norm2(src);
|
||||
rsq=Tolerance*Tolerance*ssq;
|
||||
|
||||
Field r(src.Grid());
|
||||
|
||||
PrecTimer.Reset();
|
||||
MatTimer.Reset();
|
||||
LinalgTimer.Reset();
|
||||
|
||||
GridStopWatch SolverTimer;
|
||||
SolverTimer.Start();
|
||||
|
||||
steps=0;
|
||||
for(int k=0;k<MaxIterations;k++){
|
||||
|
||||
cp=GCRnStep(src,psi,rsq);
|
||||
|
||||
GCRLogLevel <<"PGCR("<<mmax<<","<<nstep<<") "<< steps <<" steps cp = "<<cp<<" target "<<rsq <<std::endl;
|
||||
|
||||
if(cp<rsq) {
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
Linop.Op(psi,r);
|
||||
axpy(r,-1.0,src,r);
|
||||
RealD tr = norm2(r);
|
||||
GCRLogLevel<<"PGCR: Converged on iteration " <<steps
|
||||
<< " computed residual "<<sqrt(cp/ssq)
|
||||
<< " true residual " <<sqrt(tr/ssq)
|
||||
<< " target " <<Tolerance <<std::endl;
|
||||
|
||||
GCRLogLevel<<"PGCR Time elapsed: Total "<< SolverTimer.Elapsed() <<std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
GCRLogLevel<<"Variable Preconditioned GCR did not converge"<<std::endl;
|
||||
// assert(0);
|
||||
}
|
||||
|
||||
RealD GCRnStep(const Field &src, Field &psi,RealD rsq){
|
||||
|
||||
RealD cp;
|
||||
ComplexD a, b;
|
||||
// ComplexD zAz;
|
||||
RealD zAAz;
|
||||
ComplexD rq;
|
||||
|
||||
GridBase *grid = src.Grid();
|
||||
|
||||
Field r(grid);
|
||||
Field z(grid);
|
||||
Field tmp(grid);
|
||||
Field ttmp(grid);
|
||||
Field Az(grid);
|
||||
|
||||
////////////////////////////////
|
||||
// history for flexible orthog
|
||||
////////////////////////////////
|
||||
std::vector<Field> q(mmax,grid);
|
||||
std::vector<Field> p(mmax,grid);
|
||||
std::vector<RealD> qq(mmax);
|
||||
|
||||
GCRLogLevel<< "PGCR nStep("<<nstep<<")"<<std::endl;
|
||||
|
||||
//////////////////////////////////
|
||||
// initial guess x0 is taken as nonzero.
|
||||
// r0=src-A x0 = src
|
||||
//////////////////////////////////
|
||||
MatTimer.Start();
|
||||
Linop.Op(psi,Az);
|
||||
// zAz = innerProduct(Az,psi);
|
||||
zAAz= norm2(Az);
|
||||
MatTimer.Stop();
|
||||
|
||||
|
||||
LinalgTimer.Start();
|
||||
r=src-Az;
|
||||
LinalgTimer.Stop();
|
||||
GCRLogLevel<< "PGCR true residual r = src - A psi "<<norm2(r) <<std::endl;
|
||||
|
||||
/////////////////////
|
||||
// p = Prec(r)
|
||||
/////////////////////
|
||||
|
||||
PrecTimer.Start();
|
||||
Preconditioner(r,z);
|
||||
PrecTimer.Stop();
|
||||
|
||||
MatTimer.Start();
|
||||
Linop.Op(z,Az);
|
||||
MatTimer.Stop();
|
||||
|
||||
LinalgTimer.Start();
|
||||
|
||||
// zAz = innerProduct(Az,psi);
|
||||
zAAz= norm2(Az);
|
||||
|
||||
//p[0],q[0],qq[0]
|
||||
p[0]= z;
|
||||
q[0]= Az;
|
||||
qq[0]= zAAz;
|
||||
|
||||
cp =norm2(r);
|
||||
LinalgTimer.Stop();
|
||||
|
||||
for(int k=0;k<nstep;k++){
|
||||
|
||||
steps++;
|
||||
|
||||
int kp = k+1;
|
||||
int peri_k = k %mmax;
|
||||
int peri_kp= kp%mmax;
|
||||
|
||||
LinalgTimer.Start();
|
||||
rq= innerProduct(q[peri_k],r); // what if rAr not real?
|
||||
a = rq/qq[peri_k];
|
||||
|
||||
axpy(psi,a,p[peri_k],psi);
|
||||
|
||||
cp = axpy_norm(r,-a,q[peri_k],r);
|
||||
LinalgTimer.Stop();
|
||||
|
||||
GCRLogLevel<< "PGCR step["<<steps<<"] resid " << cp << " target " <<rsq<<std::endl;
|
||||
|
||||
if((k==nstep-1)||(cp<rsq)){
|
||||
return cp;
|
||||
}
|
||||
|
||||
|
||||
PrecTimer.Start();
|
||||
Preconditioner(r,z);// solve Az = r
|
||||
PrecTimer.Stop();
|
||||
|
||||
MatTimer.Start();
|
||||
Linop.Op(z,Az);
|
||||
MatTimer.Stop();
|
||||
// zAz = innerProduct(Az,psi);
|
||||
zAAz= norm2(Az);
|
||||
|
||||
LinalgTimer.Start();
|
||||
|
||||
q[peri_kp]=Az;
|
||||
p[peri_kp]=z;
|
||||
|
||||
int northog = ((kp)>(mmax-1))?(mmax-1):(kp); // if more than mmax done, we orthog all mmax history.
|
||||
for(int back=0;back<northog;back++){
|
||||
|
||||
int peri_back=(k-back)%mmax; assert((k-back)>=0);
|
||||
|
||||
b=-real(innerProduct(q[peri_back],Az))/qq[peri_back];
|
||||
p[peri_kp]=p[peri_kp]+b*p[peri_back];
|
||||
q[peri_kp]=q[peri_kp]+b*q[peri_back];
|
||||
|
||||
}
|
||||
qq[peri_kp]=norm2(q[peri_kp]); // could use axpy_norm
|
||||
LinalgTimer.Stop();
|
||||
}
|
||||
assert(0); // never reached
|
||||
return cp;
|
||||
}
|
||||
};
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
@ -1,371 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithmsf/iterative/QuasiMinimalResidual.h
|
||||
|
||||
Copyright (C) 2019
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class Field>
|
||||
RealD innerG5ProductReal(Field &l, Field &r)
|
||||
{
|
||||
Gamma G5(Gamma::Algebra::Gamma5);
|
||||
Field tmp(l.Grid());
|
||||
// tmp = G5*r;
|
||||
G5R5(tmp,r);
|
||||
ComplexD ip =innerProduct(l,tmp);
|
||||
std::cout << "innerProductRealG5R5 "<<ip<<std::endl;
|
||||
return ip.real();
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
class QuasiMinimalResidual : public OperatorFunction<Field> {
|
||||
public:
|
||||
using OperatorFunction<Field>::operator();
|
||||
|
||||
bool ErrorOnNoConverge;
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
Integer IterationCount;
|
||||
|
||||
QuasiMinimalResidual(RealD tol,
|
||||
Integer maxit,
|
||||
bool err_on_no_conv = true)
|
||||
: Tolerance(tol)
|
||||
, MaxIterations(maxit)
|
||||
, ErrorOnNoConverge(err_on_no_conv)
|
||||
{};
|
||||
|
||||
#if 1
|
||||
void operator()(LinearOperatorBase<Field> &LinOp, const Field &b, Field &x)
|
||||
{
|
||||
RealD resid;
|
||||
IterationCount=0;
|
||||
|
||||
RealD rho, rho_1, xi, gamma, gamma_1, theta, theta_1;
|
||||
RealD eta, delta, ep, beta;
|
||||
|
||||
GridBase *Grid = b.Grid();
|
||||
Field r(Grid), d(Grid), s(Grid);
|
||||
Field v(Grid), w(Grid), y(Grid), z(Grid);
|
||||
Field v_tld(Grid), w_tld(Grid), y_tld(Grid), z_tld(Grid);
|
||||
Field p(Grid), q(Grid), p_tld(Grid);
|
||||
|
||||
Real normb = norm2(b);
|
||||
|
||||
LinOp.Op(x,r); r = b - r;
|
||||
|
||||
assert(normb> 0.0);
|
||||
|
||||
resid = norm2(r)/normb;
|
||||
if (resid <= Tolerance) {
|
||||
return;
|
||||
}
|
||||
|
||||
v_tld = r;
|
||||
y = v_tld;
|
||||
rho = norm2(y);
|
||||
|
||||
// Take Gamma5 conjugate
|
||||
// Gamma G5(Gamma::Algebra::Gamma5);
|
||||
// G5R5(w_tld,r);
|
||||
// w_tld = G5* v_tld;
|
||||
w_tld=v_tld;
|
||||
z = w_tld;
|
||||
xi = norm2(z);
|
||||
|
||||
gamma = 1.0;
|
||||
eta = -1.0;
|
||||
theta = 0.0;
|
||||
|
||||
for (int i = 1; i <= MaxIterations; i++) {
|
||||
|
||||
// Breakdown tests
|
||||
assert( rho != 0.0);
|
||||
assert( xi != 0.0);
|
||||
|
||||
v = (1. / rho) * v_tld;
|
||||
y = (1. / rho) * y;
|
||||
|
||||
w = (1. / xi) * w_tld;
|
||||
z = (1. / xi) * z;
|
||||
|
||||
ComplexD Zdelta = innerProduct(z, y); // Complex?
|
||||
std::cout << "Zdelta "<<Zdelta<<std::endl;
|
||||
delta = Zdelta.real();
|
||||
|
||||
y_tld = y;
|
||||
z_tld = z;
|
||||
|
||||
if (i > 1) {
|
||||
p = y_tld - (xi * delta / ep) * p;
|
||||
q = z_tld - (rho * delta / ep) * q;
|
||||
} else {
|
||||
p = y_tld;
|
||||
q = z_tld;
|
||||
}
|
||||
|
||||
LinOp.Op(p,p_tld); // p_tld = A * p;
|
||||
ComplexD Zep = innerProduct(q, p_tld);
|
||||
ep=Zep.real();
|
||||
std::cout << "Zep "<<Zep <<std::endl;
|
||||
// Complex Audit
|
||||
assert(abs(ep)>0);
|
||||
|
||||
beta = ep / delta;
|
||||
assert(abs(beta)>0);
|
||||
|
||||
v_tld = p_tld - beta * v;
|
||||
y = v_tld;
|
||||
|
||||
rho_1 = rho;
|
||||
rho = norm2(y);
|
||||
LinOp.AdjOp(q,w_tld);
|
||||
w_tld = w_tld - beta * w;
|
||||
z = w_tld;
|
||||
|
||||
xi = norm2(z);
|
||||
|
||||
gamma_1 = gamma;
|
||||
theta_1 = theta;
|
||||
|
||||
theta = rho / (gamma_1 * beta);
|
||||
gamma = 1.0 / sqrt(1.0 + theta * theta);
|
||||
std::cout << "theta "<<theta<<std::endl;
|
||||
std::cout << "gamma "<<gamma<<std::endl;
|
||||
|
||||
assert(abs(gamma)> 0.0);
|
||||
|
||||
eta = -eta * rho_1 * gamma* gamma / (beta * gamma_1 * gamma_1);
|
||||
|
||||
if (i > 1) {
|
||||
d = eta * p + (theta_1 * theta_1 * gamma * gamma) * d;
|
||||
s = eta * p_tld + (theta_1 * theta_1 * gamma * gamma) * s;
|
||||
} else {
|
||||
d = eta * p;
|
||||
s = eta * p_tld;
|
||||
}
|
||||
|
||||
x =x+d; // update approximation vector
|
||||
r =r-s; // compute residual
|
||||
|
||||
if ((resid = norm2(r) / normb) <= Tolerance) {
|
||||
return;
|
||||
}
|
||||
std::cout << "Iteration "<<i<<" resid " << resid<<std::endl;
|
||||
}
|
||||
assert(0);
|
||||
return; // no convergence
|
||||
}
|
||||
#else
|
||||
// QMRg5 SMP thesis
|
||||
void operator()(LinearOperatorBase<Field> &LinOp, const Field &b, Field &x)
|
||||
{
|
||||
// Real scalars
|
||||
GridBase *grid = b.Grid();
|
||||
|
||||
Field r(grid);
|
||||
Field p_m(grid), p_m_minus_1(grid), p_m_minus_2(grid);
|
||||
Field v_m(grid), v_m_minus_1(grid), v_m_plus_1(grid);
|
||||
Field tmp(grid);
|
||||
|
||||
RealD w;
|
||||
RealD z1, z2;
|
||||
RealD delta_m, delta_m_minus_1;
|
||||
RealD c_m_plus_1, c_m, c_m_minus_1;
|
||||
RealD s_m_plus_1, s_m, s_m_minus_1;
|
||||
RealD alpha, beta, gamma, epsilon;
|
||||
RealD mu, nu, rho, theta, xi, chi;
|
||||
RealD mod2r, mod2b;
|
||||
RealD tau2, target2;
|
||||
|
||||
mod2b=norm2(b);
|
||||
|
||||
/////////////////////////
|
||||
// Initial residual
|
||||
/////////////////////////
|
||||
LinOp.Op(x,tmp);
|
||||
r = b - tmp;
|
||||
|
||||
/////////////////////////
|
||||
// \mu = \rho = |r_0|
|
||||
/////////////////////////
|
||||
mod2r = norm2(r);
|
||||
rho = sqrt( mod2r);
|
||||
mu=rho;
|
||||
|
||||
std::cout << "QuasiMinimalResidual rho "<< rho<<std::endl;
|
||||
/////////////////////////
|
||||
// Zero negative history
|
||||
/////////////////////////
|
||||
v_m_plus_1 = Zero();
|
||||
v_m_minus_1 = Zero();
|
||||
p_m_minus_1 = Zero();
|
||||
p_m_minus_2 = Zero();
|
||||
|
||||
// v0
|
||||
v_m = (1.0/rho)*r;
|
||||
|
||||
/////////////////////////
|
||||
// Initial coeffs
|
||||
/////////////////////////
|
||||
delta_m_minus_1 = 1.0;
|
||||
c_m_minus_1 = 1.0;
|
||||
c_m = 1.0;
|
||||
s_m_minus_1 = 0.0;
|
||||
s_m = 0.0;
|
||||
|
||||
/////////////////////////
|
||||
// Set up convergence check
|
||||
/////////////////////////
|
||||
tau2 = mod2r;
|
||||
target2 = mod2b * Tolerance*Tolerance;
|
||||
|
||||
for(int iter = 0 ; iter < MaxIterations; iter++){
|
||||
|
||||
/////////////////////////
|
||||
// \delta_m = (v_m, \gamma_5 v_m)
|
||||
/////////////////////////
|
||||
delta_m = innerG5ProductReal(v_m,v_m);
|
||||
std::cout << "QuasiMinimalResidual delta_m "<< delta_m<<std::endl;
|
||||
|
||||
/////////////////////////
|
||||
// tmp = A v_m
|
||||
/////////////////////////
|
||||
LinOp.Op(v_m,tmp);
|
||||
|
||||
/////////////////////////
|
||||
// \alpha = (v_m, \gamma_5 temp) / \delta_m
|
||||
/////////////////////////
|
||||
alpha = innerG5ProductReal(v_m,tmp);
|
||||
alpha = alpha/delta_m ;
|
||||
std::cout << "QuasiMinimalResidual alpha "<< alpha<<std::endl;
|
||||
|
||||
/////////////////////////
|
||||
// \beta = \rho \delta_m / \delta_{m-1}
|
||||
/////////////////////////
|
||||
beta = rho * delta_m / delta_m_minus_1;
|
||||
std::cout << "QuasiMinimalResidual beta "<< beta<<std::endl;
|
||||
|
||||
/////////////////////////
|
||||
// \tilde{v}_{m+1} = temp - \alpha v_m - \beta v_{m-1}
|
||||
/////////////////////////
|
||||
v_m_plus_1 = tmp - alpha*v_m - beta*v_m_minus_1;
|
||||
|
||||
///////////////////////////////
|
||||
// \rho = || \tilde{v}_{m+1} ||
|
||||
///////////////////////////////
|
||||
rho = sqrt( norm2(v_m_plus_1) );
|
||||
std::cout << "QuasiMinimalResidual rho "<< rho<<std::endl;
|
||||
|
||||
///////////////////////////////
|
||||
// v_{m+1} = \tilde{v}_{m+1}
|
||||
///////////////////////////////
|
||||
v_m_plus_1 = (1.0 / rho) * v_m_plus_1;
|
||||
|
||||
////////////////////////////////
|
||||
// QMR recurrence coefficients.
|
||||
////////////////////////////////
|
||||
theta = s_m_minus_1 * beta;
|
||||
gamma = c_m_minus_1 * beta;
|
||||
epsilon = c_m * gamma + s_m * alpha;
|
||||
xi = -s_m * gamma + c_m * alpha;
|
||||
nu = sqrt( xi*xi + rho*rho );
|
||||
c_m_plus_1 = fabs(xi) / nu;
|
||||
if ( xi == 0.0 ) {
|
||||
s_m_plus_1 = 1.0;
|
||||
} else {
|
||||
s_m_plus_1 = c_m_plus_1 * rho / xi;
|
||||
}
|
||||
chi = c_m_plus_1 * xi + s_m_plus_1 * rho;
|
||||
|
||||
std::cout << "QuasiMinimalResidual coeffs "<< theta <<" "<<gamma<<" "<< epsilon<<" "<< xi<<" "<< nu<<std::endl;
|
||||
std::cout << "QuasiMinimalResidual coeffs "<< chi <<std::endl;
|
||||
|
||||
////////////////////////////////
|
||||
//p_m=(v_m - \epsilon p_{m-1} - \theta p_{m-2}) / \chi
|
||||
////////////////////////////////
|
||||
p_m = (1.0/chi) * v_m - (epsilon/chi) * p_m_minus_1 - (theta/chi) * p_m_minus_2;
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// \psi = \psi + c_{m+1} \mu p_m
|
||||
////////////////////////////////////////////////////////////////
|
||||
x = x + ( c_m_plus_1 * mu ) * p_m;
|
||||
|
||||
////////////////////////////////////////
|
||||
//
|
||||
////////////////////////////////////////
|
||||
mu = -s_m_plus_1 * mu;
|
||||
delta_m_minus_1 = delta_m;
|
||||
c_m_minus_1 = c_m;
|
||||
c_m = c_m_plus_1;
|
||||
s_m_minus_1 = s_m;
|
||||
s_m = s_m_plus_1;
|
||||
|
||||
////////////////////////////////////
|
||||
// Could use pointer swizzle games.
|
||||
////////////////////////////////////
|
||||
v_m_minus_1 = v_m;
|
||||
v_m = v_m_plus_1;
|
||||
p_m_minus_2 = p_m_minus_1;
|
||||
p_m_minus_1 = p_m;
|
||||
|
||||
|
||||
/////////////////////////////////////
|
||||
// Convergence checks
|
||||
/////////////////////////////////////
|
||||
z1 = RealD(iter+1.0);
|
||||
z2 = z1 + 1.0;
|
||||
tau2 = tau2 *( z2 / z1 ) * s_m * s_m;
|
||||
std::cout << " QuasiMinimumResidual iteration "<< iter<<std::endl;
|
||||
std::cout << " QuasiMinimumResidual tau bound "<< tau2<<std::endl;
|
||||
|
||||
// Compute true residual
|
||||
mod2r = tau2;
|
||||
if ( 1 || (tau2 < (100.0 * target2)) ) {
|
||||
LinOp.Op(x,tmp);
|
||||
r = b - tmp;
|
||||
mod2r = norm2(r);
|
||||
std::cout << " QuasiMinimumResidual true residual is "<< mod2r<<std::endl;
|
||||
}
|
||||
|
||||
|
||||
if ( mod2r < target2 ) {
|
||||
|
||||
std::cout << " QuasiMinimumResidual has converged"<<std::endl;
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
@ -1,651 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/SchurRedBlack.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_SCHUR_RED_BLACK_H
|
||||
#define GRID_SCHUR_RED_BLACK_H
|
||||
|
||||
|
||||
/*
|
||||
* Red black Schur decomposition
|
||||
*
|
||||
* M = (Mee Meo) = (1 0 ) (Mee 0 ) (1 Mee^{-1} Meo)
|
||||
* (Moe Moo) (Moe Mee^-1 1 ) (0 Moo-Moe Mee^-1 Meo) (0 1 )
|
||||
* = L D U
|
||||
*
|
||||
* L^-1 = (1 0 )
|
||||
* (-MoeMee^{-1} 1 )
|
||||
* L^{dag} = ( 1 Mee^{-dag} Moe^{dag} )
|
||||
* ( 0 1 )
|
||||
* L^{-d} = ( 1 -Mee^{-dag} Moe^{dag} )
|
||||
* ( 0 1 )
|
||||
*
|
||||
* U^-1 = (1 -Mee^{-1} Meo)
|
||||
* (0 1 )
|
||||
* U^{dag} = ( 1 0)
|
||||
* (Meo^dag Mee^{-dag} 1)
|
||||
* U^{-dag} = ( 1 0)
|
||||
* (-Meo^dag Mee^{-dag} 1)
|
||||
***********************
|
||||
* M psi = eta
|
||||
***********************
|
||||
*Odd
|
||||
* i) D_oo psi_o = L^{-1} eta_o
|
||||
* eta_o' = (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
|
||||
*
|
||||
* Wilson:
|
||||
* (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1} eta_o
|
||||
* Stag:
|
||||
* D_oo psi_o = L^{-1} eta = (eta_o - Moe Mee^{-1} eta_e)
|
||||
*
|
||||
* L^-1 eta_o= (1 0 ) (e
|
||||
* (-MoeMee^{-1} 1 )
|
||||
*
|
||||
*Even
|
||||
* ii) Mee psi_e + Meo psi_o = src_e
|
||||
*
|
||||
* => sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
*
|
||||
*
|
||||
* TODO: Other options:
|
||||
*
|
||||
* a) change checkerboards for Schur e<->o
|
||||
*
|
||||
* Left precon by Moo^-1
|
||||
* b) Doo^{dag} M_oo^-dag Moo^-1 Doo psi_0 = (D_oo)^dag M_oo^-dag Moo^-1 L^{-1} eta_o
|
||||
* eta_o' = (D_oo)^dag M_oo^-dag Moo^-1 (eta_o - Moe Mee^{-1} eta_e)
|
||||
*
|
||||
* Right precon by Moo^-1
|
||||
* c) M_oo^-dag Doo^{dag} Doo Moo^-1 phi_0 = M_oo^-dag (D_oo)^dag L^{-1} eta_o
|
||||
* eta_o' = M_oo^-dag (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
|
||||
* psi_o = M_oo^-1 phi_o
|
||||
* TODO: Deflation
|
||||
*/
|
||||
namespace Grid {
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Use base class to share code
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Take a matrix and form a Red Black solver calling a Herm solver
|
||||
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class SchurRedBlackBase {
|
||||
protected:
|
||||
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
|
||||
OperatorFunction<Field> & _HermitianRBSolver;
|
||||
int CBfactorise;
|
||||
bool subGuess;
|
||||
bool useSolnAsInitGuess; // if true user-supplied solution vector is used as initial guess for solver
|
||||
public:
|
||||
|
||||
SchurRedBlackBase(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false,
|
||||
const bool _solnAsInitGuess = false) :
|
||||
_HermitianRBSolver(HermitianRBSolver),
|
||||
useSolnAsInitGuess(_solnAsInitGuess)
|
||||
{
|
||||
CBfactorise = 0;
|
||||
subtractGuess(initSubGuess);
|
||||
};
|
||||
void subtractGuess(const bool initSubGuess)
|
||||
{
|
||||
subGuess = initSubGuess;
|
||||
}
|
||||
bool isSubtractGuess(void)
|
||||
{
|
||||
return subGuess;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Shared code
|
||||
/////////////////////////////////////////////////////////////
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out){
|
||||
ZeroGuesser<Field> guess;
|
||||
(*this)(_Matrix,in,out,guess);
|
||||
}
|
||||
void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out)
|
||||
{
|
||||
ZeroGuesser<Field> guess;
|
||||
(*this)(_Matrix,in,out,guess);
|
||||
}
|
||||
|
||||
void RedBlackSource(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &src_o)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
Field tmp(grid);
|
||||
int nblock = in.size();
|
||||
for(int b=0;b<nblock;b++){
|
||||
RedBlackSource(_Matrix,in[b],tmp,src_o[b]);
|
||||
}
|
||||
}
|
||||
// James can write his own deflated guesser
|
||||
// with optimised code for the inner products
|
||||
// RedBlackSolveSplitGrid();
|
||||
// RedBlackSolve(_Matrix,src_o,sol_o);
|
||||
|
||||
void RedBlackSolution(Matrix &_Matrix, const std::vector<Field> &in, const std::vector<Field> &sol_o, std::vector<Field> &out)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
Field tmp(grid);
|
||||
int nblock = in.size();
|
||||
for(int b=0;b<nblock;b++) {
|
||||
pickCheckerboard(Even,tmp,in[b]);
|
||||
RedBlackSolution(_Matrix,sol_o[b],tmp,out[b]);
|
||||
}
|
||||
}
|
||||
|
||||
template<class Guesser>
|
||||
void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out,Guesser &guess)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
int nblock = in.size();
|
||||
|
||||
std::vector<Field> src_o(nblock,grid);
|
||||
std::vector<Field> sol_o(nblock,grid);
|
||||
|
||||
std::vector<Field> guess_save;
|
||||
|
||||
Field resid(fgrid);
|
||||
Field tmp(grid);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Prepare RedBlack source
|
||||
////////////////////////////////////////////////
|
||||
RedBlackSource(_Matrix,in,src_o);
|
||||
// for(int b=0;b<nblock;b++){
|
||||
// RedBlackSource(_Matrix,in[b],tmp,src_o[b]);
|
||||
// }
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Make the guesses
|
||||
////////////////////////////////////////////////
|
||||
if ( subGuess ) guess_save.resize(nblock,grid);
|
||||
|
||||
|
||||
if(useSolnAsInitGuess) {
|
||||
for(int b=0;b<nblock;b++){
|
||||
pickCheckerboard(Odd, sol_o[b], out[b]);
|
||||
}
|
||||
} else {
|
||||
guess(src_o, sol_o);
|
||||
}
|
||||
|
||||
if ( subGuess ) {
|
||||
for(int b=0;b<nblock;b++){
|
||||
guess_save[b] = sol_o[b];
|
||||
}
|
||||
}
|
||||
//////////////////////////////////////////////////////////////
|
||||
// Call the block solver
|
||||
//////////////////////////////////////////////////////////////
|
||||
std::cout<<GridLogMessage << "SchurRedBlackBase calling the solver for "<<nblock<<" RHS" <<std::endl;
|
||||
RedBlackSolve(_Matrix,src_o,sol_o);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// A2A boolean behavioural control & reconstruct other checkerboard
|
||||
////////////////////////////////////////////////
|
||||
for(int b=0;b<nblock;b++) {
|
||||
|
||||
if (subGuess) sol_o[b] = sol_o[b] - guess_save[b];
|
||||
|
||||
///////// Needs even source //////////////
|
||||
pickCheckerboard(Even,tmp,in[b]);
|
||||
RedBlackSolution(_Matrix,sol_o[b],tmp,out[b]);
|
||||
|
||||
/////////////////////////////////////////////////
|
||||
// Check unprec residual if possible
|
||||
/////////////////////////////////////////////////
|
||||
if ( ! subGuess ) {
|
||||
_Matrix.M(out[b],resid);
|
||||
resid = resid-in[b];
|
||||
RealD ns = norm2(in[b]);
|
||||
RealD nr = norm2(resid);
|
||||
|
||||
std::cout<<GridLogMessage<< "SchurRedBlackBase solver true unprec resid["<<b<<"] "<<std::sqrt(nr/ns) << std::endl;
|
||||
} else {
|
||||
std::cout<<GridLogMessage<< "SchurRedBlackBase Guess subtracted after solve["<<b<<"] " << std::endl;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
template<class Guesser>
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
|
||||
|
||||
// FIXME CGdiagonalMee not implemented virtual function
|
||||
// FIXME use CBfactorise to control schur decomp
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field resid(fgrid);
|
||||
Field src_o(grid);
|
||||
Field src_e(grid);
|
||||
Field sol_o(grid);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// RedBlack source
|
||||
////////////////////////////////////////////////
|
||||
RedBlackSource(_Matrix,in,src_e,src_o);
|
||||
|
||||
////////////////////////////////
|
||||
// Construct the guess
|
||||
////////////////////////////////
|
||||
if(useSolnAsInitGuess) {
|
||||
pickCheckerboard(Odd, sol_o, out);
|
||||
} else {
|
||||
guess(src_o,sol_o);
|
||||
}
|
||||
|
||||
Field guess_save(grid);
|
||||
guess_save = sol_o;
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// Call the red-black solver
|
||||
//////////////////////////////////////////////////////////////
|
||||
RedBlackSolve(_Matrix,src_o,sol_o);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Fionn A2A boolean behavioural control
|
||||
////////////////////////////////////////////////
|
||||
if (subGuess) sol_o= sol_o-guess_save;
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// RedBlack solution needs the even source
|
||||
///////////////////////////////////////////////////
|
||||
RedBlackSolution(_Matrix,sol_o,src_e,out);
|
||||
|
||||
// Verify the unprec residual
|
||||
if ( ! subGuess ) {
|
||||
_Matrix.M(out,resid);
|
||||
resid = resid-in;
|
||||
RealD ns = norm2(in);
|
||||
RealD nr = norm2(resid);
|
||||
|
||||
std::cout<<GridLogMessage << "SchurRedBlackBase solver true unprec resid "<< std::sqrt(nr/ns) << std::endl;
|
||||
} else {
|
||||
std::cout << GridLogMessage << "SchurRedBlackBase Guess subtracted after solve." << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Override in derived.
|
||||
/////////////////////////////////////////////////////////////
|
||||
virtual void RedBlackSource (Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o) =0;
|
||||
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol) =0;
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o) =0;
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)=0;
|
||||
|
||||
};
|
||||
|
||||
template<class Field> class SchurRedBlackStaggeredSolve : public SchurRedBlackBase<Field> {
|
||||
public:
|
||||
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
|
||||
|
||||
SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false,
|
||||
const bool _solnAsInitGuess = false)
|
||||
: SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess,_solnAsInitGuess)
|
||||
{
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Override RedBlack specialisation
|
||||
//////////////////////////////////////////////////////
|
||||
virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field tmp(grid);
|
||||
Field Mtmp(grid);
|
||||
|
||||
pickCheckerboard(Even,src_e,src);
|
||||
pickCheckerboard(Odd ,src_o,src);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// src_o = (source_o - Moe MeeInv source_e)
|
||||
/////////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(src_e,tmp); assert( tmp.Checkerboard() ==Even);
|
||||
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.Checkerboard() ==Odd);
|
||||
tmp=src_o-Mtmp; assert( tmp.Checkerboard() ==Odd);
|
||||
|
||||
_Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source from dumb choice of matrix norm.
|
||||
}
|
||||
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e_c,Field &sol)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field tmp(grid);
|
||||
Field sol_e(grid);
|
||||
Field src_e(grid);
|
||||
|
||||
src_e = src_e_c; // Const correctness
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
///////////////////////////////////////////////////
|
||||
_Matrix.Meooe(sol_o,tmp); assert( tmp.Checkerboard() ==Even);
|
||||
src_e = src_e-tmp; assert( src_e.Checkerboard() ==Even);
|
||||
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.Checkerboard() ==Even);
|
||||
|
||||
setCheckerboard(sol,sol_e); assert( sol_e.Checkerboard() ==Even);
|
||||
setCheckerboard(sol,sol_o); assert( sol_o.Checkerboard() ==Odd );
|
||||
}
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o)
|
||||
{
|
||||
SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.Checkerboard()==Odd);
|
||||
};
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
|
||||
{
|
||||
SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
|
||||
}
|
||||
};
|
||||
template<class Field> using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve<Field>;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Site diagonal has Mooee on it.
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class SchurRedBlackDiagMooeeSolve : public SchurRedBlackBase<Field> {
|
||||
public:
|
||||
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
|
||||
|
||||
SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false,
|
||||
const bool _solnAsInitGuess = false)
|
||||
: SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess,_solnAsInitGuess) {};
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Override RedBlack specialisation
|
||||
//////////////////////////////////////////////////////
|
||||
virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field tmp(grid);
|
||||
Field Mtmp(grid);
|
||||
|
||||
pickCheckerboard(Even,src_e,src);
|
||||
pickCheckerboard(Odd ,src_o,src);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// src_o = Mdag * (source_o - Moe MeeInv source_e)
|
||||
/////////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(src_e,tmp); assert( tmp.Checkerboard() ==Even);
|
||||
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.Checkerboard() ==Odd);
|
||||
tmp=src_o-Mtmp; assert( tmp.Checkerboard() ==Odd);
|
||||
|
||||
// get the right MpcDag
|
||||
SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.Checkerboard() ==Odd);
|
||||
|
||||
}
|
||||
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field tmp(grid);
|
||||
Field sol_e(grid);
|
||||
Field src_e_i(grid);
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
///////////////////////////////////////////////////
|
||||
_Matrix.Meooe(sol_o,tmp); assert( tmp.Checkerboard() ==Even);
|
||||
src_e_i = src_e-tmp; assert( src_e_i.Checkerboard() ==Even);
|
||||
_Matrix.MooeeInv(src_e_i,sol_e); assert( sol_e.Checkerboard() ==Even);
|
||||
|
||||
setCheckerboard(sol,sol_e); assert( sol_e.Checkerboard() ==Even);
|
||||
setCheckerboard(sol,sol_o); assert( sol_o.Checkerboard() ==Odd );
|
||||
}
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o)
|
||||
{
|
||||
SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.Checkerboard()==Odd);
|
||||
};
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
|
||||
{
|
||||
SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Field> class NonHermitianSchurRedBlackDiagMooeeSolve : public SchurRedBlackBase<Field>
|
||||
{
|
||||
public:
|
||||
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
|
||||
|
||||
NonHermitianSchurRedBlackDiagMooeeSolve(OperatorFunction<Field>& RBSolver, const bool initSubGuess = false,
|
||||
const bool _solnAsInitGuess = false)
|
||||
: SchurRedBlackBase<Field>(RBSolver, initSubGuess, _solnAsInitGuess) {};
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Override RedBlack specialisation
|
||||
//////////////////////////////////////////////////////
|
||||
virtual void RedBlackSource(Matrix& _Matrix, const Field& src, Field& src_e, Field& src_o)
|
||||
{
|
||||
GridBase* grid = _Matrix.RedBlackGrid();
|
||||
GridBase* fgrid = _Matrix.Grid();
|
||||
|
||||
Field tmp(grid);
|
||||
Field Mtmp(grid);
|
||||
|
||||
pickCheckerboard(Even, src_e, src);
|
||||
pickCheckerboard(Odd , src_o, src);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// src_o = Mdag * (source_o - Moe MeeInv source_e)
|
||||
/////////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(src_e, tmp); assert( tmp.Checkerboard() == Even );
|
||||
_Matrix.Meooe (tmp, Mtmp); assert( Mtmp.Checkerboard() == Odd );
|
||||
src_o -= Mtmp; assert( src_o.Checkerboard() == Odd );
|
||||
}
|
||||
|
||||
virtual void RedBlackSolution(Matrix& _Matrix, const Field& sol_o, const Field& src_e, Field& sol)
|
||||
{
|
||||
GridBase* grid = _Matrix.RedBlackGrid();
|
||||
GridBase* fgrid = _Matrix.Grid();
|
||||
|
||||
Field tmp(grid);
|
||||
Field sol_e(grid);
|
||||
Field src_e_i(grid);
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
///////////////////////////////////////////////////
|
||||
_Matrix.Meooe(sol_o, tmp); assert( tmp.Checkerboard() == Even );
|
||||
src_e_i = src_e - tmp; assert( src_e_i.Checkerboard() == Even );
|
||||
_Matrix.MooeeInv(src_e_i, sol_e); assert( sol_e.Checkerboard() == Even );
|
||||
|
||||
setCheckerboard(sol, sol_e); assert( sol_e.Checkerboard() == Even );
|
||||
setCheckerboard(sol, sol_o); assert( sol_o.Checkerboard() == Odd );
|
||||
}
|
||||
|
||||
virtual void RedBlackSolve(Matrix& _Matrix, const Field& src_o, Field& sol_o)
|
||||
{
|
||||
NonHermitianSchurDiagMooeeOperator<Matrix,Field> _OpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_OpEO, src_o, sol_o); assert(sol_o.Checkerboard() == Odd);
|
||||
}
|
||||
|
||||
virtual void RedBlackSolve(Matrix& _Matrix, const std::vector<Field>& src_o, std::vector<Field>& sol_o)
|
||||
{
|
||||
NonHermitianSchurDiagMooeeOperator<Matrix,Field> _OpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_OpEO, src_o, sol_o);
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Site diagonal is identity, right preconditioned by Mee^inv
|
||||
// ( 1 - Meo Moo^inv Moe Mee^inv ) phi =( 1 - Meo Moo^inv Moe Mee^inv ) Mee psi = = eta = eta
|
||||
//=> psi = MeeInv phi
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class SchurRedBlackDiagTwoSolve : public SchurRedBlackBase<Field> {
|
||||
public:
|
||||
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Wrap the usual normal equations Schur trick
|
||||
/////////////////////////////////////////////////////
|
||||
SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false,
|
||||
const bool _solnAsInitGuess = false)
|
||||
: SchurRedBlackBase<Field>(HermitianRBSolver,initSubGuess,_solnAsInitGuess) {};
|
||||
|
||||
virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
|
||||
Field tmp(grid);
|
||||
Field Mtmp(grid);
|
||||
|
||||
pickCheckerboard(Even,src_e,src);
|
||||
pickCheckerboard(Odd ,src_o,src);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// src_o = Mdag * (source_o - Moe MeeInv source_e)
|
||||
/////////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(src_e,tmp); assert( tmp.Checkerboard() ==Even);
|
||||
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.Checkerboard() ==Odd);
|
||||
tmp=src_o-Mtmp; assert( tmp.Checkerboard() ==Odd);
|
||||
|
||||
// get the right MpcDag
|
||||
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.Checkerboard() ==Odd);
|
||||
}
|
||||
|
||||
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field sol_o_i(grid);
|
||||
Field tmp(grid);
|
||||
Field sol_e(grid);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// MooeeInv due to pecond
|
||||
////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(sol_o,tmp);
|
||||
sol_o_i = tmp;
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
///////////////////////////////////////////////////
|
||||
_Matrix.Meooe(sol_o_i,tmp); assert( tmp.Checkerboard() ==Even);
|
||||
tmp = src_e-tmp; assert( src_e.Checkerboard() ==Even);
|
||||
_Matrix.MooeeInv(tmp,sol_e); assert( sol_e.Checkerboard() ==Even);
|
||||
|
||||
setCheckerboard(sol,sol_e); assert( sol_e.Checkerboard() ==Even);
|
||||
setCheckerboard(sol,sol_o_i); assert( sol_o_i.Checkerboard() ==Odd );
|
||||
};
|
||||
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o)
|
||||
{
|
||||
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
|
||||
};
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
|
||||
{
|
||||
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Field> class NonHermitianSchurRedBlackDiagTwoSolve : public SchurRedBlackBase<Field>
|
||||
{
|
||||
public:
|
||||
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Wrap the usual normal equations Schur trick
|
||||
/////////////////////////////////////////////////////
|
||||
NonHermitianSchurRedBlackDiagTwoSolve(OperatorFunction<Field>& RBSolver, const bool initSubGuess = false,
|
||||
const bool _solnAsInitGuess = false)
|
||||
: SchurRedBlackBase<Field>(RBSolver, initSubGuess, _solnAsInitGuess) {};
|
||||
|
||||
virtual void RedBlackSource(Matrix& _Matrix, const Field& src, Field& src_e, Field& src_o)
|
||||
{
|
||||
GridBase* grid = _Matrix.RedBlackGrid();
|
||||
GridBase* fgrid = _Matrix.Grid();
|
||||
|
||||
Field tmp(grid);
|
||||
Field Mtmp(grid);
|
||||
|
||||
pickCheckerboard(Even, src_e, src);
|
||||
pickCheckerboard(Odd , src_o, src);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// src_o = Mdag * (source_o - Moe MeeInv source_e)
|
||||
/////////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(src_e, tmp); assert( tmp.Checkerboard() == Even );
|
||||
_Matrix.Meooe (tmp, Mtmp); assert( Mtmp.Checkerboard() == Odd );
|
||||
src_o -= Mtmp; assert( src_o.Checkerboard() == Odd );
|
||||
}
|
||||
|
||||
virtual void RedBlackSolution(Matrix& _Matrix, const Field& sol_o, const Field& src_e, Field& sol)
|
||||
{
|
||||
GridBase* grid = _Matrix.RedBlackGrid();
|
||||
GridBase* fgrid = _Matrix.Grid();
|
||||
|
||||
Field sol_o_i(grid);
|
||||
Field tmp(grid);
|
||||
Field sol_e(grid);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// MooeeInv due to pecond
|
||||
////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(sol_o, tmp);
|
||||
sol_o_i = tmp;
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
///////////////////////////////////////////////////
|
||||
_Matrix.Meooe(sol_o_i, tmp); assert( tmp.Checkerboard() == Even );
|
||||
tmp = src_e - tmp; assert( src_e.Checkerboard() == Even );
|
||||
_Matrix.MooeeInv(tmp, sol_e); assert( sol_e.Checkerboard() == Even );
|
||||
|
||||
setCheckerboard(sol, sol_e); assert( sol_e.Checkerboard() == Even );
|
||||
setCheckerboard(sol, sol_o_i); assert( sol_o_i.Checkerboard() == Odd );
|
||||
};
|
||||
|
||||
virtual void RedBlackSolve(Matrix& _Matrix, const Field& src_o, Field& sol_o)
|
||||
{
|
||||
NonHermitianSchurDiagTwoOperator<Matrix,Field> _OpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_OpEO, src_o, sol_o);
|
||||
};
|
||||
|
||||
virtual void RedBlackSolve(Matrix& _Matrix, const std::vector<Field>& src_o, std::vector<Field>& sol_o)
|
||||
{
|
||||
NonHermitianSchurDiagTwoOperator<Matrix,Field> _OpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_OpEO, src_o, sol_o);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
@ -1,183 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/AlignedAllocator.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<typename _Tp>
|
||||
class alignedAllocator {
|
||||
public:
|
||||
typedef std::size_t size_type;
|
||||
typedef std::ptrdiff_t difference_type;
|
||||
typedef _Tp* pointer;
|
||||
typedef const _Tp* const_pointer;
|
||||
typedef _Tp& reference;
|
||||
typedef const _Tp& const_reference;
|
||||
typedef _Tp value_type;
|
||||
|
||||
template<typename _Tp1> struct rebind { typedef alignedAllocator<_Tp1> other; };
|
||||
alignedAllocator() throw() { }
|
||||
alignedAllocator(const alignedAllocator&) throw() { }
|
||||
template<typename _Tp1> alignedAllocator(const alignedAllocator<_Tp1>&) throw() { }
|
||||
~alignedAllocator() throw() { }
|
||||
pointer address(reference __x) const { return &__x; }
|
||||
size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
|
||||
|
||||
pointer allocate(size_type __n, const void* _p= 0)
|
||||
{
|
||||
size_type bytes = __n*sizeof(_Tp);
|
||||
profilerAllocate(bytes);
|
||||
_Tp *ptr = (_Tp*) MemoryManager::CpuAllocate(bytes);
|
||||
assert( ( (_Tp*)ptr != (_Tp *)NULL ) );
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void deallocate(pointer __p, size_type __n)
|
||||
{
|
||||
size_type bytes = __n * sizeof(_Tp);
|
||||
profilerFree(bytes);
|
||||
MemoryManager::CpuFree((void *)__p,bytes);
|
||||
}
|
||||
|
||||
// FIXME: hack for the copy constructor: it must be avoided to avoid single thread loop
|
||||
void construct(pointer __p, const _Tp& __val) { assert(0);};
|
||||
void construct(pointer __p) { };
|
||||
void destroy(pointer __p) { };
|
||||
};
|
||||
template<typename _Tp> inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
|
||||
template<typename _Tp> inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Unified virtual memory
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
template<typename _Tp>
|
||||
class uvmAllocator {
|
||||
public:
|
||||
typedef std::size_t size_type;
|
||||
typedef std::ptrdiff_t difference_type;
|
||||
typedef _Tp* pointer;
|
||||
typedef const _Tp* const_pointer;
|
||||
typedef _Tp& reference;
|
||||
typedef const _Tp& const_reference;
|
||||
typedef _Tp value_type;
|
||||
|
||||
template<typename _Tp1> struct rebind { typedef uvmAllocator<_Tp1> other; };
|
||||
uvmAllocator() throw() { }
|
||||
uvmAllocator(const uvmAllocator&) throw() { }
|
||||
template<typename _Tp1> uvmAllocator(const uvmAllocator<_Tp1>&) throw() { }
|
||||
~uvmAllocator() throw() { }
|
||||
pointer address(reference __x) const { return &__x; }
|
||||
size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
|
||||
|
||||
pointer allocate(size_type __n, const void* _p= 0)
|
||||
{
|
||||
size_type bytes = __n*sizeof(_Tp);
|
||||
profilerAllocate(bytes);
|
||||
_Tp *ptr = (_Tp*) MemoryManager::SharedAllocate(bytes);
|
||||
assert( ( (_Tp*)ptr != (_Tp *)NULL ) );
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void deallocate(pointer __p, size_type __n)
|
||||
{
|
||||
size_type bytes = __n * sizeof(_Tp);
|
||||
profilerFree(bytes);
|
||||
MemoryManager::SharedFree((void *)__p,bytes);
|
||||
}
|
||||
|
||||
void construct(pointer __p, const _Tp& __val) { new((void *)__p) _Tp(__val); };
|
||||
void construct(pointer __p) { };
|
||||
void destroy(pointer __p) { };
|
||||
};
|
||||
template<typename _Tp> inline bool operator==(const uvmAllocator<_Tp>&, const uvmAllocator<_Tp>&){ return true; }
|
||||
template<typename _Tp> inline bool operator!=(const uvmAllocator<_Tp>&, const uvmAllocator<_Tp>&){ return false; }
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Device memory
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<typename _Tp>
|
||||
class devAllocator {
|
||||
public:
|
||||
typedef std::size_t size_type;
|
||||
typedef std::ptrdiff_t difference_type;
|
||||
typedef _Tp* pointer;
|
||||
typedef const _Tp* const_pointer;
|
||||
typedef _Tp& reference;
|
||||
typedef const _Tp& const_reference;
|
||||
typedef _Tp value_type;
|
||||
|
||||
template<typename _Tp1> struct rebind { typedef devAllocator<_Tp1> other; };
|
||||
devAllocator() throw() { }
|
||||
devAllocator(const devAllocator&) throw() { }
|
||||
template<typename _Tp1> devAllocator(const devAllocator<_Tp1>&) throw() { }
|
||||
~devAllocator() throw() { }
|
||||
pointer address(reference __x) const { return &__x; }
|
||||
size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
|
||||
|
||||
pointer allocate(size_type __n, const void* _p= 0)
|
||||
{
|
||||
size_type bytes = __n*sizeof(_Tp);
|
||||
profilerAllocate(bytes);
|
||||
_Tp *ptr = (_Tp*) MemoryManager::AcceleratorAllocate(bytes);
|
||||
assert( ( (_Tp*)ptr != (_Tp *)NULL ) );
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void deallocate(pointer __p, size_type __n)
|
||||
{
|
||||
size_type bytes = __n * sizeof(_Tp);
|
||||
profilerFree(bytes);
|
||||
MemoryManager::AcceleratorFree((void *)__p,bytes);
|
||||
}
|
||||
void construct(pointer __p, const _Tp& __val) { };
|
||||
void construct(pointer __p) { };
|
||||
void destroy(pointer __p) { };
|
||||
};
|
||||
template<typename _Tp> inline bool operator==(const devAllocator<_Tp>&, const devAllocator<_Tp>&){ return true; }
|
||||
template<typename _Tp> inline bool operator!=(const devAllocator<_Tp>&, const devAllocator<_Tp>&){ return false; }
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Template typedefs
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
#ifdef ACCELERATOR_CSHIFT
|
||||
// Cshift on device
|
||||
template<class T> using cshiftAllocator = devAllocator<T>;
|
||||
#else
|
||||
// Cshift on host
|
||||
template<class T> using cshiftAllocator = std::allocator<T>;
|
||||
#endif
|
||||
|
||||
template<class T> using Vector = std::vector<T,uvmAllocator<T> >;
|
||||
template<class T> using stencilVector = std::vector<T,alignedAllocator<T> >;
|
||||
template<class T> using commVector = std::vector<T,devAllocator<T> >;
|
||||
template<class T> using cshiftVector = std::vector<T,cshiftAllocator<T> >;
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
@ -1,4 +0,0 @@
|
||||
#pragma once
|
||||
#include <Grid/allocator/MemoryStats.h>
|
||||
#include <Grid/allocator/MemoryManager.h>
|
||||
#include <Grid/allocator/AlignedAllocator.h>
|
@ -1,324 +0,0 @@
|
||||
#include <Grid/GridCore.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
/*Allocation types, saying which pointer cache should be used*/
|
||||
#define Cpu (0)
|
||||
#define CpuHuge (1)
|
||||
#define CpuSmall (2)
|
||||
#define Acc (3)
|
||||
#define AccHuge (4)
|
||||
#define AccSmall (5)
|
||||
#define Shared (6)
|
||||
#define SharedHuge (7)
|
||||
#define SharedSmall (8)
|
||||
#undef GRID_MM_VERBOSE
|
||||
uint64_t total_shared;
|
||||
uint64_t total_device;
|
||||
uint64_t total_host;;
|
||||
void MemoryManager::PrintBytes(void)
|
||||
{
|
||||
std::cout << " MemoryManager : ------------------------------------ "<<std::endl;
|
||||
std::cout << " MemoryManager : PrintBytes "<<std::endl;
|
||||
std::cout << " MemoryManager : ------------------------------------ "<<std::endl;
|
||||
std::cout << " MemoryManager : "<<(total_shared>>20)<<" shared Mbytes "<<std::endl;
|
||||
std::cout << " MemoryManager : "<<(total_device>>20)<<" accelerator Mbytes "<<std::endl;
|
||||
std::cout << " MemoryManager : "<<(total_host>>20) <<" cpu Mbytes "<<std::endl;
|
||||
uint64_t cacheBytes;
|
||||
cacheBytes = CacheBytes[Cpu];
|
||||
std::cout << " MemoryManager : "<<(cacheBytes>>20) <<" cpu cache Mbytes "<<std::endl;
|
||||
cacheBytes = CacheBytes[Acc];
|
||||
std::cout << " MemoryManager : "<<(cacheBytes>>20) <<" acc cache Mbytes "<<std::endl;
|
||||
cacheBytes = CacheBytes[Shared];
|
||||
std::cout << " MemoryManager : "<<(cacheBytes>>20) <<" shared cache Mbytes "<<std::endl;
|
||||
|
||||
#ifdef GRID_CUDA
|
||||
cuda_mem();
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
uint64_t MemoryManager::DeviceCacheBytes() { return CacheBytes[Acc] + CacheBytes[AccHuge] + CacheBytes[AccSmall]; }
|
||||
uint64_t MemoryManager::HostCacheBytes() { return CacheBytes[Cpu] + CacheBytes[CpuHuge] + CacheBytes[CpuSmall]; }
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Data tables for recently freed pooiniter caches
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
MemoryManager::AllocationCacheEntry MemoryManager::Entries[MemoryManager::NallocType][MemoryManager::NallocCacheMax];
|
||||
int MemoryManager::Victim[MemoryManager::NallocType];
|
||||
int MemoryManager::Ncache[MemoryManager::NallocType] = { 2, 0, 8, 8, 0, 16, 8, 0, 16 };
|
||||
uint64_t MemoryManager::CacheBytes[MemoryManager::NallocType];
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Actual allocation and deallocation utils
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
void *MemoryManager::AcceleratorAllocate(size_t bytes)
|
||||
{
|
||||
total_device+=bytes;
|
||||
void *ptr = (void *) Lookup(bytes,Acc);
|
||||
if ( ptr == (void *) NULL ) {
|
||||
ptr = (void *) acceleratorAllocDevice(bytes);
|
||||
}
|
||||
#ifdef GRID_MM_VERBOSE
|
||||
std::cout <<"AcceleratorAllocate "<<std::endl;
|
||||
PrintBytes();
|
||||
#endif
|
||||
return ptr;
|
||||
}
|
||||
void MemoryManager::AcceleratorFree (void *ptr,size_t bytes)
|
||||
{
|
||||
total_device-=bytes;
|
||||
void *__freeme = Insert(ptr,bytes,Acc);
|
||||
if ( __freeme ) {
|
||||
acceleratorFreeDevice(__freeme);
|
||||
}
|
||||
#ifdef GRID_MM_VERBOSE
|
||||
std::cout <<"AcceleratorFree "<<std::endl;
|
||||
PrintBytes();
|
||||
#endif
|
||||
}
|
||||
void *MemoryManager::SharedAllocate(size_t bytes)
|
||||
{
|
||||
total_shared+=bytes;
|
||||
void *ptr = (void *) Lookup(bytes,Shared);
|
||||
if ( ptr == (void *) NULL ) {
|
||||
ptr = (void *) acceleratorAllocShared(bytes);
|
||||
}
|
||||
#ifdef GRID_MM_VERBOSE
|
||||
std::cout <<"SharedAllocate "<<std::endl;
|
||||
PrintBytes();
|
||||
#endif
|
||||
return ptr;
|
||||
}
|
||||
void MemoryManager::SharedFree (void *ptr,size_t bytes)
|
||||
{
|
||||
total_shared-=bytes;
|
||||
void *__freeme = Insert(ptr,bytes,Shared);
|
||||
if ( __freeme ) {
|
||||
acceleratorFreeShared(__freeme);
|
||||
}
|
||||
#ifdef GRID_MM_VERBOSE
|
||||
std::cout <<"SharedFree "<<std::endl;
|
||||
PrintBytes();
|
||||
#endif
|
||||
}
|
||||
#ifdef GRID_UVM
|
||||
void *MemoryManager::CpuAllocate(size_t bytes)
|
||||
{
|
||||
total_host+=bytes;
|
||||
void *ptr = (void *) Lookup(bytes,Cpu);
|
||||
if ( ptr == (void *) NULL ) {
|
||||
ptr = (void *) acceleratorAllocShared(bytes);
|
||||
}
|
||||
#ifdef GRID_MM_VERBOSE
|
||||
std::cout <<"CpuAllocate "<<std::endl;
|
||||
PrintBytes();
|
||||
#endif
|
||||
return ptr;
|
||||
}
|
||||
void MemoryManager::CpuFree (void *_ptr,size_t bytes)
|
||||
{
|
||||
total_host-=bytes;
|
||||
NotifyDeletion(_ptr);
|
||||
void *__freeme = Insert(_ptr,bytes,Cpu);
|
||||
if ( __freeme ) {
|
||||
acceleratorFreeShared(__freeme);
|
||||
}
|
||||
#ifdef GRID_MM_VERBOSE
|
||||
std::cout <<"CpuFree "<<std::endl;
|
||||
PrintBytes();
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
void *MemoryManager::CpuAllocate(size_t bytes)
|
||||
{
|
||||
total_host+=bytes;
|
||||
void *ptr = (void *) Lookup(bytes,Cpu);
|
||||
if ( ptr == (void *) NULL ) {
|
||||
ptr = (void *) acceleratorAllocCpu(bytes);
|
||||
}
|
||||
#ifdef GRID_MM_VERBOSE
|
||||
std::cout <<"CpuAllocate "<<std::endl;
|
||||
PrintBytes();
|
||||
#endif
|
||||
return ptr;
|
||||
}
|
||||
void MemoryManager::CpuFree (void *_ptr,size_t bytes)
|
||||
{
|
||||
total_host-=bytes;
|
||||
NotifyDeletion(_ptr);
|
||||
void *__freeme = Insert(_ptr,bytes,Cpu);
|
||||
if ( __freeme ) {
|
||||
acceleratorFreeCpu(__freeme);
|
||||
}
|
||||
#ifdef GRID_MM_VERBOSE
|
||||
std::cout <<"CpuFree "<<std::endl;
|
||||
PrintBytes();
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////
|
||||
// call only once
|
||||
//////////////////////////////////////////
|
||||
void MemoryManager::Init(void)
|
||||
{
|
||||
|
||||
char * str;
|
||||
int Nc;
|
||||
|
||||
str= getenv("GRID_ALLOC_NCACHE_LARGE");
|
||||
if ( str ) {
|
||||
Nc = atoi(str);
|
||||
if ( (Nc>=0) && (Nc < NallocCacheMax)) {
|
||||
Ncache[Cpu]=Nc;
|
||||
Ncache[Acc]=Nc;
|
||||
Ncache[Shared]=Nc;
|
||||
}
|
||||
}
|
||||
|
||||
str= getenv("GRID_ALLOC_NCACHE_HUGE");
|
||||
if ( str ) {
|
||||
Nc = atoi(str);
|
||||
if ( (Nc>=0) && (Nc < NallocCacheMax)) {
|
||||
Ncache[CpuHuge]=Nc;
|
||||
Ncache[AccHuge]=Nc;
|
||||
Ncache[SharedHuge]=Nc;
|
||||
}
|
||||
}
|
||||
|
||||
str= getenv("GRID_ALLOC_NCACHE_SMALL");
|
||||
if ( str ) {
|
||||
Nc = atoi(str);
|
||||
if ( (Nc>=0) && (Nc < NallocCacheMax)) {
|
||||
Ncache[CpuSmall]=Nc;
|
||||
Ncache[AccSmall]=Nc;
|
||||
Ncache[SharedSmall]=Nc;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void MemoryManager::InitMessage(void) {
|
||||
|
||||
#ifndef GRID_UVM
|
||||
std::cout << GridLogMessage << "MemoryManager Cache "<< MemoryManager::DeviceMaxBytes <<" bytes "<<std::endl;
|
||||
#endif
|
||||
|
||||
std::cout << GridLogMessage<< "MemoryManager::Init() setting up"<<std::endl;
|
||||
#ifdef ALLOCATION_CACHE
|
||||
std::cout << GridLogMessage<< "MemoryManager::Init() cache pool for recent host allocations: SMALL "<<Ncache[CpuSmall]<<" LARGE "<<Ncache[Cpu]<<" HUGE "<<Ncache[CpuHuge]<<std::endl;
|
||||
std::cout << GridLogMessage<< "MemoryManager::Init() cache pool for recent device allocations: SMALL "<<Ncache[AccSmall]<<" LARGE "<<Ncache[Acc]<<" Huge "<<Ncache[AccHuge]<<std::endl;
|
||||
std::cout << GridLogMessage<< "MemoryManager::Init() cache pool for recent shared allocations: SMALL "<<Ncache[SharedSmall]<<" LARGE "<<Ncache[Shared]<<" Huge "<<Ncache[SharedHuge]<<std::endl;
|
||||
#endif
|
||||
|
||||
#ifdef GRID_UVM
|
||||
std::cout << GridLogMessage<< "MemoryManager::Init() Unified memory space"<<std::endl;
|
||||
#ifdef GRID_CUDA
|
||||
std::cout << GridLogMessage<< "MemoryManager::Init() Using cudaMallocManaged"<<std::endl;
|
||||
#endif
|
||||
#ifdef GRID_HIP
|
||||
std::cout << GridLogMessage<< "MemoryManager::Init() Using hipMallocManaged"<<std::endl;
|
||||
#endif
|
||||
#ifdef GRID_SYCL
|
||||
std::cout << GridLogMessage<< "MemoryManager::Init() Using SYCL malloc_shared"<<std::endl;
|
||||
#endif
|
||||
#else
|
||||
std::cout << GridLogMessage<< "MemoryManager::Init() Non unified: Caching accelerator data in dedicated memory"<<std::endl;
|
||||
#ifdef GRID_CUDA
|
||||
std::cout << GridLogMessage<< "MemoryManager::Init() Using cudaMalloc"<<std::endl;
|
||||
#endif
|
||||
#ifdef GRID_HIP
|
||||
std::cout << GridLogMessage<< "MemoryManager::Init() Using hipMalloc"<<std::endl;
|
||||
#endif
|
||||
#ifdef GRID_SYCL
|
||||
std::cout << GridLogMessage<< "MemoryManager::Init() Using SYCL malloc_device"<<std::endl;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
void *MemoryManager::Insert(void *ptr,size_t bytes,int type)
|
||||
{
|
||||
#ifdef ALLOCATION_CACHE
|
||||
int cache;
|
||||
if (bytes < GRID_ALLOC_SMALL_LIMIT) cache = type + 2;
|
||||
else if (bytes >= GRID_ALLOC_HUGE_LIMIT) cache = type + 1;
|
||||
else cache = type;
|
||||
|
||||
return Insert(ptr,bytes,Entries[cache],Ncache[cache],Victim[cache],CacheBytes[cache]);
|
||||
#else
|
||||
return ptr;
|
||||
#endif
|
||||
}
|
||||
|
||||
void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim, uint64_t &cacheBytes)
|
||||
{
|
||||
#ifdef GRID_OMP
|
||||
assert(omp_in_parallel()==0);
|
||||
#endif
|
||||
|
||||
if (ncache == 0) return ptr;
|
||||
|
||||
void * ret = NULL;
|
||||
int v = -1;
|
||||
|
||||
for(int e=0;e<ncache;e++) {
|
||||
if ( entries[e].valid==0 ) {
|
||||
v=e;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( v==-1 ) {
|
||||
v=victim;
|
||||
victim = (victim+1)%ncache;
|
||||
}
|
||||
|
||||
if ( entries[v].valid ) {
|
||||
ret = entries[v].address;
|
||||
cacheBytes -= entries[v].bytes;
|
||||
entries[v].valid = 0;
|
||||
entries[v].address = NULL;
|
||||
entries[v].bytes = 0;
|
||||
}
|
||||
|
||||
entries[v].address=ptr;
|
||||
entries[v].bytes =bytes;
|
||||
entries[v].valid =1;
|
||||
cacheBytes += bytes;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void *MemoryManager::Lookup(size_t bytes,int type)
|
||||
{
|
||||
#ifdef ALLOCATION_CACHE
|
||||
int cache;
|
||||
if (bytes < GRID_ALLOC_SMALL_LIMIT) cache = type + 2;
|
||||
else if (bytes >= GRID_ALLOC_HUGE_LIMIT) cache = type + 1;
|
||||
else cache = type;
|
||||
|
||||
return Lookup(bytes,Entries[cache],Ncache[cache],CacheBytes[cache]);
|
||||
#else
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
void *MemoryManager::Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache,uint64_t & cacheBytes)
|
||||
{
|
||||
#ifdef GRID_OMP
|
||||
assert(omp_in_parallel()==0);
|
||||
#endif
|
||||
for(int e=0;e<ncache;e++){
|
||||
if ( entries[e].valid && ( entries[e].bytes == bytes ) ) {
|
||||
entries[e].valid = 0;
|
||||
cacheBytes -= entries[e].bytes;
|
||||
return entries[e].address;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -1,226 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/MemoryManager.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
#include <list>
|
||||
#include <unordered_map>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
// Move control to configure.ac and Config.h?
|
||||
|
||||
#define GRID_ALLOC_SMALL_LIMIT (4096)
|
||||
#define GRID_ALLOC_HUGE_LIMIT (2147483648)
|
||||
|
||||
#define STRINGIFY(x) #x
|
||||
#define TOSTRING(x) STRINGIFY(x)
|
||||
#define FILE_LINE __FILE__ ":" TOSTRING(__LINE__)
|
||||
#define AUDIT(a) MemoryManager::Audit(FILE_LINE)
|
||||
|
||||
/*Pinning pages is costly*/
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Advise the LatticeAccelerator class
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
enum ViewAdvise {
|
||||
AdviseDefault = 0x0, // Regular data
|
||||
AdviseInfrequentUse = 0x1 // Advise that the data is used infrequently. This can
|
||||
// significantly influence performance of bulk storage.
|
||||
|
||||
// AdviseTransient = 0x2, // Data will mostly be read. On some architectures
|
||||
// enables read-only copies of memory to be kept on
|
||||
// host and device.
|
||||
|
||||
// AdviseAcceleratorWriteDiscard = 0x4 // Field will be written in entirety on device
|
||||
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// View Access Mode
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
enum ViewMode {
|
||||
AcceleratorRead = 0x01,
|
||||
AcceleratorWrite = 0x02,
|
||||
AcceleratorWriteDiscard = 0x04,
|
||||
CpuRead = 0x08,
|
||||
CpuWrite = 0x10,
|
||||
CpuWriteDiscard = 0x10 // same for now
|
||||
};
|
||||
|
||||
struct MemoryStatus {
|
||||
uint64_t DeviceBytes;
|
||||
uint64_t DeviceLRUBytes;
|
||||
uint64_t DeviceMaxBytes;
|
||||
uint64_t HostToDeviceBytes;
|
||||
uint64_t DeviceToHostBytes;
|
||||
uint64_t HostToDeviceXfer;
|
||||
uint64_t DeviceToHostXfer;
|
||||
uint64_t DeviceEvictions;
|
||||
uint64_t DeviceDestroy;
|
||||
uint64_t DeviceAllocCacheBytes;
|
||||
uint64_t HostAllocCacheBytes;
|
||||
};
|
||||
|
||||
|
||||
class MemoryManager {
|
||||
private:
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// For caching recently freed allocations
|
||||
////////////////////////////////////////////////////////////
|
||||
typedef struct {
|
||||
void *address;
|
||||
size_t bytes;
|
||||
int valid;
|
||||
} AllocationCacheEntry;
|
||||
|
||||
static const int NallocCacheMax=128;
|
||||
static const int NallocType=9;
|
||||
static AllocationCacheEntry Entries[NallocType][NallocCacheMax];
|
||||
static int Victim[NallocType];
|
||||
static int Ncache[NallocType];
|
||||
static uint64_t CacheBytes[NallocType];
|
||||
|
||||
/////////////////////////////////////////////////
|
||||
// Free pool
|
||||
/////////////////////////////////////////////////
|
||||
static void *Insert(void *ptr,size_t bytes,int type) ;
|
||||
static void *Lookup(size_t bytes,int type) ;
|
||||
static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim,uint64_t &cbytes) ;
|
||||
static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache,uint64_t &cbytes) ;
|
||||
|
||||
public:
|
||||
static void PrintBytes(void);
|
||||
static void Audit(std::string s);
|
||||
static void Init(void);
|
||||
static void InitMessage(void);
|
||||
static void *AcceleratorAllocate(size_t bytes);
|
||||
static void AcceleratorFree (void *ptr,size_t bytes);
|
||||
static void *SharedAllocate(size_t bytes);
|
||||
static void SharedFree (void *ptr,size_t bytes);
|
||||
static void *CpuAllocate(size_t bytes);
|
||||
static void CpuFree (void *ptr,size_t bytes);
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Footprint tracking
|
||||
////////////////////////////////////////////////////////
|
||||
static uint64_t DeviceBytes;
|
||||
static uint64_t DeviceLRUBytes;
|
||||
static uint64_t DeviceMaxBytes;
|
||||
static uint64_t HostToDeviceBytes;
|
||||
static uint64_t DeviceToHostBytes;
|
||||
static uint64_t HostToDeviceXfer;
|
||||
static uint64_t DeviceToHostXfer;
|
||||
static uint64_t DeviceEvictions;
|
||||
static uint64_t DeviceDestroy;
|
||||
|
||||
static uint64_t DeviceCacheBytes();
|
||||
static uint64_t HostCacheBytes();
|
||||
|
||||
static MemoryStatus GetFootprint(void) {
|
||||
MemoryStatus stat;
|
||||
stat.DeviceBytes = DeviceBytes;
|
||||
stat.DeviceLRUBytes = DeviceLRUBytes;
|
||||
stat.DeviceMaxBytes = DeviceMaxBytes;
|
||||
stat.HostToDeviceBytes = HostToDeviceBytes;
|
||||
stat.DeviceToHostBytes = DeviceToHostBytes;
|
||||
stat.HostToDeviceXfer = HostToDeviceXfer;
|
||||
stat.DeviceToHostXfer = DeviceToHostXfer;
|
||||
stat.DeviceEvictions = DeviceEvictions;
|
||||
stat.DeviceDestroy = DeviceDestroy;
|
||||
stat.DeviceAllocCacheBytes = DeviceCacheBytes();
|
||||
stat.HostAllocCacheBytes = HostCacheBytes();
|
||||
return stat;
|
||||
};
|
||||
|
||||
private:
|
||||
#ifndef GRID_UVM
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Data tables for ViewCache
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
typedef std::list<uint64_t> LRU_t;
|
||||
typedef typename LRU_t::iterator LRUiterator;
|
||||
typedef struct {
|
||||
int LRU_valid;
|
||||
LRUiterator LRU_entry;
|
||||
uint64_t CpuPtr;
|
||||
uint64_t AccPtr;
|
||||
size_t bytes;
|
||||
uint32_t transient;
|
||||
uint32_t state;
|
||||
uint32_t accLock;
|
||||
uint32_t cpuLock;
|
||||
} AcceleratorViewEntry;
|
||||
|
||||
typedef std::unordered_map<uint64_t,AcceleratorViewEntry> AccViewTable_t;
|
||||
typedef typename AccViewTable_t::iterator AccViewTableIterator ;
|
||||
|
||||
static AccViewTable_t AccViewTable;
|
||||
static LRU_t LRU;
|
||||
|
||||
/////////////////////////////////////////////////
|
||||
// Device motion
|
||||
/////////////////////////////////////////////////
|
||||
static void Create(uint64_t CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint);
|
||||
static void EvictVictims(uint64_t bytes); // Frees up <bytes>
|
||||
static void Evict(AcceleratorViewEntry &AccCache);
|
||||
static void Flush(AcceleratorViewEntry &AccCache);
|
||||
static void Clone(AcceleratorViewEntry &AccCache);
|
||||
static void AccDiscard(AcceleratorViewEntry &AccCache);
|
||||
static void CpuDiscard(AcceleratorViewEntry &AccCache);
|
||||
|
||||
// static void LRUupdate(AcceleratorViewEntry &AccCache);
|
||||
static void LRUinsert(AcceleratorViewEntry &AccCache);
|
||||
static void LRUremove(AcceleratorViewEntry &AccCache);
|
||||
|
||||
// manage entries in the table
|
||||
static int EntryPresent(uint64_t CpuPtr);
|
||||
static void EntryCreate(uint64_t CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint);
|
||||
static void EntryErase (uint64_t CpuPtr);
|
||||
static AccViewTableIterator EntryLookup(uint64_t CpuPtr);
|
||||
static void EntrySet (uint64_t CpuPtr,AcceleratorViewEntry &entry);
|
||||
|
||||
static void AcceleratorViewClose(uint64_t AccPtr);
|
||||
static uint64_t AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint);
|
||||
static void CpuViewClose(uint64_t Ptr);
|
||||
static uint64_t CpuViewOpen(uint64_t CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint);
|
||||
#endif
|
||||
static void NotifyDeletion(void * CpuPtr);
|
||||
|
||||
public:
|
||||
static void Print(void);
|
||||
static void PrintAll(void);
|
||||
static void PrintState( void* CpuPtr);
|
||||
static int isOpen (void* CpuPtr);
|
||||
static void ViewClose(void* CpuPtr,ViewMode mode);
|
||||
static void *ViewOpen (void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint);
|
||||
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
@ -1,601 +0,0 @@
|
||||
#include <Grid/GridCore.h>
|
||||
#ifndef GRID_UVM
|
||||
|
||||
#warning "Using explicit device memory copies"
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
#define MAXLINE 512
|
||||
static char print_buffer [ MAXLINE ];
|
||||
|
||||
#define mprintf(...) snprintf (print_buffer,MAXLINE, __VA_ARGS__ ); std::cout << GridLogMemory << print_buffer;
|
||||
#define dprintf(...) snprintf (print_buffer,MAXLINE, __VA_ARGS__ ); std::cout << GridLogMemory << print_buffer;
|
||||
//#define dprintf(...)
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// For caching copies of data on device
|
||||
////////////////////////////////////////////////////////////
|
||||
MemoryManager::AccViewTable_t MemoryManager::AccViewTable;
|
||||
MemoryManager::LRU_t MemoryManager::LRU;
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Footprint tracking
|
||||
////////////////////////////////////////////////////////
|
||||
uint64_t MemoryManager::DeviceBytes;
|
||||
uint64_t MemoryManager::DeviceLRUBytes;
|
||||
uint64_t MemoryManager::DeviceMaxBytes = 1024*1024*128;
|
||||
uint64_t MemoryManager::HostToDeviceBytes;
|
||||
uint64_t MemoryManager::DeviceToHostBytes;
|
||||
uint64_t MemoryManager::HostToDeviceXfer;
|
||||
uint64_t MemoryManager::DeviceToHostXfer;
|
||||
uint64_t MemoryManager::DeviceEvictions;
|
||||
uint64_t MemoryManager::DeviceDestroy;
|
||||
|
||||
////////////////////////////////////
|
||||
// Priority ordering for unlocked entries
|
||||
// Empty
|
||||
// CpuDirty
|
||||
// Consistent
|
||||
// AccDirty
|
||||
////////////////////////////////////
|
||||
#define Empty (0x0) /*Entry unoccupied */
|
||||
#define CpuDirty (0x1) /*CPU copy is golden, Acc buffer MAY not be allocated*/
|
||||
#define Consistent (0x2) /*ACC copy AND CPU copy are valid */
|
||||
#define AccDirty (0x4) /*ACC copy is golden */
|
||||
#define EvictNext (0x8) /*Priority for eviction*/
|
||||
|
||||
/////////////////////////////////////////////////
|
||||
// Mechanics of data table maintenance
|
||||
/////////////////////////////////////////////////
|
||||
int MemoryManager::EntryPresent(uint64_t CpuPtr)
|
||||
{
|
||||
if(AccViewTable.empty()) return 0;
|
||||
|
||||
auto count = AccViewTable.count(CpuPtr); assert((count==0)||(count==1));
|
||||
return count;
|
||||
}
|
||||
void MemoryManager::EntryCreate(uint64_t CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint)
|
||||
{
|
||||
assert(!EntryPresent(CpuPtr));
|
||||
AcceleratorViewEntry AccCache;
|
||||
AccCache.CpuPtr = CpuPtr;
|
||||
AccCache.AccPtr = (uint64_t)NULL;
|
||||
AccCache.bytes = bytes;
|
||||
AccCache.state = CpuDirty;
|
||||
AccCache.LRU_valid=0;
|
||||
AccCache.transient=0;
|
||||
AccCache.accLock=0;
|
||||
AccCache.cpuLock=0;
|
||||
AccViewTable[CpuPtr] = AccCache;
|
||||
}
|
||||
MemoryManager::AccViewTableIterator MemoryManager::EntryLookup(uint64_t CpuPtr)
|
||||
{
|
||||
assert(EntryPresent(CpuPtr));
|
||||
auto AccCacheIterator = AccViewTable.find(CpuPtr);
|
||||
assert(AccCacheIterator!=AccViewTable.end());
|
||||
return AccCacheIterator;
|
||||
}
|
||||
void MemoryManager::EntryErase(uint64_t CpuPtr)
|
||||
{
|
||||
auto AccCache = EntryLookup(CpuPtr);
|
||||
AccViewTable.erase(CpuPtr);
|
||||
}
|
||||
void MemoryManager::LRUinsert(AcceleratorViewEntry &AccCache)
|
||||
{
|
||||
assert(AccCache.LRU_valid==0);
|
||||
if (AccCache.transient) {
|
||||
LRU.push_back(AccCache.CpuPtr);
|
||||
AccCache.LRU_entry = --LRU.end();
|
||||
} else {
|
||||
LRU.push_front(AccCache.CpuPtr);
|
||||
AccCache.LRU_entry = LRU.begin();
|
||||
}
|
||||
AccCache.LRU_valid = 1;
|
||||
DeviceLRUBytes+=AccCache.bytes;
|
||||
}
|
||||
void MemoryManager::LRUremove(AcceleratorViewEntry &AccCache)
|
||||
{
|
||||
assert(AccCache.LRU_valid==1);
|
||||
LRU.erase(AccCache.LRU_entry);
|
||||
AccCache.LRU_valid = 0;
|
||||
DeviceLRUBytes-=AccCache.bytes;
|
||||
}
|
||||
/////////////////////////////////////////////////
|
||||
// Accelerator cache motion & consistency logic
|
||||
/////////////////////////////////////////////////
|
||||
void MemoryManager::AccDiscard(AcceleratorViewEntry &AccCache)
|
||||
{
|
||||
///////////////////////////////////////////////////////////
|
||||
// Remove from Accelerator, remove entry, without flush
|
||||
// Cannot be locked. If allocated Must be in LRU pool.
|
||||
///////////////////////////////////////////////////////////
|
||||
assert(AccCache.state!=Empty);
|
||||
|
||||
mprintf("MemoryManager: Discard(%lx) %lx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
|
||||
assert(AccCache.accLock==0);
|
||||
assert(AccCache.cpuLock==0);
|
||||
assert(AccCache.CpuPtr!=(uint64_t)NULL);
|
||||
if(AccCache.AccPtr) {
|
||||
AcceleratorFree((void *)AccCache.AccPtr,AccCache.bytes);
|
||||
DeviceDestroy++;
|
||||
DeviceBytes -=AccCache.bytes;
|
||||
LRUremove(AccCache);
|
||||
AccCache.AccPtr=(uint64_t) NULL;
|
||||
dprintf("MemoryManager: Free(%lx) LRU %ld Total %ld\n",(uint64_t)AccCache.AccPtr,DeviceLRUBytes,DeviceBytes);
|
||||
}
|
||||
uint64_t CpuPtr = AccCache.CpuPtr;
|
||||
EntryErase(CpuPtr);
|
||||
}
|
||||
|
||||
void MemoryManager::Evict(AcceleratorViewEntry &AccCache)
|
||||
{
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Make CPU consistent, remove from Accelerator, remove from LRU, LEAVE CPU only entry
|
||||
// Cannot be acclocked. If allocated must be in LRU pool.
|
||||
//
|
||||
// Nov 2022... Felix issue: Allocating two CpuPtrs, can have an entry in LRU-q with CPUlock.
|
||||
// and require to evict the AccPtr copy. Eviction was a mistake in CpuViewOpen
|
||||
// but there is a weakness where CpuLock entries are attempted for erase
|
||||
// Take these OUT LRU queue when CPU locked?
|
||||
// Cannot take out the table as cpuLock data is important.
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
assert(AccCache.state!=Empty);
|
||||
|
||||
mprintf("MemoryManager: Evict cpu %lx acc %lx cpuLock %ld accLock %ld\n",
|
||||
(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr,
|
||||
(uint64_t)AccCache.cpuLock,(uint64_t)AccCache.accLock);
|
||||
if (AccCache.accLock!=0) return;
|
||||
if (AccCache.cpuLock!=0) return;
|
||||
if(AccCache.state==AccDirty) {
|
||||
Flush(AccCache);
|
||||
}
|
||||
if(AccCache.AccPtr) {
|
||||
AcceleratorFree((void *)AccCache.AccPtr,AccCache.bytes);
|
||||
LRUremove(AccCache);
|
||||
AccCache.AccPtr=(uint64_t)NULL;
|
||||
AccCache.state=CpuDirty; // CPU primary now
|
||||
DeviceBytes -=AccCache.bytes;
|
||||
dprintf("MemoryManager: Free(%lx) footprint now %ld \n",(uint64_t)AccCache.AccPtr,DeviceBytes);
|
||||
}
|
||||
// uint64_t CpuPtr = AccCache.CpuPtr;
|
||||
DeviceEvictions++;
|
||||
// EntryErase(CpuPtr);
|
||||
}
|
||||
void MemoryManager::Flush(AcceleratorViewEntry &AccCache)
|
||||
{
|
||||
assert(AccCache.state==AccDirty);
|
||||
assert(AccCache.cpuLock==0);
|
||||
assert(AccCache.accLock==0);
|
||||
assert(AccCache.AccPtr!=(uint64_t)NULL);
|
||||
assert(AccCache.CpuPtr!=(uint64_t)NULL);
|
||||
acceleratorCopyFromDevice((void *)AccCache.AccPtr,(void *)AccCache.CpuPtr,AccCache.bytes);
|
||||
mprintf("MemoryManager: Flush %lx -> %lx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
|
||||
DeviceToHostBytes+=AccCache.bytes;
|
||||
DeviceToHostXfer++;
|
||||
AccCache.state=Consistent;
|
||||
}
|
||||
void MemoryManager::Clone(AcceleratorViewEntry &AccCache)
|
||||
{
|
||||
assert(AccCache.state==CpuDirty);
|
||||
assert(AccCache.cpuLock==0);
|
||||
assert(AccCache.accLock==0);
|
||||
assert(AccCache.CpuPtr!=(uint64_t)NULL);
|
||||
if(AccCache.AccPtr==(uint64_t)NULL){
|
||||
AccCache.AccPtr=(uint64_t)AcceleratorAllocate(AccCache.bytes);
|
||||
DeviceBytes+=AccCache.bytes;
|
||||
}
|
||||
mprintf("MemoryManager: Clone %lx <- %lx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
|
||||
acceleratorCopyToDevice((void *)AccCache.CpuPtr,(void *)AccCache.AccPtr,AccCache.bytes);
|
||||
HostToDeviceBytes+=AccCache.bytes;
|
||||
HostToDeviceXfer++;
|
||||
AccCache.state=Consistent;
|
||||
}
|
||||
|
||||
void MemoryManager::CpuDiscard(AcceleratorViewEntry &AccCache)
|
||||
{
|
||||
assert(AccCache.state!=Empty);
|
||||
assert(AccCache.cpuLock==0);
|
||||
assert(AccCache.accLock==0);
|
||||
assert(AccCache.CpuPtr!=(uint64_t)NULL);
|
||||
if(AccCache.AccPtr==(uint64_t)NULL){
|
||||
AccCache.AccPtr=(uint64_t)AcceleratorAllocate(AccCache.bytes);
|
||||
DeviceBytes+=AccCache.bytes;
|
||||
}
|
||||
AccCache.state=AccDirty;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
// View management
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
void MemoryManager::ViewClose(void* Ptr,ViewMode mode)
|
||||
{
|
||||
if( (mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard) ){
|
||||
dprintf("AcceleratorViewClose %lx\n",(uint64_t)Ptr);
|
||||
AcceleratorViewClose((uint64_t)Ptr);
|
||||
} else if( (mode==CpuRead)||(mode==CpuWrite)){
|
||||
CpuViewClose((uint64_t)Ptr);
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
void *MemoryManager::ViewOpen(void* _CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint)
|
||||
{
|
||||
uint64_t CpuPtr = (uint64_t)_CpuPtr;
|
||||
if( (mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard) ){
|
||||
dprintf("AcceleratorViewOpen %lx\n",(uint64_t)CpuPtr);
|
||||
return (void *) AcceleratorViewOpen(CpuPtr,bytes,mode,hint);
|
||||
} else if( (mode==CpuRead)||(mode==CpuWrite)){
|
||||
return (void *)CpuViewOpen(CpuPtr,bytes,mode,hint);
|
||||
} else {
|
||||
assert(0);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
void MemoryManager::EvictVictims(uint64_t bytes)
|
||||
{
|
||||
assert(bytes<DeviceMaxBytes);
|
||||
while(bytes+DeviceLRUBytes > DeviceMaxBytes){
|
||||
if ( DeviceLRUBytes > 0){
|
||||
assert(LRU.size()>0);
|
||||
uint64_t victim = LRU.back(); // From the LRU
|
||||
auto AccCacheIterator = EntryLookup(victim);
|
||||
auto & AccCache = AccCacheIterator->second;
|
||||
Evict(AccCache);
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint)
|
||||
{
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Find if present, otherwise get or force an empty
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
if ( EntryPresent(CpuPtr)==0 ){
|
||||
EntryCreate(CpuPtr,bytes,mode,hint);
|
||||
}
|
||||
|
||||
auto AccCacheIterator = EntryLookup(CpuPtr);
|
||||
auto & AccCache = AccCacheIterator->second;
|
||||
if (!AccCache.AccPtr) {
|
||||
EvictVictims(bytes);
|
||||
}
|
||||
assert((mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard));
|
||||
|
||||
assert(AccCache.cpuLock==0); // Programming error
|
||||
|
||||
if(AccCache.state!=Empty) {
|
||||
dprintf("ViewOpen found entry %lx %lx : %ld %ld accLock %ld\n",
|
||||
(uint64_t)AccCache.CpuPtr,
|
||||
(uint64_t)CpuPtr,
|
||||
(uint64_t)AccCache.bytes,
|
||||
(uint64_t)bytes,
|
||||
(uint64_t)AccCache.accLock);
|
||||
assert(AccCache.CpuPtr == CpuPtr);
|
||||
assert(AccCache.bytes ==bytes);
|
||||
}
|
||||
/*
|
||||
* State transitions and actions
|
||||
*
|
||||
* Action State StateNext Flush Clone
|
||||
*
|
||||
* AccRead Empty Consistent - Y
|
||||
* AccWrite Empty AccDirty - Y
|
||||
* AccRead CpuDirty Consistent - Y
|
||||
* AccWrite CpuDirty AccDirty - Y
|
||||
* AccRead Consistent Consistent - -
|
||||
* AccWrite Consistent AccDirty - -
|
||||
* AccRead AccDirty AccDirty - -
|
||||
* AccWrite AccDirty AccDirty - -
|
||||
*/
|
||||
if(AccCache.state==Empty) {
|
||||
assert(AccCache.LRU_valid==0);
|
||||
AccCache.CpuPtr = CpuPtr;
|
||||
AccCache.AccPtr = (uint64_t)NULL;
|
||||
AccCache.bytes = bytes;
|
||||
AccCache.state = CpuDirty; // Cpu starts primary
|
||||
if(mode==AcceleratorWriteDiscard){
|
||||
CpuDiscard(AccCache);
|
||||
AccCache.state = AccDirty; // Empty + AcceleratorWrite=> AccDirty
|
||||
} else if(mode==AcceleratorWrite){
|
||||
Clone(AccCache);
|
||||
AccCache.state = AccDirty; // Empty + AcceleratorWrite=> AccDirty
|
||||
} else {
|
||||
Clone(AccCache);
|
||||
AccCache.state = Consistent; // Empty + AccRead => Consistent
|
||||
}
|
||||
AccCache.accLock= 1;
|
||||
dprintf("Copied Empty entry into device accLock= %d\n",AccCache.accLock);
|
||||
} else if(AccCache.state==CpuDirty ){
|
||||
if(mode==AcceleratorWriteDiscard) {
|
||||
CpuDiscard(AccCache);
|
||||
AccCache.state = AccDirty; // CpuDirty + AcceleratorWrite=> AccDirty
|
||||
} else if(mode==AcceleratorWrite) {
|
||||
Clone(AccCache);
|
||||
AccCache.state = AccDirty; // CpuDirty + AcceleratorWrite=> AccDirty
|
||||
} else {
|
||||
Clone(AccCache);
|
||||
AccCache.state = Consistent; // CpuDirty + AccRead => Consistent
|
||||
}
|
||||
AccCache.accLock++;
|
||||
dprintf("CpuDirty entry into device ++accLock= %d\n",AccCache.accLock);
|
||||
} else if(AccCache.state==Consistent) {
|
||||
if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard))
|
||||
AccCache.state = AccDirty; // Consistent + AcceleratorWrite=> AccDirty
|
||||
else
|
||||
AccCache.state = Consistent; // Consistent + AccRead => Consistent
|
||||
AccCache.accLock++;
|
||||
dprintf("Consistent entry into device ++accLock= %d\n",AccCache.accLock);
|
||||
} else if(AccCache.state==AccDirty) {
|
||||
if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard))
|
||||
AccCache.state = AccDirty; // AccDirty + AcceleratorWrite=> AccDirty
|
||||
else
|
||||
AccCache.state = AccDirty; // AccDirty + AccRead => AccDirty
|
||||
AccCache.accLock++;
|
||||
dprintf("AccDirty entry ++accLock= %d\n",AccCache.accLock);
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
assert(AccCache.accLock>0);
|
||||
// If view is opened on device must remove from LRU
|
||||
if(AccCache.LRU_valid==1){
|
||||
// must possibly remove from LRU as now locked on GPU
|
||||
dprintf("AccCache entry removed from LRU \n");
|
||||
LRUremove(AccCache);
|
||||
}
|
||||
|
||||
int transient =hint;
|
||||
AccCache.transient= transient? EvictNext : 0;
|
||||
|
||||
return AccCache.AccPtr;
|
||||
}
|
||||
////////////////////////////////////
|
||||
// look up & decrement lock count
|
||||
////////////////////////////////////
|
||||
void MemoryManager::AcceleratorViewClose(uint64_t CpuPtr)
|
||||
{
|
||||
auto AccCacheIterator = EntryLookup(CpuPtr);
|
||||
auto & AccCache = AccCacheIterator->second;
|
||||
|
||||
assert(AccCache.cpuLock==0);
|
||||
assert(AccCache.accLock>0);
|
||||
|
||||
AccCache.accLock--;
|
||||
// Move to LRU queue if not locked and close on device
|
||||
if(AccCache.accLock==0) {
|
||||
dprintf("AccleratorViewClose %lx AccLock decremented to %ld move to LRU queue\n",(uint64_t)CpuPtr,(uint64_t)AccCache.accLock);
|
||||
LRUinsert(AccCache);
|
||||
} else {
|
||||
dprintf("AccleratorViewClose %lx AccLock decremented to %ld\n",(uint64_t)CpuPtr,(uint64_t)AccCache.accLock);
|
||||
}
|
||||
}
|
||||
void MemoryManager::CpuViewClose(uint64_t CpuPtr)
|
||||
{
|
||||
auto AccCacheIterator = EntryLookup(CpuPtr);
|
||||
auto & AccCache = AccCacheIterator->second;
|
||||
|
||||
assert(AccCache.cpuLock>0);
|
||||
assert(AccCache.accLock==0);
|
||||
|
||||
AccCache.cpuLock--;
|
||||
}
|
||||
/*
|
||||
* Action State StateNext Flush Clone
|
||||
*
|
||||
* CpuRead Empty CpuDirty - -
|
||||
* CpuWrite Empty CpuDirty - -
|
||||
* CpuRead CpuDirty CpuDirty - -
|
||||
* CpuWrite CpuDirty CpuDirty - -
|
||||
* CpuRead Consistent Consistent - -
|
||||
* CpuWrite Consistent CpuDirty - -
|
||||
* CpuRead AccDirty Consistent Y -
|
||||
* CpuWrite AccDirty CpuDirty Y -
|
||||
*/
|
||||
uint64_t MemoryManager::CpuViewOpen(uint64_t CpuPtr,size_t bytes,ViewMode mode,ViewAdvise transient)
|
||||
{
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Find if present, otherwise get or force an empty
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
if ( EntryPresent(CpuPtr)==0 ){
|
||||
EntryCreate(CpuPtr,bytes,mode,transient);
|
||||
}
|
||||
|
||||
auto AccCacheIterator = EntryLookup(CpuPtr);
|
||||
auto & AccCache = AccCacheIterator->second;
|
||||
|
||||
// CPU doesn't need to free space
|
||||
// if (!AccCache.AccPtr) {
|
||||
// EvictVictims(bytes);
|
||||
// }
|
||||
|
||||
assert((mode==CpuRead)||(mode==CpuWrite));
|
||||
assert(AccCache.accLock==0); // Programming error
|
||||
|
||||
if(AccCache.state!=Empty) {
|
||||
assert(AccCache.CpuPtr == CpuPtr);
|
||||
assert(AccCache.bytes==bytes);
|
||||
}
|
||||
|
||||
if(AccCache.state==Empty) {
|
||||
AccCache.CpuPtr = CpuPtr;
|
||||
AccCache.AccPtr = (uint64_t)NULL;
|
||||
AccCache.bytes = bytes;
|
||||
AccCache.state = CpuDirty; // Empty + CpuRead/CpuWrite => CpuDirty
|
||||
AccCache.accLock= 0;
|
||||
AccCache.cpuLock= 1;
|
||||
} else if(AccCache.state==CpuDirty ){
|
||||
// AccPtr dont care, deferred allocate
|
||||
AccCache.state = CpuDirty; // CpuDirty +CpuRead/CpuWrite => CpuDirty
|
||||
AccCache.cpuLock++;
|
||||
} else if(AccCache.state==Consistent) {
|
||||
assert(AccCache.AccPtr != (uint64_t)NULL);
|
||||
if(mode==CpuWrite)
|
||||
AccCache.state = CpuDirty; // Consistent +CpuWrite => CpuDirty
|
||||
else
|
||||
AccCache.state = Consistent; // Consistent +CpuRead => Consistent
|
||||
AccCache.cpuLock++;
|
||||
} else if(AccCache.state==AccDirty) {
|
||||
assert(AccCache.AccPtr != (uint64_t)NULL);
|
||||
Flush(AccCache);
|
||||
if(mode==CpuWrite) AccCache.state = CpuDirty; // AccDirty +CpuWrite => CpuDirty, Flush
|
||||
else AccCache.state = Consistent; // AccDirty +CpuRead => Consistent, Flush
|
||||
AccCache.cpuLock++;
|
||||
} else {
|
||||
assert(0); // should be unreachable
|
||||
}
|
||||
|
||||
AccCache.transient= transient? EvictNext : 0;
|
||||
|
||||
return AccCache.CpuPtr;
|
||||
}
|
||||
void MemoryManager::NotifyDeletion(void *_ptr)
|
||||
{
|
||||
// Look up in ViewCache
|
||||
uint64_t ptr = (uint64_t)_ptr;
|
||||
if(EntryPresent(ptr)) {
|
||||
auto e = EntryLookup(ptr);
|
||||
AccDiscard(e->second);
|
||||
}
|
||||
}
|
||||
void MemoryManager::Print(void)
|
||||
{
|
||||
PrintBytes();
|
||||
std::cout << GridLogMessage << "--------------------------------------------" << std::endl;
|
||||
std::cout << GridLogMessage << "Memory Manager " << std::endl;
|
||||
std::cout << GridLogMessage << "--------------------------------------------" << std::endl;
|
||||
std::cout << GridLogMessage << DeviceBytes << " bytes allocated on device " << std::endl;
|
||||
std::cout << GridLogMessage << DeviceLRUBytes<< " bytes evictable on device " << std::endl;
|
||||
std::cout << GridLogMessage << DeviceMaxBytes<< " bytes max on device " << std::endl;
|
||||
std::cout << GridLogMessage << HostToDeviceXfer << " transfers to device " << std::endl;
|
||||
std::cout << GridLogMessage << DeviceToHostXfer << " transfers from device " << std::endl;
|
||||
std::cout << GridLogMessage << HostToDeviceBytes<< " bytes transfered to device " << std::endl;
|
||||
std::cout << GridLogMessage << DeviceToHostBytes<< " bytes transfered from device " << std::endl;
|
||||
std::cout << GridLogMessage << DeviceEvictions << " Evictions from device " << std::endl;
|
||||
std::cout << GridLogMessage << DeviceDestroy << " Destroyed vectors on device " << std::endl;
|
||||
std::cout << GridLogMessage << AccViewTable.size()<< " vectors " << LRU.size()<<" evictable"<< std::endl;
|
||||
std::cout << GridLogMessage << "--------------------------------------------" << std::endl;
|
||||
}
|
||||
void MemoryManager::PrintAll(void)
|
||||
{
|
||||
Print();
|
||||
std::cout << GridLogMessage << std::endl;
|
||||
std::cout << GridLogMessage << "--------------------------------------------" << std::endl;
|
||||
std::cout << GridLogMessage << "CpuAddr\t\tAccAddr\t\tState\t\tcpuLock\taccLock\tLRU_valid "<<std::endl;
|
||||
std::cout << GridLogMessage << "--------------------------------------------" << std::endl;
|
||||
for(auto it=AccViewTable.begin();it!=AccViewTable.end();it++){
|
||||
auto &AccCache = it->second;
|
||||
|
||||
std::string str;
|
||||
if ( AccCache.state==Empty ) str = std::string("Empty");
|
||||
if ( AccCache.state==CpuDirty ) str = std::string("CpuDirty");
|
||||
if ( AccCache.state==AccDirty ) str = std::string("AccDirty");
|
||||
if ( AccCache.state==Consistent)str = std::string("Consistent");
|
||||
|
||||
std::cout << GridLogMessage << "0x"<<std::hex<<AccCache.CpuPtr<<std::dec
|
||||
<< "\t0x"<<std::hex<<AccCache.AccPtr<<std::dec<<"\t" <<str
|
||||
<< "\t" << AccCache.cpuLock
|
||||
<< "\t" << AccCache.accLock
|
||||
<< "\t" << AccCache.LRU_valid<<std::endl;
|
||||
}
|
||||
std::cout << GridLogMessage << "--------------------------------------------" << std::endl;
|
||||
|
||||
};
|
||||
int MemoryManager::isOpen (void* _CpuPtr)
|
||||
{
|
||||
uint64_t CpuPtr = (uint64_t)_CpuPtr;
|
||||
if ( EntryPresent(CpuPtr) ){
|
||||
auto AccCacheIterator = EntryLookup(CpuPtr);
|
||||
auto & AccCache = AccCacheIterator->second;
|
||||
return AccCache.cpuLock+AccCache.accLock;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
void MemoryManager::Audit(std::string s)
|
||||
{
|
||||
uint64_t CpuBytes=0;
|
||||
uint64_t AccBytes=0;
|
||||
uint64_t LruBytes1=0;
|
||||
uint64_t LruBytes2=0;
|
||||
uint64_t LruCnt=0;
|
||||
|
||||
std::cout << " Memory Manager::Audit() from "<<s<<std::endl;
|
||||
for(auto it=LRU.begin();it!=LRU.end();it++){
|
||||
uint64_t cpuPtr = *it;
|
||||
assert(EntryPresent(cpuPtr));
|
||||
auto AccCacheIterator = EntryLookup(cpuPtr);
|
||||
auto & AccCache = AccCacheIterator->second;
|
||||
LruBytes2+=AccCache.bytes;
|
||||
assert(AccCache.LRU_valid==1);
|
||||
assert(AccCache.LRU_entry==it);
|
||||
}
|
||||
std::cout << " Memory Manager::Audit() LRU queue matches table entries "<<std::endl;
|
||||
|
||||
for(auto it=AccViewTable.begin();it!=AccViewTable.end();it++){
|
||||
auto &AccCache = it->second;
|
||||
|
||||
std::string str;
|
||||
if ( AccCache.state==Empty ) str = std::string("Empty");
|
||||
if ( AccCache.state==CpuDirty ) str = std::string("CpuDirty");
|
||||
if ( AccCache.state==AccDirty ) str = std::string("AccDirty");
|
||||
if ( AccCache.state==Consistent)str = std::string("Consistent");
|
||||
|
||||
CpuBytes+=AccCache.bytes;
|
||||
if( AccCache.AccPtr ) AccBytes+=AccCache.bytes;
|
||||
if( AccCache.LRU_valid ) LruBytes1+=AccCache.bytes;
|
||||
if( AccCache.LRU_valid ) LruCnt++;
|
||||
|
||||
if ( AccCache.cpuLock || AccCache.accLock ) {
|
||||
assert(AccCache.LRU_valid==0);
|
||||
|
||||
std::cout << GridLogError << s<< "\n\t 0x"<<std::hex<<AccCache.CpuPtr<<std::dec
|
||||
<< "\t0x"<<std::hex<<AccCache.AccPtr<<std::dec<<"\t" <<str
|
||||
<< "\t cpuLock " << AccCache.cpuLock
|
||||
<< "\t accLock " << AccCache.accLock
|
||||
<< "\t LRUvalid " << AccCache.LRU_valid<<std::endl;
|
||||
}
|
||||
|
||||
assert( AccCache.cpuLock== 0 ) ;
|
||||
assert( AccCache.accLock== 0 ) ;
|
||||
}
|
||||
std::cout << " Memory Manager::Audit() no locked table entries "<<std::endl;
|
||||
assert(LruBytes1==LruBytes2);
|
||||
assert(LruBytes1==DeviceLRUBytes);
|
||||
std::cout << " Memory Manager::Audit() evictable bytes matches sum over table "<<std::endl;
|
||||
assert(AccBytes==DeviceBytes);
|
||||
std::cout << " Memory Manager::Audit() device bytes matches sum over table "<<std::endl;
|
||||
assert(LruCnt == LRU.size());
|
||||
std::cout << " Memory Manager::Audit() LRU entry count matches "<<std::endl;
|
||||
|
||||
}
|
||||
|
||||
void MemoryManager::PrintState(void* _CpuPtr)
|
||||
{
|
||||
uint64_t CpuPtr = (uint64_t)_CpuPtr;
|
||||
|
||||
if ( EntryPresent(CpuPtr) ){
|
||||
auto AccCacheIterator = EntryLookup(CpuPtr);
|
||||
auto & AccCache = AccCacheIterator->second;
|
||||
std::string str;
|
||||
if ( AccCache.state==Empty ) str = std::string("Empty");
|
||||
if ( AccCache.state==CpuDirty ) str = std::string("CpuDirty");
|
||||
if ( AccCache.state==AccDirty ) str = std::string("AccDirty");
|
||||
if ( AccCache.state==Consistent)str = std::string("Consistent");
|
||||
if ( AccCache.state==EvictNext) str = std::string("EvictNext");
|
||||
|
||||
std::cout << GridLogMessage << "CpuAddr\t\tAccAddr\t\tState\t\tcpuLock\taccLock\tLRU_valid "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tx"<<std::hex<<AccCache.CpuPtr<<std::dec
|
||||
<< "\tx"<<std::hex<<AccCache.AccPtr<<std::dec<<"\t" <<str
|
||||
<< "\t" << AccCache.cpuLock
|
||||
<< "\t" << AccCache.accLock
|
||||
<< "\t" << AccCache.LRU_valid<<std::endl;
|
||||
|
||||
} else {
|
||||
std::cout << GridLogMessage << "No Entry in AccCache table." << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
@ -1,31 +0,0 @@
|
||||
#include <Grid/GridCore.h>
|
||||
#ifdef GRID_UVM
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
// View management is 1:1 address space mapping
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
uint64_t MemoryManager::DeviceBytes;
|
||||
uint64_t MemoryManager::DeviceLRUBytes;
|
||||
uint64_t MemoryManager::DeviceMaxBytes = 1024*1024*128;
|
||||
uint64_t MemoryManager::HostToDeviceBytes;
|
||||
uint64_t MemoryManager::DeviceToHostBytes;
|
||||
uint64_t MemoryManager::HostToDeviceXfer;
|
||||
uint64_t MemoryManager::DeviceToHostXfer;
|
||||
uint64_t MemoryManager::DeviceEvictions;
|
||||
uint64_t MemoryManager::DeviceDestroy;
|
||||
|
||||
void MemoryManager::Audit(std::string s){};
|
||||
void MemoryManager::ViewClose(void* AccPtr,ViewMode mode){};
|
||||
void *MemoryManager::ViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint){ return CpuPtr; };
|
||||
int MemoryManager::isOpen (void* CpuPtr) { return 0;}
|
||||
void MemoryManager::PrintState(void* CpuPtr)
|
||||
{
|
||||
std::cout << GridLogMessage << "Host<->Device memory movement not currently managed by Grid." << std::endl;
|
||||
};
|
||||
void MemoryManager::Print(void){};
|
||||
void MemoryManager::PrintAll(void){};
|
||||
void MemoryManager::NotifyDeletion(void *ptr){};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
@ -1,95 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/MemoryStats.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
std::string sizeString(size_t bytes);
|
||||
|
||||
struct MemoryStats
|
||||
{
|
||||
size_t totalAllocated{0}, maxAllocated{0},
|
||||
currentlyAllocated{0}, totalFreed{0};
|
||||
};
|
||||
|
||||
class MemoryProfiler
|
||||
{
|
||||
public:
|
||||
static MemoryStats *stats;
|
||||
static bool debug;
|
||||
};
|
||||
|
||||
#define memString(bytes) std::to_string(bytes) + " (" + sizeString(bytes) + ")"
|
||||
#define profilerDebugPrint \
|
||||
if (MemoryProfiler::stats) \
|
||||
{ \
|
||||
auto s = MemoryProfiler::stats; \
|
||||
std::cout << GridLogDebug << "[Memory debug] Stats " << MemoryProfiler::stats << std::endl; \
|
||||
std::cout << GridLogDebug << "[Memory debug] total : " << memString(s->totalAllocated) \
|
||||
<< std::endl; \
|
||||
std::cout << GridLogDebug << "[Memory debug] max : " << memString(s->maxAllocated) \
|
||||
<< std::endl; \
|
||||
std::cout << GridLogDebug << "[Memory debug] current: " << memString(s->currentlyAllocated) \
|
||||
<< std::endl; \
|
||||
std::cout << GridLogDebug << "[Memory debug] freed : " << memString(s->totalFreed) \
|
||||
<< std::endl; \
|
||||
}
|
||||
|
||||
#define profilerAllocate(bytes) \
|
||||
if (MemoryProfiler::stats) \
|
||||
{ \
|
||||
auto s = MemoryProfiler::stats; \
|
||||
s->totalAllocated += (bytes); \
|
||||
s->currentlyAllocated += (bytes); \
|
||||
s->maxAllocated = std::max(s->maxAllocated, s->currentlyAllocated); \
|
||||
} \
|
||||
if (MemoryProfiler::debug) \
|
||||
{ \
|
||||
std::cout << GridLogDebug << "[Memory debug] allocating " << memString(bytes) << std::endl; \
|
||||
profilerDebugPrint; \
|
||||
}
|
||||
|
||||
#define profilerFree(bytes) \
|
||||
if (MemoryProfiler::stats) \
|
||||
{ \
|
||||
auto s = MemoryProfiler::stats; \
|
||||
s->totalFreed += (bytes); \
|
||||
s->currentlyAllocated -= (bytes); \
|
||||
} \
|
||||
if (MemoryProfiler::debug) \
|
||||
{ \
|
||||
std::cout << GridLogDebug << "[Memory debug] freeing " << memString(bytes) << std::endl; \
|
||||
profilerDebugPrint; \
|
||||
}
|
||||
|
||||
void check_huge_pages(void *Buf,uint64_t BYTES);
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -1,291 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/cartesian/Cartesian_base.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CARTESIAN_BASE_H
|
||||
#define GRID_CARTESIAN_BASE_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Commicator provides information on the processor grid
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// unsigned long _ndimension;
|
||||
// Coordinate _processors; // processor grid
|
||||
// int _processor; // linear processor rank
|
||||
// Coordinate _processor_coor; // linear processor rank
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
class GridBase : public CartesianCommunicator , public GridThread {
|
||||
|
||||
public:
|
||||
int dummy;
|
||||
// Give Lattice access
|
||||
template<class object> friend class Lattice;
|
||||
|
||||
GridBase(const Coordinate & processor_grid) : CartesianCommunicator(processor_grid) { LocallyPeriodic=0;};
|
||||
|
||||
GridBase(const Coordinate & processor_grid,
|
||||
const CartesianCommunicator &parent,
|
||||
int &split_rank)
|
||||
: CartesianCommunicator(processor_grid,parent,split_rank) {LocallyPeriodic=0;};
|
||||
|
||||
GridBase(const Coordinate & processor_grid,
|
||||
const CartesianCommunicator &parent)
|
||||
: CartesianCommunicator(processor_grid,parent,dummy) {LocallyPeriodic=0;};
|
||||
|
||||
virtual ~GridBase() = default;
|
||||
|
||||
// Physics Grid information.
|
||||
Coordinate _simd_layout;// Which dimensions get relayed out over simd lanes.
|
||||
Coordinate _fdimensions;// (full) Global dimensions of array prior to cb removal
|
||||
Coordinate _gdimensions;// Global dimensions of array after cb removal
|
||||
Coordinate _ldimensions;// local dimensions of array with processor images removed
|
||||
Coordinate _rdimensions;// Reduced local dimensions with simd lane images and processor images removed
|
||||
Coordinate _ostride; // Outer stride for each dimension
|
||||
Coordinate _istride; // Inner stride i.e. within simd lane
|
||||
int _osites; // _isites*_osites = product(dimensions).
|
||||
int _isites;
|
||||
int _fsites; // _isites*_osites = product(dimensions).
|
||||
int _gsites;
|
||||
Coordinate _slice_block;// subslice information
|
||||
Coordinate _slice_stride;
|
||||
Coordinate _slice_nblock;
|
||||
|
||||
Coordinate _lstart; // local start of array in gcoors _processor_coor[d]*_ldimensions[d]
|
||||
Coordinate _lend ; // local end of array in gcoors _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1
|
||||
|
||||
bool _isCheckerBoarded;
|
||||
int LocallyPeriodic;
|
||||
Coordinate _checker_dim_mask;
|
||||
|
||||
public:
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Checkerboarding interface is virtual and overridden by
|
||||
// GridCartesian / GridRedBlackCartesian
|
||||
////////////////////////////////////////////////////////////////
|
||||
virtual int CheckerBoarded(int dim)=0;
|
||||
virtual int CheckerBoard(const Coordinate &site)=0;
|
||||
virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0;
|
||||
virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0;
|
||||
virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0;
|
||||
virtual int CheckerBoardFromOindex (int Oindex)=0;
|
||||
virtual int CheckerBoardFromOindexTable (int Oindex)=0;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Local layout calculations
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// These routines are key. Subdivide the linearised cartesian index into
|
||||
// "inner" index identifying which simd lane of object<vFcomplex> is associated with coord
|
||||
// "outer" index identifying which element of _odata in class "Lattice" is associated with coord.
|
||||
//
|
||||
// Compared to, say, Blitz++ we simply need to store BOTH an inner stride and an outer
|
||||
// stride per dimension. The cost of evaluating the indexing information is doubled for an n-dimensional
|
||||
// coordinate. Note, however, for data parallel operations the "inner" indexing cost is not paid and all
|
||||
// lanes are operated upon simultaneously.
|
||||
|
||||
virtual int oIndex(Coordinate &coor)
|
||||
{
|
||||
int idx=0;
|
||||
// Works with either global or local coordinates
|
||||
for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]);
|
||||
return idx;
|
||||
}
|
||||
virtual int iIndex(Coordinate &lcoor)
|
||||
{
|
||||
int idx=0;
|
||||
for(int d=0;d<_ndimension;d++) idx+=_istride[d]*(lcoor[d]/_rdimensions[d]);
|
||||
return idx;
|
||||
}
|
||||
inline int oIndexReduced(Coordinate &ocoor)
|
||||
{
|
||||
int idx=0;
|
||||
// ocoor is already reduced so can eliminate the modulo operation
|
||||
// for fast indexing and inline the routine
|
||||
for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*ocoor[d];
|
||||
return idx;
|
||||
}
|
||||
inline void oCoorFromOindex (Coordinate& coor,int Oindex){
|
||||
Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions);
|
||||
}
|
||||
|
||||
inline void InOutCoorToLocalCoor (Coordinate &ocoor, Coordinate &icoor, Coordinate &lcoor) {
|
||||
lcoor.resize(_ndimension);
|
||||
for (int d = 0; d < _ndimension; d++)
|
||||
lcoor[d] = ocoor[d] + _rdimensions[d] * icoor[d];
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// SIMD lane addressing
|
||||
//////////////////////////////////////////////////////////
|
||||
inline void iCoorFromIindex(Coordinate &coor,int lane)
|
||||
{
|
||||
Lexicographic::CoorFromIndex(coor,lane,_simd_layout);
|
||||
}
|
||||
|
||||
inline int PermuteDim(int dimension){
|
||||
return _simd_layout[dimension]>1;
|
||||
}
|
||||
inline int PermuteType(int dimension){
|
||||
int permute_type=0;
|
||||
//
|
||||
// Best way to encode this would be to present a mask
|
||||
// for which simd dimensions are rotated, and the rotation
|
||||
// size. If there is only one simd dimension rotated, this is just
|
||||
// a permute.
|
||||
//
|
||||
// Cases: PermuteType == 1,2,4,8
|
||||
// Distance should be either 0,1,2..
|
||||
//
|
||||
if ( _simd_layout[dimension] > 2 ) {
|
||||
for(int d=0;d<_ndimension;d++){
|
||||
if ( d != dimension ) assert ( (_simd_layout[d]==1) );
|
||||
}
|
||||
permute_type = RotateBit; // How to specify distance; this is not just direction.
|
||||
return permute_type;
|
||||
}
|
||||
|
||||
for(int d=_ndimension-1;d>dimension;d--){
|
||||
if (_simd_layout[d]>1 ) permute_type++;
|
||||
}
|
||||
return permute_type;
|
||||
}
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Array sizing queries
|
||||
////////////////////////////////////////////////////////////////
|
||||
|
||||
inline int iSites(void) const { return _isites; };
|
||||
inline int Nsimd(void) const { return _isites; };// Synonymous with iSites
|
||||
inline int oSites(void) const { return _osites; };
|
||||
inline int lSites(void) const { return _isites*_osites; };
|
||||
inline int gSites(void) const { return _isites*_osites*_Nprocessors; };
|
||||
inline int Nd (void) const { return _ndimension;};
|
||||
|
||||
inline const Coordinate LocalStarts(void) { return _lstart; };
|
||||
inline const Coordinate &FullDimensions(void) { return _fdimensions;};
|
||||
inline const Coordinate &GlobalDimensions(void) { return _gdimensions;};
|
||||
inline const Coordinate &LocalDimensions(void) { return _ldimensions;};
|
||||
inline const Coordinate &VirtualLocalDimensions(void) { return _ldimensions;};
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Utility to print the full decomposition details
|
||||
////////////////////////////////////////////////////////////////
|
||||
|
||||
void show_decomposition(){
|
||||
std::cout << GridLogMessage << "\tFull Dimensions : " << _fdimensions << std::endl;
|
||||
std::cout << GridLogMessage << "\tSIMD layout : " << _simd_layout << std::endl;
|
||||
std::cout << GridLogMessage << "\tGlobal Dimensions : " << _gdimensions << std::endl;
|
||||
std::cout << GridLogMessage << "\tLocal Dimensions : " << _ldimensions << std::endl;
|
||||
std::cout << GridLogMessage << "\tReduced Dimensions : " << _rdimensions << std::endl;
|
||||
std::cout << GridLogMessage << "\tOuter strides : " << _ostride << std::endl;
|
||||
std::cout << GridLogMessage << "\tInner strides : " << _istride << std::endl;
|
||||
std::cout << GridLogMessage << "\tiSites : " << _isites << std::endl;
|
||||
std::cout << GridLogMessage << "\toSites : " << _osites << std::endl;
|
||||
std::cout << GridLogMessage << "\tlSites : " << lSites() << std::endl;
|
||||
std::cout << GridLogMessage << "\tgSites : " << gSites() << std::endl;
|
||||
std::cout << GridLogMessage << "\tNd : " << _ndimension << std::endl;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Global addressing
|
||||
////////////////////////////////////////////////////////////////
|
||||
void GlobalIndexToGlobalCoor(int gidx,Coordinate &gcoor){
|
||||
assert(gidx< gSites());
|
||||
Lexicographic::CoorFromIndex(gcoor,gidx,_gdimensions);
|
||||
}
|
||||
void LocalIndexToLocalCoor(int lidx,Coordinate &lcoor){
|
||||
assert(lidx<lSites());
|
||||
Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions);
|
||||
}
|
||||
void GlobalCoorToGlobalIndex(const Coordinate & gcoor,int & gidx){
|
||||
gidx=0;
|
||||
int mult=1;
|
||||
for(int mu=0;mu<_ndimension;mu++) {
|
||||
gidx+=mult*gcoor[mu];
|
||||
mult*=_gdimensions[mu];
|
||||
}
|
||||
}
|
||||
void GlobalCoorToProcessorCoorLocalCoor(Coordinate &pcoor,Coordinate &lcoor,const Coordinate &gcoor)
|
||||
{
|
||||
pcoor.resize(_ndimension);
|
||||
lcoor.resize(_ndimension);
|
||||
for(int mu=0;mu<_ndimension;mu++){
|
||||
int _fld = _fdimensions[mu]/_processors[mu];
|
||||
pcoor[mu] = gcoor[mu]/_fld;
|
||||
lcoor[mu] = gcoor[mu]%_fld;
|
||||
}
|
||||
}
|
||||
void GlobalCoorToRankIndex(int &rank, int &o_idx, int &i_idx ,const Coordinate &gcoor)
|
||||
{
|
||||
Coordinate pcoor;
|
||||
Coordinate lcoor;
|
||||
GlobalCoorToProcessorCoorLocalCoor(pcoor,lcoor,gcoor);
|
||||
rank = RankFromProcessorCoor(pcoor);
|
||||
/*
|
||||
Coordinate cblcoor(lcoor);
|
||||
for(int d=0;d<cblcoor.size();d++){
|
||||
if( this->CheckerBoarded(d) ) {
|
||||
cblcoor[d] = lcoor[d]/2;
|
||||
}
|
||||
}
|
||||
*/
|
||||
i_idx= iIndex(lcoor);
|
||||
o_idx= oIndex(lcoor);
|
||||
}
|
||||
|
||||
void RankIndexToGlobalCoor(int rank, int o_idx, int i_idx , Coordinate &gcoor)
|
||||
{
|
||||
gcoor.resize(_ndimension);
|
||||
Coordinate coor(_ndimension);
|
||||
|
||||
ProcessorCoorFromRank(rank,coor);
|
||||
for(int mu=0;mu<_ndimension;mu++) gcoor[mu] = _ldimensions[mu]*coor[mu];
|
||||
|
||||
iCoorFromIindex(coor,i_idx);
|
||||
for(int mu=0;mu<_ndimension;mu++) gcoor[mu] += _rdimensions[mu]*coor[mu];
|
||||
|
||||
oCoorFromOindex (coor,o_idx);
|
||||
for(int mu=0;mu<_ndimension;mu++) gcoor[mu] += coor[mu];
|
||||
|
||||
}
|
||||
void RankIndexCbToFullGlobalCoor(int rank, int o_idx, int i_idx, int cb,Coordinate &fcoor)
|
||||
{
|
||||
RankIndexToGlobalCoor(rank,o_idx,i_idx ,fcoor);
|
||||
if(CheckerBoarded(0)){
|
||||
fcoor[0] = fcoor[0]*2+cb;
|
||||
}
|
||||
}
|
||||
void ProcessorCoorLocalCoorToGlobalCoor(Coordinate &Pcoor,Coordinate &Lcoor,Coordinate &gcoor)
|
||||
{
|
||||
gcoor.resize(_ndimension);
|
||||
for(int mu=0;mu<_ndimension;mu++) gcoor[mu] = Pcoor[mu]*_ldimensions[mu]+Lcoor[mu];
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
@ -1,178 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/cartesian/Cartesian_full.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CARTESIAN_FULL_H
|
||||
#define GRID_CARTESIAN_FULL_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Grid Support.
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class GridCartesian: public GridBase {
|
||||
|
||||
public:
|
||||
int dummy;
|
||||
Coordinate _checker_dim_mask;
|
||||
virtual int CheckerBoardFromOindexTable (int Oindex) {
|
||||
return 0;
|
||||
}
|
||||
virtual int CheckerBoardFromOindex (int Oindex)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
virtual int CheckerBoarded(int dim){
|
||||
return 0;
|
||||
}
|
||||
virtual int CheckerBoard(const Coordinate &site){
|
||||
return 0;
|
||||
}
|
||||
virtual int CheckerBoardDestination(int cb,int shift,int dim){
|
||||
return 0;
|
||||
}
|
||||
virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift, int ocb){
|
||||
return shift;
|
||||
}
|
||||
virtual int CheckerBoardShift(int source_cb,int dim,int shift, int osite){
|
||||
return shift;
|
||||
}
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// Constructor takes a parent grid and possibly subdivides communicator.
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
GridCartesian(const Coordinate &dimensions,
|
||||
const Coordinate &simd_layout,
|
||||
const Coordinate &processor_grid,
|
||||
const GridCartesian &parent) : GridBase(processor_grid,parent,dummy)
|
||||
{
|
||||
Init(dimensions,simd_layout,processor_grid);
|
||||
}
|
||||
GridCartesian(const Coordinate &dimensions,
|
||||
const Coordinate &simd_layout,
|
||||
const Coordinate &processor_grid,
|
||||
const GridCartesian &parent,int &split_rank) : GridBase(processor_grid,parent,split_rank)
|
||||
{
|
||||
Init(dimensions,simd_layout,processor_grid);
|
||||
}
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// Construct from comm world
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
GridCartesian(const Coordinate &dimensions,
|
||||
const Coordinate &simd_layout,
|
||||
const Coordinate &processor_grid) : GridBase(processor_grid)
|
||||
{
|
||||
Init(dimensions,simd_layout,processor_grid);
|
||||
}
|
||||
|
||||
virtual ~GridCartesian() = default;
|
||||
|
||||
void Init(const Coordinate &dimensions,
|
||||
const Coordinate &simd_layout,
|
||||
const Coordinate &processor_grid)
|
||||
{
|
||||
///////////////////////
|
||||
// Grid information
|
||||
///////////////////////
|
||||
_isCheckerBoarded = false;
|
||||
_ndimension = dimensions.size();
|
||||
|
||||
_fdimensions.resize(_ndimension);
|
||||
_gdimensions.resize(_ndimension);
|
||||
_ldimensions.resize(_ndimension);
|
||||
_rdimensions.resize(_ndimension);
|
||||
_simd_layout.resize(_ndimension);
|
||||
_checker_dim_mask.resize(_ndimension);;
|
||||
_lstart.resize(_ndimension);
|
||||
_lend.resize(_ndimension);
|
||||
|
||||
_ostride.resize(_ndimension);
|
||||
_istride.resize(_ndimension);
|
||||
|
||||
_fsites = _gsites = _osites = _isites = 1;
|
||||
|
||||
for (int d = 0; d < _ndimension; d++)
|
||||
{
|
||||
_checker_dim_mask[d]=0;
|
||||
|
||||
_fdimensions[d] = dimensions[d]; // Global dimensions
|
||||
_gdimensions[d] = _fdimensions[d]; // Global dimensions
|
||||
_simd_layout[d] = simd_layout[d];
|
||||
_fsites = _fsites * _fdimensions[d];
|
||||
_gsites = _gsites * _gdimensions[d];
|
||||
|
||||
// Use a reduced simd grid
|
||||
_ldimensions[d] = _gdimensions[d] / _processors[d]; //local dimensions
|
||||
//std::cout << _ldimensions[d] << " " << _gdimensions[d] << " " << _processors[d] << std::endl;
|
||||
assert(_ldimensions[d] * _processors[d] == _gdimensions[d]);
|
||||
|
||||
_rdimensions[d] = _ldimensions[d] / _simd_layout[d]; //overdecomposition
|
||||
assert(_rdimensions[d] * _simd_layout[d] == _ldimensions[d]);
|
||||
|
||||
_lstart[d] = _processor_coor[d] * _ldimensions[d];
|
||||
_lend[d] = _processor_coor[d] * _ldimensions[d] + _ldimensions[d] - 1;
|
||||
_osites *= _rdimensions[d];
|
||||
_isites *= _simd_layout[d];
|
||||
|
||||
// Addressing support
|
||||
if (d == 0)
|
||||
{
|
||||
_ostride[d] = 1;
|
||||
_istride[d] = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
_ostride[d] = _ostride[d - 1] * _rdimensions[d - 1];
|
||||
_istride[d] = _istride[d - 1] * _simd_layout[d - 1];
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////
|
||||
// subplane information
|
||||
///////////////////////
|
||||
_slice_block.resize(_ndimension);
|
||||
_slice_stride.resize(_ndimension);
|
||||
_slice_nblock.resize(_ndimension);
|
||||
|
||||
int block = 1;
|
||||
int nblock = 1;
|
||||
for (int d = 0; d < _ndimension; d++)
|
||||
nblock *= _rdimensions[d];
|
||||
|
||||
for (int d = 0; d < _ndimension; d++)
|
||||
{
|
||||
nblock /= _rdimensions[d];
|
||||
_slice_block[d] = block;
|
||||
_slice_stride[d] = _ostride[d] * _rdimensions[d];
|
||||
_slice_nblock[d] = nblock;
|
||||
block = block * _rdimensions[d];
|
||||
}
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
@ -1,305 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/cartesian/Cartesian_red_black.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CARTESIAN_RED_BLACK_H
|
||||
#define GRID_CARTESIAN_RED_BLACK_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
static const int CbRed =0;
|
||||
static const int CbBlack=1;
|
||||
static const int Even =CbRed;
|
||||
static const int Odd =CbBlack;
|
||||
|
||||
accelerator_inline int RedBlackCheckerBoardFromOindex (int oindex,const Coordinate &rdim,const Coordinate &chk_dim_msk)
|
||||
{
|
||||
int nd=rdim.size();
|
||||
Coordinate coor(nd);
|
||||
|
||||
Lexicographic::CoorFromIndex(coor,oindex,rdim);
|
||||
|
||||
int linear=0;
|
||||
for(int d=0;d<nd;d++){
|
||||
if(chk_dim_msk[d])
|
||||
linear=linear+coor[d];
|
||||
}
|
||||
return (linear&0x1);
|
||||
}
|
||||
|
||||
|
||||
// Specialise this for red black grids storing half the data like a chess board.
|
||||
class GridRedBlackCartesian : public GridBase
|
||||
{
|
||||
public:
|
||||
// Coordinate _checker_dim_mask;
|
||||
int _checker_dim;
|
||||
std::vector<int> _checker_board;
|
||||
|
||||
virtual int CheckerBoarded(int dim){
|
||||
if( dim==_checker_dim) return 1;
|
||||
else return 0;
|
||||
}
|
||||
virtual int CheckerBoard(const Coordinate &site){
|
||||
int linear=0;
|
||||
assert(site.size()==_ndimension);
|
||||
for(int d=0;d<_ndimension;d++){
|
||||
if(_checker_dim_mask[d])
|
||||
linear=linear+site[d];
|
||||
}
|
||||
return (linear&0x1);
|
||||
}
|
||||
|
||||
// Depending on the cb of site, we toggle source cb.
|
||||
// for block #b, element #e = (b, e)
|
||||
// we need
|
||||
virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int ocb){
|
||||
if(dim != _checker_dim) return shift;
|
||||
|
||||
int fulldim =_fdimensions[dim];
|
||||
shift = (shift+fulldim)%fulldim;
|
||||
|
||||
// Probably faster with table lookup;
|
||||
// or by looping over x,y,z and multiply rather than computing checkerboard.
|
||||
|
||||
if ( (source_cb+ocb)&1 ) {
|
||||
return (shift)/2;
|
||||
} else {
|
||||
return (shift+1)/2;
|
||||
}
|
||||
}
|
||||
virtual int CheckerBoardFromOindexTable (int Oindex) {
|
||||
return _checker_board[Oindex];
|
||||
}
|
||||
virtual int CheckerBoardFromOindex (int Oindex)
|
||||
{
|
||||
Coordinate ocoor;
|
||||
oCoorFromOindex(ocoor,Oindex);
|
||||
return CheckerBoard(ocoor);
|
||||
}
|
||||
virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite){
|
||||
|
||||
if(dim != _checker_dim) return shift;
|
||||
|
||||
int ocb=CheckerBoardFromOindex(osite);
|
||||
|
||||
return CheckerBoardShiftForCB(source_cb,dim,shift,ocb);
|
||||
}
|
||||
|
||||
virtual int CheckerBoardDestination(int source_cb,int shift,int dim){
|
||||
if ( _checker_dim_mask[dim] ) {
|
||||
// If _fdimensions[checker_dim] is odd, then shifting by 1 in other dims
|
||||
// does NOT cause a parity hop.
|
||||
int add=(dim==_checker_dim) ? 0 : _fdimensions[_checker_dim];
|
||||
if ( (shift+add) &0x1) {
|
||||
return 1-source_cb;
|
||||
} else {
|
||||
return source_cb;
|
||||
}
|
||||
} else {
|
||||
return source_cb;
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Create Redblack from original grid; require full grid pointer ?
|
||||
////////////////////////////////////////////////////////////
|
||||
GridRedBlackCartesian(const GridBase *base) : GridBase(base->_processors,*base)
|
||||
{
|
||||
int dims = base->_ndimension;
|
||||
Coordinate checker_dim_mask(dims,1);
|
||||
int checker_dim = 0;
|
||||
Init(base->_fdimensions,base->_simd_layout,base->_processors,checker_dim_mask,checker_dim);
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Create redblack from original grid, with non-trivial checker dim mask
|
||||
////////////////////////////////////////////////////////////
|
||||
GridRedBlackCartesian(const GridBase *base,
|
||||
const Coordinate &checker_dim_mask,
|
||||
int checker_dim
|
||||
) : GridBase(base->_processors,*base)
|
||||
{
|
||||
Init(base->_fdimensions,base->_simd_layout,base->_processors,checker_dim_mask,checker_dim) ;
|
||||
}
|
||||
|
||||
virtual ~GridRedBlackCartesian() = default;
|
||||
|
||||
void Init(const Coordinate &dimensions,
|
||||
const Coordinate &simd_layout,
|
||||
const Coordinate &processor_grid,
|
||||
const Coordinate &checker_dim_mask,
|
||||
int checker_dim)
|
||||
{
|
||||
|
||||
_isCheckerBoarded = true;
|
||||
_checker_dim = checker_dim;
|
||||
assert(checker_dim_mask[checker_dim] == 1);
|
||||
_ndimension = dimensions.size();
|
||||
assert(checker_dim_mask.size() == _ndimension);
|
||||
assert(processor_grid.size() == _ndimension);
|
||||
assert(simd_layout.size() == _ndimension);
|
||||
|
||||
_fdimensions.resize(_ndimension);
|
||||
_gdimensions.resize(_ndimension);
|
||||
_ldimensions.resize(_ndimension);
|
||||
_rdimensions.resize(_ndimension);
|
||||
_simd_layout.resize(_ndimension);
|
||||
_lstart.resize(_ndimension);
|
||||
_lend.resize(_ndimension);
|
||||
|
||||
_ostride.resize(_ndimension);
|
||||
_istride.resize(_ndimension);
|
||||
|
||||
_fsites = _gsites = _osites = _isites = 1;
|
||||
|
||||
_checker_dim_mask = checker_dim_mask;
|
||||
|
||||
for (int d = 0; d < _ndimension; d++)
|
||||
{
|
||||
_fdimensions[d] = dimensions[d];
|
||||
_gdimensions[d] = _fdimensions[d];
|
||||
_fsites = _fsites * _fdimensions[d];
|
||||
_gsites = _gsites * _gdimensions[d];
|
||||
|
||||
if (d == _checker_dim)
|
||||
{
|
||||
assert((_gdimensions[d] & 0x1) == 0);
|
||||
_gdimensions[d] = _gdimensions[d] / 2; // Remove a checkerboard
|
||||
_gsites /= 2;
|
||||
}
|
||||
_ldimensions[d] = _gdimensions[d] / _processors[d];
|
||||
assert(_ldimensions[d] * _processors[d] == _gdimensions[d]);
|
||||
_lstart[d] = _processor_coor[d] * _ldimensions[d];
|
||||
_lend[d] = _processor_coor[d] * _ldimensions[d] + _ldimensions[d] - 1;
|
||||
|
||||
// Use a reduced simd grid
|
||||
_simd_layout[d] = simd_layout[d];
|
||||
_rdimensions[d] = _ldimensions[d] / _simd_layout[d]; // this is not checking if this is integer
|
||||
assert(_rdimensions[d] * _simd_layout[d] == _ldimensions[d]);
|
||||
assert(_rdimensions[d] > 0);
|
||||
|
||||
// all elements of a simd vector must have same checkerboard.
|
||||
// If Ls vectorised, this must still be the case; e.g. dwf rb5d
|
||||
if (_simd_layout[d] > 1)
|
||||
{
|
||||
if (checker_dim_mask[d])
|
||||
{
|
||||
assert((_rdimensions[d] & 0x1) == 0);
|
||||
}
|
||||
}
|
||||
|
||||
_osites *= _rdimensions[d];
|
||||
_isites *= _simd_layout[d];
|
||||
|
||||
// Addressing support
|
||||
if (d == 0)
|
||||
{
|
||||
_ostride[d] = 1;
|
||||
_istride[d] = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
_ostride[d] = _ostride[d - 1] * _rdimensions[d - 1];
|
||||
_istride[d] = _istride[d - 1] * _simd_layout[d - 1];
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// subplane information
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
_slice_block.resize(_ndimension);
|
||||
_slice_stride.resize(_ndimension);
|
||||
_slice_nblock.resize(_ndimension);
|
||||
|
||||
int block = 1;
|
||||
int nblock = 1;
|
||||
for (int d = 0; d < _ndimension; d++)
|
||||
nblock *= _rdimensions[d];
|
||||
|
||||
for (int d = 0; d < _ndimension; d++)
|
||||
{
|
||||
nblock /= _rdimensions[d];
|
||||
_slice_block[d] = block;
|
||||
_slice_stride[d] = _ostride[d] * _rdimensions[d];
|
||||
_slice_nblock[d] = nblock;
|
||||
block = block * _rdimensions[d];
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Create a checkerboard lookup table
|
||||
////////////////////////////////////////////////
|
||||
int rvol = 1;
|
||||
for (int d = 0; d < _ndimension; d++)
|
||||
{
|
||||
rvol = rvol * _rdimensions[d];
|
||||
}
|
||||
_checker_board.resize(rvol);
|
||||
for (int osite = 0; osite < _osites; osite++)
|
||||
{
|
||||
_checker_board[osite] = CheckerBoardFromOindex(osite);
|
||||
}
|
||||
};
|
||||
|
||||
protected:
|
||||
virtual int oIndex(Coordinate &coor)
|
||||
{
|
||||
int idx = 0;
|
||||
for (int d = 0; d < _ndimension; d++)
|
||||
{
|
||||
if (d == _checker_dim)
|
||||
{
|
||||
idx += _ostride[d] * ((coor[d] / 2) % _rdimensions[d]);
|
||||
}
|
||||
else
|
||||
{
|
||||
idx += _ostride[d] * (coor[d] % _rdimensions[d]);
|
||||
}
|
||||
}
|
||||
return idx;
|
||||
};
|
||||
|
||||
virtual int iIndex(Coordinate &lcoor)
|
||||
{
|
||||
int idx = 0;
|
||||
for (int d = 0; d < _ndimension; d++)
|
||||
{
|
||||
if (d == _checker_dim)
|
||||
{
|
||||
idx += _istride[d] * (lcoor[d] / (2 * _rdimensions[d]));
|
||||
}
|
||||
else
|
||||
{
|
||||
idx += _istride[d] * (lcoor[d] / _rdimensions[d]);
|
||||
}
|
||||
}
|
||||
return idx;
|
||||
}
|
||||
};
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
@ -1,536 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/cshift/Cshift_common.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
extern Vector<std::pair<int,int> > Cshift_table;
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Gather for when there is no need to SIMD split
|
||||
///////////////////////////////////////////////////////////////////
|
||||
template<class vobj> void
|
||||
Gather_plane_simple (const Lattice<vobj> &rhs,cshiftVector<vobj> &buffer,int dimension,int plane,int cbmask, int off=0)
|
||||
{
|
||||
int rd = rhs.Grid()->_rdimensions[dimension];
|
||||
|
||||
if ( !rhs.Grid()->CheckerBoarded(dimension) ) {
|
||||
cbmask = 0x3;
|
||||
}
|
||||
|
||||
int so=plane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane
|
||||
int e1=rhs.Grid()->_slice_nblock[dimension];
|
||||
int e2=rhs.Grid()->_slice_block[dimension];
|
||||
int ent = 0;
|
||||
|
||||
if(Cshift_table.size()<e1*e2) Cshift_table.resize(e1*e2); // Let it grow to biggest
|
||||
|
||||
int stride=rhs.Grid()->_slice_stride[dimension];
|
||||
|
||||
if ( cbmask == 0x3 ) {
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o = n*stride;
|
||||
int bo = n*e2;
|
||||
Cshift_table[ent++] = std::pair<int,int>(off+bo+b,so+o+b);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
int bo=0;
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o = n*stride;
|
||||
int ocb=1<<rhs.Grid()->CheckerBoardFromOindex(o+b);
|
||||
if ( ocb &cbmask ) {
|
||||
Cshift_table[ent++]=std::pair<int,int> (off+bo++,so+o+b);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
auto buffer_p = & buffer[0];
|
||||
auto table = &Cshift_table[0];
|
||||
#ifdef ACCELERATOR_CSHIFT
|
||||
autoView(rhs_v , rhs, AcceleratorRead);
|
||||
accelerator_for(i,ent,vobj::Nsimd(),{
|
||||
coalescedWrite(buffer_p[table[i].first],coalescedRead(rhs_v[table[i].second]));
|
||||
});
|
||||
#else
|
||||
autoView(rhs_v , rhs, CpuRead);
|
||||
thread_for(i,ent,{
|
||||
buffer_p[table[i].first]=rhs_v[table[i].second];
|
||||
});
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Gather for when there *is* need to SIMD split
|
||||
///////////////////////////////////////////////////////////////////
|
||||
template<class vobj> void
|
||||
Gather_plane_extract(const Lattice<vobj> &rhs,
|
||||
ExtractPointerArray<typename vobj::scalar_object> pointers,
|
||||
int dimension,int plane,int cbmask)
|
||||
{
|
||||
int rd = rhs.Grid()->_rdimensions[dimension];
|
||||
|
||||
if ( !rhs.Grid()->CheckerBoarded(dimension) ) {
|
||||
cbmask = 0x3;
|
||||
}
|
||||
|
||||
int so = plane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane
|
||||
|
||||
int e1=rhs.Grid()->_slice_nblock[dimension];
|
||||
int e2=rhs.Grid()->_slice_block[dimension];
|
||||
int n1=rhs.Grid()->_slice_stride[dimension];
|
||||
|
||||
if ( cbmask ==0x3){
|
||||
#ifdef ACCELERATOR_CSHIFT
|
||||
autoView(rhs_v , rhs, AcceleratorRead);
|
||||
accelerator_for(nn,e1*e2,1,{
|
||||
int n = nn%e1;
|
||||
int b = nn/e1;
|
||||
int o = n*n1;
|
||||
int offset = b+n*e2;
|
||||
|
||||
vobj temp =rhs_v[so+o+b];
|
||||
extract<vobj>(temp,pointers,offset);
|
||||
});
|
||||
#else
|
||||
autoView(rhs_v , rhs, CpuRead);
|
||||
thread_for2d(n,e1,b,e2,{
|
||||
int o = n*n1;
|
||||
int offset = b+n*e2;
|
||||
|
||||
vobj temp =rhs_v[so+o+b];
|
||||
extract<vobj>(temp,pointers,offset);
|
||||
});
|
||||
#endif
|
||||
} else {
|
||||
Coordinate rdim=rhs.Grid()->_rdimensions;
|
||||
Coordinate cdm =rhs.Grid()->_checker_dim_mask;
|
||||
std::cout << " Dense packed buffer WARNING " <<std::endl; // Does this get called twice once for each cb?
|
||||
#ifdef ACCELERATOR_CSHIFT
|
||||
autoView(rhs_v , rhs, AcceleratorRead);
|
||||
accelerator_for(nn,e1*e2,1,{
|
||||
int n = nn%e1;
|
||||
int b = nn/e1;
|
||||
|
||||
Coordinate coor;
|
||||
|
||||
int o=n*n1;
|
||||
int oindex = o+b;
|
||||
|
||||
int cb = RedBlackCheckerBoardFromOindex(oindex, rdim, cdm);
|
||||
|
||||
int ocb=1<<cb;
|
||||
int offset = b+n*e2;
|
||||
|
||||
if ( ocb & cbmask ) {
|
||||
vobj temp =rhs_v[so+o+b];
|
||||
extract<vobj>(temp,pointers,offset);
|
||||
}
|
||||
});
|
||||
#else
|
||||
autoView(rhs_v , rhs, CpuRead);
|
||||
thread_for2d(n,e1,b,e2,{
|
||||
|
||||
Coordinate coor;
|
||||
|
||||
int o=n*n1;
|
||||
int oindex = o+b;
|
||||
|
||||
int cb = RedBlackCheckerBoardFromOindex(oindex, rdim, cdm);
|
||||
|
||||
int ocb=1<<cb;
|
||||
int offset = b+n*e2;
|
||||
|
||||
if ( ocb & cbmask ) {
|
||||
vobj temp =rhs_v[so+o+b];
|
||||
extract<vobj>(temp,pointers,offset);
|
||||
}
|
||||
});
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Scatter for when there is no need to SIMD split
|
||||
//////////////////////////////////////////////////////
|
||||
template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,cshiftVector<vobj> &buffer, int dimension,int plane,int cbmask)
|
||||
{
|
||||
int rd = rhs.Grid()->_rdimensions[dimension];
|
||||
|
||||
if ( !rhs.Grid()->CheckerBoarded(dimension) ) {
|
||||
cbmask=0x3;
|
||||
}
|
||||
|
||||
int so = plane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane
|
||||
|
||||
int e1=rhs.Grid()->_slice_nblock[dimension];
|
||||
int e2=rhs.Grid()->_slice_block[dimension];
|
||||
int stride=rhs.Grid()->_slice_stride[dimension];
|
||||
|
||||
if(Cshift_table.size()<e1*e2) Cshift_table.resize(e1*e2); // Let it grow to biggest
|
||||
|
||||
int ent =0;
|
||||
|
||||
if ( cbmask ==0x3 ) {
|
||||
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*rhs.Grid()->_slice_stride[dimension];
|
||||
int bo =n*rhs.Grid()->_slice_block[dimension];
|
||||
Cshift_table[ent++] = std::pair<int,int>(so+o+b,bo+b);
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
int bo=0;
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*rhs.Grid()->_slice_stride[dimension];
|
||||
int ocb=1<<rhs.Grid()->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
|
||||
if ( ocb & cbmask ) {
|
||||
Cshift_table[ent++]=std::pair<int,int> (so+o+b,bo++);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
auto buffer_p = & buffer[0];
|
||||
auto table = &Cshift_table[0];
|
||||
#ifdef ACCELERATOR_CSHIFT
|
||||
autoView( rhs_v, rhs, AcceleratorWrite);
|
||||
accelerator_for(i,ent,vobj::Nsimd(),{
|
||||
coalescedWrite(rhs_v[table[i].first],coalescedRead(buffer_p[table[i].second]));
|
||||
});
|
||||
#else
|
||||
autoView( rhs_v, rhs, CpuWrite);
|
||||
thread_for(i,ent,{
|
||||
rhs_v[table[i].first]=buffer_p[table[i].second];
|
||||
});
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Scatter for when there *is* need to SIMD split
|
||||
//////////////////////////////////////////////////////
|
||||
template<class vobj> void Scatter_plane_merge(Lattice<vobj> &rhs,ExtractPointerArray<typename vobj::scalar_object> pointers,int dimension,int plane,int cbmask)
|
||||
{
|
||||
int rd = rhs.Grid()->_rdimensions[dimension];
|
||||
|
||||
if ( !rhs.Grid()->CheckerBoarded(dimension) ) {
|
||||
cbmask=0x3;
|
||||
}
|
||||
|
||||
int so = plane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane
|
||||
|
||||
int e1=rhs.Grid()->_slice_nblock[dimension];
|
||||
int e2=rhs.Grid()->_slice_block[dimension];
|
||||
|
||||
if(cbmask ==0x3 ) {
|
||||
int _slice_stride = rhs.Grid()->_slice_stride[dimension];
|
||||
int _slice_block = rhs.Grid()->_slice_block[dimension];
|
||||
#ifdef ACCELERATOR_CSHIFT
|
||||
autoView( rhs_v , rhs, AcceleratorWrite);
|
||||
accelerator_for(nn,e1*e2,1,{
|
||||
int n = nn%e1;
|
||||
int b = nn/e1;
|
||||
int o = n*_slice_stride;
|
||||
int offset = b+n*_slice_block;
|
||||
merge(rhs_v[so+o+b],pointers,offset);
|
||||
});
|
||||
#else
|
||||
autoView( rhs_v , rhs, CpuWrite);
|
||||
thread_for2d(n,e1,b,e2,{
|
||||
int o = n*_slice_stride;
|
||||
int offset = b+n*_slice_block;
|
||||
merge(rhs_v[so+o+b],pointers,offset);
|
||||
});
|
||||
#endif
|
||||
} else {
|
||||
|
||||
// Case of SIMD split AND checker dim cannot currently be hit, except in
|
||||
// Test_cshift_red_black code.
|
||||
std::cout << "Scatter_plane merge assert(0); think this is buggy FIXME "<< std::endl;// think this is buggy FIXME
|
||||
std::cout<<" Unthreaded warning -- buffer is not densely packed ??"<<std::endl;
|
||||
assert(0); // This will fail if hit on GPU
|
||||
autoView( rhs_v, rhs, CpuWrite);
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o = n*rhs.Grid()->_slice_stride[dimension];
|
||||
int offset = b+n*rhs.Grid()->_slice_block[dimension];
|
||||
int ocb=1<<rhs.Grid()->CheckerBoardFromOindex(o+b);
|
||||
if ( ocb&cbmask ) {
|
||||
merge(rhs_v[so+o+b],pointers,offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if (defined(GRID_CUDA) || defined(GRID_HIP)) && defined(ACCELERATOR_CSHIFT)
|
||||
|
||||
template <typename T>
|
||||
T iDivUp(T a, T b) // Round a / b to nearest higher integer value
|
||||
{ return (a % b != 0) ? (a / b + 1) : (a / b); }
|
||||
|
||||
template <typename T>
|
||||
__global__ void populate_Cshift_table(T* vector, T lo, T ro, T e1, T e2, T stride)
|
||||
{
|
||||
int idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx >= e1*e2) return;
|
||||
|
||||
int n, b, o;
|
||||
|
||||
n = idx / e2;
|
||||
b = idx % e2;
|
||||
o = n*stride + b;
|
||||
|
||||
vector[2*idx + 0] = lo + o;
|
||||
vector[2*idx + 1] = ro + o;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// local to node block strided copies
|
||||
//////////////////////////////////////////////////////
|
||||
|
||||
template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask)
|
||||
{
|
||||
int rd = rhs.Grid()->_rdimensions[dimension];
|
||||
|
||||
if ( !rhs.Grid()->CheckerBoarded(dimension) ) {
|
||||
cbmask=0x3;
|
||||
}
|
||||
|
||||
int ro = rplane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane
|
||||
int lo = lplane*lhs.Grid()->_ostride[dimension]; // base offset for start of plane
|
||||
|
||||
int e1=rhs.Grid()->_slice_nblock[dimension]; // clearly loop invariant for icpc
|
||||
int e2=rhs.Grid()->_slice_block[dimension];
|
||||
int stride = rhs.Grid()->_slice_stride[dimension];
|
||||
|
||||
if(Cshift_table.size()<e1*e2) Cshift_table.resize(e1*e2); // Let it grow to biggest
|
||||
|
||||
int ent=0;
|
||||
|
||||
if(cbmask == 0x3 ){
|
||||
#if (defined(GRID_CUDA) || defined(GRID_HIP)) && defined(ACCELERATOR_CSHIFT)
|
||||
ent = e1*e2;
|
||||
dim3 blockSize(acceleratorThreads());
|
||||
dim3 gridSize(iDivUp((unsigned int)ent, blockSize.x));
|
||||
populate_Cshift_table<<<gridSize, blockSize>>>(&Cshift_table[0].first, lo, ro, e1, e2, stride);
|
||||
accelerator_barrier();
|
||||
#else
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*stride+b;
|
||||
Cshift_table[ent++] = std::pair<int,int>(lo+o,ro+o);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*stride+b;
|
||||
int ocb=1<<lhs.Grid()->CheckerBoardFromOindex(o);
|
||||
if ( ocb&cbmask ) {
|
||||
Cshift_table[ent++] = std::pair<int,int>(lo+o,ro+o);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
auto table = &Cshift_table[0];
|
||||
#ifdef ACCELERATOR_CSHIFT
|
||||
autoView(rhs_v , rhs, AcceleratorRead);
|
||||
autoView(lhs_v , lhs, AcceleratorWrite);
|
||||
accelerator_for(i,ent,vobj::Nsimd(),{
|
||||
coalescedWrite(lhs_v[table[i].first],coalescedRead(rhs_v[table[i].second]));
|
||||
});
|
||||
#else
|
||||
autoView(rhs_v , rhs, CpuRead);
|
||||
autoView(lhs_v , lhs, CpuWrite);
|
||||
thread_for(i,ent,{
|
||||
lhs_v[table[i].first]=rhs_v[table[i].second];
|
||||
});
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type)
|
||||
{
|
||||
int rd = rhs.Grid()->_rdimensions[dimension];
|
||||
|
||||
if ( !rhs.Grid()->CheckerBoarded(dimension) ) {
|
||||
cbmask=0x3;
|
||||
}
|
||||
|
||||
int ro = rplane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane
|
||||
int lo = lplane*lhs.Grid()->_ostride[dimension]; // base offset for start of plane
|
||||
|
||||
int e1=rhs.Grid()->_slice_nblock[dimension];
|
||||
int e2=rhs.Grid()->_slice_block [dimension];
|
||||
int stride = rhs.Grid()->_slice_stride[dimension];
|
||||
|
||||
if(Cshift_table.size()<e1*e2) Cshift_table.resize(e1*e2); // Let it grow to biggest
|
||||
|
||||
int ent=0;
|
||||
|
||||
if ( cbmask == 0x3 ) {
|
||||
#if (defined(GRID_CUDA) || defined(GRID_HIP)) && defined(ACCELERATOR_CSHIFT)
|
||||
ent = e1*e2;
|
||||
dim3 blockSize(acceleratorThreads());
|
||||
dim3 gridSize(iDivUp((unsigned int)ent, blockSize.x));
|
||||
populate_Cshift_table<<<gridSize, blockSize>>>(&Cshift_table[0].first, lo, ro, e1, e2, stride);
|
||||
accelerator_barrier();
|
||||
#else
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*stride;
|
||||
Cshift_table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
|
||||
}}
|
||||
#endif
|
||||
} else {
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*stride;
|
||||
int ocb=1<<lhs.Grid()->CheckerBoardFromOindex(o+b);
|
||||
if ( ocb&cbmask ) Cshift_table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
|
||||
}}
|
||||
}
|
||||
|
||||
{
|
||||
auto table = &Cshift_table[0];
|
||||
#ifdef ACCELERATOR_CSHIFT
|
||||
autoView( rhs_v, rhs, AcceleratorRead);
|
||||
autoView( lhs_v, lhs, AcceleratorWrite);
|
||||
accelerator_for(i,ent,1,{
|
||||
permute(lhs_v[table[i].first],rhs_v[table[i].second],permute_type);
|
||||
});
|
||||
#else
|
||||
autoView( rhs_v, rhs, CpuRead);
|
||||
autoView( lhs_v, lhs, CpuWrite);
|
||||
thread_for(i,ent,{
|
||||
permute(lhs_v[table[i].first],rhs_v[table[i].second],permute_type);
|
||||
});
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Local to node Cshift
|
||||
//////////////////////////////////////////////////////
|
||||
template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &rhs,int dimension,int shift)
|
||||
{
|
||||
int sshift[2];
|
||||
|
||||
sshift[0] = rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,Even);
|
||||
sshift[1] = rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,Odd);
|
||||
|
||||
if ( sshift[0] == sshift[1] ) {
|
||||
Cshift_local(ret,rhs,dimension,shift,0x3);
|
||||
} else {
|
||||
Cshift_local(ret,rhs,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
|
||||
Cshift_local(ret,rhs,dimension,shift,0x2);// both with block stride loop iteration
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj> void Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
||||
{
|
||||
GridBase *grid = rhs.Grid();
|
||||
int fd = grid->_fdimensions[dimension];
|
||||
int rd = grid->_rdimensions[dimension];
|
||||
int ld = grid->_ldimensions[dimension];
|
||||
int gd = grid->_gdimensions[dimension];
|
||||
int ly = grid->_simd_layout[dimension];
|
||||
|
||||
// Map to always positive shift modulo global full dimension.
|
||||
shift = (shift+fd)%fd;
|
||||
|
||||
// the permute type
|
||||
ret.Checkerboard() = grid->CheckerBoardDestination(rhs.Checkerboard(),shift,dimension);
|
||||
int permute_dim =grid->PermuteDim(dimension);
|
||||
int permute_type=grid->PermuteType(dimension);
|
||||
int permute_type_dist;
|
||||
|
||||
for(int x=0;x<rd;x++){
|
||||
|
||||
// int o = 0;
|
||||
int bo = x * grid->_ostride[dimension];
|
||||
int cb= (cbmask==0x2)? Odd : Even;
|
||||
|
||||
int sshift = grid->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb);
|
||||
int sx = (x+sshift)%rd;
|
||||
|
||||
// wrap is whether sshift > rd.
|
||||
// num is sshift mod rd.
|
||||
//
|
||||
// shift 7
|
||||
//
|
||||
// XoXo YcYc
|
||||
// oXoX cYcY
|
||||
// XoXo YcYc
|
||||
// oXoX cYcY
|
||||
//
|
||||
// sshift --
|
||||
//
|
||||
// XX YY ; 3
|
||||
// XX YY ; 0
|
||||
// XX YY ; 3
|
||||
// XX YY ; 0
|
||||
//
|
||||
int permute_slice=0;
|
||||
if(permute_dim){
|
||||
int wrap = sshift/rd; wrap=wrap % ly;
|
||||
int num = sshift%rd;
|
||||
|
||||
if ( x< rd-num ) permute_slice=wrap;
|
||||
else permute_slice = (wrap+1)%ly;
|
||||
|
||||
if ( (ly>2) && (permute_slice) ) {
|
||||
assert(permute_type & RotateBit);
|
||||
permute_type_dist = permute_type|permute_slice;
|
||||
} else {
|
||||
permute_type_dist = permute_type;
|
||||
}
|
||||
}
|
||||
|
||||
if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist);
|
||||
else Copy_plane(ret,rhs,dimension,x,sx,cbmask);
|
||||
|
||||
}
|
||||
}
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -1,467 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/cshift/Cshift_mpi.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef _GRID_CSHIFT_MPI_H_
|
||||
#define _GRID_CSHIFT_MPI_H_
|
||||
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension,int shift)
|
||||
{
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
|
||||
Lattice<vobj> ret(rhs.Grid());
|
||||
|
||||
int fd = rhs.Grid()->_fdimensions[dimension];
|
||||
int rd = rhs.Grid()->_rdimensions[dimension];
|
||||
|
||||
// Map to always positive shift modulo global full dimension.
|
||||
shift = (shift+fd)%fd;
|
||||
|
||||
ret.Checkerboard() = rhs.Grid()->CheckerBoardDestination(rhs.Checkerboard(),shift,dimension);
|
||||
|
||||
// the permute type
|
||||
int simd_layout = rhs.Grid()->_simd_layout[dimension];
|
||||
int comm_dim = rhs.Grid()->_processors[dimension] >1 ;
|
||||
int splice_dim = rhs.Grid()->_simd_layout[dimension]>1 && (comm_dim);
|
||||
|
||||
|
||||
if ( !comm_dim ) {
|
||||
//std::cout << "CSHIFT: Cshift_local" <<std::endl;
|
||||
Cshift_local(ret,rhs,dimension,shift); // Handles checkerboarding
|
||||
} else if ( splice_dim ) {
|
||||
//std::cout << "CSHIFT: Cshift_comms_simd call - splice_dim = " << splice_dim << " shift " << shift << " dimension = " << dimension << std::endl;
|
||||
Cshift_comms_simd(ret,rhs,dimension,shift);
|
||||
} else {
|
||||
//std::cout << "CSHIFT: Cshift_comms" <<std::endl;
|
||||
Cshift_comms(ret,rhs,dimension,shift);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class vobj> void Cshift_comms(Lattice<vobj>& ret,const Lattice<vobj> &rhs,int dimension,int shift)
|
||||
{
|
||||
int sshift[2];
|
||||
|
||||
sshift[0] = rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,Even);
|
||||
sshift[1] = rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,Odd);
|
||||
|
||||
// std::cout << "Cshift_comms dim "<<dimension<<"cb "<<rhs.Checkerboard()<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
|
||||
if ( sshift[0] == sshift[1] ) {
|
||||
// std::cout << "Single pass Cshift_comms" <<std::endl;
|
||||
Cshift_comms(ret,rhs,dimension,shift,0x3);
|
||||
} else {
|
||||
// std::cout << "Two pass Cshift_comms" <<std::endl;
|
||||
Cshift_comms(ret,rhs,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
|
||||
Cshift_comms(ret,rhs,dimension,shift,0x2);// both with block stride loop iteration
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj> void Cshift_comms_simd(Lattice<vobj>& ret,const Lattice<vobj> &rhs,int dimension,int shift)
|
||||
{
|
||||
int sshift[2];
|
||||
|
||||
sshift[0] = rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,Even);
|
||||
sshift[1] = rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,Odd);
|
||||
|
||||
//std::cout << "Cshift_comms_simd dim "<<dimension<<"cb "<<rhs.checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
|
||||
if ( sshift[0] == sshift[1] ) {
|
||||
//std::cout << "Single pass Cshift_comms" <<std::endl;
|
||||
Cshift_comms_simd(ret,rhs,dimension,shift,0x3);
|
||||
} else {
|
||||
//std::cout << "Two pass Cshift_comms" <<std::endl;
|
||||
Cshift_comms_simd(ret,rhs,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
|
||||
Cshift_comms_simd(ret,rhs,dimension,shift,0x2);// both with block stride loop iteration
|
||||
}
|
||||
}
|
||||
#define ACCELERATOR_CSHIFT_NO_COPY
|
||||
#ifdef ACCELERATOR_CSHIFT_NO_COPY
|
||||
template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
||||
{
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
|
||||
GridBase *grid=rhs.Grid();
|
||||
Lattice<vobj> temp(rhs.Grid());
|
||||
|
||||
int fd = rhs.Grid()->_fdimensions[dimension];
|
||||
int rd = rhs.Grid()->_rdimensions[dimension];
|
||||
int pd = rhs.Grid()->_processors[dimension];
|
||||
int simd_layout = rhs.Grid()->_simd_layout[dimension];
|
||||
int comm_dim = rhs.Grid()->_processors[dimension] >1 ;
|
||||
assert(simd_layout==1);
|
||||
assert(comm_dim==1);
|
||||
assert(shift>=0);
|
||||
assert(shift<fd);
|
||||
|
||||
int buffer_size = rhs.Grid()->_slice_nblock[dimension]*rhs.Grid()->_slice_block[dimension];
|
||||
static cshiftVector<vobj> send_buf; send_buf.resize(buffer_size);
|
||||
static cshiftVector<vobj> recv_buf; recv_buf.resize(buffer_size);
|
||||
|
||||
int cb= (cbmask==0x2)? Odd : Even;
|
||||
int sshift= rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb);
|
||||
|
||||
for(int x=0;x<rd;x++){
|
||||
|
||||
int sx = (x+sshift)%rd;
|
||||
int comm_proc = ((x+sshift)/rd)%pd;
|
||||
|
||||
if (comm_proc==0) {
|
||||
|
||||
Copy_plane(ret,rhs,dimension,x,sx,cbmask);
|
||||
|
||||
} else {
|
||||
|
||||
int words = buffer_size;
|
||||
if (cbmask != 0x3) words=words>>1;
|
||||
|
||||
int bytes = words * sizeof(vobj);
|
||||
|
||||
Gather_plane_simple (rhs,send_buf,dimension,sx,cbmask);
|
||||
|
||||
// int rank = grid->_processor;
|
||||
int recv_from_rank;
|
||||
int xmit_to_rank;
|
||||
grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
|
||||
|
||||
grid->Barrier();
|
||||
|
||||
grid->SendToRecvFrom((void *)&send_buf[0],
|
||||
xmit_to_rank,
|
||||
(void *)&recv_buf[0],
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
|
||||
grid->Barrier();
|
||||
|
||||
Scatter_plane_simple (ret,recv_buf,dimension,x,cbmask);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
||||
{
|
||||
GridBase *grid=rhs.Grid();
|
||||
const int Nsimd = grid->Nsimd();
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::scalar_object scalar_object;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
|
||||
int fd = grid->_fdimensions[dimension];
|
||||
int rd = grid->_rdimensions[dimension];
|
||||
int ld = grid->_ldimensions[dimension];
|
||||
int pd = grid->_processors[dimension];
|
||||
int simd_layout = grid->_simd_layout[dimension];
|
||||
int comm_dim = grid->_processors[dimension] >1 ;
|
||||
|
||||
//std::cout << "Cshift_comms_simd dim "<< dimension << " fd "<<fd<<" rd "<<rd
|
||||
// << " ld "<<ld<<" pd " << pd<<" simd_layout "<<simd_layout
|
||||
// << " comm_dim " << comm_dim << " cbmask " << cbmask <<std::endl;
|
||||
|
||||
assert(comm_dim==1);
|
||||
assert(simd_layout==2);
|
||||
assert(shift>=0);
|
||||
assert(shift<fd);
|
||||
|
||||
int permute_type=grid->PermuteType(dimension);
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Simd direction uses an extract/merge pair
|
||||
///////////////////////////////////////////////
|
||||
int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
|
||||
// int words = sizeof(vobj)/sizeof(vector_type);
|
||||
|
||||
static std::vector<cshiftVector<scalar_object> > send_buf_extract; send_buf_extract.resize(Nsimd);
|
||||
static std::vector<cshiftVector<scalar_object> > recv_buf_extract; recv_buf_extract.resize(Nsimd);
|
||||
scalar_object * recv_buf_extract_mpi;
|
||||
scalar_object * send_buf_extract_mpi;
|
||||
|
||||
for(int s=0;s<Nsimd;s++){
|
||||
send_buf_extract[s].resize(buffer_size);
|
||||
recv_buf_extract[s].resize(buffer_size);
|
||||
}
|
||||
|
||||
int bytes = buffer_size*sizeof(scalar_object);
|
||||
|
||||
ExtractPointerArray<scalar_object> pointers(Nsimd); //
|
||||
ExtractPointerArray<scalar_object> rpointers(Nsimd); // received pointers
|
||||
|
||||
///////////////////////////////////////////
|
||||
// Work out what to send where
|
||||
///////////////////////////////////////////
|
||||
int cb = (cbmask==0x2)? Odd : Even;
|
||||
int sshift= grid->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb);
|
||||
|
||||
// loop over outer coord planes orthog to dim
|
||||
for(int x=0;x<rd;x++){
|
||||
|
||||
// FIXME call local permute copy if none are offnode.
|
||||
for(int i=0;i<Nsimd;i++){
|
||||
pointers[i] = &send_buf_extract[i][0];
|
||||
}
|
||||
int sx = (x+sshift)%rd;
|
||||
Gather_plane_extract(rhs,pointers,dimension,sx,cbmask);
|
||||
|
||||
for(int i=0;i<Nsimd;i++){
|
||||
|
||||
int inner_bit = (Nsimd>>(permute_type+1));
|
||||
int ic= (i&inner_bit)? 1:0;
|
||||
|
||||
int my_coor = rd*ic + x;
|
||||
int nbr_coor = my_coor+sshift;
|
||||
int nbr_proc = ((nbr_coor)/ld) % pd;// relative shift in processors
|
||||
|
||||
int nbr_ic = (nbr_coor%ld)/rd; // inner coord of peer
|
||||
int nbr_ox = (nbr_coor%rd); // outer coord of peer
|
||||
int nbr_lane = (i&(~inner_bit));
|
||||
|
||||
int recv_from_rank;
|
||||
int xmit_to_rank;
|
||||
|
||||
if (nbr_ic) nbr_lane|=inner_bit;
|
||||
|
||||
assert (sx == nbr_ox);
|
||||
|
||||
if(nbr_proc){
|
||||
grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
|
||||
|
||||
grid->Barrier();
|
||||
|
||||
send_buf_extract_mpi = &send_buf_extract[nbr_lane][0];
|
||||
recv_buf_extract_mpi = &recv_buf_extract[i][0];
|
||||
grid->SendToRecvFrom((void *)send_buf_extract_mpi,
|
||||
xmit_to_rank,
|
||||
(void *)recv_buf_extract_mpi,
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
|
||||
grid->Barrier();
|
||||
|
||||
rpointers[i] = &recv_buf_extract[i][0];
|
||||
} else {
|
||||
rpointers[i] = &send_buf_extract[nbr_lane][0];
|
||||
}
|
||||
|
||||
}
|
||||
Scatter_plane_merge(ret,rpointers,dimension,x,cbmask);
|
||||
}
|
||||
|
||||
}
|
||||
#else
|
||||
template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
||||
{
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
|
||||
GridBase *grid=rhs.Grid();
|
||||
Lattice<vobj> temp(rhs.Grid());
|
||||
|
||||
int fd = rhs.Grid()->_fdimensions[dimension];
|
||||
int rd = rhs.Grid()->_rdimensions[dimension];
|
||||
int pd = rhs.Grid()->_processors[dimension];
|
||||
int simd_layout = rhs.Grid()->_simd_layout[dimension];
|
||||
int comm_dim = rhs.Grid()->_processors[dimension] >1 ;
|
||||
assert(simd_layout==1);
|
||||
assert(comm_dim==1);
|
||||
assert(shift>=0);
|
||||
assert(shift<fd);
|
||||
|
||||
int buffer_size = rhs.Grid()->_slice_nblock[dimension]*rhs.Grid()->_slice_block[dimension];
|
||||
static cshiftVector<vobj> send_buf_v; send_buf_v.resize(buffer_size);
|
||||
static cshiftVector<vobj> recv_buf_v; recv_buf_v.resize(buffer_size);
|
||||
vobj *send_buf;
|
||||
vobj *recv_buf;
|
||||
{
|
||||
grid->ShmBufferFreeAll();
|
||||
size_t bytes = buffer_size*sizeof(vobj);
|
||||
send_buf=(vobj *)grid->ShmBufferMalloc(bytes);
|
||||
recv_buf=(vobj *)grid->ShmBufferMalloc(bytes);
|
||||
}
|
||||
|
||||
int cb= (cbmask==0x2)? Odd : Even;
|
||||
int sshift= rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb);
|
||||
|
||||
for(int x=0;x<rd;x++){
|
||||
|
||||
int sx = (x+sshift)%rd;
|
||||
int comm_proc = ((x+sshift)/rd)%pd;
|
||||
|
||||
if (comm_proc==0) {
|
||||
|
||||
Copy_plane(ret,rhs,dimension,x,sx,cbmask);
|
||||
|
||||
} else {
|
||||
|
||||
int words = buffer_size;
|
||||
if (cbmask != 0x3) words=words>>1;
|
||||
|
||||
int bytes = words * sizeof(vobj);
|
||||
|
||||
Gather_plane_simple (rhs,send_buf_v,dimension,sx,cbmask);
|
||||
|
||||
// int rank = grid->_processor;
|
||||
int recv_from_rank;
|
||||
int xmit_to_rank;
|
||||
grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
|
||||
|
||||
|
||||
grid->Barrier();
|
||||
|
||||
acceleratorCopyDeviceToDevice((void *)&send_buf_v[0],(void *)&send_buf[0],bytes);
|
||||
grid->SendToRecvFrom((void *)&send_buf[0],
|
||||
xmit_to_rank,
|
||||
(void *)&recv_buf[0],
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
acceleratorCopyDeviceToDevice((void *)&recv_buf[0],(void *)&recv_buf_v[0],bytes);
|
||||
|
||||
grid->Barrier();
|
||||
|
||||
Scatter_plane_simple (ret,recv_buf_v,dimension,x,cbmask);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
||||
{
|
||||
GridBase *grid=rhs.Grid();
|
||||
const int Nsimd = grid->Nsimd();
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::scalar_object scalar_object;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
|
||||
int fd = grid->_fdimensions[dimension];
|
||||
int rd = grid->_rdimensions[dimension];
|
||||
int ld = grid->_ldimensions[dimension];
|
||||
int pd = grid->_processors[dimension];
|
||||
int simd_layout = grid->_simd_layout[dimension];
|
||||
int comm_dim = grid->_processors[dimension] >1 ;
|
||||
|
||||
//std::cout << "Cshift_comms_simd dim "<< dimension << " fd "<<fd<<" rd "<<rd
|
||||
// << " ld "<<ld<<" pd " << pd<<" simd_layout "<<simd_layout
|
||||
// << " comm_dim " << comm_dim << " cbmask " << cbmask <<std::endl;
|
||||
|
||||
assert(comm_dim==1);
|
||||
assert(simd_layout==2);
|
||||
assert(shift>=0);
|
||||
assert(shift<fd);
|
||||
|
||||
int permute_type=grid->PermuteType(dimension);
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Simd direction uses an extract/merge pair
|
||||
///////////////////////////////////////////////
|
||||
int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
|
||||
// int words = sizeof(vobj)/sizeof(vector_type);
|
||||
|
||||
static std::vector<cshiftVector<scalar_object> > send_buf_extract; send_buf_extract.resize(Nsimd);
|
||||
static std::vector<cshiftVector<scalar_object> > recv_buf_extract; recv_buf_extract.resize(Nsimd);
|
||||
scalar_object * recv_buf_extract_mpi;
|
||||
scalar_object * send_buf_extract_mpi;
|
||||
{
|
||||
size_t bytes = sizeof(scalar_object)*buffer_size;
|
||||
grid->ShmBufferFreeAll();
|
||||
send_buf_extract_mpi = (scalar_object *)grid->ShmBufferMalloc(bytes);
|
||||
recv_buf_extract_mpi = (scalar_object *)grid->ShmBufferMalloc(bytes);
|
||||
}
|
||||
for(int s=0;s<Nsimd;s++){
|
||||
send_buf_extract[s].resize(buffer_size);
|
||||
recv_buf_extract[s].resize(buffer_size);
|
||||
}
|
||||
|
||||
int bytes = buffer_size*sizeof(scalar_object);
|
||||
|
||||
ExtractPointerArray<scalar_object> pointers(Nsimd); //
|
||||
ExtractPointerArray<scalar_object> rpointers(Nsimd); // received pointers
|
||||
|
||||
///////////////////////////////////////////
|
||||
// Work out what to send where
|
||||
///////////////////////////////////////////
|
||||
int cb = (cbmask==0x2)? Odd : Even;
|
||||
int sshift= grid->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb);
|
||||
|
||||
// loop over outer coord planes orthog to dim
|
||||
for(int x=0;x<rd;x++){
|
||||
|
||||
// FIXME call local permute copy if none are offnode.
|
||||
for(int i=0;i<Nsimd;i++){
|
||||
pointers[i] = &send_buf_extract[i][0];
|
||||
}
|
||||
int sx = (x+sshift)%rd;
|
||||
Gather_plane_extract(rhs,pointers,dimension,sx,cbmask);
|
||||
|
||||
for(int i=0;i<Nsimd;i++){
|
||||
|
||||
int inner_bit = (Nsimd>>(permute_type+1));
|
||||
int ic= (i&inner_bit)? 1:0;
|
||||
|
||||
int my_coor = rd*ic + x;
|
||||
int nbr_coor = my_coor+sshift;
|
||||
int nbr_proc = ((nbr_coor)/ld) % pd;// relative shift in processors
|
||||
|
||||
int nbr_ic = (nbr_coor%ld)/rd; // inner coord of peer
|
||||
int nbr_ox = (nbr_coor%rd); // outer coord of peer
|
||||
int nbr_lane = (i&(~inner_bit));
|
||||
|
||||
int recv_from_rank;
|
||||
int xmit_to_rank;
|
||||
|
||||
if (nbr_ic) nbr_lane|=inner_bit;
|
||||
|
||||
assert (sx == nbr_ox);
|
||||
|
||||
if(nbr_proc){
|
||||
grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
|
||||
|
||||
grid->Barrier();
|
||||
|
||||
acceleratorCopyDeviceToDevice((void *)&send_buf_extract[nbr_lane][0],(void *)send_buf_extract_mpi,bytes);
|
||||
grid->SendToRecvFrom((void *)send_buf_extract_mpi,
|
||||
xmit_to_rank,
|
||||
(void *)recv_buf_extract_mpi,
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
acceleratorCopyDeviceToDevice((void *)recv_buf_extract_mpi,(void *)&recv_buf_extract[i][0],bytes);
|
||||
|
||||
grid->Barrier();
|
||||
rpointers[i] = &recv_buf_extract[i][0];
|
||||
} else {
|
||||
rpointers[i] = &send_buf_extract[nbr_lane][0];
|
||||
}
|
||||
|
||||
}
|
||||
Scatter_plane_merge(ret,rpointers,dimension,x,cbmask);
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
@ -1,4 +0,0 @@
|
||||
#include <Grid/GridCore.h>
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
Vector<std::pair<int,int> > Cshift_table;
|
||||
NAMESPACE_END(Grid);
|
22091
Grid/json/json.hpp
22091
Grid/json/json.hpp
File diff suppressed because it is too large
Load Diff
@ -1,534 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_ET.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
Author: Christoph Lehner <christoph@lhnr.de
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_ET_H
|
||||
#define GRID_LATTICE_ET_H
|
||||
|
||||
#include <iostream>
|
||||
#include <tuple>
|
||||
#include <typeinfo>
|
||||
#include <vector>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
// Predicated where support
|
||||
////////////////////////////////////////////////////
|
||||
#ifdef GRID_SIMT
|
||||
// drop to scalar in SIMT; cleaner in fact
|
||||
template <class iobj, class vobj, class robj>
|
||||
accelerator_inline vobj predicatedWhere(const iobj &predicate,
|
||||
const vobj &iftrue,
|
||||
const robj &iffalse)
|
||||
{
|
||||
Integer mask = TensorRemove(predicate);
|
||||
typename std::remove_const<vobj>::type ret= iffalse;
|
||||
if (mask) ret=iftrue;
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
template <class iobj, class vobj, class robj>
|
||||
accelerator_inline vobj predicatedWhere(const iobj &predicate,
|
||||
const vobj &iftrue,
|
||||
const robj &iffalse)
|
||||
{
|
||||
typename std::remove_const<vobj>::type ret;
|
||||
|
||||
typedef typename vobj::scalar_object scalar_object;
|
||||
// typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
const int Nsimd = vobj::vector_type::Nsimd();
|
||||
|
||||
ExtractBuffer<Integer> mask(Nsimd);
|
||||
ExtractBuffer<scalar_object> truevals(Nsimd);
|
||||
ExtractBuffer<scalar_object> falsevals(Nsimd);
|
||||
|
||||
extract(iftrue, truevals);
|
||||
extract(iffalse, falsevals);
|
||||
extract<vInteger, Integer>(TensorRemove(predicate), mask);
|
||||
|
||||
for (int s = 0; s < Nsimd; s++) {
|
||||
if (mask[s]) falsevals[s] = truevals[s];
|
||||
}
|
||||
|
||||
merge(ret, falsevals);
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
//Specialization of getVectorType for lattices
|
||||
/////////////////////////////////////////////////////
|
||||
template<typename T>
|
||||
struct getVectorType<Lattice<T> >{
|
||||
typedef typename Lattice<T>::vector_object type;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////
|
||||
//-- recursive evaluation of expressions; --
|
||||
// handle leaves of syntax tree
|
||||
///////////////////////////////////////////////////
|
||||
template<class sobj,
|
||||
typename std::enable_if<!is_lattice<sobj>::value&&!is_lattice_expr<sobj>::value,sobj>::type * = nullptr>
|
||||
accelerator_inline
|
||||
sobj eval(const uint64_t ss, const sobj &arg)
|
||||
{
|
||||
return arg;
|
||||
}
|
||||
template <class lobj> accelerator_inline
|
||||
auto eval(const uint64_t ss, const LatticeView<lobj> &arg) -> decltype(arg(ss))
|
||||
{
|
||||
return arg(ss);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////
|
||||
//-- recursive evaluation of expressions; --
|
||||
// whole vector return, used only for expression return type inference
|
||||
///////////////////////////////////////////////////
|
||||
template<class sobj> accelerator_inline
|
||||
sobj vecEval(const uint64_t ss, const sobj &arg)
|
||||
{
|
||||
return arg;
|
||||
}
|
||||
template <class lobj> accelerator_inline
|
||||
const lobj & vecEval(const uint64_t ss, const LatticeView<lobj> &arg)
|
||||
{
|
||||
return arg[ss];
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// handle nodes in syntax tree- eval one operand
|
||||
// vecEval needed (but never called as all expressions offloaded) to infer the return type
|
||||
// in SIMT contexts of closure.
|
||||
///////////////////////////////////////////////////
|
||||
template <typename Op, typename T1> accelerator_inline
|
||||
auto vecEval(const uint64_t ss, const LatticeUnaryExpression<Op, T1> &expr)
|
||||
-> decltype(expr.op.func( vecEval(ss, expr.arg1)))
|
||||
{
|
||||
return expr.op.func( vecEval(ss, expr.arg1) );
|
||||
}
|
||||
// vecEval two operands
|
||||
template <typename Op, typename T1, typename T2> accelerator_inline
|
||||
auto vecEval(const uint64_t ss, const LatticeBinaryExpression<Op, T1, T2> &expr)
|
||||
-> decltype(expr.op.func( vecEval(ss,expr.arg1),vecEval(ss,expr.arg2)))
|
||||
{
|
||||
return expr.op.func( vecEval(ss,expr.arg1), vecEval(ss,expr.arg2) );
|
||||
}
|
||||
// vecEval three operands
|
||||
template <typename Op, typename T1, typename T2, typename T3> accelerator_inline
|
||||
auto vecEval(const uint64_t ss, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
|
||||
-> decltype(expr.op.func(vecEval(ss, expr.arg1), vecEval(ss, expr.arg2), vecEval(ss, expr.arg3)))
|
||||
{
|
||||
return expr.op.func(vecEval(ss, expr.arg1), vecEval(ss, expr.arg2), vecEval(ss, expr.arg3));
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// handle nodes in syntax tree- eval one operand coalesced
|
||||
///////////////////////////////////////////////////
|
||||
template <typename Op, typename T1> accelerator_inline
|
||||
auto eval(const uint64_t ss, const LatticeUnaryExpression<Op, T1> &expr)
|
||||
-> decltype(expr.op.func( eval(ss, expr.arg1)))
|
||||
{
|
||||
return expr.op.func( eval(ss, expr.arg1) );
|
||||
}
|
||||
// eval two operands
|
||||
template <typename Op, typename T1, typename T2> accelerator_inline
|
||||
auto eval(const uint64_t ss, const LatticeBinaryExpression<Op, T1, T2> &expr)
|
||||
-> decltype(expr.op.func( eval(ss,expr.arg1),eval(ss,expr.arg2)))
|
||||
{
|
||||
return expr.op.func( eval(ss,expr.arg1), eval(ss,expr.arg2) );
|
||||
}
|
||||
// eval three operands
|
||||
template <typename Op, typename T1, typename T2, typename T3> accelerator_inline
|
||||
auto eval(const uint64_t ss, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
|
||||
-> decltype(expr.op.func(eval(ss, expr.arg1),
|
||||
eval(ss, expr.arg2),
|
||||
eval(ss, expr.arg3)))
|
||||
{
|
||||
#ifdef GRID_SIMT
|
||||
// Handles Nsimd (vInteger) != Nsimd(ComplexD)
|
||||
typedef decltype(vecEval(ss, expr.arg2)) rvobj;
|
||||
typedef typename std::remove_reference<rvobj>::type vobj;
|
||||
|
||||
const int Nsimd = vobj::vector_type::Nsimd();
|
||||
|
||||
auto vpred = vecEval(ss,expr.arg1);
|
||||
|
||||
ExtractBuffer<Integer> mask(Nsimd);
|
||||
extract<vInteger, Integer>(TensorRemove(vpred), mask);
|
||||
|
||||
int s = acceleratorSIMTlane(Nsimd);
|
||||
return expr.op.func(mask[s],
|
||||
eval(ss, expr.arg2),
|
||||
eval(ss, expr.arg3));
|
||||
#else
|
||||
return expr.op.func(eval(ss, expr.arg1),
|
||||
eval(ss, expr.arg2),
|
||||
eval(ss, expr.arg3));
|
||||
#endif
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Obtain the grid from an expression, ensuring conformable. This must follow a
|
||||
// tree recursion; must retain grid pointer in the LatticeView class which sucks
|
||||
// Use a different method, and make it void *.
|
||||
// Perhaps a conformable method.
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template <class T1,typename std::enable_if<is_lattice<T1>::value, T1>::type * = nullptr>
|
||||
accelerator_inline void GridFromExpression(GridBase *&grid, const T1 &lat) // Lattice leaf
|
||||
{
|
||||
lat.Conformable(grid);
|
||||
}
|
||||
|
||||
template <class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr>
|
||||
accelerator_inline
|
||||
void GridFromExpression(GridBase *&grid,const T1 ¬lat) // non-lattice leaf
|
||||
{}
|
||||
|
||||
template <typename Op, typename T1>
|
||||
accelerator_inline
|
||||
void GridFromExpression(GridBase *&grid,const LatticeUnaryExpression<Op, T1> &expr)
|
||||
{
|
||||
GridFromExpression(grid, expr.arg1); // recurse
|
||||
}
|
||||
|
||||
template <typename Op, typename T1, typename T2>
|
||||
accelerator_inline
|
||||
void GridFromExpression(GridBase *&grid, const LatticeBinaryExpression<Op, T1, T2> &expr)
|
||||
{
|
||||
GridFromExpression(grid, expr.arg1); // recurse
|
||||
GridFromExpression(grid, expr.arg2);
|
||||
}
|
||||
template <typename Op, typename T1, typename T2, typename T3>
|
||||
accelerator_inline
|
||||
void GridFromExpression(GridBase *&grid, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
|
||||
{
|
||||
GridFromExpression(grid, expr.arg1); // recurse
|
||||
GridFromExpression(grid, expr.arg2); // recurse
|
||||
GridFromExpression(grid, expr.arg3); // recurse
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Obtain the CB from an expression, ensuring conformable. This must follow a
|
||||
// tree recursion
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template <class T1,typename std::enable_if<is_lattice<T1>::value, T1>::type * = nullptr>
|
||||
inline void CBFromExpression(int &cb, const T1 &lat) // Lattice leaf
|
||||
{
|
||||
if ((cb == Odd) || (cb == Even)) {
|
||||
assert(cb == lat.Checkerboard());
|
||||
}
|
||||
cb = lat.Checkerboard();
|
||||
}
|
||||
template <class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr>
|
||||
inline void CBFromExpression(int &cb, const T1 ¬lat) {} // non-lattice leaf
|
||||
template <typename Op, typename T1> inline
|
||||
void CBFromExpression(int &cb,const LatticeUnaryExpression<Op, T1> &expr)
|
||||
{
|
||||
CBFromExpression(cb, expr.arg1); // recurse AST
|
||||
}
|
||||
template <typename Op, typename T1, typename T2> inline
|
||||
void CBFromExpression(int &cb,const LatticeBinaryExpression<Op, T1, T2> &expr)
|
||||
{
|
||||
CBFromExpression(cb, expr.arg1); // recurse AST
|
||||
CBFromExpression(cb, expr.arg2); // recurse AST
|
||||
}
|
||||
template <typename Op, typename T1, typename T2, typename T3>
|
||||
inline void CBFromExpression(int &cb, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
|
||||
{
|
||||
CBFromExpression(cb, expr.arg1); // recurse AST
|
||||
CBFromExpression(cb, expr.arg2); // recurse AST
|
||||
CBFromExpression(cb, expr.arg3); // recurse AST
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// ViewOpen
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template <class T1,typename std::enable_if<is_lattice<T1>::value, T1>::type * = nullptr>
|
||||
inline void ExpressionViewOpen(T1 &lat) // Lattice leaf
|
||||
{
|
||||
lat.ViewOpen(AcceleratorRead);
|
||||
}
|
||||
template <class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr>
|
||||
inline void ExpressionViewOpen(T1 ¬lat) {}
|
||||
|
||||
template <typename Op, typename T1> inline
|
||||
void ExpressionViewOpen(LatticeUnaryExpression<Op, T1> &expr)
|
||||
{
|
||||
ExpressionViewOpen(expr.arg1); // recurse AST
|
||||
}
|
||||
|
||||
template <typename Op, typename T1, typename T2> inline
|
||||
void ExpressionViewOpen(LatticeBinaryExpression<Op, T1, T2> &expr)
|
||||
{
|
||||
ExpressionViewOpen(expr.arg1); // recurse AST
|
||||
ExpressionViewOpen(expr.arg2); // rrecurse AST
|
||||
}
|
||||
template <typename Op, typename T1, typename T2, typename T3>
|
||||
inline void ExpressionViewOpen(LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
|
||||
{
|
||||
ExpressionViewOpen(expr.arg1); // recurse AST
|
||||
ExpressionViewOpen(expr.arg2); // recurse AST
|
||||
ExpressionViewOpen(expr.arg3); // recurse AST
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// ViewClose
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template <class T1,typename std::enable_if<is_lattice<T1>::value, T1>::type * = nullptr>
|
||||
inline void ExpressionViewClose( T1 &lat) // Lattice leaf
|
||||
{
|
||||
lat.ViewClose();
|
||||
}
|
||||
template <class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr>
|
||||
inline void ExpressionViewClose(T1 ¬lat) {}
|
||||
|
||||
template <typename Op, typename T1> inline
|
||||
void ExpressionViewClose(LatticeUnaryExpression<Op, T1> &expr)
|
||||
{
|
||||
ExpressionViewClose(expr.arg1); // recurse AST
|
||||
}
|
||||
template <typename Op, typename T1, typename T2> inline
|
||||
void ExpressionViewClose(LatticeBinaryExpression<Op, T1, T2> &expr)
|
||||
{
|
||||
ExpressionViewClose(expr.arg1); // recurse AST
|
||||
ExpressionViewClose(expr.arg2); // recurse AST
|
||||
}
|
||||
template <typename Op, typename T1, typename T2, typename T3>
|
||||
inline void ExpressionViewClose(LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
|
||||
{
|
||||
ExpressionViewClose(expr.arg1); // recurse AST
|
||||
ExpressionViewClose(expr.arg2); // recurse AST
|
||||
ExpressionViewClose(expr.arg3); // recurse AST
|
||||
}
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Unary operators and funcs
|
||||
////////////////////////////////////////////
|
||||
#define GridUnopClass(name, ret) \
|
||||
struct name { \
|
||||
template<class _arg> static auto accelerator_inline func(const _arg a) -> decltype(ret) { return ret; } \
|
||||
};
|
||||
|
||||
GridUnopClass(UnarySub, -a);
|
||||
GridUnopClass(UnaryNot, Not(a));
|
||||
GridUnopClass(UnaryTrace, trace(a));
|
||||
GridUnopClass(UnaryTranspose, transpose(a));
|
||||
GridUnopClass(UnaryTa, Ta(a));
|
||||
GridUnopClass(UnarySpTa, SpTa(a));
|
||||
GridUnopClass(UnaryProjectOnGroup, ProjectOnGroup(a));
|
||||
GridUnopClass(UnaryProjectOnSpGroup, ProjectOnSpGroup(a));
|
||||
GridUnopClass(UnaryTimesI, timesI(a));
|
||||
GridUnopClass(UnaryTimesMinusI, timesMinusI(a));
|
||||
GridUnopClass(UnaryAbs, abs(a));
|
||||
GridUnopClass(UnarySqrt, sqrt(a));
|
||||
GridUnopClass(UnarySin, sin(a));
|
||||
GridUnopClass(UnaryCos, cos(a));
|
||||
GridUnopClass(UnaryAsin, asin(a));
|
||||
GridUnopClass(UnaryAcos, acos(a));
|
||||
GridUnopClass(UnaryLog, log(a));
|
||||
GridUnopClass(UnaryExp, exp(a));
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Binary operators
|
||||
////////////////////////////////////////////
|
||||
#define GridBinOpClass(name, combination) \
|
||||
struct name { \
|
||||
template <class _left, class _right> \
|
||||
static auto accelerator_inline \
|
||||
func(const _left &lhs, const _right &rhs) \
|
||||
-> decltype(combination) const \
|
||||
{ \
|
||||
return combination; \
|
||||
} \
|
||||
};
|
||||
|
||||
GridBinOpClass(BinaryAdd, lhs + rhs);
|
||||
GridBinOpClass(BinarySub, lhs - rhs);
|
||||
GridBinOpClass(BinaryMul, lhs *rhs);
|
||||
GridBinOpClass(BinaryDiv, lhs /rhs);
|
||||
GridBinOpClass(BinaryAnd, lhs &rhs);
|
||||
GridBinOpClass(BinaryOr, lhs | rhs);
|
||||
GridBinOpClass(BinaryAndAnd, lhs &&rhs);
|
||||
GridBinOpClass(BinaryOrOr, lhs || rhs);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
// Trinary conditional op
|
||||
////////////////////////////////////////////////////
|
||||
#define GridTrinOpClass(name, combination) \
|
||||
struct name { \
|
||||
template <class _predicate,class _left, class _right> \
|
||||
static auto accelerator_inline \
|
||||
func(const _predicate &pred, const _left &lhs, const _right &rhs) \
|
||||
-> decltype(combination) const \
|
||||
{ \
|
||||
return combination; \
|
||||
} \
|
||||
};
|
||||
|
||||
GridTrinOpClass(TrinaryWhere,
|
||||
(predicatedWhere<
|
||||
typename std::remove_reference<_predicate>::type,
|
||||
typename std::remove_reference<_left>::type,
|
||||
typename std::remove_reference<_right>::type>(pred, lhs,rhs)));
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Operator syntactical glue
|
||||
////////////////////////////////////////////
|
||||
#define GRID_UNOP(name) name
|
||||
#define GRID_BINOP(name) name
|
||||
#define GRID_TRINOP(name) name
|
||||
|
||||
#define GRID_DEF_UNOP(op, name) \
|
||||
template <typename T1, typename std::enable_if<is_lattice<T1>::value||is_lattice_expr<T1>::value,T1>::type * = nullptr> \
|
||||
inline auto op(const T1 &arg) ->decltype(LatticeUnaryExpression<GRID_UNOP(name),T1>(GRID_UNOP(name)(), arg)) \
|
||||
{ \
|
||||
return LatticeUnaryExpression<GRID_UNOP(name),T1>(GRID_UNOP(name)(), arg); \
|
||||
}
|
||||
|
||||
#define GRID_BINOP_LEFT(op, name) \
|
||||
template <typename T1, typename T2, \
|
||||
typename std::enable_if<is_lattice<T1>::value||is_lattice_expr<T1>::value,T1>::type * = nullptr> \
|
||||
inline auto op(const T1 &lhs, const T2 &rhs) \
|
||||
->decltype(LatticeBinaryExpression<GRID_BINOP(name),T1,T2>(GRID_BINOP(name)(),lhs,rhs)) \
|
||||
{ \
|
||||
return LatticeBinaryExpression<GRID_BINOP(name),T1,T2>(GRID_BINOP(name)(),lhs,rhs);\
|
||||
}
|
||||
|
||||
#define GRID_BINOP_RIGHT(op, name) \
|
||||
template <typename T1, typename T2, \
|
||||
typename std::enable_if<!is_lattice<T1>::value&&!is_lattice_expr<T1>::value,T1>::type * = nullptr, \
|
||||
typename std::enable_if< is_lattice<T2>::value|| is_lattice_expr<T2>::value,T2>::type * = nullptr> \
|
||||
inline auto op(const T1 &lhs, const T2 &rhs) \
|
||||
->decltype(LatticeBinaryExpression<GRID_BINOP(name),T1,T2>(GRID_BINOP(name)(),lhs, rhs)) \
|
||||
{ \
|
||||
return LatticeBinaryExpression<GRID_BINOP(name),T1,T2>(GRID_BINOP(name)(),lhs, rhs); \
|
||||
}
|
||||
|
||||
#define GRID_DEF_BINOP(op, name) \
|
||||
GRID_BINOP_LEFT(op, name); \
|
||||
GRID_BINOP_RIGHT(op, name);
|
||||
|
||||
#define GRID_DEF_TRINOP(op, name) \
|
||||
template <typename T1, typename T2, typename T3> \
|
||||
inline auto op(const T1 &pred, const T2 &lhs, const T3 &rhs) \
|
||||
->decltype(LatticeTrinaryExpression<GRID_TRINOP(name),T1,T2,T3>(GRID_TRINOP(name)(),pred, lhs, rhs)) \
|
||||
{ \
|
||||
return LatticeTrinaryExpression<GRID_TRINOP(name),T1,T2,T3>(GRID_TRINOP(name)(),pred, lhs, rhs); \
|
||||
}
|
||||
|
||||
////////////////////////
|
||||
// Operator definitions
|
||||
////////////////////////
|
||||
GRID_DEF_UNOP(operator-, UnarySub);
|
||||
GRID_DEF_UNOP(Not, UnaryNot);
|
||||
GRID_DEF_UNOP(operator!, UnaryNot);
|
||||
//GRID_DEF_UNOP(adj, UnaryAdj);
|
||||
//GRID_DEF_UNOP(conjugate, UnaryConj);
|
||||
GRID_DEF_UNOP(trace, UnaryTrace);
|
||||
GRID_DEF_UNOP(transpose, UnaryTranspose);
|
||||
GRID_DEF_UNOP(Ta, UnaryTa);
|
||||
GRID_DEF_UNOP(SpTa, UnarySpTa);
|
||||
GRID_DEF_UNOP(ProjectOnGroup, UnaryProjectOnGroup);
|
||||
GRID_DEF_UNOP(ProjectOnSpGroup, UnaryProjectOnSpGroup);
|
||||
GRID_DEF_UNOP(timesI, UnaryTimesI);
|
||||
GRID_DEF_UNOP(timesMinusI, UnaryTimesMinusI);
|
||||
GRID_DEF_UNOP(abs, UnaryAbs); // abs overloaded in cmath C++98; DON'T do the
|
||||
// abs-fabs-dabs-labs thing
|
||||
GRID_DEF_UNOP(sqrt, UnarySqrt);
|
||||
GRID_DEF_UNOP(sin, UnarySin);
|
||||
GRID_DEF_UNOP(cos, UnaryCos);
|
||||
GRID_DEF_UNOP(asin, UnaryAsin);
|
||||
GRID_DEF_UNOP(acos, UnaryAcos);
|
||||
GRID_DEF_UNOP(log, UnaryLog);
|
||||
GRID_DEF_UNOP(exp, UnaryExp);
|
||||
|
||||
GRID_DEF_BINOP(operator+, BinaryAdd);
|
||||
GRID_DEF_BINOP(operator-, BinarySub);
|
||||
GRID_DEF_BINOP(operator*, BinaryMul);
|
||||
GRID_DEF_BINOP(operator/, BinaryDiv);
|
||||
|
||||
GRID_DEF_BINOP(operator&, BinaryAnd);
|
||||
GRID_DEF_BINOP(operator|, BinaryOr);
|
||||
GRID_DEF_BINOP(operator&&, BinaryAndAnd);
|
||||
GRID_DEF_BINOP(operator||, BinaryOrOr);
|
||||
|
||||
GRID_DEF_TRINOP(where, TrinaryWhere);
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Closure convenience to force expression to evaluate
|
||||
/////////////////////////////////////////////////////////////
|
||||
template <class Op, class T1>
|
||||
auto closure(const LatticeUnaryExpression<Op, T1> &expr)
|
||||
-> Lattice<typename std::remove_const<decltype(expr.op.func(vecEval(0, expr.arg1)))>::type >
|
||||
{
|
||||
Lattice<typename std::remove_const<decltype(expr.op.func(vecEval(0, expr.arg1)))>::type > ret(expr);
|
||||
return ret;
|
||||
}
|
||||
template <class Op, class T1, class T2>
|
||||
auto closure(const LatticeBinaryExpression<Op, T1, T2> &expr)
|
||||
-> Lattice<typename std::remove_const<decltype(expr.op.func(vecEval(0, expr.arg1),vecEval(0, expr.arg2)))>::type >
|
||||
{
|
||||
Lattice<typename std::remove_const<decltype(expr.op.func(vecEval(0, expr.arg1),vecEval(0, expr.arg2)))>::type > ret(expr);
|
||||
return ret;
|
||||
}
|
||||
template <class Op, class T1, class T2, class T3>
|
||||
auto closure(const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
|
||||
-> Lattice<typename std::remove_const<decltype(expr.op.func(vecEval(0, expr.arg1),
|
||||
vecEval(0, expr.arg2),
|
||||
vecEval(0, expr.arg3)))>::type >
|
||||
{
|
||||
Lattice<typename std::remove_const<decltype(expr.op.func(vecEval(0, expr.arg1),
|
||||
vecEval(0, expr.arg2),
|
||||
vecEval(0, expr.arg3)))>::type > ret(expr);
|
||||
return ret;
|
||||
}
|
||||
#define EXPRESSION_CLOSURE(function) \
|
||||
template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr> \
|
||||
auto function(Expression &expr) -> decltype(function(closure(expr))) \
|
||||
{ \
|
||||
return function(closure(expr)); \
|
||||
}
|
||||
|
||||
|
||||
#undef GRID_UNOP
|
||||
#undef GRID_BINOP
|
||||
#undef GRID_TRINOP
|
||||
|
||||
#undef GRID_DEF_UNOP
|
||||
#undef GRID_DEF_BINOP
|
||||
#undef GRID_DEF_TRINOP
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
@ -1,274 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_arith.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Christoph Lehner <christoph@lhnr.de>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_ARITH_H
|
||||
#define GRID_LATTICE_ARITH_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// avoid copy back routines for mult, mac, sub, add
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
GRID_TRACE("mult");
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
autoView( lhs_v , lhs, AcceleratorRead);
|
||||
autoView( rhs_v , rhs, AcceleratorRead);
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto lhs_t = lhs_v(ss);
|
||||
auto rhs_t = rhs_v(ss);
|
||||
mult(&tmp,&lhs_t,&rhs_t);
|
||||
coalescedWrite(ret_v[ss],tmp);
|
||||
});
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
GRID_TRACE("mac");
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
autoView( lhs_v , lhs, AcceleratorRead);
|
||||
autoView( rhs_v , rhs, AcceleratorRead);
|
||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||
auto lhs_t=lhs_v(ss);
|
||||
auto rhs_t=rhs_v(ss);
|
||||
auto tmp =ret_v(ss);
|
||||
mac(&tmp,&lhs_t,&rhs_t);
|
||||
coalescedWrite(ret_v[ss],tmp);
|
||||
});
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
GRID_TRACE("sub");
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
autoView( lhs_v , lhs, AcceleratorRead);
|
||||
autoView( rhs_v , rhs, AcceleratorRead);
|
||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto lhs_t=lhs_v(ss);
|
||||
auto rhs_t=rhs_v(ss);
|
||||
sub(&tmp,&lhs_t,&rhs_t);
|
||||
coalescedWrite(ret_v[ss],tmp);
|
||||
});
|
||||
}
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
GRID_TRACE("add");
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
autoView( lhs_v , lhs, AcceleratorRead);
|
||||
autoView( rhs_v , rhs, AcceleratorRead);
|
||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto lhs_t=lhs_v(ss);
|
||||
auto rhs_t=rhs_v(ss);
|
||||
add(&tmp,&lhs_t,&rhs_t);
|
||||
coalescedWrite(ret_v[ss],tmp);
|
||||
});
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// avoid copy back routines for mult, mac, sub, add
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
GRID_TRACE("mult");
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(lhs,ret);
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
autoView( lhs_v , lhs, AcceleratorRead);
|
||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
mult(&tmp,&lhs_v(ss),&rhs);
|
||||
coalescedWrite(ret_v[ss],tmp);
|
||||
});
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
GRID_TRACE("mac");
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,lhs);
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
autoView( lhs_v , lhs, AcceleratorRead);
|
||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||
auto tmp =ret_v(ss);
|
||||
auto lhs_t=lhs_v(ss);
|
||||
mac(&tmp,&lhs_t,&rhs);
|
||||
coalescedWrite(ret_v[ss],tmp);
|
||||
});
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
GRID_TRACE("sub");
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,lhs);
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
autoView( lhs_v , lhs, AcceleratorRead);
|
||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto lhs_t=lhs_v(ss);
|
||||
sub(&tmp,&lhs_t,&rhs);
|
||||
coalescedWrite(ret_v[ss],tmp);
|
||||
});
|
||||
}
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
GRID_TRACE("add");
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(lhs,ret);
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
autoView( lhs_v , lhs, AcceleratorRead);
|
||||
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto lhs_t=lhs_v(ss);
|
||||
add(&tmp,&lhs_t,&rhs);
|
||||
coalescedWrite(ret_v[ss],tmp);
|
||||
});
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// avoid copy back routines for mult, mac, sub, add
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void mult(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
GRID_TRACE("mult");
|
||||
ret.Checkerboard() = rhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
autoView( rhs_v , lhs, AcceleratorRead);
|
||||
accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto rhs_t=rhs_v(ss);
|
||||
mult(&tmp,&lhs,&rhs_t);
|
||||
coalescedWrite(ret_v[ss],tmp);
|
||||
});
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
GRID_TRACE("mac");
|
||||
ret.Checkerboard() = rhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
autoView( rhs_v , lhs, AcceleratorRead);
|
||||
accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
|
||||
auto tmp =ret_v(ss);
|
||||
auto rhs_t=rhs_v(ss);
|
||||
mac(&tmp,&lhs,&rhs_t);
|
||||
coalescedWrite(ret_v[ss],tmp);
|
||||
});
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void sub(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
GRID_TRACE("sub");
|
||||
ret.Checkerboard() = rhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
autoView( rhs_v , lhs, AcceleratorRead);
|
||||
accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto rhs_t=rhs_v(ss);
|
||||
sub(&tmp,&lhs,&rhs_t);
|
||||
coalescedWrite(ret_v[ss],tmp);
|
||||
});
|
||||
}
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void add(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
GRID_TRACE("add");
|
||||
ret.Checkerboard() = rhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
autoView( rhs_v , lhs, AcceleratorRead);
|
||||
accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
|
||||
decltype(coalescedRead(obj1())) tmp;
|
||||
auto rhs_t=rhs_v(ss);
|
||||
add(&tmp,&lhs,&rhs_t);
|
||||
coalescedWrite(ret_v[ss],tmp);
|
||||
});
|
||||
}
|
||||
|
||||
template<class sobj,class vobj> inline
|
||||
void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
|
||||
GRID_TRACE("axpy");
|
||||
ret.Checkerboard() = x.Checkerboard();
|
||||
conformable(ret,x);
|
||||
conformable(x,y);
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
autoView( x_v , x, AcceleratorRead);
|
||||
autoView( y_v , y, AcceleratorRead);
|
||||
accelerator_for(ss,x_v.size(),vobj::Nsimd(),{
|
||||
auto tmp = a*coalescedRead(x_v[ss])+coalescedRead(y_v[ss]);
|
||||
coalescedWrite(ret_v[ss],tmp);
|
||||
});
|
||||
}
|
||||
template<class sobj,class vobj> inline
|
||||
void axpby(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
|
||||
GRID_TRACE("axpby");
|
||||
ret.Checkerboard() = x.Checkerboard();
|
||||
conformable(ret,x);
|
||||
conformable(x,y);
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
autoView( x_v , x, AcceleratorRead);
|
||||
autoView( y_v , y, AcceleratorRead);
|
||||
accelerator_for(ss,x_v.size(),vobj::Nsimd(),{
|
||||
auto tmp = a*x_v(ss)+b*y_v(ss);
|
||||
coalescedWrite(ret_v[ss],tmp);
|
||||
});
|
||||
}
|
||||
|
||||
template<class sobj,class vobj> inline
|
||||
RealD axpy_norm(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y)
|
||||
{
|
||||
GRID_TRACE("axpy_norm");
|
||||
return axpy_norm_fast(ret,a,x,y);
|
||||
}
|
||||
template<class sobj,class vobj> inline
|
||||
RealD axpby_norm(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y)
|
||||
{
|
||||
GRID_TRACE("axpby_norm");
|
||||
return axpby_norm_fast(ret,a,b,x,y);
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
@ -1,382 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_base.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Christoph Lehner <christoph@lhnr.de>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#pragma once
|
||||
|
||||
#define STREAMING_STORES
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
extern int GridCshiftPermuteMap[4][16];
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// The real lattice class, with normal copy and assignment semantics.
|
||||
// This contains extra (host resident) grid pointer data that may be accessed by host code
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
class Lattice : public LatticeAccelerator<vobj>
|
||||
{
|
||||
public:
|
||||
GridBase *Grid(void) const { return this->_grid; }
|
||||
///////////////////////////////////////////////////
|
||||
// Member types
|
||||
///////////////////////////////////////////////////
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::scalar_object scalar_object;
|
||||
typedef vobj vector_object;
|
||||
|
||||
private:
|
||||
void dealloc(void)
|
||||
{
|
||||
if( this->_odata_size ) {
|
||||
alignedAllocator<vobj> alloc;
|
||||
alloc.deallocate(this->_odata,this->_odata_size);
|
||||
this->_odata=nullptr;
|
||||
this->_odata_size=0;
|
||||
}
|
||||
}
|
||||
void resize(uint64_t size)
|
||||
{
|
||||
if ( this->_odata_size != size ) {
|
||||
alignedAllocator<vobj> alloc;
|
||||
|
||||
dealloc();
|
||||
|
||||
this->_odata_size = size;
|
||||
if ( size )
|
||||
this->_odata = alloc.allocate(this->_odata_size);
|
||||
else
|
||||
this->_odata = nullptr;
|
||||
}
|
||||
}
|
||||
public:
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
// Can use to make accelerator dirty without copy from host ; useful for temporaries "dont care" prev contents
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
void SetViewMode(ViewMode mode) {
|
||||
LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this),mode);
|
||||
accessor.ViewClose();
|
||||
}
|
||||
|
||||
// Helper function to print the state of this object in the AccCache
|
||||
void PrintCacheState(void)
|
||||
{
|
||||
MemoryManager::PrintState(this->_odata);
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
// Return a view object that may be dereferenced in site loops.
|
||||
// The view is trivially copy constructible and may be copied to an accelerator device
|
||||
// in device lambdas
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
LatticeView<vobj> View (ViewMode mode) const
|
||||
{
|
||||
LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this),mode);
|
||||
return accessor;
|
||||
}
|
||||
|
||||
~Lattice() {
|
||||
if ( this->_odata_size ) {
|
||||
dealloc();
|
||||
}
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Expression Template closure support
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template <typename Op, typename T1> inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr)
|
||||
{
|
||||
GRID_TRACE("ExpressionTemplateEval");
|
||||
GridBase *egrid(nullptr);
|
||||
GridFromExpression(egrid,expr);
|
||||
assert(egrid!=nullptr);
|
||||
conformable(this->_grid,egrid);
|
||||
|
||||
int cb=-1;
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
this->checkerboard=cb;
|
||||
|
||||
auto exprCopy = expr;
|
||||
ExpressionViewOpen(exprCopy);
|
||||
auto me = View(AcceleratorWriteDiscard);
|
||||
accelerator_for(ss,me.size(),vobj::Nsimd(),{
|
||||
auto tmp = eval(ss,exprCopy);
|
||||
coalescedWrite(me[ss],tmp);
|
||||
});
|
||||
me.ViewClose();
|
||||
ExpressionViewClose(exprCopy);
|
||||
return *this;
|
||||
}
|
||||
template <typename Op, typename T1,typename T2> inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr)
|
||||
{
|
||||
GRID_TRACE("ExpressionTemplateEval");
|
||||
GridBase *egrid(nullptr);
|
||||
GridFromExpression(egrid,expr);
|
||||
assert(egrid!=nullptr);
|
||||
conformable(this->_grid,egrid);
|
||||
|
||||
int cb=-1;
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
this->checkerboard=cb;
|
||||
|
||||
auto exprCopy = expr;
|
||||
ExpressionViewOpen(exprCopy);
|
||||
auto me = View(AcceleratorWriteDiscard);
|
||||
accelerator_for(ss,me.size(),vobj::Nsimd(),{
|
||||
auto tmp = eval(ss,exprCopy);
|
||||
coalescedWrite(me[ss],tmp);
|
||||
});
|
||||
me.ViewClose();
|
||||
ExpressionViewClose(exprCopy);
|
||||
return *this;
|
||||
}
|
||||
template <typename Op, typename T1,typename T2,typename T3> inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr)
|
||||
{
|
||||
GRID_TRACE("ExpressionTemplateEval");
|
||||
GridBase *egrid(nullptr);
|
||||
GridFromExpression(egrid,expr);
|
||||
assert(egrid!=nullptr);
|
||||
conformable(this->_grid,egrid);
|
||||
|
||||
int cb=-1;
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
this->checkerboard=cb;
|
||||
auto exprCopy = expr;
|
||||
ExpressionViewOpen(exprCopy);
|
||||
auto me = View(AcceleratorWriteDiscard);
|
||||
accelerator_for(ss,me.size(),vobj::Nsimd(),{
|
||||
auto tmp = eval(ss,exprCopy);
|
||||
coalescedWrite(me[ss],tmp);
|
||||
});
|
||||
me.ViewClose();
|
||||
ExpressionViewClose(exprCopy);
|
||||
return *this;
|
||||
}
|
||||
//GridFromExpression is tricky to do
|
||||
template<class Op,class T1>
|
||||
Lattice(const LatticeUnaryExpression<Op,T1> & expr) {
|
||||
this->_grid = nullptr;
|
||||
GridFromExpression(this->_grid,expr);
|
||||
assert(this->_grid!=nullptr);
|
||||
|
||||
int cb=-1;
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
this->checkerboard=cb;
|
||||
|
||||
resize(this->_grid->oSites());
|
||||
|
||||
*this = expr;
|
||||
}
|
||||
template<class Op,class T1, class T2>
|
||||
Lattice(const LatticeBinaryExpression<Op,T1,T2> & expr) {
|
||||
this->_grid = nullptr;
|
||||
GridFromExpression(this->_grid,expr);
|
||||
assert(this->_grid!=nullptr);
|
||||
|
||||
int cb=-1;
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
this->checkerboard=cb;
|
||||
|
||||
resize(this->_grid->oSites());
|
||||
|
||||
*this = expr;
|
||||
}
|
||||
template<class Op,class T1, class T2, class T3>
|
||||
Lattice(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr) {
|
||||
this->_grid = nullptr;
|
||||
GridFromExpression(this->_grid,expr);
|
||||
assert(this->_grid!=nullptr);
|
||||
|
||||
int cb=-1;
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
this->checkerboard=cb;
|
||||
|
||||
resize(this->_grid->oSites());
|
||||
|
||||
*this = expr;
|
||||
}
|
||||
|
||||
template<class sobj> inline Lattice<vobj> & operator = (const sobj & r){
|
||||
auto me = View(CpuWrite);
|
||||
thread_for(ss,me.size(),{
|
||||
me[ss]= r;
|
||||
});
|
||||
me.ViewClose();
|
||||
return *this;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// Follow rule of five, with Constructor requires "grid" passed
|
||||
// to user defined constructor
|
||||
///////////////////////////////////////////
|
||||
// user defined constructor
|
||||
///////////////////////////////////////////
|
||||
Lattice(GridBase *grid,ViewMode mode=AcceleratorWriteDiscard) {
|
||||
this->_grid = grid;
|
||||
resize(this->_grid->oSites());
|
||||
assert((((uint64_t)&this->_odata[0])&0xF) ==0);
|
||||
this->checkerboard=0;
|
||||
SetViewMode(mode);
|
||||
}
|
||||
|
||||
// virtual ~Lattice(void) = default;
|
||||
|
||||
void reset(GridBase* grid) {
|
||||
if (this->_grid != grid) {
|
||||
this->_grid = grid;
|
||||
this->resize(grid->oSites());
|
||||
this->checkerboard = 0;
|
||||
}
|
||||
}
|
||||
///////////////////////////////////////////
|
||||
// copy constructor
|
||||
///////////////////////////////////////////
|
||||
Lattice(const Lattice& r){
|
||||
this->_grid = r.Grid();
|
||||
resize(this->_grid->oSites());
|
||||
*this = r;
|
||||
}
|
||||
///////////////////////////////////////////
|
||||
// move constructor
|
||||
///////////////////////////////////////////
|
||||
Lattice(Lattice && r){
|
||||
this->_grid = r.Grid();
|
||||
this->_odata = r._odata;
|
||||
this->_odata_size = r._odata_size;
|
||||
this->checkerboard= r.Checkerboard();
|
||||
r._odata = nullptr;
|
||||
r._odata_size = 0;
|
||||
}
|
||||
///////////////////////////////////////////
|
||||
// assignment template
|
||||
///////////////////////////////////////////
|
||||
template<class robj> inline Lattice<vobj> & operator = (const Lattice<robj> & r){
|
||||
typename std::enable_if<!std::is_same<robj,vobj>::value,int>::type i=0;
|
||||
conformable(*this,r);
|
||||
this->checkerboard = r.Checkerboard();
|
||||
auto him= r.View(AcceleratorRead);
|
||||
auto me = View(AcceleratorWriteDiscard);
|
||||
accelerator_for(ss,me.size(),vobj::Nsimd(),{
|
||||
coalescedWrite(me[ss],him(ss));
|
||||
});
|
||||
me.ViewClose(); him.ViewClose();
|
||||
return *this;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////
|
||||
// Copy assignment
|
||||
///////////////////////////////////////////
|
||||
inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
|
||||
this->checkerboard = r.Checkerboard();
|
||||
conformable(*this,r);
|
||||
auto him= r.View(AcceleratorRead);
|
||||
auto me = View(AcceleratorWriteDiscard);
|
||||
accelerator_for(ss,me.size(),vobj::Nsimd(),{
|
||||
coalescedWrite(me[ss],him(ss));
|
||||
});
|
||||
me.ViewClose(); him.ViewClose();
|
||||
return *this;
|
||||
}
|
||||
///////////////////////////////////////////
|
||||
// Move assignment possible if same type
|
||||
///////////////////////////////////////////
|
||||
inline Lattice<vobj> & operator = (Lattice<vobj> && r){
|
||||
|
||||
resize(0); // deletes if appropriate
|
||||
this->_grid = r.Grid();
|
||||
this->_odata = r._odata;
|
||||
this->_odata_size = r._odata_size;
|
||||
this->checkerboard= r.Checkerboard();
|
||||
|
||||
r._odata = nullptr;
|
||||
r._odata_size = 0;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// *=,+=,-= operators inherit behvour from correspond */+/- operation
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
template<class T> inline Lattice<vobj> &operator *=(const T &r) {
|
||||
*this = (*this)*r;
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<class T> inline Lattice<vobj> &operator -=(const T &r) {
|
||||
*this = (*this)-r;
|
||||
return *this;
|
||||
}
|
||||
template<class T> inline Lattice<vobj> &operator +=(const T &r) {
|
||||
*this = (*this)+r;
|
||||
return *this;
|
||||
}
|
||||
|
||||
friend inline void swap(Lattice &l, Lattice &r) {
|
||||
conformable(l,r);
|
||||
LatticeAccelerator<vobj> tmp;
|
||||
LatticeAccelerator<vobj> *lp = (LatticeAccelerator<vobj> *)&l;
|
||||
LatticeAccelerator<vobj> *rp = (LatticeAccelerator<vobj> *)&r;
|
||||
tmp = *lp; *lp=*rp; *rp=tmp;
|
||||
}
|
||||
|
||||
}; // class Lattice
|
||||
|
||||
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
for(int g=0;g<o.Grid()->_gsites;g++){
|
||||
|
||||
Coordinate gcoor;
|
||||
o.Grid()->GlobalIndexToGlobalCoor(g,gcoor);
|
||||
|
||||
sobj ss;
|
||||
peekSite(ss,o,gcoor);
|
||||
stream<<"[";
|
||||
for(int d=0;d<gcoor.size();d++){
|
||||
stream<<gcoor[d];
|
||||
if(d!=gcoor.size()-1) stream<<",";
|
||||
}
|
||||
stream<<"]\t";
|
||||
stream<<ss<<std::endl;
|
||||
}
|
||||
return stream;
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -1,248 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_basis.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Christoph Lehner <christoph@lhnr.de>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class Field>
|
||||
void basisOrthogonalize(std::vector<Field> &basis,Field &w,int k)
|
||||
{
|
||||
// If assume basis[j] are already orthonormal,
|
||||
// can take all inner products in parallel saving 2x bandwidth
|
||||
// Save 3x bandwidth on the second line of loop.
|
||||
// perhaps 2.5x speed up.
|
||||
// 2x overall in Multigrid Lanczos
|
||||
for(int j=0; j<k; ++j){
|
||||
auto ip = innerProduct(basis[j],w);
|
||||
w = w - ip*basis[j];
|
||||
}
|
||||
}
|
||||
|
||||
template<class VField, class Matrix>
|
||||
void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm)
|
||||
{
|
||||
typedef decltype(basis[0]) Field;
|
||||
typedef decltype(basis[0].View(AcceleratorRead)) View;
|
||||
|
||||
Vector<View> basis_v; basis_v.reserve(basis.size());
|
||||
typedef typename std::remove_reference<decltype(basis_v[0][0])>::type vobj;
|
||||
typedef typename std::remove_reference<decltype(Qt(0,0))>::type Coeff_t;
|
||||
GridBase* grid = basis[0].Grid();
|
||||
|
||||
for(int k=0;k<basis.size();k++){
|
||||
basis_v.push_back(basis[k].View(AcceleratorWrite));
|
||||
}
|
||||
|
||||
#if ( (!defined(GRID_CUDA)) )
|
||||
int max_threads = thread_max();
|
||||
Vector < vobj > Bt(Nm * max_threads);
|
||||
thread_region
|
||||
{
|
||||
vobj* B = &Bt[Nm * thread_num()];
|
||||
thread_for_in_region(ss, grid->oSites(),{
|
||||
for(int j=j0; j<j1; ++j) B[j]=0.;
|
||||
|
||||
for(int j=j0; j<j1; ++j){
|
||||
for(int k=k0; k<k1; ++k){
|
||||
B[j] +=Qt(j,k) * basis_v[k][ss];
|
||||
}
|
||||
}
|
||||
for(int j=j0; j<j1; ++j){
|
||||
basis_v[j][ss] = B[j];
|
||||
}
|
||||
});
|
||||
}
|
||||
#else
|
||||
View *basis_vp = &basis_v[0];
|
||||
|
||||
int nrot = j1-j0;
|
||||
if (!nrot) // edge case not handled gracefully by Cuda
|
||||
return;
|
||||
|
||||
uint64_t oSites =grid->oSites();
|
||||
uint64_t siteBlock=(grid->oSites()+nrot-1)/nrot; // Maximum 1 additional vector overhead
|
||||
|
||||
Vector <vobj> Bt(siteBlock * nrot);
|
||||
auto Bp=&Bt[0];
|
||||
|
||||
// GPU readable copy of matrix
|
||||
Vector<Coeff_t> Qt_jv(Nm*Nm);
|
||||
Coeff_t *Qt_p = & Qt_jv[0];
|
||||
thread_for(i,Nm*Nm,{
|
||||
int j = i/Nm;
|
||||
int k = i%Nm;
|
||||
Qt_p[i]=Qt(j,k);
|
||||
});
|
||||
|
||||
// Block the loop to keep storage footprint down
|
||||
for(uint64_t s=0;s<oSites;s+=siteBlock){
|
||||
|
||||
// remaining work in this block
|
||||
int ssites=MIN(siteBlock,oSites-s);
|
||||
|
||||
// zero out the accumulators
|
||||
accelerator_for(ss,siteBlock*nrot,vobj::Nsimd(),{
|
||||
decltype(coalescedRead(Bp[ss])) z;
|
||||
z=Zero();
|
||||
coalescedWrite(Bp[ss],z);
|
||||
});
|
||||
|
||||
accelerator_for(sj,ssites*nrot,vobj::Nsimd(),{
|
||||
|
||||
int j =sj%nrot;
|
||||
int jj =j0+j;
|
||||
int ss =sj/nrot;
|
||||
int sss=ss+s;
|
||||
|
||||
for(int k=k0; k<k1; ++k){
|
||||
auto tmp = coalescedRead(Bp[ss*nrot+j]);
|
||||
coalescedWrite(Bp[ss*nrot+j],tmp+ Qt_p[jj*Nm+k] * coalescedRead(basis_vp[k][sss]));
|
||||
}
|
||||
});
|
||||
|
||||
accelerator_for(sj,ssites*nrot,vobj::Nsimd(),{
|
||||
int j =sj%nrot;
|
||||
int jj =j0+j;
|
||||
int ss =sj/nrot;
|
||||
int sss=ss+s;
|
||||
coalescedWrite(basis_vp[jj][sss],coalescedRead(Bp[ss*nrot+j]));
|
||||
});
|
||||
}
|
||||
#endif
|
||||
|
||||
for(int k=0;k<basis.size();k++) basis_v[k].ViewClose();
|
||||
}
|
||||
|
||||
// Extract a single rotated vector
|
||||
template<class Field>
|
||||
void basisRotateJ(Field &result,std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm)
|
||||
{
|
||||
typedef decltype(basis[0].View(AcceleratorRead)) View;
|
||||
typedef typename Field::vector_object vobj;
|
||||
GridBase* grid = basis[0].Grid();
|
||||
|
||||
result.Checkerboard() = basis[0].Checkerboard();
|
||||
|
||||
Vector<View> basis_v; basis_v.reserve(basis.size());
|
||||
for(int k=0;k<basis.size();k++){
|
||||
basis_v.push_back(basis[k].View(AcceleratorRead));
|
||||
}
|
||||
vobj zz=Zero();
|
||||
Vector<double> Qt_jv(Nm);
|
||||
double * Qt_j = & Qt_jv[0];
|
||||
for(int k=0;k<Nm;++k) Qt_j[k]=Qt(j,k);
|
||||
|
||||
auto basis_vp=& basis_v[0];
|
||||
autoView(result_v,result,AcceleratorWrite);
|
||||
accelerator_for(ss, grid->oSites(),vobj::Nsimd(),{
|
||||
vobj zzz=Zero();
|
||||
auto B=coalescedRead(zzz);
|
||||
for(int k=k0; k<k1; ++k){
|
||||
B +=Qt_j[k] * coalescedRead(basis_vp[k][ss]);
|
||||
}
|
||||
coalescedWrite(result_v[ss], B);
|
||||
});
|
||||
for(int k=0;k<basis.size();k++) basis_v[k].ViewClose();
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
void basisReorderInPlace(std::vector<Field> &_v,std::vector<RealD>& sort_vals, std::vector<int>& idx)
|
||||
{
|
||||
int vlen = idx.size();
|
||||
|
||||
assert(vlen>=1);
|
||||
assert(vlen<=sort_vals.size());
|
||||
assert(vlen<=_v.size());
|
||||
|
||||
for (size_t i=0;i<vlen;i++) {
|
||||
|
||||
if (idx[i] != i) {
|
||||
|
||||
//////////////////////////////////////
|
||||
// idx[i] is a table of desired sources giving a permutation.
|
||||
// Swap v[i] with v[idx[i]].
|
||||
// Find j>i for which _vnew[j] = _vold[i],
|
||||
// track the move idx[j] => idx[i]
|
||||
// track the move idx[i] => i
|
||||
//////////////////////////////////////
|
||||
size_t j;
|
||||
for (j=i;j<idx.size();j++)
|
||||
if (idx[j]==i)
|
||||
break;
|
||||
|
||||
assert(idx[i] > i); assert(j!=idx.size()); assert(idx[j]==i);
|
||||
|
||||
swap(_v[i],_v[idx[i]]); // should use vector move constructor, no data copy
|
||||
std::swap(sort_vals[i],sort_vals[idx[i]]);
|
||||
|
||||
idx[j] = idx[i];
|
||||
idx[i] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline std::vector<int> basisSortGetIndex(std::vector<RealD>& sort_vals)
|
||||
{
|
||||
std::vector<int> idx(sort_vals.size());
|
||||
std::iota(idx.begin(), idx.end(), 0);
|
||||
|
||||
// sort indexes based on comparing values in v
|
||||
std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) {
|
||||
return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]);
|
||||
});
|
||||
return idx;
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
void basisSortInPlace(std::vector<Field> & _v,std::vector<RealD>& sort_vals, bool reverse)
|
||||
{
|
||||
std::vector<int> idx = basisSortGetIndex(sort_vals);
|
||||
if (reverse)
|
||||
std::reverse(idx.begin(), idx.end());
|
||||
|
||||
basisReorderInPlace(_v,sort_vals,idx);
|
||||
}
|
||||
|
||||
// PAB: faster to compute the inner products first then fuse loops.
|
||||
// If performance critical can improve.
|
||||
template<class Field>
|
||||
void basisDeflate(const std::vector<Field> &_v,const std::vector<RealD>& eval,const Field& src_orig,Field& result) {
|
||||
result = Zero();
|
||||
assert(_v.size()==eval.size());
|
||||
int N = (int)_v.size();
|
||||
for (int i=0;i<N;i++) {
|
||||
Field& tmp = _v[i];
|
||||
axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result);
|
||||
}
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
@ -1,179 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_comparison.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_COMPARISON_H
|
||||
#define GRID_LATTICE_COMPARISON_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// relational operators
|
||||
//
|
||||
// Support <,>,<=,>=,==,!=
|
||||
//
|
||||
//Query supporting bitwise &, |, ^, !
|
||||
//Query supporting logical &&, ||,
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
typedef iScalar<vInteger> vPredicate ;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// compare lattice to lattice
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<class vfunctor,class lobj,class robj>
|
||||
inline Lattice<vPredicate> LLComparison(vfunctor op,const Lattice<lobj> &lhs,const Lattice<robj> &rhs)
|
||||
{
|
||||
Lattice<vPredicate> ret(rhs.Grid());
|
||||
autoView( lhs_v, lhs, CpuRead);
|
||||
autoView( rhs_v, rhs, CpuRead);
|
||||
autoView( ret_v, ret, CpuWrite);
|
||||
thread_for( ss, rhs_v.size(), {
|
||||
ret_v[ss]=op(lhs_v[ss],rhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// compare lattice to scalar
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template<class vfunctor,class lobj,class robj>
|
||||
inline Lattice<vPredicate> LSComparison(vfunctor op,const Lattice<lobj> &lhs,const robj &rhs)
|
||||
{
|
||||
Lattice<vPredicate> ret(lhs.Grid());
|
||||
autoView( lhs_v, lhs, CpuRead);
|
||||
autoView( ret_v, ret, CpuWrite);
|
||||
thread_for( ss, lhs_v.size(), {
|
||||
ret_v[ss]=op(lhs_v[ss],rhs);
|
||||
});
|
||||
return ret;
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// compare scalar to lattice
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template<class vfunctor,class lobj,class robj>
|
||||
inline Lattice<vPredicate> SLComparison(vfunctor op,const lobj &lhs,const Lattice<robj> &rhs)
|
||||
{
|
||||
Lattice<vPredicate> ret(rhs.Grid());
|
||||
autoView( rhs_v, rhs, CpuRead);
|
||||
autoView( ret_v, ret, CpuWrite);
|
||||
thread_for( ss, rhs_v.size(), {
|
||||
ret_v[ss]=op(lhs,rhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Map to functors
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Less than
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vPredicate> operator < (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vlt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vPredicate> operator < (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(vlt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vPredicate> operator < (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(vlt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
// Less than equal
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vPredicate> operator <= (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vle<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vPredicate> operator <= (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(vle<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vPredicate> operator <= (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(vle<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
// Greater than
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vPredicate> operator > (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vgt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vPredicate> operator > (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(vgt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vPredicate> operator > (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(vgt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
|
||||
// Greater than equal
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vPredicate> operator >= (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vge<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vPredicate> operator >= (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(vge<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vPredicate> operator >= (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(vge<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
// equal
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vPredicate> operator == (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(veq<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vPredicate> operator == (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(veq<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vPredicate> operator == (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(veq<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
|
||||
// not equal
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vPredicate> operator != (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vne<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vPredicate> operator != (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(vne<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vPredicate> operator != (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(vne<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
@ -1,55 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_coordinate.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class iobj> inline void LatticeCoordinate(Lattice<iobj> &l,int mu)
|
||||
{
|
||||
typedef typename iobj::scalar_type scalar_type;
|
||||
typedef typename iobj::vector_type vector_type;
|
||||
|
||||
GridBase *grid = l.Grid();
|
||||
int Nsimd = grid->iSites();
|
||||
|
||||
autoView(l_v, l, CpuWrite);
|
||||
thread_for( o, grid->oSites(), {
|
||||
vector_type vI;
|
||||
Coordinate gcoor;
|
||||
ExtractBuffer<scalar_type> mergebuf(Nsimd);
|
||||
for(int i=0;i<grid->iSites();i++){
|
||||
grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor);
|
||||
mergebuf[i]=(Integer)gcoor[mu];
|
||||
}
|
||||
merge<vector_type,scalar_type>(vI,mergebuf);
|
||||
l_v[o]=vI;
|
||||
});
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -1,55 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_crc.h
|
||||
|
||||
Copyright (C) 2021
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class vobj> void DumpSliceNorm(std::string s,Lattice<vobj> &f,int mu=-1)
|
||||
{
|
||||
auto ff = localNorm2(f);
|
||||
if ( mu==-1 ) mu = f.Grid()->Nd()-1;
|
||||
typedef typename vobj::tensor_reduced normtype;
|
||||
typedef typename normtype::scalar_object scalar;
|
||||
std::vector<scalar> sff;
|
||||
sliceSum(ff,sff,mu);
|
||||
for(int t=0;t<sff.size();t++){
|
||||
std::cout << s<<" "<<t<<" "<<sff[t]<<std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj> uint32_t crc(Lattice<vobj> & buf)
|
||||
{
|
||||
autoView( buf_v , buf, CpuRead);
|
||||
return ::crc32(0L,(unsigned char *)&buf_v[0],(size_t)sizeof(vobj)*buf.oSites());
|
||||
}
|
||||
|
||||
#define CRC(U) std::cout << "FingerPrint "<<__FILE__ <<" "<< __LINE__ <<" "<< #U <<" "<<crc(U)<<std::endl;
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
@ -1,87 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_local.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_LOCALREDUCTION_H
|
||||
#define GRID_LATTICE_LOCALREDUCTION_H
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// localInner, localNorm, outerProduct
|
||||
///////////////////////////////////////////////
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Non site, reduced locally reduced routines
|
||||
/////////////////////////////////////////////////////
|
||||
|
||||
// localNorm2,
|
||||
template<class vobj>
|
||||
inline auto localNorm2 (const Lattice<vobj> &rhs)-> Lattice<typename vobj::tensor_reduced>
|
||||
{
|
||||
Lattice<typename vobj::tensor_reduced> ret(rhs.Grid());
|
||||
autoView( rhs_v , rhs, AcceleratorRead);
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
accelerator_for(ss,rhs_v.size(),vobj::Nsimd(),{
|
||||
coalescedWrite(ret_v[ss],innerProduct(rhs_v(ss),rhs_v(ss)));
|
||||
});
|
||||
return ret;
|
||||
}
|
||||
|
||||
// localInnerProduct
|
||||
template<class vobj>
|
||||
inline auto localInnerProduct (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs) -> Lattice<typename vobj::tensor_reduced>
|
||||
{
|
||||
Lattice<typename vobj::tensor_reduced> ret(rhs.Grid());
|
||||
autoView( lhs_v , lhs, AcceleratorRead);
|
||||
autoView( rhs_v , rhs, AcceleratorRead);
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
accelerator_for(ss,rhs_v.size(),vobj::Nsimd(),{
|
||||
coalescedWrite(ret_v[ss],innerProduct(lhs_v(ss),rhs_v(ss)));
|
||||
});
|
||||
return ret;
|
||||
}
|
||||
|
||||
// outerProduct Scalar x Scalar -> Scalar
|
||||
// Vector x Vector -> Matrix
|
||||
template<class ll,class rr>
|
||||
inline auto outerProduct (const Lattice<ll> &lhs,const Lattice<rr> &rhs) -> Lattice<decltype(outerProduct(ll(),rr()))>
|
||||
{
|
||||
typedef decltype(coalescedRead(ll())) sll;
|
||||
typedef decltype(coalescedRead(rr())) srr;
|
||||
Lattice<decltype(outerProduct(ll(),rr()))> ret(rhs.Grid());
|
||||
autoView( lhs_v , lhs, AcceleratorRead);
|
||||
autoView( rhs_v , rhs, AcceleratorRead);
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
accelerator_for(ss,rhs_v.size(),1,{
|
||||
// FIXME had issues with scalar version of outer
|
||||
// Use vector [] operator and don't read coalesce this loop
|
||||
ret_v[ss]=outerProduct(lhs_v[ss],rhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
}
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
@ -1,199 +0,0 @@
|
||||
/*************************************************************************************
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
Source file: ./lib/lattice/Lattice_reduction.h
|
||||
Copyright (C) 2015
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
#include <Grid/Grid_Eigen_Dense.h>
|
||||
|
||||
#ifdef GRID_WARN_SUBOPTIMAL
|
||||
#warning "Optimisation alert all these reduction loops are NOT threaded "
|
||||
#endif
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class vobj>
|
||||
static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,const Lattice<vobj> &Y,int Orthog,RealD scale=1.0)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nblock = X.Grid()->GlobalDimensions()[Orthog];
|
||||
|
||||
GridBase *FullGrid = X.Grid();
|
||||
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
|
||||
// Lattice<vobj> Xslice(SliceGrid);
|
||||
// Lattice<vobj> Rslice(SliceGrid);
|
||||
|
||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
||||
|
||||
//FIXME package in a convenient iterator
|
||||
//Should loop over a plane orthogonal to direction "Orthog"
|
||||
int stride=FullGrid->_slice_stride[Orthog];
|
||||
int block =FullGrid->_slice_block [Orthog];
|
||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
||||
int ostride=FullGrid->_ostride[Orthog];
|
||||
autoView( X_v , X, CpuRead);
|
||||
autoView( Y_v , Y, CpuRead);
|
||||
autoView( R_v , R, CpuWrite);
|
||||
thread_region
|
||||
{
|
||||
std::vector<vobj> s_x(Nblock);
|
||||
|
||||
thread_loop_collapse2( (int n=0;n<nblock;n++),{
|
||||
for(int b=0;b<block;b++){
|
||||
int o = n*stride + b;
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
s_x[i] = X_v[o+i*ostride];
|
||||
}
|
||||
|
||||
vobj dot;
|
||||
for(int i=0;i<Nblock;i++){
|
||||
dot = Y_v[o+i*ostride];
|
||||
for(int j=0;j<Nblock;j++){
|
||||
dot = dot + s_x[j]*(scale*aa(j,i));
|
||||
}
|
||||
R_v[o+i*ostride]=dot;
|
||||
}
|
||||
}});
|
||||
}
|
||||
};
|
||||
|
||||
template<class vobj>
|
||||
static void sliceMulMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,int Orthog,RealD scale=1.0)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nblock = X.Grid()->GlobalDimensions()[Orthog];
|
||||
|
||||
GridBase *FullGrid = X.Grid();
|
||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
||||
|
||||
//FIXME package in a convenient iterator
|
||||
//Should loop over a plane orthogonal to direction "Orthog"
|
||||
int stride=FullGrid->_slice_stride[Orthog];
|
||||
int block =FullGrid->_slice_block [Orthog];
|
||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
||||
int ostride=FullGrid->_ostride[Orthog];
|
||||
|
||||
autoView( X_v , X, CpuRead);
|
||||
autoView( R_v , R, CpuWrite);
|
||||
|
||||
thread_region
|
||||
{
|
||||
std::vector<vobj> s_x(Nblock);
|
||||
|
||||
thread_loop_collapse2( (int n=0;n<nblock;n++),{
|
||||
for(int b=0;b<block;b++){
|
||||
int o = n*stride + b;
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
s_x[i] = X_v[o+i*ostride];
|
||||
}
|
||||
|
||||
vobj dot;
|
||||
for(int i=0;i<Nblock;i++){
|
||||
dot = s_x[0]*(scale*aa(0,i));
|
||||
for(int j=1;j<Nblock;j++){
|
||||
dot = dot + s_x[j]*(scale*aa(j,i));
|
||||
}
|
||||
R_v[o+i*ostride]=dot;
|
||||
}
|
||||
}});
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
template<class vobj>
|
||||
static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
GridBase *FullGrid = lhs.Grid();
|
||||
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
|
||||
int Nblock = FullGrid->GlobalDimensions()[Orthog];
|
||||
|
||||
// Lattice<vobj> Lslice(SliceGrid);
|
||||
// Lattice<vobj> Rslice(SliceGrid);
|
||||
|
||||
mat = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
||||
// int nh = FullGrid->_ndimension;
|
||||
// int nl = SliceGrid->_ndimension;
|
||||
// int nl = nh-1;
|
||||
|
||||
//FIXME package in a convenient iterator
|
||||
//Should loop over a plane orthogonal to direction "Orthog"
|
||||
int stride=FullGrid->_slice_stride[Orthog];
|
||||
int block =FullGrid->_slice_block [Orthog];
|
||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
||||
int ostride=FullGrid->_ostride[Orthog];
|
||||
|
||||
typedef typename vobj::vector_typeD vector_typeD;
|
||||
autoView( lhs_v , lhs, CpuRead);
|
||||
autoView( rhs_v , rhs, CpuRead);
|
||||
thread_region {
|
||||
std::vector<vobj> Left(Nblock);
|
||||
std::vector<vobj> Right(Nblock);
|
||||
Eigen::MatrixXcd mat_thread = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
thread_loop_collapse2((int n=0;n<nblock;n++),{
|
||||
for(int b=0;b<block;b++){
|
||||
|
||||
int o = n*stride + b;
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
Left [i] = lhs_v[o+i*ostride];
|
||||
Right[i] = rhs_v[o+i*ostride];
|
||||
}
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
for(int j=0;j<Nblock;j++){
|
||||
auto tmp = innerProduct(Left[i],Right[j]);
|
||||
auto rtmp = TensorRemove(tmp);
|
||||
ComplexD z = Reduce(rtmp);
|
||||
mat_thread(i,j) += std::complex<double>(real(z),imag(z));
|
||||
}}
|
||||
}});
|
||||
thread_critical {
|
||||
mat += mat_thread;
|
||||
}
|
||||
}
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
for(int j=0;j<Nblock;j++){
|
||||
ComplexD sum = mat(i,j);
|
||||
FullGrid->GlobalSum(sum);
|
||||
mat(i,j)=sum;
|
||||
}}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
||||
|
@ -1,231 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_peekpoke.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_PEEK_H
|
||||
#define GRID_LATTICE_PEEK_H
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Peeking and poking around
|
||||
///////////////////////////////////////////////
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
|
||||
// FIXME accelerator_loop and accelerator_inline these
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Peek internal indices of a Lattice object
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<int Index,class vobj>
|
||||
auto PeekIndex(const Lattice<vobj> &lhs,int i) -> Lattice<decltype(peekIndex<Index>(vobj(),i))>
|
||||
{
|
||||
Lattice<decltype(peekIndex<Index>(vobj(),i))> ret(lhs.Grid());
|
||||
ret.Checkerboard()=lhs.Checkerboard();
|
||||
autoView( ret_v, ret, AcceleratorWrite);
|
||||
autoView( lhs_v, lhs, AcceleratorRead);
|
||||
accelerator_for( ss, lhs_v.size(), 1, {
|
||||
ret_v[ss] = peekIndex<Index>(lhs_v[ss],i);
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
template<int Index,class vobj>
|
||||
auto PeekIndex(const Lattice<vobj> &lhs,int i,int j) -> Lattice<decltype(peekIndex<Index>(vobj(),i,j))>
|
||||
{
|
||||
Lattice<decltype(peekIndex<Index>(vobj(),i,j))> ret(lhs.Grid());
|
||||
ret.Checkerboard()=lhs.Checkerboard();
|
||||
autoView( ret_v, ret, AcceleratorWrite);
|
||||
autoView( lhs_v, lhs, AcceleratorRead);
|
||||
accelerator_for( ss, lhs_v.size(), 1, {
|
||||
ret_v[ss] = peekIndex<Index>(lhs_v[ss],i,j);
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Poke internal indices of a Lattice object
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<int Index,class vobj>
|
||||
void PokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(vobj(),0))> & rhs,int i)
|
||||
{
|
||||
autoView( rhs_v, rhs, AcceleratorRead);
|
||||
autoView( lhs_v, lhs, AcceleratorWrite);
|
||||
accelerator_for( ss, lhs_v.size(), 1, {
|
||||
pokeIndex<Index>(lhs_v[ss],rhs_v[ss],i);
|
||||
});
|
||||
}
|
||||
template<int Index,class vobj>
|
||||
void PokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(vobj(),0,0))> & rhs,int i,int j)
|
||||
{
|
||||
autoView( rhs_v, rhs, AcceleratorRead);
|
||||
autoView( lhs_v, lhs, AcceleratorWrite);
|
||||
accelerator_for( ss, lhs_v.size(), 1, {
|
||||
pokeIndex<Index>(lhs_v[ss],rhs_v[ss],i,j);
|
||||
});
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Poke a scalar object into the SIMD array
|
||||
//////////////////////////////////////////////////////
|
||||
template<class vobj,class sobj>
|
||||
void pokeSite(const sobj &s,Lattice<vobj> &l,const Coordinate &site){
|
||||
|
||||
GridBase *grid=l.Grid();
|
||||
|
||||
int Nsimd = grid->Nsimd();
|
||||
|
||||
assert( l.Checkerboard()== l.Grid()->CheckerBoard(site));
|
||||
assert( sizeof(sobj)*Nsimd == sizeof(vobj));
|
||||
|
||||
int rank,odx,idx;
|
||||
// Optional to broadcast from node 0.
|
||||
grid->GlobalCoorToRankIndex(rank,odx,idx,site);
|
||||
grid->Broadcast(grid->BossRank(),s);
|
||||
|
||||
// extract-modify-merge cycle is easiest way and this is not perf critical
|
||||
ExtractBuffer<sobj> buf(Nsimd);
|
||||
autoView( l_v , l, CpuWrite);
|
||||
if ( rank == grid->ThisRank() ) {
|
||||
extract(l_v[odx],buf);
|
||||
buf[idx] = s;
|
||||
merge(l_v[odx],buf);
|
||||
}
|
||||
|
||||
return;
|
||||
};
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// Peek a scalar object from the SIMD array
|
||||
//////////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
typename vobj::scalar_object peekSite(const Lattice<vobj> &l,const Coordinate &site){
|
||||
typename vobj::scalar_object s;
|
||||
peekSite(s,l,site);
|
||||
return s;
|
||||
}
|
||||
template<class vobj,class sobj>
|
||||
void peekSite(sobj &s,const Lattice<vobj> &l,const Coordinate &site){
|
||||
|
||||
GridBase *grid=l.Grid();
|
||||
|
||||
int Nsimd = grid->Nsimd();
|
||||
|
||||
assert( l.Checkerboard() == l.Grid()->CheckerBoard(site));
|
||||
|
||||
int rank,odx,idx;
|
||||
grid->GlobalCoorToRankIndex(rank,odx,idx,site);
|
||||
|
||||
ExtractBuffer<sobj> buf(Nsimd);
|
||||
autoView( l_v , l, CpuWrite);
|
||||
extract(l_v[odx],buf);
|
||||
|
||||
s = buf[idx];
|
||||
|
||||
grid->Broadcast(rank,s);
|
||||
|
||||
return;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// Peek a scalar object from the SIMD array
|
||||
//////////////////////////////////////////////////////////
|
||||
// Must be CPU read view
|
||||
template<class vobj,class sobj>
|
||||
inline void peekLocalSite(sobj &s,const LatticeView<vobj> &l,Coordinate &site)
|
||||
{
|
||||
GridBase *grid = l.getGrid();
|
||||
assert(l.mode==CpuRead);
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nsimd = grid->Nsimd();
|
||||
|
||||
assert( l.Checkerboard()== grid->CheckerBoard(site));
|
||||
assert( sizeof(sobj)*Nsimd == sizeof(vobj));
|
||||
|
||||
static const int words=sizeof(vobj)/sizeof(vector_type);
|
||||
int odx,idx;
|
||||
idx= grid->iIndex(site);
|
||||
odx= grid->oIndex(site);
|
||||
|
||||
const vector_type *vp = (const vector_type *) &l[odx];
|
||||
scalar_type * pt = (scalar_type *)&s;
|
||||
|
||||
for(int w=0;w<words;w++){
|
||||
pt[w] = getlane(vp[w],idx);
|
||||
}
|
||||
|
||||
return;
|
||||
};
|
||||
template<class vobj,class sobj>
|
||||
inline void peekLocalSite(sobj &s,const Lattice<vobj> &l,Coordinate &site)
|
||||
{
|
||||
autoView(lv,l,CpuRead);
|
||||
peekLocalSite(s,lv,site);
|
||||
return;
|
||||
};
|
||||
|
||||
// Must be CPU write view
|
||||
template<class vobj,class sobj>
|
||||
inline void pokeLocalSite(const sobj &s,LatticeView<vobj> &l,Coordinate &site)
|
||||
{
|
||||
GridBase *grid=l.getGrid();
|
||||
assert(l.mode==CpuWrite);
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nsimd = grid->Nsimd();
|
||||
|
||||
assert( l.Checkerboard()== grid->CheckerBoard(site));
|
||||
assert( sizeof(sobj)*Nsimd == sizeof(vobj));
|
||||
|
||||
static const int words=sizeof(vobj)/sizeof(vector_type);
|
||||
int odx,idx;
|
||||
idx= grid->iIndex(site);
|
||||
odx= grid->oIndex(site);
|
||||
|
||||
vector_type * vp = (vector_type *)&l[odx];
|
||||
scalar_type * pt = (scalar_type *)&s;
|
||||
for(int w=0;w<words;w++){
|
||||
putlane(vp[w],pt[w],idx);
|
||||
}
|
||||
return;
|
||||
};
|
||||
|
||||
template<class vobj,class sobj>
|
||||
inline void pokeLocalSite(const sobj &s, Lattice<vobj> &l,Coordinate &site)
|
||||
{
|
||||
autoView(lv,l,CpuWrite);
|
||||
pokeLocalSite(s,lv,site);
|
||||
return;
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
||||
|
@ -1,79 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_reality.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_REAL_IMAG_H
|
||||
#define GRID_LATTICE_REAL_IMAG_H
|
||||
|
||||
|
||||
// FIXME .. this is the sector of the code
|
||||
// I am most worried about the directions
|
||||
// The choice of burying complex in the SIMD
|
||||
// is making the use of "real" and "imag" very cumbersome
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class vobj> inline Lattice<vobj> real(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs.Grid());
|
||||
|
||||
autoView( lhs_v, lhs, AcceleratorRead);
|
||||
autoView( ret_v, ret, AcceleratorWrite);
|
||||
|
||||
ret.Checkerboard()=lhs.Checkerboard();
|
||||
accelerator_for( ss, lhs_v.size(), 1, {
|
||||
ret_v[ss] =real(lhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
template<class vobj> inline Lattice<vobj> imag(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs.Grid());
|
||||
|
||||
autoView( lhs_v, lhs, AcceleratorRead);
|
||||
autoView( ret_v, ret, AcceleratorWrite);
|
||||
|
||||
ret.Checkerboard()=lhs.Checkerboard();
|
||||
accelerator_for( ss, lhs_v.size(), 1, {
|
||||
ret_v[ss] =imag(lhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
|
||||
template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr>
|
||||
auto real(const Expression &expr) -> decltype(real(closure(expr)))
|
||||
{
|
||||
return real(closure(expr));
|
||||
}
|
||||
template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr>
|
||||
auto imag(const Expression &expr) -> decltype(imag(closure(expr)))
|
||||
{
|
||||
return imag(closure(expr));
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
@ -1,116 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_reality.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_REALITY_H
|
||||
#define GRID_LATTICE_REALITY_H
|
||||
|
||||
|
||||
// FIXME .. this is the sector of the code
|
||||
// I am most worried about the directions
|
||||
// The choice of burying complex in the SIMD
|
||||
// is making the use of "real" and "imag" very cumbersome
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class vobj> inline Lattice<vobj> adj(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs.Grid());
|
||||
|
||||
autoView( lhs_v, lhs, AcceleratorRead);
|
||||
autoView( ret_v, ret, AcceleratorWrite);
|
||||
|
||||
ret.Checkerboard()=lhs.Checkerboard();
|
||||
accelerator_for( ss, lhs_v.size(), 1, {
|
||||
ret_v[ss] = adj(lhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
|
||||
template<class vobj> inline Lattice<vobj> conjugate(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs.Grid());
|
||||
|
||||
autoView( lhs_v, lhs, AcceleratorRead);
|
||||
autoView( ret_v, ret, AcceleratorWrite);
|
||||
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), {
|
||||
coalescedWrite( ret_v[ss] , conjugate(lhs_v(ss)));
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
|
||||
template<class vobj> inline Lattice<typename vobj::Complexified> toComplex(const Lattice<vobj> &lhs){
|
||||
Lattice<typename vobj::Complexified> ret(lhs.Grid());
|
||||
|
||||
autoView( lhs_v, lhs, AcceleratorRead);
|
||||
autoView( ret_v, ret, AcceleratorWrite);
|
||||
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
accelerator_for( ss, lhs_v.size(), 1, {
|
||||
ret_v[ss] = toComplex(lhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
template<class vobj> inline Lattice<typename vobj::Realified> toReal(const Lattice<vobj> &lhs){
|
||||
Lattice<typename vobj::Realified> ret(lhs.Grid());
|
||||
|
||||
autoView( lhs_v, lhs, AcceleratorRead);
|
||||
autoView( ret_v, ret, AcceleratorWrite);
|
||||
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
accelerator_for( ss, lhs_v.size(), 1, {
|
||||
ret_v[ss] = toReal(lhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
|
||||
|
||||
template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr>
|
||||
auto toComplex(const Expression &expr) -> decltype(closure(expr))
|
||||
{
|
||||
return toComplex(closure(expr));
|
||||
}
|
||||
template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr>
|
||||
auto toReal(const Expression &expr) -> decltype(closure(expr))
|
||||
{
|
||||
return toReal(closure(expr));
|
||||
}
|
||||
template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr>
|
||||
auto adj(const Expression &expr) -> decltype(closure(expr))
|
||||
{
|
||||
return adj(closure(expr));
|
||||
}
|
||||
template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr>
|
||||
auto conjugate(const Expression &expr) -> decltype(closure(expr))
|
||||
{
|
||||
return conjugate(closure(expr));
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
@ -1,302 +0,0 @@
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
#ifdef GRID_HIP
|
||||
extern hipDeviceProp_t *gpu_props;
|
||||
#define WARP_SIZE 64
|
||||
#endif
|
||||
#ifdef GRID_CUDA
|
||||
extern cudaDeviceProp *gpu_props;
|
||||
#define WARP_SIZE 32
|
||||
#endif
|
||||
|
||||
__device__ unsigned int retirementCount = 0;
|
||||
|
||||
template <class Iterator>
|
||||
unsigned int nextPow2(Iterator x) {
|
||||
--x;
|
||||
x |= x >> 1;
|
||||
x |= x >> 2;
|
||||
x |= x >> 4;
|
||||
x |= x >> 8;
|
||||
x |= x >> 16;
|
||||
return ++x;
|
||||
}
|
||||
|
||||
template <class Iterator>
|
||||
int getNumBlocksAndThreads(const Iterator n, const size_t sizeofsobj, Iterator &threads, Iterator &blocks) {
|
||||
|
||||
int device;
|
||||
#ifdef GRID_CUDA
|
||||
cudaGetDevice(&device);
|
||||
#endif
|
||||
#ifdef GRID_HIP
|
||||
hipGetDevice(&device);
|
||||
#endif
|
||||
|
||||
Iterator warpSize = gpu_props[device].warpSize;
|
||||
Iterator sharedMemPerBlock = gpu_props[device].sharedMemPerBlock;
|
||||
Iterator maxThreadsPerBlock = gpu_props[device].maxThreadsPerBlock;
|
||||
Iterator multiProcessorCount = gpu_props[device].multiProcessorCount;
|
||||
/*
|
||||
std::cout << GridLogDebug << "GPU has:" << std::endl;
|
||||
std::cout << GridLogDebug << "\twarpSize = " << warpSize << std::endl;
|
||||
std::cout << GridLogDebug << "\tsharedMemPerBlock = " << sharedMemPerBlock << std::endl;
|
||||
std::cout << GridLogDebug << "\tmaxThreadsPerBlock = " << maxThreadsPerBlock << std::endl;
|
||||
std::cout << GridLogDebug << "\tmultiProcessorCount = " << multiProcessorCount << std::endl;
|
||||
*/
|
||||
if (warpSize != WARP_SIZE) {
|
||||
std::cout << GridLogError << "The warp size of the GPU in use does not match the warp size set when compiling Grid." << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
// let the number of threads in a block be a multiple of 2, starting from warpSize
|
||||
threads = warpSize;
|
||||
if ( threads*sizeofsobj > sharedMemPerBlock ) {
|
||||
std::cout << GridLogError << "The object is too large for the shared memory." << std::endl;
|
||||
return 0;
|
||||
}
|
||||
while( 2*threads*sizeofsobj < sharedMemPerBlock && 2*threads <= maxThreadsPerBlock ) threads *= 2;
|
||||
// keep all the streaming multiprocessors busy
|
||||
blocks = nextPow2(multiProcessorCount);
|
||||
return 1;
|
||||
}
|
||||
|
||||
template <class sobj, class Iterator>
|
||||
__device__ void reduceBlock(volatile sobj *sdata, sobj mySum, const Iterator tid) {
|
||||
|
||||
Iterator blockSize = blockDim.x;
|
||||
|
||||
// cannot use overloaded operators for sobj as they are not volatile-qualified
|
||||
memcpy((void *)&sdata[tid], (void *)&mySum, sizeof(sobj));
|
||||
acceleratorSynchronise();
|
||||
|
||||
const Iterator VEC = WARP_SIZE;
|
||||
const Iterator vid = tid & (VEC-1);
|
||||
|
||||
sobj beta, temp;
|
||||
memcpy((void *)&beta, (void *)&mySum, sizeof(sobj));
|
||||
|
||||
for (int i = VEC/2; i > 0; i>>=1) {
|
||||
if (vid < i) {
|
||||
memcpy((void *)&temp, (void *)&sdata[tid+i], sizeof(sobj));
|
||||
beta += temp;
|
||||
memcpy((void *)&sdata[tid], (void *)&beta, sizeof(sobj));
|
||||
}
|
||||
acceleratorSynchronise();
|
||||
}
|
||||
acceleratorSynchroniseAll();
|
||||
|
||||
if (threadIdx.x == 0) {
|
||||
beta = Zero();
|
||||
for (Iterator i = 0; i < blockSize; i += VEC) {
|
||||
memcpy((void *)&temp, (void *)&sdata[i], sizeof(sobj));
|
||||
beta += temp;
|
||||
}
|
||||
memcpy((void *)&sdata[0], (void *)&beta, sizeof(sobj));
|
||||
}
|
||||
acceleratorSynchroniseAll();
|
||||
}
|
||||
|
||||
|
||||
template <class vobj, class sobj, class Iterator>
|
||||
__device__ void reduceBlocks(const vobj *g_idata, sobj *g_odata, Iterator n)
|
||||
{
|
||||
constexpr Iterator nsimd = vobj::Nsimd();
|
||||
|
||||
Iterator blockSize = blockDim.x;
|
||||
|
||||
// force shared memory alignment
|
||||
extern __shared__ __align__(COALESCE_GRANULARITY) unsigned char shmem_pointer[];
|
||||
// it's not possible to have two extern __shared__ arrays with same name
|
||||
// but different types in different scopes -- need to cast each time
|
||||
sobj *sdata = (sobj *)shmem_pointer;
|
||||
|
||||
// first level of reduction,
|
||||
// each thread writes result in mySum
|
||||
Iterator tid = threadIdx.x;
|
||||
Iterator i = blockIdx.x*(blockSize*2) + threadIdx.x;
|
||||
Iterator gridSize = blockSize*2*gridDim.x;
|
||||
sobj mySum = Zero();
|
||||
|
||||
while (i < n) {
|
||||
Iterator lane = i % nsimd;
|
||||
Iterator ss = i / nsimd;
|
||||
auto tmp = extractLane(lane,g_idata[ss]);
|
||||
sobj tmpD;
|
||||
tmpD=tmp;
|
||||
mySum +=tmpD;
|
||||
|
||||
if (i + blockSize < n) {
|
||||
lane = (i+blockSize) % nsimd;
|
||||
ss = (i+blockSize) / nsimd;
|
||||
tmp = extractLane(lane,g_idata[ss]);
|
||||
tmpD = tmp;
|
||||
mySum += tmpD;
|
||||
}
|
||||
i += gridSize;
|
||||
}
|
||||
|
||||
// copy mySum to shared memory and perform
|
||||
// reduction for all threads in this block
|
||||
reduceBlock(sdata, mySum, tid);
|
||||
if (tid == 0) g_odata[blockIdx.x] = sdata[0];
|
||||
}
|
||||
|
||||
template <class vobj, class sobj,class Iterator>
|
||||
__global__ void reduceKernel(const vobj *lat, sobj *buffer, Iterator n) {
|
||||
|
||||
Iterator blockSize = blockDim.x;
|
||||
|
||||
// perform reduction for this block and
|
||||
// write result to global memory buffer
|
||||
reduceBlocks(lat, buffer, n);
|
||||
|
||||
if (gridDim.x > 1) {
|
||||
|
||||
const Iterator tid = threadIdx.x;
|
||||
__shared__ bool amLast;
|
||||
// force shared memory alignment
|
||||
extern __shared__ __align__(COALESCE_GRANULARITY) unsigned char shmem_pointer[];
|
||||
// it's not possible to have two extern __shared__ arrays with same name
|
||||
// but different types in different scopes -- need to cast each time
|
||||
sobj *smem = (sobj *)shmem_pointer;
|
||||
|
||||
// wait until all outstanding memory instructions in this thread are finished
|
||||
acceleratorFence();
|
||||
|
||||
if (tid==0) {
|
||||
unsigned int ticket = atomicInc(&retirementCount, gridDim.x);
|
||||
// true if this block is the last block to be done
|
||||
amLast = (ticket == gridDim.x-1);
|
||||
}
|
||||
|
||||
// each thread must read the correct value of amLast
|
||||
acceleratorSynchroniseAll();
|
||||
|
||||
if (amLast) {
|
||||
// reduce buffer[0], ..., buffer[gridDim.x-1]
|
||||
Iterator i = tid;
|
||||
sobj mySum = Zero();
|
||||
|
||||
while (i < gridDim.x) {
|
||||
mySum += buffer[i];
|
||||
i += blockSize;
|
||||
}
|
||||
|
||||
reduceBlock(smem, mySum, tid);
|
||||
|
||||
if (tid==0) {
|
||||
buffer[0] = smem[0];
|
||||
// reset count variable
|
||||
retirementCount = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Possibly promote to double and sum
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template <class vobj>
|
||||
inline typename vobj::scalar_objectD sumD_gpu_small(const vobj *lat, Integer osites)
|
||||
{
|
||||
typedef typename vobj::scalar_objectD sobj;
|
||||
typedef decltype(lat) Iterator;
|
||||
|
||||
Integer nsimd= vobj::Nsimd();
|
||||
Integer size = osites*nsimd;
|
||||
|
||||
Integer numThreads, numBlocks;
|
||||
int ok = getNumBlocksAndThreads(size, sizeof(sobj), numThreads, numBlocks);
|
||||
assert(ok);
|
||||
|
||||
Integer smemSize = numThreads * sizeof(sobj);
|
||||
// Move out of UVM
|
||||
// Turns out I had messed up the synchronise after move to compute stream
|
||||
// as running this on the default stream fools the synchronise
|
||||
#undef UVM_BLOCK_BUFFER
|
||||
#ifndef UVM_BLOCK_BUFFER
|
||||
commVector<sobj> buffer(numBlocks);
|
||||
sobj *buffer_v = &buffer[0];
|
||||
sobj result;
|
||||
reduceKernel<<< numBlocks, numThreads, smemSize, computeStream >>>(lat, buffer_v, size);
|
||||
accelerator_barrier();
|
||||
acceleratorCopyFromDevice(buffer_v,&result,sizeof(result));
|
||||
#else
|
||||
Vector<sobj> buffer(numBlocks);
|
||||
sobj *buffer_v = &buffer[0];
|
||||
sobj result;
|
||||
reduceKernel<<< numBlocks, numThreads, smemSize, computeStream >>>(lat, buffer_v, size);
|
||||
accelerator_barrier();
|
||||
result = *buffer_v;
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
|
||||
template <class vobj>
|
||||
inline typename vobj::scalar_objectD sumD_gpu_large(const vobj *lat, Integer osites)
|
||||
{
|
||||
typedef typename vobj::vector_type vector;
|
||||
typedef typename vobj::scalar_typeD scalarD;
|
||||
typedef typename vobj::scalar_objectD sobj;
|
||||
sobj ret;
|
||||
scalarD *ret_p = (scalarD *)&ret;
|
||||
|
||||
const int words = sizeof(vobj)/sizeof(vector);
|
||||
|
||||
Vector<vector> buffer(osites);
|
||||
vector *dat = (vector *)lat;
|
||||
vector *buf = &buffer[0];
|
||||
iScalar<vector> *tbuf =(iScalar<vector> *) &buffer[0];
|
||||
for(int w=0;w<words;w++) {
|
||||
|
||||
accelerator_for(ss,osites,1,{
|
||||
buf[ss] = dat[ss*words+w];
|
||||
});
|
||||
|
||||
ret_p[w] = sumD_gpu_small(tbuf,osites);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <class vobj>
|
||||
inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites)
|
||||
{
|
||||
typedef typename vobj::scalar_objectD sobj;
|
||||
sobj ret;
|
||||
|
||||
Integer nsimd= vobj::Nsimd();
|
||||
Integer size = osites*nsimd;
|
||||
Integer numThreads, numBlocks;
|
||||
int ok = getNumBlocksAndThreads(size, sizeof(sobj), numThreads, numBlocks);
|
||||
|
||||
if ( ok ) {
|
||||
ret = sumD_gpu_small(lat,osites);
|
||||
} else {
|
||||
ret = sumD_gpu_large(lat,osites);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Return as same precision as input performing reduction in double precision though
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template <class vobj>
|
||||
inline typename vobj::scalar_object sum_gpu(const vobj *lat, Integer osites)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
sobj result;
|
||||
result = sumD_gpu(lat,osites);
|
||||
return result;
|
||||
}
|
||||
|
||||
template <class vobj>
|
||||
inline typename vobj::scalar_object sum_gpu_large(const vobj *lat, Integer osites)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
sobj result;
|
||||
result = sumD_gpu_large(lat,osites);
|
||||
return result;
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
@ -1,125 +0,0 @@
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Possibly promote to double and sum
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <class vobj>
|
||||
inline typename vobj::scalar_objectD sumD_gpu_tensor(const vobj *lat, Integer osites)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_objectD sobjD;
|
||||
sobj *mysum =(sobj *) malloc_shared(sizeof(sobj),*theGridAccelerator);
|
||||
sobj identity; zeroit(identity);
|
||||
sobj ret ;
|
||||
|
||||
Integer nsimd= vobj::Nsimd();
|
||||
|
||||
theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
|
||||
auto Reduction = cl::sycl::reduction(mysum,identity,std::plus<>());
|
||||
cgh.parallel_for(cl::sycl::range<1>{osites},
|
||||
Reduction,
|
||||
[=] (cl::sycl::id<1> item, auto &sum) {
|
||||
auto osite = item[0];
|
||||
sum +=Reduce(lat[osite]);
|
||||
});
|
||||
});
|
||||
theGridAccelerator->wait();
|
||||
ret = mysum[0];
|
||||
free(mysum,*theGridAccelerator);
|
||||
sobjD dret; convertType(dret,ret);
|
||||
return dret;
|
||||
}
|
||||
|
||||
template <class vobj>
|
||||
inline typename vobj::scalar_objectD sumD_gpu_large(const vobj *lat, Integer osites)
|
||||
{
|
||||
return sumD_gpu_tensor(lat,osites);
|
||||
}
|
||||
template <class vobj>
|
||||
inline typename vobj::scalar_objectD sumD_gpu_small(const vobj *lat, Integer osites)
|
||||
{
|
||||
return sumD_gpu_large(lat,osites);
|
||||
}
|
||||
|
||||
template <class vobj>
|
||||
inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites)
|
||||
{
|
||||
return sumD_gpu_large(lat,osites);
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Return as same precision as input performing reduction in double precision though
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template <class vobj>
|
||||
inline typename vobj::scalar_object sum_gpu(const vobj *lat, Integer osites)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
sobj result;
|
||||
result = sumD_gpu(lat,osites);
|
||||
return result;
|
||||
}
|
||||
|
||||
template <class vobj>
|
||||
inline typename vobj::scalar_object sum_gpu_large(const vobj *lat, Integer osites)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
sobj result;
|
||||
result = sumD_gpu_large(lat,osites);
|
||||
return result;
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
/*
|
||||
template<class Double> Double svm_reduce(Double *vec,uint64_t L)
|
||||
{
|
||||
Double sumResult; zeroit(sumResult);
|
||||
Double *d_sum =(Double *)cl::sycl::malloc_shared(sizeof(Double),*theGridAccelerator);
|
||||
Double identity; zeroit(identity);
|
||||
theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
|
||||
auto Reduction = cl::sycl::reduction(d_sum,identity,std::plus<>());
|
||||
cgh.parallel_for(cl::sycl::range<1>{L},
|
||||
Reduction,
|
||||
[=] (cl::sycl::id<1> index, auto &sum) {
|
||||
sum +=vec[index];
|
||||
});
|
||||
});
|
||||
theGridAccelerator->wait();
|
||||
Double ret = d_sum[0];
|
||||
free(d_sum,*theGridAccelerator);
|
||||
std::cout << " svm_reduce finished "<<L<<" sites sum = " << ret <<std::endl;
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <class vobj>
|
||||
inline typename vobj::scalar_objectD sumD_gpu_repack(const vobj *lat, Integer osites)
|
||||
{
|
||||
typedef typename vobj::vector_type vector;
|
||||
typedef typename vobj::scalar_type scalar;
|
||||
|
||||
typedef typename vobj::scalar_typeD scalarD;
|
||||
typedef typename vobj::scalar_objectD sobjD;
|
||||
|
||||
sobjD ret;
|
||||
scalarD *ret_p = (scalarD *)&ret;
|
||||
|
||||
const int nsimd = vobj::Nsimd();
|
||||
const int words = sizeof(vobj)/sizeof(vector);
|
||||
|
||||
Vector<scalar> buffer(osites*nsimd);
|
||||
scalar *buf = &buffer[0];
|
||||
vector *dat = (vector *)lat;
|
||||
|
||||
for(int w=0;w<words;w++) {
|
||||
|
||||
accelerator_for(ss,osites,nsimd,{
|
||||
int lane = acceleratorSIMTlane(nsimd);
|
||||
buf[ss*nsimd+lane] = dat[ss*words+w].getlane(lane);
|
||||
});
|
||||
//Precision change at this point is to late to gain precision
|
||||
ret_p[w] = svm_reduce(buf,nsimd*osites);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
*/
|
@ -1,550 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_rng.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_RNG_H
|
||||
#define GRID_LATTICE_RNG_H
|
||||
|
||||
#include <random>
|
||||
|
||||
#ifdef RNG_SITMO
|
||||
#include <Grid/sitmo_rng/sitmo_prng_engine.hpp>
|
||||
#endif
|
||||
|
||||
#if defined(RNG_SITMO)
|
||||
#define RNG_FAST_DISCARD
|
||||
#else
|
||||
#undef RNG_FAST_DISCARD
|
||||
#endif
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// Allow the RNG state to be less dense than the fine grid
|
||||
//////////////////////////////////////////////////////////////
|
||||
inline int RNGfillable(GridBase *coarse,GridBase *fine)
|
||||
{
|
||||
|
||||
int rngdims = coarse->_ndimension;
|
||||
|
||||
// trivially extended in higher dims, with locality guaranteeing RNG state is local to node
|
||||
int lowerdims = fine->_ndimension - coarse->_ndimension;
|
||||
assert(lowerdims >= 0);
|
||||
for(int d=0;d<lowerdims;d++){
|
||||
assert(fine->_simd_layout[d]==1);
|
||||
assert(fine->_processors[d]==1);
|
||||
}
|
||||
|
||||
int multiplicity=1;
|
||||
for(int d=0;d<lowerdims;d++){
|
||||
multiplicity=multiplicity*fine->_rdimensions[d];
|
||||
}
|
||||
// local and global volumes subdivide cleanly after SIMDization
|
||||
for(int d=0;d<rngdims;d++){
|
||||
int fd= d+lowerdims;
|
||||
assert(coarse->_processors[d] == fine->_processors[fd]);
|
||||
assert(coarse->_simd_layout[d] == fine->_simd_layout[fd]);
|
||||
assert(((fine->_rdimensions[fd] / coarse->_rdimensions[d])* coarse->_rdimensions[d])==fine->_rdimensions[fd]);
|
||||
|
||||
multiplicity = multiplicity *fine->_rdimensions[fd] / coarse->_rdimensions[d];
|
||||
}
|
||||
return multiplicity;
|
||||
}
|
||||
|
||||
|
||||
// merge of April 11 2017
|
||||
// this function is necessary for the LS vectorised field
|
||||
inline int RNGfillable_general(GridBase *coarse,GridBase *fine)
|
||||
{
|
||||
int rngdims = coarse->_ndimension;
|
||||
|
||||
// trivially extended in higher dims, with locality guaranteeing RNG state is local to node
|
||||
int lowerdims = fine->_ndimension - coarse->_ndimension; assert(lowerdims >= 0);
|
||||
// assumes that the higher dimensions are not using more processors
|
||||
// all further divisions are local
|
||||
for(int d=0;d<lowerdims;d++) assert(fine->_processors[d]==1);
|
||||
for(int d=0;d<rngdims;d++) assert(coarse->_processors[d] == fine->_processors[d+lowerdims]);
|
||||
|
||||
// then divide the number of local sites
|
||||
// check that the total number of sims agree, meanse the iSites are the same
|
||||
assert(fine->Nsimd() == coarse->Nsimd());
|
||||
|
||||
// check that the two grids divide cleanly
|
||||
assert( (fine->lSites() / coarse->lSites() ) * coarse->lSites() == fine->lSites() );
|
||||
|
||||
return fine->lSites() / coarse->lSites();
|
||||
}
|
||||
|
||||
// real scalars are one component
|
||||
template<class scalar,class distribution,class generator>
|
||||
void fillScalar(scalar &s,distribution &dist,generator & gen)
|
||||
{
|
||||
s=dist(gen);
|
||||
}
|
||||
template<class distribution,class generator>
|
||||
void fillScalar(ComplexF &s,distribution &dist, generator &gen)
|
||||
{
|
||||
// s=ComplexF(dist(gen),dist(gen));
|
||||
s.real(dist(gen));
|
||||
s.imag(dist(gen));
|
||||
}
|
||||
template<class distribution,class generator>
|
||||
void fillScalar(ComplexD &s,distribution &dist,generator &gen)
|
||||
{
|
||||
// s=ComplexD(dist(gen),dist(gen));
|
||||
s.real(dist(gen));
|
||||
s.imag(dist(gen));
|
||||
}
|
||||
|
||||
class GridRNGbase {
|
||||
public:
|
||||
// One generator per site.
|
||||
// Uniform and Gaussian distributions from these generators.
|
||||
#ifdef RNG_RANLUX
|
||||
typedef std::ranlux48 RngEngine;
|
||||
typedef uint64_t RngStateType;
|
||||
static const int RngStateCount = 15;
|
||||
#endif
|
||||
#ifdef RNG_MT19937
|
||||
typedef std::mt19937 RngEngine;
|
||||
typedef uint32_t RngStateType;
|
||||
static const int RngStateCount = std::mt19937::state_size;
|
||||
#endif
|
||||
#ifdef RNG_SITMO
|
||||
typedef sitmo::prng_engine RngEngine;
|
||||
typedef uint64_t RngStateType;
|
||||
static const int RngStateCount = 13;
|
||||
#endif
|
||||
|
||||
std::vector<RngEngine> _generators;
|
||||
std::vector<std::uniform_real_distribution<RealD> > _uniform;
|
||||
std::vector<std::normal_distribution<RealD> > _gaussian;
|
||||
std::vector<std::discrete_distribution<int32_t> > _bernoulli;
|
||||
std::vector<std::uniform_int_distribution<uint32_t> > _uid;
|
||||
|
||||
///////////////////////
|
||||
// support for parallel init
|
||||
///////////////////////
|
||||
#ifdef RNG_FAST_DISCARD
|
||||
static void Skip(RngEngine &eng,uint64_t site)
|
||||
{
|
||||
/////////////////////////////////////////////////////////////////////////////////////
|
||||
// Skip by 2^40 elements between successive lattice sites
|
||||
// This goes by 10^12.
|
||||
// Consider quenched updating; likely never exceeding rate of 1000 sweeps
|
||||
// per second on any machine. This gives us of order 10^9 seconds, or 100 years
|
||||
// skip ahead.
|
||||
// For HMC unlikely to go at faster than a solve per second, and
|
||||
// tens of seconds per trajectory so this is clean in all reasonable cases,
|
||||
// and margin of safety is orders of magnitude.
|
||||
// We could hack Sitmo to skip in the higher order words of state if necessary
|
||||
//
|
||||
// Replace with 2^30 ; avoid problem on large volumes
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////////////
|
||||
// uint64_t skip = site+1; // Old init Skipped then drew. Checked compat with faster init
|
||||
const int shift = 30;
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Weird compiler bug in Intel 2018.1 under O3 was generating 32bit and not 64 bit left shift.
|
||||
////////////////////////////////////////////////////////////////////
|
||||
volatile uint64_t skip = site;
|
||||
|
||||
skip = skip<<shift;
|
||||
|
||||
assert((skip >> shift)==site); // check for overflow
|
||||
|
||||
eng.discard(skip);
|
||||
// std::cout << " Engine " <<site << " state " <<eng<<std::endl;
|
||||
}
|
||||
#endif
|
||||
static RngEngine Reseed(RngEngine &eng)
|
||||
{
|
||||
std::vector<uint32_t> newseed;
|
||||
std::uniform_int_distribution<uint32_t> uid;
|
||||
return Reseed(eng,newseed,uid);
|
||||
}
|
||||
static RngEngine Reseed(RngEngine &eng,std::vector<uint32_t> & newseed,
|
||||
std::uniform_int_distribution<uint32_t> &uid)
|
||||
{
|
||||
const int reseeds=4;
|
||||
|
||||
newseed.resize(reseeds);
|
||||
for(int i=0;i<reseeds;i++){
|
||||
newseed[i] = uid(eng);
|
||||
}
|
||||
std::seed_seq sseq(newseed.begin(),newseed.end());
|
||||
return RngEngine(sseq);
|
||||
}
|
||||
|
||||
void GetState(std::vector<RngStateType> & saved,RngEngine &eng) {
|
||||
saved.resize(RngStateCount);
|
||||
std::stringstream ss;
|
||||
ss<<eng;
|
||||
ss.seekg(0,ss.beg);
|
||||
for(int i=0;i<RngStateCount;i++){
|
||||
ss>>saved[i];
|
||||
}
|
||||
}
|
||||
void GetState(std::vector<RngStateType> & saved,int gen) {
|
||||
GetState(saved,_generators[gen]);
|
||||
}
|
||||
void SetState(std::vector<RngStateType> & saved,RngEngine &eng){
|
||||
assert(saved.size()==RngStateCount);
|
||||
std::stringstream ss;
|
||||
for(int i=0;i<RngStateCount;i++){
|
||||
ss<< saved[i]<<" ";
|
||||
}
|
||||
ss.seekg(0,ss.beg);
|
||||
ss>>eng;
|
||||
}
|
||||
void SetState(std::vector<RngStateType> & saved,int gen){
|
||||
SetState(saved,_generators[gen]);
|
||||
}
|
||||
void SetEngine(RngEngine &Eng, int gen){
|
||||
_generators[gen]=Eng;
|
||||
}
|
||||
void GetEngine(RngEngine &Eng, int gen){
|
||||
Eng=_generators[gen];
|
||||
}
|
||||
template<class source> void Seed(source &src, int gen)
|
||||
{
|
||||
_generators[gen] = RngEngine(src);
|
||||
}
|
||||
};
|
||||
|
||||
class GridSerialRNG : public GridRNGbase {
|
||||
public:
|
||||
|
||||
GridSerialRNG() : GridRNGbase() {
|
||||
_generators.resize(1);
|
||||
_uniform.resize(1,std::uniform_real_distribution<RealD>{0,1});
|
||||
_gaussian.resize(1,std::normal_distribution<RealD>(0.0,1.0) );
|
||||
_bernoulli.resize(1,std::discrete_distribution<int32_t>{1,1});
|
||||
_uid.resize(1,std::uniform_int_distribution<uint32_t>() );
|
||||
}
|
||||
|
||||
template <class sobj,class distribution> inline void fill(sobj &l,std::vector<distribution> &dist){
|
||||
|
||||
typedef typename sobj::scalar_type scalar_type;
|
||||
|
||||
int words = sizeof(sobj)/sizeof(scalar_type);
|
||||
|
||||
scalar_type *buf = (scalar_type *) & l;
|
||||
|
||||
dist[0].reset();
|
||||
for(int idx=0;idx<words;idx++){
|
||||
fillScalar(buf[idx],dist[0],_generators[0]);
|
||||
}
|
||||
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
|
||||
}
|
||||
|
||||
template <class distribution> inline void fill(ComplexF &l,std::vector<distribution> &dist){
|
||||
dist[0].reset();
|
||||
fillScalar(l,dist[0],_generators[0]);
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(ComplexD &l,std::vector<distribution> &dist){
|
||||
dist[0].reset();
|
||||
fillScalar(l,dist[0],_generators[0]);
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(RealF &l,std::vector<distribution> &dist){
|
||||
dist[0].reset();
|
||||
fillScalar(l,dist[0],_generators[0]);
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(RealD &l,std::vector<distribution> &dist){
|
||||
dist[0].reset();
|
||||
fillScalar(l,dist[0],_generators[0]);
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
// vector fill
|
||||
template <class distribution> inline void fill(vComplexF &l,std::vector<distribution> &dist){
|
||||
RealF *pointer=(RealF *)&l;
|
||||
dist[0].reset();
|
||||
for(int i=0;i<2*vComplexF::Nsimd();i++){
|
||||
fillScalar(pointer[i],dist[0],_generators[0]);
|
||||
}
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(vComplexD &l,std::vector<distribution> &dist){
|
||||
RealD *pointer=(RealD *)&l;
|
||||
dist[0].reset();
|
||||
for(int i=0;i<2*vComplexD::Nsimd();i++){
|
||||
fillScalar(pointer[i],dist[0],_generators[0]);
|
||||
}
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(vRealF &l,std::vector<distribution> &dist){
|
||||
RealF *pointer=(RealF *)&l;
|
||||
dist[0].reset();
|
||||
for(int i=0;i<vRealF::Nsimd();i++){
|
||||
fillScalar(pointer[i],dist[0],_generators[0]);
|
||||
}
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(vRealD &l,std::vector<distribution> &dist){
|
||||
RealD *pointer=(RealD *)&l;
|
||||
dist[0].reset();
|
||||
for(int i=0;i<vRealD::Nsimd();i++){
|
||||
fillScalar(pointer[i],dist[0],_generators[0]);
|
||||
}
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
|
||||
void SeedFixedIntegers(const std::vector<int> &seeds){
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size());
|
||||
std::seed_seq src(seeds.begin(),seeds.end());
|
||||
Seed(src,0);
|
||||
}
|
||||
|
||||
void SeedUniqueString(const std::string &s){
|
||||
std::vector<int> seeds;
|
||||
std::stringstream sha;
|
||||
seeds = GridChecksum::sha256_seeds(s);
|
||||
for(int i=0;i<seeds.size();i++) {
|
||||
sha << std::hex << seeds[i];
|
||||
}
|
||||
std::cout << GridLogMessage << "Intialising serial RNG with unique string '"
|
||||
<< s << "'" << std::endl;
|
||||
std::cout << GridLogMessage << "Seed SHA256: " << sha.str() << std::endl;
|
||||
SeedFixedIntegers(seeds);
|
||||
}
|
||||
};
|
||||
|
||||
class GridParallelRNG : public GridRNGbase {
|
||||
private:
|
||||
double _time_counter;
|
||||
GridBase *_grid;
|
||||
unsigned int _vol;
|
||||
|
||||
public:
|
||||
GridBase *Grid(void) const { return _grid; }
|
||||
int generator_idx(int os,int is) {
|
||||
return is*_grid->oSites()+os;
|
||||
}
|
||||
|
||||
GridParallelRNG(GridBase *grid) : GridRNGbase() {
|
||||
_grid = grid;
|
||||
_vol =_grid->iSites()*_grid->oSites();
|
||||
|
||||
_generators.resize(_vol);
|
||||
_uniform.resize(_vol,std::uniform_real_distribution<RealD>{0,1});
|
||||
_gaussian.resize(_vol,std::normal_distribution<RealD>(0.0,1.0) );
|
||||
_bernoulli.resize(_vol,std::discrete_distribution<int32_t>{1,1});
|
||||
_uid.resize(_vol,std::uniform_int_distribution<uint32_t>() );
|
||||
}
|
||||
|
||||
template <class vobj,class distribution> inline void fill(Lattice<vobj> &l,std::vector<distribution> &dist){
|
||||
|
||||
typedef typename vobj::scalar_object scalar_object;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
double inner_time_counter = usecond();
|
||||
|
||||
int multiplicity = RNGfillable_general(_grid, l.Grid()); // l has finer or same grid
|
||||
int Nsimd = _grid->Nsimd(); // guaranteed to be the same for l.Grid() too
|
||||
int osites = _grid->oSites(); // guaranteed to be <= l.Grid()->oSites() by a factor multiplicity
|
||||
int words = sizeof(scalar_object) / sizeof(scalar_type);
|
||||
|
||||
autoView(l_v, l, CpuWrite);
|
||||
thread_for( ss, osites, {
|
||||
ExtractBuffer<scalar_object> buf(Nsimd);
|
||||
for (int m = 0; m < multiplicity; m++) { // Draw from same generator multiplicity times
|
||||
|
||||
int sm = multiplicity * ss + m; // Maps the generator site to the fine site
|
||||
|
||||
for (int si = 0; si < Nsimd; si++) {
|
||||
|
||||
int gdx = generator_idx(ss, si); // index of generator state
|
||||
scalar_type *pointer = (scalar_type *)&buf[si];
|
||||
dist[gdx].reset();
|
||||
for (int idx = 0; idx < words; idx++)
|
||||
fillScalar(pointer[idx], dist[gdx], _generators[gdx]);
|
||||
}
|
||||
// merge into SIMD lanes, FIXME suboptimal implementation
|
||||
merge(l_v[sm], buf);
|
||||
}
|
||||
});
|
||||
// });
|
||||
|
||||
_time_counter += usecond()- inner_time_counter;
|
||||
}
|
||||
|
||||
void SeedUniqueString(const std::string &s){
|
||||
std::vector<int> seeds;
|
||||
seeds = GridChecksum::sha256_seeds(s);
|
||||
std::cout << GridLogMessage << "Intialising parallel RNG with unique string '"
|
||||
<< s << "'" << std::endl;
|
||||
std::cout << GridLogMessage << "Seed SHA256: " << GridChecksum::sha256_string(seeds) << std::endl;
|
||||
SeedFixedIntegers(seeds);
|
||||
}
|
||||
void SeedFixedIntegers(const std::vector<int> &seeds){
|
||||
|
||||
// Everyone generates the same seed_seq based on input seeds
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size());
|
||||
|
||||
std::seed_seq source(seeds.begin(),seeds.end());
|
||||
|
||||
RngEngine master_engine(source);
|
||||
|
||||
#ifdef RNG_FAST_DISCARD
|
||||
////////////////////////////////////////////////
|
||||
// Skip ahead through a single stream.
|
||||
// Applicable to SITMO and other has based/crypto RNGs
|
||||
// Should be applicable to Mersenne Twister, but the C++11
|
||||
// MT implementation does not implement fast discard even though
|
||||
// in principle this is possible
|
||||
////////////////////////////////////////////////
|
||||
#if 1
|
||||
thread_for( lidx, _grid->lSites(), {
|
||||
|
||||
int gidx;
|
||||
int o_idx;
|
||||
int i_idx;
|
||||
int rank;
|
||||
Coordinate pcoor;
|
||||
Coordinate lcoor;
|
||||
Coordinate gcoor;
|
||||
_grid->LocalIndexToLocalCoor(lidx,lcoor);
|
||||
pcoor=_grid->ThisProcessorCoor();
|
||||
_grid->ProcessorCoorLocalCoorToGlobalCoor(pcoor,lcoor,gcoor);
|
||||
_grid->GlobalCoorToGlobalIndex(gcoor,gidx);
|
||||
|
||||
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
|
||||
|
||||
assert(rank == _grid->ThisRank() );
|
||||
|
||||
int l_idx=generator_idx(o_idx,i_idx);
|
||||
_generators[l_idx] = master_engine;
|
||||
Skip(_generators[l_idx],gidx); // Skip to next RNG sequence
|
||||
});
|
||||
#else
|
||||
// Everybody loops over global volume.
|
||||
thread_for( gidx, _grid->_gsites, {
|
||||
|
||||
// Where is it?
|
||||
int rank;
|
||||
int o_idx;
|
||||
int i_idx;
|
||||
|
||||
Coordinate gcoor;
|
||||
_grid->GlobalIndexToGlobalCoor(gidx,gcoor);
|
||||
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
|
||||
|
||||
// If this is one of mine we take it
|
||||
if( rank == _grid->ThisRank() ){
|
||||
int l_idx=generator_idx(o_idx,i_idx);
|
||||
_generators[l_idx] = master_engine;
|
||||
Skip(_generators[l_idx],gidx); // Skip to next RNG sequence
|
||||
}
|
||||
});
|
||||
#endif
|
||||
#else
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Machine and thread decomposition dependent seeding is efficient
|
||||
// and maximally parallel; but NOT reproducible from machine to machine.
|
||||
// Not ideal, but fastest way to reseed all nodes.
|
||||
////////////////////////////////////////////////////////////////
|
||||
{
|
||||
// Obtain one Reseed per processor
|
||||
int Nproc = _grid->ProcessorCount();
|
||||
std::vector<RngEngine> seeders(Nproc);
|
||||
int me= _grid->ThisRank();
|
||||
for(int p=0;p<Nproc;p++){
|
||||
seeders[p] = Reseed(master_engine);
|
||||
}
|
||||
master_engine = seeders[me];
|
||||
}
|
||||
|
||||
{
|
||||
// Obtain one reseeded generator per thread
|
||||
int Nthread = 32; // Hardwire a good level or parallelism
|
||||
std::vector<RngEngine> seeders(Nthread);
|
||||
for(int t=0;t<Nthread;t++){
|
||||
seeders[t] = Reseed(master_engine);
|
||||
}
|
||||
|
||||
thread_for( t, Nthread, {
|
||||
// set up one per local site in threaded fashion
|
||||
std::vector<uint32_t> newseeds;
|
||||
std::uniform_int_distribution<uint32_t> uid;
|
||||
for(int l=0;l<_grid->lSites();l++) {
|
||||
if ( (l%Nthread)==t ) {
|
||||
_generators[l] = Reseed(seeders[t],newseeds,uid);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void Report(){
|
||||
std::cout << GridLogMessage << "Time spent in the fill() routine by GridParallelRNG: "<< _time_counter/1e3 << " ms" << std::endl;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Support for rigorous test of RNG's
|
||||
// Return uniform random uint32_t from requested site generator
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
uint32_t GlobalU01(int gsite){
|
||||
|
||||
uint32_t the_number;
|
||||
// who
|
||||
int rank,o_idx,i_idx;
|
||||
Coordinate gcoor;
|
||||
_grid->GlobalIndexToGlobalCoor(gsite,gcoor);
|
||||
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
|
||||
|
||||
// draw
|
||||
int l_idx=generator_idx(o_idx,i_idx);
|
||||
if( rank == _grid->ThisRank() ){
|
||||
the_number = _uid[l_idx](_generators[l_idx]);
|
||||
}
|
||||
|
||||
// share & return
|
||||
_grid->Broadcast(rank,(void *)&the_number,sizeof(the_number));
|
||||
return the_number;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template <class vobj> inline void random(GridParallelRNG &rng,Lattice<vobj> &l) { rng.fill(l,rng._uniform); }
|
||||
template <class vobj> inline void gaussian(GridParallelRNG &rng,Lattice<vobj> &l) { rng.fill(l,rng._gaussian); }
|
||||
template <class vobj> inline void bernoulli(GridParallelRNG &rng,Lattice<vobj> &l){ rng.fill(l,rng._bernoulli);}
|
||||
|
||||
template <class sobj> inline void random(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._uniform ); }
|
||||
template <class sobj> inline void gaussian(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._gaussian ); }
|
||||
template <class sobj> inline void bernoulli(GridSerialRNG &rng,sobj &l){ rng.fill(l,rng._bernoulli); }
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
@ -1,130 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_trace.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_TRACE_H
|
||||
#define GRID_LATTICE_TRACE_H
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Tracing, transposing, peeking, poking
|
||||
///////////////////////////////////////////////
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Trace
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/*
|
||||
template<class vobj>
|
||||
inline auto trace(const Lattice<vobj> &lhs) -> Lattice<decltype(trace(vobj()))>
|
||||
{
|
||||
Lattice<decltype(trace(vobj()))> ret(lhs.Grid());
|
||||
autoView(ret_v , ret, AcceleratorWrite);
|
||||
autoView(lhs_v , lhs, AcceleratorRead);
|
||||
accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), {
|
||||
coalescedWrite(ret_v[ss], trace(lhs_v(ss)));
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
*/
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Trace Index level dependent operation
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<int Index,class vobj>
|
||||
inline auto TraceIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<Index>(vobj()))>
|
||||
{
|
||||
Lattice<decltype(traceIndex<Index>(vobj()))> ret(lhs.Grid());
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
autoView( lhs_v , lhs, AcceleratorRead);
|
||||
accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), {
|
||||
coalescedWrite(ret_v[ss], traceIndex<Index>(lhs_v(ss)));
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
|
||||
template<int N, class Vec>
|
||||
Lattice<iScalar<iScalar<iScalar<Vec> > > > Determinant(const Lattice<iScalar<iScalar<iMatrix<Vec, N> > > > &Umu)
|
||||
{
|
||||
GridBase *grid=Umu.Grid();
|
||||
auto lvol = grid->lSites();
|
||||
Lattice<iScalar<iScalar<iScalar<Vec> > > > ret(grid);
|
||||
typedef typename Vec::scalar_type scalar;
|
||||
autoView(Umu_v,Umu,CpuRead);
|
||||
autoView(ret_v,ret,CpuWrite);
|
||||
thread_for(site,lvol,{
|
||||
Eigen::MatrixXcd EigenU = Eigen::MatrixXcd::Zero(N,N);
|
||||
Coordinate lcoor;
|
||||
grid->LocalIndexToLocalCoor(site, lcoor);
|
||||
iScalar<iScalar<iMatrix<scalar, N> > > Us;
|
||||
peekLocalSite(Us, Umu_v, lcoor);
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
scalar tmp= Us()()(i,j);
|
||||
ComplexD ztmp(real(tmp),imag(tmp));
|
||||
EigenU(i,j)=ztmp;
|
||||
}}
|
||||
ComplexD detD = EigenU.determinant();
|
||||
typename Vec::scalar_type det(detD.real(),detD.imag());
|
||||
pokeLocalSite(det,ret_v,lcoor);
|
||||
});
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<int N>
|
||||
Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > Inverse(const Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > &Umu)
|
||||
{
|
||||
GridBase *grid=Umu.Grid();
|
||||
auto lvol = grid->lSites();
|
||||
Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > ret(grid);
|
||||
|
||||
autoView(Umu_v,Umu,CpuRead);
|
||||
autoView(ret_v,ret,CpuWrite);
|
||||
thread_for(site,lvol,{
|
||||
Eigen::MatrixXcd EigenU = Eigen::MatrixXcd::Zero(N,N);
|
||||
Coordinate lcoor;
|
||||
grid->LocalIndexToLocalCoor(site, lcoor);
|
||||
iScalar<iScalar<iMatrix<ComplexD, N> > > Us;
|
||||
iScalar<iScalar<iMatrix<ComplexD, N> > > Ui;
|
||||
peekLocalSite(Us, Umu_v, lcoor);
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
EigenU(i,j) = Us()()(i,j);
|
||||
}}
|
||||
Eigen::MatrixXcd EigenUinv = EigenU.inverse();
|
||||
for(int i=0;i<N;i++){
|
||||
for(int j=0;j<N;j++){
|
||||
Ui()()(i,j) = EigenUinv(i,j);
|
||||
}}
|
||||
pokeLocalSite(Ui,ret_v,lcoor);
|
||||
});
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,70 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_transpose.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_TRANSPOSE_H
|
||||
#define GRID_LATTICE_TRANSPOSE_H
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Transpose
|
||||
///////////////////////////////////////////////
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Transpose
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/*
|
||||
template<class vobj>
|
||||
inline Lattice<vobj> transpose(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs.Grid());
|
||||
autoView( ret_v, ret, AcceleratorWrite);
|
||||
autoView( lhs_v, lhs, AcceleratorRead);
|
||||
accelerator_for(ss,lhs_v.size(),vobj::Nsimd(),{
|
||||
coalescedWrite(ret_v[ss], transpose(lhs_v(ss)));
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
*/
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Index level dependent transpose
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<int Index,class vobj>
|
||||
inline auto TransposeIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(transposeIndex<Index>(vobj()))>
|
||||
{
|
||||
Lattice<decltype(transposeIndex<Index>(vobj()))> ret(lhs.Grid());
|
||||
autoView( ret_v, ret, AcceleratorWrite);
|
||||
autoView( lhs_v, lhs, AcceleratorRead);
|
||||
accelerator_for(ss,lhs_v.size(),vobj::Nsimd(),{
|
||||
coalescedWrite(ret_v[ss] , transposeIndex<Index>(lhs_v(ss)));
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
@ -1,80 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_unary.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_UNARY_H
|
||||
#define GRID_LATTICE_UNARY_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class obj> Lattice<obj> pow(const Lattice<obj> &rhs_i,RealD y){
|
||||
Lattice<obj> ret_i(rhs_i.Grid());
|
||||
autoView( rhs, rhs_i, AcceleratorRead);
|
||||
autoView( ret, ret_i, AcceleratorWrite);
|
||||
ret.Checkerboard() = rhs.Checkerboard();
|
||||
accelerator_for(ss,rhs.size(),1,{
|
||||
ret[ss]=pow(rhs[ss],y);
|
||||
});
|
||||
return ret_i;
|
||||
}
|
||||
template<class obj> Lattice<obj> mod(const Lattice<obj> &rhs_i,Integer y){
|
||||
Lattice<obj> ret_i(rhs_i.Grid());
|
||||
autoView( rhs , rhs_i, AcceleratorRead);
|
||||
autoView( ret , ret_i, AcceleratorWrite);
|
||||
ret.Checkerboard() = rhs.Checkerboard();
|
||||
accelerator_for(ss,rhs.size(),obj::Nsimd(),{
|
||||
coalescedWrite(ret[ss],mod(rhs(ss),y));
|
||||
});
|
||||
return ret_i;
|
||||
}
|
||||
|
||||
template<class obj> Lattice<obj> div(const Lattice<obj> &rhs_i,Integer y){
|
||||
Lattice<obj> ret_i(rhs_i.Grid());
|
||||
autoView( ret , ret_i, AcceleratorWrite);
|
||||
autoView( rhs , rhs_i, AcceleratorRead);
|
||||
ret.Checkerboard() = rhs_i.Checkerboard();
|
||||
accelerator_for(ss,rhs.size(),obj::Nsimd(),{
|
||||
coalescedWrite(ret[ss],div(rhs(ss),y));
|
||||
});
|
||||
return ret_i;
|
||||
}
|
||||
|
||||
template<class obj> Lattice<obj> expMat(const Lattice<obj> &rhs_i, RealD alpha, Integer Nexp = DEFAULT_MAT_EXP){
|
||||
Lattice<obj> ret_i(rhs_i.Grid());
|
||||
autoView( rhs , rhs_i, AcceleratorRead);
|
||||
autoView( ret , ret_i, AcceleratorWrite);
|
||||
ret.Checkerboard() = rhs.Checkerboard();
|
||||
accelerator_for(ss,rhs.size(),obj::Nsimd(),{
|
||||
coalescedWrite(ret[ss],Exponentiate(rhs(ss),alpha, Nexp));
|
||||
});
|
||||
return ret_i;
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
@ -1,173 +0,0 @@
|
||||
#pragma once
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Base class which can be used by traits to pick up behaviour
|
||||
///////////////////////////////////////////////////////////////////
|
||||
class LatticeBase {};
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Conformable checks; same instance of Grid required
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
void accelerator_inline conformable(GridBase *lhs,GridBase *rhs)
|
||||
{
|
||||
assert(lhs == rhs);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Minimal base class containing only data valid to access from accelerator
|
||||
// _odata will be a managed pointer in CUDA
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Force access to lattice through a view object.
|
||||
// prevents writing of code that will not offload to GPU, but perhaps annoyingly
|
||||
// strict since host could could in principle direct access through the lattice object
|
||||
// Need to decide programming model.
|
||||
#define LATTICE_VIEW_STRICT
|
||||
template<class vobj> class LatticeAccelerator : public LatticeBase
|
||||
{
|
||||
protected:
|
||||
//public:
|
||||
GridBase *_grid;
|
||||
int checkerboard;
|
||||
vobj *_odata; // A managed pointer
|
||||
uint64_t _odata_size;
|
||||
ViewAdvise advise;
|
||||
public:
|
||||
accelerator_inline LatticeAccelerator() : checkerboard(0), _odata(nullptr), _odata_size(0), _grid(nullptr), advise(AdviseDefault) { };
|
||||
accelerator_inline uint64_t oSites(void) const { return _odata_size; };
|
||||
accelerator_inline int Checkerboard(void) const { return checkerboard; };
|
||||
accelerator_inline int &Checkerboard(void) { return this->checkerboard; }; // can assign checkerboard on a container, not a view
|
||||
accelerator_inline ViewAdvise Advise(void) const { return advise; };
|
||||
accelerator_inline ViewAdvise &Advise(void) { return this->advise; }; // can assign advise on a container, not a view
|
||||
accelerator_inline void Conformable(GridBase * &grid) const
|
||||
{
|
||||
if (grid) conformable(grid, _grid);
|
||||
else grid = _grid;
|
||||
};
|
||||
// Host only
|
||||
GridBase * getGrid(void) const { return _grid; };
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// A View class which provides accessor to the data.
|
||||
// This will be safe to call from accelerator_for and is trivially copy constructible
|
||||
// The copy constructor for this will need to be used by device lambda functions
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
class LatticeView : public LatticeAccelerator<vobj>
|
||||
{
|
||||
public:
|
||||
// Rvalue
|
||||
ViewMode mode;
|
||||
void * cpu_ptr;
|
||||
#ifdef GRID_SIMT
|
||||
accelerator_inline const typename vobj::scalar_object operator()(size_t i) const {
|
||||
return coalescedRead(this->_odata[i]);
|
||||
}
|
||||
#else
|
||||
accelerator_inline const vobj & operator()(size_t i) const { return this->_odata[i]; }
|
||||
#endif
|
||||
|
||||
#if 1
|
||||
// accelerator_inline const vobj & operator[](size_t i) const { return this->_odata[i]; };
|
||||
accelerator_inline vobj & operator[](size_t i) const { return this->_odata[i]; };
|
||||
#else
|
||||
accelerator_inline const vobj & operator[](size_t i) const { return this->_odata[i]; };
|
||||
accelerator_inline vobj & operator[](size_t i) { return this->_odata[i]; };
|
||||
#endif
|
||||
|
||||
accelerator_inline uint64_t begin(void) const { return 0;};
|
||||
accelerator_inline uint64_t end(void) const { return this->_odata_size; };
|
||||
accelerator_inline uint64_t size(void) const { return this->_odata_size; };
|
||||
|
||||
LatticeView(const LatticeAccelerator<vobj> &refer_to_me) : LatticeAccelerator<vobj> (refer_to_me){}
|
||||
LatticeView(const LatticeView<vobj> &refer_to_me) = default; // Trivially copyable
|
||||
LatticeView(const LatticeAccelerator<vobj> &refer_to_me,ViewMode mode) : LatticeAccelerator<vobj> (refer_to_me)
|
||||
{
|
||||
this->ViewOpen(mode);
|
||||
}
|
||||
|
||||
// Host functions
|
||||
void ViewOpen(ViewMode mode)
|
||||
{ // Translate the pointer, could save a copy. Could use a "Handle" and not save _odata originally in base
|
||||
// std::cout << "View Open"<<std::hex<<this->_odata<<std::dec <<std::endl;
|
||||
this->cpu_ptr = (void *)this->_odata;
|
||||
this->mode = mode;
|
||||
this->_odata =(vobj *)
|
||||
MemoryManager::ViewOpen(this->cpu_ptr,
|
||||
this->_odata_size*sizeof(vobj),
|
||||
mode,
|
||||
this->advise);
|
||||
}
|
||||
void ViewClose(void)
|
||||
{ // Inform the manager
|
||||
// std::cout << "View Close"<<std::hex<<this->cpu_ptr<<std::dec <<std::endl;
|
||||
MemoryManager::ViewClose(this->cpu_ptr,this->mode);
|
||||
}
|
||||
|
||||
};
|
||||
// Little autoscope assister
|
||||
template<class View>
|
||||
class ViewCloser
|
||||
{
|
||||
View v; // Take a copy of view and call view close when I go out of scope automatically
|
||||
public:
|
||||
ViewCloser(View &_v) : v(_v) {};
|
||||
~ViewCloser() { v.ViewClose(); }
|
||||
};
|
||||
|
||||
#define autoView(l_v,l,mode) \
|
||||
auto l_v = l.View(mode); \
|
||||
ViewCloser<decltype(l_v)> _autoView##l_v(l_v);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Lattice expression types used by ET to assemble the AST
|
||||
//
|
||||
// Need to be able to detect code paths according to the whether a lattice object or not
|
||||
// so introduce some trait type things
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class LatticeExpressionBase {};
|
||||
|
||||
template <typename T> using is_lattice = std::is_base_of<LatticeBase, T>;
|
||||
template <typename T> using is_lattice_expr = std::is_base_of<LatticeExpressionBase,T >;
|
||||
|
||||
template<class T, bool isLattice> struct ViewMapBase { typedef T Type; };
|
||||
template<class T> struct ViewMapBase<T,true> { typedef LatticeView<typename T::vector_object> Type; };
|
||||
template<class T> using ViewMap = ViewMapBase<T,std::is_base_of<LatticeBase, T>::value >;
|
||||
|
||||
template <typename Op, typename _T1>
|
||||
class LatticeUnaryExpression : public LatticeExpressionBase
|
||||
{
|
||||
public:
|
||||
typedef typename ViewMap<_T1>::Type T1;
|
||||
Op op;
|
||||
T1 arg1;
|
||||
LatticeUnaryExpression(Op _op,const _T1 &_arg1) : op(_op), arg1(_arg1) {};
|
||||
};
|
||||
|
||||
template <typename Op, typename _T1, typename _T2>
|
||||
class LatticeBinaryExpression : public LatticeExpressionBase
|
||||
{
|
||||
public:
|
||||
typedef typename ViewMap<_T1>::Type T1;
|
||||
typedef typename ViewMap<_T2>::Type T2;
|
||||
Op op;
|
||||
T1 arg1;
|
||||
T2 arg2;
|
||||
LatticeBinaryExpression(Op _op,const _T1 &_arg1,const _T2 &_arg2) : op(_op), arg1(_arg1), arg2(_arg2) {};
|
||||
};
|
||||
|
||||
template <typename Op, typename _T1, typename _T2, typename _T3>
|
||||
class LatticeTrinaryExpression : public LatticeExpressionBase
|
||||
{
|
||||
public:
|
||||
typedef typename ViewMap<_T1>::Type T1;
|
||||
typedef typename ViewMap<_T2>::Type T2;
|
||||
typedef typename ViewMap<_T3>::Type T3;
|
||||
Op op;
|
||||
T1 arg1;
|
||||
T2 arg2;
|
||||
T3 arg3;
|
||||
LatticeTrinaryExpression(Op _op,const _T1 &_arg1,const _T2 &_arg2,const _T3 &_arg3) : op(_op), arg1(_arg1), arg2(_arg2), arg3(_arg3) {};
|
||||
};
|
||||
NAMESPACE_END(Grid);
|
@ -1,174 +0,0 @@
|
||||
/*************************************************************************************
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/PaddedCell.h
|
||||
|
||||
Copyright (C) 2019
|
||||
|
||||
Author: Peter Boyle pboyle@bnl.gov
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
#include<Grid/cshift/Cshift.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
//Allow the user to specify how the C-shift is performed, e.g. to respect the appropriate boundary conditions
|
||||
template<typename vobj>
|
||||
struct CshiftImplBase{
|
||||
virtual Lattice<vobj> Cshift(const Lattice<vobj> &in, int dir, int shift) const = 0;
|
||||
virtual ~CshiftImplBase(){}
|
||||
};
|
||||
template<typename vobj>
|
||||
struct CshiftImplDefault: public CshiftImplBase<vobj>{
|
||||
Lattice<vobj> Cshift(const Lattice<vobj> &in, int dir, int shift) const override{ return Grid::Cshift(in,dir,shift); }
|
||||
};
|
||||
template<typename Gimpl>
|
||||
struct CshiftImplGauge: public CshiftImplBase<typename Gimpl::GaugeLinkField::vector_object>{
|
||||
typename Gimpl::GaugeLinkField Cshift(const typename Gimpl::GaugeLinkField &in, int dir, int shift) const override{ return Gimpl::CshiftLink(in,dir,shift); }
|
||||
};
|
||||
|
||||
class PaddedCell {
|
||||
public:
|
||||
GridCartesian * unpadded_grid;
|
||||
int dims;
|
||||
int depth;
|
||||
std::vector<GridCartesian *> grids;
|
||||
|
||||
~PaddedCell()
|
||||
{
|
||||
DeleteGrids();
|
||||
}
|
||||
PaddedCell(int _depth,GridCartesian *_grid)
|
||||
{
|
||||
unpadded_grid = _grid;
|
||||
depth=_depth;
|
||||
dims=_grid->Nd();
|
||||
AllocateGrids();
|
||||
Coordinate local =unpadded_grid->LocalDimensions();
|
||||
for(int d=0;d<dims;d++){
|
||||
assert(local[d]>=depth);
|
||||
}
|
||||
}
|
||||
void DeleteGrids(void)
|
||||
{
|
||||
for(int d=0;d<grids.size();d++){
|
||||
delete grids[d];
|
||||
}
|
||||
grids.resize(0);
|
||||
};
|
||||
void AllocateGrids(void)
|
||||
{
|
||||
Coordinate local =unpadded_grid->LocalDimensions();
|
||||
Coordinate simd =unpadded_grid->_simd_layout;
|
||||
Coordinate processors=unpadded_grid->_processors;
|
||||
Coordinate plocal =unpadded_grid->LocalDimensions();
|
||||
Coordinate global(dims);
|
||||
|
||||
// expand up one dim at a time
|
||||
for(int d=0;d<dims;d++){
|
||||
|
||||
plocal[d] += 2*depth;
|
||||
|
||||
for(int d=0;d<dims;d++){
|
||||
global[d] = plocal[d]*processors[d];
|
||||
}
|
||||
|
||||
grids.push_back(new GridCartesian(global,simd,processors));
|
||||
}
|
||||
};
|
||||
template<class vobj>
|
||||
inline Lattice<vobj> Extract(const Lattice<vobj> &in) const
|
||||
{
|
||||
Lattice<vobj> out(unpadded_grid);
|
||||
|
||||
Coordinate local =unpadded_grid->LocalDimensions();
|
||||
Coordinate fll(dims,depth); // depends on the MPI spread
|
||||
Coordinate tll(dims,0); // depends on the MPI spread
|
||||
localCopyRegion(in,out,fll,tll,local);
|
||||
return out;
|
||||
}
|
||||
template<class vobj>
|
||||
inline Lattice<vobj> Exchange(const Lattice<vobj> &in, const CshiftImplBase<vobj> &cshift = CshiftImplDefault<vobj>()) const
|
||||
{
|
||||
GridBase *old_grid = in.Grid();
|
||||
int dims = old_grid->Nd();
|
||||
Lattice<vobj> tmp = in;
|
||||
for(int d=0;d<dims;d++){
|
||||
tmp = Expand(d,tmp,cshift); // rvalue && assignment
|
||||
}
|
||||
return tmp;
|
||||
}
|
||||
// expand up one dim at a time
|
||||
template<class vobj>
|
||||
inline Lattice<vobj> Expand(int dim, const Lattice<vobj> &in, const CshiftImplBase<vobj> &cshift = CshiftImplDefault<vobj>()) const
|
||||
{
|
||||
GridBase *old_grid = in.Grid();
|
||||
GridCartesian *new_grid = grids[dim];//These are new grids
|
||||
Lattice<vobj> padded(new_grid);
|
||||
Lattice<vobj> shifted(old_grid);
|
||||
Coordinate local =old_grid->LocalDimensions();
|
||||
Coordinate plocal =new_grid->LocalDimensions();
|
||||
if(dim==0) conformable(old_grid,unpadded_grid);
|
||||
else conformable(old_grid,grids[dim-1]);
|
||||
|
||||
std::cout << " dim "<<dim<<" local "<<local << " padding to "<<plocal<<std::endl;
|
||||
|
||||
double tins=0, tshift=0;
|
||||
|
||||
// Middle bit
|
||||
double t = usecond();
|
||||
for(int x=0;x<local[dim];x++){
|
||||
InsertSliceLocal(in,padded,x,depth+x,dim);
|
||||
}
|
||||
tins += usecond() - t;
|
||||
|
||||
// High bit
|
||||
t = usecond();
|
||||
shifted = cshift.Cshift(in,dim,depth);
|
||||
tshift += usecond() - t;
|
||||
|
||||
t=usecond();
|
||||
for(int x=0;x<depth;x++){
|
||||
InsertSliceLocal(shifted,padded,local[dim]-depth+x,depth+local[dim]+x,dim);
|
||||
}
|
||||
tins += usecond() - t;
|
||||
|
||||
// Low bit
|
||||
t = usecond();
|
||||
shifted = cshift.Cshift(in,dim,-depth);
|
||||
tshift += usecond() - t;
|
||||
|
||||
t = usecond();
|
||||
for(int x=0;x<depth;x++){
|
||||
InsertSliceLocal(shifted,padded,x,x,dim);
|
||||
}
|
||||
tins += usecond() - t;
|
||||
|
||||
std::cout << GridLogPerformance << "PaddedCell::Expand timings: cshift:" << tshift/1000 << "ms, insert-slice:" << tins/1000 << "ms" << std::endl;
|
||||
|
||||
return padded;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -1,4 +0,0 @@
|
||||
#include <Grid/GridCore.h>
|
||||
|
||||
int Grid::BinaryIO::latticeWriteMaxRetry = -1;
|
||||
Grid::BinaryIO::IoPerf Grid::BinaryIO::lastPerf;
|
@ -1,345 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/parallelIO/NerscIO.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Jamie Hudspith <renwick.james.hudspth@gmail.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <fstream>
|
||||
#include <map>
|
||||
#include <unistd.h>
|
||||
#include <sys/utsname.h>
|
||||
#include <pwd.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// Precision mapping
|
||||
///////////////////////////////////////////////////////
|
||||
template<class vobj> static std::string getFormatString (void)
|
||||
{
|
||||
std::string format;
|
||||
typedef typename getPrecision<vobj>::real_scalar_type stype;
|
||||
if ( sizeof(stype) == sizeof(float) ) {
|
||||
format = std::string("IEEE32BIG");
|
||||
}
|
||||
if ( sizeof(stype) == sizeof(double) ) {
|
||||
format = std::string("IEEE64BIG");
|
||||
}
|
||||
return format;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// header specification/interpretation
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
class FieldNormMetaData : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(FieldNormMetaData, double, norm2);
|
||||
};
|
||||
class FieldMetaData : Serializable {
|
||||
public:
|
||||
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(FieldMetaData,
|
||||
int, nd,
|
||||
std::vector<int>, dimension,
|
||||
std::vector<std::string>, boundary,
|
||||
int, data_start,
|
||||
std::string, hdr_version,
|
||||
std::string, storage_format,
|
||||
double, link_trace,
|
||||
double, plaquette,
|
||||
uint32_t, checksum,
|
||||
uint32_t, scidac_checksuma,
|
||||
uint32_t, scidac_checksumb,
|
||||
unsigned int, sequence_number,
|
||||
std::string, data_type,
|
||||
std::string, ensemble_id,
|
||||
std::string, ensemble_label,
|
||||
std::string, ildg_lfn,
|
||||
std::string, creator,
|
||||
std::string, creator_hardware,
|
||||
std::string, creation_date,
|
||||
std::string, archive_date,
|
||||
std::string, floating_point);
|
||||
// WARNING: non-initialised values might lead to twisted parallel IO
|
||||
// issues, std::string are fine because they initliase to size 0
|
||||
// as per C++ standard.
|
||||
FieldMetaData(void)
|
||||
: nd(4), dimension(4,0), boundary(4, ""), data_start(0),
|
||||
link_trace(0.), plaquette(0.), checksum(0),
|
||||
scidac_checksuma(0), scidac_checksumb(0), sequence_number(0)
|
||||
{}
|
||||
};
|
||||
|
||||
// PB disable using namespace - this is a header and forces namesapce visibility for all
|
||||
// including files
|
||||
//using namespace Grid;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Bit and Physical Checksumming and QA of data
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
inline void GridMetaData(GridBase *grid,FieldMetaData &header)
|
||||
{
|
||||
int nd = grid->_ndimension;
|
||||
header.nd = nd;
|
||||
header.dimension.resize(nd);
|
||||
header.boundary.resize(nd);
|
||||
header.data_start = 0;
|
||||
for(int d=0;d<nd;d++) {
|
||||
header.dimension[d] = grid->_fdimensions[d];
|
||||
}
|
||||
for(int d=0;d<nd;d++) {
|
||||
header.boundary[d] = std::string("PERIODIC");
|
||||
}
|
||||
}
|
||||
|
||||
inline void MachineCharacteristics(FieldMetaData &header)
|
||||
{
|
||||
// Who
|
||||
struct passwd *pw = getpwuid (getuid());
|
||||
if (pw) header.creator = std::string(pw->pw_name);
|
||||
|
||||
// When
|
||||
std::time_t t = std::time(nullptr);
|
||||
std::tm tm_ = *std::localtime(&t);
|
||||
std::ostringstream oss;
|
||||
oss << std::put_time(&tm_, "%c %Z");
|
||||
header.creation_date = oss.str();
|
||||
header.archive_date = header.creation_date;
|
||||
|
||||
// What
|
||||
struct utsname name; uname(&name);
|
||||
header.creator_hardware = std::string(name.nodename)+"-";
|
||||
header.creator_hardware+= std::string(name.machine)+"-";
|
||||
header.creator_hardware+= std::string(name.sysname)+"-";
|
||||
header.creator_hardware+= std::string(name.release);
|
||||
}
|
||||
|
||||
#define dump_meta_data(field, s) \
|
||||
s << "BEGIN_HEADER" << std::endl; \
|
||||
s << "HDR_VERSION = " << field.hdr_version << std::endl; \
|
||||
s << "DATATYPE = " << field.data_type << std::endl; \
|
||||
s << "STORAGE_FORMAT = " << field.storage_format << std::endl; \
|
||||
for(int i=0;i<4;i++){ \
|
||||
s << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ; \
|
||||
} \
|
||||
s << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl; \
|
||||
s << "PLAQUETTE = " << std::setprecision(10) << field.plaquette << std::endl; \
|
||||
for(int i=0;i<4;i++){ \
|
||||
s << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl; \
|
||||
} \
|
||||
\
|
||||
s << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::dec<<std::endl; \
|
||||
s << "SCIDAC_CHECKSUMA = "<< std::hex << std::setw(10) << field.scidac_checksuma << std::dec<<std::endl; \
|
||||
s << "SCIDAC_CHECKSUMB = "<< std::hex << std::setw(10) << field.scidac_checksumb << std::dec<<std::endl; \
|
||||
s << "ENSEMBLE_ID = " << field.ensemble_id << std::endl; \
|
||||
s << "ENSEMBLE_LABEL = " << field.ensemble_label << std::endl; \
|
||||
s << "SEQUENCE_NUMBER = " << field.sequence_number << std::endl; \
|
||||
s << "CREATOR = " << field.creator << std::endl; \
|
||||
s << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl; \
|
||||
s << "CREATION_DATE = " << field.creation_date << std::endl; \
|
||||
s << "ARCHIVE_DATE = " << field.archive_date << std::endl; \
|
||||
s << "FLOATING_POINT = " << field.floating_point << std::endl; \
|
||||
s << "END_HEADER" << std::endl;
|
||||
|
||||
template<class vobj> inline void PrepareMetaData(Lattice<vobj> & field, FieldMetaData &header)
|
||||
{
|
||||
GridBase *grid = field.Grid();
|
||||
std::string format = getFormatString<vobj>();
|
||||
header.floating_point = format;
|
||||
header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac
|
||||
GridMetaData(grid,header);
|
||||
MachineCharacteristics(header);
|
||||
}
|
||||
template<class Impl>
|
||||
class GaugeStatistics
|
||||
{
|
||||
public:
|
||||
void operator()(Lattice<vLorentzColourMatrixD> & data,FieldMetaData &header)
|
||||
{
|
||||
header.link_trace = WilsonLoops<Impl>::linkTrace(data);
|
||||
header.plaquette = WilsonLoops<Impl>::avgPlaquette(data);
|
||||
}
|
||||
};
|
||||
typedef GaugeStatistics<PeriodicGimplD> PeriodicGaugeStatistics;
|
||||
typedef GaugeStatistics<ConjugateGimplD> ConjugateGaugeStatistics;
|
||||
template<> inline void PrepareMetaData<vLorentzColourMatrixD>(Lattice<vLorentzColourMatrixD> & field, FieldMetaData &header)
|
||||
{
|
||||
GridBase *grid = field.Grid();
|
||||
std::string format = getFormatString<vLorentzColourMatrixD>();
|
||||
header.floating_point = format;
|
||||
header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac
|
||||
GridMetaData(grid,header);
|
||||
MachineCharacteristics(header);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Utilities ; these are QCD aware
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
inline void reconstruct3(LorentzColourMatrix & cm)
|
||||
{
|
||||
assert( Nc < 4 && Nc > 1 ) ;
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
#if Nc == 2
|
||||
cm(mu)()(1,0) = -adj(cm(mu)()(0,y)) ;
|
||||
cm(mu)()(1,1) = adj(cm(mu)()(0,x)) ;
|
||||
#else
|
||||
const int x=0 , y=1 , z=2 ; // a little disinenuous labelling
|
||||
cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy
|
||||
cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz
|
||||
cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Some data types for intermediate storage
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, Nc-1>, Nd >;
|
||||
|
||||
typedef iLorentzColour2x3<Complex> LorentzColour2x3;
|
||||
typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F;
|
||||
typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
// Simple classes for precision conversion
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
template <class fobj, class sobj>
|
||||
struct BinarySimpleUnmunger {
|
||||
typedef typename getPrecision<fobj>::real_scalar_type fobj_stype;
|
||||
typedef typename getPrecision<sobj>::real_scalar_type sobj_stype;
|
||||
|
||||
void operator()(sobj &in, fobj &out) {
|
||||
// take word by word and transform accoding to the status
|
||||
fobj_stype *out_buffer = (fobj_stype *)&out;
|
||||
sobj_stype *in_buffer = (sobj_stype *)∈
|
||||
size_t fobj_words = sizeof(out) / sizeof(fobj_stype);
|
||||
size_t sobj_words = sizeof(in) / sizeof(sobj_stype);
|
||||
assert(fobj_words == sobj_words);
|
||||
|
||||
for (unsigned int word = 0; word < sobj_words; word++)
|
||||
out_buffer[word] = in_buffer[word]; // type conversion on the fly
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
template <class fobj, class sobj>
|
||||
struct BinarySimpleMunger {
|
||||
typedef typename getPrecision<fobj>::real_scalar_type fobj_stype;
|
||||
typedef typename getPrecision<sobj>::real_scalar_type sobj_stype;
|
||||
|
||||
void operator()(fobj &in, sobj &out) {
|
||||
// take word by word and transform accoding to the status
|
||||
fobj_stype *in_buffer = (fobj_stype *)∈
|
||||
sobj_stype *out_buffer = (sobj_stype *)&out;
|
||||
size_t fobj_words = sizeof(in) / sizeof(fobj_stype);
|
||||
size_t sobj_words = sizeof(out) / sizeof(sobj_stype);
|
||||
assert(fobj_words == sobj_words);
|
||||
|
||||
for (unsigned int word = 0; word < sobj_words; word++)
|
||||
out_buffer[word] = in_buffer[word]; // type conversion on the fly
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<class fobj,class sobj>
|
||||
struct GaugeSimpleMunger{
|
||||
void operator()(fobj &in, sobj &out) {
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
for (int i = 0; i < Nc; i++) {
|
||||
for (int j = 0; j < Nc; j++) {
|
||||
out(mu)()(i, j) = in(mu)()(i, j);
|
||||
}}
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
template <class fobj, class sobj>
|
||||
struct GaugeSimpleUnmunger {
|
||||
void operator()(sobj &in, fobj &out) {
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
for (int i = 0; i < Nc; i++) {
|
||||
for (int j = 0; j < Nc; j++) {
|
||||
out(mu)()(i, j) = in(mu)()(i, j);
|
||||
}}
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
template<class fobj,class sobj>
|
||||
struct GaugeDoubleStoredMunger{
|
||||
void operator()(fobj &in, sobj &out) {
|
||||
for (int mu = 0; mu < Nds; mu++) {
|
||||
for (int i = 0; i < Nc; i++) {
|
||||
for (int j = 0; j < Nc; j++) {
|
||||
out(mu)()(i, j) = in(mu)()(i, j);
|
||||
}}
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
template <class fobj, class sobj>
|
||||
struct GaugeDoubleStoredUnmunger {
|
||||
void operator()(sobj &in, fobj &out) {
|
||||
for (int mu = 0; mu < Nds; mu++) {
|
||||
for (int i = 0; i < Nc; i++) {
|
||||
for (int j = 0; j < Nc; j++) {
|
||||
out(mu)()(i, j) = in(mu)()(i, j);
|
||||
}}
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
template<class fobj,class sobj>
|
||||
struct Gauge3x2munger{
|
||||
void operator() (fobj &in,sobj &out){
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
for(int i=0;i<Nc-1;i++){
|
||||
for(int j=0;j<Nc;j++){
|
||||
out(mu)()(i,j) = in(mu)(i)(j);
|
||||
}}
|
||||
}
|
||||
reconstruct3(out);
|
||||
}
|
||||
};
|
||||
|
||||
template<class fobj,class sobj>
|
||||
struct Gauge3x2unmunger{
|
||||
void operator() (sobj &in,fobj &out){
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
for(int i=0;i<Nc-1;i++){
|
||||
for(int j=0;j<Nc;j++){
|
||||
out(mu)(i)(j) = in(mu)()(i,j);
|
||||
}}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -1,387 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/parallelIO/NerscIO.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Matt Spraggs <matthew.spraggs@gmail.com>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Jamie Hudspith <renwick.james.hudspth@gmail.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_NERSC_IO_H
|
||||
#define GRID_NERSC_IO_H
|
||||
|
||||
#include <string>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
using namespace Grid;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Write and read from fstream; comput header offset for payload
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
class NerscIO : public BinaryIO {
|
||||
public:
|
||||
typedef Lattice<vLorentzColourMatrixD> GaugeField;
|
||||
|
||||
// Enable/disable exiting if the plaquette in the header does not match the value computed (default true)
|
||||
static bool & exitOnReadPlaquetteMismatch(){ static bool v=true; return v; }
|
||||
|
||||
static inline void truncate(std::string file){
|
||||
std::ofstream fout(file,std::ios::out);
|
||||
}
|
||||
|
||||
static inline unsigned int writeHeader(FieldMetaData &field,std::string file)
|
||||
{
|
||||
std::ofstream fout(file,std::ios::out|std::ios::in);
|
||||
fout.seekp(0,std::ios::beg);
|
||||
dump_meta_data(field, fout);
|
||||
field.data_start = fout.tellp();
|
||||
return field.data_start;
|
||||
}
|
||||
|
||||
// for the header-reader
|
||||
static inline int readHeader(std::string file,GridBase *grid, FieldMetaData &field)
|
||||
{
|
||||
std::map<std::string,std::string> header;
|
||||
std::string line;
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// read the header
|
||||
//////////////////////////////////////////////////
|
||||
std::ifstream fin(file);
|
||||
|
||||
getline(fin,line); // read one line and insist is
|
||||
|
||||
removeWhitespace(line);
|
||||
std::cout << GridLogMessage << "* " << line << std::endl;
|
||||
|
||||
assert(line==std::string("BEGIN_HEADER"));
|
||||
|
||||
do {
|
||||
getline(fin,line); // read one line
|
||||
std::cout << GridLogMessage << "* "<<line<< std::endl;
|
||||
int eq = line.find("=");
|
||||
if(eq >0) {
|
||||
std::string key=line.substr(0,eq);
|
||||
std::string val=line.substr(eq+1);
|
||||
removeWhitespace(key);
|
||||
removeWhitespace(val);
|
||||
|
||||
header[key] = val;
|
||||
}
|
||||
} while( line.find("END_HEADER") == std::string::npos );
|
||||
|
||||
field.data_start = fin.tellg();
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// chomp the values
|
||||
//////////////////////////////////////////////////
|
||||
field.hdr_version = header["HDR_VERSION"];
|
||||
field.data_type = header["DATATYPE"];
|
||||
field.storage_format = header["STORAGE_FORMAT"];
|
||||
|
||||
field.dimension[0] = std::stol(header["DIMENSION_1"]);
|
||||
field.dimension[1] = std::stol(header["DIMENSION_2"]);
|
||||
field.dimension[2] = std::stol(header["DIMENSION_3"]);
|
||||
field.dimension[3] = std::stol(header["DIMENSION_4"]);
|
||||
|
||||
assert(grid->_ndimension == 4);
|
||||
for(int d=0;d<4;d++){
|
||||
assert(grid->_fdimensions[d]==field.dimension[d]);
|
||||
}
|
||||
|
||||
field.link_trace = std::stod(header["LINK_TRACE"]);
|
||||
field.plaquette = std::stod(header["PLAQUETTE"]);
|
||||
|
||||
field.boundary[0] = header["BOUNDARY_1"];
|
||||
field.boundary[1] = header["BOUNDARY_2"];
|
||||
field.boundary[2] = header["BOUNDARY_3"];
|
||||
field.boundary[3] = header["BOUNDARY_4"];
|
||||
|
||||
field.checksum = std::stoul(header["CHECKSUM"],0,16);
|
||||
field.ensemble_id = header["ENSEMBLE_ID"];
|
||||
field.ensemble_label = header["ENSEMBLE_LABEL"];
|
||||
field.sequence_number = std::stol(header["SEQUENCE_NUMBER"]);
|
||||
field.creator = header["CREATOR"];
|
||||
field.creator_hardware = header["CREATOR_HARDWARE"];
|
||||
field.creation_date = header["CREATION_DATE"];
|
||||
field.archive_date = header["ARCHIVE_DATE"];
|
||||
field.floating_point = header["FLOATING_POINT"];
|
||||
|
||||
return field.data_start;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Now the meat: the object readers
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<class GaugeStats=PeriodicGaugeStatistics>
|
||||
static inline void readConfiguration(GaugeField &Umu,
|
||||
FieldMetaData& header,
|
||||
std::string file,
|
||||
GaugeStats GaugeStatisticsCalculator=GaugeStats())
|
||||
{
|
||||
|
||||
GridBase *grid = Umu.Grid();
|
||||
uint64_t offset = readHeader(file,Umu.Grid(),header);
|
||||
|
||||
FieldMetaData clone(header);
|
||||
|
||||
std::string format(header.floating_point);
|
||||
|
||||
const int ieee32big = (format == std::string("IEEE32BIG"));
|
||||
const int ieee32 = (format == std::string("IEEE32"));
|
||||
const int ieee64big = (format == std::string("IEEE64BIG"));
|
||||
const int ieee64 = (format == std::string("IEEE64") || \
|
||||
format == std::string("IEEE64LITTLE"));
|
||||
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
// depending on datatype, set up munger;
|
||||
// munger is a function of <floating point, Real, data_type>
|
||||
const std::string stNC = std::to_string( Nc ) ;
|
||||
if ( header.data_type == std::string("4D_SU"+stNC+"_GAUGE") ) {
|
||||
if ( ieee32 || ieee32big ) {
|
||||
BinaryIO::readLatticeObject<vLorentzColourMatrixD, LorentzColour2x3F>
|
||||
(Umu,file,Gauge3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
}
|
||||
if ( ieee64 || ieee64big ) {
|
||||
BinaryIO::readLatticeObject<vLorentzColourMatrixD, LorentzColour2x3D>
|
||||
(Umu,file,Gauge3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
}
|
||||
} else if ( header.data_type == std::string("4D_SU"+stNC+"_GAUGE_"+stNC+"x"+stNC) ) {
|
||||
if ( ieee32 || ieee32big ) {
|
||||
BinaryIO::readLatticeObject<vLorentzColourMatrixD,LorentzColourMatrixF>
|
||||
(Umu,file,GaugeSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
}
|
||||
if ( ieee64 || ieee64big ) {
|
||||
BinaryIO::readLatticeObject<vLorentzColourMatrixD,LorentzColourMatrixD>
|
||||
(Umu,file,GaugeSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
}
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
GaugeStats Stats; Stats(Umu,clone);
|
||||
|
||||
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" checksum "<<std::hex<<nersc_csum<< std::dec
|
||||
<<" header "<<std::hex<<header.checksum<<std::dec <<std::endl;
|
||||
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" plaquette "<<clone.plaquette
|
||||
<<" header "<<header.plaquette<<std::endl;
|
||||
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" link_trace "<<clone.link_trace
|
||||
<<" header "<<header.link_trace<<std::endl;
|
||||
|
||||
if ( fabs(clone.plaquette -header.plaquette ) >= 1.0e-5 ) {
|
||||
std::cout << " Plaquette mismatch "<<std::endl;
|
||||
}
|
||||
if ( nersc_csum != header.checksum ) {
|
||||
std::cerr << " checksum mismatch " << std::endl;
|
||||
std::cerr << " plaqs " << clone.plaquette << " " << header.plaquette << std::endl;
|
||||
std::cerr << " trace " << clone.link_trace<< " " << header.link_trace<< std::endl;
|
||||
std::cerr << " nersc_csum " <<std::hex<< nersc_csum << " " << header.checksum<< std::dec<< std::endl;
|
||||
exit(0);
|
||||
}
|
||||
if(exitOnReadPlaquetteMismatch()) assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
|
||||
assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 );
|
||||
assert(nersc_csum == header.checksum );
|
||||
|
||||
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl;
|
||||
}
|
||||
|
||||
// Preferred interface
|
||||
template<class GaugeStats=PeriodicGaugeStatistics>
|
||||
static inline void writeConfiguration(Lattice<vLorentzColourMatrixD > &Umu,
|
||||
std::string file,
|
||||
std::string ens_label = std::string("DWF"),
|
||||
std::string ens_id = std::string("UKQCD"),
|
||||
unsigned int sequence_number = 1)
|
||||
{
|
||||
writeConfiguration(Umu,file,0,1,ens_label,ens_id,sequence_number);
|
||||
}
|
||||
template<class GaugeStats=PeriodicGaugeStatistics>
|
||||
static inline void writeConfiguration(Lattice<vLorentzColourMatrixD > &Umu,
|
||||
std::string file,
|
||||
int two_row,
|
||||
int bits32,
|
||||
std::string ens_label = std::string("DWF"),
|
||||
std::string ens_id = std::string("UKQCD"),
|
||||
unsigned int sequence_number = 1)
|
||||
{
|
||||
typedef vLorentzColourMatrixD vobj;
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
|
||||
FieldMetaData header;
|
||||
header.sequence_number = sequence_number;
|
||||
header.ensemble_id = ens_id;
|
||||
header.ensemble_label = ens_label;
|
||||
header.hdr_version = "1.0" ;
|
||||
|
||||
typedef LorentzColourMatrixD fobj3D;
|
||||
typedef LorentzColour2x3D fobj2D;
|
||||
|
||||
GridBase *grid = Umu.Grid();
|
||||
|
||||
GridMetaData(grid,header);
|
||||
assert(header.nd==4);
|
||||
GaugeStats Stats; Stats(Umu,header);
|
||||
MachineCharacteristics(header);
|
||||
|
||||
uint64_t offset;
|
||||
|
||||
// Sod it -- always write NcxNc double
|
||||
header.floating_point = std::string("IEEE64BIG");
|
||||
const std::string stNC = std::to_string( Nc ) ;
|
||||
if( two_row ) {
|
||||
header.data_type = std::string("4D_SU" + stNC + "_GAUGE" );
|
||||
} else {
|
||||
header.data_type = std::string("4D_SU" + stNC + "_GAUGE_" + stNC + "x" + stNC );
|
||||
}
|
||||
if ( grid->IsBoss() ) {
|
||||
truncate(file);
|
||||
offset = writeHeader(header,file);
|
||||
}
|
||||
grid->Broadcast(0,(void *)&offset,sizeof(offset));
|
||||
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
if( two_row ) {
|
||||
Gauge3x2unmunger<fobj2D,sobj> munge;
|
||||
BinaryIO::writeLatticeObject<vobj,fobj2D>(Umu,file,munge,offset,header.floating_point,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
} else {
|
||||
GaugeSimpleUnmunger<fobj3D,sobj> munge;
|
||||
BinaryIO::writeLatticeObject<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
}
|
||||
header.checksum = nersc_csum;
|
||||
if ( grid->IsBoss() ) {
|
||||
writeHeader(header,file);
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage <<"Written NERSC Configuration on "<< file << " checksum "
|
||||
<<std::hex<<header.checksum
|
||||
<<std::dec<<" plaq "<< header.plaquette <<std::endl;
|
||||
|
||||
}
|
||||
///////////////////////////////
|
||||
// RNG state
|
||||
///////////////////////////////
|
||||
static inline void writeRNGState(GridSerialRNG &serial,GridParallelRNG ¶llel,std::string file)
|
||||
{
|
||||
typedef typename GridParallelRNG::RngStateType RngStateType;
|
||||
|
||||
// Following should become arguments
|
||||
FieldMetaData header;
|
||||
header.sequence_number = 1;
|
||||
header.ensemble_id = "UKQCD";
|
||||
header.ensemble_label = "DWF";
|
||||
|
||||
GridBase *grid = parallel.Grid();
|
||||
|
||||
GridMetaData(grid,header);
|
||||
assert(header.nd==4);
|
||||
header.link_trace=0.0;
|
||||
header.plaquette=0.0;
|
||||
MachineCharacteristics(header);
|
||||
|
||||
uint64_t offset;
|
||||
#ifdef RNG_RANLUX
|
||||
header.floating_point = std::string("UINT64");
|
||||
header.data_type = std::string("RANLUX48");
|
||||
#endif
|
||||
#ifdef RNG_MT19937
|
||||
header.floating_point = std::string("UINT32");
|
||||
header.data_type = std::string("MT19937");
|
||||
#endif
|
||||
#ifdef RNG_SITMO
|
||||
header.floating_point = std::string("UINT64");
|
||||
header.data_type = std::string("SITMO");
|
||||
#endif
|
||||
|
||||
if ( grid->IsBoss() ) {
|
||||
truncate(file);
|
||||
offset = writeHeader(header,file);
|
||||
}
|
||||
grid->Broadcast(0,(void *)&offset,sizeof(offset));
|
||||
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
BinaryIO::writeRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
header.checksum = nersc_csum;
|
||||
if ( grid->IsBoss() ) {
|
||||
offset = writeHeader(header,file);
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage
|
||||
<<"Written NERSC RNG STATE "<<file<< " checksum "
|
||||
<<std::hex<<header.checksum
|
||||
<<std::dec<<std::endl;
|
||||
|
||||
}
|
||||
|
||||
static inline void readRNGState(GridSerialRNG &serial,GridParallelRNG & parallel,FieldMetaData& header,std::string file)
|
||||
{
|
||||
typedef typename GridParallelRNG::RngStateType RngStateType;
|
||||
|
||||
GridBase *grid = parallel.Grid();
|
||||
|
||||
uint64_t offset = readHeader(file,grid,header);
|
||||
|
||||
FieldMetaData clone(header);
|
||||
|
||||
std::string format(header.floating_point);
|
||||
std::string data_type(header.data_type);
|
||||
|
||||
#ifdef RNG_RANLUX
|
||||
assert(format == std::string("UINT64"));
|
||||
assert(data_type == std::string("RANLUX48"));
|
||||
#endif
|
||||
#ifdef RNG_MT19937
|
||||
assert(format == std::string("UINT32"));
|
||||
assert(data_type == std::string("MT19937"));
|
||||
#endif
|
||||
#ifdef RNG_SITMO
|
||||
assert(format == std::string("UINT64"));
|
||||
assert(data_type == std::string("SITMO"));
|
||||
#endif
|
||||
|
||||
// depending on datatype, set up munger;
|
||||
// munger is a function of <floating point, Real, data_type>
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
BinaryIO::readRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
if ( nersc_csum != header.checksum ) {
|
||||
std::cerr << "checksum mismatch "<<std::hex<< nersc_csum <<" "<<header.checksum<<std::dec<<std::endl;
|
||||
exit(0);
|
||||
}
|
||||
assert(nersc_csum == header.checksum );
|
||||
|
||||
std::cout<<GridLogMessage <<"Read NERSC RNG file "<<file<< " format "<< data_type <<std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
@ -1,224 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/parallelIO/OpenQcdIO.h
|
||||
|
||||
Copyright (C) 2015 - 2020
|
||||
|
||||
Author: Daniel Richtmann <daniel.richtmann@ur.de>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
struct OpenQcdHeader : Serializable {
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(OpenQcdHeader,
|
||||
int, Nt,
|
||||
int, Nx,
|
||||
int, Ny,
|
||||
int, Nz,
|
||||
double, plaq);
|
||||
};
|
||||
|
||||
class OpenQcdIO : public BinaryIO {
|
||||
public:
|
||||
static constexpr double normalisationFactor = Nc; // normalisation difference: grid 18, openqcd 6
|
||||
|
||||
static inline int readHeader(std::string file, GridBase* grid, FieldMetaData& field) {
|
||||
OpenQcdHeader header;
|
||||
|
||||
{
|
||||
std::ifstream fin(file, std::ios::in | std::ios::binary);
|
||||
fin.read(reinterpret_cast<char*>(&header), sizeof(OpenQcdHeader));
|
||||
assert(!fin.fail());
|
||||
field.data_start = fin.tellg();
|
||||
fin.close();
|
||||
}
|
||||
|
||||
header.plaq /= normalisationFactor;
|
||||
|
||||
// sanity check (should trigger on endian issues)
|
||||
assert(0 < header.Nt && header.Nt <= 1024);
|
||||
assert(0 < header.Nx && header.Nx <= 1024);
|
||||
assert(0 < header.Ny && header.Ny <= 1024);
|
||||
assert(0 < header.Nz && header.Nz <= 1024);
|
||||
|
||||
field.dimension[0] = header.Nx;
|
||||
field.dimension[1] = header.Ny;
|
||||
field.dimension[2] = header.Nz;
|
||||
field.dimension[3] = header.Nt;
|
||||
|
||||
std::cout << GridLogDebug << "header: " << header << std::endl;
|
||||
std::cout << GridLogDebug << "grid dimensions: " << grid->_fdimensions << std::endl;
|
||||
std::cout << GridLogDebug << "file dimensions: " << field.dimension << std::endl;
|
||||
|
||||
assert(grid->_ndimension == Nd);
|
||||
for(int d = 0; d < Nd; d++)
|
||||
assert(grid->_fdimensions[d] == field.dimension[d]);
|
||||
|
||||
field.plaquette = header.plaq;
|
||||
|
||||
return field.data_start;
|
||||
}
|
||||
|
||||
template<class vsimd>
|
||||
static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd>>& Umu,
|
||||
FieldMetaData& header,
|
||||
std::string file) {
|
||||
typedef Lattice<iDoubleStoredColourMatrix<vsimd>> DoubleStoredGaugeField;
|
||||
|
||||
assert(Ns == 4 and Nd == 4 and Nc == 3);
|
||||
|
||||
auto grid = dynamic_cast<GridCartesian*>(Umu.Grid());
|
||||
assert(grid != nullptr); assert(grid->_ndimension == Nd);
|
||||
|
||||
uint64_t offset = readHeader(file, Umu.Grid(), header);
|
||||
|
||||
FieldMetaData clone(header);
|
||||
|
||||
std::string format("IEEE64"); // they always store little endian double precsision
|
||||
uint32_t nersc_csum, scidac_csuma, scidac_csumb;
|
||||
|
||||
GridCartesian* grid_openqcd = createOpenQcdGrid(grid);
|
||||
GridRedBlackCartesian* grid_rb = SpaceTimeGrid::makeFourDimRedBlackGrid(grid);
|
||||
|
||||
typedef DoubleStoredColourMatrixD fobj;
|
||||
typedef typename DoubleStoredGaugeField::vector_object::scalar_object sobj;
|
||||
typedef typename DoubleStoredGaugeField::vector_object::Realified::scalar_type word;
|
||||
|
||||
word w = 0;
|
||||
|
||||
std::vector<fobj> iodata(grid_openqcd->lSites()); // Munge, checksum, byte order in here
|
||||
std::vector<sobj> scalardata(grid->lSites());
|
||||
|
||||
IOobject(w, grid_openqcd, iodata, file, offset, format, BINARYIO_READ | BINARYIO_LEXICOGRAPHIC,
|
||||
nersc_csum, scidac_csuma, scidac_csumb);
|
||||
|
||||
GridStopWatch timer;
|
||||
timer.Start();
|
||||
|
||||
DoubleStoredGaugeField Umu_ds(grid);
|
||||
|
||||
auto munge = GaugeDoubleStoredMunger<DoubleStoredColourMatrixD, DoubleStoredColourMatrix>();
|
||||
|
||||
Coordinate ldim = grid->LocalDimensions();
|
||||
thread_for(idx_g, grid->lSites(), {
|
||||
Coordinate coor;
|
||||
grid->LocalIndexToLocalCoor(idx_g, coor);
|
||||
|
||||
bool isOdd = grid_rb->CheckerBoard(coor) == Odd;
|
||||
|
||||
if(!isOdd) continue;
|
||||
|
||||
int idx_o = (coor[Tdir] * ldim[Xdir] * ldim[Ydir] * ldim[Zdir]
|
||||
+ coor[Xdir] * ldim[Ydir] * ldim[Zdir]
|
||||
+ coor[Ydir] * ldim[Zdir]
|
||||
+ coor[Zdir])/2;
|
||||
|
||||
munge(iodata[idx_o], scalardata[idx_g]);
|
||||
});
|
||||
|
||||
grid->Barrier(); timer.Stop();
|
||||
std::cout << Grid::GridLogMessage << "OpenQcdIO::readConfiguration: munge overhead " << timer.Elapsed() << std::endl;
|
||||
|
||||
timer.Reset(); timer.Start();
|
||||
|
||||
vectorizeFromLexOrdArray(scalardata, Umu_ds);
|
||||
|
||||
grid->Barrier(); timer.Stop();
|
||||
std::cout << Grid::GridLogMessage << "OpenQcdIO::readConfiguration: vectorize overhead " << timer.Elapsed() << std::endl;
|
||||
|
||||
timer.Reset(); timer.Start();
|
||||
|
||||
undoDoubleStore(Umu, Umu_ds);
|
||||
|
||||
grid->Barrier(); timer.Stop();
|
||||
std::cout << Grid::GridLogMessage << "OpenQcdIO::readConfiguration: redistribute overhead " << timer.Elapsed() << std::endl;
|
||||
|
||||
PeriodicGaugeStatistics Stats; Stats(Umu, clone);
|
||||
|
||||
RealD plaq_diff = fabs(clone.plaquette - header.plaquette);
|
||||
|
||||
// clang-format off
|
||||
std::cout << GridLogMessage << "OpenQcd Configuration " << file
|
||||
<< " plaquette " << clone.plaquette
|
||||
<< " header " << header.plaquette
|
||||
<< " difference " << plaq_diff
|
||||
<< std::endl;
|
||||
// clang-format on
|
||||
|
||||
RealD precTol = (getPrecision<vsimd>::value == 1) ? 2e-7 : 2e-15;
|
||||
RealD tol = precTol * std::sqrt(grid->_Nprocessors); // taken from RQCD chroma code
|
||||
|
||||
if(plaq_diff >= tol)
|
||||
std::cout << " Plaquette mismatch (diff = " << plaq_diff << ", tol = " << tol << ")" << std::endl;
|
||||
assert(plaq_diff < tol);
|
||||
|
||||
std::cout << GridLogMessage << "OpenQcd Configuration " << file << " and plaquette agree" << std::endl;
|
||||
}
|
||||
|
||||
template<class vsimd>
|
||||
static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd>>& Umu,
|
||||
std::string file) {
|
||||
std::cout << GridLogError << "Writing to openQCD file format is not implemented" << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
private:
|
||||
static inline GridCartesian* createOpenQcdGrid(GridCartesian* grid) {
|
||||
// exploit GridCartesian to be able to still use IOobject
|
||||
Coordinate gdim = grid->GlobalDimensions();
|
||||
Coordinate ldim = grid->LocalDimensions();
|
||||
Coordinate pcoor = grid->ThisProcessorCoor();
|
||||
|
||||
// openqcd does rb on the z direction
|
||||
gdim[Zdir] /= 2;
|
||||
ldim[Zdir] /= 2;
|
||||
|
||||
// and has the order T X Y Z (from slowest to fastest)
|
||||
std::swap(gdim[Xdir], gdim[Zdir]);
|
||||
std::swap(ldim[Xdir], ldim[Zdir]);
|
||||
std::swap(pcoor[Xdir], pcoor[Zdir]);
|
||||
|
||||
GridCartesian* ret = SpaceTimeGrid::makeFourDimGrid(gdim, grid->_simd_layout, grid->ProcessorGrid());
|
||||
ret->_ldimensions = ldim;
|
||||
ret->_processor_coor = pcoor;
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class vsimd>
|
||||
static inline void undoDoubleStore(Lattice<iLorentzColourMatrix<vsimd>>& Umu,
|
||||
Lattice<iDoubleStoredColourMatrix<vsimd>> const& Umu_ds) {
|
||||
conformable(Umu.Grid(), Umu_ds.Grid());
|
||||
Lattice<iColourMatrix<vsimd>> U(Umu.Grid());
|
||||
|
||||
// they store T+, T-, X+, X-, Y+, Y-, Z+, Z-
|
||||
for(int mu_g = 0; mu_g < Nd; ++mu_g) {
|
||||
int mu_o = (mu_g + 1) % Nd;
|
||||
U = PeekIndex<LorentzIndex>(Umu_ds, 2 * mu_o)
|
||||
+ Cshift(PeekIndex<LorentzIndex>(Umu_ds, 2 * mu_o + 1), mu_g, +1);
|
||||
PokeIndex<LorentzIndex>(Umu, U, mu_g);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
@ -1,281 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/parallelIO/OpenQcdIOChromaReference.h
|
||||
|
||||
Copyright (C) 2015 - 2020
|
||||
|
||||
Author: Daniel Richtmann <daniel.richtmann@ur.de>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
#include <ios>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <iomanip>
|
||||
#include <mpi.h>
|
||||
#include <ostream>
|
||||
#include <string>
|
||||
|
||||
#define CHECK {std::cerr << __FILE__ << " @l " << __LINE__ << ": CHECK" << grid->ThisRank() << std::endl;}
|
||||
#define CHECK_VAR(a) { std::cerr << __FILE__ << "@l" << __LINE__ << " on "<< grid->ThisRank() << ": " << __func__ << " " << #a << "=" << (a) << std::endl; }
|
||||
// #undef CHECK
|
||||
// #define CHECK
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
class ParRdr {
|
||||
private:
|
||||
bool const swap;
|
||||
|
||||
MPI_Status status;
|
||||
MPI_File fp;
|
||||
|
||||
int err;
|
||||
|
||||
MPI_Datatype oddSiteType;
|
||||
MPI_Datatype fileViewType;
|
||||
|
||||
GridBase* grid;
|
||||
|
||||
public:
|
||||
ParRdr(MPI_Comm comm, std::string const& filename, GridBase* gridPtr)
|
||||
: swap(false)
|
||||
, grid(gridPtr) {
|
||||
err = MPI_File_open(comm, const_cast<char*>(filename.c_str()), MPI_MODE_RDONLY, MPI_INFO_NULL, &fp);
|
||||
assert(err == MPI_SUCCESS);
|
||||
}
|
||||
|
||||
virtual ~ParRdr() { MPI_File_close(&fp); }
|
||||
|
||||
inline void errInfo(int const err, std::string const& func) {
|
||||
static char estring[MPI_MAX_ERROR_STRING];
|
||||
int eclass = -1, len = 0;
|
||||
MPI_Error_class(err, &eclass);
|
||||
MPI_Error_string(err, estring, &len);
|
||||
std::cerr << func << " - Error " << eclass << ": " << estring << std::endl;
|
||||
}
|
||||
|
||||
int readHeader(FieldMetaData& field) {
|
||||
assert((grid->_ndimension == Nd) && (Nd == 4));
|
||||
assert(Nc == 3);
|
||||
|
||||
OpenQcdHeader header;
|
||||
|
||||
readBlock(reinterpret_cast<char*>(&header), 0, sizeof(OpenQcdHeader), MPI_CHAR);
|
||||
|
||||
header.plaq /= 3.; // TODO change this into normalizationfactor
|
||||
|
||||
// sanity check (should trigger on endian issues) TODO remove?
|
||||
assert(0 < header.Nt && header.Nt <= 1024);
|
||||
assert(0 < header.Nx && header.Nx <= 1024);
|
||||
assert(0 < header.Ny && header.Ny <= 1024);
|
||||
assert(0 < header.Nz && header.Nz <= 1024);
|
||||
|
||||
field.dimension[0] = header.Nx;
|
||||
field.dimension[1] = header.Ny;
|
||||
field.dimension[2] = header.Nz;
|
||||
field.dimension[3] = header.Nt;
|
||||
|
||||
for(int d = 0; d < Nd; d++)
|
||||
assert(grid->FullDimensions()[d] == field.dimension[d]);
|
||||
|
||||
field.plaquette = header.plaq;
|
||||
|
||||
field.data_start = sizeof(OpenQcdHeader);
|
||||
|
||||
return field.data_start;
|
||||
}
|
||||
|
||||
void readBlock(void* const dest, uint64_t const pos, uint64_t const nbytes, MPI_Datatype const datatype) {
|
||||
err = MPI_File_read_at_all(fp, pos, dest, nbytes, datatype, &status);
|
||||
errInfo(err, "MPI_File_read_at_all");
|
||||
// CHECK_VAR(err)
|
||||
|
||||
int read = -1;
|
||||
MPI_Get_count(&status, datatype, &read);
|
||||
// CHECK_VAR(read)
|
||||
assert(nbytes == (uint64_t)read);
|
||||
assert(err == MPI_SUCCESS);
|
||||
}
|
||||
|
||||
void createTypes() {
|
||||
constexpr int elem_size = Nd * 2 * 2 * Nc * Nc * sizeof(double); // 2_complex 2_fwdbwd
|
||||
|
||||
err = MPI_Type_contiguous(elem_size, MPI_BYTE, &oddSiteType); assert(err == MPI_SUCCESS);
|
||||
err = MPI_Type_commit(&oddSiteType); assert(err == MPI_SUCCESS);
|
||||
|
||||
Coordinate const L = grid->GlobalDimensions();
|
||||
Coordinate const l = grid->LocalDimensions();
|
||||
Coordinate const i = grid->ThisProcessorCoor();
|
||||
|
||||
Coordinate sizes({L[2] / 2, L[1], L[0], L[3]});
|
||||
Coordinate subsizes({l[2] / 2, l[1], l[0], l[3]});
|
||||
Coordinate starts({i[2] * l[2] / 2, i[1] * l[1], i[0] * l[0], i[3] * l[3]});
|
||||
|
||||
err = MPI_Type_create_subarray(grid->_ndimension, &sizes[0], &subsizes[0], &starts[0], MPI_ORDER_FORTRAN, oddSiteType, &fileViewType); assert(err == MPI_SUCCESS);
|
||||
err = MPI_Type_commit(&fileViewType); assert(err == MPI_SUCCESS);
|
||||
}
|
||||
|
||||
void freeTypes() {
|
||||
err = MPI_Type_free(&fileViewType); assert(err == MPI_SUCCESS);
|
||||
err = MPI_Type_free(&oddSiteType); assert(err == MPI_SUCCESS);
|
||||
}
|
||||
|
||||
bool readGauge(std::vector<ColourMatrixD>& domain_buff, FieldMetaData& meta) {
|
||||
auto hdr_offset = readHeader(meta);
|
||||
CHECK
|
||||
createTypes();
|
||||
err = MPI_File_set_view(fp, hdr_offset, oddSiteType, fileViewType, "native", MPI_INFO_NULL); errInfo(err, "MPI_File_set_view0"); assert(err == MPI_SUCCESS);
|
||||
CHECK
|
||||
int const domainSites = grid->lSites();
|
||||
domain_buff.resize(Nd * domainSites); // 2_fwdbwd * 4_Nd * domainSites / 2_onlyodd
|
||||
|
||||
// the actual READ
|
||||
constexpr uint64_t cm_size = 2 * Nc * Nc * sizeof(double); // 2_complex
|
||||
constexpr uint64_t os_size = Nd * 2 * cm_size; // 2_fwdbwd
|
||||
constexpr uint64_t max_elems = std::numeric_limits<int>::max(); // int adressable elems: floor is fine
|
||||
uint64_t const n_os = domainSites / 2;
|
||||
|
||||
for(uint64_t os_idx = 0; os_idx < n_os;) {
|
||||
uint64_t const read_os = os_idx + max_elems <= n_os ? max_elems : n_os - os_idx;
|
||||
uint64_t const cm = os_idx * Nd * 2;
|
||||
readBlock(&(domain_buff[cm]), os_idx, read_os, oddSiteType);
|
||||
os_idx += read_os;
|
||||
}
|
||||
|
||||
CHECK
|
||||
err = MPI_File_set_view(fp, 0, MPI_BYTE, MPI_BYTE, "native", MPI_INFO_NULL);
|
||||
errInfo(err, "MPI_File_set_view1");
|
||||
assert(err == MPI_SUCCESS);
|
||||
freeTypes();
|
||||
|
||||
std::cout << GridLogMessage << "read sum: " << n_os * os_size << " bytes" << std::endl;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
class OpenQcdIOChromaReference : public BinaryIO {
|
||||
public:
|
||||
template<class vsimd>
|
||||
static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd>>& Umu,
|
||||
Grid::FieldMetaData& header,
|
||||
std::string file) {
|
||||
typedef Lattice<iDoubleStoredColourMatrix<vsimd>> DoubledGaugeField;
|
||||
|
||||
assert(Ns == 4 and Nd == 4 and Nc == 3);
|
||||
|
||||
auto grid = Umu.Grid();
|
||||
|
||||
typedef ColourMatrixD fobj;
|
||||
|
||||
std::vector<fobj> iodata(
|
||||
Nd * grid->lSites()); // actual size = 2*Nd*lsites but have only lsites/2 sites in file
|
||||
|
||||
{
|
||||
ParRdr rdr(MPI_COMM_WORLD, file, grid);
|
||||
rdr.readGauge(iodata, header);
|
||||
} // equivalent to using binaryio
|
||||
|
||||
std::vector<iDoubleStoredColourMatrix<typename vsimd::scalar_type>> Umu_ds_scalar(grid->lSites());
|
||||
|
||||
copyToLatticeObject(Umu_ds_scalar, iodata, grid); // equivalent to munging
|
||||
|
||||
DoubledGaugeField Umu_ds(grid);
|
||||
|
||||
vectorizeFromLexOrdArray(Umu_ds_scalar, Umu_ds);
|
||||
|
||||
redistribute(Umu, Umu_ds); // equivalent to undoDoublestore
|
||||
|
||||
FieldMetaData clone(header);
|
||||
|
||||
PeriodicGaugeStatistics Stats; Stats(Umu, clone);
|
||||
|
||||
RealD plaq_diff = fabs(clone.plaquette - header.plaquette);
|
||||
|
||||
// clang-format off
|
||||
std::cout << GridLogMessage << "OpenQcd Configuration " << file
|
||||
<< " plaquette " << clone.plaquette
|
||||
<< " header " << header.plaquette
|
||||
<< " difference " << plaq_diff
|
||||
<< std::endl;
|
||||
// clang-format on
|
||||
|
||||
RealD precTol = (getPrecision<vsimd>::value == 1) ? 2e-7 : 2e-15;
|
||||
RealD tol = precTol * std::sqrt(grid->_Nprocessors); // taken from RQCD chroma code
|
||||
|
||||
if(plaq_diff >= tol)
|
||||
std::cout << " Plaquette mismatch (diff = " << plaq_diff << ", tol = " << tol << ")" << std::endl;
|
||||
assert(plaq_diff < tol);
|
||||
|
||||
std::cout << GridLogMessage << "OpenQcd Configuration " << file << " and plaquette agree" << std::endl;
|
||||
}
|
||||
|
||||
private:
|
||||
template<class vsimd>
|
||||
static inline void redistribute(Lattice<iLorentzColourMatrix<vsimd>>& Umu,
|
||||
Lattice<iDoubleStoredColourMatrix<vsimd>> const& Umu_ds) {
|
||||
Grid::conformable(Umu.Grid(), Umu_ds.Grid());
|
||||
Lattice<iColourMatrix<vsimd>> U(Umu.Grid());
|
||||
|
||||
U = PeekIndex<LorentzIndex>(Umu_ds, 2) + Cshift(PeekIndex<LorentzIndex>(Umu_ds, 3), 0, +1); PokeIndex<LorentzIndex>(Umu, U, 0);
|
||||
U = PeekIndex<LorentzIndex>(Umu_ds, 4) + Cshift(PeekIndex<LorentzIndex>(Umu_ds, 5), 1, +1); PokeIndex<LorentzIndex>(Umu, U, 1);
|
||||
U = PeekIndex<LorentzIndex>(Umu_ds, 6) + Cshift(PeekIndex<LorentzIndex>(Umu_ds, 7), 2, +1); PokeIndex<LorentzIndex>(Umu, U, 2);
|
||||
U = PeekIndex<LorentzIndex>(Umu_ds, 0) + Cshift(PeekIndex<LorentzIndex>(Umu_ds, 1), 3, +1); PokeIndex<LorentzIndex>(Umu, U, 3);
|
||||
}
|
||||
|
||||
static inline void copyToLatticeObject(std::vector<DoubleStoredColourMatrix>& u_fb,
|
||||
std::vector<ColourMatrixD> const& node_buff,
|
||||
GridBase* grid) {
|
||||
assert(node_buff.size() == Nd * grid->lSites());
|
||||
|
||||
Coordinate const& l = grid->LocalDimensions();
|
||||
|
||||
Coordinate coord(Nd);
|
||||
int& x = coord[0];
|
||||
int& y = coord[1];
|
||||
int& z = coord[2];
|
||||
int& t = coord[3];
|
||||
|
||||
int buff_idx = 0;
|
||||
for(t = 0; t < l[3]; ++t) // IMPORTANT: openQCD file ordering
|
||||
for(x = 0; x < l[0]; ++x)
|
||||
for(y = 0; y < l[1]; ++y)
|
||||
for(z = 0; z < l[2]; ++z) {
|
||||
if((t + z + y + x) % 2 == 0) continue;
|
||||
|
||||
int local_idx;
|
||||
Lexicographic::IndexFromCoor(coord, local_idx, grid->LocalDimensions());
|
||||
for(int mu = 0; mu < 2 * Nd; ++mu)
|
||||
for(int c1 = 0; c1 < Nc; ++c1) {
|
||||
for(int c2 = 0; c2 < Nc; ++c2) {
|
||||
u_fb[local_idx](mu)()(c1,c2) = node_buff[mu+buff_idx]()()(c1,c2);
|
||||
}
|
||||
}
|
||||
buff_idx += 2 * Nd;
|
||||
}
|
||||
|
||||
assert(node_buff.size() == buff_idx);
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
@ -1,105 +0,0 @@
|
||||
#ifndef _GRID_STAT_H
|
||||
#define _GRID_STAT_H
|
||||
|
||||
#ifdef AVX512
|
||||
#define _KNIGHTS_LANDING_ROOTONLY
|
||||
#endif
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Extra KNL counters from MCDRAM
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
#ifdef _KNIGHTS_LANDING_
|
||||
#define NMC 6
|
||||
#define NEDC 8
|
||||
struct ctrs
|
||||
{
|
||||
uint64_t mcrd[NMC];
|
||||
uint64_t mcwr[NMC];
|
||||
uint64_t edcrd[NEDC];
|
||||
uint64_t edcwr[NEDC];
|
||||
uint64_t edchite[NEDC];
|
||||
uint64_t edchitm[NEDC];
|
||||
uint64_t edcmisse[NEDC];
|
||||
uint64_t edcmissm[NEDC];
|
||||
};
|
||||
// Peter/Azusa:
|
||||
// Our modification of a code provided by Larry Meadows from Intel
|
||||
// Verified by email exchange non-NDA, ok for github. Should be as uses /sys/devices/ FS
|
||||
// so is already public and in the linux kernel for KNL.
|
||||
struct knl_gbl_
|
||||
{
|
||||
int mc_rd[NMC];
|
||||
int mc_wr[NMC];
|
||||
int edc_rd[NEDC];
|
||||
int edc_wr[NEDC];
|
||||
int edc_hite[NEDC];
|
||||
int edc_hitm[NEDC];
|
||||
int edc_misse[NEDC];
|
||||
int edc_missm[NEDC];
|
||||
};
|
||||
#endif
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class PmuStat
|
||||
{
|
||||
uint64_t counters[8][256];
|
||||
#ifdef _KNIGHTS_LANDING_
|
||||
static struct knl_gbl_ gbl;
|
||||
#endif
|
||||
const char *name;
|
||||
|
||||
uint64_t reads; // memory reads
|
||||
uint64_t writes; // memory writes
|
||||
uint64_t mrstart; // memory read counter at start of parallel region
|
||||
uint64_t mrend; // memory read counter at end of parallel region
|
||||
uint64_t mwstart; // memory write counter at start of parallel region
|
||||
uint64_t mwend; // memory write counter at end of parallel region
|
||||
|
||||
// cumulative counters
|
||||
uint64_t count; // number of invocations
|
||||
uint64_t tregion; // total time in parallel region (from thread 0)
|
||||
uint64_t tcycles; // total cycles inside parallel region
|
||||
uint64_t inst, ref, cyc; // fixed counters
|
||||
uint64_t pmc0, pmc1;// pmu
|
||||
// add memory counters here
|
||||
// temp variables
|
||||
uint64_t tstart; // tsc at start of parallel region
|
||||
uint64_t tend; // tsc at end of parallel region
|
||||
// map for ctrs values
|
||||
// 0 pmc0 start
|
||||
// 1 pmc0 end
|
||||
// 2 pmc1 start
|
||||
// 3 pmc1 end
|
||||
// 4 tsc start
|
||||
// 5 tsc end
|
||||
static bool pmu_initialized;
|
||||
public:
|
||||
static bool is_init(void){ return pmu_initialized;}
|
||||
static void pmu_init(void);
|
||||
static void pmu_fini(void);
|
||||
static void pmu_start(void);
|
||||
static void pmu_stop(void);
|
||||
void accum(int nthreads);
|
||||
static void xmemctrs(uint64_t *mr, uint64_t *mw);
|
||||
void start(void);
|
||||
void enter(int t);
|
||||
void exit(int t);
|
||||
void print(void);
|
||||
void init(const char *regname);
|
||||
void clear(void);
|
||||
#ifdef _KNIGHTS_LANDING_
|
||||
static void KNLsetup(void);
|
||||
static uint64_t KNLreadctr(int fd);
|
||||
static void KNLreadctrs(ctrs &c);
|
||||
static void KNLevsetup(const char *ename, int &fd, int event, int umask);
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -1,70 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
#ifdef GRID_TRACING_NVTX
|
||||
#include <nvToolsExt.h>
|
||||
class GridTracer {
|
||||
public:
|
||||
GridTracer(const char* name) {
|
||||
nvtxRangePushA(name);
|
||||
}
|
||||
~GridTracer() {
|
||||
nvtxRangePop();
|
||||
}
|
||||
};
|
||||
inline void tracePush(const char *name) { nvtxRangePushA(name); }
|
||||
inline void tracePop(const char *name) { nvtxRangePop(); }
|
||||
inline int traceStart(const char *name) { }
|
||||
inline void traceStop(int ID) { }
|
||||
#endif
|
||||
|
||||
#ifdef GRID_TRACING_ROCTX
|
||||
#include <roctracer/roctx.h>
|
||||
class GridTracer {
|
||||
public:
|
||||
GridTracer(const char* name) {
|
||||
roctxRangePushA(name);
|
||||
std::cout << "roctxRangePush "<<name<<std::endl;
|
||||
}
|
||||
~GridTracer() {
|
||||
roctxRangePop();
|
||||
std::cout << "roctxRangePop "<<std::endl;
|
||||
}
|
||||
};
|
||||
inline void tracePush(const char *name) { roctxRangePushA(name); }
|
||||
inline void tracePop(const char *name) { roctxRangePop(); }
|
||||
inline int traceStart(const char *name) { roctxRangeStart(name); }
|
||||
inline void traceStop(int ID) { roctxRangeStop(ID); }
|
||||
#endif
|
||||
|
||||
#ifdef GRID_TRACING_TIMER
|
||||
class GridTracer {
|
||||
public:
|
||||
const char *name;
|
||||
double elapsed;
|
||||
GridTracer(const char* _name) {
|
||||
name = _name;
|
||||
elapsed=-usecond();
|
||||
}
|
||||
~GridTracer() {
|
||||
elapsed+=usecond();
|
||||
std::cout << GridLogTracing << name << " took " <<elapsed<< " us" <<std::endl;
|
||||
}
|
||||
};
|
||||
inline void tracePush(const char *name) { }
|
||||
inline void tracePop(const char *name) { }
|
||||
inline int traceStart(const char *name) { return 0; }
|
||||
inline void traceStop(int ID) { }
|
||||
#endif
|
||||
|
||||
#ifdef GRID_TRACING_NONE
|
||||
#define GRID_TRACE(name)
|
||||
inline void tracePush(const char *name) { }
|
||||
inline void tracePop(const char *name) { }
|
||||
inline int traceStart(const char *name) { return 0; }
|
||||
inline void traceStop(int ID) { }
|
||||
#else
|
||||
#define GRID_TRACE(name) GridTracer uniq_name_using_macros##__COUNTER__(name);
|
||||
#endif
|
||||
NAMESPACE_END(Grid);
|
629
Grid/qcd/QCD.h
629
Grid/qcd/QCD.h
@ -1,629 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/QCD.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
static constexpr int Xdir = 0;
|
||||
static constexpr int Ydir = 1;
|
||||
static constexpr int Zdir = 2;
|
||||
static constexpr int Tdir = 3;
|
||||
|
||||
static constexpr int Xp = 0;
|
||||
static constexpr int Yp = 1;
|
||||
static constexpr int Zp = 2;
|
||||
static constexpr int Tp = 3;
|
||||
static constexpr int Xm = 4;
|
||||
static constexpr int Ym = 5;
|
||||
static constexpr int Zm = 6;
|
||||
static constexpr int Tm = 7;
|
||||
|
||||
static constexpr int Nc=Config_Nc;
|
||||
static constexpr int Ns=4;
|
||||
static constexpr int Nd=4;
|
||||
static constexpr int Nhs=2; // half spinor
|
||||
static constexpr int Nds=8; // double stored gauge field
|
||||
static constexpr int Ngp=2; // gparity index range
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// QCD iMatrix types
|
||||
// Index conventions: Lorentz x Spin x Colour
|
||||
// note: static constexpr int or constexpr will work for type deductions
|
||||
// with the intel compiler (up to version 17)
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
#define ColourIndex (2)
|
||||
#define SpinIndex (1)
|
||||
#define LorentzIndex (0)
|
||||
#define GparityFlavourIndex (0)
|
||||
|
||||
// Also should make these a named enum type
|
||||
static constexpr int DaggerNo=0;
|
||||
static constexpr int DaggerYes=1;
|
||||
static constexpr int InverseNo=0;
|
||||
static constexpr int InverseYes=1;
|
||||
|
||||
// Useful traits is this a spin index
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||
|
||||
const int SpinorIndex = 2;
|
||||
template<typename T> struct isSpinor {
|
||||
static constexpr bool value = (SpinorIndex==T::TensorLevel);
|
||||
};
|
||||
template <typename T> using IfSpinor = Invoke<std::enable_if< isSpinor<T>::value,int> > ;
|
||||
template <typename T> using IfNotSpinor = Invoke<std::enable_if<!isSpinor<T>::value,int> > ;
|
||||
|
||||
const int CoarseIndex = 4;
|
||||
template<typename T> struct isCoarsened {
|
||||
static constexpr bool value = (CoarseIndex<=T::TensorLevel);
|
||||
};
|
||||
template <typename T> using IfCoarsened = Invoke<std::enable_if< isCoarsened<T>::value,int> > ;
|
||||
template <typename T> using IfNotCoarsened = Invoke<std::enable_if<!isCoarsened<T>::value,int> > ;
|
||||
|
||||
const int GparityFlavourTensorIndex = 3; //TensorLevel counts from the bottom!
|
||||
|
||||
// ChrisK very keen to add extra space for Gparity doubling.
|
||||
//
|
||||
// Also add domain wall index, in a way where Wilson operator
|
||||
// naturally distributes across the 5th dimensions.
|
||||
//
|
||||
// That probably makes for GridRedBlack4dCartesian grid.
|
||||
|
||||
// s,sp,c,spc,lc
|
||||
|
||||
template<typename vtype> using iSinglet = iScalar<iScalar<iScalar<vtype> > >;
|
||||
template<typename vtype> using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
|
||||
template<typename vtype> using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
|
||||
template<typename vtype> using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
|
||||
template<typename vtype> using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;
|
||||
template<typename vtype> using iLorentzComplex = iVector<iScalar<iScalar<vtype> >, Nd > ;
|
||||
template<typename vtype> using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ;
|
||||
template<typename vtype> using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
|
||||
template<typename vtype> using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
|
||||
template<typename vtype> using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
|
||||
template<typename vtype> using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
|
||||
template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
|
||||
template<typename vtype> using iSpinColourSpinColourMatrix = iScalar<iMatrix<iMatrix<iMatrix<iMatrix<vtype, Nc>, Ns>, Nc>, Ns> >;
|
||||
|
||||
|
||||
template<typename vtype> using iGparityFlavourVector = iVector<iScalar<iScalar<vtype> >, Ngp>;
|
||||
template<typename vtype> using iGparitySpinColourVector = iVector<iVector<iVector<vtype, Nc>, Ns>, Ngp >;
|
||||
template<typename vtype> using iGparityHalfSpinColourVector = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
|
||||
template<typename vtype> using iGparityFlavourMatrix = iMatrix<iScalar<iScalar<vtype> >, Ngp>;
|
||||
|
||||
// Spin matrix
|
||||
typedef iSpinMatrix<Complex > SpinMatrix;
|
||||
typedef iSpinMatrix<ComplexF > SpinMatrixF;
|
||||
typedef iSpinMatrix<ComplexD > SpinMatrixD;
|
||||
|
||||
typedef iSpinMatrix<vComplex > vSpinMatrix;
|
||||
typedef iSpinMatrix<vComplexF> vSpinMatrixF;
|
||||
typedef iSpinMatrix<vComplexD> vSpinMatrixD;
|
||||
typedef iSpinMatrix<vComplexD2> vSpinMatrixD2;
|
||||
|
||||
// Colour Matrix
|
||||
typedef iColourMatrix<Complex > ColourMatrix;
|
||||
typedef iColourMatrix<ComplexF > ColourMatrixF;
|
||||
typedef iColourMatrix<ComplexD > ColourMatrixD;
|
||||
|
||||
typedef iColourMatrix<vComplex > vColourMatrix;
|
||||
typedef iColourMatrix<vComplexF> vColourMatrixF;
|
||||
typedef iColourMatrix<vComplexD> vColourMatrixD;
|
||||
typedef iColourMatrix<vComplexD2> vColourMatrixD2;
|
||||
|
||||
// SpinColour matrix
|
||||
typedef iSpinColourMatrix<Complex > SpinColourMatrix;
|
||||
typedef iSpinColourMatrix<ComplexF > SpinColourMatrixF;
|
||||
typedef iSpinColourMatrix<ComplexD > SpinColourMatrixD;
|
||||
|
||||
typedef iSpinColourMatrix<vComplex > vSpinColourMatrix;
|
||||
typedef iSpinColourMatrix<vComplexF> vSpinColourMatrixF;
|
||||
typedef iSpinColourMatrix<vComplexD> vSpinColourMatrixD;
|
||||
typedef iSpinColourMatrix<vComplexD2> vSpinColourMatrixD2;
|
||||
|
||||
// SpinColourSpinColour matrix
|
||||
typedef iSpinColourSpinColourMatrix<Complex > SpinColourSpinColourMatrix;
|
||||
typedef iSpinColourSpinColourMatrix<ComplexF > SpinColourSpinColourMatrixF;
|
||||
typedef iSpinColourSpinColourMatrix<ComplexD > SpinColourSpinColourMatrixD;
|
||||
|
||||
typedef iSpinColourSpinColourMatrix<vComplex > vSpinColourSpinColourMatrix;
|
||||
typedef iSpinColourSpinColourMatrix<vComplexF> vSpinColourSpinColourMatrixF;
|
||||
typedef iSpinColourSpinColourMatrix<vComplexD> vSpinColourSpinColourMatrixD;
|
||||
typedef iSpinColourSpinColourMatrix<vComplexD2> vSpinColourSpinColourMatrixD2;
|
||||
|
||||
// SpinColourSpinColour matrix
|
||||
typedef iSpinColourSpinColourMatrix<Complex > SpinColourSpinColourMatrix;
|
||||
typedef iSpinColourSpinColourMatrix<ComplexF > SpinColourSpinColourMatrixF;
|
||||
typedef iSpinColourSpinColourMatrix<ComplexD > SpinColourSpinColourMatrixD;
|
||||
|
||||
typedef iSpinColourSpinColourMatrix<vComplex > vSpinColourSpinColourMatrix;
|
||||
typedef iSpinColourSpinColourMatrix<vComplexF> vSpinColourSpinColourMatrixF;
|
||||
typedef iSpinColourSpinColourMatrix<vComplexD> vSpinColourSpinColourMatrixD;
|
||||
typedef iSpinColourSpinColourMatrix<vComplexD2> vSpinColourSpinColourMatrixD2;
|
||||
|
||||
// LorentzColour
|
||||
typedef iLorentzColourMatrix<Complex > LorentzColourMatrix;
|
||||
typedef iLorentzColourMatrix<ComplexF > LorentzColourMatrixF;
|
||||
typedef iLorentzColourMatrix<ComplexD > LorentzColourMatrixD;
|
||||
|
||||
typedef iLorentzColourMatrix<vComplex > vLorentzColourMatrix;
|
||||
typedef iLorentzColourMatrix<vComplexF> vLorentzColourMatrixF;
|
||||
typedef iLorentzColourMatrix<vComplexD> vLorentzColourMatrixD;
|
||||
typedef iLorentzColourMatrix<vComplexD2> vLorentzColourMatrixD2;
|
||||
|
||||
// LorentzComplex
|
||||
typedef iLorentzComplex<Complex > LorentzComplex;
|
||||
typedef iLorentzComplex<ComplexF > LorentzComplexF;
|
||||
typedef iLorentzComplex<ComplexD > LorentzComplexD;
|
||||
|
||||
typedef iLorentzComplex<vComplex > vLorentzComplex;
|
||||
typedef iLorentzComplex<vComplexF> vLorentzComplexF;
|
||||
typedef iLorentzComplex<vComplexD> vLorentzComplexD;
|
||||
|
||||
// DoubleStored gauge field
|
||||
typedef iDoubleStoredColourMatrix<Complex > DoubleStoredColourMatrix;
|
||||
typedef iDoubleStoredColourMatrix<ComplexF > DoubleStoredColourMatrixF;
|
||||
typedef iDoubleStoredColourMatrix<ComplexD > DoubleStoredColourMatrixD;
|
||||
|
||||
typedef iDoubleStoredColourMatrix<vComplex > vDoubleStoredColourMatrix;
|
||||
typedef iDoubleStoredColourMatrix<vComplexF> vDoubleStoredColourMatrixF;
|
||||
typedef iDoubleStoredColourMatrix<vComplexD> vDoubleStoredColourMatrixD;
|
||||
typedef iDoubleStoredColourMatrix<vComplexD2> vDoubleStoredColourMatrixD2;
|
||||
|
||||
//G-parity flavour matrix
|
||||
typedef iGparityFlavourMatrix<Complex> GparityFlavourMatrix;
|
||||
typedef iGparityFlavourMatrix<ComplexF> GparityFlavourMatrixF;
|
||||
typedef iGparityFlavourMatrix<ComplexD> GparityFlavourMatrixD;
|
||||
|
||||
typedef iGparityFlavourMatrix<vComplex> vGparityFlavourMatrix;
|
||||
typedef iGparityFlavourMatrix<vComplexF> vGparityFlavourMatrixF;
|
||||
typedef iGparityFlavourMatrix<vComplexD> vGparityFlavourMatrixD;
|
||||
typedef iGparityFlavourMatrix<vComplexD2> vGparityFlavourMatrixD2;
|
||||
|
||||
|
||||
// Spin vector
|
||||
typedef iSpinVector<Complex > SpinVector;
|
||||
typedef iSpinVector<ComplexF> SpinVectorF;
|
||||
typedef iSpinVector<ComplexD> SpinVectorD;
|
||||
|
||||
typedef iSpinVector<vComplex > vSpinVector;
|
||||
typedef iSpinVector<vComplexF> vSpinVectorF;
|
||||
typedef iSpinVector<vComplexD> vSpinVectorD;
|
||||
typedef iSpinVector<vComplexD2> vSpinVectorD2;
|
||||
|
||||
// Colour vector
|
||||
typedef iColourVector<Complex > ColourVector;
|
||||
typedef iColourVector<ComplexF> ColourVectorF;
|
||||
typedef iColourVector<ComplexD> ColourVectorD;
|
||||
|
||||
typedef iColourVector<vComplex > vColourVector;
|
||||
typedef iColourVector<vComplexF> vColourVectorF;
|
||||
typedef iColourVector<vComplexD> vColourVectorD;
|
||||
typedef iColourVector<vComplexD2> vColourVectorD2;
|
||||
|
||||
// SpinColourVector
|
||||
typedef iSpinColourVector<Complex > SpinColourVector;
|
||||
typedef iSpinColourVector<ComplexF> SpinColourVectorF;
|
||||
typedef iSpinColourVector<ComplexD> SpinColourVectorD;
|
||||
|
||||
typedef iSpinColourVector<vComplex > vSpinColourVector;
|
||||
typedef iSpinColourVector<vComplexF> vSpinColourVectorF;
|
||||
typedef iSpinColourVector<vComplexD> vSpinColourVectorD;
|
||||
typedef iSpinColourVector<vComplexD2> vSpinColourVectorD2;
|
||||
|
||||
// HalfSpin vector
|
||||
typedef iHalfSpinVector<Complex > HalfSpinVector;
|
||||
typedef iHalfSpinVector<ComplexF> HalfSpinVectorF;
|
||||
typedef iHalfSpinVector<ComplexD> HalfSpinVectorD;
|
||||
|
||||
typedef iHalfSpinVector<vComplex > vHalfSpinVector;
|
||||
typedef iHalfSpinVector<vComplexF> vHalfSpinVectorF;
|
||||
typedef iHalfSpinVector<vComplexD> vHalfSpinVectorD;
|
||||
typedef iHalfSpinVector<vComplexD2> vHalfSpinVectorD2;
|
||||
|
||||
// HalfSpinColour vector
|
||||
typedef iHalfSpinColourVector<Complex > HalfSpinColourVector;
|
||||
typedef iHalfSpinColourVector<ComplexF> HalfSpinColourVectorF;
|
||||
typedef iHalfSpinColourVector<ComplexD> HalfSpinColourVectorD;
|
||||
|
||||
typedef iHalfSpinColourVector<vComplex > vHalfSpinColourVector;
|
||||
typedef iHalfSpinColourVector<vComplexF> vHalfSpinColourVectorF;
|
||||
typedef iHalfSpinColourVector<vComplexD> vHalfSpinColourVectorD;
|
||||
typedef iHalfSpinColourVector<vComplexD2> vHalfSpinColourVectorD2;
|
||||
|
||||
//G-parity flavour vector
|
||||
typedef iGparityFlavourVector<Complex > GparityFlavourVector;
|
||||
typedef iGparityFlavourVector<ComplexF> GparityFlavourVectorF;
|
||||
typedef iGparityFlavourVector<ComplexD> GparityFlavourVectorD;
|
||||
|
||||
typedef iGparityFlavourVector<vComplex > vGparityFlavourVector;
|
||||
typedef iGparityFlavourVector<vComplexF> vGparityFlavourVectorF;
|
||||
typedef iGparityFlavourVector<vComplexD> vGparityFlavourVectorD;
|
||||
typedef iGparityFlavourVector<vComplexD2> vGparityFlavourVectorD2;
|
||||
|
||||
// singlets
|
||||
typedef iSinglet<Complex > TComplex; // FIXME This is painful. Tensor singlet complex type.
|
||||
typedef iSinglet<ComplexF> TComplexF; // FIXME This is painful. Tensor singlet complex type.
|
||||
typedef iSinglet<ComplexD> TComplexD; // FIXME This is painful. Tensor singlet complex type.
|
||||
|
||||
typedef iSinglet<vComplex > vTComplex ; // what if we don't know the tensor structure
|
||||
typedef iSinglet<vComplexF> vTComplexF; // what if we don't know the tensor structure
|
||||
typedef iSinglet<vComplexD> vTComplexD; // what if we don't know the tensor structure
|
||||
typedef iSinglet<vComplexD2> vTComplexD2; // what if we don't know the tensor structure
|
||||
|
||||
typedef iSinglet<Real > TReal; // Shouldn't need these; can I make it work without?
|
||||
typedef iSinglet<RealF> TRealF; // Shouldn't need these; can I make it work without?
|
||||
typedef iSinglet<RealD> TRealD; // Shouldn't need these; can I make it work without?
|
||||
|
||||
typedef iSinglet<vReal > vTReal;
|
||||
typedef iSinglet<vRealF> vTRealF;
|
||||
typedef iSinglet<vRealD> vTRealD;
|
||||
|
||||
typedef iSinglet<vInteger> vTInteger;
|
||||
typedef iSinglet<Integer > TInteger;
|
||||
|
||||
|
||||
// Lattices of these
|
||||
typedef Lattice<vColourMatrix> LatticeColourMatrix;
|
||||
typedef Lattice<vColourMatrixF> LatticeColourMatrixF;
|
||||
typedef Lattice<vColourMatrixD> LatticeColourMatrixD;
|
||||
typedef Lattice<vColourMatrixD2> LatticeColourMatrixD2;
|
||||
|
||||
typedef Lattice<vSpinMatrix> LatticeSpinMatrix;
|
||||
typedef Lattice<vSpinMatrixF> LatticeSpinMatrixF;
|
||||
typedef Lattice<vSpinMatrixD> LatticeSpinMatrixD;
|
||||
typedef Lattice<vSpinMatrixD2> LatticeSpinMatrixD2;
|
||||
|
||||
typedef Lattice<vSpinColourMatrix> LatticeSpinColourMatrix;
|
||||
typedef Lattice<vSpinColourMatrixF> LatticeSpinColourMatrixF;
|
||||
typedef Lattice<vSpinColourMatrixD> LatticeSpinColourMatrixD;
|
||||
typedef Lattice<vSpinColourMatrixD2> LatticeSpinColourMatrixD2;
|
||||
|
||||
typedef Lattice<vSpinColourSpinColourMatrix> LatticeSpinColourSpinColourMatrix;
|
||||
typedef Lattice<vSpinColourSpinColourMatrixF> LatticeSpinColourSpinColourMatrixF;
|
||||
typedef Lattice<vSpinColourSpinColourMatrixD> LatticeSpinColourSpinColourMatrixD;
|
||||
typedef Lattice<vSpinColourSpinColourMatrixD2> LatticeSpinColourSpinColourMatrixD2;
|
||||
|
||||
typedef Lattice<vLorentzColourMatrix> LatticeLorentzColourMatrix;
|
||||
typedef Lattice<vLorentzColourMatrixF> LatticeLorentzColourMatrixF;
|
||||
typedef Lattice<vLorentzColourMatrixD> LatticeLorentzColourMatrixD;
|
||||
typedef Lattice<vLorentzColourMatrixD2> LatticeLorentzColourMatrixD2;
|
||||
|
||||
typedef Lattice<vLorentzComplex> LatticeLorentzComplex;
|
||||
typedef Lattice<vLorentzComplexF> LatticeLorentzComplexF;
|
||||
typedef Lattice<vLorentzComplexD> LatticeLorentzComplexD;
|
||||
|
||||
// DoubleStored gauge field
|
||||
typedef Lattice<vDoubleStoredColourMatrix> LatticeDoubleStoredColourMatrix;
|
||||
typedef Lattice<vDoubleStoredColourMatrixF> LatticeDoubleStoredColourMatrixF;
|
||||
typedef Lattice<vDoubleStoredColourMatrixD> LatticeDoubleStoredColourMatrixD;
|
||||
typedef Lattice<vDoubleStoredColourMatrixD2> LatticeDoubleStoredColourMatrixD2;
|
||||
|
||||
typedef Lattice<vSpinVector> LatticeSpinVector;
|
||||
typedef Lattice<vSpinVectorF> LatticeSpinVectorF;
|
||||
typedef Lattice<vSpinVectorD> LatticeSpinVectorD;
|
||||
typedef Lattice<vSpinVectorD2> LatticeSpinVectorD2;
|
||||
|
||||
typedef Lattice<vColourVector> LatticeColourVector;
|
||||
typedef Lattice<vColourVectorF> LatticeColourVectorF;
|
||||
typedef Lattice<vColourVectorD> LatticeColourVectorD;
|
||||
typedef Lattice<vColourVectorD2> LatticeColourVectorD2;
|
||||
|
||||
typedef Lattice<vSpinColourVector> LatticeSpinColourVector;
|
||||
typedef Lattice<vSpinColourVectorF> LatticeSpinColourVectorF;
|
||||
typedef Lattice<vSpinColourVectorD> LatticeSpinColourVectorD;
|
||||
typedef Lattice<vSpinColourVectorD2> LatticeSpinColourVectorD2;
|
||||
|
||||
typedef Lattice<vHalfSpinVector> LatticeHalfSpinVector;
|
||||
typedef Lattice<vHalfSpinVectorF> LatticeHalfSpinVectorF;
|
||||
typedef Lattice<vHalfSpinVectorD> LatticeHalfSpinVectorD;
|
||||
typedef Lattice<vHalfSpinVectorD2> LatticeHalfSpinVectorD2;
|
||||
|
||||
typedef Lattice<vHalfSpinColourVector> LatticeHalfSpinColourVector;
|
||||
typedef Lattice<vHalfSpinColourVectorF> LatticeHalfSpinColourVectorF;
|
||||
typedef Lattice<vHalfSpinColourVectorD> LatticeHalfSpinColourVectorD;
|
||||
typedef Lattice<vHalfSpinColourVectorD2> LatticeHalfSpinColourVectorD2;
|
||||
|
||||
typedef Lattice<vTReal> LatticeReal;
|
||||
typedef Lattice<vTRealF> LatticeRealF;
|
||||
typedef Lattice<vTRealD> LatticeRealD;
|
||||
|
||||
typedef Lattice<vTComplex> LatticeComplex;
|
||||
typedef Lattice<vTComplexF> LatticeComplexF;
|
||||
typedef Lattice<vTComplexD> LatticeComplexD;
|
||||
typedef Lattice<vTComplexD2> LatticeComplexD2;
|
||||
|
||||
typedef Lattice<vTInteger> LatticeInteger; // Predicates for "where"
|
||||
|
||||
|
||||
///////////////////////////////////////////
|
||||
// Physical names for things
|
||||
///////////////////////////////////////////
|
||||
typedef LatticeHalfSpinColourVector LatticeHalfFermion;
|
||||
typedef LatticeHalfSpinColourVectorF LatticeHalfFermionF;
|
||||
typedef LatticeHalfSpinColourVectorD LatticeHalfFermionD;
|
||||
typedef LatticeHalfSpinColourVectorD2 LatticeHalfFermionD2;
|
||||
|
||||
typedef LatticeSpinColourVector LatticeFermion;
|
||||
typedef LatticeSpinColourVectorF LatticeFermionF;
|
||||
typedef LatticeSpinColourVectorD LatticeFermionD;
|
||||
typedef LatticeSpinColourVectorD2 LatticeFermionD2;
|
||||
|
||||
typedef LatticeSpinColourMatrix LatticePropagator;
|
||||
typedef LatticeSpinColourMatrixF LatticePropagatorF;
|
||||
typedef LatticeSpinColourMatrixD LatticePropagatorD;
|
||||
typedef LatticeSpinColourMatrixD2 LatticePropagatorD2;
|
||||
|
||||
typedef LatticeLorentzColourMatrix LatticeGaugeField;
|
||||
typedef LatticeLorentzColourMatrixF LatticeGaugeFieldF;
|
||||
typedef LatticeLorentzColourMatrixD LatticeGaugeFieldD;
|
||||
typedef LatticeLorentzColourMatrixD2 LatticeGaugeFieldD2;
|
||||
|
||||
typedef LatticeDoubleStoredColourMatrix LatticeDoubledGaugeField;
|
||||
typedef LatticeDoubleStoredColourMatrixF LatticeDoubledGaugeFieldF;
|
||||
typedef LatticeDoubleStoredColourMatrixD LatticeDoubledGaugeFieldD;
|
||||
typedef LatticeDoubleStoredColourMatrixD2 LatticeDoubledGaugeFieldD2;
|
||||
|
||||
template<class GF> using LorentzScalar = Lattice<iScalar<typename GF::vector_object::element> >;
|
||||
|
||||
typedef Lattice<vColourVector> LatticeStaggeredFermion;
|
||||
typedef Lattice<vColourVectorF> LatticeStaggeredFermionF;
|
||||
typedef Lattice<vColourVectorD> LatticeStaggeredFermionD;
|
||||
typedef Lattice<vColourVectorD2> LatticeStaggeredFermionD2;
|
||||
|
||||
typedef Lattice<vColourMatrix> LatticeStaggeredPropagator;
|
||||
typedef Lattice<vColourMatrixF> LatticeStaggeredPropagatorF;
|
||||
typedef Lattice<vColourMatrixD> LatticeStaggeredPropagatorD;
|
||||
typedef Lattice<vColourMatrixD2> LatticeStaggeredPropagatorD2;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Peek and Poke named after physics attributes
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
//spin
|
||||
template<class vobj> auto peekSpin(const vobj &rhs,int i) -> decltype(PeekIndex<SpinIndex>(rhs,0))
|
||||
{
|
||||
return PeekIndex<SpinIndex>(rhs,i);
|
||||
}
|
||||
template<class vobj> auto peekSpin(const vobj &rhs,int i,int j) -> decltype(PeekIndex<SpinIndex>(rhs,0,0))
|
||||
{
|
||||
return PeekIndex<SpinIndex>(rhs,i,j);
|
||||
}
|
||||
template<class vobj> auto peekSpin(const Lattice<vobj> &rhs,int i) -> decltype(PeekIndex<SpinIndex>(rhs,0))
|
||||
{
|
||||
return PeekIndex<SpinIndex>(rhs,i);
|
||||
}
|
||||
template<class vobj> auto peekSpin(const Lattice<vobj> &rhs,int i,int j) -> decltype(PeekIndex<SpinIndex>(rhs,0,0))
|
||||
{
|
||||
return PeekIndex<SpinIndex>(rhs,i,j);
|
||||
}
|
||||
//colour
|
||||
template<class vobj> auto peekColour(const vobj &rhs,int i) -> decltype(PeekIndex<ColourIndex>(rhs,0))
|
||||
{
|
||||
return PeekIndex<ColourIndex>(rhs,i);
|
||||
}
|
||||
template<class vobj> auto peekColour(const vobj &rhs,int i,int j) -> decltype(PeekIndex<ColourIndex>(rhs,0,0))
|
||||
{
|
||||
return PeekIndex<ColourIndex>(rhs,i,j);
|
||||
}
|
||||
template<class vobj> auto peekColour(const Lattice<vobj> &rhs,int i) -> decltype(PeekIndex<ColourIndex>(rhs,0))
|
||||
{
|
||||
return PeekIndex<ColourIndex>(rhs,i);
|
||||
}
|
||||
template<class vobj> auto peekColour(const Lattice<vobj> &rhs,int i,int j) -> decltype(PeekIndex<ColourIndex>(rhs,0,0))
|
||||
{
|
||||
return PeekIndex<ColourIndex>(rhs,i,j);
|
||||
}
|
||||
//lorentz
|
||||
template<class vobj> auto peekLorentz(const vobj &rhs,int i) -> decltype(PeekIndex<LorentzIndex>(rhs,0))
|
||||
{
|
||||
return PeekIndex<LorentzIndex>(rhs,i);
|
||||
}
|
||||
template<class vobj> auto peekLorentz(const Lattice<vobj> &rhs,int i) -> decltype(PeekIndex<LorentzIndex>(rhs,0))
|
||||
{
|
||||
return PeekIndex<LorentzIndex>(rhs,i);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// Poke lattice
|
||||
//////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
void pokeColour(Lattice<vobj> &lhs,
|
||||
const Lattice<decltype(peekIndex<ColourIndex>(vobj(),0))> & rhs,
|
||||
int i)
|
||||
{
|
||||
PokeIndex<ColourIndex>(lhs,rhs,i);
|
||||
}
|
||||
template<class vobj>
|
||||
void pokeColour(Lattice<vobj> &lhs,
|
||||
const Lattice<decltype(peekIndex<ColourIndex>(vobj(),0,0))> & rhs,
|
||||
int i,int j)
|
||||
{
|
||||
PokeIndex<ColourIndex>(lhs,rhs,i,j);
|
||||
}
|
||||
template<class vobj>
|
||||
void pokeSpin(Lattice<vobj> &lhs,
|
||||
const Lattice<decltype(peekIndex<SpinIndex>(vobj(),0))> & rhs,
|
||||
int i)
|
||||
{
|
||||
PokeIndex<SpinIndex>(lhs,rhs,i);
|
||||
}
|
||||
template<class vobj>
|
||||
void pokeSpin(Lattice<vobj> &lhs,
|
||||
const Lattice<decltype(peekIndex<SpinIndex>(vobj(),0,0))> & rhs,
|
||||
int i,int j)
|
||||
{
|
||||
PokeIndex<SpinIndex>(lhs,rhs,i,j);
|
||||
}
|
||||
template<class vobj>
|
||||
void pokeLorentz(Lattice<vobj> &lhs,
|
||||
const Lattice<decltype(peekIndex<LorentzIndex>(vobj(),0))> & rhs,
|
||||
int i)
|
||||
{
|
||||
PokeIndex<LorentzIndex>(lhs,rhs,i);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// Poke scalars
|
||||
//////////////////////////////////////////////
|
||||
template<class vobj> void pokeSpin(vobj &lhs,const decltype(peekIndex<SpinIndex>(lhs,0)) & rhs,int i)
|
||||
{
|
||||
pokeIndex<SpinIndex>(lhs,rhs,i);
|
||||
}
|
||||
template<class vobj> void pokeSpin(vobj &lhs,const decltype(peekIndex<SpinIndex>(lhs,0,0)) & rhs,int i,int j)
|
||||
{
|
||||
pokeIndex<SpinIndex>(lhs,rhs,i,j);
|
||||
}
|
||||
|
||||
template<class vobj> void pokeColour(vobj &lhs,const decltype(peekIndex<ColourIndex>(lhs,0)) & rhs,int i)
|
||||
{
|
||||
pokeIndex<ColourIndex>(lhs,rhs,i);
|
||||
}
|
||||
template<class vobj> void pokeColour(vobj &lhs,const decltype(peekIndex<ColourIndex>(lhs,0,0)) & rhs,int i,int j)
|
||||
{
|
||||
pokeIndex<ColourIndex>(lhs,rhs,i,j);
|
||||
}
|
||||
|
||||
template<class vobj> void pokeLorentz(vobj &lhs,const decltype(peekIndex<LorentzIndex>(lhs,0)) & rhs,int i)
|
||||
{
|
||||
pokeIndex<LorentzIndex>(lhs,rhs,i);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// Fermion <-> propagator assignements
|
||||
//////////////////////////////////////////////
|
||||
//template <class Prop, class Ferm>
|
||||
#define FAST_FERM_TO_PROP
|
||||
template <class Fimpl>
|
||||
void FermToProp(typename Fimpl::PropagatorField &p, const typename Fimpl::FermionField &f, const int s, const int c)
|
||||
{
|
||||
#ifdef FAST_FERM_TO_PROP
|
||||
autoView(p_v,p,CpuWrite);
|
||||
autoView(f_v,f,CpuRead);
|
||||
thread_for(idx,p_v.oSites(),{
|
||||
for(int ss = 0; ss < Ns; ++ss) {
|
||||
for(int cc = 0; cc < Fimpl::Dimension; ++cc) {
|
||||
p_v[idx]()(ss,s)(cc,c) = f_v[idx]()(ss)(cc); // Propagator sink index is LEFT, suitable for left mult by gauge link (e.g.)
|
||||
}}
|
||||
});
|
||||
#else
|
||||
for(int j = 0; j < Ns; ++j)
|
||||
{
|
||||
auto pjs = peekSpin(p, j, s);
|
||||
auto fj = peekSpin(f, j);
|
||||
|
||||
for(int i = 0; i < Fimpl::Dimension; ++i)
|
||||
{
|
||||
pokeColour(pjs, peekColour(fj, i), i, c);
|
||||
}
|
||||
pokeSpin(p, pjs, j, s);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
//template <class Prop, class Ferm>
|
||||
template <class Fimpl>
|
||||
void PropToFerm(typename Fimpl::FermionField &f, const typename Fimpl::PropagatorField &p, const int s, const int c)
|
||||
{
|
||||
#ifdef FAST_FERM_TO_PROP
|
||||
autoView(p_v,p,CpuRead);
|
||||
autoView(f_v,f,CpuWrite);
|
||||
thread_for(idx,p_v.oSites(),{
|
||||
for(int ss = 0; ss < Ns; ++ss) {
|
||||
for(int cc = 0; cc < Fimpl::Dimension; ++cc) {
|
||||
f_v[idx]()(ss)(cc) = p_v[idx]()(ss,s)(cc,c); // LEFT index is copied across for s,c right index
|
||||
}}
|
||||
});
|
||||
#else
|
||||
for(int j = 0; j < Ns; ++j)
|
||||
{
|
||||
auto pjs = peekSpin(p, j, s);
|
||||
auto fj = peekSpin(f, j);
|
||||
|
||||
for(int i = 0; i < Fimpl::Dimension; ++i)
|
||||
{
|
||||
pokeColour(fj, peekColour(pjs, i, c), i);
|
||||
}
|
||||
pokeSpin(f, fj, j);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// transpose array and scalar
|
||||
//////////////////////////////////////////////
|
||||
template<int Index,class vobj> inline Lattice<vobj> transposeSpin(const Lattice<vobj> &lhs){
|
||||
return transposeIndex<SpinIndex>(lhs);
|
||||
}
|
||||
template<int Index,class vobj> inline Lattice<vobj> transposeColour(const Lattice<vobj> &lhs){
|
||||
return transposeIndex<ColourIndex>(lhs);
|
||||
}
|
||||
template<int Index,class vobj> inline vobj transposeSpin(const vobj &lhs){
|
||||
return transposeIndex<SpinIndex>(lhs);
|
||||
}
|
||||
template<int Index,class vobj> inline vobj transposeColour(const vobj &lhs){
|
||||
return transposeIndex<ColourIndex>(lhs);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////
|
||||
// Trace lattice and non-lattice
|
||||
//////////////////////////////////////////
|
||||
template<int Index,class vobj>
|
||||
inline auto traceSpin(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<SpinIndex>(vobj()))>
|
||||
{
|
||||
return traceIndex<SpinIndex>(lhs);
|
||||
}
|
||||
template<int Index,class vobj>
|
||||
inline auto traceColour(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<ColourIndex>(vobj()))>
|
||||
{
|
||||
return traceIndex<ColourIndex>(lhs);
|
||||
}
|
||||
template<int Index,class vobj>
|
||||
inline auto traceSpin(const vobj &lhs) -> Lattice<decltype(traceIndex<SpinIndex>(lhs))>
|
||||
{
|
||||
return traceIndex<SpinIndex>(lhs);
|
||||
}
|
||||
template<int Index,class vobj>
|
||||
inline auto traceColour(const vobj &lhs) -> Lattice<decltype(traceIndex<ColourIndex>(lhs))>
|
||||
{
|
||||
return traceIndex<ColourIndex>(lhs);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////
|
||||
// Current types
|
||||
//////////////////////////////////////////
|
||||
GRID_SERIALIZABLE_ENUM(Current, undef,
|
||||
Vector, 0,
|
||||
Axial, 1,
|
||||
Tadpole, 2);
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -1,134 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/ActionBase.h
|
||||
|
||||
Copyright (C) 2015-2016
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef ACTION_BASE_H
|
||||
#define ACTION_BASE_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
///////////////////////////////////
|
||||
// Smart configuration base class
|
||||
///////////////////////////////////
|
||||
template< class Field >
|
||||
class ConfigurationBase
|
||||
{
|
||||
public:
|
||||
ConfigurationBase() {}
|
||||
virtual ~ConfigurationBase() {}
|
||||
virtual void set_Field(Field& U) =0;
|
||||
virtual void smeared_force(Field&) = 0;
|
||||
virtual Field& get_SmearedU() =0;
|
||||
virtual Field &get_U(bool smeared = false) = 0;
|
||||
};
|
||||
|
||||
template <class GaugeField >
|
||||
class Action
|
||||
{
|
||||
public:
|
||||
bool is_smeared = false;
|
||||
RealD deriv_norm_sum;
|
||||
RealD deriv_max_sum;
|
||||
RealD Fdt_norm_sum;
|
||||
RealD Fdt_max_sum;
|
||||
int deriv_num;
|
||||
RealD deriv_us;
|
||||
RealD S_us;
|
||||
RealD refresh_us;
|
||||
void reset_timer(void) {
|
||||
deriv_us = S_us = refresh_us = 0.0;
|
||||
deriv_norm_sum = deriv_max_sum=0.0;
|
||||
Fdt_max_sum = Fdt_norm_sum = 0.0;
|
||||
deriv_num=0;
|
||||
}
|
||||
void deriv_log(RealD nrm, RealD max,RealD Fdt_nrm,RealD Fdt_max) {
|
||||
if ( max > deriv_max_sum ) {
|
||||
deriv_max_sum=max;
|
||||
}
|
||||
deriv_norm_sum+=nrm;
|
||||
if ( Fdt_max > Fdt_max_sum ) {
|
||||
Fdt_max_sum=Fdt_max;
|
||||
}
|
||||
Fdt_norm_sum+=Fdt_nrm; deriv_num++;
|
||||
}
|
||||
RealD deriv_max_average(void) { return deriv_max_sum; };
|
||||
RealD deriv_norm_average(void) { return deriv_norm_sum/deriv_num; };
|
||||
RealD Fdt_max_average(void) { return Fdt_max_sum; };
|
||||
RealD Fdt_norm_average(void) { return Fdt_norm_sum/deriv_num; };
|
||||
RealD deriv_timer(void) { return deriv_us; };
|
||||
RealD S_timer(void) { return S_us; };
|
||||
RealD refresh_timer(void) { return refresh_us; };
|
||||
void deriv_timer_start(void) { deriv_us-=usecond(); }
|
||||
void deriv_timer_stop(void) { deriv_us+=usecond(); }
|
||||
void refresh_timer_start(void) { refresh_us-=usecond(); }
|
||||
void refresh_timer_stop(void) { refresh_us+=usecond(); }
|
||||
void S_timer_start(void) { S_us-=usecond(); }
|
||||
void S_timer_stop(void) { S_us+=usecond(); }
|
||||
/////////////////////////////
|
||||
// Heatbath?
|
||||
/////////////////////////////
|
||||
virtual void refresh(const GaugeField& U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) = 0; // refresh pseudofermions
|
||||
virtual RealD S(const GaugeField& U) = 0; // evaluate the action
|
||||
virtual RealD Sinitial(const GaugeField& U) { return this->S(U); } ; // if the refresh computes the action, can cache it. Alternately refreshAndAction() ?
|
||||
virtual void deriv(const GaugeField& U, GaugeField& dSdU) = 0; // evaluate the action derivative
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// virtual smeared interface through configuration container
|
||||
/////////////////////////////////////////////////////////////
|
||||
virtual void refresh(ConfigurationBase<GaugeField> & U, GridSerialRNG &sRNG, GridParallelRNG& pRNG)
|
||||
{
|
||||
refresh(U.get_U(is_smeared),sRNG,pRNG);
|
||||
}
|
||||
virtual RealD S(ConfigurationBase<GaugeField>& U)
|
||||
{
|
||||
return S(U.get_U(is_smeared));
|
||||
}
|
||||
virtual RealD Sinitial(ConfigurationBase<GaugeField>& U)
|
||||
{
|
||||
return Sinitial(U.get_U(is_smeared));
|
||||
}
|
||||
virtual void deriv(ConfigurationBase<GaugeField>& U, GaugeField& dSdU)
|
||||
{
|
||||
deriv(U.get_U(is_smeared),dSdU);
|
||||
if ( is_smeared ) {
|
||||
U.smeared_force(dSdU);
|
||||
}
|
||||
}
|
||||
///////////////////////////////
|
||||
// Logging
|
||||
///////////////////////////////
|
||||
virtual std::string action_name() = 0; // return the action name
|
||||
virtual std::string LogParameters() = 0; // prints action parameters
|
||||
virtual ~Action(){}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif // ACTION_BASE_H
|
@ -1,170 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/ActionParams.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef GRID_QCD_ACTION_PARAMS_H
|
||||
#define GRID_QCD_ACTION_PARAMS_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
|
||||
struct GparityWilsonImplParams {
|
||||
Coordinate twists;
|
||||
//mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs
|
||||
Coordinate dirichlet; // Blocksize of dirichlet BCs
|
||||
int partialDirichlet;
|
||||
GparityWilsonImplParams() : twists(Nd, 0) {
|
||||
dirichlet.resize(0);
|
||||
partialDirichlet=0;
|
||||
};
|
||||
};
|
||||
|
||||
struct WilsonImplParams {
|
||||
bool overlapCommsCompute;
|
||||
Coordinate dirichlet; // Blocksize of dirichlet BCs
|
||||
int partialDirichlet;
|
||||
AcceleratorVector<Real,Nd> twist_n_2pi_L;
|
||||
AcceleratorVector<Complex,Nd> boundary_phases;
|
||||
WilsonImplParams() {
|
||||
dirichlet.resize(0);
|
||||
partialDirichlet=0;
|
||||
boundary_phases.resize(Nd, 1.0);
|
||||
twist_n_2pi_L.resize(Nd, 0.0);
|
||||
};
|
||||
WilsonImplParams(const AcceleratorVector<Complex,Nd> phi) : boundary_phases(phi), overlapCommsCompute(false) {
|
||||
twist_n_2pi_L.resize(Nd, 0.0);
|
||||
partialDirichlet=0;
|
||||
dirichlet.resize(0);
|
||||
}
|
||||
};
|
||||
|
||||
struct GaugeImplParams {
|
||||
// bool overlapCommsCompute;
|
||||
// AcceleratorVector<Real,Nd> twist_n_2pi_L;
|
||||
AcceleratorVector<Complex,Nd> boundary_phases;
|
||||
GaugeImplParams() {
|
||||
boundary_phases.resize(Nd, 1.0);
|
||||
// twist_n_2pi_L.resize(Nd, 0.0);
|
||||
};
|
||||
GaugeImplParams(const AcceleratorVector<Complex,Nd> phi) : boundary_phases(phi) {
|
||||
// twist_n_2pi_L.resize(Nd, 0.0);
|
||||
}
|
||||
};
|
||||
|
||||
struct StaggeredImplParams {
|
||||
Coordinate dirichlet; // Blocksize of dirichlet BCs
|
||||
int partialDirichlet;
|
||||
StaggeredImplParams()
|
||||
{
|
||||
partialDirichlet=0;
|
||||
dirichlet.resize(0);
|
||||
};
|
||||
};
|
||||
|
||||
struct OneFlavourRationalParams : Serializable {
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(OneFlavourRationalParams,
|
||||
RealD, lo,
|
||||
RealD, hi,
|
||||
int, MaxIter,
|
||||
RealD, tolerance,
|
||||
RealD, mdtolerance,
|
||||
int, degree,
|
||||
int, precision,
|
||||
int, BoundsCheckFreq,
|
||||
RealD, BoundsCheckTol);
|
||||
|
||||
// MaxIter and tolerance, vectors??
|
||||
|
||||
// constructor
|
||||
OneFlavourRationalParams( RealD _lo = 0.0,
|
||||
RealD _hi = 1.0,
|
||||
int _maxit = 1000,
|
||||
RealD tol = 1.0e-8,
|
||||
int _degree = 10,
|
||||
int _precision = 64,
|
||||
int _BoundsCheckFreq=20,
|
||||
RealD mdtol = 1.0e-6,
|
||||
double _BoundsCheckTol=1e-6)
|
||||
: lo(_lo),
|
||||
hi(_hi),
|
||||
MaxIter(_maxit),
|
||||
tolerance(tol),
|
||||
mdtolerance(mdtol),
|
||||
degree(_degree),
|
||||
precision(_precision),
|
||||
BoundsCheckFreq(_BoundsCheckFreq),
|
||||
BoundsCheckTol(_BoundsCheckTol){};
|
||||
};
|
||||
|
||||
/*Action parameters for the generalized rational action
|
||||
The approximation is for (M^dag M)^{1/inv_pow}
|
||||
where inv_pow is the denominator of the fractional power.
|
||||
Default inv_pow=2 for square root, making this equivalent to
|
||||
the OneFlavourRational action
|
||||
*/
|
||||
struct RationalActionParams : Serializable {
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(RationalActionParams,
|
||||
int, inv_pow,
|
||||
RealD, lo, //low eigenvalue bound of rational approx
|
||||
RealD, hi, //high eigenvalue bound of rational approx
|
||||
int, MaxIter, //maximum iterations in msCG
|
||||
RealD, action_tolerance, //msCG tolerance in action evaluation
|
||||
int, action_degree, //rational approx tolerance in action evaluation
|
||||
RealD, md_tolerance, //msCG tolerance in MD integration
|
||||
int, md_degree, //rational approx tolerance in MD integration
|
||||
int, precision, //precision of floating point arithmetic
|
||||
int, BoundsCheckFreq); //frequency the approximation is tested (with Metropolis degree/tolerance); 0 disables the check
|
||||
// constructor
|
||||
RationalActionParams(int _inv_pow = 2,
|
||||
RealD _lo = 0.0,
|
||||
RealD _hi = 1.0,
|
||||
int _maxit = 1000,
|
||||
RealD _action_tolerance = 1.0e-8,
|
||||
int _action_degree = 10,
|
||||
RealD _md_tolerance = 1.0e-8,
|
||||
int _md_degree = 10,
|
||||
int _precision = 64,
|
||||
int _BoundsCheckFreq=20)
|
||||
: inv_pow(_inv_pow),
|
||||
lo(_lo),
|
||||
hi(_hi),
|
||||
MaxIter(_maxit),
|
||||
action_tolerance(_action_tolerance),
|
||||
action_degree(_action_degree),
|
||||
md_tolerance(_md_tolerance),
|
||||
md_degree(_md_degree),
|
||||
precision(_precision),
|
||||
BoundsCheckFreq(_BoundsCheckFreq){};
|
||||
};
|
||||
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
@ -1,100 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/AbstractEOFAFermion.h
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_QCD_ABSTRACT_EOFA_FERMION_H
|
||||
#define GRID_QCD_ABSTRACT_EOFA_FERMION_H
|
||||
|
||||
#include <Grid/qcd/action/fermion/CayleyFermion5D.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
// DJM: Abstract base class for EOFA fermion types.
|
||||
// Defines layout of additional EOFA-specific parameters and operators.
|
||||
// Use to construct EOFA pseudofermion actions that are agnostic to
|
||||
// Shamir / Mobius / etc., and ensure that no one can construct EOFA
|
||||
// pseudofermion action with non-EOFA fermion type.
|
||||
template<class Impl>
|
||||
class AbstractEOFAFermion : public CayleyFermion5D<Impl> {
|
||||
public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
|
||||
public:
|
||||
// Fermion operator: D(mq1) + shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm}
|
||||
RealD mq1;
|
||||
RealD mq2;
|
||||
RealD mq3;
|
||||
RealD shift;
|
||||
int pm;
|
||||
|
||||
RealD alpha; // Mobius scale
|
||||
RealD k; // EOFA normalization constant
|
||||
|
||||
virtual void Instantiatable(void) = 0;
|
||||
|
||||
// EOFA-specific operations
|
||||
// Force user to implement in derived classes
|
||||
virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag) = 0;
|
||||
virtual void Dtilde (const FermionField& in, FermionField& out) = 0;
|
||||
virtual void DtildeInv(const FermionField& in, FermionField& out) = 0;
|
||||
|
||||
// Implement derivatives in base class:
|
||||
// for EOFA both DWF and Mobius just need d(Dw)/dU
|
||||
virtual void MDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
|
||||
this->DhopDeriv(mat, U, V, dag);
|
||||
};
|
||||
virtual void MoeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
|
||||
this->DhopDerivOE(mat, U, V, dag);
|
||||
};
|
||||
virtual void MeoDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
|
||||
this->DhopDerivEO(mat, U, V, dag);
|
||||
};
|
||||
|
||||
// Recompute 5D coefficients for different value of shift constant
|
||||
// (needed for heatbath loop over poles)
|
||||
virtual void RefreshShiftCoefficients(RealD new_shift) = 0;
|
||||
|
||||
// Constructors
|
||||
AbstractEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid,
|
||||
GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid,
|
||||
RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int _pm,
|
||||
RealD _M5, RealD _b, RealD _c, const ImplParams& p=ImplParams())
|
||||
: CayleyFermion5D<Impl>(_Umu, FiveDimGrid, FiveDimRedBlackGrid, FourDimGrid, FourDimRedBlackGrid,
|
||||
_mq1, _M5, p), mq1(_mq1), mq2(_mq2), mq3(_mq3), shift(_shift), pm(_pm)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
this->alpha = _b + _c;
|
||||
this->k = this->alpha * (_mq3-_mq2) * std::pow(this->alpha+1.0,2*Ls) /
|
||||
( std::pow(this->alpha+1.0,Ls) + _mq2*std::pow(this->alpha-1.0,Ls) ) /
|
||||
( std::pow(this->alpha+1.0,Ls) + _mq3*std::pow(this->alpha-1.0,Ls) );
|
||||
};
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
@ -1,194 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/CayleyFermion5D.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
#include <Grid/qcd/action/fermion/WilsonFermion5D.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class Impl>
|
||||
class CayleyFermion5D : public WilsonFermion5D<Impl>
|
||||
{
|
||||
public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
public:
|
||||
|
||||
// override multiply
|
||||
virtual void M (const FermionField &in, FermionField &out);
|
||||
virtual void Mdag (const FermionField &in, FermionField &out);
|
||||
|
||||
// half checkerboard operations
|
||||
virtual void Meooe (const FermionField &in, FermionField &out);
|
||||
virtual void MeooeDag (const FermionField &in, FermionField &out);
|
||||
virtual void Mooee (const FermionField &in, FermionField &out);
|
||||
virtual void MooeeDag (const FermionField &in, FermionField &out);
|
||||
virtual void MooeeInv (const FermionField &in, FermionField &out);
|
||||
virtual void MooeeInvDag (const FermionField &in, FermionField &out);
|
||||
virtual void Meo5D (const FermionField &psi, FermionField &chi);
|
||||
|
||||
virtual void M5D (const FermionField &psi, FermionField &chi);
|
||||
virtual void M5Ddag(const FermionField &psi, FermionField &chi);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Physical surface field utilities
|
||||
///////////////////////////////////////////////////////////////
|
||||
virtual void Dminus(const FermionField &psi, FermionField &chi);
|
||||
virtual void DminusDag(const FermionField &psi, FermionField &chi);
|
||||
virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
|
||||
virtual void ExportPhysicalFermionSource(const FermionField &solution5d, FermionField &exported4d);
|
||||
virtual void ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d);
|
||||
virtual void ImportUnphysicalFermion(const FermionField &solution5d, FermionField &exported4d);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Support for MADWF tricks
|
||||
///////////////////////////////////////////////////////////////
|
||||
RealD Mass(void) { return (mass_plus + mass_minus) / 2.0; };
|
||||
RealD MassPlus(void) { return mass_plus; };
|
||||
RealD MassMinus(void) { return mass_minus; };
|
||||
|
||||
void SetMass(RealD _mass) {
|
||||
mass_plus=mass_minus=_mass;
|
||||
SetCoefficientsInternal(_zolo_hi,_gamma,_b,_c); // Reset coeffs
|
||||
} ;
|
||||
void SetMass(RealD _mass_plus, RealD _mass_minus) {
|
||||
mass_plus=_mass_plus;
|
||||
mass_minus=_mass_minus;
|
||||
SetCoefficientsInternal(_zolo_hi,_gamma,_b,_c); // Reset coeffs
|
||||
} ;
|
||||
void P(const FermionField &psi, FermionField &chi);
|
||||
void Pdag(const FermionField &psi, FermionField &chi);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Instantiate different versions depending on Impl
|
||||
/////////////////////////////////////////////////////
|
||||
void M5D(const FermionField &psi,
|
||||
const FermionField &phi,
|
||||
FermionField &chi,
|
||||
Vector<Coeff_t> &lower,
|
||||
Vector<Coeff_t> &diag,
|
||||
Vector<Coeff_t> &upper);
|
||||
|
||||
void M5Ddag(const FermionField &psi,
|
||||
const FermionField &phi,
|
||||
FermionField &chi,
|
||||
Vector<Coeff_t> &lower,
|
||||
Vector<Coeff_t> &diag,
|
||||
Vector<Coeff_t> &upper);
|
||||
|
||||
virtual void Instantiatable(void)=0;
|
||||
|
||||
// force terms; five routines; default to Dhop on diagonal
|
||||
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||
|
||||
// Efficient support for multigrid coarsening
|
||||
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
|
||||
virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out);
|
||||
|
||||
void Meooe5D (const FermionField &in, FermionField &out);
|
||||
void MeooeDag5D (const FermionField &in, FermionField &out);
|
||||
|
||||
// protected:
|
||||
RealD mass_plus, mass_minus;
|
||||
|
||||
// Save arguments to SetCoefficientsInternal
|
||||
Vector<Coeff_t> _gamma;
|
||||
RealD _zolo_hi;
|
||||
RealD _b;
|
||||
RealD _c;
|
||||
|
||||
// Cayley form Moebius (tanh and zolotarev)
|
||||
Vector<Coeff_t> omega;
|
||||
Vector<Coeff_t> bs; // S dependent coeffs
|
||||
Vector<Coeff_t> cs;
|
||||
Vector<Coeff_t> as;
|
||||
// For preconditioning Cayley form
|
||||
Vector<Coeff_t> bee;
|
||||
Vector<Coeff_t> cee;
|
||||
Vector<Coeff_t> aee;
|
||||
Vector<Coeff_t> beo;
|
||||
Vector<Coeff_t> ceo;
|
||||
Vector<Coeff_t> aeo;
|
||||
// LDU factorisation of the eeoo matrix
|
||||
Vector<Coeff_t> lee;
|
||||
Vector<Coeff_t> leem;
|
||||
Vector<Coeff_t> uee;
|
||||
Vector<Coeff_t> ueem;
|
||||
Vector<Coeff_t> dee;
|
||||
|
||||
// Matrices of 5d ee inverse params
|
||||
Vector<iSinglet<Simd> > MatpInv;
|
||||
Vector<iSinglet<Simd> > MatmInv;
|
||||
Vector<iSinglet<Simd> > MatpInvDag;
|
||||
Vector<iSinglet<Simd> > MatmInvDag;
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Conserved current utilities
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
||||
// Virtual can't template
|
||||
void ContractConservedCurrent(PropagatorField &q_in_1,
|
||||
PropagatorField &q_in_2,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &phys_src,
|
||||
Current curr_type,
|
||||
unsigned int mu);
|
||||
|
||||
void SeqConservedCurrent(PropagatorField &q_in,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &phys_src,
|
||||
Current curr_type,
|
||||
unsigned int mu,
|
||||
unsigned int tmin,
|
||||
unsigned int tmax,
|
||||
ComplexField &lattice_cmplx);
|
||||
|
||||
void ContractJ5q(PropagatorField &q_in,ComplexField &J5q);
|
||||
void ContractJ5q(FermionField &q_in,ComplexField &J5q);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Constructors
|
||||
///////////////////////////////////////////////////////////////
|
||||
CayleyFermion5D(GaugeField &_Umu,
|
||||
GridCartesian &FiveDimGrid,
|
||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||
GridCartesian &FourDimGrid,
|
||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||
RealD _mass,RealD _M5,const ImplParams &p= ImplParams());
|
||||
|
||||
|
||||
protected:
|
||||
virtual void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
|
||||
virtual void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c);
|
||||
virtual void SetCoefficientsInternal(RealD zolo_hi,Vector<Coeff_t> & gamma,RealD b,RealD c);
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -1,334 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/WilsonCloverFermionImplementation.h
|
||||
|
||||
Copyright (C) 2017 - 2022
|
||||
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Daniel Richtmann <daniel.richtmann@gmail.com>
|
||||
Author: Mattia Bruno <mattia.bruno@cern.ch>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <Grid/Grid.h>
|
||||
#include <Grid/qcd/spin/Dirac.h>
|
||||
#include <Grid/qcd/action/fermion/WilsonCloverHelpers.h>
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Standard Clover
|
||||
// (4+m0) + csw * clover_term
|
||||
// Exp Clover
|
||||
// (4+m0) * exp(csw/(4+m0) clover_term)
|
||||
// = (4+m0) + csw * clover_term + ...
|
||||
////////////////////////////////////////////
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
|
||||
//////////////////////////////////
|
||||
// Generic Standard Clover
|
||||
//////////////////////////////////
|
||||
|
||||
template<class Impl>
|
||||
class CloverHelpers: public WilsonCloverHelpers<Impl> {
|
||||
public:
|
||||
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
INHERIT_CLOVER_TYPES(Impl);
|
||||
|
||||
typedef WilsonCloverHelpers<Impl> Helpers;
|
||||
|
||||
static void Instantiate(CloverField& CloverTerm, CloverField& CloverTermInv, RealD csw_t, RealD diag_mass) {
|
||||
GridBase *grid = CloverTerm.Grid();
|
||||
CloverTerm += diag_mass;
|
||||
|
||||
int lvol = grid->lSites();
|
||||
int DimRep = Impl::Dimension;
|
||||
{
|
||||
autoView(CTv,CloverTerm,CpuRead);
|
||||
autoView(CTIv,CloverTermInv,CpuWrite);
|
||||
thread_for(site, lvol, {
|
||||
Coordinate lcoor;
|
||||
grid->LocalIndexToLocalCoor(site, lcoor);
|
||||
Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
|
||||
Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
|
||||
typename SiteClover::scalar_object Qx = Zero(), Qxinv = Zero();
|
||||
peekLocalSite(Qx, CTv, lcoor);
|
||||
|
||||
for (int j = 0; j < Ns; j++)
|
||||
for (int k = 0; k < Ns; k++)
|
||||
for (int a = 0; a < DimRep; a++)
|
||||
for (int b = 0; b < DimRep; b++){
|
||||
auto zz = Qx()(j, k)(a, b);
|
||||
EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex<double>(zz);
|
||||
}
|
||||
|
||||
EigenInvCloverOp = EigenCloverOp.inverse();
|
||||
for (int j = 0; j < Ns; j++)
|
||||
for (int k = 0; k < Ns; k++)
|
||||
for (int a = 0; a < DimRep; a++)
|
||||
for (int b = 0; b < DimRep; b++)
|
||||
Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep);
|
||||
pokeLocalSite(Qxinv, CTIv, lcoor);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
static GaugeLinkField Cmunu(std::vector<GaugeLinkField> &U, GaugeLinkField &lambda, int mu, int nu) {
|
||||
return Helpers::Cmunu(U, lambda, mu, nu);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
//////////////////////////////////
|
||||
// Generic Exp Clover
|
||||
//////////////////////////////////
|
||||
|
||||
template<class Impl>
|
||||
class ExpCloverHelpers: public WilsonCloverHelpers<Impl> {
|
||||
public:
|
||||
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
INHERIT_CLOVER_TYPES(Impl);
|
||||
|
||||
template <typename vtype> using iImplClover = iScalar<iMatrix<iMatrix<vtype, Impl::Dimension>, Ns>>;
|
||||
typedef WilsonCloverHelpers<Impl> Helpers;
|
||||
|
||||
// Can this be avoided?
|
||||
static void IdentityTimesC(const CloverField& in, RealD c) {
|
||||
int DimRep = Impl::Dimension;
|
||||
|
||||
autoView(in_v, in, AcceleratorWrite);
|
||||
|
||||
accelerator_for(ss, in.Grid()->oSites(), 1, {
|
||||
for (int sa=0; sa<Ns; sa++)
|
||||
for (int ca=0; ca<DimRep; ca++)
|
||||
in_v[ss]()(sa,sa)(ca,ca) = c;
|
||||
});
|
||||
}
|
||||
|
||||
static int getNMAX(RealD prec, RealD R) {
|
||||
/* compute stop condition for exponential */
|
||||
int NMAX=1;
|
||||
RealD cond=R*R/2.;
|
||||
|
||||
while (cond*std::exp(R)>prec) {
|
||||
NMAX++;
|
||||
cond*=R/(double)(NMAX+1);
|
||||
}
|
||||
return NMAX;
|
||||
}
|
||||
|
||||
static int getNMAX(Lattice<iImplClover<vComplexD2>> &t, RealD R) {return getNMAX(1e-12,R);}
|
||||
static int getNMAX(Lattice<iImplClover<vComplexD>> &t, RealD R) {return getNMAX(1e-12,R);}
|
||||
static int getNMAX(Lattice<iImplClover<vComplexF>> &t, RealD R) {return getNMAX(1e-6,R);}
|
||||
|
||||
static void Instantiate(CloverField& Clover, CloverField& CloverInv, RealD csw_t, RealD diag_mass) {
|
||||
GridBase* grid = Clover.Grid();
|
||||
CloverField ExpClover(grid);
|
||||
|
||||
int NMAX = getNMAX(Clover, 3.*csw_t/diag_mass);
|
||||
|
||||
Clover *= (1.0/diag_mass);
|
||||
|
||||
// Taylor expansion, slow but generic
|
||||
// Horner scheme: a0 + a1 x + a2 x^2 + .. = a0 + x (a1 + x(...))
|
||||
// qN = cN
|
||||
// qn = cn + qn+1 X
|
||||
std::vector<RealD> cn(NMAX+1);
|
||||
cn[0] = 1.0;
|
||||
for (int i=1; i<=NMAX; i++)
|
||||
cn[i] = cn[i-1] / RealD(i);
|
||||
|
||||
ExpClover = Zero();
|
||||
IdentityTimesC(ExpClover, cn[NMAX]);
|
||||
for (int i=NMAX-1; i>=0; i--)
|
||||
ExpClover = ExpClover * Clover + cn[i];
|
||||
|
||||
// prepare inverse
|
||||
CloverInv = (-1.0)*Clover;
|
||||
|
||||
Clover = ExpClover * diag_mass;
|
||||
|
||||
ExpClover = Zero();
|
||||
IdentityTimesC(ExpClover, cn[NMAX]);
|
||||
for (int i=NMAX-1; i>=0; i--)
|
||||
ExpClover = ExpClover * CloverInv + cn[i];
|
||||
|
||||
CloverInv = ExpClover * (1.0/diag_mass);
|
||||
|
||||
}
|
||||
|
||||
static GaugeLinkField Cmunu(std::vector<GaugeLinkField> &U, GaugeLinkField &lambda, int mu, int nu) {
|
||||
assert(0);
|
||||
return lambda;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
//////////////////////////////////
|
||||
// Compact Standard Clover
|
||||
//////////////////////////////////
|
||||
|
||||
|
||||
template<class Impl>
|
||||
class CompactCloverHelpers: public CompactWilsonCloverHelpers<Impl>,
|
||||
public WilsonCloverHelpers<Impl> {
|
||||
public:
|
||||
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
INHERIT_CLOVER_TYPES(Impl);
|
||||
INHERIT_COMPACT_CLOVER_TYPES(Impl);
|
||||
|
||||
typedef WilsonCloverHelpers<Impl> Helpers;
|
||||
typedef CompactWilsonCloverHelpers<Impl> CompactHelpers;
|
||||
|
||||
static void InstantiateClover(CloverField& Clover, CloverField& CloverInv, RealD csw_t, RealD diag_mass) {
|
||||
Clover += diag_mass;
|
||||
}
|
||||
|
||||
static void InvertClover(CloverField& InvClover,
|
||||
const CloverDiagonalField& diagonal,
|
||||
const CloverTriangleField& triangle,
|
||||
CloverDiagonalField& diagonalInv,
|
||||
CloverTriangleField& triangleInv,
|
||||
bool fixedBoundaries) {
|
||||
|
||||
CompactHelpers::Invert(diagonal, triangle, diagonalInv, triangleInv);
|
||||
}
|
||||
|
||||
// TODO: implement Cmunu for better performances with compact layout, but don't do it
|
||||
// here, but rather in WilsonCloverHelpers.h -> CompactWilsonCloverHelpers
|
||||
static GaugeLinkField Cmunu(std::vector<GaugeLinkField> &U, GaugeLinkField &lambda, int mu, int nu) {
|
||||
return Helpers::Cmunu(U, lambda, mu, nu);
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////
|
||||
// Compact Exp Clover
|
||||
//////////////////////////////////
|
||||
|
||||
template<class Impl>
|
||||
class CompactExpCloverHelpers: public CompactWilsonCloverHelpers<Impl> {
|
||||
public:
|
||||
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
INHERIT_CLOVER_TYPES(Impl);
|
||||
INHERIT_COMPACT_CLOVER_TYPES(Impl);
|
||||
|
||||
template <typename vtype> using iImplClover = iScalar<iMatrix<iMatrix<vtype, Impl::Dimension>, Ns>>;
|
||||
typedef CompactWilsonCloverHelpers<Impl> CompactHelpers;
|
||||
|
||||
// Can this be avoided?
|
||||
static void IdentityTimesC(const CloverField& in, RealD c) {
|
||||
int DimRep = Impl::Dimension;
|
||||
|
||||
autoView(in_v, in, AcceleratorWrite);
|
||||
|
||||
accelerator_for(ss, in.Grid()->oSites(), 1, {
|
||||
for (int sa=0; sa<Ns; sa++)
|
||||
for (int ca=0; ca<DimRep; ca++)
|
||||
in_v[ss]()(sa,sa)(ca,ca) = c;
|
||||
});
|
||||
}
|
||||
|
||||
static int getNMAX(RealD prec, RealD R) {
|
||||
/* compute stop condition for exponential */
|
||||
int NMAX=1;
|
||||
RealD cond=R*R/2.;
|
||||
|
||||
while (cond*std::exp(R)>prec) {
|
||||
NMAX++;
|
||||
cond*=R/(double)(NMAX+1);
|
||||
}
|
||||
return NMAX;
|
||||
}
|
||||
|
||||
static int getNMAX(Lattice<iImplClover<vComplexD>> &t, RealD R) {return getNMAX(1e-12,R);}
|
||||
static int getNMAX(Lattice<iImplClover<vComplexF>> &t, RealD R) {return getNMAX(1e-6,R);}
|
||||
|
||||
static void InstantiateClover(CloverField& Clover, CloverField& CloverInv, RealD csw_t, RealD diag_mass) {
|
||||
|
||||
GridBase* grid = Clover.Grid();
|
||||
CloverField ExpClover(grid);
|
||||
|
||||
int NMAX = getNMAX(Clover, 3.*csw_t/diag_mass);
|
||||
|
||||
Clover *= (1.0/diag_mass);
|
||||
|
||||
// Taylor expansion, slow but generic
|
||||
// Horner scheme: a0 + a1 x + a2 x^2 + .. = a0 + x (a1 + x(...))
|
||||
// qN = cN
|
||||
// qn = cn + qn+1 X
|
||||
std::vector<RealD> cn(NMAX+1);
|
||||
cn[0] = 1.0;
|
||||
for (int i=1; i<=NMAX; i++)
|
||||
cn[i] = cn[i-1] / RealD(i);
|
||||
|
||||
ExpClover = Zero();
|
||||
IdentityTimesC(ExpClover, cn[NMAX]);
|
||||
for (int i=NMAX-1; i>=0; i--)
|
||||
ExpClover = ExpClover * Clover + cn[i];
|
||||
|
||||
// prepare inverse
|
||||
CloverInv = (-1.0)*Clover;
|
||||
|
||||
Clover = ExpClover * diag_mass;
|
||||
|
||||
ExpClover = Zero();
|
||||
IdentityTimesC(ExpClover, cn[NMAX]);
|
||||
for (int i=NMAX-1; i>=0; i--)
|
||||
ExpClover = ExpClover * CloverInv + cn[i];
|
||||
|
||||
CloverInv = ExpClover * (1.0/diag_mass);
|
||||
|
||||
}
|
||||
|
||||
static void InvertClover(CloverField& InvClover,
|
||||
const CloverDiagonalField& diagonal,
|
||||
const CloverTriangleField& triangle,
|
||||
CloverDiagonalField& diagonalInv,
|
||||
CloverTriangleField& triangleInv,
|
||||
bool fixedBoundaries) {
|
||||
|
||||
if (fixedBoundaries)
|
||||
{
|
||||
CompactHelpers::Invert(diagonal, triangle, diagonalInv, triangleInv);
|
||||
}
|
||||
else
|
||||
{
|
||||
CompactHelpers::ConvertLayout(InvClover, diagonalInv, triangleInv);
|
||||
}
|
||||
}
|
||||
|
||||
static GaugeLinkField Cmunu(std::vector<GaugeLinkField> &U, GaugeLinkField &lambda, int mu, int nu) {
|
||||
assert(0);
|
||||
return lambda;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
NAMESPACE_END(Grid);
|
@ -1,241 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/CompactWilsonCloverFermion.h
|
||||
|
||||
Copyright (C) 2020 - 2022
|
||||
|
||||
Author: Daniel Richtmann <daniel.richtmann@gmail.com>
|
||||
Author: Nils Meyer <nils.meyer@ur.de>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <Grid/qcd/action/fermion/WilsonCloverTypes.h>
|
||||
#include <Grid/qcd/action/fermion/WilsonCloverHelpers.h>
|
||||
#include <Grid/qcd/action/fermion/CloverHelpers.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
// see Grid/qcd/action/fermion/WilsonCloverFermion.h for description
|
||||
//
|
||||
// Modifications done here:
|
||||
//
|
||||
// Original: clover term = 12x12 matrix per site
|
||||
//
|
||||
// But: Only two diagonal 6x6 hermitian blocks are non-zero (also true for original, verified by running)
|
||||
// Sufficient to store/transfer only the real parts of the diagonal and one triangular part
|
||||
// 2 * (6 + 15 * 2) = 72 real or 36 complex words to be stored/transfered
|
||||
//
|
||||
// Here: Above but diagonal as complex numbers, i.e., need to store/transfer
|
||||
// 2 * (6 * 2 + 15 * 2) = 84 real or 42 complex words
|
||||
//
|
||||
// Words per site and improvement compared to original (combined with the input and output spinors):
|
||||
//
|
||||
// - Original: 2*12 + 12*12 = 168 words -> 1.00 x less
|
||||
// - Minimal: 2*12 + 36 = 60 words -> 2.80 x less
|
||||
// - Here: 2*12 + 42 = 66 words -> 2.55 x less
|
||||
//
|
||||
// These improvements directly translate to wall-clock time
|
||||
//
|
||||
// Data layout:
|
||||
//
|
||||
// - diagonal and triangle part as separate lattice fields,
|
||||
// this was faster than as 1 combined field on all tested machines
|
||||
// - diagonal: as expected
|
||||
// - triangle: store upper right triangle in row major order
|
||||
// - graphical:
|
||||
// 0 1 2 3 4
|
||||
// 5 6 7 8
|
||||
// 9 10 11 = upper right triangle indices
|
||||
// 12 13
|
||||
// 14
|
||||
// 0
|
||||
// 1
|
||||
// 2
|
||||
// 3 = diagonal indices
|
||||
// 4
|
||||
// 5
|
||||
// 0
|
||||
// 1 5
|
||||
// 2 6 9 = lower left triangle indices
|
||||
// 3 7 10 12
|
||||
// 4 8 11 13 14
|
||||
//
|
||||
// Impact on total memory consumption:
|
||||
// - Original: (2 * 1 + 8 * 1/2) 12x12 matrices = 6 12x12 matrices = 864 complex words per site
|
||||
// - Here: (2 * 1 + 4 * 1/2) diagonal parts = 4 diagonal parts = 24 complex words per site
|
||||
// + (2 * 1 + 4 * 1/2) triangle parts = 4 triangle parts = 60 complex words per site
|
||||
// = 84 complex words per site
|
||||
|
||||
template<class Impl, class CloverHelpers>
|
||||
class CompactWilsonCloverFermion : public WilsonFermion<Impl>,
|
||||
public WilsonCloverHelpers<Impl>,
|
||||
public CompactWilsonCloverHelpers<Impl> {
|
||||
/////////////////////////////////////////////
|
||||
// Sizes
|
||||
/////////////////////////////////////////////
|
||||
|
||||
public:
|
||||
|
||||
INHERIT_COMPACT_CLOVER_SIZES(Impl);
|
||||
|
||||
/////////////////////////////////////////////
|
||||
// Type definitions
|
||||
/////////////////////////////////////////////
|
||||
|
||||
public:
|
||||
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
INHERIT_CLOVER_TYPES(Impl);
|
||||
INHERIT_COMPACT_CLOVER_TYPES(Impl);
|
||||
|
||||
typedef WilsonFermion<Impl> WilsonBase;
|
||||
typedef WilsonCloverHelpers<Impl> Helpers;
|
||||
typedef CompactWilsonCloverHelpers<Impl> CompactHelpers;
|
||||
|
||||
/////////////////////////////////////////////
|
||||
// Constructors
|
||||
/////////////////////////////////////////////
|
||||
|
||||
public:
|
||||
|
||||
CompactWilsonCloverFermion(GaugeField& _Umu,
|
||||
GridCartesian& Fgrid,
|
||||
GridRedBlackCartesian& Hgrid,
|
||||
const RealD _mass,
|
||||
const RealD _csw_r = 0.0,
|
||||
const RealD _csw_t = 0.0,
|
||||
const RealD _cF = 1.0,
|
||||
const WilsonAnisotropyCoefficients& clover_anisotropy = WilsonAnisotropyCoefficients(),
|
||||
const ImplParams& impl_p = ImplParams());
|
||||
|
||||
/////////////////////////////////////////////
|
||||
// Member functions (implementing interface)
|
||||
/////////////////////////////////////////////
|
||||
|
||||
public:
|
||||
|
||||
virtual void Instantiatable() {};
|
||||
int ConstEE() override { return 0; };
|
||||
int isTrivialEE() override { return 0; };
|
||||
|
||||
void Dhop(const FermionField& in, FermionField& out, int dag) override;
|
||||
|
||||
void DhopOE(const FermionField& in, FermionField& out, int dag) override;
|
||||
|
||||
void DhopEO(const FermionField& in, FermionField& out, int dag) override;
|
||||
|
||||
void DhopDir(const FermionField& in, FermionField& out, int dir, int disp) override;
|
||||
|
||||
void DhopDirAll(const FermionField& in, std::vector<FermionField>& out) /* override */;
|
||||
|
||||
void M(const FermionField& in, FermionField& out) override;
|
||||
|
||||
void Mdag(const FermionField& in, FermionField& out) override;
|
||||
|
||||
void Meooe(const FermionField& in, FermionField& out) override;
|
||||
|
||||
void MeooeDag(const FermionField& in, FermionField& out) override;
|
||||
|
||||
void Mooee(const FermionField& in, FermionField& out) override;
|
||||
|
||||
void MooeeDag(const FermionField& in, FermionField& out) override;
|
||||
|
||||
void MooeeInv(const FermionField& in, FermionField& out) override;
|
||||
|
||||
void MooeeInvDag(const FermionField& in, FermionField& out) override;
|
||||
|
||||
void Mdir(const FermionField& in, FermionField& out, int dir, int disp) override;
|
||||
|
||||
void MdirAll(const FermionField& in, std::vector<FermionField>& out) override;
|
||||
|
||||
void MDeriv(GaugeField& force, const FermionField& X, const FermionField& Y, int dag) override;
|
||||
|
||||
void MooDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) override;
|
||||
|
||||
void MeeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) override;
|
||||
|
||||
/////////////////////////////////////////////
|
||||
// Member functions (internals)
|
||||
/////////////////////////////////////////////
|
||||
|
||||
void MooeeInternal(const FermionField& in,
|
||||
FermionField& out,
|
||||
const CloverDiagonalField& diagonal,
|
||||
const CloverTriangleField& triangle);
|
||||
|
||||
/////////////////////////////////////////////
|
||||
// Helpers
|
||||
/////////////////////////////////////////////
|
||||
|
||||
void ImportGauge(const GaugeField& _Umu) override;
|
||||
|
||||
/////////////////////////////////////////////
|
||||
// Helpers
|
||||
/////////////////////////////////////////////
|
||||
|
||||
private:
|
||||
|
||||
template<class Field>
|
||||
const MaskField* getCorrectMaskField(const Field &in) const {
|
||||
if(in.Grid()->_isCheckerBoarded) {
|
||||
if(in.Checkerboard() == Odd) {
|
||||
return &this->BoundaryMaskOdd;
|
||||
} else {
|
||||
return &this->BoundaryMaskEven;
|
||||
}
|
||||
} else {
|
||||
return &this->BoundaryMask;
|
||||
}
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
void ApplyBoundaryMask(Field& f) {
|
||||
const MaskField* m = getCorrectMaskField(f); assert(m != nullptr);
|
||||
assert(m != nullptr);
|
||||
CompactHelpers::ApplyBoundaryMask(f, *m);
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////
|
||||
// Member Data
|
||||
/////////////////////////////////////////////
|
||||
|
||||
public:
|
||||
|
||||
RealD csw_r;
|
||||
RealD csw_t;
|
||||
RealD cF;
|
||||
|
||||
bool fixedBoundaries;
|
||||
|
||||
CloverDiagonalField Diagonal, DiagonalEven, DiagonalOdd;
|
||||
CloverDiagonalField DiagonalInv, DiagonalInvEven, DiagonalInvOdd;
|
||||
|
||||
CloverTriangleField Triangle, TriangleEven, TriangleOdd;
|
||||
CloverTriangleField TriangleInv, TriangleInvEven, TriangleInvOdd;
|
||||
|
||||
FermionField Tmp;
|
||||
|
||||
MaskField BoundaryMask, BoundaryMaskEven, BoundaryMaskOdd;
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
@ -1,105 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_QCD_CONTINUED_FRACTION_H
|
||||
#define GRID_QCD_CONTINUED_FRACTION_H
|
||||
|
||||
#include <Grid/qcd/action/fermion/WilsonFermion5D.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class Impl>
|
||||
class ContinuedFractionFermion5D : public WilsonFermion5D<Impl>
|
||||
{
|
||||
public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
public:
|
||||
|
||||
// override multiply
|
||||
virtual void M (const FermionField &in, FermionField &out);
|
||||
virtual void Mdag (const FermionField &in, FermionField &out);
|
||||
|
||||
// half checkerboard operaions
|
||||
virtual void Meooe (const FermionField &in, FermionField &out);
|
||||
virtual void MeooeDag (const FermionField &in, FermionField &out);
|
||||
virtual void Mooee (const FermionField &in, FermionField &out);
|
||||
virtual void MooeeDag (const FermionField &in, FermionField &out);
|
||||
virtual void MooeeInv (const FermionField &in, FermionField &out);
|
||||
virtual void MooeeInvDag (const FermionField &in, FermionField &out);
|
||||
|
||||
// force terms; five routines; default to Dhop on diagonal
|
||||
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||
|
||||
// virtual void Instantiatable(void)=0;
|
||||
virtual void Instantiatable(void) =0;
|
||||
|
||||
// Efficient support for multigrid coarsening
|
||||
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
|
||||
virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Physical surface field utilities
|
||||
///////////////////////////////////////////////////////////////
|
||||
// virtual void Dminus(const FermionField &psi, FermionField &chi); // Inherit trivial case
|
||||
// virtual void DminusDag(const FermionField &psi, FermionField &chi); // Inherit trivial case
|
||||
virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
|
||||
virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d);
|
||||
|
||||
// Constructors
|
||||
ContinuedFractionFermion5D(GaugeField &_Umu,
|
||||
GridCartesian &FiveDimGrid,
|
||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||
GridCartesian &FourDimGrid,
|
||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||
RealD _mass,RealD M5,const ImplParams &p= ImplParams());
|
||||
|
||||
protected:
|
||||
|
||||
void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale);
|
||||
void SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata);;
|
||||
|
||||
// Cont frac
|
||||
RealD dw_diag;
|
||||
RealD mass;
|
||||
RealD R;
|
||||
RealD ZoloHiInv;
|
||||
Vector<double> Beta;
|
||||
Vector<double> cc;;
|
||||
Vector<double> cc_d;;
|
||||
Vector<double> sqrt_cc;
|
||||
Vector<double> See;
|
||||
Vector<double> Aee;
|
||||
|
||||
};
|
||||
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
@ -1,291 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/DWFSlow.h
|
||||
|
||||
Copyright (C) 2022
|
||||
|
||||
Author: Peter Boyle <pboyle@bnl.gov>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template <class Impl>
|
||||
class DWFSlowFermion : public FermionOperator<Impl>
|
||||
{
|
||||
public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Implement the abstract base
|
||||
///////////////////////////////////////////////////////////////
|
||||
GridBase *GaugeGrid(void) { return _grid4; }
|
||||
GridBase *GaugeRedBlackGrid(void) { return _cbgrid4; }
|
||||
GridBase *FermionGrid(void) { return _grid; }
|
||||
GridBase *FermionRedBlackGrid(void) { return _cbgrid; }
|
||||
|
||||
FermionField _tmp;
|
||||
FermionField &tmp(void) { return _tmp; }
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// override multiply; cut number routines if pass dagger argument
|
||||
// and also make interface more uniformly consistent
|
||||
//////////////////////////////////////////////////////////////////
|
||||
virtual void M(const FermionField &in, FermionField &out)
|
||||
{
|
||||
FermionField tmp(_grid);
|
||||
out = (5.0 - M5) * in;
|
||||
Dhop(in,tmp,DaggerNo);
|
||||
out = out + tmp;
|
||||
}
|
||||
virtual void Mdag(const FermionField &in, FermionField &out)
|
||||
{
|
||||
FermionField tmp(_grid);
|
||||
out = (5.0 - M5) * in;
|
||||
Dhop(in,tmp,DaggerYes);
|
||||
out = out + tmp;
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////
|
||||
// half checkerboard operations 5D redblack so just site identiy
|
||||
/////////////////////////////////////////////////////////
|
||||
void Meooe(const FermionField &in, FermionField &out)
|
||||
{
|
||||
if ( in.Checkerboard() == Odd ) {
|
||||
this->DhopEO(in,out,DaggerNo);
|
||||
} else {
|
||||
this->DhopOE(in,out,DaggerNo);
|
||||
}
|
||||
}
|
||||
void MeooeDag(const FermionField &in, FermionField &out)
|
||||
{
|
||||
if ( in.Checkerboard() == Odd ) {
|
||||
this->DhopEO(in,out,DaggerYes);
|
||||
} else {
|
||||
this->DhopOE(in,out,DaggerYes);
|
||||
}
|
||||
};
|
||||
|
||||
// allow override for twisted mass and clover
|
||||
virtual void Mooee(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out = (5.0 - M5) * in;
|
||||
}
|
||||
virtual void MooeeDag(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out = (5.0 - M5) * in;
|
||||
}
|
||||
virtual void MooeeInv(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out = (1.0/(5.0 - M5)) * in;
|
||||
};
|
||||
virtual void MooeeInvDag(const FermionField &in, FermionField &out)
|
||||
{
|
||||
out = (1.0/(5.0 - M5)) * in;
|
||||
};
|
||||
|
||||
virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _mass,std::vector<double> twist) {} ;
|
||||
|
||||
////////////////////////
|
||||
// Derivative interface
|
||||
////////////////////////
|
||||
// Interface calls an internal routine
|
||||
void DhopDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag) { assert(0);};
|
||||
void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){ assert(0);};
|
||||
void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){ assert(0);};
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// non-hermitian hopping term; half cb or both
|
||||
///////////////////////////////////////////////////////////////
|
||||
void Dhop(const FermionField &in, FermionField &out, int dag)
|
||||
{
|
||||
FermionField tmp(in.Grid());
|
||||
Dhop5(in,out,MassField,MassField,dag );
|
||||
for(int mu=0;mu<4;mu++){
|
||||
DhopDirU(in,Umu[mu],Umu[mu],tmp,mu,dag ); out = out + tmp;
|
||||
}
|
||||
};
|
||||
void DhopOE(const FermionField &in, FermionField &out, int dag)
|
||||
{
|
||||
FermionField tmp(in.Grid());
|
||||
assert(in.Checkerboard()==Even);
|
||||
Dhop5(in,out,MassFieldOdd,MassFieldEven,dag);
|
||||
for(int mu=0;mu<4;mu++){
|
||||
DhopDirU(in,UmuOdd[mu],UmuEven[mu],tmp,mu,dag ); out = out + tmp;
|
||||
}
|
||||
};
|
||||
void DhopEO(const FermionField &in, FermionField &out, int dag)
|
||||
{
|
||||
FermionField tmp(in.Grid());
|
||||
assert(in.Checkerboard()==Odd);
|
||||
Dhop5(in,out, MassFieldEven,MassFieldOdd ,dag );
|
||||
for(int mu=0;mu<4;mu++){
|
||||
DhopDirU(in,UmuEven[mu],UmuOdd[mu],tmp,mu,dag ); out = out + tmp;
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Multigrid assistance; force term uses too
|
||||
///////////////////////////////////////////////////////////////
|
||||
void Mdir(const FermionField &in, FermionField &out, int dir, int disp){ assert(0);};
|
||||
void MdirAll(const FermionField &in, std::vector<FermionField> &out) { assert(0);};
|
||||
void DhopDir(const FermionField &in, FermionField &out, int dir, int disp) { assert(0);};
|
||||
void DhopDirAll(const FermionField &in, std::vector<FermionField> &out) { assert(0);};
|
||||
void DhopDirCalc(const FermionField &in, FermionField &out, int dirdisp,int gamma, int dag) { assert(0);};
|
||||
|
||||
void DhopDirU(const FermionField &in, const GaugeLinkField &U5e, const GaugeLinkField &U5o, FermionField &out, int mu, int dag)
|
||||
{
|
||||
RealD sgn= 1.0;
|
||||
if (dag ) sgn=-1.0;
|
||||
|
||||
Gamma::Algebra Gmu [] = {
|
||||
Gamma::Algebra::GammaX,
|
||||
Gamma::Algebra::GammaY,
|
||||
Gamma::Algebra::GammaZ,
|
||||
Gamma::Algebra::GammaT
|
||||
};
|
||||
|
||||
// mass is 1,1,1,1,-m has to multiply the round the world term
|
||||
FermionField tmp (in.Grid());
|
||||
tmp = U5e * Cshift(in,mu+1,1);
|
||||
out = tmp - Gamma(Gmu[mu])*tmp*sgn;
|
||||
|
||||
tmp = Cshift(adj(U5o)*in,mu+1,-1);
|
||||
out = out + tmp + Gamma(Gmu[mu])*tmp*sgn;
|
||||
|
||||
out = -0.5*out;
|
||||
};
|
||||
|
||||
void Dhop5(const FermionField &in, FermionField &out, ComplexField &massE, ComplexField &massO, int dag)
|
||||
{
|
||||
// Mass term.... must multiple the round world with mass = 1,1,1,1, -m
|
||||
RealD sgn= 1.0;
|
||||
if (dag ) sgn=-1.0;
|
||||
|
||||
Gamma G5(Gamma::Algebra::Gamma5);
|
||||
|
||||
FermionField tmp (in.Grid());
|
||||
tmp = massE*Cshift(in,0,1);
|
||||
out = tmp - G5*tmp*sgn;
|
||||
|
||||
tmp = Cshift(massO*in,0,-1);
|
||||
out = out + tmp + G5*tmp*sgn;
|
||||
out = -0.5*out;
|
||||
};
|
||||
|
||||
// Constructor
|
||||
DWFSlowFermion(GaugeField &_Umu, GridCartesian &Fgrid,
|
||||
GridRedBlackCartesian &Hgrid, RealD _mass, RealD _M5)
|
||||
:
|
||||
_grid(&Fgrid),
|
||||
_cbgrid(&Hgrid),
|
||||
_grid4(_Umu.Grid()),
|
||||
Umu(Nd,&Fgrid),
|
||||
UmuEven(Nd,&Hgrid),
|
||||
UmuOdd(Nd,&Hgrid),
|
||||
MassField(&Fgrid),
|
||||
MassFieldEven(&Hgrid),
|
||||
MassFieldOdd(&Hgrid),
|
||||
M5(_M5),
|
||||
mass(_mass),
|
||||
_tmp(&Hgrid)
|
||||
{
|
||||
Ls=Fgrid._fdimensions[0];
|
||||
ImportGauge(_Umu);
|
||||
|
||||
typedef typename FermionField::scalar_type scalar;
|
||||
|
||||
Lattice<iScalar<vInteger> > coor(&Fgrid);
|
||||
LatticeCoordinate(coor, 0); // Scoor
|
||||
ComplexField one(&Fgrid);
|
||||
MassField =scalar(-mass);
|
||||
one =scalar(1.0);
|
||||
MassField =where(coor==Integer(Ls-1),MassField,one);
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
pickCheckerboard(Even,UmuEven[mu],Umu[mu]);
|
||||
pickCheckerboard(Odd ,UmuOdd[mu],Umu[mu]);
|
||||
}
|
||||
pickCheckerboard(Even,MassFieldEven,MassField);
|
||||
pickCheckerboard(Odd ,MassFieldOdd,MassField);
|
||||
|
||||
}
|
||||
|
||||
// DoubleStore impl dependent
|
||||
void ImportGauge(const GaugeField &_Umu4)
|
||||
{
|
||||
GaugeLinkField U4(_grid4);
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U4 = PeekIndex<LorentzIndex>(_Umu4, mu);
|
||||
for(int s=0;s<this->Ls;s++){
|
||||
InsertSlice(U4,Umu[mu],s,0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Data members require to support the functionality
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
||||
public:
|
||||
virtual RealD Mass(void) { return mass; }
|
||||
virtual int isTrivialEE(void) { return 1; };
|
||||
RealD mass;
|
||||
RealD M5;
|
||||
int Ls;
|
||||
|
||||
GridBase *_grid4;
|
||||
GridBase *_grid;
|
||||
GridBase *_cbgrid4;
|
||||
GridBase *_cbgrid;
|
||||
|
||||
// Copy of the gauge field , with even and odd subsets
|
||||
std::vector<GaugeLinkField> Umu;
|
||||
std::vector<GaugeLinkField> UmuEven;
|
||||
std::vector<GaugeLinkField> UmuOdd;
|
||||
ComplexField MassField;
|
||||
ComplexField MassFieldEven;
|
||||
ComplexField MassFieldOdd;
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Conserved current utilities
|
||||
///////////////////////////////////////////////////////////////
|
||||
void ContractConservedCurrent(PropagatorField &q_in_1,
|
||||
PropagatorField &q_in_2,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &phys_src,
|
||||
Current curr_type,
|
||||
unsigned int mu){}
|
||||
void SeqConservedCurrent(PropagatorField &q_in,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &phys_src,
|
||||
Current curr_type,
|
||||
unsigned int mu,
|
||||
unsigned int tmin,
|
||||
unsigned int tmax,
|
||||
ComplexField &lattice_cmplx){}
|
||||
};
|
||||
|
||||
typedef DWFSlowFermion<WilsonImplF> DWFSlowFermionF;
|
||||
typedef DWFSlowFermion<WilsonImplD> DWFSlowFermionD;
|
||||
|
||||
NAMESPACE_END(Grid);
|
@ -1,90 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.h
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
#include <Grid/qcd/action/fermion/AbstractEOFAFermion.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class Impl>
|
||||
class DomainWallEOFAFermion : public AbstractEOFAFermion<Impl>
|
||||
{
|
||||
public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
|
||||
public:
|
||||
// Modified (0,Ls-1) and (Ls-1,0) elements of Mooee
|
||||
// for red-black preconditioned Shamir EOFA
|
||||
Coeff_t dm;
|
||||
Coeff_t dp;
|
||||
|
||||
virtual void Instantiatable(void) {};
|
||||
|
||||
// EOFA-specific operations
|
||||
virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag);
|
||||
virtual void Dtilde (const FermionField& in, FermionField& out);
|
||||
virtual void DtildeInv (const FermionField& in, FermionField& out);
|
||||
|
||||
// override multiply
|
||||
virtual void M (const FermionField& in, FermionField& out);
|
||||
virtual void Mdag (const FermionField& in, FermionField& out);
|
||||
|
||||
// half checkerboard operations
|
||||
virtual void Mooee (const FermionField& in, FermionField& out);
|
||||
virtual void MooeeDag (const FermionField& in, FermionField& out);
|
||||
virtual void MooeeInv (const FermionField& in, FermionField& out);
|
||||
virtual void MooeeInvDag(const FermionField& in, FermionField& out);
|
||||
|
||||
virtual void M5D (const FermionField& psi, FermionField& chi);
|
||||
virtual void M5Ddag (const FermionField& psi, FermionField& chi);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Instantiate different versions depending on Impl
|
||||
/////////////////////////////////////////////////////
|
||||
void M5D(const FermionField& psi, const FermionField& phi, FermionField& chi,
|
||||
Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper);
|
||||
|
||||
void M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi,
|
||||
Vector<Coeff_t>& lower, Vector<Coeff_t>& diag, Vector<Coeff_t>& upper);
|
||||
|
||||
virtual void RefreshShiftCoefficients(RealD new_shift);
|
||||
|
||||
// Constructors
|
||||
DomainWallEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid,
|
||||
GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid,
|
||||
RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int pm,
|
||||
RealD _M5, const ImplParams& p=ImplParams());
|
||||
|
||||
protected:
|
||||
void SetCoefficientsInternal(RealD zolo_hi, Vector<Coeff_t>& gamma, RealD b, RealD c);
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -1,139 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/DomainWallFermion.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Vera Guelpers <V.M.Guelpers@soton.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_QCD_DOMAIN_WALL_FERMION_H
|
||||
#define GRID_QCD_DOMAIN_WALL_FERMION_H
|
||||
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class Impl>
|
||||
class DomainWallFermion : public CayleyFermion5D<Impl>
|
||||
{
|
||||
public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
public:
|
||||
|
||||
void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary, std::vector<double> twist, bool fiveD) {
|
||||
FermionField in_k(in.Grid());
|
||||
FermionField prop_k(in.Grid());
|
||||
|
||||
FFT theFFT((GridCartesian *) in.Grid());
|
||||
|
||||
//phase for boundary condition
|
||||
ComplexField coor(in.Grid());
|
||||
ComplexField ph(in.Grid()); ph = Zero();
|
||||
FermionField in_buf(in.Grid()); in_buf = Zero();
|
||||
typedef typename Simd::scalar_type Scalar;
|
||||
Scalar ci(0.0,1.0);
|
||||
assert(twist.size() == Nd);//check that twist is Nd
|
||||
assert(boundary.size() == Nd);//check that boundary conditions is Nd
|
||||
int shift = 0;
|
||||
if(fiveD) shift = 1;
|
||||
for(unsigned int nu = 0; nu < Nd; nu++)
|
||||
{
|
||||
// Shift coordinate lattice index by 1 to account for 5th dimension.
|
||||
LatticeCoordinate(coor, nu + shift);
|
||||
double boundary_phase = ::acos(real(boundary[nu]));
|
||||
ph = ph + boundary_phase*coor*((1./(in.Grid()->_fdimensions[nu+shift])));
|
||||
//momenta for propagator shifted by twist+boundary
|
||||
twist[nu] = twist[nu] + boundary_phase/((2.0*M_PI));
|
||||
}
|
||||
in_buf = exp(ci*ph*(-1.0))*in;
|
||||
|
||||
if(fiveD){//FFT only on temporal and spatial dimensions
|
||||
std::vector<int> mask(Nd+1,1); mask[0] = 0;
|
||||
theFFT.FFT_dim_mask(in_k,in_buf,mask,FFT::forward);
|
||||
this->MomentumSpacePropagatorHt_5d(prop_k,in_k,mass,twist);
|
||||
theFFT.FFT_dim_mask(out,prop_k,mask,FFT::backward);
|
||||
}
|
||||
else{
|
||||
theFFT.FFT_all_dim(in_k,in,FFT::forward);
|
||||
this->MomentumSpacePropagatorHt(prop_k,in_k,mass,twist);
|
||||
theFFT.FFT_all_dim(out,prop_k,FFT::backward);
|
||||
}
|
||||
//phase for boundary condition
|
||||
out = out * exp(ci*ph);
|
||||
};
|
||||
|
||||
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary,std::vector<double> twist) {
|
||||
bool fiveD = true; //5d propagator by default
|
||||
FreePropagator(in,out,mass,boundary,twist,fiveD);
|
||||
};
|
||||
|
||||
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass, bool fiveD) {
|
||||
std::vector<double> twist(Nd,0.0); //default: periodic boundarys in all directions
|
||||
std::vector<Complex> boundary;
|
||||
for(int i=0;i<Nd;i++) boundary.push_back(1);//default: periodic boundary conditions
|
||||
FreePropagator(in,out,mass,boundary,twist,fiveD);
|
||||
};
|
||||
|
||||
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass) {
|
||||
bool fiveD = true; //5d propagator by default
|
||||
std::vector<double> twist(Nd,0.0); //default: twist angle 0
|
||||
std::vector<Complex> boundary;
|
||||
for(int i=0;i<Nd;i++) boundary.push_back(1); //default: periodic boundary conditions
|
||||
FreePropagator(in,out,mass,boundary,twist,fiveD);
|
||||
};
|
||||
|
||||
virtual void Instantiatable(void) {};
|
||||
// Constructors
|
||||
DomainWallFermion(GaugeField &_Umu,
|
||||
GridCartesian &FiveDimGrid,
|
||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||
GridCartesian &FourDimGrid,
|
||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||
RealD _mass,RealD _M5,const ImplParams &p= ImplParams()) :
|
||||
|
||||
|
||||
CayleyFermion5D<Impl>(_Umu,
|
||||
FiveDimGrid,
|
||||
FiveDimRedBlackGrid,
|
||||
FourDimGrid,
|
||||
FourDimRedBlackGrid,_mass,_M5,p)
|
||||
|
||||
{
|
||||
RealD eps = 1.0;
|
||||
|
||||
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham
|
||||
assert(zdata->n==this->Ls);
|
||||
|
||||
// std::cout<<GridLogMessage << "DomainWallFermion with Ls="<<this->Ls<<std::endl;
|
||||
// Call base setter
|
||||
this->SetCoefficientsTanh(zdata,1.0,0.0);
|
||||
|
||||
Approx::zolotarev_free(zdata);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
@ -1,216 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class S,class Representation = FundamentalRepresentation, class Options=CoeffReal>
|
||||
class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Representation::Dimension> > {
|
||||
public:
|
||||
|
||||
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension> > Gimpl;
|
||||
INHERIT_GIMPL_TYPES(Gimpl);
|
||||
|
||||
static const int Dimension = Representation::Dimension;
|
||||
static const bool isFundamental = Representation::isFundamental;
|
||||
static const bool LsVectorised=true;
|
||||
static const int Nhcs = Options::Nhcs;
|
||||
|
||||
typedef typename Options::_Coeff_t Coeff_t;
|
||||
typedef typename Options::template PrecisionMapper<Simd>::LowerPrecVector SimdL;
|
||||
|
||||
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Dimension>, Ns> >;
|
||||
template <typename vtype> using iImplPropagator = iScalar<iMatrix<iMatrix<vtype, Dimension>, Ns> >;
|
||||
template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
|
||||
template <typename vtype> using iImplHalfCommSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhcs> >;
|
||||
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
|
||||
template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nd>;
|
||||
template <typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Dimension> > >;
|
||||
|
||||
typedef iImplSpinor<Simd> SiteSpinor;
|
||||
typedef iImplPropagator<Simd> SitePropagator;
|
||||
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
||||
typedef iImplHalfCommSpinor<SimdL> SiteHalfCommSpinor;
|
||||
typedef Lattice<SiteSpinor> FermionField;
|
||||
typedef Lattice<SitePropagator> PropagatorField;
|
||||
|
||||
/////////////////////////////////////////////////
|
||||
// Make the doubled gauge field a *scalar*
|
||||
/////////////////////////////////////////////////
|
||||
typedef iImplDoubledGaugeField<typename Simd::scalar_type> SiteDoubledGaugeField; // This is a scalar
|
||||
typedef iImplGaugeField<typename Simd::scalar_type> SiteScalarGaugeField; // scalar
|
||||
typedef iImplGaugeLink<typename Simd::scalar_type> SiteScalarGaugeLink; // scalar
|
||||
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
||||
|
||||
typedef WilsonCompressor<SiteHalfCommSpinor,SiteHalfSpinor, SiteSpinor> Compressor;
|
||||
typedef WilsonImplParams ImplParams;
|
||||
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor,ImplParams> StencilImpl;
|
||||
typedef typename StencilImpl::View_type StencilView;
|
||||
|
||||
ImplParams Params;
|
||||
|
||||
DomainWallVec5dImpl(const ImplParams &p = ImplParams()) : Params(p){};
|
||||
|
||||
template <class ref>
|
||||
static accelerator_inline void loadLinkElement(Simd ®, ref &memory)
|
||||
{
|
||||
vsplat(reg, memory);
|
||||
}
|
||||
|
||||
template<class _Spinor>
|
||||
static accelerator_inline void multLink(_Spinor &phi, const SiteDoubledGaugeField &U,
|
||||
const _Spinor &chi, int mu, StencilEntry *SE,
|
||||
StencilView &St)
|
||||
{
|
||||
#ifdef GPU_VEC
|
||||
// Gauge link is scalarised
|
||||
mult(&phi(), &U(mu), &chi());
|
||||
#else
|
||||
SiteGaugeLink UU;
|
||||
for (int i = 0; i < Dimension; i++) {
|
||||
for (int j = 0; j < Dimension; j++) {
|
||||
vsplat(UU()()(i, j), U(mu)()(i, j));
|
||||
}
|
||||
}
|
||||
mult(&phi(), &UU(), &chi());
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void DoubleStore(GridBase *GaugeGrid, DoubledGaugeField &Uds,const GaugeField &Umu)
|
||||
{
|
||||
SiteScalarGaugeField ScalarUmu;
|
||||
SiteDoubledGaugeField ScalarUds;
|
||||
|
||||
GaugeLinkField U(Umu.Grid());
|
||||
GaugeField Uadj(Umu.Grid());
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
U = PeekIndex<LorentzIndex>(Umu, mu);
|
||||
U = adj(Cshift(U, mu, -1));
|
||||
PokeIndex<LorentzIndex>(Uadj, U, mu);
|
||||
}
|
||||
|
||||
autoView(Umu_v,Umu,CpuRead);
|
||||
autoView(Uadj_v,Uadj,CpuRead);
|
||||
autoView(Uds_v,Uds,CpuWrite);
|
||||
thread_for( lidx, GaugeGrid->lSites(), {
|
||||
Coordinate lcoor;
|
||||
GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
|
||||
|
||||
peekLocalSite(ScalarUmu, Umu_v, lcoor);
|
||||
for (int mu = 0; mu < 4; mu++) ScalarUds(mu) = ScalarUmu(mu);
|
||||
|
||||
peekLocalSite(ScalarUmu, Uadj_v, lcoor);
|
||||
for (int mu = 0; mu < 4; mu++) ScalarUds(mu + 4) = ScalarUmu(mu);
|
||||
|
||||
pokeLocalSite(ScalarUds, Uds_v, lcoor);
|
||||
});
|
||||
}
|
||||
|
||||
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,FermionField &A, int mu)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
inline void outerProductImpl(PropagatorField &mat, const FermionField &Btilde, const FermionField &A){
|
||||
assert(0);
|
||||
}
|
||||
|
||||
inline void TraceSpinImpl(GaugeLinkField &mat, PropagatorField&P) {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
inline void extractLinkField(std::vector<GaugeLinkField> &mat, DoubledGaugeField &Uds){
|
||||
assert(0);
|
||||
}
|
||||
|
||||
|
||||
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã, int mu) {
|
||||
|
||||
assert(0);
|
||||
// Following lines to be revised after Peter's addition of half prec
|
||||
// missing put lane...
|
||||
/*
|
||||
typedef decltype(traceIndex<SpinIndex>(outerProduct(Btilde[0], Atilde[0]))) result_type;
|
||||
unsigned int LLs = Btilde.Grid()->_rdimensions[0];
|
||||
conformable(Atilde.Grid(),Btilde.Grid());
|
||||
GridBase* grid = mat.Grid();
|
||||
GridBase* Bgrid = Btilde.Grid();
|
||||
unsigned int dimU = grid->Nd();
|
||||
unsigned int dimF = Bgrid->Nd();
|
||||
GaugeLinkField tmp(grid);
|
||||
tmp = Zero();
|
||||
|
||||
// FIXME
|
||||
// Current implementation works, thread safe, probably suboptimal
|
||||
// Passing through the local coordinate for grid transformation
|
||||
// the force grid is in general very different from the Ls vectorized grid
|
||||
|
||||
for (int so = 0; so < grid->oSites(); so++) {
|
||||
std::vector<typename result_type::scalar_object> vres(Bgrid->Nsimd());
|
||||
std::vector<int> ocoor; grid->oCoorFromOindex(ocoor,so);
|
||||
for (int si = 0; si < tmp.Grid()->iSites(); si++){
|
||||
typename result_type::scalar_object scalar_object; scalar_object = Zero();
|
||||
std::vector<int> local_coor;
|
||||
std::vector<int> icoor; grid->iCoorFromIindex(icoor,si);
|
||||
grid->InOutCoorToLocalCoor(ocoor, icoor, local_coor);
|
||||
for (int s = 0; s < LLs; s++) {
|
||||
std::vector<int> slocal_coor(dimF);
|
||||
slocal_coor[0] = s;
|
||||
for (int s4d = 1; s4d< dimF; s4d++) slocal_coor[s4d] = local_coor[s4d-1];
|
||||
int sF = Bgrid->oIndexReduced(slocal_coor);
|
||||
assert(sF < Bgrid->oSites());
|
||||
|
||||
extract(traceIndex<SpinIndex>(outerProduct(Btilde[sF], Atilde[sF])), vres);
|
||||
// sum across the 5d dimension
|
||||
for (auto v : vres) scalar_object += v;
|
||||
}
|
||||
tmp[so].putlane(scalar_object, si);
|
||||
}
|
||||
}
|
||||
PokeIndex<LorentzIndex>(mat, tmp, mu);
|
||||
*/
|
||||
}
|
||||
};
|
||||
typedef DomainWallVec5dImpl<vComplex ,FundamentalRepresentation, CoeffReal> DomainWallVec5dImplR; // Real.. whichever prec
|
||||
typedef DomainWallVec5dImpl<vComplexF,FundamentalRepresentation, CoeffReal> DomainWallVec5dImplF; // Float
|
||||
typedef DomainWallVec5dImpl<vComplexD,FundamentalRepresentation, CoeffReal> DomainWallVec5dImplD; // Double
|
||||
|
||||
typedef DomainWallVec5dImpl<vComplex ,FundamentalRepresentation, CoeffRealHalfComms> DomainWallVec5dImplRL; // Real.. whichever prec
|
||||
typedef DomainWallVec5dImpl<vComplexF,FundamentalRepresentation, CoeffRealHalfComms> DomainWallVec5dImplFH; // Float
|
||||
typedef DomainWallVec5dImpl<vComplexD,FundamentalRepresentation, CoeffRealHalfComms> DomainWallVec5dImplDF; // Double
|
||||
|
||||
typedef DomainWallVec5dImpl<vComplex ,FundamentalRepresentation,CoeffComplex> ZDomainWallVec5dImplR; // Real.. whichever prec
|
||||
typedef DomainWallVec5dImpl<vComplexF,FundamentalRepresentation,CoeffComplex> ZDomainWallVec5dImplF; // Float
|
||||
typedef DomainWallVec5dImpl<vComplexD,FundamentalRepresentation,CoeffComplex> ZDomainWallVec5dImplD; // Double
|
||||
|
||||
typedef DomainWallVec5dImpl<vComplex ,FundamentalRepresentation,CoeffComplexHalfComms> ZDomainWallVec5dImplRL; // Real.. whichever prec
|
||||
typedef DomainWallVec5dImpl<vComplexF,FundamentalRepresentation,CoeffComplexHalfComms> ZDomainWallVec5dImplFH; // Float
|
||||
typedef DomainWallVec5dImpl<vComplexD,FundamentalRepresentation,CoeffComplexHalfComms> ZDomainWallVec5dImplDF; // Double
|
||||
|
||||
NAMESPACE_END(Grid);
|
@ -1,195 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/FermionOperator.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: Vera Guelpers <V.M.Guelpers@soton.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Allow to select between gauge representation rank bc's, flavours etc.
|
||||
// and single/double precision.
|
||||
////////////////////////////////////////////////////////////////
|
||||
|
||||
template<class Impl>
|
||||
class FermionOperator : public CheckerBoardedSparseMatrixBase<typename Impl::FermionField>, public Impl
|
||||
{
|
||||
public:
|
||||
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
|
||||
FermionOperator(const ImplParams &p= ImplParams()) : Impl(p) {};
|
||||
virtual ~FermionOperator(void) = default;
|
||||
|
||||
virtual FermionField &tmp(void) = 0;
|
||||
|
||||
virtual void DirichletBlock(const Coordinate & _Block) { assert(0); };
|
||||
|
||||
GridBase * Grid(void) { return FermionGrid(); }; // this is all the linalg routines need to know
|
||||
GridBase * RedBlackGrid(void) { return FermionRedBlackGrid(); };
|
||||
|
||||
virtual GridBase *FermionGrid(void) =0;
|
||||
virtual GridBase *FermionRedBlackGrid(void) =0;
|
||||
virtual GridBase *GaugeGrid(void) =0;
|
||||
virtual GridBase *GaugeRedBlackGrid(void) =0;
|
||||
|
||||
// override multiply
|
||||
virtual void M (const FermionField &in, FermionField &out)=0;
|
||||
virtual void Mdag (const FermionField &in, FermionField &out)=0;
|
||||
|
||||
// half checkerboard operaions
|
||||
virtual void Meooe (const FermionField &in, FermionField &out)=0;
|
||||
virtual void MeooeDag (const FermionField &in, FermionField &out)=0;
|
||||
virtual void Mooee (const FermionField &in, FermionField &out)=0;
|
||||
virtual void MooeeDag (const FermionField &in, FermionField &out)=0;
|
||||
virtual void MooeeInv (const FermionField &in, FermionField &out)=0;
|
||||
virtual void MooeeInvDag (const FermionField &in, FermionField &out)=0;
|
||||
|
||||
// non-hermitian hopping term; half cb or both
|
||||
virtual void Dhop (const FermionField &in, FermionField &out,int dag)=0;
|
||||
virtual void DhopOE(const FermionField &in, FermionField &out,int dag)=0;
|
||||
virtual void DhopEO(const FermionField &in, FermionField &out,int dag)=0;
|
||||
virtual void DhopDir(const FermionField &in, FermionField &out,int dir,int disp)=0; // implemented by WilsonFermion and WilsonFermion5D
|
||||
|
||||
// force terms; five routines; default to Dhop on diagonal
|
||||
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDeriv(mat,U,V,dag);};
|
||||
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDerivOE(mat,U,V,dag);};
|
||||
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDerivEO(mat,U,V,dag);};
|
||||
virtual void MooDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){mat=Zero();}; // Clover can override these
|
||||
virtual void MeeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){mat=Zero();};
|
||||
|
||||
virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
|
||||
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
|
||||
virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
|
||||
|
||||
virtual void Mdiag (const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's
|
||||
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
|
||||
virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
|
||||
|
||||
|
||||
virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);};
|
||||
|
||||
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary,std::vector<double> twist)
|
||||
{
|
||||
FFT theFFT((GridCartesian *) in.Grid());
|
||||
|
||||
typedef typename Simd::scalar_type Scalar;
|
||||
|
||||
FermionField in_k(in.Grid());
|
||||
FermionField prop_k(in.Grid());
|
||||
|
||||
//phase for boundary condition
|
||||
ComplexField coor(in.Grid());
|
||||
ComplexField ph(in.Grid()); ph = Zero();
|
||||
FermionField in_buf(in.Grid()); in_buf = Zero();
|
||||
|
||||
Scalar ci(0.0,1.0);
|
||||
assert(twist.size() == Nd);//check that twist is Nd
|
||||
assert(boundary.size() == Nd);//check that boundary conditions is Nd
|
||||
for(unsigned int nu = 0; nu < Nd; nu++)
|
||||
{
|
||||
LatticeCoordinate(coor, nu);
|
||||
double boundary_phase = ::acos(real(boundary[nu]));
|
||||
ph = ph + boundary_phase*coor*((1./(in.Grid()->_fdimensions[nu])));
|
||||
//momenta for propagator shifted by twist+boundary
|
||||
twist[nu] = twist[nu] + boundary_phase/((2.0*M_PI));
|
||||
}
|
||||
in_buf = exp(ci*ph*(-1.0))*in;
|
||||
|
||||
theFFT.FFT_all_dim(in_k,in_buf,FFT::forward);
|
||||
this->MomentumSpacePropagator(prop_k,in_k,mass,twist);
|
||||
theFFT.FFT_all_dim(out,prop_k,FFT::backward);
|
||||
|
||||
//phase for boundary condition
|
||||
out = out * exp(Scalar(2.0*M_PI)*ci*ph);
|
||||
|
||||
};
|
||||
|
||||
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass) {
|
||||
std::vector<Complex> boundary;
|
||||
for(int i=0;i<Nd;i++) boundary.push_back(1);//default: periodic boundary conditions
|
||||
std::vector<double> twist(Nd,0.0); //default: periodic boundarys in all directions
|
||||
FreePropagator(in,out,mass,boundary,twist);
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Updates gauge field during HMC
|
||||
///////////////////////////////////////////////
|
||||
virtual void ImportGauge(const GaugeField & _U)=0;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Conserved currents, either contract at sink or insert sequentially.
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
virtual void ContractConservedCurrent(PropagatorField &q_in_1,
|
||||
PropagatorField &q_in_2,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &phys_src,
|
||||
Current curr_type,
|
||||
unsigned int mu)
|
||||
{assert(0);};
|
||||
virtual void SeqConservedCurrent(PropagatorField &q_in,
|
||||
PropagatorField &q_out,
|
||||
PropagatorField &phys_src,
|
||||
Current curr_type,
|
||||
unsigned int mu,
|
||||
unsigned int tmin,
|
||||
unsigned int tmax,
|
||||
ComplexField &lattice_cmplx)
|
||||
{assert(0);};
|
||||
|
||||
// Only reimplemented in Wilson5D
|
||||
// Default to just a zero correlation function
|
||||
virtual void ContractJ5q(FermionField &q_in ,ComplexField &J5q) { J5q=Zero(); };
|
||||
virtual void ContractJ5q(PropagatorField &q_in,ComplexField &J5q) { J5q=Zero(); };
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Physical field import/export
|
||||
///////////////////////////////////////////////
|
||||
virtual void Dminus(const FermionField &psi, FermionField &chi) { chi=psi; }
|
||||
virtual void DminusDag(const FermionField &psi, FermionField &chi) { chi=psi; }
|
||||
virtual void ImportPhysicalFermionSource(const FermionField &input,FermionField &imported)
|
||||
{
|
||||
imported = input;
|
||||
};
|
||||
virtual void ImportUnphysicalFermion(const FermionField &input,FermionField &imported)
|
||||
{
|
||||
imported=input;
|
||||
};
|
||||
virtual void ExportPhysicalFermionSolution(const FermionField &solution,FermionField &exported)
|
||||
{
|
||||
exported=solution;
|
||||
};
|
||||
virtual void ExportPhysicalFermionSource(const FermionField &solution,FermionField &exported)
|
||||
{
|
||||
exported=solution;
|
||||
};
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -1,190 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// Template parameter class constructs to package
|
||||
// externally control Fermion implementations
|
||||
// in orthogonal directions
|
||||
//
|
||||
// Ultimately need Impl to always define types where XXX is opaque
|
||||
//
|
||||
// typedef typename XXX Simd;
|
||||
// typedef typename XXX GaugeLinkField;
|
||||
// typedef typename XXX GaugeField;
|
||||
// typedef typename XXX GaugeActField;
|
||||
// typedef typename XXX FermionField;
|
||||
// typedef typename XXX PropagatorField;
|
||||
// typedef typename XXX DoubledGaugeField;
|
||||
// typedef typename XXX SiteSpinor;
|
||||
// typedef typename XXX SitePropagator;
|
||||
// typedef typename XXX SiteHalfSpinor;
|
||||
// typedef typename XXX Compressor;
|
||||
//
|
||||
// and Methods:
|
||||
// void ImportGauge(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
||||
// void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
||||
// void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl::View_type &St)
|
||||
// void InsertForce4D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu)
|
||||
// void InsertForce5D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu)
|
||||
//
|
||||
//
|
||||
// To acquire the typedefs from "Base" (either a base class or template param) use:
|
||||
//
|
||||
// INHERIT_GIMPL_TYPES(Base)
|
||||
// INHERIT_FIMPL_TYPES(Base)
|
||||
// INHERIT_IMPL_TYPES(Base)
|
||||
//
|
||||
// The Fermion operators will do the following:
|
||||
//
|
||||
// struct MyOpParams {
|
||||
// RealD mass;
|
||||
// };
|
||||
//
|
||||
//
|
||||
// template<class Impl>
|
||||
// class MyOp : public<Impl> {
|
||||
// public:
|
||||
//
|
||||
// INHERIT_ALL_IMPL_TYPES(Impl);
|
||||
//
|
||||
// MyOp(MyOpParams Myparm, ImplParams &ImplParam) : Impl(ImplParam)
|
||||
// {
|
||||
//
|
||||
// };
|
||||
//
|
||||
// }
|
||||
//////////////////////////////////////////////
|
||||
|
||||
template <class T> struct SamePrecisionMapper {
|
||||
typedef T HigherPrecVector ;
|
||||
typedef T LowerPrecVector ;
|
||||
};
|
||||
template <class T> struct LowerPrecisionMapper { };
|
||||
template <> struct LowerPrecisionMapper<vRealF> {
|
||||
typedef vRealF HigherPrecVector ;
|
||||
typedef vRealH LowerPrecVector ;
|
||||
};
|
||||
template <> struct LowerPrecisionMapper<vRealD> {
|
||||
typedef vRealD HigherPrecVector ;
|
||||
typedef vRealF LowerPrecVector ;
|
||||
};
|
||||
template <> struct LowerPrecisionMapper<vComplexF> {
|
||||
typedef vComplexF HigherPrecVector ;
|
||||
typedef vComplexH LowerPrecVector ;
|
||||
};
|
||||
template <> struct LowerPrecisionMapper<vComplexD> {
|
||||
typedef vComplexD HigherPrecVector ;
|
||||
typedef vComplexF LowerPrecVector ;
|
||||
};
|
||||
|
||||
struct CoeffReal {
|
||||
public:
|
||||
typedef RealD _Coeff_t;
|
||||
static const int Nhcs = 2;
|
||||
template<class Simd> using PrecisionMapper = SamePrecisionMapper<Simd>;
|
||||
};
|
||||
struct CoeffRealHalfComms {
|
||||
public:
|
||||
typedef RealD _Coeff_t;
|
||||
static const int Nhcs = 1;
|
||||
template<class Simd> using PrecisionMapper = LowerPrecisionMapper<Simd>;
|
||||
};
|
||||
struct CoeffComplex {
|
||||
public:
|
||||
typedef ComplexD _Coeff_t;
|
||||
static const int Nhcs = 2;
|
||||
template<class Simd> using PrecisionMapper = SamePrecisionMapper<Simd>;
|
||||
};
|
||||
struct CoeffComplexHalfComms {
|
||||
public:
|
||||
typedef ComplexD _Coeff_t;
|
||||
static const int Nhcs = 1;
|
||||
template<class Simd> using PrecisionMapper = LowerPrecisionMapper<Simd>;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Implementation dependent fermion types
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define INHERIT_FIMPL_TYPES(Impl)\
|
||||
typedef typename Impl::Coeff_t Coeff_t; \
|
||||
typedef Impl Impl_t; \
|
||||
typedef typename Impl::FermionField FermionField; \
|
||||
typedef typename Impl::PropagatorField PropagatorField; \
|
||||
typedef typename Impl::DoubledGaugeField DoubledGaugeField; \
|
||||
typedef typename Impl::SiteDoubledGaugeField SiteDoubledGaugeField; \
|
||||
typedef typename Impl::SiteSpinor SiteSpinor; \
|
||||
typedef typename Impl::SitePropagator SitePropagator; \
|
||||
typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \
|
||||
typedef typename Impl::Compressor Compressor; \
|
||||
typedef typename Impl::StencilImpl StencilImpl; \
|
||||
typedef typename Impl::ImplParams ImplParams; \
|
||||
typedef typename Impl::StencilImpl::View_type StencilView; \
|
||||
typedef const typename ViewMap<FermionField>::Type FermionFieldView; \
|
||||
typedef const typename ViewMap<DoubledGaugeField>::Type DoubledGaugeFieldView;
|
||||
|
||||
#define INHERIT_IMPL_TYPES(Base) \
|
||||
INHERIT_GIMPL_TYPES(Base) \
|
||||
INHERIT_FIMPL_TYPES(Base)
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
NAMESPACE_CHECK(ImplBase);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Single flavour four spinors with colour index
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
#include <Grid/qcd/action/fermion/WilsonImpl.h>
|
||||
NAMESPACE_CHECK(ImplWilson);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Flavour doubled spinors; is Gparity the only? what about C*?
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
#include <Grid/qcd/action/fermion/GparityWilsonImpl.h>
|
||||
NAMESPACE_CHECK(ImplGparityWilson);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Single flavour one component spinors with colour index
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
#include <Grid/qcd/action/fermion/StaggeredImpl.h>
|
||||
NAMESPACE_CHECK(ImplStaggered);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Single flavour one component spinors with colour index. 5d vec
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Deprecate Vec5d
|
||||
//#include <Grid/qcd/action/fermion/StaggeredVec5dImpl.h>
|
||||
//NAMESPACE_CHECK(ImplStaggered5dVec);
|
||||
|
||||
|
@ -1,238 +0,0 @@
|
||||
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/FourierAcceleratedPV.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Christoph Lehner (lifted with permission by Peter Boyle, brought back to Grid)
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<typename M>
|
||||
void get_real_const_bc(M& m, RealD& _b, RealD& _c) {
|
||||
ComplexD b,c;
|
||||
b=m.bs[0];
|
||||
c=m.cs[0];
|
||||
std::cout << GridLogMessage << "b=" << b << ", c=" << c << std::endl;
|
||||
for (size_t i=1;i<m.bs.size();i++) {
|
||||
assert(m.bs[i] == b);
|
||||
assert(m.cs[i] == c);
|
||||
}
|
||||
assert(b.imag() == 0.0);
|
||||
assert(c.imag() == 0.0);
|
||||
_b = b.real();
|
||||
_c = c.real();
|
||||
}
|
||||
|
||||
|
||||
template<typename Vi, typename M, typename G>
|
||||
class FourierAcceleratedPV {
|
||||
public:
|
||||
|
||||
ConjugateGradient<Vi> &cg;
|
||||
M& dwfPV;
|
||||
G& Umu;
|
||||
GridCartesian* grid5D;
|
||||
GridRedBlackCartesian* gridRB5D;
|
||||
int group_in_s;
|
||||
|
||||
FourierAcceleratedPV(M& _dwfPV, G& _Umu, ConjugateGradient<Vi> &_cg, int _group_in_s = 2)
|
||||
: dwfPV(_dwfPV), Umu(_Umu), cg(_cg), group_in_s(_group_in_s)
|
||||
{
|
||||
assert( dwfPV.FermionGrid()->_fdimensions[0] % (2*group_in_s) == 0);
|
||||
grid5D = SpaceTimeGrid::makeFiveDimGrid(2*group_in_s, (GridCartesian*)Umu.Grid());
|
||||
gridRB5D = SpaceTimeGrid::makeFiveDimRedBlackGrid(2*group_in_s, (GridCartesian*)Umu.Grid());
|
||||
}
|
||||
|
||||
void rotatePV(const Vi& _src, Vi& dst, bool forward) const {
|
||||
|
||||
GridStopWatch gsw1, gsw2;
|
||||
|
||||
typedef typename Vi::scalar_type Coeff_t;
|
||||
int Ls = dst.Grid()->_fdimensions[0];
|
||||
|
||||
Vi _tmp(dst.Grid());
|
||||
double phase = M_PI / (double)Ls;
|
||||
Coeff_t bzero(0.0,0.0);
|
||||
|
||||
FFT theFFT((GridCartesian*)dst.Grid());
|
||||
|
||||
if (!forward) {
|
||||
gsw1.Start();
|
||||
for (int s=0;s<Ls;s++) {
|
||||
Coeff_t a(::cos(phase*s),-::sin(phase*s));
|
||||
axpby_ssp(_tmp,a,_src,bzero,_src,s,s);
|
||||
}
|
||||
gsw1.Stop();
|
||||
|
||||
gsw2.Start();
|
||||
theFFT.FFT_dim(dst,_tmp,0,FFT::forward);
|
||||
gsw2.Stop();
|
||||
|
||||
} else {
|
||||
|
||||
gsw2.Start();
|
||||
theFFT.FFT_dim(_tmp,_src,0,FFT::backward);
|
||||
gsw2.Stop();
|
||||
|
||||
gsw1.Start();
|
||||
for (int s=0;s<Ls;s++) {
|
||||
Coeff_t a(::cos(phase*s),::sin(phase*s));
|
||||
axpby_ssp(dst,a,_tmp,bzero,_tmp,s,s);
|
||||
}
|
||||
gsw1.Stop();
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "Timing rotatePV: " << gsw1.Elapsed() << ", " << gsw2.Elapsed() << std::endl;
|
||||
|
||||
}
|
||||
|
||||
void pvInv(const Vi& _src, Vi& _dst) const {
|
||||
|
||||
std::cout << GridLogMessage << "Fourier-Accelerated Outer Pauli Villars"<<std::endl;
|
||||
|
||||
typedef typename Vi::scalar_type Coeff_t;
|
||||
int Ls = _dst.Grid()->_fdimensions[0];
|
||||
|
||||
GridStopWatch gswT;
|
||||
gswT.Start();
|
||||
|
||||
RealD b,c;
|
||||
get_real_const_bc(dwfPV,b,c);
|
||||
RealD M5 = dwfPV.M5;
|
||||
|
||||
// U(true) Rightinv TMinv U(false) = Minv
|
||||
|
||||
Vi _src_diag(_dst.Grid());
|
||||
Vi _src_diag_slice(dwfPV.GaugeGrid());
|
||||
Vi _dst_diag_slice(dwfPV.GaugeGrid());
|
||||
Vi _src_diag_slices(grid5D);
|
||||
Vi _dst_diag_slices(grid5D);
|
||||
Vi _dst_diag(_dst.Grid());
|
||||
|
||||
rotatePV(_src,_src_diag,false);
|
||||
|
||||
// now do TM solves
|
||||
Gamma G5(Gamma::Algebra::Gamma5);
|
||||
|
||||
GridStopWatch gswA, gswB;
|
||||
|
||||
gswA.Start();
|
||||
|
||||
typedef typename M::Impl_t Impl;
|
||||
//WilsonTMFermion<Impl> tm(x.Umu,*x.UGridF,*x.UrbGridF,0.0,0.0,solver_outer.parent.par.wparams_f);
|
||||
std::vector<RealD> vmass(grid5D->_fdimensions[0],0.0);
|
||||
std::vector<RealD> vmu(grid5D->_fdimensions[0],0.0);
|
||||
|
||||
WilsonTMFermion5D<Impl> tm(Umu,*grid5D,*gridRB5D,
|
||||
*(GridCartesian*)dwfPV.GaugeGrid(),
|
||||
*(GridRedBlackCartesian*)dwfPV.GaugeRedBlackGrid(),
|
||||
vmass,vmu);
|
||||
|
||||
//SchurRedBlackDiagTwoSolve<Vi> sol(cg);
|
||||
SchurRedBlackDiagMooeeSolve<Vi> sol(cg); // same performance as DiagTwo
|
||||
gswA.Stop();
|
||||
|
||||
gswB.Start();
|
||||
|
||||
for (int sgroup=0;sgroup<Ls/2/group_in_s;sgroup++) {
|
||||
|
||||
for (int sidx=0;sidx<group_in_s;sidx++) {
|
||||
|
||||
int s = sgroup*group_in_s + sidx;
|
||||
// int sprime = Ls-s-1;
|
||||
|
||||
RealD phase = M_PI / (RealD)Ls * (2.0 * s + 1.0);
|
||||
RealD cosp = ::cos(phase);
|
||||
RealD sinp = ::sin(phase);
|
||||
RealD denom = b*b + c*c + 2.0*b*c*cosp;
|
||||
RealD mass = -(b*b*M5 + c*(1.0 - cosp + c*M5) + b*(-1.0 + cosp + 2.0*c*cosp*M5))/denom;
|
||||
RealD mu = (b+c)*sinp/denom;
|
||||
|
||||
vmass[2*sidx + 0] = mass;
|
||||
vmass[2*sidx + 1] = mass;
|
||||
vmu[2*sidx + 0] = mu;
|
||||
vmu[2*sidx + 1] = -mu;
|
||||
|
||||
}
|
||||
|
||||
tm.update(vmass,vmu);
|
||||
|
||||
for (int sidx=0;sidx<group_in_s;sidx++) {
|
||||
|
||||
int s = sgroup*group_in_s + sidx;
|
||||
int sprime = Ls-s-1;
|
||||
|
||||
ExtractSlice(_src_diag_slice,_src_diag,s,0);
|
||||
InsertSlice(_src_diag_slice,_src_diag_slices,2*sidx + 0,0);
|
||||
|
||||
ExtractSlice(_src_diag_slice,_src_diag,sprime,0);
|
||||
InsertSlice(_src_diag_slice,_src_diag_slices,2*sidx + 1,0);
|
||||
|
||||
}
|
||||
|
||||
GridStopWatch gsw;
|
||||
gsw.Start();
|
||||
_dst_diag_slices = Zero(); // zero guess
|
||||
sol(tm,_src_diag_slices,_dst_diag_slices);
|
||||
gsw.Stop();
|
||||
std::cout << GridLogMessage << "Solve[sgroup=" << sgroup << "] completed in " << gsw.Elapsed() << ", " << gswA.Elapsed() << std::endl;
|
||||
|
||||
for (int sidx=0;sidx<group_in_s;sidx++) {
|
||||
|
||||
int s = sgroup*group_in_s + sidx;
|
||||
int sprime = Ls-s-1;
|
||||
|
||||
RealD phase = M_PI / (RealD)Ls * (2.0 * s + 1.0);
|
||||
RealD cosp = ::cos(phase);
|
||||
RealD sinp = ::sin(phase);
|
||||
|
||||
// now rotate with inverse of
|
||||
Coeff_t pA = b + c*cosp;
|
||||
Coeff_t pB = - Coeff_t(0.0,1.0)*Coeff_t(c*sinp);
|
||||
Coeff_t pABden = pA*pA - pB*pB;
|
||||
// (pA + pB * G5) * (pA - pB*G5) = (pA^2 - pB^2)
|
||||
|
||||
ExtractSlice(_dst_diag_slice,_dst_diag_slices,2*sidx + 0,0);
|
||||
_dst_diag_slice = (pA/pABden) * _dst_diag_slice - (pB/pABden) * (G5 * _dst_diag_slice);
|
||||
InsertSlice(_dst_diag_slice,_dst_diag,s,0);
|
||||
|
||||
ExtractSlice(_dst_diag_slice,_dst_diag_slices,2*sidx + 1,0);
|
||||
_dst_diag_slice = (pA/pABden) * _dst_diag_slice + (pB/pABden) * (G5 * _dst_diag_slice);
|
||||
InsertSlice(_dst_diag_slice,_dst_diag,sprime,0);
|
||||
}
|
||||
}
|
||||
gswB.Stop();
|
||||
|
||||
rotatePV(_dst_diag,_dst,true);
|
||||
|
||||
gswT.Stop();
|
||||
std::cout << GridLogMessage << "PV completed in " << gswT.Elapsed() << " (Setup: " << gswA.Elapsed() << ", s-loop: " << gswB.Elapsed() << ")" << std::endl;
|
||||
}
|
||||
|
||||
};
|
||||
NAMESPACE_END(Grid);
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user